001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.lzw; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.nio.ByteOrder; 024 025import org.apache.commons.compress.MemoryLimitException; 026import org.apache.commons.compress.compressors.CompressorInputStream; 027import org.apache.commons.compress.utils.BitInputStream; 028import org.apache.commons.compress.utils.InputStreamStatistics; 029 030/** 031 * <p> 032 * Generic LZW implementation. It is used internally for the Z decompressor and the Unshrinking Zip file compression method, but may be useful for third-party 033 * projects in implementing their own LZW variations. 034 * </p> 035 * 036 * @NotThreadSafe 037 * @since 1.10 038 */ 039public abstract class LZWInputStream extends CompressorInputStream implements InputStreamStatistics { 040 protected static final int DEFAULT_CODE_SIZE = 9; 041 protected static final int UNUSED_PREFIX = -1; 042 043 private final byte[] oneByte = new byte[1]; 044 045 protected final BitInputStream in; 046 private int clearCode = -1; 047 private int codeSize = DEFAULT_CODE_SIZE; 048 private byte previousCodeFirstChar; 049 private int previousCode = UNUSED_PREFIX; 050 private int tableSize; 051 private int[] prefixes; 052 private byte[] characters; 053 private byte[] outputStack; 054 private int outputStackLocation; 055 056 protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) { 057 this.in = new BitInputStream(inputStream, byteOrder); 058 } 059 060 /** 061 * Add a new entry to the dictionary. 062 * 063 * @param previousCode the previous code 064 * @param character the next character to append 065 * @return the new code 066 * @throws IOException on error 067 */ 068 protected abstract int addEntry(int previousCode, byte character) throws IOException; 069 070 /** 071 * Adds a new entry if the maximum table size hasn't been exceeded and returns the new index. 072 * 073 * @param previousCode the previous code 074 * @param character the character to append 075 * @param maxTableSize the maximum table size 076 * @return the new code or -1 if maxTableSize has been reached already 077 */ 078 protected int addEntry(final int previousCode, final byte character, final int maxTableSize) { 079 if (tableSize < maxTableSize) { 080 prefixes[tableSize] = previousCode; 081 characters[tableSize] = character; 082 return tableSize++; 083 } 084 return -1; 085 } 086 087 /** 088 * Add entry for repeat of previousCode we haven't added, yet. 089 * 090 * @return new code for a repeat of the previous code or -1 if maxTableSize has been reached already 091 * @throws IOException on error 092 */ 093 protected int addRepeatOfPreviousCode() throws IOException { 094 if (previousCode == -1) { 095 // can't have a repeat for the very first code 096 throw new IOException("The first code can't be a reference to its preceding code"); 097 } 098 return addEntry(previousCode, previousCodeFirstChar); 099 } 100 101 @Override 102 public void close() throws IOException { 103 in.close(); 104 } 105 106 /** 107 * Read the next code and expand it. 108 * 109 * @return the expanded next code, negative on EOF 110 * @throws IOException on error 111 */ 112 protected abstract int decompressNextSymbol() throws IOException; 113 114 /** 115 * Expands the entry with index code to the output stack and may create a new entry 116 * 117 * @param code the code 118 * @param addedUnfinishedEntry whether unfinished entries have been added 119 * @return the new location of the output stack 120 * @throws IOException on error 121 */ 122 protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry) throws IOException { 123 for (int entry = code; entry >= 0; entry = prefixes[entry]) { 124 outputStack[--outputStackLocation] = characters[entry]; 125 } 126 if (previousCode != -1 && !addedUnfinishedEntry) { 127 addEntry(previousCode, outputStack[outputStackLocation]); 128 } 129 previousCode = code; 130 previousCodeFirstChar = outputStack[outputStackLocation]; 131 return outputStackLocation; 132 } 133 134 protected int getClearCode() { 135 return clearCode; 136 } 137 138 protected int getCodeSize() { 139 return codeSize; 140 } 141 142 /** 143 * @since 1.17 144 */ 145 @Override 146 public long getCompressedCount() { 147 return in.getBytesRead(); 148 } 149 150 protected int getPrefix(final int offset) { 151 return prefixes[offset]; 152 } 153 154 protected int getPrefixesLength() { 155 return prefixes.length; 156 } 157 158 protected int getTableSize() { 159 return tableSize; 160 } 161 162 protected void incrementCodeSize() { 163 codeSize++; 164 } 165 166 /** 167 * Initializes the arrays based on the maximum code size. 168 * 169 * @param maxCodeSize maximum code size 170 * @throws IllegalArgumentException if {@code maxCodeSize} is out of bounds for {@code prefixes} and {@code characters}. 171 */ 172 protected void initializeTables(final int maxCodeSize) { 173 // maxCodeSize shifted cannot be less than 256, otherwise the loop in initializeTables() will throw an ArrayIndexOutOfBoundsException 174 // maxCodeSize cannot be smaller than getCodeSize(), otherwise addEntry() will throw an ArrayIndexOutOfBoundsException 175 if (1 << maxCodeSize < 256 || getCodeSize() > maxCodeSize) { 176 // TODO test against prefixes.length and characters.length? 177 throw new IllegalArgumentException("maxCodeSize " + maxCodeSize + " is out of bounds."); 178 } 179 final int maxTableSize = 1 << maxCodeSize; 180 prefixes = new int[maxTableSize]; 181 characters = new byte[maxTableSize]; 182 outputStack = new byte[maxTableSize]; 183 outputStackLocation = maxTableSize; 184 final int max = 1 << 8; 185 for (int i = 0; i < max; i++) { 186 prefixes[i] = -1; 187 characters[i] = (byte) i; 188 } 189 } 190 191 /** 192 * Initializes the arrays based on the maximum code size. First checks that the estimated memory usage is below memoryLimitInKb 193 * 194 * @param maxCodeSize maximum code size 195 * @param memoryLimitInKb maximum allowed estimated memory usage in Kb 196 * @throws MemoryLimitException if estimated memory usage is greater than memoryLimitInKb 197 * @throws IllegalArgumentException if {@code maxCodeSize} is not bigger than 0 198 */ 199 protected void initializeTables(final int maxCodeSize, final int memoryLimitInKb) throws MemoryLimitException { 200 if (maxCodeSize <= 0) { 201 throw new IllegalArgumentException("maxCodeSize is " + maxCodeSize + ", must be bigger than 0"); 202 } 203 204 if (memoryLimitInKb > -1) { 205 final int maxTableSize = 1 << maxCodeSize; 206 // account for potential overflow 207 final long memoryUsageInBytes = (long) maxTableSize * 6; // (4 (prefixes) + 1 (characters) +1 (outputStack)) 208 final long memoryUsageInKb = memoryUsageInBytes >> 10; 209 210 if (memoryUsageInKb > memoryLimitInKb) { 211 throw new MemoryLimitException(memoryUsageInKb, memoryLimitInKb); 212 } 213 } 214 initializeTables(maxCodeSize); 215 } 216 217 @Override 218 public int read() throws IOException { 219 final int ret = read(oneByte); 220 if (ret < 0) { 221 return ret; 222 } 223 return 0xff & oneByte[0]; 224 } 225 226 @Override 227 public int read(final byte[] b, final int off, final int len) throws IOException { 228 if (len == 0) { 229 return 0; 230 } 231 int bytesRead = readFromStack(b, off, len); 232 while (len - bytesRead > 0) { 233 final int result = decompressNextSymbol(); 234 if (result < 0) { 235 if (bytesRead > 0) { 236 count(bytesRead); 237 return bytesRead; 238 } 239 return result; 240 } 241 bytesRead += readFromStack(b, off + bytesRead, len - bytesRead); 242 } 243 count(bytesRead); 244 return bytesRead; 245 } 246 247 private int readFromStack(final byte[] b, final int off, final int len) { 248 final int remainingInStack = outputStack.length - outputStackLocation; 249 if (remainingInStack > 0) { 250 final int maxLength = Math.min(remainingInStack, len); 251 System.arraycopy(outputStack, outputStackLocation, b, off, maxLength); 252 outputStackLocation += maxLength; 253 return maxLength; 254 } 255 return 0; 256 } 257 258 /** 259 * Reads the next code from the stream. 260 * 261 * @return the next code 262 * @throws IOException on error 263 */ 264 protected int readNextCode() throws IOException { 265 if (codeSize > 31) { 266 throw new IllegalArgumentException("Code size must not be bigger than 31"); 267 } 268 return (int) in.readBits(codeSize); 269 } 270 271 protected void resetCodeSize() { 272 setCodeSize(DEFAULT_CODE_SIZE); 273 } 274 275 protected void resetPreviousCode() { 276 this.previousCode = -1; 277 } 278 279 /** 280 * Sets the clear code based on the code size. 281 * 282 * @param codeSize code size 283 */ 284 protected void setClearCode(final int codeSize) { 285 clearCode = 1 << codeSize - 1; 286 } 287 288 protected void setCodeSize(final int cs) { 289 this.codeSize = cs; 290 } 291 292 protected void setPrefix(final int offset, final int value) { 293 prefixes[offset] = value; 294 } 295 296 protected void setTableSize(final int newSize) { 297 tableSize = newSize; 298 } 299 300}