View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.lzw;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.nio.ByteOrder;
24  
25  import org.apache.commons.compress.MemoryLimitException;
26  import org.apache.commons.compress.compressors.CompressorInputStream;
27  import org.apache.commons.compress.utils.BitInputStream;
28  import org.apache.commons.compress.utils.InputStreamStatistics;
29  
30  /**
31   * <p>
32   * Generic LZW implementation. It is used internally for the Z decompressor and the Unshrinking Zip file compression method, but may be useful for third-party
33   * projects in implementing their own LZW variations.
34   * </p>
35   *
36   * @NotThreadSafe
37   * @since 1.10
38   */
39  public abstract class LZWInputStream extends CompressorInputStream implements InputStreamStatistics {
40      protected static final int DEFAULT_CODE_SIZE = 9;
41      protected static final int UNUSED_PREFIX = -1;
42  
43      private final byte[] oneByte = new byte[1];
44  
45      protected final BitInputStream in;
46      private int clearCode = -1;
47      private int codeSize = DEFAULT_CODE_SIZE;
48      private byte previousCodeFirstChar;
49      private int previousCode = UNUSED_PREFIX;
50      private int tableSize;
51      private int[] prefixes;
52      private byte[] characters;
53      private byte[] outputStack;
54      private int outputStackLocation;
55  
56      protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) {
57          this.in = new BitInputStream(inputStream, byteOrder);
58      }
59  
60      /**
61       * Add a new entry to the dictionary.
62       *
63       * @param previousCode the previous code
64       * @param character    the next character to append
65       * @return the new code
66       * @throws IOException on error
67       */
68      protected abstract int addEntry(int previousCode, byte character) throws IOException;
69  
70      /**
71       * Adds a new entry if the maximum table size hasn't been exceeded and returns the new index.
72       *
73       * @param previousCode the previous code
74       * @param character    the character to append
75       * @param maxTableSize the maximum table size
76       * @return the new code or -1 if maxTableSize has been reached already
77       */
78      protected int addEntry(final int previousCode, final byte character, final int maxTableSize) {
79          if (tableSize < maxTableSize) {
80              prefixes[tableSize] = previousCode;
81              characters[tableSize] = character;
82              return tableSize++;
83          }
84          return -1;
85      }
86  
87      /**
88       * Add entry for repeat of previousCode we haven't added, yet.
89       *
90       * @return new code for a repeat of the previous code or -1 if maxTableSize has been reached already
91       * @throws IOException on error
92       */
93      protected int addRepeatOfPreviousCode() throws IOException {
94          if (previousCode == -1) {
95              // can't have a repeat for the very first code
96              throw new IOException("The first code can't be a reference to its preceding code");
97          }
98          return addEntry(previousCode, previousCodeFirstChar);
99      }
100 
101     @Override
102     public void close() throws IOException {
103         in.close();
104     }
105 
106     /**
107      * Read the next code and expand it.
108      *
109      * @return the expanded next code, negative on EOF
110      * @throws IOException on error
111      */
112     protected abstract int decompressNextSymbol() throws IOException;
113 
114     /**
115      * Expands the entry with index code to the output stack and may create a new entry
116      *
117      * @param code                 the code
118      * @param addedUnfinishedEntry whether unfinished entries have been added
119      * @return the new location of the output stack
120      * @throws IOException on error
121      */
122     protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry) throws IOException {
123         for (int entry = code; entry >= 0; entry = prefixes[entry]) {
124             outputStack[--outputStackLocation] = characters[entry];
125         }
126         if (previousCode != -1 && !addedUnfinishedEntry) {
127             addEntry(previousCode, outputStack[outputStackLocation]);
128         }
129         previousCode = code;
130         previousCodeFirstChar = outputStack[outputStackLocation];
131         return outputStackLocation;
132     }
133 
134     protected int getClearCode() {
135         return clearCode;
136     }
137 
138     protected int getCodeSize() {
139         return codeSize;
140     }
141 
142     /**
143      * @since 1.17
144      */
145     @Override
146     public long getCompressedCount() {
147         return in.getBytesRead();
148     }
149 
150     protected int getPrefix(final int offset) {
151         return prefixes[offset];
152     }
153 
154     protected int getPrefixesLength() {
155         return prefixes.length;
156     }
157 
158     protected int getTableSize() {
159         return tableSize;
160     }
161 
162     protected void incrementCodeSize() {
163         codeSize++;
164     }
165 
166     /**
167      * Initializes the arrays based on the maximum code size.
168      *
169      * @param maxCodeSize maximum code size
170      * @throws IllegalArgumentException if {@code maxCodeSize} is out of bounds for {@code prefixes} and {@code characters}.
171      */
172     protected void initializeTables(final int maxCodeSize) {
173         // maxCodeSize shifted cannot be less than 256, otherwise the loop in initializeTables() will throw an ArrayIndexOutOfBoundsException
174         // maxCodeSize cannot be smaller than getCodeSize(), otherwise addEntry() will throw an ArrayIndexOutOfBoundsException
175         if (1 << maxCodeSize < 256 || getCodeSize() > maxCodeSize) {
176             // TODO test against prefixes.length and characters.length?
177             throw new IllegalArgumentException("maxCodeSize " + maxCodeSize + " is out of bounds.");
178         }
179         final int maxTableSize = 1 << maxCodeSize;
180         prefixes = new int[maxTableSize];
181         characters = new byte[maxTableSize];
182         outputStack = new byte[maxTableSize];
183         outputStackLocation = maxTableSize;
184         final int max = 1 << 8;
185         for (int i = 0; i < max; i++) {
186             prefixes[i] = -1;
187             characters[i] = (byte) i;
188         }
189     }
190 
191     /**
192      * Initializes the arrays based on the maximum code size. First checks that the estimated memory usage is below memoryLimitInKb
193      *
194      * @param maxCodeSize     maximum code size
195      * @param memoryLimitInKb maximum allowed estimated memory usage in Kb
196      * @throws MemoryLimitException     if estimated memory usage is greater than memoryLimitInKb
197      * @throws IllegalArgumentException if {@code maxCodeSize} is not bigger than 0
198      */
199     protected void initializeTables(final int maxCodeSize, final int memoryLimitInKb) throws MemoryLimitException {
200         if (maxCodeSize <= 0) {
201             throw new IllegalArgumentException("maxCodeSize is " + maxCodeSize + ", must be bigger than 0");
202         }
203 
204         if (memoryLimitInKb > -1) {
205             final int maxTableSize = 1 << maxCodeSize;
206             // account for potential overflow
207             final long memoryUsageInBytes = (long) maxTableSize * 6; // (4 (prefixes) + 1 (characters) +1 (outputStack))
208             final long memoryUsageInKb = memoryUsageInBytes >> 10;
209 
210             if (memoryUsageInKb > memoryLimitInKb) {
211                 throw new MemoryLimitException(memoryUsageInKb, memoryLimitInKb);
212             }
213         }
214         initializeTables(maxCodeSize);
215     }
216 
217     @Override
218     public int read() throws IOException {
219         final int ret = read(oneByte);
220         if (ret < 0) {
221             return ret;
222         }
223         return 0xff & oneByte[0];
224     }
225 
226     @Override
227     public int read(final byte[] b, final int off, final int len) throws IOException {
228         if (len == 0) {
229             return 0;
230         }
231         int bytesRead = readFromStack(b, off, len);
232         while (len - bytesRead > 0) {
233             final int result = decompressNextSymbol();
234             if (result < 0) {
235                 if (bytesRead > 0) {
236                     count(bytesRead);
237                     return bytesRead;
238                 }
239                 return result;
240             }
241             bytesRead += readFromStack(b, off + bytesRead, len - bytesRead);
242         }
243         count(bytesRead);
244         return bytesRead;
245     }
246 
247     private int readFromStack(final byte[] b, final int off, final int len) {
248         final int remainingInStack = outputStack.length - outputStackLocation;
249         if (remainingInStack > 0) {
250             final int maxLength = Math.min(remainingInStack, len);
251             System.arraycopy(outputStack, outputStackLocation, b, off, maxLength);
252             outputStackLocation += maxLength;
253             return maxLength;
254         }
255         return 0;
256     }
257 
258     /**
259      * Reads the next code from the stream.
260      *
261      * @return the next code
262      * @throws IOException on error
263      */
264     protected int readNextCode() throws IOException {
265         if (codeSize > 31) {
266             throw new IllegalArgumentException("Code size must not be bigger than 31");
267         }
268         return (int) in.readBits(codeSize);
269     }
270 
271     protected void resetCodeSize() {
272         setCodeSize(DEFAULT_CODE_SIZE);
273     }
274 
275     protected void resetPreviousCode() {
276         this.previousCode = -1;
277     }
278 
279     /**
280      * Sets the clear code based on the code size.
281      *
282      * @param codeSize code size
283      */
284     protected void setClearCode(final int codeSize) {
285         clearCode = 1 << codeSize - 1;
286     }
287 
288     protected void setCodeSize(final int cs) {
289         this.codeSize = cs;
290     }
291 
292     protected void setPrefix(final int offset, final int value) {
293         prefixes[offset] = value;
294     }
295 
296     protected void setTableSize(final int newSize) {
297         tableSize = newSize;
298     }
299 
300 }