View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one or more
3    *  contributor license agreements.  See the NOTICE file distributed with
4    *  this work for additional information regarding copyright ownership.
5    *  The ASF licenses this file to You under the Apache License, Version 2.0
6    *  (the "License"); you may not use this file except in compliance with
7    *  the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   */
17  package org.apache.commons.compress.compressors.deflate64;
18  
19  import static org.apache.commons.compress.compressors.deflate64.HuffmanState.DYNAMIC_CODES;
20  import static org.apache.commons.compress.compressors.deflate64.HuffmanState.FIXED_CODES;
21  import static org.apache.commons.compress.compressors.deflate64.HuffmanState.INITIAL;
22  import static org.apache.commons.compress.compressors.deflate64.HuffmanState.STORED;
23  
24  import java.io.Closeable;
25  import java.io.EOFException;
26  import java.io.IOException;
27  import java.io.InputStream;
28  import java.nio.ByteOrder;
29  import java.util.Arrays;
30  
31  import org.apache.commons.compress.utils.BitInputStream;
32  import org.apache.commons.compress.utils.ByteUtils;
33  import org.apache.commons.compress.utils.ExactMath;
34  import org.apache.commons.lang3.ArrayFill;
35  
36  /**
37   * TODO This class can't be final because it is mocked by Mockito.
38   */
39  class HuffmanDecoder implements Closeable {
40  
41      private static final class BinaryTreeNode {
42          private final int bits;
43          int literal = -1;
44          BinaryTreeNode leftNode;
45          BinaryTreeNode rightNode;
46  
47          private BinaryTreeNode(final int bits) {
48              this.bits = bits;
49          }
50  
51          void leaf(final int symbol) {
52              literal = symbol;
53              leftNode = null;
54              rightNode = null;
55          }
56  
57          BinaryTreeNode left() {
58              if (leftNode == null && literal == -1) {
59                  leftNode = new BinaryTreeNode(bits + 1);
60              }
61              return leftNode;
62          }
63  
64          BinaryTreeNode right() {
65              if (rightNode == null && literal == -1) {
66                  rightNode = new BinaryTreeNode(bits + 1);
67              }
68              return rightNode;
69          }
70      }
71  
72      private abstract static class DecoderState {
73          abstract int available() throws IOException;
74  
75          abstract boolean hasData();
76  
77          abstract int read(byte[] b, int off, int len) throws IOException;
78  
79          abstract HuffmanState state();
80      }
81  
82      private static final class DecodingMemory {
83          private final byte[] memory;
84          private final int mask;
85          private int wHead;
86          private boolean wrappedAround;
87  
88          private DecodingMemory() {
89              this(16);
90          }
91  
92          private DecodingMemory(final int bits) {
93              memory = new byte[1 << bits];
94              mask = memory.length - 1;
95          }
96  
97          byte add(final byte b) {
98              memory[wHead] = b;
99              wHead = incCounter(wHead);
100             return b;
101         }
102 
103         void add(final byte[] b, final int off, final int len) {
104             for (int i = off; i < off + len; i++) {
105                 add(b[i]);
106             }
107         }
108 
109         private int incCounter(final int counter) {
110             final int newCounter = counter + 1 & mask;
111             if (!wrappedAround && newCounter < counter) {
112                 wrappedAround = true;
113             }
114             return newCounter;
115         }
116 
117         void recordToBuffer(final int distance, final int length, final byte[] buff) {
118             if (distance > memory.length) {
119                 throw new IllegalStateException("Illegal distance parameter: " + distance);
120             }
121             final int start = wHead - distance & mask;
122             if (!wrappedAround && start >= wHead) {
123                 throw new IllegalStateException("Attempt to read beyond memory: dist=" + distance);
124             }
125             for (int i = 0, pos = start; i < length; i++, pos = incCounter(pos)) {
126                 buff[i] = add(memory[pos]);
127             }
128         }
129     }
130 
131     private final class HuffmanCodes extends DecoderState {
132         private boolean endOfBlock;
133         private final HuffmanState state;
134         private final BinaryTreeNode lengthTree;
135         private final BinaryTreeNode distanceTree;
136 
137         private int runBufferPos;
138         private byte[] runBuffer = ByteUtils.EMPTY_BYTE_ARRAY;
139         private int runBufferLength;
140 
141         HuffmanCodes(final HuffmanState state, final int[] lengths, final int[] distance) {
142             this.state = state;
143             lengthTree = buildTree(lengths);
144             distanceTree = buildTree(distance);
145         }
146 
147         @Override
148         int available() {
149             return runBufferLength - runBufferPos;
150         }
151 
152         private int copyFromRunBuffer(final byte[] b, final int off, final int len) {
153             final int bytesInBuffer = runBufferLength - runBufferPos;
154             int copiedBytes = 0;
155             if (bytesInBuffer > 0) {
156                 copiedBytes = Math.min(len, bytesInBuffer);
157                 System.arraycopy(runBuffer, runBufferPos, b, off, copiedBytes);
158                 runBufferPos += copiedBytes;
159             }
160             return copiedBytes;
161         }
162 
163         private int decodeNext(final byte[] b, final int off, final int len) throws IOException {
164             if (endOfBlock) {
165                 return -1;
166             }
167             int result = copyFromRunBuffer(b, off, len);
168 
169             while (result < len) {
170                 final int symbol = nextSymbol(reader, lengthTree);
171                 if (symbol < 256) {
172                     b[off + result++] = memory.add((byte) symbol);
173                 } else if (symbol > 256) {
174                     final int runMask = RUN_LENGTH_TABLE[symbol - 257];
175                     int run = runMask >>> 5;
176                     final int runXtra = runMask & 0x1F;
177                     run = ExactMath.add(run, readBits(runXtra));
178 
179                     final int distSym = nextSymbol(reader, distanceTree);
180 
181                     final int distMask = DISTANCE_TABLE[distSym];
182                     int dist = distMask >>> 4;
183                     final int distXtra = distMask & 0xF;
184                     dist = ExactMath.add(dist, readBits(distXtra));
185 
186                     if (runBuffer.length < run) {
187                         runBuffer = new byte[run];
188                     }
189                     runBufferLength = run;
190                     runBufferPos = 0;
191                     memory.recordToBuffer(dist, run, runBuffer);
192 
193                     result += copyFromRunBuffer(b, off + result, len - result);
194                 } else {
195                     endOfBlock = true;
196                     return result;
197                 }
198             }
199 
200             return result;
201         }
202 
203         @Override
204         boolean hasData() {
205             return !endOfBlock;
206         }
207 
208         @Override
209         int read(final byte[] b, final int off, final int len) throws IOException {
210             if (len == 0) {
211                 return 0;
212             }
213             return decodeNext(b, off, len);
214         }
215 
216         @Override
217         HuffmanState state() {
218             return endOfBlock ? INITIAL : state;
219         }
220     }
221 
222     private static final class InitialState extends DecoderState {
223         @Override
224         int available() {
225             return 0;
226         }
227 
228         @Override
229         boolean hasData() {
230             return false;
231         }
232 
233         @Override
234         int read(final byte[] b, final int off, final int len) throws IOException {
235             if (len == 0) {
236                 return 0;
237             }
238             throw new IllegalStateException("Cannot read in this state");
239         }
240 
241         @Override
242         HuffmanState state() {
243             return INITIAL;
244         }
245     }
246 
247     private final class UncompressedState extends DecoderState {
248         private final long blockLength;
249         private long read;
250 
251         private UncompressedState(final long blockLength) {
252             this.blockLength = blockLength;
253         }
254 
255         @Override
256         int available() throws IOException {
257             return (int) Math.min(blockLength - read, reader.bitsAvailable() / Byte.SIZE);
258         }
259 
260         @Override
261         boolean hasData() {
262             return read < blockLength;
263         }
264 
265         @Override
266         int read(final byte[] b, final int off, final int len) throws IOException {
267             if (len == 0) {
268                 return 0;
269             }
270             // as len is an int and (blockLength - read) is >= 0 the min must fit into an int as well
271             final int max = (int) Math.min(blockLength - read, len);
272             int readSoFar = 0;
273             while (readSoFar < max) {
274                 final int readNow;
275                 if (reader.bitsCached() > 0) {
276                     final byte next = (byte) readBits(Byte.SIZE);
277                     b[off + readSoFar] = memory.add(next);
278                     readNow = 1;
279                 } else {
280                     readNow = in.read(b, off + readSoFar, max - readSoFar);
281                     if (readNow == -1) {
282                         throw new EOFException("Truncated Deflate64 Stream");
283                     }
284                     memory.add(b, off + readSoFar, readNow);
285                 }
286                 read += readNow;
287                 readSoFar += readNow;
288             }
289             return max;
290         }
291 
292         @Override
293         HuffmanState state() {
294             return read < blockLength ? STORED : INITIAL;
295         }
296     }
297 
298     /**
299      * <pre>
300      * --------------------------------------------------------------------
301      * idx  xtra  base     idx  xtra  base     idx  xtra  base
302      * --------------------------------------------------------------------
303      * 257   0     3       267   1   15,16     277   4   67-82
304      * 258   0     4       268   1   17,18     278   4   83-98
305      * 259   0     5       269   2   19-22     279   4   99-114
306      * 260   0     6       270   2   23-26     280   4   115-130
307      * 261   0     7       271   2   27-30     281   5   131-162
308      * 262   0     8       272   2   31-34     282   5   163-194
309      * 263   0     9       273   3   35-42     283   5   195-226
310      * 264   0     10      274   3   43-50     284   5   227-257
311      * 265   1     11,12   275   3   51-58     285   16  3
312      * 266   1     13,14   276   3   59-66
313      * --------------------------------------------------------------------
314      * </pre>
315      *
316      * value = (base of run length) << 5 | (number of extra bits to read)
317      */
318     private static final short[] RUN_LENGTH_TABLE = { 96, 128, 160, 192, 224, 256, 288, 320, 353, 417, 481, 545, 610, 738, 866, 994, 1123, 1379, 1635, 1891,
319             2148, 2660, 3172, 3684, 4197, 5221, 6245, 7269, 112 };
320     /**
321      * <pre>
322      * --------------------------------------------------------------------
323      * idx  xtra  dist     idx  xtra  dist       idx  xtra  dist
324      * --------------------------------------------------------------------
325      * 0    0     1        10   4     33-48      20    9   1025-1536
326      * 1    0     2        11   4     49-64      21    9   1537-2048
327      * 2    0     3        12   5     65-96      22   10   2049-3072
328      * 3    0     4        13   5     97-128     23   10   3073-4096
329      * 4    1     5,6      14   6     129-192    24   11   4097-6144
330      * 5    1     7,8      15   6     193-256    25   11   6145-8192
331      * 6    2     9-12     16   7     257-384    26   12   8193-12288
332      * 7    2     13-16    17   7     385-512    27   12   12289-16384
333      * 8    3     17-24    18   8     513-768    28   13   16385-24576
334      * 9    3     25-32    19   8     769-1024   29   13   24577-32768
335      * 30   14   32769-49152
336      * 31   14   49153-65536
337      * --------------------------------------------------------------------
338      * </pre>
339      *
340      * value = (base of distance) << 4 | (number of extra bits to read)
341      */
342     private static final int[] DISTANCE_TABLE = { 16, 32, 48, 64, 81, 113, 146, 210, 275, 403, // 0-9
343             532, 788, 1045, 1557, 2070, 3094, 4119, 6167, 8216, 12312, // 10-19
344             16409, 24601, 32794, 49178, 65563, 98331, 131100, 196636, 262173, 393245, // 20-29
345             524318, 786462 // 30-31
346     };
347     /**
348      * When using dynamic huffman codes the order in which the values are stored follows the positioning below
349      */
350     private static final int[] CODE_LENGTHS_ORDER = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
351     /**
352      * Huffman Fixed Literal / Distance tables for mode 1
353      */
354     private static final int[] FIXED_LITERALS;
355 
356     private static final int[] FIXED_DISTANCE;
357 
358     static {
359         FIXED_LITERALS = new int[288];
360         Arrays.fill(FIXED_LITERALS, 0, 144, 8);
361         Arrays.fill(FIXED_LITERALS, 144, 256, 9);
362         Arrays.fill(FIXED_LITERALS, 256, 280, 7);
363         Arrays.fill(FIXED_LITERALS, 280, 288, 8);
364 
365         FIXED_DISTANCE = ArrayFill.fill(new int[32], 5);
366     }
367 
368     private static BinaryTreeNode buildTree(final int[] litTable) {
369         final int[] literalCodes = getCodes(litTable);
370 
371         final BinaryTreeNode root = new BinaryTreeNode(0);
372 
373         for (int i = 0; i < litTable.length; i++) {
374             final int len = litTable[i];
375             if (len != 0) {
376                 BinaryTreeNode node = root;
377                 final int lit = literalCodes[len - 1];
378                 for (int p = len - 1; p >= 0; p--) {
379                     final int bit = lit & 1 << p;
380                     node = bit == 0 ? node.left() : node.right();
381                     if (node == null) {
382                         throw new IllegalStateException("node doesn't exist in Huffman tree");
383                     }
384                 }
385                 node.leaf(i);
386                 literalCodes[len - 1]++;
387             }
388         }
389         return root;
390     }
391 
392     private static int[] getCodes(final int[] litTable) {
393         int max = 0;
394         int[] blCount = new int[65];
395 
396         for (final int aLitTable : litTable) {
397             if (aLitTable < 0 || aLitTable > 64) {
398                 throw new IllegalArgumentException("Invalid code " + aLitTable + " in literal table");
399             }
400             max = Math.max(max, aLitTable);
401             blCount[aLitTable]++;
402         }
403         blCount = Arrays.copyOf(blCount, max + 1);
404 
405         int code = 0;
406         final int[] nextCode = new int[max + 1];
407         for (int i = 0; i <= max; i++) {
408             code = code + blCount[i] << 1;
409             nextCode[i] = code;
410         }
411 
412         return nextCode;
413     }
414 
415     private static int nextSymbol(final BitInputStream reader, final BinaryTreeNode tree) throws IOException {
416         BinaryTreeNode node = tree;
417         while (node != null && node.literal == -1) {
418             final long bit = readBits(reader, 1);
419             node = bit == 0 ? node.leftNode : node.rightNode;
420         }
421         return node != null ? node.literal : -1;
422     }
423 
424     private static void populateDynamicTables(final BitInputStream reader, final int[] literals, final int[] distances) throws IOException {
425         final int codeLengths = (int) (readBits(reader, 4) + 4);
426 
427         final int[] codeLengthValues = new int[19];
428         for (int cLen = 0; cLen < codeLengths; cLen++) {
429             codeLengthValues[CODE_LENGTHS_ORDER[cLen]] = (int) readBits(reader, 3);
430         }
431 
432         final BinaryTreeNode codeLengthTree = buildTree(codeLengthValues);
433 
434         final int[] auxBuffer = new int[literals.length + distances.length];
435 
436         int value = -1;
437         int length = 0;
438         int off = 0;
439         while (off < auxBuffer.length) {
440             if (length > 0) {
441                 auxBuffer[off++] = value;
442                 length--;
443             } else {
444                 final int symbol = nextSymbol(reader, codeLengthTree);
445                 if (symbol < 16) {
446                     value = symbol;
447                     auxBuffer[off++] = value;
448                 } else {
449                     switch (symbol) {
450                     case 16:
451                         length = (int) (readBits(reader, 2) + 3);
452                         break;
453                     case 17:
454                         value = 0;
455                         length = (int) (readBits(reader, 3) + 3);
456                         break;
457                     case 18:
458                         value = 0;
459                         length = (int) (readBits(reader, 7) + 11);
460                         break;
461                     default:
462                         break;
463                     }
464                 }
465             }
466         }
467 
468         System.arraycopy(auxBuffer, 0, literals, 0, literals.length);
469         System.arraycopy(auxBuffer, literals.length, distances, 0, distances.length);
470     }
471 
472     private static long readBits(final BitInputStream reader, final int numBits) throws IOException {
473         final long r = reader.readBits(numBits);
474         if (r == -1) {
475             throw new EOFException("Truncated Deflate64 Stream");
476         }
477         return r;
478     }
479 
480     private boolean finalBlock;
481 
482     private DecoderState state;
483 
484     private BitInputStream reader;
485 
486     private final InputStream in;
487 
488     private final DecodingMemory memory = new DecodingMemory();
489 
490     HuffmanDecoder(final InputStream in) {
491         this.reader = new BitInputStream(in, ByteOrder.LITTLE_ENDIAN);
492         this.in = in;
493         state = new InitialState();
494     }
495 
496     int available() throws IOException {
497         return state.available();
498     }
499 
500     @Override
501     public void close() {
502         state = new InitialState();
503         reader = null;
504     }
505 
506     public int decode(final byte[] b) throws IOException {
507         return decode(b, 0, b.length);
508     }
509 
510     public int decode(final byte[] b, final int off, final int len) throws IOException {
511         while (!finalBlock || state.hasData()) {
512             if (state.state() == INITIAL) {
513                 finalBlock = readBits(1) == 1;
514                 final int mode = (int) readBits(2);
515                 switch (mode) {
516                 case 0:
517                     switchToUncompressedState();
518                     break;
519                 case 1:
520                     state = new HuffmanCodes(FIXED_CODES, FIXED_LITERALS, FIXED_DISTANCE);
521                     break;
522                 case 2:
523                     final int[][] tables = readDynamicTables();
524                     state = new HuffmanCodes(DYNAMIC_CODES, tables[0], tables[1]);
525                     break;
526                 default:
527                     throw new IllegalStateException("Unsupported compression: " + mode);
528                 }
529             } else {
530                 final int r = state.read(b, off, len);
531                 if (r != 0) {
532                     return r;
533                 }
534             }
535         }
536         return -1;
537     }
538 
539     /**
540      * @since 1.17
541      */
542     long getBytesRead() {
543         return reader.getBytesRead();
544     }
545 
546     private long readBits(final int numBits) throws IOException {
547         return readBits(reader, numBits);
548     }
549 
550     private int[][] readDynamicTables() throws IOException {
551         final int[][] result = new int[2][];
552         final int literals = (int) (readBits(5) + 257);
553         result[0] = new int[literals];
554 
555         final int distances = (int) (readBits(5) + 1);
556         result[1] = new int[distances];
557 
558         populateDynamicTables(reader, result[0], result[1]);
559         return result;
560     }
561 
562     private void switchToUncompressedState() throws IOException {
563         reader.alignWithByteBoundary();
564         final long bLen = readBits(16);
565         final long bNLen = readBits(16);
566         if (((bLen ^ 0xFFFF) & 0xFFFF) != bNLen) {
567             // noinspection DuplicateStringLiteralInspection
568             throw new IllegalStateException("Illegal LEN / NLEN values");
569         }
570         state = new UncompressedState(bLen);
571     }
572 }