001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 020/* 021 * This package is based on the work done by Keiron Liddle, Aftex Software 022 * <keiron@aftexsw.com> to whom the Ant project is very grateful for his 023 * great code. 024 */ 025package org.apache.commons.compress.compressors.bzip2; 026 027import java.io.IOException; 028import java.io.InputStream; 029import java.nio.ByteOrder; 030import java.util.Arrays; 031 032import org.apache.commons.compress.compressors.CompressorInputStream; 033import org.apache.commons.compress.utils.BitInputStream; 034import org.apache.commons.compress.utils.InputStreamStatistics; 035import org.apache.commons.io.input.CloseShieldInputStream; 036 037/** 038 * An input stream that decompresses from the BZip2 format to be read as any other stream. 039 * 040 * @NotThreadSafe 041 */ 042public class BZip2CompressorInputStream extends CompressorInputStream implements BZip2Constants, InputStreamStatistics { 043 044 private static final class Data { 045 046 // (with blockSize 900k) 047 final boolean[] inUse = new boolean[256]; // 256 byte 048 049 final byte[] seqToUnseq = new byte[256]; // 256 byte 050 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte 051 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte 052 053 /** 054 * Freq table collected to save a pass over the data during decompression. 055 */ 056 final int[] unzftab = new int[256]; // 1024 byte 057 058 final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 059 final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 060 final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 061 final int[] minLens = new int[N_GROUPS]; // 24 byte 062 063 final int[] cftab = new int[257]; // 1028 byte 064 final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte 065 final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096 066 // byte 067 final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte 068 // --------------- 069 // 60798 byte 070 071 int[] tt; // 3600000 byte 072 final byte[] ll8; // 900000 byte 073 074 // --------------- 075 // 4560782 byte 076 // =============== 077 078 Data(final int blockSize100k) { 079 this.ll8 = new byte[blockSize100k * BASEBLOCKSIZE]; 080 } 081 082 /** 083 * Initializes the {@link #tt} array. 084 * 085 * This method is called when the required length of the array is known. I don't initialize it at construction time to avoid unnecessary memory 086 * allocation when compressing small files. 087 */ 088 int[] initTT(final int length) { 089 int[] ttShadow = this.tt; 090 091 // tt.length should always be >= length, but theoretically 092 // it can happen, if the compressor mixed small and large 093 // blocks. Normally only the last block will be smaller 094 // than others. 095 if (ttShadow == null || ttShadow.length < length) { 096 this.tt = ttShadow = new int[length]; 097 } 098 099 return ttShadow; 100 } 101 102 } 103 104 private static final int EOF = 0; 105 106 private static final int START_BLOCK_STATE = 1; 107 108 private static final int RAND_PART_A_STATE = 2; 109 110 private static final int RAND_PART_B_STATE = 3; 111 112 private static final int RAND_PART_C_STATE = 4; 113 114 private static final int NO_RAND_PART_A_STATE = 5; 115 private static final int NO_RAND_PART_B_STATE = 6; 116 117 private static final int NO_RAND_PART_C_STATE = 7; 118 119 private static boolean bsGetBit(final BitInputStream bin) throws IOException { 120 return bsR(bin, 1) != 0; 121 } 122 123 private static int bsGetInt(final BitInputStream bin) throws IOException { 124 return bsR(bin, 32); 125 } 126 127 private static char bsGetUByte(final BitInputStream bin) throws IOException { 128 return (char) bsR(bin, 8); 129 } 130 131 /** 132 * read bits from the input stream 133 * 134 * @param n the number of bits to read, must not exceed 32? 135 * @return the requested bits combined into an int 136 * @throws IOException 137 */ 138 private static int bsR(final BitInputStream bin, final int n) throws IOException { 139 final long thech = bin.readBits(n); 140 if (thech < 0) { 141 throw new IOException("Unexpected end of stream"); 142 } 143 return (int) thech; 144 } 145 146 private static void checkBounds(final int checkVal, final int limitExclusive, final String name) throws IOException { 147 if (checkVal < 0) { 148 throw new IOException("Corrupted input, " + name + " value negative"); 149 } 150 if (checkVal >= limitExclusive) { 151 throw new IOException("Corrupted input, " + name + " value too big"); 152 } 153 } 154 155 /** 156 * Called by createHuffmanDecodingTables() exclusively. 157 */ 158 private static void hbCreateDecodeTables(final int[] limit, final int[] base, final int[] perm, final char[] length, final int minLen, final int maxLen, 159 final int alphaSize) throws IOException { 160 for (int i = minLen, pp = 0; i <= maxLen; i++) { 161 for (int j = 0; j < alphaSize; j++) { 162 if (length[j] == i) { 163 perm[pp++] = j; 164 } 165 } 166 } 167 168 for (int i = MAX_CODE_LEN; --i > 0;) { 169 base[i] = 0; 170 limit[i] = 0; 171 } 172 173 for (int i = 0; i < alphaSize; i++) { 174 final int l = length[i]; 175 checkBounds(l, MAX_ALPHA_SIZE, "length"); 176 base[l + 1]++; 177 } 178 179 for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) { 180 b += base[i]; 181 base[i] = b; 182 } 183 184 for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) { 185 final int nb = base[i + 1]; 186 vec += nb - b; 187 b = nb; 188 limit[i] = vec - 1; 189 vec <<= 1; 190 } 191 192 for (int i = minLen + 1; i <= maxLen; i++) { 193 base[i] = (limit[i - 1] + 1 << 1) - base[i]; 194 } 195 } 196 197 /** 198 * Checks if the signature matches what is expected for a bzip2 file. 199 * 200 * @param signature the bytes to check 201 * @param length the number of bytes to check 202 * @return true, if this stream is a bzip2 compressed stream, false otherwise 203 * 204 * @since 1.1 205 */ 206 public static boolean matches(final byte[] signature, final int length) { 207 return length >= 3 && signature[0] == 'B' && signature[1] == 'Z' && signature[2] == 'h'; 208 } 209 210 /** 211 * Index of the last char in the block, so the block size == last + 1. 212 */ 213 private int last; 214 215 /** 216 * Index in zptr[] of original string after sorting. 217 */ 218 private int origPtr; 219 /** 220 * always: in the range 0 .. 9. The current block size is 100000 * this number. 221 */ 222 private int blockSize100k; 223 224 // Variables used by setup* methods exclusively 225 226 private boolean blockRandomised; 227 private final CRC crc = new CRC(); 228 private int nInUse; 229 private BitInputStream bin; 230 private final boolean decompressConcatenated; 231 private int currentState = START_BLOCK_STATE; 232 private int storedBlockCRC, storedCombinedCRC; 233 private int computedCombinedCRC; 234 private int su_count; 235 private int su_ch2; 236 private int su_chPrev; 237 private int su_i2; 238 private int su_j2; 239 private int su_rNToGo; 240 private int su_rTPos; 241 private int su_tPos; 242 private char su_z; 243 244 /** 245 * All memory intensive stuff. This field is initialized by initBlock(). 246 */ 247 private BZip2CompressorInputStream.Data data; 248 249 /** 250 * Constructs a new BZip2CompressorInputStream which decompresses bytes read from the specified stream. This doesn't support decompressing concatenated .bz2 251 * files. 252 * 253 * @param in the InputStream from which this object should be created 254 * @throws IOException if the stream content is malformed or an I/O error occurs. 255 * @throws NullPointerException if {@code in == null} 256 */ 257 public BZip2CompressorInputStream(final InputStream in) throws IOException { 258 this(in, false); 259 } 260 261 /** 262 * Constructs a new BZip2CompressorInputStream which decompresses bytes read from the specified stream. 263 * 264 * @param in the InputStream from which this object should be created 265 * @param decompressConcatenated if true, decompress until the end of the input; if false, stop after the first .bz2 stream and leave the input position to 266 * point to the next byte after the .bz2 stream 267 * 268 * @throws IOException if {@code in == null}, the stream content is malformed, or an I/O error occurs. 269 */ 270 public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException { 271 this.bin = new BitInputStream(in == System.in ? CloseShieldInputStream.wrap(in) : in, ByteOrder.BIG_ENDIAN); 272 this.decompressConcatenated = decompressConcatenated; 273 init(true); 274 initBlock(); 275 } 276 277 @Override 278 public void close() throws IOException { 279 final BitInputStream inShadow = this.bin; 280 if (inShadow != null) { 281 try { 282 inShadow.close(); 283 } finally { 284 this.data = null; 285 this.bin = null; 286 } 287 } 288 } 289 290 private boolean complete() throws IOException { 291 this.storedCombinedCRC = bsGetInt(bin); 292 this.currentState = EOF; 293 this.data = null; 294 if (this.storedCombinedCRC != this.computedCombinedCRC) { 295 throw new IOException("BZip2 CRC error"); 296 } 297 // Look for the next .bz2 stream if decompressing 298 // concatenated files. 299 return !decompressConcatenated || !init(false); 300 } 301 302 /** 303 * Called by recvDecodingTables() exclusively. 304 */ 305 private void createHuffmanDecodingTables(final int alphaSize, final int nGroups) throws IOException { 306 final Data dataShadow = this.data; 307 final char[][] len = dataShadow.temp_charArray2d; 308 final int[] minLens = dataShadow.minLens; 309 final int[][] limit = dataShadow.limit; 310 final int[][] base = dataShadow.base; 311 final int[][] perm = dataShadow.perm; 312 313 for (int t = 0; t < nGroups; t++) { 314 int minLen = 32; 315 int maxLen = 0; 316 final char[] len_t = len[t]; 317 for (int i = alphaSize; --i >= 0;) { 318 final char lent = len_t[i]; 319 if (lent > maxLen) { 320 maxLen = lent; 321 } 322 if (lent < minLen) { 323 minLen = lent; 324 } 325 } 326 hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, maxLen, alphaSize); 327 minLens[t] = minLen; 328 } 329 } 330 331 private void endBlock() throws IOException { 332 final int computedBlockCRC = this.crc.getValue(); 333 // A bad CRC is considered a fatal error. 334 if (this.storedBlockCRC != computedBlockCRC) { 335 // make next blocks readable without error 336 // (repair feature, not yet documented, not tested) 337 this.computedCombinedCRC = this.storedCombinedCRC << 1 | this.storedCombinedCRC >>> 31; 338 this.computedCombinedCRC ^= this.storedBlockCRC; 339 throw new IOException("BZip2 CRC error"); 340 } 341 this.computedCombinedCRC = this.computedCombinedCRC << 1 | this.computedCombinedCRC >>> 31; 342 this.computedCombinedCRC ^= computedBlockCRC; 343 } 344 345 private void getAndMoveToFrontDecode() throws IOException { 346 final BitInputStream bin = this.bin; 347 this.origPtr = bsR(bin, 24); 348 recvDecodingTables(); 349 final Data dataShadow = this.data; 350 final byte[] ll8 = dataShadow.ll8; 351 final int[] unzftab = dataShadow.unzftab; 352 final byte[] selector = dataShadow.selector; 353 final byte[] seqToUnseq = dataShadow.seqToUnseq; 354 final char[] yy = dataShadow.getAndMoveToFrontDecode_yy; 355 final int[] minLens = dataShadow.minLens; 356 final int[][] limit = dataShadow.limit; 357 final int[][] base = dataShadow.base; 358 final int[][] perm = dataShadow.perm; 359 final int limitLast = this.blockSize100k * 100000; 360 361 /* 362 * Setting up the unzftab entries here is not strictly necessary, but it does save having to do it later in a separate pass, and so saves a block's 363 * worth of cache misses. 364 */ 365 for (int i = 256; --i >= 0;) { 366 yy[i] = (char) i; 367 unzftab[i] = 0; 368 } 369 370 int groupNo = 0; 371 int groupPos = G_SIZE - 1; 372 final int eob = this.nInUse + 1; 373 int nextSym = getAndMoveToFrontDecode0(); 374 int lastShadow = -1; 375 int zt = selector[groupNo] & 0xff; 376 checkBounds(zt, N_GROUPS, "zt"); 377 int[] base_zt = base[zt]; 378 int[] limit_zt = limit[zt]; 379 int[] perm_zt = perm[zt]; 380 int minLens_zt = minLens[zt]; 381 382 while (nextSym != eob) { 383 if (nextSym == RUNA || nextSym == RUNB) { 384 int s = -1; 385 386 for (int n = 1; true; n <<= 1) { 387 if (nextSym == RUNA) { 388 s += n; 389 } else if (nextSym == RUNB) { 390 s += n << 1; 391 } else { 392 break; 393 } 394 395 if (groupPos == 0) { 396 groupPos = G_SIZE - 1; 397 checkBounds(++groupNo, MAX_SELECTORS, "groupNo"); 398 zt = selector[groupNo] & 0xff; 399 checkBounds(zt, N_GROUPS, "zt"); 400 base_zt = base[zt]; 401 limit_zt = limit[zt]; 402 perm_zt = perm[zt]; 403 minLens_zt = minLens[zt]; 404 } else { 405 groupPos--; 406 } 407 408 int zn = minLens_zt; 409 checkBounds(zn, MAX_ALPHA_SIZE, "zn"); 410 int zvec = bsR(bin, zn); 411 while (zvec > limit_zt[zn]) { 412 checkBounds(++zn, MAX_ALPHA_SIZE, "zn"); 413 zvec = zvec << 1 | bsR(bin, 1); 414 } 415 final int tmp = zvec - base_zt[zn]; 416 checkBounds(tmp, MAX_ALPHA_SIZE, "zvec"); 417 nextSym = perm_zt[tmp]; 418 } 419 checkBounds(s, this.data.ll8.length, "s"); 420 421 final int yy0 = yy[0]; 422 checkBounds(yy0, 256, "yy"); 423 final byte ch = seqToUnseq[yy0]; 424 unzftab[ch & 0xff] += s + 1; 425 426 final int from = ++lastShadow; 427 lastShadow += s; 428 checkBounds(lastShadow, this.data.ll8.length, "lastShadow"); 429 Arrays.fill(ll8, from, lastShadow + 1, ch); 430 431 if (lastShadow >= limitLast) { 432 throw new IOException("Block overrun while expanding RLE in MTF, " + lastShadow + " exceeds " + limitLast); 433 } 434 } else { 435 if (++lastShadow >= limitLast) { 436 throw new IOException("Block overrun in MTF, " + lastShadow + " exceeds " + limitLast); 437 } 438 checkBounds(nextSym, 256 + 1, "nextSym"); 439 440 final char tmp = yy[nextSym - 1]; 441 checkBounds(tmp, 256, "yy"); 442 unzftab[seqToUnseq[tmp] & 0xff]++; 443 ll8[lastShadow] = seqToUnseq[tmp]; 444 445 /* 446 * This loop is hammered during decompression, hence avoid native method call overhead of System.arraycopy for very small ranges to copy. 447 */ 448 if (nextSym <= 16) { 449 for (int j = nextSym - 1; j > 0;) { 450 yy[j] = yy[--j]; 451 } 452 } else { 453 System.arraycopy(yy, 0, yy, 1, nextSym - 1); 454 } 455 456 yy[0] = tmp; 457 458 if (groupPos == 0) { 459 groupPos = G_SIZE - 1; 460 checkBounds(++groupNo, MAX_SELECTORS, "groupNo"); 461 zt = selector[groupNo] & 0xff; 462 checkBounds(zt, N_GROUPS, "zt"); 463 base_zt = base[zt]; 464 limit_zt = limit[zt]; 465 perm_zt = perm[zt]; 466 minLens_zt = minLens[zt]; 467 } else { 468 groupPos--; 469 } 470 471 int zn = minLens_zt; 472 checkBounds(zn, MAX_ALPHA_SIZE, "zn"); 473 int zvec = bsR(bin, zn); 474 while (zvec > limit_zt[zn]) { 475 checkBounds(++zn, MAX_ALPHA_SIZE, "zn"); 476 zvec = zvec << 1 | bsR(bin, 1); 477 } 478 final int idx = zvec - base_zt[zn]; 479 checkBounds(idx, MAX_ALPHA_SIZE, "zvec"); 480 nextSym = perm_zt[idx]; 481 } 482 } 483 484 this.last = lastShadow; 485 } 486 487 private int getAndMoveToFrontDecode0() throws IOException { 488 final Data dataShadow = this.data; 489 final int zt = dataShadow.selector[0] & 0xff; 490 checkBounds(zt, N_GROUPS, "zt"); 491 final int[] limit_zt = dataShadow.limit[zt]; 492 int zn = dataShadow.minLens[zt]; 493 checkBounds(zn, MAX_ALPHA_SIZE, "zn"); 494 int zvec = bsR(bin, zn); 495 while (zvec > limit_zt[zn]) { 496 checkBounds(++zn, MAX_ALPHA_SIZE, "zn"); 497 zvec = zvec << 1 | bsR(bin, 1); 498 } 499 final int tmp = zvec - dataShadow.base[zt][zn]; 500 checkBounds(tmp, MAX_ALPHA_SIZE, "zvec"); 501 502 return dataShadow.perm[zt][tmp]; 503 } 504 505 /** 506 * @since 1.17 507 */ 508 @Override 509 public long getCompressedCount() { 510 return bin.getBytesRead(); 511 } 512 513 private boolean init(final boolean isFirstStream) throws IOException { 514 if (null == bin) { 515 throw new IOException("No InputStream"); 516 } 517 518 if (!isFirstStream) { 519 bin.clearBitCache(); 520 } 521 522 final int magic0 = readNextByte(this.bin); 523 if (magic0 == -1 && !isFirstStream) { 524 return false; 525 } 526 final int magic1 = readNextByte(this.bin); 527 final int magic2 = readNextByte(this.bin); 528 529 if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') { 530 throw new IOException(isFirstStream ? "Stream is not in the BZip2 format" : "Garbage after a valid BZip2 stream"); 531 } 532 533 final int blockSize = readNextByte(this.bin); 534 if (blockSize < '1' || blockSize > '9') { 535 throw new IOException("BZip2 block size is invalid"); 536 } 537 538 this.blockSize100k = blockSize - '0'; 539 540 this.computedCombinedCRC = 0; 541 542 return true; 543 } 544 545 private void initBlock() throws IOException { 546 final BitInputStream bin = this.bin; 547 char magic0; 548 char magic1; 549 char magic2; 550 char magic3; 551 char magic4; 552 char magic5; 553 554 while (true) { 555 // Get the block magic bytes. 556 magic0 = bsGetUByte(bin); 557 magic1 = bsGetUByte(bin); 558 magic2 = bsGetUByte(bin); 559 magic3 = bsGetUByte(bin); 560 magic4 = bsGetUByte(bin); 561 magic5 = bsGetUByte(bin); 562 563 // If isn't end of stream magic, break out of the loop. 564 if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45 || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) { 565 break; 566 } 567 568 // End of stream was reached. Check the combined CRC and 569 // advance to the next .bz2 stream if decoding concatenated 570 // streams. 571 if (complete()) { 572 return; 573 } 574 } 575 576 if (magic0 != 0x31 || // '1' 577 magic1 != 0x41 || // ')' 578 magic2 != 0x59 || // 'Y' 579 magic3 != 0x26 || // '&' 580 magic4 != 0x53 || // 'S' 581 magic5 != 0x59 // 'Y' 582 ) { 583 this.currentState = EOF; 584 throw new IOException("Bad block header"); 585 } 586 this.storedBlockCRC = bsGetInt(bin); 587 this.blockRandomised = bsR(bin, 1) == 1; 588 589 /* 590 * Allocate data here instead in constructor, so we do not allocate it if the input file is empty. 591 */ 592 if (this.data == null) { 593 this.data = new Data(this.blockSize100k); 594 } 595 596 // currBlockNo++; 597 getAndMoveToFrontDecode(); 598 599 this.crc.reset(); 600 this.currentState = START_BLOCK_STATE; 601 } 602 603 private void makeMaps() { 604 final boolean[] inUse = this.data.inUse; 605 final byte[] seqToUnseq = this.data.seqToUnseq; 606 607 int nInUseShadow = 0; 608 609 for (int i = 0; i < 256; i++) { 610 if (inUse[i]) { 611 seqToUnseq[nInUseShadow++] = (byte) i; 612 } 613 } 614 615 this.nInUse = nInUseShadow; 616 } 617 618 @Override 619 public int read() throws IOException { 620 if (this.bin != null) { 621 final int r = read0(); 622 count(r < 0 ? -1 : 1); 623 return r; 624 } 625 throw new IOException("Stream closed"); 626 } 627 628 /* 629 * (non-Javadoc) 630 * 631 * @see java.io.InputStream#read(byte[], int, int) 632 */ 633 @Override 634 public int read(final byte[] dest, final int offs, final int len) throws IOException { 635 if (offs < 0) { 636 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); 637 } 638 if (len < 0) { 639 throw new IndexOutOfBoundsException("len(" + len + ") < 0."); 640 } 641 if (offs + len > dest.length) { 642 throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + len + ") > dest.length(" + dest.length + ")."); 643 } 644 if (this.bin == null) { 645 throw new IOException("Stream closed"); 646 } 647 if (len == 0) { 648 return 0; 649 } 650 651 final int hi = offs + len; 652 int destOffs = offs; 653 int b; 654 while (destOffs < hi && (b = read0()) >= 0) { 655 dest[destOffs++] = (byte) b; 656 count(1); 657 } 658 659 return destOffs == offs ? -1 : destOffs - offs; 660 } 661 662 private int read0() throws IOException { 663 switch (currentState) { 664 case EOF: 665 return -1; 666 667 case START_BLOCK_STATE: 668 return setupBlock(); 669 670 case RAND_PART_A_STATE: 671 throw new IllegalStateException(); 672 673 case RAND_PART_B_STATE: 674 return setupRandPartB(); 675 676 case RAND_PART_C_STATE: 677 return setupRandPartC(); 678 679 case NO_RAND_PART_A_STATE: 680 throw new IllegalStateException(); 681 682 case NO_RAND_PART_B_STATE: 683 return setupNoRandPartB(); 684 685 case NO_RAND_PART_C_STATE: 686 return setupNoRandPartC(); 687 688 default: 689 throw new IllegalStateException(); 690 } 691 } 692 693 private int readNextByte(final BitInputStream in) throws IOException { 694 final long b = in.readBits(8); 695 return (int) b; 696 } 697 698 private void recvDecodingTables() throws IOException { 699 final BitInputStream bin = this.bin; 700 final Data dataShadow = this.data; 701 final boolean[] inUse = dataShadow.inUse; 702 final byte[] pos = dataShadow.recvDecodingTables_pos; 703 final byte[] selector = dataShadow.selector; 704 final byte[] selectorMtf = dataShadow.selectorMtf; 705 706 int inUse16 = 0; 707 708 /* Receive the mapping table */ 709 for (int i = 0; i < 16; i++) { 710 if (bsGetBit(bin)) { 711 inUse16 |= 1 << i; 712 } 713 } 714 715 Arrays.fill(inUse, false); 716 for (int i = 0; i < 16; i++) { 717 if ((inUse16 & 1 << i) != 0) { 718 final int i16 = i << 4; 719 for (int j = 0; j < 16; j++) { 720 if (bsGetBit(bin)) { 721 inUse[i16 + j] = true; 722 } 723 } 724 } 725 } 726 727 makeMaps(); 728 final int alphaSize = this.nInUse + 2; 729 /* Now the selectors */ 730 final int nGroups = bsR(bin, 3); 731 final int selectors = bsR(bin, 15); 732 if (selectors < 0) { 733 throw new IOException("Corrupted input, nSelectors value negative"); 734 } 735 checkBounds(alphaSize, MAX_ALPHA_SIZE + 1, "alphaSize"); 736 checkBounds(nGroups, N_GROUPS + 1, "nGroups"); 737 738 // Don't fail on nSelectors overflowing boundaries but discard the values in overflow 739 // See https://gnu.wildebeest.org/blog/mjw/2019/08/02/bzip2-and-the-cve-that-wasnt/ 740 // and https://sourceware.org/ml/bzip2-devel/2019-q3/msg00007.html 741 742 for (int i = 0; i < selectors; i++) { 743 int j = 0; 744 while (bsGetBit(bin)) { 745 j++; 746 } 747 if (i < MAX_SELECTORS) { 748 selectorMtf[i] = (byte) j; 749 } 750 } 751 final int nSelectors = Math.min(selectors, MAX_SELECTORS); 752 753 /* Undo the MTF values for the selectors. */ 754 for (int v = nGroups; --v >= 0;) { 755 pos[v] = (byte) v; 756 } 757 758 for (int i = 0; i < nSelectors; i++) { 759 int v = selectorMtf[i] & 0xff; 760 checkBounds(v, N_GROUPS, "selectorMtf"); 761 final byte tmp = pos[v]; 762 while (v > 0) { 763 // nearly all times v is zero, 4 in most other cases 764 pos[v] = pos[v - 1]; 765 v--; 766 } 767 pos[0] = tmp; 768 selector[i] = tmp; 769 } 770 771 final char[][] len = dataShadow.temp_charArray2d; 772 773 /* Now the coding tables */ 774 for (int t = 0; t < nGroups; t++) { 775 int curr = bsR(bin, 5); 776 final char[] len_t = len[t]; 777 for (int i = 0; i < alphaSize; i++) { 778 while (bsGetBit(bin)) { 779 curr += bsGetBit(bin) ? -1 : 1; 780 } 781 len_t[i] = (char) curr; 782 } 783 } 784 785 // finally create the Huffman tables 786 createHuffmanDecodingTables(alphaSize, nGroups); 787 } 788 789 private int setupBlock() throws IOException { 790 if (currentState == EOF || this.data == null) { 791 return -1; 792 } 793 794 final int[] cftab = this.data.cftab; 795 final int ttLen = this.last + 1; 796 final int[] tt = this.data.initTT(ttLen); 797 final byte[] ll8 = this.data.ll8; 798 cftab[0] = 0; 799 System.arraycopy(this.data.unzftab, 0, cftab, 1, 256); 800 801 for (int i = 1, c = cftab[0]; i <= 256; i++) { 802 c += cftab[i]; 803 cftab[i] = c; 804 } 805 806 for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { 807 final int tmp = cftab[ll8[i] & 0xff]++; 808 checkBounds(tmp, ttLen, "tt index"); 809 tt[tmp] = i; 810 } 811 812 if (this.origPtr < 0 || this.origPtr >= tt.length) { 813 throw new IOException("Stream corrupted"); 814 } 815 816 this.su_tPos = tt[this.origPtr]; 817 this.su_count = 0; 818 this.su_i2 = 0; 819 this.su_ch2 = 256; /* not a char and not EOF */ 820 821 if (this.blockRandomised) { 822 this.su_rNToGo = 0; 823 this.su_rTPos = 0; 824 return setupRandPartA(); 825 } 826 return setupNoRandPartA(); 827 } 828 829 private int setupNoRandPartA() throws IOException { 830 if (this.su_i2 <= this.last) { 831 this.su_chPrev = this.su_ch2; 832 final int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; 833 this.su_ch2 = su_ch2Shadow; 834 checkBounds(this.su_tPos, this.data.tt.length, "su_tPos"); 835 this.su_tPos = this.data.tt[this.su_tPos]; 836 this.su_i2++; 837 this.currentState = NO_RAND_PART_B_STATE; 838 this.crc.update(su_ch2Shadow); 839 return su_ch2Shadow; 840 } 841 this.currentState = NO_RAND_PART_A_STATE; 842 endBlock(); 843 initBlock(); 844 return setupBlock(); 845 } 846 847 private int setupNoRandPartB() throws IOException { 848 if (this.su_ch2 != this.su_chPrev) { 849 this.su_count = 1; 850 return setupNoRandPartA(); 851 } 852 if (++this.su_count >= 4) { 853 checkBounds(this.su_tPos, this.data.ll8.length, "su_tPos"); 854 this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); 855 this.su_tPos = this.data.tt[this.su_tPos]; 856 this.su_j2 = 0; 857 return setupNoRandPartC(); 858 } 859 return setupNoRandPartA(); 860 } 861 862 private int setupNoRandPartC() throws IOException { 863 if (this.su_j2 < this.su_z) { 864 final int su_ch2Shadow = this.su_ch2; 865 this.crc.update(su_ch2Shadow); 866 this.su_j2++; 867 this.currentState = NO_RAND_PART_C_STATE; 868 return su_ch2Shadow; 869 } 870 this.su_i2++; 871 this.su_count = 0; 872 return setupNoRandPartA(); 873 } 874 875 private int setupRandPartA() throws IOException { 876 if (this.su_i2 <= this.last) { 877 this.su_chPrev = this.su_ch2; 878 int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; 879 checkBounds(this.su_tPos, this.data.tt.length, "su_tPos"); 880 this.su_tPos = this.data.tt[this.su_tPos]; 881 if (this.su_rNToGo == 0) { 882 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1; 883 if (++this.su_rTPos == 512) { 884 this.su_rTPos = 0; 885 } 886 } else { 887 this.su_rNToGo--; 888 } 889 this.su_ch2 = su_ch2Shadow ^= this.su_rNToGo == 1 ? 1 : 0; 890 this.su_i2++; 891 this.currentState = RAND_PART_B_STATE; 892 this.crc.update(su_ch2Shadow); 893 return su_ch2Shadow; 894 } 895 endBlock(); 896 initBlock(); 897 return setupBlock(); 898 } 899 900 private int setupRandPartB() throws IOException { 901 if (this.su_ch2 != this.su_chPrev) { 902 this.currentState = RAND_PART_A_STATE; 903 this.su_count = 1; 904 return setupRandPartA(); 905 } 906 if (++this.su_count < 4) { 907 this.currentState = RAND_PART_A_STATE; 908 return setupRandPartA(); 909 } 910 this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); 911 checkBounds(this.su_tPos, this.data.tt.length, "su_tPos"); 912 this.su_tPos = this.data.tt[this.su_tPos]; 913 if (this.su_rNToGo == 0) { 914 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1; 915 if (++this.su_rTPos == 512) { 916 this.su_rTPos = 0; 917 } 918 } else { 919 this.su_rNToGo--; 920 } 921 this.su_j2 = 0; 922 this.currentState = RAND_PART_C_STATE; 923 if (this.su_rNToGo == 1) { 924 this.su_z ^= 1; 925 } 926 return setupRandPartC(); 927 } 928 929 private int setupRandPartC() throws IOException { 930 if (this.su_j2 < this.su_z) { 931 this.crc.update(this.su_ch2); 932 this.su_j2++; 933 return this.su_ch2; 934 } 935 this.currentState = RAND_PART_A_STATE; 936 this.su_i2++; 937 this.su_count = 0; 938 return setupRandPartA(); 939 } 940}