/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.compress.archivers.sevenz;

import static java.nio.charset.StandardCharsets.UTF_16LE;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.Channels;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.EnumSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.zip.CRC32;
import java.util.zip.CheckedInputStream;

import org.apache.commons.compress.MemoryLimitException;
import org.apache.commons.compress.utils.ByteUtils;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.compress.utils.InputStreamStatistics;
import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
import org.apache.commons.io.build.AbstractOrigin.ByteArrayOrigin;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.input.BoundedInputStream;
import org.apache.commons.io.input.ChecksumInputStream;

/**
 * Reads a 7z file, using SeekableByteChannel under the covers.
 * <p>
 * The 7z file format is a flexible container that can contain many compression and encryption types, but at the moment only Copy, LZMA, LZMA2, BZIP2,
 * Deflate and AES-256 + SHA-256 are supported.
 * </p>
 * <p>
 * The format is very Windows/Intel specific, so it uses little-endian byte order, doesn't store user/group or permission bits, and represents times using NTFS
 * timestamps (100 nanosecond units since 1 January 1601). Hence the official tools recommend against using it for backup purposes on *nix, and recommend
 * .tar.7z or .tar.lzma or .tar.xz instead.
 * </p>
 * <p>
 * Both the header and file contents may be compressed and/or encrypted. With both encrypted, neither file names nor file contents can be read, but the use of
 * encryption isn't plausibly deniable.
 * </p>
 * <p>
 * Multi volume archives can be read by concatenating the parts in correct order - either manually or by using {@link
 * org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel} for example.
078 * </p> 079 * 080 * @NotThreadSafe 081 * @since 1.6 082 */ 083public class SevenZFile implements Closeable { 084 085 private static final class ArchiveStatistics { 086 private int numberOfPackedStreams; 087 private long numberOfCoders; 088 private long numberOfOutStreams; 089 private long numberOfInStreams; 090 private long numberOfUnpackSubStreams; 091 private int numberOfFolders; 092 private BitSet folderHasCrc; 093 private int numberOfEntries; 094 private int numberOfEntriesWithStream; 095 096 void assertValidity(final int maxMemoryLimitInKb) throws IOException { 097 if (numberOfEntriesWithStream > 0 && numberOfFolders == 0) { 098 throw new IOException("archive with entries but no folders"); 099 } 100 if (numberOfEntriesWithStream > numberOfUnpackSubStreams) { 101 throw new IOException("archive doesn't contain enough substreams for entries"); 102 } 103 104 final long memoryNeededInKb = estimateSize() / 1024; 105 if (maxMemoryLimitInKb < memoryNeededInKb) { 106 throw new MemoryLimitException(memoryNeededInKb, maxMemoryLimitInKb); 107 } 108 } 109 110 private long bindPairSize() { 111 return 16; 112 } 113 114 private long coderSize() { 115 return 2 /* methodId is between 1 and four bytes currently, COPY and LZMA2 are the most common with 1 */ 116 + 16 + 4 /* properties, guess */ 117 ; 118 } 119 120 private long entrySize() { 121 return 100; /* real size depends on name length, everything without name is about 70 bytes */ 122 } 123 124 long estimateSize() { 125 final long lowerBound = 16L * numberOfPackedStreams /* packSizes, packCrcs in Archive */ 126 + numberOfPackedStreams / 8 /* packCrcsDefined in Archive */ 127 + numberOfFolders * folderSize() /* folders in Archive */ 128 + numberOfCoders * coderSize() /* coders in Folder */ 129 + (numberOfOutStreams - numberOfFolders) * bindPairSize() /* bindPairs in Folder */ 130 + 8L * (numberOfInStreams - numberOfOutStreams + numberOfFolders) /* packedStreams in Folder */ 131 + 8L * numberOfOutStreams /* unpackSizes in 
Folder */ 132 + numberOfEntries * entrySize() /* files in Archive */ 133 + streamMapSize(); 134 return 2 * lowerBound /* conservative guess */; 135 } 136 137 private long folderSize() { 138 return 30; /* nested arrays are accounted for separately */ 139 } 140 141 private long streamMapSize() { 142 return 8 * numberOfFolders /* folderFirstPackStreamIndex, folderFirstFileIndex */ 143 + 8 * numberOfPackedStreams /* packStreamOffsets */ 144 + 4 * numberOfEntries /* fileFolderIndex */ 145 ; 146 } 147 148 @Override 149 public String toString() { 150 return "Archive with " + numberOfEntries + " entries in " + numberOfFolders + " folders. Estimated size " + estimateSize() / 1024L + " kB."; 151 } 152 } 153 154 /** 155 * Builds new instances of {@link SevenZFile}. 156 * 157 * @since 1.26.0 158 */ 159 public static class Builder extends AbstractStreamBuilder<SevenZFile, Builder> { 160 161 static final int MEMORY_LIMIT_IN_KB = Integer.MAX_VALUE; 162 static final boolean USE_DEFAULTNAME_FOR_UNNAMED_ENTRIES = false; 163 static final boolean TRY_TO_RECOVER_BROKEN_ARCHIVES = false; 164 165 private SeekableByteChannel seekableByteChannel; 166 private String defaultName = DEFAULT_FILE_NAME; 167 private byte[] password; 168 private int maxMemoryLimitKb = MEMORY_LIMIT_IN_KB; 169 private boolean useDefaultNameForUnnamedEntries = USE_DEFAULTNAME_FOR_UNNAMED_ENTRIES; 170 private boolean tryToRecoverBrokenArchives = TRY_TO_RECOVER_BROKEN_ARCHIVES; 171 172 @SuppressWarnings("resource") // Caller closes 173 @Override 174 public SevenZFile get() throws IOException { 175 final SeekableByteChannel actualChannel; 176 final String actualDescription; 177 if (seekableByteChannel != null) { 178 actualChannel = seekableByteChannel; 179 actualDescription = defaultName; 180 } else if (checkOrigin() instanceof ByteArrayOrigin) { 181 actualChannel = new SeekableInMemoryByteChannel(checkOrigin().getByteArray()); 182 actualDescription = defaultName; 183 } else { 184 OpenOption[] openOptions = 
getOpenOptions(); 185 if (openOptions.length == 0) { 186 openOptions = new OpenOption[] { StandardOpenOption.READ }; 187 } 188 final Path path = getPath(); 189 actualChannel = Files.newByteChannel(path, openOptions); 190 actualDescription = path.toAbsolutePath().toString(); 191 } 192 final boolean closeOnError = seekableByteChannel != null; 193 return new SevenZFile(actualChannel, actualDescription, password, closeOnError, maxMemoryLimitKb, useDefaultNameForUnnamedEntries, 194 tryToRecoverBrokenArchives); 195 } 196 197 /** 198 * Sets the default name. 199 * 200 * @param defaultName the default name. 201 * @return {@code this} instance. 202 */ 203 public Builder setDefaultName(final String defaultName) { 204 this.defaultName = defaultName; 205 return this; 206 } 207 208 /** 209 * Sets the maximum amount of memory in kilobytes to use for parsing the archive and during extraction. 210 * <p> 211 * Not all codecs honor this setting. Currently only LZMA and LZMA2 are supported. 212 * </p> 213 * 214 * @param maxMemoryLimitKb the max memory limit in kilobytes. 215 * @return {@code this} instance. 216 */ 217 public Builder setMaxMemoryLimitKb(final int maxMemoryLimitKb) { 218 this.maxMemoryLimitKb = maxMemoryLimitKb; 219 return this; 220 } 221 222 /** 223 * Sets the password. 224 * 225 * @param password the password. 226 * @return {@code this} instance. 227 */ 228 public Builder setPassword(final byte[] password) { 229 this.password = password != null ? password.clone() : null; 230 return this; 231 } 232 233 /** 234 * Sets the password. 235 * 236 * @param password the password. 237 * @return {@code this} instance. 238 */ 239 public Builder setPassword(final char[] password) { 240 this.password = password != null ? AES256SHA256Decoder.utf16Decode(password.clone()) : null; 241 return this; 242 } 243 244 /** 245 * Sets the password. 246 * 247 * @param password the password. 248 * @return {@code this} instance. 
249 */ 250 public Builder setPassword(final String password) { 251 this.password = password != null ? AES256SHA256Decoder.utf16Decode(password.toCharArray()) : null; 252 return this; 253 } 254 255 /** 256 * Sets the input channel. 257 * 258 * @param seekableByteChannel the input channel. 259 * @return {@code this} instance. 260 */ 261 public Builder setSeekableByteChannel(final SeekableByteChannel seekableByteChannel) { 262 this.seekableByteChannel = seekableByteChannel; 263 return this; 264 } 265 266 /** 267 * Sets whether {@link SevenZFile} will try to recover broken archives where the CRC of the file's metadata is 0. 268 * <p> 269 * This special kind of broken archive is encountered when mutli volume archives are closed prematurely. If you enable this option SevenZFile will trust 270 * data that looks as if it could contain metadata of an archive and allocate big amounts of memory. It is strongly recommended to not enable this 271 * option without setting {@link #setMaxMemoryLimitKb(int)} at the same time. 272 * </p> 273 * 274 * @param tryToRecoverBrokenArchives whether {@link SevenZFile} will try to recover broken archives where the CRC of the file's metadata is 0. 275 * @return {@code this} instance. 276 */ 277 public Builder setTryToRecoverBrokenArchives(final boolean tryToRecoverBrokenArchives) { 278 this.tryToRecoverBrokenArchives = tryToRecoverBrokenArchives; 279 return this; 280 } 281 282 /** 283 * Sets whether entries without a name should get their names set to the archive's default file name. 284 * 285 * @param useDefaultNameForUnnamedEntries whether entries without a name should get their names set to the archive's default file name. 286 * @return {@code this} instance. 
287 */ 288 public Builder setUseDefaultNameForUnnamedEntries(final boolean useDefaultNameForUnnamedEntries) { 289 this.useDefaultNameForUnnamedEntries = useDefaultNameForUnnamedEntries; 290 return this; 291 } 292 293 } 294 295 static final int SIGNATURE_HEADER_SIZE = 32; 296 297 private static final String DEFAULT_FILE_NAME = "unknown archive"; 298 299 /** Shared with SevenZOutputFile and tests, neither mutates it. */ 300 static final byte[] sevenZSignature = { // NOSONAR 301 (byte) '7', (byte) 'z', (byte) 0xBC, (byte) 0xAF, (byte) 0x27, (byte) 0x1C }; 302 303 private static int assertFitsIntoNonNegativeInt(final String what, final long value) throws IOException { 304 if (value > Integer.MAX_VALUE || value < 0) { 305 throw new IOException(String.format("Cannot handle % %,d", what, value)); 306 } 307 return (int) value; 308 } 309 310 /** 311 * Creates a new Builder. 312 * 313 * @return a new Builder. 314 * @since 1.26.0 315 */ 316 public static Builder builder() { 317 return new Builder(); 318 } 319 320 private static ByteBuffer checkEndOfFile(final ByteBuffer buf, final int expectRemaining) throws EOFException { 321 final int remaining = buf.remaining(); 322 if (remaining < expectRemaining) { 323 throw new EOFException(String.format("remaining %,d < expectRemaining %,d", remaining, expectRemaining)); 324 } 325 return buf; 326 } 327 328 private static void get(final ByteBuffer buf, final byte[] to) throws EOFException { 329 checkEndOfFile(buf, to.length).get(to); 330 } 331 332 private static char getChar(final ByteBuffer buf) throws EOFException { 333 return checkEndOfFile(buf, Character.BYTES).getChar(); 334 } 335 336 private static int getInt(final ByteBuffer buf) throws EOFException { 337 return checkEndOfFile(buf, Integer.BYTES).getInt(); 338 } 339 340 private static long getLong(final ByteBuffer buf) throws EOFException { 341 return checkEndOfFile(buf, Long.BYTES).getLong(); 342 } 343 344 private static int getUnsignedByte(final ByteBuffer buf) throws 
EOFException { 345 if (!buf.hasRemaining()) { 346 throw new EOFException(); 347 } 348 return buf.get() & 0xff; 349 } 350 351 /** 352 * Checks if the signature matches what is expected for a 7z file. 353 * 354 * @param signature the bytes to check 355 * @param length the number of bytes to check 356 * @return true, if this is the signature of a 7z archive. 357 * @since 1.8 358 */ 359 public static boolean matches(final byte[] signature, final int length) { 360 if (length < sevenZSignature.length) { 361 return false; 362 } 363 for (int i = 0; i < sevenZSignature.length; i++) { 364 if (signature[i] != sevenZSignature[i]) { 365 return false; 366 } 367 } 368 return true; 369 } 370 371 private static SeekableByteChannel newByteChannel(final File file) throws IOException { 372 return Files.newByteChannel(file.toPath(), EnumSet.of(StandardOpenOption.READ)); 373 } 374 375 private static long readUint64(final ByteBuffer in) throws IOException { 376 // long rather than int as it might get shifted beyond the range of an int 377 final long firstByte = getUnsignedByte(in); 378 int mask = 0x80; 379 long value = 0; 380 for (int i = 0; i < 8; i++) { 381 if ((firstByte & mask) == 0) { 382 return value | (firstByte & mask - 1) << 8 * i; 383 } 384 final long nextByte = getUnsignedByte(in); 385 value |= nextByte << 8 * i; 386 mask >>>= 1; 387 } 388 return value; 389 } 390 391 private static long skipBytesFully(final ByteBuffer input, long bytesToSkip) { 392 if (bytesToSkip < 1) { 393 return 0; 394 } 395 final int current = input.position(); 396 final int maxSkip = input.remaining(); 397 if (maxSkip < bytesToSkip) { 398 bytesToSkip = maxSkip; 399 } 400 input.position(current + (int) bytesToSkip); 401 return bytesToSkip; 402 } 403 404 private final String fileName; 405 private SeekableByteChannel channel; 406 private final Archive archive; 407 private int currentEntryIndex = -1; 408 private int currentFolderIndex = -1; 409 private InputStream currentFolderInputStream; 410 private byte[] 
password; 411 private long compressedBytesReadFromCurrentEntry; 412 private long uncompressedBytesReadFromCurrentEntry; 413 private final ArrayList<InputStream> deferredBlockStreams = new ArrayList<>(); 414 private final int maxMemoryLimitKb; 415 private final boolean useDefaultNameForUnnamedEntries; 416 417 private final boolean tryToRecoverBrokenArchives; 418 419 /** 420 * Reads a file as unencrypted 7z archive. 421 * 422 * @param fileName the file to read. 423 * @throws IOException if reading the archive fails. 424 * @deprecated Use {@link Builder#get()}. 425 */ 426 @Deprecated 427 public SevenZFile(final File fileName) throws IOException { 428 this(fileName, SevenZFileOptions.DEFAULT); 429 } 430 431 /** 432 * Reads a file as 7z archive 433 * 434 * @param file the file to read 435 * @param password optional password if the archive is encrypted - the byte array is supposed to be the UTF16-LE encoded representation of the password. 436 * @throws IOException if reading the archive fails 437 * @deprecated Use {@link Builder#get()}. 438 */ 439 @SuppressWarnings("resource") // caller closes 440 @Deprecated 441 public SevenZFile(final File file, final byte[] password) throws IOException { 442 this(newByteChannel(file), file.getAbsolutePath(), password, true, SevenZFileOptions.DEFAULT); 443 } 444 445 /** 446 * Reads a file as 7z archive 447 * 448 * @param file the file to read 449 * @param password optional password if the archive is encrypted 450 * @throws IOException if reading the archive fails 451 * @since 1.17 452 * @deprecated Use {@link Builder#get()}. 453 */ 454 @Deprecated 455 public SevenZFile(final File file, final char[] password) throws IOException { 456 this(file, password, SevenZFileOptions.DEFAULT); 457 } 458 459 /** 460 * Reads a file as 7z archive with additional options. 
461 * 462 * @param file the file to read 463 * @param password optional password if the archive is encrypted 464 * @param options the options to apply 465 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 466 * @since 1.19 467 * @deprecated Use {@link Builder#get()}. 468 */ 469 @SuppressWarnings("resource") // caller closes 470 @Deprecated 471 public SevenZFile(final File file, final char[] password, final SevenZFileOptions options) throws IOException { 472 this(newByteChannel(file), // NOSONAR 473 file.getAbsolutePath(), AES256SHA256Decoder.utf16Decode(password), true, options); 474 } 475 476 /** 477 * Reads a file as unencrypted 7z archive 478 * 479 * @param file the file to read 480 * @param options the options to apply 481 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 482 * @since 1.19 483 * @deprecated Use {@link Builder#get()}. 484 */ 485 @Deprecated 486 public SevenZFile(final File file, final SevenZFileOptions options) throws IOException { 487 this(file, null, options); 488 } 489 490 /** 491 * Reads a SeekableByteChannel as 7z archive 492 * <p> 493 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 494 * </p> 495 * 496 * @param channel the channel to read 497 * @throws IOException if reading the archive fails 498 * @since 1.13 499 * @deprecated Use {@link Builder#get()}. 500 */ 501 @Deprecated 502 public SevenZFile(final SeekableByteChannel channel) throws IOException { 503 this(channel, SevenZFileOptions.DEFAULT); 504 } 505 506 /** 507 * Reads a SeekableByteChannel as 7z archive 508 * <p> 509 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 
510 * </p> 511 * 512 * @param channel the channel to read 513 * @param password optional password if the archive is encrypted - the byte array is supposed to be the UTF16-LE encoded representation of the password. 514 * @throws IOException if reading the archive fails 515 * @since 1.13 516 * @deprecated Use {@link Builder#get()}. 517 */ 518 @Deprecated 519 public SevenZFile(final SeekableByteChannel channel, final byte[] password) throws IOException { 520 this(channel, DEFAULT_FILE_NAME, password); 521 } 522 523 /** 524 * Reads a SeekableByteChannel as 7z archive 525 * <p> 526 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 527 * </p> 528 * 529 * @param channel the channel to read 530 * @param password optional password if the archive is encrypted 531 * @throws IOException if reading the archive fails 532 * @since 1.17 533 * @deprecated Use {@link Builder#get()}. 534 */ 535 @Deprecated 536 public SevenZFile(final SeekableByteChannel channel, final char[] password) throws IOException { 537 this(channel, password, SevenZFileOptions.DEFAULT); 538 } 539 540 /** 541 * Reads a SeekableByteChannel as 7z archive with additional options. 542 * <p> 543 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 544 * </p> 545 * 546 * @param channel the channel to read 547 * @param password optional password if the archive is encrypted 548 * @param options the options to apply 549 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 550 * @since 1.19 551 * @deprecated Use {@link Builder#get()}. 552 */ 553 @Deprecated 554 public SevenZFile(final SeekableByteChannel channel, final char[] password, final SevenZFileOptions options) throws IOException { 555 this(channel, DEFAULT_FILE_NAME, password, options); 556 } 557 558 /** 559 * Reads a SeekableByteChannel as 7z archive with additional options. 
560 * <p> 561 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 562 * </p> 563 * 564 * @param channel the channel to read 565 * @param options the options to apply 566 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 567 * @since 1.19 568 * @deprecated Use {@link Builder#get()}. 569 */ 570 @Deprecated 571 public SevenZFile(final SeekableByteChannel channel, final SevenZFileOptions options) throws IOException { 572 this(channel, DEFAULT_FILE_NAME, null, options); 573 } 574 575 /** 576 * Reads a SeekableByteChannel as 7z archive 577 * <p> 578 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 579 * </p> 580 * 581 * @param channel the channel to read 582 * @param fileName name of the archive - only used for error reporting 583 * @throws IOException if reading the archive fails 584 * @since 1.17 585 * @deprecated Use {@link Builder#get()}. 586 */ 587 @Deprecated 588 public SevenZFile(final SeekableByteChannel channel, final String fileName) throws IOException { 589 this(channel, fileName, SevenZFileOptions.DEFAULT); 590 } 591 592 /** 593 * Reads a SeekableByteChannel as 7z archive 594 * <p> 595 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 596 * </p> 597 * 598 * @param channel the channel to read 599 * @param fileName name of the archive - only used for error reporting 600 * @param password optional password if the archive is encrypted - the byte array is supposed to be the UTF16-LE encoded representation of the password. 601 * @throws IOException if reading the archive fails 602 * @since 1.13 603 * @deprecated Use {@link Builder#get()}. 
604 */ 605 @Deprecated 606 public SevenZFile(final SeekableByteChannel channel, final String fileName, final byte[] password) throws IOException { 607 this(channel, fileName, password, false, SevenZFileOptions.DEFAULT); 608 } 609 610 private SevenZFile(final SeekableByteChannel channel, final String fileName, final byte[] password, final boolean closeOnError, final int maxMemoryLimitKb, 611 final boolean useDefaultNameForUnnamedEntries, final boolean tryToRecoverBrokenArchives) throws IOException { 612 boolean succeeded = false; 613 this.channel = channel; 614 this.fileName = fileName; 615 this.maxMemoryLimitKb = maxMemoryLimitKb; 616 this.useDefaultNameForUnnamedEntries = useDefaultNameForUnnamedEntries; 617 this.tryToRecoverBrokenArchives = tryToRecoverBrokenArchives; 618 try { 619 archive = readHeaders(password); 620 if (password != null) { 621 this.password = Arrays.copyOf(password, password.length); 622 } else { 623 this.password = null; 624 } 625 succeeded = true; 626 } finally { 627 if (!succeeded && closeOnError) { 628 this.channel.close(); 629 } 630 } 631 } 632 633 /** 634 * Constructs a new instance. 635 * 636 * @param channel the channel to read. 637 * @param fileName name of the archive - only used for error reporting. 638 * @param password optional password if the archive is encrypted. 639 * @param closeOnError closes the channel on error. 640 * @param options options. 641 * @throws IOException if reading the archive fails 642 * @deprecated Use {@link Builder#get()}. 
643 */ 644 @Deprecated 645 private SevenZFile(final SeekableByteChannel channel, final String fileName, final byte[] password, final boolean closeOnError, 646 final SevenZFileOptions options) throws IOException { 647 this(channel, fileName, password, closeOnError, options.getMaxMemoryLimitInKb(), options.getUseDefaultNameForUnnamedEntries(), 648 options.getTryToRecoverBrokenArchives()); 649 } 650 651 /** 652 * Reads a SeekableByteChannel as 7z archive 653 * <p> 654 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 655 * </p> 656 * 657 * @param channel the channel to read 658 * @param fileName name of the archive - only used for error reporting 659 * @param password optional password if the archive is encrypted 660 * @throws IOException if reading the archive fails 661 * @since 1.17 662 * @deprecated Use {@link Builder#get()}. 663 */ 664 @Deprecated 665 public SevenZFile(final SeekableByteChannel channel, final String fileName, final char[] password) throws IOException { 666 this(channel, fileName, password, SevenZFileOptions.DEFAULT); 667 } 668 669 /** 670 * Reads a SeekableByteChannel as 7z archive with additional options. 671 * <p> 672 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 673 * </p> 674 * 675 * @param channel the channel to read 676 * @param fileName name of the archive - only used for error reporting 677 * @param password optional password if the archive is encrypted 678 * @param options the options to apply 679 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 680 * @since 1.19 681 * @deprecated Use {@link Builder#get()}. 
682 */ 683 @Deprecated 684 public SevenZFile(final SeekableByteChannel channel, final String fileName, final char[] password, final SevenZFileOptions options) throws IOException { 685 this(channel, fileName, AES256SHA256Decoder.utf16Decode(password), false, options); 686 } 687 688 /** 689 * Reads a SeekableByteChannel as 7z archive with additional options. 690 * <p> 691 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 692 * </p> 693 * 694 * @param channel the channel to read 695 * @param fileName name of the archive - only used for error reporting 696 * @param options the options to apply 697 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 698 * @since 1.19 699 * @deprecated Use {@link Builder#get()}. 700 */ 701 @Deprecated 702 public SevenZFile(final SeekableByteChannel channel, final String fileName, final SevenZFileOptions options) throws IOException { 703 this(channel, fileName, null, false, options); 704 } 705 706 private InputStream buildDecoderStack(final Folder folder, final long folderOffset, final int firstPackStreamIndex, final SevenZArchiveEntry entry) 707 throws IOException { 708 channel.position(folderOffset); 709 InputStream inputStreamStack = new FilterInputStream( 710 new BufferedInputStream(new BoundedSeekableByteChannelInputStream(channel, archive.packSizes[firstPackStreamIndex]))) { 711 private void count(final int c) { 712 compressedBytesReadFromCurrentEntry += c; 713 } 714 715 @Override 716 public int read() throws IOException { 717 final int r = in.read(); 718 if (r >= 0) { 719 count(1); 720 } 721 return r; 722 } 723 724 @Override 725 public int read(final byte[] b) throws IOException { 726 return read(b, 0, b.length); 727 } 728 729 @Override 730 public int read(final byte[] b, final int off, final int len) throws IOException { 731 if (len == 0) { 732 return 0; 733 } 734 final int r = in.read(b, off, len); 735 if (r >= 0) { 736 
count(r); 737 } 738 return r; 739 } 740 }; 741 final LinkedList<SevenZMethodConfiguration> methods = new LinkedList<>(); 742 for (final Coder coder : folder.getOrderedCoders()) { 743 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 744 throw new IOException("Multi input/output stream coders are not yet supported"); 745 } 746 final SevenZMethod method = SevenZMethod.byId(coder.decompressionMethodId); 747 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, folder.getUnpackSizeForCoder(coder), coder, password, maxMemoryLimitKb); 748 methods.addFirst(new SevenZMethodConfiguration(method, Coders.findByMethod(method).getOptionsFromCoder(coder, inputStreamStack))); 749 } 750 entry.setContentMethods(methods); 751 if (folder.hasCrc) { 752 // @formatter:off 753 return ChecksumInputStream.builder() 754 .setChecksum(new CRC32()) 755 .setInputStream(inputStreamStack) 756 .setCountThreshold(folder.getUnpackSize()) 757 .setExpectedChecksumValue(folder.crc) 758 .get(); 759 // @formatter:on 760 } 761 return inputStreamStack; 762 } 763 764 /** 765 * Builds the decoding stream for the entry to be read. This method may be called from a random access(getInputStream) or sequential access(getNextEntry). 
766 * If this method is called from a random access, some entries may need to be skipped(we put them to the deferredBlockStreams and skip them when actually 767 * needed to improve the performance) 768 * 769 * @param entryIndex the index of the entry to be read 770 * @param isRandomAccess is this called in a random access 771 * @throws IOException if there are exceptions when reading the file 772 */ 773 private void buildDecodingStream(final int entryIndex, final boolean isRandomAccess) throws IOException { 774 if (archive.streamMap == null) { 775 throw new IOException("Archive doesn't contain stream information to read entries"); 776 } 777 final int folderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 778 if (folderIndex < 0) { 779 deferredBlockStreams.clear(); 780 // TODO: previously it'd return an empty stream? 781 // new BoundedInputStream(new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY), 0); 782 return; 783 } 784 final SevenZArchiveEntry file = archive.files[entryIndex]; 785 boolean isInSameFolder = false; 786 if (currentFolderIndex == folderIndex) { 787 // (COMPRESS-320). 788 // The current entry is within the same (potentially opened) folder. The 789 // previous stream has to be fully decoded before we can start reading 790 // but don't do it eagerly -- if the user skips over the entire folder nothing 791 // is effectively decompressed. 
792 if (entryIndex > 0) { 793 file.setContentMethods(archive.files[entryIndex - 1].getContentMethods()); 794 } 795 796 // if this is called in a random access, then the content methods of previous entry may be null 797 // the content methods should be set to methods of the first entry as it must not be null, 798 // and the content methods would only be set if the content methods was not set 799 if (isRandomAccess && file.getContentMethods() == null) { 800 final int folderFirstFileIndex = archive.streamMap.folderFirstFileIndex[folderIndex]; 801 final SevenZArchiveEntry folderFirstFile = archive.files[folderFirstFileIndex]; 802 file.setContentMethods(folderFirstFile.getContentMethods()); 803 } 804 isInSameFolder = true; 805 } else { 806 currentFolderIndex = folderIndex; 807 // We're opening a new folder. Discard any queued streams/ folder stream. 808 reopenFolderInputStream(folderIndex, file); 809 } 810 811 boolean haveSkippedEntries = false; 812 if (isRandomAccess) { 813 // entries will only need to be skipped if it's a random access 814 haveSkippedEntries = skipEntriesWhenNeeded(entryIndex, isInSameFolder, folderIndex); 815 } 816 817 if (isRandomAccess && currentEntryIndex == entryIndex && !haveSkippedEntries) { 818 // we don't need to add another entry to the deferredBlockStreams when : 819 // 1. If this method is called in a random access and the entry index 820 // to be read equals to the current entry index, the input stream 821 // has already been put in the deferredBlockStreams 822 // 2. 
If this entry has not been read(which means no entries are skipped) 823 return; 824 } 825 826 InputStream fileStream = BoundedInputStream.builder() 827 .setInputStream(currentFolderInputStream) 828 .setMaxCount(file.getSize()) 829 .setPropagateClose(false) 830 .get(); 831 if (file.getHasCrc()) { 832 // @formatter:off 833 fileStream = ChecksumInputStream.builder() 834 .setChecksum(new CRC32()) 835 .setInputStream(fileStream) 836 .setExpectedChecksumValue(file.getCrcValue()) 837 .get(); 838 // @formatter:on 839 } 840 841 deferredBlockStreams.add(fileStream); 842 } 843 844 private void calculateStreamMap(final Archive archive) throws IOException { 845 int nextFolderPackStreamIndex = 0; 846 final int numFolders = archive.folders != null ? archive.folders.length : 0; 847 final int[] folderFirstPackStreamIndex = new int[numFolders]; 848 for (int i = 0; i < numFolders; i++) { 849 folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex; 850 nextFolderPackStreamIndex += archive.folders[i].packedStreams.length; 851 } 852 long nextPackStreamOffset = 0; 853 final int numPackSizes = archive.packSizes.length; 854 final long[] packStreamOffsets = new long[numPackSizes]; 855 for (int i = 0; i < numPackSizes; i++) { 856 packStreamOffsets[i] = nextPackStreamOffset; 857 nextPackStreamOffset += archive.packSizes[i]; 858 } 859 final int[] folderFirstFileIndex = new int[numFolders]; 860 final int[] fileFolderIndex = new int[archive.files.length]; 861 int nextFolderIndex = 0; 862 int nextFolderUnpackStreamIndex = 0; 863 for (int i = 0; i < archive.files.length; i++) { 864 if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) { 865 fileFolderIndex[i] = -1; 866 continue; 867 } 868 if (nextFolderUnpackStreamIndex == 0) { 869 for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) { 870 folderFirstFileIndex[nextFolderIndex] = i; 871 if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) { 872 break; 873 } 874 } 875 if (nextFolderIndex >= 
archive.folders.length) { 876 throw new IOException("Too few folders in archive"); 877 } 878 } 879 fileFolderIndex[i] = nextFolderIndex; 880 if (!archive.files[i].hasStream()) { 881 continue; 882 } 883 ++nextFolderUnpackStreamIndex; 884 if (nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) { 885 ++nextFolderIndex; 886 nextFolderUnpackStreamIndex = 0; 887 } 888 } 889 archive.streamMap = new StreamMap(folderFirstPackStreamIndex, packStreamOffsets, folderFirstFileIndex, fileFolderIndex); 890 } 891 892 private void checkEntryIsInitialized(final Map<Integer, SevenZArchiveEntry> archiveEntries, final int index) { 893 archiveEntries.computeIfAbsent(index, i -> new SevenZArchiveEntry()); 894 } 895 896 /** 897 * Closes the archive. 898 * 899 * @throws IOException if closing the file fails 900 */ 901 @Override 902 public void close() throws IOException { 903 if (channel != null) { 904 try { 905 channel.close(); 906 } finally { 907 channel = null; 908 if (password != null) { 909 Arrays.fill(password, (byte) 0); 910 } 911 password = null; 912 } 913 } 914 } 915 916 private InputStream getCurrentStream() throws IOException { 917 if (archive.files[currentEntryIndex].getSize() == 0) { 918 return new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY); 919 } 920 if (deferredBlockStreams.isEmpty()) { 921 throw new IllegalStateException("No current 7z entry (call getNextEntry() first)."); 922 } 923 while (deferredBlockStreams.size() > 1) { 924 // In solid compression mode we need to decompress all leading folder' 925 // streams to get access to an entry. We defer this until really needed 926 // so that entire blocks can be skipped without wasting time for decompression. 
927 try (InputStream stream = deferredBlockStreams.remove(0)) { 928 org.apache.commons.io.IOUtils.skip(stream, Long.MAX_VALUE, org.apache.commons.io.IOUtils::byteArray); 929 } 930 compressedBytesReadFromCurrentEntry = 0; 931 } 932 return deferredBlockStreams.get(0); 933 } 934 935 /** 936 * Gets a default file name from the archive name - if known. 937 * <p> 938 * This implements the same heuristics the 7z tools use. In 7z's case if an archive contains entries without a name - i.e. 939 * {@link SevenZArchiveEntry#getName} returns {@code null} - then its command line and GUI tools will use this default name when extracting the entries. 940 * </p> 941 * 942 * @return null if the name of the archive is unknown. Otherwise, if the name of the archive has got any extension, it is stripped and the remainder 943 * returned. Finally, if the name of the archive hasn't got any extension, then a {@code ~} character is appended to the archive name. 944 * @since 1.19 945 */ 946 public String getDefaultName() { 947 if (DEFAULT_FILE_NAME.equals(fileName) || fileName == null) { 948 return null; 949 } 950 951 final String lastSegment = new File(fileName).getName(); 952 final int dotPos = lastSegment.lastIndexOf("."); 953 if (dotPos > 0) { // if the file starts with a dot then this is not an extension 954 return lastSegment.substring(0, dotPos); 955 } 956 return lastSegment + "~"; 957 } 958 959 /** 960 * Gets a copy of meta-data of all archive entries. 961 * <p> 962 * This method only provides meta-data, the entries can not be used to read the contents, you still need to process all entries in order using 963 * {@link #getNextEntry} for that. 964 * </p> 965 * <p> 966 * The content methods are only available for entries that have already been reached via {@link #getNextEntry}. 967 * </p> 968 * 969 * @return a copy of meta-data of all archive entries. 
970 * @since 1.11 971 */ 972 public Iterable<SevenZArchiveEntry> getEntries() { 973 return new ArrayList<>(Arrays.asList(archive.files)); 974 } 975 976 /** 977 * Gets an InputStream for reading the contents of the given entry. 978 * <p> 979 * For archives using solid compression randomly accessing entries will be significantly slower than reading the archive sequentially. 980 * </p> 981 * 982 * @param entry the entry to get the stream for. 983 * @return a stream to read the entry from. 984 * @throws IOException if unable to create an input stream from the entry 985 * @since 1.20 986 */ 987 public InputStream getInputStream(final SevenZArchiveEntry entry) throws IOException { 988 int entryIndex = -1; 989 for (int i = 0; i < archive.files.length; i++) { 990 if (entry == archive.files[i]) { 991 entryIndex = i; 992 break; 993 } 994 } 995 996 if (entryIndex < 0) { 997 throw new IllegalArgumentException("Can not find " + entry.getName() + " in " + fileName); 998 } 999 1000 buildDecodingStream(entryIndex, true); 1001 currentEntryIndex = entryIndex; 1002 currentFolderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 1003 return getCurrentStream(); 1004 } 1005 1006 /** 1007 * Gets the next Archive Entry in this archive. 1008 * 1009 * @return the next entry, or {@code null} if there are no more entries 1010 * @throws IOException if the next entry could not be read 1011 */ 1012 public SevenZArchiveEntry getNextEntry() throws IOException { 1013 if (currentEntryIndex >= archive.files.length - 1) { 1014 return null; 1015 } 1016 ++currentEntryIndex; 1017 final SevenZArchiveEntry entry = archive.files[currentEntryIndex]; 1018 if (entry.getName() == null && useDefaultNameForUnnamedEntries) { 1019 entry.setName(getDefaultName()); 1020 } 1021 buildDecodingStream(currentEntryIndex, false); 1022 uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0; 1023 return entry; 1024 } 1025 1026 /** 1027 * Gets statistics for bytes read from the current entry. 
1028 * 1029 * @return statistics for bytes read from the current entry 1030 * @since 1.17 1031 */ 1032 public InputStreamStatistics getStatisticsForCurrentEntry() { 1033 return new InputStreamStatistics() { 1034 @Override 1035 public long getCompressedCount() { 1036 return compressedBytesReadFromCurrentEntry; 1037 } 1038 1039 @Override 1040 public long getUncompressedCount() { 1041 return uncompressedBytesReadFromCurrentEntry; 1042 } 1043 }; 1044 } 1045 1046 /** 1047 * Tests if any data of current entry has been read or not. This is achieved by comparing the bytes remaining to read and the size of the file. 1048 * 1049 * @return true if any data of current entry has been read 1050 * @since 1.21 1051 */ 1052 private boolean hasCurrentEntryBeenRead() { 1053 boolean hasCurrentEntryBeenRead = false; 1054 if (!deferredBlockStreams.isEmpty()) { 1055 final InputStream currentEntryInputStream = deferredBlockStreams.get(deferredBlockStreams.size() - 1); 1056 // get the bytes remaining to read, and compare it with the size of 1057 // the file to figure out if the file has been read 1058 if (currentEntryInputStream instanceof ChecksumInputStream) { 1059 hasCurrentEntryBeenRead = ((ChecksumInputStream) currentEntryInputStream).getRemaining() != archive.files[currentEntryIndex].getSize(); 1060 } else if (currentEntryInputStream instanceof BoundedInputStream) { 1061 hasCurrentEntryBeenRead = ((BoundedInputStream) currentEntryInputStream).getRemaining() != archive.files[currentEntryIndex].getSize(); 1062 } 1063 } 1064 return hasCurrentEntryBeenRead; 1065 } 1066 1067 private Archive initializeArchive(final StartHeader startHeader, final byte[] password, final boolean verifyCrc) throws IOException { 1068 assertFitsIntoNonNegativeInt("nextHeaderSize", startHeader.nextHeaderSize); 1069 final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize; 1070 channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset); 1071 if (verifyCrc) { 1072 final long position = 
channel.position(); 1073 final CheckedInputStream cis = new CheckedInputStream(Channels.newInputStream(channel), new CRC32()); 1074 if (cis.skip(nextHeaderSizeInt) != nextHeaderSizeInt) { 1075 throw new IOException("Problem computing NextHeader CRC-32"); 1076 } 1077 if (startHeader.nextHeaderCrc != cis.getChecksum().getValue()) { 1078 throw new IOException("NextHeader CRC-32 mismatch"); 1079 } 1080 channel.position(position); 1081 } 1082 Archive archive = new Archive(); 1083 ByteBuffer buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN); 1084 readFully(buf); 1085 int nid = getUnsignedByte(buf); 1086 if (nid == NID.kEncodedHeader) { 1087 buf = readEncodedHeader(buf, archive, password); 1088 // Archive gets rebuilt with the new header 1089 archive = new Archive(); 1090 nid = getUnsignedByte(buf); 1091 } 1092 if (nid != NID.kHeader) { 1093 throw new IOException("Broken or unsupported archive: no Header"); 1094 } 1095 readHeader(buf, archive); 1096 archive.subStreamsInfo = null; 1097 return archive; 1098 } 1099 1100 /** 1101 * Reads a byte of data. 1102 * 1103 * @return the byte read, or -1 if end of input is reached 1104 * @throws IOException if an I/O error has occurred 1105 */ 1106 public int read() throws IOException { 1107 final int b = getCurrentStream().read(); 1108 if (b >= 0) { 1109 uncompressedBytesReadFromCurrentEntry++; 1110 } 1111 return b; 1112 } 1113 1114 /** 1115 * Reads data into an array of bytes. 1116 * 1117 * @param b the array to write data to 1118 * @return the number of bytes read, or -1 if end of input is reached 1119 * @throws IOException if an I/O error has occurred 1120 */ 1121 public int read(final byte[] b) throws IOException { 1122 return read(b, 0, b.length); 1123 } 1124 1125 /** 1126 * Reads data into an array of bytes. 
1127 * 1128 * @param b the array to write data to 1129 * @param off offset into the buffer to start filling at 1130 * @param len of bytes to read 1131 * @return the number of bytes read, or -1 if end of input is reached 1132 * @throws IOException if an I/O error has occurred 1133 */ 1134 public int read(final byte[] b, final int off, final int len) throws IOException { 1135 if (len == 0) { 1136 return 0; 1137 } 1138 final int cnt = getCurrentStream().read(b, off, len); 1139 if (cnt > 0) { 1140 uncompressedBytesReadFromCurrentEntry += cnt; 1141 } 1142 return cnt; 1143 } 1144 1145 private BitSet readAllOrBits(final ByteBuffer header, final int size) throws IOException { 1146 final int areAllDefined = getUnsignedByte(header); 1147 final BitSet bits; 1148 if (areAllDefined != 0) { 1149 bits = new BitSet(size); 1150 for (int i = 0; i < size; i++) { 1151 bits.set(i, true); 1152 } 1153 } else { 1154 bits = readBits(header, size); 1155 } 1156 return bits; 1157 } 1158 1159 private void readArchiveProperties(final ByteBuffer input) throws IOException { 1160 // FIXME: the reference implementation just throws them away? 
1161 long nid = readUint64(input); 1162 while (nid != NID.kEnd) { 1163 final long propertySize = readUint64(input); 1164 final byte[] property = new byte[(int) propertySize]; 1165 get(input, property); 1166 nid = readUint64(input); 1167 } 1168 } 1169 1170 private BitSet readBits(final ByteBuffer header, final int size) throws IOException { 1171 final BitSet bits = new BitSet(size); 1172 int mask = 0; 1173 int cache = 0; 1174 for (int i = 0; i < size; i++) { 1175 if (mask == 0) { 1176 mask = 0x80; 1177 cache = getUnsignedByte(header); 1178 } 1179 bits.set(i, (cache & mask) != 0); 1180 mask >>>= 1; 1181 } 1182 return bits; 1183 } 1184 1185 private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive archive, final byte[] password) throws IOException { 1186 final int pos = header.position(); 1187 final ArchiveStatistics stats = new ArchiveStatistics(); 1188 sanityCheckStreamsInfo(header, stats); 1189 stats.assertValidity(maxMemoryLimitKb); 1190 header.position(pos); 1191 1192 readStreamsInfo(header, archive); 1193 1194 if (archive.folders == null || archive.folders.length == 0) { 1195 throw new IOException("no folders, can't read encoded header"); 1196 } 1197 if (archive.packSizes == null || archive.packSizes.length == 0) { 1198 throw new IOException("no packed streams, can't read encoded header"); 1199 } 1200 1201 // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage? 
1202 final Folder folder = archive.folders[0]; 1203 final int firstPackStreamIndex = 0; 1204 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 0; 1205 1206 channel.position(folderOffset); 1207 InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel, archive.packSizes[firstPackStreamIndex]); 1208 for (final Coder coder : folder.getOrderedCoders()) { 1209 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 1210 throw new IOException("Multi input/output stream coders are not yet supported"); 1211 } 1212 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, // NOSONAR 1213 folder.getUnpackSizeForCoder(coder), coder, password, maxMemoryLimitKb); 1214 } 1215 if (folder.hasCrc) { 1216 // @formatter:off 1217 inputStreamStack = ChecksumInputStream.builder() 1218 .setChecksum(new CRC32()) 1219 .setInputStream(inputStreamStack) 1220 .setCountThreshold(folder.getUnpackSize()) 1221 .setExpectedChecksumValue(folder.crc) 1222 .get(); 1223 // @formatter:on 1224 } 1225 final int unpackSize = assertFitsIntoNonNegativeInt("unpackSize", folder.getUnpackSize()); 1226 final byte[] nextHeader = IOUtils.readRange(inputStreamStack, unpackSize); 1227 if (nextHeader.length < unpackSize) { 1228 throw new IOException("premature end of stream"); 1229 } 1230 inputStreamStack.close(); 1231 return ByteBuffer.wrap(nextHeader).order(ByteOrder.LITTLE_ENDIAN); 1232 } 1233 1234 private void readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException { 1235 final int numFilesInt = (int) readUint64(header); 1236 final Map<Integer, SevenZArchiveEntry> fileMap = new LinkedHashMap<>(); 1237 BitSet isEmptyStream = null; 1238 BitSet isEmptyFile = null; 1239 BitSet isAnti = null; 1240 while (true) { 1241 final int propertyType = getUnsignedByte(header); 1242 if (propertyType == 0) { 1243 break; 1244 } 1245 final long size = readUint64(header); 1246 switch (propertyType) { 1247 case NID.kEmptyStream: { 1248 isEmptyStream = 
readBits(header, numFilesInt); 1249 break; 1250 } 1251 case NID.kEmptyFile: { 1252 isEmptyFile = readBits(header, isEmptyStream.cardinality()); 1253 break; 1254 } 1255 case NID.kAnti: { 1256 isAnti = readBits(header, isEmptyStream.cardinality()); 1257 break; 1258 } 1259 case NID.kName: { 1260 /* final int external = */ getUnsignedByte(header); 1261 final byte[] names = new byte[(int) (size - 1)]; 1262 final int namesLength = names.length; 1263 get(header, names); 1264 int nextFile = 0; 1265 int nextName = 0; 1266 for (int i = 0; i < namesLength; i += 2) { 1267 if (names[i] == 0 && names[i + 1] == 0) { 1268 checkEntryIsInitialized(fileMap, nextFile); 1269 fileMap.get(nextFile).setName(new String(names, nextName, i - nextName, UTF_16LE)); 1270 nextName = i + 2; 1271 nextFile++; 1272 } 1273 } 1274 if (nextName != namesLength || nextFile != numFilesInt) { 1275 throw new IOException("Error parsing file names"); 1276 } 1277 break; 1278 } 1279 case NID.kCTime: { 1280 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1281 /* final int external = */ getUnsignedByte(header); 1282 for (int i = 0; i < numFilesInt; i++) { 1283 checkEntryIsInitialized(fileMap, i); 1284 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1285 entryAtIndex.setHasCreationDate(timesDefined.get(i)); 1286 if (entryAtIndex.getHasCreationDate()) { 1287 entryAtIndex.setCreationDate(getLong(header)); 1288 } 1289 } 1290 break; 1291 } 1292 case NID.kATime: { 1293 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1294 /* final int external = */ getUnsignedByte(header); 1295 for (int i = 0; i < numFilesInt; i++) { 1296 checkEntryIsInitialized(fileMap, i); 1297 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1298 entryAtIndex.setHasAccessDate(timesDefined.get(i)); 1299 if (entryAtIndex.getHasAccessDate()) { 1300 entryAtIndex.setAccessDate(getLong(header)); 1301 } 1302 } 1303 break; 1304 } 1305 case NID.kMTime: { 1306 final BitSet timesDefined = readAllOrBits(header, 
numFilesInt); 1307 /* final int external = */ getUnsignedByte(header); 1308 for (int i = 0; i < numFilesInt; i++) { 1309 checkEntryIsInitialized(fileMap, i); 1310 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1311 entryAtIndex.setHasLastModifiedDate(timesDefined.get(i)); 1312 if (entryAtIndex.getHasLastModifiedDate()) { 1313 entryAtIndex.setLastModifiedDate(getLong(header)); 1314 } 1315 } 1316 break; 1317 } 1318 case NID.kWinAttributes: { 1319 final BitSet attributesDefined = readAllOrBits(header, numFilesInt); 1320 /* final int external = */ getUnsignedByte(header); 1321 for (int i = 0; i < numFilesInt; i++) { 1322 checkEntryIsInitialized(fileMap, i); 1323 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1324 entryAtIndex.setHasWindowsAttributes(attributesDefined.get(i)); 1325 if (entryAtIndex.getHasWindowsAttributes()) { 1326 entryAtIndex.setWindowsAttributes(getInt(header)); 1327 } 1328 } 1329 break; 1330 } 1331 case NID.kDummy: { 1332 // 7z 9.20 asserts the content is all zeros and ignores the property 1333 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1334 1335 skipBytesFully(header, size); 1336 break; 1337 } 1338 1339 default: { 1340 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1341 skipBytesFully(header, size); 1342 break; 1343 } 1344 } 1345 } 1346 int nonEmptyFileCounter = 0; 1347 int emptyFileCounter = 0; 1348 for (int i = 0; i < numFilesInt; i++) { 1349 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1350 if (entryAtIndex == null) { 1351 continue; 1352 } 1353 entryAtIndex.setHasStream(isEmptyStream == null || !isEmptyStream.get(i)); 1354 if (entryAtIndex.hasStream()) { 1355 if (archive.subStreamsInfo == null) { 1356 throw new IOException("Archive contains file with streams but no subStreamsInfo"); 1357 } 1358 entryAtIndex.setDirectory(false); 1359 entryAtIndex.setAntiItem(false); 1360 
entryAtIndex.setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter)); 1361 entryAtIndex.setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]); 1362 entryAtIndex.setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]); 1363 if (entryAtIndex.getSize() < 0) { 1364 throw new IOException("broken archive, entry with negative size"); 1365 } 1366 ++nonEmptyFileCounter; 1367 } else { 1368 entryAtIndex.setDirectory(isEmptyFile == null || !isEmptyFile.get(emptyFileCounter)); 1369 entryAtIndex.setAntiItem(isAnti != null && isAnti.get(emptyFileCounter)); 1370 entryAtIndex.setHasCrc(false); 1371 entryAtIndex.setSize(0); 1372 ++emptyFileCounter; 1373 } 1374 } 1375 archive.files = fileMap.values().stream().filter(Objects::nonNull).toArray(SevenZArchiveEntry[]::new); 1376 calculateStreamMap(archive); 1377 } 1378 1379 private Folder readFolder(final ByteBuffer header) throws IOException { 1380 final Folder folder = new Folder(); 1381 1382 final long numCoders = readUint64(header); 1383 final Coder[] coders = new Coder[(int) numCoders]; 1384 long totalInStreams = 0; 1385 long totalOutStreams = 0; 1386 for (int i = 0; i < coders.length; i++) { 1387 final int bits = getUnsignedByte(header); 1388 final int idSize = bits & 0xf; 1389 final boolean isSimple = (bits & 0x10) == 0; 1390 final boolean hasAttributes = (bits & 0x20) != 0; 1391 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 1392 1393 final byte[] decompressionMethodId = new byte[idSize]; 1394 get(header, decompressionMethodId); 1395 final long numInStreams; 1396 final long numOutStreams; 1397 if (isSimple) { 1398 numInStreams = 1; 1399 numOutStreams = 1; 1400 } else { 1401 numInStreams = readUint64(header); 1402 numOutStreams = readUint64(header); 1403 } 1404 totalInStreams += numInStreams; 1405 totalOutStreams += numOutStreams; 1406 byte[] properties = null; 1407 if (hasAttributes) { 1408 final long propertiesSize = readUint64(header); 1409 properties = new byte[(int) propertiesSize]; 1410 
get(header, properties); 1411 } 1412 // would need to keep looping as above: 1413 if (moreAlternativeMethods) { 1414 throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR 1415 "The reference implementation doesn't support them either."); 1416 } 1417 coders[i] = new Coder(decompressionMethodId, numInStreams, numOutStreams, properties); 1418 } 1419 folder.coders = coders; 1420 folder.totalInputStreams = totalInStreams; 1421 folder.totalOutputStreams = totalOutStreams; 1422 1423 final long numBindPairs = totalOutStreams - 1; 1424 final BindPair[] bindPairs = new BindPair[(int) numBindPairs]; 1425 for (int i = 0; i < bindPairs.length; i++) { 1426 bindPairs[i] = new BindPair(readUint64(header), readUint64(header)); 1427 } 1428 folder.bindPairs = bindPairs; 1429 1430 final long numPackedStreams = totalInStreams - numBindPairs; 1431 final long[] packedStreams = new long[(int) numPackedStreams]; 1432 if (numPackedStreams == 1) { 1433 int i; 1434 for (i = 0; i < (int) totalInStreams; i++) { 1435 if (folder.findBindPairForInStream(i) < 0) { 1436 break; 1437 } 1438 } 1439 packedStreams[0] = i; 1440 } else { 1441 for (int i = 0; i < (int) numPackedStreams; i++) { 1442 packedStreams[i] = readUint64(header); 1443 } 1444 } 1445 folder.packedStreams = packedStreams; 1446 1447 return folder; 1448 } 1449 1450 private void readFully(final ByteBuffer buf) throws IOException { 1451 buf.rewind(); 1452 IOUtils.readFully(channel, buf); 1453 buf.flip(); 1454 } 1455 1456 private void readHeader(final ByteBuffer header, final Archive archive) throws IOException { 1457 final int pos = header.position(); 1458 final ArchiveStatistics stats = sanityCheckAndCollectStatistics(header); 1459 stats.assertValidity(maxMemoryLimitKb); 1460 header.position(pos); 1461 1462 int nid = getUnsignedByte(header); 1463 1464 if (nid == NID.kArchiveProperties) { 1465 readArchiveProperties(header); 1466 nid = getUnsignedByte(header); 1467 } 1468 1469 if (nid == 
NID.kAdditionalStreamsInfo) { 1470 throw new IOException("Additional streams unsupported"); 1471 // nid = getUnsignedByte(header); 1472 } 1473 1474 if (nid == NID.kMainStreamsInfo) { 1475 readStreamsInfo(header, archive); 1476 nid = getUnsignedByte(header); 1477 } 1478 1479 if (nid == NID.kFilesInfo) { 1480 readFilesInfo(header, archive); 1481 nid = getUnsignedByte(header); 1482 } 1483 } 1484 1485 private Archive readHeaders(final byte[] password) throws IOException { 1486 final ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */).order(ByteOrder.LITTLE_ENDIAN); 1487 readFully(buf); 1488 final byte[] signature = new byte[6]; 1489 buf.get(signature); 1490 if (!Arrays.equals(signature, sevenZSignature)) { 1491 throw new IOException("Bad 7z signature"); 1492 } 1493 // 7zFormat.txt has it wrong - it's first major then minor 1494 final byte archiveVersionMajor = buf.get(); 1495 final byte archiveVersionMinor = buf.get(); 1496 if (archiveVersionMajor != 0) { 1497 throw new IOException(String.format("Unsupported 7z version (%d,%d)", archiveVersionMajor, archiveVersionMinor)); 1498 } 1499 1500 boolean headerLooksValid = false; // See https://www.7-zip.org/recover.html - "There is no correct End Header at the end of archive" 1501 final long startHeaderCrc = 0xffffFFFFL & buf.getInt(); 1502 if (startHeaderCrc == 0) { 1503 // This is an indication of a corrupt header - peek the next 20 bytes 1504 final long currentPosition = channel.position(); 1505 final ByteBuffer peekBuf = ByteBuffer.allocate(20); 1506 readFully(peekBuf); 1507 channel.position(currentPosition); 1508 // Header invalid if all data is 0 1509 while (peekBuf.hasRemaining()) { 1510 if (peekBuf.get() != 0) { 1511 headerLooksValid = true; 1512 break; 1513 } 1514 } 1515 } else { 1516 headerLooksValid = true; 1517 } 1518 1519 if (headerLooksValid) { 1520 return initializeArchive(readStartHeader(startHeaderCrc), password, true); 1521 } 1522 // No valid header found - probably first 
file of multipart archive was removed too early. Scan for end header. 1523 if (tryToRecoverBrokenArchives) { 1524 return tryToLocateEndHeader(password); 1525 } 1526 throw new IOException("archive seems to be invalid.\nYou may want to retry and enable the" 1527 + " tryToRecoverBrokenArchives if the archive could be a multi volume archive that has been closed" + " prematurely."); 1528 } 1529 1530 private void readPackInfo(final ByteBuffer header, final Archive archive) throws IOException { 1531 archive.packPos = readUint64(header); 1532 final int numPackStreamsInt = (int) readUint64(header); 1533 int nid = getUnsignedByte(header); 1534 if (nid == NID.kSize) { 1535 archive.packSizes = new long[numPackStreamsInt]; 1536 for (int i = 0; i < archive.packSizes.length; i++) { 1537 archive.packSizes[i] = readUint64(header); 1538 } 1539 nid = getUnsignedByte(header); 1540 } 1541 1542 if (nid == NID.kCRC) { 1543 archive.packCrcsDefined = readAllOrBits(header, numPackStreamsInt); 1544 archive.packCrcs = new long[numPackStreamsInt]; 1545 for (int i = 0; i < numPackStreamsInt; i++) { 1546 if (archive.packCrcsDefined.get(i)) { 1547 archive.packCrcs[i] = 0xffffFFFFL & getInt(header); 1548 } 1549 } 1550 // read one more 1551 getUnsignedByte(header); 1552 } 1553 } 1554 1555 private StartHeader readStartHeader(final long startHeaderCrc) throws IOException { 1556 // using Stream rather than ByteBuffer for the benefit of the built-in CRC check 1557 try (DataInputStream dataInputStream = new DataInputStream(ChecksumInputStream.builder() 1558 // @formatter:off 1559 .setChecksum(new CRC32()) 1560 .setInputStream(new BoundedSeekableByteChannelInputStream(channel, 20)) 1561 .setCountThreshold(20L) 1562 .setExpectedChecksumValue(startHeaderCrc) 1563 .get())) { 1564 // @formatter:on 1565 final long nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong()); 1566 if (nextHeaderOffset < 0 || nextHeaderOffset + SIGNATURE_HEADER_SIZE > channel.size()) { 1567 throw new 
IOException("nextHeaderOffset is out of bounds"); 1568 } 1569 final long nextHeaderSize = Long.reverseBytes(dataInputStream.readLong()); 1570 final long nextHeaderEnd = nextHeaderOffset + nextHeaderSize; 1571 if (nextHeaderEnd < nextHeaderOffset || nextHeaderEnd + SIGNATURE_HEADER_SIZE > channel.size()) { 1572 throw new IOException("nextHeaderSize is out of bounds"); 1573 } 1574 final long nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt()); 1575 return new StartHeader(nextHeaderOffset, nextHeaderSize, nextHeaderCrc); 1576 } 1577 } 1578 1579 private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 1580 int nid = getUnsignedByte(header); 1581 1582 if (nid == NID.kPackInfo) { 1583 readPackInfo(header, archive); 1584 nid = getUnsignedByte(header); 1585 } 1586 1587 if (nid == NID.kUnpackInfo) { 1588 readUnpackInfo(header, archive); 1589 nid = getUnsignedByte(header); 1590 } else { 1591 // archive without unpack/coders info 1592 archive.folders = Folder.EMPTY_FOLDER_ARRAY; 1593 } 1594 1595 if (nid == NID.kSubStreamsInfo) { 1596 readSubStreamsInfo(header, archive); 1597 nid = getUnsignedByte(header); 1598 } 1599 } 1600 1601 private void readSubStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 1602 for (final Folder folder : archive.folders) { 1603 folder.numUnpackSubStreams = 1; 1604 } 1605 long unpackStreamsCount = archive.folders.length; 1606 1607 int nid = getUnsignedByte(header); 1608 if (nid == NID.kNumUnpackStream) { 1609 unpackStreamsCount = 0; 1610 for (final Folder folder : archive.folders) { 1611 final long numStreams = readUint64(header); 1612 folder.numUnpackSubStreams = (int) numStreams; 1613 unpackStreamsCount += numStreams; 1614 } 1615 nid = getUnsignedByte(header); 1616 } 1617 1618 final int totalUnpackStreams = (int) unpackStreamsCount; 1619 final SubStreamsInfo subStreamsInfo = new SubStreamsInfo(totalUnpackStreams); 1620 int nextUnpackStream = 0; 1621 for 
(final Folder folder : archive.folders) { 1622 if (folder.numUnpackSubStreams == 0) { 1623 continue; 1624 } 1625 long sum = 0; 1626 if (nid == NID.kSize) { 1627 for (int i = 0; i < folder.numUnpackSubStreams - 1; i++) { 1628 final long size = readUint64(header); 1629 subStreamsInfo.unpackSizes[nextUnpackStream++] = size; 1630 sum += size; 1631 } 1632 } 1633 if (sum > folder.getUnpackSize()) { 1634 throw new IOException("sum of unpack sizes of folder exceeds total unpack size"); 1635 } 1636 subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum; 1637 } 1638 if (nid == NID.kSize) { 1639 nid = getUnsignedByte(header); 1640 } 1641 1642 int numDigests = 0; 1643 for (final Folder folder : archive.folders) { 1644 if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) { 1645 numDigests += folder.numUnpackSubStreams; 1646 } 1647 } 1648 1649 if (nid == NID.kCRC) { 1650 final BitSet hasMissingCrc = readAllOrBits(header, numDigests); 1651 final long[] missingCrcs = new long[numDigests]; 1652 for (int i = 0; i < numDigests; i++) { 1653 if (hasMissingCrc.get(i)) { 1654 missingCrcs[i] = 0xffffFFFFL & getInt(header); 1655 } 1656 } 1657 int nextCrc = 0; 1658 int nextMissingCrc = 0; 1659 for (final Folder folder : archive.folders) { 1660 if (folder.numUnpackSubStreams == 1 && folder.hasCrc) { 1661 subStreamsInfo.hasCrc.set(nextCrc, true); 1662 subStreamsInfo.crcs[nextCrc] = folder.crc; 1663 ++nextCrc; 1664 } else { 1665 for (int i = 0; i < folder.numUnpackSubStreams; i++) { 1666 subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc)); 1667 subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc]; 1668 ++nextCrc; 1669 ++nextMissingCrc; 1670 } 1671 } 1672 } 1673 1674 nid = getUnsignedByte(header); 1675 } 1676 1677 archive.subStreamsInfo = subStreamsInfo; 1678 } 1679 1680 private void readUnpackInfo(final ByteBuffer header, final Archive archive) throws IOException { 1681 int nid = getUnsignedByte(header); 1682 final int numFoldersInt = (int) 
readUint64(header); 1683 final Folder[] folders = new Folder[numFoldersInt]; 1684 archive.folders = folders; 1685 /* final int external = */ getUnsignedByte(header); 1686 for (int i = 0; i < numFoldersInt; i++) { 1687 folders[i] = readFolder(header); 1688 } 1689 1690 nid = getUnsignedByte(header); 1691 for (final Folder folder : folders) { 1692 assertFitsIntoNonNegativeInt("totalOutputStreams", folder.totalOutputStreams); 1693 folder.unpackSizes = new long[(int) folder.totalOutputStreams]; 1694 for (int i = 0; i < folder.totalOutputStreams; i++) { 1695 folder.unpackSizes[i] = readUint64(header); 1696 } 1697 } 1698 1699 nid = getUnsignedByte(header); 1700 if (nid == NID.kCRC) { 1701 final BitSet crcsDefined = readAllOrBits(header, numFoldersInt); 1702 for (int i = 0; i < numFoldersInt; i++) { 1703 if (crcsDefined.get(i)) { 1704 folders[i].hasCrc = true; 1705 folders[i].crc = 0xffffFFFFL & getInt(header); 1706 } else { 1707 folders[i].hasCrc = false; 1708 } 1709 } 1710 1711 nid = getUnsignedByte(header); 1712 } 1713 } 1714 1715 /** 1716 * Discard any queued streams/ folder stream, and reopen the current folder input stream. 
1717 * 1718 * @param folderIndex the index of the folder to reopen 1719 * @param file the 7z entry to read 1720 * @throws IOException if exceptions occur when reading the 7z file 1721 */ 1722 private void reopenFolderInputStream(final int folderIndex, final SevenZArchiveEntry file) throws IOException { 1723 deferredBlockStreams.clear(); 1724 if (currentFolderInputStream != null) { 1725 currentFolderInputStream.close(); 1726 currentFolderInputStream = null; 1727 } 1728 final Folder folder = archive.folders[folderIndex]; 1729 final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex]; 1730 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + archive.streamMap.packStreamOffsets[firstPackStreamIndex]; 1731 1732 currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file); 1733 } 1734 1735 private ArchiveStatistics sanityCheckAndCollectStatistics(final ByteBuffer header) throws IOException { 1736 final ArchiveStatistics stats = new ArchiveStatistics(); 1737 1738 int nid = getUnsignedByte(header); 1739 1740 if (nid == NID.kArchiveProperties) { 1741 sanityCheckArchiveProperties(header); 1742 nid = getUnsignedByte(header); 1743 } 1744 1745 if (nid == NID.kAdditionalStreamsInfo) { 1746 throw new IOException("Additional streams unsupported"); 1747 // nid = getUnsignedByte(header); 1748 } 1749 1750 if (nid == NID.kMainStreamsInfo) { 1751 sanityCheckStreamsInfo(header, stats); 1752 nid = getUnsignedByte(header); 1753 } 1754 1755 if (nid == NID.kFilesInfo) { 1756 sanityCheckFilesInfo(header, stats); 1757 nid = getUnsignedByte(header); 1758 } 1759 1760 if (nid != NID.kEnd) { 1761 throw new IOException("Badly terminated header, found " + nid); 1762 } 1763 1764 return stats; 1765 } 1766 1767 private void sanityCheckArchiveProperties(final ByteBuffer header) throws IOException { 1768 long nid = readUint64(header); 1769 while (nid != NID.kEnd) { 1770 final int propertySize = 
assertFitsIntoNonNegativeInt("propertySize", readUint64(header)); 1771 if (skipBytesFully(header, propertySize) < propertySize) { 1772 throw new IOException("invalid property size"); 1773 } 1774 nid = readUint64(header); 1775 } 1776 } 1777 1778 private void sanityCheckFilesInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1779 stats.numberOfEntries = assertFitsIntoNonNegativeInt("numFiles", readUint64(header)); 1780 1781 int emptyStreams = -1; 1782 while (true) { 1783 final int propertyType = getUnsignedByte(header); 1784 if (propertyType == 0) { 1785 break; 1786 } 1787 final long size = readUint64(header); 1788 switch (propertyType) { 1789 case NID.kEmptyStream: { 1790 emptyStreams = readBits(header, stats.numberOfEntries).cardinality(); 1791 break; 1792 } 1793 case NID.kEmptyFile: { 1794 if (emptyStreams == -1) { 1795 throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile"); 1796 } 1797 readBits(header, emptyStreams); 1798 break; 1799 } 1800 case NID.kAnti: { 1801 if (emptyStreams == -1) { 1802 throw new IOException("Header format error: kEmptyStream must appear before kAnti"); 1803 } 1804 readBits(header, emptyStreams); 1805 break; 1806 } 1807 case NID.kName: { 1808 final int external = getUnsignedByte(header); 1809 if (external != 0) { 1810 throw new IOException("Not implemented"); 1811 } 1812 final int namesLength = assertFitsIntoNonNegativeInt("file names length", size - 1); 1813 if ((namesLength & 1) != 0) { 1814 throw new IOException("File names length invalid"); 1815 } 1816 1817 int filesSeen = 0; 1818 for (int i = 0; i < namesLength; i += 2) { 1819 final char c = getChar(header); 1820 if (c == 0) { 1821 filesSeen++; 1822 } 1823 } 1824 if (filesSeen != stats.numberOfEntries) { 1825 throw new IOException("Invalid number of file names (" + filesSeen + " instead of " + stats.numberOfEntries + ")"); 1826 } 1827 break; 1828 } 1829 case NID.kCTime: { 1830 final int timesDefined = 
readAllOrBits(header, stats.numberOfEntries).cardinality(); 1831 final int external = getUnsignedByte(header); 1832 if (external != 0) { 1833 throw new IOException("Not implemented"); 1834 } 1835 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1836 throw new IOException("invalid creation dates size"); 1837 } 1838 break; 1839 } 1840 case NID.kATime: { 1841 final int timesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality(); 1842 final int external = getUnsignedByte(header); 1843 if (external != 0) { 1844 throw new IOException("Not implemented"); 1845 } 1846 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1847 throw new IOException("invalid access dates size"); 1848 } 1849 break; 1850 } 1851 case NID.kMTime: { 1852 final int timesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality(); 1853 final int external = getUnsignedByte(header); 1854 if (external != 0) { 1855 throw new IOException("Not implemented"); 1856 } 1857 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1858 throw new IOException("invalid modification dates size"); 1859 } 1860 break; 1861 } 1862 case NID.kWinAttributes: { 1863 final int attributesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality(); 1864 final int external = getUnsignedByte(header); 1865 if (external != 0) { 1866 throw new IOException("Not implemented"); 1867 } 1868 if (skipBytesFully(header, 4 * attributesDefined) < 4 * attributesDefined) { 1869 throw new IOException("invalid windows attributes size"); 1870 } 1871 break; 1872 } 1873 case NID.kStartPos: { 1874 throw new IOException("kStartPos is unsupported, please report"); 1875 } 1876 case NID.kDummy: { 1877 // 7z 9.20 asserts the content is all zeros and ignores the property 1878 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1879 1880 if (skipBytesFully(header, size) < size) { 1881 throw new IOException("Incomplete kDummy property"); 1882 } 1883 
break; 1884 } 1885 1886 default: { 1887 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1888 if (skipBytesFully(header, size) < size) { 1889 throw new IOException("Incomplete property of type " + propertyType); 1890 } 1891 break; 1892 } 1893 } 1894 } 1895 stats.numberOfEntriesWithStream = stats.numberOfEntries - Math.max(emptyStreams, 0); 1896 } 1897 1898 private int sanityCheckFolder(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1899 1900 final int numCoders = assertFitsIntoNonNegativeInt("numCoders", readUint64(header)); 1901 if (numCoders == 0) { 1902 throw new IOException("Folder without coders"); 1903 } 1904 stats.numberOfCoders += numCoders; 1905 1906 long totalOutStreams = 0; 1907 long totalInStreams = 0; 1908 for (int i = 0; i < numCoders; i++) { 1909 final int bits = getUnsignedByte(header); 1910 final int idSize = bits & 0xf; 1911 get(header, new byte[idSize]); 1912 1913 final boolean isSimple = (bits & 0x10) == 0; 1914 final boolean hasAttributes = (bits & 0x20) != 0; 1915 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 1916 if (moreAlternativeMethods) { 1917 throw new IOException("Alternative methods are unsupported, please report. 
" + // NOSONAR 1918 "The reference implementation doesn't support them either."); 1919 } 1920 1921 if (isSimple) { 1922 totalInStreams++; 1923 totalOutStreams++; 1924 } else { 1925 totalInStreams += assertFitsIntoNonNegativeInt("numInStreams", readUint64(header)); 1926 totalOutStreams += assertFitsIntoNonNegativeInt("numOutStreams", readUint64(header)); 1927 } 1928 1929 if (hasAttributes) { 1930 final int propertiesSize = assertFitsIntoNonNegativeInt("propertiesSize", readUint64(header)); 1931 if (skipBytesFully(header, propertiesSize) < propertiesSize) { 1932 throw new IOException("invalid propertiesSize in folder"); 1933 } 1934 } 1935 } 1936 assertFitsIntoNonNegativeInt("totalInStreams", totalInStreams); 1937 assertFitsIntoNonNegativeInt("totalOutStreams", totalOutStreams); 1938 stats.numberOfOutStreams += totalOutStreams; 1939 stats.numberOfInStreams += totalInStreams; 1940 1941 if (totalOutStreams == 0) { 1942 throw new IOException("Total output streams can't be 0"); 1943 } 1944 1945 final int numBindPairs = assertFitsIntoNonNegativeInt("numBindPairs", totalOutStreams - 1); 1946 if (totalInStreams < numBindPairs) { 1947 throw new IOException("Total input streams can't be less than the number of bind pairs"); 1948 } 1949 final BitSet inStreamsBound = new BitSet((int) totalInStreams); 1950 for (int i = 0; i < numBindPairs; i++) { 1951 final int inIndex = assertFitsIntoNonNegativeInt("inIndex", readUint64(header)); 1952 if (totalInStreams <= inIndex) { 1953 throw new IOException("inIndex is bigger than number of inStreams"); 1954 } 1955 inStreamsBound.set(inIndex); 1956 final int outIndex = assertFitsIntoNonNegativeInt("outIndex", readUint64(header)); 1957 if (totalOutStreams <= outIndex) { 1958 throw new IOException("outIndex is bigger than number of outStreams"); 1959 } 1960 } 1961 1962 final int numPackedStreams = assertFitsIntoNonNegativeInt("numPackedStreams", totalInStreams - numBindPairs); 1963 1964 if (numPackedStreams == 1) { 1965 if 
(inStreamsBound.nextClearBit(0) == -1) { 1966 throw new IOException("Couldn't find stream's bind pair index"); 1967 } 1968 } else { 1969 for (int i = 0; i < numPackedStreams; i++) { 1970 final int packedStreamIndex = assertFitsIntoNonNegativeInt("packedStreamIndex", readUint64(header)); 1971 if (packedStreamIndex >= totalInStreams) { 1972 throw new IOException("packedStreamIndex is bigger than number of totalInStreams"); 1973 } 1974 } 1975 } 1976 1977 return (int) totalOutStreams; 1978 } 1979 1980 private void sanityCheckPackInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1981 final long packPos = readUint64(header); 1982 if (packPos < 0 || SIGNATURE_HEADER_SIZE + packPos > channel.size() || SIGNATURE_HEADER_SIZE + packPos < 0) { 1983 throw new IOException("packPos (" + packPos + ") is out of range"); 1984 } 1985 final long numPackStreams = readUint64(header); 1986 stats.numberOfPackedStreams = assertFitsIntoNonNegativeInt("numPackStreams", numPackStreams); 1987 int nid = getUnsignedByte(header); 1988 if (nid == NID.kSize) { 1989 long totalPackSizes = 0; 1990 for (int i = 0; i < stats.numberOfPackedStreams; i++) { 1991 final long packSize = readUint64(header); 1992 totalPackSizes += packSize; 1993 final long endOfPackStreams = SIGNATURE_HEADER_SIZE + packPos + totalPackSizes; 1994 if (packSize < 0 || endOfPackStreams > channel.size() || endOfPackStreams < packPos) { 1995 throw new IOException("packSize (" + packSize + ") is out of range"); 1996 } 1997 } 1998 nid = getUnsignedByte(header); 1999 } 2000 2001 if (nid == NID.kCRC) { 2002 final int crcsDefined = readAllOrBits(header, stats.numberOfPackedStreams).cardinality(); 2003 if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { 2004 throw new IOException("invalid number of CRCs in PackInfo"); 2005 } 2006 nid = getUnsignedByte(header); 2007 } 2008 2009 if (nid != NID.kEnd) { 2010 throw new IOException("Badly terminated PackInfo (" + nid + ")"); 2011 } 2012 } 2013 2014 
private void sanityCheckStreamsInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 2015 int nid = getUnsignedByte(header); 2016 2017 if (nid == NID.kPackInfo) { 2018 sanityCheckPackInfo(header, stats); 2019 nid = getUnsignedByte(header); 2020 } 2021 2022 if (nid == NID.kUnpackInfo) { 2023 sanityCheckUnpackInfo(header, stats); 2024 nid = getUnsignedByte(header); 2025 } 2026 2027 if (nid == NID.kSubStreamsInfo) { 2028 sanityCheckSubStreamsInfo(header, stats); 2029 nid = getUnsignedByte(header); 2030 } 2031 2032 if (nid != NID.kEnd) { 2033 throw new IOException("Badly terminated StreamsInfo"); 2034 } 2035 } 2036 2037 private void sanityCheckSubStreamsInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 2038 2039 int nid = getUnsignedByte(header); 2040 final List<Integer> numUnpackSubStreamsPerFolder = new LinkedList<>(); 2041 if (nid == NID.kNumUnpackStream) { 2042 for (int i = 0; i < stats.numberOfFolders; i++) { 2043 numUnpackSubStreamsPerFolder.add(assertFitsIntoNonNegativeInt("numStreams", readUint64(header))); 2044 } 2045 stats.numberOfUnpackSubStreams = numUnpackSubStreamsPerFolder.stream().mapToLong(Integer::longValue).sum(); 2046 nid = getUnsignedByte(header); 2047 } else { 2048 stats.numberOfUnpackSubStreams = stats.numberOfFolders; 2049 } 2050 2051 assertFitsIntoNonNegativeInt("totalUnpackStreams", stats.numberOfUnpackSubStreams); 2052 2053 if (nid == NID.kSize) { 2054 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 2055 if (numUnpackSubStreams == 0) { 2056 continue; 2057 } 2058 for (int i = 0; i < numUnpackSubStreams - 1; i++) { 2059 final long size = readUint64(header); 2060 if (size < 0) { 2061 throw new IOException("negative unpackSize"); 2062 } 2063 } 2064 } 2065 nid = getUnsignedByte(header); 2066 } 2067 2068 int numDigests = 0; 2069 if (numUnpackSubStreamsPerFolder.isEmpty()) { 2070 numDigests = stats.folderHasCrc == null ? 
stats.numberOfFolders : stats.numberOfFolders - stats.folderHasCrc.cardinality(); 2071 } else { 2072 int folderIdx = 0; 2073 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 2074 if (numUnpackSubStreams != 1 || stats.folderHasCrc == null || !stats.folderHasCrc.get(folderIdx++)) { 2075 numDigests += numUnpackSubStreams; 2076 } 2077 } 2078 } 2079 2080 if (nid == NID.kCRC) { 2081 assertFitsIntoNonNegativeInt("numDigests", numDigests); 2082 final int missingCrcs = readAllOrBits(header, numDigests).cardinality(); 2083 if (skipBytesFully(header, 4 * missingCrcs) < 4 * missingCrcs) { 2084 throw new IOException("invalid number of missing CRCs in SubStreamInfo"); 2085 } 2086 nid = getUnsignedByte(header); 2087 } 2088 2089 if (nid != NID.kEnd) { 2090 throw new IOException("Badly terminated SubStreamsInfo"); 2091 } 2092 } 2093 2094 private void sanityCheckUnpackInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 2095 int nid = getUnsignedByte(header); 2096 if (nid != NID.kFolder) { 2097 throw new IOException("Expected kFolder, got " + nid); 2098 } 2099 final long numFolders = readUint64(header); 2100 stats.numberOfFolders = assertFitsIntoNonNegativeInt("numFolders", numFolders); 2101 final int external = getUnsignedByte(header); 2102 if (external != 0) { 2103 throw new IOException("External unsupported"); 2104 } 2105 2106 final List<Integer> numberOfOutputStreamsPerFolder = new LinkedList<>(); 2107 for (int i = 0; i < stats.numberOfFolders; i++) { 2108 numberOfOutputStreamsPerFolder.add(sanityCheckFolder(header, stats)); 2109 } 2110 2111 final long totalNumberOfBindPairs = stats.numberOfOutStreams - stats.numberOfFolders; 2112 final long packedStreamsRequiredByFolders = stats.numberOfInStreams - totalNumberOfBindPairs; 2113 if (packedStreamsRequiredByFolders < stats.numberOfPackedStreams) { 2114 throw new IOException("archive doesn't contain enough packed streams"); 2115 } 2116 2117 nid = getUnsignedByte(header); 2118 if (nid 
!= NID.kCodersUnpackSize) { 2119 throw new IOException("Expected kCodersUnpackSize, got " + nid); 2120 } 2121 2122 for (final int numberOfOutputStreams : numberOfOutputStreamsPerFolder) { 2123 for (int i = 0; i < numberOfOutputStreams; i++) { 2124 final long unpackSize = readUint64(header); 2125 if (unpackSize < 0) { 2126 throw new IllegalArgumentException("negative unpackSize"); 2127 } 2128 } 2129 } 2130 2131 nid = getUnsignedByte(header); 2132 if (nid == NID.kCRC) { 2133 stats.folderHasCrc = readAllOrBits(header, stats.numberOfFolders); 2134 final int crcsDefined = stats.folderHasCrc.cardinality(); 2135 if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { 2136 throw new IOException("invalid number of CRCs in UnpackInfo"); 2137 } 2138 nid = getUnsignedByte(header); 2139 } 2140 2141 if (nid != NID.kEnd) { 2142 throw new IOException("Badly terminated UnpackInfo"); 2143 } 2144 } 2145 2146 /** 2147 * Skips all the entries if needed. Entries need to be skipped when: 2148 * <p> 2149 * 1. it's a random access 2. 
one of these 2 condition is meet : 2150 * </p> 2151 * <p> 2152 * 2.1 currentEntryIndex != entryIndex : this means there are some entries to be skipped(currentEntryIndex < entryIndex) or the entry has already been 2153 * read(currentEntryIndex > entryIndex) 2154 * </p> 2155 * <p> 2156 * 2.2 currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead: if the entry to be read is the current entry, but some data of it has been read before, 2157 * then we need to reopen the stream of the folder and skip all the entries before the current entries 2158 * </p> 2159 * 2160 * @param entryIndex the entry to be read 2161 * @param isInSameFolder are the entry to be read and the current entry in the same folder 2162 * @param folderIndex the index of the folder which contains the entry 2163 * @return true if there are entries actually skipped 2164 * @throws IOException there are exceptions when skipping entries 2165 * @since 1.21 2166 */ 2167 private boolean skipEntriesWhenNeeded(final int entryIndex, final boolean isInSameFolder, final int folderIndex) throws IOException { 2168 final SevenZArchiveEntry file = archive.files[entryIndex]; 2169 // if the entry to be read is the current entry, and the entry has not 2170 // been read yet, then there's nothing we need to do 2171 if (currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead()) { 2172 return false; 2173 } 2174 2175 // 1. if currentEntryIndex < entryIndex : 2176 // this means there are some entries to be skipped(currentEntryIndex < entryIndex) 2177 // 2. 
if currentEntryIndex > entryIndex || (currentEntryIndex == entryIndex && hasCurrentEntryBeenRead) : 2178 // this means the entry has already been read before, and we need to reopen the 2179 // stream of the folder and skip all the entries before the current entries 2180 int filesToSkipStartIndex = archive.streamMap.folderFirstFileIndex[currentFolderIndex]; 2181 if (isInSameFolder) { 2182 if (currentEntryIndex < entryIndex) { 2183 // the entries between filesToSkipStartIndex and currentEntryIndex had already been skipped 2184 filesToSkipStartIndex = currentEntryIndex + 1; 2185 } else { 2186 // the entry is in the same folder of current entry, but it has already been read before, we need to reset 2187 // the position of the currentFolderInputStream to the beginning of folder, and then skip the files 2188 // from the start entry of the folder again 2189 reopenFolderInputStream(folderIndex, file); 2190 } 2191 } 2192 2193 for (int i = filesToSkipStartIndex; i < entryIndex; i++) { 2194 final SevenZArchiveEntry fileToSkip = archive.files[i]; 2195 InputStream fileStreamToSkip = BoundedInputStream.builder() 2196 .setInputStream(currentFolderInputStream) 2197 .setMaxCount(fileToSkip.getSize()) 2198 .setPropagateClose(false) 2199 .get(); 2200 if (fileToSkip.getHasCrc()) { 2201 // @formatter:off 2202 fileStreamToSkip = ChecksumInputStream.builder() 2203 .setChecksum(new CRC32()) 2204 .setInputStream(fileStreamToSkip) 2205 .setCountThreshold(fileToSkip.getSize()) 2206 .setExpectedChecksumValue(fileToSkip.getCrcValue()) 2207 .get(); 2208 // @formatter:on 2209 } 2210 deferredBlockStreams.add(fileStreamToSkip); 2211 2212 // set the content methods as well, it equals to file.getContentMethods() because they are in same folder 2213 fileToSkip.setContentMethods(file.getContentMethods()); 2214 } 2215 return true; 2216 } 2217 2218 @Override 2219 public String toString() { 2220 return archive.toString(); 2221 } 2222 2223 private Archive tryToLocateEndHeader(final byte[] password) throws 
IOException { 2224 final ByteBuffer nidBuf = ByteBuffer.allocate(1); 2225 final long searchLimit = 1024L * 1024 * 1; 2226 // Main header, plus bytes that readStartHeader would read 2227 final long previousDataSize = channel.position() + 20; 2228 final long minPos; 2229 // Determine minimal position - can't start before current position 2230 if (channel.position() + searchLimit > channel.size()) { 2231 minPos = channel.position(); 2232 } else { 2233 minPos = channel.size() - searchLimit; 2234 } 2235 long pos = channel.size() - 1; 2236 // Loop: Try from end of archive 2237 while (pos > minPos) { 2238 pos--; 2239 channel.position(pos); 2240 nidBuf.rewind(); 2241 if (channel.read(nidBuf) < 1) { 2242 throw new EOFException(); 2243 } 2244 final int nid = nidBuf.array()[0]; 2245 // First indicator: Byte equals one of these header identifiers 2246 if (nid == NID.kEncodedHeader || nid == NID.kHeader) { 2247 try { 2248 // Try to initialize Archive structure from here 2249 final long nextHeaderOffset = pos - previousDataSize; 2250 final long nextHeaderSize = channel.size() - pos; 2251 final StartHeader startHeader = new StartHeader(nextHeaderOffset, nextHeaderSize, 0); 2252 final Archive result = initializeArchive(startHeader, password, false); 2253 // Sanity check: There must be some data... 2254 if (result.packSizes.length > 0 && result.files.length > 0) { 2255 return result; 2256 } 2257 } catch (final Exception ignored) { 2258 // Wrong guess... 2259 } 2260 } 2261 } 2262 throw new IOException("Start header corrupt and unable to guess end header"); 2263 } 2264}