001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.compress.archivers.zip; 018 019import java.io.BufferedInputStream; 020import java.io.ByteArrayInputStream; 021import java.io.Closeable; 022import java.io.EOFException; 023import java.io.File; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.SequenceInputStream; 027import java.nio.ByteBuffer; 028import java.nio.ByteOrder; 029import java.nio.channels.FileChannel; 030import java.nio.channels.SeekableByteChannel; 031import java.nio.charset.Charset; 032import java.nio.charset.StandardCharsets; 033import java.nio.file.Files; 034import java.nio.file.OpenOption; 035import java.nio.file.Path; 036import java.nio.file.StandardOpenOption; 037import java.util.ArrayList; 038import java.util.Arrays; 039import java.util.Collections; 040import java.util.Comparator; 041import java.util.EnumSet; 042import java.util.Enumeration; 043import java.util.HashMap; 044import java.util.LinkedList; 045import java.util.List; 046import java.util.Map; 047import java.util.Objects; 048import java.util.stream.Collectors; 049import java.util.stream.IntStream; 050import java.util.zip.Inflater; 051import 
java.util.zip.ZipException; 052 053import org.apache.commons.compress.archivers.EntryStreamOffsets; 054import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 055import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; 056import org.apache.commons.compress.utils.BoundedArchiveInputStream; 057import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream; 058import org.apache.commons.compress.utils.IOUtils; 059import org.apache.commons.compress.utils.InputStreamStatistics; 060import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; 061import org.apache.commons.io.Charsets; 062import org.apache.commons.io.FilenameUtils; 063import org.apache.commons.io.build.AbstractOrigin.ByteArrayOrigin; 064import org.apache.commons.io.build.AbstractStreamBuilder; 065import org.apache.commons.io.input.BoundedInputStream; 066 067/** 068 * Replacement for {@link java.util.zip.ZipFile}. 069 * <p> 070 * This class adds support for file name encodings other than UTF-8 (which is required to work on ZIP files created by native ZIP tools and is able to skip a 071 * preamble like the one found in self extracting archives. Furthermore it returns instances of 072 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} instead of {@link java.util.zip.ZipEntry}. 073 * </p> 074 * <p> 075 * It doesn't extend {@link java.util.zip.ZipFile} as it would have to reimplement all methods anyway. Like {@link java.util.zip.ZipFile}, it uses 076 * SeekableByteChannel under the covers and supports compressed and uncompressed entries. As of Apache Commons Compress 1.3 it also transparently supports Zip64 077 * extensions and thus individual entries and archives larger than 4 GB or with more than 65,536 entries. 
078 * </p> 079 * <p> 080 * The method signatures mimic the ones of {@link java.util.zip.ZipFile}, with a couple of exceptions: 081 * </p> 082 * <ul> 083 * <li>There is no getName method.</li> 084 * <li>entries has been renamed to getEntries.</li> 085 * <li>getEntries and getEntry return {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} instances.</li> 086 * <li>close is allowed to throw IOException.</li> 087 * </ul> 088 */ 089public class ZipFile implements Closeable { 090 091 /** 092 * Lock-free implementation of BoundedInputStream. The implementation uses positioned reads on the underlying archive file channel and therefore performs 093 * significantly faster in concurrent environment. 094 */ 095 private static class BoundedFileChannelInputStream extends BoundedArchiveInputStream { 096 private final FileChannel archive; 097 098 BoundedFileChannelInputStream(final long start, final long remaining, final FileChannel archive) { 099 super(start, remaining); 100 this.archive = archive; 101 } 102 103 @Override 104 protected int read(final long pos, final ByteBuffer buf) throws IOException { 105 final int read = archive.read(buf, pos); 106 buf.flip(); 107 return read; 108 } 109 } 110 111 /** 112 * Builds new {@link ZipFile} instances. 113 * <p> 114 * The channel will be opened for reading, assuming the specified encoding for file names. 115 * </p> 116 * <p> 117 * See {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} to read from an in-memory archive. 118 * </p> 119 * <p> 120 * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time 121 * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory. 
122 * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is 123 * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 124 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. 125 * </p> 126 * 127 * @since 1.26.0 128 */ 129 public static class Builder extends AbstractStreamBuilder<ZipFile, Builder> { 130 131 static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8; 132 133 private SeekableByteChannel seekableByteChannel; 134 private boolean useUnicodeExtraFields = true; 135 private boolean ignoreLocalFileHeader; 136 private long maxNumberOfDisks = 1; 137 138 public Builder() { 139 setCharset(DEFAULT_CHARSET); 140 setCharsetDefault(DEFAULT_CHARSET); 141 } 142 143 @Override 144 public ZipFile get() throws IOException { 145 final SeekableByteChannel actualChannel; 146 final String actualDescription; 147 if (seekableByteChannel != null) { 148 actualChannel = seekableByteChannel; 149 actualDescription = actualChannel.getClass().getSimpleName(); 150 } else if (checkOrigin() instanceof ByteArrayOrigin) { 151 actualChannel = new SeekableInMemoryByteChannel(checkOrigin().getByteArray()); 152 actualDescription = actualChannel.getClass().getSimpleName(); 153 } else { 154 OpenOption[] openOptions = getOpenOptions(); 155 if (openOptions.length == 0) { 156 openOptions = new OpenOption[] { StandardOpenOption.READ }; 157 } 158 final Path path = getPath(); 159 actualChannel = openZipChannel(path, maxNumberOfDisks, openOptions); 160 actualDescription = path.toString(); 161 } 162 final boolean closeOnError = seekableByteChannel != null; 163 return new ZipFile(actualChannel, actualDescription, getCharset(), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader); 164 } 165 166 /** 167 * Sets whether to ignore information stored inside the local file header. 
168 * 169 * @param ignoreLocalFileHeader whether to ignore information stored inside. 170 * @return {@code this} instance. 171 */ 172 public Builder setIgnoreLocalFileHeader(final boolean ignoreLocalFileHeader) { 173 this.ignoreLocalFileHeader = ignoreLocalFileHeader; 174 return this; 175 } 176 177 /** 178 * Sets max number of multi archive disks, default is 1 (no multi archive). 179 * 180 * @param maxNumberOfDisks max number of multi archive disks. 181 * 182 * @return {@code this} instance. 183 */ 184 public Builder setMaxNumberOfDisks(final long maxNumberOfDisks) { 185 this.maxNumberOfDisks = maxNumberOfDisks; 186 return this; 187 } 188 189 /** 190 * The actual channel, overrides any other input aspects like a File, Path, and so on. 191 * 192 * @param seekableByteChannel The actual channel. 193 * @return {@code this} instance. 194 */ 195 public Builder setSeekableByteChannel(final SeekableByteChannel seekableByteChannel) { 196 this.seekableByteChannel = seekableByteChannel; 197 return this; 198 } 199 200 /** 201 * Sets whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 202 * 203 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 204 * @return {@code this} instance. 205 */ 206 public Builder setUseUnicodeExtraFields(final boolean useUnicodeExtraFields) { 207 this.useUnicodeExtraFields = useUnicodeExtraFields; 208 return this; 209 } 210 211 } 212 213 /** 214 * Extends ZipArchiveEntry to store the offset within the archive. 
215 */ 216 private static final class Entry extends ZipArchiveEntry { 217 218 @Override 219 public boolean equals(final Object other) { 220 if (super.equals(other)) { 221 // super.equals would return false if other were not an Entry 222 final Entry otherEntry = (Entry) other; 223 return getLocalHeaderOffset() == otherEntry.getLocalHeaderOffset() && super.getDataOffset() == otherEntry.getDataOffset() 224 && super.getDiskNumberStart() == otherEntry.getDiskNumberStart(); 225 } 226 return false; 227 } 228 229 @Override 230 public int hashCode() { 231 return 3 * super.hashCode() + (int) getLocalHeaderOffset() + (int) (getLocalHeaderOffset() >> 32); 232 } 233 } 234 235 private static final class NameAndComment { 236 private final byte[] name; 237 private final byte[] comment; 238 239 private NameAndComment(final byte[] name, final byte[] comment) { 240 this.name = name; 241 this.comment = comment; 242 } 243 } 244 245 private static final class StoredStatisticsStream extends BoundedInputStream implements InputStreamStatistics { 246 StoredStatisticsStream(final InputStream in) { 247 super(in); 248 } 249 250 @Override 251 public long getCompressedCount() { 252 return super.getCount(); 253 } 254 255 @Override 256 public long getUncompressedCount() { 257 return getCompressedCount(); 258 } 259 } 260 261 private static final String DEFAULT_CHARSET_NAME = StandardCharsets.UTF_8.name(); 262 263 private static final EnumSet<StandardOpenOption> READ = EnumSet.of(StandardOpenOption.READ); 264 265 private static final int HASH_SIZE = 509; 266 static final int NIBLET_MASK = 0x0f; 267 static final int BYTE_SHIFT = 8; 268 private static final int POS_0 = 0; 269 private static final int POS_1 = 1; 270 private static final int POS_2 = 2; 271 private static final int POS_3 = 3; 272 private static final byte[] ONE_ZERO_BYTE = new byte[1]; 273 274 /** 275 * Length of a "central directory" entry structure without file name, extra fields or comment. 
276 */ 277 private static final int CFH_LEN = 278 // @formatter:off 279 /* version made by */ ZipConstants.SHORT 280 /* version needed to extract */ + ZipConstants.SHORT 281 /* general purpose bit flag */ + ZipConstants.SHORT 282 /* compression method */ + ZipConstants.SHORT 283 /* last mod file time */ + ZipConstants.SHORT 284 /* last mod file date */ + ZipConstants.SHORT 285 /* crc-32 */ + ZipConstants.WORD 286 /* compressed size */ + ZipConstants.WORD 287 /* uncompressed size */ + ZipConstants.WORD 288 /* file name length */ + ZipConstants. SHORT 289 /* extra field length */ + ZipConstants.SHORT 290 /* file comment length */ + ZipConstants.SHORT 291 /* disk number start */ + ZipConstants.SHORT 292 /* internal file attributes */ + ZipConstants.SHORT 293 /* external file attributes */ + ZipConstants.WORD 294 /* relative offset of local header */ + ZipConstants.WORD; 295 // @formatter:on 296 297 private static final long CFH_SIG = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 298 299 /** 300 * Length of the "End of central directory record" - which is supposed to be the last structure of the archive - without file comment. 301 */ 302 static final int MIN_EOCD_SIZE = 303 // @formatter:off 304 /* end of central dir signature */ ZipConstants.WORD 305 /* number of this disk */ + ZipConstants.SHORT 306 /* number of the disk with the */ 307 /* start of the central directory */ + ZipConstants.SHORT 308 /* total number of entries in */ 309 /* the central dir on this disk */ + ZipConstants.SHORT 310 /* total number of entries in */ 311 /* the central dir */ + ZipConstants.SHORT 312 /* size of the central directory */ + ZipConstants.WORD 313 /* offset of start of central */ 314 /* directory with respect to */ 315 /* the starting disk number */ + ZipConstants.WORD 316 /* ZIP file comment length */ + ZipConstants.SHORT; 317 // @formatter:on 318 319 /** 320 * Maximum length of the "End of central directory record" with a file comment. 
321 */ 322 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 323 // @formatter:off 324 /* maximum length of ZIP file comment */ + ZipConstants.ZIP64_MAGIC_SHORT; 325 // @formatter:on 326 327 /** 328 * Offset of the field that holds the location of the length of the central directory inside the "End of central directory record" relative to the start of 329 * the "End of central directory record". 330 */ 331 private static final int CFD_LENGTH_OFFSET = 332 // @formatter:off 333 /* end of central dir signature */ ZipConstants.WORD 334 /* number of this disk */ + ZipConstants.SHORT 335 /* number of the disk with the */ 336 /* start of the central directory */ + ZipConstants.SHORT 337 /* total number of entries in */ 338 /* the central dir on this disk */ + ZipConstants.SHORT 339 /* total number of entries in */ 340 /* the central dir */ + ZipConstants.SHORT; 341 // @formatter:on 342 343 /** 344 * Offset of the field that holds the disk number of the first central directory entry inside the "End of central directory record" relative to the start of 345 * the "End of central directory record". 346 */ 347 private static final int CFD_DISK_OFFSET = 348 // @formatter:off 349 /* end of central dir signature */ ZipConstants.WORD 350 /* number of this disk */ + ZipConstants.SHORT; 351 // @formatter:on 352 353 /** 354 * Offset of the field that holds the location of the first central directory entry inside the "End of central directory record" relative to the "number of 355 * the disk with the start of the central directory". 
356 */ 357 private static final int CFD_LOCATOR_RELATIVE_OFFSET = 358 // @formatter:off 359 /* total number of entries in */ 360 /* the central dir on this disk */ + ZipConstants.SHORT 361 /* total number of entries in */ 362 /* the central dir */ + ZipConstants.SHORT 363 /* size of the central directory */ + ZipConstants.WORD; 364 // @formatter:on 365 366 /** 367 * Length of the "Zip64 end of central directory locator" - which should be right in front of the "end of central directory record" if one is present at 368 * all. 369 */ 370 private static final int ZIP64_EOCDL_LENGTH = 371 // @formatter:off 372 /* zip64 end of central dir locator sig */ ZipConstants.WORD 373 /* number of the disk with the start */ 374 /* start of the zip64 end of */ 375 /* central directory */ + ZipConstants.WORD 376 /* relative offset of the zip64 */ 377 /* end of central directory record */ + ZipConstants.DWORD 378 /* total number of disks */ + ZipConstants.WORD; 379 // @formatter:on 380 381 /** 382 * Offset of the field that holds the location of the "Zip64 end of central directory record" inside the "Zip64 end of central directory locator" relative 383 * to the start of the "Zip64 end of central directory locator". 384 */ 385 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 386 // @formatter:off 387 /* zip64 end of central dir locator sig */ ZipConstants.WORD 388 /* number of the disk with the start */ 389 /* start of the zip64 end of */ 390 /* central directory */ + ZipConstants.WORD; 391 // @formatter:on 392 393 /** 394 * Offset of the field that holds the location of the first central directory entry inside the "Zip64 end of central directory record" relative to the start 395 * of the "Zip64 end of central directory record". 
396 */ 397 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 398 // @formatter:off 399 /* zip64 end of central dir */ 400 /* signature */ ZipConstants.WORD 401 /* size of zip64 end of central */ 402 /* directory record */ + ZipConstants.DWORD 403 /* version made by */ + ZipConstants.SHORT 404 /* version needed to extract */ + ZipConstants.SHORT 405 /* number of this disk */ + ZipConstants.WORD 406 /* number of the disk with the */ 407 /* start of the central directory */ + ZipConstants.WORD 408 /* total number of entries in the */ 409 /* central directory on this disk */ + ZipConstants.DWORD 410 /* total number of entries in the */ 411 /* central directory */ + ZipConstants.DWORD 412 /* size of the central directory */ + ZipConstants.DWORD; 413 // @formatter:on 414 415 /** 416 * Offset of the field that holds the disk number of the first central directory entry inside the "Zip64 end of central directory record" relative to the 417 * start of the "Zip64 end of central directory record". 418 */ 419 private static final int ZIP64_EOCD_CFD_DISK_OFFSET = 420 // @formatter:off 421 /* zip64 end of central dir */ 422 /* signature */ ZipConstants.WORD 423 /* size of zip64 end of central */ 424 /* directory record */ + ZipConstants.DWORD 425 /* version made by */ + ZipConstants.SHORT 426 /* version needed to extract */ + ZipConstants.SHORT 427 /* number of this disk */ + ZipConstants.WORD; 428 // @formatter:on 429 430 /** 431 * Offset of the field that holds the location of the first central directory entry inside the "Zip64 end of central directory record" relative to the 432 * "number of the disk with the start of the central directory". 
433 */ 434 private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET = 435 // @formatter:off 436 /* total number of entries in the */ 437 /* central directory on this disk */ ZipConstants.DWORD 438 /* total number of entries in the */ 439 /* central directory */ + ZipConstants.DWORD 440 /* size of the central directory */ + ZipConstants.DWORD; 441 // @formatter:on 442 443 /** 444 * Number of bytes in local file header up to the "length of file name" entry. 445 */ 446 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 447 // @formatter:off 448 /* local file header signature */ ZipConstants.WORD 449 /* version needed to extract */ + ZipConstants.SHORT 450 /* general purpose bit flag */ + ZipConstants.SHORT 451 /* compression method */ + ZipConstants.SHORT 452 /* last mod file time */ + ZipConstants.SHORT 453 /* last mod file date */ + ZipConstants.SHORT 454 /* crc-32 */ + ZipConstants.WORD 455 /* compressed size */ + ZipConstants.WORD 456 /* uncompressed size */ + (long) ZipConstants.WORD; 457 // @formatter:on 458 459 /** 460 * Compares two ZipArchiveEntries based on their offset within the archive. 461 * <p> 462 * Won't return any meaningful results if one of the entries isn't part of the archive at all. 463 * </p> 464 * 465 * @since 1.1 466 */ 467 private static final Comparator<ZipArchiveEntry> offsetComparator = Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart) 468 .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset); 469 470 /** 471 * Creates a new Builder. 472 * 473 * @return a new Builder. 474 * @since 1.26.0 475 */ 476 public static Builder builder() { 477 return new Builder(); 478 } 479 480 /** 481 * Closes a ZIP file quietly; throwing no IOException, does nothing on null input. 482 * 483 * @param zipFile file to close, can be null 484 */ 485 public static void closeQuietly(final ZipFile zipFile) { 486 org.apache.commons.io.IOUtils.closeQuietly(zipFile); 487 } 488 489 /** 490 * Creates a new SeekableByteChannel for reading. 
491 * 492 * @param path the path to the file to open or create 493 * @return a new seekable byte channel 494 * @throws IOException if an I/O error occurs 495 */ 496 private static SeekableByteChannel newReadByteChannel(final Path path) throws IOException { 497 return Files.newByteChannel(path, READ); 498 } 499 500 private static SeekableByteChannel openZipChannel(final Path path, final long maxNumberOfDisks, final OpenOption[] openOptions) throws IOException { 501 final FileChannel channel = FileChannel.open(path, StandardOpenOption.READ); 502 final List<FileChannel> channels = new ArrayList<>(); 503 try { 504 final boolean is64 = positionAtEndOfCentralDirectoryRecord(channel); 505 long numberOfDisks; 506 if (is64) { 507 channel.position(channel.position() + ZipConstants.WORD + ZipConstants.WORD + ZipConstants.DWORD); 508 final ByteBuffer buf = ByteBuffer.allocate(ZipConstants.WORD); 509 buf.order(ByteOrder.LITTLE_ENDIAN); 510 IOUtils.readFully(channel, buf); 511 buf.flip(); 512 numberOfDisks = buf.getInt() & 0xffffffffL; 513 } else { 514 channel.position(channel.position() + ZipConstants.WORD); 515 final ByteBuffer buf = ByteBuffer.allocate(ZipConstants.SHORT); 516 buf.order(ByteOrder.LITTLE_ENDIAN); 517 IOUtils.readFully(channel, buf); 518 buf.flip(); 519 numberOfDisks = (buf.getShort() & 0xffff) + 1; 520 } 521 if (numberOfDisks > Math.min(maxNumberOfDisks, Integer.MAX_VALUE)) { 522 throw new IOException("Too many disks for zip archive, max=" + Math.min(maxNumberOfDisks, Integer.MAX_VALUE) + " actual=" + numberOfDisks); 523 } 524 525 if (numberOfDisks <= 1) { 526 return channel; 527 } 528 channel.close(); 529 530 final Path parent = path.getParent(); 531 final String basename = FilenameUtils.removeExtension(Objects.toString(path.getFileName(), null)); 532 533 return ZipSplitReadOnlySeekableByteChannel.forPaths(IntStream.range(0, (int) numberOfDisks).mapToObj(i -> { 534 if (i == numberOfDisks - 1) { 535 return path; 536 } 537 final Path lowercase = 
parent.resolve(String.format("%s.z%02d", basename, i + 1)); 538 if (Files.exists(lowercase)) { 539 return lowercase; 540 } 541 final Path uppercase = parent.resolve(String.format("%s.Z%02d", basename, i + 1)); 542 if (Files.exists(uppercase)) { 543 return uppercase; 544 } 545 return lowercase; 546 }).collect(Collectors.toList()), openOptions); 547 } catch (final Throwable ex) { 548 org.apache.commons.io.IOUtils.closeQuietly(channel); 549 channels.forEach(org.apache.commons.io.IOUtils::closeQuietly); 550 throw ex; 551 } 552 } 553 554 /** 555 * Searches for the and positions the stream at the start of the "End of central dir record". 556 * 557 * @return true if it's Zip64 end of central directory or false if it's Zip32 558 */ 559 private static boolean positionAtEndOfCentralDirectoryRecord(final SeekableByteChannel channel) throws IOException { 560 final boolean found = tryToLocateSignature(channel, MIN_EOCD_SIZE, MAX_EOCD_SIZE, ZipArchiveOutputStream.EOCD_SIG); 561 if (!found) { 562 throw new ZipException("Archive is not a ZIP archive"); 563 } 564 boolean found64 = false; 565 final long position = channel.position(); 566 if (position > ZIP64_EOCDL_LENGTH) { 567 final ByteBuffer wordBuf = ByteBuffer.allocate(4); 568 channel.position(channel.position() - ZIP64_EOCDL_LENGTH); 569 wordBuf.rewind(); 570 IOUtils.readFully(channel, wordBuf); 571 wordBuf.flip(); 572 found64 = wordBuf.equals(ByteBuffer.wrap(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG)); 573 if (!found64) { 574 channel.position(position); 575 } else { 576 channel.position(channel.position() - ZipConstants.WORD); 577 } 578 } 579 580 return found64; 581 } 582 583 /** 584 * Searches the archive backwards from minDistance to maxDistance for the given signature, positions the RandomaccessFile right at the signature if it has 585 * been found. 
586 */ 587 private static boolean tryToLocateSignature(final SeekableByteChannel channel, final long minDistanceFromEnd, final long maxDistanceFromEnd, 588 final byte[] sig) throws IOException { 589 final ByteBuffer wordBuf = ByteBuffer.allocate(ZipConstants.WORD); 590 boolean found = false; 591 long off = channel.size() - minDistanceFromEnd; 592 final long stopSearching = Math.max(0L, channel.size() - maxDistanceFromEnd); 593 if (off >= 0) { 594 for (; off >= stopSearching; off--) { 595 channel.position(off); 596 try { 597 wordBuf.rewind(); 598 IOUtils.readFully(channel, wordBuf); 599 wordBuf.flip(); 600 } catch (final EOFException ex) { // NOSONAR 601 break; 602 } 603 int curr = wordBuf.get(); 604 if (curr == sig[POS_0]) { 605 curr = wordBuf.get(); 606 if (curr == sig[POS_1]) { 607 curr = wordBuf.get(); 608 if (curr == sig[POS_2]) { 609 curr = wordBuf.get(); 610 if (curr == sig[POS_3]) { 611 found = true; 612 break; 613 } 614 } 615 } 616 } 617 } 618 } 619 if (found) { 620 channel.position(off); 621 } 622 return found; 623 } 624 625 /** 626 * List of entries in the order they appear inside the central directory. 627 */ 628 private final List<ZipArchiveEntry> entries = new LinkedList<>(); 629 630 /** 631 * Maps String to list of ZipArchiveEntrys, name -> actual entries. 632 */ 633 private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = new HashMap<>(HASH_SIZE); 634 635 /** 636 * The encoding to use for file names and the file comment. 637 * <p> 638 * For a list of possible values see <a href="Supported Encodings">https://docs.oracle.com/javase/8/docs/technotes/guides/intl/encoding.doc.html</a>. 639 * Defaults to UTF-8. 640 * </p> 641 */ 642 private final Charset encoding; 643 644 /** 645 * The ZIP encoding to use for file names and the file comment. 646 */ 647 private final ZipEncoding zipEncoding; 648 649 /** 650 * The actual data source. 
651 */ 652 private final SeekableByteChannel archive; 653 654 /** 655 * Whether to look for and use Unicode extra fields. 656 */ 657 private final boolean useUnicodeExtraFields; 658 659 /** 660 * Whether the file is closed. 661 */ 662 private volatile boolean closed = true; 663 664 /** 665 * Whether the ZIP archive is a split ZIP archive 666 */ 667 private final boolean isSplitZipArchive; 668 669 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 670 private final byte[] dwordBuf = new byte[ZipConstants.DWORD]; 671 672 private final byte[] wordBuf = new byte[ZipConstants.WORD]; 673 674 private final byte[] cfhBuf = new byte[CFH_LEN]; 675 676 private final byte[] shortBuf = new byte[ZipConstants.SHORT]; 677 678 private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf); 679 680 private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf); 681 682 private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf); 683 684 private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf); 685 686 private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset; 687 688 private long centralDirectoryStartOffset; 689 690 private long firstLocalFileHeaderOffset; 691 692 /** 693 * Opens the given file for reading, assuming "UTF8" for file names. 694 * 695 * @param file the archive. 696 * 697 * @throws IOException if an error occurs while reading the file. 698 * @deprecated Use {@link Builder#get()}. 699 */ 700 @Deprecated 701 public ZipFile(final File file) throws IOException { 702 this(file, DEFAULT_CHARSET_NAME); 703 } 704 705 /** 706 * Opens the given file for reading, assuming the specified encoding for file names and scanning for Unicode extra fields. 707 * 708 * @param file the archive. 709 * @param encoding the encoding to use for file names, use null for the platform's default encoding 710 * @throws IOException if an error occurs while reading the file. 711 * @deprecated Use {@link Builder#get()}. 
712 */ 713 @Deprecated 714 public ZipFile(final File file, final String encoding) throws IOException { 715 this(file.toPath(), encoding, true); 716 } 717 718 /** 719 * Opens the given file for reading, assuming the specified encoding for file names. 720 * 721 * @param file the archive. 722 * @param encoding the encoding to use for file names, use null for the platform's default encoding 723 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 724 * @throws IOException if an error occurs while reading the file. 725 * @deprecated Use {@link Builder#get()}. 726 */ 727 @Deprecated 728 public ZipFile(final File file, final String encoding, final boolean useUnicodeExtraFields) throws IOException { 729 this(file.toPath(), encoding, useUnicodeExtraFields, false); 730 } 731 732 /** 733 * Opens the given file for reading, assuming the specified encoding for file names. 734 * <p> 735 * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time 736 * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory. 737 * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is 738 * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 739 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. 740 * </p> 741 * 742 * @param file the archive. 743 * @param encoding the encoding to use for file names, use null for the platform's default encoding 744 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 
745 * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc) 746 * @throws IOException if an error occurs while reading the file. 747 * @since 1.19 748 * @deprecated Use {@link Builder#get()}. 749 */ 750 @Deprecated 751 @SuppressWarnings("resource") // Caller closes 752 public ZipFile(final File file, final String encoding, final boolean useUnicodeExtraFields, final boolean ignoreLocalFileHeader) throws IOException { 753 this(newReadByteChannel(file.toPath()), file.getAbsolutePath(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader); 754 } 755 756 /** 757 * Opens the given path for reading, assuming "UTF-8" for file names. 758 * 759 * @param path path to the archive. 760 * @throws IOException if an error occurs while reading the file. 761 * @since 1.22 762 * @deprecated Use {@link Builder#get()}. 763 */ 764 @Deprecated 765 public ZipFile(final Path path) throws IOException { 766 this(path, DEFAULT_CHARSET_NAME); 767 } 768 769 /** 770 * Opens the given path for reading, assuming the specified encoding for file names and scanning for Unicode extra fields. 771 * 772 * @param path path to the archive. 773 * @param encoding the encoding to use for file names, use null for the platform's default encoding 774 * @throws IOException if an error occurs while reading the file. 775 * @since 1.22 776 * @deprecated Use {@link Builder#get()}. 777 */ 778 @Deprecated 779 public ZipFile(final Path path, final String encoding) throws IOException { 780 this(path, encoding, true); 781 } 782 783 /** 784 * Opens the given path for reading, assuming the specified encoding for file names. 785 * 786 * @param path path to the archive. 787 * @param encoding the encoding to use for file names, use null for the platform's default encoding 788 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 
789 * @throws IOException if an error occurs while reading the file. 790 * @since 1.22 791 * @deprecated Use {@link Builder#get()}. 792 */ 793 @Deprecated 794 public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields) throws IOException { 795 this(path, encoding, useUnicodeExtraFields, false); 796 } 797 798 /** 799 * Opens the given path for reading, assuming the specified encoding for file names. 800 * <p> 801 * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time 802 * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory. 803 * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is 804 * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 805 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. 806 * </p> 807 * 808 * @param path path to the archive. 809 * @param encoding the encoding to use for file names, use null for the platform's default encoding 810 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 811 * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc) 812 * @throws IOException if an error occurs while reading the file. 813 * @since 1.22 814 * @deprecated Use {@link Builder#get()}. 
815 */ 816 @SuppressWarnings("resource") // Caller closes 817 @Deprecated 818 public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields, final boolean ignoreLocalFileHeader) throws IOException { 819 this(newReadByteChannel(path), path.toAbsolutePath().toString(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader); 820 } 821 822 /** 823 * Opens the given channel for reading, assuming "UTF-8" for file names. 824 * <p> 825 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 826 * </p> 827 * 828 * @param channel the archive. 829 * 830 * @throws IOException if an error occurs while reading the file. 831 * @since 1.13 832 * @deprecated Use {@link Builder#get()}. 833 */ 834 @Deprecated 835 public ZipFile(final SeekableByteChannel channel) throws IOException { 836 this(channel, "a SeekableByteChannel", DEFAULT_CHARSET_NAME, true); 837 } 838 839 /** 840 * Opens the given channel for reading, assuming the specified encoding for file names. 841 * <p> 842 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 843 * </p> 844 * 845 * @param channel the archive. 846 * @param encoding the encoding to use for file names, use null for the platform's default encoding 847 * @throws IOException if an error occurs while reading the file. 848 * @since 1.13 849 * @deprecated Use {@link Builder#get()}. 
850 */ 851 @Deprecated 852 public ZipFile(final SeekableByteChannel channel, final String encoding) throws IOException { 853 this(channel, "a SeekableByteChannel", encoding, true); 854 } 855 856 private ZipFile(final SeekableByteChannel channel, final String channelDescription, final Charset encoding, final boolean useUnicodeExtraFields, 857 final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException { 858 this.isSplitZipArchive = channel instanceof ZipSplitReadOnlySeekableByteChannel; 859 this.encoding = Charsets.toCharset(encoding, Builder.DEFAULT_CHARSET); 860 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 861 this.useUnicodeExtraFields = useUnicodeExtraFields; 862 this.archive = channel; 863 boolean success = false; 864 try { 865 final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = populateFromCentralDirectory(); 866 if (!ignoreLocalFileHeader) { 867 resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 868 } 869 fillNameMap(); 870 success = true; 871 } catch (final IOException e) { 872 throw new IOException("Error reading Zip content from " + channelDescription, e); 873 } finally { 874 this.closed = !success; 875 if (!success && closeOnError) { 876 org.apache.commons.io.IOUtils.closeQuietly(archive); 877 } 878 } 879 } 880 881 /** 882 * Opens the given channel for reading, assuming the specified encoding for file names. 883 * <p> 884 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 885 * </p> 886 * 887 * @param channel the archive. 888 * @param channelDescription description of the archive, used for error messages only. 889 * @param encoding the encoding to use for file names, use null for the platform's default encoding 890 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 891 * @throws IOException if an error occurs while reading the file. 
892 * @since 1.13 893 * @deprecated Use {@link Builder#get()}. 894 */ 895 @Deprecated 896 public ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields) 897 throws IOException { 898 this(channel, channelDescription, encoding, useUnicodeExtraFields, false, false); 899 } 900 901 /** 902 * Opens the given channel for reading, assuming the specified encoding for file names. 903 * <p> 904 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 905 * </p> 906 * <p> 907 * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time 908 * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory. 909 * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is 910 * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 911 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. 912 * </p> 913 * 914 * @param channel the archive. 915 * @param channelDescription description of the archive, used for error messages only. 916 * @param encoding the encoding to use for file names, use null for the platform's default encoding 917 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 918 * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc) 919 * @throws IOException if an error occurs while reading the file. 920 * @since 1.19 921 * @deprecated Use {@link Builder#get()}. 
922 */ 923 @Deprecated 924 public ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields, 925 final boolean ignoreLocalFileHeader) throws IOException { 926 this(channel, channelDescription, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader); 927 } 928 929 private ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields, 930 final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException { 931 this(channel, channelDescription, Charsets.toCharset(encoding), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader); 932 } 933 934 /** 935 * Opens the given file for reading, assuming "UTF-8". 936 * 937 * @param name name of the archive. 938 * @throws IOException if an error occurs while reading the file. 939 * @deprecated Use {@link Builder#get()}. 940 */ 941 @Deprecated 942 public ZipFile(final String name) throws IOException { 943 this(new File(name).toPath(), DEFAULT_CHARSET_NAME); 944 } 945 946 /** 947 * Opens the given file for reading, assuming the specified encoding for file names, scanning unicode extra fields. 948 * 949 * @param name name of the archive. 950 * @param encoding the encoding to use for file names, use null for the platform's default encoding 951 * @throws IOException if an error occurs while reading the file. 952 * @deprecated Use {@link Builder#get()}. 953 */ 954 @Deprecated 955 public ZipFile(final String name, final String encoding) throws IOException { 956 this(new File(name).toPath(), encoding, true); 957 } 958 959 /** 960 * Whether this class is able to read the given entry. 961 * <p> 962 * May return false if it is set up to use encryption or a compression method that hasn't been implemented yet. 963 * </p> 964 * 965 * @since 1.1 966 * @param entry the entry 967 * @return whether this class is able to read the given entry. 
968 */ 969 public boolean canReadEntryData(final ZipArchiveEntry entry) { 970 return ZipUtil.canHandleEntryData(entry); 971 } 972 973 /** 974 * Closes the archive. 975 * 976 * @throws IOException if an error occurs closing the archive. 977 */ 978 @Override 979 public void close() throws IOException { 980 // this flag is only written here and read in finalize() which 981 // can never be run in parallel. 982 // no synchronization needed. 983 closed = true; 984 archive.close(); 985 } 986 987 /** 988 * Transfer selected entries from this ZIP file to a given #ZipArchiveOutputStream. Compression and all other attributes will be as in this file. 989 * <p> 990 * This method transfers entries based on the central directory of the ZIP file. 991 * </p> 992 * 993 * @param target The zipArchiveOutputStream to write the entries to 994 * @param predicate A predicate that selects which entries to write 995 * @throws IOException on error 996 */ 997 public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) throws IOException { 998 final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 999 while (src.hasMoreElements()) { 1000 final ZipArchiveEntry entry = src.nextElement(); 1001 if (predicate.test(entry)) { 1002 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 1003 } 1004 } 1005 } 1006 1007 /** 1008 * Creates new BoundedInputStream, according to implementation of underlying archive channel. 1009 */ 1010 private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) { 1011 if (start < 0 || remaining < 0 || start + remaining < start) { 1012 throw new IllegalArgumentException("Corrupted archive, stream boundaries" + " are out of range"); 1013 } 1014 return archive instanceof FileChannel ? 
new BoundedFileChannelInputStream(start, remaining, (FileChannel) archive) 1015 : new BoundedSeekableByteChannelInputStream(start, remaining, archive); 1016 } 1017 1018 private void fillNameMap() { 1019 entries.forEach(ze -> { 1020 // entries are filled in populateFromCentralDirectory and 1021 // never modified 1022 final String name = ze.getName(); 1023 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.computeIfAbsent(name, k -> new LinkedList<>()); 1024 entriesOfThatName.addLast(ze); 1025 }); 1026 } 1027 1028 /** 1029 * Ensures that the close method of this ZIP file is called when there are no more references to it. 1030 * 1031 * @see #close() 1032 */ 1033 @Override 1034 protected void finalize() throws Throwable { 1035 try { 1036 if (!closed) { 1037 close(); 1038 } 1039 } finally { 1040 super.finalize(); 1041 } 1042 } 1043 1044 /** 1045 * Gets an InputStream for reading the content before the first local file header. 1046 * 1047 * @return null if there is no content before the first local file header. Otherwise, returns a stream to read the content before the first local file 1048 * header. 1049 * @since 1.23 1050 */ 1051 public InputStream getContentBeforeFirstLocalFileHeader() { 1052 return firstLocalFileHeaderOffset == 0 ? null : createBoundedInputStream(0, firstLocalFileHeaderOffset); 1053 } 1054 1055 private long getDataOffset(final ZipArchiveEntry ze) throws IOException { 1056 final long s = ze.getDataOffset(); 1057 if (s == EntryStreamOffsets.OFFSET_UNKNOWN) { 1058 setDataOffset(ze); 1059 return ze.getDataOffset(); 1060 } 1061 return s; 1062 } 1063 1064 /** 1065 * Gets the encoding to use for file names and the file comment. 1066 * 1067 * @return null if using the platform's default character encoding. 1068 */ 1069 public String getEncoding() { 1070 return encoding.name(); 1071 } 1072 1073 /** 1074 * Gets all entries. 1075 * <p> 1076 * Entries will be returned in the same order they appear within the archive's central directory. 
1077 * </p> 1078 * 1079 * @return all entries as {@link ZipArchiveEntry} instances 1080 */ 1081 public Enumeration<ZipArchiveEntry> getEntries() { 1082 return Collections.enumeration(entries); 1083 } 1084 1085 /** 1086 * Gets all named entries in the same order they appear within the archive's central directory. 1087 * 1088 * @param name name of the entry. 1089 * @return the Iterable<ZipArchiveEntry> corresponding to the given name 1090 * @since 1.6 1091 */ 1092 public Iterable<ZipArchiveEntry> getEntries(final String name) { 1093 return nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST); 1094 } 1095 1096 /** 1097 * Gets all entries in physical order. 1098 * <p> 1099 * Entries will be returned in the same order their contents appear within the archive. 1100 * </p> 1101 * 1102 * @return all entries as {@link ZipArchiveEntry} instances 1103 * 1104 * @since 1.1 1105 */ 1106 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 1107 final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ARRAY); 1108 return Collections.enumeration(Arrays.asList(sortByOffset(allEntries))); 1109 } 1110 1111 /** 1112 * Gets all named entries in the same order their contents appear within the archive. 1113 * 1114 * @param name name of the entry. 1115 * @return the Iterable<ZipArchiveEntry> corresponding to the given name 1116 * @since 1.6 1117 */ 1118 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) { 1119 final LinkedList<ZipArchiveEntry> linkedList = nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST); 1120 return Arrays.asList(sortByOffset(linkedList.toArray(ZipArchiveEntry.EMPTY_ARRAY))); 1121 } 1122 1123 /** 1124 * Gets a named entry or {@code null} if no entry by that name exists. 1125 * <p> 1126 * If multiple entries with the same name exist the first entry in the archive's central directory by that name is returned. 1127 * </p> 1128 * 1129 * @param name name of the entry. 
1130 * @return the ZipArchiveEntry corresponding to the given name - or {@code null} if not present. 1131 */ 1132 public ZipArchiveEntry getEntry(final String name) { 1133 final LinkedList<ZipArchiveEntry> entries = nameMap.get(name); 1134 return entries != null ? entries.getFirst() : null; 1135 } 1136 1137 /** 1138 * Gets the offset of the first local file header in the file. 1139 * 1140 * @return the length of the content before the first local file header 1141 * @since 1.23 1142 */ 1143 public long getFirstLocalFileHeaderOffset() { 1144 return firstLocalFileHeaderOffset; 1145 } 1146 1147 /** 1148 * Gets an InputStream for reading the contents of the given entry. 1149 * 1150 * @param entry the entry to get the stream for. 1151 * @return a stream to read the entry from. The returned stream implements {@link InputStreamStatistics}. 1152 * @throws IOException if unable to create an input stream from the zipEntry. 1153 */ 1154 public InputStream getInputStream(final ZipArchiveEntry entry) throws IOException { 1155 if (!(entry instanceof Entry)) { 1156 return null; 1157 } 1158 // cast validity is checked just above 1159 ZipUtil.checkRequestedFeatures(entry); 1160 1161 // doesn't get closed if the method is not supported - which 1162 // should never happen because of the checkRequestedFeatures 1163 // call above 1164 final InputStream is = new BufferedInputStream(getRawInputStream(entry)); // NOSONAR 1165 switch (ZipMethod.getMethodByCode(entry.getMethod())) { 1166 case STORED: 1167 return new StoredStatisticsStream(is); 1168 case UNSHRINKING: 1169 return new UnshrinkingInputStream(is); 1170 case IMPLODING: 1171 try { 1172 return new ExplodingInputStream(entry.getGeneralPurposeBit().getSlidingDictionarySize(), 1173 entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is); 1174 } catch (final IllegalArgumentException ex) { 1175 throw new IOException("bad IMPLODE data", ex); 1176 } 1177 case DEFLATED: 1178 final Inflater inflater = new Inflater(true); 1179 // 
Inflater with nowrap=true has this odd contract for a zero padding 1180 // byte following the data stream; this used to be zlib's requirement 1181 // and has been fixed a long time ago, but the contract persists so 1182 // we comply. 1183 // https://docs.oracle.com/javase/8/docs/api/java/util/zip/Inflater.html#Inflater(boolean) 1184 return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), inflater) { 1185 @Override 1186 public void close() throws IOException { 1187 try { 1188 super.close(); 1189 } finally { 1190 inflater.end(); 1191 } 1192 } 1193 }; 1194 case BZIP2: 1195 return new BZip2CompressorInputStream(is); 1196 case ENHANCED_DEFLATED: 1197 return new Deflate64CompressorInputStream(is); 1198 case AES_ENCRYPTED: 1199 case EXPANDING_LEVEL_1: 1200 case EXPANDING_LEVEL_2: 1201 case EXPANDING_LEVEL_3: 1202 case EXPANDING_LEVEL_4: 1203 case JPEG: 1204 case LZMA: 1205 case PKWARE_IMPLODING: 1206 case PPMD: 1207 case TOKENIZATION: 1208 case UNKNOWN: 1209 case WAVPACK: 1210 case XZ: 1211 default: 1212 throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(entry.getMethod()), entry); 1213 } 1214 } 1215 1216 /** 1217 * Gets the raw stream of the archive entry (compressed form). 1218 * <p> 1219 * This method does not relate to how/if we understand the payload in the stream, since we really only intend to move it on to somewhere else. 1220 * </p> 1221 * <p> 1222 * Since version 1.22, this method will make an attempt to read the entry's data stream offset, even if the {@code ignoreLocalFileHeader} parameter was 1223 * {@code true} in the constructor. An IOException can also be thrown from the body of the method if this lookup fails for some reason. 1224 * </p> 1225 * 1226 * @param entry The entry to get the stream for 1227 * @return The raw input stream containing (possibly) compressed data. 1228 * @since 1.11 1229 * @throws IOException if there is a problem reading data offset (added in version 1.22). 
1230 */ 1231 public InputStream getRawInputStream(final ZipArchiveEntry entry) throws IOException { 1232 if (!(entry instanceof Entry)) { 1233 return null; 1234 } 1235 final long start = getDataOffset(entry); 1236 if (start == EntryStreamOffsets.OFFSET_UNKNOWN) { 1237 return null; 1238 } 1239 return createBoundedInputStream(start, entry.getCompressedSize()); 1240 } 1241 1242 /** 1243 * Gets the entry's content as a String if isUnixSymlink() returns true for it, otherwise returns null. 1244 * <p> 1245 * This method assumes the symbolic link's file name uses the same encoding that as been specified for this ZipFile. 1246 * </p> 1247 * 1248 * @param entry ZipArchiveEntry object that represents the symbolic link 1249 * @return entry's content as a String 1250 * @throws IOException problem with content's input stream 1251 * @since 1.5 1252 */ 1253 public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException { 1254 if (entry != null && entry.isUnixSymlink()) { 1255 try (InputStream in = getInputStream(entry)) { 1256 return zipEncoding.decode(org.apache.commons.io.IOUtils.toByteArray(in)); 1257 } 1258 } 1259 return null; 1260 } 1261 1262 /** 1263 * Reads the central directory of the given archive and populates the internal tables with ZipArchiveEntry instances. 1264 * <p> 1265 * The ZipArchiveEntrys will know all data that can be obtained from the central directory alone, but not the data that requires the local file header or 1266 * additional data to be read. 1267 * </p> 1268 * 1269 * @return a map of zip entries that didn't have the language encoding flag set when read. 
1270 */ 1271 private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() throws IOException { 1272 final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = new HashMap<>(); 1273 1274 positionAtCentralDirectory(); 1275 centralDirectoryStartOffset = archive.position(); 1276 1277 wordBbuf.rewind(); 1278 IOUtils.readFully(archive, wordBbuf); 1279 long sig = ZipLong.getValue(wordBuf); 1280 1281 if (sig != CFH_SIG && startsWithLocalFileHeader()) { 1282 throw new IOException("Central directory is empty, can't expand" + " corrupt archive."); 1283 } 1284 1285 while (sig == CFH_SIG) { 1286 readCentralDirectoryEntry(noUTF8Flag); 1287 wordBbuf.rewind(); 1288 IOUtils.readFully(archive, wordBbuf); 1289 sig = ZipLong.getValue(wordBuf); 1290 } 1291 return noUTF8Flag; 1292 } 1293 1294 /** 1295 * Searches for either the "Zip64 end of central directory locator" or the "End of central dir record", parses it and positions the 1296 * stream at the first central directory record. 1297 */ 1298 private void positionAtCentralDirectory() throws IOException { 1299 final boolean is64 = positionAtEndOfCentralDirectoryRecord(archive); 1300 if (!is64) { 1301 positionAtCentralDirectory32(); 1302 } else { 1303 positionAtCentralDirectory64(); 1304 } 1305 } 1306 1307 /** 1308 * Parses the "End of central dir record" and positions the stream at the first central directory record. 1309 * 1310 * Expects stream to be positioned at the beginning of the "End of central dir record". 
1311 */ 1312 private void positionAtCentralDirectory32() throws IOException { 1313 final long endOfCentralDirectoryRecordOffset = archive.position(); 1314 if (isSplitZipArchive) { 1315 skipBytes(CFD_DISK_OFFSET); 1316 shortBbuf.rewind(); 1317 IOUtils.readFully(archive, shortBbuf); 1318 centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf); 1319 1320 skipBytes(CFD_LOCATOR_RELATIVE_OFFSET); 1321 1322 wordBbuf.rewind(); 1323 IOUtils.readFully(archive, wordBbuf); 1324 centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf); 1325 ((ZipSplitReadOnlySeekableByteChannel) archive).position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset); 1326 } else { 1327 skipBytes(CFD_LENGTH_OFFSET); 1328 wordBbuf.rewind(); 1329 IOUtils.readFully(archive, wordBbuf); 1330 final long centralDirectoryLength = ZipLong.getValue(wordBuf); 1331 1332 wordBbuf.rewind(); 1333 IOUtils.readFully(archive, wordBbuf); 1334 centralDirectoryStartDiskNumber = 0; 1335 centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf); 1336 1337 firstLocalFileHeaderOffset = Long.max(endOfCentralDirectoryRecordOffset - centralDirectoryLength - centralDirectoryStartRelativeOffset, 0L); 1338 archive.position(centralDirectoryStartRelativeOffset + firstLocalFileHeaderOffset); 1339 } 1340 } 1341 1342 /** 1343 * Parses the "Zip64 end of central directory locator", finds the "Zip64 end of central directory record" using the parsed information, 1344 * parses that and positions the stream at the first central directory record. 1345 * 1346 * Expects stream to be positioned right behind the "Zip64 end of central directory locator"'s signature. 
1347 */ 1348 private void positionAtCentralDirectory64() throws IOException { 1349 skipBytes(ZipConstants.WORD); 1350 if (isSplitZipArchive) { 1351 wordBbuf.rewind(); 1352 IOUtils.readFully(archive, wordBbuf); 1353 final long diskNumberOfEOCD = ZipLong.getValue(wordBuf); 1354 1355 dwordBbuf.rewind(); 1356 IOUtils.readFully(archive, dwordBbuf); 1357 final long relativeOffsetOfEOCD = ZipEightByteInteger.getLongValue(dwordBuf); 1358 ((ZipSplitReadOnlySeekableByteChannel) archive).position(diskNumberOfEOCD, relativeOffsetOfEOCD); 1359 } else { 1360 skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET - ZipConstants.WORD /* signature has already been read */); 1361 dwordBbuf.rewind(); 1362 IOUtils.readFully(archive, dwordBbuf); 1363 archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); 1364 } 1365 1366 wordBbuf.rewind(); 1367 IOUtils.readFully(archive, wordBbuf); 1368 if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) { 1369 throw new ZipException("Archive's ZIP64 end of central directory locator is corrupt."); 1370 } 1371 1372 if (isSplitZipArchive) { 1373 skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET - ZipConstants.WORD /* signature has already been read */); 1374 wordBbuf.rewind(); 1375 IOUtils.readFully(archive, wordBbuf); 1376 centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf); 1377 1378 skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET); 1379 1380 dwordBbuf.rewind(); 1381 IOUtils.readFully(archive, dwordBbuf); 1382 centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf); 1383 ((ZipSplitReadOnlySeekableByteChannel) archive).position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset); 1384 } else { 1385 skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET - ZipConstants.WORD /* signature has already been read */); 1386 dwordBbuf.rewind(); 1387 IOUtils.readFully(archive, dwordBbuf); 1388 centralDirectoryStartDiskNumber = 0; 1389 centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf); 1390 
archive.position(centralDirectoryStartRelativeOffset); 1391 } 1392 } 1393 1394 /** 1395 * Reads an individual entry of the central directory, creates an ZipArchiveEntry from it and adds it to the global maps. 1396 * 1397 * @param noUTF8Flag map used to collect entries that don't have their UTF-8 flag set and whose name will be set by data read from the local file header 1398 * later. The current entry may be added to this map. 1399 */ 1400 private void readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag) throws IOException { 1401 cfhBbuf.rewind(); 1402 IOUtils.readFully(archive, cfhBbuf); 1403 int off = 0; 1404 final Entry ze = new Entry(); 1405 1406 final int versionMadeBy = ZipShort.getValue(cfhBuf, off); 1407 off += ZipConstants.SHORT; 1408 ze.setVersionMadeBy(versionMadeBy); 1409 ze.setPlatform(versionMadeBy >> BYTE_SHIFT & NIBLET_MASK); 1410 1411 ze.setVersionRequired(ZipShort.getValue(cfhBuf, off)); 1412 off += ZipConstants.SHORT; // version required 1413 1414 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off); 1415 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); 1416 final ZipEncoding entryEncoding = hasUTF8Flag ? 
ZipEncodingHelper.ZIP_ENCODING_UTF_8 : zipEncoding; 1417 if (hasUTF8Flag) { 1418 ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG); 1419 } 1420 ze.setGeneralPurposeBit(gpFlag); 1421 ze.setRawFlag(ZipShort.getValue(cfhBuf, off)); 1422 1423 off += ZipConstants.SHORT; 1424 1425 // noinspection MagicConstant 1426 ze.setMethod(ZipShort.getValue(cfhBuf, off)); 1427 off += ZipConstants.SHORT; 1428 1429 final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off)); 1430 ze.setTime(time); 1431 off += ZipConstants.WORD; 1432 1433 ze.setCrc(ZipLong.getValue(cfhBuf, off)); 1434 off += ZipConstants.WORD; 1435 1436 long size = ZipLong.getValue(cfhBuf, off); 1437 if (size < 0) { 1438 throw new IOException("broken archive, entry with negative compressed size"); 1439 } 1440 ze.setCompressedSize(size); 1441 off += ZipConstants.WORD; 1442 1443 size = ZipLong.getValue(cfhBuf, off); 1444 if (size < 0) { 1445 throw new IOException("broken archive, entry with negative size"); 1446 } 1447 ze.setSize(size); 1448 off += ZipConstants.WORD; 1449 1450 final int fileNameLen = ZipShort.getValue(cfhBuf, off); 1451 off += ZipConstants.SHORT; 1452 if (fileNameLen < 0) { 1453 throw new IOException("broken archive, entry with negative fileNameLen"); 1454 } 1455 1456 final int extraLen = ZipShort.getValue(cfhBuf, off); 1457 off += ZipConstants.SHORT; 1458 if (extraLen < 0) { 1459 throw new IOException("broken archive, entry with negative extraLen"); 1460 } 1461 1462 final int commentLen = ZipShort.getValue(cfhBuf, off); 1463 off += ZipConstants.SHORT; 1464 if (commentLen < 0) { 1465 throw new IOException("broken archive, entry with negative commentLen"); 1466 } 1467 1468 ze.setDiskNumberStart(ZipShort.getValue(cfhBuf, off)); 1469 off += ZipConstants.SHORT; 1470 1471 ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off)); 1472 off += ZipConstants.SHORT; 1473 1474 ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off)); 1475 off += ZipConstants.WORD; 1476 1477 final byte[] 
fileName = IOUtils.readRange(archive, fileNameLen); 1478 if (fileName.length < fileNameLen) { 1479 throw new EOFException(); 1480 } 1481 ze.setName(entryEncoding.decode(fileName), fileName); 1482 1483 // LFH offset, 1484 ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off) + firstLocalFileHeaderOffset); 1485 // data offset will be filled later 1486 entries.add(ze); 1487 1488 final byte[] cdExtraData = IOUtils.readRange(archive, extraLen); 1489 if (cdExtraData.length < extraLen) { 1490 throw new EOFException(); 1491 } 1492 try { 1493 ze.setCentralDirectoryExtra(cdExtraData); 1494 } catch (final RuntimeException e) { 1495 final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName()); 1496 z.initCause(e); 1497 throw z; 1498 } 1499 1500 setSizesAndOffsetFromZip64Extra(ze); 1501 sanityCheckLFHOffset(ze); 1502 1503 final byte[] comment = IOUtils.readRange(archive, commentLen); 1504 if (comment.length < commentLen) { 1505 throw new EOFException(); 1506 } 1507 ze.setComment(entryEncoding.decode(comment)); 1508 1509 if (!hasUTF8Flag && useUnicodeExtraFields) { 1510 noUTF8Flag.put(ze, new NameAndComment(fileName, comment)); 1511 } 1512 1513 ze.setStreamContiguous(true); 1514 } 1515 1516 /** 1517 * Walks through all recorded entries and adds the data available from the local file header. 1518 * <p> 1519 * Also records the offsets for the data to read from the entries. 
1520 * </p> 1521 */ 1522 private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag) throws IOException { 1523 for (final ZipArchiveEntry zipArchiveEntry : entries) { 1524 // entries are filled in populateFromCentralDirectory and never modified 1525 final Entry ze = (Entry) zipArchiveEntry; 1526 final int[] lens = setDataOffset(ze); 1527 final int fileNameLen = lens[0]; 1528 final int extraFieldLen = lens[1]; 1529 skipBytes(fileNameLen); 1530 final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen); 1531 if (localExtraData.length < extraFieldLen) { 1532 throw new EOFException(); 1533 } 1534 try { 1535 ze.setExtra(localExtraData); 1536 } catch (final RuntimeException e) { 1537 final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName()); 1538 z.initCause(e); 1539 throw z; 1540 } 1541 1542 if (entriesWithoutUTF8Flag.containsKey(ze)) { 1543 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 1544 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, nc.comment); 1545 } 1546 } 1547 } 1548 1549 private void sanityCheckLFHOffset(final ZipArchiveEntry entry) throws IOException { 1550 if (entry.getDiskNumberStart() < 0) { 1551 throw new IOException("broken archive, entry with negative disk number"); 1552 } 1553 if (entry.getLocalHeaderOffset() < 0) { 1554 throw new IOException("broken archive, entry with negative local file header offset"); 1555 } 1556 if (isSplitZipArchive) { 1557 if (entry.getDiskNumberStart() > centralDirectoryStartDiskNumber) { 1558 throw new IOException("local file header for " + entry.getName() + " starts on a later disk than central directory"); 1559 } 1560 if (entry.getDiskNumberStart() == centralDirectoryStartDiskNumber && entry.getLocalHeaderOffset() > centralDirectoryStartRelativeOffset) { 1561 throw new IOException("local file header for " + entry.getName() + " starts after central directory"); 1562 } 1563 } else if (entry.getLocalHeaderOffset() > 
centralDirectoryStartOffset) { 1564 throw new IOException("local file header for " + entry.getName() + " starts after central directory"); 1565 } 1566 } 1567 1568 private int[] setDataOffset(final ZipArchiveEntry entry) throws IOException { 1569 long offset = entry.getLocalHeaderOffset(); 1570 if (isSplitZipArchive) { 1571 ((ZipSplitReadOnlySeekableByteChannel) archive).position(entry.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1572 // the offset should be updated to the global offset 1573 offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH; 1574 } else { 1575 archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1576 } 1577 wordBbuf.rewind(); 1578 IOUtils.readFully(archive, wordBbuf); 1579 wordBbuf.flip(); 1580 wordBbuf.get(shortBuf); 1581 final int fileNameLen = ZipShort.getValue(shortBuf); 1582 wordBbuf.get(shortBuf); 1583 final int extraFieldLen = ZipShort.getValue(shortBuf); 1584 entry.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH + ZipConstants.SHORT + ZipConstants.SHORT + fileNameLen + extraFieldLen); 1585 if (entry.getDataOffset() + entry.getCompressedSize() > centralDirectoryStartOffset) { 1586 throw new IOException("data for " + entry.getName() + " overlaps with central directory."); 1587 } 1588 return new int[] { fileNameLen, extraFieldLen }; 1589 } 1590 1591 /** 1592 * If the entry holds a Zip64 extended information extra field, read sizes from there if the entry's sizes are set to 0xFFFFFFFFF, do the same for the 1593 * offset of the local file header. 1594 * <p> 1595 * Ensures the Zip64 extra either knows both compressed and uncompressed size or neither of both as the internal logic in ExtraFieldUtils forces the field 1596 * to create local header data even if they are never used - and here a field with only one size would be invalid. 
1597 * </p> 1598 */ 1599 private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry entry) throws IOException { 1600 final ZipExtraField extra = entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 1601 if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) { 1602 throw new ZipException("archive contains unparseable zip64 extra field"); 1603 } 1604 final Zip64ExtendedInformationExtraField z64 = (Zip64ExtendedInformationExtraField) extra; 1605 if (z64 != null) { 1606 final boolean hasUncompressedSize = entry.getSize() == ZipConstants.ZIP64_MAGIC; 1607 final boolean hasCompressedSize = entry.getCompressedSize() == ZipConstants.ZIP64_MAGIC; 1608 final boolean hasRelativeHeaderOffset = entry.getLocalHeaderOffset() == ZipConstants.ZIP64_MAGIC; 1609 final boolean hasDiskStart = entry.getDiskNumberStart() == ZipConstants.ZIP64_MAGIC_SHORT; 1610 z64.reparseCentralDirectoryData(hasUncompressedSize, hasCompressedSize, hasRelativeHeaderOffset, hasDiskStart); 1611 1612 if (hasUncompressedSize) { 1613 final long size = z64.getSize().getLongValue(); 1614 if (size < 0) { 1615 throw new IOException("broken archive, entry with negative size"); 1616 } 1617 entry.setSize(size); 1618 } else if (hasCompressedSize) { 1619 z64.setSize(new ZipEightByteInteger(entry.getSize())); 1620 } 1621 1622 if (hasCompressedSize) { 1623 final long size = z64.getCompressedSize().getLongValue(); 1624 if (size < 0) { 1625 throw new IOException("broken archive, entry with negative compressed size"); 1626 } 1627 entry.setCompressedSize(size); 1628 } else if (hasUncompressedSize) { 1629 z64.setCompressedSize(new ZipEightByteInteger(entry.getCompressedSize())); 1630 } 1631 1632 if (hasRelativeHeaderOffset) { 1633 entry.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue()); 1634 } 1635 1636 if (hasDiskStart) { 1637 entry.setDiskNumberStart(z64.getDiskStartNumber().getValue()); 1638 } 1639 } 1640 } 1641 1642 /** 1643 * Skips the given number of bytes or 
* throws an EOFException if skipping failed.
     */
    private void skipBytes(final int count) throws IOException {
        final long target = archive.position() + count;
        if (target > archive.size()) {
            // the requested position lies beyond the end of the archive
            throw new EOFException();
        }
        archive.position(target);
    }

    /**
     * Orders the given entries by offset; the array itself is sorted, no copy is made.
     *
     * @param allEntries entries to sort
     * @return the array passed in, now sorted.
     */
    private ZipArchiveEntry[] sortByOffset(final ZipArchiveEntry[] allEntries) {
        Arrays.sort(allEntries, offsetComparator);
        return allEntries;
    }

    /**
     * Tests whether the bytes found at the first local file header offset are an LFH signature. If they are not, the archive may be empty.
     */
    private boolean startsWithLocalFileHeader() throws IOException {
        archive.position(firstLocalFileHeaderOffset);
        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        final byte[] expectedSignature = ZipArchiveOutputStream.LFH_SIG;
        return Arrays.equals(wordBuf, expectedSignature);
    }
}