View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one or more
3    *  contributor license agreements.  See the NOTICE file distributed with
4    *  this work for additional information regarding copyright ownership.
5    *  The ASF licenses this file to You under the Apache License, Version 2.0
6    *  (the "License"); you may not use this file except in compliance with
7    *  the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   */
17  package org.apache.commons.compress.archivers.zip;
18  
19  import java.io.BufferedInputStream;
20  import java.io.ByteArrayInputStream;
21  import java.io.Closeable;
22  import java.io.EOFException;
23  import java.io.File;
24  import java.io.IOException;
25  import java.io.InputStream;
26  import java.io.SequenceInputStream;
27  import java.nio.ByteBuffer;
28  import java.nio.ByteOrder;
29  import java.nio.channels.FileChannel;
30  import java.nio.channels.SeekableByteChannel;
31  import java.nio.charset.Charset;
32  import java.nio.charset.StandardCharsets;
33  import java.nio.file.Files;
34  import java.nio.file.OpenOption;
35  import java.nio.file.Path;
36  import java.nio.file.StandardOpenOption;
37  import java.util.ArrayList;
38  import java.util.Arrays;
39  import java.util.Collections;
40  import java.util.Comparator;
41  import java.util.EnumSet;
42  import java.util.Enumeration;
43  import java.util.HashMap;
44  import java.util.LinkedList;
45  import java.util.List;
46  import java.util.Map;
47  import java.util.Objects;
48  import java.util.stream.Collectors;
49  import java.util.stream.IntStream;
50  import java.util.zip.Inflater;
51  import java.util.zip.ZipException;
52  
53  import org.apache.commons.compress.archivers.EntryStreamOffsets;
54  import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
55  import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
56  import org.apache.commons.compress.utils.BoundedArchiveInputStream;
57  import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
58  import org.apache.commons.compress.utils.IOUtils;
59  import org.apache.commons.compress.utils.InputStreamStatistics;
60  import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
61  import org.apache.commons.io.Charsets;
62  import org.apache.commons.io.FilenameUtils;
63  import org.apache.commons.io.build.AbstractOrigin.ByteArrayOrigin;
64  import org.apache.commons.io.build.AbstractStreamBuilder;
65  import org.apache.commons.io.input.BoundedInputStream;
66  
67  /**
68   * Replacement for {@link java.util.zip.ZipFile}.
69   * <p>
 * This class adds support for file name encodings other than UTF-8 (which is required to work on ZIP files created by native ZIP tools) and is able to skip a
71   * preamble like the one found in self extracting archives. Furthermore it returns instances of
72   * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} instead of {@link java.util.zip.ZipEntry}.
73   * </p>
74   * <p>
75   * It doesn't extend {@link java.util.zip.ZipFile} as it would have to reimplement all methods anyway. Like {@link java.util.zip.ZipFile}, it uses
76   * SeekableByteChannel under the covers and supports compressed and uncompressed entries. As of Apache Commons Compress 1.3 it also transparently supports Zip64
77   * extensions and thus individual entries and archives larger than 4 GB or with more than 65,536 entries.
78   * </p>
79   * <p>
80   * The method signatures mimic the ones of {@link java.util.zip.ZipFile}, with a couple of exceptions:
81   * </p>
82   * <ul>
83   * <li>There is no getName method.</li>
84   * <li>entries has been renamed to getEntries.</li>
85   * <li>getEntries and getEntry return {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} instances.</li>
86   * <li>close is allowed to throw IOException.</li>
87   * </ul>
88   */
89  public class ZipFile implements Closeable {
90  
91      /**
92       * Lock-free implementation of BoundedInputStream. The implementation uses positioned reads on the underlying archive file channel and therefore performs
93       * significantly faster in concurrent environment.
94       */
95      private static class BoundedFileChannelInputStream extends BoundedArchiveInputStream {
96          private final FileChannel archive;
97  
98          BoundedFileChannelInputStream(final long start, final long remaining, final FileChannel archive) {
99              super(start, remaining);
100             this.archive = archive;
101         }
102 
103         @Override
104         protected int read(final long pos, final ByteBuffer buf) throws IOException {
105             final int read = archive.read(buf, pos);
106             buf.flip();
107             return read;
108         }
109     }
110 
111     /**
112      * Builds new {@link ZipFile} instances.
113      * <p>
114      * The channel will be opened for reading, assuming the specified encoding for file names.
115      * </p>
116      * <p>
117      * See {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} to read from an in-memory archive.
118      * </p>
119      * <p>
120      * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
121      * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
122      * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
123      * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
124      * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
125      * </p>
126      *
127      * @since 1.26.0
128      */
129     public static class Builder extends AbstractStreamBuilder<ZipFile, Builder> {
130 
131         static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;
132 
133         private SeekableByteChannel seekableByteChannel;
134         private boolean useUnicodeExtraFields = true;
135         private boolean ignoreLocalFileHeader;
136         private long maxNumberOfDisks = 1;
137 
138         public Builder() {
139             setCharset(DEFAULT_CHARSET);
140             setCharsetDefault(DEFAULT_CHARSET);
141         }
142 
143         @Override
144         public ZipFile get() throws IOException {
145             final SeekableByteChannel actualChannel;
146             final String actualDescription;
147             if (seekableByteChannel != null) {
148                 actualChannel = seekableByteChannel;
149                 actualDescription = actualChannel.getClass().getSimpleName();
150             } else if (checkOrigin() instanceof ByteArrayOrigin) {
151                 actualChannel = new SeekableInMemoryByteChannel(checkOrigin().getByteArray());
152                 actualDescription = actualChannel.getClass().getSimpleName();
153             } else {
154                 OpenOption[] openOptions = getOpenOptions();
155                 if (openOptions.length == 0) {
156                     openOptions = new OpenOption[] { StandardOpenOption.READ };
157                 }
158                 final Path path = getPath();
159                 actualChannel = openZipChannel(path, maxNumberOfDisks, openOptions);
160                 actualDescription = path.toString();
161             }
162             final boolean closeOnError = seekableByteChannel != null;
163             return new ZipFile(actualChannel, actualDescription, getCharset(), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader);
164         }
165 
166         /**
167          * Sets whether to ignore information stored inside the local file header.
168          *
169          * @param ignoreLocalFileHeader whether to ignore information stored inside.
170          * @return {@code this} instance.
171          */
172         public Builder setIgnoreLocalFileHeader(final boolean ignoreLocalFileHeader) {
173             this.ignoreLocalFileHeader = ignoreLocalFileHeader;
174             return this;
175         }
176 
177         /**
178          * Sets max number of multi archive disks, default is 1 (no multi archive).
179          *
180          * @param maxNumberOfDisks max number of multi archive disks.
181          *
182          * @return {@code this} instance.
183          */
184         public Builder setMaxNumberOfDisks(final long maxNumberOfDisks) {
185             this.maxNumberOfDisks = maxNumberOfDisks;
186             return this;
187         }
188 
189         /**
190          * The actual channel, overrides any other input aspects like a File, Path, and so on.
191          *
192          * @param seekableByteChannel The actual channel.
193          * @return {@code this} instance.
194          */
195         public Builder setSeekableByteChannel(final SeekableByteChannel seekableByteChannel) {
196             this.seekableByteChannel = seekableByteChannel;
197             return this;
198         }
199 
200         /**
201          * Sets whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
202          *
203          * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
204          * @return {@code this} instance.
205          */
206         public Builder setUseUnicodeExtraFields(final boolean useUnicodeExtraFields) {
207             this.useUnicodeExtraFields = useUnicodeExtraFields;
208             return this;
209         }
210 
211     }
212 
213     /**
214      * Extends ZipArchiveEntry to store the offset within the archive.
215      */
216     private static final class Entry extends ZipArchiveEntry {
217 
218         @Override
219         public boolean equals(final Object other) {
220             if (super.equals(other)) {
221                 // super.equals would return false if other were not an Entry
222                 final Entry otherEntry = (Entry) other;
223                 return getLocalHeaderOffset() == otherEntry.getLocalHeaderOffset() && super.getDataOffset() == otherEntry.getDataOffset()
224                         && super.getDiskNumberStart() == otherEntry.getDiskNumberStart();
225             }
226             return false;
227         }
228 
229         @Override
230         public int hashCode() {
231             return 3 * super.hashCode() + (int) getLocalHeaderOffset() + (int) (getLocalHeaderOffset() >> 32);
232         }
233     }
234 
    /**
     * Holds the raw bytes of a name together with the raw bytes of a comment.
     * <p>
     * The arrays are stored as given, no copies are made.
     * </p>
     */
    private static final class NameAndComment {
        /** The name's bytes. */
        private final byte[] name;
        /** The comment's bytes. */
        private final byte[] comment;

        private NameAndComment(final byte[] name, final byte[] comment) {
            this.name = name;
            this.comment = comment;
        }
    }
244 
245     private static final class StoredStatisticsStream extends BoundedInputStream implements InputStreamStatistics {
246         StoredStatisticsStream(final InputStream in) {
247             super(in);
248         }
249 
250         @Override
251         public long getCompressedCount() {
252             return super.getCount();
253         }
254 
255         @Override
256         public long getUncompressedCount() {
257             return getCompressedCount();
258         }
259     }
260 
    /** Name of the UTF-8 charset, used when no encoding is given explicitly. */
    private static final String DEFAULT_CHARSET_NAME = StandardCharsets.UTF_8.name();

    /** Option set for opening a file for reading only. */
    private static final EnumSet<StandardOpenOption> READ = EnumSet.of(StandardOpenOption.READ);

    /** Initial capacity of the map from entry names to entries. */
    private static final int HASH_SIZE = 509;
    /** Mask selecting the lowest four bits of a value. */
    static final int NIBLET_MASK = 0x0f;
    /** Number of bits to shift by in order to move a value by one byte. */
    static final int BYTE_SHIFT = 8;
    // indexes of the four bytes of a signature
    private static final int POS_0 = 0;
    private static final int POS_1 = 1;
    private static final int POS_2 = 2;
    private static final int POS_3 = 3;
    /** An array holding a single zero byte. */
    private static final byte[] ONE_ZERO_BYTE = new byte[1];
273 
    /**
     * Length of a "central directory" entry structure without file name, extra fields or comment.
     * <p>
     * The sum starts at the "version made by" field, the signature in front of the entry is not part of it.
     * </p>
     */
    private static final int CFH_LEN =
    // @formatter:off
        /* version made by                 */ ZipConstants.SHORT
        /* version needed to extract       */ + ZipConstants.SHORT
        /* general purpose bit flag        */ + ZipConstants.SHORT
        /* compression method              */ + ZipConstants.SHORT
        /* last mod file time              */ + ZipConstants.SHORT
        /* last mod file date              */ + ZipConstants.SHORT
        /* crc-32                          */ + ZipConstants.WORD
        /* compressed size                 */ + ZipConstants.WORD
        /* uncompressed size               */ + ZipConstants.WORD
        /* file name length                */ + ZipConstants. SHORT
        /* extra field length              */ + ZipConstants.SHORT
        /* file comment length             */ + ZipConstants.SHORT
        /* disk number start               */ + ZipConstants.SHORT
        /* internal file attributes        */ + ZipConstants.SHORT
        /* external file attributes        */ + ZipConstants.WORD
        /* relative offset of local header */ + ZipConstants.WORD;
    // @formatter:on

    /** Numeric value of the signature bytes in {@code ZipArchiveOutputStream.CFH_SIG}. */
    private static final long CFH_SIG = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
298 
    /**
     * Length of the "End of central directory record" - which is supposed to be the last structure of the archive - without file comment.
     * <p>
     * Unlike the central directory entry length this sum includes the record's signature.
     * </p>
     */
    static final int MIN_EOCD_SIZE =
    // @formatter:off
        /* end of central dir signature    */ ZipConstants.WORD
        /* number of this disk             */ + ZipConstants.SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + ZipConstants.SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + ZipConstants.SHORT
        /* total number of entries in      */
        /* the central dir                 */ + ZipConstants.SHORT
        /* size of the central directory   */ + ZipConstants.WORD
        /* offset of start of central      */
        /* directory with respect to       */
        /* the starting disk number        */ + ZipConstants.WORD
        /* ZIP file comment length         */ + ZipConstants.SHORT;
    // @formatter:on

    /**
     * Maximum length of the "End of central directory record" with a file comment, that is the minimum length plus the longest comment the record's comment
     * length field allows for.
     */
    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
    // @formatter:off
        /* maximum length of ZIP file comment */ + ZipConstants.ZIP64_MAGIC_SHORT;
    // @formatter:on
326 
    /**
     * Offset of the field that holds the length (size) of the central directory inside the "End of central directory record" relative to the start of the
     * "End of central directory record".
     */
    private static final int CFD_LENGTH_OFFSET =
    // @formatter:off
        /* end of central dir signature    */ ZipConstants.WORD
        /* number of this disk             */ + ZipConstants.SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + ZipConstants.SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + ZipConstants.SHORT
        /* total number of entries in      */
        /* the central dir                 */ + ZipConstants.SHORT;
    // @formatter:on

    /**
     * Offset of the field that holds the disk number of the first central directory entry inside the "End of central directory record" relative to the start of
     * the "End of central directory record".
     */
    private static final int CFD_DISK_OFFSET =
    // @formatter:off
            /* end of central dir signature    */ ZipConstants.WORD
            /* number of this disk             */ + ZipConstants.SHORT;
    // @formatter:on
352 
353     /**
354      * Offset of the field that holds the location of the first central directory entry inside the "End of central directory record" relative to the "number of
355      * the disk with the start of the central directory".
356      */
357     private static final int CFD_LOCATOR_RELATIVE_OFFSET =
358     // @formatter:off
359             /* total number of entries in      */
360             /* the central dir on this disk    */ + ZipConstants.SHORT
361             /* total number of entries in      */
362             /* the central dir                 */ + ZipConstants.SHORT
363             /* size of the central directory   */ + ZipConstants.WORD;
364     // @formatter:on
365 
    /**
     * Length of the "Zip64 end of central directory locator" - which should be right in front of the "end of central directory record" if one is present at
     * all.
     */
    private static final int ZIP64_EOCDL_LENGTH =
    // @formatter:off
        /* zip64 end of central dir locator sig */ ZipConstants.WORD
        /* number of the disk with the          */
        /* start of the zip64 end of            */
        /* central directory                    */ + ZipConstants.WORD
        /* relative offset of the zip64         */
        /* end of central directory record      */ + ZipConstants.DWORD
        /* total number of disks                */ + ZipConstants.WORD;
    // @formatter:on

    /**
     * Offset of the field that holds the location of the "Zip64 end of central directory record" inside the "Zip64 end of central directory locator" relative
     * to the start of the "Zip64 end of central directory locator".
     */
    private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
    // @formatter:off
        /* zip64 end of central dir locator sig */ ZipConstants.WORD
        /* number of the disk with the          */
        /* start of the zip64 end of            */
        /* central directory                    */ + ZipConstants.WORD;
    // @formatter:on
392 
    /**
     * Offset of the field that holds the location of the first central directory entry inside the "Zip64 end of central directory record" relative to the start
     * of the "Zip64 end of central directory record".
     */
    private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
    // @formatter:off
        /* zip64 end of central dir        */
        /* signature                       */ ZipConstants.WORD
        /* size of zip64 end of central    */
        /* directory record                */ + ZipConstants.DWORD
        /* version made by                 */ + ZipConstants.SHORT
        /* version needed to extract       */ + ZipConstants.SHORT
        /* number of this disk             */ + ZipConstants.WORD
        /* number of the disk with the     */
        /* start of the central directory  */ + ZipConstants.WORD
        /* total number of entries in the  */
        /* central directory on this disk  */ + ZipConstants.DWORD
        /* total number of entries in the  */
        /* central directory               */ + ZipConstants.DWORD
        /* size of the central directory   */ + ZipConstants.DWORD;
    // @formatter:on

    /**
     * Offset of the field that holds the disk number of the first central directory entry inside the "Zip64 end of central directory record" relative to the
     * start of the "Zip64 end of central directory record".
     */
    private static final int ZIP64_EOCD_CFD_DISK_OFFSET =
    // @formatter:off
            /* zip64 end of central dir        */
            /* signature                       */ ZipConstants.WORD
            /* size of zip64 end of central    */
            /* directory record                */ + ZipConstants.DWORD
            /* version made by                 */ + ZipConstants.SHORT
            /* version needed to extract       */ + ZipConstants.SHORT
            /* number of this disk             */ + ZipConstants.WORD;
    // @formatter:on

    /**
     * Number of bytes between the end of the "number of the disk with the start of the central directory" field and the field that holds the location of the
     * first central directory entry inside the "Zip64 end of central directory record", i.e. the combined length of the fields in between.
     */
    private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET =
    // @formatter:off
            /* total number of entries in the  */
            /* central directory on this disk  */ ZipConstants.DWORD
            /* total number of entries in the  */
            /* central directory               */ + ZipConstants.DWORD
            /* size of the central directory   */ + ZipConstants.DWORD;
    // @formatter:on
442 
    /**
     * Number of bytes in local file header up to the &quot;length of file name&quot; entry.
     * <p>
     * The sum includes the local file header signature; the last summand is cast so the whole expression is evaluated as a {@code long}.
     * </p>
     */
    private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
    // @formatter:off
        /* local file header signature     */ ZipConstants.WORD
        /* version needed to extract       */ + ZipConstants.SHORT
        /* general purpose bit flag        */ + ZipConstants.SHORT
        /* compression method              */ + ZipConstants.SHORT
        /* last mod file time              */ + ZipConstants.SHORT
        /* last mod file date              */ + ZipConstants.SHORT
        /* crc-32                          */ + ZipConstants.WORD
        /* compressed size                 */ + ZipConstants.WORD
        /* uncompressed size               */ + (long) ZipConstants.WORD;
    // @formatter:on
458 
459     /**
460      * Compares two ZipArchiveEntries based on their offset within the archive.
461      * <p>
462      * Won't return any meaningful results if one of the entries isn't part of the archive at all.
463      * </p>
464      *
465      * @since 1.1
466      */
467     private static final Comparator<ZipArchiveEntry> offsetComparator = Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart)
468             .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset);
469 
    /**
     * Creates a new Builder.
     * <p>
     * Each call returns a fresh instance.
     * </p>
     *
     * @return a new Builder.
     * @since 1.26.0
     */
    public static Builder builder() {
        return new Builder();
    }
479 
    /**
     * Closes a ZIP file quietly; throwing no IOException, does nothing on null input.
     * <p>
     * Delegates to {@code org.apache.commons.io.IOUtils.closeQuietly}.
     * </p>
     *
     * @param zipFile file to close, can be null
     */
    public static void closeQuietly(final ZipFile zipFile) {
        org.apache.commons.io.IOUtils.closeQuietly(zipFile);
    }
488 
    /**
     * Creates a new SeekableByteChannel for reading.
     * <p>
     * The file is opened with {@link StandardOpenOption#READ} only.
     * </p>
     *
     * @param path the path to the file to open
     * @return a new seekable byte channel
     * @throws IOException if an I/O error occurs
     */
    private static SeekableByteChannel newReadByteChannel(final Path path) throws IOException {
        return Files.newByteChannel(path, READ);
    }
499 
500     private static SeekableByteChannel openZipChannel(final Path path, final long maxNumberOfDisks, final OpenOption[] openOptions) throws IOException {
501         final FileChannel channel = FileChannel.open(path, StandardOpenOption.READ);
502         final List<FileChannel> channels = new ArrayList<>();
503         try {
504             final boolean is64 = positionAtEndOfCentralDirectoryRecord(channel);
505             long numberOfDisks;
506             if (is64) {
507                 channel.position(channel.position() + ZipConstants.WORD + ZipConstants.WORD + ZipConstants.DWORD);
508                 final ByteBuffer buf = ByteBuffer.allocate(ZipConstants.WORD);
509                 buf.order(ByteOrder.LITTLE_ENDIAN);
510                 IOUtils.readFully(channel, buf);
511                 buf.flip();
512                 numberOfDisks = buf.getInt() & 0xffffffffL;
513             } else {
514                 channel.position(channel.position() + ZipConstants.WORD);
515                 final ByteBuffer buf = ByteBuffer.allocate(ZipConstants.SHORT);
516                 buf.order(ByteOrder.LITTLE_ENDIAN);
517                 IOUtils.readFully(channel, buf);
518                 buf.flip();
519                 numberOfDisks = (buf.getShort() & 0xffff) + 1;
520             }
521             if (numberOfDisks > Math.min(maxNumberOfDisks, Integer.MAX_VALUE)) {
522                 throw new IOException("Too many disks for zip archive, max=" + Math.min(maxNumberOfDisks, Integer.MAX_VALUE) + " actual=" + numberOfDisks);
523             }
524 
525             if (numberOfDisks <= 1) {
526                 return channel;
527             }
528             channel.close();
529 
530             final Path parent = path.getParent();
531             final String basename = FilenameUtils.removeExtension(Objects.toString(path.getFileName(), null));
532 
533             return ZipSplitReadOnlySeekableByteChannel.forPaths(IntStream.range(0, (int) numberOfDisks).mapToObj(i -> {
534                 if (i == numberOfDisks - 1) {
535                     return path;
536                 }
537                 final Path lowercase = parent.resolve(String.format("%s.z%02d", basename, i + 1));
538                 if (Files.exists(lowercase)) {
539                     return lowercase;
540                 }
541                 final Path uppercase = parent.resolve(String.format("%s.Z%02d", basename, i + 1));
542                 if (Files.exists(uppercase)) {
543                     return uppercase;
544                 }
545                 return lowercase;
546             }).collect(Collectors.toList()), openOptions);
547         } catch (final Throwable ex) {
548             org.apache.commons.io.IOUtils.closeQuietly(channel);
549             channels.forEach(org.apache.commons.io.IOUtils::closeQuietly);
550             throw ex;
551         }
552     }
553 
554     /**
555      * Searches for the and positions the stream at the start of the &quot;End of central dir record&quot;.
556      *
557      * @return true if it's Zip64 end of central directory or false if it's Zip32
558      */
559     private static boolean positionAtEndOfCentralDirectoryRecord(final SeekableByteChannel channel) throws IOException {
560         final boolean found = tryToLocateSignature(channel, MIN_EOCD_SIZE, MAX_EOCD_SIZE, ZipArchiveOutputStream.EOCD_SIG);
561         if (!found) {
562             throw new ZipException("Archive is not a ZIP archive");
563         }
564         boolean found64 = false;
565         final long position = channel.position();
566         if (position > ZIP64_EOCDL_LENGTH) {
567             final ByteBuffer wordBuf = ByteBuffer.allocate(4);
568             channel.position(channel.position() - ZIP64_EOCDL_LENGTH);
569             wordBuf.rewind();
570             IOUtils.readFully(channel, wordBuf);
571             wordBuf.flip();
572             found64 = wordBuf.equals(ByteBuffer.wrap(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG));
573             if (!found64) {
574                 channel.position(position);
575             } else {
576                 channel.position(channel.position() - ZipConstants.WORD);
577             }
578         }
579 
580         return found64;
581     }
582 
583     /**
584      * Searches the archive backwards from minDistance to maxDistance for the given signature, positions the RandomaccessFile right at the signature if it has
585      * been found.
586      */
587     private static boolean tryToLocateSignature(final SeekableByteChannel channel, final long minDistanceFromEnd, final long maxDistanceFromEnd,
588             final byte[] sig) throws IOException {
589         final ByteBuffer wordBuf = ByteBuffer.allocate(ZipConstants.WORD);
590         boolean found = false;
591         long off = channel.size() - minDistanceFromEnd;
592         final long stopSearching = Math.max(0L, channel.size() - maxDistanceFromEnd);
593         if (off >= 0) {
594             for (; off >= stopSearching; off--) {
595                 channel.position(off);
596                 try {
597                     wordBuf.rewind();
598                     IOUtils.readFully(channel, wordBuf);
599                     wordBuf.flip();
600                 } catch (final EOFException ex) { // NOSONAR
601                     break;
602                 }
603                 int curr = wordBuf.get();
604                 if (curr == sig[POS_0]) {
605                     curr = wordBuf.get();
606                     if (curr == sig[POS_1]) {
607                         curr = wordBuf.get();
608                         if (curr == sig[POS_2]) {
609                             curr = wordBuf.get();
610                             if (curr == sig[POS_3]) {
611                                 found = true;
612                                 break;
613                             }
614                         }
615                     }
616                 }
617             }
618         }
619         if (found) {
620             channel.position(off);
621         }
622         return found;
623     }
624 
    /**
     * List of entries in the order they appear inside the central directory.
     */
    private final List<ZipArchiveEntry> entries = new LinkedList<>();

    /**
     * Maps String to list of ZipArchiveEntrys, name -> actual entries.
     */
    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = new HashMap<>(HASH_SIZE);

    /**
     * The encoding to use for file names and the file comment.
     * <p>
     * For a list of possible values see <a href="https://docs.oracle.com/javase/8/docs/technotes/guides/intl/encoding.doc.html">Supported Encodings</a>.
     * Defaults to UTF-8.
     * </p>
     */
    private final Charset encoding;

    /**
     * The ZIP encoding to use for file names and the file comment.
     */
    private final ZipEncoding zipEncoding;

    /**
     * The actual data source.
     */
    private final SeekableByteChannel archive;

    /**
     * Whether to look for and use Unicode extra fields.
     */
    private final boolean useUnicodeExtraFields;

    /**
     * Whether the file is closed.
     * <p>
     * Starts out as {@code true}. NOTE(review): presumably cleared once the archive has been opened successfully - the constructors are not part of this
     * view, confirm there.
     * </p>
     */
    private volatile boolean closed = true;

    /**
     * Whether the ZIP archive is a split ZIP archive
     */
    private final boolean isSplitZipArchive;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    // each byte array below is paired with a ByteBuffer wrapping it, so both share the same storage
    private final byte[] dwordBuf = new byte[ZipConstants.DWORD];

    private final byte[] wordBuf = new byte[ZipConstants.WORD];

    private final byte[] cfhBuf = new byte[CFH_LEN];

    private final byte[] shortBuf = new byte[ZipConstants.SHORT];

    private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);

    private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);

    private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);

    private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf);

    // NOTE(review): judging by the names these hold where the central directory and the first local file header start - confirm against the code
    // that assigns them, which is not part of this view
    private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset;

    private long centralDirectoryStartOffset;

    private long firstLocalFileHeaderOffset;
691 
692     /**
693      * Opens the given file for reading, assuming "UTF8" for file names.
694      *
695      * @param file the archive.
696      *
697      * @throws IOException if an error occurs while reading the file.
698      * @deprecated Use {@link Builder#get()}.
699      */
700     @Deprecated
701     public ZipFile(final File file) throws IOException {
702         this(file, DEFAULT_CHARSET_NAME);
703     }
704 
705     /**
706      * Opens the given file for reading, assuming the specified encoding for file names and scanning for Unicode extra fields.
707      *
708      * @param file     the archive.
709      * @param encoding the encoding to use for file names, use null for the platform's default encoding
710      * @throws IOException if an error occurs while reading the file.
711      * @deprecated Use {@link Builder#get()}.
712      */
713     @Deprecated
714     public ZipFile(final File file, final String encoding) throws IOException {
715         this(file.toPath(), encoding, true);
716     }
717 
718     /**
719      * Opens the given file for reading, assuming the specified encoding for file names.
720      *
721      * @param file                  the archive.
722      * @param encoding              the encoding to use for file names, use null for the platform's default encoding
723      * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
724      * @throws IOException if an error occurs while reading the file.
725      * @deprecated Use {@link Builder#get()}.
726      */
727     @Deprecated
728     public ZipFile(final File file, final String encoding, final boolean useUnicodeExtraFields) throws IOException {
729         this(file.toPath(), encoding, useUnicodeExtraFields, false);
730     }
731 
732     /**
733      * Opens the given file for reading, assuming the specified encoding for file names.
734      * <p>
735      * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
736      * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
737      * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
738      * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
739      * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
740      * </p>
741      *
742      * @param file                  the archive.
743      * @param encoding              the encoding to use for file names, use null for the platform's default encoding
744      * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
745      * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc)
746      * @throws IOException if an error occurs while reading the file.
747      * @since 1.19
748      * @deprecated Use {@link Builder#get()}.
749      */
750     @Deprecated
751     @SuppressWarnings("resource") // Caller closes
752     public ZipFile(final File file, final String encoding, final boolean useUnicodeExtraFields, final boolean ignoreLocalFileHeader) throws IOException {
753         this(newReadByteChannel(file.toPath()), file.getAbsolutePath(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader);
754     }
755 
756     /**
757      * Opens the given path for reading, assuming "UTF-8" for file names.
758      *
759      * @param path path to the archive.
760      * @throws IOException if an error occurs while reading the file.
761      * @since 1.22
762      * @deprecated Use {@link Builder#get()}.
763      */
764     @Deprecated
765     public ZipFile(final Path path) throws IOException {
766         this(path, DEFAULT_CHARSET_NAME);
767     }
768 
769     /**
770      * Opens the given path for reading, assuming the specified encoding for file names and scanning for Unicode extra fields.
771      *
772      * @param path     path to the archive.
773      * @param encoding the encoding to use for file names, use null for the platform's default encoding
774      * @throws IOException if an error occurs while reading the file.
775      * @since 1.22
776      * @deprecated Use {@link Builder#get()}.
777      */
778     @Deprecated
779     public ZipFile(final Path path, final String encoding) throws IOException {
780         this(path, encoding, true);
781     }
782 
783     /**
784      * Opens the given path for reading, assuming the specified encoding for file names.
785      *
786      * @param path                  path to the archive.
787      * @param encoding              the encoding to use for file names, use null for the platform's default encoding
788      * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
789      * @throws IOException if an error occurs while reading the file.
790      * @since 1.22
791      * @deprecated Use {@link Builder#get()}.
792      */
793     @Deprecated
794     public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields) throws IOException {
795         this(path, encoding, useUnicodeExtraFields, false);
796     }
797 
798     /**
799      * Opens the given path for reading, assuming the specified encoding for file names.
800      * <p>
801      * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
802      * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
803      * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
804      * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
805      * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
806      * </p>
807      *
808      * @param path                  path to the archive.
809      * @param encoding              the encoding to use for file names, use null for the platform's default encoding
810      * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
811      * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc)
812      * @throws IOException if an error occurs while reading the file.
813      * @since 1.22
814      * @deprecated Use {@link Builder#get()}.
815      */
816     @SuppressWarnings("resource") // Caller closes
817     @Deprecated
818     public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields, final boolean ignoreLocalFileHeader) throws IOException {
819         this(newReadByteChannel(path), path.toAbsolutePath().toString(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader);
820     }
821 
822     /**
823      * Opens the given channel for reading, assuming "UTF-8" for file names.
824      * <p>
825      * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
826      * </p>
827      *
828      * @param channel the archive.
829      *
830      * @throws IOException if an error occurs while reading the file.
831      * @since 1.13
832      * @deprecated Use {@link Builder#get()}.
833      */
834     @Deprecated
835     public ZipFile(final SeekableByteChannel channel) throws IOException {
836         this(channel, "a SeekableByteChannel", DEFAULT_CHARSET_NAME, true);
837     }
838 
839     /**
840      * Opens the given channel for reading, assuming the specified encoding for file names.
841      * <p>
842      * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
843      * </p>
844      *
845      * @param channel  the archive.
846      * @param encoding the encoding to use for file names, use null for the platform's default encoding
847      * @throws IOException if an error occurs while reading the file.
848      * @since 1.13
849      * @deprecated Use {@link Builder#get()}.
850      */
851     @Deprecated
852     public ZipFile(final SeekableByteChannel channel, final String encoding) throws IOException {
853         this(channel, "a SeekableByteChannel", encoding, true);
854     }
855 
856     private ZipFile(final SeekableByteChannel channel, final String channelDescription, final Charset encoding, final boolean useUnicodeExtraFields,
857             final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException {
858         this.isSplitZipArchive = channel instanceof ZipSplitReadOnlySeekableByteChannel;
859         this.encoding = Charsets.toCharset(encoding, Builder.DEFAULT_CHARSET);
860         this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
861         this.useUnicodeExtraFields = useUnicodeExtraFields;
862         this.archive = channel;
863         boolean success = false;
864         try {
865             final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = populateFromCentralDirectory();
866             if (!ignoreLocalFileHeader) {
867                 resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
868             }
869             fillNameMap();
870             success = true;
871         } catch (final IOException e) {
872             throw new IOException("Error reading Zip content from " + channelDescription, e);
873         } finally {
874             this.closed = !success;
875             if (!success && closeOnError) {
876                 org.apache.commons.io.IOUtils.closeQuietly(archive);
877             }
878         }
879     }
880 
881     /**
882      * Opens the given channel for reading, assuming the specified encoding for file names.
883      * <p>
884      * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
885      * </p>
886      *
887      * @param channel               the archive.
888      * @param channelDescription    description of the archive, used for error messages only.
889      * @param encoding              the encoding to use for file names, use null for the platform's default encoding
890      * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
891      * @throws IOException if an error occurs while reading the file.
892      * @since 1.13
893      * @deprecated Use {@link Builder#get()}.
894      */
895     @Deprecated
896     public ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields)
897             throws IOException {
898         this(channel, channelDescription, encoding, useUnicodeExtraFields, false, false);
899     }
900 
901     /**
902      * Opens the given channel for reading, assuming the specified encoding for file names.
903      * <p>
904      * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
905      * </p>
906      * <p>
907      * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
908      * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
909      * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
910      * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
911      * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
912      * </p>
913      *
914      * @param channel               the archive.
915      * @param channelDescription    description of the archive, used for error messages only.
916      * @param encoding              the encoding to use for file names, use null for the platform's default encoding
917      * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
918      * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc)
919      * @throws IOException if an error occurs while reading the file.
920      * @since 1.19
921      * @deprecated Use {@link Builder#get()}.
922      */
923     @Deprecated
924     public ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields,
925             final boolean ignoreLocalFileHeader) throws IOException {
926         this(channel, channelDescription, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader);
927     }
928 
929     private ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields,
930             final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException {
931         this(channel, channelDescription, Charsets.toCharset(encoding), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader);
932     }
933 
934     /**
935      * Opens the given file for reading, assuming "UTF-8".
936      *
937      * @param name name of the archive.
938      * @throws IOException if an error occurs while reading the file.
939      * @deprecated Use {@link Builder#get()}.
940      */
941     @Deprecated
942     public ZipFile(final String name) throws IOException {
943         this(new File(name).toPath(), DEFAULT_CHARSET_NAME);
944     }
945 
946     /**
947      * Opens the given file for reading, assuming the specified encoding for file names, scanning unicode extra fields.
948      *
949      * @param name     name of the archive.
950      * @param encoding the encoding to use for file names, use null for the platform's default encoding
951      * @throws IOException if an error occurs while reading the file.
952      * @deprecated Use {@link Builder#get()}.
953      */
954     @Deprecated
955     public ZipFile(final String name, final String encoding) throws IOException {
956         this(new File(name).toPath(), encoding, true);
957     }
958 
959     /**
960      * Whether this class is able to read the given entry.
961      * <p>
962      * May return false if it is set up to use encryption or a compression method that hasn't been implemented yet.
963      * </p>
964      *
965      * @since 1.1
966      * @param entry the entry
967      * @return whether this class is able to read the given entry.
968      */
969     public boolean canReadEntryData(final ZipArchiveEntry entry) {
970         return ZipUtil.canHandleEntryData(entry);
971     }
972 
973     /**
974      * Closes the archive.
975      *
976      * @throws IOException if an error occurs closing the archive.
977      */
978     @Override
979     public void close() throws IOException {
980         // this flag is only written here and read in finalize() which
981         // can never be run in parallel.
982         // no synchronization needed.
983         closed = true;
984         archive.close();
985     }
986 
987     /**
988      * Transfer selected entries from this ZIP file to a given #ZipArchiveOutputStream. Compression and all other attributes will be as in this file.
989      * <p>
990      * This method transfers entries based on the central directory of the ZIP file.
991      * </p>
992      *
993      * @param target    The zipArchiveOutputStream to write the entries to
994      * @param predicate A predicate that selects which entries to write
995      * @throws IOException on error
996      */
997     public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) throws IOException {
998         final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
999         while (src.hasMoreElements()) {
1000             final ZipArchiveEntry entry = src.nextElement();
1001             if (predicate.test(entry)) {
1002                 target.addRawArchiveEntry(entry, getRawInputStream(entry));
1003             }
1004         }
1005     }
1006 
1007     /**
1008      * Creates new BoundedInputStream, according to implementation of underlying archive channel.
1009      */
1010     private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) {
1011         if (start < 0 || remaining < 0 || start + remaining < start) {
1012             throw new IllegalArgumentException("Corrupted archive, stream boundaries" + " are out of range");
1013         }
1014         return archive instanceof FileChannel ? new BoundedFileChannelInputStream(start, remaining, (FileChannel) archive)
1015                 : new BoundedSeekableByteChannelInputStream(start, remaining, archive);
1016     }
1017 
1018     private void fillNameMap() {
1019         entries.forEach(ze -> {
1020             // entries are filled in populateFromCentralDirectory and
1021             // never modified
1022             final String name = ze.getName();
1023             final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.computeIfAbsent(name, k -> new LinkedList<>());
1024             entriesOfThatName.addLast(ze);
1025         });
1026     }
1027 
1028     /**
1029      * Ensures that the close method of this ZIP file is called when there are no more references to it.
1030      *
1031      * @see #close()
1032      */
1033     @Override
1034     protected void finalize() throws Throwable {
1035         try {
1036             if (!closed) {
1037                 close();
1038             }
1039         } finally {
1040             super.finalize();
1041         }
1042     }
1043 
1044     /**
1045      * Gets an InputStream for reading the content before the first local file header.
1046      *
1047      * @return null if there is no content before the first local file header. Otherwise, returns a stream to read the content before the first local file
1048      *         header.
1049      * @since 1.23
1050      */
1051     public InputStream getContentBeforeFirstLocalFileHeader() {
1052         return firstLocalFileHeaderOffset == 0 ? null : createBoundedInputStream(0, firstLocalFileHeaderOffset);
1053     }
1054 
1055     private long getDataOffset(final ZipArchiveEntry ze) throws IOException {
1056         final long s = ze.getDataOffset();
1057         if (s == EntryStreamOffsets.OFFSET_UNKNOWN) {
1058             setDataOffset(ze);
1059             return ze.getDataOffset();
1060         }
1061         return s;
1062     }
1063 
1064     /**
1065      * Gets the encoding to use for file names and the file comment.
1066      *
1067      * @return null if using the platform's default character encoding.
1068      */
1069     public String getEncoding() {
1070         return encoding.name();
1071     }
1072 
1073     /**
1074      * Gets all entries.
1075      * <p>
1076      * Entries will be returned in the same order they appear within the archive's central directory.
1077      * </p>
1078      *
1079      * @return all entries as {@link ZipArchiveEntry} instances
1080      */
1081     public Enumeration<ZipArchiveEntry> getEntries() {
1082         return Collections.enumeration(entries);
1083     }
1084 
1085     /**
1086      * Gets all named entries in the same order they appear within the archive's central directory.
1087      *
1088      * @param name name of the entry.
1089      * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the given name
1090      * @since 1.6
1091      */
1092     public Iterable<ZipArchiveEntry> getEntries(final String name) {
1093         return nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST);
1094     }
1095 
1096     /**
1097      * Gets all entries in physical order.
1098      * <p>
1099      * Entries will be returned in the same order their contents appear within the archive.
1100      * </p>
1101      *
1102      * @return all entries as {@link ZipArchiveEntry} instances
1103      *
1104      * @since 1.1
1105      */
1106     public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
1107         final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ARRAY);
1108         return Collections.enumeration(Arrays.asList(sortByOffset(allEntries)));
1109     }
1110 
1111     /**
1112      * Gets all named entries in the same order their contents appear within the archive.
1113      *
1114      * @param name name of the entry.
1115      * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the given name
1116      * @since 1.6
1117      */
1118     public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
1119         final LinkedList<ZipArchiveEntry> linkedList = nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST);
1120         return Arrays.asList(sortByOffset(linkedList.toArray(ZipArchiveEntry.EMPTY_ARRAY)));
1121     }
1122 
1123     /**
1124      * Gets a named entry or {@code null} if no entry by that name exists.
1125      * <p>
1126      * If multiple entries with the same name exist the first entry in the archive's central directory by that name is returned.
1127      * </p>
1128      *
1129      * @param name name of the entry.
1130      * @return the ZipArchiveEntry corresponding to the given name - or {@code null} if not present.
1131      */
1132     public ZipArchiveEntry getEntry(final String name) {
1133         final LinkedList<ZipArchiveEntry> entries = nameMap.get(name);
1134         return entries != null ? entries.getFirst() : null;
1135     }
1136 
1137     /**
1138      * Gets the offset of the first local file header in the file.
1139      *
1140      * @return the length of the content before the first local file header
1141      * @since 1.23
1142      */
1143     public long getFirstLocalFileHeaderOffset() {
1144         return firstLocalFileHeaderOffset;
1145     }
1146 
1147     /**
1148      * Gets an InputStream for reading the contents of the given entry.
1149      *
1150      * @param entry the entry to get the stream for.
1151      * @return a stream to read the entry from. The returned stream implements {@link InputStreamStatistics}.
1152      * @throws IOException if unable to create an input stream from the zipEntry.
1153      */
1154     public InputStream getInputStream(final ZipArchiveEntry entry) throws IOException {
1155         if (!(entry instanceof Entry)) {
1156             return null;
1157         }
1158         // cast validity is checked just above
1159         ZipUtil.checkRequestedFeatures(entry);
1160 
1161         // doesn't get closed if the method is not supported - which
1162         // should never happen because of the checkRequestedFeatures
1163         // call above
1164         final InputStream is = new BufferedInputStream(getRawInputStream(entry)); // NOSONAR
1165         switch (ZipMethod.getMethodByCode(entry.getMethod())) {
1166         case STORED:
1167             return new StoredStatisticsStream(is);
1168         case UNSHRINKING:
1169             return new UnshrinkingInputStream(is);
1170         case IMPLODING:
1171             try {
1172                 return new ExplodingInputStream(entry.getGeneralPurposeBit().getSlidingDictionarySize(),
1173                         entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is);
1174             } catch (final IllegalArgumentException ex) {
1175                 throw new IOException("bad IMPLODE data", ex);
1176             }
1177         case DEFLATED:
1178             final Inflater inflater = new Inflater(true);
1179             // Inflater with nowrap=true has this odd contract for a zero padding
1180             // byte following the data stream; this used to be zlib's requirement
1181             // and has been fixed a long time ago, but the contract persists so
1182             // we comply.
1183             // https://docs.oracle.com/javase/8/docs/api/java/util/zip/Inflater.html#Inflater(boolean)
1184             return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), inflater) {
1185                 @Override
1186                 public void close() throws IOException {
1187                     try {
1188                         super.close();
1189                     } finally {
1190                         inflater.end();
1191                     }
1192                 }
1193             };
1194         case BZIP2:
1195             return new BZip2CompressorInputStream(is);
1196         case ENHANCED_DEFLATED:
1197             return new Deflate64CompressorInputStream(is);
1198         case AES_ENCRYPTED:
1199         case EXPANDING_LEVEL_1:
1200         case EXPANDING_LEVEL_2:
1201         case EXPANDING_LEVEL_3:
1202         case EXPANDING_LEVEL_4:
1203         case JPEG:
1204         case LZMA:
1205         case PKWARE_IMPLODING:
1206         case PPMD:
1207         case TOKENIZATION:
1208         case UNKNOWN:
1209         case WAVPACK:
1210         case XZ:
1211         default:
1212             throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(entry.getMethod()), entry);
1213         }
1214     }
1215 
1216     /**
1217      * Gets the raw stream of the archive entry (compressed form).
1218      * <p>
1219      * This method does not relate to how/if we understand the payload in the stream, since we really only intend to move it on to somewhere else.
1220      * </p>
1221      * <p>
1222      * Since version 1.22, this method will make an attempt to read the entry's data stream offset, even if the {@code ignoreLocalFileHeader} parameter was
1223      * {@code true} in the constructor. An IOException can also be thrown from the body of the method if this lookup fails for some reason.
1224      * </p>
1225      *
1226      * @param entry The entry to get the stream for
1227      * @return The raw input stream containing (possibly) compressed data.
1228      * @since 1.11
1229      * @throws IOException if there is a problem reading data offset (added in version 1.22).
1230      */
1231     public InputStream getRawInputStream(final ZipArchiveEntry entry) throws IOException {
1232         if (!(entry instanceof Entry)) {
1233             return null;
1234         }
1235         final long start = getDataOffset(entry);
1236         if (start == EntryStreamOffsets.OFFSET_UNKNOWN) {
1237             return null;
1238         }
1239         return createBoundedInputStream(start, entry.getCompressedSize());
1240     }
1241 
1242     /**
1243      * Gets the entry's content as a String if isUnixSymlink() returns true for it, otherwise returns null.
1244      * <p>
1245      * This method assumes the symbolic link's file name uses the same encoding that as been specified for this ZipFile.
1246      * </p>
1247      *
1248      * @param entry ZipArchiveEntry object that represents the symbolic link
1249      * @return entry's content as a String
1250      * @throws IOException problem with content's input stream
1251      * @since 1.5
1252      */
1253     public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
1254         if (entry != null && entry.isUnixSymlink()) {
1255             try (InputStream in = getInputStream(entry)) {
1256                 return zipEncoding.decode(org.apache.commons.io.IOUtils.toByteArray(in));
1257             }
1258         }
1259         return null;
1260     }
1261 
1262     /**
1263      * Reads the central directory of the given archive and populates the internal tables with ZipArchiveEntry instances.
1264      * <p>
1265      * The ZipArchiveEntrys will know all data that can be obtained from the central directory alone, but not the data that requires the local file header or
1266      * additional data to be read.
1267      * </p>
1268      *
1269      * @return a map of zip entries that didn't have the language encoding flag set when read.
1270      */
1271     private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() throws IOException {
1272         final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = new HashMap<>();
1273 
1274         positionAtCentralDirectory();
1275         centralDirectoryStartOffset = archive.position();
1276 
1277         wordBbuf.rewind();
1278         IOUtils.readFully(archive, wordBbuf);
1279         long sig = ZipLong.getValue(wordBuf);
1280 
1281         if (sig != CFH_SIG && startsWithLocalFileHeader()) {
1282             throw new IOException("Central directory is empty, can't expand" + " corrupt archive.");
1283         }
1284 
1285         while (sig == CFH_SIG) {
1286             readCentralDirectoryEntry(noUTF8Flag);
1287             wordBbuf.rewind();
1288             IOUtils.readFully(archive, wordBbuf);
1289             sig = ZipLong.getValue(wordBuf);
1290         }
1291         return noUTF8Flag;
1292     }
1293 
1294     /**
1295      * Searches for either the &quot;Zip64 end of central directory locator&quot; or the &quot;End of central dir record&quot;, parses it and positions the
1296      * stream at the first central directory record.
1297      */
1298     private void positionAtCentralDirectory() throws IOException {
1299         final boolean is64 = positionAtEndOfCentralDirectoryRecord(archive);
1300         if (!is64) {
1301             positionAtCentralDirectory32();
1302         } else {
1303             positionAtCentralDirectory64();
1304         }
1305     }
1306 
1307     /**
1308      * Parses the &quot;End of central dir record&quot; and positions the stream at the first central directory record.
1309      *
1310      * Expects stream to be positioned at the beginning of the &quot;End of central dir record&quot;.
1311      */
1312     private void positionAtCentralDirectory32() throws IOException {
1313         final long endOfCentralDirectoryRecordOffset = archive.position();
1314         if (isSplitZipArchive) {
1315             skipBytes(CFD_DISK_OFFSET);
1316             shortBbuf.rewind();
1317             IOUtils.readFully(archive, shortBbuf);
1318             centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf);
1319 
1320             skipBytes(CFD_LOCATOR_RELATIVE_OFFSET);
1321 
1322             wordBbuf.rewind();
1323             IOUtils.readFully(archive, wordBbuf);
1324             centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);
1325             ((ZipSplitReadOnlySeekableByteChannel) archive).position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
1326         } else {
1327             skipBytes(CFD_LENGTH_OFFSET);
1328             wordBbuf.rewind();
1329             IOUtils.readFully(archive, wordBbuf);
1330             final long centralDirectoryLength = ZipLong.getValue(wordBuf);
1331 
1332             wordBbuf.rewind();
1333             IOUtils.readFully(archive, wordBbuf);
1334             centralDirectoryStartDiskNumber = 0;
1335             centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);
1336 
1337             firstLocalFileHeaderOffset = Long.max(endOfCentralDirectoryRecordOffset - centralDirectoryLength - centralDirectoryStartRelativeOffset, 0L);
1338             archive.position(centralDirectoryStartRelativeOffset + firstLocalFileHeaderOffset);
1339         }
1340     }
1341 
1342     /**
1343      * Parses the &quot;Zip64 end of central directory locator&quot;, finds the &quot;Zip64 end of central directory record&quot; using the parsed information,
1344      * parses that and positions the stream at the first central directory record.
1345      *
1346      * Expects stream to be positioned right behind the &quot;Zip64 end of central directory locator&quot;'s signature.
1347      */
1348     private void positionAtCentralDirectory64() throws IOException {
1349         skipBytes(ZipConstants.WORD);
1350         if (isSplitZipArchive) {
1351             wordBbuf.rewind();
1352             IOUtils.readFully(archive, wordBbuf);
1353             final long diskNumberOfEOCD = ZipLong.getValue(wordBuf);
1354 
1355             dwordBbuf.rewind();
1356             IOUtils.readFully(archive, dwordBbuf);
1357             final long relativeOffsetOfEOCD = ZipEightByteInteger.getLongValue(dwordBuf);
1358             ((ZipSplitReadOnlySeekableByteChannel) archive).position(diskNumberOfEOCD, relativeOffsetOfEOCD);
1359         } else {
1360             skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET - ZipConstants.WORD /* signature has already been read */);
1361             dwordBbuf.rewind();
1362             IOUtils.readFully(archive, dwordBbuf);
1363             archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
1364         }
1365 
1366         wordBbuf.rewind();
1367         IOUtils.readFully(archive, wordBbuf);
1368         if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
1369             throw new ZipException("Archive's ZIP64 end of central directory locator is corrupt.");
1370         }
1371 
1372         if (isSplitZipArchive) {
1373             skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET - ZipConstants.WORD /* signature has already been read */);
1374             wordBbuf.rewind();
1375             IOUtils.readFully(archive, wordBbuf);
1376             centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf);
1377 
1378             skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET);
1379 
1380             dwordBbuf.rewind();
1381             IOUtils.readFully(archive, dwordBbuf);
1382             centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
1383             ((ZipSplitReadOnlySeekableByteChannel) archive).position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
1384         } else {
1385             skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET - ZipConstants.WORD /* signature has already been read */);
1386             dwordBbuf.rewind();
1387             IOUtils.readFully(archive, dwordBbuf);
1388             centralDirectoryStartDiskNumber = 0;
1389             centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
1390             archive.position(centralDirectoryStartRelativeOffset);
1391         }
1392     }
1393 
1394     /**
1395      * Reads an individual entry of the central directory, creates an ZipArchiveEntry from it and adds it to the global maps.
1396      *
1397      * @param noUTF8Flag map used to collect entries that don't have their UTF-8 flag set and whose name will be set by data read from the local file header
1398      *                   later. The current entry may be added to this map.
1399      */
1400     private void readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag) throws IOException {
1401         cfhBbuf.rewind();
1402         IOUtils.readFully(archive, cfhBbuf);
1403         int off = 0;
1404         final Entry ze = new Entry();
1405 
1406         final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
1407         off += ZipConstants.SHORT;
1408         ze.setVersionMadeBy(versionMadeBy);
1409         ze.setPlatform(versionMadeBy >> BYTE_SHIFT & NIBLET_MASK);
1410 
1411         ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
1412         off += ZipConstants.SHORT; // version required
1413 
1414         final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
1415         final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
1416         final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.ZIP_ENCODING_UTF_8 : zipEncoding;
1417         if (hasUTF8Flag) {
1418             ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
1419         }
1420         ze.setGeneralPurposeBit(gpFlag);
1421         ze.setRawFlag(ZipShort.getValue(cfhBuf, off));
1422 
1423         off += ZipConstants.SHORT;
1424 
1425         // noinspection MagicConstant
1426         ze.setMethod(ZipShort.getValue(cfhBuf, off));
1427         off += ZipConstants.SHORT;
1428 
1429         final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
1430         ze.setTime(time);
1431         off += ZipConstants.WORD;
1432 
1433         ze.setCrc(ZipLong.getValue(cfhBuf, off));
1434         off += ZipConstants.WORD;
1435 
1436         long size = ZipLong.getValue(cfhBuf, off);
1437         if (size < 0) {
1438             throw new IOException("broken archive, entry with negative compressed size");
1439         }
1440         ze.setCompressedSize(size);
1441         off += ZipConstants.WORD;
1442 
1443         size = ZipLong.getValue(cfhBuf, off);
1444         if (size < 0) {
1445             throw new IOException("broken archive, entry with negative size");
1446         }
1447         ze.setSize(size);
1448         off += ZipConstants.WORD;
1449 
1450         final int fileNameLen = ZipShort.getValue(cfhBuf, off);
1451         off += ZipConstants.SHORT;
1452         if (fileNameLen < 0) {
1453             throw new IOException("broken archive, entry with negative fileNameLen");
1454         }
1455 
1456         final int extraLen = ZipShort.getValue(cfhBuf, off);
1457         off += ZipConstants.SHORT;
1458         if (extraLen < 0) {
1459             throw new IOException("broken archive, entry with negative extraLen");
1460         }
1461 
1462         final int commentLen = ZipShort.getValue(cfhBuf, off);
1463         off += ZipConstants.SHORT;
1464         if (commentLen < 0) {
1465             throw new IOException("broken archive, entry with negative commentLen");
1466         }
1467 
1468         ze.setDiskNumberStart(ZipShort.getValue(cfhBuf, off));
1469         off += ZipConstants.SHORT;
1470 
1471         ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
1472         off += ZipConstants.SHORT;
1473 
1474         ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
1475         off += ZipConstants.WORD;
1476 
1477         final byte[] fileName = IOUtils.readRange(archive, fileNameLen);
1478         if (fileName.length < fileNameLen) {
1479             throw new EOFException();
1480         }
1481         ze.setName(entryEncoding.decode(fileName), fileName);
1482 
1483         // LFH offset,
1484         ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off) + firstLocalFileHeaderOffset);
1485         // data offset will be filled later
1486         entries.add(ze);
1487 
1488         final byte[] cdExtraData = IOUtils.readRange(archive, extraLen);
1489         if (cdExtraData.length < extraLen) {
1490             throw new EOFException();
1491         }
1492         try {
1493             ze.setCentralDirectoryExtra(cdExtraData);
1494         } catch (final RuntimeException e) {
1495             final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
1496             z.initCause(e);
1497             throw z;
1498         }
1499 
1500         setSizesAndOffsetFromZip64Extra(ze);
1501         sanityCheckLFHOffset(ze);
1502 
1503         final byte[] comment = IOUtils.readRange(archive, commentLen);
1504         if (comment.length < commentLen) {
1505             throw new EOFException();
1506         }
1507         ze.setComment(entryEncoding.decode(comment));
1508 
1509         if (!hasUTF8Flag && useUnicodeExtraFields) {
1510             noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
1511         }
1512 
1513         ze.setStreamContiguous(true);
1514     }
1515 
1516     /**
1517      * Walks through all recorded entries and adds the data available from the local file header.
1518      * <p>
1519      * Also records the offsets for the data to read from the entries.
1520      * </p>
1521      */
1522     private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag) throws IOException {
1523         for (final ZipArchiveEntry zipArchiveEntry : entries) {
1524             // entries are filled in populateFromCentralDirectory and never modified
1525             final Entry ze = (Entry) zipArchiveEntry;
1526             final int[] lens = setDataOffset(ze);
1527             final int fileNameLen = lens[0];
1528             final int extraFieldLen = lens[1];
1529             skipBytes(fileNameLen);
1530             final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen);
1531             if (localExtraData.length < extraFieldLen) {
1532                 throw new EOFException();
1533             }
1534             try {
1535                 ze.setExtra(localExtraData);
1536             } catch (final RuntimeException e) {
1537                 final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
1538                 z.initCause(e);
1539                 throw z;
1540             }
1541 
1542             if (entriesWithoutUTF8Flag.containsKey(ze)) {
1543                 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1544                 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, nc.comment);
1545             }
1546         }
1547     }
1548 
1549     private void sanityCheckLFHOffset(final ZipArchiveEntry entry) throws IOException {
1550         if (entry.getDiskNumberStart() < 0) {
1551             throw new IOException("broken archive, entry with negative disk number");
1552         }
1553         if (entry.getLocalHeaderOffset() < 0) {
1554             throw new IOException("broken archive, entry with negative local file header offset");
1555         }
1556         if (isSplitZipArchive) {
1557             if (entry.getDiskNumberStart() > centralDirectoryStartDiskNumber) {
1558                 throw new IOException("local file header for " + entry.getName() + " starts on a later disk than central directory");
1559             }
1560             if (entry.getDiskNumberStart() == centralDirectoryStartDiskNumber && entry.getLocalHeaderOffset() > centralDirectoryStartRelativeOffset) {
1561                 throw new IOException("local file header for " + entry.getName() + " starts after central directory");
1562             }
1563         } else if (entry.getLocalHeaderOffset() > centralDirectoryStartOffset) {
1564             throw new IOException("local file header for " + entry.getName() + " starts after central directory");
1565         }
1566     }
1567 
1568     private int[] setDataOffset(final ZipArchiveEntry entry) throws IOException {
1569         long offset = entry.getLocalHeaderOffset();
1570         if (isSplitZipArchive) {
1571             ((ZipSplitReadOnlySeekableByteChannel) archive).position(entry.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1572             // the offset should be updated to the global offset
1573             offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH;
1574         } else {
1575             archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1576         }
1577         wordBbuf.rewind();
1578         IOUtils.readFully(archive, wordBbuf);
1579         wordBbuf.flip();
1580         wordBbuf.get(shortBuf);
1581         final int fileNameLen = ZipShort.getValue(shortBuf);
1582         wordBbuf.get(shortBuf);
1583         final int extraFieldLen = ZipShort.getValue(shortBuf);
1584         entry.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH + ZipConstants.SHORT + ZipConstants.SHORT + fileNameLen + extraFieldLen);
1585         if (entry.getDataOffset() + entry.getCompressedSize() > centralDirectoryStartOffset) {
1586             throw new IOException("data for " + entry.getName() + " overlaps with central directory.");
1587         }
1588         return new int[] { fileNameLen, extraFieldLen };
1589     }
1590 
1591     /**
1592      * If the entry holds a Zip64 extended information extra field, read sizes from there if the entry's sizes are set to 0xFFFFFFFFF, do the same for the
1593      * offset of the local file header.
1594      * <p>
1595      * Ensures the Zip64 extra either knows both compressed and uncompressed size or neither of both as the internal logic in ExtraFieldUtils forces the field
1596      * to create local header data even if they are never used - and here a field with only one size would be invalid.
1597      * </p>
1598      */
1599     private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry entry) throws IOException {
1600         final ZipExtraField extra = entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
1601         if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) {
1602             throw new ZipException("archive contains unparseable zip64 extra field");
1603         }
1604         final Zip64ExtendedInformationExtraField z64 = (Zip64ExtendedInformationExtraField) extra;
1605         if (z64 != null) {
1606             final boolean hasUncompressedSize = entry.getSize() == ZipConstants.ZIP64_MAGIC;
1607             final boolean hasCompressedSize = entry.getCompressedSize() == ZipConstants.ZIP64_MAGIC;
1608             final boolean hasRelativeHeaderOffset = entry.getLocalHeaderOffset() == ZipConstants.ZIP64_MAGIC;
1609             final boolean hasDiskStart = entry.getDiskNumberStart() == ZipConstants.ZIP64_MAGIC_SHORT;
1610             z64.reparseCentralDirectoryData(hasUncompressedSize, hasCompressedSize, hasRelativeHeaderOffset, hasDiskStart);
1611 
1612             if (hasUncompressedSize) {
1613                 final long size = z64.getSize().getLongValue();
1614                 if (size < 0) {
1615                     throw new IOException("broken archive, entry with negative size");
1616                 }
1617                 entry.setSize(size);
1618             } else if (hasCompressedSize) {
1619                 z64.setSize(new ZipEightByteInteger(entry.getSize()));
1620             }
1621 
1622             if (hasCompressedSize) {
1623                 final long size = z64.getCompressedSize().getLongValue();
1624                 if (size < 0) {
1625                     throw new IOException("broken archive, entry with negative compressed size");
1626                 }
1627                 entry.setCompressedSize(size);
1628             } else if (hasUncompressedSize) {
1629                 z64.setCompressedSize(new ZipEightByteInteger(entry.getCompressedSize()));
1630             }
1631 
1632             if (hasRelativeHeaderOffset) {
1633                 entry.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
1634             }
1635 
1636             if (hasDiskStart) {
1637                 entry.setDiskNumberStart(z64.getDiskStartNumber().getValue());
1638             }
1639         }
1640     }
1641 
1642     /**
1643      * Skips the given number of bytes or throws an EOFException if skipping failed.
1644      */
1645     private void skipBytes(final int count) throws IOException {
1646         final long currentPosition = archive.position();
1647         final long newPosition = currentPosition + count;
1648         if (newPosition > archive.size()) {
1649             throw new EOFException();
1650         }
1651         archive.position(newPosition);
1652     }
1653 
1654     /**
1655      * Sorts entries in place by offset.
1656      *
1657      * @param allEntries entries to sort
1658      * @return the given entries, sorted.
1659      */
1660     private ZipArchiveEntry[] sortByOffset(final ZipArchiveEntry[] allEntries) {
1661         Arrays.sort(allEntries, offsetComparator);
1662         return allEntries;
1663     }
1664 
1665     /**
1666      * Checks whether the archive starts with an LFH. If it doesn't, it may be an empty archive.
1667      */
1668     private boolean startsWithLocalFileHeader() throws IOException {
1669         archive.position(firstLocalFileHeaderOffset);
1670         wordBbuf.rewind();
1671         IOUtils.readFully(archive, wordBbuf);
1672         return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1673     }
1674 }