Source code

001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 */
017package org.apache.commons.compress.archivers.zip;
018
019import java.io.BufferedInputStream;
020import java.io.ByteArrayInputStream;
021import java.io.Closeable;
022import java.io.EOFException;
023import java.io.File;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.SequenceInputStream;
027import java.nio.ByteBuffer;
028import java.nio.ByteOrder;
029import java.nio.channels.FileChannel;
030import java.nio.channels.SeekableByteChannel;
031import java.nio.charset.Charset;
032import java.nio.charset.StandardCharsets;
033import java.nio.file.Files;
034import java.nio.file.OpenOption;
035import java.nio.file.Path;
036import java.nio.file.StandardOpenOption;
037import java.util.ArrayList;
038import java.util.Arrays;
039import java.util.Collections;
040import java.util.Comparator;
041import java.util.EnumSet;
042import java.util.Enumeration;
043import java.util.HashMap;
044import java.util.LinkedList;
045import java.util.List;
046import java.util.Map;
047import java.util.Objects;
048import java.util.stream.Collectors;
049import java.util.stream.IntStream;
050import java.util.zip.Inflater;
051import java.util.zip.ZipException;
052
053import org.apache.commons.compress.archivers.EntryStreamOffsets;
054import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
055import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
056import org.apache.commons.compress.utils.BoundedArchiveInputStream;
057import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
058import org.apache.commons.compress.utils.IOUtils;
059import org.apache.commons.compress.utils.InputStreamStatistics;
060import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
061import org.apache.commons.io.Charsets;
062import org.apache.commons.io.FilenameUtils;
063import org.apache.commons.io.build.AbstractOrigin.ByteArrayOrigin;
064import org.apache.commons.io.build.AbstractStreamBuilder;
065import org.apache.commons.io.input.BoundedInputStream;
066
067/**
068 * Replacement for {@link java.util.zip.ZipFile}.
069 * <p>
070 * This class adds support for file name encodings other than UTF-8 (which is required to work on ZIP files created by native ZIP tools) and is able to skip a
071 * preamble like the one found in self extracting archives. Furthermore it returns instances of
072 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} instead of {@link java.util.zip.ZipEntry}.
073 * </p>
074 * <p>
075 * It doesn't extend {@link java.util.zip.ZipFile} as it would have to reimplement all methods anyway. Like {@link java.util.zip.ZipFile}, it uses
076 * SeekableByteChannel under the covers and supports compressed and uncompressed entries. As of Apache Commons Compress 1.3 it also transparently supports Zip64
077 * extensions and thus individual entries and archives larger than 4 GB or with more than 65,536 entries.
078 * </p>
079 * <p>
080 * The method signatures mimic the ones of {@link java.util.zip.ZipFile}, with a couple of exceptions:
081 * </p>
082 * <ul>
083 * <li>There is no getName method.</li>
084 * <li>entries has been renamed to getEntries.</li>
085 * <li>getEntries and getEntry return {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} instances.</li>
086 * <li>close is allowed to throw IOException.</li>
087 * </ul>
088 */
089public class ZipFile implements Closeable {
090
091    /**
092     * Lock-free implementation of BoundedInputStream. The implementation uses positioned reads on the underlying archive file channel and therefore performs
093     * significantly faster in concurrent environment.
094     */
095    private static class BoundedFileChannelInputStream extends BoundedArchiveInputStream {
096        private final FileChannel archive;
097
098        BoundedFileChannelInputStream(final long start, final long remaining, final FileChannel archive) {
099            super(start, remaining);
100            this.archive = archive;
101        }
102
103        @Override
104        protected int read(final long pos, final ByteBuffer buf) throws IOException {
105            final int read = archive.read(buf, pos);
106            buf.flip();
107            return read;
108        }
109    }
110
111    /**
112     * Builds new {@link ZipFile} instances.
113     * <p>
114     * The channel will be opened for reading, assuming the specified encoding for file names.
115     * </p>
116     * <p>
117     * See {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} to read from an in-memory archive.
118     * </p>
119     * <p>
120     * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
121     * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
122     * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
123     * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
124     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
125     * </p>
126     *
127     * @since 1.26.0
128     */
129    public static class Builder extends AbstractStreamBuilder<ZipFile, Builder> {
130
131        static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;
132
133        private SeekableByteChannel seekableByteChannel;
134        private boolean useUnicodeExtraFields = true;
135        private boolean ignoreLocalFileHeader;
136        private long maxNumberOfDisks = 1;
137
138        public Builder() {
139            setCharset(DEFAULT_CHARSET);
140            setCharsetDefault(DEFAULT_CHARSET);
141        }
142
143        @Override
144        public ZipFile get() throws IOException {
145            final SeekableByteChannel actualChannel;
146            final String actualDescription;
147            if (seekableByteChannel != null) {
148                actualChannel = seekableByteChannel;
149                actualDescription = actualChannel.getClass().getSimpleName();
150            } else if (checkOrigin() instanceof ByteArrayOrigin) {
151                actualChannel = new SeekableInMemoryByteChannel(checkOrigin().getByteArray());
152                actualDescription = actualChannel.getClass().getSimpleName();
153            } else {
154                OpenOption[] openOptions = getOpenOptions();
155                if (openOptions.length == 0) {
156                    openOptions = new OpenOption[] { StandardOpenOption.READ };
157                }
158                final Path path = getPath();
159                actualChannel = openZipChannel(path, maxNumberOfDisks, openOptions);
160                actualDescription = path.toString();
161            }
162            final boolean closeOnError = seekableByteChannel != null;
163            return new ZipFile(actualChannel, actualDescription, getCharset(), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader);
164        }
165
166        /**
167         * Sets whether to ignore information stored inside the local file header.
168         *
169         * @param ignoreLocalFileHeader whether to ignore information stored inside.
170         * @return {@code this} instance.
171         */
172        public Builder setIgnoreLocalFileHeader(final boolean ignoreLocalFileHeader) {
173            this.ignoreLocalFileHeader = ignoreLocalFileHeader;
174            return this;
175        }
176
177        /**
178         * Sets max number of multi archive disks, default is 1 (no multi archive).
179         *
180         * @param maxNumberOfDisks max number of multi archive disks.
181         *
182         * @return {@code this} instance.
183         */
184        public Builder setMaxNumberOfDisks(final long maxNumberOfDisks) {
185            this.maxNumberOfDisks = maxNumberOfDisks;
186            return this;
187        }
188
189        /**
190         * The actual channel, overrides any other input aspects like a File, Path, and so on.
191         *
192         * @param seekableByteChannel The actual channel.
193         * @return {@code this} instance.
194         */
195        public Builder setSeekableByteChannel(final SeekableByteChannel seekableByteChannel) {
196            this.seekableByteChannel = seekableByteChannel;
197            return this;
198        }
199
200        /**
201         * Sets whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
202         *
203         * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
204         * @return {@code this} instance.
205         */
206        public Builder setUseUnicodeExtraFields(final boolean useUnicodeExtraFields) {
207            this.useUnicodeExtraFields = useUnicodeExtraFields;
208            return this;
209        }
210
211    }
212
213    /**
214     * Extends ZipArchiveEntry to store the offset within the archive.
215     */
216    private static final class Entry extends ZipArchiveEntry {
217
218        @Override
219        public boolean equals(final Object other) {
220            if (super.equals(other)) {
221                // super.equals would return false if other were not an Entry
222                final Entry otherEntry = (Entry) other;
223                return getLocalHeaderOffset() == otherEntry.getLocalHeaderOffset() && super.getDataOffset() == otherEntry.getDataOffset()
224                        && super.getDiskNumberStart() == otherEntry.getDiskNumberStart();
225            }
226            return false;
227        }
228
229        @Override
230        public int hashCode() {
231            return 3 * super.hashCode() + (int) getLocalHeaderOffset() + (int) (getLocalHeaderOffset() >> 32);
232        }
233    }
234
235    private static final class NameAndComment {
236        private final byte[] name;
237        private final byte[] comment;
238
239        private NameAndComment(final byte[] name, final byte[] comment) {
240            this.name = name;
241            this.comment = comment;
242        }
243    }
244
245    private static final class StoredStatisticsStream extends BoundedInputStream implements InputStreamStatistics {
246        StoredStatisticsStream(final InputStream in) {
247            super(in);
248        }
249
250        @Override
251        public long getCompressedCount() {
252            return super.getCount();
253        }
254
255        @Override
256        public long getUncompressedCount() {
257            return getCompressedCount();
258        }
259    }
260
261    private static final String DEFAULT_CHARSET_NAME = StandardCharsets.UTF_8.name();
262
263    private static final EnumSet<StandardOpenOption> READ = EnumSet.of(StandardOpenOption.READ);
264
265    private static final int HASH_SIZE = 509;
266    static final int NIBLET_MASK = 0x0f;
267    static final int BYTE_SHIFT = 8;
268    private static final int POS_0 = 0;
269    private static final int POS_1 = 1;
270    private static final int POS_2 = 2;
271    private static final int POS_3 = 3;
272    private static final byte[] ONE_ZERO_BYTE = new byte[1];
273
274    /**
275     * Length of a "central directory" entry structure without file name, extra fields or comment.
276     */
277    private static final int CFH_LEN =
278    // @formatter:off
279        /* version made by                 */ ZipConstants.SHORT
280        /* version needed to extract       */ + ZipConstants.SHORT
281        /* general purpose bit flag        */ + ZipConstants.SHORT
282        /* compression method              */ + ZipConstants.SHORT
283        /* last mod file time              */ + ZipConstants.SHORT
284        /* last mod file date              */ + ZipConstants.SHORT
285        /* crc-32                          */ + ZipConstants.WORD
286        /* compressed size                 */ + ZipConstants.WORD
287        /* uncompressed size               */ + ZipConstants.WORD
288        /* file name length                */ + ZipConstants. SHORT
289        /* extra field length              */ + ZipConstants.SHORT
290        /* file comment length             */ + ZipConstants.SHORT
291        /* disk number start               */ + ZipConstants.SHORT
292        /* internal file attributes        */ + ZipConstants.SHORT
293        /* external file attributes        */ + ZipConstants.WORD
294        /* relative offset of local header */ + ZipConstants.WORD;
295    // @formatter:on
296
297    private static final long CFH_SIG = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
298
299    /**
300     * Length of the "End of central directory record" - which is supposed to be the last structure of the archive - without file comment.
301     */
302    static final int MIN_EOCD_SIZE =
303    // @formatter:off
304        /* end of central dir signature    */ ZipConstants.WORD
305        /* number of this disk             */ + ZipConstants.SHORT
306        /* number of the disk with the     */
307        /* start of the central directory  */ + ZipConstants.SHORT
308        /* total number of entries in      */
309        /* the central dir on this disk    */ + ZipConstants.SHORT
310        /* total number of entries in      */
311        /* the central dir                 */ + ZipConstants.SHORT
312        /* size of the central directory   */ + ZipConstants.WORD
313        /* offset of start of central      */
314        /* directory with respect to       */
315        /* the starting disk number        */ + ZipConstants.WORD
316        /* ZIP file comment length         */ + ZipConstants.SHORT;
317    // @formatter:on
318
319    /**
320     * Maximum length of the "End of central directory record" with a file comment.
321     */
322    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
323    // @formatter:off
324        /* maximum length of ZIP file comment */ + ZipConstants.ZIP64_MAGIC_SHORT;
325    // @formatter:on
326
327    /**
328     * Offset of the field that holds the location of the length of the central directory inside the "End of central directory record" relative to the start of
329     * the "End of central directory record".
330     */
331    private static final int CFD_LENGTH_OFFSET =
332    // @formatter:off
333        /* end of central dir signature    */ ZipConstants.WORD
334        /* number of this disk             */ + ZipConstants.SHORT
335        /* number of the disk with the     */
336        /* start of the central directory  */ + ZipConstants.SHORT
337        /* total number of entries in      */
338        /* the central dir on this disk    */ + ZipConstants.SHORT
339        /* total number of entries in      */
340        /* the central dir                 */ + ZipConstants.SHORT;
341    // @formatter:on
342
343    /**
344     * Offset of the field that holds the disk number of the first central directory entry inside the "End of central directory record" relative to the start of
345     * the "End of central directory record".
346     */
347    private static final int CFD_DISK_OFFSET =
348    // @formatter:off
349            /* end of central dir signature    */ ZipConstants.WORD
350            /* number of this disk             */ + ZipConstants.SHORT;
351    // @formatter:on
352
353    /**
354     * Offset of the field that holds the location of the first central directory entry inside the "End of central directory record" relative to the "number of
355     * the disk with the start of the central directory".
356     */
357    private static final int CFD_LOCATOR_RELATIVE_OFFSET =
358    // @formatter:off
359            /* total number of entries in      */
360            /* the central dir on this disk    */ + ZipConstants.SHORT
361            /* total number of entries in      */
362            /* the central dir                 */ + ZipConstants.SHORT
363            /* size of the central directory   */ + ZipConstants.WORD;
364    // @formatter:on
365
366    /**
367     * Length of the "Zip64 end of central directory locator" - which should be right in front of the "end of central directory record" if one is present at
368     * all.
369     */
370    private static final int ZIP64_EOCDL_LENGTH =
371    // @formatter:off
372        /* zip64 end of central dir locator sig */ ZipConstants.WORD
373        /* number of the disk with the start    */
374        /* start of the zip64 end of            */
375        /* central directory                    */ + ZipConstants.WORD
376        /* relative offset of the zip64         */
377        /* end of central directory record      */ + ZipConstants.DWORD
378        /* total number of disks                */ + ZipConstants.WORD;
379    // @formatter:on
380
381    /**
382     * Offset of the field that holds the location of the "Zip64 end of central directory record" inside the "Zip64 end of central directory locator" relative
383     * to the start of the "Zip64 end of central directory locator".
384     */
385    private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
386    // @formatter:off
387        /* zip64 end of central dir locator sig */ ZipConstants.WORD
388        /* number of the disk with the start    */
389        /* start of the zip64 end of            */
390        /* central directory                    */ + ZipConstants.WORD;
391    // @formatter:on
392
393    /**
394     * Offset of the field that holds the location of the first central directory entry inside the "Zip64 end of central directory record" relative to the start
395     * of the "Zip64 end of central directory record".
396     */
397    private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
398    // @formatter:off
399        /* zip64 end of central dir        */
400        /* signature                       */ ZipConstants.WORD
401        /* size of zip64 end of central    */
402        /* directory record                */ + ZipConstants.DWORD
403        /* version made by                 */ + ZipConstants.SHORT
404        /* version needed to extract       */ + ZipConstants.SHORT
405        /* number of this disk             */ + ZipConstants.WORD
406        /* number of the disk with the     */
407        /* start of the central directory  */ + ZipConstants.WORD
408        /* total number of entries in the  */
409        /* central directory on this disk  */ + ZipConstants.DWORD
410        /* total number of entries in the  */
411        /* central directory               */ + ZipConstants.DWORD
412        /* size of the central directory   */ + ZipConstants.DWORD;
413    // @formatter:on
414
415    /**
416     * Offset of the field that holds the disk number of the first central directory entry inside the "Zip64 end of central directory record" relative to the
417     * start of the "Zip64 end of central directory record".
418     */
419    private static final int ZIP64_EOCD_CFD_DISK_OFFSET =
420    // @formatter:off
421            /* zip64 end of central dir        */
422            /* signature                       */ ZipConstants.WORD
423            /* size of zip64 end of central    */
424            /* directory record                */ + ZipConstants.DWORD
425            /* version made by                 */ + ZipConstants.SHORT
426            /* version needed to extract       */ + ZipConstants.SHORT
427            /* number of this disk             */ + ZipConstants.WORD;
428    // @formatter:on
429
430    /**
431     * Offset of the field that holds the location of the first central directory entry inside the "Zip64 end of central directory record" relative to the
432     * "number of the disk with the start of the central directory".
433     */
434    private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET =
435    // @formatter:off
436            /* total number of entries in the  */
437            /* central directory on this disk  */ ZipConstants.DWORD
438            /* total number of entries in the  */
439            /* central directory               */ + ZipConstants.DWORD
440            /* size of the central directory   */ + ZipConstants.DWORD;
441    // @formatter:on
442
443    /**
444     * Number of bytes in local file header up to the &quot;length of file name&quot; entry.
445     */
446    private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
447    // @formatter:off
448        /* local file header signature     */ ZipConstants.WORD
449        /* version needed to extract       */ + ZipConstants.SHORT
450        /* general purpose bit flag        */ + ZipConstants.SHORT
451        /* compression method              */ + ZipConstants.SHORT
452        /* last mod file time              */ + ZipConstants.SHORT
453        /* last mod file date              */ + ZipConstants.SHORT
454        /* crc-32                          */ + ZipConstants.WORD
455        /* compressed size                 */ + ZipConstants.WORD
456        /* uncompressed size               */ + (long) ZipConstants.WORD;
457    // @formatter:on
458
459    /**
460     * Compares two ZipArchiveEntries based on their offset within the archive.
461     * <p>
462     * Won't return any meaningful results if one of the entries isn't part of the archive at all.
463     * </p>
464     *
465     * @since 1.1
466     */
467    private static final Comparator<ZipArchiveEntry> offsetComparator = Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart)
468            .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset);
469
470    /**
471     * Creates a new Builder.
472     *
473     * @return a new Builder.
474     * @since 1.26.0
475     */
476    public static Builder builder() {
477        return new Builder();
478    }
479
480    /**
481     * Closes a ZIP file quietly; throwing no IOException, does nothing on null input.
482     *
483     * @param zipFile file to close, can be null
484     */
485    public static void closeQuietly(final ZipFile zipFile) {
486        org.apache.commons.io.IOUtils.closeQuietly(zipFile);
487    }
488
489    /**
490     * Creates a new SeekableByteChannel for reading.
491     *
492     * @param path the path to the file to open or create
493     * @return a new seekable byte channel
494     * @throws IOException if an I/O error occurs
495     */
496    private static SeekableByteChannel newReadByteChannel(final Path path) throws IOException {
497        return Files.newByteChannel(path, READ);
498    }
499
500    private static SeekableByteChannel openZipChannel(final Path path, final long maxNumberOfDisks, final OpenOption[] openOptions) throws IOException {
501        final FileChannel channel = FileChannel.open(path, StandardOpenOption.READ);
502        final List<FileChannel> channels = new ArrayList<>();
503        try {
504            final boolean is64 = positionAtEndOfCentralDirectoryRecord(channel);
505            long numberOfDisks;
506            if (is64) {
507                channel.position(channel.position() + ZipConstants.WORD + ZipConstants.WORD + ZipConstants.DWORD);
508                final ByteBuffer buf = ByteBuffer.allocate(ZipConstants.WORD);
509                buf.order(ByteOrder.LITTLE_ENDIAN);
510                IOUtils.readFully(channel, buf);
511                buf.flip();
512                numberOfDisks = buf.getInt() & 0xffffffffL;
513            } else {
514                channel.position(channel.position() + ZipConstants.WORD);
515                final ByteBuffer buf = ByteBuffer.allocate(ZipConstants.SHORT);
516                buf.order(ByteOrder.LITTLE_ENDIAN);
517                IOUtils.readFully(channel, buf);
518                buf.flip();
519                numberOfDisks = (buf.getShort() & 0xffff) + 1;
520            }
521            if (numberOfDisks > Math.min(maxNumberOfDisks, Integer.MAX_VALUE)) {
522                throw new IOException("Too many disks for zip archive, max=" + Math.min(maxNumberOfDisks, Integer.MAX_VALUE) + " actual=" + numberOfDisks);
523            }
524
525            if (numberOfDisks <= 1) {
526                return channel;
527            }
528            channel.close();
529
530            final Path parent = path.getParent();
531            final String basename = FilenameUtils.removeExtension(Objects.toString(path.getFileName(), null));
532
533            return ZipSplitReadOnlySeekableByteChannel.forPaths(IntStream.range(0, (int) numberOfDisks).mapToObj(i -> {
534                if (i == numberOfDisks - 1) {
535                    return path;
536                }
537                final Path lowercase = parent.resolve(String.format("%s.z%02d", basename, i + 1));
538                if (Files.exists(lowercase)) {
539                    return lowercase;
540                }
541                final Path uppercase = parent.resolve(String.format("%s.Z%02d", basename, i + 1));
542                if (Files.exists(uppercase)) {
543                    return uppercase;
544                }
545                return lowercase;
546            }).collect(Collectors.toList()), openOptions);
547        } catch (final Throwable ex) {
548            org.apache.commons.io.IOUtils.closeQuietly(channel);
549            channels.forEach(org.apache.commons.io.IOUtils::closeQuietly);
550            throw ex;
551        }
552    }
553
554    /**
555     * Searches for the and positions the stream at the start of the &quot;End of central dir record&quot;.
556     *
557     * @return true if it's Zip64 end of central directory or false if it's Zip32
558     */
559    private static boolean positionAtEndOfCentralDirectoryRecord(final SeekableByteChannel channel) throws IOException {
560        final boolean found = tryToLocateSignature(channel, MIN_EOCD_SIZE, MAX_EOCD_SIZE, ZipArchiveOutputStream.EOCD_SIG);
561        if (!found) {
562            throw new ZipException("Archive is not a ZIP archive");
563        }
564        boolean found64 = false;
565        final long position = channel.position();
566        if (position > ZIP64_EOCDL_LENGTH) {
567            final ByteBuffer wordBuf = ByteBuffer.allocate(4);
568            channel.position(channel.position() - ZIP64_EOCDL_LENGTH);
569            wordBuf.rewind();
570            IOUtils.readFully(channel, wordBuf);
571            wordBuf.flip();
572            found64 = wordBuf.equals(ByteBuffer.wrap(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG));
573            if (!found64) {
574                channel.position(position);
575            } else {
576                channel.position(channel.position() - ZipConstants.WORD);
577            }
578        }
579
580        return found64;
581    }
582
583    /**
584     * Searches the archive backwards from minDistance to maxDistance for the given signature, positions the RandomaccessFile right at the signature if it has
585     * been found.
586     */
587    private static boolean tryToLocateSignature(final SeekableByteChannel channel, final long minDistanceFromEnd, final long maxDistanceFromEnd,
588            final byte[] sig) throws IOException {
589        final ByteBuffer wordBuf = ByteBuffer.allocate(ZipConstants.WORD);
590        boolean found = false;
591        long off = channel.size() - minDistanceFromEnd;
592        final long stopSearching = Math.max(0L, channel.size() - maxDistanceFromEnd);
593        if (off >= 0) {
594            for (; off >= stopSearching; off--) {
595                channel.position(off);
596                try {
597                    wordBuf.rewind();
598                    IOUtils.readFully(channel, wordBuf);
599                    wordBuf.flip();
600                } catch (final EOFException ex) { // NOSONAR
601                    break;
602                }
603                int curr = wordBuf.get();
604                if (curr == sig[POS_0]) {
605                    curr = wordBuf.get();
606                    if (curr == sig[POS_1]) {
607                        curr = wordBuf.get();
608                        if (curr == sig[POS_2]) {
609                            curr = wordBuf.get();
610                            if (curr == sig[POS_3]) {
611                                found = true;
612                                break;
613                            }
614                        }
615                    }
616                }
617            }
618        }
619        if (found) {
620            channel.position(off);
621        }
622        return found;
623    }
624
625    /**
626     * List of entries in the order they appear inside the central directory.
627     */
628    private final List<ZipArchiveEntry> entries = new LinkedList<>();
629
630    /**
631     * Maps String to list of ZipArchiveEntrys, name -> actual entries.
632     */
633    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = new HashMap<>(HASH_SIZE);
634
635    /**
636     * The encoding to use for file names and the file comment.
637     * <p>
638     * For a list of possible values see <a href="Supported Encodings">https://docs.oracle.com/javase/8/docs/technotes/guides/intl/encoding.doc.html</a>.
639     * Defaults to UTF-8.
640     * </p>
641     */
642    private final Charset encoding;
643
644    /**
645     * The ZIP encoding to use for file names and the file comment.
646     */
647    private final ZipEncoding zipEncoding;
648
649    /**
650     * The actual data source.
651     */
652    private final SeekableByteChannel archive;
653
654    /**
655     * Whether to look for and use Unicode extra fields.
656     */
657    private final boolean useUnicodeExtraFields;
658
659    /**
660     * Whether the file is closed.
661     */
662    private volatile boolean closed = true;
663
664    /**
665     * Whether the ZIP archive is a split ZIP archive
666     */
667    private final boolean isSplitZipArchive;
668
669    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
670    private final byte[] dwordBuf = new byte[ZipConstants.DWORD];
671
672    private final byte[] wordBuf = new byte[ZipConstants.WORD];
673
674    private final byte[] cfhBuf = new byte[CFH_LEN];
675
676    private final byte[] shortBuf = new byte[ZipConstants.SHORT];
677
678    private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
679
680    private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
681
682    private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
683
684    private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf);
685
686    private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset;
687
688    private long centralDirectoryStartOffset;
689
690    private long firstLocalFileHeaderOffset;
691
/**
 * Opens the given file for reading, using UTF-8 as the encoding of file names.
 *
 * @param file the archive.
 * @throws IOException if an error occurs while reading the file.
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
public ZipFile(final File file) throws IOException {
    // same as the two argument variant with the UTF-8 charset name
    this(file, DEFAULT_CHARSET_NAME);
}
704
705    /**
706     * Opens the given file for reading, assuming the specified encoding for file names and scanning for Unicode extra fields.
707     *
708     * @param file     the archive.
709     * @param encoding the encoding to use for file names, use null for the platform's default encoding
710     * @throws IOException if an error occurs while reading the file.
711     * @deprecated Use {@link Builder#get()}.
712     */
713    @Deprecated
714    public ZipFile(final File file, final String encoding) throws IOException {
715        this(file.toPath(), encoding, true);
716    }
717
718    /**
719     * Opens the given file for reading, assuming the specified encoding for file names.
720     *
721     * @param file                  the archive.
722     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
723     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
724     * @throws IOException if an error occurs while reading the file.
725     * @deprecated Use {@link Builder#get()}.
726     */
727    @Deprecated
728    public ZipFile(final File file, final String encoding, final boolean useUnicodeExtraFields) throws IOException {
729        this(file.toPath(), encoding, useUnicodeExtraFields, false);
730    }
731
732    /**
733     * Opens the given file for reading, assuming the specified encoding for file names.
734     * <p>
735     * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
736     * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
737     * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
738     * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
739     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
740     * </p>
741     *
742     * @param file                  the archive.
743     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
744     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
745     * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc)
746     * @throws IOException if an error occurs while reading the file.
747     * @since 1.19
748     * @deprecated Use {@link Builder#get()}.
749     */
750    @Deprecated
751    @SuppressWarnings("resource") // Caller closes
752    public ZipFile(final File file, final String encoding, final boolean useUnicodeExtraFields, final boolean ignoreLocalFileHeader) throws IOException {
753        this(newReadByteChannel(file.toPath()), file.getAbsolutePath(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader);
754    }
755
756    /**
757     * Opens the given path for reading, assuming "UTF-8" for file names.
758     *
759     * @param path path to the archive.
760     * @throws IOException if an error occurs while reading the file.
761     * @since 1.22
762     * @deprecated Use {@link Builder#get()}.
763     */
764    @Deprecated
765    public ZipFile(final Path path) throws IOException {
766        this(path, DEFAULT_CHARSET_NAME);
767    }
768
769    /**
770     * Opens the given path for reading, assuming the specified encoding for file names and scanning for Unicode extra fields.
771     *
772     * @param path     path to the archive.
773     * @param encoding the encoding to use for file names, use null for the platform's default encoding
774     * @throws IOException if an error occurs while reading the file.
775     * @since 1.22
776     * @deprecated Use {@link Builder#get()}.
777     */
778    @Deprecated
779    public ZipFile(final Path path, final String encoding) throws IOException {
780        this(path, encoding, true);
781    }
782
783    /**
784     * Opens the given path for reading, assuming the specified encoding for file names.
785     *
786     * @param path                  path to the archive.
787     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
788     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
789     * @throws IOException if an error occurs while reading the file.
790     * @since 1.22
791     * @deprecated Use {@link Builder#get()}.
792     */
793    @Deprecated
794    public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields) throws IOException {
795        this(path, encoding, useUnicodeExtraFields, false);
796    }
797
798    /**
799     * Opens the given path for reading, assuming the specified encoding for file names.
800     * <p>
801     * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
802     * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
803     * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
804     * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
805     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
806     * </p>
807     *
808     * @param path                  path to the archive.
809     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
810     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
811     * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc)
812     * @throws IOException if an error occurs while reading the file.
813     * @since 1.22
814     * @deprecated Use {@link Builder#get()}.
815     */
816    @SuppressWarnings("resource") // Caller closes
817    @Deprecated
818    public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields, final boolean ignoreLocalFileHeader) throws IOException {
819        this(newReadByteChannel(path), path.toAbsolutePath().toString(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader);
820    }
821
822    /**
823     * Opens the given channel for reading, assuming "UTF-8" for file names.
824     * <p>
825     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
826     * </p>
827     *
828     * @param channel the archive.
829     *
830     * @throws IOException if an error occurs while reading the file.
831     * @since 1.13
832     * @deprecated Use {@link Builder#get()}.
833     */
834    @Deprecated
835    public ZipFile(final SeekableByteChannel channel) throws IOException {
836        this(channel, "a SeekableByteChannel", DEFAULT_CHARSET_NAME, true);
837    }
838
839    /**
840     * Opens the given channel for reading, assuming the specified encoding for file names.
841     * <p>
842     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
843     * </p>
844     *
845     * @param channel  the archive.
846     * @param encoding the encoding to use for file names, use null for the platform's default encoding
847     * @throws IOException if an error occurs while reading the file.
848     * @since 1.13
849     * @deprecated Use {@link Builder#get()}.
850     */
851    @Deprecated
852    public ZipFile(final SeekableByteChannel channel, final String encoding) throws IOException {
853        this(channel, "a SeekableByteChannel", encoding, true);
854    }
855
856    private ZipFile(final SeekableByteChannel channel, final String channelDescription, final Charset encoding, final boolean useUnicodeExtraFields,
857            final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException {
858        this.isSplitZipArchive = channel instanceof ZipSplitReadOnlySeekableByteChannel;
859        this.encoding = Charsets.toCharset(encoding, Builder.DEFAULT_CHARSET);
860        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
861        this.useUnicodeExtraFields = useUnicodeExtraFields;
862        this.archive = channel;
863        boolean success = false;
864        try {
865            final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = populateFromCentralDirectory();
866            if (!ignoreLocalFileHeader) {
867                resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
868            }
869            fillNameMap();
870            success = true;
871        } catch (final IOException e) {
872            throw new IOException("Error reading Zip content from " + channelDescription, e);
873        } finally {
874            this.closed = !success;
875            if (!success && closeOnError) {
876                org.apache.commons.io.IOUtils.closeQuietly(archive);
877            }
878        }
879    }
880
881    /**
882     * Opens the given channel for reading, assuming the specified encoding for file names.
883     * <p>
884     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
885     * </p>
886     *
887     * @param channel               the archive.
888     * @param channelDescription    description of the archive, used for error messages only.
889     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
890     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
891     * @throws IOException if an error occurs while reading the file.
892     * @since 1.13
893     * @deprecated Use {@link Builder#get()}.
894     */
895    @Deprecated
896    public ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields)
897            throws IOException {
898        this(channel, channelDescription, encoding, useUnicodeExtraFields, false, false);
899    }
900
901    /**
902     * Opens the given channel for reading, assuming the specified encoding for file names.
903     * <p>
904     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
905     * </p>
906     * <p>
907     * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
908     * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
909     * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
910     * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
911     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
912     * </p>
913     *
914     * @param channel               the archive.
915     * @param channelDescription    description of the archive, used for error messages only.
916     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
917     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
918     * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc)
919     * @throws IOException if an error occurs while reading the file.
920     * @since 1.19
921     * @deprecated Use {@link Builder#get()}.
922     */
923    @Deprecated
924    public ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields,
925            final boolean ignoreLocalFileHeader) throws IOException {
926        this(channel, channelDescription, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader);
927    }
928
929    private ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields,
930            final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException {
931        this(channel, channelDescription, Charsets.toCharset(encoding), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader);
932    }
933
934    /**
935     * Opens the given file for reading, assuming "UTF-8".
936     *
937     * @param name name of the archive.
938     * @throws IOException if an error occurs while reading the file.
939     * @deprecated Use {@link Builder#get()}.
940     */
941    @Deprecated
942    public ZipFile(final String name) throws IOException {
943        this(new File(name).toPath(), DEFAULT_CHARSET_NAME);
944    }
945
946    /**
947     * Opens the given file for reading, assuming the specified encoding for file names, scanning unicode extra fields.
948     *
949     * @param name     name of the archive.
950     * @param encoding the encoding to use for file names, use null for the platform's default encoding
951     * @throws IOException if an error occurs while reading the file.
952     * @deprecated Use {@link Builder#get()}.
953     */
954    @Deprecated
955    public ZipFile(final String name, final String encoding) throws IOException {
956        this(new File(name).toPath(), encoding, true);
957    }
958
959    /**
960     * Whether this class is able to read the given entry.
961     * <p>
962     * May return false if it is set up to use encryption or a compression method that hasn't been implemented yet.
963     * </p>
964     *
965     * @since 1.1
966     * @param entry the entry
967     * @return whether this class is able to read the given entry.
968     */
969    public boolean canReadEntryData(final ZipArchiveEntry entry) {
970        return ZipUtil.canHandleEntryData(entry);
971    }
972
973    /**
974     * Closes the archive.
975     *
976     * @throws IOException if an error occurs closing the archive.
977     */
978    @Override
979    public void close() throws IOException {
980        // this flag is only written here and read in finalize() which
981        // can never be run in parallel.
982        // no synchronization needed.
983        closed = true;
984        archive.close();
985    }
986
987    /**
988     * Transfer selected entries from this ZIP file to a given #ZipArchiveOutputStream. Compression and all other attributes will be as in this file.
989     * <p>
990     * This method transfers entries based on the central directory of the ZIP file.
991     * </p>
992     *
993     * @param target    The zipArchiveOutputStream to write the entries to
994     * @param predicate A predicate that selects which entries to write
995     * @throws IOException on error
996     */
997    public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) throws IOException {
998        final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
999        while (src.hasMoreElements()) {
1000            final ZipArchiveEntry entry = src.nextElement();
1001            if (predicate.test(entry)) {
1002                target.addRawArchiveEntry(entry, getRawInputStream(entry));
1003            }
1004        }
1005    }
1006
1007    /**
1008     * Creates new BoundedInputStream, according to implementation of underlying archive channel.
1009     */
1010    private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) {
1011        if (start < 0 || remaining < 0 || start + remaining < start) {
1012            throw new IllegalArgumentException("Corrupted archive, stream boundaries" + " are out of range");
1013        }
1014        return archive instanceof FileChannel ? new BoundedFileChannelInputStream(start, remaining, (FileChannel) archive)
1015                : new BoundedSeekableByteChannelInputStream(start, remaining, archive);
1016    }
1017
1018    private void fillNameMap() {
1019        entries.forEach(ze -> {
1020            // entries are filled in populateFromCentralDirectory and
1021            // never modified
1022            final String name = ze.getName();
1023            final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.computeIfAbsent(name, k -> new LinkedList<>());
1024            entriesOfThatName.addLast(ze);
1025        });
1026    }
1027
1028    /**
1029     * Ensures that the close method of this ZIP file is called when there are no more references to it.
1030     *
1031     * @see #close()
1032     */
1033    @Override
1034    protected void finalize() throws Throwable {
1035        try {
1036            if (!closed) {
1037                close();
1038            }
1039        } finally {
1040            super.finalize();
1041        }
1042    }
1043
1044    /**
1045     * Gets an InputStream for reading the content before the first local file header.
1046     *
1047     * @return null if there is no content before the first local file header. Otherwise, returns a stream to read the content before the first local file
1048     *         header.
1049     * @since 1.23
1050     */
1051    public InputStream getContentBeforeFirstLocalFileHeader() {
1052        return firstLocalFileHeaderOffset == 0 ? null : createBoundedInputStream(0, firstLocalFileHeaderOffset);
1053    }
1054
1055    private long getDataOffset(final ZipArchiveEntry ze) throws IOException {
1056        final long s = ze.getDataOffset();
1057        if (s == EntryStreamOffsets.OFFSET_UNKNOWN) {
1058            setDataOffset(ze);
1059            return ze.getDataOffset();
1060        }
1061        return s;
1062    }
1063
1064    /**
1065     * Gets the encoding to use for file names and the file comment.
1066     *
1067     * @return null if using the platform's default character encoding.
1068     */
1069    public String getEncoding() {
1070        return encoding.name();
1071    }
1072
1073    /**
1074     * Gets all entries.
1075     * <p>
1076     * Entries will be returned in the same order they appear within the archive's central directory.
1077     * </p>
1078     *
1079     * @return all entries as {@link ZipArchiveEntry} instances
1080     */
1081    public Enumeration<ZipArchiveEntry> getEntries() {
1082        return Collections.enumeration(entries);
1083    }
1084
1085    /**
1086     * Gets all named entries in the same order they appear within the archive's central directory.
1087     *
1088     * @param name name of the entry.
1089     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the given name
1090     * @since 1.6
1091     */
1092    public Iterable<ZipArchiveEntry> getEntries(final String name) {
1093        return nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST);
1094    }
1095
1096    /**
1097     * Gets all entries in physical order.
1098     * <p>
1099     * Entries will be returned in the same order their contents appear within the archive.
1100     * </p>
1101     *
1102     * @return all entries as {@link ZipArchiveEntry} instances
1103     *
1104     * @since 1.1
1105     */
1106    public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
1107        final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ARRAY);
1108        return Collections.enumeration(Arrays.asList(sortByOffset(allEntries)));
1109    }
1110
1111    /**
1112     * Gets all named entries in the same order their contents appear within the archive.
1113     *
1114     * @param name name of the entry.
1115     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the given name
1116     * @since 1.6
1117     */
1118    public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
1119        final LinkedList<ZipArchiveEntry> linkedList = nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST);
1120        return Arrays.asList(sortByOffset(linkedList.toArray(ZipArchiveEntry.EMPTY_ARRAY)));
1121    }
1122
1123    /**
1124     * Gets a named entry or {@code null} if no entry by that name exists.
1125     * <p>
1126     * If multiple entries with the same name exist the first entry in the archive's central directory by that name is returned.
1127     * </p>
1128     *
1129     * @param name name of the entry.
1130     * @return the ZipArchiveEntry corresponding to the given name - or {@code null} if not present.
1131     */
1132    public ZipArchiveEntry getEntry(final String name) {
1133        final LinkedList<ZipArchiveEntry> entries = nameMap.get(name);
1134        return entries != null ? entries.getFirst() : null;
1135    }
1136
1137    /**
1138     * Gets the offset of the first local file header in the file.
1139     *
1140     * @return the length of the content before the first local file header
1141     * @since 1.23
1142     */
1143    public long getFirstLocalFileHeaderOffset() {
1144        return firstLocalFileHeaderOffset;
1145    }
1146
1147    /**
1148     * Gets an InputStream for reading the contents of the given entry.
1149     *
1150     * @param entry the entry to get the stream for.
1151     * @return a stream to read the entry from. The returned stream implements {@link InputStreamStatistics}.
1152     * @throws IOException if unable to create an input stream from the zipEntry.
1153     */
1154    public InputStream getInputStream(final ZipArchiveEntry entry) throws IOException {
1155        if (!(entry instanceof Entry)) {
1156            return null;
1157        }
1158        // cast validity is checked just above
1159        ZipUtil.checkRequestedFeatures(entry);
1160
1161        // doesn't get closed if the method is not supported - which
1162        // should never happen because of the checkRequestedFeatures
1163        // call above
1164        final InputStream is = new BufferedInputStream(getRawInputStream(entry)); // NOSONAR
1165        switch (ZipMethod.getMethodByCode(entry.getMethod())) {
1166        case STORED:
1167            return new StoredStatisticsStream(is);
1168        case UNSHRINKING:
1169            return new UnshrinkingInputStream(is);
1170        case IMPLODING:
1171            try {
1172                return new ExplodingInputStream(entry.getGeneralPurposeBit().getSlidingDictionarySize(),
1173                        entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is);
1174            } catch (final IllegalArgumentException ex) {
1175                throw new IOException("bad IMPLODE data", ex);
1176            }
1177        case DEFLATED:
1178            final Inflater inflater = new Inflater(true);
1179            // Inflater with nowrap=true has this odd contract for a zero padding
1180            // byte following the data stream; this used to be zlib's requirement
1181            // and has been fixed a long time ago, but the contract persists so
1182            // we comply.
1183            // https://docs.oracle.com/javase/8/docs/api/java/util/zip/Inflater.html#Inflater(boolean)
1184            return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), inflater) {
1185                @Override
1186                public void close() throws IOException {
1187                    try {
1188                        super.close();
1189                    } finally {
1190                        inflater.end();
1191                    }
1192                }
1193            };
1194        case BZIP2:
1195            return new BZip2CompressorInputStream(is);
1196        case ENHANCED_DEFLATED:
1197            return new Deflate64CompressorInputStream(is);
1198        case AES_ENCRYPTED:
1199        case EXPANDING_LEVEL_1:
1200        case EXPANDING_LEVEL_2:
1201        case EXPANDING_LEVEL_3:
1202        case EXPANDING_LEVEL_4:
1203        case JPEG:
1204        case LZMA:
1205        case PKWARE_IMPLODING:
1206        case PPMD:
1207        case TOKENIZATION:
1208        case UNKNOWN:
1209        case WAVPACK:
1210        case XZ:
1211        default:
1212            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(entry.getMethod()), entry);
1213        }
1214    }
1215
1216    /**
1217     * Gets the raw stream of the archive entry (compressed form).
1218     * <p>
1219     * This method does not relate to how/if we understand the payload in the stream, since we really only intend to move it on to somewhere else.
1220     * </p>
1221     * <p>
1222     * Since version 1.22, this method will make an attempt to read the entry's data stream offset, even if the {@code ignoreLocalFileHeader} parameter was
1223     * {@code true} in the constructor. An IOException can also be thrown from the body of the method if this lookup fails for some reason.
1224     * </p>
1225     *
1226     * @param entry The entry to get the stream for
1227     * @return The raw input stream containing (possibly) compressed data.
1228     * @since 1.11
1229     * @throws IOException if there is a problem reading data offset (added in version 1.22).
1230     */
1231    public InputStream getRawInputStream(final ZipArchiveEntry entry) throws IOException {
1232        if (!(entry instanceof Entry)) {
1233            return null;
1234        }
1235        final long start = getDataOffset(entry);
1236        if (start == EntryStreamOffsets.OFFSET_UNKNOWN) {
1237            return null;
1238        }
1239        return createBoundedInputStream(start, entry.getCompressedSize());
1240    }
1241
1242    /**
1243     * Gets the entry's content as a String if isUnixSymlink() returns true for it, otherwise returns null.
1244     * <p>
1245     * This method assumes the symbolic link's file name uses the same encoding that as been specified for this ZipFile.
1246     * </p>
1247     *
1248     * @param entry ZipArchiveEntry object that represents the symbolic link
1249     * @return entry's content as a String
1250     * @throws IOException problem with content's input stream
1251     * @since 1.5
1252     */
1253    public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
1254        if (entry != null && entry.isUnixSymlink()) {
1255            try (InputStream in = getInputStream(entry)) {
1256                return zipEncoding.decode(org.apache.commons.io.IOUtils.toByteArray(in));
1257            }
1258        }
1259        return null;
1260    }
1261
1262    /**
1263     * Reads the central directory of the given archive and populates the internal tables with ZipArchiveEntry instances.
1264     * <p>
1265     * The ZipArchiveEntrys will know all data that can be obtained from the central directory alone, but not the data that requires the local file header or
1266     * additional data to be read.
1267     * </p>
1268     *
1269     * @return a map of zip entries that didn't have the language encoding flag set when read.
1270     */
1271    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() throws IOException {
1272        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = new HashMap<>();
1273
1274        positionAtCentralDirectory();
1275        centralDirectoryStartOffset = archive.position();
1276
1277        wordBbuf.rewind();
1278        IOUtils.readFully(archive, wordBbuf);
1279        long sig = ZipLong.getValue(wordBuf);
1280
1281        if (sig != CFH_SIG && startsWithLocalFileHeader()) {
1282            throw new IOException("Central directory is empty, can't expand" + " corrupt archive.");
1283        }
1284
1285        while (sig == CFH_SIG) {
1286            readCentralDirectoryEntry(noUTF8Flag);
1287            wordBbuf.rewind();
1288            IOUtils.readFully(archive, wordBbuf);
1289            sig = ZipLong.getValue(wordBuf);
1290        }
1291        return noUTF8Flag;
1292    }
1293
1294    /**
1295     * Searches for either the &quot;Zip64 end of central directory locator&quot; or the &quot;End of central dir record&quot;, parses it and positions the
1296     * stream at the first central directory record.
1297     */
1298    private void positionAtCentralDirectory() throws IOException {
1299        final boolean is64 = positionAtEndOfCentralDirectoryRecord(archive);
1300        if (!is64) {
1301            positionAtCentralDirectory32();
1302        } else {
1303            positionAtCentralDirectory64();
1304        }
1305    }
1306
1307    /**
1308     * Parses the &quot;End of central dir record&quot; and positions the stream at the first central directory record.
1309     *
1310     * Expects stream to be positioned at the beginning of the &quot;End of central dir record&quot;.
1311     */
1312    private void positionAtCentralDirectory32() throws IOException {
1313        final long endOfCentralDirectoryRecordOffset = archive.position();
1314        if (isSplitZipArchive) {
1315            skipBytes(CFD_DISK_OFFSET);
1316            shortBbuf.rewind();
1317            IOUtils.readFully(archive, shortBbuf);
1318            centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf);
1319
1320            skipBytes(CFD_LOCATOR_RELATIVE_OFFSET);
1321
1322            wordBbuf.rewind();
1323            IOUtils.readFully(archive, wordBbuf);
1324            centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);
1325            ((ZipSplitReadOnlySeekableByteChannel) archive).position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
1326        } else {
1327            skipBytes(CFD_LENGTH_OFFSET);
1328            wordBbuf.rewind();
1329            IOUtils.readFully(archive, wordBbuf);
1330            final long centralDirectoryLength = ZipLong.getValue(wordBuf);
1331
1332            wordBbuf.rewind();
1333            IOUtils.readFully(archive, wordBbuf);
1334            centralDirectoryStartDiskNumber = 0;
1335            centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);
1336
1337            firstLocalFileHeaderOffset = Long.max(endOfCentralDirectoryRecordOffset - centralDirectoryLength - centralDirectoryStartRelativeOffset, 0L);
1338            archive.position(centralDirectoryStartRelativeOffset + firstLocalFileHeaderOffset);
1339        }
1340    }
1341
1342    /**
1343     * Parses the &quot;Zip64 end of central directory locator&quot;, finds the &quot;Zip64 end of central directory record&quot; using the parsed information,
1344     * parses that and positions the stream at the first central directory record.
1345     *
1346     * Expects stream to be positioned right behind the &quot;Zip64 end of central directory locator&quot;'s signature.
1347     */
1348    private void positionAtCentralDirectory64() throws IOException {
1349        skipBytes(ZipConstants.WORD);
1350        if (isSplitZipArchive) {
1351            wordBbuf.rewind();
1352            IOUtils.readFully(archive, wordBbuf);
1353            final long diskNumberOfEOCD = ZipLong.getValue(wordBuf);
1354
1355            dwordBbuf.rewind();
1356            IOUtils.readFully(archive, dwordBbuf);
1357            final long relativeOffsetOfEOCD = ZipEightByteInteger.getLongValue(dwordBuf);
1358            ((ZipSplitReadOnlySeekableByteChannel) archive).position(diskNumberOfEOCD, relativeOffsetOfEOCD);
1359        } else {
1360            skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET - ZipConstants.WORD /* signature has already been read */);
1361            dwordBbuf.rewind();
1362            IOUtils.readFully(archive, dwordBbuf);
1363            archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
1364        }
1365
1366        wordBbuf.rewind();
1367        IOUtils.readFully(archive, wordBbuf);
1368        if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
1369            throw new ZipException("Archive's ZIP64 end of central directory locator is corrupt.");
1370        }
1371
1372        if (isSplitZipArchive) {
1373            skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET - ZipConstants.WORD /* signature has already been read */);
1374            wordBbuf.rewind();
1375            IOUtils.readFully(archive, wordBbuf);
1376            centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf);
1377
1378            skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET);
1379
1380            dwordBbuf.rewind();
1381            IOUtils.readFully(archive, dwordBbuf);
1382            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
1383            ((ZipSplitReadOnlySeekableByteChannel) archive).position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
1384        } else {
1385            skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET - ZipConstants.WORD /* signature has already been read */);
1386            dwordBbuf.rewind();
1387            IOUtils.readFully(archive, dwordBbuf);
1388            centralDirectoryStartDiskNumber = 0;
1389            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
1390            archive.position(centralDirectoryStartRelativeOffset);
1391        }
1392    }
1393
1394    /**
1395     * Reads an individual entry of the central directory, creates an ZipArchiveEntry from it and adds it to the global maps.
1396     *
1397     * @param noUTF8Flag map used to collect entries that don't have their UTF-8 flag set and whose name will be set by data read from the local file header
1398     *                   later. The current entry may be added to this map.
1399     */
1400    private void readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag) throws IOException {
1401        cfhBbuf.rewind();
1402        IOUtils.readFully(archive, cfhBbuf);
1403        int off = 0;
1404        final Entry ze = new Entry();
1405
1406        final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
1407        off += ZipConstants.SHORT;
1408        ze.setVersionMadeBy(versionMadeBy);
1409        ze.setPlatform(versionMadeBy >> BYTE_SHIFT & NIBLET_MASK);
1410
1411        ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
1412        off += ZipConstants.SHORT; // version required
1413
1414        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
1415        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
1416        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.ZIP_ENCODING_UTF_8 : zipEncoding;
1417        if (hasUTF8Flag) {
1418            ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
1419        }
1420        ze.setGeneralPurposeBit(gpFlag);
1421        ze.setRawFlag(ZipShort.getValue(cfhBuf, off));
1422
1423        off += ZipConstants.SHORT;
1424
1425        // noinspection MagicConstant
1426        ze.setMethod(ZipShort.getValue(cfhBuf, off));
1427        off += ZipConstants.SHORT;
1428
1429        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
1430        ze.setTime(time);
1431        off += ZipConstants.WORD;
1432
1433        ze.setCrc(ZipLong.getValue(cfhBuf, off));
1434        off += ZipConstants.WORD;
1435
1436        long size = ZipLong.getValue(cfhBuf, off);
1437        if (size < 0) {
1438            throw new IOException("broken archive, entry with negative compressed size");
1439        }
1440        ze.setCompressedSize(size);
1441        off += ZipConstants.WORD;
1442
1443        size = ZipLong.getValue(cfhBuf, off);
1444        if (size < 0) {
1445            throw new IOException("broken archive, entry with negative size");
1446        }
1447        ze.setSize(size);
1448        off += ZipConstants.WORD;
1449
1450        final int fileNameLen = ZipShort.getValue(cfhBuf, off);
1451        off += ZipConstants.SHORT;
1452        if (fileNameLen < 0) {
1453            throw new IOException("broken archive, entry with negative fileNameLen");
1454        }
1455
1456        final int extraLen = ZipShort.getValue(cfhBuf, off);
1457        off += ZipConstants.SHORT;
1458        if (extraLen < 0) {
1459            throw new IOException("broken archive, entry with negative extraLen");
1460        }
1461
1462        final int commentLen = ZipShort.getValue(cfhBuf, off);
1463        off += ZipConstants.SHORT;
1464        if (commentLen < 0) {
1465            throw new IOException("broken archive, entry with negative commentLen");
1466        }
1467
1468        ze.setDiskNumberStart(ZipShort.getValue(cfhBuf, off));
1469        off += ZipConstants.SHORT;
1470
1471        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
1472        off += ZipConstants.SHORT;
1473
1474        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
1475        off += ZipConstants.WORD;
1476
1477        final byte[] fileName = IOUtils.readRange(archive, fileNameLen);
1478        if (fileName.length < fileNameLen) {
1479            throw new EOFException();
1480        }
1481        ze.setName(entryEncoding.decode(fileName), fileName);
1482
1483        // LFH offset,
1484        ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off) + firstLocalFileHeaderOffset);
1485        // data offset will be filled later
1486        entries.add(ze);
1487
1488        final byte[] cdExtraData = IOUtils.readRange(archive, extraLen);
1489        if (cdExtraData.length < extraLen) {
1490            throw new EOFException();
1491        }
1492        try {
1493            ze.setCentralDirectoryExtra(cdExtraData);
1494        } catch (final RuntimeException e) {
1495            final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
1496            z.initCause(e);
1497            throw z;
1498        }
1499
1500        setSizesAndOffsetFromZip64Extra(ze);
1501        sanityCheckLFHOffset(ze);
1502
1503        final byte[] comment = IOUtils.readRange(archive, commentLen);
1504        if (comment.length < commentLen) {
1505            throw new EOFException();
1506        }
1507        ze.setComment(entryEncoding.decode(comment));
1508
1509        if (!hasUTF8Flag && useUnicodeExtraFields) {
1510            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
1511        }
1512
1513        ze.setStreamContiguous(true);
1514    }
1515
1516    /**
1517     * Walks through all recorded entries and adds the data available from the local file header.
1518     * <p>
1519     * Also records the offsets for the data to read from the entries.
1520     * </p>
1521     */
1522    private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag) throws IOException {
1523        for (final ZipArchiveEntry zipArchiveEntry : entries) {
1524            // entries are filled in populateFromCentralDirectory and never modified
1525            final Entry ze = (Entry) zipArchiveEntry;
1526            final int[] lens = setDataOffset(ze);
1527            final int fileNameLen = lens[0];
1528            final int extraFieldLen = lens[1];
1529            skipBytes(fileNameLen);
1530            final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen);
1531            if (localExtraData.length < extraFieldLen) {
1532                throw new EOFException();
1533            }
1534            try {
1535                ze.setExtra(localExtraData);
1536            } catch (final RuntimeException e) {
1537                final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
1538                z.initCause(e);
1539                throw z;
1540            }
1541
1542            if (entriesWithoutUTF8Flag.containsKey(ze)) {
1543                final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1544                ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, nc.comment);
1545            }
1546        }
1547    }
1548
1549    private void sanityCheckLFHOffset(final ZipArchiveEntry entry) throws IOException {
1550        if (entry.getDiskNumberStart() < 0) {
1551            throw new IOException("broken archive, entry with negative disk number");
1552        }
1553        if (entry.getLocalHeaderOffset() < 0) {
1554            throw new IOException("broken archive, entry with negative local file header offset");
1555        }
1556        if (isSplitZipArchive) {
1557            if (entry.getDiskNumberStart() > centralDirectoryStartDiskNumber) {
1558                throw new IOException("local file header for " + entry.getName() + " starts on a later disk than central directory");
1559            }
1560            if (entry.getDiskNumberStart() == centralDirectoryStartDiskNumber && entry.getLocalHeaderOffset() > centralDirectoryStartRelativeOffset) {
1561                throw new IOException("local file header for " + entry.getName() + " starts after central directory");
1562            }
1563        } else if (entry.getLocalHeaderOffset() > centralDirectoryStartOffset) {
1564            throw new IOException("local file header for " + entry.getName() + " starts after central directory");
1565        }
1566    }
1567
1568    private int[] setDataOffset(final ZipArchiveEntry entry) throws IOException {
1569        long offset = entry.getLocalHeaderOffset();
1570        if (isSplitZipArchive) {
1571            ((ZipSplitReadOnlySeekableByteChannel) archive).position(entry.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1572            // the offset should be updated to the global offset
1573            offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH;
1574        } else {
1575            archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1576        }
1577        wordBbuf.rewind();
1578        IOUtils.readFully(archive, wordBbuf);
1579        wordBbuf.flip();
1580        wordBbuf.get(shortBuf);
1581        final int fileNameLen = ZipShort.getValue(shortBuf);
1582        wordBbuf.get(shortBuf);
1583        final int extraFieldLen = ZipShort.getValue(shortBuf);
1584        entry.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH + ZipConstants.SHORT + ZipConstants.SHORT + fileNameLen + extraFieldLen);
1585        if (entry.getDataOffset() + entry.getCompressedSize() > centralDirectoryStartOffset) {
1586            throw new IOException("data for " + entry.getName() + " overlaps with central directory.");
1587        }
1588        return new int[] { fileNameLen, extraFieldLen };
1589    }
1590
1591    /**
1592     * If the entry holds a Zip64 extended information extra field, read sizes from there if the entry's sizes are set to 0xFFFFFFFFF, do the same for the
1593     * offset of the local file header.
1594     * <p>
1595     * Ensures the Zip64 extra either knows both compressed and uncompressed size or neither of both as the internal logic in ExtraFieldUtils forces the field
1596     * to create local header data even if they are never used - and here a field with only one size would be invalid.
1597     * </p>
1598     */
1599    private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry entry) throws IOException {
1600        final ZipExtraField extra = entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
1601        if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) {
1602            throw new ZipException("archive contains unparseable zip64 extra field");
1603        }
1604        final Zip64ExtendedInformationExtraField z64 = (Zip64ExtendedInformationExtraField) extra;
1605        if (z64 != null) {
1606            final boolean hasUncompressedSize = entry.getSize() == ZipConstants.ZIP64_MAGIC;
1607            final boolean hasCompressedSize = entry.getCompressedSize() == ZipConstants.ZIP64_MAGIC;
1608            final boolean hasRelativeHeaderOffset = entry.getLocalHeaderOffset() == ZipConstants.ZIP64_MAGIC;
1609            final boolean hasDiskStart = entry.getDiskNumberStart() == ZipConstants.ZIP64_MAGIC_SHORT;
1610            z64.reparseCentralDirectoryData(hasUncompressedSize, hasCompressedSize, hasRelativeHeaderOffset, hasDiskStart);
1611
1612            if (hasUncompressedSize) {
1613                final long size = z64.getSize().getLongValue();
1614                if (size < 0) {
1615                    throw new IOException("broken archive, entry with negative size");
1616                }
1617                entry.setSize(size);
1618            } else if (hasCompressedSize) {
1619                z64.setSize(new ZipEightByteInteger(entry.getSize()));
1620            }
1621
1622            if (hasCompressedSize) {
1623                final long size = z64.getCompressedSize().getLongValue();
1624                if (size < 0) {
1625                    throw new IOException("broken archive, entry with negative compressed size");
1626                }
1627                entry.setCompressedSize(size);
1628            } else if (hasUncompressedSize) {
1629                z64.setCompressedSize(new ZipEightByteInteger(entry.getCompressedSize()));
1630            }
1631
1632            if (hasRelativeHeaderOffset) {
1633                entry.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
1634            }
1635
1636            if (hasDiskStart) {
1637                entry.setDiskNumberStart(z64.getDiskStartNumber().getValue());
1638            }
1639        }
1640    }
1641
1642    /**
1643     * Skips the given number of bytes or throws an EOFException if skipping failed.
1644     */
1645    private void skipBytes(final int count) throws IOException {
1646        final long currentPosition = archive.position();
1647        final long newPosition = currentPosition + count;
1648        if (newPosition > archive.size()) {
1649            throw new EOFException();
1650        }
1651        archive.position(newPosition);
1652    }
1653
1654    /**
1655     * Sorts entries in place by offset.
1656     *
1657     * @param allEntries entries to sort
1658     * @return the given entries, sorted.
1659     */
1660    private ZipArchiveEntry[] sortByOffset(final ZipArchiveEntry[] allEntries) {
1661        Arrays.sort(allEntries, offsetComparator);
1662        return allEntries;
1663    }
1664
1665    /**
1666     * Checks whether the archive starts with an LFH. If it doesn't, it may be an empty archive.
1667     */
1668    private boolean startsWithLocalFileHeader() throws IOException {
1669        archive.position(firstLocalFileHeaderOffset);
1670        wordBbuf.rewind();
1671        IOUtils.readFully(archive, wordBbuf);
1672        return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1673    }
1674}