001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers;
020
021import java.io.ByteArrayInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.OutputStream;
025import java.security.AccessController;
026import java.security.PrivilegedAction;
027import java.util.Collections;
028import java.util.Locale;
029import java.util.ServiceLoader;
030import java.util.Set;
031import java.util.SortedMap;
032import java.util.TreeMap;
033
034import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
035import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
036import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream;
037import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
038import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream;
039import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
040import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
041import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
042import org.apache.commons.compress.archivers.sevenz.SevenZFile;
043import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
044import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
045import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
046import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
047import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
048import org.apache.commons.compress.utils.IOUtils;
049import org.apache.commons.compress.utils.Sets;
050
051/**
052 * Factory to create Archive[In|Out]putStreams from names or the first bytes of the InputStream. In order to add other implementations, you should extend
053 * ArchiveStreamFactory and override the appropriate methods (and call their implementation from super of course).
054 *
055 * Compressing a ZIP-File:
056 *
057 * <pre>
058 * final OutputStream out = Files.newOutputStream(output.toPath());
059 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out);
060 *
061 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml"));
062 * IOUtils.copy(Files.newInputStream(file1.toPath()), os);
063 * os.closeArchiveEntry();
064 *
065 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml"));
066 * IOUtils.copy(Files.newInputStream(file2.toPath()), os);
067 * os.closeArchiveEntry();
068 * os.close();
069 * </pre>
070 *
071 * Decompressing a ZIP-File:
072 *
073 * <pre>
074 * final InputStream is = Files.newInputStream(input.toPath());
075 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is);
076 * ZipArchiveEntry entry = (ZipArchiveEntry) in.getNextEntry();
077 * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName()));
078 * IOUtils.copy(in, out);
079 * out.close();
080 * in.close();
081 * </pre>
082 *
083 * @Immutable provided that the deprecated method setEntryEncoding is not used.
084 * @ThreadSafe even if the deprecated method setEntryEncoding is used
085 */
086public class ArchiveStreamFactory implements ArchiveStreamProvider {
087
    /** Number of bytes read from the start of the stream when probing for a TAR header (the first block). */
    private static final int TAR_HEADER_SIZE = 512;

    /** Upper bound used by {@link #detect(InputStream)} when skipping leading directory entries of a candidate TAR stream. */
    private static final int TAR_TEST_ENTRY_COUNT = 10;

    /** Number of bytes read from the start of the stream when probing for a UNIX dump signature. */
    private static final int DUMP_SIGNATURE_SIZE = 32;

    /** Number of bytes read from the start of the stream for the first round of signature probes (ZIP, JAR, AR, CPIO, ARJ, 7z). */
    private static final int SIGNATURE_SIZE = 12;
095
    /**
     * A shared instance created with the no-argument constructor, that is, without an explicit entry encoding.
     * <p>
     * The constructors of this class are public, so other instances may exist alongside this one.
     * </p>
     *
     * @since 1.21
     */
    public static final ArchiveStreamFactory DEFAULT = new ArchiveStreamFactory();
102
    /**
     * Constant (value {@value}) used to identify the APK archive format.
     * <p>
     * File extensions of the APK family are .apk, .xapk, .apks and .apkm; this constant names the .apk variant.
     * </p>
     *
     * @since 1.22
     */
    public static final String APK = "apk";

    /**
     * Constant (value {@value}) used to identify the XAPK archive format.
     * <p>
     * File extensions of the APK family are .apk, .xapk, .apks and .apkm; this constant names the .xapk variant.
     * </p>
     *
     * @since 1.22
     */
    public static final String XAPK = "xapk";

    /**
     * Constant (value {@value}) used to identify the APKS archive format.
     * <p>
     * File extensions of the APK family are .apk, .xapk, .apks and .apkm; this constant names the .apks variant.
     * </p>
     *
     * @since 1.22
     */
    public static final String APKS = "apks";

    /**
     * Constant (value {@value}) used to identify the APKM archive format.
     * <p>
     * File extensions of the APK family are .apk, .xapk, .apks and .apkm; this constant names the .apkm variant.
     * </p>
     *
     * @since 1.22
     */
    public static final String APKM = "apkm";
142
    /**
     * Constant (value {@value}) used to identify the AR archive format.
     *
     * @since 1.1
     */
    public static final String AR = "ar";

    /**
     * Constant (value {@value}) used to identify the ARJ archive format. Not supported as an output stream type.
     *
     * @since 1.6
     */
    public static final String ARJ = "arj";

    /**
     * Constant (value {@value}) used to identify the CPIO archive format.
     *
     * @since 1.1
     */
    public static final String CPIO = "cpio";

    /**
     * Constant (value {@value}) used to identify the UNIX DUMP archive format. Not supported as an output stream type.
     *
     * @since 1.3
     */
    public static final String DUMP = "dump";

    /**
     * Constant (value {@value}) used to identify the JAR archive format.
     *
     * @since 1.1
     */
    public static final String JAR = "jar";

    /**
     * Constant (value {@value}) used to identify the TAR archive format.
     *
     * @since 1.1
     */
    public static final String TAR = "tar";

    /**
     * Constant (value {@value}) used to identify the ZIP archive format.
     *
     * @since 1.1
     */
    public static final String ZIP = "zip";

    /**
     * Constant (value {@value}) used to identify the 7z archive format. The stream factory methods of this class reject this name with a
     * {@link StreamingNotSupportedException}, for input as well as for output.
     *
     * @since 1.8
     */
    public static final String SEVEN_Z = "7z";
198
199    private static Iterable<ArchiveStreamProvider> archiveStreamProviderIterable() {
200        return ServiceLoader.load(ArchiveStreamProvider.class, ClassLoader.getSystemClassLoader());
201    }
202
203    /**
204     * Try to determine the type of Archiver
205     *
206     * @param in input stream
207     * @return type of archiver if found
208     * @throws ArchiveException if an archiver cannot be detected in the stream
209     * @since 1.14
210     */
211    public static String detect(final InputStream in) throws ArchiveException {
212        if (in == null) {
213            throw new IllegalArgumentException("Stream must not be null.");
214        }
215
216        if (!in.markSupported()) {
217            throw new IllegalArgumentException("Mark is not supported.");
218        }
219
220        final byte[] signature = new byte[SIGNATURE_SIZE];
221        in.mark(signature.length);
222        int signatureLength = -1;
223        try {
224            signatureLength = IOUtils.readFully(in, signature);
225            in.reset();
226        } catch (final IOException e) {
227            throw new ArchiveException("IOException while reading signature.", e);
228        }
229
230        // For now JAR files are detected as ZIP files.
231        if (ZipArchiveInputStream.matches(signature, signatureLength)) {
232            return ZIP;
233        }
234        // For now JAR files are detected as ZIP files.
235        if (JarArchiveInputStream.matches(signature, signatureLength)) {
236            return JAR;
237        }
238        if (ArArchiveInputStream.matches(signature, signatureLength)) {
239            return AR;
240        }
241        if (CpioArchiveInputStream.matches(signature, signatureLength)) {
242            return CPIO;
243        }
244        if (ArjArchiveInputStream.matches(signature, signatureLength)) {
245            return ARJ;
246        }
247        if (SevenZFile.matches(signature, signatureLength)) {
248            return SEVEN_Z;
249        }
250
251        // Dump needs a bigger buffer to check the signature;
252        final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE];
253        in.mark(dumpsig.length);
254        try {
255            signatureLength = IOUtils.readFully(in, dumpsig);
256            in.reset();
257        } catch (final IOException e) {
258            throw new ArchiveException("IOException while reading dump signature", e);
259        }
260        if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) {
261            return DUMP;
262        }
263
264        // Tar needs an even bigger buffer to check the signature; read the first block
265        final byte[] tarHeader = new byte[TAR_HEADER_SIZE];
266        in.mark(tarHeader.length);
267        try {
268            signatureLength = IOUtils.readFully(in, tarHeader);
269            in.reset();
270        } catch (final IOException e) {
271            throw new ArchiveException("IOException while reading tar signature", e);
272        }
273        if (TarArchiveInputStream.matches(tarHeader, signatureLength)) {
274            return TAR;
275        }
276
277        // COMPRESS-117
278        if (signatureLength >= TAR_HEADER_SIZE) {
279            try (TarArchiveInputStream inputStream = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader))) {
280                // COMPRESS-191 - verify the header checksum
281                // COMPRESS-644 - do not allow zero byte file entries
282                TarArchiveEntry entry = inputStream.getNextEntry();
283                // try to find the first non-directory entry within the first 10 entries.
284                int count = 0;
285                while (entry != null && entry.isDirectory() && entry.isCheckSumOK() && count++ < TAR_TEST_ENTRY_COUNT) {
286                    entry = inputStream.getNextEntry();
287                }
288                if (entry != null && entry.isCheckSumOK() && !entry.isDirectory() && entry.getSize() > 0 || count > 0) {
289                    return TAR;
290                }
291            } catch (final Exception ignored) {
292                // can generate IllegalArgumentException as well as IOException auto-detection, simply not a TAR ignored
293            }
294        }
295        throw new ArchiveException("No Archiver found for the stream signature");
296    }
297
298    /**
299     * Constructs a new sorted map from input stream provider names to provider objects.
300     *
301     * <p>
302     * The map returned by this method will have one entry for each provider for which support is available in the current Java virtual machine. If two or more
303     * supported provider have the same name then the resulting map will contain just one of them; which one it will contain is not specified.
304     * </p>
305     *
306     * <p>
307     * The invocation of this method, and the subsequent use of the resulting map, may cause time-consuming disk or network I/O operations to occur. This method
308     * is provided for applications that need to enumerate all of the available providers, for example to allow user provider selection.
309     * </p>
310     *
311     * <p>
312     * This method may return different results at different times if new providers are dynamically made available to the current Java virtual machine.
313     * </p>
314     *
315     * @return An immutable, map from names to provider objects
316     * @since 1.13
317     */
318    public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() {
319        return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> {
320            final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
321            putAll(DEFAULT.getInputStreamArchiveNames(), DEFAULT, map);
322            archiveStreamProviderIterable().forEach(provider -> putAll(provider.getInputStreamArchiveNames(), provider, map));
323            return map;
324        });
325    }
326
327    /**
328     * Constructs a new sorted map from output stream provider names to provider objects.
329     *
330     * <p>
331     * The map returned by this method will have one entry for each provider for which support is available in the current Java virtual machine. If two or more
332     * supported provider have the same name then the resulting map will contain just one of them; which one it will contain is not specified.
333     * </p>
334     *
335     * <p>
336     * The invocation of this method, and the subsequent use of the resulting map, may cause time-consuming disk or network I/O operations to occur. This method
337     * is provided for applications that need to enumerate all of the available providers, for example to allow user provider selection.
338     * </p>
339     *
340     * <p>
341     * This method may return different results at different times if new providers are dynamically made available to the current Java virtual machine.
342     * </p>
343     *
344     * @return An immutable, map from names to provider objects
345     * @since 1.13
346     */
347    public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() {
348        return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> {
349            final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
350            putAll(DEFAULT.getOutputStreamArchiveNames(), DEFAULT, map);
351            archiveStreamProviderIterable().forEach(provider -> putAll(provider.getOutputStreamArchiveNames(), provider, map));
352            return map;
353        });
354    }
355
356    static void putAll(final Set<String> names, final ArchiveStreamProvider provider, final TreeMap<String, ArchiveStreamProvider> map) {
357        names.forEach(name -> map.put(toKey(name), provider));
358    }
359
360    private static String toKey(final String name) {
361        return name.toUpperCase(Locale.ROOT);
362    }
363
    /**
     * Entry encoding, null for the default. Declared volatile because it can still be changed after construction through the deprecated
     * {@link #setEntryEncoding(String)}.
     */
    private volatile String entryEncoding;

    /** Input stream providers keyed by normalized name; null until {@link #getArchiveInputStreamProviders()} is first called. */
    private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders;

    /** Output stream providers keyed by normalized name; null until {@link #getArchiveOutputStreamProviders()} is first called. */
    private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders;
372
    /**
     * Constructs an instance without an explicit entry encoding.
     * <p>
     * This delegates to {@link #ArchiveStreamFactory(String)} with {@code null}, so streams created by this factory are constructed without an encoding argument
     * unless one is passed to the create method.
     * </p>
     */
    public ArchiveStreamFactory() {
        this(null);
    }
379
    /**
     * Constructs an instance using the specified encoding.
     *
     * @param entryEncoding the encoding handed to the create methods that take no explicit encoding; {@code null} means no encoding is passed on to the
     *                      archive streams.
     *
     * @since 1.10
     */
    public ArchiveStreamFactory(final String entryEncoding) {
        this.entryEncoding = entryEncoding;
    }
390
391    /**
392     * Creates an archive input stream from an input stream, autodetecting the archive type from the first few bytes of the stream. The InputStream must support
393     * marks, like BufferedInputStream.
394     *
395     * @param <I> The {@link ArchiveInputStream} type.
396     * @param in  the input stream
397     * @return the archive input stream
398     * @throws ArchiveException               if the archiver name is not known
399     * @throws StreamingNotSupportedException if the format cannot be read from a stream
400     * @throws IllegalArgumentException       if the stream is null or does not support mark
401     */
402    public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final InputStream in) throws ArchiveException {
403        return createArchiveInputStream(detect(in), in);
404    }
405
406    /**
407     * Creates an archive input stream from an archiver name and an input stream.
408     *
409     * @param <I>          The {@link ArchiveInputStream} type.
410     * @param archiverName the archive name, i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or
411     *                     {@value #SEVEN_Z}
412     * @param in           the input stream
413     * @return the archive input stream
414     * @throws ArchiveException               if the archiver name is not known
415     * @throws StreamingNotSupportedException if the format cannot be read from a stream
416     * @throws IllegalArgumentException       if the archiver name or stream is null
417     */
418    public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final String archiverName, final InputStream in)
419            throws ArchiveException {
420        return createArchiveInputStream(archiverName, in, entryEncoding);
421    }
422
423    @SuppressWarnings("unchecked")
424    @Override
425    public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final String archiverName, final InputStream in,
426            final String actualEncoding) throws ArchiveException {
427
428        if (archiverName == null) {
429            throw new IllegalArgumentException("Archiver name must not be null.");
430        }
431
432        if (in == null) {
433            throw new IllegalArgumentException("InputStream must not be null.");
434        }
435
436        if (AR.equalsIgnoreCase(archiverName)) {
437            return (I) new ArArchiveInputStream(in);
438        }
439        if (ARJ.equalsIgnoreCase(archiverName)) {
440            if (actualEncoding != null) {
441                return (I) new ArjArchiveInputStream(in, actualEncoding);
442            }
443            return (I) new ArjArchiveInputStream(in);
444        }
445        if (ZIP.equalsIgnoreCase(archiverName)) {
446            if (actualEncoding != null) {
447                return (I) new ZipArchiveInputStream(in, actualEncoding);
448            }
449            return (I) new ZipArchiveInputStream(in);
450        }
451        if (TAR.equalsIgnoreCase(archiverName)) {
452            if (actualEncoding != null) {
453                return (I) new TarArchiveInputStream(in, actualEncoding);
454            }
455            return (I) new TarArchiveInputStream(in);
456        }
457        if (JAR.equalsIgnoreCase(archiverName) || APK.equalsIgnoreCase(archiverName)) {
458            if (actualEncoding != null) {
459                return (I) new JarArchiveInputStream(in, actualEncoding);
460            }
461            return (I) new JarArchiveInputStream(in);
462        }
463        if (CPIO.equalsIgnoreCase(archiverName)) {
464            if (actualEncoding != null) {
465                return (I) new CpioArchiveInputStream(in, actualEncoding);
466            }
467            return (I) new CpioArchiveInputStream(in);
468        }
469        if (DUMP.equalsIgnoreCase(archiverName)) {
470            if (actualEncoding != null) {
471                return (I) new DumpArchiveInputStream(in, actualEncoding);
472            }
473            return (I) new DumpArchiveInputStream(in);
474        }
475        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
476            throw new StreamingNotSupportedException(SEVEN_Z);
477        }
478
479        final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName));
480        if (archiveStreamProvider != null) {
481            return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding);
482        }
483
484        throw new ArchiveException("Archiver: " + archiverName + " not found.");
485    }
486
487    /**
488     * Creates an archive output stream from an archiver name and an output stream.
489     *
490     * @param <O>          The {@link ArchiveOutputStream} type.
491     * @param archiverName the archive name, i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO}
492     * @param out          the output stream
493     * @return the archive output stream
494     * @throws ArchiveException               if the archiver name is not known
495     * @throws StreamingNotSupportedException if the format cannot be written to a stream
496     * @throws IllegalArgumentException       if the archiver name or stream is null
497     */
498    public <O extends ArchiveOutputStream<? extends ArchiveEntry>> O createArchiveOutputStream(final String archiverName, final OutputStream out)
499            throws ArchiveException {
500        return createArchiveOutputStream(archiverName, out, entryEncoding);
501    }
502
503    @SuppressWarnings("unchecked")
504    @Override
505    public <O extends ArchiveOutputStream<? extends ArchiveEntry>> O createArchiveOutputStream(final String archiverName, final OutputStream out,
506            final String actualEncoding) throws ArchiveException {
507        if (archiverName == null) {
508            throw new IllegalArgumentException("Archiver name must not be null.");
509        }
510        if (out == null) {
511            throw new IllegalArgumentException("OutputStream must not be null.");
512        }
513
514        if (AR.equalsIgnoreCase(archiverName)) {
515            return (O) new ArArchiveOutputStream(out);
516        }
517        if (ZIP.equalsIgnoreCase(archiverName)) {
518            final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out);
519            if (actualEncoding != null) {
520                zip.setEncoding(actualEncoding);
521            }
522            return (O) zip;
523        }
524        if (TAR.equalsIgnoreCase(archiverName)) {
525            if (actualEncoding != null) {
526                return (O) new TarArchiveOutputStream(out, actualEncoding);
527            }
528            return (O) new TarArchiveOutputStream(out);
529        }
530        if (JAR.equalsIgnoreCase(archiverName)) {
531            if (actualEncoding != null) {
532                return (O) new JarArchiveOutputStream(out, actualEncoding);
533            }
534            return (O) new JarArchiveOutputStream(out);
535        }
536        if (CPIO.equalsIgnoreCase(archiverName)) {
537            if (actualEncoding != null) {
538                return (O) new CpioArchiveOutputStream(out, actualEncoding);
539            }
540            return (O) new CpioArchiveOutputStream(out);
541        }
542        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
543            throw new StreamingNotSupportedException(SEVEN_Z);
544        }
545
546        final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName));
547        if (archiveStreamProvider != null) {
548            return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding);
549        }
550
551        throw new ArchiveException("Archiver: " + archiverName + " not found.");
552    }
553
554    public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() {
555        if (archiveInputStreamProviders == null) {
556            archiveInputStreamProviders = Collections.unmodifiableSortedMap(findAvailableArchiveInputStreamProviders());
557        }
558        return archiveInputStreamProviders;
559    }
560
561    public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() {
562        if (archiveOutputStreamProviders == null) {
563            archiveOutputStreamProviders = Collections.unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders());
564        }
565        return archiveOutputStreamProviders;
566    }
567
    /**
     * Gets the encoding to use for arj, jar, ZIP, dump, cpio and tar files, or null for the archiver default.
     * <p>
     * This is the value given to the constructor, or the one last set through the deprecated {@link #setEntryEncoding(String)}.
     * </p>
     *
     * @return entry encoding, or null for the archiver default
     * @since 1.5
     */
    public String getEntryEncoding() {
        return entryEncoding;
    }
577
    /**
     * Gets the archive names this factory reports for input streams: {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR},
     * {@value #CPIO}, {@value #DUMP} and {@value #SEVEN_Z}.
     *
     * @return the set of names, built by {@code Sets.newHashSet} on every call
     */
    @Override
    public Set<String> getInputStreamArchiveNames() {
        // NOTE(review): createArchiveInputStream also accepts APK, which is not listed here, and it rejects SEVEN_Z with a
        // StreamingNotSupportedException although SEVEN_Z is listed - confirm that both are intended.
        return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z);
    }
582
    /**
     * Gets the archive names this factory reports for output streams: {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO} and
     * {@value #SEVEN_Z}.
     *
     * @return the set of names, built by {@code Sets.newHashSet} on every call
     */
    @Override
    public Set<String> getOutputStreamArchiveNames() {
        // NOTE(review): createArchiveOutputStream rejects SEVEN_Z with a StreamingNotSupportedException although SEVEN_Z is listed
        // here - confirm that this is intended.
        return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z);
    }
587
    /**
     * Sets the encoding to use for arj, jar, ZIP, dump, cpio and tar files. Use null for the archiver default.
     * <p>
     * The new value is used by later calls to the create methods that take no explicit encoding. The field written here is volatile.
     * </p>
     *
     * @param entryEncoding the entry encoding, null uses the archiver default.
     * @since 1.5
     * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding
     */
    @Deprecated
    public void setEntryEncoding(final String entryEncoding) {
        this.entryEncoding = entryEncoding;
    }
599
600}