001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.xz;
020
021import java.io.IOException;
022import java.io.InputStream;
023
024import org.apache.commons.compress.MemoryLimitException;
025import org.apache.commons.compress.compressors.CompressorInputStream;
026import org.apache.commons.compress.utils.InputStreamStatistics;
027import org.apache.commons.io.input.BoundedInputStream;
028import org.tukaani.xz.SingleXZInputStream;
029import org.tukaani.xz.XZ;
030import org.tukaani.xz.XZInputStream;
031
032/**
033 * XZ decompressor.
034 *
035 * @since 1.4
036 */
037public class XZCompressorInputStream extends CompressorInputStream implements InputStreamStatistics {
038
039    /**
040     * Checks if the signature matches what is expected for a .xz file.
041     *
042     * @param signature the bytes to check
043     * @param length    the number of bytes to check
044     * @return true if signature matches the .xz magic bytes, false otherwise
045     */
046    public static boolean matches(final byte[] signature, final int length) {
047        if (length < XZ.HEADER_MAGIC.length) {
048            return false;
049        }
050
051        for (int i = 0; i < XZ.HEADER_MAGIC.length; ++i) {
052            if (signature[i] != XZ.HEADER_MAGIC[i]) {
053                return false;
054            }
055        }
056
057        return true;
058    }
059
060    private final BoundedInputStream countingStream;
061
062    private final InputStream in;
063
064    /**
065     * Creates a new input stream that decompresses XZ-compressed data from the specified input stream. This doesn't support concatenated .xz files.
066     *
067     * @param inputStream where to read the compressed data
068     *
069     * @throws IOException if the input is not in the .xz format, the input is corrupt or truncated, the .xz headers specify options that are not supported by
070     *                     this implementation, or the underlying {@code inputStream} throws an exception
071     */
072    public XZCompressorInputStream(final InputStream inputStream) throws IOException {
073        this(inputStream, false);
074    }
075
076    /**
077     * Creates a new input stream that decompresses XZ-compressed data from the specified input stream.
078     *
079     * @param inputStream            where to read the compressed data
080     * @param decompressConcatenated if true, decompress until the end of the input; if false, stop after the first .xz stream and leave the input position to
081     *                               point to the next byte after the .xz stream
082     *
083     * @throws IOException if the input is not in the .xz format, the input is corrupt or truncated, the .xz headers specify options that are not supported by
084     *                     this implementation, or the underlying {@code inputStream} throws an exception
085     */
086    public XZCompressorInputStream(final InputStream inputStream, final boolean decompressConcatenated) throws IOException {
087        this(inputStream, decompressConcatenated, -1);
088    }
089
090    /**
091     * Creates a new input stream that decompresses XZ-compressed data from the specified input stream.
092     *
093     * @param inputStream            where to read the compressed data
094     * @param decompressConcatenated if true, decompress until the end of the input; if false, stop after the first .xz stream and leave the input position to
095     *                               point to the next byte after the .xz stream
096     * @param memoryLimitInKb        memory limit used when reading blocks. If the estimated memory limit is exceeded on {@link #read()}, a
097     *                               {@link MemoryLimitException} is thrown.
098     *
099     * @throws IOException if the input is not in the .xz format, the input is corrupt or truncated, the .xz headers specify options that are not supported by
100     *                     this implementation, or the underlying {@code inputStream} throws an exception
101     *
102     * @since 1.14
103     */
104    public XZCompressorInputStream(final InputStream inputStream, final boolean decompressConcatenated, final int memoryLimitInKb) throws IOException {
105        countingStream = BoundedInputStream.builder().setInputStream(inputStream).get();
106        if (decompressConcatenated) {
107            in = new XZInputStream(countingStream, memoryLimitInKb);
108        } else {
109            in = new SingleXZInputStream(countingStream, memoryLimitInKb);
110        }
111    }
112
113    @Override
114    public int available() throws IOException {
115        return in.available();
116    }
117
118    @Override
119    public void close() throws IOException {
120        in.close();
121    }
122
123    /**
124     * @since 1.17
125     */
126    @Override
127    public long getCompressedCount() {
128        return countingStream.getCount();
129    }
130
131    @Override
132    public int read() throws IOException {
133        try {
134            final int ret = in.read();
135            count(ret == -1 ? -1 : 1);
136            return ret;
137        } catch (final org.tukaani.xz.MemoryLimitException e) {
138            throw new MemoryLimitException(e.getMemoryNeeded(), e.getMemoryLimit(), e);
139        }
140    }
141
142    @Override
143    public int read(final byte[] buf, final int off, final int len) throws IOException {
144        if (len == 0) {
145            return 0;
146        }
147        try {
148            final int ret = in.read(buf, off, len);
149            count(ret);
150            return ret;
151        } catch (final org.tukaani.xz.MemoryLimitException e) {
152            // convert to commons-compress MemoryLimtException
153            throw new MemoryLimitException(e.getMemoryNeeded(), e.getMemoryLimit(), e);
154        }
155    }
156
157    @Override
158    public long skip(final long n) throws IOException {
159        try {
160            return org.apache.commons.io.IOUtils.skip(in, n);
161        } catch (final org.tukaani.xz.MemoryLimitException e) {
162            // convert to commons-compress MemoryLimtException
163            throw new MemoryLimitException(e.getMemoryNeeded(), e.getMemoryLimit(), e);
164        }
165    }
166}