001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers;
020
021import java.io.FilterInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.nio.charset.Charset;
025import java.util.Iterator;
026import java.util.Objects;
027
028import org.apache.commons.io.Charsets;
029import org.apache.commons.io.function.IOConsumer;
030import org.apache.commons.io.function.IOIterator;
031import org.apache.commons.io.input.NullInputStream;
032
033/**
034 * Archive input streams <b>MUST</b> override the {@link #read(byte[], int, int)} - or {@link #read()} - method so that reading from the stream generates EOF
035 * for the end of data in each entry as well as at the end of the file proper.
036 * <p>
037 * The {@link #getNextEntry()} method is used to reset the input stream ready for reading the data from the next entry.
038 * </p>
039 * <p>
040 * The input stream classes must also implement a method with the signature:
041 * </p>
042 * <pre>
043 * public static boolean matches(byte[] signature, int length)
044 * </pre>
045 * <p>
046 * which is used by the {@link ArchiveStreamFactory} to autodetect the archive type from the first few bytes of a stream.
047 * </p>
048 *
049 * @param <E> The type of {@link ArchiveEntry} produced.
050 */
051public abstract class ArchiveInputStream<E extends ArchiveEntry> extends FilterInputStream {
052
053    class ArchiveEntryIOIterator implements IOIterator<E> {
054
055        private E next;
056
057        @Override
058        public boolean hasNext() throws IOException {
059            if (next == null) {
060                next = getNextEntry();
061            }
062            return next != null;
063        }
064
065        @Override
066        public synchronized E next() throws IOException {
067            if (next != null) {
068                final E e = next;
069                next = null;
070                return e;
071            }
072            return getNextEntry();
073        }
074
075        /**
076         * Always returns null, this is a "native" IOIterator.
077         *
078         * @return null.
079         */
080        @Override
081        public Iterator<E> unwrap() {
082            return null;
083        }
084
085    }
086
087    private static final int BYTE_MASK = 0xFF;
088
089    private final byte[] single = new byte[1];
090
091    /** The number of bytes read in this stream */
092    private long bytesRead;
093
094    private Charset charset;
095
    /**
     * Constructs a new instance that wraps {@link NullInputStream#INSTANCE} and uses the JVM default charset ({@link Charset#defaultCharset()}).
     */
    public ArchiveInputStream() {
        this(NullInputStream.INSTANCE, Charset.defaultCharset());
    }
102
    /**
     * Constructs a new instance; the other constructors of this class delegate here.
     *
     * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
     * @param charset     the charset to use; it is passed through {@code Charsets.toCharset(Charset)} before being stored.
     * @since 1.26.0
     */
    // This will be protected once subclasses use builders.
    private ArchiveInputStream(final InputStream inputStream, final Charset charset) {
        super(inputStream);
        // Normalized by Commons IO; presumably a null charset becomes the default charset - confirm against Charsets.toCharset(Charset).
        this.charset = Charsets.toCharset(charset);
    }
115
    /**
     * Constructs a new instance from an input stream and a charset name.
     *
     * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
     * @param charsetName the name of the charset to use; resolved to a {@link Charset} with {@code Charsets.toCharset(String)}.
     * @since 1.26.0
     */
    protected ArchiveInputStream(final InputStream inputStream, final String charsetName) {
        this(inputStream, Charsets.toCharset(charsetName));
    }
126
    /**
     * Tests whether this stream is able to read the data of the given entry.
     * <p>
     * Some archive formats support variants or details that are not supported (yet).
     * </p>
     *
     * @param archiveEntry the entry to test; this implementation does not inspect it.
     * @return This implementation always returns {@code true}.
     *
     * @since 1.1
     */
    public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
        return true;
    }
141
142    /**
143     * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1)
144     *
145     * @param read the number of bytes read
146     */
147    protected void count(final int read) {
148        count((long) read);
149    }
150
151    /**
152     * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1)
153     *
154     * @param read the number of bytes read
155     * @since 1.1
156     */
157    protected void count(final long read) {
158        if (read != -1) {
159            bytesRead += read;
160        }
161    }
162
163    /**
164     * Performs the given action for each element of the stream until all elements have been processed or the action throws an exception. Actions are performed
165     * in the order of iteration. Exceptions thrown by the action are relayed to the caller.
166     * <p>
167     * The behavior of this method is unspecified if the action performs side-effects that modify the underlying source of elements, unless an overriding class
168     * has specified a concurrent modification policy.
169     * </p>
170     *
171     * @param action The action to be performed for each element
172     * @throws IOException          if an I/O error occurs.
173     * @throws NullPointerException if the specified action is null
174     * @since 2.17.0
175     */
176    public void forEach(final IOConsumer<? super E> action) throws IOException {
177        iterator().forEachRemaining(Objects.requireNonNull(action));
178    }
179
    /**
     * Gets the current number of bytes read from this stream, as maintained by {@link #count(long)} and {@link #pushedBackBytes(long)}.
     *
     * @return the number of read bytes
     * @since 1.1
     */
    public long getBytesRead() {
        return bytesRead;
    }
189
    /**
     * Gets the Charset.
     *
     * @return the Charset set by the constructor.
     */
    public Charset getCharset() {
        return charset;
    }
198
199    /**
200     * Gets the current number of bytes read from this stream.
201     *
202     * @return the number of read bytes
203     * @deprecated this method may yield wrong results for large archives, use {@link #getBytesRead()} instead.
204     */
205    @Deprecated
206    public int getCount() {
207        return (int) bytesRead;
208    }
209
    /**
     * Gets the next archive entry in this stream.
     * <p>
     * The iterator returned by {@link #iterator()} obtains its entries from this method.
     * </p>
     *
     * @return the next entry, or {@code null} if there are no more entries.
     * @throws IOException if the next entry could not be read.
     */
    public abstract E getNextEntry() throws IOException;
217
    /**
     * Creates a new {@link IOIterator} over the entries of this stream.
     * <p>
     * Each call returns a new iterator whose entries come from {@link #getNextEntry()} of this stream.
     * </p>
     *
     * @return a new iterator over the entries of this stream.
     */
    public IOIterator<E> iterator() {
        return new ArchiveEntryIOIterator();
    }
221
    /**
     * Does nothing: this override neither records a mark nor calls the superclass implementation.
     *
     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
     *
     * @param readlimit ignored.
     */
    @Override
    public synchronized void mark(final int readlimit) {
        // noop
    }
233
    /**
     * Always returns false; {@link #mark(int)} and {@link #reset()} are no-ops in this class.
     *
     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
     *
     * @return Always returns false.
     */
    @Override
    public boolean markSupported() {
        return false;
    }
245
246    /**
247     * Decrements the counter of already read bytes.
248     *
249     * @param pushedBack the number of bytes pushed back.
250     * @since 1.1
251     */
252    protected void pushedBackBytes(final long pushedBack) {
253        bytesRead -= pushedBack;
254    }
255
256    /**
257     * Reads a byte of data. This method will block until enough input is available.
258     *
259     * Simply calls the {@link #read(byte[], int, int)} method.
260     *
261     * MUST be overridden if the {@link #read(byte[], int, int)} method is not overridden; may be overridden otherwise.
262     *
263     * @return the byte read, or -1 if end of input is reached
264     * @throws IOException if an I/O error has occurred
265     */
266    @Override
267    public int read() throws IOException {
268        final int num = read(single, 0, 1);
269        return num == -1 ? -1 : single[0] & BYTE_MASK;
270    }
271
    /**
     * Does nothing: this override neither repositions the stream nor calls the superclass implementation.
     *
     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
     *
     * @throws IOException not thrown here but may be thrown from a subclass.
     */
    @Override
    public synchronized void reset() throws IOException {
        // noop
    }
283}