// ArchiveInputStream.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers;

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.Objects;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.function.IOConsumer;
import org.apache.commons.io.function.IOIterator;
import org.apache.commons.io.input.NullInputStream;

/**
 * Archive input streams <b>MUST</b> override the {@link #read(byte[], int, int)} - or {@link #read()} - method so that reading from the stream generates EOF
 * for the end of data in each entry as well as at the end of the file proper.
 * <p>
 * The {@link #getNextEntry()} method is used to reset the input stream ready for reading the data from the next entry.
 * </p>
 * <p>
 * The input stream classes must also implement a method with the signature:
 * </p>
 * <pre>
 * public static boolean matches(byte[] signature, int length)
 * </pre>
 * <p>
 * which is used by the {@link ArchiveStreamFactory} to autodetect the archive type from the first few bytes of a stream.
 * </p>
 *
 * @param <E> The type of {@link ArchiveEntry} produced.
 */
public abstract class ArchiveInputStream<E extends ArchiveEntry> extends FilterInputStream {

    /**
     * An {@link IOIterator} over the entries of the enclosing stream, backed by {@link ArchiveInputStream#getNextEntry()}.
     * <p>
     * {@link #hasNext()} can only answer by actually fetching an entry, so the fetched entry is parked in a one-element look-ahead until {@link #next()}
     * hands it out.
     * </p>
     */
    class ArchiveEntryIOIterator implements IOIterator<E> {

        /** Entry fetched by {@link #hasNext()} but not yet returned by {@link #next()}, or {@code null} if none is pending. */
        private E next;

        /**
         * Tests whether another entry is available, fetching it from the enclosing stream if none is pending.
         * <p>
         * Synchronized on this iterator, like {@link #next()}, because both methods read and write the same look-ahead field.
         * </p>
         *
         * @return {@code true} if a pending entry exists or {@link ArchiveInputStream#getNextEntry()} returned a non-null entry.
         * @throws IOException if the next entry could not be read.
         */
        @Override
        public synchronized boolean hasNext() throws IOException {
            if (next == null) {
                next = getNextEntry();
            }
            return next != null;
        }

        /**
         * Returns the pending look-ahead entry if there is one, otherwise reads the next entry directly from the enclosing stream.
         *
         * @return the next entry, or {@code null} when {@link ArchiveInputStream#getNextEntry()} reports no more entries.
         * @throws IOException if the next entry could not be read.
         */
        @Override
        public synchronized E next() throws IOException {
            if (next != null) {
                final E e = next;
                // Clear the look-ahead so the same entry is never handed out twice.
                next = null;
                return e;
            }
            return getNextEntry();
        }

        /**
         * Always returns null, this is a "native" IOIterator.
         *
         * @return null.
         */
        @Override
        public Iterator<E> unwrap() {
            return null;
        }

    }

    /** Mask that converts a signed {@code byte} into its unsigned {@code int} value in the range 0-255. */
    private static final int BYTE_MASK = 0xFF;

    /** Scratch buffer used by {@link #read()} to delegate to {@link #read(byte[], int, int)}. */
    private final byte[] single = new byte[1];

    /** The number of bytes read in this stream */
    private long bytesRead;

    /** The Charset given at construction time, after normalization by {@code Charsets.toCharset}; never reassigned. */
    private final Charset charset;

    /**
     * Constructs a new instance that wraps {@link NullInputStream#INSTANCE} and uses {@link Charset#defaultCharset()}.
     */
    public ArchiveInputStream() {
        this(NullInputStream.INSTANCE, Charset.defaultCharset());
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
     * @param charset charset.
     * @since 1.26.0
     */
    // This will be protected once subclasses use builders.
    private ArchiveInputStream(final InputStream inputStream, final Charset charset) {
        super(inputStream);
        this.charset = Charsets.toCharset(charset);
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
     * @param charsetName charset name.
     * @since 1.26.0
     */
    protected ArchiveInputStream(final InputStream inputStream, final String charsetName) {
        this(inputStream, Charsets.toCharset(charsetName));
    }

    /**
     * Whether this stream is able to read the given entry.
     * <p>
     * Some archive formats support variants or details that are not supported (yet).
     * </p>
     *
     * @param archiveEntry the entry to test
     * @return This implementation always returns true.
     *
     * @since 1.1
     */
    public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
        return true;
    }

    /**
     * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1)
     *
     * @param read the number of bytes read
     */
    protected void count(final int read) {
        count((long) read);
    }

    /**
     * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1)
     *
     * @param read the number of bytes read
     * @since 1.1
     */
    protected void count(final long read) {
        // Only the EOF marker is filtered out; every other value is added as-is.
        if (read != -1) {
            bytesRead += read;
        }
    }

    /**
     * Performs the given action for each element of the stream until all elements have been processed or the action throws an exception. Actions are performed
     * in the order of iteration. Exceptions thrown by the action are relayed to the caller.
     * <p>
     * The behavior of this method is unspecified if the action performs side-effects that modify the underlying source of elements, unless an overriding class
     * has specified a concurrent modification policy.
     * </p>
     *
     * @param action The action to be performed for each element
     * @throws IOException          if an I/O error occurs.
     * @throws NullPointerException if the specified action is null
     * @since 2.17.0
     */
    public void forEach(final IOConsumer<? super E> action) throws IOException {
        iterator().forEachRemaining(Objects.requireNonNull(action));
    }

    /**
     * Gets the current number of bytes read from this stream.
     *
     * @return the number of read bytes
     * @since 1.1
     */
    public long getBytesRead() {
        return bytesRead;
    }

    /**
     * Gets the Charset.
     *
     * @return the Charset.
     */
    public Charset getCharset() {
        return charset;
    }

    /**
     * Gets the current number of bytes read from this stream.
     *
     * @return the number of read bytes
     * @deprecated this method may yield wrong results for large archives, use {@link #getBytesRead()} instead.
     */
    @Deprecated
    public int getCount() {
        // Narrowing cast: the value wraps once the counter exceeds the int range, hence the deprecation.
        return (int) bytesRead;
    }

    /**
     * Gets the next Archive Entry in this Stream.
     *
     * @return the next entry, or {@code null} if there are no more entries.
     * @throws IOException if the next entry could not be read.
     */
    public abstract E getNextEntry() throws IOException;

    /**
     * Creates a new iterator over the entries of this stream.
     * <p>
     * Each call returns a fresh iterator object, but every iterator pulls its entries from this stream through {@link #getNextEntry()}, so iterators created
     * from the same stream are not independent of one another.
     * </p>
     *
     * @return a new iterator backed by {@link #getNextEntry()}.
     */
    public IOIterator<E> iterator() {
        return new ArchiveEntryIOIterator();
    }

    /**
     * Does nothing.
     *
     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
     *
     * @param readlimit ignored.
     */
    @Override
    public synchronized void mark(final int readlimit) {
        // noop
    }

    /**
     * Always returns false.
     *
     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
     *
     * @return Always returns false.
     */
    @Override
    public boolean markSupported() {
        return false;
    }

    /**
     * Decrements the counter of already read bytes.
     *
     * @param pushedBack the number of bytes pushed back.
     * @since 1.1
     */
    protected void pushedBackBytes(final long pushedBack) {
        bytesRead -= pushedBack;
    }

    /**
     * Reads a byte of data. This method will block until enough input is available.
     *
     * Simply calls the {@link #read(byte[], int, int)} method.
     *
     * MUST be overridden if the {@link #read(byte[], int, int)} method is not overridden; may be overridden otherwise.
     *
     * @return the byte read, or -1 if end of input is reached
     * @throws IOException if an I/O error has occurred
     */
    @Override
    public int read() throws IOException {
        final int num = read(single, 0, 1);
        // Any result other than -1 is treated as "one byte was stored in the scratch buffer"; the mask yields the unsigned value.
        return num == -1 ? -1 : single[0] & BYTE_MASK;
    }

    /**
     * Does nothing.
     *
     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
     *
     * @throws IOException not thrown here but may be thrown from a subclass.
     */
    @Override
    public synchronized void reset() throws IOException {
        // noop
    }
}