// ArchiveInputStream.java
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.archivers;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.Objects;
import org.apache.commons.io.Charsets;
import org.apache.commons.io.function.IOConsumer;
import org.apache.commons.io.function.IOIterator;
import org.apache.commons.io.input.NullInputStream;
/**
* Archive input streams <b>MUST</b> override the {@link #read(byte[], int, int)} - or {@link #read()} - method so that reading from the stream generates EOF
* for the end of data in each entry as well as at the end of the file proper.
* <p>
* The {@link #getNextEntry()} method is used to reset the input stream ready for reading the data from the next entry.
* </p>
* <p>
* The input stream classes must also implement a method with the signature:
* </p>
* <pre>
* public static boolean matches(byte[] signature, int length)
* </pre>
* <p>
* which is used by the {@link ArchiveStreamFactory} to autodetect the archive type from the first few bytes of a stream.
* </p>
*
* @param <E> The type of {@link ArchiveEntry} produced.
*/
public abstract class ArchiveInputStream<E extends ArchiveEntry> extends FilterInputStream {

    /**
     * An {@link IOIterator} over the entries of the enclosing stream, driven by {@link ArchiveInputStream#getNextEntry()}.
     * <p>
     * {@link #hasNext()} has to read ahead by one entry; that entry is cached in {@link #next} until {@link #next()} hands it out. Both methods synchronize on
     * this iterator because both read and write the cached entry.
     * </p>
     */
    class ArchiveEntryIOIterator implements IOIterator<E> {

        /** The entry fetched by {@link #hasNext()} and not yet returned by {@link #next()}, or {@code null} if there is none. */
        private E next;

        /**
         * Tests whether another entry is available, fetching and caching it from the enclosing stream if no entry is currently cached.
         *
         * @return {@code true} if an entry is cached or {@link ArchiveInputStream#getNextEntry()} returned a non-null entry.
         * @throws IOException if the next entry could not be read.
         */
        @Override
        public synchronized boolean hasNext() throws IOException {
            // Synchronized like next(): both methods read and write the cached look-ahead entry.
            if (next == null) {
                next = getNextEntry();
            }
            return next != null;
        }

        /**
         * Returns the entry cached by {@link #hasNext()} if there is one, otherwise asks the enclosing stream for its next entry.
         *
         * @return the next entry; this is whatever {@link ArchiveInputStream#getNextEntry()} returns when no entry is cached, which is {@code null} once there
         *         are no more entries.
         * @throws IOException if the next entry could not be read.
         */
        @Override
        public synchronized E next() throws IOException {
            if (next != null) {
                // Hand out the look-ahead entry exactly once.
                final E e = next;
                next = null;
                return e;
            }
            return getNextEntry();
        }

        /**
         * Always returns null, this is a "native" IOIterator.
         *
         * @return null.
         */
        @Override
        public Iterator<E> unwrap() {
            return null;
        }
    }

    /** Mask used to convert a signed byte into an unsigned int in the range 0-255. */
    private static final int BYTE_MASK = 0xFF;

    /** Scratch buffer used by {@link #read()} to delegate to {@link #read(byte[], int, int)}. */
    private final byte[] single = new byte[1];

    /** The number of bytes read in this stream */
    private long bytesRead;

    /** The Charset of this stream, set once at construction time. */
    private final Charset charset;

    /**
     * Constructs a new instance.
     */
    public ArchiveInputStream() {
        this(NullInputStream.INSTANCE, Charset.defaultCharset());
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
     * @param charset     charset.
     * @since 1.26.0
     */
    // This will be protected once subclasses use builders.
    private ArchiveInputStream(final InputStream inputStream, final Charset charset) {
        super(inputStream);
        this.charset = Charsets.toCharset(charset);
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
     * @param charsetName charset name.
     * @since 1.26.0
     */
    protected ArchiveInputStream(final InputStream inputStream, final String charsetName) {
        this(inputStream, Charsets.toCharset(charsetName));
    }

    /**
     * Whether this stream is able to read the given entry.
     * <p>
     * Some archive formats support variants or details that are not supported (yet).
     * </p>
     *
     * @param archiveEntry the entry to test
     * @return This implementation always returns true.
     *
     * @since 1.1
     */
    public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
        return true;
    }

    /**
     * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1)
     *
     * @param read the number of bytes read
     */
    protected void count(final int read) {
        count((long) read);
    }

    /**
     * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1)
     *
     * @param read the number of bytes read
     * @since 1.1
     */
    protected void count(final long read) {
        if (read != -1) {
            bytesRead += read;
        }
    }

    /**
     * Performs the given action for each element of the stream until all elements have been processed or the action throws an exception. Actions are performed
     * in the order of iteration. Exceptions thrown by the action are relayed to the caller.
     * <p>
     * The behavior of this method is unspecified if the action performs side-effects that modify the underlying source of elements, unless an overriding class
     * has specified a concurrent modification policy.
     * </p>
     *
     * @param action The action to be performed for each element
     * @throws IOException          if an I/O error occurs.
     * @throws NullPointerException if the specified action is null
     * @since 2.17.0
     */
    public void forEach(final IOConsumer<? super E> action) throws IOException {
        iterator().forEachRemaining(Objects.requireNonNull(action));
    }

    /**
     * Gets the current number of bytes read from this stream.
     *
     * @return the number of read bytes
     * @since 1.1
     */
    public long getBytesRead() {
        return bytesRead;
    }

    /**
     * Gets the Charset.
     *
     * @return the Charset.
     */
    public Charset getCharset() {
        return charset;
    }

    /**
     * Gets the current number of bytes read from this stream.
     *
     * @return the number of read bytes
     * @deprecated this method may yield wrong results for large archives, use {@link #getBytesRead()} instead.
     */
    @Deprecated
    public int getCount() {
        return (int) bytesRead;
    }

    /**
     * Gets the next Archive Entry in this Stream.
     *
     * @return the next entry, or {@code null} if there are no more entries.
     * @throws IOException if the next entry could not be read.
     */
    public abstract E getNextEntry() throws IOException;

    /**
     * Creates a new {@link IOIterator} over the entries of this stream.
     * <p>
     * The iterator obtains its elements by calling {@link #getNextEntry()} on this stream, so every iterator created by this method advances this same stream.
     * </p>
     *
     * @return a new iterator over the entries of this stream.
     */
    public IOIterator<E> iterator() {
        return new ArchiveEntryIOIterator();
    }

    /**
     * Does nothing.
     *
     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
     *
     * @param readlimit ignored.
     */
    @Override
    public synchronized void mark(final int readlimit) {
        // noop
    }

    /**
     * Always returns false.
     *
     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
     *
     * @return Always returns false.
     */
    @Override
    public boolean markSupported() {
        return false;
    }

    /**
     * Decrements the counter of already read bytes.
     *
     * @param pushedBack the number of bytes pushed back.
     * @since 1.1
     */
    protected void pushedBackBytes(final long pushedBack) {
        bytesRead -= pushedBack;
    }

    /**
     * Reads a byte of data. This method will block until enough input is available.
     *
     * Simply calls the {@link #read(byte[], int, int)} method.
     *
     * MUST be overridden if the {@link #read(byte[], int, int)} method is not overridden; may be overridden otherwise.
     *
     * @return the byte read, or -1 if end of input is reached
     * @throws IOException if an I/O error has occurred
     */
    @Override
    public int read() throws IOException {
        final int num = read(single, 0, 1);
        // Mask the byte so that values 0x80-0xFF are returned as 128-255 rather than as negative ints.
        return num == -1 ? -1 : single[0] & BYTE_MASK;
    }

    /**
     * Does nothing.
     *
     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
     *
     * @throws IOException not thrown here but may be thrown from a subclass.
     */
    @Override
    public synchronized void reset() throws IOException {
        // noop
    }
}