View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.archivers;
20  
21  import java.io.FilterInputStream;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.nio.charset.Charset;
25  import java.util.Iterator;
26  import java.util.Objects;
27  
28  import org.apache.commons.io.Charsets;
29  import org.apache.commons.io.function.IOConsumer;
30  import org.apache.commons.io.function.IOIterator;
31  import org.apache.commons.io.input.NullInputStream;
32  
33  /**
34   * Archive input streams <b>MUST</b> override the {@link #read(byte[], int, int)} - or {@link #read()} - method so that reading from the stream generates EOF
35   * for the end of data in each entry as well as at the end of the file proper.
36   * <p>
37   * The {@link #getNextEntry()} method is used to reset the input stream ready for reading the data from the next entry.
38   * </p>
39   * <p>
40   * The input stream classes must also implement a method with the signature:
41   * </p>
42   * <pre>
43   * public static boolean matches(byte[] signature, int length)
44   * </pre>
45   * <p>
46   * which is used by the {@link ArchiveStreamFactory} to autodetect the archive type from the first few bytes of a stream.
47   * </p>
48   *
49   * @param <E> The type of {@link ArchiveEntry} produced.
50   */
51  public abstract class ArchiveInputStream<E extends ArchiveEntry> extends FilterInputStream {
52  
53      class ArchiveEntryIOIterator implements IOIterator<E> {
54  
55          private E next;
56  
57          @Override
58          public boolean hasNext() throws IOException {
59              if (next == null) {
60                  next = getNextEntry();
61              }
62              return next != null;
63          }
64  
65          @Override
66          public synchronized E next() throws IOException {
67              if (next != null) {
68                  final E e = next;
69                  next = null;
70                  return e;
71              }
72              return getNextEntry();
73          }
74  
75          /**
76           * Always returns null, this is a "native" IOIterator.
77           *
78           * @return null.
79           */
80          @Override
81          public Iterator<E> unwrap() {
82              return null;
83          }
84  
85      }
86  
87      private static final int BYTE_MASK = 0xFF;
88  
89      private final byte[] single = new byte[1];
90  
91      /** The number of bytes read in this stream */
92      private long bytesRead;
93  
94      private Charset charset;
95  
96      /**
97       * Constructs a new instance.
98       */
99      public ArchiveInputStream() {
100         this(NullInputStream.INSTANCE, Charset.defaultCharset());
101     }
102 
103     /**
104      * Constructs a new instance.
105      *
106      * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
107      * @param charset charset.
108      * @since 1.26.0
109      */
110     // This will be protected once subclasses use builders.
111     private ArchiveInputStream(final InputStream inputStream, final Charset charset) {
112         super(inputStream);
113         this.charset = Charsets.toCharset(charset);
114     }
115 
116     /**
117      * Constructs a new instance.
118      *
119      * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
120      * @param charsetName charset name.
121      * @since 1.26.0
122      */
123     protected ArchiveInputStream(final InputStream inputStream, final String charsetName) {
124         this(inputStream, Charsets.toCharset(charsetName));
125     }
126 
127     /**
128      * Whether this stream is able to read the given entry.
129      * <p>
130      * Some archive formats support variants or details that are not supported (yet).
131      * </p>
132      *
133      * @param archiveEntry the entry to test
134      * @return This implementation always returns true.
135      *
136      * @since 1.1
137      */
138     public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
139         return true;
140     }
141 
142     /**
143      * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1)
144      *
145      * @param read the number of bytes read
146      */
147     protected void count(final int read) {
148         count((long) read);
149     }
150 
151     /**
152      * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1)
153      *
154      * @param read the number of bytes read
155      * @since 1.1
156      */
157     protected void count(final long read) {
158         if (read != -1) {
159             bytesRead += read;
160         }
161     }
162 
163     /**
164      * Performs the given action for each element of the stream until all elements have been processed or the action throws an exception. Actions are performed
165      * in the order of iteration. Exceptions thrown by the action are relayed to the caller.
166      * <p>
167      * The behavior of this method is unspecified if the action performs side-effects that modify the underlying source of elements, unless an overriding class
168      * has specified a concurrent modification policy.
169      * </p>
170      *
171      * @param action The action to be performed for each element
172      * @throws IOException          if an I/O error occurs.
173      * @throws NullPointerException if the specified action is null
174      * @since 2.17.0
175      */
176     public void forEach(final IOConsumer<? super E> action) throws IOException {
177         iterator().forEachRemaining(Objects.requireNonNull(action));
178     }
179 
180     /**
181      * Gets the current number of bytes read from this stream.
182      *
183      * @return the number of read bytes
184      * @since 1.1
185      */
186     public long getBytesRead() {
187         return bytesRead;
188     }
189 
190     /**
191      * Gets the Charest.
192      *
193      * @return the Charest.
194      */
195     public Charset getCharset() {
196         return charset;
197     }
198 
199     /**
200      * Gets the current number of bytes read from this stream.
201      *
202      * @return the number of read bytes
203      * @deprecated this method may yield wrong results for large archives, use {@link #getBytesRead()} instead.
204      */
205     @Deprecated
206     public int getCount() {
207         return (int) bytesRead;
208     }
209 
210     /**
211      * Gets the next Archive Entry in this Stream.
212      *
213      * @return the next entry, or {@code null} if there are no more entries.
214      * @throws IOException if the next entry could not be read.
215      */
216     public abstract E getNextEntry() throws IOException;
217 
218     public IOIterator<E> iterator() {
219         return new ArchiveEntryIOIterator();
220     }
221 
222     /**
223      * Does nothing.
224      *
225      * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
226      *
227      * @param readlimit ignored.
228      */
229     @Override
230     public synchronized void mark(final int readlimit) {
231         // noop
232     }
233 
234     /**
235      * Always returns false.
236      *
237      * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
238      *
239      * @return Always returns false.
240      */
241     @Override
242     public boolean markSupported() {
243         return false;
244     }
245 
246     /**
247      * Decrements the counter of already read bytes.
248      *
249      * @param pushedBack the number of bytes pushed back.
250      * @since 1.1
251      */
252     protected void pushedBackBytes(final long pushedBack) {
253         bytesRead -= pushedBack;
254     }
255 
256     /**
257      * Reads a byte of data. This method will block until enough input is available.
258      *
259      * Simply calls the {@link #read(byte[], int, int)} method.
260      *
261      * MUST be overridden if the {@link #read(byte[], int, int)} method is not overridden; may be overridden otherwise.
262      *
263      * @return the byte read, or -1 if end of input is reached
264      * @throws IOException if an I/O error has occurred
265      */
266     @Override
267     public int read() throws IOException {
268         final int num = read(single, 0, 1);
269         return num == -1 ? -1 : single[0] & BYTE_MASK;
270     }
271 
272     /**
273      * Does nothing.
274      *
275      * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
276      *
277      * @throws IOException not thrown here but may be thrown from a subclass.
278      */
279     @Override
280     public synchronized void reset() throws IOException {
281         // noop
282     }
283 }