1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, 13 * software distributed under the License is distributed on an 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 * KIND, either express or implied. See the License for the 16 * specific language governing permissions and limitations 17 * under the License. 18 */ 19 package org.apache.commons.compress.archivers; 20 21 import java.io.FilterInputStream; 22 import java.io.IOException; 23 import java.io.InputStream; 24 import java.nio.charset.Charset; 25 import java.util.Iterator; 26 import java.util.Objects; 27 28 import org.apache.commons.io.Charsets; 29 import org.apache.commons.io.function.IOConsumer; 30 import org.apache.commons.io.function.IOIterator; 31 import org.apache.commons.io.input.NullInputStream; 32 33 /** 34 * Archive input streams <b>MUST</b> override the {@link #read(byte[], int, int)} - or {@link #read()} - method so that reading from the stream generates EOF 35 * for the end of data in each entry as well as at the end of the file proper. 36 * <p> 37 * The {@link #getNextEntry()} method is used to reset the input stream ready for reading the data from the next entry. 38 * </p> 39 * <p> 40 * The input stream classes must also implement a method with the signature: 41 * </p> 42 * <pre> 43 * public static boolean matches(byte[] signature, int length) 44 * </pre> 45 * <p> 46 * which is used by the {@link ArchiveStreamFactory} to autodetect the archive type from the first few bytes of a stream. 47 * </p> 48 * 49 * @param <E> The type of {@link ArchiveEntry} produced. 50 */ 51 public abstract class ArchiveInputStream<E extends ArchiveEntry> extends FilterInputStream { 52 53 class ArchiveEntryIOIterator implements IOIterator<E> { 54 55 private E next; 56 57 @Override 58 public boolean hasNext() throws IOException { 59 if (next == null) { 60 next = getNextEntry(); 61 } 62 return next != null; 63 } 64 65 @Override 66 public synchronized E next() throws IOException { 67 if (next != null) { 68 final E e = next; 69 next = null; 70 return e; 71 } 72 return getNextEntry(); 73 } 74 75 /** 76 * Always returns null, this is a "native" IOIterator. 77 * 78 * @return null. 79 */ 80 @Override 81 public Iterator<E> unwrap() { 82 return null; 83 } 84 85 } 86 87 private static final int BYTE_MASK = 0xFF; 88 89 private final byte[] single = new byte[1]; 90 91 /** The number of bytes read in this stream */ 92 private long bytesRead; 93 94 private Charset charset; 95 96 /** 97 * Constructs a new instance. 98 */ 99 public ArchiveInputStream() { 100 this(NullInputStream.INSTANCE, Charset.defaultCharset()); 101 } 102 103 /** 104 * Constructs a new instance. 105 * 106 * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream. 107 * @param charset charset. 108 * @since 1.26.0 109 */ 110 // This will be protected once subclasses use builders. 111 private ArchiveInputStream(final InputStream inputStream, final Charset charset) { 112 super(inputStream); 113 this.charset = Charsets.toCharset(charset); 114 } 115 116 /** 117 * Constructs a new instance. 118 * 119 * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream. 120 * @param charsetName charset name. 121 * @since 1.26.0 122 */ 123 protected ArchiveInputStream(final InputStream inputStream, final String charsetName) { 124 this(inputStream, Charsets.toCharset(charsetName)); 125 } 126 127 /** 128 * Whether this stream is able to read the given entry. 129 * <p> 130 * Some archive formats support variants or details that are not supported (yet). 131 * </p> 132 * 133 * @param archiveEntry the entry to test 134 * @return This implementation always returns true. 135 * 136 * @since 1.1 137 */ 138 public boolean canReadEntryData(final ArchiveEntry archiveEntry) { 139 return true; 140 } 141 142 /** 143 * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1) 144 * 145 * @param read the number of bytes read 146 */ 147 protected void count(final int read) { 148 count((long) read); 149 } 150 151 /** 152 * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1) 153 * 154 * @param read the number of bytes read 155 * @since 1.1 156 */ 157 protected void count(final long read) { 158 if (read != -1) { 159 bytesRead += read; 160 } 161 } 162 163 /** 164 * Performs the given action for each element of the stream until all elements have been processed or the action throws an exception. Actions are performed 165 * in the order of iteration. Exceptions thrown by the action are relayed to the caller. 166 * <p> 167 * The behavior of this method is unspecified if the action performs side-effects that modify the underlying source of elements, unless an overriding class 168 * has specified a concurrent modification policy. 169 * </p> 170 * 171 * @param action The action to be performed for each element 172 * @throws IOException if an I/O error occurs. 173 * @throws NullPointerException if the specified action is null 174 * @since 2.17.0 175 */ 176 public void forEach(final IOConsumer<? super E> action) throws IOException { 177 iterator().forEachRemaining(Objects.requireNonNull(action)); 178 } 179 180 /** 181 * Gets the current number of bytes read from this stream. 182 * 183 * @return the number of read bytes 184 * @since 1.1 185 */ 186 public long getBytesRead() { 187 return bytesRead; 188 } 189 190 /** 191 * Gets the Charest. 192 * 193 * @return the Charest. 194 */ 195 public Charset getCharset() { 196 return charset; 197 } 198 199 /** 200 * Gets the current number of bytes read from this stream. 201 * 202 * @return the number of read bytes 203 * @deprecated this method may yield wrong results for large archives, use {@link #getBytesRead()} instead. 204 */ 205 @Deprecated 206 public int getCount() { 207 return (int) bytesRead; 208 } 209 210 /** 211 * Gets the next Archive Entry in this Stream. 212 * 213 * @return the next entry, or {@code null} if there are no more entries. 214 * @throws IOException if the next entry could not be read. 215 */ 216 public abstract E getNextEntry() throws IOException; 217 218 public IOIterator<E> iterator() { 219 return new ArchiveEntryIOIterator(); 220 } 221 222 /** 223 * Does nothing. 224 * 225 * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream. 226 * 227 * @param readlimit ignored. 228 */ 229 @Override 230 public synchronized void mark(final int readlimit) { 231 // noop 232 } 233 234 /** 235 * Always returns false. 236 * 237 * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream. 238 * 239 * @return Always returns false. 240 */ 241 @Override 242 public boolean markSupported() { 243 return false; 244 } 245 246 /** 247 * Decrements the counter of already read bytes. 248 * 249 * @param pushedBack the number of bytes pushed back. 250 * @since 1.1 251 */ 252 protected void pushedBackBytes(final long pushedBack) { 253 bytesRead -= pushedBack; 254 } 255 256 /** 257 * Reads a byte of data. This method will block until enough input is available. 258 * 259 * Simply calls the {@link #read(byte[], int, int)} method. 260 * 261 * MUST be overridden if the {@link #read(byte[], int, int)} method is not overridden; may be overridden otherwise. 262 * 263 * @return the byte read, or -1 if end of input is reached 264 * @throws IOException if an I/O error has occurred 265 */ 266 @Override 267 public int read() throws IOException { 268 final int num = read(single, 0, 1); 269 return num == -1 ? -1 : single[0] & BYTE_MASK; 270 } 271 272 /** 273 * Does nothing. 274 * 275 * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream. 276 * 277 * @throws IOException not thrown here but may be thrown from a subclass. 278 */ 279 @Override 280 public synchronized void reset() throws IOException { 281 // noop 282 } 283 }