/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers;

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.Objects;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.function.IOConsumer;
import org.apache.commons.io.function.IOIterator;
import org.apache.commons.io.input.NullInputStream;

/**
 * Archive input streams <b>MUST</b> override the {@link #read(byte[], int, int)} - or {@link #read()} - method so that reading from the stream generates EOF
 * for the end of data in each entry as well as at the end of the file proper.
 * <p>
 * The {@link #getNextEntry()} method is used to reset the input stream ready for reading the data from the next entry.
 * </p>
 * <p>
 * The input stream classes must also implement a method with the signature:
 * </p>
 * <pre>
 * public static boolean matches(byte[] signature, int length)
 * </pre>
 * <p>
 * which is used by the {@link ArchiveStreamFactory} to autodetect the archive type from the first few bytes of a stream.
 * </p>
 *
 * @param <E> The type of {@link ArchiveEntry} produced.
 */
public abstract class ArchiveInputStream<E extends ArchiveEntry> extends FilterInputStream {

    /**
     * Iterates over the entries of the enclosing stream by calling {@link ArchiveInputStream#getNextEntry()}.
     * <p>
     * An entry fetched by {@link #hasNext()} is buffered in this iterator until {@link #next()} hands it out.
     * </p>
     */
    class ArchiveEntryIOIterator implements IOIterator<E> {

        /** The entry fetched by {@link #hasNext()} and not yet returned by {@link #next()}, or {@code null} if none is buffered. */
        private E next;

        /**
         * Tests whether another entry is available, fetching and buffering it from the enclosing stream if none is buffered yet.
         *
         * @return {@code true} if an entry is buffered or {@link ArchiveInputStream#getNextEntry()} returned a non-null entry.
         * @throws IOException if the next entry could not be read.
         */
        @Override
        public synchronized boolean hasNext() throws IOException {
            // Synchronized like next() so that both accessors of the buffered entry use the same monitor.
            if (next == null) {
                next = getNextEntry();
            }
            return next != null;
        }

        /**
         * Returns the buffered entry if there is one, otherwise the result of {@link ArchiveInputStream#getNextEntry()}.
         *
         * @return the next entry, or {@code null} if {@link ArchiveInputStream#getNextEntry()} returned {@code null}.
         * @throws IOException if the next entry could not be read.
         */
        @Override
        public synchronized E next() throws IOException {
            if (next != null) {
                final E e = next;
                // Clear the buffer so the same entry is not handed out twice.
                next = null;
                return e;
            }
            return getNextEntry();
        }

        /**
         * Always returns null, this is a "native" IOIterator.
         *
         * @return null.
         */
        @Override
        public Iterator<E> unwrap() {
            return null;
        }

    }

    /** Mask that converts a signed {@code byte} to its unsigned value in the range 0-255. */
    private static final int BYTE_MASK = 0xFF;

    /** Scratch buffer used by {@link #read()} to delegate to {@link #read(byte[], int, int)}. */
    private final byte[] single = new byte[1];

    /** The number of bytes read in this stream */
    private long bytesRead;

    /** The charset of this stream; set once at construction time. */
    private final Charset charset;

    /**
     * Constructs a new instance.
     */
    public ArchiveInputStream() {
        this(NullInputStream.INSTANCE, Charset.defaultCharset());
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
     * @param charset     charset.
     * @since 1.26.0
     */
    // This will be protected once subclasses use builders.
    private ArchiveInputStream(final InputStream inputStream, final Charset charset) {
        super(inputStream);
        this.charset = Charsets.toCharset(charset);
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
     * @param charsetName charset name.
     * @since 1.26.0
     */
    protected ArchiveInputStream(final InputStream inputStream, final String charsetName) {
        this(inputStream, Charsets.toCharset(charsetName));
    }

    /**
     * Whether this stream is able to read the given entry.
     * <p>
     * Some archive formats support variants or details that are not supported (yet).
     * </p>
     *
     * @param archiveEntry the entry to test
     * @return This implementation always returns true.
     *
     * @since 1.1
     */
    public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
        return true;
    }

    /**
     * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1)
     *
     * @param read the number of bytes read
     */
    protected void count(final int read) {
        count((long) read);
    }

    /**
     * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1)
     *
     * @param read the number of bytes read
     * @since 1.1
     */
    protected void count(final long read) {
        if (read != -1) {
            bytesRead += read;
        }
    }

    /**
     * Performs the given action for each element of the stream until all elements have been processed or the action throws an exception. Actions are performed
     * in the order of iteration. Exceptions thrown by the action are relayed to the caller.
     * <p>
     * The behavior of this method is unspecified if the action performs side-effects that modify the underlying source of elements, unless an overriding class
     * has specified a concurrent modification policy.
     * </p>
     *
     * @param action The action to be performed for each element
     * @throws IOException          if an I/O error occurs.
     * @throws NullPointerException if the specified action is null
     * @since 2.17.0
     */
    public void forEach(final IOConsumer<? super E> action) throws IOException {
        iterator().forEachRemaining(Objects.requireNonNull(action));
    }

    /**
     * Gets the current number of bytes read from this stream.
     *
     * @return the number of read bytes
     * @since 1.1
     */
    public long getBytesRead() {
        return bytesRead;
    }

    /**
     * Gets the Charset.
     *
     * @return the Charset.
     */
    public Charset getCharset() {
        return charset;
    }

    /**
     * Gets the current number of bytes read from this stream.
     *
     * @return the number of read bytes
     * @deprecated this method may yield wrong results for large archives, use {@link #getBytesRead()} instead.
     */
    @Deprecated
    public int getCount() {
        // Narrowing cast: the value is truncated once the byte counter exceeds the int range.
        return (int) bytesRead;
    }

    /**
     * Gets the next Archive Entry in this Stream.
     *
     * @return the next entry, or {@code null} if there are no more entries.
     * @throws IOException if the next entry could not be read.
     */
    public abstract E getNextEntry() throws IOException;

    /**
     * Creates a new iterator over the entries of this stream.
     * <p>
     * Every call returns a new iterator instance; each of them obtains its entries by calling {@link #getNextEntry()} on this stream.
     * </p>
     *
     * @return a new iterator backed by {@link #getNextEntry()}.
     */
    public IOIterator<E> iterator() {
        return new ArchiveEntryIOIterator();
    }

    /**
     * Does nothing.
     * <p>
     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
     * </p>
     *
     * @param readlimit ignored.
     */
    @Override
    public synchronized void mark(final int readlimit) {
        // noop
    }

    /**
     * Always returns false.
     * <p>
     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
     * </p>
     *
     * @return Always returns false.
     */
    @Override
    public boolean markSupported() {
        return false;
    }

    /**
     * Decrements the counter of already read bytes.
     *
     * @param pushedBack the number of bytes pushed back.
     * @since 1.1
     */
    protected void pushedBackBytes(final long pushedBack) {
        bytesRead -= pushedBack;
    }

    /**
     * Reads a byte of data. This method will block until enough input is available.
     * <p>
     * Simply calls the {@link #read(byte[], int, int)} method.
     * </p>
     * <p>
     * MUST be overridden if the {@link #read(byte[], int, int)} method is not overridden; may be overridden otherwise.
     * </p>
     *
     * @return the byte read, or -1 if end of input is reached
     * @throws IOException if an I/O error has occurred
     */
    @Override
    public int read() throws IOException {
        final int num = read(single, 0, 1);
        // Mask the signed byte so the result is in the range 0-255 and cannot be confused with the -1 EOF marker.
        return num == -1 ? -1 : single[0] & BYTE_MASK;
    }

    /**
     * Does nothing.
     * <p>
     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
     * </p>
     *
     * @throws IOException not thrown here but may be thrown from a subclass.
     */
    @Override
    public synchronized void reset() throws IOException {
        // noop
    }
}