001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.io.input; 019 020import static org.apache.commons.io.IOUtils.EOF; 021 022import java.io.IOException; 023import java.io.InputStream; 024import java.nio.ByteBuffer; 025import java.nio.CharBuffer; 026import java.nio.charset.CharacterCodingException; 027import java.nio.charset.Charset; 028import java.nio.charset.CharsetEncoder; 029import java.nio.charset.CoderResult; 030import java.nio.charset.CodingErrorAction; 031import java.util.Objects; 032 033import org.apache.commons.io.Charsets; 034import org.apache.commons.io.IOUtils; 035import org.apache.commons.io.build.AbstractStreamBuilder; 036import org.apache.commons.io.charset.CharsetEncoders; 037import org.apache.commons.io.function.Uncheck; 038 039/** 040 * Implements an {@link InputStream} to read bytes from String, StringBuffer, StringBuilder or CharBuffer, 041 * encoded using the specified Charset. The Charset defaults to Charset.defaultCharset(). 042 * <p> 043 * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}. 044 * </p> 045 * <p> 046 * To build an instance, use {@link Builder}. 047 * </p> 048 * 049 * @see Builder 050 * @since 2.2 051 */ 052public class CharSequenceInputStream extends InputStream { 053 054 //@formatter:off 055 /** 056 * Builds a new {@link CharSequenceInputStream}. 057 * 058 * <p> 059 * For example: 060 * </p> 061 * <h2>Using a Charset</h2> 062 * <pre>{@code 063 * CharSequenceInputStream s = CharSequenceInputStream.builder() 064 * .setBufferSize(8192) 065 * .setCharSequence("String") 066 * .setCharset(Charset.defaultCharset()) 067 * .get();} 068 * </pre> 069 * <h2>Using a CharsetEncoder</h2> 070 * <pre>{@code 071 * CharSequenceInputStream s = CharSequenceInputStream.builder() 072 * .setBufferSize(8192) 073 * .setCharSequence("String") 074 * .setCharsetEncoder(Charset.defaultCharset().newEncoder() 075 * .onMalformedInput(CodingErrorAction.REPLACE) 076 * .onUnmappableCharacter(CodingErrorAction.REPLACE)) 077 * .get();} 078 * </pre> 079 * 080 * @see #get() 081 * @since 2.13.0 082 */ 083 //@formatter:on 084 public static class Builder extends AbstractStreamBuilder<CharSequenceInputStream, Builder> { 085 086 private CharsetEncoder charsetEncoder = newEncoder(getCharset()); 087 088 /** 089 * Builds a new {@link CharSequenceInputStream}. 090 * <p> 091 * You must set input that supports {@link #getCharSequence()}, otherwise, this method throws an exception. 092 * </p> 093 * <p> 094 * This builder use the following aspects: 095 * </p> 096 * <ul> 097 * <li>{@link #getCharSequence()}</li> 098 * <li>{@link #getBufferSize()}</li> 099 * <li>{@link CharsetEncoder}</li> 100 * </ul> 101 * 102 * @return a new instance. 103 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. 104 */ 105 @Override 106 public CharSequenceInputStream get() { 107 return Uncheck.get(() -> new CharSequenceInputStream(getCharSequence(), getBufferSize(), charsetEncoder)); 108 } 109 110 CharsetEncoder getCharsetEncoder() { 111 return charsetEncoder; 112 } 113 114 @Override 115 public Builder setCharset(final Charset charset) { 116 super.setCharset(charset); 117 charsetEncoder = newEncoder(getCharset()); 118 return this; 119 } 120 121 /** 122 * Sets the charset encoder. Assumes that the caller has configured the encoder. 123 * 124 * @param newEncoder the charset encoder. 125 * @return {@code this} instance. 126 * @since 2.13.0 127 */ 128 public Builder setCharsetEncoder(final CharsetEncoder newEncoder) { 129 charsetEncoder = CharsetEncoders.toCharsetEncoder(newEncoder, () -> newEncoder(getCharsetDefault())); 130 super.setCharset(charsetEncoder.charset()); 131 return this; 132 } 133 134 } 135 136 private static final int NO_MARK = -1; 137 138 /** 139 * Constructs a new {@link Builder}. 140 * 141 * @return a new {@link Builder}. 142 * @since 2.12.0 143 */ 144 public static Builder builder() { 145 return new Builder(); 146 } 147 148 private static CharsetEncoder newEncoder(final Charset charset) { 149 // @formatter:off 150 return Charsets.toCharset(charset).newEncoder() 151 .onMalformedInput(CodingErrorAction.REPLACE) 152 .onUnmappableCharacter(CodingErrorAction.REPLACE); 153 // @formatter:on 154 } 155 156 private final ByteBuffer bBuf; 157 private int bBufMark; // position in bBuf 158 private final CharBuffer cBuf; 159 private int cBufMark; // position in cBuf 160 private final CharsetEncoder charsetEncoder; 161 162 /** 163 * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}. 164 * 165 * @param cs the input character sequence. 166 * @param charset the character set name to use. 167 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. 168 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 169 */ 170 @Deprecated 171 public CharSequenceInputStream(final CharSequence cs, final Charset charset) { 172 this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE); 173 } 174 175 /** 176 * Constructs a new instance. 177 * 178 * @param cs the input character sequence. 179 * @param charset the character set name to use, null maps to the default Charset. 180 * @param bufferSize the buffer size to use. 181 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. 182 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 183 */ 184 @Deprecated 185 public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) { 186 // @formatter:off 187 this(cs, bufferSize, newEncoder(charset)); 188 // @formatter:on 189 } 190 191 private CharSequenceInputStream(final CharSequence cs, final int bufferSize, final CharsetEncoder charsetEncoder) { 192 this.charsetEncoder = charsetEncoder; 193 // Ensure that buffer is long enough to hold a complete character 194 this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(charsetEncoder, bufferSize)); 195 this.bBuf.flip(); 196 this.cBuf = CharBuffer.wrap(cs); 197 this.cBufMark = NO_MARK; 198 this.bBufMark = NO_MARK; 199 try { 200 fillBuffer(); 201 } catch (final CharacterCodingException ex) { 202 // Reset everything without filling the buffer 203 // so the same exception can be thrown again later. 204 this.bBuf.clear(); 205 this.bBuf.flip(); 206 this.cBuf.rewind(); 207 } 208 } 209 210 /** 211 * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}. 212 * 213 * @param cs the input character sequence. 214 * @param charset the character set name to use. 215 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. 216 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 217 */ 218 @Deprecated 219 public CharSequenceInputStream(final CharSequence cs, final String charset) { 220 this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE); 221 } 222 223 /** 224 * Constructs a new instance. 225 * 226 * @param cs the input character sequence. 227 * @param charset the character set name to use, null maps to the default Charset. 228 * @param bufferSize the buffer size to use. 229 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. 230 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 231 */ 232 @Deprecated 233 public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) { 234 this(cs, Charsets.toCharset(charset), bufferSize); 235 } 236 237 /** 238 * Gets a lower bound on the number of bytes remaining in the byte stream. 239 * 240 * @return the count of bytes that can be read without blocking (or returning EOF). 241 * @throws IOException if an error occurs (probably not possible). 242 */ 243 @Override 244 public int available() throws IOException { 245 return this.bBuf.remaining(); 246 } 247 248 @Override 249 public void close() throws IOException { 250 bBuf.position(bBuf.limit()); 251 } 252 253 /** 254 * Fills the byte output buffer from the input char buffer. 255 * 256 * @throws CharacterCodingException 257 * an error encoding data. 258 */ 259 private void fillBuffer() throws CharacterCodingException { 260 this.bBuf.compact(); 261 final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true); 262 if (result.isError()) { 263 result.throwException(); 264 } 265 this.bBuf.flip(); 266 } 267 268 /** 269 * Gets the CharsetEncoder. 270 * 271 * @return the CharsetEncoder. 272 */ 273 CharsetEncoder getCharsetEncoder() { 274 return charsetEncoder; 275 } 276 277 /** 278 * {@inheritDoc} 279 * @param readLimit max read limit (ignored). 280 */ 281 @Override 282 public synchronized void mark(final int readLimit) { 283 this.cBufMark = this.cBuf.position(); 284 this.bBufMark = this.bBuf.position(); 285 this.cBuf.mark(); 286 this.bBuf.mark(); 287 // It would be nice to be able to use mark & reset on the cBuf and bBuf; 288 // however the bBuf is re-used so that won't work 289 } 290 291 @Override 292 public boolean markSupported() { 293 return true; 294 } 295 296 @Override 297 public int read() throws IOException { 298 for (;;) { 299 if (this.bBuf.hasRemaining()) { 300 return this.bBuf.get() & 0xFF; 301 } 302 fillBuffer(); 303 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) { 304 return EOF; 305 } 306 } 307 } 308 309 @Override 310 public int read(final byte[] b) throws IOException { 311 return read(b, 0, b.length); 312 } 313 314 @Override 315 public int read(final byte[] array, int off, int len) throws IOException { 316 Objects.requireNonNull(array, "array"); 317 if (len < 0 || off + len > array.length) { 318 throw new IndexOutOfBoundsException("Array Size=" + array.length + ", offset=" + off + ", length=" + len); 319 } 320 if (len == 0) { 321 return 0; // must return 0 for zero length read 322 } 323 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) { 324 return EOF; 325 } 326 int bytesRead = 0; 327 while (len > 0) { 328 if (this.bBuf.hasRemaining()) { 329 final int chunk = Math.min(this.bBuf.remaining(), len); 330 this.bBuf.get(array, off, chunk); 331 off += chunk; 332 len -= chunk; 333 bytesRead += chunk; 334 } else { 335 fillBuffer(); 336 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) { 337 break; 338 } 339 } 340 } 341 return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead; 342 } 343 344 @Override 345 public synchronized void reset() throws IOException { 346 // 347 // This is not the most efficient implementation, as it re-encodes from the beginning. 348 // 349 // Since the bBuf is re-used, in general it's necessary to re-encode the data. 350 // 351 // It should be possible to apply some optimizations however: 352 // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since 353 // the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is 354 // valid otherwise. + Try saving the state of the cBuf before each fillBuffer; it might be possible to 355 // restart from there. 356 // 357 if (this.cBufMark != NO_MARK) { 358 // if cBuf is at 0, we have not started reading anything, so skip re-encoding 359 if (this.cBuf.position() != 0) { 360 this.charsetEncoder.reset(); 361 this.cBuf.rewind(); 362 this.bBuf.rewind(); 363 this.bBuf.limit(0); // rewind does not clear the buffer 364 while (this.cBuf.position() < this.cBufMark) { 365 this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing) 366 this.bBuf.limit(0); 367 fillBuffer(); 368 } 369 } 370 if (this.cBuf.position() != this.cBufMark) { 371 throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " + 372 "expected=" + this.cBufMark); 373 } 374 this.bBuf.position(this.bBufMark); 375 this.cBufMark = NO_MARK; 376 this.bBufMark = NO_MARK; 377 } 378 mark(0); 379 } 380 381 @Override 382 public long skip(long n) throws IOException { 383 // 384 // This could be made more efficient by using position to skip within the current buffer. 385 // 386 long skipped = 0; 387 while (n > 0 && available() > 0) { 388 this.read(); 389 n--; 390 skipped++; 391 } 392 return skipped; 393 } 394 395}