/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.io.input;

import static org.apache.commons.io.IOUtils.EOF;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.Objects;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.charset.CharsetEncoders;
import org.apache.commons.io.function.Uncheck;

/**
 * Implements an {@link InputStream} to read bytes from String, StringBuffer, StringBuilder or CharBuffer,
 * encoded using the specified Charset. The Charset defaults to Charset.defaultCharset().
 * <p>
 * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
 * </p>
 * <p>
 * To build an instance, use {@link Builder}.
 * </p>
 *
 * @see Builder
 * @since 2.2
 */
public class CharSequenceInputStream extends InputStream {

    //@formatter:off
    /**
     * Builds a new {@link CharSequenceInputStream}.
     *
     * <p>
     * For example:
     * </p>
     * <h2>Using a Charset</h2>
     * <pre>{@code
     * CharSequenceInputStream s = CharSequenceInputStream.builder()
     *   .setBufferSize(8192)
     *   .setCharSequence("String")
     *   .setCharset(Charset.defaultCharset())
     *   .get();}
     * </pre>
     * <h2>Using a CharsetEncoder</h2>
     * <pre>{@code
     * CharSequenceInputStream s = CharSequenceInputStream.builder()
     *   .setBufferSize(8192)
     *   .setCharSequence("String")
     *   .setCharsetEncoder(Charset.defaultCharset().newEncoder()
     *     .onMalformedInput(CodingErrorAction.REPLACE)
     *     .onUnmappableCharacter(CodingErrorAction.REPLACE))
     *   .get();}
     * </pre>
     *
     * @see #get()
     * @since 2.13.0
     */
    //@formatter:on
    public static class Builder extends AbstractStreamBuilder<CharSequenceInputStream, Builder> {

        /** The encoder handed to the stream; starts as a replacing encoder for the builder's initial charset. */
        private CharsetEncoder charsetEncoder = newEncoder(getCharset());

        /**
         * Constructs a new builder of {@link CharSequenceInputStream}.
         */
        public Builder() {
            // empty
        }

        /**
         * Builds a new {@link CharSequenceInputStream}.
         * <p>
         * You must set an aspect that supports {@link #getCharSequence()}, otherwise, this method throws an exception.
         * </p>
         * <p>
         * This builder uses the following aspects:
         * </p>
         * <ul>
         * <li>{@link #getCharSequence()} gets the target aspect.</li>
         * <li>{@link #getBufferSize()}</li>
         * <li>{@link CharsetEncoder}</li>
         * </ul>
         *
         * @return a new instance.
         * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
         * @see #getUnchecked()
         */
        @Override
        public CharSequenceInputStream get() {
            return Uncheck.get(() -> new CharSequenceInputStream(getCharSequence(), getBufferSize(), charsetEncoder));
        }

        /**
         * Gets the charset encoder currently configured on this builder.
         *
         * @return the charset encoder.
         */
        CharsetEncoder getCharsetEncoder() {
            return charsetEncoder;
        }

        /**
         * Sets the charset and replaces the current encoder with a new one for that charset. The new encoder
         * replaces malformed input and unmappable characters instead of reporting them.
         *
         * @param charset the charset to use.
         * @return {@code this} instance.
         */
        @Override
        public Builder setCharset(final Charset charset) {
            super.setCharset(charset);
            // Re-read the charset from the superclass so that whatever normalization it applied is honored.
            charsetEncoder = newEncoder(getCharset());
            return this;
        }

        /**
         * Sets the charset encoder. Assumes that the caller has configured the encoder.
         * <p>
         * The builder's charset is updated to the charset of the resulting encoder so both stay consistent.
         * </p>
         *
         * @param newEncoder the charset encoder.
         * @return {@code this} instance.
         * @since 2.13.0
         */
        public Builder setCharsetEncoder(final CharsetEncoder newEncoder) {
            // The supplier provides a replacing encoder for the default charset as the fallback
            // (presumably used when newEncoder is null; see CharsetEncoders.toCharsetEncoder).
            charsetEncoder = CharsetEncoders.toCharsetEncoder(newEncoder, () -> newEncoder(getCharsetDefault()));
            super.setCharset(charsetEncoder.charset());
            return this;
        }

    }

    /** Sentinel stored in {@link #cBufMark} and {@link #bBufMark} when no mark is set. */
    private static final int NO_MARK = -1;

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Creates an encoder for the given charset that replaces malformed input and unmappable characters.
     *
     * @param charset the charset, converted through {@link Charsets#toCharset(Charset)}.
     * @return a new encoder.
     */
    private static CharsetEncoder newEncoder(final Charset charset) {
        // @formatter:off
        return Charsets.toCharset(charset).newEncoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE);
        // @formatter:on
    }

    /** Encoded bytes waiting to be read; kept in read mode (position..limit is unread data) between calls. */
    private final ByteBuffer bBuf;
    private int bBufMark; // position in bBuf
    /** View of the source characters; its position is the next character to encode. */
    private final CharBuffer cBuf;
    private int cBufMark; // position in cBuf
    private final CharsetEncoder charsetEncoder;

    /**
     * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
     *
     * @param cs the input character sequence.
     * @param charset the character set to use, null maps to the default Charset.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
        this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
    }

    /**
     * Constructs a new instance.
     *
     * @param cs the input character sequence.
     * @param charset the character set to use, null maps to the default Charset.
     * @param bufferSize the buffer size to use.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
        // @formatter:off
        this(cs, bufferSize, newEncoder(charset));
        // @formatter:on
    }

    /**
     * Constructs a new instance from an already configured encoder and eagerly encodes the first buffer.
     *
     * @param cs the input character sequence.
     * @param bufferSize the requested byte buffer size.
     * @param charsetEncoder the encoder used to turn characters into bytes.
     */
    private CharSequenceInputStream(final CharSequence cs, final int bufferSize, final CharsetEncoder charsetEncoder) {
        this.charsetEncoder = charsetEncoder;
        // Ensure that buffer is long enough to hold a complete character
        this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(charsetEncoder, bufferSize));
        // A freshly allocated buffer is in write mode; flip it so it starts out as "empty, readable".
        this.bBuf.flip();
        this.cBuf = CharBuffer.wrap(cs);
        this.cBufMark = NO_MARK;
        this.bBufMark = NO_MARK;
        try {
            fillBuffer();
        } catch (final CharacterCodingException ex) {
            // Reset everything without filling the buffer
            // so the same exception can be thrown again later.
            this.bBuf.clear();
            this.bBuf.flip();
            this.cBuf.rewind();
        }
    }

    /**
     * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
     *
     * @param cs the input character sequence.
     * @param charset the character set name to use, null maps to the default Charset.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public CharSequenceInputStream(final CharSequence cs, final String charset) {
        this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
    }

    /**
     * Constructs a new instance.
     *
     * @param cs the input character sequence.
     * @param charset the character set name to use, null maps to the default Charset.
     * @param bufferSize the buffer size to use.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
        this(cs, Charsets.toCharset(charset), bufferSize);
    }

    /**
     * Gets a lower bound on the number of bytes remaining in the byte stream.
     * <p>
     * Only bytes that are already encoded into the internal byte buffer are counted; characters that have not been
     * encoded yet are not reflected in the result.
     * </p>
     *
     * @return the count of bytes that can be read without blocking (or returning EOF).
     * @throws IOException if an error occurs (probably not possible).
     */
    @Override
    public int available() throws IOException {
        return this.bBuf.remaining();
    }

    /**
     * Discards the bytes currently held in the byte buffer by moving its position to its limit. The character
     * buffer is left untouched.
     *
     * @throws IOException declared by the overridden method; not thrown by this implementation.
     */
    @Override
    public void close() throws IOException {
        bBuf.position(bBuf.limit());
    }

    /**
     * Fills the byte output buffer from the input char buffer.
     * <p>
     * The byte buffer is always returned to read mode, including when encoding fails, so that a later read never
     * exposes the unwritten part of the buffer as data.
     * </p>
     *
     * @throws CharacterCodingException
     *             an error encoding data.
     */
    private void fillBuffer() throws CharacterCodingException {
        // Switch to write mode, keeping any unread bytes at the front of the buffer.
        this.bBuf.compact();
        try {
            final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true);
            if (result.isError()) {
                result.throwException();
            }
        } finally {
            // Back to read mode even on failure; otherwise position..capacity would be read as stream content.
            this.bBuf.flip();
        }
    }

    /**
     * Gets the CharsetEncoder.
     *
     * @return the CharsetEncoder.
     */
    CharsetEncoder getCharsetEncoder() {
        return charsetEncoder;
    }

    /**
     * {@inheritDoc}
     * @param readLimit max read limit (ignored).
     */
    @Override
    public synchronized void mark(final int readLimit) {
        this.cBufMark = this.cBuf.position();
        this.bBufMark = this.bBuf.position();
        this.cBuf.mark();
        this.bBuf.mark();
        // It would be nice to be able to use mark & reset on the cBuf and bBuf;
        // however the bBuf is re-used so that won't work
    }

    /**
     * Tests whether this stream supports {@link #mark(int)} and {@link #reset()}.
     *
     * @return always {@code true}.
     */
    @Override
    public boolean markSupported() {
        return true;
    }

    /**
     * Reads the next encoded byte, refilling the byte buffer from the character buffer when it runs empty.
     *
     * @return the next byte as a value in the range 0 to 255, or {@link IOUtils#EOF} at the end of the stream.
     * @throws IOException if the characters cannot be encoded.
     */
    @Override
    public int read() throws IOException {
        for (;;) {
            if (this.bBuf.hasRemaining()) {
                return this.bBuf.get() & 0xFF;
            }
            fillBuffer();
            if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
                return EOF;
            }
        }
    }

    /**
     * Reads bytes into the whole of the given array.
     *
     * @param b the destination array.
     * @return the number of bytes read, or {@link IOUtils#EOF} at the end of the stream.
     * @throws IOException if the characters cannot be encoded.
     */
    @Override
    public int read(final byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Reads up to {@code len} encoded bytes into {@code array} starting at {@code off}.
     *
     * @param array the destination array.
     * @param off the start offset in {@code array}.
     * @param len the maximum number of bytes to read.
     * @return the number of bytes read, 0 if {@code len} is 0, or {@link IOUtils#EOF} at the end of the stream.
     * @throws NullPointerException if {@code array} is null.
     * @throws IndexOutOfBoundsException if {@code off} is negative, {@code len} is negative, or {@code len} is
     *         greater than {@code array.length - off}.
     * @throws IOException if the characters cannot be encoded.
     */
    @Override
    public int read(final byte[] array, int off, int len) throws IOException {
        Objects.requireNonNull(array, "array");
        // Compare against (length - off) rather than computing (off + len), which can overflow int.
        if (off < 0 || len < 0 || len > array.length - off) {
            throw new IndexOutOfBoundsException("Array Size=" + array.length + ", offset=" + off + ", length=" + len);
        }
        if (len == 0) {
            return 0; // must return 0 for zero length read
        }
        if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
            return EOF;
        }
        int bytesRead = 0;
        while (len > 0) {
            if (this.bBuf.hasRemaining()) {
                final int chunk = Math.min(this.bBuf.remaining(), len);
                this.bBuf.get(array, off, chunk);
                off += chunk;
                len -= chunk;
                bytesRead += chunk;
            } else {
                fillBuffer();
                if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
                    break;
                }
            }
        }
        return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead;
    }

    /**
     * Repositions this stream to the position recorded by the last call to {@link #mark(int)}, then marks that
     * position again. If no mark is set, the current position simply becomes the mark.
     *
     * @throws IOException if the characters cannot be re-encoded.
     * @throws IllegalStateException if re-encoding does not land on the marked character position.
     */
    @Override
    public synchronized void reset() throws IOException {
        //
        // This is not the most efficient implementation, as it re-encodes from the beginning.
        //
        // Since the bBuf is re-used, in general it's necessary to re-encode the data.
        //
        // It should be possible to apply some optimizations however:
        // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since
        //   the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is
        //   valid otherwise.
        // + Try saving the state of the cBuf before each fillBuffer; it might be possible to
        //   restart from there.
        //
        if (this.cBufMark != NO_MARK) {
            // if cBuf is at 0, we have not started reading anything, so skip re-encoding
            if (this.cBuf.position() != 0) {
                this.charsetEncoder.reset();
                this.cBuf.rewind();
                this.bBuf.rewind();
                this.bBuf.limit(0); // rewind does not clear the buffer
                while (this.cBuf.position() < this.cBufMark) {
                    this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
                    this.bBuf.limit(0);
                    fillBuffer();
                }
            }
            if (this.cBuf.position() != this.cBufMark) {
                throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " +
                        "expected=" + this.cBufMark);
            }
            this.bBuf.position(this.bBufMark);
            this.cBufMark = NO_MARK;
            this.bBufMark = NO_MARK;
        }
        mark(0);
    }

    /**
     * Skips over bytes by reading and discarding them one at a time.
     * <p>
     * Skipping stops as soon as {@link #available()} reports 0, so at most the bytes already encoded into the
     * internal byte buffer are skipped by a single call.
     * </p>
     *
     * @param n the number of bytes to skip.
     * @return the number of bytes actually skipped, which may be less than {@code n}.
     * @throws IOException if an error occurs while reading.
     */
    @Override
    public long skip(long n) throws IOException {
        //
        // This could be made more efficient by using position to skip within the current buffer.
        //
        long skipped = 0;
        while (n > 0 && available() > 0) {
            this.read();
            n--;
            skipped++;
        }
        return skipped;
    }

}