/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.io.input;

import static org.apache.commons.io.IOUtils.CR;
import static org.apache.commons.io.IOUtils.EOF;
import static org.apache.commons.io.IOUtils.LF;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.Reader;

import org.apache.commons.io.IOUtils;

/**
 * Wraps an existing {@link Reader} and buffers the input <em>without any synchronization</em>. Expensive interaction with the underlying reader is minimized,
 * since most (smaller) requests can be satisfied by accessing the buffer alone. The drawback is that some extra space is required to hold the buffer and that
 * copying takes place when filling that buffer, but this is usually outweighed by the performance benefits.
 * <p>
 * A typical application pattern for the class looks like this:
 * </p>
 *
 * <pre>{@code
 * UnsynchronizedBufferedReader buf = new UnsynchronizedBufferedReader(new FileReader("file"));
 * }</pre>
 * <p>
 * Provenance: Apache Harmony's java.io.BufferedReader, renamed, and modified.
 * </p>
 *
 * @see BufferedReader
 * @see BufferedWriter
 * @since 2.17.0
 */
public class UnsynchronizedBufferedReader extends UnsynchronizedReader {

    /** Sentinel meaning "no end-of-line character has been seen yet" while assembling a line in {@link #readLine()}. */
    private static final char NUL = '\0';

    /** The wrapped source reader. */
    private final Reader in;

    /**
     * The characters that can be read and refilled in bulk. We maintain three indices into this buffer:
     *
     * <pre>
     * { X X X X X X X X X X X X - - }
     *   ^     ^                 ^
     *   |     |                 |
     *  mark  pos               end
     * </pre>
     * <p>
     * Pos points to the next readable character. End is one greater than the last readable character. When {@code pos == end}, the buffer is empty and must be
     * {@link #fillBuf() filled} before characters can be read.
     * </p>
     * <p>
     * Mark is the value pos will be set to on calls to {@link #reset()}. Its value is in the range {@code [0...pos]}. If the mark is {@code -1}, the buffer
     * cannot be reset.
     * </p>
     * <p>
     * MarkLimit limits the distance between the mark and the pos. When this limit is exceeded, {@link #reset()} is permitted (but not required) to throw an
     * exception. For shorter distances, {@link #reset()} shall not throw (unless the reader is closed).
     * </p>
     */
    private char[] buf;

    private int pos;

    private int end;

    private int mark = -1;

    private int markLimit = -1;

    /**
     * Constructs a new UnsynchronizedBufferedReader on the Reader {@code in}. The buffer gets the default size ({@link IOUtils#DEFAULT_BUFFER_SIZE}).
     *
     * @param in the Reader that is buffered.
     */
    public UnsynchronizedBufferedReader(final Reader in) {
        this(in, IOUtils.DEFAULT_BUFFER_SIZE);
    }

    /**
     * Constructs a new UnsynchronizedBufferedReader on the Reader {@code in}. The buffer size is specified by the parameter {@code size}.
     *
     * @param in   the Reader that is buffered.
     * @param size the size of the buffer to allocate.
     * @throws IllegalArgumentException if {@code size <= 0}.
     */
    public UnsynchronizedBufferedReader(final Reader in, final int size) {
        if (size <= 0) {
            throw new IllegalArgumentException("size <= 0");
        }
        this.in = in;
        buf = new char[size];
    }

    /**
     * Peeks at the next input character, refilling the buffer if necessary. If this character is a newline character ("\n"), it is discarded.
     *
     * @throws IOException if refilling the buffer from the source reader fails.
     */
    final void chompNewline() throws IOException {
        if ((pos != end || fillBuf() != EOF) && buf[pos] == LF) {
            pos++;
        }
    }

    /**
     * Closes this reader. This implementation closes the buffered source reader and releases the buffer. Nothing is done if this reader has already been
     * closed.
     *
     * @throws IOException if an error occurs while closing this reader.
     */
    @Override
    public void close() throws IOException {
        if (!isClosed()) {
            in.close();
            buf = null;
            super.close();
        }
    }

    /**
     * Populates the buffer with data. It is an error to call this method when the buffer still contains data; ie. if {@code pos < end}.
     *
     * @return the number of characters read into the buffer, or -1 if the end of the source reader has been reached.
     * @throws IOException if reading from the source reader fails.
     */
    private int fillBuf() throws IOException {
        // assert(pos == end);

        if (mark == EOF || pos - mark >= markLimit) {
            /* mark isn't set or has exceeded its limit. use the whole buffer */
            final int result = in.read(buf, 0, buf.length);
            if (result > 0) {
                mark = -1;
                pos = 0;
                end = result;
            }
            return result;
        }

        if (mark == 0 && markLimit > buf.length) {
            /*
             * The only way to make room when mark=0 is by growing the buffer. Double it, capped at markLimit. The doubling is done in long arithmetic so that
             * a buffer larger than 2^30 characters cannot overflow to a negative length.
             */
            final int newLength = (int) Math.min((long) buf.length * 2, markLimit);
            final char[] newbuf = new char[newLength];
            System.arraycopy(buf, 0, newbuf, 0, buf.length);
            buf = newbuf;
        } else if (mark > 0) {
            /* make room by shifting the buffered data to left mark positions */
            System.arraycopy(buf, mark, buf, 0, buf.length - mark);
            pos -= mark;
            end -= mark;
            mark = 0;
        }

        /* Append new data after the characters retained for the mark. */
        final int count = in.read(buf, pos, buf.length - pos);
        if (count != EOF) {
            end += count;
        }
        return count;
    }

    /**
     * Sets a mark position in this reader. The parameter {@code markLimit} indicates how many characters can be read before the mark is invalidated. Calling
     * {@link #reset()} will reposition the reader back to the marked position if {@code markLimit} has not been surpassed.
     *
     * @param markLimit the number of characters that can be read before the mark is invalidated.
     * @throws IllegalArgumentException if {@code markLimit < 0}.
     * @throws IOException              if an error occurs while setting a mark in this reader.
     * @see #markSupported()
     * @see #reset()
     */
    @Override
    public void mark(final int markLimit) throws IOException {
        if (markLimit < 0) {
            throw new IllegalArgumentException();
        }
        checkOpen();
        this.markLimit = markLimit;
        mark = pos;
    }

    /**
     * Tests whether this reader supports the {@link #mark(int)} and {@link #reset()} methods. This implementation returns {@code true}.
     *
     * @return {@code true} for {@code UnsynchronizedBufferedReader}.
     * @see #mark(int)
     * @see #reset()
     */
    @Override
    public boolean markSupported() {
        return true;
    }

    /**
     * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will still return this value.
     * <p>
     * This method sets a new mark and therefore replaces any mark previously set with {@link #mark(int)}.
     * </p>
     *
     * @return the next character, or -1 if the end of the source reader has been reached.
     * @throws IOException If an I/O error occurs
     */
    public int peek() throws IOException {
        mark(1);
        final int c = read();
        reset();
        return c;
    }

    /**
     * Populates the buffer with the next {@code buf.length} characters in the current reader without consuming them. The next call to {@link #read()} will
     * still return the next value.
     * <p>
     * This method sets a new mark and therefore replaces any mark previously set with {@link #mark(int)}.
     * </p>
     *
     * @param buf the buffer to fill for the look ahead.
     * @return the number of characters stored in {@code buf}, or -1 if the end of the source reader has been reached.
     * @throws IOException If an I/O error occurs
     */
    public int peek(final char[] buf) throws IOException {
        final int n = buf.length;
        mark(n);
        final int c = read(buf, 0, n);
        reset();
        return c;
    }

    /**
     * Reads a single character from this reader and returns it with the two higher-order bytes set to 0. If possible, this reader returns a character from
     * the buffer. If there are no characters available in the buffer, it fills the buffer and then returns a character. It returns -1 if there are no more
     * characters in the source reader.
     *
     * @return the character read or -1 if the end of the source reader has been reached.
     * @throws IOException if this reader is closed or some other I/O error occurs.
     */
    @Override
    public int read() throws IOException {
        checkOpen();
        /* Are there buffered characters available? */
        if (pos < end || fillBuf() != EOF) {
            return buf[pos++];
        }
        return EOF;
    }

    /**
     * Reads at most {@code length} characters from this reader and stores them at {@code offset} in the character array {@code buffer}. Returns the number of
     * characters actually read or -1 if the end of the source reader has been reached. If all the buffered characters have been used, a mark has not been set
     * and the requested number of characters is larger than this reader's buffer size, this reader bypasses the buffer and simply places the results directly
     * into {@code buffer}.
     *
     * @param buffer the character array to store the characters read.
     * @param offset the initial position in {@code buffer} to store the characters read from this reader.
     * @param length the maximum number of characters to read, must be non-negative.
     * @return number of characters read or -1 if the end of the source reader has been reached.
     * @throws IndexOutOfBoundsException if {@code offset < 0} or {@code length < 0}, or if {@code offset + length} is greater than the size of {@code buffer}.
     * @throws IOException               if this reader is closed or some other I/O error occurs.
     */
    @Override
    public int read(final char[] buffer, int offset, final int length) throws IOException {
        checkOpen();
        if (offset < 0 || offset > buffer.length - length || length < 0) {
            throw new IndexOutOfBoundsException();
        }
        int outstanding = length;
        while (outstanding > 0) {

            /*
             * If there are characters in the buffer, grab those first.
             */
            final int available = end - pos;
            if (available > 0) {
                final int count = available >= outstanding ? outstanding : available;
                System.arraycopy(buf, pos, buffer, offset, count);
                pos += count;
                offset += count;
                outstanding -= count;
            }

            /*
             * Before attempting to read from the underlying reader, make sure we really, really want to. We won't bother if we're done, or if we've already
             * got some characters and reading from the underlying reader would block.
             */
            if (outstanding == 0 || outstanding < length && !in.ready()) {
                break;
            }

            // assert(pos == end);

            /*
             * If we're unmarked and the requested size is greater than our buffer, read the characters directly into the caller's buffer. We don't read into
             * smaller buffers because that could result in many reads.
             */
            if ((mark == -1 || pos - mark >= markLimit) && outstanding >= buf.length) {
                final int count = in.read(buffer, offset, outstanding);
                if (count > 0) {
                    outstanding -= count;
                    mark = -1;
                }

                break; // assume the source reader gave us all that it could
            }

            if (fillBuf() == EOF) {
                break; // source is exhausted
            }
        }

        final int count = length - outstanding;
        // A zero-length request yields 0; otherwise "nothing read" means the end of the source was reached.
        return count > 0 || count == length ? count : EOF;
    }

    /**
     * Returns the next line of text available from this reader. A line is represented by zero or more characters followed by {@code LF}, {@code CR},
     * {@code "\r\n"} or the end of the reader. The string does not include the newline sequence.
     *
     * @return the contents of the line or {@code null} if no characters were read before the end of the reader has been reached.
     * @throws IOException if this reader is closed or some other I/O error occurs.
     */
    public String readLine() throws IOException {
        checkOpen();
        /* has the underlying reader been exhausted? */
        if (pos == end && fillBuf() == EOF) {
            return null;
        }
        /* Fast path: the line terminator is already in the buffer. */
        for (int charPos = pos; charPos < end; charPos++) {
            final char ch = buf[charPos];
            if (ch > CR) {
                // Neither LF nor CR can be greater than CR, so skip quickly.
                continue;
            }
            if (ch == LF) {
                final String res = new String(buf, pos, charPos - pos);
                pos = charPos + 1;
                return res;
            }
            if (ch == CR) {
                final String res = new String(buf, pos, charPos - pos);
                pos = charPos + 1;
                // Swallow the LF of a CR LF pair, refilling the buffer if the CR was its last character.
                if ((pos < end || fillBuf() != EOF) && buf[pos] == LF) {
                    pos++;
                }
                return res;
            }
        }

        /* Slow path: the line spans more than one buffer fill; accumulate it in a builder. */
        char eol = NUL;
        final StringBuilder result = new StringBuilder(80);
        /* Typical Line Length */

        result.append(buf, pos, end - pos);
        while (true) {
            pos = end;

            /* Are there buffered characters available? */
            if (eol == LF) {
                return result.toString();
            }
            // attempt to fill buffer
            if (fillBuf() == EOF) {
                // characters or null.
                return result.length() > 0 || eol != NUL ? result.toString() : null;
            }
            for (int charPos = pos; charPos < end; charPos++) {
                final char c = buf[charPos];
                if (eol != NUL) {
                    // The previous character was a line terminator: the line is complete.
                    if (eol == CR && c == LF) {
                        if (charPos > pos) {
                            result.append(buf, pos, charPos - pos - 1);
                        }
                        pos = charPos + 1;
                    } else {
                        if (charPos > pos) {
                            result.append(buf, pos, charPos - pos - 1);
                        }
                        pos = charPos;
                    }
                    return result.toString();
                }
                if (c == LF || c == CR) {
                    eol = c;
                }
            }
            if (eol == NUL) {
                result.append(buf, pos, end - pos);
            } else {
                // The last buffered character is the terminator; do not copy it into the line.
                result.append(buf, pos, end - pos - 1);
            }
        }
    }

    /**
     * Tests whether this reader is ready to be read without blocking.
     *
     * @return {@code true} if this reader will not block when {@code read} is called, {@code false} if unknown or blocking will occur.
     * @throws IOException if this reader is closed or some other I/O error occurs.
     * @see #read()
     * @see #read(char[], int, int)
     * @see #readLine()
     */
    @Override
    public boolean ready() throws IOException {
        checkOpen();
        return end - pos > 0 || in.ready();
    }

    /**
     * Resets this reader's position to the last {@code mark()} location. Invocations of {@code read()} and {@code skip()} will occur from this new location.
     *
     * @throws IOException if this reader is closed or no mark has been set.
     * @see #mark(int)
     * @see #markSupported()
     */
    @Override
    public void reset() throws IOException {
        checkOpen();
        if (mark == -1) {
            throw new IOException("mark == -1");
        }
        pos = mark;
    }

    /**
     * Skips {@code amount} characters in this reader. Subsequent {@code read()}s will not return these characters unless {@code reset()} is used. Skipping
     * characters may invalidate a mark if {@code markLimit} is surpassed.
     *
     * @param amount the maximum number of characters to skip.
     * @return the number of characters actually skipped.
     * @throws IllegalArgumentException if {@code amount < 0}.
     * @throws IOException              if this reader is closed or some other I/O error occurs.
     * @see #mark(int)
     * @see #markSupported()
     * @see #reset()
     */
    @Override
    public long skip(final long amount) throws IOException {
        if (amount < 0) {
            throw new IllegalArgumentException();
        }
        checkOpen();
        if (amount < 1) {
            return 0;
        }
        if (end - pos >= amount) {
            pos += Math.toIntExact(amount);
            return amount;
        }

        long read = end - pos;
        pos = end;
        while (read < amount) {
            if (fillBuf() == EOF) {
                return read;
            }
            if (end - pos >= amount - read) {
                pos += Math.toIntExact(amount - read);
                return amount;
            }
            // Couldn't get all the characters, skip what we read
            read += end - pos;
            pos = end;
        }
        return amount;
    }

}