001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io.input; 018 019import java.io.Closeable; 020import java.io.File; 021import java.io.IOException; 022import java.io.UnsupportedEncodingException; 023import java.nio.ByteBuffer; 024import java.nio.channels.SeekableByteChannel; 025import java.nio.charset.Charset; 026import java.nio.charset.CharsetEncoder; 027import java.nio.charset.StandardCharsets; 028import java.nio.file.Files; 029import java.nio.file.Path; 030import java.nio.file.StandardOpenOption; 031import java.util.ArrayList; 032import java.util.Arrays; 033import java.util.Collections; 034import java.util.List; 035 036import org.apache.commons.io.Charsets; 037import org.apache.commons.io.FileSystem; 038import org.apache.commons.io.StandardLineSeparator; 039import org.apache.commons.io.build.AbstractStreamBuilder; 040 041/** 042 * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files. 043 * <p> 044 * To build an instance, use {@link Builder}. 045 * </p> 046 * 047 * @see Builder 048 * @since 2.2 049 */ 050public class ReversedLinesFileReader implements Closeable { 051 052 // @formatter:off 053 /** 054 * Builds a new {@link ReversedLinesFileReader}. 055 * 056 * <p> 057 * For example: 058 * </p> 059 * <pre>{@code 060 * ReversedLinesFileReader r = ReversedLinesFileReader.builder() 061 * .setPath(path) 062 * .setBufferSize(4096) 063 * .setCharset(StandardCharsets.UTF_8) 064 * .get();} 065 * </pre> 066 * 067 * @see #get() 068 * @since 2.12.0 069 */ 070 // @formatter:on 071 public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> { 072 073 /** 074 * Constructs a new {@link Builder}. 075 */ 076 public Builder() { 077 setBufferSizeDefault(DEFAULT_BLOCK_SIZE); 078 setBufferSize(DEFAULT_BLOCK_SIZE); 079 } 080 081 /** 082 * Builds a new {@link ReversedLinesFileReader}. 083 * <p> 084 * You must set input that supports {@link #getInputStream()} on this builder, otherwise, this method throws an exception. 085 * </p> 086 * <p> 087 * This builder use the following aspects: 088 * </p> 089 * <ul> 090 * <li>{@link #getInputStream()}</li> 091 * <li>{@link #getBufferSize()}</li> 092 * <li>{@link #getCharset()}</li> 093 * </ul> 094 * 095 * @return a new instance. 096 * @throws IllegalStateException if the {@code origin} is {@code null}. 097 * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}. 098 * @throws IOException if an I/O error occurs. 099 * @see #getPath() 100 * @see #getBufferSize() 101 * @see #getCharset() 102 */ 103 @Override 104 public ReversedLinesFileReader get() throws IOException { 105 return new ReversedLinesFileReader(getPath(), getBufferSize(), getCharset()); 106 } 107 108 } 109 110 private final class FilePart { 111 private final long no; 112 113 private final byte[] data; 114 115 private byte[] leftOver; 116 117 private int currentLastBytePos; 118 119 /** 120 * Constructs a new instance. 121 * 122 * @param no the part number 123 * @param length its length 124 * @param leftOverOfLastFilePart remainder 125 * @throws IOException if there is a problem reading the file 126 */ 127 private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException { 128 this.no = no; 129 final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0); 130 this.data = new byte[dataLength]; 131 final long off = (no - 1) * blockSize; 132 133 // read data 134 if (no > 0 /* file not empty */) { 135 channel.position(off); 136 final int countRead = channel.read(ByteBuffer.wrap(data, 0, length)); 137 if (countRead != length) { 138 throw new IllegalStateException("Count of requested bytes and actually read bytes don't match"); 139 } 140 } 141 // copy left over part into data arr 142 if (leftOverOfLastFilePart != null) { 143 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length); 144 } 145 this.currentLastBytePos = data.length - 1; 146 this.leftOver = null; 147 } 148 149 /** 150 * Constructs the buffer containing any leftover bytes. 151 */ 152 private void createLeftOver() { 153 final int lineLengthBytes = currentLastBytePos + 1; 154 if (lineLengthBytes > 0) { 155 // create left over for next block 156 leftOver = Arrays.copyOf(data, lineLengthBytes); 157 } else { 158 leftOver = null; 159 } 160 currentLastBytePos = -1; 161 } 162 163 /** 164 * Finds the new-line sequence and return its length. 165 * 166 * @param data buffer to scan 167 * @param i start offset in buffer 168 * @return length of newline sequence or 0 if none found 169 */ 170 private int getNewLineMatchByteCount(final byte[] data, final int i) { 171 for (final byte[] newLineSequence : newLineSequences) { 172 boolean match = true; 173 for (int j = newLineSequence.length - 1; j >= 0; j--) { 174 final int k = i + j - (newLineSequence.length - 1); 175 match &= k >= 0 && data[k] == newLineSequence[j]; 176 } 177 if (match) { 178 return newLineSequence.length; 179 } 180 } 181 return 0; 182 } 183 184 /** 185 * Reads a line. 186 * 187 * @return the line or null 188 */ 189 private String readLine() { //NOPMD Bug in PMD 190 191 String line = null; 192 int newLineMatchByteCount; 193 194 final boolean isLastFilePart = no == 1; 195 196 int i = currentLastBytePos; 197 while (i > -1) { 198 199 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) { 200 // avoidNewlineSplitBuffer: for all except the last file part we 201 // take a few bytes to the next file part to avoid splitting of newlines 202 createLeftOver(); 203 break; // skip last few bytes and leave it to the next file part 204 } 205 206 // check for newline 207 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) { 208 final int lineStart = i + 1; 209 final int lineLengthBytes = currentLastBytePos - lineStart + 1; 210 211 if (lineLengthBytes < 0) { 212 throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes); 213 } 214 final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes); 215 216 line = new String(lineData, charset); 217 218 currentLastBytePos = i - newLineMatchByteCount; 219 break; // found line 220 } 221 222 // move cursor 223 i -= byteDecrement; 224 225 // end of file part handling 226 if (i < 0) { 227 createLeftOver(); 228 break; // end of file part 229 } 230 } 231 232 // last file part handling 233 if (isLastFilePart && leftOver != null) { 234 // there will be no line break anymore, this is the first line of the file 235 line = new String(leftOver, charset); 236 leftOver = null; 237 } 238 239 return line; 240 } 241 242 /** 243 * Handles block rollover 244 * 245 * @return the new FilePart or null 246 * @throws IOException if there was a problem reading the file 247 */ 248 private FilePart rollOver() throws IOException { 249 250 if (currentLastBytePos > -1) { 251 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... " 252 + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos); 253 } 254 255 if (no > 1) { 256 return new FilePart(no - 1, blockSize, leftOver); 257 } 258 // NO 1 was the last FilePart, we're finished 259 if (leftOver != null) { 260 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart=" 261 + new String(leftOver, charset)); 262 } 263 return null; 264 } 265 } 266 267 private static final String EMPTY_STRING = ""; 268 269 private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize(); 270 271 /** 272 * Constructs a new {@link Builder}. 273 * 274 * @return a new {@link Builder}. 275 * @since 2.12.0 276 */ 277 public static Builder builder() { 278 return new Builder(); 279 } 280 281 private final int blockSize; 282 private final Charset charset; 283 private final SeekableByteChannel channel; 284 private final long totalByteLength; 285 private final long totalBlockCount; 286 private final byte[][] newLineSequences; 287 private final int avoidNewlineSplitBufferSize; 288 private final int byteDecrement; 289 private FilePart currentFilePart; 290 private boolean trailingNewlineOfFileSkipped; 291 292 /** 293 * Constructs a ReversedLinesFileReader with default block size of 4KB and the 294 * platform's default encoding. 295 * 296 * @param file the file to be read 297 * @throws IOException if an I/O error occurs. 298 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 299 */ 300 @Deprecated 301 public ReversedLinesFileReader(final File file) throws IOException { 302 this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset()); 303 } 304 305 /** 306 * Constructs a ReversedLinesFileReader with default block size of 4KB and the 307 * specified encoding. 308 * 309 * @param file the file to be read 310 * @param charset the charset to use, null uses the default Charset. 311 * @throws IOException if an I/O error occurs. 312 * @since 2.5 313 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 314 */ 315 @Deprecated 316 public ReversedLinesFileReader(final File file, final Charset charset) throws IOException { 317 this(file.toPath(), charset); 318 } 319 320 /** 321 * Constructs a ReversedLinesFileReader with the given block size and encoding. 322 * 323 * @param file the file to be read 324 * @param blockSize size of the internal buffer (for ideal performance this 325 * should match with the block size of the underlying file 326 * system). 327 * @param charset the encoding of the file, null uses the default Charset. 328 * @throws IOException if an I/O error occurs. 329 * @since 2.3 330 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 331 */ 332 @Deprecated 333 public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException { 334 this(file.toPath(), blockSize, charset); 335 } 336 337 /** 338 * Constructs a ReversedLinesFileReader with the given block size and encoding. 339 * 340 * @param file the file to be read 341 * @param blockSize size of the internal buffer (for ideal performance this 342 * should match with the block size of the underlying file 343 * system). 344 * @param charsetName the encoding of the file, null uses the default Charset. 345 * @throws IOException if an I/O error occurs 346 * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported 347 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 348 */ 349 @Deprecated 350 public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException { 351 this(file.toPath(), blockSize, charsetName); 352 } 353 354 /** 355 * Constructs a ReversedLinesFileReader with default block size of 4KB and the 356 * specified encoding. 357 * 358 * @param file the file to be read 359 * @param charset the charset to use, null uses the default Charset. 360 * @throws IOException if an I/O error occurs. 361 * @since 2.7 362 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 363 */ 364 @Deprecated 365 public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException { 366 this(file, DEFAULT_BLOCK_SIZE, charset); 367 } 368 369 /** 370 * Constructs a ReversedLinesFileReader with the given block size and encoding. 371 * 372 * @param file the file to be read 373 * @param blockSize size of the internal buffer (for ideal performance this 374 * should match with the block size of the underlying file 375 * system). 376 * @param charset the encoding of the file, null uses the default Charset. 377 * @throws IOException if an I/O error occurs. 378 * @since 2.7 379 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 380 */ 381 @Deprecated 382 public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException { 383 this.blockSize = blockSize; 384 this.charset = Charsets.toCharset(charset); 385 386 // --- check & prepare encoding --- 387 final CharsetEncoder charsetEncoder = this.charset.newEncoder(); 388 final float maxBytesPerChar = charsetEncoder.maxBytesPerChar(); 389 if (maxBytesPerChar == 1f || this.charset == StandardCharsets.UTF_8) { 390 // all one byte encodings are no problem 391 byteDecrement = 1; 392 } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8 393 // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html 394 this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese) 395 this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean) 396 this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese) 397 this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese) 398 byteDecrement = 1; 399 } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) { 400 // UTF-16 new line sequences are not allowed as second tuple of four byte 401 // sequences, 402 // however byte order has to be specified 403 byteDecrement = 2; 404 } else if (this.charset == StandardCharsets.UTF_16) { 405 throw new UnsupportedEncodingException( 406 "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)"); 407 } else { 408 throw new UnsupportedEncodingException( 409 "Encoding " + charset + " is not supported yet (feel free to " + "submit a patch)"); 410 } 411 412 // NOTE: The new line sequences are matched in the order given, so it is 413 // important that \r\n is BEFORE \n 414 this.newLineSequences = new byte[][] { 415 StandardLineSeparator.CRLF.getBytes(this.charset), 416 StandardLineSeparator.LF.getBytes(this.charset), 417 StandardLineSeparator.CR.getBytes(this.charset) 418 }; 419 420 this.avoidNewlineSplitBufferSize = newLineSequences[0].length; 421 422 // Open file 423 this.channel = Files.newByteChannel(file, StandardOpenOption.READ); 424 this.totalByteLength = channel.size(); 425 int lastBlockLength = (int) (this.totalByteLength % blockSize); 426 if (lastBlockLength > 0) { 427 this.totalBlockCount = this.totalByteLength / blockSize + 1; 428 } else { 429 this.totalBlockCount = this.totalByteLength / blockSize; 430 if (this.totalByteLength > 0) { 431 lastBlockLength = blockSize; 432 } 433 } 434 this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null); 435 436 } 437 438 /** 439 * Constructs a ReversedLinesFileReader with the given block size and encoding. 440 * 441 * @param file the file to be read 442 * @param blockSize size of the internal buffer (for ideal performance this 443 * should match with the block size of the underlying file 444 * system). 445 * @param charsetName the encoding of the file, null uses the default Charset. 446 * @throws IOException if an I/O error occurs 447 * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported 448 * @since 2.7 449 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 450 */ 451 @Deprecated 452 public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException { 453 this(file, blockSize, Charsets.toCharset(charsetName)); 454 } 455 456 /** 457 * Closes underlying resources. 458 * 459 * @throws IOException if an I/O error occurs. 460 */ 461 @Override 462 public void close() throws IOException { 463 channel.close(); 464 } 465 466 /** 467 * Returns the lines of the file from bottom to top. 468 * 469 * @return the next line or null if the start of the file is reached 470 * @throws IOException if an I/O error occurs. 471 */ 472 public String readLine() throws IOException { 473 474 String line = currentFilePart.readLine(); 475 while (line == null) { 476 currentFilePart = currentFilePart.rollOver(); 477 if (currentFilePart == null) { 478 // no more FileParts: we're done, leave line set to null 479 break; 480 } 481 line = currentFilePart.readLine(); 482 } 483 484 // aligned behavior with BufferedReader that doesn't return a last, empty line 485 if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) { 486 trailingNewlineOfFileSkipped = true; 487 line = readLine(); 488 } 489 490 return line; 491 } 492 493 /** 494 * Returns {@code lineCount} lines of the file from bottom to top. 495 * <p> 496 * If there are less than {@code lineCount} lines in the file, then that's what 497 * you get. 498 * </p> 499 * <p> 500 * Note: You can easily flip the result with {@link Collections#reverse(List)}. 501 * </p> 502 * 503 * @param lineCount How many lines to read. 504 * @return A new list 505 * @throws IOException if an I/O error occurs. 506 * @since 2.8.0 507 */ 508 public List<String> readLines(final int lineCount) throws IOException { 509 if (lineCount < 0) { 510 throw new IllegalArgumentException("lineCount < 0"); 511 } 512 final ArrayList<String> arrayList = new ArrayList<>(lineCount); 513 for (int i = 0; i < lineCount; i++) { 514 final String line = readLine(); 515 if (line == null) { 516 return arrayList; 517 } 518 arrayList.add(line); 519 } 520 return arrayList; 521 } 522 523 /** 524 * Returns the last {@code lineCount} lines of the file. 525 * <p> 526 * If there are less than {@code lineCount} lines in the file, then that's what 527 * you get. 528 * </p> 529 * 530 * @param lineCount How many lines to read. 531 * @return A String. 532 * @throws IOException if an I/O error occurs. 533 * @since 2.8.0 534 */ 535 public String toString(final int lineCount) throws IOException { 536 final List<String> lines = readLines(lineCount); 537 Collections.reverse(lines); 538 return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator(); 539 } 540 541}