001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import java.io.Closeable;
020import java.io.File;
021import java.io.IOException;
022import java.io.UnsupportedEncodingException;
023import java.nio.ByteBuffer;
024import java.nio.channels.SeekableByteChannel;
025import java.nio.charset.Charset;
026import java.nio.charset.CharsetEncoder;
027import java.nio.charset.StandardCharsets;
028import java.nio.file.Files;
029import java.nio.file.Path;
030import java.nio.file.StandardOpenOption;
031import java.util.ArrayList;
032import java.util.Arrays;
033import java.util.Collections;
034import java.util.List;
035
036import org.apache.commons.io.Charsets;
037import org.apache.commons.io.FileSystem;
038import org.apache.commons.io.StandardLineSeparator;
039import org.apache.commons.io.build.AbstractStreamBuilder;
040
041/**
042 * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files.
043 * <p>
044 * To build an instance, use {@link Builder}.
045 * </p>
046 *
047 * @see Builder
048 * @since 2.2
049 */
050public class ReversedLinesFileReader implements Closeable {
051
052    // @formatter:off
053    /**
054     * Builds a new {@link ReversedLinesFileReader}.
055     *
056     * <p>
057     * For example:
058     * </p>
059     * <pre>{@code
060     * ReversedLinesFileReader r = ReversedLinesFileReader.builder()
061     *   .setPath(path)
062     *   .setBufferSize(4096)
063     *   .setCharset(StandardCharsets.UTF_8)
064     *   .get();}
065     * </pre>
066     *
067     * @see #get()
068     * @since 2.12.0
069     */
070    // @formatter:on
071    public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {
072
073        /**
074         * Constructs a new {@link Builder}.
075         */
076        public Builder() {
077            setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
078            setBufferSize(DEFAULT_BLOCK_SIZE);
079        }
080
081        /**
082         * Builds a new {@link ReversedLinesFileReader}.
083         * <p>
084         * You must set input that supports {@link #getInputStream()} on this builder, otherwise, this method throws an exception.
085         * </p>
086         * <p>
087         * This builder use the following aspects:
088         * </p>
089         * <ul>
090         * <li>{@link #getInputStream()}</li>
091         * <li>{@link #getBufferSize()}</li>
092         * <li>{@link #getCharset()}</li>
093         * </ul>
094         *
095         * @return a new instance.
096         * @throws IllegalStateException         if the {@code origin} is {@code null}.
097         * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
098         * @throws IOException                   if an I/O error occurs.
099         * @see #getPath()
100         * @see #getBufferSize()
101         * @see #getCharset()
102         */
103        @Override
104        public ReversedLinesFileReader get() throws IOException {
105            return new ReversedLinesFileReader(getPath(), getBufferSize(), getCharset());
106        }
107
108    }
109
110    private final class FilePart {
        /** Number of this part; part {@code n} is read from file offset {@code (n - 1) * blockSize}. */
        private final long no;

        /** Bytes read from the file for this part, followed by the bytes carried over from the previously processed part. */
        private final byte[] data;

        /** Leading bytes of {@link #data} that did not form a complete line; passed to the next part created by roll over. */
        private byte[] leftOver;

        /** Index in {@link #data} of the last byte that has not been consumed yet; {@code -1} when nothing remains. */
        private int currentLastBytePos;
118
119        /**
120         * Constructs a new instance.
121         *
122         * @param no                     the part number
123         * @param length                 its length
124         * @param leftOverOfLastFilePart remainder
125         * @throws IOException if there is a problem reading the file
126         */
127        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
128            this.no = no;
129            final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
130            this.data = new byte[dataLength];
131            final long off = (no - 1) * blockSize;
132
133            // read data
134            if (no > 0 /* file not empty */) {
135                channel.position(off);
136                final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
137                if (countRead != length) {
138                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
139                }
140            }
141            // copy left over part into data arr
142            if (leftOverOfLastFilePart != null) {
143                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
144            }
145            this.currentLastBytePos = data.length - 1;
146            this.leftOver = null;
147        }
148
149        /**
150         * Constructs the buffer containing any leftover bytes.
151         */
152        private void createLeftOver() {
153            final int lineLengthBytes = currentLastBytePos + 1;
154            if (lineLengthBytes > 0) {
155                // create left over for next block
156                leftOver = Arrays.copyOf(data, lineLengthBytes);
157            } else {
158                leftOver = null;
159            }
160            currentLastBytePos = -1;
161        }
162
163        /**
164         * Finds the new-line sequence and return its length.
165         *
166         * @param data buffer to scan
167         * @param i    start offset in buffer
168         * @return length of newline sequence or 0 if none found
169         */
170        private int getNewLineMatchByteCount(final byte[] data, final int i) {
171            for (final byte[] newLineSequence : newLineSequences) {
172                boolean match = true;
173                for (int j = newLineSequence.length - 1; j >= 0; j--) {
174                    final int k = i + j - (newLineSequence.length - 1);
175                    match &= k >= 0 && data[k] == newLineSequence[j];
176                }
177                if (match) {
178                    return newLineSequence.length;
179                }
180            }
181            return 0;
182        }
183
184        /**
185         * Reads a line.
186         *
187         * @return the line or null
188         */
189        private String readLine() { //NOPMD Bug in PMD
190
191            String line = null;
192            int newLineMatchByteCount;
193
194            final boolean isLastFilePart = no == 1;
195
196            int i = currentLastBytePos;
197            while (i > -1) {
198
199                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
200                    // avoidNewlineSplitBuffer: for all except the last file part we
201                    // take a few bytes to the next file part to avoid splitting of newlines
202                    createLeftOver();
203                    break; // skip last few bytes and leave it to the next file part
204                }
205
206                // check for newline
207                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
208                    final int lineStart = i + 1;
209                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;
210
211                    if (lineLengthBytes < 0) {
212                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
213                    }
214                    final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);
215
216                    line = new String(lineData, charset);
217
218                    currentLastBytePos = i - newLineMatchByteCount;
219                    break; // found line
220                }
221
222                // move cursor
223                i -= byteDecrement;
224
225                // end of file part handling
226                if (i < 0) {
227                    createLeftOver();
228                    break; // end of file part
229                }
230            }
231
232            // last file part handling
233            if (isLastFilePart && leftOver != null) {
234                // there will be no line break anymore, this is the first line of the file
235                line = new String(leftOver, charset);
236                leftOver = null;
237            }
238
239            return line;
240        }
241
242        /**
243         * Handles block rollover
244         *
245         * @return the new FilePart or null
246         * @throws IOException if there was a problem reading the file
247         */
248        private FilePart rollOver() throws IOException {
249
250            if (currentLastBytePos > -1) {
251                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
252                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
253            }
254
255            if (no > 1) {
256                return new FilePart(no - 1, blockSize, leftOver);
257            }
258            // NO 1 was the last FilePart, we're finished
259            if (leftOver != null) {
260                throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
261                        + new String(leftOver, charset));
262            }
263            return null;
264        }
265    }
266
    /** The empty string; compared against lines in {@code readLine()} and returned by {@code toString(int)} when no lines were read. */
    private static final String EMPTY_STRING = "";

    /** Default block size: the block size reported by the current {@link FileSystem}. */
    private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();
270
    /**
     * Constructs a new {@link Builder}.
     * <p>
     * Each call returns a fresh builder instance.
     * </p>
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }
280
    /** Number of bytes read from the file per block. */
    private final int blockSize;
    /** Charset used to encode the line separators and to decode the bytes of each line. */
    private final Charset charset;
    /** Channel over the file, opened for reading. */
    private final SeekableByteChannel channel;
    /** Size of the channel in bytes, taken when the reader was constructed. */
    private final long totalByteLength;
    /** Number of blocks of {@link #blockSize} bytes needed to cover {@link #totalByteLength}. */
    private final long totalBlockCount;
    /** Line separators encoded in {@link #charset}, in matching order: CRLF, LF, CR. */
    private final byte[][] newLineSequences;
    /** Byte length of the encoded CRLF; that many leading bytes of a block are left to the next block. */
    private final int avoidNewlineSplitBufferSize;
    /** Number of bytes the backwards scan moves per step (1, or 2 for UTF-16BE/LE). */
    private final int byteDecrement;
    /** The part currently being scanned; set to {@code null} by {@code readLine()} once no parts remain. */
    private FilePart currentFilePart;
    /** True once {@code readLine()} has dealt with the empty line that a trailing line separator produces. */
    private boolean trailingNewlineOfFileSkipped;
291
292    /**
293     * Constructs a ReversedLinesFileReader with default block size of 4KB and the
294     * platform's default encoding.
295     *
296     * @param file the file to be read
297     * @throws IOException if an I/O error occurs.
298     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
299     */
300    @Deprecated
301    public ReversedLinesFileReader(final File file) throws IOException {
302        this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
303    }
304
305    /**
306     * Constructs a ReversedLinesFileReader with default block size of 4KB and the
307     * specified encoding.
308     *
309     * @param file    the file to be read
310     * @param charset the charset to use, null uses the default Charset.
311     * @throws IOException if an I/O error occurs.
312     * @since 2.5
313     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
314     */
315    @Deprecated
316    public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
317        this(file.toPath(), charset);
318    }
319
    /**
     * Constructs a ReversedLinesFileReader with the given block size and encoding.
     * <p>
     * The file is converted with {@link File#toPath()} and handed to the {@link Path}-based constructor.
     * </p>
     *
     * @param file      the file to be read
     * @param blockSize size of the internal buffer (for ideal performance this
     *                  should match with the block size of the underlying file
     *                  system).
     * @param charset  the encoding of the file, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.3
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
        this(file.toPath(), blockSize, charset);
    }
336
337    /**
338     * Constructs a ReversedLinesFileReader with the given block size and encoding.
339     *
340     * @param file      the file to be read
341     * @param blockSize size of the internal buffer (for ideal performance this
342     *                  should match with the block size of the underlying file
343     *                  system).
344     * @param charsetName  the encoding of the file, null uses the default Charset.
345     * @throws IOException                                  if an I/O error occurs
346     * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
347     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
348     */
349    @Deprecated
350    public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
351        this(file.toPath(), blockSize, charsetName);
352    }
353
    /**
     * Constructs a ReversedLinesFileReader with the default block size (the block size reported by the current
     * {@link FileSystem}) and the specified encoding.
     *
     * @param file    the file to be read
     * @param charset the charset to use, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.7
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
        this(file, DEFAULT_BLOCK_SIZE, charset);
    }
368
369    /**
370     * Constructs a ReversedLinesFileReader with the given block size and encoding.
371     *
372     * @param file      the file to be read
373     * @param blockSize size of the internal buffer (for ideal performance this
374     *                  should match with the block size of the underlying file
375     *                  system).
376     * @param charset  the encoding of the file, null uses the default Charset.
377     * @throws IOException if an I/O error occurs.
378     * @since 2.7
379     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
380     */
381    @Deprecated
382    public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
383        this.blockSize = blockSize;
384        this.charset = Charsets.toCharset(charset);
385
386        // --- check & prepare encoding ---
387        final CharsetEncoder charsetEncoder = this.charset.newEncoder();
388        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
389        if (maxBytesPerChar == 1f || this.charset == StandardCharsets.UTF_8) {
390            // all one byte encodings are no problem
391            byteDecrement = 1;
392        } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8
393        // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
394                this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
395                this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
396                this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
397                this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
398            byteDecrement = 1;
399        } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) {
400            // UTF-16 new line sequences are not allowed as second tuple of four byte
401            // sequences,
402            // however byte order has to be specified
403            byteDecrement = 2;
404        } else if (this.charset == StandardCharsets.UTF_16) {
405            throw new UnsupportedEncodingException(
406                    "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
407        } else {
408            throw new UnsupportedEncodingException(
409                    "Encoding " + charset + " is not supported yet (feel free to " + "submit a patch)");
410        }
411
412        // NOTE: The new line sequences are matched in the order given, so it is
413        // important that \r\n is BEFORE \n
414        this.newLineSequences = new byte[][] {
415            StandardLineSeparator.CRLF.getBytes(this.charset),
416            StandardLineSeparator.LF.getBytes(this.charset),
417            StandardLineSeparator.CR.getBytes(this.charset)
418        };
419
420        this.avoidNewlineSplitBufferSize = newLineSequences[0].length;
421
422        // Open file
423        this.channel = Files.newByteChannel(file, StandardOpenOption.READ);
424        this.totalByteLength = channel.size();
425        int lastBlockLength = (int) (this.totalByteLength % blockSize);
426        if (lastBlockLength > 0) {
427            this.totalBlockCount = this.totalByteLength / blockSize + 1;
428        } else {
429            this.totalBlockCount = this.totalByteLength / blockSize;
430            if (this.totalByteLength > 0) {
431                lastBlockLength = blockSize;
432            }
433        }
434        this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
435
436    }
437
    /**
     * Constructs a ReversedLinesFileReader with the given block size and the encoding of the given name.
     * <p>
     * The name is resolved with {@code Charsets.toCharset(String)} before delegating to the
     * {@link Charset}-based constructor.
     * </p>
     *
     * @param file        the file to be read
     * @param blockSize   size of the internal buffer (for ideal performance this
     *                    should match with the block size of the underlying file
     *                    system).
     * @param charsetName the encoding of the file, null uses the default Charset.
     * @throws IOException                                  if an I/O error occurs
     * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
     * @since 2.7
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
        this(file, blockSize, Charsets.toCharset(charsetName));
    }
455
    /**
     * Closes underlying resources, that is, the channel opened on the file by the constructor.
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void close() throws IOException {
        channel.close();
    }
465
466    /**
467     * Returns the lines of the file from bottom to top.
468     *
469     * @return the next line or null if the start of the file is reached
470     * @throws IOException if an I/O error occurs.
471     */
472    public String readLine() throws IOException {
473
474        String line = currentFilePart.readLine();
475        while (line == null) {
476            currentFilePart = currentFilePart.rollOver();
477            if (currentFilePart == null) {
478                // no more FileParts: we're done, leave line set to null
479                break;
480            }
481            line = currentFilePart.readLine();
482        }
483
484        // aligned behavior with BufferedReader that doesn't return a last, empty line
485        if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
486            trailingNewlineOfFileSkipped = true;
487            line = readLine();
488        }
489
490        return line;
491    }
492
493    /**
494     * Returns {@code lineCount} lines of the file from bottom to top.
495     * <p>
496     * If there are less than {@code lineCount} lines in the file, then that's what
497     * you get.
498     * </p>
499     * <p>
500     * Note: You can easily flip the result with {@link Collections#reverse(List)}.
501     * </p>
502     *
503     * @param lineCount How many lines to read.
504     * @return A new list
505     * @throws IOException if an I/O error occurs.
506     * @since 2.8.0
507     */
508    public List<String> readLines(final int lineCount) throws IOException {
509        if (lineCount < 0) {
510            throw new IllegalArgumentException("lineCount < 0");
511        }
512        final ArrayList<String> arrayList = new ArrayList<>(lineCount);
513        for (int i = 0; i < lineCount; i++) {
514            final String line = readLine();
515            if (line == null) {
516                return arrayList;
517            }
518            arrayList.add(line);
519        }
520        return arrayList;
521    }
522
523    /**
524     * Returns the last {@code lineCount} lines of the file.
525     * <p>
526     * If there are less than {@code lineCount} lines in the file, then that's what
527     * you get.
528     * </p>
529     *
530     * @param lineCount How many lines to read.
531     * @return A String.
532     * @throws IOException if an I/O error occurs.
533     * @since 2.8.0
534     */
535    public String toString(final int lineCount) throws IOException {
536        final List<String> lines = readLines(lineCount);
537        Collections.reverse(lines);
538        return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
539    }
540
541}