ReversedLinesFileReader.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.input;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.FileSystem;
import org.apache.commons.io.StandardLineSeparator;
import org.apache.commons.io.build.AbstractStreamBuilder;

/**
 * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files.
 * <p>
 * To build an instance, use {@link Builder}.
 * </p>
 *
 * @see Builder
 * @since 2.2
 */
public class ReversedLinesFileReader implements Closeable {

    // @formatter:off
    /**
     * Builds a new {@link ReversedLinesFileReader}.
     *
     * <p>
     * For example:
     * </p>
     * <pre>{@code
     * ReversedLinesFileReader r = ReversedLinesFileReader.builder()
     *   .setPath(path)
     *   .setBufferSize(4096)
     *   .setCharset(StandardCharsets.UTF_8)
     *   .get();}
     * </pre>
     *
     * @see #get()
     * @since 2.12.0
     */
    // @formatter:on
    public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {

        /**
         * Constructs a new {@link Builder}.
         */
        public Builder() {
            setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
            setBufferSize(DEFAULT_BLOCK_SIZE);
        }

        /**
         * Builds a new {@link ReversedLinesFileReader}.
         * <p>
         * You must set input that supports {@link #getInputStream()} on this builder, otherwise, this method throws an exception.
         * </p>
         * <p>
         * This builder use the following aspects:
         * </p>
         * <ul>
         * <li>{@link #getInputStream()}</li>
         * <li>{@link #getBufferSize()}</li>
         * <li>{@link #getCharset()}</li>
         * </ul>
         *
         * @return a new instance.
         * @throws IllegalStateException         if the {@code origin} is {@code null}.
         * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
         * @throws IOException                   if an I/O error occurs.
         * @see #getPath()
         * @see #getBufferSize()
         * @see #getCharset()
         */
        @Override
        public ReversedLinesFileReader get() throws IOException {
            return new ReversedLinesFileReader(getPath(), getBufferSize(), getCharset());
        }

    }

    private final class FilePart {
        private final long no;

        private final byte[] data;

        private byte[] leftOver;

        private int currentLastBytePos;

        /**
         * Constructs a new instance.
         *
         * @param no                     the part number
         * @param length                 its length
         * @param leftOverOfLastFilePart remainder
         * @throws IOException if there is a problem reading the file
         */
        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
            this.no = no;
            final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
            this.data = new byte[dataLength];
            final long off = (no - 1) * blockSize;

            // read data
            if (no > 0 /* file not empty */) {
                channel.position(off);
                final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
                if (countRead != length) {
                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
                }
            }
            // copy left over part into data arr
            if (leftOverOfLastFilePart != null) {
                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
            }
            this.currentLastBytePos = data.length - 1;
            this.leftOver = null;
        }

        /**
         * Constructs the buffer containing any leftover bytes.
         */
        private void createLeftOver() {
            final int lineLengthBytes = currentLastBytePos + 1;
            if (lineLengthBytes > 0) {
                // create left over for next block
                leftOver = Arrays.copyOf(data, lineLengthBytes);
            } else {
                leftOver = null;
            }
            currentLastBytePos = -1;
        }

        /**
         * Finds the new-line sequence and return its length.
         *
         * @param data buffer to scan
         * @param i    start offset in buffer
         * @return length of newline sequence or 0 if none found
         */
        private int getNewLineMatchByteCount(final byte[] data, final int i) {
            for (final byte[] newLineSequence : newLineSequences) {
                boolean match = true;
                for (int j = newLineSequence.length - 1; j >= 0; j--) {
                    final int k = i + j - (newLineSequence.length - 1);
                    match &= k >= 0 && data[k] == newLineSequence[j];
                }
                if (match) {
                    return newLineSequence.length;
                }
            }
            return 0;
        }

        /**
         * Reads a line.
         *
         * @return the line or null
         */
        private String readLine() { //NOPMD Bug in PMD

            String line = null;
            int newLineMatchByteCount;

            final boolean isLastFilePart = no == 1;

            int i = currentLastBytePos;
            while (i > -1) {

                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
                    // avoidNewlineSplitBuffer: for all except the last file part we
                    // take a few bytes to the next file part to avoid splitting of newlines
                    createLeftOver();
                    break; // skip last few bytes and leave it to the next file part
                }

                // check for newline
                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
                    final int lineStart = i + 1;
                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;

                    if (lineLengthBytes < 0) {
                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
                    }
                    final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);

                    line = new String(lineData, charset);

                    currentLastBytePos = i - newLineMatchByteCount;
                    break; // found line
                }

                // move cursor
                i -= byteDecrement;

                // end of file part handling
                if (i < 0) {
                    createLeftOver();
                    break; // end of file part
                }
            }

            // last file part handling
            if (isLastFilePart && leftOver != null) {
                // there will be no line break anymore, this is the first line of the file
                line = new String(leftOver, charset);
                leftOver = null;
            }

            return line;
        }

        /**
         * Handles block rollover
         *
         * @return the new FilePart or null
         * @throws IOException if there was a problem reading the file
         */
        private FilePart rollOver() throws IOException {

            if (currentLastBytePos > -1) {
                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
            }

            if (no > 1) {
                return new FilePart(no - 1, blockSize, leftOver);
            }
            // NO 1 was the last FilePart, we're finished
            if (leftOver != null) {
                throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
                        + new String(leftOver, charset));
            }
            return null;
        }
    }

    private static final String EMPTY_STRING = "";

    private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }

    private final int blockSize;
    private final Charset charset;
    private final SeekableByteChannel channel;
    private final long totalByteLength;
    private final long totalBlockCount;
    private final byte[][] newLineSequences;
    private final int avoidNewlineSplitBufferSize;
    private final int byteDecrement;
    private FilePart currentFilePart;
    private boolean trailingNewlineOfFileSkipped;

    /**
     * Constructs a ReversedLinesFileReader with default block size of 4KB and the
     * platform's default encoding.
     *
     * @param file the file to be read
     * @throws IOException if an I/O error occurs.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file) throws IOException {
        this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
    }

    /**
     * Constructs a ReversedLinesFileReader with default block size of 4KB and the
     * specified encoding.
     *
     * @param file    the file to be read
     * @param charset the charset to use, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.5
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
        this(file.toPath(), charset);
    }

    /**
     * Constructs a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file      the file to be read
     * @param blockSize size of the internal buffer (for ideal performance this
     *                  should match with the block size of the underlying file
     *                  system).
     * @param charset  the encoding of the file, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.3
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
        this(file.toPath(), blockSize, charset);
    }

    /**
     * Constructs a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file      the file to be read
     * @param blockSize size of the internal buffer (for ideal performance this
     *                  should match with the block size of the underlying file
     *                  system).
     * @param charsetName  the encoding of the file, null uses the default Charset.
     * @throws IOException                                  if an I/O error occurs
     * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
        this(file.toPath(), blockSize, charsetName);
    }

    /**
     * Constructs a ReversedLinesFileReader with default block size of 4KB and the
     * specified encoding.
     *
     * @param file    the file to be read
     * @param charset the charset to use, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.7
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
        this(file, DEFAULT_BLOCK_SIZE, charset);
    }

    /**
     * Constructs a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file      the file to be read
     * @param blockSize size of the internal buffer (for ideal performance this
     *                  should match with the block size of the underlying file
     *                  system).
     * @param charset  the encoding of the file, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.7
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
        this.blockSize = blockSize;
        this.charset = Charsets.toCharset(charset);

        // --- check & prepare encoding ---
        final CharsetEncoder charsetEncoder = this.charset.newEncoder();
        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
        if (maxBytesPerChar == 1f || this.charset == StandardCharsets.UTF_8) {
            // all one byte encodings are no problem
            byteDecrement = 1;
        } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8
        // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
                this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
                this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
                this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
                this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
            byteDecrement = 1;
        } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) {
            // UTF-16 new line sequences are not allowed as second tuple of four byte
            // sequences,
            // however byte order has to be specified
            byteDecrement = 2;
        } else if (this.charset == StandardCharsets.UTF_16) {
            throw new UnsupportedEncodingException(
                    "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
        } else {
            throw new UnsupportedEncodingException(
                    "Encoding " + charset + " is not supported yet (feel free to " + "submit a patch)");
        }

        // NOTE: The new line sequences are matched in the order given, so it is
        // important that \r\n is BEFORE \n
        this.newLineSequences = new byte[][] {
            StandardLineSeparator.CRLF.getBytes(this.charset),
            StandardLineSeparator.LF.getBytes(this.charset),
            StandardLineSeparator.CR.getBytes(this.charset)
        };

        this.avoidNewlineSplitBufferSize = newLineSequences[0].length;

        // Open file
        this.channel = Files.newByteChannel(file, StandardOpenOption.READ);
        this.totalByteLength = channel.size();
        int lastBlockLength = (int) (this.totalByteLength % blockSize);
        if (lastBlockLength > 0) {
            this.totalBlockCount = this.totalByteLength / blockSize + 1;
        } else {
            this.totalBlockCount = this.totalByteLength / blockSize;
            if (this.totalByteLength > 0) {
                lastBlockLength = blockSize;
            }
        }
        this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);

    }

    /**
     * Constructs a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file        the file to be read
     * @param blockSize   size of the internal buffer (for ideal performance this
     *                    should match with the block size of the underlying file
     *                    system).
     * @param charsetName the encoding of the file, null uses the default Charset.
     * @throws IOException                                  if an I/O error occurs
     * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
     * @since 2.7
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
        this(file, blockSize, Charsets.toCharset(charsetName));
    }

    /**
     * Closes underlying resources.
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void close() throws IOException {
        channel.close();
    }

    /**
     * Returns the lines of the file from bottom to top.
     *
     * @return the next line or null if the start of the file is reached
     * @throws IOException if an I/O error occurs.
     */
    public String readLine() throws IOException {

        String line = currentFilePart.readLine();
        while (line == null) {
            currentFilePart = currentFilePart.rollOver();
            if (currentFilePart == null) {
                // no more FileParts: we're done, leave line set to null
                break;
            }
            line = currentFilePart.readLine();
        }

        // aligned behavior with BufferedReader that doesn't return a last, empty line
        if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
            trailingNewlineOfFileSkipped = true;
            line = readLine();
        }

        return line;
    }

    /**
     * Returns {@code lineCount} lines of the file from bottom to top.
     * <p>
     * If there are less than {@code lineCount} lines in the file, then that's what
     * you get.
     * </p>
     * <p>
     * Note: You can easily flip the result with {@link Collections#reverse(List)}.
     * </p>
     *
     * @param lineCount How many lines to read.
     * @return A new list
     * @throws IOException if an I/O error occurs.
     * @since 2.8.0
     */
    public List<String> readLines(final int lineCount) throws IOException {
        if (lineCount < 0) {
            throw new IllegalArgumentException("lineCount < 0");
        }
        final ArrayList<String> arrayList = new ArrayList<>(lineCount);
        for (int i = 0; i < lineCount; i++) {
            final String line = readLine();
            if (line == null) {
                return arrayList;
            }
            arrayList.add(line);
        }
        return arrayList;
    }

    /**
     * Returns the last {@code lineCount} lines of the file.
     * <p>
     * If there are less than {@code lineCount} lines in the file, then that's what
     * you get.
     * </p>
     *
     * @param lineCount How many lines to read.
     * @return A String.
     * @throws IOException if an I/O error occurs.
     * @since 2.8.0
     */
    public String toString(final int lineCount) throws IOException {
        final List<String> lines = readLines(lineCount);
        Collections.reverse(lines);
        return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
    }

}