CSVParser.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.commons.csv;

import static org.apache.commons.csv.Token.Type.TOKEN;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.io.UncheckedIOException;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.TreeMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.function.Uncheck;

/**
 * Parses CSV files according to the specified format.
 *
 * Because CSV appears in many different dialects, the parser supports many formats by allowing the
 * specification of a {@link CSVFormat}.
 *
 * The parser works record-wise. It is not possible to go back, once a record has been parsed from the input stream.
 *
 * <h2>Creating instances</h2>
 * <p>
 * There are several static factory methods that can be used to create instances for various types of resources:
 * </p>
 * <ul>
 *     <li>{@link #parse(java.io.File, Charset, CSVFormat)}</li>
 *     <li>{@link #parse(String, CSVFormat)}</li>
 *     <li>{@link #parse(java.net.URL, java.nio.charset.Charset, CSVFormat)}</li>
 * </ul>
 * <p>
 * Alternatively parsers can also be created by passing a {@link Reader} directly to the sole constructor.
 *
 * For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut:
 * </p>
 * <pre>
 * for (CSVRecord record : CSVFormat.EXCEL.parse(in)) {
 *     ...
 * }
 * </pre>
 *
 * <h2>Parsing record wise</h2>
 * <p>
 * To parse a CSV input from a file, you write:
 * </p>
 *
 * <pre>{@code
 * File csvData = new File("/path/to/csv");
 * CSVParser parser = CSVParser.parse(csvData, CSVFormat.RFC4180);
 * for (CSVRecord csvRecord : parser) {
 *     ...
 * }}
 * </pre>
 *
 * <p>
 * This will read the parse the contents of the file using the
 * <a href="https://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a> format.
 * </p>
 *
 * <p>
 * To parse CSV input in a format like Excel, you write:
 * </p>
 *
 * <pre>
 * CSVParser parser = CSVParser.parse(csvData, CSVFormat.EXCEL);
 * for (CSVRecord csvRecord : parser) {
 *     ...
 * }
 * </pre>
 *
 * <p>
 * If the predefined formats don't match the format at hand, custom formats can be defined. More information about
 * customizing CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}.
 * </p>
 *
 * <h2>Parsing into memory</h2>
 * <p>
 * If parsing record-wise is not desired, the contents of the input can be read completely into memory.
 * </p>
 *
 * <pre>{@code
 * Reader in = new StringReader("a;b\nc;d");
 * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL);
 * List<CSVRecord> list = parser.getRecords();
 * }</pre>
 *
 * <p>
 * There are two constraints that have to be kept in mind:
 * </p>
 *
 * <ol>
 *     <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from
 *     the input, those records will not end up in the in-memory representation of your CSV data.</li>
 *     <li>Parsing into memory may consume a lot of system resources depending on the input. For example, if you're
 *     parsing a 150MB file of CSV data the contents will be read completely into memory.</li>
 * </ol>
 *
 * <h2>Notes</h2>
 * <p>
 * The internal parser state is completely covered by the format and the reader state.
 * </p>
 *
 * @see <a href="package-summary.html">package documentation for more details</a>
 */
public final class CSVParser implements Iterable<CSVRecord>, Closeable {

    /**
     * Builds a new {@link CSVParser}.
     *
     * @since 1.13.0
     */
    public static class Builder extends AbstractStreamBuilder<CSVParser, Builder> {

        private CSVFormat format;
        private long characterOffset;
        private long recordNumber = 1;
        private boolean trackBytes;

        /**
         * Constructs a new instance.
         */
        protected Builder() {
            // empty
        }

        @SuppressWarnings("resource")
        @Override
        public CSVParser get() throws IOException {
            return new CSVParser(getReader(), format != null ? format : CSVFormat.DEFAULT, characterOffset, recordNumber, getCharset(), trackBytes);
        }

        /**
         * Sets the lexer offset when the parser does not start parsing at the beginning of the source.
         *
         * @param characterOffset the lexer offset.
         * @return this instance.
         */
        public Builder setCharacterOffset(final long characterOffset) {
            this.characterOffset = characterOffset;
            return asThis();
        }

        /**
         * Sets the CSV format. A copy of the given format is kept.
         *
         * @param format the CSV format, {@code null} resets to {@link CSVFormat#DEFAULT}.
         * @return this instance.
         */
        public Builder setFormat(final CSVFormat format) {
            this.format = CSVFormat.copy(format);
            return asThis();
        }

        /**
         * Sets the next record number to assign, defaults to {@code 1}.
         *
         * @param recordNumber the next record number to assign.
         * @return this instance.
         */
        public Builder setRecordNumber(final long recordNumber) {
            this.recordNumber = recordNumber;
            return asThis();
        }

        /**
         * Sets whether to enable byte tracking for the parser.
         *
         * @param trackBytes {@code true} to enable byte tracking; {@code false} to disable it.
         * @return this instance.
         * @since 1.13.0
         */
        public Builder setTrackBytes(final boolean trackBytes) {
            this.trackBytes = trackBytes;
            return asThis();
        }

    }

    final class CSVRecordIterator implements Iterator<CSVRecord> {
        private CSVRecord current;

        /**
         * Gets the next record or null at the end of stream or max rows read.
         *
         * @throws IOException  on parse error or input read-failure
         * @throws CSVException on invalid input.
         * @return the next record, or {@code null} if the end of the stream has been reached.
         */
        private CSVRecord getNextRecord() {
            CSVRecord record = null;
            if (format.useRow(recordNumber + 1)) {
                record = Uncheck.get(CSVParser.this::nextRecord);
            }
            return record;
        }

        @Override
        public boolean hasNext() {
            if (isClosed()) {
                return false;
            }
            if (current == null) {
                current = getNextRecord();
            }
            return current != null;
        }

        @Override
        public CSVRecord next() {
            if (isClosed()) {
                throw new NoSuchElementException("CSVParser has been closed");
            }
            CSVRecord next = current;
            current = null;
            if (next == null) {
                // hasNext() wasn't called before
                next = getNextRecord();
                if (next == null) {
                    throw new NoSuchElementException("No more CSV records available");
                }
            }
            return next;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }
    /**
     * Header information based on name and position.
     */
    private static final class Headers {

        /**
         * Header column positions (0-based)
         */
        final Map<String, Integer> headerMap;

        /**
         * Header names in column order
         */
        final List<String> headerNames;

        Headers(final Map<String, Integer> headerMap, final List<String> headerNames) {
            this.headerMap = headerMap;
            this.headerNames = headerNames;
        }
    }

    /**
     * Creates a new builder.
     *
     * @return a new builder.
     * @since 1.13.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Creates a parser for the given {@link File}.
     *
     * @param file
     *            a CSV file. Must not be null.
     * @param charset
     *            The Charset to decode the given file, {@code null} maps to the {@link Charset#defaultCharset() default Charset}.
     * @param format
     *            the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}.
     * @return a new parser
     * @throws IllegalArgumentException
     *             If the parameters of the format are inconsistent.
     * @throws IOException
     *             If an I/O error occurs
     * @throws CSVException Thrown on invalid CSV input data.
     * @throws NullPointerException if {@code file} is {@code null}.
     */
    public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException {
        Objects.requireNonNull(file, "file");
        return parse(file.toPath(), charset, format);
    }

    /**
     * Creates a CSV parser using the given {@link CSVFormat}.
     *
     * <p>
     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
     * unless you close the {@code reader}.
     * </p>
     *
     * @param inputStream
     *            an InputStream containing CSV-formatted input, {@code null} maps to {@link CSVFormat#DEFAULT}.
     * @param charset
     *            The Charset to decode the given file, {@code null} maps to the {@link Charset#defaultCharset() default Charset}.
     * @param format
     *            the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}.
     * @return a new CSVParser configured with the given reader and format.
     * @throws IllegalArgumentException
     *             If the parameters of the format are inconsistent or if either reader or format are null.
     * @throws IOException
     *             If there is a problem reading the header or skipping the first record
     * @throws CSVException Thrown on invalid CSV input data.
     * @since 1.5
     */
    public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format)
            throws IOException {
        return parse(new InputStreamReader(inputStream, Charsets.toCharset(charset)), format);
    }

    /**
     * Creates and returns a parser for the given {@link Path}, which the caller MUST close.
     *
     * @param path
     *            a CSV file. Must not be null.
     * @param charset
     *            The Charset to decode the given file, {@code null} maps to the {@link Charset#defaultCharset() default Charset}.
     * @param format
     *            the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}.
     * @return a new parser
     * @throws IllegalArgumentException
     *             If the parameters of the format are inconsistent.
     * @throws IOException
     *             If an I/O error occurs
     * @throws CSVException Thrown on invalid CSV input data.
     * @throws NullPointerException if {@code path} is {@code null}.
     * @since 1.5
     */
    @SuppressWarnings("resource")
    public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException {
        Objects.requireNonNull(path, "path");
        return parse(Files.newInputStream(path), charset, format);
    }

    /**
     * Creates a CSV parser using the given {@link CSVFormat}
     *
     * <p>
     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
     * unless you close the {@code reader}.
     * </p>
     *
     * @param reader
     *            a Reader containing CSV-formatted input. Must not be null.
     * @param format
     *            the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}.
     * @return a new CSVParser configured with the given reader and format.
     * @throws IllegalArgumentException
     *             If the parameters of the format are inconsistent or if either reader or format are null.
     * @throws IOException
     *             If there is a problem reading the header or skipping the first record
     * @throws CSVException Thrown on invalid CSV input data.
     * @since 1.5
     */
    public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException {
        return builder().setReader(reader).setFormat(format).get();
    }

    /**
     * Creates a parser for the given {@link String}.
     *
     * @param string
     *            a CSV string. Must not be null.
     * @param format
     *            the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}.
     * @return a new parser
     * @throws IllegalArgumentException
     *             If the parameters of the format are inconsistent.
     * @throws IOException
     *             If an I/O error occurs
     * @throws CSVException Thrown on invalid CSV input data.
     * @throws NullPointerException if {@code string} is {@code null}.
     */
    public static CSVParser parse(final String string, final CSVFormat format) throws IOException {
        Objects.requireNonNull(string, "string");
        return parse(new StringReader(string), format);
    }

    /**
     * Creates and returns a parser for the given URL, which the caller MUST close.
     *
     * <p>
     * If you do not read all records from the given {@code url}, you should call {@link #close()} on the parser, unless
     * you close the {@code url}.
     * </p>
     *
     * @param url
     *            a URL. Must not be null.
     * @param charset
     *            the charset for the resource, {@code null} maps to the {@link Charset#defaultCharset() default Charset}.
     * @param format
     *            the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}.
     * @return a new parser
     * @throws IllegalArgumentException
     *             If the parameters of the format are inconsistent.
     * @throws IOException
     *             If an I/O error occurs
     * @throws CSVException Thrown on invalid CSV input data.
     * @throws NullPointerException if {@code url} is {@code null}.
     */
    @SuppressWarnings("resource")
    public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException {
        Objects.requireNonNull(url, "url");
        return parse(url.openStream(), charset, format);
    }

    private String headerComment;

    private String trailerComment;

    private final CSVFormat format;

    private final Headers headers;

    private final Lexer lexer;

    private final CSVRecordIterator csvRecordIterator;

    /** A record buffer for getRecord(). Grows as necessary and is reused. */
    private final List<String> recordList = new ArrayList<>();

    /**
     * The next record number to assign.
     */
    private long recordNumber;

    /**
     * Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination
     * with {@link #recordNumber}.
     */
    private final long characterOffset;

    private final Token reusableToken = new Token();

    /**
     * Constructs a new instance using the given {@link CSVFormat}.
     *
     * <p>
     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
     * unless you close the {@code reader}.
     * </p>
     *
     * @param reader
     *            a Reader containing CSV-formatted input. Must not be null.
     * @param format
     *            the CSVFormat used for CSV parsing. Must not be null.
     * @throws IllegalArgumentException
     *             If the parameters of the format are inconsistent or if either reader or format are null.
     * @throws IOException
     *             If there is a problem reading the header or skipping the first record
     * @throws CSVException Thrown on invalid CSV input data.
     * @deprecated Will be removed in the next major version, use {@link Builder#get()}.
     */
    @Deprecated
    public CSVParser(final Reader reader, final CSVFormat format) throws IOException {
        this(reader, format, 0, 1);
    }

    /**
     * Constructs a new instance using the given {@link CSVFormat}.
     *
     * <p>
     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
     * unless you close the {@code reader}.
     * </p>
     *
     * @param reader
     *            a Reader containing CSV-formatted input. Must not be null.
     * @param format
     *            the CSVFormat used for CSV parsing. Must not be null.
     * @param characterOffset
     *            Lexer offset when the parser does not start parsing at the beginning of the source.
     * @param recordNumber
     *            The next record number to assign.
     * @throws IllegalArgumentException
     *             If the parameters of the format are inconsistent or if either the reader or format is null.
     * @throws IOException
     *             if there is a problem reading the header or skipping the first record
     * @throws CSVException on invalid input.
     * @since 1.1
     * @deprecated Will be removed in the next major version, use {@link Builder#get()}.
     */
    @Deprecated
    public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber) throws IOException {
        this(reader, format, characterOffset, recordNumber, null, false);
    }

    /**
     * Constructs a new instance using the given {@link CSVFormat}.
     *
     * <p>
     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
     * unless you close the {@code reader}.
     * </p>
     *
     * @param reader
     *            a Reader containing CSV-formatted input. Must not be null.
     * @param format
     *            the CSVFormat used for CSV parsing. Must not be null.
     * @param characterOffset
     *            Lexer offset when the parser does not start parsing at the beginning of the source.
     * @param recordNumber
     *            The next record number to assign.
     * @param charset
     *            The character encoding to be used for the reader when enableByteTracking is true.
     * @param trackBytes
     *           {@code true} to enable byte tracking for the parser; {@code false} to disable it.
     * @throws IllegalArgumentException
     *             If the parameters of the format are inconsistent or if either the reader or format is null.
     * @throws IOException
     *             If there is a problem reading the header or skipping the first record.
     * @throws CSVException Thrown on invalid CSV input data.
     */
    private CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber,
        final Charset charset, final boolean trackBytes)
        throws IOException {
        Objects.requireNonNull(reader, "reader");
        Objects.requireNonNull(format, "format");
        this.format = format.copy();
        this.lexer = new Lexer(format, new ExtendedBufferedReader(reader, charset, trackBytes));
        this.csvRecordIterator = new CSVRecordIterator();
        this.headers = createHeaders();
        this.characterOffset = characterOffset;
        this.recordNumber = recordNumber - 1;
    }

    private void addRecordValue(final boolean lastRecord) {
        final String input = format.trim(reusableToken.content.toString());
        if (lastRecord && input.isEmpty() && format.getTrailingDelimiter()) {
            return;
        }
        recordList.add(handleNull(input));
    }

    /**
     * Closes resources.
     *
     * @throws IOException
     *             If an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        lexer.close();
    }

    private Map<String, Integer> createEmptyHeaderMap() {
        return format.getIgnoreHeaderCase() ?
                new TreeMap<>(String.CASE_INSENSITIVE_ORDER) :
                new LinkedHashMap<>();
    }

    /**
     * Creates the name to index mapping if the format defines a header.
     *
     * @return null if the format has no header.
     * @throws IOException if there is a problem reading the header or skipping the first record
     * @throws CSVException on invalid input.
     */
    private Headers createHeaders() throws IOException {
        Map<String, Integer> headerMap = null;
        List<String> headerNames = null;
        final String[] formatHeader = format.getHeader();
        if (formatHeader != null) {
            headerMap = createEmptyHeaderMap();
            String[] headerRecord = null;
            if (formatHeader.length == 0) {
                // read the header from the first line of the file
                final CSVRecord nextRecord = nextRecord();
                if (nextRecord != null) {
                    headerRecord = nextRecord.values();
                    headerComment = nextRecord.getComment();
                }
            } else {
                if (format.getSkipHeaderRecord()) {
                    final CSVRecord nextRecord = nextRecord();
                    if (nextRecord != null) {
                        headerComment = nextRecord.getComment();
                    }
                }
                headerRecord = formatHeader;
            }
            // build the name to index mappings
            if (headerRecord != null) {
                // Track an occurrence of a null, empty or blank header.
                boolean observedMissing = false;
                for (int i = 0; i < headerRecord.length; i++) {
                    final String header = headerRecord[i];
                    final boolean blankHeader = CSVFormat.isBlank(header);
                    if (blankHeader && !format.getAllowMissingColumnNames()) {
                        throw new IllegalArgumentException("A header name is missing in " + Arrays.toString(headerRecord));
                    }
                    final boolean containsHeader = blankHeader ? observedMissing : headerMap.containsKey(header);
                    final DuplicateHeaderMode headerMode = format.getDuplicateHeaderMode();
                    final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL;
                    final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY;
                    if (containsHeader && !duplicatesAllowed && !(blankHeader && emptyDuplicatesAllowed)) {
                        throw new IllegalArgumentException(String.format(
                                "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().",
                                header, Arrays.toString(headerRecord)));
                    }
                    observedMissing |= blankHeader;
                    if (header != null) {
                        headerMap.put(header, Integer.valueOf(i)); // Explicit (un)boxing is intentional
                        if (headerNames == null) {
                            headerNames = new ArrayList<>(headerRecord.length);
                        }
                        headerNames.add(header);
                    }
                }
            }
        }
        // Make header names Collection immutable
        return new Headers(headerMap, headerNames == null ? Collections.emptyList() : Collections.unmodifiableList(headerNames));
    }

    /**
     * Gets the current line number in the input stream.
     *
     * <p>
     * <strong>Note:</strong> If your CSV input has multi-line values, the returned number does not correspond to
     * the record number.
     * </p>
     *
     * @return current line number.
     */
    public long getCurrentLineNumber() {
        return lexer.getCurrentLineNumber();
    }

    /**
     * Gets the first end-of-line string encountered.
     *
     * @return the first end-of-line string.
     * @since 1.5
     */
    public String getFirstEndOfLine() {
        return lexer.getFirstEol();
    }

    /**
     * Gets the header comment, if any.
     * The header comment appears before the header record.
     *
     * @return the header comment for this stream, or null if no comment is available.
     * @since 1.10.0
     */
    public String getHeaderComment() {
        return headerComment;
    }

    /**
     * Gets a copy of the header map as defined in the CSVFormat's header.
     * <p>
     * The map keys are column names. The map values are 0-based indices.
     * </p>
     * <p>
     * <strong>Note:</strong> The map can only provide a one-to-one mapping when the format did not
     * contain null or duplicate column names.
     * </p>
     *
     * @return a copy of the header map.
     */
    public Map<String, Integer> getHeaderMap() {
        if (headers.headerMap == null) {
            return null;
        }
        final Map<String, Integer> map = createEmptyHeaderMap();
        map.putAll(headers.headerMap);
        return map;
    }

    /**
     * Gets the underlying header map.
     *
     * @return the underlying header map.
     */
    Map<String, Integer> getHeaderMapRaw() {
        return headers.headerMap;
    }

    /**
     * Gets a read-only list of header names that iterates in column order as defined in the CSVFormat's header.
     * <p>
     * Note: The list provides strings that can be used as keys in the header map.
     * The list will not contain null column names if they were present in the input
     * format.
     * </p>
     *
     * @return read-only list of header names that iterates in column order.
     * @see #getHeaderMap()
     * @since 1.7
     */
    public List<String> getHeaderNames() {
        return Collections.unmodifiableList(headers.headerNames);
    }

    /**
     * Gets the current record number in the input stream.
     *
     * <p>
     * <strong>Note:</strong> If your CSV input has multi-line values, the returned number does not correspond to
     * the line number.
     * </p>
     *
     * @return current record number
     */
    public long getRecordNumber() {
        return recordNumber;
    }

    /**
     * Parses the CSV input according to the given format and returns the content as a list of
     * {@link CSVRecord CSVRecords}.
     *
     * <p>
     * The returned content starts at the current parse-position in the stream.
     * </p>
     * <p>
     * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows this method produces.
     * </p>
     *
     * @return list of {@link CSVRecord CSVRecords}, may be empty
     * @throws UncheckedIOException
     *             on parse error or input read-failure
     */
    public List<CSVRecord> getRecords() {
        return stream().collect(Collectors.toList());
    }

    /**
     * Gets the trailer comment, if any.
     * Trailer comments are located between the last record and EOF
     *
     * @return the trailer comment for this stream, or null if no comment is available.
     * @since 1.10.0
     */
    public String getTrailerComment() {
        return trailerComment;
    }

    /**
     * Handles whether the input is parsed as null
     *
     * @param input
     *           the cell data to further processed
     * @return null if input is parsed as null, or input itself if the input isn't parsed as null
     */
    private String handleNull(final String input) {
        final boolean isQuoted = reusableToken.isQuoted;
        final String nullString = format.getNullString();
        final boolean strictQuoteMode = isStrictQuoteMode();
        if (input.equals(nullString)) {
            // nullString = NULL(String), distinguish between "NULL" and NULL in ALL_NON_NULL or NON_NUMERIC quote mode
            return strictQuoteMode && isQuoted ? input : null;
        }
        // don't set nullString, distinguish between "" and ,, (absent values) in All_NON_NULL or NON_NUMERIC quote mode
        return strictQuoteMode && nullString == null && input.isEmpty() && !isQuoted ? null : input;
    }

    /**
     * Checks whether there is a header comment.
     * The header comment appears before the header record.
     * Note that if the parser's format has been given an explicit header
     * (with {@link CSVFormat.Builder#setHeader(String... )} or another overload)
     * and the header record is not being skipped
     * ({@link CSVFormat.Builder#setSkipHeaderRecord} is false) then any initial comments
     * will be associated with the first record, not the header.
     *
     * @return true if this parser has seen a header comment, false otherwise
     * @since 1.10.0
     */
    public boolean hasHeaderComment() {
        return headerComment != null;
    }

    /**
     * Checks whether there is a trailer comment.
     * Trailer comments are located between the last record and EOF.
     * The trailer comments will only be available after the parser has
     * finished processing this stream.
     *
     * @return true if this parser has seen a trailer comment, false otherwise
     * @since 1.10.0
     */
    public boolean hasTrailerComment() {
        return trailerComment != null;
    }

    /**
     * Tests whether this parser is closed.
     *
     * @return whether this parser is closed.
     */
    public boolean isClosed() {
        return lexer.isClosed();
    }

    /**
     * Tests whether the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or {@link QuoteMode#NON_NUMERIC}.
     *
     * @return true if the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or
     *         {@link QuoteMode#NON_NUMERIC}.
     */
    private boolean isStrictQuoteMode() {
        return format.getQuoteMode() == QuoteMode.ALL_NON_NULL ||
               format.getQuoteMode() == QuoteMode.NON_NUMERIC;
    }

    /**
     * Returns the record iterator.
     *
     * <p>
     * An {@link IOException} caught during the iteration is re-thrown as an
     * {@link IllegalStateException}.
     * </p>
     * <p>
     * If the parser is closed, the iterator will not yield any more records.
     * A call to {@link Iterator#hasNext()} will return {@code false} and
     * a call to {@link Iterator#next()} will throw a
     * {@link NoSuchElementException}.
     * </p>
     * <p>
     * If it is necessary to construct an iterator which is usable after the
     * parser is closed, one option is to extract all records as a list with
     * {@link #getRecords()}, and return an iterator to that list.
     * </p>
     * <p>
     * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows an Iterator produces.
     * </p>
     */
    @Override
    public Iterator<CSVRecord> iterator() {
        return csvRecordIterator;
    }

    /**
     * Parses the next record from the current point in the stream.
     *
     * @return the record as an array of values, or {@code null} if the end of the stream has been reached.
     * @throws IOException  on parse error or input read-failure.
     * @throws CSVException on invalid CSV input data.
     */
    CSVRecord nextRecord() throws IOException {
        CSVRecord result = null;
        recordList.clear();
        StringBuilder sb = null;
        final long startCharPosition = lexer.getCharacterPosition() + characterOffset;
        final long startBytePosition = lexer.getBytesRead() + this.characterOffset;
        do {
            reusableToken.reset();
            lexer.nextToken(reusableToken);
            switch (reusableToken.type) {
            case TOKEN:
                addRecordValue(false);
                break;
            case EORECORD:
                addRecordValue(true);
                break;
            case EOF:
                if (reusableToken.isReady) {
                    addRecordValue(true);
                } else if (sb != null) {
                    trailerComment = sb.toString();
                }
                break;
            case INVALID:
                throw new CSVException("(line %,d) invalid parse sequence", getCurrentLineNumber());
            case COMMENT: // Ignored currently
                if (sb == null) { // first comment for this record
                    sb = new StringBuilder();
                } else {
                    sb.append(Constants.LF);
                }
                sb.append(reusableToken.content);
                reusableToken.type = TOKEN; // Read another token
                break;
            default:
                throw new CSVException("Unexpected Token type: %s", reusableToken.type);
            }
        } while (reusableToken.type == TOKEN);

        if (!recordList.isEmpty()) {
            recordNumber++;
            final String comment = Objects.toString(sb, null);
            result = new CSVRecord(this, recordList.toArray(Constants.EMPTY_STRING_ARRAY), comment,
                recordNumber, startCharPosition, startBytePosition);
        }
        return result;
    }

    /**
     * Returns a sequential {@code Stream} with this collection as its source.
     * <p>
     * If the parser is closed, the stream will not produce any more values.
     * See the comments in {@link #iterator()}.
     * </p>
     * <p>
     * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows a Stream produces.
     * </p>
     *
     * @return a sequential {@code Stream} with this collection as its source.
     * @since 1.9.0
     */
    public Stream<CSVRecord> stream() {
        return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator(), Spliterator.ORDERED), false);
    }

}