001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.csv;
019
020import static org.apache.commons.csv.Token.Type.TOKEN;
021
022import java.io.Closeable;
023import java.io.File;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.InputStreamReader;
027import java.io.Reader;
028import java.io.StringReader;
029import java.io.UncheckedIOException;
030import java.net.URL;
031import java.nio.charset.Charset;
032import java.nio.file.Files;
033import java.nio.file.Path;
034import java.util.ArrayList;
035import java.util.Arrays;
036import java.util.Collections;
037import java.util.Iterator;
038import java.util.LinkedHashMap;
039import java.util.List;
040import java.util.Map;
041import java.util.NoSuchElementException;
042import java.util.Objects;
043import java.util.Spliterator;
044import java.util.Spliterators;
045import java.util.TreeMap;
046import java.util.stream.Collectors;
047import java.util.stream.Stream;
048import java.util.stream.StreamSupport;
049
050import org.apache.commons.io.function.Uncheck;
051
052/**
053 * Parses CSV files according to the specified format.
054 *
055 * Because CSV appears in many different dialects, the parser supports many formats by allowing the
056 * specification of a {@link CSVFormat}.
057 *
058 * The parser works record-wise. It is not possible to go back, once a record has been parsed from the input stream.
059 *
060 * <h2>Creating instances</h2>
061 * <p>
062 * There are several static factory methods that can be used to create instances for various types of resources:
063 * </p>
064 * <ul>
065 *     <li>{@link #parse(java.io.File, Charset, CSVFormat)}</li>
066 *     <li>{@link #parse(String, CSVFormat)}</li>
067 *     <li>{@link #parse(java.net.URL, java.nio.charset.Charset, CSVFormat)}</li>
068 * </ul>
069 * <p>
 * Alternatively parsers can also be created by passing a {@link Reader} directly to one of the constructors.
071 *
072 * For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut:
073 * </p>
074 * <pre>
075 * for (CSVRecord record : CSVFormat.EXCEL.parse(in)) {
076 *     ...
077 * }
078 * </pre>
079 *
080 * <h2>Parsing record wise</h2>
081 * <p>
082 * To parse a CSV input from a file, you write:
083 * </p>
084 *
085 * <pre>{@code
086 * File csvData = new File("/path/to/csv");
 * CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.RFC4180);
088 * for (CSVRecord csvRecord : parser) {
089 *     ...
090 * }}
091 * </pre>
092 *
093 * <p>
 * This will read and parse the contents of the file using the
095 * <a href="https://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a> format.
096 * </p>
097 *
098 * <p>
099 * To parse CSV input in a format like Excel, you write:
100 * </p>
101 *
102 * <pre>
 * CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.EXCEL);
104 * for (CSVRecord csvRecord : parser) {
105 *     ...
106 * }
107 * </pre>
108 *
109 * <p>
110 * If the predefined formats don't match the format at hand, custom formats can be defined. More information about
111 * customizing CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}.
112 * </p>
113 *
114 * <h2>Parsing into memory</h2>
115 * <p>
116 * If parsing record-wise is not desired, the contents of the input can be read completely into memory.
117 * </p>
118 *
119 * <pre>{@code
120 * Reader in = new StringReader("a;b\nc;d");
121 * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL);
122 * List<CSVRecord> list = parser.getRecords();
123 * }</pre>
124 *
125 * <p>
126 * There are two constraints that have to be kept in mind:
127 * </p>
128 *
129 * <ol>
130 *     <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from
131 *     the input, those records will not end up in the in-memory representation of your CSV data.</li>
132 *     <li>Parsing into memory may consume a lot of system resources depending on the input. For example, if you're
133 *     parsing a 150MB file of CSV data the contents will be read completely into memory.</li>
134 * </ol>
135 *
136 * <h2>Notes</h2>
137 * <p>
138 * The internal parser state is completely covered by the format and the reader state.
139 * </p>
140 *
141 * @see <a href="package-summary.html">package documentation for more details</a>
142 */
143public final class CSVParser implements Iterable<CSVRecord>, Closeable {
144
145    final class CSVRecordIterator implements Iterator<CSVRecord> {
146        private CSVRecord current;
147
148        /**
149         * Gets the next record.
150         *
151         * @return the next record.
152         */
153        private CSVRecord getNextRecord() {
154            return Uncheck.get(CSVParser.this::nextRecord);
155        }
156
157        @Override
158        public boolean hasNext() {
159            if (isClosed()) {
160                return false;
161            }
162            if (current == null) {
163                current = getNextRecord();
164            }
165
166            return current != null;
167        }
168
169        @Override
170        public CSVRecord next() {
171            if (isClosed()) {
172                throw new NoSuchElementException("CSVParser has been closed");
173            }
174            CSVRecord next = current;
175            current = null;
176
177            if (next == null) {
178                // hasNext() wasn't called before
179                next = getNextRecord();
180                if (next == null) {
181                    throw new NoSuchElementException("No more CSV records available");
182                }
183            }
184
185            return next;
186        }
187
188        @Override
189        public void remove() {
190            throw new UnsupportedOperationException();
191        }
192    }
193
194    /**
195     * Header information based on name and position.
196     */
197    private static final class Headers {
198
199        /**
200         * Header column positions (0-based)
201         */
202        final Map<String, Integer> headerMap;
203
204        /**
205         * Header names in column order
206         */
207        final List<String> headerNames;
208
209        Headers(final Map<String, Integer> headerMap, final List<String> headerNames) {
210            this.headerMap = headerMap;
211            this.headerNames = headerNames;
212        }
213    }
214
215    /**
216     * Creates a parser for the given {@link File}.
217     *
218     * @param file
219     *            a CSV file. Must not be null.
220     * @param charset
221     *            The Charset to decode the given file.
222     * @param format
223     *            the CSVFormat used for CSV parsing. Must not be null.
224     * @return a new parser
225     * @throws IllegalArgumentException
226     *             If the parameters of the format are inconsistent or if either file or format are null.
227     * @throws IOException
228     *             If an I/O error occurs
229     * @throws CSVException Thrown on invalid input.
230     */
231    public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException {
232        Objects.requireNonNull(file, "file");
233        return parse(file.toPath(), charset, format);
234    }
235
236    /**
237     * Creates a CSV parser using the given {@link CSVFormat}.
238     *
239     * <p>
240     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
241     * unless you close the {@code reader}.
242     * </p>
243     *
244     * @param inputStream
245     *            an InputStream containing CSV-formatted input. Must not be null.
246     * @param charset
247     *            The Charset to decode the given file.
248     * @param format
249     *            the CSVFormat used for CSV parsing. Must not be null.
250     * @return a new CSVParser configured with the given reader and format.
251     * @throws IllegalArgumentException
252     *             If the parameters of the format are inconsistent or if either reader or format are null.
253     * @throws IOException
254     *             If there is a problem reading the header or skipping the first record
255     * @throws CSVException Thrown on invalid input.
256     * @since 1.5
257     */
258    @SuppressWarnings("resource")
259    public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format)
260            throws IOException {
261        Objects.requireNonNull(inputStream, "inputStream");
262        Objects.requireNonNull(format, "format");
263        return parse(new InputStreamReader(inputStream, charset), format);
264    }
265
266    /**
267     * Creates and returns a parser for the given {@link Path}, which the caller MUST close.
268     *
269     * @param path
270     *            a CSV file. Must not be null.
271     * @param charset
272     *            The Charset to decode the given file.
273     * @param format
274     *            the CSVFormat used for CSV parsing. Must not be null.
275     * @return a new parser
276     * @throws IllegalArgumentException
277     *             If the parameters of the format are inconsistent or if either file or format are null.
278     * @throws IOException
279     *             If an I/O error occurs
280     * @throws CSVException Thrown on invalid input.
281     * @since 1.5
282     */
283    @SuppressWarnings("resource")
284    public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException {
285        Objects.requireNonNull(path, "path");
286        Objects.requireNonNull(format, "format");
287        return parse(Files.newInputStream(path), charset, format);
288    }
289
290    /**
291     * Creates a CSV parser using the given {@link CSVFormat}
292     *
293     * <p>
294     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
295     * unless you close the {@code reader}.
296     * </p>
297     *
298     * @param reader
299     *            a Reader containing CSV-formatted input. Must not be null.
300     * @param format
301     *            the CSVFormat used for CSV parsing. Must not be null.
302     * @return a new CSVParser configured with the given reader and format.
303     * @throws IllegalArgumentException
304     *             If the parameters of the format are inconsistent or if either reader or format are null.
305     * @throws IOException
306     *             If there is a problem reading the header or skipping the first record
307     * @throws CSVException Thrown on invalid input.
308     * @since 1.5
309     */
310    public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException {
311        return new CSVParser(reader, format);
312    }
313
314    /**
315     * Creates a parser for the given {@link String}.
316     *
317     * @param string
318     *            a CSV string. Must not be null.
319     * @param format
320     *            the CSVFormat used for CSV parsing. Must not be null.
321     * @return a new parser
322     * @throws IllegalArgumentException
323     *             If the parameters of the format are inconsistent or if either string or format are null.
324     * @throws IOException
325     *             If an I/O error occurs
326     * @throws CSVException Thrown on invalid input.
327     */
328    public static CSVParser parse(final String string, final CSVFormat format) throws IOException {
329        Objects.requireNonNull(string, "string");
330        Objects.requireNonNull(format, "format");
331
332        return new CSVParser(new StringReader(string), format);
333    }
334
335    /**
336     * Creates and returns a parser for the given URL, which the caller MUST close.
337     *
338     * <p>
339     * If you do not read all records from the given {@code url}, you should call {@link #close()} on the parser, unless
340     * you close the {@code url}.
341     * </p>
342     *
343     * @param url
344     *            a URL. Must not be null.
345     * @param charset
346     *            the charset for the resource. Must not be null.
347     * @param format
348     *            the CSVFormat used for CSV parsing. Must not be null.
349     * @return a new parser
350     * @throws IllegalArgumentException
351     *             If the parameters of the format are inconsistent or if either url, charset or format are null.
352     * @throws IOException
353     *             If an I/O error occurs
354     * @throws CSVException Thrown on invalid input.
355     */
356    @SuppressWarnings("resource")
357    public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException {
358        Objects.requireNonNull(url, "url");
359        Objects.requireNonNull(charset, "charset");
360        Objects.requireNonNull(format, "format");
361
362        return new CSVParser(new InputStreamReader(url.openStream(), charset), format);
363    }
364
365    private String headerComment;
366
367    private String trailerComment;
368
369    private final CSVFormat format;
370
371    private final Headers headers;
372
373    private final Lexer lexer;
374
375    private final CSVRecordIterator csvRecordIterator;
376
377    /** A record buffer for getRecord(). Grows as necessary and is reused. */
378    private final List<String> recordList = new ArrayList<>();
379
380    /**
381     * The next record number to assign.
382     */
383    private long recordNumber;
384
385    /**
386     * Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination
387     * with {@link #recordNumber}.
388     */
389    private final long characterOffset;
390
391    private final Token reusableToken = new Token();
392
393    /**
394     * Constructs a new instance using the given {@link CSVFormat}
395     *
396     * <p>
397     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
398     * unless you close the {@code reader}.
399     * </p>
400     *
401     * @param reader
402     *            a Reader containing CSV-formatted input. Must not be null.
403     * @param format
404     *            the CSVFormat used for CSV parsing. Must not be null.
405     * @throws IllegalArgumentException
406     *             If the parameters of the format are inconsistent or if either reader or format are null.
407     * @throws IOException
408     *             If there is a problem reading the header or skipping the first record
409     * @throws CSVException Thrown on invalid input.
410     */
411    public CSVParser(final Reader reader, final CSVFormat format) throws IOException {
412        this(reader, format, 0, 1);
413    }
414
415    /**
416     * Constructs a new instance using the given {@link CSVFormat}
417     *
418     * <p>
419     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
420     * unless you close the {@code reader}.
421     * </p>
422     *
423     * @param reader
424     *            a Reader containing CSV-formatted input. Must not be null.
425     * @param format
426     *            the CSVFormat used for CSV parsing. Must not be null.
427     * @param characterOffset
428     *            Lexer offset when the parser does not start parsing at the beginning of the source.
429     * @param recordNumber
430     *            The next record number to assign
431     * @throws IllegalArgumentException
432     *             If the parameters of the format are inconsistent or if either the reader or format is null.
433     * @throws IOException
434     *             If there is a problem reading the header or skipping the first record
435     * @throws CSVException Thrown on invalid input.
436     * @since 1.1
437     */
438    @SuppressWarnings("resource")
439    public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber)
440        throws IOException {
441        Objects.requireNonNull(reader, "reader");
442        Objects.requireNonNull(format, "format");
443        this.format = format.copy();
444        this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
445        this.csvRecordIterator = new CSVRecordIterator();
446        this.headers = createHeaders();
447        this.characterOffset = characterOffset;
448        this.recordNumber = recordNumber - 1;
449    }
450
451    private void addRecordValue(final boolean lastRecord) {
452        final String input = format.trim(reusableToken.content.toString());
453        if (lastRecord && input.isEmpty() && format.getTrailingDelimiter()) {
454            return;
455        }
456        recordList.add(handleNull(input));
457    }
458
459    /**
460     * Closes resources.
461     *
462     * @throws IOException
463     *             If an I/O error occurs
464     */
465    @Override
466    public void close() throws IOException {
467        lexer.close();
468    }
469
470    private Map<String, Integer> createEmptyHeaderMap() {
471        return format.getIgnoreHeaderCase() ?
472                new TreeMap<>(String.CASE_INSENSITIVE_ORDER) :
473                new LinkedHashMap<>();
474    }
475
476    /**
477     * Creates the name to index mapping if the format defines a header.
478     *
479     * @return null if the format has no header.
480     * @throws IOException if there is a problem reading the header or skipping the first record
481     * @throws CSVException Thrown on invalid input.
482     */
483    private Headers createHeaders() throws IOException {
484        Map<String, Integer> hdrMap = null;
485        List<String> headerNames = null;
486        final String[] formatHeader = format.getHeader();
487        if (formatHeader != null) {
488            hdrMap = createEmptyHeaderMap();
489            String[] headerRecord = null;
490            if (formatHeader.length == 0) {
491                // read the header from the first line of the file
492                final CSVRecord nextRecord = nextRecord();
493                if (nextRecord != null) {
494                    headerRecord = nextRecord.values();
495                    headerComment = nextRecord.getComment();
496                }
497            } else {
498                if (format.getSkipHeaderRecord()) {
499                    final CSVRecord nextRecord = nextRecord();
500                    if (nextRecord != null) {
501                        headerComment = nextRecord.getComment();
502                    }
503                }
504                headerRecord = formatHeader;
505            }
506
507            // build the name to index mappings
508            if (headerRecord != null) {
509                // Track an occurrence of a null, empty or blank header.
510                boolean observedMissing = false;
511                for (int i = 0; i < headerRecord.length; i++) {
512                    final String header = headerRecord[i];
513                    final boolean blankHeader = CSVFormat.isBlank(header);
514                    if (blankHeader && !format.getAllowMissingColumnNames()) {
515                        throw new IllegalArgumentException(
516                            "A header name is missing in " + Arrays.toString(headerRecord));
517                    }
518
519                    final boolean containsHeader = blankHeader ? observedMissing : hdrMap.containsKey(header);
520                    final DuplicateHeaderMode headerMode = format.getDuplicateHeaderMode();
521                    final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL;
522                    final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY;
523
524                    if (containsHeader && !duplicatesAllowed && !(blankHeader && emptyDuplicatesAllowed)) {
525                        throw new IllegalArgumentException(
526                            String.format(
527                                "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().",
528                                header, Arrays.toString(headerRecord)));
529                    }
530                    observedMissing |= blankHeader;
531                    if (header != null) {
532                        hdrMap.put(header, Integer.valueOf(i)); // N.B. Explicit (un)boxing is intentional
533                        if (headerNames == null) {
534                            headerNames = new ArrayList<>(headerRecord.length);
535                        }
536                        headerNames.add(header);
537                    }
538                }
539            }
540        }
541        // Make header names Collection immutable
542        return new Headers(hdrMap, headerNames == null ? Collections.emptyList() : Collections.unmodifiableList(headerNames));
543    }
544
545    /**
546     * Gets the current line number in the input stream.
547     *
548     * <p>
549     * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
550     * the record number.
551     * </p>
552     *
553     * @return current line number
554     */
555    public long getCurrentLineNumber() {
556        return lexer.getCurrentLineNumber();
557    }
558
559    /**
560     * Gets the first end-of-line string encountered.
561     *
562     * @return the first end-of-line string
563     * @since 1.5
564     */
565    public String getFirstEndOfLine() {
566        return lexer.getFirstEol();
567    }
568
569    /**
570     * Gets the header comment, if any.
571     * The header comment appears before the header record.
572     *
573     * @return the header comment for this stream, or null if no comment is available.
574     * @since 1.10.0
575     */
576    public String getHeaderComment() {
577        return headerComment;
578    }
579
580    /**
581     * Gets a copy of the header map as defined in the CSVFormat's header.
582     * <p>
583     * The map keys are column names. The map values are 0-based indices.
584     * </p>
585     * <p>
586     * Note: The map can only provide a one-to-one mapping when the format did not
587     * contain null or duplicate column names.
588     * </p>
589     *
590     * @return a copy of the header map.
591     */
592    public Map<String, Integer> getHeaderMap() {
593        if (headers.headerMap == null) {
594            return null;
595        }
596        final Map<String, Integer> map = createEmptyHeaderMap();
597        map.putAll(headers.headerMap);
598        return map;
599    }
600
601    /**
602     * Gets the underlying header map.
603     *
604     * @return the underlying header map.
605     */
606    Map<String, Integer> getHeaderMapRaw() {
607        return headers.headerMap;
608    }
609
610    /**
611     * Gets a read-only list of header names that iterates in column order as defined in the CSVFormat's header.
612     * <p>
613     * Note: The list provides strings that can be used as keys in the header map.
614     * The list will not contain null column names if they were present in the input
615     * format.
616     * </p>
617     *
618     * @return read-only list of header names that iterates in column order.
619     * @see #getHeaderMap()
620     * @since 1.7
621     */
622    public List<String> getHeaderNames() {
623        return Collections.unmodifiableList(headers.headerNames);
624    }
625
626    /**
627     * Gets the current record number in the input stream.
628     *
629     * <p>
630     * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
631     * the line number.
632     * </p>
633     *
634     * @return current record number
635     */
636    public long getRecordNumber() {
637        return recordNumber;
638    }
639
640    /**
641     * Parses the CSV input according to the given format and returns the content as a list of
642     * {@link CSVRecord CSVRecords}.
643     *
644     * <p>
645     * The returned content starts at the current parse-position in the stream.
646     * </p>
647     *
648     * @return list of {@link CSVRecord CSVRecords}, may be empty
649     * @throws UncheckedIOException
650     *             on parse error or input read-failure
651     */
652    public List<CSVRecord> getRecords() {
653        return stream().collect(Collectors.toList());
654    }
655
656    /**
657     * Gets the trailer comment, if any.
658     * Trailer comments are located between the last record and EOF
659     *
660     * @return the trailer comment for this stream, or null if no comment is available.
661     * @since 1.10.0
662     */
663    public String getTrailerComment() {
664        return trailerComment;
665    }
666
667    /**
668     * Handles whether the input is parsed as null
669     *
670     * @param input
671     *           the cell data to further processed
672     * @return null if input is parsed as null, or input itself if the input isn't parsed as null
673     */
674    private String handleNull(final String input) {
675        final boolean isQuoted = reusableToken.isQuoted;
676        final String nullString = format.getNullString();
677        final boolean strictQuoteMode = isStrictQuoteMode();
678        if (input.equals(nullString)) {
679            // nullString = NULL(String), distinguish between "NULL" and NULL in ALL_NON_NULL or NON_NUMERIC quote mode
680            return strictQuoteMode && isQuoted ? input : null;
681        }
682        // don't set nullString, distinguish between "" and ,, (absent values) in All_NON_NULL or NON_NUMERIC quote mode
683        return strictQuoteMode && nullString == null && input.isEmpty() && !isQuoted ? null : input;
684    }
685
686    /**
687     * Checks whether there is a header comment.
688     * The header comment appears before the header record.
689     * Note that if the parser's format has been given an explicit header
690     * (with {@link CSVFormat.Builder#setHeader(String... )} or another overload)
691     * and the header record is not being skipped
692     * ({@link CSVFormat.Builder#setSkipHeaderRecord} is false) then any initial comments
693     * will be associated with the first record, not the header.
694     *
695     * @return true if this parser has seen a header comment, false otherwise
696     * @since 1.10.0
697     */
698    public boolean hasHeaderComment() {
699        return headerComment != null;
700    }
701
702    /**
703     * Checks whether there is a trailer comment.
704     * Trailer comments are located between the last record and EOF.
705     * The trailer comments will only be available after the parser has
706     * finished processing this stream.
707     *
708     * @return true if this parser has seen a trailer comment, false otherwise
709     * @since 1.10.0
710     */
711    public boolean hasTrailerComment() {
712        return trailerComment != null;
713    }
714
715    /**
716     * Tests whether this parser is closed.
717     *
718     * @return whether this parser is closed.
719     */
720    public boolean isClosed() {
721        return lexer.isClosed();
722    }
723
724    /**
725     * Tests whether the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or {@link QuoteMode#NON_NUMERIC}.
726     *
727     * @return true if the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or
728     *         {@link QuoteMode#NON_NUMERIC}.
729     */
730    private boolean isStrictQuoteMode() {
731        return format.getQuoteMode() == QuoteMode.ALL_NON_NULL ||
732               format.getQuoteMode() == QuoteMode.NON_NUMERIC;
733    }
734
735    /**
736     * Returns the record iterator.
737     *
738     * <p>
739     * An {@link IOException} caught during the iteration is re-thrown as an
740     * {@link IllegalStateException}.
741     * </p>
742     * <p>
743     * If the parser is closed, the iterator will not yield any more records.
744     * A call to {@link Iterator#hasNext()} will return {@code false} and
745     * a call to {@link Iterator#next()} will throw a
746     * {@link NoSuchElementException}.
747     * </p>
748     * <p>
749     * If it is necessary to construct an iterator which is usable after the
750     * parser is closed, one option is to extract all records as a list with
751     * {@link #getRecords()}, and return an iterator to that list.
752     * </p>
753     */
754    @Override
755    public Iterator<CSVRecord> iterator() {
756        return csvRecordIterator;
757    }
758
759    /**
760     * Parses the next record from the current point in the stream.
761     *
762     * @return the record as an array of values, or {@code null} if the end of the stream has been reached
763     * @throws IOException  on parse error or input read-failure
764     * @throws CSVException Thrown on invalid input.
765     */
766    CSVRecord nextRecord() throws IOException {
767        CSVRecord result = null;
768        recordList.clear();
769        StringBuilder sb = null;
770        final long startCharPosition = lexer.getCharacterPosition() + characterOffset;
771        do {
772            reusableToken.reset();
773            lexer.nextToken(reusableToken);
774            switch (reusableToken.type) {
775            case TOKEN:
776                addRecordValue(false);
777                break;
778            case EORECORD:
779                addRecordValue(true);
780                break;
781            case EOF:
782                if (reusableToken.isReady) {
783                    addRecordValue(true);
784                } else if (sb != null) {
785                    trailerComment = sb.toString();
786                }
787                break;
788            case INVALID:
789                throw new IOException("(line " + getCurrentLineNumber() + ") invalid parse sequence");
790            case COMMENT: // Ignored currently
791                if (sb == null) { // first comment for this record
792                    sb = new StringBuilder();
793                } else {
794                    sb.append(Constants.LF);
795                }
796                sb.append(reusableToken.content);
797                reusableToken.type = TOKEN; // Read another token
798                break;
799            default:
800                throw new IllegalStateException("Unexpected Token type: " + reusableToken.type);
801            }
802        } while (reusableToken.type == TOKEN);
803
804        if (!recordList.isEmpty()) {
805            recordNumber++;
806            final String comment = Objects.toString(sb, null);
807            result = new CSVRecord(this, recordList.toArray(Constants.EMPTY_STRING_ARRAY), comment,
808                recordNumber, startCharPosition);
809        }
810        return result;
811    }
812
813    /**
814     * Returns a sequential {@code Stream} with this collection as its source.
815     * <p>
816     * If the parser is closed, the stream will not produce any more values.
817     * See the comments in {@link #iterator()}.
818     * </p>
819     * @return a sequential {@code Stream} with this collection as its source.
820     * @since 1.9.0
821     */
822    public Stream<CSVRecord> stream() {
823        return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator(), Spliterator.ORDERED), false);
824    }
825
826}