View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.csv;
19  
20  import static org.apache.commons.csv.Token.Type.TOKEN;
21  
22  import java.io.Closeable;
23  import java.io.File;
24  import java.io.IOException;
25  import java.io.InputStream;
26  import java.io.InputStreamReader;
27  import java.io.Reader;
28  import java.io.StringReader;
29  import java.io.UncheckedIOException;
30  import java.net.URL;
31  import java.nio.charset.Charset;
32  import java.nio.file.Files;
33  import java.nio.file.Path;
34  import java.util.ArrayList;
35  import java.util.Arrays;
36  import java.util.Collections;
37  import java.util.Iterator;
38  import java.util.LinkedHashMap;
39  import java.util.List;
40  import java.util.Map;
41  import java.util.NoSuchElementException;
42  import java.util.Objects;
43  import java.util.Spliterator;
44  import java.util.Spliterators;
45  import java.util.TreeMap;
46  import java.util.stream.Collectors;
47  import java.util.stream.Stream;
48  import java.util.stream.StreamSupport;
49  
50  import org.apache.commons.io.function.Uncheck;
51  
52  /**
53   * Parses CSV files according to the specified format.
54   *
55   * Because CSV appears in many different dialects, the parser supports many formats by allowing the
56   * specification of a {@link CSVFormat}.
57   *
58   * The parser works record-wise. It is not possible to go back, once a record has been parsed from the input stream.
59   *
60   * <h2>Creating instances</h2>
61   * <p>
62   * There are several static factory methods that can be used to create instances for various types of resources:
63   * </p>
64   * <ul>
65   *     <li>{@link #parse(java.io.File, Charset, CSVFormat)}</li>
66   *     <li>{@link #parse(String, CSVFormat)}</li>
67   *     <li>{@link #parse(java.net.URL, java.nio.charset.Charset, CSVFormat)}</li>
68   * </ul>
69   * <p>
70   * Alternatively parsers can also be created by passing a {@link Reader} directly to one of the constructors.
71   *
72   * For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut:
73   * </p>
74   * <pre>
75   * for (CSVRecord record : CSVFormat.EXCEL.parse(in)) {
76   *     ...
77   * }
78   * </pre>
79   *
80   * <h2>Parsing record wise</h2>
81   * <p>
82   * To parse a CSV input from a file, you write:
83   * </p>
84   *
85   * <pre>{@code
86   * File csvData = new File("/path/to/csv");
87   * CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.RFC4180);
88   * for (CSVRecord csvRecord : parser) {
89   *     ...
90   * }}
91   * </pre>
92   *
93   * <p>
94   * This will read and parse the contents of the file using the
95   * <a href="https://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a> format.
96   * </p>
97   *
98   * <p>
99   * To parse CSV input in a format like Excel, you write:
100  * </p>
101  *
102  * <pre>
103  * CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.EXCEL);
104  * for (CSVRecord csvRecord : parser) {
105  *     ...
106  * }
107  * </pre>
108  *
109  * <p>
110  * If the predefined formats don't match the format at hand, custom formats can be defined. More information about
111  * customizing CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}.
112  * </p>
113  *
114  * <h2>Parsing into memory</h2>
115  * <p>
116  * If parsing record-wise is not desired, the contents of the input can be read completely into memory.
117  * </p>
118  *
119  * <pre>{@code
120  * Reader in = new StringReader("a;b\nc;d");
121  * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL);
122  * List<CSVRecord> list = parser.getRecords();
123  * }</pre>
124  *
125  * <p>
126  * There are two constraints that have to be kept in mind:
127  * </p>
128  *
129  * <ol>
130  *     <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from
131  *     the input, those records will not end up in the in-memory representation of your CSV data.</li>
132  *     <li>Parsing into memory may consume a lot of system resources depending on the input. For example, if you're
133  *     parsing a 150MB file of CSV data the contents will be read completely into memory.</li>
134  * </ol>
135  *
136  * <h2>Notes</h2>
137  * <p>
138  * The internal parser state is completely covered by the format and the reader state.
139  * </p>
140  *
141  * @see <a href="package-summary.html">package documentation for more details</a>
142  */
143 public final class CSVParser implements Iterable<CSVRecord>, Closeable {
144 
145     final class CSVRecordIterator implements Iterator<CSVRecord> {
146         private CSVRecord current;
147 
148         /**
149          * Gets the next record.
150          *
151          * @return the next record.
152          */
153         private CSVRecord getNextRecord() {
154             return Uncheck.get(CSVParser.this::nextRecord);
155         }
156 
157         @Override
158         public boolean hasNext() {
159             if (isClosed()) {
160                 return false;
161             }
162             if (current == null) {
163                 current = getNextRecord();
164             }
165 
166             return current != null;
167         }
168 
169         @Override
170         public CSVRecord next() {
171             if (isClosed()) {
172                 throw new NoSuchElementException("CSVParser has been closed");
173             }
174             CSVRecord next = current;
175             current = null;
176 
177             if (next == null) {
178                 // hasNext() wasn't called before
179                 next = getNextRecord();
180                 if (next == null) {
181                     throw new NoSuchElementException("No more CSV records available");
182                 }
183             }
184 
185             return next;
186         }
187 
188         @Override
189         public void remove() {
190             throw new UnsupportedOperationException();
191         }
192     }
193 
194     /**
195      * Header information based on name and position.
196      */
197     private static final class Headers {
198 
199         /**
200          * Header column positions (0-based)
201          */
202         final Map<String, Integer> headerMap;
203 
204         /**
205          * Header names in column order
206          */
207         final List<String> headerNames;
208 
209         Headers(final Map<String, Integer> headerMap, final List<String> headerNames) {
210             this.headerMap = headerMap;
211             this.headerNames = headerNames;
212         }
213     }
214 
215     /**
216      * Creates a parser for the given {@link File}.
217      *
218      * @param file
219      *            a CSV file. Must not be null.
220      * @param charset
221      *            The Charset to decode the given file.
222      * @param format
223      *            the CSVFormat used for CSV parsing. Must not be null.
224      * @return a new parser
225      * @throws IllegalArgumentException
226      *             If the parameters of the format are inconsistent or if either file or format are null.
227      * @throws IOException
228      *             If an I/O error occurs
229      * @throws CSVException Thrown on invalid input.
230      */
231     public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException {
232         Objects.requireNonNull(file, "file");
233         return parse(file.toPath(), charset, format);
234     }
235 
236     /**
237      * Creates a CSV parser using the given {@link CSVFormat}.
238      *
239      * <p>
240      * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
241      * unless you close the {@code reader}.
242      * </p>
243      *
244      * @param inputStream
245      *            an InputStream containing CSV-formatted input. Must not be null.
246      * @param charset
247      *            The Charset to decode the given file.
248      * @param format
249      *            the CSVFormat used for CSV parsing. Must not be null.
250      * @return a new CSVParser configured with the given reader and format.
251      * @throws IllegalArgumentException
252      *             If the parameters of the format are inconsistent or if either reader or format are null.
253      * @throws IOException
254      *             If there is a problem reading the header or skipping the first record
255      * @throws CSVException Thrown on invalid input.
256      * @since 1.5
257      */
258     @SuppressWarnings("resource")
259     public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format)
260             throws IOException {
261         Objects.requireNonNull(inputStream, "inputStream");
262         Objects.requireNonNull(format, "format");
263         return parse(new InputStreamReader(inputStream, charset), format);
264     }
265 
266     /**
267      * Creates and returns a parser for the given {@link Path}, which the caller MUST close.
268      *
269      * @param path
270      *            a CSV file. Must not be null.
271      * @param charset
272      *            The Charset to decode the given file.
273      * @param format
274      *            the CSVFormat used for CSV parsing. Must not be null.
275      * @return a new parser
276      * @throws IllegalArgumentException
277      *             If the parameters of the format are inconsistent or if either file or format are null.
278      * @throws IOException
279      *             If an I/O error occurs
280      * @throws CSVException Thrown on invalid input.
281      * @since 1.5
282      */
283     @SuppressWarnings("resource")
284     public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException {
285         Objects.requireNonNull(path, "path");
286         Objects.requireNonNull(format, "format");
287         return parse(Files.newInputStream(path), charset, format);
288     }
289 
290     /**
291      * Creates a CSV parser using the given {@link CSVFormat}
292      *
293      * <p>
294      * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
295      * unless you close the {@code reader}.
296      * </p>
297      *
298      * @param reader
299      *            a Reader containing CSV-formatted input. Must not be null.
300      * @param format
301      *            the CSVFormat used for CSV parsing. Must not be null.
302      * @return a new CSVParser configured with the given reader and format.
303      * @throws IllegalArgumentException
304      *             If the parameters of the format are inconsistent or if either reader or format are null.
305      * @throws IOException
306      *             If there is a problem reading the header or skipping the first record
307      * @throws CSVException Thrown on invalid input.
308      * @since 1.5
309      */
310     public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException {
311         return new CSVParser(reader, format);
312     }
313 
314     /**
315      * Creates a parser for the given {@link String}.
316      *
317      * @param string
318      *            a CSV string. Must not be null.
319      * @param format
320      *            the CSVFormat used for CSV parsing. Must not be null.
321      * @return a new parser
322      * @throws IllegalArgumentException
323      *             If the parameters of the format are inconsistent or if either string or format are null.
324      * @throws IOException
325      *             If an I/O error occurs
326      * @throws CSVException Thrown on invalid input.
327      */
328     public static CSVParser parse(final String string, final CSVFormat format) throws IOException {
329         Objects.requireNonNull(string, "string");
330         Objects.requireNonNull(format, "format");
331 
332         return new CSVParser(new StringReader(string), format);
333     }
334 
335     /**
336      * Creates and returns a parser for the given URL, which the caller MUST close.
337      *
338      * <p>
339      * If you do not read all records from the given {@code url}, you should call {@link #close()} on the parser, unless
340      * you close the {@code url}.
341      * </p>
342      *
343      * @param url
344      *            a URL. Must not be null.
345      * @param charset
346      *            the charset for the resource. Must not be null.
347      * @param format
348      *            the CSVFormat used for CSV parsing. Must not be null.
349      * @return a new parser
350      * @throws IllegalArgumentException
351      *             If the parameters of the format are inconsistent or if either url, charset or format are null.
352      * @throws IOException
353      *             If an I/O error occurs
354      * @throws CSVException Thrown on invalid input.
355      */
356     @SuppressWarnings("resource")
357     public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException {
358         Objects.requireNonNull(url, "url");
359         Objects.requireNonNull(charset, "charset");
360         Objects.requireNonNull(format, "format");
361 
362         return new CSVParser(new InputStreamReader(url.openStream(), charset), format);
363     }
364 
365     private String headerComment;
366 
367     private String trailerComment;
368 
369     private final CSVFormat format;
370 
371     private final Headers headers;
372 
373     private final Lexer lexer;
374 
375     private final CSVRecordIterator csvRecordIterator;
376 
377     /** A record buffer for getRecord(). Grows as necessary and is reused. */
378     private final List<String> recordList = new ArrayList<>();
379 
380     /**
381      * The next record number to assign.
382      */
383     private long recordNumber;
384 
385     /**
386      * Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination
387      * with {@link #recordNumber}.
388      */
389     private final long characterOffset;
390 
391     private final Token reusableToken = new Token();
392 
393     /**
394      * Constructs a new instance using the given {@link CSVFormat}
395      *
396      * <p>
397      * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
398      * unless you close the {@code reader}.
399      * </p>
400      *
401      * @param reader
402      *            a Reader containing CSV-formatted input. Must not be null.
403      * @param format
404      *            the CSVFormat used for CSV parsing. Must not be null.
405      * @throws IllegalArgumentException
406      *             If the parameters of the format are inconsistent or if either reader or format are null.
407      * @throws IOException
408      *             If there is a problem reading the header or skipping the first record
409      * @throws CSVException Thrown on invalid input.
410      */
411     public CSVParser(final Reader reader, final CSVFormat format) throws IOException {
412         this(reader, format, 0, 1);
413     }
414 
415     /**
416      * Constructs a new instance using the given {@link CSVFormat}
417      *
418      * <p>
419      * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
420      * unless you close the {@code reader}.
421      * </p>
422      *
423      * @param reader
424      *            a Reader containing CSV-formatted input. Must not be null.
425      * @param format
426      *            the CSVFormat used for CSV parsing. Must not be null.
427      * @param characterOffset
428      *            Lexer offset when the parser does not start parsing at the beginning of the source.
429      * @param recordNumber
430      *            The next record number to assign
431      * @throws IllegalArgumentException
432      *             If the parameters of the format are inconsistent or if either the reader or format is null.
433      * @throws IOException
434      *             If there is a problem reading the header or skipping the first record
435      * @throws CSVException Thrown on invalid input.
436      * @since 1.1
437      */
438     @SuppressWarnings("resource")
439     public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber)
440         throws IOException {
441         Objects.requireNonNull(reader, "reader");
442         Objects.requireNonNull(format, "format");
443         this.format = format.copy();
444         this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
445         this.csvRecordIterator = new CSVRecordIterator();
446         this.headers = createHeaders();
447         this.characterOffset = characterOffset;
448         this.recordNumber = recordNumber - 1;
449     }
450 
451     private void addRecordValue(final boolean lastRecord) {
452         final String input = format.trim(reusableToken.content.toString());
453         if (lastRecord && input.isEmpty() && format.getTrailingDelimiter()) {
454             return;
455         }
456         recordList.add(handleNull(input));
457     }
458 
459     /**
460      * Closes resources.
461      *
462      * @throws IOException
463      *             If an I/O error occurs
464      */
465     @Override
466     public void close() throws IOException {
467         lexer.close();
468     }
469 
470     private Map<String, Integer> createEmptyHeaderMap() {
471         return format.getIgnoreHeaderCase() ?
472                 new TreeMap<>(String.CASE_INSENSITIVE_ORDER) :
473                 new LinkedHashMap<>();
474     }
475 
476     /**
477      * Creates the name to index mapping if the format defines a header.
478      *
479      * @return null if the format has no header.
480      * @throws IOException if there is a problem reading the header or skipping the first record
481      * @throws CSVException Thrown on invalid input.
482      */
483     private Headers createHeaders() throws IOException {
484         Map<String, Integer> hdrMap = null;
485         List<String> headerNames = null;
486         final String[] formatHeader = format.getHeader();
487         if (formatHeader != null) {
488             hdrMap = createEmptyHeaderMap();
489             String[] headerRecord = null;
490             if (formatHeader.length == 0) {
491                 // read the header from the first line of the file
492                 final CSVRecord nextRecord = nextRecord();
493                 if (nextRecord != null) {
494                     headerRecord = nextRecord.values();
495                     headerComment = nextRecord.getComment();
496                 }
497             } else {
498                 if (format.getSkipHeaderRecord()) {
499                     final CSVRecord nextRecord = nextRecord();
500                     if (nextRecord != null) {
501                         headerComment = nextRecord.getComment();
502                     }
503                 }
504                 headerRecord = formatHeader;
505             }
506 
507             // build the name to index mappings
508             if (headerRecord != null) {
509                 // Track an occurrence of a null, empty or blank header.
510                 boolean observedMissing = false;
511                 for (int i = 0; i < headerRecord.length; i++) {
512                     final String header = headerRecord[i];
513                     final boolean blankHeader = CSVFormat.isBlank(header);
514                     if (blankHeader && !format.getAllowMissingColumnNames()) {
515                         throw new IllegalArgumentException(
516                             "A header name is missing in " + Arrays.toString(headerRecord));
517                     }
518 
519                     final boolean containsHeader = blankHeader ? observedMissing : hdrMap.containsKey(header);
520                     final DuplicateHeaderMode headerMode = format.getDuplicateHeaderMode();
521                     final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL;
522                     final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY;
523 
524                     if (containsHeader && !duplicatesAllowed && !(blankHeader && emptyDuplicatesAllowed)) {
525                         throw new IllegalArgumentException(
526                             String.format(
527                                 "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().",
528                                 header, Arrays.toString(headerRecord)));
529                     }
530                     observedMissing |= blankHeader;
531                     if (header != null) {
532                         hdrMap.put(header, Integer.valueOf(i)); // N.B. Explicit (un)boxing is intentional
533                         if (headerNames == null) {
534                             headerNames = new ArrayList<>(headerRecord.length);
535                         }
536                         headerNames.add(header);
537                     }
538                 }
539             }
540         }
541         // Make header names Collection immutable
542         return new Headers(hdrMap, headerNames == null ? Collections.emptyList() : Collections.unmodifiableList(headerNames));
543     }
544 
545     /**
546      * Gets the current line number in the input stream.
547      *
548      * <p>
549      * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
550      * the record number.
551      * </p>
552      *
553      * @return current line number
554      */
555     public long getCurrentLineNumber() {
556         return lexer.getCurrentLineNumber();
557     }
558 
559     /**
560      * Gets the first end-of-line string encountered.
561      *
562      * @return the first end-of-line string
563      * @since 1.5
564      */
565     public String getFirstEndOfLine() {
566         return lexer.getFirstEol();
567     }
568 
569     /**
570      * Gets the header comment, if any.
571      * The header comment appears before the header record.
572      *
573      * @return the header comment for this stream, or null if no comment is available.
574      * @since 1.10.0
575      */
576     public String getHeaderComment() {
577         return headerComment;
578     }
579 
580     /**
581      * Gets a copy of the header map as defined in the CSVFormat's header.
582      * <p>
583      * The map keys are column names. The map values are 0-based indices.
584      * </p>
585      * <p>
586      * Note: The map can only provide a one-to-one mapping when the format did not
587      * contain null or duplicate column names.
588      * </p>
589      *
590      * @return a copy of the header map.
591      */
592     public Map<String, Integer> getHeaderMap() {
593         if (headers.headerMap == null) {
594             return null;
595         }
596         final Map<String, Integer> map = createEmptyHeaderMap();
597         map.putAll(headers.headerMap);
598         return map;
599     }
600 
601     /**
602      * Gets the underlying header map.
603      *
604      * @return the underlying header map.
605      */
606     Map<String, Integer> getHeaderMapRaw() {
607         return headers.headerMap;
608     }
609 
610     /**
611      * Gets a read-only list of header names that iterates in column order as defined in the CSVFormat's header.
612      * <p>
613      * Note: The list provides strings that can be used as keys in the header map.
614      * The list will not contain null column names if they were present in the input
615      * format.
616      * </p>
617      *
618      * @return read-only list of header names that iterates in column order.
619      * @see #getHeaderMap()
620      * @since 1.7
621      */
622     public List<String> getHeaderNames() {
623         return Collections.unmodifiableList(headers.headerNames);
624     }
625 
626     /**
627      * Gets the current record number in the input stream.
628      *
629      * <p>
630      * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
631      * the line number.
632      * </p>
633      *
634      * @return current record number
635      */
636     public long getRecordNumber() {
637         return recordNumber;
638     }
639 
640     /**
641      * Parses the CSV input according to the given format and returns the content as a list of
642      * {@link CSVRecord CSVRecords}.
643      *
644      * <p>
645      * The returned content starts at the current parse-position in the stream.
646      * </p>
647      *
648      * @return list of {@link CSVRecord CSVRecords}, may be empty
649      * @throws UncheckedIOException
650      *             on parse error or input read-failure
651      */
652     public List<CSVRecord> getRecords() {
653         return stream().collect(Collectors.toList());
654     }
655 
656     /**
657      * Gets the trailer comment, if any.
658      * Trailer comments are located between the last record and EOF
659      *
660      * @return the trailer comment for this stream, or null if no comment is available.
661      * @since 1.10.0
662      */
663     public String getTrailerComment() {
664         return trailerComment;
665     }
666 
667     /**
668      * Handles whether the input is parsed as null
669      *
670      * @param input
671      *           the cell data to further processed
672      * @return null if input is parsed as null, or input itself if the input isn't parsed as null
673      */
674     private String handleNull(final String input) {
675         final boolean isQuoted = reusableToken.isQuoted;
676         final String nullString = format.getNullString();
677         final boolean strictQuoteMode = isStrictQuoteMode();
678         if (input.equals(nullString)) {
679             // nullString = NULL(String), distinguish between "NULL" and NULL in ALL_NON_NULL or NON_NUMERIC quote mode
680             return strictQuoteMode && isQuoted ? input : null;
681         }
682         // don't set nullString, distinguish between "" and ,, (absent values) in All_NON_NULL or NON_NUMERIC quote mode
683         return strictQuoteMode && nullString == null && input.isEmpty() && !isQuoted ? null : input;
684     }
685 
686     /**
687      * Checks whether there is a header comment.
688      * The header comment appears before the header record.
689      * Note that if the parser's format has been given an explicit header
690      * (with {@link CSVFormat.Builder#setHeader(String... )} or another overload)
691      * and the header record is not being skipped
692      * ({@link CSVFormat.Builder#setSkipHeaderRecord} is false) then any initial comments
693      * will be associated with the first record, not the header.
694      *
695      * @return true if this parser has seen a header comment, false otherwise
696      * @since 1.10.0
697      */
698     public boolean hasHeaderComment() {
699         return headerComment != null;
700     }
701 
702     /**
703      * Checks whether there is a trailer comment.
704      * Trailer comments are located between the last record and EOF.
705      * The trailer comments will only be available after the parser has
706      * finished processing this stream.
707      *
708      * @return true if this parser has seen a trailer comment, false otherwise
709      * @since 1.10.0
710      */
711     public boolean hasTrailerComment() {
712         return trailerComment != null;
713     }
714 
715     /**
716      * Tests whether this parser is closed.
717      *
718      * @return whether this parser is closed.
719      */
720     public boolean isClosed() {
721         return lexer.isClosed();
722     }
723 
724     /**
725      * Tests whether the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or {@link QuoteMode#NON_NUMERIC}.
726      *
727      * @return true if the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or
728      *         {@link QuoteMode#NON_NUMERIC}.
729      */
730     private boolean isStrictQuoteMode() {
731         return format.getQuoteMode() == QuoteMode.ALL_NON_NULL ||
732                format.getQuoteMode() == QuoteMode.NON_NUMERIC;
733     }
734 
735     /**
736      * Returns the record iterator.
737      *
738      * <p>
739      * An {@link IOException} caught during the iteration is re-thrown as an
740      * {@link IllegalStateException}.
741      * </p>
742      * <p>
743      * If the parser is closed, the iterator will not yield any more records.
744      * A call to {@link Iterator#hasNext()} will return {@code false} and
745      * a call to {@link Iterator#next()} will throw a
746      * {@link NoSuchElementException}.
747      * </p>
748      * <p>
749      * If it is necessary to construct an iterator which is usable after the
750      * parser is closed, one option is to extract all records as a list with
751      * {@link #getRecords()}, and return an iterator to that list.
752      * </p>
753      */
754     @Override
755     public Iterator<CSVRecord> iterator() {
756         return csvRecordIterator;
757     }
758 
759     /**
760      * Parses the next record from the current point in the stream.
761      *
762      * @return the record as an array of values, or {@code null} if the end of the stream has been reached
763      * @throws IOException  on parse error or input read-failure
764      * @throws CSVException Thrown on invalid input.
765      */
766     CSVRecord nextRecord() throws IOException {
767         CSVRecord result = null;
768         recordList.clear();
769         StringBuilder sb = null;
770         final long startCharPosition = lexer.getCharacterPosition() + characterOffset;
771         do {
772             reusableToken.reset();
773             lexer.nextToken(reusableToken);
774             switch (reusableToken.type) {
775             case TOKEN:
776                 addRecordValue(false);
777                 break;
778             case EORECORD:
779                 addRecordValue(true);
780                 break;
781             case EOF:
782                 if (reusableToken.isReady) {
783                     addRecordValue(true);
784                 } else if (sb != null) {
785                     trailerComment = sb.toString();
786                 }
787                 break;
788             case INVALID:
789                 throw new IOException("(line " + getCurrentLineNumber() + ") invalid parse sequence");
790             case COMMENT: // Ignored currently
791                 if (sb == null) { // first comment for this record
792                     sb = new StringBuilder();
793                 } else {
794                     sb.append(Constants.LF);
795                 }
796                 sb.append(reusableToken.content);
797                 reusableToken.type = TOKEN; // Read another token
798                 break;
799             default:
800                 throw new IllegalStateException("Unexpected Token type: " + reusableToken.type);
801             }
802         } while (reusableToken.type == TOKEN);
803 
804         if (!recordList.isEmpty()) {
805             recordNumber++;
806             final String comment = Objects.toString(sb, null);
807             result = new CSVRecord(this, recordList.toArray(Constants.EMPTY_STRING_ARRAY), comment,
808                 recordNumber, startCharPosition);
809         }
810         return result;
811     }
812 
813     /**
814      * Returns a sequential {@code Stream} with this collection as its source.
815      * <p>
816      * If the parser is closed, the stream will not produce any more values.
817      * See the comments in {@link #iterator()}.
818      * </p>
819      * @return a sequential {@code Stream} with this collection as its source.
820      * @since 1.9.0
821      */
822     public Stream<CSVRecord> stream() {
823         return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator(), Spliterator.ORDERED), false);
824     }
825 
826 }