001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.csv; 019 020import static org.apache.commons.csv.Token.Type.TOKEN; 021 022import java.io.Closeable; 023import java.io.File; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.InputStreamReader; 027import java.io.Reader; 028import java.io.StringReader; 029import java.io.UncheckedIOException; 030import java.net.URL; 031import java.nio.charset.Charset; 032import java.nio.file.Files; 033import java.nio.file.Path; 034import java.util.ArrayList; 035import java.util.Arrays; 036import java.util.Collections; 037import java.util.Iterator; 038import java.util.LinkedHashMap; 039import java.util.List; 040import java.util.Map; 041import java.util.NoSuchElementException; 042import java.util.Objects; 043import java.util.Spliterator; 044import java.util.Spliterators; 045import java.util.TreeMap; 046import java.util.stream.Collectors; 047import java.util.stream.Stream; 048import java.util.stream.StreamSupport; 049 050import org.apache.commons.io.function.Uncheck; 051 052/** 053 * Parses CSV files according to the specified format. 054 * 055 * Because CSV appears in many different dialects, the parser supports many formats by allowing the 056 * specification of a {@link CSVFormat}. 057 * 058 * The parser works record-wise. It is not possible to go back, once a record has been parsed from the input stream. 059 * 060 * <h2>Creating instances</h2> 061 * <p> 062 * There are several static factory methods that can be used to create instances for various types of resources: 063 * </p> 064 * <ul> 065 * <li>{@link #parse(java.io.File, Charset, CSVFormat)}</li> 066 * <li>{@link #parse(String, CSVFormat)}</li> 067 * <li>{@link #parse(java.net.URL, java.nio.charset.Charset, CSVFormat)}</li> 068 * </ul> 069 * <p> 070 * Alternatively parsers can also be created by passing a {@link Reader} directly to the sole constructor. 071 * 072 * For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut: 073 * </p> 074 * <pre> 075 * for (CSVRecord record : CSVFormat.EXCEL.parse(in)) { 076 * ... 077 * } 078 * </pre> 079 * 080 * <h2>Parsing record wise</h2> 081 * <p> 082 * To parse a CSV input from a file, you write: 083 * </p> 084 * 085 * <pre>{@code 086 * File csvData = new File("/path/to/csv"); 087 * CSVParser parser = CSVParser.parse(csvData, CSVFormat.RFC4180); 088 * for (CSVRecord csvRecord : parser) { 089 * ... 090 * }} 091 * </pre> 092 * 093 * <p> 094 * This will read the parse the contents of the file using the 095 * <a href="https://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a> format. 096 * </p> 097 * 098 * <p> 099 * To parse CSV input in a format like Excel, you write: 100 * </p> 101 * 102 * <pre> 103 * CSVParser parser = CSVParser.parse(csvData, CSVFormat.EXCEL); 104 * for (CSVRecord csvRecord : parser) { 105 * ... 106 * } 107 * </pre> 108 * 109 * <p> 110 * If the predefined formats don't match the format at hand, custom formats can be defined. More information about 111 * customizing CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}. 112 * </p> 113 * 114 * <h2>Parsing into memory</h2> 115 * <p> 116 * If parsing record-wise is not desired, the contents of the input can be read completely into memory. 117 * </p> 118 * 119 * <pre>{@code 120 * Reader in = new StringReader("a;b\nc;d"); 121 * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL); 122 * List<CSVRecord> list = parser.getRecords(); 123 * }</pre> 124 * 125 * <p> 126 * There are two constraints that have to be kept in mind: 127 * </p> 128 * 129 * <ol> 130 * <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from 131 * the input, those records will not end up in the in-memory representation of your CSV data.</li> 132 * <li>Parsing into memory may consume a lot of system resources depending on the input. For example, if you're 133 * parsing a 150MB file of CSV data the contents will be read completely into memory.</li> 134 * </ol> 135 * 136 * <h2>Notes</h2> 137 * <p> 138 * The internal parser state is completely covered by the format and the reader state. 139 * </p> 140 * 141 * @see <a href="package-summary.html">package documentation for more details</a> 142 */ 143public final class CSVParser implements Iterable<CSVRecord>, Closeable { 144 145 final class CSVRecordIterator implements Iterator<CSVRecord> { 146 private CSVRecord current; 147 148 /** 149 * Gets the next record. 150 * 151 * @return the next record. 152 */ 153 private CSVRecord getNextRecord() { 154 return Uncheck.get(CSVParser.this::nextRecord); 155 } 156 157 @Override 158 public boolean hasNext() { 159 if (isClosed()) { 160 return false; 161 } 162 if (current == null) { 163 current = getNextRecord(); 164 } 165 166 return current != null; 167 } 168 169 @Override 170 public CSVRecord next() { 171 if (isClosed()) { 172 throw new NoSuchElementException("CSVParser has been closed"); 173 } 174 CSVRecord next = current; 175 current = null; 176 177 if (next == null) { 178 // hasNext() wasn't called before 179 next = getNextRecord(); 180 if (next == null) { 181 throw new NoSuchElementException("No more CSV records available"); 182 } 183 } 184 185 return next; 186 } 187 188 @Override 189 public void remove() { 190 throw new UnsupportedOperationException(); 191 } 192 } 193 194 /** 195 * Header information based on name and position. 196 */ 197 private static final class Headers { 198 199 /** 200 * Header column positions (0-based) 201 */ 202 final Map<String, Integer> headerMap; 203 204 /** 205 * Header names in column order 206 */ 207 final List<String> headerNames; 208 209 Headers(final Map<String, Integer> headerMap, final List<String> headerNames) { 210 this.headerMap = headerMap; 211 this.headerNames = headerNames; 212 } 213 } 214 215 /** 216 * Creates a parser for the given {@link File}. 217 * 218 * @param file 219 * a CSV file. Must not be null. 220 * @param charset 221 * The Charset to decode the given file. 222 * @param format 223 * the CSVFormat used for CSV parsing. Must not be null. 224 * @return a new parser 225 * @throws IllegalArgumentException 226 * If the parameters of the format are inconsistent or if either file or format are null. 227 * @throws IOException 228 * If an I/O error occurs 229 * @throws CSVException Thrown on invalid input. 230 */ 231 public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException { 232 Objects.requireNonNull(file, "file"); 233 return parse(file.toPath(), charset, format); 234 } 235 236 /** 237 * Creates a CSV parser using the given {@link CSVFormat}. 238 * 239 * <p> 240 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, 241 * unless you close the {@code reader}. 242 * </p> 243 * 244 * @param inputStream 245 * an InputStream containing CSV-formatted input. Must not be null. 246 * @param charset 247 * The Charset to decode the given file. 248 * @param format 249 * the CSVFormat used for CSV parsing. Must not be null. 250 * @return a new CSVParser configured with the given reader and format. 251 * @throws IllegalArgumentException 252 * If the parameters of the format are inconsistent or if either reader or format are null. 253 * @throws IOException 254 * If there is a problem reading the header or skipping the first record 255 * @throws CSVException Thrown on invalid input. 256 * @since 1.5 257 */ 258 @SuppressWarnings("resource") 259 public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format) 260 throws IOException { 261 Objects.requireNonNull(inputStream, "inputStream"); 262 Objects.requireNonNull(format, "format"); 263 return parse(new InputStreamReader(inputStream, charset), format); 264 } 265 266 /** 267 * Creates and returns a parser for the given {@link Path}, which the caller MUST close. 268 * 269 * @param path 270 * a CSV file. Must not be null. 271 * @param charset 272 * The Charset to decode the given file. 273 * @param format 274 * the CSVFormat used for CSV parsing. Must not be null. 275 * @return a new parser 276 * @throws IllegalArgumentException 277 * If the parameters of the format are inconsistent or if either file or format are null. 278 * @throws IOException 279 * If an I/O error occurs 280 * @throws CSVException Thrown on invalid input. 281 * @since 1.5 282 */ 283 @SuppressWarnings("resource") 284 public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException { 285 Objects.requireNonNull(path, "path"); 286 Objects.requireNonNull(format, "format"); 287 return parse(Files.newInputStream(path), charset, format); 288 } 289 290 /** 291 * Creates a CSV parser using the given {@link CSVFormat} 292 * 293 * <p> 294 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, 295 * unless you close the {@code reader}. 296 * </p> 297 * 298 * @param reader 299 * a Reader containing CSV-formatted input. Must not be null. 300 * @param format 301 * the CSVFormat used for CSV parsing. Must not be null. 302 * @return a new CSVParser configured with the given reader and format. 303 * @throws IllegalArgumentException 304 * If the parameters of the format are inconsistent or if either reader or format are null. 305 * @throws IOException 306 * If there is a problem reading the header or skipping the first record 307 * @throws CSVException Thrown on invalid input. 308 * @since 1.5 309 */ 310 public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException { 311 return new CSVParser(reader, format); 312 } 313 314 /** 315 * Creates a parser for the given {@link String}. 316 * 317 * @param string 318 * a CSV string. Must not be null. 319 * @param format 320 * the CSVFormat used for CSV parsing. Must not be null. 321 * @return a new parser 322 * @throws IllegalArgumentException 323 * If the parameters of the format are inconsistent or if either string or format are null. 324 * @throws IOException 325 * If an I/O error occurs 326 * @throws CSVException Thrown on invalid input. 327 */ 328 public static CSVParser parse(final String string, final CSVFormat format) throws IOException { 329 Objects.requireNonNull(string, "string"); 330 Objects.requireNonNull(format, "format"); 331 332 return new CSVParser(new StringReader(string), format); 333 } 334 335 /** 336 * Creates and returns a parser for the given URL, which the caller MUST close. 337 * 338 * <p> 339 * If you do not read all records from the given {@code url}, you should call {@link #close()} on the parser, unless 340 * you close the {@code url}. 341 * </p> 342 * 343 * @param url 344 * a URL. Must not be null. 345 * @param charset 346 * the charset for the resource. Must not be null. 347 * @param format 348 * the CSVFormat used for CSV parsing. Must not be null. 349 * @return a new parser 350 * @throws IllegalArgumentException 351 * If the parameters of the format are inconsistent or if either url, charset or format are null. 352 * @throws IOException 353 * If an I/O error occurs 354 * @throws CSVException Thrown on invalid input. 355 */ 356 @SuppressWarnings("resource") 357 public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException { 358 Objects.requireNonNull(url, "url"); 359 Objects.requireNonNull(charset, "charset"); 360 Objects.requireNonNull(format, "format"); 361 362 return new CSVParser(new InputStreamReader(url.openStream(), charset), format); 363 } 364 365 private String headerComment; 366 367 private String trailerComment; 368 369 private final CSVFormat format; 370 371 private final Headers headers; 372 373 private final Lexer lexer; 374 375 private final CSVRecordIterator csvRecordIterator; 376 377 /** A record buffer for getRecord(). Grows as necessary and is reused. */ 378 private final List<String> recordList = new ArrayList<>(); 379 380 /** 381 * The next record number to assign. 382 */ 383 private long recordNumber; 384 385 /** 386 * Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination 387 * with {@link #recordNumber}. 388 */ 389 private final long characterOffset; 390 391 private final Token reusableToken = new Token(); 392 393 /** 394 * Constructs a new instance using the given {@link CSVFormat} 395 * 396 * <p> 397 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, 398 * unless you close the {@code reader}. 399 * </p> 400 * 401 * @param reader 402 * a Reader containing CSV-formatted input. Must not be null. 403 * @param format 404 * the CSVFormat used for CSV parsing. Must not be null. 405 * @throws IllegalArgumentException 406 * If the parameters of the format are inconsistent or if either reader or format are null. 407 * @throws IOException 408 * If there is a problem reading the header or skipping the first record 409 * @throws CSVException Thrown on invalid input. 410 */ 411 public CSVParser(final Reader reader, final CSVFormat format) throws IOException { 412 this(reader, format, 0, 1); 413 } 414 415 /** 416 * Constructs a new instance using the given {@link CSVFormat} 417 * 418 * <p> 419 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, 420 * unless you close the {@code reader}. 421 * </p> 422 * 423 * @param reader 424 * a Reader containing CSV-formatted input. Must not be null. 425 * @param format 426 * the CSVFormat used for CSV parsing. Must not be null. 427 * @param characterOffset 428 * Lexer offset when the parser does not start parsing at the beginning of the source. 429 * @param recordNumber 430 * The next record number to assign 431 * @throws IllegalArgumentException 432 * If the parameters of the format are inconsistent or if either the reader or format is null. 433 * @throws IOException 434 * If there is a problem reading the header or skipping the first record 435 * @throws CSVException Thrown on invalid input. 436 * @since 1.1 437 */ 438 @SuppressWarnings("resource") 439 public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber) 440 throws IOException { 441 Objects.requireNonNull(reader, "reader"); 442 Objects.requireNonNull(format, "format"); 443 this.format = format.copy(); 444 this.lexer = new Lexer(format, new ExtendedBufferedReader(reader)); 445 this.csvRecordIterator = new CSVRecordIterator(); 446 this.headers = createHeaders(); 447 this.characterOffset = characterOffset; 448 this.recordNumber = recordNumber - 1; 449 } 450 451 private void addRecordValue(final boolean lastRecord) { 452 final String input = format.trim(reusableToken.content.toString()); 453 if (lastRecord && input.isEmpty() && format.getTrailingDelimiter()) { 454 return; 455 } 456 recordList.add(handleNull(input)); 457 } 458 459 /** 460 * Closes resources. 461 * 462 * @throws IOException 463 * If an I/O error occurs 464 */ 465 @Override 466 public void close() throws IOException { 467 lexer.close(); 468 } 469 470 private Map<String, Integer> createEmptyHeaderMap() { 471 return format.getIgnoreHeaderCase() ? 472 new TreeMap<>(String.CASE_INSENSITIVE_ORDER) : 473 new LinkedHashMap<>(); 474 } 475 476 /** 477 * Creates the name to index mapping if the format defines a header. 478 * 479 * @return null if the format has no header. 480 * @throws IOException if there is a problem reading the header or skipping the first record 481 * @throws CSVException Thrown on invalid input. 482 */ 483 private Headers createHeaders() throws IOException { 484 Map<String, Integer> hdrMap = null; 485 List<String> headerNames = null; 486 final String[] formatHeader = format.getHeader(); 487 if (formatHeader != null) { 488 hdrMap = createEmptyHeaderMap(); 489 String[] headerRecord = null; 490 if (formatHeader.length == 0) { 491 // read the header from the first line of the file 492 final CSVRecord nextRecord = nextRecord(); 493 if (nextRecord != null) { 494 headerRecord = nextRecord.values(); 495 headerComment = nextRecord.getComment(); 496 } 497 } else { 498 if (format.getSkipHeaderRecord()) { 499 final CSVRecord nextRecord = nextRecord(); 500 if (nextRecord != null) { 501 headerComment = nextRecord.getComment(); 502 } 503 } 504 headerRecord = formatHeader; 505 } 506 507 // build the name to index mappings 508 if (headerRecord != null) { 509 // Track an occurrence of a null, empty or blank header. 510 boolean observedMissing = false; 511 for (int i = 0; i < headerRecord.length; i++) { 512 final String header = headerRecord[i]; 513 final boolean blankHeader = CSVFormat.isBlank(header); 514 if (blankHeader && !format.getAllowMissingColumnNames()) { 515 throw new IllegalArgumentException( 516 "A header name is missing in " + Arrays.toString(headerRecord)); 517 } 518 519 final boolean containsHeader = blankHeader ? observedMissing : hdrMap.containsKey(header); 520 final DuplicateHeaderMode headerMode = format.getDuplicateHeaderMode(); 521 final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL; 522 final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY; 523 524 if (containsHeader && !duplicatesAllowed && !(blankHeader && emptyDuplicatesAllowed)) { 525 throw new IllegalArgumentException( 526 String.format( 527 "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", 528 header, Arrays.toString(headerRecord))); 529 } 530 observedMissing |= blankHeader; 531 if (header != null) { 532 hdrMap.put(header, Integer.valueOf(i)); // N.B. Explicit (un)boxing is intentional 533 if (headerNames == null) { 534 headerNames = new ArrayList<>(headerRecord.length); 535 } 536 headerNames.add(header); 537 } 538 } 539 } 540 } 541 // Make header names Collection immutable 542 return new Headers(hdrMap, headerNames == null ? Collections.emptyList() : Collections.unmodifiableList(headerNames)); 543 } 544 545 /** 546 * Gets the current line number in the input stream. 547 * 548 * <p> 549 * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to 550 * the record number. 551 * </p> 552 * 553 * @return current line number 554 */ 555 public long getCurrentLineNumber() { 556 return lexer.getCurrentLineNumber(); 557 } 558 559 /** 560 * Gets the first end-of-line string encountered. 561 * 562 * @return the first end-of-line string 563 * @since 1.5 564 */ 565 public String getFirstEndOfLine() { 566 return lexer.getFirstEol(); 567 } 568 569 /** 570 * Gets the header comment, if any. 571 * The header comment appears before the header record. 572 * 573 * @return the header comment for this stream, or null if no comment is available. 574 * @since 1.10.0 575 */ 576 public String getHeaderComment() { 577 return headerComment; 578 } 579 580 /** 581 * Gets a copy of the header map as defined in the CSVFormat's header. 582 * <p> 583 * The map keys are column names. The map values are 0-based indices. 584 * </p> 585 * <p> 586 * Note: The map can only provide a one-to-one mapping when the format did not 587 * contain null or duplicate column names. 588 * </p> 589 * 590 * @return a copy of the header map. 591 */ 592 public Map<String, Integer> getHeaderMap() { 593 if (headers.headerMap == null) { 594 return null; 595 } 596 final Map<String, Integer> map = createEmptyHeaderMap(); 597 map.putAll(headers.headerMap); 598 return map; 599 } 600 601 /** 602 * Gets the underlying header map. 603 * 604 * @return the underlying header map. 605 */ 606 Map<String, Integer> getHeaderMapRaw() { 607 return headers.headerMap; 608 } 609 610 /** 611 * Gets a read-only list of header names that iterates in column order as defined in the CSVFormat's header. 612 * <p> 613 * Note: The list provides strings that can be used as keys in the header map. 614 * The list will not contain null column names if they were present in the input 615 * format. 616 * </p> 617 * 618 * @return read-only list of header names that iterates in column order. 619 * @see #getHeaderMap() 620 * @since 1.7 621 */ 622 public List<String> getHeaderNames() { 623 return Collections.unmodifiableList(headers.headerNames); 624 } 625 626 /** 627 * Gets the current record number in the input stream. 628 * 629 * <p> 630 * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to 631 * the line number. 632 * </p> 633 * 634 * @return current record number 635 */ 636 public long getRecordNumber() { 637 return recordNumber; 638 } 639 640 /** 641 * Parses the CSV input according to the given format and returns the content as a list of 642 * {@link CSVRecord CSVRecords}. 643 * 644 * <p> 645 * The returned content starts at the current parse-position in the stream. 646 * </p> 647 * 648 * @return list of {@link CSVRecord CSVRecords}, may be empty 649 * @throws UncheckedIOException 650 * on parse error or input read-failure 651 */ 652 public List<CSVRecord> getRecords() { 653 return stream().collect(Collectors.toList()); 654 } 655 656 /** 657 * Gets the trailer comment, if any. 658 * Trailer comments are located between the last record and EOF 659 * 660 * @return the trailer comment for this stream, or null if no comment is available. 661 * @since 1.10.0 662 */ 663 public String getTrailerComment() { 664 return trailerComment; 665 } 666 667 /** 668 * Handles whether the input is parsed as null 669 * 670 * @param input 671 * the cell data to further processed 672 * @return null if input is parsed as null, or input itself if the input isn't parsed as null 673 */ 674 private String handleNull(final String input) { 675 final boolean isQuoted = reusableToken.isQuoted; 676 final String nullString = format.getNullString(); 677 final boolean strictQuoteMode = isStrictQuoteMode(); 678 if (input.equals(nullString)) { 679 // nullString = NULL(String), distinguish between "NULL" and NULL in ALL_NON_NULL or NON_NUMERIC quote mode 680 return strictQuoteMode && isQuoted ? input : null; 681 } 682 // don't set nullString, distinguish between "" and ,, (absent values) in All_NON_NULL or NON_NUMERIC quote mode 683 return strictQuoteMode && nullString == null && input.isEmpty() && !isQuoted ? null : input; 684 } 685 686 /** 687 * Checks whether there is a header comment. 688 * The header comment appears before the header record. 689 * Note that if the parser's format has been given an explicit header 690 * (with {@link CSVFormat.Builder#setHeader(String... )} or another overload) 691 * and the header record is not being skipped 692 * ({@link CSVFormat.Builder#setSkipHeaderRecord} is false) then any initial comments 693 * will be associated with the first record, not the header. 694 * 695 * @return true if this parser has seen a header comment, false otherwise 696 * @since 1.10.0 697 */ 698 public boolean hasHeaderComment() { 699 return headerComment != null; 700 } 701 702 /** 703 * Checks whether there is a trailer comment. 704 * Trailer comments are located between the last record and EOF. 705 * The trailer comments will only be available after the parser has 706 * finished processing this stream. 707 * 708 * @return true if this parser has seen a trailer comment, false otherwise 709 * @since 1.10.0 710 */ 711 public boolean hasTrailerComment() { 712 return trailerComment != null; 713 } 714 715 /** 716 * Tests whether this parser is closed. 717 * 718 * @return whether this parser is closed. 719 */ 720 public boolean isClosed() { 721 return lexer.isClosed(); 722 } 723 724 /** 725 * Tests whether the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or {@link QuoteMode#NON_NUMERIC}. 726 * 727 * @return true if the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or 728 * {@link QuoteMode#NON_NUMERIC}. 729 */ 730 private boolean isStrictQuoteMode() { 731 return format.getQuoteMode() == QuoteMode.ALL_NON_NULL || 732 format.getQuoteMode() == QuoteMode.NON_NUMERIC; 733 } 734 735 /** 736 * Returns the record iterator. 737 * 738 * <p> 739 * An {@link IOException} caught during the iteration is re-thrown as an 740 * {@link IllegalStateException}. 741 * </p> 742 * <p> 743 * If the parser is closed, the iterator will not yield any more records. 744 * A call to {@link Iterator#hasNext()} will return {@code false} and 745 * a call to {@link Iterator#next()} will throw a 746 * {@link NoSuchElementException}. 747 * </p> 748 * <p> 749 * If it is necessary to construct an iterator which is usable after the 750 * parser is closed, one option is to extract all records as a list with 751 * {@link #getRecords()}, and return an iterator to that list. 752 * </p> 753 */ 754 @Override 755 public Iterator<CSVRecord> iterator() { 756 return csvRecordIterator; 757 } 758 759 /** 760 * Parses the next record from the current point in the stream. 761 * 762 * @return the record as an array of values, or {@code null} if the end of the stream has been reached 763 * @throws IOException on parse error or input read-failure 764 * @throws CSVException Thrown on invalid input. 765 */ 766 CSVRecord nextRecord() throws IOException { 767 CSVRecord result = null; 768 recordList.clear(); 769 StringBuilder sb = null; 770 final long startCharPosition = lexer.getCharacterPosition() + characterOffset; 771 do { 772 reusableToken.reset(); 773 lexer.nextToken(reusableToken); 774 switch (reusableToken.type) { 775 case TOKEN: 776 addRecordValue(false); 777 break; 778 case EORECORD: 779 addRecordValue(true); 780 break; 781 case EOF: 782 if (reusableToken.isReady) { 783 addRecordValue(true); 784 } else if (sb != null) { 785 trailerComment = sb.toString(); 786 } 787 break; 788 case INVALID: 789 throw new IOException("(line " + getCurrentLineNumber() + ") invalid parse sequence"); 790 case COMMENT: // Ignored currently 791 if (sb == null) { // first comment for this record 792 sb = new StringBuilder(); 793 } else { 794 sb.append(Constants.LF); 795 } 796 sb.append(reusableToken.content); 797 reusableToken.type = TOKEN; // Read another token 798 break; 799 default: 800 throw new IllegalStateException("Unexpected Token type: " + reusableToken.type); 801 } 802 } while (reusableToken.type == TOKEN); 803 804 if (!recordList.isEmpty()) { 805 recordNumber++; 806 final String comment = Objects.toString(sb, null); 807 result = new CSVRecord(this, recordList.toArray(Constants.EMPTY_STRING_ARRAY), comment, 808 recordNumber, startCharPosition); 809 } 810 return result; 811 } 812 813 /** 814 * Returns a sequential {@code Stream} with this collection as its source. 815 * <p> 816 * If the parser is closed, the stream will not produce any more values. 817 * See the comments in {@link #iterator()}. 818 * </p> 819 * @return a sequential {@code Stream} with this collection as its source. 820 * @since 1.9.0 821 */ 822 public Stream<CSVRecord> stream() { 823 return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator(), Spliterator.ORDERED), false); 824 } 825 826}