1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * https://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, 13 * software distributed under the License is distributed on an 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 * KIND, either express or implied. See the License for the 16 * specific language governing permissions and limitations 17 * under the License. 18 */ 19 20 package org.apache.commons.csv; 21 22 import static org.apache.commons.io.IOUtils.EOF; 23 24 import java.io.File; 25 import java.io.IOException; 26 import java.io.InputStream; 27 import java.io.OutputStream; 28 import java.io.Reader; 29 import java.io.Serializable; 30 import java.io.StringWriter; 31 import java.io.Writer; 32 import java.nio.charset.Charset; 33 import java.nio.file.Files; 34 import java.nio.file.Path; 35 import java.sql.ResultSet; 36 import java.sql.ResultSetMetaData; 37 import java.sql.SQLException; 38 import java.util.Arrays; 39 import java.util.HashSet; 40 import java.util.Objects; 41 import java.util.Set; 42 import java.util.function.Supplier; 43 44 import org.apache.commons.codec.binary.Base64OutputStream; 45 import org.apache.commons.io.IOUtils; 46 import org.apache.commons.io.function.Uncheck; 47 import org.apache.commons.io.output.AppendableOutputStream; 48 49 /** 50 * Specifies the format of a CSV file for parsing and writing. 
51 * 52 * <h2>Using predefined formats</h2> 53 * 54 * <p> 55 * You can use one of the predefined formats: 56 * </p> 57 * 58 * <ul> 59 * <li>{@link #DEFAULT}</li> 60 * <li>{@link #EXCEL}</li> 61 * <li>{@link #INFORMIX_UNLOAD}</li> 62 * <li>{@link #INFORMIX_UNLOAD_CSV}</li> 63 * <li>{@link #MONGODB_CSV}</li> 64 * <li>{@link #MONGODB_TSV}</li> 65 * <li>{@link #MYSQL}</li> 66 * <li>{@link #ORACLE}</li> 67 * <li>{@link #POSTGRESQL_CSV}</li> 68 * <li>{@link #POSTGRESQL_TEXT}</li> 69 * <li>{@link #RFC4180}</li> 70 * <li>{@link #TDF}</li> 71 * </ul> 72 * 73 * <p> 74 * For example: 75 * </p> 76 * 77 * <pre> 78 * CSVParser parser = CSVFormat.EXCEL.parse(reader); 79 * </pre> 80 * 81 * <p> 82 * The {@link CSVParser} provides static methods to parse other input types, for example: 83 * </p> 84 * 85 * <pre> 86 * CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL); 87 * </pre> 88 * 89 * <h2>Defining formats</h2> 90 * 91 * <p> 92 * You can extend a format by calling the {@code set} methods. For example: 93 * </p> 94 * 95 * <pre>{@code 96 * CSVFormat.EXCEL.builder().setNullString("N/A").setIgnoreSurroundingSpaces(true).get(); 97 * }</pre> 98 * 99 * <h2>Defining column names</h2> 100 * 101 * <p> 102 * To define the column names you want to use to access records, write: 103 * </p> 104 * 105 * <pre>{@code 106 * CSVFormat.EXCEL.builder().setHeader("Col1", "Col2", "Col3").get(); 107 * }</pre> 108 * 109 * <p> 110 * Calling {@link Builder#setHeader(String...)} lets you use the given names to address values in a {@link CSVRecord}, and assumes that your CSV source does not 111 * contain a first record that also defines column names. 112 * 113 * If it does, then you are overriding this metadata with your names and you should skip the first record by calling 114 * {@link Builder#setSkipHeaderRecord(boolean)} with {@code true}. 115 * </p> 116 * 117 * <h2>Parsing</h2> 118 * 119 * <p> 120 * You can use a format directly to parse a reader. 
For example, to parse an Excel file with columns header, write: 121 * </p> 122 * 123 * <pre>{@code 124 * Reader in = ...; 125 * CSVFormat.EXCEL.builder().setHeader("Col1", "Col2", "Col3").get().parse(in); 126 * }</pre> 127 * 128 * <p> 129 * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}. 130 * </p> 131 * 132 * <h2>Referencing columns safely</h2> 133 * 134 * <p> 135 * If your source contains a header record, you can simplify your code and safely reference columns, by using {@link Builder#setHeader(String...)} with no 136 * arguments: 137 * </p> 138 * 139 * <pre> 140 * CSVFormat.EXCEL.builder().setHeader().get(); 141 * </pre> 142 * 143 * <p> 144 * This causes the parser to read the first record and use its values as column names. 145 * 146 * Then, call one of the {@link CSVRecord} get method that takes a String column name argument: 147 * </p> 148 * 149 * <pre>{@code 150 * String value = record.get("Col1"); 151 * }</pre> 152 * 153 * <p> 154 * This makes your code impervious to changes in column order in the CSV file. 155 * </p> 156 * 157 * <h2>Serialization</h2> 158 * <p> 159 * This class implements the {@link Serializable} interface with the following caveats: 160 * </p> 161 * <ul> 162 * <li>This class will no longer implement Serializable in 2.0.</li> 163 * <li>Serialization is not supported from one version to the next.</li> 164 * </ul> 165 * <p> 166 * The {@code serialVersionUID} values are: 167 * </p> 168 * <ul> 169 * <li>Version 1.10.0: {@code 2L}</li> 170 * <li>Version 1.9.0 through 1.0: {@code 1L}</li> 171 * </ul> 172 * 173 * <h2>Notes</h2> 174 * <p> 175 * This class is immutable. 176 * </p> 177 * <p> 178 * Not all settings are used for both parsing and writing. 179 * </p> 180 */ 181 public final class CSVFormat implements Serializable { 182 183 /** 184 * Builds CSVFormat instances. 
185 * 186 * @since 1.9.0 187 */ 188 public static class Builder implements Supplier<CSVFormat> { 189 190 /** 191 * Creates a new default builder, as for {@link #RFC4180} but allowing empty lines. 192 * 193 * <p> 194 * The {@link Builder} settings are: 195 * </p> 196 * <ul> 197 * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code (',')}</li> 198 * <li>{@link Builder#setQuote(char) setQuote}{@code ('"')}</li> 199 * <li>{@link Builder#setRecordSeparator(String) setRecordSeparator}{@code ("\r\n")}</li> 200 * <li>{@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (true)}</li> 201 * <li>{@link Builder#setDuplicateHeaderMode(DuplicateHeaderMode) setDuplicateHeaderMode}{@code (DuplicateHeaderMode.ALLOW_ALL)}</li> 202 * <li>All other values take their Java defaults, {@code false} for booleans, {@code null} for object references.</li> 203 * </ul> 204 * 205 * @see Predefined#Default 206 * @see DuplicateHeaderMode#ALLOW_ALL 207 * 208 * @return a copy of the builder 209 */ 210 public static Builder create() { 211 // @formatter:off 212 return new Builder() 213 .setDelimiter(Constants.COMMA) 214 .setQuote(Constants.DOUBLE_QUOTE_CHAR) 215 .setRecordSeparator(Constants.CRLF) 216 .setIgnoreEmptyLines(true) 217 .setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL); 218 // @formatter:on 219 } 220 221 /** 222 * Creates a new builder from the given format. 223 * 224 * @param csvFormat the source format. 225 * @return a new builder. 
226 */ 227 public static Builder create(final CSVFormat csvFormat) { 228 return new Builder(csvFormat); 229 } 230 231 private boolean allowMissingColumnNames; 232 233 private boolean autoFlush; 234 235 private Character commentMarker; 236 237 private String delimiter; 238 239 private DuplicateHeaderMode duplicateHeaderMode; 240 241 private Character escapeCharacter; 242 243 private String[] headerComments; 244 245 private String[] headers; 246 247 private boolean ignoreEmptyLines; 248 249 private boolean ignoreHeaderCase; 250 251 private boolean ignoreSurroundingSpaces; 252 253 private String nullString; 254 255 private Character quoteCharacter; 256 257 private String quotedNullString; 258 259 private QuoteMode quoteMode; 260 261 private String recordSeparator; 262 263 private boolean skipHeaderRecord; 264 265 private boolean lenientEof; 266 267 private boolean trailingData; 268 269 private boolean trailingDelimiter; 270 271 private boolean trim; 272 273 private Builder() { 274 // empty 275 } 276 277 private Builder(final CSVFormat csvFormat) { 278 this.delimiter = csvFormat.delimiter; 279 this.quoteCharacter = csvFormat.quoteCharacter; 280 this.quoteMode = csvFormat.quoteMode; 281 this.commentMarker = csvFormat.commentMarker; 282 this.escapeCharacter = csvFormat.escapeCharacter; 283 this.ignoreSurroundingSpaces = csvFormat.ignoreSurroundingSpaces; 284 this.allowMissingColumnNames = csvFormat.allowMissingColumnNames; 285 this.ignoreEmptyLines = csvFormat.ignoreEmptyLines; 286 this.recordSeparator = csvFormat.recordSeparator; 287 this.nullString = csvFormat.nullString; 288 this.headerComments = csvFormat.headerComments; 289 this.headers = csvFormat.headers; 290 this.skipHeaderRecord = csvFormat.skipHeaderRecord; 291 this.ignoreHeaderCase = csvFormat.ignoreHeaderCase; 292 this.lenientEof = csvFormat.lenientEof; 293 this.trailingData = csvFormat.trailingData; 294 this.trailingDelimiter = csvFormat.trailingDelimiter; 295 this.trim = csvFormat.trim; 296 this.autoFlush = 
csvFormat.autoFlush; 297 this.quotedNullString = csvFormat.quotedNullString; 298 this.duplicateHeaderMode = csvFormat.duplicateHeaderMode; 299 } 300 301 /** 302 * Builds a new CSVFormat instance. 303 * 304 * @return a new CSVFormat instance. 305 * @deprecated Use {@link #get()}. 306 */ 307 @Deprecated 308 public CSVFormat build() { 309 return get(); 310 } 311 312 /** 313 * Builds a new CSVFormat instance. 314 * 315 * @return a new CSVFormat instance. 316 * @since 1.13.0 317 */ 318 @Override 319 public CSVFormat get() { 320 return new CSVFormat(this); 321 } 322 323 /** 324 * Sets the duplicate header names behavior, true to allow, false to disallow. 325 * 326 * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow. 327 * @return This instance. 328 * @deprecated Use {@link #setDuplicateHeaderMode(DuplicateHeaderMode)}. 329 */ 330 @Deprecated 331 public Builder setAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) { 332 setDuplicateHeaderMode(allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY); 333 return this; 334 } 335 336 /** 337 * Sets the parser missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an 338 * {@link IllegalArgumentException} to be thrown. 339 * 340 * @param allowMissingColumnNames the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to 341 * cause an {@link IllegalArgumentException} to be thrown. 342 * @return This instance. 343 */ 344 public Builder setAllowMissingColumnNames(final boolean allowMissingColumnNames) { 345 this.allowMissingColumnNames = allowMissingColumnNames; 346 return this; 347 } 348 349 /** 350 * Sets whether to flush on close. 351 * 352 * @param autoFlush whether to flush on close. 353 * @return This instance. 
354 */ 355 public Builder setAutoFlush(final boolean autoFlush) { 356 this.autoFlush = autoFlush; 357 return this; 358 } 359 360 /** 361 * Sets the comment marker character, use {@code null} to disable comments. 362 * <p> 363 * The comment start character is only recognized at the start of a line. 364 * </p> 365 * <p> 366 * Comments are printed first, before headers. 367 * </p> 368 * <p> 369 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. 370 * </p> 371 * <p> 372 * If the comment marker is not set, then the header comments are ignored. 373 * </p> 374 * <p> 375 * For example: 376 * </p> 377 * 378 * <pre> 379 * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); 380 * </pre> 381 * <p> 382 * writes: 383 * </p> 384 * 385 * <pre> 386 * # Generated by Apache Commons CSV. 387 * # 1970-01-01T00:00:00Z 388 * </pre> 389 * 390 * @param commentMarker the comment start marker, use {@code null} to disable. 391 * @return This instance. 392 * @throws IllegalArgumentException thrown if the specified character is a line break 393 */ 394 public Builder setCommentMarker(final char commentMarker) { 395 setCommentMarker(Character.valueOf(commentMarker)); 396 return this; 397 } 398 399 /** 400 * Sets the comment marker character, use {@code null} to disable comments. 401 * <p> 402 * The comment start character is only recognized at the start of a line. 403 * </p> 404 * <p> 405 * Comments are printed first, before headers. 406 * </p> 407 * <p> 408 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. 409 * </p> 410 * <p> 411 * If the comment marker is not set, then the header comments are ignored. 
412 * </p> 413 * <p> 414 * For example: 415 * </p> 416 * 417 * <pre> 418 * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); 419 * </pre> 420 * <p> 421 * writes: 422 * </p> 423 * 424 * <pre> 425 * # Generated by Apache Commons CSV. 426 * # 1970-01-01T00:00:00Z 427 * </pre> 428 * 429 * @param commentMarker the comment start marker, use {@code null} to disable. 430 * @return This instance. 431 * @throws IllegalArgumentException thrown if the specified character is a line break 432 */ 433 public Builder setCommentMarker(final Character commentMarker) { 434 if (isLineBreak(commentMarker)) { 435 throw new IllegalArgumentException("The comment start marker character cannot be a line break"); 436 } 437 this.commentMarker = commentMarker; 438 return this; 439 } 440 441 /** 442 * Sets the delimiter character. 443 * 444 * @param delimiter the delimiter character. 445 * @return This instance. 446 */ 447 public Builder setDelimiter(final char delimiter) { 448 return setDelimiter(String.valueOf(delimiter)); 449 } 450 451 /** 452 * Sets the delimiter character. 453 * 454 * @param delimiter the delimiter character. 455 * @return This instance. 456 */ 457 public Builder setDelimiter(final String delimiter) { 458 if (containsLineBreak(delimiter)) { 459 throw new IllegalArgumentException("The delimiter cannot be a line break"); 460 } 461 if (delimiter.isEmpty()) { 462 throw new IllegalArgumentException("The delimiter cannot be empty"); 463 } 464 this.delimiter = delimiter; 465 return this; 466 } 467 468 /** 469 * Sets the duplicate header names behavior. 470 * 471 * @param duplicateHeaderMode the duplicate header names behavior 472 * @return This instance. 
473 * @since 1.10.0 474 */ 475 public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) { 476 this.duplicateHeaderMode = Objects.requireNonNull(duplicateHeaderMode, "duplicateHeaderMode"); 477 return this; 478 } 479 480 /** 481 * Sets the escape character. 482 * 483 * @param escapeCharacter the escape character. 484 * @return This instance. 485 * @throws IllegalArgumentException thrown if the specified character is a line break 486 */ 487 public Builder setEscape(final char escapeCharacter) { 488 setEscape(Character.valueOf(escapeCharacter)); 489 return this; 490 } 491 492 /** 493 * Sets the escape character. 494 * 495 * @param escapeCharacter the escape character. 496 * @return This instance. 497 * @throws IllegalArgumentException thrown if the specified character is a line break 498 */ 499 public Builder setEscape(final Character escapeCharacter) { 500 if (isLineBreak(escapeCharacter)) { 501 throw new IllegalArgumentException("The escape character cannot be a line break"); 502 } 503 this.escapeCharacter = escapeCharacter; 504 return this; 505 } 506 507 /** 508 * Sets the header defined by the given {@link Enum} class. 509 * 510 * <p> 511 * Example: 512 * </p> 513 * 514 * <pre> 515 * public enum HeaderEnum { 516 * Name, Email, Phone 517 * } 518 * 519 * Builder builder = builder.setHeader(HeaderEnum.class); 520 * </pre> 521 * <p> 522 * The header is also used by the {@link CSVPrinter}. 523 * </p> 524 * 525 * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 526 * @return This instance. 527 */ 528 public Builder setHeader(final Class<? 
extends Enum<?>> headerEnum) { 529 String[] header = null; 530 if (headerEnum != null) { 531 final Enum<?>[] enumValues = headerEnum.getEnumConstants(); 532 header = new String[enumValues.length]; 533 Arrays.setAll(header, i -> enumValues[i].name()); 534 } 535 return setHeader(header); 536 } 537 538 /** 539 * Sets the header from the result set metadata. The header can be parsed automatically from the input file with: 540 * 541 * <pre> 542 * builder.setHeader(); 543 * </pre> 544 * 545 * or specified manually with: 546 * 547 * <pre> 548 * builder.setHeader(resultSet); 549 * </pre> 550 * <p> 551 * The header is also used by the {@link CSVPrinter}. 552 * </p> 553 * 554 * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 555 * @return This instance. 556 * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. 557 */ 558 public Builder setHeader(final ResultSet resultSet) throws SQLException { 559 return setHeader(resultSet != null ? resultSet.getMetaData() : null); 560 } 561 562 /** 563 * Sets the header from the result set metadata. The header can be parsed automatically from the input file with: 564 * 565 * <pre> 566 * builder.setHeader(); 567 * </pre> 568 * 569 * or specified manually with: 570 * 571 * <pre> 572 * builder.setHeader(resultSetMetaData); 573 * </pre> 574 * <p> 575 * The header is also used by the {@link CSVPrinter}. 576 * </p> 577 * 578 * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 579 * @return This instance. 580 * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. 
581 */ 582 public Builder setHeader(final ResultSetMetaData resultSetMetaData) throws SQLException { 583 String[] labels = null; 584 if (resultSetMetaData != null) { 585 final int columnCount = resultSetMetaData.getColumnCount(); 586 labels = new String[columnCount]; 587 for (int i = 0; i < columnCount; i++) { 588 labels[i] = resultSetMetaData.getColumnLabel(i + 1); 589 } 590 } 591 return setHeader(labels); 592 } 593 594 /** 595 * Sets the header to the given values. The header can be parsed automatically from the input file with: 596 * 597 * <pre> 598 * builder.setHeader(); 599 * </pre> 600 * 601 * or specified manually with: 602 * 603 * <pre>{@code 604 * builder.setHeader("name", "email", "phone"); 605 * }</pre> 606 * <p> 607 * The header is also used by the {@link CSVPrinter}. 608 * </p> 609 * 610 * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 611 * @return This instance. 612 */ 613 public Builder setHeader(final String... header) { 614 this.headers = CSVFormat.clone(header); 615 return this; 616 } 617 618 /** 619 * Sets the header comments to write before the CSV data. 620 * <p> 621 * This setting is ignored by the parser. 622 * </p> 623 * <p> 624 * Comments are printed first, before headers. 625 * </p> 626 * <p> 627 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. 628 * </p> 629 * <p> 630 * If the comment marker is not set, then the header comments are ignored. 631 * </p> 632 * <p> 633 * For example: 634 * </p> 635 * 636 * <pre> 637 * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); 638 * </pre> 639 * <p> 640 * writes: 641 * </p> 642 * 643 * <pre> 644 * # Generated by Apache Commons CSV. 645 * # 1970-01-01T00:00:00Z 646 * </pre> 647 * 648 * @param headerComments the headerComments which will be printed by the Printer before the CSV data. 
649 * @return This instance. 650 */ 651 public Builder setHeaderComments(final Object... headerComments) { 652 this.headerComments = CSVFormat.clone(toStringArray(headerComments)); 653 return this; 654 } 655 656 /** 657 * Sets the header comments to write before the CSV data. 658 * <p> 659 * This setting is ignored by the parser. 660 * </p> 661 * <p> 662 * Comments are printed first, before headers. 663 * </p> 664 * <p> 665 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. 666 * </p> 667 * <p> 668 * If the comment marker is not set, then the header comments are ignored. 669 * </p> 670 * <p> 671 * For example: 672 * </p> 673 * 674 * <pre> 675 * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0).toString()); 676 * </pre> 677 * <p> 678 * writes: 679 * </p> 680 * 681 * <pre> 682 * # Generated by Apache Commons CSV. 683 * # 1970-01-01T00:00:00Z 684 * </pre> 685 * 686 * @param headerComments the headerComments which will be printed by the Printer before the CSV data. 687 * @return This instance. 688 */ 689 public Builder setHeaderComments(final String... headerComments) { 690 this.headerComments = CSVFormat.clone(headerComments); 691 return this; 692 } 693 694 /** 695 * Sets the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate empty lines to empty 696 * records. 697 * 698 * @param ignoreEmptyLines the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate 699 * empty lines to empty records. 700 * @return This instance. 
701 */ 702 public Builder setIgnoreEmptyLines(final boolean ignoreEmptyLines) { 703 this.ignoreEmptyLines = ignoreEmptyLines; 704 return this; 705 } 706 707 /** 708 * Sets the parser case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. 709 * 710 * @param ignoreHeaderCase the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. 711 * @return This instance. 712 */ 713 public Builder setIgnoreHeaderCase(final boolean ignoreHeaderCase) { 714 this.ignoreHeaderCase = ignoreHeaderCase; 715 return this; 716 } 717 718 /** 719 * Sets the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is. 720 * 721 * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is. 722 * @return This instance. 723 */ 724 public Builder setIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { 725 this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; 726 return this; 727 } 728 729 /** 730 * Sets whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility. 731 * 732 * @param lenientEof whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility. 733 * @return This instance. 734 * @since 1.11.0 735 */ 736 public Builder setLenientEof(final boolean lenientEof) { 737 this.lenientEof = lenientEof; 738 return this; 739 } 740 741 /** 742 * Sets the String to convert to and from {@code null}. No substitution occurs if {@code null}. 743 * 744 * <ul> 745 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li> 746 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li> 747 * </ul> 748 * 749 * @param nullString the String to convert to and from {@code null}. 
No substitution occurs if {@code null}. 750 * @return This instance. 751 */ 752 public Builder setNullString(final String nullString) { 753 this.nullString = nullString; 754 this.quotedNullString = quoteCharacter + nullString + quoteCharacter; 755 return this; 756 } 757 758 /** 759 * Sets the quote character. 760 * 761 * @param quoteCharacter the quote character. 762 * @return This instance. 763 */ 764 public Builder setQuote(final char quoteCharacter) { 765 setQuote(Character.valueOf(quoteCharacter)); 766 return this; 767 } 768 769 /** 770 * Sets the quote character, use {@code null} to disable. 771 * 772 * @param quoteCharacter the quote character, use {@code null} to disable. 773 * @return This instance. 774 */ 775 public Builder setQuote(final Character quoteCharacter) { 776 if (isLineBreak(quoteCharacter)) { 777 throw new IllegalArgumentException("The quoteCharacter cannot be a line break"); 778 } 779 this.quoteCharacter = quoteCharacter; 780 return this; 781 } 782 783 /** 784 * Sets the quote policy to use for output. 785 * 786 * @param quoteMode the quote policy to use for output. 787 * @return This instance. 788 */ 789 public Builder setQuoteMode(final QuoteMode quoteMode) { 790 this.quoteMode = quoteMode; 791 return this; 792 } 793 794 /** 795 * Sets the record separator to use for output. 796 * 797 * <p> 798 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' 799 * and "\r\n" 800 * </p> 801 * 802 * @param recordSeparator the record separator to use for output. 803 * @return This instance. 804 */ 805 public Builder setRecordSeparator(final char recordSeparator) { 806 this.recordSeparator = String.valueOf(recordSeparator); 807 return this; 808 } 809 810 /** 811 * Sets the record separator to use for output. 812 * 813 * <p> 814 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. 
Parsing currently only works for inputs with '\n', '\r' 815 * and "\r\n" 816 * </p> 817 * 818 * @param recordSeparator the record separator to use for output. 819 * @return This instance. 820 */ 821 public Builder setRecordSeparator(final String recordSeparator) { 822 this.recordSeparator = recordSeparator; 823 return this; 824 } 825 826 /** 827 * Sets whether to skip the header record. 828 * 829 * @param skipHeaderRecord whether to skip the header record. 830 * @return This instance. 831 */ 832 public Builder setSkipHeaderRecord(final boolean skipHeaderRecord) { 833 this.skipHeaderRecord = skipHeaderRecord; 834 return this; 835 } 836 837 /** 838 * Sets whether reading trailing data is allowed in records, helps Excel compatibility. 839 * 840 * @param trailingData whether reading trailing data is allowed in records, helps Excel compatibility. 841 * @return This instance. 842 * @since 1.11.0 843 */ 844 public Builder setTrailingData(final boolean trailingData) { 845 this.trailingData = trailingData; 846 return this; 847 } 848 849 /** 850 * Sets whether to add a trailing delimiter. 851 * 852 * @param trailingDelimiter whether to add a trailing delimiter. 853 * @return This instance. 854 */ 855 public Builder setTrailingDelimiter(final boolean trailingDelimiter) { 856 this.trailingDelimiter = trailingDelimiter; 857 return this; 858 } 859 860 /** 861 * Sets whether to trim leading and trailing blanks. 862 * 863 * @param trim whether to trim leading and trailing blanks. 864 * @return This instance. 865 */ 866 public Builder setTrim(final boolean trim) { 867 this.trim = trim; 868 return this; 869 } 870 } 871 872 /** 873 * Predefines formats. 874 * 875 * @since 1.2 876 */ 877 public enum Predefined { 878 879 /** 880 * The DEFAULT predefined format. 881 * 882 * @see CSVFormat#DEFAULT 883 */ 884 Default(DEFAULT), 885 886 /** 887 * The EXCEL predefined format. 888 * 889 * @see CSVFormat#EXCEL 890 */ 891 Excel(EXCEL), 892 893 /** 894 * The INFORMIX_UNLOAD predefined format. 
895 * 896 * @see CSVFormat#INFORMIX_UNLOAD 897 * @since 1.3 898 */ 899 InformixUnload(INFORMIX_UNLOAD), 900 901 /** 902 * The INFORMIX_UNLOAD_CSV predefined format. 903 * 904 * @see CSVFormat#INFORMIX_UNLOAD_CSV 905 * @since 1.3 906 */ 907 InformixUnloadCsv(INFORMIX_UNLOAD_CSV), 908 909 /** 910 * The MONGODB_CSV predefined format. 911 * 912 * @see CSVFormat#MONGODB_CSV 913 * @since 1.7 914 */ 915 MongoDBCsv(MONGODB_CSV), 916 917 /** 918 * The MONGODB_TSV predefined format. 919 * 920 * @see CSVFormat#MONGODB_TSV 921 * @since 1.7 922 */ 923 MongoDBTsv(MONGODB_TSV), 924 925 /** 926 * The MYSQL predefined format. 927 * 928 * @see CSVFormat#MYSQL 929 */ 930 MySQL(MYSQL), 931 932 /** 933 * The ORACLE predefined format. 934 * 935 * @see CSVFormat#ORACLE 936 */ 937 Oracle(ORACLE), 938 939 /** 940 * The POSTGRESQL_CSV predefined format. 941 * 942 * @see CSVFormat#POSTGRESQL_CSV 943 * @since 1.5 944 */ 945 PostgreSQLCsv(POSTGRESQL_CSV), 946 947 /** 948 * The POSTGRESQL_TEXT predefined format. 949 * 950 * @see CSVFormat#POSTGRESQL_TEXT 951 */ 952 PostgreSQLText(POSTGRESQL_TEXT), 953 954 /** 955 * The RFC4180 predefined format. 956 * 957 * @see CSVFormat#RFC4180 958 */ 959 RFC4180(CSVFormat.RFC4180), 960 961 /** 962 * The TDF predefined format. 963 * 964 * @see CSVFormat#TDF 965 */ 966 TDF(CSVFormat.TDF); 967 968 private final CSVFormat format; 969 970 Predefined(final CSVFormat format) { 971 this.format = format; 972 } 973 974 /** 975 * Gets the format. 976 * 977 * @return the format. 978 */ 979 public CSVFormat getFormat() { 980 return format; 981 } 982 } 983 984 /** 985 * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines. 
986 * 987 * <p> 988 * The {@link Builder} settings are: 989 * </p> 990 * <ul> 991 * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code (',')}</li> 992 * <li>{@link Builder#setQuote(char) setQuote}{@code ('"')}</li> 993 * <li>{@link Builder#setRecordSeparator(String) setRecordSeparator}{@code ("\r\n")}</li> 994 * <li>{@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (true)}</li> 995 * <li>{@link Builder#setDuplicateHeaderMode(DuplicateHeaderMode) setDuplicateHeaderMode}{@code (DuplicateHeaderMode.ALLOW_ALL)}</li> 996 * </ul> 997 * 998 * @see Predefined#Default 999 * @see DuplicateHeaderMode#ALLOW_ALL 1000 */ 1001 public static final CSVFormat DEFAULT = new CSVFormat(Builder.create()); 1002 1003 /** 1004 * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale-dependent, it might be necessary 1005 * to customize this format to accommodate your regional settings. 1006 * 1007 * <p> 1008 * For example for parsing or generating a CSV file on a French system the following format will be used: 1009 * </p> 1010 * 1011 * <pre> 1012 * CSVFormat fmt = CSVFormat.EXCEL.builder().setDelimiter(';').get(); 1013 * </pre> 1014 * 1015 * <p> 1016 * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>: 1017 * </p> 1018 * <ul> 1019 * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code (',')}</li> 1020 * <li>{@link Builder#setQuote(char) setQuote}{@code ('"')}</li> 1021 * <li>{@link Builder#setRecordSeparator(String) setRecordSeparator}{@code ("\r\n")}</li> 1022 * <li>{@link Builder#setDuplicateHeaderMode(DuplicateHeaderMode) setDuplicateHeaderMode}{@code (DuplicateHeaderMode.ALLOW_ALL)}</li> 1023 * <li>{@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}</li> 1024 * <li>{@link Builder#setAllowMissingColumnNames(boolean) setAllowMissingColumnNames}{@code (true)}</li> 1025 * <li>{@link Builder#setTrailingData(boolean) setTrailingData}{@code 
(true)}</li> 1026 * <li>{@link Builder#setLenientEof(boolean) setLenientEof}{@code (true)}</li> 1027 * </ul> 1028 * <p> 1029 * Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} and 1030 * {@link Builder#setIgnoreEmptyLines(boolean) Builder#setIgnoreEmptyLines(false)}. 1031 * </p> 1032 * 1033 * @see Predefined#Excel 1034 * @see DuplicateHeaderMode#ALLOW_ALL 1035 */ 1036 // @formatter:off 1037 public static final CSVFormat EXCEL = DEFAULT.builder() 1038 .setIgnoreEmptyLines(false) 1039 .setAllowMissingColumnNames(true) 1040 .setTrailingData(true) 1041 .setLenientEof(true) 1042 .get(); 1043 // @formatter:on 1044 1045 /** 1046 * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation. 1047 * 1048 * <p> 1049 * This is a comma-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}. 1050 * The default NULL string is {@code "\\N"}. 
* </p>
     *
     * <p>
     * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
     * </p>
     * <ul>
     * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code ('|')}</li>
     * <li>{@link Builder#setEscape(char) setEscape}{@code ('\\')}</li>
     * <li>{@link Builder#setQuote(char) setQuote}{@code ('\"')}</li>
     * <li>{@link Builder#setRecordSeparator(char) setRecordSeparator}{@code ('\n')}</li>
     * </ul>
     *
     * @see Predefined#MySQL
     * @see <a href= "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">
     *      http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a>
     * @since 1.3
     */
    // @formatter:off
    public static final CSVFormat INFORMIX_UNLOAD = DEFAULT.builder()
            .setDelimiter(Constants.PIPE)
            .setEscape(Constants.BACKSLASH)
            .setQuote(Constants.DOUBLE_QUOTE_CHAR)
            .setRecordSeparator(Constants.LF)
            .get();
    // @formatter:on

    /**
     * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled).
     *
     * <p>
     * This is a comma-delimited format with an LF character as the line separator. Values are not quoted and special characters are not escaped.
     * The default NULL string is {@code "\\N"}.
* </p>
     *
     * <p>
     * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
     * </p>
     * <ul>
     * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code (',')}</li>
     * <li>{@link Builder#setQuote(char) setQuote}{@code ('\"')}</li>
     * <li>{@link Builder#setRecordSeparator(char) setRecordSeparator}{@code ('\n')}</li>
     * </ul>
     *
     * @see Predefined#MySQL
     * @see <a href= "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">
     *      http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a>
     * @since 1.3
     */
    // @formatter:off
    public static final CSVFormat INFORMIX_UNLOAD_CSV = DEFAULT.builder()
            .setDelimiter(Constants.COMMA)
            .setQuote(Constants.DOUBLE_QUOTE_CHAR)
            .setRecordSeparator(Constants.LF)
            .get();
    // @formatter:on

    /**
     * Default MongoDB CSV format used by the {@code mongoexport} operation.
     * <p>
     * <strong>Parsing is not supported yet.</strong>
     * </p>
     *
     * <p>
     * This is a comma-delimited format. Values are double quoted only if needed and special characters are escaped with {@code '"'}. A header line with field
     * names is expected.
     * </p>
     * <p>
     * As of 2024-04-05, the MongoDB documentation for {@code mongoimport} states:
     * </p>
     * <blockquote>The csv parser accepts that data that complies with RFC <a href="https://tools.ietf.org/html/rfc4180">RFC-4180</a>. As a result, backslashes are
     * not a valid escape character. If you use double-quotes to enclose fields in the CSV data, you must escape internal double-quote marks by prepending
     * another double-quote.
* </blockquote>
     * <p>
     * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
     * </p>
     * <ul>
     * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code (',')}</li>
     * <li>{@link Builder#setEscape(char) setEscape}{@code ('"')}</li>
     * <li>{@link Builder#setQuote(char) setQuote}{@code ('"')}</li>
     * <li>{@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.MINIMAL)}</li>
     * </ul>
     *
     * @see Predefined#MongoDBCsv
     * @see QuoteMode#MINIMAL
     * @see <a href="https://docs.mongodb.com/manual/reference/program/mongoexport/">MongoDB mongoexport command documentation</a>
     * @since 1.7
     */
    // @formatter:off
    public static final CSVFormat MONGODB_CSV = DEFAULT.builder()
            .setDelimiter(Constants.COMMA)
            .setEscape(Constants.DOUBLE_QUOTE_CHAR)
            .setQuote(Constants.DOUBLE_QUOTE_CHAR)
            .setQuoteMode(QuoteMode.MINIMAL)
            .get();
    // @formatter:on

    /**
     * Default MongoDB TSV format used by the {@code mongoexport} operation.
     * <p>
     * <strong>Parsing is not supported yet.</strong>
     * </p>
     *
     * <p>
     * This is a tab-delimited format. Values are double quoted only if needed and special
     * characters are escaped with {@code '"'}. A header line with field names is expected.
* </p>
     *
     * <p>
     * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
     * </p>
     * <ul>
     * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code ('\t')}</li>
     * <li>{@link Builder#setEscape(char) setEscape}{@code ('"')}</li>
     * <li>{@link Builder#setQuote(char) setQuote}{@code ('"')}</li>
     * <li>{@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.MINIMAL)}</li>
     * <li>{@link Builder#setSkipHeaderRecord(boolean) setSkipHeaderRecord}{@code (false)}</li>
     * </ul>
     *
     * @see Predefined#MongoDBCsv
     * @see QuoteMode#MINIMAL
     * @see <a href="https://docs.mongodb.com/manual/reference/program/mongoexport/">MongoDB mongoexport command
     *          documentation</a>
     * @since 1.7
     */
    // @formatter:off
    public static final CSVFormat MONGODB_TSV = DEFAULT.builder()
            .setDelimiter(Constants.TAB)
            .setEscape(Constants.DOUBLE_QUOTE_CHAR)
            .setQuote(Constants.DOUBLE_QUOTE_CHAR)
            .setQuoteMode(QuoteMode.MINIMAL)
            .setSkipHeaderRecord(false)
            .get();
    // @formatter:on

    /**
     * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
     *
     * <p>
     * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
     * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
* </p>
     *
     * <p>
     * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
     * </p>
     * <ul>
     * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code ('\t')}</li>
     * <li>{@link Builder#setEscape(char) setEscape}{@code ('\\')}</li>
     * <li>{@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}</li>
     * <li>{@link Builder#setQuote(Character) setQuote}{@code (null)}</li>
     * <li>{@link Builder#setRecordSeparator(char) setRecordSeparator}{@code ('\n')}</li>
     * <li>{@link Builder#setNullString(String) setNullString}{@code ("\\N")}</li>
     * <li>{@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.ALL_NON_NULL)}</li>
     * </ul>
     *
     * @see Predefined#MySQL
     * @see QuoteMode#ALL_NON_NULL
     * @see <a href="https://dev.mysql.com/doc/refman/5.1/en/load-data.html">https://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
     */
    // @formatter:off
    public static final CSVFormat MYSQL = DEFAULT.builder()
            .setDelimiter(Constants.TAB)
            .setEscape(Constants.BACKSLASH)
            .setIgnoreEmptyLines(false)
            .setQuote(null)
            .setRecordSeparator(Constants.LF)
            .setNullString(Constants.SQL_NULL_STRING)
            .setQuoteMode(QuoteMode.ALL_NON_NULL)
            .get();
    // @formatter:on

    /**
     * Default Oracle format used by the SQL*Loader utility.
     *
     * <p>
     * This is a comma-delimited format with the system line separator character as the record separator. Values are
     * double quoted when needed and special characters are escaped with {@code '\'}. The default NULL string is
     * {@code "\\N"}. Values are trimmed.
* </p>
     *
     * <p>
     * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
     * </p>
     * <ul>
     * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code (',')} // default is {@code FIELDS TERMINATED BY ','}</li>
     * <li>{@link Builder#setEscape(char) setEscape}{@code ('\\')}</li>
     * <li>{@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}</li>
     * <li>{@link Builder#setQuote(char) setQuote}{@code ('"')} // default is {@code OPTIONALLY ENCLOSED BY '"'}</li>
     * <li>{@link Builder#setNullString(String) setNullString}{@code ("\\N")}</li>
     * <li>{@link Builder#setTrim(boolean) setTrim}{@code (true)}</li>
     * <li>{@link Builder#setRecordSeparator(String) setRecordSeparator}{@code (System.lineSeparator())}</li>
     * <li>{@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.MINIMAL)}</li>
     * </ul>
     *
     * @see Predefined#Oracle
     * @see QuoteMode#MINIMAL
     * @see <a href="https://s.apache.org/CGXG">Oracle CSV Format Specification</a>
     * @since 1.6
     */
    // @formatter:off
    public static final CSVFormat ORACLE = DEFAULT.builder()
            .setDelimiter(Constants.COMMA)
            .setEscape(Constants.BACKSLASH)
            .setIgnoreEmptyLines(false)
            .setQuote(Constants.DOUBLE_QUOTE_CHAR)
            .setNullString(Constants.SQL_NULL_STRING)
            .setTrim(true)
            .setRecordSeparator(System.lineSeparator())
            .setQuoteMode(QuoteMode.MINIMAL)
            .get();
    // @formatter:on

    /**
     * Default PostgreSQL CSV format used by the {@code COPY} operation.
     *
     * <p>
     * This is a comma-delimited format with an LF character as the line separator. Values are double quoted and special
     * characters are not escaped. The default NULL string is {@code ""}.
* </p>
     *
     * <p>
     * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
     * </p>
     * <ul>
     * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code (',')}</li>
     * <li>{@link Builder#setEscape(Character) setEscape}{@code (null)}</li>
     * <li>{@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}</li>
     * <li>{@link Builder#setQuote(char) setQuote}{@code ('"')}</li>
     * <li>{@link Builder#setRecordSeparator(char) setRecordSeparator}{@code ('\n')}</li>
     * <li>{@link Builder#setNullString(String) setNullString}{@code ("")}</li>
     * <li>{@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.ALL_NON_NULL)}</li>
     * </ul>
     *
     * @see Predefined#MySQL
     * @see QuoteMode#ALL_NON_NULL
     * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL COPY command
     *          documentation</a>
     * @since 1.5
     */
    // @formatter:off
    public static final CSVFormat POSTGRESQL_CSV = DEFAULT.builder()
            .setDelimiter(Constants.COMMA)
            .setEscape(null)
            .setIgnoreEmptyLines(false)
            .setQuote(Constants.DOUBLE_QUOTE_CHAR)
            .setRecordSeparator(Constants.LF)
            .setNullString(Constants.EMPTY)
            .setQuoteMode(QuoteMode.ALL_NON_NULL)
            .get();
    // @formatter:on

    /**
     * Default PostgreSQL text format used by the {@code COPY} operation.
     *
     * <p>
     * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
     * characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}.
* </p>
     *
     * <p>
     * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
     * </p>
     * <ul>
     * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code ('\t')}</li>
     * <li>{@link Builder#setEscape(char) setEscape}{@code ('\\')}</li>
     * <li>{@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}</li>
     * <li>{@link Builder#setQuote(Character) setQuote}{@code (null)}</li>
     * <li>{@link Builder#setRecordSeparator(char) setRecordSeparator}{@code ('\n')}</li>
     * <li>{@link Builder#setNullString(String) setNullString}{@code ("\\N")}</li>
     * <li>{@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.ALL_NON_NULL)}</li>
     * </ul>
     *
     * @see Predefined#MySQL
     * @see QuoteMode#ALL_NON_NULL
     * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL COPY command
     *          documentation</a>
     * @since 1.5
     */
    // @formatter:off
    public static final CSVFormat POSTGRESQL_TEXT = DEFAULT.builder()
            .setDelimiter(Constants.TAB)
            .setEscape(Constants.BACKSLASH)
            .setIgnoreEmptyLines(false)
            .setQuote(null)
            .setRecordSeparator(Constants.LF)
            .setNullString(Constants.SQL_NULL_STRING)
            .setQuoteMode(QuoteMode.ALL_NON_NULL)
            .get();
    // @formatter:on

    /**
     * Comma separated format as defined by <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.
*
     * <p>
     * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
     * </p>
     * <ul>
     * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code (',')}</li>
     * <li>{@link Builder#setQuote(char) setQuote}{@code ('"')}</li>
     * <li>{@link Builder#setRecordSeparator(String) setRecordSeparator}{@code ("\r\n")}</li>
     * <li>{@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}</li>
     * </ul>
     *
     * @see Predefined#RFC4180
     */
    public static final CSVFormat RFC4180 = DEFAULT.builder().setIgnoreEmptyLines(false).get();

    /** Serialization version identifier. */
    private static final long serialVersionUID = 2L;

    /**
     * Tab-delimited format (TDF).
     *
     * <p>
     * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
     * </p>
     * <ul>
     * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code ('\t')}</li>
     * <li>{@link Builder#setIgnoreSurroundingSpaces(boolean) setIgnoreSurroundingSpaces}{@code (true)}</li>
     * </ul>
     *
     * @see Predefined#TDF
     */
    // @formatter:off
    public static final CSVFormat TDF = DEFAULT.builder()
            .setDelimiter(Constants.TAB)
            .setIgnoreSurroundingSpaces(true)
            .get();
    // @formatter:on

    /**
     * Null-safe clone of an array.
     *
     * @param <T>    The array element type.
     * @param values the source array, may be {@code null}.
     * @return the cloned array, or {@code null} if {@code values} is {@code null}.
     */
    @SafeVarargs
    static <T> T[] clone(final T... values) {
        return values == null ? null : values.clone();
    }

    /**
     * Returns true if the given string contains the search char.
     *
     * @param source   the string to check.
     * @param searchCh the character to search.
1398 * @return true if {@code c} contains a line break character 1399 */ 1400 private static boolean contains(final String source, final char searchCh) { 1401 return Objects.requireNonNull(source, "source").indexOf(searchCh) >= 0; 1402 } 1403 1404 /** 1405 * Returns true if the given string contains a line break character. 1406 * 1407 * @param source the string to check. 1408 * @return true if {@code c} contains a line break character. 1409 */ 1410 private static boolean containsLineBreak(final String source) { 1411 return contains(source, Constants.CR) || contains(source, Constants.LF); 1412 } 1413 1414 /** 1415 * Creates a null-safe copy of the given instance. 1416 * 1417 * @return a copy of the given instance or null if the input is null. 1418 */ 1419 static CSVFormat copy(final CSVFormat format) { 1420 return format != null ? format.copy() : null; 1421 } 1422 1423 static boolean isBlank(final String value) { 1424 return value == null || value.trim().isEmpty(); 1425 } 1426 1427 /** 1428 * Returns true if the given character is a line break character. 1429 * 1430 * @param c the character to check. 1431 * @return true if {@code c} is a line break character. 1432 */ 1433 private static boolean isLineBreak(final char c) { 1434 return c == Constants.LF || c == Constants.CR; 1435 } 1436 1437 /** 1438 * Returns true if the given character is a line break character. 1439 * 1440 * @param c the character to check, may be null. 1441 * @return true if {@code c} is a line break character (and not null). 1442 */ 1443 private static boolean isLineBreak(final Character c) { 1444 return c != null && isLineBreak(c.charValue()); // N.B. Explicit (un)boxing is intentional 1445 } 1446 1447 /** Same test as in as {@link String#trim()}. */ 1448 private static boolean isTrimChar(final char ch) { 1449 return ch <= Constants.SP; 1450 } 1451 1452 /** Same test as in as {@link String#trim()}. 
*/ 1453 private static boolean isTrimChar(final CharSequence charSequence, final int pos) { 1454 return isTrimChar(charSequence.charAt(pos)); 1455 } 1456 1457 /** 1458 * Creates a new CSV format with the specified delimiter. 1459 * 1460 * <p> 1461 * Use this method if you want to create a CSVFormat from scratch. All fields but the delimiter will be initialized with null/false. 1462 * </p> 1463 * 1464 * @param delimiter the char used for value separation, must not be a line break character 1465 * @return a new CSV format. 1466 * @throws IllegalArgumentException if the delimiter is a line break character 1467 * @see #DEFAULT 1468 * @see #RFC4180 1469 * @see #MYSQL 1470 * @see #EXCEL 1471 * @see #TDF 1472 */ 1473 public static CSVFormat newFormat(final char delimiter) { 1474 return new CSVFormat(new Builder().setDelimiter(delimiter)); 1475 } 1476 1477 static String[] toStringArray(final Object[] values) { 1478 if (values == null) { 1479 return null; 1480 } 1481 final String[] strings = new String[values.length]; 1482 Arrays.setAll(strings, i -> Objects.toString(values[i], null)); 1483 return strings; 1484 } 1485 1486 static CharSequence trim(final CharSequence charSequence) { 1487 if (charSequence instanceof String) { 1488 return ((String) charSequence).trim(); 1489 } 1490 final int count = charSequence.length(); 1491 int len = count; 1492 int pos = 0; 1493 1494 while (pos < len && isTrimChar(charSequence, pos)) { 1495 pos++; 1496 } 1497 while (pos < len && isTrimChar(charSequence, len - 1)) { 1498 len--; 1499 } 1500 return pos > 0 || len < count ? charSequence.subSequence(pos, len) : charSequence; 1501 } 1502 1503 /** 1504 * Gets one of the predefined formats from {@link CSVFormat.Predefined}. 
*
     * @param format the name of a {@link CSVFormat.Predefined} constant
     * @return one of the predefined formats
     * @since 1.2
     */
    public static CSVFormat valueOf(final String format) {
        return CSVFormat.Predefined.valueOf(format).getFormat();
    }

    /** How duplicate headers are handled. */
    private final DuplicateHeaderMode duplicateHeaderMode;

    /** Whether missing column names are allowed when parsing the header line. */
    private final boolean allowMissingColumnNames;

    /** Whether to flush on close. */
    private final boolean autoFlush;

    /** Set to null if commenting is disabled. */
    private final Character commentMarker;

    /** The string delimiting the values (typically ";", "," or "\t"). */
    private final String delimiter;

    /** Set to null if escaping is disabled. */
    private final Character escapeCharacter;

    /** Array of header column names. */
    private final String[] headers;

    /** Array of header comment lines. */
    private final String[] headerComments;

    /** Whether empty lines between records are ignored when parsing input. */
    private final boolean ignoreEmptyLines;

    /** Whether header name case is ignored. */
    private final boolean ignoreHeaderCase;

    /** Whether leading/trailing spaces around values are ignored. */
    private final boolean ignoreSurroundingSpaces;

    /** The string to be used for null values. */
    private final String nullString;

    /** Set to null if quoting is disabled. */
    private final Character quoteCharacter;

    /** Set to {@code quoteCharacter + nullString + quoteCharacter} */
    private final String quotedNullString;

    /** The quote policy for output fields. */
    private final QuoteMode quoteMode;

    /** For output. */
    private final String recordSeparator;

    /** Whether to skip the header record.
*/ 1563 private final boolean skipHeaderRecord; 1564 1565 /** Whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility. */ 1566 private final boolean lenientEof; 1567 1568 /** Whether reading trailing data is allowed in records, helps Excel compatibility. */ 1569 private final boolean trailingData; 1570 1571 /** Whether to add a trailing delimiter. */ 1572 private final boolean trailingDelimiter; 1573 1574 /** Whether to trim leading and trailing blanks. */ 1575 private final boolean trim; 1576 1577 private CSVFormat(final Builder builder) { 1578 this.delimiter = builder.delimiter; 1579 this.quoteCharacter = builder.quoteCharacter; 1580 this.quoteMode = builder.quoteMode; 1581 this.commentMarker = builder.commentMarker; 1582 this.escapeCharacter = builder.escapeCharacter; 1583 this.ignoreSurroundingSpaces = builder.ignoreSurroundingSpaces; 1584 this.allowMissingColumnNames = builder.allowMissingColumnNames; 1585 this.ignoreEmptyLines = builder.ignoreEmptyLines; 1586 this.recordSeparator = builder.recordSeparator; 1587 this.nullString = builder.nullString; 1588 this.headerComments = builder.headerComments; 1589 this.headers = builder.headers; 1590 this.skipHeaderRecord = builder.skipHeaderRecord; 1591 this.ignoreHeaderCase = builder.ignoreHeaderCase; 1592 this.lenientEof = builder.lenientEof; 1593 this.trailingData = builder.trailingData; 1594 this.trailingDelimiter = builder.trailingDelimiter; 1595 this.trim = builder.trim; 1596 this.autoFlush = builder.autoFlush; 1597 this.quotedNullString = builder.quotedNullString; 1598 this.duplicateHeaderMode = builder.duplicateHeaderMode; 1599 validate(); 1600 } 1601 1602 private void append(final char c, final Appendable appendable) throws IOException { 1603 // try { 1604 appendable.append(c); 1605 // } catch (final IOException e) { 1606 // throw new UncheckedIOException(e); 1607 // } 1608 } 1609 1610 private void append(final CharSequence csq, final Appendable appendable) throws 
IOException { 1611 // try { 1612 appendable.append(csq); 1613 // } catch (final IOException e) { 1614 // throw new UncheckedIOException(e); 1615 // } 1616 } 1617 1618 /** 1619 * Creates a new Builder for this instance. 1620 * 1621 * @return a new Builder. 1622 */ 1623 public Builder builder() { 1624 return Builder.create(this); 1625 } 1626 1627 /** 1628 * Creates a copy of this instance. 1629 * 1630 * @return a copy of this instance. 1631 */ 1632 CSVFormat copy() { 1633 return builder().get(); 1634 } 1635 1636 @Override 1637 public boolean equals(final Object obj) { 1638 if (this == obj) { 1639 return true; 1640 } 1641 if (obj == null) { 1642 return false; 1643 } 1644 if (getClass() != obj.getClass()) { 1645 return false; 1646 } 1647 final CSVFormat other = (CSVFormat) obj; 1648 return allowMissingColumnNames == other.allowMissingColumnNames && autoFlush == other.autoFlush && 1649 Objects.equals(commentMarker, other.commentMarker) && Objects.equals(delimiter, other.delimiter) && 1650 duplicateHeaderMode == other.duplicateHeaderMode && Objects.equals(escapeCharacter, other.escapeCharacter) && 1651 Arrays.equals(headerComments, other.headerComments) && Arrays.equals(headers, other.headers) && 1652 ignoreEmptyLines == other.ignoreEmptyLines && ignoreHeaderCase == other.ignoreHeaderCase && 1653 ignoreSurroundingSpaces == other.ignoreSurroundingSpaces && lenientEof == other.lenientEof && 1654 Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) && 1655 quoteMode == other.quoteMode && Objects.equals(quotedNullString, other.quotedNullString) && 1656 Objects.equals(recordSeparator, other.recordSeparator) && skipHeaderRecord == other.skipHeaderRecord && 1657 trailingData == other.trailingData && trailingDelimiter == other.trailingDelimiter && trim == other.trim; 1658 } 1659 1660 private void escape(final char c, final Appendable appendable) throws IOException { 1661 append(escapeCharacter.charValue(), appendable); // N.B. 
Explicit (un)boxing is intentional 1662 append(c, appendable); 1663 } 1664 1665 /** 1666 * Formats the specified values. 1667 * 1668 * @param values the values to format 1669 * @return the formatted values 1670 */ 1671 public String format(final Object... values) { 1672 return Uncheck.get(() -> format_(values)); 1673 } 1674 1675 private String format_(final Object... values) throws IOException { 1676 final StringWriter out = new StringWriter(); 1677 try (CSVPrinter csvPrinter = new CSVPrinter(out, this)) { 1678 csvPrinter.printRecord(values); 1679 final String res = out.toString(); 1680 final int len = recordSeparator != null ? res.length() - recordSeparator.length() : res.length(); 1681 return res.substring(0, len); 1682 } 1683 } 1684 1685 /** 1686 * Gets whether duplicate names are allowed in the headers. 1687 * 1688 * @return whether duplicate header names are allowed 1689 * @since 1.7 1690 * @deprecated Use {@link #getDuplicateHeaderMode()}. 1691 */ 1692 @Deprecated 1693 public boolean getAllowDuplicateHeaderNames() { 1694 return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL; 1695 } 1696 1697 /** 1698 * Gets whether missing column names are allowed when parsing the header line. 1699 * 1700 * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an {@link IllegalArgumentException}. 1701 */ 1702 public boolean getAllowMissingColumnNames() { 1703 return allowMissingColumnNames; 1704 } 1705 1706 /** 1707 * Gets whether to flush on close. 1708 * 1709 * @return whether to flush on close. 1710 * @since 1.6 1711 */ 1712 public boolean getAutoFlush() { 1713 return autoFlush; 1714 } 1715 1716 /** 1717 * Gets the comment marker character, {@code null} disables comments. 1718 * <p> 1719 * The comment start character is only recognized at the start of a line. 1720 * </p> 1721 * <p> 1722 * Comments are printed first, before headers. 
* </p>
     * <p>
     * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment marker written at the start of each comment
     * line.
     * </p>
     * <p>
     * If the comment marker is not set, then the header comments are ignored.
     * </p>
     * <p>
     * For example:
     * </p>
     *
     * <pre>
     * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
     * </pre>
     * <p>
     * writes:
     * </p>
     *
     * <pre>
     * # Generated by Apache Commons CSV
     * # 1970-01-01T00:00:00Z
     * </pre>
     *
     * @return the comment start marker, may be {@code null}
     */
    public Character getCommentMarker() {
        return commentMarker;
    }

    /**
     * Gets the first character delimiting the values (typically ';', ',' or '\t').
     *
     * @return the first delimiter character.
     * @deprecated Use {@link #getDelimiterString()}.
     */
    @Deprecated
    public char getDelimiter() {
        return delimiter.charAt(0);
    }

    /**
     * Gets the characters delimiting the values (typically ";", "," or "\t") as a new array.
     *
     * @return the delimiter characters.
     */
    char[] getDelimiterCharArray() {
        return delimiter.toCharArray();
    }

    /**
     * Gets the string delimiting the values (typically ";", "," or "\t").
     *
     * @return the delimiter.
     * @since 1.9.0
     */
    public String getDelimiterString() {
        return delimiter;
    }

    /**
     * Gets how duplicate headers are handled.
     *
     * @return if duplicate header values are allowed, allowed conditionally, or disallowed.
     * @since 1.10.0
     */
    public DuplicateHeaderMode getDuplicateHeaderMode() {
        return duplicateHeaderMode;
    }

    /**
     * Gets the escape character.
1795 * 1796 * @return the escape character, may be {@code 0} 1797 */ 1798 char getEscapeChar() { 1799 return escapeCharacter != null ? escapeCharacter.charValue() : 0; // N.B. Explicit (un)boxing is intentional 1800 } 1801 1802 /** 1803 * Gets the escape character. 1804 * 1805 * @return the escape character, may be {@code null} 1806 */ 1807 public Character getEscapeCharacter() { 1808 return escapeCharacter; 1809 } 1810 1811 /** 1812 * Gets a copy of the header array. 1813 * 1814 * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file 1815 */ 1816 public String[] getHeader() { 1817 return headers != null ? headers.clone() : null; 1818 } 1819 1820 /** 1821 * Gets a copy of the header comment array to write before the CSV data. 1822 * <p> 1823 * This setting is ignored by the parser. 1824 * </p> 1825 * <p> 1826 * Comments are printed first, before headers. 1827 * </p> 1828 * <p> 1829 * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment marker written at the start of each comment 1830 * line. 1831 * </p> 1832 * <p> 1833 * If the comment marker is not set, then the header comments are ignored. 1834 * </p> 1835 * <p> 1836 * For example: 1837 * </p> 1838 * 1839 * <pre> 1840 * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); 1841 * </pre> 1842 * <p> 1843 * writes: 1844 * </p> 1845 * 1846 * <pre> 1847 * # Generated by Apache Commons CSV. 1848 * # 1970-01-01T00:00:00Z 1849 * </pre> 1850 * 1851 * @return a copy of the header comment array; {@code null} if disabled. 1852 */ 1853 public String[] getHeaderComments() { 1854 return headerComments != null ? headerComments.clone() : null; 1855 } 1856 1857 /** 1858 * Gets whether empty lines between records are ignored when parsing input. 
*
     * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty records.
     */
    public boolean getIgnoreEmptyLines() {
        return ignoreEmptyLines;
    }

    /**
     * Gets whether header names will be accessed ignoring case when parsing input.
     *
     * @return {@code true} if header name case is ignored, {@code false} if header names are case-sensitive.
     * @since 1.3
     */
    public boolean getIgnoreHeaderCase() {
        return ignoreHeaderCase;
    }

    /**
     * Gets whether spaces around values are ignored when parsing input.
     *
     * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value.
     */
    public boolean getIgnoreSurroundingSpaces() {
        return ignoreSurroundingSpaces;
    }

    /**
     * Gets whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility.
     *
     * @return whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility.
     * @since 1.11.0
     */
    public boolean getLenientEof() {
        return lenientEof;
    }

    /**
     * Gets the String to convert to and from {@code null}.
     * <ul>
     * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li>
     * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
     * </ul>
     *
     * @return the String to convert to and from {@code null}. No substitution occurs if {@code null}
     */
    public String getNullString() {
        return nullString;
    }

    /**
     * Gets the character used to encapsulate values containing special characters.
*
     * @return the quote character, may be {@code null}
     */
    public Character getQuoteCharacter() {
        return quoteCharacter;
    }

    /**
     * Gets the quote policy for output fields.
     *
     * @return the quote policy
     */
    public QuoteMode getQuoteMode() {
        return quoteMode;
    }

    /**
     * Gets the record separator delimiting output records.
     *
     * @return the record separator
     */
    public String getRecordSeparator() {
        return recordSeparator;
    }

    /**
     * Gets whether to skip the header record.
     *
     * @return whether to skip the header record.
     */
    public boolean getSkipHeaderRecord() {
        return skipHeaderRecord;
    }

    /**
     * Gets whether reading trailing data is allowed in records, helps Excel compatibility.
     *
     * @return whether reading trailing data is allowed in records, helps Excel compatibility.
     * @since 1.11.0
     */
    public boolean getTrailingData() {
        return trailingData;
    }

    /**
     * Gets whether to add a trailing delimiter.
     *
     * @return whether to add a trailing delimiter.
     * @since 1.3
     */
    public boolean getTrailingDelimiter() {
        return trailingDelimiter;
    }

    /**
     * Gets whether to trim leading and trailing blanks. This is used by {@link #print(Object, Appendable, boolean)} and by
     * {@code CSVParser#addRecordValue(boolean)}.
     *
     * @return whether to trim leading and trailing blanks.
1968 */ 1969 public boolean getTrim() { 1970 return trim; 1971 } 1972 1973 @Override 1974 public int hashCode() { 1975 final int prime = 31; 1976 int result = 1; 1977 result = prime * result + Arrays.hashCode(headerComments); 1978 result = prime * result + Arrays.hashCode(headers); 1979 result = prime * result + Objects.hash(allowMissingColumnNames, autoFlush, commentMarker, delimiter, duplicateHeaderMode, escapeCharacter, 1980 ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, lenientEof, nullString, quoteCharacter, quoteMode, quotedNullString, 1981 recordSeparator, skipHeaderRecord, trailingData, trailingDelimiter, trim); 1982 return result; 1983 } 1984 1985 /** 1986 * Tests whether comments are supported by this format. 1987 * 1988 * Note that the comment introducer character is only recognized at the start of a line. 1989 * 1990 * @return {@code true} is comments are supported, {@code false} otherwise 1991 */ 1992 public boolean isCommentMarkerSet() { 1993 return commentMarker != null; 1994 } 1995 1996 /** 1997 * Tests whether the next characters constitute a delimiter 1998 * 1999 * @param ch0 the first char (index 0). 2000 * @param charSeq the match char sequence 2001 * @param startIndex where start to match 2002 * @param delimiter the delimiter 2003 * @param delimiterLength the delimiter length 2004 * @return true if the match is successful 2005 */ 2006 private boolean isDelimiter(final char ch0, final CharSequence charSeq, final int startIndex, final char[] delimiter, final int delimiterLength) { 2007 if (ch0 != delimiter[0]) { 2008 return false; 2009 } 2010 final int len = charSeq.length(); 2011 if (startIndex + delimiterLength > len) { 2012 return false; 2013 } 2014 for (int i = 1; i < delimiterLength; i++) { 2015 if (charSeq.charAt(startIndex + i) != delimiter[i]) { 2016 return false; 2017 } 2018 } 2019 return true; 2020 } 2021 2022 /** 2023 * Tests whether escapes are being processed. 
*
     * @return {@code true} if escapes are processed, that is, if an escape character is set
     */
    public boolean isEscapeCharacterSet() {
        return escapeCharacter != null;
    }

    /**
     * Tests whether a null string has been defined.
     *
     * @return {@code true} if a nullString is defined
     */
    public boolean isNullStringSet() {
        return nullString != null;
    }

    /**
     * Tests whether a quoteChar has been defined.
     *
     * @return {@code true} if a quoteChar is defined
     */
    public boolean isQuoteCharacterSet() {
        return quoteCharacter != null;
    }

    /**
     * Parses the specified content.
     *
     * <p>
     * See also the various static parse methods on {@link CSVParser}.
     * </p>
     *
     * @param reader the character stream to parse.
     * @return a parser over a stream of {@link CSVRecord}s.
     * @throws IOException  If an I/O error occurs
     * @throws CSVException Thrown on invalid input.
     */
    public CSVParser parse(final Reader reader) throws IOException {
        return CSVParser.builder().setReader(reader).setFormat(this).get();
    }

    /**
     * Prints to the specified output.
     *
     * <p>
     * See also {@link CSVPrinter}.
     * </p>
     *
     * @param out the output.
     * @return a printer to an output.
     * @throws IOException thrown if the optional header cannot be printed.
     */
    public CSVPrinter print(final Appendable out) throws IOException {
        return new CSVPrinter(out, this);
    }

    /**
     * Prints to the specified {@code File} with given {@code Charset}.
     *
     * <p>
     * See also {@link CSVPrinter}.
     * </p>
     *
     * @param out     the output.
     * @param charset A charset.
     * @return a printer to an output.
     * @throws IOException thrown if the optional header cannot be printed.
2091 * @since 1.5 2092 */ 2093 public CSVPrinter print(final File out, final Charset charset) throws IOException { 2094 return print(out.toPath(), charset); 2095 } 2096 2097 private void print(final InputStream inputStream, final Appendable out, final boolean newRecord) throws IOException { 2098 // InputStream is never null here 2099 // There is nothing to escape when quoting is used which is the default. 2100 if (!newRecord) { 2101 append(getDelimiterString(), out); 2102 } 2103 final boolean quoteCharacterSet = isQuoteCharacterSet(); 2104 if (quoteCharacterSet) { 2105 append(getQuoteCharacter().charValue(), out); // N.B. Explicit (un)boxing is intentional 2106 } 2107 // Stream the input to the output without reading or holding the whole value in memory. 2108 // AppendableOutputStream cannot "close" an Appendable. 2109 try (OutputStream outputStream = new Base64OutputStream(new AppendableOutputStream<>(out))) { 2110 IOUtils.copy(inputStream, outputStream); 2111 } 2112 if (quoteCharacterSet) { 2113 append(getQuoteCharacter().charValue(), out); // N.B. Explicit (un)boxing is intentional 2114 } 2115 } 2116 2117 /** 2118 * Prints the {@code value} as the next value on the line to {@code out}. The value will be escaped or encapsulated as needed. Useful when one wants to 2119 * avoid creating CSVPrinters. Trims the value if {@link #getTrim()} is true. 2120 * 2121 * @param value value to output. 2122 * @param out where to print the value. 2123 * @param newRecord if this a new record. 2124 * @throws IOException If an I/O error occurs. 2125 * @since 1.4 2126 */ 2127 public synchronized void print(final Object value, final Appendable out, final boolean newRecord) throws IOException { 2128 // null values are considered empty 2129 // Only call CharSequence.toString() if you have to, helps GC-free use cases. 
2130 CharSequence charSequence; 2131 if (value == null) { 2132 // https://issues.apache.org/jira/browse/CSV-203 2133 if (null == nullString) { 2134 charSequence = Constants.EMPTY; 2135 } else if (QuoteMode.ALL == quoteMode) { 2136 charSequence = quotedNullString; 2137 } else { 2138 charSequence = nullString; 2139 } 2140 } else if (value instanceof CharSequence) { 2141 charSequence = (CharSequence) value; 2142 } else if (value instanceof Reader) { 2143 print((Reader) value, out, newRecord); 2144 return; 2145 } else if (value instanceof InputStream) { 2146 print((InputStream) value, out, newRecord); 2147 return; 2148 } else { 2149 charSequence = value.toString(); 2150 } 2151 charSequence = getTrim() ? trim(charSequence) : charSequence; 2152 print(value, charSequence, out, newRecord); 2153 } 2154 2155 private synchronized void print(final Object object, final CharSequence value, final Appendable out, final boolean newRecord) throws IOException { 2156 final int offset = 0; 2157 final int len = value.length(); 2158 if (!newRecord) { 2159 out.append(getDelimiterString()); 2160 } 2161 if (object == null) { 2162 out.append(value); 2163 } else if (isQuoteCharacterSet()) { 2164 // The original object is needed so can check for Number 2165 printWithQuotes(object, value, out, newRecord); 2166 } else if (isEscapeCharacterSet()) { 2167 printWithEscapes(value, out); 2168 } else { 2169 out.append(value, offset, len); 2170 } 2171 } 2172 2173 /** 2174 * Prints to the specified {@code Path} with given {@code Charset}, returns a {@code CSVPrinter} which the caller MUST close. 2175 * 2176 * <p> 2177 * See also {@link CSVPrinter}. 2178 * </p> 2179 * 2180 * @param out the output. 2181 * @param charset A charset. 2182 * @return a printer to an output. 2183 * @throws IOException thrown if the optional header cannot be printed. 
2184 * @since 1.5 2185 */ 2186 @SuppressWarnings("resource") 2187 public CSVPrinter print(final Path out, final Charset charset) throws IOException { 2188 return print(Files.newBufferedWriter(out, charset)); 2189 } 2190 2191 private void print(final Reader reader, final Appendable out, final boolean newRecord) throws IOException { 2192 // Reader is never null here 2193 if (!newRecord) { 2194 append(getDelimiterString(), out); 2195 } 2196 if (isQuoteCharacterSet()) { 2197 printWithQuotes(reader, out); 2198 } else if (isEscapeCharacterSet()) { 2199 printWithEscapes(reader, out); 2200 } else if (out instanceof Writer) { 2201 IOUtils.copyLarge(reader, (Writer) out); 2202 } else { 2203 IOUtils.copy(reader, out); 2204 } 2205 } 2206 2207 /** 2208 * Prints to the {@link System#out}. 2209 * 2210 * <p> 2211 * See also {@link CSVPrinter}. 2212 * </p> 2213 * 2214 * @return a printer to {@link System#out}. 2215 * @throws IOException thrown if the optional header cannot be printed. 2216 * @since 1.5 2217 */ 2218 public CSVPrinter printer() throws IOException { 2219 return new CSVPrinter(System.out, this); 2220 } 2221 2222 /** 2223 * Outputs the trailing delimiter (if set) followed by the record separator (if set). 2224 * 2225 * @param appendable where to write 2226 * @throws IOException If an I/O error occurs. 2227 * @since 1.4 2228 */ 2229 public synchronized void println(final Appendable appendable) throws IOException { 2230 if (getTrailingDelimiter()) { 2231 append(getDelimiterString(), appendable); 2232 } 2233 if (recordSeparator != null) { 2234 append(recordSeparator, appendable); 2235 } 2236 } 2237 2238 /** 2239 * Prints the given {@code values} to {@code out} as a single record of delimiter-separated values followed by the record separator. 2240 * 2241 * <p> 2242 * The values will be quoted if needed. Quotes and new-line characters will be escaped. 
This method adds the record separator to the output after printing 2243 * the record, so there is no need to call {@link #println(Appendable)}. 2244 * </p> 2245 * 2246 * @param appendable where to write. 2247 * @param values values to output. 2248 * @throws IOException If an I/O error occurs. 2249 * @since 1.4 2250 */ 2251 public synchronized void printRecord(final Appendable appendable, final Object... values) throws IOException { 2252 for (int i = 0; i < values.length; i++) { 2253 print(values[i], appendable, i == 0); 2254 } 2255 println(appendable); 2256 } 2257 2258 /* 2259 * Note: Must only be called if escaping is enabled, otherwise can throw exceptions. 2260 */ 2261 private void printWithEscapes(final CharSequence charSeq, final Appendable appendable) throws IOException { 2262 int start = 0; 2263 int pos = 0; 2264 final int end = charSeq.length(); 2265 final char[] delimArray = getDelimiterCharArray(); 2266 final int delimLength = delimArray.length; 2267 final char escape = getEscapeChar(); 2268 while (pos < end) { 2269 char c = charSeq.charAt(pos); 2270 final boolean isDelimiterStart = isDelimiter(c, charSeq, pos, delimArray, delimLength); 2271 final boolean isCr = c == Constants.CR; 2272 final boolean isLf = c == Constants.LF; 2273 if (isCr || isLf || c == escape || isDelimiterStart) { 2274 // write out segment up until this char 2275 if (pos > start) { 2276 appendable.append(charSeq, start, pos); 2277 } 2278 if (isLf) { 2279 c = 'n'; 2280 } else if (isCr) { 2281 c = 'r'; 2282 } 2283 escape(c, appendable); 2284 if (isDelimiterStart) { 2285 for (int i = 1; i < delimLength; i++) { 2286 pos++; 2287 escape(charSeq.charAt(pos), appendable); 2288 } 2289 } 2290 start = pos + 1; // start on the current char after this one 2291 } 2292 pos++; 2293 } 2294 2295 // write last segment 2296 if (pos > start) { 2297 appendable.append(charSeq, start, pos); 2298 } 2299 } 2300 2301 /* 2302 * Note: Must only be called if escaping is enabled, otherwise can throw exceptions. 
2303 */ 2304 private void printWithEscapes(final Reader reader, final Appendable appendable) throws IOException { 2305 int start = 0; 2306 int pos = 0; 2307 @SuppressWarnings("resource") // Temp reader on input reader. 2308 final ExtendedBufferedReader bufferedReader = new ExtendedBufferedReader(reader); 2309 final char[] delimArray = getDelimiterCharArray(); 2310 final int delimLength = delimArray.length; 2311 final char escape = getEscapeChar(); 2312 final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); 2313 int c; 2314 final char[] lookAheadBuffer = new char[delimLength - 1]; 2315 while (EOF != (c = bufferedReader.read())) { 2316 builder.append((char) c); 2317 Arrays.fill(lookAheadBuffer, (char) 0); 2318 bufferedReader.peek(lookAheadBuffer); 2319 final String test = builder.toString() + new String(lookAheadBuffer); 2320 final boolean isDelimiterStart = isDelimiter((char) c, test, pos, delimArray, delimLength); 2321 final boolean isCr = c == Constants.CR; 2322 final boolean isLf = c == Constants.LF; 2323 if (isCr || isLf || c == escape || isDelimiterStart) { 2324 // write out segment up until this char 2325 if (pos > start) { 2326 append(builder.substring(start, pos), appendable); 2327 builder.setLength(0); 2328 pos = -1; 2329 } 2330 if (isLf) { 2331 c = 'n'; 2332 } else if (isCr) { 2333 c = 'r'; 2334 } 2335 escape((char) c, appendable); 2336 if (isDelimiterStart) { 2337 for (int i = 1; i < delimLength; i++) { 2338 escape((char) bufferedReader.read(), appendable); 2339 } 2340 } 2341 start = pos + 1; // start on the current char after this one 2342 } 2343 pos++; 2344 } 2345 // write last segment 2346 if (pos > start) { 2347 appendable.append(builder, start, pos); 2348 } 2349 } 2350 2351 /* 2352 * Note: must only be called if quoting is enabled, otherwise will generate NPE 2353 */ 2354 // the original object is needed so can check for Number 2355 private void printWithQuotes(final Object object, final CharSequence charSeq, final Appendable 
out, final boolean newRecord) throws IOException { 2356 boolean quote = false; 2357 int start = 0; 2358 int pos = 0; 2359 final int len = charSeq.length(); 2360 final char[] delim = getDelimiterCharArray(); 2361 final int delimLength = delim.length; 2362 final char quoteChar = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional 2363 // If escape char not specified, default to the quote char 2364 // This avoids having to keep checking whether there is an escape character 2365 // at the cost of checking against quote twice 2366 final char escapeChar = isEscapeCharacterSet() ? getEscapeChar() : quoteChar; 2367 QuoteMode quoteModePolicy = getQuoteMode(); 2368 if (quoteModePolicy == null) { 2369 quoteModePolicy = QuoteMode.MINIMAL; 2370 } 2371 switch (quoteModePolicy) { 2372 case ALL: 2373 case ALL_NON_NULL: 2374 quote = true; 2375 break; 2376 case NON_NUMERIC: 2377 quote = !(object instanceof Number); 2378 break; 2379 case NONE: 2380 // Use the existing escaping code 2381 printWithEscapes(charSeq, out); 2382 return; 2383 case MINIMAL: 2384 if (len <= 0) { 2385 // Always quote an empty token that is the first 2386 // on the line, as it may be the only thing on the 2387 // line. If it were not quoted in that case, 2388 // an empty line has no tokens. 2389 if (newRecord) { 2390 quote = true; 2391 } 2392 } else { 2393 char c = charSeq.charAt(pos); 2394 if (c <= Constants.COMMENT) { 2395 // Some other chars at the start of a value caused the parser to fail, so for now 2396 // encapsulate if we start in anything less than '#'. We are being conservative 2397 // by including the default comment char too. 
2398 quote = true; 2399 } else { 2400 while (pos < len) { 2401 c = charSeq.charAt(pos); 2402 if (c == Constants.LF || c == Constants.CR || c == quoteChar || c == escapeChar || isDelimiter(c, charSeq, pos, delim, delimLength)) { 2403 quote = true; 2404 break; 2405 } 2406 pos++; 2407 } 2408 2409 if (!quote) { 2410 pos = len - 1; 2411 c = charSeq.charAt(pos); 2412 // Some other chars at the end caused the parser to fail, so for now 2413 // encapsulate if we end in anything less than ' ' 2414 if (isTrimChar(c)) { 2415 quote = true; 2416 } 2417 } 2418 } 2419 } 2420 if (!quote) { 2421 // No encapsulation needed - write out the original value 2422 out.append(charSeq, start, len); 2423 return; 2424 } 2425 break; 2426 default: 2427 throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy); 2428 } 2429 if (!quote) { 2430 // No encapsulation needed - write out the original value 2431 out.append(charSeq, start, len); 2432 return; 2433 } 2434 // We hit something that needed encapsulation 2435 out.append(quoteChar); 2436 // Pick up where we left off: pos should be positioned on the first character that caused 2437 // the need for encapsulation. 2438 while (pos < len) { 2439 final char c = charSeq.charAt(pos); 2440 if (c == quoteChar || c == escapeChar) { 2441 // write out the chunk up until this point 2442 out.append(charSeq, start, pos); 2443 out.append(escapeChar); // now output the escape 2444 start = pos; // and restart with the matched char 2445 } 2446 pos++; 2447 } 2448 // Write the last segment 2449 out.append(charSeq, start, pos); 2450 out.append(quoteChar); 2451 } 2452 2453 /** 2454 * Always use quotes unless QuoteMode is NONE, so we do not have to look ahead. 
2455 * 2456 * @param reader What to print 2457 * @param appendable Where to print it 2458 * @throws IOException If an I/O error occurs 2459 */ 2460 private void printWithQuotes(final Reader reader, final Appendable appendable) throws IOException { 2461 if (getQuoteMode() == QuoteMode.NONE) { 2462 printWithEscapes(reader, appendable); 2463 return; 2464 } 2465 final char quote = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional 2466 // (1) Append opening quote 2467 append(quote, appendable); 2468 // (2) Append Reader contents, doubling quotes 2469 int c; 2470 while (EOF != (c = reader.read())) { 2471 append((char) c, appendable); 2472 if (c == quote) { 2473 append(quote, appendable); 2474 } 2475 } 2476 // (3) Append closing quote 2477 append(quote, appendable); 2478 } 2479 2480 @Override 2481 public String toString() { 2482 final StringBuilder sb = new StringBuilder(); 2483 sb.append("Delimiter=<").append(delimiter).append('>'); 2484 if (isEscapeCharacterSet()) { 2485 sb.append(' '); 2486 sb.append("Escape=<").append(escapeCharacter).append('>'); 2487 } 2488 if (isQuoteCharacterSet()) { 2489 sb.append(' '); 2490 sb.append("QuoteChar=<").append(quoteCharacter).append('>'); 2491 } 2492 if (quoteMode != null) { 2493 sb.append(' '); 2494 sb.append("QuoteMode=<").append(quoteMode).append('>'); 2495 } 2496 if (isCommentMarkerSet()) { 2497 sb.append(' '); 2498 sb.append("CommentStart=<").append(commentMarker).append('>'); 2499 } 2500 if (isNullStringSet()) { 2501 sb.append(' '); 2502 sb.append("NullString=<").append(nullString).append('>'); 2503 } 2504 if (recordSeparator != null) { 2505 sb.append(' '); 2506 sb.append("RecordSeparator=<").append(recordSeparator).append('>'); 2507 } 2508 if (getIgnoreEmptyLines()) { 2509 sb.append(" EmptyLines:ignored"); 2510 } 2511 if (getIgnoreSurroundingSpaces()) { 2512 sb.append(" SurroundingSpaces:ignored"); 2513 } 2514 if (getIgnoreHeaderCase()) { 2515 sb.append(" IgnoreHeaderCase:ignored"); 2516 } 2517 
sb.append(" SkipHeaderRecord:").append(skipHeaderRecord); 2518 if (headerComments != null) { 2519 sb.append(' '); 2520 sb.append("HeaderComments:").append(Arrays.toString(headerComments)); 2521 } 2522 if (headers != null) { 2523 sb.append(' '); 2524 sb.append("Header:").append(Arrays.toString(headers)); 2525 } 2526 return sb.toString(); 2527 } 2528 2529 String trim(final String value) { 2530 return getTrim() ? value.trim() : value; 2531 } 2532 2533 /** 2534 * Verifies the validity and consistency of the attributes, and throws an {@link IllegalArgumentException} if necessary. 2535 * <p> 2536 * Because an instance can be used for both writing and parsing, not all conditions can be tested here. For example, allowMissingColumnNames is only used 2537 * for parsing, so it cannot be used here. 2538 * </p> 2539 * 2540 * @throws IllegalArgumentException Throw when any attribute is invalid or inconsistent with other attributes. 2541 */ 2542 private void validate() throws IllegalArgumentException { 2543 if (containsLineBreak(delimiter)) { 2544 throw new IllegalArgumentException("The delimiter cannot be a line break"); 2545 } 2546 if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) { // N.B. Explicit (un)boxing is intentional 2547 throw new IllegalArgumentException("The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')"); 2548 } 2549 if (escapeCharacter != null && contains(delimiter, escapeCharacter.charValue())) { // N.B. Explicit (un)boxing is intentional 2550 throw new IllegalArgumentException("The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')"); 2551 } 2552 if (commentMarker != null && contains(delimiter, commentMarker.charValue())) { // N.B. 
Explicit (un)boxing is intentional 2553 throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same ('" + commentMarker + "')"); 2554 } 2555 if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) { 2556 throw new IllegalArgumentException("The comment start character and the quoteChar cannot be the same ('" + commentMarker + "')"); 2557 } 2558 if (escapeCharacter != null && escapeCharacter.equals(commentMarker)) { 2559 throw new IllegalArgumentException("The comment start and the escape character cannot be the same ('" + commentMarker + "')"); 2560 } 2561 if (escapeCharacter == null && quoteMode == QuoteMode.NONE) { 2562 throw new IllegalArgumentException("Quote mode set to NONE but no escape character is set"); 2563 } 2564 // Validate headers 2565 if (headers != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) { 2566 final Set<String> dupCheckSet = new HashSet<>(headers.length); 2567 final boolean emptyDuplicatesAllowed = duplicateHeaderMode == DuplicateHeaderMode.ALLOW_EMPTY; 2568 for (final String header : headers) { 2569 final boolean blank = isBlank(header); 2570 // Sanitize all empty headers to the empty string "" when checking duplicates 2571 final boolean containsHeader = !dupCheckSet.add(blank ? "" : header); 2572 if (containsHeader && !(blank && emptyDuplicatesAllowed)) { 2573 throw new IllegalArgumentException(String.format( 2574 "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", header, 2575 Arrays.toString(headers))); 2576 } 2577 } 2578 } 2579 } 2580 2581 /** 2582 * Builds a new {@code CSVFormat} that allows duplicate header names. 
*
     * @return a new {@code CSVFormat} that allows duplicate header names
     * @since 1.7
     * @deprecated Use {@link Builder#setAllowDuplicateHeaderNames(boolean) Builder#setAllowDuplicateHeaderNames(true)}
     */
    @Deprecated
    public CSVFormat withAllowDuplicateHeaderNames() {
        // Allowing duplicates is expressed as the ALLOW_ALL duplicate header mode.
        return builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).get();
    }

    /**
     * Builds a new {@code CSVFormat} with duplicate header names behavior set to the given value.
     *
     * <p>
     * {@code true} selects {@code DuplicateHeaderMode.ALLOW_ALL}, {@code false} selects {@code DuplicateHeaderMode.ALLOW_EMPTY}.
     * </p>
     *
     * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow.
     * @return a new {@code CSVFormat} with duplicate header names behavior set to the given value.
     * @since 1.7
     * @deprecated Use {@link Builder#setAllowDuplicateHeaderNames(boolean)}
     */
    @Deprecated
    public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
        final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY;
        return builder().setDuplicateHeaderMode(mode).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true}.
     *
     * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
     * @see Builder#setAllowMissingColumnNames(boolean)
     * @since 1.1
     * @deprecated Use {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)}
     */
    @Deprecated
    public CSVFormat withAllowMissingColumnNames() {
        return builder().setAllowMissingColumnNames(true).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the missing column names behavior of the format set to the given value.
*
     * @param allowMissingColumnNames the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause
     *                                an {@link IllegalArgumentException} to be thrown.
     * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
     * @deprecated Use {@link Builder#setAllowMissingColumnNames(boolean)}
     */
    @Deprecated
    public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) {
        return builder().setAllowMissingColumnNames(allowMissingColumnNames).get();
    }

    /**
     * Builds a new {@code CSVFormat} with whether to flush on close.
     *
     * @param autoFlush whether to flush on close.
     * @return A new CSVFormat that is equal to this but with the specified autoFlush setting.
     * @since 1.6
     * @deprecated Use {@link Builder#setAutoFlush(boolean)}
     */
    @Deprecated
    public CSVFormat withAutoFlush(final boolean autoFlush) {
        return builder().setAutoFlush(autoFlush).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
     *
     * Note that the comment start character is only recognized at the start of a line.
     *
     * @param commentMarker the comment start marker
     * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
     * @throws IllegalArgumentException thrown if the specified character is a line break
     * @deprecated Use {@link Builder#setCommentMarker(char)}
     */
    @Deprecated
    public CSVFormat withCommentMarker(final char commentMarker) {
        return builder().setCommentMarker(commentMarker).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
*
     * Note that the comment start character is only recognized at the start of a line.
     *
     * @param commentMarker the comment start marker, use {@code null} to disable
     * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
     * @throws IllegalArgumentException thrown if the specified character is a line break
     * @deprecated Use {@link Builder#setCommentMarker(Character)}
     */
    @Deprecated
    public CSVFormat withCommentMarker(final Character commentMarker) {
        return builder().setCommentMarker(commentMarker).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the delimiter of the format set to the specified character.
     *
     * @param delimiter the delimiter character
     * @return A new CSVFormat that is equal to this but with the specified character as the delimiter
     * @throws IllegalArgumentException thrown if the specified character is a line break
     * @deprecated Use {@link Builder#setDelimiter(char)}
     */
    @Deprecated
    public CSVFormat withDelimiter(final char delimiter) {
        return builder().setDelimiter(delimiter).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the escape character of the format set to the specified character.
     *
     * @param escape the escape character
     * @return A new CSVFormat that is equal to this but with the specified character as the escape character
     * @throws IllegalArgumentException thrown if the specified character is a line break
     * @deprecated Use {@link Builder#setEscape(char)}
     */
    @Deprecated
    public CSVFormat withEscape(final char escape) {
        return builder().setEscape(escape).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the escape character of the format set to the specified character.
*
     * @param escape the escape character, use {@code null} to disable
     * @return A new CSVFormat that is equal to this but with the specified character as the escape character
     * @throws IllegalArgumentException thrown if the specified character is a line break
     * @deprecated Use {@link Builder#setEscape(Character)}
     */
    @Deprecated
    public CSVFormat withEscape(final Character escape) {
        return builder().setEscape(escape).get();
    }

    // @formatter:off
    /**
     * Builds a new {@code CSVFormat} using the first record as header.
     *
     * <p>
     * Calling this method is equivalent to calling:
     * </p>
     *
     * <pre>
     * CSVFormat format = aFormat.builder()
     *                           .setHeader()
     *                           .setSkipHeaderRecord(true)
     *                           .get();
     * </pre>
     *
     * @return A new CSVFormat that is equal to this but using the first record as header.
     * @see Builder#setSkipHeaderRecord(boolean)
     * @see Builder#setHeader(String...)
     * @since 1.3
     * @deprecated Use {@link Builder#setHeader(String...) Builder#setHeader()}.{@link Builder#setSkipHeaderRecord(boolean) setSkipHeaderRecord(true)}.
     */
    // @formatter:on
    @Deprecated
    public CSVFormat withFirstRecordAsHeader() {
        // @formatter:off
        return builder()
                .setHeader()
                .setSkipHeaderRecord(true)
                .get();
        // @formatter:on
    }

    /**
     * Builds a new {@code CSVFormat} with the header of the format defined by the enum class.
     *
     * <p>
     * Example:
     * </p>
     *
     * <pre>
     * public enum MyHeader {
     *     Name, Email, Phone
     * }
     * ...
     * CSVFormat format = aFormat.builder().setHeader(MyHeader.class).get();
     * </pre>
     * <p>
     * The header is also used by the {@link CSVPrinter}.
     * </p>
     *
     * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
* @return A new CSVFormat that is equal to this but with the specified header
     * @see Builder#setHeader(String...)
     * @see Builder#setSkipHeaderRecord(boolean)
     * @since 1.3
     * @deprecated Use {@link Builder#setHeader(Class)}
     */
    @Deprecated
    public CSVFormat withHeader(final Class<? extends Enum<?>> headerEnum) {
        return builder().setHeader(headerEnum).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the
     * input file with:
     *
     * <pre>
     * CSVFormat format = aFormat.builder().setHeader().get();
     * </pre>
     *
     * or specified manually with:
     *
     * <pre>
     * CSVFormat format = aFormat.builder().setHeader(resultSet).get();
     * </pre>
     * <p>
     * The header is also used by the {@link CSVPrinter}.
     * </p>
     *
     * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
     * @return A new CSVFormat that is equal to this but with the specified header
     * @throws SQLException if a database access error occurs or this method is called on a closed result set.
     * @since 1.1
     * @deprecated Use {@link Builder#setHeader(ResultSet)}
     */
    @Deprecated
    public CSVFormat withHeader(final ResultSet resultSet) throws SQLException {
        return builder().setHeader(resultSet).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the header of the format set from the result set metadata.
The header can either be parsed automatically from the
     * input file with:
     *
     * <pre>
     * CSVFormat format = aFormat.builder().setHeader().get();
     * </pre>
     *
     * or specified manually with:
     *
     * <pre>
     * CSVFormat format = aFormat.builder().setHeader(resultSetMetaData).get();
     * </pre>
     * <p>
     * The header is also used by the {@link CSVPrinter}.
     * </p>
     *
     * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
     * @return A new CSVFormat that is equal to this but with the specified header
     * @throws SQLException if a database access error occurs or this method is called on a closed result set.
     * @since 1.1
     * @deprecated Use {@link Builder#setHeader(ResultSetMetaData)}
     */
    @Deprecated
    public CSVFormat withHeader(final ResultSetMetaData resultSetMetaData) throws SQLException {
        return builder().setHeader(resultSetMetaData).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the header of the format set to the given values. The header can either be parsed automatically from the input file
     * with:
     *
     * <pre>
     * CSVFormat format = aFormat.builder().setHeader().get();
     * </pre>
     *
     * or specified manually with:
     *
     * <pre>{@code
     * CSVFormat format = aFormat.builder().setHeader("name", "email", "phone").get();
     * }</pre>
     * <p>
     * The header is also used by the {@link CSVPrinter}.
     * </p>
     *
     * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
     * @return A new CSVFormat that is equal to this but with the specified header
     * @see Builder#setSkipHeaderRecord(boolean)
     * @deprecated Use {@link Builder#setHeader(String...)}
     */
    @Deprecated
    public CSVFormat withHeader(final String... header) {
        return builder().setHeader(header).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will be printed first, before the headers.
     * This setting is ignored by the parser.
     *
     * <pre>{@code
     * CSVFormat format = aFormat.builder().setHeaderComments("Generated by Apache Commons CSV.", Instant.now()).get();
     * }</pre>
     *
     * @param headerComments the headerComments which will be printed by the Printer before the actual CSV data.
     * @return A new CSVFormat that is equal to this but with the specified header comments
     * @see Builder#setSkipHeaderRecord(boolean)
     * @since 1.1
     * @deprecated Use {@link Builder#setHeaderComments(Object...)}
     */
    @Deprecated
    public CSVFormat withHeaderComments(final Object... headerComments) {
        return builder().setHeaderComments(headerComments).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}.
     *
     * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
     * @see Builder#setIgnoreEmptyLines(boolean)
     * @since 1.1
     * @deprecated Use {@link Builder#setIgnoreEmptyLines(boolean) Builder#setIgnoreEmptyLines(true)}
     */
    @Deprecated
    public CSVFormat withIgnoreEmptyLines() {
        return builder().setIgnoreEmptyLines(true).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value.
     *
     * @param ignoreEmptyLines the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate empty
     *                         lines to empty records.
     * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
2898 * @deprecated Use {@link Builder#setIgnoreEmptyLines(boolean)} 2899 */ 2900 @Deprecated 2901 public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) { 2902 return builder().setIgnoreEmptyLines(ignoreEmptyLines).get(); 2903 } 2904 2905 /** 2906 * Builds a new {@code CSVFormat} with the header ignore case behavior set to {@code true}. 2907 * 2908 * @return A new CSVFormat that will ignore the new case header name behavior. 2909 * @see Builder#setIgnoreHeaderCase(boolean) 2910 * @since 1.3 2911 * @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean) Builder#setIgnoreHeaderCase(true)} 2912 */ 2913 @Deprecated 2914 public CSVFormat withIgnoreHeaderCase() { 2915 return builder().setIgnoreHeaderCase(true).get(); 2916 } 2917 2918 /** 2919 * Builds a new {@code CSVFormat} with whether header names should be accessed ignoring case. 2920 * 2921 * @param ignoreHeaderCase the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. 2922 * @return A new CSVFormat that will ignore case header name if specified as {@code true} 2923 * @since 1.3 2924 * @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean)} 2925 */ 2926 @Deprecated 2927 public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) { 2928 return builder().setIgnoreHeaderCase(ignoreHeaderCase).get(); 2929 } 2930 2931 /** 2932 * Builds a new {@code CSVFormat} with the parser trimming behavior of the format set to {@code true}. 2933 * 2934 * @return A new CSVFormat that is equal to this but with the specified parser trimming behavior. 
2935 * @see Builder#setIgnoreSurroundingSpaces(boolean) 2936 * @since 1.1 2937 * @deprecated Use {@link Builder#setIgnoreSurroundingSpaces(boolean) Builder#setIgnoreSurroundingSpaces(true)} 2938 */ 2939 @Deprecated 2940 public CSVFormat withIgnoreSurroundingSpaces() { 2941 return builder().setIgnoreSurroundingSpaces(true).get(); 2942 } 2943 2944 /** 2945 * Builds a new {@code CSVFormat} with the parser trimming behavior of the format set to the given value. 2946 * 2947 * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is. 2948 * @return A new CSVFormat that is equal to this but with the specified trimming behavior. 2949 * @deprecated Use {@link Builder#setIgnoreSurroundingSpaces(boolean)} 2950 */ 2951 @Deprecated 2952 public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { 2953 return builder().setIgnoreSurroundingSpaces(ignoreSurroundingSpaces).get(); 2954 } 2955 2956 /** 2957 * Builds a new {@code CSVFormat} with conversions to and from null for strings on input and output. 2958 * <ul> 2959 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li> 2960 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li> 2961 * </ul> 2962 * 2963 * @param nullString the String to convert to and from {@code null}. No substitution occurs if {@code null} 2964 * @return A new CSVFormat that is equal to this but with the specified null conversion string. 2965 * @deprecated Use {@link Builder#setNullString(String)} 2966 */ 2967 @Deprecated 2968 public CSVFormat withNullString(final String nullString) { 2969 return builder().setNullString(nullString).get(); 2970 } 2971 2972 /** 2973 * Builds a new {@code CSVFormat} with the quoteChar of the format set to the specified character. 
2974 * 2975 * @param quoteChar the quote character 2976 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar 2977 * @throws IllegalArgumentException thrown if the specified character is a line break 2978 * @deprecated Use {@link Builder#setQuote(char)} 2979 */ 2980 @Deprecated 2981 public CSVFormat withQuote(final char quoteChar) { 2982 return builder().setQuote(quoteChar).get(); 2983 } 2984 2985 /** 2986 * Builds a new {@code CSVFormat} with the quoteChar of the format set to the specified character. 2987 * 2988 * @param quoteChar the quote character, use {@code null} to disable. 2989 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar 2990 * @throws IllegalArgumentException thrown if the specified character is a line break 2991 * @deprecated Use {@link Builder#setQuote(Character)} 2992 */ 2993 @Deprecated 2994 public CSVFormat withQuote(final Character quoteChar) { 2995 return builder().setQuote(quoteChar).get(); 2996 } 2997 2998 /** 2999 * Builds a new {@code CSVFormat} with the output quote policy of the format set to the specified value. 3000 * 3001 * @param quoteMode the quote policy to use for output. 3002 * @return A new CSVFormat that is equal to this but with the specified quote policy 3003 * @deprecated Use {@link Builder#setQuoteMode(QuoteMode)} 3004 */ 3005 @Deprecated 3006 public CSVFormat withQuoteMode(final QuoteMode quoteMode) { 3007 return builder().setQuoteMode(quoteMode).get(); 3008 } 3009 3010 /** 3011 * Builds a new {@code CSVFormat} with the record separator of the format set to the specified character. 3012 * 3013 * <p> 3014 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and 3015 * "\r\n" 3016 * </p> 3017 * 3018 * @param recordSeparator the record separator to use for output. 
3019 * @return A new CSVFormat that is equal to this but with the specified output record separator 3020 * @deprecated Use {@link Builder#setRecordSeparator(char)} 3021 */ 3022 @Deprecated 3023 public CSVFormat withRecordSeparator(final char recordSeparator) { 3024 return builder().setRecordSeparator(recordSeparator).get(); 3025 } 3026 3027 /** 3028 * Builds a new {@code CSVFormat} with the record separator of the format set to the specified String. 3029 * 3030 * <p> 3031 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and 3032 * "\r\n" 3033 * </p> 3034 * 3035 * @param recordSeparator the record separator to use for output. 3036 * @return A new CSVFormat that is equal to this but with the specified output record separator 3037 * @throws IllegalArgumentException if recordSeparator is none of CR, LF or CRLF 3038 * @deprecated Use {@link Builder#setRecordSeparator(String)} 3039 */ 3040 @Deprecated 3041 public CSVFormat withRecordSeparator(final String recordSeparator) { 3042 return builder().setRecordSeparator(recordSeparator).get(); 3043 } 3044 3045 /** 3046 * Builds a new {@code CSVFormat} with skipping the header record set to {@code true}. 3047 * 3048 * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. 3049 * @see Builder#setSkipHeaderRecord(boolean) 3050 * @see Builder#setHeader(String...) 3051 * @since 1.1 3052 * @deprecated Use {@link Builder#setSkipHeaderRecord(boolean) Builder#setSkipHeaderRecord(true)} 3053 */ 3054 @Deprecated 3055 public CSVFormat withSkipHeaderRecord() { 3056 return builder().setSkipHeaderRecord(true).get(); 3057 } 3058 3059 /** 3060 * Builds a new {@code CSVFormat} with whether to skip the header record. 3061 * 3062 * @param skipHeaderRecord whether to skip the header record. 3063 * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. 
3064 * @see Builder#setHeader(String...) 3065 * @deprecated Use {@link Builder#setSkipHeaderRecord(boolean)} 3066 */ 3067 @Deprecated 3068 public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) { 3069 return builder().setSkipHeaderRecord(skipHeaderRecord).get(); 3070 } 3071 3072 /** 3073 * Builds a new {@code CSVFormat} with the record separator of the format set to the operating system's line separator string, typically CR+LF on Windows 3074 * and LF on Linux. 3075 * 3076 * <p> 3077 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and 3078 * "\r\n" 3079 * </p> 3080 * 3081 * @return A new CSVFormat that is equal to this but with the operating system's line separator string. 3082 * @since 1.6 3083 * @deprecated Use {@link Builder#setRecordSeparator(String) setRecordSeparator(System.lineSeparator())} 3084 */ 3085 @Deprecated 3086 public CSVFormat withSystemRecordSeparator() { 3087 return builder().setRecordSeparator(System.lineSeparator()).get(); 3088 } 3089 3090 /** 3091 * Builds a new {@code CSVFormat} to add a trailing delimiter. 3092 * 3093 * @return A new CSVFormat that is equal to this but with the trailing delimiter setting. 3094 * @since 1.3 3095 * @deprecated Use {@link Builder#setTrailingDelimiter(boolean) Builder#setTrailingDelimiter(true)} 3096 */ 3097 @Deprecated 3098 public CSVFormat withTrailingDelimiter() { 3099 return builder().setTrailingDelimiter(true).get(); 3100 } 3101 3102 /** 3103 * Builds a new {@code CSVFormat} with whether to add a trailing delimiter. 3104 * 3105 * @param trailingDelimiter whether to add a trailing delimiter. 3106 * @return A new CSVFormat that is equal to this but with the specified trailing delimiter setting. 
3107 * @since 1.3 3108 * @deprecated Use {@link Builder#setTrailingDelimiter(boolean)} 3109 */ 3110 @Deprecated 3111 public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) { 3112 return builder().setTrailingDelimiter(trailingDelimiter).get(); 3113 } 3114 3115 /** 3116 * Builds a new {@code CSVFormat} to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used. 3117 * 3118 * @return A new CSVFormat that is equal to this but with the trim setting on. 3119 * @since 1.3 3120 * @deprecated Use {@link Builder#setTrim(boolean) Builder#setTrim(true)} 3121 */ 3122 @Deprecated 3123 public CSVFormat withTrim() { 3124 return builder().setTrim(true).get(); 3125 } 3126 3127 /** 3128 * Builds a new {@code CSVFormat} with whether to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used. 3129 * 3130 * @param trim whether to trim leading and trailing blanks. 3131 * @return A new CSVFormat that is equal to this but with the specified trim setting. 3132 * @since 1.3 3133 * @deprecated Use {@link Builder#setTrim(boolean)} 3134 */ 3135 @Deprecated 3136 public CSVFormat withTrim(final boolean trim) { 3137 return builder().setTrim(trim).get(); 3138 } 3139 }