1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.csv; 19 20 import static org.apache.commons.io.IOUtils.EOF; 21 22 import java.io.File; 23 import java.io.IOException; 24 import java.io.InputStream; 25 import java.io.OutputStream; 26 import java.io.Reader; 27 import java.io.Serializable; 28 import java.io.StringWriter; 29 import java.io.Writer; 30 import java.nio.charset.Charset; 31 import java.nio.file.Files; 32 import java.nio.file.Path; 33 import java.sql.ResultSet; 34 import java.sql.ResultSetMetaData; 35 import java.sql.SQLException; 36 import java.util.Arrays; 37 import java.util.HashSet; 38 import java.util.Objects; 39 import java.util.Set; 40 41 import org.apache.commons.codec.binary.Base64OutputStream; 42 import org.apache.commons.io.IOUtils; 43 import org.apache.commons.io.function.Uncheck; 44 import org.apache.commons.io.output.AppendableOutputStream; 45 46 /** 47 * Specifies the format of a CSV file for parsing and writing. 
 *
 * <h2>Using predefined formats</h2>
 *
 * <p>
 * You can use one of the predefined formats:
 * </p>
 *
 * <ul>
 * <li>{@link #DEFAULT}</li>
 * <li>{@link #EXCEL}</li>
 * <li>{@link #INFORMIX_UNLOAD}</li>
 * <li>{@link #INFORMIX_UNLOAD_CSV}</li>
 * <li>{@link #MONGODB_CSV}</li>
 * <li>{@link #MONGODB_TSV}</li>
 * <li>{@link #MYSQL}</li>
 * <li>{@link #ORACLE}</li>
 * <li>{@link #POSTGRESQL_CSV}</li>
 * <li>{@link #POSTGRESQL_TEXT}</li>
 * <li>{@link #RFC4180}</li>
 * <li>{@link #TDF}</li>
 * </ul>
 *
 * <p>
 * For example:
 * </p>
 *
 * <pre>
 * CSVParser parser = CSVFormat.EXCEL.parse(reader);
 * </pre>
 *
 * <p>
 * The {@link CSVParser} provides static methods to parse other input types, for example:
 * </p>
 *
 * <pre>
 * CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL);
 * </pre>
 *
 * <h2>Defining formats</h2>
 *
 * <p>
 * You can extend a format by obtaining a {@link Builder} from it and calling the builder's {@code set} methods. For example:
 * </p>
 *
 * <pre>{@code
 * CSVFormat.EXCEL.builder().setNullString("N/A").setIgnoreSurroundingSpaces(true).build();
 * }</pre>
 *
 * <h2>Defining column names</h2>
 *
 * <p>
 * To define the column names you want to use to access records, write:
 * </p>
 *
 * <pre>{@code
 * CSVFormat.EXCEL.builder().setHeader("Col1", "Col2", "Col3").build();
 * }</pre>
 *
 * <p>
 * Calling {@link Builder#setHeader(String...)} lets you use the given names to address values in a {@link CSVRecord}, and assumes that your CSV source does not
 * contain a first record that also defines column names.
 *
 * If it does, then you are overriding this metadata with your names and you should skip the first record by calling
 * {@link Builder#setSkipHeaderRecord(boolean)} with {@code true}.
 * </p>
 *
 * <h2>Parsing</h2>
 *
 * <p>
 * You can use a format directly to parse a reader.
For example, to parse an Excel file with columns header, write: 118 * </p> 119 * 120 * <pre>{@code 121 * Reader in = ...; 122 * CSVFormat.EXCEL.withHeader("Col1", "Col2", "Col3").parse(in); 123 * }</pre> 124 * 125 * <p> 126 * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}. 127 * </p> 128 * 129 * <h2>Referencing columns safely</h2> 130 * 131 * <p> 132 * If your source contains a header record, you can simplify your code and safely reference columns, by using {@link Builder#setHeader(String...)} with no 133 * arguments: 134 * </p> 135 * 136 * <pre> 137 * CSVFormat.EXCEL.withHeader(); 138 * </pre> 139 * 140 * <p> 141 * This causes the parser to read the first record and use its values as column names. 142 * 143 * Then, call one of the {@link CSVRecord} get method that takes a String column name argument: 144 * </p> 145 * 146 * <pre>{@code 147 * String value = record.get("Col1"); 148 * }</pre> 149 * 150 * <p> 151 * This makes your code impervious to changes in column order in the CSV file. 152 * </p> 153 * 154 * <h2>Serialization</h2> 155 * <p> 156 * This class implements the {@link Serializable} interface with the following caveats: 157 * </p> 158 * <ul> 159 * <li>This class will no longer implement Serializable in 2.0.</li> 160 * <li>Serialization is not supported from one version to the next.</li> 161 * </ul> 162 * <p> 163 * The {@code serialVersionUID} values are: 164 * </p> 165 * <ul> 166 * <li>Version 1.10.0: {@code 2L}</li> 167 * <li>Version 1.9.0 through 1.0: {@code 1L}</li> 168 * </ul> 169 * 170 * <h2>Notes</h2> 171 * <p> 172 * This class is immutable. 173 * </p> 174 * <p> 175 * Not all settings are used for both parsing and writing. 176 * </p> 177 */ 178 public final class CSVFormat implements Serializable { 179 180 /** 181 * Builds CSVFormat instances. 182 * 183 * @since 1.9.0 184 */ 185 public static class Builder { 186 187 /** 188 * Creates a new default builder. 
189 * 190 * @return a copy of the builder 191 */ 192 public static Builder create() { 193 return new Builder(DEFAULT); 194 } 195 196 /** 197 * Creates a new builder for the given format. 198 * 199 * @param csvFormat the source format. 200 * @return a copy of the builder 201 */ 202 public static Builder create(final CSVFormat csvFormat) { 203 return new Builder(csvFormat); 204 } 205 206 private boolean allowMissingColumnNames; 207 208 private boolean autoFlush; 209 210 private Character commentMarker; 211 212 private String delimiter; 213 214 private DuplicateHeaderMode duplicateHeaderMode; 215 216 private Character escapeCharacter; 217 218 private String[] headerComments; 219 220 private String[] headers; 221 222 private boolean ignoreEmptyLines; 223 224 private boolean ignoreHeaderCase; 225 226 private boolean ignoreSurroundingSpaces; 227 228 private String nullString; 229 230 private Character quoteCharacter; 231 232 private String quotedNullString; 233 234 private QuoteMode quoteMode; 235 236 private String recordSeparator; 237 238 private boolean skipHeaderRecord; 239 240 private boolean lenientEof; 241 242 private boolean trailingData; 243 244 private boolean trailingDelimiter; 245 246 private boolean trim; 247 248 private Builder(final CSVFormat csvFormat) { 249 this.delimiter = csvFormat.delimiter; 250 this.quoteCharacter = csvFormat.quoteCharacter; 251 this.quoteMode = csvFormat.quoteMode; 252 this.commentMarker = csvFormat.commentMarker; 253 this.escapeCharacter = csvFormat.escapeCharacter; 254 this.ignoreSurroundingSpaces = csvFormat.ignoreSurroundingSpaces; 255 this.allowMissingColumnNames = csvFormat.allowMissingColumnNames; 256 this.ignoreEmptyLines = csvFormat.ignoreEmptyLines; 257 this.recordSeparator = csvFormat.recordSeparator; 258 this.nullString = csvFormat.nullString; 259 this.headerComments = csvFormat.headerComments; 260 this.headers = csvFormat.headers; 261 this.skipHeaderRecord = csvFormat.skipHeaderRecord; 262 this.ignoreHeaderCase = 
csvFormat.ignoreHeaderCase; 263 this.lenientEof = csvFormat.lenientEof; 264 this.trailingData = csvFormat.trailingData; 265 this.trailingDelimiter = csvFormat.trailingDelimiter; 266 this.trim = csvFormat.trim; 267 this.autoFlush = csvFormat.autoFlush; 268 this.quotedNullString = csvFormat.quotedNullString; 269 this.duplicateHeaderMode = csvFormat.duplicateHeaderMode; 270 } 271 272 /** 273 * Builds a new CSVFormat instance. 274 * 275 * @return a new CSVFormat instance. 276 */ 277 public CSVFormat build() { 278 return new CSVFormat(this); 279 } 280 281 /** 282 * Sets the duplicate header names behavior, true to allow, false to disallow. 283 * 284 * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow. 285 * @return This instance. 286 * @deprecated Use {@link #setDuplicateHeaderMode(DuplicateHeaderMode)}. 287 */ 288 @Deprecated 289 public Builder setAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) { 290 setDuplicateHeaderMode(allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY); 291 return this; 292 } 293 294 /** 295 * Sets the parser missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an 296 * {@link IllegalArgumentException} to be thrown. 297 * 298 * @param allowMissingColumnNames the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to 299 * cause an {@link IllegalArgumentException} to be thrown. 300 * @return This instance. 301 */ 302 public Builder setAllowMissingColumnNames(final boolean allowMissingColumnNames) { 303 this.allowMissingColumnNames = allowMissingColumnNames; 304 return this; 305 } 306 307 /** 308 * Sets whether to flush on close. 309 * 310 * @param autoFlush whether to flush on close. 311 * @return This instance. 
312 */ 313 public Builder setAutoFlush(final boolean autoFlush) { 314 this.autoFlush = autoFlush; 315 return this; 316 } 317 318 /** 319 * Sets the comment marker character, use {@code null} to disable comments. 320 * <p> 321 * The comment start character is only recognized at the start of a line. 322 * </p> 323 * <p> 324 * Comments are printed first, before headers. 325 * </p> 326 * <p> 327 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of 328 * each comment line. 329 * </p> 330 * <p> 331 * If the comment marker is not set, then the header comments are ignored. 332 * </p> 333 * <p> 334 * For example: 335 * </p> 336 * <pre> 337 * builder.setCommentMarker('#') 338 * .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); 339 * </pre> 340 * <p> 341 * writes: 342 * </p> 343 * <pre> 344 * # Generated by Apache Commons CSV. 345 * # 1970-01-01T00:00:00Z 346 * </pre> 347 * 348 * @param commentMarker the comment start marker, use {@code null} to disable. 349 * @return This instance. 350 * @throws IllegalArgumentException thrown if the specified character is a line break 351 */ 352 public Builder setCommentMarker(final char commentMarker) { 353 setCommentMarker(Character.valueOf(commentMarker)); 354 return this; 355 } 356 357 /** 358 * Sets the comment marker character, use {@code null} to disable comments. 359 * <p> 360 * The comment start character is only recognized at the start of a line. 361 * </p> 362 * <p> 363 * Comments are printed first, before headers. 364 * </p> 365 * <p> 366 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of 367 * each comment line. 368 * </p> 369 * <p> 370 * If the comment marker is not set, then the header comments are ignored. 
371 * </p> 372 * <p> 373 * For example: 374 * </p> 375 * <pre> 376 * builder.setCommentMarker('#') 377 * .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); 378 * </pre> 379 * <p> 380 * writes: 381 * </p> 382 * <pre> 383 * # Generated by Apache Commons CSV. 384 * # 1970-01-01T00:00:00Z 385 * </pre> 386 * 387 * @param commentMarker the comment start marker, use {@code null} to disable. 388 * @return This instance. 389 * @throws IllegalArgumentException thrown if the specified character is a line break 390 */ 391 public Builder setCommentMarker(final Character commentMarker) { 392 if (isLineBreak(commentMarker)) { 393 throw new IllegalArgumentException("The comment start marker character cannot be a line break"); 394 } 395 this.commentMarker = commentMarker; 396 return this; 397 } 398 399 /** 400 * Sets the delimiter character. 401 * 402 * @param delimiter the delimiter character. 403 * @return This instance. 404 */ 405 public Builder setDelimiter(final char delimiter) { 406 return setDelimiter(String.valueOf(delimiter)); 407 } 408 409 /** 410 * Sets the delimiter character. 411 * 412 * @param delimiter the delimiter character. 413 * @return This instance. 414 */ 415 public Builder setDelimiter(final String delimiter) { 416 if (containsLineBreak(delimiter)) { 417 throw new IllegalArgumentException("The delimiter cannot be a line break"); 418 } 419 if (delimiter.isEmpty()) { 420 throw new IllegalArgumentException("The delimiter cannot be empty"); 421 } 422 this.delimiter = delimiter; 423 return this; 424 } 425 426 /** 427 * Sets the duplicate header names behavior. 428 * 429 * @param duplicateHeaderMode the duplicate header names behavior 430 * @return This instance. 
431 * @since 1.10.0 432 */ 433 public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) { 434 this.duplicateHeaderMode = Objects.requireNonNull(duplicateHeaderMode, "duplicateHeaderMode"); 435 return this; 436 } 437 438 /** 439 * Sets the escape character. 440 * 441 * @param escapeCharacter the escape character. 442 * @return This instance. 443 * @throws IllegalArgumentException thrown if the specified character is a line break 444 */ 445 public Builder setEscape(final char escapeCharacter) { 446 setEscape(Character.valueOf(escapeCharacter)); 447 return this; 448 } 449 450 /** 451 * Sets the escape character. 452 * 453 * @param escapeCharacter the escape character. 454 * @return This instance. 455 * @throws IllegalArgumentException thrown if the specified character is a line break 456 */ 457 public Builder setEscape(final Character escapeCharacter) { 458 if (isLineBreak(escapeCharacter)) { 459 throw new IllegalArgumentException("The escape character cannot be a line break"); 460 } 461 this.escapeCharacter = escapeCharacter; 462 return this; 463 } 464 465 /** 466 * Sets the header defined by the given {@link Enum} class. 467 * 468 * <p> 469 * Example: 470 * </p> 471 * 472 * <pre> 473 * public enum HeaderEnum { 474 * Name, Email, Phone 475 * } 476 * 477 * Builder builder = builder.setHeader(HeaderEnum.class); 478 * </pre> 479 * <p> 480 * The header is also used by the {@link CSVPrinter}. 481 * </p> 482 * 483 * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 484 * @return This instance. 485 */ 486 public Builder setHeader(final Class<? 
extends Enum<?>> headerEnum) { 487 String[] header = null; 488 if (headerEnum != null) { 489 final Enum<?>[] enumValues = headerEnum.getEnumConstants(); 490 header = new String[enumValues.length]; 491 Arrays.setAll(header, i -> enumValues[i].name()); 492 } 493 return setHeader(header); 494 } 495 496 /** 497 * Sets the header from the result set metadata. The header can be parsed automatically from the input file with: 498 * 499 * <pre> 500 * builder.setHeader(); 501 * </pre> 502 * 503 * or specified manually with: 504 * 505 * <pre> 506 * builder.setHeader(resultSet); 507 * </pre> 508 * <p> 509 * The header is also used by the {@link CSVPrinter}. 510 * </p> 511 * 512 * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 513 * @return This instance. 514 * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. 515 */ 516 public Builder setHeader(final ResultSet resultSet) throws SQLException { 517 return setHeader(resultSet != null ? resultSet.getMetaData() : null); 518 } 519 520 /** 521 * Sets the header from the result set metadata. The header can be parsed automatically from the input file with: 522 * 523 * <pre> 524 * builder.setHeader(); 525 * </pre> 526 * 527 * or specified manually with: 528 * 529 * <pre> 530 * builder.setHeader(resultSetMetaData); 531 * </pre> 532 * <p> 533 * The header is also used by the {@link CSVPrinter}. 534 * </p> 535 * 536 * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 537 * @return This instance. 538 * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. 
539 */ 540 public Builder setHeader(final ResultSetMetaData resultSetMetaData) throws SQLException { 541 String[] labels = null; 542 if (resultSetMetaData != null) { 543 final int columnCount = resultSetMetaData.getColumnCount(); 544 labels = new String[columnCount]; 545 for (int i = 0; i < columnCount; i++) { 546 labels[i] = resultSetMetaData.getColumnLabel(i + 1); 547 } 548 } 549 return setHeader(labels); 550 } 551 552 /** 553 * Sets the header to the given values. The header can be parsed automatically from the input file with: 554 * 555 * <pre> 556 * builder.setHeader(); 557 * </pre> 558 * 559 * or specified manually with: 560 * 561 * <pre>{@code 562 * builder.setHeader("name", "email", "phone"); 563 * }</pre> 564 * <p> 565 * The header is also used by the {@link CSVPrinter}. 566 * </p> 567 * 568 * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 569 * @return This instance. 570 */ 571 public Builder setHeader(final String... header) { 572 this.headers = CSVFormat.clone(header); 573 return this; 574 } 575 576 /** 577 * Sets the header comments to write before the CSV data. 578 * <p> 579 * This setting is ignored by the parser. 580 * </p> 581 * <p> 582 * Comments are printed first, before headers. 583 * </p> 584 * <p> 585 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of 586 * each comment line. 587 * </p> 588 * <p> 589 * If the comment marker is not set, then the header comments are ignored. 590 * </p> 591 * <p> 592 * For example: 593 * </p> 594 * <pre> 595 * builder.setCommentMarker('#') 596 * .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); 597 * </pre> 598 * <p> 599 * writes: 600 * </p> 601 * <pre> 602 * # Generated by Apache Commons CSV. 603 * # 1970-01-01T00:00:00Z 604 * </pre> 605 * 606 * @param headerComments the headerComments which will be printed by the Printer before the CSV data. 
607 * @return This instance. 608 */ 609 public Builder setHeaderComments(final Object... headerComments) { 610 this.headerComments = CSVFormat.clone(toStringArray(headerComments)); 611 return this; 612 } 613 614 /** 615 * Sets the header comments to write before the CSV data. 616 * <p> 617 * This setting is ignored by the parser. 618 * </p> 619 * <p> 620 * Comments are printed first, before headers. 621 * </p> 622 * <p> 623 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of 624 * each comment line. 625 * </p> 626 * <p> 627 * If the comment marker is not set, then the header comments are ignored. 628 * </p> 629 * <p> 630 * For example: 631 * </p> 632 * <pre> 633 * builder.setCommentMarker('#') 634 * .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0).toString()); 635 * </pre> 636 * <p> 637 * writes: 638 * </p> 639 * <pre> 640 * # Generated by Apache Commons CSV. 641 * # 1970-01-01T00:00:00Z 642 * </pre> 643 * 644 * @param headerComments the headerComments which will be printed by the Printer before the CSV data. 645 * @return This instance. 646 */ 647 public Builder setHeaderComments(final String... headerComments) { 648 this.headerComments = CSVFormat.clone(headerComments); 649 return this; 650 } 651 652 /** 653 * Sets the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate empty lines to empty 654 * records. 655 * 656 * @param ignoreEmptyLines the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate 657 * empty lines to empty records. 658 * @return This instance. 
659 */ 660 public Builder setIgnoreEmptyLines(final boolean ignoreEmptyLines) { 661 this.ignoreEmptyLines = ignoreEmptyLines; 662 return this; 663 } 664 665 /** 666 * Sets the parser case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. 667 * 668 * @param ignoreHeaderCase the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. 669 * @return This instance. 670 */ 671 public Builder setIgnoreHeaderCase(final boolean ignoreHeaderCase) { 672 this.ignoreHeaderCase = ignoreHeaderCase; 673 return this; 674 } 675 676 /** 677 * Sets the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is. 678 * 679 * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is. 680 * @return This instance. 681 */ 682 public Builder setIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { 683 this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; 684 return this; 685 } 686 687 /** 688 * Sets whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility. 689 * 690 * @param lenientEof whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility. 691 * @return This instance. 692 * @since 1.11.0 693 */ 694 public Builder setLenientEof(final boolean lenientEof) { 695 this.lenientEof = lenientEof; 696 return this; 697 } 698 699 /** 700 * Sets the String to convert to and from {@code null}. No substitution occurs if {@code null}. 701 * 702 * <ul> 703 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li> 704 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li> 705 * </ul> 706 * 707 * @param nullString the String to convert to and from {@code null}. 
No substitution occurs if {@code null}. 708 * @return This instance. 709 */ 710 public Builder setNullString(final String nullString) { 711 this.nullString = nullString; 712 this.quotedNullString = quoteCharacter + nullString + quoteCharacter; 713 return this; 714 } 715 716 /** 717 * Sets the quote character. 718 * 719 * @param quoteCharacter the quote character. 720 * @return This instance. 721 */ 722 public Builder setQuote(final char quoteCharacter) { 723 setQuote(Character.valueOf(quoteCharacter)); 724 return this; 725 } 726 727 /** 728 * Sets the quote character, use {@code null} to disable. 729 * 730 * @param quoteCharacter the quote character, use {@code null} to disable. 731 * @return This instance. 732 */ 733 public Builder setQuote(final Character quoteCharacter) { 734 if (isLineBreak(quoteCharacter)) { 735 throw new IllegalArgumentException("The quoteChar cannot be a line break"); 736 } 737 this.quoteCharacter = quoteCharacter; 738 return this; 739 } 740 741 /** 742 * Sets the quote policy to use for output. 743 * 744 * @param quoteMode the quote policy to use for output. 745 * @return This instance. 746 */ 747 public Builder setQuoteMode(final QuoteMode quoteMode) { 748 this.quoteMode = quoteMode; 749 return this; 750 } 751 752 /** 753 * Sets the record separator to use for output. 754 * 755 * <p> 756 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' 757 * and "\r\n" 758 * </p> 759 * 760 * @param recordSeparator the record separator to use for output. 761 * @return This instance. 762 */ 763 public Builder setRecordSeparator(final char recordSeparator) { 764 this.recordSeparator = String.valueOf(recordSeparator); 765 return this; 766 } 767 768 /** 769 * Sets the record separator to use for output. 770 * 771 * <p> 772 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. 
Parsing currently only works for inputs with '\n', '\r'
         * and "\r\n"
         * </p>
         *
         * @param recordSeparator the record separator to use for output.
         * @return This instance.
         */
        public Builder setRecordSeparator(final String recordSeparator) {
            this.recordSeparator = recordSeparator;
            return this;
        }

        /**
         * Sets whether to skip the header record.
         *
         * @param skipHeaderRecord whether to skip the header record.
         * @return This instance.
         */
        public Builder setSkipHeaderRecord(final boolean skipHeaderRecord) {
            this.skipHeaderRecord = skipHeaderRecord;
            return this;
        }

        /**
         * Sets whether reading trailing data is allowed in records, helps Excel compatibility.
         *
         * @param trailingData whether reading trailing data is allowed in records, helps Excel compatibility.
         * @return This instance.
         * @since 1.11.0
         */
        public Builder setTrailingData(final boolean trailingData) {
            this.trailingData = trailingData;
            return this;
        }

        /**
         * Sets whether to add a trailing delimiter.
         *
         * @param trailingDelimiter whether to add a trailing delimiter.
         * @return This instance.
         */
        public Builder setTrailingDelimiter(final boolean trailingDelimiter) {
            this.trailingDelimiter = trailingDelimiter;
            return this;
        }

        /**
         * Sets whether to trim leading and trailing blanks.
         *
         * @param trim whether to trim leading and trailing blanks.
         * @return This instance.
         */
        public Builder setTrim(final boolean trim) {
            this.trim = trim;
            return this;
        }
    }

    /**
     * Predefines formats: each constant wraps one of the {@code public static final} formats of {@link CSVFormat}.
     *
     * @since 1.2
     */
    public enum Predefined {

        /**
         * The DEFAULT predefined format.
         *
         * @see CSVFormat#DEFAULT
         */
        Default(DEFAULT),

        /**
         * The EXCEL predefined format.
         *
         * @see CSVFormat#EXCEL
         */
        Excel(EXCEL),

        /**
         * The INFORMIX_UNLOAD predefined format.
         *
         * @see CSVFormat#INFORMIX_UNLOAD
         * @since 1.3
         */
        InformixUnload(INFORMIX_UNLOAD),

        /**
         * The INFORMIX_UNLOAD_CSV predefined format.
         *
         * @see CSVFormat#INFORMIX_UNLOAD_CSV
         * @since 1.3
         */
        InformixUnloadCsv(INFORMIX_UNLOAD_CSV),

        /**
         * The MONGODB_CSV predefined format.
         *
         * @see CSVFormat#MONGODB_CSV
         * @since 1.7
         */
        MongoDBCsv(MONGODB_CSV),

        /**
         * The MONGODB_TSV predefined format.
         *
         * @see CSVFormat#MONGODB_TSV
         * @since 1.7
         */
        MongoDBTsv(MONGODB_TSV),

        /**
         * The MYSQL predefined format.
         *
         * @see CSVFormat#MYSQL
         */
        MySQL(MYSQL),

        /**
         * The ORACLE predefined format.
         *
         * @see CSVFormat#ORACLE
         */
        Oracle(ORACLE),

        /**
         * The POSTGRESQL_CSV predefined format.
         *
         * @see CSVFormat#POSTGRESQL_CSV
         * @since 1.5
         */
        PostgreSQLCsv(POSTGRESQL_CSV),

        /**
         * The POSTGRESQL_TEXT predefined format.
         *
         * @see CSVFormat#POSTGRESQL_TEXT
         */
        PostgreSQLText(POSTGRESQL_TEXT),

        /**
         * The RFC4180 predefined format.
         *
         * @see CSVFormat#RFC4180
         */
        RFC4180(CSVFormat.RFC4180),

        /**
         * The TDF predefined format.
         *
         * @see CSVFormat#TDF
         */
        TDF(CSVFormat.TDF);

        /** The format this constant stands for. */
        private final CSVFormat format;

        /**
         * Binds this constant to its format.
         *
         * @param format the format this constant stands for.
         */
        Predefined(final CSVFormat format) {
            this.format = format;
        }

        /**
         * Gets the format.
         *
         * @return the format.
         */
        public CSVFormat getFormat() {
            return format;
        }
    }

    /**
     * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing
     * empty lines.
*
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setRecordSeparator("\r\n")}</li>
     * <li>{@code setIgnoreEmptyLines(true)}</li>
     * <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
     * </ul>
     *
     * @see Predefined#Default
     */
    public static final CSVFormat DEFAULT = new CSVFormat(Constants.COMMA, Constants.DOUBLE_QUOTE_CHAR, null, null, null, false, true, Constants.CRLF, null,
            null, null, false, false, false, false, false, false, DuplicateHeaderMode.ALLOW_ALL, false, false);

    /**
     * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale-dependent, it might be necessary
     * to customize this format to accommodate your regional settings.
     *
     * <p>
     * For example for parsing or generating a CSV file on a French system the following format will be used:
     * </p>
     *
     * <pre>
     * CSVFormat fmt = CSVFormat.EXCEL.builder().setDelimiter(';').build();
     * </pre>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setRecordSeparator("\r\n")}</li>
     * <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
     * <li>{@code setIgnoreEmptyLines(false)}</li>
     * <li>{@code setAllowMissingColumnNames(true)}</li>
     * <li>{@code setTrailingData(true)}</li>
     * <li>{@code setLenientEof(true)}</li>
     * </ul>
     * <p>
     * Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} and
     * {@link Builder#setIgnoreEmptyLines(boolean) Builder#setIgnoreEmptyLines(false)}.
* </p>
     *
     * @see Predefined#Excel
     */
    // @formatter:off
    public static final CSVFormat EXCEL = DEFAULT.builder()
            .setIgnoreEmptyLines(false)
            .setAllowMissingColumnNames(true)
            .setTrailingData(true)
            .setLenientEof(true)
            .build();
    // @formatter:on

    /**
     * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation.
     *
     * <p>
     * This is a pipe-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}.
     * The default NULL string is {@code "\\N"}.
     * </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter('|')}</li>
     * <li>{@code setEscape('\\')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setRecordSeparator('\n')}</li>
     * </ul>
     *
     * @see Predefined#InformixUnload
     * @see <a href= "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">
     *      http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a>
     * @since 1.3
     */
    // @formatter:off
    public static final CSVFormat INFORMIX_UNLOAD = DEFAULT.builder()
            .setDelimiter(Constants.PIPE)
            .setEscape(Constants.BACKSLASH)
            .setQuote(Constants.DOUBLE_QUOTE_CHAR)
            .setRecordSeparator(Constants.LF)
            .build();
    // @formatter:on

    /**
     * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.)
     *
     * <p>
     * This is a comma-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}.
     * The default NULL string is {@code "\\N"}.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setRecordSeparator('\n')}</li>
     * </ul>
     *
     * @see Predefined#InformixUnloadCsv
     * @see <a href= "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">
     *      http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a>
     * @since 1.3
     */
    // @formatter:off
    public static final CSVFormat INFORMIX_UNLOAD_CSV = DEFAULT.builder()
            .setDelimiter(Constants.COMMA)
            .setQuote(Constants.DOUBLE_QUOTE_CHAR)
            .setRecordSeparator(Constants.LF)
            .build();
    // @formatter:on

    /**
     * Default MongoDB CSV format used by the {@code mongoexport} operation.
     * <p>
     * <b>Parsing is not supported yet.</b>
     * </p>
     *
     * <p>
     * This is a comma-delimited format. Values are double quoted only if needed and special characters are escaped with {@code '"'}. A header line with field
     * names is expected.
     * </p>
     * <p>
     * As of 2024-04-05, the MongoDB documentation for {@code mongoimport} states:
     * </p>
     * <blockquote>The csv parser accepts that data that complies with RFC <a href="https://tools.ietf.org/html/4180">RFC-4180</a>.
     * As a result, backslashes are not a valid escape character. If you use double-quotes to enclose fields in the CSV data, you must escape
     * internal double-quote marks by prepending another double-quote.
* </blockquote>
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',')}</li>
     * <li>{@code setEscape('"')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setQuoteMode(QuoteMode.MINIMAL)}</li>
     * <li>{@code setSkipHeaderRecord(false)}</li>
     * </ul>
     *
     * @see Predefined#MongoDBCsv
     * @see <a href="https://docs.mongodb.com/manual/reference/program/mongoexport/">MongoDB mongoexport command documentation</a>
     * @since 1.7
     */
    // @formatter:off
    public static final CSVFormat MONGODB_CSV = DEFAULT.builder()
            .setDelimiter(Constants.COMMA)
            .setEscape(Constants.DOUBLE_QUOTE_CHAR)
            .setQuote(Constants.DOUBLE_QUOTE_CHAR)
            .setQuoteMode(QuoteMode.MINIMAL)
            .setSkipHeaderRecord(false)
            .build();
    // @formatter:on

    /**
     * Default MongoDB TSV format used by the {@code mongoexport} operation.
     * <p>
     * <b>Parsing is not supported yet.</b>
     * </p>
     *
     * <p>
     * This is a tab-delimited format. Values are double quoted only if needed and special
     * characters are escaped with {@code '"'}. A header line with field names is expected.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter('\t')}</li>
     * <li>{@code setEscape('"')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setQuoteMode(QuoteMode.MINIMAL)}</li>
     * <li>{@code setSkipHeaderRecord(false)}</li>
     * </ul>
     *
     * @see Predefined#MongoDBCsv
     * @see <a href="https://docs.mongodb.com/manual/reference/program/mongoexport/">MongoDB mongoexport command
     *          documentation</a>
     * @since 1.7
     */
    // @formatter:off
    public static final CSVFormat MONGODB_TSV = DEFAULT.builder()
            .setDelimiter(Constants.TAB)
            .setEscape(Constants.DOUBLE_QUOTE_CHAR)
            .setQuote(Constants.DOUBLE_QUOTE_CHAR)
            .setQuoteMode(QuoteMode.MINIMAL)
            .setSkipHeaderRecord(false)
            .build();
    // @formatter:on

    /**
     * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
     *
     * <p>
     * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
     * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter('\t')}</li>
     * <li>{@code setEscape('\\')}</li>
     * <li>{@code setIgnoreEmptyLines(false)}</li>
     * <li>{@code setQuote(null)}</li>
     * <li>{@code setRecordSeparator('\n')}</li>
     * <li>{@code setNullString("\\N")}</li>
     * <li>{@code setQuoteMode(QuoteMode.ALL_NON_NULL)}</li>
     * </ul>
     *
     * @see Predefined#MySQL
     * @see <a href="https://dev.mysql.com/doc/refman/5.1/en/load-data.html">https://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
     */
    // @formatter:off
    public static final CSVFormat MYSQL = DEFAULT.builder()
            .setDelimiter(Constants.TAB)
            .setEscape(Constants.BACKSLASH)
            .setIgnoreEmptyLines(false)
            .setQuote(null)
            .setRecordSeparator(Constants.LF)
            .setNullString(Constants.SQL_NULL_STRING)
            .setQuoteMode(QuoteMode.ALL_NON_NULL)
            .build();
    // @formatter:on

    /**
     * Default Oracle format used by the SQL*Loader utility.
     *
     * <p>
     * This is a comma-delimited format with the system line separator character as the record separator. Values are
     * double quoted when needed and special characters are escaped with {@code '\'}. The default NULL string is
     * {@code "\\N"}. Values are trimmed.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',') // default is {@code FIELDS TERMINATED BY ','}}</li>
     * <li>{@code setEscape('\\')}</li>
     * <li>{@code setIgnoreEmptyLines(false)}</li>
     * <li>{@code setQuote('"')  // default is {@code OPTIONALLY ENCLOSED BY '"'}}</li>
     * <li>{@code setNullString("\\N")}</li>
     * <li>{@code setTrim(true)}</li>
     * <li>{@code setRecordSeparator(System.lineSeparator())}</li>
     * <li>{@code setQuoteMode(QuoteMode.MINIMAL)}</li>
     * </ul>
     *
     * @see Predefined#Oracle
     * @see <a href="https://s.apache.org/CGXG">Oracle CSV Format Specification</a>
     * @since 1.6
     */
    // @formatter:off
    public static final CSVFormat ORACLE = DEFAULT.builder()
            .setDelimiter(Constants.COMMA)
            .setEscape(Constants.BACKSLASH)
            .setIgnoreEmptyLines(false)
            .setQuote(Constants.DOUBLE_QUOTE_CHAR)
            .setNullString(Constants.SQL_NULL_STRING)
            .setTrim(true)
            .setRecordSeparator(System.lineSeparator())
            .setQuoteMode(QuoteMode.MINIMAL)
            .build();
    // @formatter:on

    /**
     * Default PostgreSQL CSV format used by the {@code COPY} operation.
     *
     * <p>
     * This is a comma-delimited format with an LF character as the line separator. Values are double quoted and special
     * characters are not escaped. The default NULL string is {@code ""}.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',')}</li>
     * <li>{@code setEscape(null)}</li>
     * <li>{@code setIgnoreEmptyLines(false)}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setRecordSeparator('\n')}</li>
     * <li>{@code setNullString("")}</li>
     * <li>{@code setQuoteMode(QuoteMode.ALL_NON_NULL)}</li>
     * </ul>
     *
     * @see Predefined#MySQL
     * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL COPY command
     *          documentation</a>
     * @since 1.5
     */
    // @formatter:off
    public static final CSVFormat POSTGRESQL_CSV = DEFAULT.builder()
            .setDelimiter(Constants.COMMA)
            .setEscape(null)
            .setIgnoreEmptyLines(false)
            .setQuote(Constants.DOUBLE_QUOTE_CHAR)
            .setRecordSeparator(Constants.LF)
            .setNullString(Constants.EMPTY)
            .setQuoteMode(QuoteMode.ALL_NON_NULL)
            .build();
    // @formatter:on

    /**
     * Default PostgreSQL text format used by the {@code COPY} operation.
     *
     * <p>
     * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
     * characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter('\t')}</li>
     * <li>{@code setEscape('\\')}</li>
     * <li>{@code setIgnoreEmptyLines(false)}</li>
     * <li>{@code setQuote(null)}</li>
     * <li>{@code setRecordSeparator('\n')}</li>
     * <li>{@code setNullString("\\N")}</li>
     * <li>{@code setQuoteMode(QuoteMode.ALL_NON_NULL)}</li>
     * </ul>
     *
     * @see Predefined#MySQL
     * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL COPY command
     *          documentation</a>
     * @since 1.5
     */
    // @formatter:off
    public static final CSVFormat POSTGRESQL_TEXT = DEFAULT.builder()
            .setDelimiter(Constants.TAB)
            .setEscape(Constants.BACKSLASH)
            .setIgnoreEmptyLines(false)
            .setQuote(null)
            .setRecordSeparator(Constants.LF)
            .setNullString(Constants.SQL_NULL_STRING)
            .setQuoteMode(QuoteMode.ALL_NON_NULL)
            .build();
    // @formatter:on

    /**
     * Comma separated format as defined by <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setRecordSeparator("\r\n")}</li>
     * <li>{@code setIgnoreEmptyLines(false)}</li>
     * </ul>
     *
     * @see Predefined#RFC4180
     */
    public static final CSVFormat RFC4180 = DEFAULT.builder().setIgnoreEmptyLines(false).build();

    /** Serialization version identifier. */
    private static final long serialVersionUID = 2L;

    /**
     * Tab-delimited format.
1318 * 1319 * <p> 1320 * The {@link Builder} settings are: 1321 * </p> 1322 * <ul> 1323 * <li>{@code setDelimiter('\t')}</li> 1324 * <li>{@code setQuote('"')}</li> 1325 * <li>{@code setRecordSeparator("\r\n")}</li> 1326 * <li>{@code setIgnoreSurroundingSpaces(true)}</li> 1327 * </ul> 1328 * 1329 * @see Predefined#TDF 1330 */ 1331 // @formatter:off 1332 public static final CSVFormat TDF = DEFAULT.builder() 1333 .setDelimiter(Constants.TAB) 1334 .setIgnoreSurroundingSpaces(true) 1335 .build(); 1336 // @formatter:on 1337 1338 /** 1339 * Null-safe clone of an array. 1340 * 1341 * @param <T> The array element type. 1342 * @param values the source array 1343 * @return the cloned array. 1344 */ 1345 @SafeVarargs 1346 static <T> T[] clone(final T... values) { 1347 return values == null ? null : values.clone(); 1348 } 1349 1350 /** 1351 * Returns true if the given string contains the search char. 1352 * 1353 * @param source the string to check. 1354 * @param searchCh the character to search. 1355 * 1356 * @return true if {@code c} contains a line break character 1357 */ 1358 private static boolean contains(final String source, final char searchCh) { 1359 return Objects.requireNonNull(source, "source").indexOf(searchCh) >= 0; 1360 } 1361 1362 /** 1363 * Returns true if the given string contains a line break character. 1364 * 1365 * @param source the string to check. 1366 * 1367 * @return true if {@code c} contains a line break character. 1368 */ 1369 private static boolean containsLineBreak(final String source) { 1370 return contains(source, Constants.CR) || contains(source, Constants.LF); 1371 } 1372 1373 static boolean isBlank(final String value) { 1374 return value == null || value.trim().isEmpty(); 1375 } 1376 1377 /** 1378 * Returns true if the given character is a line break character. 1379 * 1380 * @param c the character to check. 1381 * 1382 * @return true if {@code c} is a line break character. 
1383 */ 1384 private static boolean isLineBreak(final char c) { 1385 return c == Constants.LF || c == Constants.CR; 1386 } 1387 1388 /** 1389 * Returns true if the given character is a line break character. 1390 * 1391 * @param c the character to check, may be null. 1392 * 1393 * @return true if {@code c} is a line break character (and not null). 1394 */ 1395 private static boolean isLineBreak(final Character c) { 1396 return c != null && isLineBreak(c.charValue()); // N.B. Explicit (un)boxing is intentional 1397 } 1398 1399 /** Same test as in as {@link String#trim()}. */ 1400 private static boolean isTrimChar(final char ch) { 1401 return ch <= Constants.SP; 1402 } 1403 1404 /** Same test as in as {@link String#trim()}. */ 1405 private static boolean isTrimChar(final CharSequence charSequence, final int pos) { 1406 return isTrimChar(charSequence.charAt(pos)); 1407 } 1408 1409 /** 1410 * Creates a new CSV format with the specified delimiter. 1411 * 1412 * <p> 1413 * Use this method if you want to create a CSVFormat from scratch. All fields but the delimiter will be initialized with null/false. 1414 * </p> 1415 * 1416 * @param delimiter the char used for value separation, must not be a line break character 1417 * @return a new CSV format. 
1418 * @throws IllegalArgumentException if the delimiter is a line break character 1419 * 1420 * @see #DEFAULT 1421 * @see #RFC4180 1422 * @see #MYSQL 1423 * @see #EXCEL 1424 * @see #TDF 1425 */ 1426 public static CSVFormat newFormat(final char delimiter) { 1427 return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false, 1428 DuplicateHeaderMode.ALLOW_ALL, false, false); 1429 } 1430 1431 static String[] toStringArray(final Object[] values) { 1432 if (values == null) { 1433 return null; 1434 } 1435 final String[] strings = new String[values.length]; 1436 Arrays.setAll(strings, i -> Objects.toString(values[i], null)); 1437 return strings; 1438 } 1439 1440 static CharSequence trim(final CharSequence charSequence) { 1441 if (charSequence instanceof String) { 1442 return ((String) charSequence).trim(); 1443 } 1444 final int count = charSequence.length(); 1445 int len = count; 1446 int pos = 0; 1447 1448 while (pos < len && isTrimChar(charSequence, pos)) { 1449 pos++; 1450 } 1451 while (pos < len && isTrimChar(charSequence, len - 1)) { 1452 len--; 1453 } 1454 return pos > 0 || len < count ? charSequence.subSequence(pos, len) : charSequence; 1455 } 1456 1457 /** 1458 * Gets one of the predefined formats from {@link CSVFormat.Predefined}. 1459 * 1460 * @param format name 1461 * @return one of the predefined formats 1462 * @since 1.2 1463 */ 1464 public static CSVFormat valueOf(final String format) { 1465 return CSVFormat.Predefined.valueOf(format).getFormat(); 1466 } 1467 1468 /** How duplicate headers are handled. */ 1469 private final DuplicateHeaderMode duplicateHeaderMode; 1470 1471 /** Whether missing column names are allowed when parsing the header line. */ 1472 private final boolean allowMissingColumnNames; 1473 1474 /** Whether to flush on close. */ 1475 private final boolean autoFlush; 1476 1477 /** Set to null if commenting is disabled. 
*/
    private final Character commentMarker;

    /** The string delimiting the values (typically ";", "," or "\t"). */
    private final String delimiter;

    /** Set to null if escaping is disabled. */
    private final Character escapeCharacter;

    /** Array of header column names. */
    private final String[] headers;

    /** Array of header comment lines. */
    private final String[] headerComments;

    /** Whether empty lines between records are ignored when parsing input. */
    private final boolean ignoreEmptyLines;

    /** Whether header names are accessed ignoring case. */
    private final boolean ignoreHeaderCase;

    /** Whether leading/trailing spaces around values are ignored. */
    private final boolean ignoreSurroundingSpaces;

    /** The string to be used for null values. */
    private final String nullString;

    /** Set to null if quoting is disabled. */
    private final Character quoteCharacter;

    /** Set to {@code quoteCharacter + nullString + quoteCharacter}. */
    private final String quotedNullString;

    /** The quote policy for output fields. */
    private final QuoteMode quoteMode;

    /** For output. */
    private final String recordSeparator;

    /** Whether to skip the header record. */
    private final boolean skipHeaderRecord;

    /** Whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility. */
    private final boolean lenientEof;

    /** Whether reading trailing data is allowed in records, helps Excel compatibility. */
    private final boolean trailingData;

    /** Whether to add a trailing delimiter. */
    private final boolean trailingDelimiter;

    /** Whether to trim leading and trailing blanks.
*/ 1529 private final boolean trim; 1530 1531 private CSVFormat(final Builder builder) { 1532 this.delimiter = builder.delimiter; 1533 this.quoteCharacter = builder.quoteCharacter; 1534 this.quoteMode = builder.quoteMode; 1535 this.commentMarker = builder.commentMarker; 1536 this.escapeCharacter = builder.escapeCharacter; 1537 this.ignoreSurroundingSpaces = builder.ignoreSurroundingSpaces; 1538 this.allowMissingColumnNames = builder.allowMissingColumnNames; 1539 this.ignoreEmptyLines = builder.ignoreEmptyLines; 1540 this.recordSeparator = builder.recordSeparator; 1541 this.nullString = builder.nullString; 1542 this.headerComments = builder.headerComments; 1543 this.headers = builder.headers; 1544 this.skipHeaderRecord = builder.skipHeaderRecord; 1545 this.ignoreHeaderCase = builder.ignoreHeaderCase; 1546 this.lenientEof = builder.lenientEof; 1547 this.trailingData = builder.trailingData; 1548 this.trailingDelimiter = builder.trailingDelimiter; 1549 this.trim = builder.trim; 1550 this.autoFlush = builder.autoFlush; 1551 this.quotedNullString = builder.quotedNullString; 1552 this.duplicateHeaderMode = builder.duplicateHeaderMode; 1553 validate(); 1554 } 1555 1556 /** 1557 * Creates a customized CSV format. 1558 * 1559 * @param delimiter the char used for value separation, must not be a line break character. 1560 * @param quoteChar the Character used as value encapsulation marker, may be {@code null} to disable. 1561 * @param quoteMode the quote mode. 1562 * @param commentStart the Character used for comment identification, may be {@code null} to disable. 1563 * @param escape the Character used to escape special characters in values, may be {@code null} to disable. 1564 * @param ignoreSurroundingSpaces {@code true} when whitespaces enclosing values should be ignored. 1565 * @param ignoreEmptyLines {@code true} when the parser should skip empty lines. 1566 * @param recordSeparator the line separator to use for output. 
* @param nullString              the String to convert to and from {@code null}.
     * @param headerComments          the comments to be printed by the Printer before the actual CSV data.
     * @param header                  the header.
     * @param skipHeaderRecord        if {@code true} the header row will be skipped.
     * @param allowMissingColumnNames if {@code true} the missing column names are allowed when parsing the header line.
     * @param ignoreHeaderCase        if {@code true} header names will be accessed ignoring case when parsing input.
     * @param trim                    if {@code true} next record value will be trimmed.
     * @param trailingDelimiter       if {@code true} the trailing delimiter will be added before record separator (if set).
     * @param autoFlush               if {@code true} the underlying stream will be flushed before closing.
     * @param duplicateHeaderMode     the behavior when handling duplicate headers.
     * @param trailingData            whether reading trailing data is allowed in records, helps Excel compatibility.
     * @param lenientEof              whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility.
     * @throws IllegalArgumentException if the delimiter is a line break character.
1580 */ 1581 private CSVFormat(final String delimiter, final Character quoteChar, final QuoteMode quoteMode, final Character commentStart, final Character escape, 1582 final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString, 1583 final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames, 1584 final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush, 1585 final DuplicateHeaderMode duplicateHeaderMode, final boolean trailingData, final boolean lenientEof) { 1586 this.delimiter = delimiter; 1587 this.quoteCharacter = quoteChar; 1588 this.quoteMode = quoteMode; 1589 this.commentMarker = commentStart; 1590 this.escapeCharacter = escape; 1591 this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; 1592 this.allowMissingColumnNames = allowMissingColumnNames; 1593 this.ignoreEmptyLines = ignoreEmptyLines; 1594 this.recordSeparator = recordSeparator; 1595 this.nullString = nullString; 1596 this.headerComments = toStringArray(headerComments); 1597 this.headers = clone(header); 1598 this.skipHeaderRecord = skipHeaderRecord; 1599 this.ignoreHeaderCase = ignoreHeaderCase; 1600 this.lenientEof = lenientEof; 1601 this.trailingData = trailingData; 1602 this.trailingDelimiter = trailingDelimiter; 1603 this.trim = trim; 1604 this.autoFlush = autoFlush; 1605 this.quotedNullString = quoteCharacter + nullString + quoteCharacter; 1606 this.duplicateHeaderMode = duplicateHeaderMode; 1607 validate(); 1608 } 1609 1610 private void append(final char c, final Appendable appendable) throws IOException { 1611 //try { 1612 appendable.append(c); 1613 //} catch (final IOException e) { 1614 // throw new UncheckedIOException(e); 1615 //} 1616 } 1617 1618 private void append(final CharSequence csq, final Appendable appendable) throws IOException { 1619 //try { 1620 appendable.append(csq); 1621 //} catch (final 
IOException e) { 1622 // throw new UncheckedIOException(e); 1623 //} 1624 } 1625 1626 /** 1627 * Creates a new Builder for this instance. 1628 * 1629 * @return a new Builder. 1630 */ 1631 public Builder builder() { 1632 return Builder.create(this); 1633 } 1634 1635 /** 1636 * Creates a copy of this instance. 1637 * 1638 * @return a copy of this instance. 1639 */ 1640 CSVFormat copy() { 1641 return builder().build(); 1642 } 1643 1644 @Override 1645 public boolean equals(final Object obj) { 1646 if (this == obj) { 1647 return true; 1648 } 1649 if (obj == null) { 1650 return false; 1651 } 1652 if (getClass() != obj.getClass()) { 1653 return false; 1654 } 1655 final CSVFormat other = (CSVFormat) obj; 1656 return allowMissingColumnNames == other.allowMissingColumnNames && autoFlush == other.autoFlush && 1657 Objects.equals(commentMarker, other.commentMarker) && Objects.equals(delimiter, other.delimiter) && 1658 duplicateHeaderMode == other.duplicateHeaderMode && Objects.equals(escapeCharacter, other.escapeCharacter) && 1659 Arrays.equals(headerComments, other.headerComments) && Arrays.equals(headers, other.headers) && 1660 ignoreEmptyLines == other.ignoreEmptyLines && ignoreHeaderCase == other.ignoreHeaderCase && 1661 ignoreSurroundingSpaces == other.ignoreSurroundingSpaces && lenientEof == other.lenientEof && 1662 Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) && 1663 quoteMode == other.quoteMode && Objects.equals(quotedNullString, other.quotedNullString) && 1664 Objects.equals(recordSeparator, other.recordSeparator) && skipHeaderRecord == other.skipHeaderRecord && 1665 trailingData == other.trailingData && trailingDelimiter == other.trailingDelimiter && trim == other.trim; 1666 } 1667 1668 private void escape(final char c, final Appendable appendable) throws IOException { 1669 append(escapeCharacter.charValue(), appendable); // N.B. 
Explicit (un)boxing is intentional 1670 append(c, appendable); 1671 } 1672 1673 /** 1674 * Formats the specified values. 1675 * 1676 * @param values the values to format 1677 * @return the formatted values 1678 */ 1679 public String format(final Object... values) { 1680 return Uncheck.get(() -> format_(values)); 1681 } 1682 1683 private String format_(final Object... values) throws IOException { 1684 final StringWriter out = new StringWriter(); 1685 try (CSVPrinter csvPrinter = new CSVPrinter(out, this)) { 1686 csvPrinter.printRecord(values); 1687 final String res = out.toString(); 1688 final int len = recordSeparator != null ? res.length() - recordSeparator.length() : res.length(); 1689 return res.substring(0, len); 1690 } 1691 } 1692 1693 /** 1694 * Gets whether duplicate names are allowed in the headers. 1695 * 1696 * @return whether duplicate header names are allowed 1697 * @since 1.7 1698 * @deprecated Use {@link #getDuplicateHeaderMode()}. 1699 */ 1700 @Deprecated 1701 public boolean getAllowDuplicateHeaderNames() { 1702 return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL; 1703 } 1704 1705 /** 1706 * Gets whether missing column names are allowed when parsing the header line. 1707 * 1708 * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an {@link IllegalArgumentException}. 1709 */ 1710 public boolean getAllowMissingColumnNames() { 1711 return allowMissingColumnNames; 1712 } 1713 1714 /** 1715 * Gets whether to flush on close. 1716 * 1717 * @return whether to flush on close. 1718 * @since 1.6 1719 */ 1720 public boolean getAutoFlush() { 1721 return autoFlush; 1722 } 1723 1724 /** 1725 * Gets the comment marker character, {@code null} disables comments. 1726 * <p> 1727 * The comment start character is only recognized at the start of a line. 1728 * </p> 1729 * <p> 1730 * Comments are printed first, before headers. 
* </p>
     * <p>
     * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment
     * marker written at the start of each comment line.
     * </p>
     * <p>
     * If the comment marker is not set, then the header comments are ignored.
     * </p>
     * <p>
     * For example:
     * </p>
     * <pre>
     * builder.setCommentMarker('#')
     *        .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
     * </pre>
     * <p>
     * writes:
     * </p>
     * <pre>
     * # Generated by Apache Commons CSV.
     * # 1970-01-01T00:00:00Z
     * </pre>
     *
     * @return the comment start marker, may be {@code null}
     */
    public Character getCommentMarker() {
        return commentMarker;
    }

    /**
     * Gets the first character delimiting the values (typically ';', ',' or '\t').
     *
     * @return the first character of the delimiter string.
     * @deprecated Use {@link #getDelimiterString()}.
     */
    @Deprecated
    public char getDelimiter() {
        return delimiter.charAt(0);
    }

    /**
     * Gets the string delimiting the values (typically ";", "," or "\t") as a character array.
     *
     * @return the delimiter characters in a newly created array.
     */
    char[] getDelimiterCharArray() {
        return delimiter.toCharArray();
    }

    /**
     * Gets the string delimiting the values (typically ";", "," or "\t").
     *
     * @return the delimiter.
     * @since 1.9.0
     */
    public String getDelimiterString() {
        return delimiter;
    }

    /**
     * Gets how duplicate headers are handled.
     *
     * @return if duplicate header values are allowed, allowed conditionally, or disallowed.
     * @since 1.10.0
     */
    public DuplicateHeaderMode getDuplicateHeaderMode() {
        return duplicateHeaderMode;
    }

    /**
     * Gets the escape character.
1802 * 1803 * @return the escape character, may be {@code 0} 1804 */ 1805 char getEscapeChar() { 1806 return escapeCharacter != null ? escapeCharacter.charValue() : 0; // N.B. Explicit (un)boxing is intentional 1807 } 1808 1809 /** 1810 * Gets the escape character. 1811 * 1812 * @return the escape character, may be {@code null} 1813 */ 1814 public Character getEscapeCharacter() { 1815 return escapeCharacter; 1816 } 1817 1818 /** 1819 * Gets a copy of the header array. 1820 * 1821 * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file 1822 */ 1823 public String[] getHeader() { 1824 return headers != null ? headers.clone() : null; 1825 } 1826 1827 /** 1828 * Gets a copy of the header comment array to write before the CSV data. 1829 * <p> 1830 * This setting is ignored by the parser. 1831 * </p> 1832 * <p> 1833 * Comments are printed first, before headers. 1834 * </p> 1835 * <p> 1836 * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment 1837 * marker written at the start of each comment line. 1838 * </p> 1839 * <p> 1840 * If the comment marker is not set, then the header comments are ignored. 1841 * </p> 1842 * <p> 1843 * For example: 1844 * </p> 1845 * <pre> 1846 * builder.setCommentMarker('#') 1847 * .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); 1848 * </pre> 1849 * <p> 1850 * writes: 1851 * </p> 1852 * <pre> 1853 * # Generated by Apache Commons CSV. 1854 * # 1970-01-01T00:00:00Z 1855 * </pre> 1856 * 1857 * @return a copy of the header comment array; {@code null} if disabled. 1858 */ 1859 public String[] getHeaderComments() { 1860 return headerComments != null ? headerComments.clone() : null; 1861 } 1862 1863 /** 1864 * Gets whether empty lines between records are ignored when parsing input. 1865 * 1866 * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty records. 
*/
    public boolean getIgnoreEmptyLines() {
        return ignoreEmptyLines;
    }

    /**
     * Gets whether header names will be accessed ignoring case when parsing input.
     *
     * @return {@code true} if header names cases are ignored, {@code false} if they are case-sensitive.
     * @since 1.3
     */
    public boolean getIgnoreHeaderCase() {
        return ignoreHeaderCase;
    }

    /**
     * Gets whether spaces around values are ignored when parsing input.
     *
     * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value.
     */
    public boolean getIgnoreSurroundingSpaces() {
        return ignoreSurroundingSpaces;
    }

    /**
     * Gets whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility.
     *
     * @return whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility.
     * @since 1.11.0
     */
    public boolean getLenientEof() {
        return lenientEof;
    }

    /**
     * Gets the String to convert to and from {@code null}.
     * <ul>
     * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li>
     * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
     * </ul>
     *
     * @return the String to convert to and from {@code null}. No substitution occurs if {@code null}
     */
    public String getNullString() {
        return nullString;
    }

    /**
     * Gets the character used to encapsulate values containing special characters.
     *
     * @return the quoteChar character, may be {@code null}
     */
    public Character getQuoteCharacter() {
        return quoteCharacter;
    }

    /**
     * Gets the quote policy for output fields.
*
     * @return the quote policy
     */
    public QuoteMode getQuoteMode() {
        return quoteMode;
    }

    /**
     * Gets the record separator delimiting output records.
     *
     * @return the record separator
     */
    public String getRecordSeparator() {
        return recordSeparator;
    }

    /**
     * Gets whether to skip the header record.
     *
     * @return whether to skip the header record.
     */
    public boolean getSkipHeaderRecord() {
        return skipHeaderRecord;
    }

    /**
     * Gets whether reading trailing data is allowed in records, helps Excel compatibility.
     *
     * @return whether reading trailing data is allowed in records, helps Excel compatibility.
     * @since 1.11.0
     */
    public boolean getTrailingData() {
        return trailingData;
    }

    /**
     * Gets whether to add a trailing delimiter.
     *
     * @return whether to add a trailing delimiter.
     * @since 1.3
     */
    public boolean getTrailingDelimiter() {
        return trailingDelimiter;
    }

    /**
     * Gets whether to trim leading and trailing blanks. This is used by {@link #print(Object, Appendable, boolean)}. Also by
     * {@code CSVParser#addRecordValue(boolean)}.
     *
     * @return whether to trim leading and trailing blanks.
1975 */ 1976 public boolean getTrim() { 1977 return trim; 1978 } 1979 1980 @Override 1981 public int hashCode() { 1982 final int prime = 31; 1983 int result = 1; 1984 result = prime * result + Arrays.hashCode(headerComments); 1985 result = prime * result + Arrays.hashCode(headers); 1986 result = prime * result + Objects.hash(allowMissingColumnNames, autoFlush, commentMarker, delimiter, duplicateHeaderMode, escapeCharacter, 1987 ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, lenientEof, nullString, quoteCharacter, quoteMode, quotedNullString, 1988 recordSeparator, skipHeaderRecord, trailingData, trailingDelimiter, trim); 1989 return result; 1990 } 1991 1992 /** 1993 * Tests whether comments are supported by this format. 1994 * 1995 * Note that the comment introducer character is only recognized at the start of a line. 1996 * 1997 * @return {@code true} is comments are supported, {@code false} otherwise 1998 */ 1999 public boolean isCommentMarkerSet() { 2000 return commentMarker != null; 2001 } 2002 2003 /** 2004 * Tests whether the next characters constitute a delimiter 2005 * 2006 * @param ch0 2007 * the first char (index 0). 2008 * @param charSeq 2009 * the match char sequence 2010 * @param startIndex 2011 * where start to match 2012 * @param delimiter 2013 * the delimiter 2014 * @param delimiterLength 2015 * the delimiter length 2016 * @return true if the match is successful 2017 */ 2018 private boolean isDelimiter(final char ch0, final CharSequence charSeq, final int startIndex, final char[] delimiter, final int delimiterLength) { 2019 if (ch0 != delimiter[0]) { 2020 return false; 2021 } 2022 final int len = charSeq.length(); 2023 if (startIndex + delimiterLength > len) { 2024 return false; 2025 } 2026 for (int i = 1; i < delimiterLength; i++) { 2027 if (charSeq.charAt(startIndex + i) != delimiter[i]) { 2028 return false; 2029 } 2030 } 2031 return true; 2032 } 2033 2034 /** 2035 * Tests whether escapes are being processed. 
*
 * @return {@code true} if escapes are processed
 */
public boolean isEscapeCharacterSet() {
    return escapeCharacter != null;
}

/**
 * Tests whether a null string has been defined.
 *
 * @return {@code true} if a nullString is defined
 */
public boolean isNullStringSet() {
    return nullString != null;
}

/**
 * Tests whether a quoteChar has been defined.
 *
 * @return {@code true} if a quoteChar is defined
 */
public boolean isQuoteCharacterSet() {
    return quoteCharacter != null;
}

/**
 * Parses the specified content.
 *
 * <p>
 * See also the various static parse methods on {@link CSVParser}.
 * </p>
 *
 * @param reader the reader supplying the content to parse
 * @return a parser over a stream of {@link CSVRecord}s.
 * @throws IOException If an I/O error occurs
 * @throws CSVException Thrown on invalid input.
 */
public CSVParser parse(final Reader reader) throws IOException {
    return new CSVParser(reader, this);
}

/**
 * Prints to the specified output.
 *
 * <p>
 * See also {@link CSVPrinter}.
 * </p>
 *
 * @param out the output.
 * @return a printer to an output.
 * @throws IOException thrown if the optional header cannot be printed.
 */
public CSVPrinter print(final Appendable out) throws IOException {
    return new CSVPrinter(out, this);
}

/**
 * Prints to the specified {@code File} with given {@code Charset}.
 *
 * <p>
 * See also {@link CSVPrinter}.
 * </p>
 *
 * @param out the output.
 * @param charset A charset.
 * @return a printer to an output.
 * @throws IOException thrown if the optional header cannot be printed.
2103 * @since 1.5 2104 */ 2105 public CSVPrinter print(final File out, final Charset charset) throws IOException { 2106 return print(out.toPath(), charset); 2107 } 2108 2109 private void print(final InputStream inputStream, final Appendable out, final boolean newRecord) throws IOException { 2110 // InputStream is never null here 2111 // There is nothing to escape when quoting is used which is the default. 2112 if (!newRecord) { 2113 append(getDelimiterString(), out); 2114 } 2115 final boolean quoteCharacterSet = isQuoteCharacterSet(); 2116 if (quoteCharacterSet) { 2117 append(getQuoteCharacter().charValue(), out); // N.B. Explicit (un)boxing is intentional 2118 } 2119 // Stream the input to the output without reading or holding the whole value in memory. 2120 // AppendableOutputStream cannot "close" an Appendable. 2121 try (OutputStream outputStream = new Base64OutputStream(new AppendableOutputStream<>(out))) { 2122 IOUtils.copy(inputStream, outputStream); 2123 } 2124 if (quoteCharacterSet) { 2125 append(getQuoteCharacter().charValue(), out); // N.B. Explicit (un)boxing is intentional 2126 } 2127 } 2128 2129 /** 2130 * Prints the {@code value} as the next value on the line to {@code out}. The value will be escaped or encapsulated as needed. Useful when one wants to 2131 * avoid creating CSVPrinters. Trims the value if {@link #getTrim()} is true. 2132 * 2133 * @param value value to output. 2134 * @param out where to print the value. 2135 * @param newRecord if this a new record. 2136 * @throws IOException If an I/O error occurs. 2137 * @since 1.4 2138 */ 2139 public synchronized void print(final Object value, final Appendable out, final boolean newRecord) throws IOException { 2140 // null values are considered empty 2141 // Only call CharSequence.toString() if you have to, helps GC-free use cases. 
2142 CharSequence charSequence; 2143 if (value == null) { 2144 // https://issues.apache.org/jira/browse/CSV-203 2145 if (null == nullString) { 2146 charSequence = Constants.EMPTY; 2147 } else if (QuoteMode.ALL == quoteMode) { 2148 charSequence = quotedNullString; 2149 } else { 2150 charSequence = nullString; 2151 } 2152 } else if (value instanceof CharSequence) { 2153 charSequence = (CharSequence) value; 2154 } else if (value instanceof Reader) { 2155 print((Reader) value, out, newRecord); 2156 return; 2157 } else if (value instanceof InputStream) { 2158 print((InputStream) value, out, newRecord); 2159 return; 2160 } else { 2161 charSequence = value.toString(); 2162 } 2163 charSequence = getTrim() ? trim(charSequence) : charSequence; 2164 print(value, charSequence, out, newRecord); 2165 } 2166 2167 private synchronized void print(final Object object, final CharSequence value, final Appendable out, final boolean newRecord) throws IOException { 2168 final int offset = 0; 2169 final int len = value.length(); 2170 if (!newRecord) { 2171 out.append(getDelimiterString()); 2172 } 2173 if (object == null) { 2174 out.append(value); 2175 } else if (isQuoteCharacterSet()) { 2176 // The original object is needed so can check for Number 2177 printWithQuotes(object, value, out, newRecord); 2178 } else if (isEscapeCharacterSet()) { 2179 printWithEscapes(value, out); 2180 } else { 2181 out.append(value, offset, len); 2182 } 2183 } 2184 2185 /** 2186 * Prints to the specified {@code Path} with given {@code Charset}, 2187 * returns a {@code CSVPrinter} which the caller MUST close. 2188 * 2189 * <p> 2190 * See also {@link CSVPrinter}. 2191 * </p> 2192 * 2193 * @param out the output. 2194 * @param charset A charset. 2195 * @return a printer to an output. 2196 * @throws IOException thrown if the optional header cannot be printed. 
2197 * @since 1.5 2198 */ 2199 @SuppressWarnings("resource") 2200 public CSVPrinter print(final Path out, final Charset charset) throws IOException { 2201 return print(Files.newBufferedWriter(out, charset)); 2202 } 2203 2204 private void print(final Reader reader, final Appendable out, final boolean newRecord) throws IOException { 2205 // Reader is never null here 2206 if (!newRecord) { 2207 append(getDelimiterString(), out); 2208 } 2209 if (isQuoteCharacterSet()) { 2210 printWithQuotes(reader, out); 2211 } else if (isEscapeCharacterSet()) { 2212 printWithEscapes(reader, out); 2213 } else if (out instanceof Writer) { 2214 IOUtils.copyLarge(reader, (Writer) out); 2215 } else { 2216 IOUtils.copy(reader, out); 2217 } 2218 } 2219 2220 /** 2221 * Prints to the {@link System#out}. 2222 * 2223 * <p> 2224 * See also {@link CSVPrinter}. 2225 * </p> 2226 * 2227 * @return a printer to {@link System#out}. 2228 * @throws IOException thrown if the optional header cannot be printed. 2229 * @since 1.5 2230 */ 2231 public CSVPrinter printer() throws IOException { 2232 return new CSVPrinter(System.out, this); 2233 } 2234 2235 /** 2236 * Outputs the trailing delimiter (if set) followed by the record separator (if set). 2237 * 2238 * @param appendable where to write 2239 * @throws IOException If an I/O error occurs. 2240 * @since 1.4 2241 */ 2242 public synchronized void println(final Appendable appendable) throws IOException { 2243 if (getTrailingDelimiter()) { 2244 append(getDelimiterString(), appendable); 2245 } 2246 if (recordSeparator != null) { 2247 append(recordSeparator, appendable); 2248 } 2249 } 2250 2251 /** 2252 * Prints the given {@code values} to {@code out} as a single record of delimiter-separated values followed by the record separator. 2253 * 2254 * <p> 2255 * The values will be quoted if needed. Quotes and new-line characters will be escaped. 
This method adds the record separator to the output after printing 2256 * the record, so there is no need to call {@link #println(Appendable)}. 2257 * </p> 2258 * 2259 * @param appendable where to write. 2260 * @param values values to output. 2261 * @throws IOException If an I/O error occurs. 2262 * @since 1.4 2263 */ 2264 public synchronized void printRecord(final Appendable appendable, final Object... values) throws IOException { 2265 for (int i = 0; i < values.length; i++) { 2266 print(values[i], appendable, i == 0); 2267 } 2268 println(appendable); 2269 } 2270 2271 /* 2272 * Note: Must only be called if escaping is enabled, otherwise can throw exceptions. 2273 */ 2274 private void printWithEscapes(final CharSequence charSeq, final Appendable appendable) throws IOException { 2275 int start = 0; 2276 int pos = 0; 2277 final int end = charSeq.length(); 2278 final char[] delimArray = getDelimiterCharArray(); 2279 final int delimLength = delimArray.length; 2280 final char escape = getEscapeChar(); 2281 while (pos < end) { 2282 char c = charSeq.charAt(pos); 2283 final boolean isDelimiterStart = isDelimiter(c, charSeq, pos, delimArray, delimLength); 2284 final boolean isCr = c == Constants.CR; 2285 final boolean isLf = c == Constants.LF; 2286 if (isCr || isLf || c == escape || isDelimiterStart) { 2287 // write out segment up until this char 2288 if (pos > start) { 2289 appendable.append(charSeq, start, pos); 2290 } 2291 if (isLf) { 2292 c = 'n'; 2293 } else if (isCr) { 2294 c = 'r'; 2295 } 2296 escape(c, appendable); 2297 if (isDelimiterStart) { 2298 for (int i = 1; i < delimLength; i++) { 2299 pos++; 2300 escape(charSeq.charAt(pos), appendable); 2301 } 2302 } 2303 start = pos + 1; // start on the current char after this one 2304 } 2305 pos++; 2306 } 2307 2308 // write last segment 2309 if (pos > start) { 2310 appendable.append(charSeq, start, pos); 2311 } 2312 } 2313 2314 /* 2315 * Note: Must only be called if escaping is enabled, otherwise can throw exceptions. 
2316 */ 2317 private void printWithEscapes(final Reader reader, final Appendable appendable) throws IOException { 2318 int start = 0; 2319 int pos = 0; 2320 @SuppressWarnings("resource") // Temp reader on input reader. 2321 final ExtendedBufferedReader bufferedReader = new ExtendedBufferedReader(reader); 2322 final char[] delimArray = getDelimiterCharArray(); 2323 final int delimLength = delimArray.length; 2324 final char escape = getEscapeChar(); 2325 final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); 2326 int c; 2327 final char[] lookAheadBuffer = new char[delimLength - 1]; 2328 while (EOF != (c = bufferedReader.read())) { 2329 builder.append((char) c); 2330 Arrays.fill(lookAheadBuffer, (char) 0); 2331 bufferedReader.peek(lookAheadBuffer); 2332 final String test = builder.toString() + new String(lookAheadBuffer); 2333 final boolean isDelimiterStart = isDelimiter((char) c, test, pos, delimArray, delimLength); 2334 final boolean isCr = c == Constants.CR; 2335 final boolean isLf = c == Constants.LF; 2336 if (isCr || isLf || c == escape || isDelimiterStart) { 2337 // write out segment up until this char 2338 if (pos > start) { 2339 append(builder.substring(start, pos), appendable); 2340 builder.setLength(0); 2341 pos = -1; 2342 } 2343 if (isLf) { 2344 c = 'n'; 2345 } else if (isCr) { 2346 c = 'r'; 2347 } 2348 escape((char) c, appendable); 2349 if (isDelimiterStart) { 2350 for (int i = 1; i < delimLength; i++) { 2351 escape((char) bufferedReader.read(), appendable); 2352 } 2353 } 2354 start = pos + 1; // start on the current char after this one 2355 } 2356 pos++; 2357 } 2358 // write last segment 2359 if (pos > start) { 2360 appendable.append(builder, start, pos); 2361 } 2362 } 2363 2364 /* 2365 * Note: must only be called if quoting is enabled, otherwise will generate NPE 2366 */ 2367 // the original object is needed so can check for Number 2368 private void printWithQuotes(final Object object, final CharSequence charSeq, final Appendable 
out, final boolean newRecord) throws IOException { 2369 boolean quote = false; 2370 int start = 0; 2371 int pos = 0; 2372 final int len = charSeq.length(); 2373 final char[] delim = getDelimiterCharArray(); 2374 final int delimLength = delim.length; 2375 final char quoteChar = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional 2376 // If escape char not specified, default to the quote char 2377 // This avoids having to keep checking whether there is an escape character 2378 // at the cost of checking against quote twice 2379 final char escapeChar = isEscapeCharacterSet() ? getEscapeChar() : quoteChar; 2380 QuoteMode quoteModePolicy = getQuoteMode(); 2381 if (quoteModePolicy == null) { 2382 quoteModePolicy = QuoteMode.MINIMAL; 2383 } 2384 switch (quoteModePolicy) { 2385 case ALL: 2386 case ALL_NON_NULL: 2387 quote = true; 2388 break; 2389 case NON_NUMERIC: 2390 quote = !(object instanceof Number); 2391 break; 2392 case NONE: 2393 // Use the existing escaping code 2394 printWithEscapes(charSeq, out); 2395 return; 2396 case MINIMAL: 2397 if (len <= 0) { 2398 // Always quote an empty token that is the first 2399 // on the line, as it may be the only thing on the 2400 // line. If it were not quoted in that case, 2401 // an empty line has no tokens. 2402 if (newRecord) { 2403 quote = true; 2404 } 2405 } else { 2406 char c = charSeq.charAt(pos); 2407 if (c <= Constants.COMMENT) { 2408 // Some other chars at the start of a value caused the parser to fail, so for now 2409 // encapsulate if we start in anything less than '#'. We are being conservative 2410 // by including the default comment char too. 
2411 quote = true; 2412 } else { 2413 while (pos < len) { 2414 c = charSeq.charAt(pos); 2415 if (c == Constants.LF || c == Constants.CR || c == quoteChar || c == escapeChar || isDelimiter(c, charSeq, pos, delim, delimLength)) { 2416 quote = true; 2417 break; 2418 } 2419 pos++; 2420 } 2421 2422 if (!quote) { 2423 pos = len - 1; 2424 c = charSeq.charAt(pos); 2425 // Some other chars at the end caused the parser to fail, so for now 2426 // encapsulate if we end in anything less than ' ' 2427 if (isTrimChar(c)) { 2428 quote = true; 2429 } 2430 } 2431 } 2432 } 2433 if (!quote) { 2434 // No encapsulation needed - write out the original value 2435 out.append(charSeq, start, len); 2436 return; 2437 } 2438 break; 2439 default: 2440 throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy); 2441 } 2442 if (!quote) { 2443 // No encapsulation needed - write out the original value 2444 out.append(charSeq, start, len); 2445 return; 2446 } 2447 // We hit something that needed encapsulation 2448 out.append(quoteChar); 2449 // Pick up where we left off: pos should be positioned on the first character that caused 2450 // the need for encapsulation. 2451 while (pos < len) { 2452 final char c = charSeq.charAt(pos); 2453 if (c == quoteChar || c == escapeChar) { 2454 // write out the chunk up until this point 2455 out.append(charSeq, start, pos); 2456 out.append(escapeChar); // now output the escape 2457 start = pos; // and restart with the matched char 2458 } 2459 pos++; 2460 } 2461 // Write the last segment 2462 out.append(charSeq, start, pos); 2463 out.append(quoteChar); 2464 } 2465 2466 /** 2467 * Always use quotes unless QuoteMode is NONE, so we do not have to look ahead. 
2468 * 2469 * @param reader What to print 2470 * @param appendable Where to print it 2471 * @throws IOException If an I/O error occurs 2472 */ 2473 private void printWithQuotes(final Reader reader, final Appendable appendable) throws IOException { 2474 if (getQuoteMode() == QuoteMode.NONE) { 2475 printWithEscapes(reader, appendable); 2476 return; 2477 } 2478 final char quote = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional 2479 // (1) Append opening quote 2480 append(quote, appendable); 2481 // (2) Append Reader contents, doubling quotes 2482 int c; 2483 while (EOF != (c = reader.read())) { 2484 append((char) c, appendable); 2485 if (c == quote) { 2486 append(quote, appendable); 2487 } 2488 } 2489 // (3) Append closing quote 2490 append(quote, appendable); 2491 } 2492 2493 @Override 2494 public String toString() { 2495 final StringBuilder sb = new StringBuilder(); 2496 sb.append("Delimiter=<").append(delimiter).append('>'); 2497 if (isEscapeCharacterSet()) { 2498 sb.append(' '); 2499 sb.append("Escape=<").append(escapeCharacter).append('>'); 2500 } 2501 if (isQuoteCharacterSet()) { 2502 sb.append(' '); 2503 sb.append("QuoteChar=<").append(quoteCharacter).append('>'); 2504 } 2505 if (quoteMode != null) { 2506 sb.append(' '); 2507 sb.append("QuoteMode=<").append(quoteMode).append('>'); 2508 } 2509 if (isCommentMarkerSet()) { 2510 sb.append(' '); 2511 sb.append("CommentStart=<").append(commentMarker).append('>'); 2512 } 2513 if (isNullStringSet()) { 2514 sb.append(' '); 2515 sb.append("NullString=<").append(nullString).append('>'); 2516 } 2517 if (recordSeparator != null) { 2518 sb.append(' '); 2519 sb.append("RecordSeparator=<").append(recordSeparator).append('>'); 2520 } 2521 if (getIgnoreEmptyLines()) { 2522 sb.append(" EmptyLines:ignored"); 2523 } 2524 if (getIgnoreSurroundingSpaces()) { 2525 sb.append(" SurroundingSpaces:ignored"); 2526 } 2527 if (getIgnoreHeaderCase()) { 2528 sb.append(" IgnoreHeaderCase:ignored"); 2529 } 2530 
sb.append(" SkipHeaderRecord:").append(skipHeaderRecord); 2531 if (headerComments != null) { 2532 sb.append(' '); 2533 sb.append("HeaderComments:").append(Arrays.toString(headerComments)); 2534 } 2535 if (headers != null) { 2536 sb.append(' '); 2537 sb.append("Header:").append(Arrays.toString(headers)); 2538 } 2539 return sb.toString(); 2540 } 2541 2542 String trim(final String value) { 2543 return getTrim() ? value.trim() : value; 2544 } 2545 2546 /** 2547 * Verifies the validity and consistency of the attributes, and throws an {@link IllegalArgumentException} if necessary. 2548 * <p> 2549 * Because an instance can be used for both writing and parsing, not all conditions can be tested here. For example, allowMissingColumnNames is only used 2550 * for parsing, so it cannot be used here. 2551 * </p> 2552 * 2553 * @throws IllegalArgumentException Throw when any attribute is invalid or inconsistent with other attributes. 2554 */ 2555 private void validate() throws IllegalArgumentException { 2556 if (containsLineBreak(delimiter)) { 2557 throw new IllegalArgumentException("The delimiter cannot be a line break"); 2558 } 2559 if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) { // N.B. Explicit (un)boxing is intentional 2560 throw new IllegalArgumentException("The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')"); 2561 } 2562 if (escapeCharacter != null && contains(delimiter, escapeCharacter.charValue())) { // N.B. Explicit (un)boxing is intentional 2563 throw new IllegalArgumentException("The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')"); 2564 } 2565 if (commentMarker != null && contains(delimiter, commentMarker.charValue())) { // N.B. 
Explicit (un)boxing is intentional 2566 throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same ('" + commentMarker + "')"); 2567 } 2568 if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) { 2569 throw new IllegalArgumentException("The comment start character and the quoteChar cannot be the same ('" + commentMarker + "')"); 2570 } 2571 if (escapeCharacter != null && escapeCharacter.equals(commentMarker)) { 2572 throw new IllegalArgumentException("The comment start and the escape character cannot be the same ('" + commentMarker + "')"); 2573 } 2574 if (escapeCharacter == null && quoteMode == QuoteMode.NONE) { 2575 throw new IllegalArgumentException("Quote mode set to NONE but no escape character is set"); 2576 } 2577 // Validate headers 2578 if (headers != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) { 2579 final Set<String> dupCheckSet = new HashSet<>(headers.length); 2580 final boolean emptyDuplicatesAllowed = duplicateHeaderMode == DuplicateHeaderMode.ALLOW_EMPTY; 2581 for (final String header : headers) { 2582 final boolean blank = isBlank(header); 2583 // Sanitize all empty headers to the empty string "" when checking duplicates 2584 final boolean containsHeader = !dupCheckSet.add(blank ? "" : header); 2585 if (containsHeader && !(blank && emptyDuplicatesAllowed)) { 2586 throw new IllegalArgumentException( 2587 String.format( 2588 "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", 2589 header, Arrays.toString(headers))); 2590 } 2591 } 2592 } 2593 } 2594 2595 /** 2596 * Builds a new {@code CSVFormat} that allows duplicate header names. 
*
 * @return a new {@code CSVFormat} that allows duplicate header names
 * @since 1.7
 * @deprecated Use {@link Builder#setAllowDuplicateHeaderNames(boolean) Builder#setAllowDuplicateHeaderNames(true)}
 */
@Deprecated
public CSVFormat withAllowDuplicateHeaderNames() {
    return builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).build();
}

/**
 * Builds a new {@code CSVFormat} with duplicate header names behavior set to the given value.
 *
 * @param allowDuplicateHeaderNames the duplicate header names behavior: {@code true} selects {@code DuplicateHeaderMode.ALLOW_ALL}, {@code false} selects
 *                                  {@code DuplicateHeaderMode.ALLOW_EMPTY}.
 * @return a new {@code CSVFormat} with duplicate header names behavior set to the given value.
 * @since 1.7
 * @deprecated Use {@link Builder#setAllowDuplicateHeaderNames(boolean)}
 */
@Deprecated
public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
    final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY;
    return builder().setDuplicateHeaderMode(mode).build();
}

/**
 * Builds a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true}.
 *
 * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
 * @see Builder#setAllowMissingColumnNames(boolean)
 * @since 1.1
 * @deprecated Use {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)}
 */
@Deprecated
public CSVFormat withAllowMissingColumnNames() {
    return builder().setAllowMissingColumnNames(true).build();
}

/**
 * Builds a new {@code CSVFormat} with the missing column names behavior of the format set to the given value.
*
 * @param allowMissingColumnNames the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause
 *                                an {@link IllegalArgumentException} to be thrown.
 * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
 * @deprecated Use {@link Builder#setAllowMissingColumnNames(boolean)}
 */
@Deprecated
public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) {
    return builder().setAllowMissingColumnNames(allowMissingColumnNames).build();
}

/**
 * Builds a new {@code CSVFormat} with whether to flush on close.
 *
 * @param autoFlush whether to flush on close.
 * @return A new CSVFormat that is equal to this but with the specified autoFlush setting.
 * @since 1.6
 * @deprecated Use {@link Builder#setAutoFlush(boolean)}
 */
@Deprecated
public CSVFormat withAutoFlush(final boolean autoFlush) {
    return builder().setAutoFlush(autoFlush).build();
}

/**
 * Builds a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
 *
 * Note that the comment start character is only recognized at the start of a line.
 *
 * @param commentMarker the comment start marker
 * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
 * @throws IllegalArgumentException thrown if the specified character is a line break
 * @deprecated Use {@link Builder#setCommentMarker(char)}
 */
@Deprecated
public CSVFormat withCommentMarker(final char commentMarker) {
    return builder().setCommentMarker(commentMarker).build();
}

/**
 * Builds a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
*
 * Note that the comment start character is only recognized at the start of a line.
 *
 * @param commentMarker the comment start marker, use {@code null} to disable
 * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
 * @throws IllegalArgumentException thrown if the specified character is a line break
 * @deprecated Use {@link Builder#setCommentMarker(Character)}
 */
@Deprecated
public CSVFormat withCommentMarker(final Character commentMarker) {
    return builder().setCommentMarker(commentMarker).build();
}

/**
 * Builds a new {@code CSVFormat} with the delimiter of the format set to the specified character.
 *
 * @param delimiter the delimiter character
 * @return A new CSVFormat that is equal to this with the specified character as a delimiter
 * @throws IllegalArgumentException thrown if the specified character is a line break
 * @deprecated Use {@link Builder#setDelimiter(char)}
 */
@Deprecated
public CSVFormat withDelimiter(final char delimiter) {
    return builder().setDelimiter(delimiter).build();
}

/**
 * Builds a new {@code CSVFormat} with the escape character of the format set to the specified character.
 *
 * @param escape the escape character
 * @return A new CSVFormat that is equal to this but with the specified character as the escape character
 * @throws IllegalArgumentException thrown if the specified character is a line break
 * @deprecated Use {@link Builder#setEscape(char)}
 */
@Deprecated
public CSVFormat withEscape(final char escape) {
    return builder().setEscape(escape).build();
}

/**
 * Builds a new {@code CSVFormat} with the escape character of the format set to the specified character.
*
 * @param escape the escape character, use {@code null} to disable
 * @return A new CSVFormat that is equal to this but with the specified character as the escape character
 * @throws IllegalArgumentException thrown if the specified character is a line break
 * @deprecated Use {@link Builder#setEscape(Character)}
 */
@Deprecated
public CSVFormat withEscape(final Character escape) {
    return builder().setEscape(escape).build();
}

/**
 * Builds a new {@code CSVFormat} using the first record as header.
 *
 * <p>
 * Calling this method is equivalent to calling:
 * </p>
 *
 * <pre>
 * CSVFormat format = aFormat.withHeader().withSkipHeaderRecord();
 * </pre>
 *
 * @return A new CSVFormat that is equal to this but using the first record as header.
 * @see Builder#setSkipHeaderRecord(boolean)
 * @see Builder#setHeader(String...)
 * @since 1.3
 * @deprecated Use {@link Builder#setHeader(String...) Builder#setHeader()}.{@link Builder#setSkipHeaderRecord(boolean) setSkipHeaderRecord(true)}.
 */
@Deprecated
public CSVFormat withFirstRecordAsHeader() {
    // An empty header combined with skipping the header record.
    // @formatter:off
    return builder()
            .setHeader()
            .setSkipHeaderRecord(true)
            .build();
    // @formatter:on
}

/**
 * Builds a new {@code CSVFormat} with the header of the format defined by the enum class.
 *
 * <p>
 * Example:
 * </p>
 *
 * <pre>
 * public enum Header {
 *     Name, Email, Phone
 * }
 *
 * CSVFormat format = aformat.withHeader(Header.class);
 * </pre>
 * <p>
 * The header is also used by the {@link CSVPrinter}.
 * </p>
 *
 * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
 * @return A new CSVFormat that is equal to this but with the specified header
 * @see Builder#setHeader(String...)
* @see Builder#setSkipHeaderRecord(boolean)
 * @since 1.3
 * @deprecated Use {@link Builder#setHeader(Class)}
 */
@Deprecated
public CSVFormat withHeader(final Class<? extends Enum<?>> headerEnum) {
    return builder().setHeader(headerEnum).build();
}

/**
 * Builds a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the
 * input file with:
 *
 * <pre>
 * CSVFormat format = aformat.withHeader();
 * </pre>
 *
 * or specified manually with:
 *
 * <pre>
 * CSVFormat format = aformat.withHeader(resultSet);
 * </pre>
 * <p>
 * The header is also used by the {@link CSVPrinter}.
 * </p>
 *
 * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
 * @return A new CSVFormat that is equal to this but with the specified header
 * @throws SQLException if a database access error occurs or this method is called on a closed result set.
 * @since 1.1
 * @deprecated Use {@link Builder#setHeader(ResultSet)}
 */
@Deprecated
public CSVFormat withHeader(final ResultSet resultSet) throws SQLException {
    return builder().setHeader(resultSet).build();
}

/**
 * Builds a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the
 * input file with:
 *
 * <pre>
 * CSVFormat format = aformat.withHeader();
 * </pre>
 *
 * or specified manually with:
 *
 * <pre>
 * CSVFormat format = aformat.withHeader(metaData);
 * </pre>
 * <p>
 * The header is also used by the {@link CSVPrinter}.
* </p>
 *
 * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
 * @return A new CSVFormat that is equal to this but with the specified header
 * @throws SQLException if a database access error occurs or this method is called on a closed result set.
 * @since 1.1
 * @deprecated Use {@link Builder#setHeader(ResultSetMetaData)}
 */
@Deprecated
public CSVFormat withHeader(final ResultSetMetaData resultSetMetaData) throws SQLException {
    return builder().setHeader(resultSetMetaData).build();
}

/**
 * Builds a new {@code CSVFormat} with the header of the format set to the given values. The header can either be parsed automatically from the input file
 * with:
 *
 * <pre>
 * CSVFormat format = aformat.withHeader();
 * </pre>
 *
 * or specified manually with:
 *
 * <pre>{@code
 * CSVFormat format = aformat.withHeader("name", "email", "phone");
 * }</pre>
 * <p>
 * The header is also used by the {@link CSVPrinter}.
 * </p>
 *
 * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
 * @return A new CSVFormat that is equal to this but with the specified header
 * @see Builder#setSkipHeaderRecord(boolean)
 * @deprecated Use {@link Builder#setHeader(String...)}
 */
@Deprecated
public CSVFormat withHeader(final String... header) {
    return builder().setHeader(header).build();
}

/**
 * Builds a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will be printed first, before the headers.
 * This setting is ignored by the parser.
*
 * <pre>{@code
 * CSVFormat format = aformat.withHeaderComments("Generated by Apache Commons CSV.", Instant.now());
 * }</pre>
 *
 * @param headerComments the headerComments which will be printed by the Printer before the actual CSV data.
 * @return A new CSVFormat that is equal to this but with the specified header comments
 * @see Builder#setSkipHeaderRecord(boolean)
 * @since 1.1
 * @deprecated Use {@link Builder#setHeaderComments(Object...)}
 */
@Deprecated
public CSVFormat withHeaderComments(final Object... headerComments) {
    return builder().setHeaderComments(headerComments).build();
}

/**
 * Builds a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}.
 *
 * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
 * @see Builder#setIgnoreEmptyLines(boolean)
 * @since 1.1
 * @deprecated Use {@link Builder#setIgnoreEmptyLines(boolean) Builder#setIgnoreEmptyLines(true)}
 */
@Deprecated
public CSVFormat withIgnoreEmptyLines() {
    return builder().setIgnoreEmptyLines(true).build();
}

/**
 * Builds a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value.
 *
 * @param ignoreEmptyLines the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate empty
 *                         lines to empty records.
 * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
 * @deprecated Use {@link Builder#setIgnoreEmptyLines(boolean)}
 */
@Deprecated
public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
    return builder().setIgnoreEmptyLines(ignoreEmptyLines).build();
}

/**
 * Builds a new {@code CSVFormat} with the header ignore case behavior set to {@code true}.
2917 * 2918 * @return A new CSVFormat that will ignore the new case header name behavior. 2919 * @see Builder#setIgnoreHeaderCase(boolean) 2920 * @since 1.3 2921 * @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean) Builder#setIgnoreHeaderCase(true)} 2922 */ 2923 @Deprecated 2924 public CSVFormat withIgnoreHeaderCase() { 2925 return builder().setIgnoreHeaderCase(true).build(); 2926 } 2927 2928 /** 2929 * Builds a new {@code CSVFormat} with whether header names should be accessed ignoring case. 2930 * 2931 * @param ignoreHeaderCase the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. 2932 * @return A new CSVFormat that will ignore case header name if specified as {@code true} 2933 * @since 1.3 2934 * @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean)} 2935 */ 2936 @Deprecated 2937 public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) { 2938 return builder().setIgnoreHeaderCase(ignoreHeaderCase).build(); 2939 } 2940 2941 /** 2942 * Builds a new {@code CSVFormat} with the parser trimming behavior of the format set to {@code true}. 2943 * 2944 * @return A new CSVFormat that is equal to this but with the specified parser trimming behavior. 2945 * @see Builder#setIgnoreSurroundingSpaces(boolean) 2946 * @since 1.1 2947 * @deprecated Use {@link Builder#setIgnoreSurroundingSpaces(boolean) Builder#setIgnoreSurroundingSpaces(true)} 2948 */ 2949 @Deprecated 2950 public CSVFormat withIgnoreSurroundingSpaces() { 2951 return builder().setIgnoreSurroundingSpaces(true).build(); 2952 } 2953 2954 /** 2955 * Builds a new {@code CSVFormat} with the parser trimming behavior of the format set to the given value. 2956 * 2957 * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is. 2958 * @return A new CSVFormat that is equal to this but with the specified trimming behavior. 
2959 * @deprecated Use {@link Builder#setIgnoreSurroundingSpaces(boolean)} 2960 */ 2961 @Deprecated 2962 public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { 2963 return builder().setIgnoreSurroundingSpaces(ignoreSurroundingSpaces).build(); 2964 } 2965 2966 /** 2967 * Builds a new {@code CSVFormat} with conversions to and from null for strings on input and output. 2968 * <ul> 2969 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li> 2970 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li> 2971 * </ul> 2972 * 2973 * @param nullString the String to convert to and from {@code null}. No substitution occurs if {@code null} 2974 * @return A new CSVFormat that is equal to this but with the specified null conversion string. 2975 * @deprecated Use {@link Builder#setNullString(String)} 2976 */ 2977 @Deprecated 2978 public CSVFormat withNullString(final String nullString) { 2979 return builder().setNullString(nullString).build(); 2980 } 2981 2982 /** 2983 * Builds a new {@code CSVFormat} with the quoteChar of the format set to the specified character. 2984 * 2985 * @param quoteChar the quote character 2986 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar 2987 * @throws IllegalArgumentException thrown if the specified character is a line break 2988 * @deprecated Use {@link Builder#setQuote(char)} 2989 */ 2990 @Deprecated 2991 public CSVFormat withQuote(final char quoteChar) { 2992 return builder().setQuote(quoteChar).build(); 2993 } 2994 2995 /** 2996 * Builds a new {@code CSVFormat} with the quoteChar of the format set to the specified character. 2997 * 2998 * @param quoteChar the quote character, use {@code null} to disable. 
2999 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar 3000 * @throws IllegalArgumentException thrown if the specified character is a line break 3001 * @deprecated Use {@link Builder#setQuote(Character)} 3002 */ 3003 @Deprecated 3004 public CSVFormat withQuote(final Character quoteChar) { 3005 return builder().setQuote(quoteChar).build(); 3006 } 3007 3008 /** 3009 * Builds a new {@code CSVFormat} with the output quote policy of the format set to the specified value. 3010 * 3011 * @param quoteMode the quote policy to use for output. 3012 * 3013 * @return A new CSVFormat that is equal to this but with the specified quote policy 3014 * @deprecated Use {@link Builder#setQuoteMode(QuoteMode)} 3015 */ 3016 @Deprecated 3017 public CSVFormat withQuoteMode(final QuoteMode quoteMode) { 3018 return builder().setQuoteMode(quoteMode).build(); 3019 } 3020 3021 /** 3022 * Builds a new {@code CSVFormat} with the record separator of the format set to the specified character. 3023 * 3024 * <p> 3025 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and 3026 * "\r\n" 3027 * </p> 3028 * 3029 * @param recordSeparator the record separator to use for output. 3030 * @return A new CSVFormat that is equal to this but with the specified output record separator 3031 * @deprecated Use {@link Builder#setRecordSeparator(char)} 3032 */ 3033 @Deprecated 3034 public CSVFormat withRecordSeparator(final char recordSeparator) { 3035 return builder().setRecordSeparator(recordSeparator).build(); 3036 } 3037 3038 /** 3039 * Builds a new {@code CSVFormat} with the record separator of the format set to the specified String. 3040 * 3041 * <p> 3042 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. 
Parsing currently only works for inputs with '\n', '\r' and 3043 * "\r\n" 3044 * </p> 3045 * 3046 * @param recordSeparator the record separator to use for output. 3047 * @return A new CSVFormat that is equal to this but with the specified output record separator 3048 * @throws IllegalArgumentException if recordSeparator is none of CR, LF or CRLF 3049 * @deprecated Use {@link Builder#setRecordSeparator(String)} 3050 */ 3051 @Deprecated 3052 public CSVFormat withRecordSeparator(final String recordSeparator) { 3053 return builder().setRecordSeparator(recordSeparator).build(); 3054 } 3055 3056 /** 3057 * Builds a new {@code CSVFormat} with skipping the header record set to {@code true}. 3058 * 3059 * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. 3060 * @see Builder#setSkipHeaderRecord(boolean) 3061 * @see Builder#setHeader(String...) 3062 * @since 1.1 3063 * @deprecated Use {@link Builder#setSkipHeaderRecord(boolean) Builder#setSkipHeaderRecord(true)} 3064 */ 3065 @Deprecated 3066 public CSVFormat withSkipHeaderRecord() { 3067 return builder().setSkipHeaderRecord(true).build(); 3068 } 3069 3070 /** 3071 * Builds a new {@code CSVFormat} with whether to skip the header record. 3072 * 3073 * @param skipHeaderRecord whether to skip the header record. 3074 * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. 3075 * @see Builder#setHeader(String...) 3076 * @deprecated Use {@link Builder#setSkipHeaderRecord(boolean)} 3077 */ 3078 @Deprecated 3079 public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) { 3080 return builder().setSkipHeaderRecord(skipHeaderRecord).build(); 3081 } 3082 3083 /** 3084 * Builds a new {@code CSVFormat} with the record separator of the format set to the operating system's line separator string, typically CR+LF on Windows 3085 * and LF on Linux. 
3086 * 3087 * <p> 3088 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and 3089 * "\r\n" 3090 * </p> 3091 * 3092 * @return A new CSVFormat that is equal to this but with the operating system's line separator string. 3093 * @since 1.6 3094 * @deprecated Use {@link Builder#setRecordSeparator(String) setRecordSeparator(System.lineSeparator())} 3095 */ 3096 @Deprecated 3097 public CSVFormat withSystemRecordSeparator() { 3098 return builder().setRecordSeparator(System.lineSeparator()).build(); 3099 } 3100 3101 /** 3102 * Builds a new {@code CSVFormat} to add a trailing delimiter. 3103 * 3104 * @return A new CSVFormat that is equal to this but with the trailing delimiter setting. 3105 * @since 1.3 3106 * @deprecated Use {@link Builder#setTrailingDelimiter(boolean) Builder#setTrailingDelimiter(true)} 3107 */ 3108 @Deprecated 3109 public CSVFormat withTrailingDelimiter() { 3110 return builder().setTrailingDelimiter(true).build(); 3111 } 3112 3113 /** 3114 * Builds a new {@code CSVFormat} with whether to add a trailing delimiter. 3115 * 3116 * @param trailingDelimiter whether to add a trailing delimiter. 3117 * @return A new CSVFormat that is equal to this but with the specified trailing delimiter setting. 3118 * @since 1.3 3119 * @deprecated Use {@link Builder#setTrailingDelimiter(boolean)} 3120 */ 3121 @Deprecated 3122 public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) { 3123 return builder().setTrailingDelimiter(trailingDelimiter).build(); 3124 } 3125 3126 /** 3127 * Builds a new {@code CSVFormat} to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used. 3128 * 3129 * @return A new CSVFormat that is equal to this but with the trim setting on. 
* @since 1.3
     * @deprecated Use {@link Builder#setTrim(boolean) Builder#setTrim(true)}
     */
    @Deprecated
    public CSVFormat withTrim() {
        return builder()
                .setTrim(true)
                .build();
    }

    /**
     * Returns a new {@code CSVFormat} that copies this format and sets whether leading and trailing blanks are trimmed. See {@link #getTrim()} for details
     * of where this is used.
     *
     * @param trim whether to trim leading and trailing blanks.
     * @return a new CSVFormat equal to this one except for the specified trim setting.
     * @since 1.3
     * @deprecated Use {@link Builder#setTrim(boolean)}
     */
    @Deprecated
    public CSVFormat withTrim(final boolean trim) {
        return builder()
                .setTrim(trim)
                .build();
    }
}