1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.csv;
19
20 import static org.apache.commons.csv.Constants.CR;
21 import static org.apache.commons.csv.Constants.CRLF;
22 import static org.apache.commons.csv.Constants.LF;
23 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
24 import static org.junit.jupiter.api.Assertions.assertEquals;
25 import static org.junit.jupiter.api.Assertions.assertFalse;
26 import static org.junit.jupiter.api.Assertions.assertInstanceOf;
27 import static org.junit.jupiter.api.Assertions.assertNotNull;
28 import static org.junit.jupiter.api.Assertions.assertNull;
29 import static org.junit.jupiter.api.Assertions.assertThrows;
30 import static org.junit.jupiter.api.Assertions.assertTrue;
31
32 import java.io.File;
33 import java.io.IOException;
34 import java.io.InputStreamReader;
35 import java.io.PipedReader;
36 import java.io.PipedWriter;
37 import java.io.Reader;
38 import java.io.StringReader;
39 import java.io.StringWriter;
40 import java.io.UncheckedIOException;
41 import java.net.URL;
42 import java.nio.charset.Charset;
43 import java.nio.charset.StandardCharsets;
44 import java.nio.file.Files;
45 import java.nio.file.Path;
46 import java.nio.file.Paths;
47 import java.util.ArrayList;
48 import java.util.Arrays;
49 import java.util.Iterator;
50 import java.util.List;
51 import java.util.Map;
52 import java.util.NoSuchElementException;
53 import java.util.stream.Collectors;
54 import java.util.stream.Stream;
55
56 import org.apache.commons.io.input.BOMInputStream;
57 import org.apache.commons.io.input.BrokenInputStream;
58 import org.junit.jupiter.api.Assertions;
59 import org.junit.jupiter.api.Disabled;
60 import org.junit.jupiter.api.Test;
61 import org.junit.jupiter.params.ParameterizedTest;
62 import org.junit.jupiter.params.provider.EnumSource;
63
64
65
66
67
68
69
70 public class CSVParserTest {
71
72 private static final Charset UTF_8 = StandardCharsets.UTF_8;
73
74 private static final String UTF_8_NAME = UTF_8.name();
75
76 private static final String CSV_INPUT = "a,b,c,d\n" + " a , b , 1 2 \n" + "\"foo baar\", b,\n" +
77
78 " \"foo\n,,\n\"\",,\n\"\"\",d,e\n";
79
80 private static final String CSV_INPUT_1 = "a,b,c,d";
81
82 private static final String CSV_INPUT_2 = "a,b,1 2";
83
84 private static final String[][] RESULT = { { "a", "b", "c", "d" }, { "a", "b", "1 2" }, { "foo baar", "b", "" }, { "foo\n,,\n\",,\n\"", "d", "e" } };
85
86
87 static private final String CSV_INPUT_NO_COMMENT = "A,B" + CRLF + "1,2" + CRLF;
88
89
90 static private final String CSV_INPUT_HEADER_COMMENT = "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF;
91
92
93 static private final String CSV_INPUT_HEADER_TRAILER_COMMENT = "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF + "# comment";
94
95
96 static private final String CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT = "# multi-line" + CRLF + "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF +
97 "# multi-line" + CRLF + "# comment";
98
99
100 static private final CSVFormat FORMAT_AUTO_HEADER = CSVFormat.Builder.create(CSVFormat.DEFAULT).setCommentMarker('#').setHeader().build();
101
102
103
104 static private final CSVFormat FORMAT_EXPLICIT_HEADER = CSVFormat.Builder.create(CSVFormat.DEFAULT)
105 .setSkipHeaderRecord(true)
106 .setCommentMarker('#')
107 .setHeader("A", "B")
108 .build();
109
110
111
112
113 CSVFormat FORMAT_EXPLICIT_HEADER_NOSKIP = CSVFormat.Builder.create(CSVFormat.DEFAULT)
114 .setCommentMarker('#')
115 .setHeader("A", "B")
116 .build();
117
118
119 @SuppressWarnings("resource")
120 private BOMInputStream createBOMInputStream(final String resource) throws IOException {
121 return new BOMInputStream(ClassLoader.getSystemClassLoader().getResource(resource).openStream());
122 }
123
124 CSVRecord parse(final CSVParser parser, final int failParseRecordNo) throws IOException {
125 if (parser.getRecordNumber() + 1 == failParseRecordNo) {
126 assertThrows(IOException.class, () -> parser.nextRecord());
127 return null;
128 }
129 return parser.nextRecord();
130 }
131
132 private void parseFully(final CSVParser parser) {
133 parser.forEach(Assertions::assertNotNull);
134 }
135
136 @Test
137 public void testBackslashEscaping() throws IOException {
138
139
140
141
142
143 final String code = "one,two,three\n" +
144 "'',''\n" +
145 "/',/'\n" +
146 "'/'','/''\n" +
147 "'''',''''\n" +
148 "/,,/,\n" +
149 "//,//\n" + // 6) escape escaped
150 "'//','//'\n" + // 7) escape escaped in encapsulation
151 " 8 , \"quoted \"\" /\" // string\" \n" + // don't eat spaces
152 "9, /\n \n" +
153 "";
154 final String[][] res = {{"one", "two", "three"},
155 {"", ""},
156 {"'", "'"},
157 {"'", "'"},
158 {"'", "'"},
159 {",", ","},
160 {"/", "/"},
161 {"/", "/"},
162 {" 8 ", " \"quoted \"\" /\" / string\" "}, {"9", " \n "} };
163
164 final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(CRLF).withEscape('/').withIgnoreEmptyLines();
165 try (final CSVParser parser = CSVParser.parse(code, format)) {
166 final List<CSVRecord> records = parser.getRecords();
167 assertFalse(records.isEmpty());
168 Utils.compare("Records do not match expected result", res, records);
169 }
170 }
171
172 @Test
173 public void testBackslashEscaping2() throws IOException {
174
175
176
177
178 final String code = "" + " , , \n" +
179 " \t , , \n" +
180 " // , /, , /,\n" + // 3)
181 "";
182 final String[][] res = {{" ", " ", " "},
183 {" \t ", " ", " "},
184 {" / ", " , ", " ,"},
185 };
186
187 final CSVFormat format = CSVFormat.newFormat(',').withRecordSeparator(CRLF).withEscape('/').withIgnoreEmptyLines();
188 try (final CSVParser parser = CSVParser.parse(code, format)) {
189 final List<CSVRecord> records = parser.getRecords();
190 assertFalse(records.isEmpty());
191 Utils.compare("", res, records);
192 }
193 }
194
195 @Test
196 @Disabled
197 public void testBackslashEscapingOld() throws IOException {
198 final String code = "one,two,three\n" + "on\\\"e,two\n" + "on\"e,two\n" + "one,\"tw\\\"o\"\n" + "one,\"t\\,wo\"\n" + "one,two,\"th,ree\"\n" +
199 "\"a\\\\\"\n" + "a\\,b\n" + "\"a\\\\,b\"";
200 final String[][] res = { { "one", "two", "three" }, { "on\\\"e", "two" }, { "on\"e", "two" }, { "one", "tw\"o" }, { "one", "t\\,wo" },
201
202
203
204
205 { "one", "two", "th,ree" }, { "a\\\\" },
206 { "a\\", "b" },
207 { "a\\\\,b" }
208 };
209 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
210 final List<CSVRecord> records = parser.getRecords();
211 assertEquals(res.length, records.size());
212 assertFalse(records.isEmpty());
213 for (int i = 0; i < res.length; i++) {
214 assertArrayEquals(res[i], records.get(i).values());
215 }
216 }
217 }
218
219 @Test
220 @Disabled("CSV-107")
221 public void testBOM() throws IOException {
222 final URL url = ClassLoader.getSystemClassLoader().getResource("org/apache/commons/csv/CSVFileParser/bom.csv");
223 try (final CSVParser parser = CSVParser.parse(url, StandardCharsets.UTF_8, CSVFormat.EXCEL.withHeader())) {
224 parser.forEach(record -> assertNotNull(record.get("Date")));
225 }
226 }
227
228 @Test
229 public void testBOMInputStreamParserWithInputStream() throws IOException {
230 try (final BOMInputStream inputStream = createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv");
231 final CSVParser parser = CSVParser.parse(inputStream, UTF_8, CSVFormat.EXCEL.withHeader())) {
232 parser.forEach(record -> assertNotNull(record.get("Date")));
233 }
234 }
235
236 @Test
237 public void testBOMInputStreamParserWithReader() throws IOException {
238 try (final Reader reader = new InputStreamReader(createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME);
239 final CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) {
240 parser.forEach(record -> assertNotNull(record.get("Date")));
241 }
242 }
243
244 @Test
245 public void testBOMInputStreamParseWithReader() throws IOException {
246 try (final Reader reader = new InputStreamReader(createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME);
247 final CSVParser parser = CSVParser.parse(reader, CSVFormat.EXCEL.withHeader())) {
248 parser.forEach(record -> assertNotNull(record.get("Date")));
249 }
250 }
251
252 @Test
253 public void testCarriageReturnEndings() throws IOException {
254 final String code = "foo\rbaar,\rhello,world\r,kanu";
255 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
256 final List<CSVRecord> records = parser.getRecords();
257 assertEquals(4, records.size());
258 }
259 }
260
261 @Test
262 public void testCarriageReturnLineFeedEndings() throws IOException {
263 final String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
264 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
265 final List<CSVRecord> records = parser.getRecords();
266 assertEquals(4, records.size());
267 }
268 }
269
270 @Test
271 public void testClose() throws Exception {
272 final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z");
273 final Iterator<CSVRecord> records;
274 try (final CSVParser parser = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in)) {
275 records = parser.iterator();
276 assertTrue(records.hasNext());
277 }
278 assertFalse(records.hasNext());
279 assertThrows(NoSuchElementException.class, records::next);
280 }
281
282 @Test
283 public void testCSV141CSVFormat_DEFAULT() throws Exception {
284 testCSV141Failure(CSVFormat.DEFAULT, 3);
285 }
286
287 @Test
288 public void testCSV141CSVFormat_INFORMIX_UNLOAD() throws Exception {
289 testCSV141Failure(CSVFormat.INFORMIX_UNLOAD, 1);
290 }
291
292 @Test
293 public void testCSV141CSVFormat_INFORMIX_UNLOAD_CSV() throws Exception {
294 testCSV141Failure(CSVFormat.INFORMIX_UNLOAD_CSV, 3);
295 }
296
297 @Test
298 public void testCSV141CSVFormat_ORACLE() throws Exception {
299 testCSV141Failure(CSVFormat.ORACLE, 2);
300 }
301
302 @Test
303 public void testCSV141CSVFormat_POSTGRESQL_CSV() throws Exception {
304 testCSV141Failure(CSVFormat.POSTGRESQL_CSV, 3);
305 }
306
307 @Test
308 public void testCSV141Excel() throws Exception {
309 testCSV141Ok(CSVFormat.EXCEL);
310 }
311
312 private void testCSV141Failure(final CSVFormat format, final int failParseRecordNo) throws IOException {
313 final Path path = Paths.get("src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv");
314 try (final CSVParser parser = CSVParser.parse(path, StandardCharsets.UTF_8, format)) {
315
316 CSVRecord record = parse(parser, failParseRecordNo);
317 if (record == null) {
318 return;
319 }
320 assertEquals("1414770317901", record.get(0));
321 assertEquals("android.widget.EditText", record.get(1));
322 assertEquals("pass sem1 _84*|*", record.get(2));
323 assertEquals("0", record.get(3));
324 assertEquals("pass sem1 _8", record.get(4));
325 assertEquals(5, record.size());
326
327 record = parse(parser, failParseRecordNo);
328 if (record == null) {
329 return;
330 }
331 assertEquals("1414770318470", record.get(0));
332 assertEquals("android.widget.EditText", record.get(1));
333 assertEquals("pass sem1 _84:|", record.get(2));
334 assertEquals("0", record.get(3));
335 assertEquals("pass sem1 _84:\\", record.get(4));
336 assertEquals(5, record.size());
337
338 assertThrows(IOException.class, () -> parser.nextRecord());
339 }
340 }
341
342 private void testCSV141Ok(final CSVFormat format) throws IOException {
343 final Path path = Paths.get("src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv");
344 try (final CSVParser parser = CSVParser.parse(path, StandardCharsets.UTF_8, format)) {
345
346 CSVRecord record = parser.nextRecord();
347 assertEquals("1414770317901", record.get(0));
348 assertEquals("android.widget.EditText", record.get(1));
349 assertEquals("pass sem1 _84*|*", record.get(2));
350 assertEquals("0", record.get(3));
351 assertEquals("pass sem1 _8", record.get(4));
352 assertEquals(5, record.size());
353
354 record = parser.nextRecord();
355 assertEquals("1414770318470", record.get(0));
356 assertEquals("android.widget.EditText", record.get(1));
357 assertEquals("pass sem1 _84:|", record.get(2));
358 assertEquals("0", record.get(3));
359 assertEquals("pass sem1 _84:\\", record.get(4));
360 assertEquals(5, record.size());
361
362 record = parser.nextRecord();
363 assertEquals("1414770318327", record.get(0));
364 assertEquals("android.widget.EditText", record.get(1));
365 assertEquals("pass sem1\n1414770318628\"", record.get(2));
366 assertEquals("android.widget.EditText", record.get(3));
367 assertEquals("pass sem1 _84*|*", record.get(4));
368 assertEquals("0", record.get(5));
369 assertEquals("pass sem1\n", record.get(6));
370 assertEquals(7, record.size());
371
372 record = parser.nextRecord();
373 assertNull(record);
374 }
375 }
376
377 @Test
378 public void testCSV141RFC4180() throws Exception {
379 testCSV141Failure(CSVFormat.RFC4180, 3);
380 }
381
382 @Test
383 public void testCSV235() throws IOException {
384 final String dqString = "\"aaa\",\"b\"\"bb\",\"ccc\"";
385 try (final CSVParser parser = CSVFormat.RFC4180.parse(new StringReader(dqString))) {
386 final Iterator<CSVRecord> records = parser.iterator();
387 final CSVRecord record = records.next();
388 assertFalse(records.hasNext());
389 assertEquals(3, record.size());
390 assertEquals("aaa", record.get(0));
391 assertEquals("b\"bb", record.get(1));
392 assertEquals("ccc", record.get(2));
393 }
394 }
395
396 @Test
397 public void testCSV57() throws Exception {
398 try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) {
399 final List<CSVRecord> list = parser.getRecords();
400 assertNotNull(list);
401 assertEquals(0, list.size());
402 }
403 }
404
405 @Test
406 public void testDefaultFormat() throws IOException {
407
408 final String code = "" + "a,b#\n" +
409 "\"\n\",\" \",#\n" +
410 "#,\"\"\n" +
411 "# Final comment\n"
412 ;
413
414 final String[][] res = { { "a", "b#" }, { "\n", " ", "#" }, { "#", "" }, { "# Final comment" } };
415 CSVFormat format = CSVFormat.DEFAULT;
416 assertFalse(format.isCommentMarkerSet());
417 final String[][] res_comments = { { "a", "b#" }, { "\n", " ", "#" } };
418 try (final CSVParser parser = CSVParser.parse(code, format)) {
419 final List<CSVRecord> records = parser.getRecords();
420 assertFalse(records.isEmpty());
421 Utils.compare("Failed to parse without comments", res, records);
422 format = CSVFormat.DEFAULT.withCommentMarker('#');
423 }
424 try (final CSVParser parser = CSVParser.parse(code, format)) {
425 final List<CSVRecord> records = parser.getRecords();
426 Utils.compare("Failed to parse with comments", res_comments, records);
427 }
428 }
429
430 @Test
431 public void testDuplicateHeadersAllowedByDefault() throws Exception {
432 try (CSVParser parser = CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader())) {
433
434 }
435 }
436
437 @Test
438 public void testDuplicateHeadersNotAllowed() {
439 assertThrows(IllegalArgumentException.class,
440 () -> CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader().withAllowDuplicateHeaderNames(false)));
441 }
442
443 @Test
444 public void testEmptyFile() throws Exception {
445 try (final CSVParser parser = CSVParser.parse(Paths.get("src/test/resources/org/apache/commons/csv/empty.txt"), StandardCharsets.UTF_8,
446 CSVFormat.DEFAULT)) {
447 assertNull(parser.nextRecord());
448 }
449 }
450
451 @Test
452 public void testEmptyFileHeaderParsing() throws Exception {
453 try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT.withFirstRecordAsHeader())) {
454 assertNull(parser.nextRecord());
455 assertTrue(parser.getHeaderNames().isEmpty());
456 }
457 }
458
459 @Test
460 public void testEmptyLineBehaviorCSV() throws Exception {
461 final String[] codes = { "hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n" };
462 final String[][] res = { { "hello", "" }
463 };
464 for (final String code : codes) {
465 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
466 final List<CSVRecord> records = parser.getRecords();
467 assertEquals(res.length, records.size());
468 assertFalse(records.isEmpty());
469 for (int i = 0; i < res.length; i++) {
470 assertArrayEquals(res[i], records.get(i).values());
471 }
472 }
473 }
474 }
475
476 @Test
477 public void testEmptyLineBehaviorExcel() throws Exception {
478 final String[] codes = { "hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n" };
479 final String[][] res = { { "hello", "" }, { "" },
480 { "" } };
481 for (final String code : codes) {
482 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
483 final List<CSVRecord> records = parser.getRecords();
484 assertEquals(res.length, records.size());
485 assertFalse(records.isEmpty());
486 for (int i = 0; i < res.length; i++) {
487 assertArrayEquals(res[i], records.get(i).values());
488 }
489 }
490 }
491 }
492
493 @Test
494 public void testEmptyString() throws Exception {
495 try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) {
496 assertNull(parser.nextRecord());
497 }
498 }
499
500 @Test
501 public void testEndOfFileBehaviorCSV() throws Exception {
502 final String[] codes = { "hello,\r\n\r\nworld,\r\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\r\n", "hello,\r\n\r\nworld,\"\"",
503 "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\n", "hello,\r\n\r\nworld,\"\"" };
504 final String[][] res = { { "hello", "" },
505 { "world", "" } };
506 for (final String code : codes) {
507 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
508 final List<CSVRecord> records = parser.getRecords();
509 assertEquals(res.length, records.size());
510 assertFalse(records.isEmpty());
511 for (int i = 0; i < res.length; i++) {
512 assertArrayEquals(res[i], records.get(i).values());
513 }
514 }
515 }
516 }
517
518 @Test
519 public void testEndOfFileBehaviorExcel() throws Exception {
520 final String[] codes = { "hello,\r\n\r\nworld,\r\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\r\n", "hello,\r\n\r\nworld,\"\"",
521 "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\n", "hello,\r\n\r\nworld,\"\"" };
522 final String[][] res = { { "hello", "" }, { "" },
523 { "world", "" } };
524
525 for (final String code : codes) {
526 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
527 final List<CSVRecord> records = parser.getRecords();
528 assertEquals(res.length, records.size());
529 assertFalse(records.isEmpty());
530 for (int i = 0; i < res.length; i++) {
531 assertArrayEquals(res[i], records.get(i).values());
532 }
533 }
534 }
535 }
536
537 @Test
538 public void testExcelFormat1() throws IOException {
539 final String code = "value1,value2,value3,value4\r\na,b,c,d\r\n x,,," + "\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n";
540 final String[][] res = { { "value1", "value2", "value3", "value4" }, { "a", "b", "c", "d" }, { " x", "", "", "" }, { "" },
541 { "\"hello\"", " \"world\"", "abc\ndef", "" } };
542 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
543 final List<CSVRecord> records = parser.getRecords();
544 assertEquals(res.length, records.size());
545 assertFalse(records.isEmpty());
546 for (int i = 0; i < res.length; i++) {
547 assertArrayEquals(res[i], records.get(i).values());
548 }
549 }
550 }
551
552 @Test
553 public void testExcelFormat2() throws Exception {
554 final String code = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n";
555 final String[][] res = { { "foo", "baar" }, { "" }, { "hello", "" }, { "" }, { "world", "" } };
556 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
557 final List<CSVRecord> records = parser.getRecords();
558 assertEquals(res.length, records.size());
559 assertFalse(records.isEmpty());
560 for (int i = 0; i < res.length; i++) {
561 assertArrayEquals(res[i], records.get(i).values());
562 }
563 }
564 }
565
566
567
568
569 @Test
570 public void testExcelHeaderCountLessThanData() throws Exception {
571 final String code = "A,B,C,,\r\na,b,c,d,e\r\n";
572 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL.withHeader())) {
573 parser.getRecords().forEach(record -> {
574 assertEquals("a", record.get("A"));
575 assertEquals("b", record.get("B"));
576 assertEquals("c", record.get("C"));
577 });
578 }
579 }
580
581 @Test
582 public void testFirstEndOfLineCr() throws IOException {
583 final String data = "foo\rbaar,\rhello,world\r,kanu";
584 try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
585 final List<CSVRecord> records = parser.getRecords();
586 assertEquals(4, records.size());
587 assertEquals("\r", parser.getFirstEndOfLine());
588 }
589 }
590
591 @Test
592 public void testFirstEndOfLineCrLf() throws IOException {
593 final String data = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
594 try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
595 final List<CSVRecord> records = parser.getRecords();
596 assertEquals(4, records.size());
597 assertEquals("\r\n", parser.getFirstEndOfLine());
598 }
599 }
600
601 @Test
602 public void testFirstEndOfLineLf() throws IOException {
603 final String data = "foo\nbaar,\nhello,world\n,kanu";
604 try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
605 final List<CSVRecord> records = parser.getRecords();
606 assertEquals(4, records.size());
607 assertEquals("\n", parser.getFirstEndOfLine());
608 }
609 }
610
611 @Test
612 public void testForEach() throws Exception {
613 try (final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
614 final CSVParser parser = CSVFormat.DEFAULT.parse(in)) {
615 final List<CSVRecord> records = new ArrayList<>();
616 for (final CSVRecord record : parser) {
617 records.add(record);
618 }
619 assertEquals(3, records.size());
620 assertArrayEquals(new String[] { "a", "b", "c" }, records.get(0).values());
621 assertArrayEquals(new String[] { "1", "2", "3" }, records.get(1).values());
622 assertArrayEquals(new String[] { "x", "y", "z" }, records.get(2).values());
623 }
624 }
625
626 @Test
627 public void testGetHeaderComment_HeaderComment1() throws IOException {
628 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) {
629 parser.getRecords();
630
631 assertTrue(parser.hasHeaderComment());
632 assertEquals("header comment", parser.getHeaderComment());
633 }
634 }
635
636 @Test
637 public void testGetHeaderComment_HeaderComment2() throws IOException {
638 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER)) {
639 parser.getRecords();
640
641 assertTrue(parser.hasHeaderComment());
642 assertEquals("header comment", parser.getHeaderComment());
643 }
644 }
645
646 @Test
647 public void testGetHeaderComment_HeaderComment3() throws IOException {
648 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) {
649 parser.getRecords();
650
651 assertFalse(parser.hasHeaderComment());
652 assertNull(parser.getHeaderComment());
653 }
654 }
655
656 @Test
657 public void testGetHeaderComment_HeaderTrailerComment() throws IOException {
658 try (CSVParser parser = CSVParser.parse(CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) {
659 parser.getRecords();
660
661 assertTrue(parser.hasHeaderComment());
662 assertEquals("multi-line" + LF + "header comment", parser.getHeaderComment());
663 }
664 }
665
666 @Test
667 public void testGetHeaderComment_NoComment1() throws IOException {
668 try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_AUTO_HEADER)) {
669 parser.getRecords();
670
671 assertFalse(parser.hasHeaderComment());
672 assertNull(parser.getHeaderComment());
673 }
674 }
675
676 @Test
677 public void testGetHeaderComment_NoComment2() throws IOException {
678 try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_EXPLICIT_HEADER)) {
679 parser.getRecords();
680
681 assertFalse(parser.hasHeaderComment());
682 assertNull(parser.getHeaderComment());
683 }
684 }
685
686 @Test
687 public void testGetHeaderComment_NoComment3() throws IOException {
688 try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) {
689 parser.getRecords();
690
691 assertFalse(parser.hasHeaderComment());
692 assertNull(parser.getHeaderComment());
693 }
694 }
695
696 @Test
697 public void testGetHeaderMap() throws Exception {
698 try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
699 final Map<String, Integer> headerMap = parser.getHeaderMap();
700 final Iterator<String> columnNames = headerMap.keySet().iterator();
701
702 assertEquals("A", columnNames.next());
703 assertEquals("B", columnNames.next());
704 assertEquals("C", columnNames.next());
705 final Iterator<CSVRecord> records = parser.iterator();
706
707
708 for (int i = 0; i < 3; i++) {
709 assertTrue(records.hasNext());
710 final CSVRecord record = records.next();
711 assertEquals(record.get(0), record.get("A"));
712 assertEquals(record.get(1), record.get("B"));
713 assertEquals(record.get(2), record.get("C"));
714 }
715
716 assertFalse(records.hasNext());
717 }
718 }
719
720 @Test
721 public void testGetHeaderNames() throws IOException {
722 try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
723 final Map<String, Integer> nameIndexMap = parser.getHeaderMap();
724 final List<String> headerNames = parser.getHeaderNames();
725 assertNotNull(headerNames);
726 assertEquals(nameIndexMap.size(), headerNames.size());
727 for (int i = 0; i < headerNames.size(); i++) {
728 final String name = headerNames.get(i);
729 assertEquals(i, nameIndexMap.get(name).intValue());
730 }
731 }
732 }
733
734 @Test
735 public void testGetHeaderNamesReadOnly() throws IOException {
736 try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
737 final List<String> headerNames = parser.getHeaderNames();
738 assertNotNull(headerNames);
739 assertThrows(UnsupportedOperationException.class, () -> headerNames.add("This is a read-only list."));
740 }
741 }
742
743 @Test
744 public void testGetLine() throws IOException {
745 try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
746 for (final String[] re : RESULT) {
747 assertArrayEquals(re, parser.nextRecord().values());
748 }
749
750 assertNull(parser.nextRecord());
751 }
752 }
753
754 @Test
755 public void testGetLineNumberWithCR() throws Exception {
756 validateLineNumbers(String.valueOf(CR));
757 }
758
759 @Test
760 public void testGetLineNumberWithCRLF() throws Exception {
761 validateLineNumbers(CRLF);
762 }
763
764 @Test
765 public void testGetLineNumberWithLF() throws Exception {
766 validateLineNumbers(String.valueOf(LF));
767 }
768
769 @Test
770 public void testGetOneLine() throws IOException {
771 try (final CSVParser parser = CSVParser.parse(CSV_INPUT_1, CSVFormat.DEFAULT)) {
772 final CSVRecord record = parser.getRecords().get(0);
773 assertArrayEquals(RESULT[0], record.values());
774 }
775 }
776
777
778
779
780
781
782 @Test
783 public void testGetOneLineOneParser() throws IOException {
784 final CSVFormat format = CSVFormat.DEFAULT;
785 try (final PipedWriter writer = new PipedWriter();
786 final CSVParser parser = new CSVParser(new PipedReader(writer), format)) {
787 writer.append(CSV_INPUT_1);
788 writer.append(format.getRecordSeparator());
789 final CSVRecord record1 = parser.nextRecord();
790 assertArrayEquals(RESULT[0], record1.values());
791 writer.append(CSV_INPUT_2);
792 writer.append(format.getRecordSeparator());
793 final CSVRecord record2 = parser.nextRecord();
794 assertArrayEquals(RESULT[1], record2.values());
795 }
796 }
797
798 @Test
799 public void testGetRecordNumberWithCR() throws Exception {
800 validateRecordNumbers(String.valueOf(CR));
801 }
802
803 @Test
804 public void testGetRecordNumberWithCRLF() throws Exception {
805 validateRecordNumbers(CRLF);
806 }
807
808 @Test
809 public void testGetRecordNumberWithLF() throws Exception {
810 validateRecordNumbers(String.valueOf(LF));
811 }
812
813 @Test
814 public void testGetRecordPositionWithCRLF() throws Exception {
815 validateRecordPosition(CRLF);
816 }
817
818 @Test
819 public void testGetRecordPositionWithLF() throws Exception {
820 validateRecordPosition(String.valueOf(LF));
821 }
822
823 @Test
824 public void testGetRecords() throws IOException {
825 try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
826 final List<CSVRecord> records = parser.getRecords();
827 assertEquals(RESULT.length, records.size());
828 assertFalse(records.isEmpty());
829 for (int i = 0; i < RESULT.length; i++) {
830 assertArrayEquals(RESULT[i], records.get(i).values());
831 }
832 }
833 }
834
835 @Test
836 public void testGetRecordsFromBrokenInputStream() throws IOException {
837 @SuppressWarnings("resource")
838 final CSVParser parser = CSVParser.parse(new BrokenInputStream(), UTF_8, CSVFormat.DEFAULT);
839 assertThrows(UncheckedIOException.class, parser::getRecords);
840
841 }
842
843 @Test
844 public void testGetRecordWithMultiLineValues() throws Exception {
845 try (final CSVParser parser = CSVParser.parse("\"a\r\n1\",\"a\r\n2\"" + CRLF + "\"b\r\n1\",\"b\r\n2\"" + CRLF + "\"c\r\n1\",\"c\r\n2\"",
846 CSVFormat.DEFAULT.withRecordSeparator(CRLF))) {
847 CSVRecord record;
848 assertEquals(0, parser.getRecordNumber());
849 assertEquals(0, parser.getCurrentLineNumber());
850 assertNotNull(record = parser.nextRecord());
851 assertEquals(3, parser.getCurrentLineNumber());
852 assertEquals(1, record.getRecordNumber());
853 assertEquals(1, parser.getRecordNumber());
854 assertNotNull(record = parser.nextRecord());
855 assertEquals(6, parser.getCurrentLineNumber());
856 assertEquals(2, record.getRecordNumber());
857 assertEquals(2, parser.getRecordNumber());
858 assertNotNull(record = parser.nextRecord());
859 assertEquals(9, parser.getCurrentLineNumber());
860 assertEquals(3, record.getRecordNumber());
861 assertEquals(3, parser.getRecordNumber());
862 assertNull(record = parser.nextRecord());
863 assertEquals(9, parser.getCurrentLineNumber());
864 assertEquals(3, parser.getRecordNumber());
865 }
866 }
867
868 @Test
869 public void testGetTrailerComment_HeaderComment1() throws IOException {
870 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) {
871 parser.getRecords();
872 assertFalse(parser.hasTrailerComment());
873 assertNull(parser.getTrailerComment());
874 }
875 }
876
877 @Test
878 public void testGetTrailerComment_HeaderComment2() throws IOException {
879 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER)) {
880 parser.getRecords();
881 assertFalse(parser.hasTrailerComment());
882 assertNull(parser.getTrailerComment());
883 }
884 }
885
886 @Test
887 public void testGetTrailerComment_HeaderComment3() throws IOException {
888 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) {
889 parser.getRecords();
890 assertFalse(parser.hasTrailerComment());
891 assertNull(parser.getTrailerComment());
892 }
893 }
894
895 @Test
896 public void testGetTrailerComment_HeaderTrailerComment1() throws IOException {
897 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) {
898 parser.getRecords();
899 assertTrue(parser.hasTrailerComment());
900 assertEquals("comment", parser.getTrailerComment());
901 }
902 }
903
904 @Test
905 public void testGetTrailerComment_HeaderTrailerComment2() throws IOException {
906 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_EXPLICIT_HEADER)) {
907 parser.getRecords();
908 assertTrue(parser.hasTrailerComment());
909 assertEquals("comment", parser.getTrailerComment());
910 }
911 }
912
913 @Test
914 public void testGetTrailerComment_HeaderTrailerComment3() throws IOException {
915 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) {
916 parser.getRecords();
917 assertTrue(parser.hasTrailerComment());
918 assertEquals("comment", parser.getTrailerComment());
919 }
920 }
921
922 @Test
923 public void testGetTrailerComment_MultilineComment() throws IOException {
924 try (CSVParser parser = CSVParser.parse(CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) {
925 parser.getRecords();
926 assertTrue(parser.hasTrailerComment());
927 assertEquals("multi-line" + LF + "comment", parser.getTrailerComment());
928 }
929 }
930
931 @Test
932 public void testHeader() throws Exception {
933 final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
934
935 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) {
936 final Iterator<CSVRecord> records = parser.iterator();
937
938 for (int i = 0; i < 2; i++) {
939 assertTrue(records.hasNext());
940 final CSVRecord record = records.next();
941 assertEquals(record.get(0), record.get("a"));
942 assertEquals(record.get(1), record.get("b"));
943 assertEquals(record.get(2), record.get("c"));
944 }
945
946 assertFalse(records.hasNext());
947 }
948 }
949
950 @Test
951 public void testHeaderComment() throws Exception {
952 final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z");
953 try (final CSVParser parser = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in)) {
954 final Iterator<CSVRecord> records = parser.iterator();
955 for (int i = 0; i < 2; i++) {
956 assertTrue(records.hasNext());
957 final CSVRecord record = records.next();
958 assertEquals(record.get(0), record.get("a"));
959 assertEquals(record.get(1), record.get("b"));
960 assertEquals(record.get(2), record.get("c"));
961 }
962 assertFalse(records.hasNext());
963 }
964 }
965
966 @Test
967 public void testHeaderMissing() throws Exception {
968 final Reader in = new StringReader("a,,c\n1,2,3\nx,y,z");
969 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().withAllowMissingColumnNames().parse(in)) {
970 final Iterator<CSVRecord> records = parser.iterator();
971 for (int i = 0; i < 2; i++) {
972 assertTrue(records.hasNext());
973 final CSVRecord record = records.next();
974 assertEquals(record.get(0), record.get("a"));
975 assertEquals(record.get(2), record.get("c"));
976 }
977 assertFalse(records.hasNext());
978 }
979 }
980
981 @Test
982 public void testHeaderMissingWithNull() throws Exception {
983 final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z");
984 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().withNullString("").withAllowMissingColumnNames().parse(in)) {
985 parser.iterator();
986 }
987 }
988
989 @Test
990 public void testHeadersMissing() throws Exception {
991 try (final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z");
992 final CSVParser parser = CSVFormat.DEFAULT.withHeader().withAllowMissingColumnNames().parse(in)) {
993 parser.iterator();
994 }
995 }
996
997 @Test
998 public void testHeadersMissingException() {
999 final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z");
1000 assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withHeader().parse(in).iterator());
1001 }
1002
1003 @Test
1004 public void testHeadersMissingOneColumnException() {
1005 final Reader in = new StringReader("a,,c,d,e\n1,2,3,4,5\nv,w,x,y,z");
1006 assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withHeader().parse(in).iterator());
1007 }
1008
1009 @Test
1010 public void testHeadersWithNullColumnName() throws IOException {
1011 final Reader in = new StringReader("header1,null,header3\n1,2,3\n4,5,6");
1012 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().withNullString("null").withAllowMissingColumnNames().parse(in)) {
1013 final Iterator<CSVRecord> records = parser.iterator();
1014 final CSVRecord record = records.next();
1015
1016 @SuppressWarnings("resource")
1017 final CSVParser recordParser = record.getParser();
1018 assertEquals(Arrays.asList("header1", "header3"), recordParser.getHeaderNames());
1019 assertEquals(2, recordParser.getHeaderMap().size());
1020 }
1021 }
1022
1023 @Test
1024 public void testIgnoreCaseHeaderMapping() throws Exception {
1025 final Reader reader = new StringReader("1,2,3");
1026 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("One", "TWO", "three").withIgnoreHeaderCase().parse(reader)) {
1027 final Iterator<CSVRecord> records = parser.iterator();
1028 final CSVRecord record = records.next();
1029 assertEquals("1", record.get("one"));
1030 assertEquals("2", record.get("two"));
1031 assertEquals("3", record.get("THREE"));
1032 }
1033 }
1034
1035 @Test
1036 public void testIgnoreEmptyLines() throws IOException {
1037 final String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
1038
1039
1040 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
1041 final List<CSVRecord> records = parser.getRecords();
1042 assertEquals(3, records.size());
1043 }
1044 }
1045
1046 @Test
1047 public void testInvalidFormat() {
1048 assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter(CR));
1049 }
1050
1051 @Test
1052 public void testIterator() throws Exception {
1053 final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
1054 try (final CSVParser parser = CSVFormat.DEFAULT.parse(in)) {
1055 final Iterator<CSVRecord> iterator = parser.iterator();
1056 assertTrue(iterator.hasNext());
1057 assertThrows(UnsupportedOperationException.class, iterator::remove);
1058 assertArrayEquals(new String[] { "a", "b", "c" }, iterator.next().values());
1059 assertArrayEquals(new String[] { "1", "2", "3" }, iterator.next().values());
1060 assertTrue(iterator.hasNext());
1061 assertTrue(iterator.hasNext());
1062 assertTrue(iterator.hasNext());
1063 assertArrayEquals(new String[] { "x", "y", "z" }, iterator.next().values());
1064 assertFalse(iterator.hasNext());
1065 assertThrows(NoSuchElementException.class, iterator::next);
1066 }
1067 }
1068
1069 @Test
1070 public void testIteratorSequenceBreaking() throws IOException {
1071 final String fiveRows = "1\n2\n3\n4\n5\n";
1072
1073 try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) {
1074 final Iterator<CSVRecord> iter = parser.iterator();
1075 int recordNumber = 0;
1076 while (iter.hasNext()) {
1077 final CSVRecord record = iter.next();
1078 recordNumber++;
1079 assertEquals(String.valueOf(recordNumber), record.get(0));
1080 if (recordNumber >= 2) {
1081 break;
1082 }
1083 }
1084 iter.hasNext();
1085 while (iter.hasNext()) {
1086 final CSVRecord record = iter.next();
1087 recordNumber++;
1088 assertEquals(String.valueOf(recordNumber), record.get(0));
1089 }
1090 }
1091
1092 try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) {
1093 int recordNumber = 0;
1094 for (final CSVRecord record : parser) {
1095 recordNumber++;
1096 assertEquals(String.valueOf(recordNumber), record.get(0));
1097 if (recordNumber >= 2) {
1098 break;
1099 }
1100 }
1101 for (final CSVRecord record : parser) {
1102 recordNumber++;
1103 assertEquals(String.valueOf(recordNumber), record.get(0));
1104 }
1105 }
1106
1107 try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) {
1108 int recordNumber = 0;
1109 for (final CSVRecord record : parser) {
1110 recordNumber++;
1111 assertEquals(String.valueOf(recordNumber), record.get(0));
1112 if (recordNumber >= 2) {
1113 break;
1114 }
1115 }
1116 parser.iterator().hasNext();
1117 for (final CSVRecord record : parser) {
1118 recordNumber++;
1119 assertEquals(String.valueOf(recordNumber), record.get(0));
1120 }
1121 }
1122 }
1123
1124 @Test
1125 public void testLineFeedEndings() throws IOException {
1126 final String code = "foo\nbaar,\nhello,world\n,kanu";
1127 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
1128 final List<CSVRecord> records = parser.getRecords();
1129 assertEquals(4, records.size());
1130 }
1131 }
1132
1133 @Test
1134 public void testMappedButNotSetAsOutlook2007ContactExport() throws Exception {
1135 final Reader in = new StringReader("a,b,c\n1,2\nx,y,z");
1136 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("A", "B", "C").withSkipHeaderRecord().parse(in)) {
1137 final Iterator<CSVRecord> records = parser.iterator();
1138 CSVRecord record;
1139
1140 record = records.next();
1141 assertTrue(record.isMapped("A"));
1142 assertTrue(record.isMapped("B"));
1143 assertTrue(record.isMapped("C"));
1144 assertTrue(record.isSet("A"));
1145 assertTrue(record.isSet("B"));
1146 assertFalse(record.isSet("C"));
1147 assertEquals("1", record.get("A"));
1148 assertEquals("2", record.get("B"));
1149 assertFalse(record.isConsistent());
1150
1151 record = records.next();
1152 assertTrue(record.isMapped("A"));
1153 assertTrue(record.isMapped("B"));
1154 assertTrue(record.isMapped("C"));
1155 assertTrue(record.isSet("A"));
1156 assertTrue(record.isSet("B"));
1157 assertTrue(record.isSet("C"));
1158 assertEquals("x", record.get("A"));
1159 assertEquals("y", record.get("B"));
1160 assertEquals("z", record.get("C"));
1161 assertTrue(record.isConsistent());
1162
1163 assertFalse(records.hasNext());
1164 }
1165 }
1166
1167 @Test
1168 @Disabled
1169 public void testMongoDbCsv() throws Exception {
1170 try (final CSVParser parser = CSVParser.parse("\"a a\",b,c" + LF + "d,e,f", CSVFormat.MONGODB_CSV)) {
1171 final Iterator<CSVRecord> itr1 = parser.iterator();
1172 final Iterator<CSVRecord> itr2 = parser.iterator();
1173
1174 final CSVRecord first = itr1.next();
1175 assertEquals("a a", first.get(0));
1176 assertEquals("b", first.get(1));
1177 assertEquals("c", first.get(2));
1178
1179 final CSVRecord second = itr2.next();
1180 assertEquals("d", second.get(0));
1181 assertEquals("e", second.get(1));
1182 assertEquals("f", second.get(2));
1183 }
1184 }
1185
1186 @Test
1187
1188 public void testMultipleIterators() throws Exception {
1189 try (final CSVParser parser = CSVParser.parse("a,b,c" + CRLF + "d,e,f", CSVFormat.DEFAULT)) {
1190 final Iterator<CSVRecord> itr1 = parser.iterator();
1191
1192 final CSVRecord first = itr1.next();
1193 assertEquals("a", first.get(0));
1194 assertEquals("b", first.get(1));
1195 assertEquals("c", first.get(2));
1196
1197 final CSVRecord second = itr1.next();
1198 assertEquals("d", second.get(0));
1199 assertEquals("e", second.get(1));
1200 assertEquals("f", second.get(2));
1201 }
1202 }
1203
1204 @Test
1205 public void testNewCSVParserNullReaderFormat() {
1206 assertThrows(NullPointerException.class, () -> new CSVParser(null, CSVFormat.DEFAULT));
1207 }
1208
1209 @Test
1210 public void testNewCSVParserReaderNullFormat() {
1211 assertThrows(NullPointerException.class, () -> new CSVParser(new StringReader(""), null));
1212 }
1213
1214 @Test
1215 public void testNoHeaderMap() throws Exception {
1216 try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT)) {
1217 assertNull(parser.getHeaderMap());
1218 }
1219 }
1220
1221 @Test
1222 public void testNotValueCSV() throws IOException {
1223 final String source = "#";
1224 final CSVFormat csvFormat = CSVFormat.DEFAULT.withCommentMarker('#');
1225 try (final CSVParser csvParser = csvFormat.parse(new StringReader(source))) {
1226 final CSVRecord csvRecord = csvParser.nextRecord();
1227 assertNull(csvRecord);
1228 }
1229 }
1230
1231 @Test
1232 public void testParse() throws Exception {
1233 final ClassLoader loader = ClassLoader.getSystemClassLoader();
1234 final URL url = loader.getResource("org/apache/commons/csv/CSVFileParser/test.csv");
1235 final CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C", "D");
1236 final Charset charset = StandardCharsets.UTF_8;
1237
1238 try (@SuppressWarnings("resource")
1239 final CSVParser parser = CSVParser.parse(new InputStreamReader(url.openStream(), charset), format)) {
1240 parseFully(parser);
1241 }
1242 try (final CSVParser parser = CSVParser.parse(new String(Files.readAllBytes(Paths.get(url.toURI())), charset), format)) {
1243 parseFully(parser);
1244 }
1245 try (final CSVParser parser = CSVParser.parse(new File(url.toURI()), charset, format)) {
1246 parseFully(parser);
1247 }
1248 try (@SuppressWarnings("resource")
1249 final CSVParser parser = CSVParser.parse(url.openStream(), charset, format)) {
1250 parseFully(parser);
1251 }
1252 try (final CSVParser parser = CSVParser.parse(Paths.get(url.toURI()), charset, format)) {
1253 parseFully(parser);
1254 }
1255 try (final CSVParser parser = CSVParser.parse(url, charset, format)) {
1256 parseFully(parser);
1257 }
1258 try (final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format)) {
1259 parseFully(parser);
1260 }
1261 try (final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format, 0, 1)) {
1262 parseFully(parser);
1263 }
1264 }
1265
1266 @Test
1267 public void testParseFileNullFormat() {
1268 assertThrows(NullPointerException.class, () -> CSVParser.parse(new File("CSVFileParser/test.csv"), Charset.defaultCharset(), null));
1269 }
1270
1271 @Test
1272 public void testParseNullFileFormat() {
1273 assertThrows(NullPointerException.class, () -> CSVParser.parse((File) null, Charset.defaultCharset(), CSVFormat.DEFAULT));
1274 }
1275
1276 @Test
1277 public void testParseNullPathFormat() {
1278 assertThrows(NullPointerException.class, () -> CSVParser.parse((Path) null, Charset.defaultCharset(), CSVFormat.DEFAULT));
1279 }
1280
1281 @Test
1282 public void testParseNullStringFormat() {
1283 assertThrows(NullPointerException.class, () -> CSVParser.parse((String) null, CSVFormat.DEFAULT));
1284 }
1285
1286 @Test
1287 public void testParseNullUrlCharsetFormat() {
1288 assertThrows(NullPointerException.class, () -> CSVParser.parse((URL) null, Charset.defaultCharset(), CSVFormat.DEFAULT));
1289 }
1290
1291 @Test
1292 public void testParserUrlNullCharsetFormat() {
1293 assertThrows(NullPointerException.class, () -> CSVParser.parse(new URL("https://commons.apache.org"), null, CSVFormat.DEFAULT));
1294 }
1295
1296 @Test
1297 public void testParseStringNullFormat() {
1298 assertThrows(NullPointerException.class, () -> CSVParser.parse("csv data", (CSVFormat) null));
1299 }
1300
1301 @Test
1302 public void testParseUrlCharsetNullFormat() {
1303 assertThrows(NullPointerException.class, () -> CSVParser.parse(new URL("https://commons.apache.org"), Charset.defaultCharset(), null));
1304 }
1305
1306 @Test
1307 public void testParseWithDelimiterStringWithEscape() throws IOException {
1308 final String source = "a![!|!]b![|]c[|]xyz\r\nabc[abc][|]xyz";
1309 final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').build();
1310 try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) {
1311 CSVRecord csvRecord = csvParser.nextRecord();
1312 assertEquals("a[|]b![|]c", csvRecord.get(0));
1313 assertEquals("xyz", csvRecord.get(1));
1314 csvRecord = csvParser.nextRecord();
1315 assertEquals("abc[abc]", csvRecord.get(0));
1316 assertEquals("xyz", csvRecord.get(1));
1317 }
1318 }
1319
1320 @Test
1321 public void testParseWithDelimiterStringWithQuote() throws IOException {
1322 final String source = "'a[|]b[|]c'[|]xyz\r\nabc[abc][|]xyz";
1323 final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setQuote('\'').build();
1324 try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) {
1325 CSVRecord csvRecord = csvParser.nextRecord();
1326 assertEquals("a[|]b[|]c", csvRecord.get(0));
1327 assertEquals("xyz", csvRecord.get(1));
1328 csvRecord = csvParser.nextRecord();
1329 assertEquals("abc[abc]", csvRecord.get(0));
1330 assertEquals("xyz", csvRecord.get(1));
1331 }
1332 }
1333
1334 @Test
1335 public void testParseWithDelimiterWithEscape() throws IOException {
1336 final String source = "a!,b!,c,xyz";
1337 final CSVFormat csvFormat = CSVFormat.DEFAULT.withEscape('!');
1338 try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) {
1339 final CSVRecord csvRecord = csvParser.nextRecord();
1340 assertEquals("a,b,c", csvRecord.get(0));
1341 assertEquals("xyz", csvRecord.get(1));
1342 }
1343 }
1344
1345 @Test
1346 public void testParseWithDelimiterWithQuote() throws IOException {
1347 final String source = "'a,b,c',xyz";
1348 final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\'');
1349 try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) {
1350 final CSVRecord csvRecord = csvParser.nextRecord();
1351 assertEquals("a,b,c", csvRecord.get(0));
1352 assertEquals("xyz", csvRecord.get(1));
1353 }
1354 }
1355
1356 @Test
1357 public void testParseWithQuoteThrowsException() {
1358 final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\'');
1359 assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'a,b,c','")).nextRecord());
1360 assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'a,b,c'abc,xyz")).nextRecord());
1361 assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'abc'a,b,c',xyz")).nextRecord());
1362 }
1363
1364 @Test
1365 public void testParseWithQuoteWithEscape() throws IOException {
1366 final String source = "'a?,b?,c?d',xyz";
1367 final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\'').withEscape('?');
1368 try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) {
1369 final CSVRecord csvRecord = csvParser.nextRecord();
1370 assertEquals("a,b,c?d", csvRecord.get(0));
1371 assertEquals("xyz", csvRecord.get(1));
1372 }
1373 }
1374
1375 @ParameterizedTest
1376 @EnumSource(CSVFormat.Predefined.class)
1377 public void testParsingPrintedEmptyFirstColumn(final CSVFormat.Predefined format) throws Exception {
1378 final String[][] lines = { { "a", "b" }, { "", "x" } };
1379 final StringWriter buf = new StringWriter();
1380 try (CSVPrinter printer = new CSVPrinter(buf, format.getFormat())) {
1381 printer.printRecords(Stream.of(lines));
1382 }
1383 try (CSVParser csvRecords = new CSVParser(new StringReader(buf.toString()), format.getFormat())) {
1384 for (final String[] line : lines) {
1385 assertArrayEquals(line, csvRecords.nextRecord().values());
1386 }
1387 assertNull(csvRecords.nextRecord());
1388 }
1389 }
1390
1391 @Test
1392 public void testProvidedHeader() throws Exception {
1393 final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
1394
1395 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("A", "B", "C").parse(in)) {
1396 final Iterator<CSVRecord> records = parser.iterator();
1397
1398 for (int i = 0; i < 3; i++) {
1399 assertTrue(records.hasNext());
1400 final CSVRecord record = records.next();
1401 assertTrue(record.isMapped("A"));
1402 assertTrue(record.isMapped("B"));
1403 assertTrue(record.isMapped("C"));
1404 assertFalse(record.isMapped("NOT MAPPED"));
1405 assertEquals(record.get(0), record.get("A"));
1406 assertEquals(record.get(1), record.get("B"));
1407 assertEquals(record.get(2), record.get("C"));
1408 }
1409
1410 assertFalse(records.hasNext());
1411 }
1412 }
1413
1414 @Test
1415 public void testProvidedHeaderAuto() throws Exception {
1416 final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
1417
1418 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) {
1419 final Iterator<CSVRecord> records = parser.iterator();
1420
1421 for (int i = 0; i < 2; i++) {
1422 assertTrue(records.hasNext());
1423 final CSVRecord record = records.next();
1424 assertTrue(record.isMapped("a"));
1425 assertTrue(record.isMapped("b"));
1426 assertTrue(record.isMapped("c"));
1427 assertFalse(record.isMapped("NOT MAPPED"));
1428 assertEquals(record.get(0), record.get("a"));
1429 assertEquals(record.get(1), record.get("b"));
1430 assertEquals(record.get(2), record.get("c"));
1431 }
1432
1433 assertFalse(records.hasNext());
1434 }
1435 }
1436
1437 @Test
1438 public void testRepeatedHeadersAreReturnedInCSVRecordHeaderNames() throws IOException {
1439 final Reader in = new StringReader("header1,header2,header1\n1,2,3\n4,5,6");
1440 try (final CSVParser parser = CSVFormat.DEFAULT.withFirstRecordAsHeader().withTrim().parse(in)) {
1441 final Iterator<CSVRecord> records = parser.iterator();
1442 final CSVRecord record = records.next();
1443 @SuppressWarnings("resource")
1444 final CSVParser recordParser = record.getParser();
1445 assertEquals(Arrays.asList("header1", "header2", "header1"), recordParser.getHeaderNames());
1446 }
1447 }
1448
1449 @Test
1450 public void testRoundtrip() throws Exception {
1451 final StringWriter out = new StringWriter();
1452 final String data = "a,b,c\r\n1,2,3\r\nx,y,z\r\n";
1453 try (final CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT);
1454 final CSVParser parse = CSVParser.parse(data, CSVFormat.DEFAULT)) {
1455 for (final CSVRecord record : parse) {
1456 printer.printRecord(record);
1457 }
1458 assertEquals(data, out.toString());
1459 }
1460 }
1461
1462 @Test
1463 public void testSkipAutoHeader() throws Exception {
1464 final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
1465 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) {
1466 final Iterator<CSVRecord> records = parser.iterator();
1467 final CSVRecord record = records.next();
1468 assertEquals("1", record.get("a"));
1469 assertEquals("2", record.get("b"));
1470 assertEquals("3", record.get("c"));
1471 }
1472 }
1473
1474 @Test
1475 public void testSkipHeaderOverrideDuplicateHeaders() throws Exception {
1476 final Reader in = new StringReader("a,a,a\n1,2,3\nx,y,z");
1477 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in)) {
1478 final Iterator<CSVRecord> records = parser.iterator();
1479 final CSVRecord record = records.next();
1480 assertEquals("1", record.get("X"));
1481 assertEquals("2", record.get("Y"));
1482 assertEquals("3", record.get("Z"));
1483 }
1484 }
1485
1486 @Test
1487 public void testSkipSetAltHeaders() throws Exception {
1488 final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
1489 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in)) {
1490 final Iterator<CSVRecord> records = parser.iterator();
1491 final CSVRecord record = records.next();
1492 assertEquals("1", record.get("X"));
1493 assertEquals("2", record.get("Y"));
1494 assertEquals("3", record.get("Z"));
1495 }
1496 }
1497
1498 @Test
1499 public void testSkipSetHeader() throws Exception {
1500 final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
1501 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("a", "b", "c").withSkipHeaderRecord().parse(in)) {
1502 final Iterator<CSVRecord> records = parser.iterator();
1503 final CSVRecord record = records.next();
1504 assertEquals("1", record.get("a"));
1505 assertEquals("2", record.get("b"));
1506 assertEquals("3", record.get("c"));
1507 }
1508 }
1509
1510 @Test
1511 @Disabled
1512 public void testStartWithEmptyLinesThenHeaders() throws Exception {
1513 final String[] codes = { "\r\n\r\n\r\nhello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n" };
1514 final String[][] res = { { "hello", "" }, { "" },
1515 { "" } };
1516 for (final String code : codes) {
1517 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
1518 final List<CSVRecord> records = parser.getRecords();
1519 assertEquals(res.length, records.size());
1520 assertFalse(records.isEmpty());
1521 for (int i = 0; i < res.length; i++) {
1522 assertArrayEquals(res[i], records.get(i).values());
1523 }
1524 }
1525 }
1526 }
1527
1528 @Test
1529 public void testStream() throws Exception {
1530 final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
1531 try (final CSVParser parser = CSVFormat.DEFAULT.parse(in)) {
1532 final List<CSVRecord> list = parser.stream().collect(Collectors.toList());
1533 assertFalse(list.isEmpty());
1534 assertArrayEquals(new String[] { "a", "b", "c" }, list.get(0).values());
1535 assertArrayEquals(new String[] { "1", "2", "3" }, list.get(1).values());
1536 assertArrayEquals(new String[] { "x", "y", "z" }, list.get(2).values());
1537 }
1538 }
1539
1540 @Test
1541 public void testThrowExceptionWithLineAndPosition() throws IOException {
1542 final String csvContent = "col1,col2,col3,col4,col5,col6,col7,col8,col9,col10\nrec1,rec2,rec3,rec4,rec5,rec6,rec7,rec8,\"\"rec9\"\",rec10";
1543 final StringReader stringReader = new StringReader(csvContent);
1544
1545 final CSVFormat csvFormat = CSVFormat.DEFAULT.builder()
1546 .setHeader()
1547 .setSkipHeaderRecord(true)
1548 .build();
1549
1550 try (CSVParser csvParser = csvFormat.parse(stringReader)) {
1551 final UncheckedIOException exception = assertThrows(UncheckedIOException.class, csvParser::getRecords);
1552 assertInstanceOf(CSVException.class, exception.getCause());
1553 assertTrue(exception.getMessage().contains("Invalid character between encapsulated token and delimiter at line: 2, position: 94"),
1554 exception::getMessage);
1555 }
1556 }
1557
1558 @Test
1559 public void testTrailingDelimiter() throws Exception {
1560 final Reader in = new StringReader("a,a,a,\n\"1\",\"2\",\"3\",\nx,y,z,");
1561 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().withTrailingDelimiter().parse(in)) {
1562 final Iterator<CSVRecord> records = parser.iterator();
1563 final CSVRecord record = records.next();
1564 assertEquals("1", record.get("X"));
1565 assertEquals("2", record.get("Y"));
1566 assertEquals("3", record.get("Z"));
1567 assertEquals(3, record.size());
1568 }
1569 }
1570
1571 @Test
1572 public void testTrim() throws Exception {
1573 final Reader in = new StringReader("a,a,a\n\" 1 \",\" 2 \",\" 3 \"\nx,y,z");
1574 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().withTrim().parse(in)) {
1575 final Iterator<CSVRecord> records = parser.iterator();
1576 final CSVRecord record = records.next();
1577 assertEquals("1", record.get("X"));
1578 assertEquals("2", record.get("Y"));
1579 assertEquals("3", record.get("Z"));
1580 assertEquals(3, record.size());
1581 }
1582 }
1583
1584 private void validateLineNumbers(final String lineSeparator) throws IOException {
1585 try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) {
1586 assertEquals(0, parser.getCurrentLineNumber());
1587 assertNotNull(parser.nextRecord());
1588 assertEquals(1, parser.getCurrentLineNumber());
1589 assertNotNull(parser.nextRecord());
1590 assertEquals(2, parser.getCurrentLineNumber());
1591 assertNotNull(parser.nextRecord());
1592
1593 assertEquals(3, parser.getCurrentLineNumber());
1594 assertNull(parser.nextRecord());
1595
1596 assertEquals(3, parser.getCurrentLineNumber());
1597 }
1598 }
1599
1600 private void validateRecordNumbers(final String lineSeparator) throws IOException {
1601 try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) {
1602 CSVRecord record;
1603 assertEquals(0, parser.getRecordNumber());
1604 assertNotNull(record = parser.nextRecord());
1605 assertEquals(1, record.getRecordNumber());
1606 assertEquals(1, parser.getRecordNumber());
1607 assertNotNull(record = parser.nextRecord());
1608 assertEquals(2, record.getRecordNumber());
1609 assertEquals(2, parser.getRecordNumber());
1610 assertNotNull(record = parser.nextRecord());
1611 assertEquals(3, record.getRecordNumber());
1612 assertEquals(3, parser.getRecordNumber());
1613 assertNull(record = parser.nextRecord());
1614 assertEquals(3, parser.getRecordNumber());
1615 }
1616 }
1617
1618 private void validateRecordPosition(final String lineSeparator) throws IOException {
1619 final String nl = lineSeparator;
1620 final String code = "a,b,c" + lineSeparator + "1,2,3" + lineSeparator +
1621
1622 "'A" + nl + "A','B" + nl + "B',CC" + lineSeparator +
1623
1624
1625 "\u00c4,\u00d6,\u00dc" + lineSeparator + "EOF,EOF,EOF";
1626 final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(lineSeparator);
1627 final long positionRecord3;
1628 try (CSVParser parser = CSVParser.parse(code, format)) {
1629 CSVRecord record;
1630 assertEquals(0, parser.getRecordNumber());
1631
1632 assertNotNull(record = parser.nextRecord());
1633 assertEquals(1, record.getRecordNumber());
1634 assertEquals(code.indexOf('a'), record.getCharacterPosition());
1635
1636 assertNotNull(record = parser.nextRecord());
1637 assertEquals(2, record.getRecordNumber());
1638 assertEquals(code.indexOf('1'), record.getCharacterPosition());
1639
1640 assertNotNull(record = parser.nextRecord());
1641 positionRecord3 = record.getCharacterPosition();
1642 assertEquals(3, record.getRecordNumber());
1643 assertEquals(code.indexOf("'A"), record.getCharacterPosition());
1644 assertEquals("A" + lineSeparator + "A", record.get(0));
1645 assertEquals("B" + lineSeparator + "B", record.get(1));
1646 assertEquals("CC", record.get(2));
1647
1648 assertNotNull(record = parser.nextRecord());
1649 assertEquals(4, record.getRecordNumber());
1650 assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition());
1651
1652 assertNotNull(record = parser.nextRecord());
1653 assertEquals(5, record.getRecordNumber());
1654 assertEquals(code.indexOf("EOF"), record.getCharacterPosition());
1655 }
1656
1657 try (CSVParser parser = new CSVParser(new StringReader(code.substring((int) positionRecord3)), format, positionRecord3, 3)) {
1658 CSVRecord record;
1659
1660 assertNotNull(record = parser.nextRecord());
1661 assertEquals(3, record.getRecordNumber());
1662 assertEquals(code.indexOf("'A"), record.getCharacterPosition());
1663 assertEquals("A" + lineSeparator + "A", record.get(0));
1664 assertEquals("B" + lineSeparator + "B", record.get(1));
1665 assertEquals("CC", record.get(2));
1666
1667 assertNotNull(record = parser.nextRecord());
1668 assertEquals(4, record.getRecordNumber());
1669 assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition());
1670 assertEquals("\u00c4", record.get(0));
1671 }
1672 }
1673 }