Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019
020package org.apache.commons.csv;
021
022import java.io.Serializable;
023import java.util.Arrays;
024import java.util.Iterator;
025import java.util.LinkedHashMap;
026import java.util.List;
027import java.util.Map;
028import java.util.stream.Collectors;
029import java.util.stream.Stream;
030
031/**
032 * A CSV record parsed from a CSV file.
033 *
034 * <p>
035 * Note: Support for {@link Serializable} is scheduled to be removed in version 2.0.
036 * In version 1.8 the mapping between the column header and the column index was
037 * removed from the serialized state. The class maintains serialization compatibility
038 * with versions pre-1.8 for the record values; these must be accessed by index
039 * following deserialization. There will be a loss of any functionally linked to the header
040 * mapping when transferring serialized forms pre-1.8 to 1.8 and vice versa.
041 * </p>
042 */
043public final class CSVRecord implements Serializable, Iterable<String> {
044
045    private static final long serialVersionUID = 1L;
046
047    /**
048     * The start position of this record as a character position in the source stream. This may or may not correspond to the byte position depending on the
049     * character set.
050     */
051    private final long characterPosition;
052
053    /**
054     * The starting position of this record in the source stream, measured in bytes.
055     */
056    private final long bytePosition;
057
058    /** The accumulated comments (if any) */
059    private final String comment;
060
061    /** The record number. */
062    private final long recordNumber;
063
064    /** The values of the record */
065    private final String[] values;
066
067    /** The parser that originates this record. This is not serialized. */
068    private final transient CSVParser parser;
069
070    CSVRecord(final CSVParser parser, final String[] values,  final String comment, final long recordNumber,
071            final long characterPosition, final long bytePosition) {
072        this.recordNumber = recordNumber;
073        this.values = values != null ? values : Constants.EMPTY_STRING_ARRAY;
074        this.parser = parser;
075        this.comment = comment;
076        this.characterPosition = characterPosition;
077        this.bytePosition = bytePosition;
078    }
079
080    /**
081     * Returns a value by {@link Enum}.
082     *
083     * @param e
084     *            an enum
085     * @return the String at the given enum String
086     */
087    public String get(final Enum<?> e) {
088        return get(e == null ? null : e.name());
089    }
090
091    /**
092     * Returns a value by index.
093     *
094     * @param i
095     *            a column index (0-based)
096     * @return the String at the given index
097     */
098    public String get(final int i) {
099        return values[i];
100    }
101
102    /**
103     * Returns a value by name. If multiple instances of the header name exists, only the last occurrence is returned.
104     *
105     * <p>
106     * Note: This requires a field mapping obtained from the original parser.
107     * A check using {@link #isMapped(String)} should be used to determine if a
108     * mapping exists from the provided {@code name} to a field index. In this case an
109     * exception will only be thrown if the record does not contain a field corresponding
110     * to the mapping, that is the record length is not consistent with the mapping size.
111     * </p>
112     *
113     * @param name
114     *            the name of the column to be retrieved.
115     * @return the column value, maybe null depending on {@link CSVFormat#getNullString()}.
116     * @throws IllegalStateException
117     *             if no header mapping was provided
118     * @throws IllegalArgumentException
119     *             if {@code name} is not mapped or if the record is inconsistent
120     * @see #isMapped(String)
121     * @see #isConsistent()
122     * @see #getParser()
123     * @see CSVFormat.Builder#setNullString(String)
124     */
125    public String get(final String name) {
126        final Map<String, Integer> headerMap = getHeaderMapRaw();
127        if (headerMap == null) {
128            throw new IllegalStateException(
129                "No header mapping was specified, the record values can't be accessed by name");
130        }
131        final Integer index = headerMap.get(name);
132        if (index == null) {
133            throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name,
134                headerMap.keySet()));
135        }
136        try {
137            return values[index.intValue()];  // Explicit (un)boxing is intentional
138        } catch (final ArrayIndexOutOfBoundsException e) {
139            throw new IllegalArgumentException(String.format(
140                "Index for header '%s' is %d but CSVRecord only has %d values!", name, index,
141                Integer.valueOf(values.length)));  // Explicit (un)boxing is intentional
142        }
143    }
144
145    /**
146     * Returns the starting position of this record in the source stream, measured in bytes.
147     *
148     * @return the byte position of this record in the source stream.
149     * @since 1.13.0
150     */
151    public long getBytePosition() {
152        return bytePosition;
153    }
154
155    /**
156     * Returns the start position of this record as a character position in the source stream. This may or may not
157     * correspond to the byte position depending on the character set.
158     *
159     * @return the position of this record in the source stream.
160     */
161    public long getCharacterPosition() {
162        return characterPosition;
163    }
164
165    /**
166     * Returns the comment for this record, if any.
167     * Note that comments are attached to the following record.
168     * If there is no following record (that is, the comment is at EOF),
169     * then the comment will be ignored.
170     *
171     * @return the comment for this record, or null if no comment for this record is available.
172     */
173    public String getComment() {
174        return comment;
175    }
176
177    private Map<String, Integer> getHeaderMapRaw() {
178        return parser == null ? null : parser.getHeaderMapRaw();
179    }
180
181    /**
182     * Returns the parser.
183     *
184     * <p>
185     * Note: The parser is not part of the serialized state of the record. A null check
186     * should be used when the record may have originated from a serialized form.
187     * </p>
188     *
189     * @return the parser.
190     * @since 1.7
191     */
192    public CSVParser getParser() {
193        return parser;
194    }
195
196    /**
197     * Returns the number of this record in the parsed CSV file.
198     *
199     * <p>
200     * <strong>NOTE:</strong>If your CSV input has multi-line values, the returned number does not correspond to
201     * the current line number of the parser that created this record.
202     * </p>
203     *
204     * @return the number of this record.
205     * @see CSVParser#getCurrentLineNumber()
206     */
207    public long getRecordNumber() {
208        return recordNumber;
209    }
210
211    /**
212     * Checks whether this record has a comment, false otherwise.
213     * Note that comments are attached to the following record.
214     * If there is no following record (that is, the comment is at EOF),
215     * then the comment will be ignored.
216     *
217     * @return true if this record has a comment, false otherwise
218     * @since 1.3
219     */
220    public boolean hasComment() {
221        return comment != null;
222    }
223
224    /**
225     * Tells whether the record size matches the header size.
226     *
227     * <p>
228     * Returns true if the sizes for this record match and false if not. Some programs can export files that fail this
229     * test but still produce parsable files.
230     * </p>
231     *
232     * @return true of this record is valid, false if not
233     */
234    public boolean isConsistent() {
235        final Map<String, Integer> headerMap = getHeaderMapRaw();
236        return headerMap == null || headerMap.size() == values.length;
237    }
238
239    /**
240     * Checks whether a given column is mapped, that is, its name has been defined to the parser.
241     *
242     * @param name
243     *            the name of the column to be retrieved.
244     * @return whether a given column is mapped.
245     */
246    public boolean isMapped(final String name) {
247        final Map<String, Integer> headerMap = getHeaderMapRaw();
248        return headerMap != null && headerMap.containsKey(name);
249    }
250
251    /**
252     * Checks whether a column with a given index has a value.
253     *
254     * @param index
255     *         a column index (0-based)
256     * @return whether a column with a given index has a value
257     */
258    public boolean isSet(final int index) {
259        return 0 <= index && index < values.length;
260    }
261
262    /**
263     * Checks whether a given column is mapped and has a value.
264     *
265     * @param name
266     *            the name of the column to be retrieved.
267     * @return whether a given column is mapped and has a value
268     */
269    public boolean isSet(final String name) {
270        return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; // Explicit (un)boxing is intentional
271    }
272
273    /**
274     * Returns an iterator over the values of this record.
275     *
276     * @return an iterator over the values of this record.
277     */
278    @Override
279    public Iterator<String> iterator() {
280        return toList().iterator();
281    }
282
283    /**
284     * Puts all values of this record into the given Map.
285     *
286     * @param <M> the map type
287     * @param map The Map to populate.
288     * @return the given map.
289     * @since 1.9.0
290     */
291    public <M extends Map<String, String>> M putIn(final M map) {
292        if (getHeaderMapRaw() == null) {
293            return map;
294        }
295        getHeaderMapRaw().forEach((key, value) -> {
296            if (value < values.length) {
297                map.put(key, values[value]);
298            }
299        });
300        return map;
301    }
302
303    /**
304     * Returns the number of values in this record.
305     *
306     * @return the number of values.
307     */
308    public int size() {
309        return values.length;
310    }
311
312    /**
313     * Returns a sequential ordered stream whose elements are the values.
314     *
315     * @return the new stream.
316     * @since 1.9.0
317     */
318    public Stream<String> stream() {
319        return Stream.of(values);
320    }
321
322    /**
323     * Converts the values to a new List.
324     * <p>
325     * Editing the list does not update this instance.
326     * </p>
327     *
328     * @return a new List
329     * @since 1.9.0
330     */
331    public List<String> toList() {
332        return stream().collect(Collectors.toList());
333    }
334
335    /**
336     * Copies this record into a new Map of header name to record value. If multiple instances of a header name exist,
337     * then only the last occurrence is mapped.
338     *
339     * <p>
340     * Editing the map does not update this instance.
341     * </p>
342     *
343     * @return A new Map. The map is empty if the record has no headers.
344     */
345    public Map<String, String> toMap() {
346        return putIn(new LinkedHashMap<>(values.length));
347    }
348
349    /**
350     * Returns a string representation of the contents of this record. The result is constructed by comment, mapping,
351     * recordNumber and by passing the internal values array to {@link Arrays#toString(Object[])}.
352     *
353     * @return a String representation of this record.
354     */
355    @Override
356    public String toString() {
357        return "CSVRecord [comment='" + comment + "', recordNumber=" + recordNumber + ", values=" +
358            Arrays.toString(values) + "]";
359    }
360
361    /**
362     * Gets the values for this record. This is not a copy.
363     *
364     * @return the values for this record.
365     * @since 1.10.0
366     */
367    public String[] values() {
368        return values;
369    }
370
371}