View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.csv;
19  
20  import static org.apache.commons.csv.Constants.CR;
21  import static org.apache.commons.csv.Constants.LF;
22  import static org.apache.commons.csv.Constants.UNDEFINED;
23  import static org.apache.commons.io.IOUtils.EOF;
24  
25  import java.io.IOException;
26  import java.io.Reader;
27  
28  import org.apache.commons.io.IOUtils;
29  import org.apache.commons.io.input.UnsynchronizedBufferedReader;
30  
31  /**
32   * A special buffered reader which supports sophisticated read access.
33   * <p>
34   * In particular the reader supports a look-ahead option, which allows you to see the next char returned by
35   * {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}.
36   * </p>
37   */
38  final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
39  
40      /** The last char returned */
41      private int lastChar = UNDEFINED;
42      private int lastCharMark = UNDEFINED;
43  
44      /** The count of EOLs (CR/LF/CRLF) seen so far */
45      private long lineNumber;
46      private long lineNumberMark;
47  
48      /** The position, which is the number of characters read so far */
49      private long position;
50      private long positionMark;
51  
52      /**
53       * Constructs a new instance using the default buffer size.
54       */
55      ExtendedBufferedReader(final Reader reader) {
56          super(reader);
57      }
58  
59      /**
60       * Closes the stream.
61       *
62       * @throws IOException
63       *             If an I/O error occurs
64       */
65      @Override
66      public void close() throws IOException {
67          // Set ivars before calling super close() in case close() throws an IOException.
68          lastChar = EOF;
69          super.close();
70      }
71  
72      /**
73       * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by
74       * any of the read methods. This will not include a character read using the {@link #peek()} method. If no
75       * character has been read then this will return {@link Constants#UNDEFINED}. If the end of the stream was reached
76       * on the last read then this will return {@link IOUtils#EOF}.
77       *
78       * @return the last character that was read
79       */
80      int getLastChar() {
81          return lastChar;
82      }
83  
84      /**
85       * Returns the current line number
86       *
87       * @return the current line number
88       */
89      long getLineNumber() {
90          // Check if we are at EOL or EOF or just starting
91          if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == EOF) {
92              return lineNumber; // counter is accurate
93          }
94          return lineNumber + 1; // Allow for counter being incremented only at EOL
95      }
96  
97      /**
98       * Gets the character position in the reader.
99       *
100      * @return the current position in the reader (counting characters, not bytes since this is a Reader)
101      */
102     long getPosition() {
103         return this.position;
104     }
105 
106     @Override
107     public void mark(final int readAheadLimit) throws IOException {
108         lineNumberMark = lineNumber;
109         lastCharMark = lastChar;
110         positionMark = position;
111         super.mark(readAheadLimit);
112     }
113 
114     @Override
115     public int read() throws IOException {
116         final int current = super.read();
117         if (current == CR || current == LF && lastChar != CR ||
118             current == EOF && lastChar != CR && lastChar != LF && lastChar != EOF) {
119             lineNumber++;
120         }
121         lastChar = current;
122         position++;
123         return lastChar;
124     }
125 
126     @Override
127     public int read(final char[] buf, final int offset, final int length) throws IOException {
128         if (length == 0) {
129             return 0;
130         }
131         final int len = super.read(buf, offset, length);
132         if (len > 0) {
133             for (int i = offset; i < offset + len; i++) {
134                 final char ch = buf[i];
135                 if (ch == LF) {
136                     if (CR != (i > offset ? buf[i - 1] : lastChar)) {
137                         lineNumber++;
138                     }
139                 } else if (ch == CR) {
140                     lineNumber++;
141                 }
142             }
143             lastChar = buf[offset + len - 1];
144         } else if (len == EOF) {
145             lastChar = EOF;
146         }
147         position += len;
148         return len;
149     }
150 
151     /**
152      * Gets the next line, dropping the line terminator(s). This method should only be called when processing a
153      * comment, otherwise, information can be lost.
154      * <p>
155      * Increments {@link #lineNumber} and updates {@link #position}.
156      * </p>
157      * <p>
158      * Sets {@link #lastChar} to {@code Constants.EOF} at EOF, otherwise the last EOL character.
159      * </p>
160      *
161      * @return the line that was read, or null if reached EOF.
162      */
163     @Override
164     public String readLine() throws IOException {
165         if (peek() == EOF) {
166             return null;
167         }
168         final StringBuilder buffer = new StringBuilder();
169         while (true) {
170             final int current = read();
171             if (current == CR) {
172                 final int next = peek();
173                 if (next == LF) {
174                     read();
175                 }
176             }
177             if (current == EOF || current == LF || current == CR) {
178                 break;
179             }
180             buffer.append((char) current);
181         }
182         return buffer.toString();
183     }
184 
185     @Override
186     public void reset() throws IOException {
187         lineNumber = lineNumberMark;
188         lastChar = lastCharMark;
189         position = positionMark;
190         super.reset();
191     }
192 
193 }