001    /*
002     * Java CSV is a stream based library for reading and writing
003     * CSV and other delimited data.
004     *   
005     * Copyright (C) Bruce Dunwiddie bruce@csvreader.com
006     *
007     * This library is free software; you can redistribute it and/or
008     * modify it under the terms of the GNU Lesser General Public
009     * License as published by the Free Software Foundation; either
010     * version 2.1 of the License, or (at your option) any later version.
011     *
012     * This library is distributed in the hope that it will be useful,
013     * but WITHOUT ANY WARRANTY; without even the implied warranty of
014     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
015     * Lesser General Public License for more details.
016     *
017     * You should have received a copy of the GNU Lesser General Public
018     * License along with this library; if not, write to the Free Software
019     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
020     */
021    package com.csvreader;
022    
023    import java.io.BufferedReader;
024    import java.io.File;
025    import java.io.FileInputStream;
026    import java.io.FileNotFoundException;
027    import java.io.IOException;
028    import java.io.InputStream;
029    import java.io.InputStreamReader;
030    import java.io.Reader;
031    import java.io.StringReader;
032    import java.nio.charset.Charset;
033    import java.text.NumberFormat;
034    import java.util.HashMap;
035    
036    /**
037     * A stream based parser for parsing delimited text data from a file or a
038     * stream.
039     */
040    public class CsvReader {
041            private Reader inputStream = null;
042    
043            private String fileName = null;
044    
045            // this holds all the values for switches that the user is allowed to set
046            private UserSettings userSettings = new UserSettings();
047    
048            private Charset charset = null;
049    
050            private boolean useCustomRecordDelimiter = false;
051    
052            // this will be our working buffer to hold data chunks
053            // read in from the data file
054    
055            private DataBuffer dataBuffer = new DataBuffer();
056    
057            private ColumnBuffer columnBuffer = new ColumnBuffer();
058    
059            private RawRecordBuffer rawBuffer = new RawRecordBuffer();
060    
061            private boolean[] isQualified = null;
062    
063            private String rawRecord = "";
064    
065            private HeadersHolder headersHolder = new HeadersHolder();
066    
067            // these are all more or less global loop variables
068            // to keep from needing to pass them all into various
069            // methods during parsing
070    
071            private boolean startedColumn = false;
072    
073            private boolean startedWithQualifier = false;
074    
075            private boolean hasMoreData = true;
076    
077            private char lastLetter = '\0';
078    
079            private boolean hasReadNextLine = false;
080    
081            private int columnsCount = 0;
082    
083            private long currentRecord = 0;
084    
085            private String[] values = new String[StaticSettings.INITIAL_COLUMN_COUNT];
086    
087            private boolean initialized = false;
088    
089            private boolean closed = false;
090    
               /**
                * Double up the text qualifier to represent an occurrence of the text
                * qualifier. One of the two values accepted by
                * {@link com.csvreader.CsvReader#setEscapeMode setEscapeMode()}.
                */
               public static final int ESCAPE_MODE_DOUBLED = 1;

               /**
                * Use a backslash character before the text qualifier to represent an
                * occurrence of the text qualifier. One of the two values accepted by
                * {@link com.csvreader.CsvReader#setEscapeMode setEscapeMode()}.
                */
               public static final int ESCAPE_MODE_BACKSLASH = 2;
102    
103            /**
104             * Creates a {@link com.csvreader.CsvReader CsvReader} object using a file
105             * as the data source.
106             * 
107             * @param fileName
108             *            The path to the file to use as the data source.
109             * @param delimiter
110             *            The character to use as the column delimiter.
111             * @param charset
112             *            The {@link java.nio.charset.Charset Charset} to use while
113             *            parsing the data.
114             */
115            public CsvReader(String fileName, char delimiter, Charset charset)
116                            throws FileNotFoundException {
117                    if (fileName == null) {
118                            throw new IllegalArgumentException(
119                                            "Parameter fileName can not be null.");
120                    }
121    
122                    if (charset == null) {
123                            throw new IllegalArgumentException(
124                                            "Parameter charset can not be null.");
125                    }
126    
127                    if (!new File(fileName).exists()) {
128                            throw new FileNotFoundException("File " + fileName
129                                            + " does not exist.");
130                    }
131    
132                    this.fileName = fileName;
133                    this.userSettings.Delimiter = delimiter;
134                    this.charset = charset;
135    
136                    isQualified = new boolean[values.length];
137            }
138    
139            /**
140             * Creates a {@link com.csvreader.CsvReader CsvReader} object using a file
141             * as the data source. Uses ISO-8859-1 as the
142             * {@link java.nio.charset.Charset Charset}.
143             * 
144             * @param fileName
145             *            The path to the file to use as the data source.
146             * @param delimiter
147             *            The character to use as the column delimiter.
148             */
149            public CsvReader(String fileName, char delimiter)
150                            throws FileNotFoundException {
151                    this(fileName, delimiter, Charset.forName("ISO-8859-1"));
152            }
153    
154            /**
155             * Creates a {@link com.csvreader.CsvReader CsvReader} object using a file
156             * as the data source. Uses a comma as the column delimiter and
157             * ISO-8859-1 as the {@link java.nio.charset.Charset Charset}.
158             * 
159             * @param fileName
160             *            The path to the file to use as the data source.
161             */
162            public CsvReader(String fileName) throws FileNotFoundException {
163                    this(fileName, Letters.COMMA);
164            }
165    
166            /**
167             * Constructs a {@link com.csvreader.CsvReader CsvReader} object using a
168             * {@link java.io.Reader Reader} object as the data source.
169             * 
170             * @param inputStream
171             *            The stream to use as the data source.
172             * @param delimiter
173             *            The character to use as the column delimiter.
174             */
175            public CsvReader(Reader inputStream, char delimiter) {
176                    if (inputStream == null) {
177                            throw new IllegalArgumentException(
178                                            "Parameter inputStream can not be null.");
179                    }
180    
181                    this.inputStream = inputStream;
182                    this.userSettings.Delimiter = delimiter;
183                    initialized = true;
184    
185                    isQualified = new boolean[values.length];
186            }
187    
188            /**
189             * Constructs a {@link com.csvreader.CsvReader CsvReader} object using a
190             * {@link java.io.Reader Reader} object as the data source. Uses a
191             * comma as the column delimiter.
192             * 
193             * @param inputStream
194             *            The stream to use as the data source.
195             */
196            public CsvReader(Reader inputStream) {
197                    this(inputStream, Letters.COMMA);
198            }
199    
200            /**
201             * Constructs a {@link com.csvreader.CsvReader CsvReader} object using an
202             * {@link java.io.InputStream InputStream} object as the data source.
203             * 
204             * @param inputStream
205             *            The stream to use as the data source.
206             * @param delimiter
207             *            The character to use as the column delimiter.
208             * @param charset
209             *            The {@link java.nio.charset.Charset Charset} to use while
210             *            parsing the data.
211             */
212            public CsvReader(InputStream inputStream, char delimiter, Charset charset) {
213                    this(new InputStreamReader(inputStream, charset), delimiter);
214            }
215    
216            /**
217             * Constructs a {@link com.csvreader.CsvReader CsvReader} object using an
218             * {@link java.io.InputStream InputStream} object as the data
219             * source. Uses a comma as the column delimiter.
220             * 
221             * @param inputStream
222             *            The stream to use as the data source.
223             * @param charset
224             *            The {@link java.nio.charset.Charset Charset} to use while
225             *            parsing the data.
226             */
227            public CsvReader(InputStream inputStream, Charset charset) {
228                    this(new InputStreamReader(inputStream, charset));
229            }
230    
231            public boolean getCaptureRawRecord() {
232                    return userSettings.CaptureRawRecord;
233            }
234    
235            public void setCaptureRawRecord(boolean captureRawRecord) {
236                    userSettings.CaptureRawRecord = captureRawRecord;
237            }
238    
239            public String getRawRecord() {
240                    return rawRecord;
241            }
242    
243            /**
244             * Gets whether leading and trailing whitespace characters are being trimmed
245             * from non-textqualified column data. Default is true.
246             * 
247             * @return Whether leading and trailing whitespace characters are being
248             *         trimmed from non-textqualified column data.
249             */
250            public boolean getTrimWhitespace() {
251                    return userSettings.TrimWhitespace;
252            }
253    
254            /**
255             * Sets whether leading and trailing whitespace characters should be trimmed
256             * from non-textqualified column data or not. Default is true.
257             * 
258             * @param trimWhitespace
259             *            Whether leading and trailing whitespace characters should be
260             *            trimmed from non-textqualified column data or not.
261             */
262            public void setTrimWhitespace(boolean trimWhitespace) {
263                    userSettings.TrimWhitespace = trimWhitespace;
264            }
265    
266            /**
267             * Gets the character being used as the column delimiter. Default is comma,
268             * ','.
269             * 
270             * @return The character being used as the column delimiter.
271             */
272            public char getDelimiter() {
273                    return userSettings.Delimiter;
274            }
275    
276            /**
277             * Sets the character to use as the column delimiter. Default is comma, ','.
278             * 
279             * @param delimiter
280             *            The character to use as the column delimiter.
281             */
282            public void setDelimiter(char delimiter) {
283                    userSettings.Delimiter = delimiter;
284            }
285    
286            public char getRecordDelimiter() {
287                    return userSettings.RecordDelimiter;
288            }
289    
290            /**
291             * Sets the character to use as the record delimiter.
292             * 
293             * @param recordDelimiter
294             *            The character to use as the record delimiter. Default is
295             *            combination of standard end of line characters for Windows,
296             *            Unix, or Mac.
297             */
298            public void setRecordDelimiter(char recordDelimiter) {
299                    useCustomRecordDelimiter = true;
300                    userSettings.RecordDelimiter = recordDelimiter;
301            }
302    
303            /**
304             * Gets the character to use as a text qualifier in the data.
305             * 
306             * @return The character to use as a text qualifier in the data.
307             */
308            public char getTextQualifier() {
309                    return userSettings.TextQualifier;
310            }
311    
312            /**
313             * Sets the character to use as a text qualifier in the data.
314             * 
315             * @param textQualifier
316             *            The character to use as a text qualifier in the data.
317             */
318            public void setTextQualifier(char textQualifier) {
319                    userSettings.TextQualifier = textQualifier;
320            }
321    
322            /**
323             * Whether text qualifiers will be used while parsing or not.
324             * 
325             * @return Whether text qualifiers will be used while parsing or not.
326             */
327            public boolean getUseTextQualifier() {
328                    return userSettings.UseTextQualifier;
329            }
330    
331            /**
332             * Sets whether text qualifiers will be used while parsing or not.
333             * 
334             * @param useTextQualifier
335             *            Whether to use a text qualifier while parsing or not.
336             */
337            public void setUseTextQualifier(boolean useTextQualifier) {
338                    userSettings.UseTextQualifier = useTextQualifier;
339            }
340    
341            /**
342             * Gets the character being used as a comment signal.
343             * 
344             * @return The character being used as a comment signal.
345             */
346            public char getComment() {
347                    return userSettings.Comment;
348            }
349    
350            /**
351             * Sets the character to use as a comment signal.
352             * 
353             * @param comment
354             *            The character to use as a comment signal.
355             */
356            public void setComment(char comment) {
357                    userSettings.Comment = comment;
358            }
359    
360            /**
361             * Gets whether comments are being looked for while parsing or not.
362             * 
363             * @return Whether comments are being looked for while parsing or not.
364             */
365            public boolean getUseComments() {
366                    return userSettings.UseComments;
367            }
368    
369            /**
370             * Sets whether comments are being looked for while parsing or not.
371             * 
372             * @param useComments
373             *            Whether comments are being looked for while parsing or not.
374             */
375            public void setUseComments(boolean useComments) {
376                    userSettings.UseComments = useComments;
377            }
378    
379            /**
380             * Gets the current way to escape an occurance of the text qualifier inside
381             * qualified data.
382             * 
383             * @return The current way to escape an occurance of the text qualifier
384             *         inside qualified data.
385             */
386            public int getEscapeMode() {
387                    return userSettings.EscapeMode;
388            }
389    
390            /**
391             * Sets the current way to escape an occurance of the text qualifier inside
392             * qualified data.
393             * 
394             * @param escapeMode
395             *            The way to escape an occurance of the text qualifier inside
396             *            qualified data.
397             * @exception IllegalArgumentException
398             *                When an illegal value is specified for escapeMode.
399             */
400            public void setEscapeMode(int escapeMode) throws IllegalArgumentException {
401                    if (escapeMode != ESCAPE_MODE_DOUBLED
402                                    && escapeMode != ESCAPE_MODE_BACKSLASH) {
403                            throw new IllegalArgumentException(
404                                            "Parameter escapeMode must be a valid value.");
405                    }
406    
407                    userSettings.EscapeMode = escapeMode;
408            }
409    
410            public boolean getSkipEmptyRecords() {
411                    return userSettings.SkipEmptyRecords;
412            }
413    
414            public void setSkipEmptyRecords(boolean skipEmptyRecords) {
415                    userSettings.SkipEmptyRecords = skipEmptyRecords;
416            }
417    
418            /**
419             * Safety caution to prevent the parser from using large amounts of memory
420             * in the case where parsing settings like file encodings don't end up
421             * matching the actual format of a file. This switch can be turned off if
422             * the file format is known and tested. With the switch off, the max column
423             * lengths and max column count per record supported by the parser will
424             * greatly increase. Default is true.
425             * 
426             * @return The current setting of the safety switch.
427             */
428            public boolean getSafetySwitch() {
429                    return userSettings.SafetySwitch;
430            }
431    
432            /**
433             * Safety caution to prevent the parser from using large amounts of memory
434             * in the case where parsing settings like file encodings don't end up
435             * matching the actual format of a file. This switch can be turned off if
436             * the file format is known and tested. With the switch off, the max column
437             * lengths and max column count per record supported by the parser will
438             * greatly increase. Default is true.
439             * 
440             * @param safetySwitch
441             */
442            public void setSafetySwitch(boolean safetySwitch) {
443                    userSettings.SafetySwitch = safetySwitch;
444            }
445    
446            /**
447             * Gets the count of columns found in this record.
448             * 
449             * @return The count of columns found in this record.
450             */
451            public int getColumnCount() {
452                    return columnsCount;
453            }
454    
455            /**
456             * Gets the index of the current record.
457             * 
458             * @return The index of the current record.
459             */
460            public long getCurrentRecord() {
461                    return currentRecord - 1;
462            }
463    
464            /**
465             * Gets the count of headers read in by a previous call to
466             * {@link com.csvreader.CsvReader#readHeaders readHeaders()}.
467             * 
468             * @return The count of headers read in by a previous call to
469             *         {@link com.csvreader.CsvReader#readHeaders readHeaders()}.
470             */
471            public int getHeaderCount() {
472                    return headersHolder.Length;
473            }
474    
475            /**
476             * Returns the header values as a string array.
477             * 
478             * @return The header values as a String array.
479             * @exception IOException
480             *                Thrown if this object has already been closed.
481             */
482            public String[] getHeaders() throws IOException {
483                    checkClosed();
484    
485                    if (headersHolder.Headers == null) {
486                            return null;
487                    }
488                    
489                    // use clone here to prevent the outside code from
490                    // setting values on the array directly, which would
491                    // throw off the index lookup based on header name
492                    String[] clone = new String[headersHolder.Length];
493                    System.arraycopy(headersHolder.Headers, 0, clone, 0, headersHolder.Length);
494                    
495                    return clone;
496            }
497    
498            public void setHeaders(String[] headers) {
499                    headersHolder.Headers = headers;
500    
501                    headersHolder.IndexByName.clear();
502                    
503                    if(headers == null){
504                            return;
505                    }
506                    
507                    headersHolder.Length = headers.length;
508    
509                    // use headersHolder.Length here in case headers is null
510                    for (int i = 0; i < headersHolder.Length; i++) {
511                            headersHolder.IndexByName.put(headers[i], new Integer(i));
512                    }
513            }
514    
515            public String[] getValues() throws IOException {
516                    checkClosed();
517    
518                    // need to return a clone, and can't use clone because values.Length
519                    // might be greater than columnsCount
520                    String[] clone = new String[columnsCount];
521                    System.arraycopy(values, 0, clone, 0, columnsCount);
522                    return clone;
523            }
524    
525            /**
526             * Returns the current column value for a given column index.
527             * 
528             * @param columnIndex
529             *            The index of the column.
530             * @return The current column value.
531             * @exception IOException
532             *                Thrown if this object has already been closed.
533             */
534            public String get(int columnIndex) throws IOException {
535                    checkClosed();
536                    
537                    if(columnIndex < 0 || columnIndex >= columnsCount){
538                            return "";
539                    }
540    
541                    return values[columnIndex];
542            }
543    
544            /**
545             * Returns the current column value for a given column header name.
546             * 
547             * @param headerName
548             *            The header name of the column.
549             * @return The current column value.
550             * @exception IOException
551             *                Thrown if this object has already been closed.
552             */
553            public String get(String headerName) throws IOException {
554                    checkClosed();
555    
556                    return get(getIndex(headerName));
557            }
558    
559            /**
560             * Creates a {@link com.csvreader.CsvReader CsvReader} object using a string
561             * of data as the source.&nbsp;Uses ISO-8859-1 as the
562             * {@link java.nio.charset.Charset Charset}.
563             * 
564             * @param data
565             *            The String of data to use as the source.
566             * @return A {@link com.csvreader.CsvReader CsvReader} object using the
567             *         String of data as the source.
568             */
569            public static CsvReader parse(String data) {
570                    if (data == null) {
571                            throw new IllegalArgumentException(
572                                            "Parameter data can not be null.");
573                    }
574    
575                    return new CsvReader(new StringReader(data));
576            }
577    
578            /**
579             * Reads another record.
580             * 
581             * @return Whether another record was successfully read or not.
582             * @exception IOException
583             *                Thrown if an error occurs while reading data from the
584             *                source stream.
585             */
586            public boolean readRecord() throws IOException {
587                    checkClosed();
588    
589                    columnsCount = 0;
590                    rawBuffer.Position = 0;
591    
592                    dataBuffer.LineStart = dataBuffer.Position;
593    
594                    hasReadNextLine = false;
595    
596                    // check to see if we've already found the end of data
597    
598                    if (hasMoreData) {
599                            // loop over the data stream until the end of data is found
600                            // or the end of the record is found
601    
602                            do {
603                                    if (dataBuffer.Position == dataBuffer.Count) {
604                                            checkDataLength();
605                                    } else {
606                                            startedWithQualifier = false;
607    
608                                            // grab the current letter as a char
609    
610                                            char currentLetter = dataBuffer.Buffer[dataBuffer.Position];
611    
612                                            if (userSettings.UseTextQualifier
613                                                            && currentLetter == userSettings.TextQualifier) {
614                                                    // this will be a text qualified column, so
615                                                    // we need to set startedWithQualifier to make it
616                                                    // enter the seperate branch to handle text
617                                                    // qualified columns
618    
619                                                    lastLetter = currentLetter;
620    
621                                                    // read qualified
622                                                    startedColumn = true;
623                                                    dataBuffer.ColumnStart = dataBuffer.Position + 1;
624                                                    startedWithQualifier = true;
625                                                    boolean lastLetterWasQualifier = false;
626    
627                                                    char escapeChar = userSettings.TextQualifier;
628    
629                                                    if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH) {
630                                                            escapeChar = Letters.BACKSLASH;
631                                                    }
632    
633                                                    boolean eatingTrailingJunk = false;
634                                                    boolean lastLetterWasEscape = false;
635                                                    boolean readingComplexEscape = false;
636                                                    int escape = ComplexEscape.UNICODE;
637                                                    int escapeLength = 0;
638                                                    char escapeValue = (char) 0;
639    
640                                                    dataBuffer.Position++;
641    
642                                                    do {
643                                                            if (dataBuffer.Position == dataBuffer.Count) {
644                                                                    checkDataLength();
645                                                            } else {
646                                                                    // grab the current letter as a char
647    
648                                                                    currentLetter = dataBuffer.Buffer[dataBuffer.Position];
649    
650                                                                    if (eatingTrailingJunk) {
651                                                                            dataBuffer.ColumnStart = dataBuffer.Position + 1;
652    
653                                                                            if (currentLetter == userSettings.Delimiter) {
654                                                                                    endColumn();
655                                                                            } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))
656                                                                                            || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {
657                                                                                    endColumn();
658    
659                                                                                    endRecord();
660                                                                            }
661                                                                    } else if (readingComplexEscape) {
662                                                                            escapeLength++;
663    
664                                                                            switch (escape) {
665                                                                            case ComplexEscape.UNICODE:
666                                                                                    escapeValue *= (char) 16;
667                                                                                    escapeValue += hexToDec(currentLetter);
668    
669                                                                                    if (escapeLength == 4) {
670                                                                                            readingComplexEscape = false;
671                                                                                    }
672    
673                                                                                    break;
674                                                                            case ComplexEscape.OCTAL:
675                                                                                    escapeValue *= (char) 8;
676                                                                                    escapeValue += (char) (currentLetter - '0');
677    
678                                                                                    if (escapeLength == 3) {
679                                                                                            readingComplexEscape = false;
680                                                                                    }
681    
682                                                                                    break;
683                                                                            case ComplexEscape.DECIMAL:
684                                                                                    escapeValue *= (char) 10;
685                                                                                    escapeValue += (char) (currentLetter - '0');
686    
687                                                                                    if (escapeLength == 3) {
688                                                                                            readingComplexEscape = false;
689                                                                                    }
690    
691                                                                                    break;
692                                                                            case ComplexEscape.HEX:
693                                                                                    escapeValue *= (char) 16;
694                                                                                    escapeValue += hexToDec(currentLetter);
695    
696                                                                                    if (escapeLength == 2) {
697                                                                                            readingComplexEscape = false;
698                                                                                    }
699    
700                                                                                    break;
701                                                                            }
702    
703                                                                            if (!readingComplexEscape) {
704                                                                                    appendLetter(escapeValue);
705                                                                            } else {
706                                                                                    dataBuffer.ColumnStart = dataBuffer.Position + 1;
707                                                                            }
708                                                                    } else if (currentLetter == userSettings.TextQualifier) {
709                                                                            if (lastLetterWasEscape) {
710                                                                                    lastLetterWasEscape = false;
711                                                                                    lastLetterWasQualifier = false;
712                                                                            } else {
713                                                                                    updateCurrentValue();
714    
715                                                                                    if (userSettings.EscapeMode == ESCAPE_MODE_DOUBLED) {
716                                                                                            lastLetterWasEscape = true;
717                                                                                    }
718    
719                                                                                    lastLetterWasQualifier = true;
720                                                                            }
721                                                                    } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
722                                                                                    && lastLetterWasEscape) {
723                                                                            switch (currentLetter) {
724                                                                            case 'n':
725                                                                                    appendLetter(Letters.LF);
726                                                                                    break;
727                                                                            case 'r':
728                                                                                    appendLetter(Letters.CR);
729                                                                                    break;
730                                                                            case 't':
731                                                                                    appendLetter(Letters.TAB);
732                                                                                    break;
733                                                                            case 'b':
734                                                                                    appendLetter(Letters.BACKSPACE);
735                                                                                    break;
736                                                                            case 'f':
737                                                                                    appendLetter(Letters.FORM_FEED);
738                                                                                    break;
739                                                                            case 'e':
740                                                                                    appendLetter(Letters.ESCAPE);
741                                                                                    break;
742                                                                            case 'v':
743                                                                                    appendLetter(Letters.VERTICAL_TAB);
744                                                                                    break;
745                                                                            case 'a':
746                                                                                    appendLetter(Letters.ALERT);
747                                                                                    break;
748                                                                            case '0':
749                                                                            case '1':
750                                                                            case '2':
751                                                                            case '3':
752                                                                            case '4':
753                                                                            case '5':
754                                                                            case '6':
755                                                                            case '7':
756                                                                                    escape = ComplexEscape.OCTAL;
757                                                                                    readingComplexEscape = true;
758                                                                                    escapeLength = 1;
759                                                                                    escapeValue = (char) (currentLetter - '0');
760                                                                                    dataBuffer.ColumnStart = dataBuffer.Position + 1;
761                                                                                    break;
762                                                                            case 'u':
763                                                                            case 'x':
764                                                                            case 'o':
765                                                                            case 'd':
766                                                                            case 'U':
767                                                                            case 'X':
768                                                                            case 'O':
769                                                                            case 'D':
770                                                                                    switch (currentLetter) {
771                                                                                    case 'u':
772                                                                                    case 'U':
773                                                                                            escape = ComplexEscape.UNICODE;
774                                                                                            break;
775                                                                                    case 'x':
776                                                                                    case 'X':
777                                                                                            escape = ComplexEscape.HEX;
778                                                                                            break;
779                                                                                    case 'o':
780                                                                                    case 'O':
781                                                                                            escape = ComplexEscape.OCTAL;
782                                                                                            break;
783                                                                                    case 'd':
784                                                                                    case 'D':
785                                                                                            escape = ComplexEscape.DECIMAL;
786                                                                                            break;
787                                                                                    }
788    
789                                                                                    readingComplexEscape = true;
790                                                                                    escapeLength = 0;
791                                                                                    escapeValue = (char) 0;
792                                                                                    dataBuffer.ColumnStart = dataBuffer.Position + 1;
793    
794                                                                                    break;
795                                                                            default:
796                                                                                    break;
797                                                                            }
798    
799                                                                            lastLetterWasEscape = false;
800    
801                                                                            // can only happen for ESCAPE_MODE_BACKSLASH
802                                                                    } else if (currentLetter == escapeChar) {
803                                                                            updateCurrentValue();
804                                                                            lastLetterWasEscape = true;
805                                                                    } else {
806                                                                            if (lastLetterWasQualifier) {
807                                                                                    if (currentLetter == userSettings.Delimiter) {
808                                                                                            endColumn();
809                                                                                    } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))
810                                                                                                    || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {
811                                                                                            endColumn();
812    
813                                                                                            endRecord();
814                                                                                    } else {
815                                                                                            dataBuffer.ColumnStart = dataBuffer.Position + 1;
816    
817                                                                                            eatingTrailingJunk = true;
818                                                                                    }
819    
820                                                                                    // make sure to clear the flag for next
821                                                                                    // run of the loop
822    
823                                                                                    lastLetterWasQualifier = false;
824                                                                            }
825                                                                    }
826    
827                                                                    // keep track of the last letter because we need
828                                                                    // it for several key decisions
829    
830                                                                    lastLetter = currentLetter;
831    
832                                                                    if (startedColumn) {
833                                                                            dataBuffer.Position++;
834    
835                                                                            if (userSettings.SafetySwitch
836                                                                                            && dataBuffer.Position
837                                                                                                            - dataBuffer.ColumnStart
838                                                                                                            + columnBuffer.Position > 100000) {
839                                                                                    close();
840    
841                                                                                    throw new IOException(
842                                                                                                    "Maximum column length of 100,000 exceeded in column "
843                                                                                                                    + NumberFormat
844                                                                                                                                    .getIntegerInstance()
845                                                                                                                                    .format(
846                                                                                                                                                    columnsCount)
847                                                                                                                    + " in record "
848                                                                                                                    + NumberFormat
849                                                                                                                                    .getIntegerInstance()
850                                                                                                                                    .format(
851                                                                                                                                                    currentRecord)
852                                                                                                                    + ". Set the SafetySwitch property to false"
853                                                                                                                    + " if you're expecting column lengths greater than 100,000 characters to"
854                                                                                                                    + " avoid this error.");
855                                                                            }
856                                                                    }
857                                                            } // end else
858    
859                                                    } while (hasMoreData && startedColumn);
860                                            } else if (currentLetter == userSettings.Delimiter) {
861                                                    // we encountered a column with no data, so
862                                                    // just send the end column
863    
864                                                    lastLetter = currentLetter;
865    
866                                                    endColumn();
867                                            } else if (useCustomRecordDelimiter
868                                                            && currentLetter == userSettings.RecordDelimiter) {
869                                                    // this will skip blank lines
870                                                    if (startedColumn || columnsCount > 0
871                                                                    || !userSettings.SkipEmptyRecords) {
872                                                            endColumn();
873    
874                                                            endRecord();
875                                                    } else {
876                                                            dataBuffer.LineStart = dataBuffer.Position + 1;
877                                                    }
878    
879                                                    lastLetter = currentLetter;
880                                            } else if (!useCustomRecordDelimiter
881                                                            && (currentLetter == Letters.CR || currentLetter == Letters.LF)) {
882                                                    // this will skip blank lines
883                                                    if (startedColumn
884                                                                    || columnsCount > 0
885                                                                    || (!userSettings.SkipEmptyRecords && (currentLetter == Letters.CR || lastLetter != Letters.CR))) {
886                                                            endColumn();
887    
888                                                            endRecord();
889                                                    } else {
890                                                            dataBuffer.LineStart = dataBuffer.Position + 1;
891                                                    }
892    
893                                                    lastLetter = currentLetter;
894                                            } else if (userSettings.UseComments && columnsCount == 0
895                                                            && currentLetter == userSettings.Comment) {
896                                                    // encountered a comment character at the beginning of
897                                                    // the line so just ignore the rest of the line
898    
899                                                    lastLetter = currentLetter;
900    
901                                                    skipLine();
902                                            } else if (userSettings.TrimWhitespace
903                                                            && (currentLetter == Letters.SPACE || currentLetter == Letters.TAB)) {
904                                                    // do nothing, this will trim leading whitespace
905                                                    // for both text qualified columns and non
906    
907                                                    startedColumn = true;
908                                                    dataBuffer.ColumnStart = dataBuffer.Position + 1;
909                                            } else {
910                                                    // since the letter wasn't a special letter, this
911                                                    // will be the first letter of our current column
912    
913                                                    startedColumn = true;
914                                                    dataBuffer.ColumnStart = dataBuffer.Position;
915                                                    boolean lastLetterWasBackslash = false;
916                                                    boolean readingComplexEscape = false;
917                                                    int escape = ComplexEscape.UNICODE;
918                                                    int escapeLength = 0;
919                                                    char escapeValue = (char) 0;
920    
921                                                    boolean firstLoop = true;
922    
923                                                    do {
924                                                            if (!firstLoop
925                                                                            && dataBuffer.Position == dataBuffer.Count) {
926                                                                    checkDataLength();
927                                                            } else {
928                                                                    if (!firstLoop) {
929                                                                            // grab the current letter as a char
930                                                                            currentLetter = dataBuffer.Buffer[dataBuffer.Position];
931                                                                    }
932    
933                                                                    if (!userSettings.UseTextQualifier
934                                                                                    && userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
935                                                                                    && currentLetter == Letters.BACKSLASH) {
936                                                                            if (lastLetterWasBackslash) {
937                                                                                    lastLetterWasBackslash = false;
938                                                                            } else {
939                                                                                    updateCurrentValue();
940                                                                                    lastLetterWasBackslash = true;
941                                                                            }
942                                                                    } else if (readingComplexEscape) {
943                                                                            escapeLength++;
944    
945                                                                            switch (escape) {
946                                                                            case ComplexEscape.UNICODE:
947                                                                                    escapeValue *= (char) 16;
948                                                                                    escapeValue += hexToDec(currentLetter);
949    
950                                                                                    if (escapeLength == 4) {
951                                                                                            readingComplexEscape = false;
952                                                                                    }
953    
954                                                                                    break;
955                                                                            case ComplexEscape.OCTAL:
956                                                                                    escapeValue *= (char) 8;
957                                                                                    escapeValue += (char) (currentLetter - '0');
958    
959                                                                                    if (escapeLength == 3) {
960                                                                                            readingComplexEscape = false;
961                                                                                    }
962    
963                                                                                    break;
964                                                                            case ComplexEscape.DECIMAL:
965                                                                                    escapeValue *= (char) 10;
966                                                                                    escapeValue += (char) (currentLetter - '0');
967    
968                                                                                    if (escapeLength == 3) {
969                                                                                            readingComplexEscape = false;
970                                                                                    }
971    
972                                                                                    break;
973                                                                            case ComplexEscape.HEX:
974                                                                                    escapeValue *= (char) 16;
975                                                                                    escapeValue += hexToDec(currentLetter);
976    
977                                                                                    if (escapeLength == 2) {
978                                                                                            readingComplexEscape = false;
979                                                                                    }
980    
981                                                                                    break;
982                                                                            }
983    
984                                                                            if (!readingComplexEscape) {
985                                                                                    appendLetter(escapeValue);
986                                                                            } else {
987                                                                                    dataBuffer.ColumnStart = dataBuffer.Position + 1;
988                                                                            }
989                                                                    } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
990                                                                                    && lastLetterWasBackslash) {
991                                                                            switch (currentLetter) {
992                                                                            case 'n':
993                                                                                    appendLetter(Letters.LF);
994                                                                                    break;
995                                                                            case 'r':
996                                                                                    appendLetter(Letters.CR);
997                                                                                    break;
998                                                                            case 't':
999                                                                                    appendLetter(Letters.TAB);
1000                                                                                    break;
1001                                                                            case 'b':
1002                                                                                    appendLetter(Letters.BACKSPACE);
1003                                                                                    break;
1004                                                                            case 'f':
1005                                                                                    appendLetter(Letters.FORM_FEED);
1006                                                                                    break;
1007                                                                            case 'e':
1008                                                                                    appendLetter(Letters.ESCAPE);
1009                                                                                    break;
1010                                                                            case 'v':
1011                                                                                    appendLetter(Letters.VERTICAL_TAB);
1012                                                                                    break;
1013                                                                            case 'a':
1014                                                                                    appendLetter(Letters.ALERT);
1015                                                                                    break;
1016                                                                            case '0':
1017                                                                            case '1':
1018                                                                            case '2':
1019                                                                            case '3':
1020                                                                            case '4':
1021                                                                            case '5':
1022                                                                            case '6':
1023                                                                            case '7':
1024                                                                                    escape = ComplexEscape.OCTAL;
1025                                                                                    readingComplexEscape = true;
1026                                                                                    escapeLength = 1;
1027                                                                                    escapeValue = (char) (currentLetter - '0');
1028                                                                                    dataBuffer.ColumnStart = dataBuffer.Position + 1;
1029                                                                                    break;
1030                                                                            case 'u':
1031                                                                            case 'x':
1032                                                                            case 'o':
1033                                                                            case 'd':
1034                                                                            case 'U':
1035                                                                            case 'X':
1036                                                                            case 'O':
1037                                                                            case 'D':
1038                                                                                    switch (currentLetter) {
1039                                                                                    case 'u':
1040                                                                                    case 'U':
1041                                                                                            escape = ComplexEscape.UNICODE;
1042                                                                                            break;
1043                                                                                    case 'x':
1044                                                                                    case 'X':
1045                                                                                            escape = ComplexEscape.HEX;
1046                                                                                            break;
1047                                                                                    case 'o':
1048                                                                                    case 'O':
1049                                                                                            escape = ComplexEscape.OCTAL;
1050                                                                                            break;
1051                                                                                    case 'd':
1052                                                                                    case 'D':
1053                                                                                            escape = ComplexEscape.DECIMAL;
1054                                                                                            break;
1055                                                                                    }
1056    
1057                                                                                    readingComplexEscape = true;
1058                                                                                    escapeLength = 0;
1059                                                                                    escapeValue = (char) 0;
1060                                                                                    dataBuffer.ColumnStart = dataBuffer.Position + 1;
1061    
1062                                                                                    break;
1063                                                                            default:
1064                                                                                    break;
1065                                                                            }
1066    
1067                                                                            lastLetterWasBackslash = false;
1068                                                                    } else {
1069                                                                            if (currentLetter == userSettings.Delimiter) {
1070                                                                                    endColumn();
1071                                                                            } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))
1072                                                                                            || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {
1073                                                                                    endColumn();
1074    
1075                                                                                    endRecord();
1076                                                                            }
1077                                                                    }
1078    
1079                                                                    // keep track of the last letter because we need
1080                                                                    // it for several key decisions
1081    
1082                                                                    lastLetter = currentLetter;
1083                                                                    firstLoop = false;
1084    
1085                                                                    if (startedColumn) {
1086                                                                            dataBuffer.Position++;
1087    
1088                                                                            if (userSettings.SafetySwitch
1089                                                                                            && dataBuffer.Position
1090                                                                                                            - dataBuffer.ColumnStart
1091                                                                                                            + columnBuffer.Position > 100000) {
1092                                                                                    close();
1093    
1094                                                                                    throw new IOException(
1095                                                                                                    "Maximum column length of 100,000 exceeded in column "
1096                                                                                                                    + NumberFormat
1097                                                                                                                                    .getIntegerInstance()
1098                                                                                                                                    .format(
1099                                                                                                                                                    columnsCount)
1100                                                                                                                    + " in record "
1101                                                                                                                    + NumberFormat
1102                                                                                                                                    .getIntegerInstance()
1103                                                                                                                                    .format(
1104                                                                                                                                                    currentRecord)
1105                                                                                                                    + ". Set the SafetySwitch property to false"
1106                                                                                                                    + " if you're expecting column lengths greater than 100,000 characters to"
1107                                                                                                                    + " avoid this error.");
1108                                                                            }
1109                                                                    }
1110                                                            } // end else
1111                                                    } while (hasMoreData && startedColumn);
1112                                            }
1113    
1114                                            if (hasMoreData) {
1115                                                    dataBuffer.Position++;
1116                                            }
1117                                    } // end else
1118                            } while (hasMoreData && !hasReadNextLine);
1119    
1120                            // check to see if we hit the end of the file
1121                            // without processing the current record
1122    
1123                            if (startedColumn || lastLetter == userSettings.Delimiter) {
1124                                    endColumn();
1125    
1126                                    endRecord();
1127                            }
1128                    }
1129    
1130                    if (userSettings.CaptureRawRecord) {
1131                            if (hasMoreData) {
1132                                    if (rawBuffer.Position == 0) {
1133                                            rawRecord = new String(dataBuffer.Buffer,
1134                                                            dataBuffer.LineStart, dataBuffer.Position
1135                                                                            - dataBuffer.LineStart - 1);
1136                                    } else {
1137                                            rawRecord = new String(rawBuffer.Buffer, 0,
1138                                                            rawBuffer.Position)
1139                                                            + new String(dataBuffer.Buffer,
1140                                                                            dataBuffer.LineStart, dataBuffer.Position
1141                                                                                            - dataBuffer.LineStart - 1);
1142                                    }
1143                            } else {
1144                                    // for hasMoreData to ever be false, all data would have had to
1145                                    // have been
1146                                    // copied to the raw buffer
1147                                    rawRecord = new String(rawBuffer.Buffer, 0, rawBuffer.Position);
1148                            }
1149                    } else {
1150                            rawRecord = "";
1151                    }
1152    
1153                    return hasReadNextLine;
1154            }
1155    
1156            /**
1157             * @exception IOException
1158             *                Thrown if an error occurs while reading data from the
1159             *                source stream.
1160             */
1161            private void checkDataLength() throws IOException {
1162                    if (!initialized) {
1163                            if (fileName != null) {
1164                                    inputStream = new BufferedReader(new InputStreamReader(
1165                                                    new FileInputStream(fileName), charset),
1166                                                    StaticSettings.MAX_FILE_BUFFER_SIZE);
1167                            }
1168    
1169                            charset = null;
1170                            initialized = true;
1171                    }
1172    
1173                    updateCurrentValue();
1174    
1175                    if (userSettings.CaptureRawRecord && dataBuffer.Count > 0) {
1176                            if (rawBuffer.Buffer.length - rawBuffer.Position < dataBuffer.Count
1177                                            - dataBuffer.LineStart) {
1178                                    int newLength = rawBuffer.Buffer.length
1179                                                    + Math.max(dataBuffer.Count - dataBuffer.LineStart,
1180                                                                    rawBuffer.Buffer.length);
1181    
1182                                    char[] holder = new char[newLength];
1183    
1184                                    System.arraycopy(rawBuffer.Buffer, 0, holder, 0,
1185                                                    rawBuffer.Position);
1186    
1187                                    rawBuffer.Buffer = holder;
1188                            }
1189    
1190                            System.arraycopy(dataBuffer.Buffer, dataBuffer.LineStart,
1191                                            rawBuffer.Buffer, rawBuffer.Position, dataBuffer.Count
1192                                                            - dataBuffer.LineStart);
1193    
1194                            rawBuffer.Position += dataBuffer.Count - dataBuffer.LineStart;
1195                    }
1196    
1197                    try {
1198                            dataBuffer.Count = inputStream.read(dataBuffer.Buffer, 0,
1199                                            dataBuffer.Buffer.length);
1200                    } catch (IOException ex) {
1201                            close();
1202    
1203                            throw ex;
1204                    }
1205    
1206                    // if no more data could be found, set flag stating that
1207                    // the end of the data was found
1208    
1209                    if (dataBuffer.Count == -1) {
1210                            hasMoreData = false;
1211                    }
1212    
1213                    dataBuffer.Position = 0;
1214                    dataBuffer.LineStart = 0;
1215                    dataBuffer.ColumnStart = 0;
1216            }
1217    
1218            /**
1219             * Read the first record of data as column headers.
1220             * 
1221             * @return Whether the header record was successfully read or not.
1222             * @exception IOException
1223             *                Thrown if an error occurs while reading data from the
1224             *                source stream.
1225             */
1226            public boolean readHeaders() throws IOException {
1227                    boolean result = readRecord();
1228    
1229                    // copy the header data from the column array
1230                    // to the header string array
1231    
1232                    headersHolder.Length = columnsCount;
1233    
1234                    headersHolder.Headers = new String[columnsCount];
1235    
1236                    for (int i = 0; i < headersHolder.Length; i++) {
1237                            String columnValue = get(i);
1238    
1239                            headersHolder.Headers[i] = columnValue;
1240    
1241                            // if there are duplicate header names, we will save the last one
1242                            headersHolder.IndexByName.put(columnValue, new Integer(i));
1243                    }
1244    
1245                    if (result) {
1246                            currentRecord--;
1247                    }
1248    
1249                    columnsCount = 0;
1250    
1251                    return result;
1252            }
1253    
1254            /**
1255             * Returns the column header value for a given column index.
1256             * 
1257             * @param columnIndex
1258             *            The index of the header column being requested.
1259             * @return The value of the column header at the given column index.
1260             * @exception IOException
1261             *                Thrown if this object has already been closed.
1262             */
1263            public String getHeader(int columnIndex) throws IOException {
1264                    checkClosed();
1265    
1266                    if(columnIndex < 0 || columnIndex >= headersHolder.Length){
1267                            return "";
1268                    }
1269                    
1270                    return headersHolder.Headers[columnIndex];
1271            }
1272    
1273            public boolean isQualified(int columnIndex) throws IOException {
1274                    checkClosed();
1275                    
1276                    if (columnIndex < 0 || columnIndex >= columnsCount) {
1277                            return false;
1278                    }
1279                    
1280                    return isQualified[columnIndex];
1281            }
1282    
1283            /**
1284             * @exception IOException
1285             *                Thrown if a very rare extreme exception occurs during
1286             *                parsing, normally resulting from improper data format.
1287             */
1288            private void endColumn() throws IOException {
1289                    String currentValue = "";
1290    
1291                    // must be called before setting startedColumn = false
1292                    if (startedColumn) {
1293                            if (columnBuffer.Position == 0) {
1294                                    if (dataBuffer.ColumnStart < dataBuffer.Position) {
1295                                            int lastLetter = dataBuffer.Position - 1;
1296    
1297                                            if (userSettings.TrimWhitespace && !startedWithQualifier) {
1298                                                    while (lastLetter >= dataBuffer.ColumnStart
1299                                                                    && (dataBuffer.Buffer[lastLetter] == Letters.SPACE || dataBuffer.Buffer[lastLetter] == Letters.TAB)) {
1300                                                            lastLetter--;
1301                                                    }
1302                                            }
1303    
1304                                            currentValue = new String(dataBuffer.Buffer,
1305                                                            dataBuffer.ColumnStart, lastLetter
1306                                                                            - dataBuffer.ColumnStart + 1);
1307                                    }
1308                            } else {
1309                                    updateCurrentValue();
1310    
1311                                    int lastLetter = columnBuffer.Position - 1;
1312    
1313                                    if (userSettings.TrimWhitespace && !startedWithQualifier) {
1314                                            while (lastLetter >= 0
1315                                                            && (columnBuffer.Buffer[lastLetter] == Letters.SPACE || columnBuffer.Buffer[lastLetter] == Letters.SPACE)) {
1316                                                    lastLetter--;
1317                                            }
1318                                    }
1319    
1320                                    currentValue = new String(columnBuffer.Buffer, 0,
1321                                                    lastLetter + 1);
1322                            }
1323                    }
1324    
1325                    columnBuffer.Position = 0;
1326    
1327                    startedColumn = false;
1328    
1329                    if (columnsCount >= 100000 && userSettings.SafetySwitch) {
1330                            close();
1331    
1332                            throw new IOException(
1333                                            "Maximum column count of 100,000 exceeded in record "
1334                                                            + NumberFormat.getIntegerInstance().format(
1335                                                                            currentRecord)
1336                                                            + ". Set the SafetySwitch property to false"
1337                                                            + " if you're expecting more than 100,000 columns per record to"
1338                                                            + " avoid this error.");
1339                    }
1340    
1341                    // check to see if our current holder array for
1342                    // column chunks is still big enough to handle another
1343                    // column chunk
1344    
1345                    if (columnsCount == values.length) {
1346                            // holder array needs to grow to be able to hold another column
1347                            int newLength = values.length * 2;
1348    
1349                            String[] holder = new String[newLength];
1350    
1351                            System.arraycopy(values, 0, holder, 0, values.length);
1352    
1353                            values = holder;
1354    
1355                            boolean[] qualifiedHolder = new boolean[newLength];
1356    
1357                            System.arraycopy(isQualified, 0, qualifiedHolder, 0,
1358                                            isQualified.length);
1359    
1360                            isQualified = qualifiedHolder;
1361                    }
1362    
1363                    values[columnsCount] = currentValue;
1364    
1365                    isQualified[columnsCount] = startedWithQualifier;
1366    
1367                    currentValue = "";
1368    
1369                    columnsCount++;
1370            }
1371    
1372            private void appendLetter(char letter) {
1373                    if (columnBuffer.Position == columnBuffer.Buffer.length) {
1374                            int newLength = columnBuffer.Buffer.length * 2;
1375    
1376                            char[] holder = new char[newLength];
1377    
1378                            System.arraycopy(columnBuffer.Buffer, 0, holder, 0,
1379                                            columnBuffer.Position);
1380    
1381                            columnBuffer.Buffer = holder;
1382                    }
1383                    columnBuffer.Buffer[columnBuffer.Position++] = letter;
1384                    dataBuffer.ColumnStart = dataBuffer.Position + 1;
1385            }
1386    
1387            private void updateCurrentValue() {
1388                    if (startedColumn && dataBuffer.ColumnStart < dataBuffer.Position) {
1389                            if (columnBuffer.Buffer.length - columnBuffer.Position < dataBuffer.Position
1390                                            - dataBuffer.ColumnStart) {
1391                                    int newLength = columnBuffer.Buffer.length
1392                                                    + Math.max(
1393                                                                    dataBuffer.Position - dataBuffer.ColumnStart,
1394                                                                    columnBuffer.Buffer.length);
1395    
1396                                    char[] holder = new char[newLength];
1397    
1398                                    System.arraycopy(columnBuffer.Buffer, 0, holder, 0,
1399                                                    columnBuffer.Position);
1400    
1401                                    columnBuffer.Buffer = holder;
1402                            }
1403    
1404                            System.arraycopy(dataBuffer.Buffer, dataBuffer.ColumnStart,
1405                                            columnBuffer.Buffer, columnBuffer.Position,
1406                                            dataBuffer.Position - dataBuffer.ColumnStart);
1407    
1408                            columnBuffer.Position += dataBuffer.Position
1409                                            - dataBuffer.ColumnStart;
1410                    }
1411    
1412                    dataBuffer.ColumnStart = dataBuffer.Position + 1;
1413            }
1414            
1415            private void endRecord() {
1416                    // this flag is used as a loop exit condition
1417                    // during parsing
1418    
1419                    hasReadNextLine = true;
1420    
1421                    currentRecord++;
1422            }
1423    
1424            /**
1425             * Gets the corresponding column index for a given column header name.
1426             * 
1427             * @param headerName
1428             *            The header name of the column.
1429             * @return The column index for the given column header name.&nbsp;Returns
1430             *         -1 if not found.
1431             * @exception IOException
1432             *                Thrown if this object has already been closed.
1433             */
1434            public int getIndex(String headerName) throws IOException {
1435                    checkClosed();
1436    
1437                    Object indexValue = headersHolder.IndexByName.get(headerName);
1438                    
1439                    if(indexValue == null){
1440                            return -1;
1441                    }
1442                    
1443                    return ((Integer) indexValue).intValue();
1444            }
1445    
1446            /**
1447             * Skips the next record of data by parsing each column.&nbsp;Does not
1448             * increment
1449             * {@link com.csvreader.CsvReader#getCurrentRecord getCurrentRecord()}.
1450             * 
1451             * @return Whether another record was successfully skipped or not.
1452             * @exception IOException
1453             *                Thrown if an error occurs while reading data from the
1454             *                source stream.
1455             */
1456            public boolean skipRecord() throws IOException {
1457                    checkClosed();
1458    
1459                    boolean recordRead = false;
1460    
1461                    if (hasMoreData) {
1462                            recordRead = readRecord();
1463    
1464                            if (recordRead) {
1465                                    currentRecord--;
1466                            }
1467                    }
1468    
1469                    return recordRead;
1470            }
1471    
1472            /**
1473             * Skips the next line of data using the standard end of line characters and
1474             * does not do any column delimited parsing.
1475             * 
1476             * @return Whether a line was successfully skipped or not.
1477             * @exception IOException
1478             *                Thrown if an error occurs while reading data from the
1479             *                source stream.
1480             */
1481            public boolean skipLine() throws IOException {
1482                    checkClosed();
1483    
1484                    // clear public column values for current line
1485    
1486                    columnsCount = 0;
1487    
1488                    boolean skippedLine = false;
1489    
1490                    if (hasMoreData) {
1491                            boolean foundEol = false;
1492    
1493                            do {
1494                                    if (dataBuffer.Position == dataBuffer.Count) {
1495                                            checkDataLength();
1496                                    } else {
1497                                            skippedLine = true;
1498    
1499                                            // grab the current letter as a char
1500    
1501                                            char currentLetter = dataBuffer.Buffer[dataBuffer.Position];
1502    
1503                                            if (currentLetter == Letters.CR
1504                                                            || currentLetter == Letters.LF) {
1505                                                    foundEol = true;
1506                                            }
1507    
1508                                            // keep track of the last letter because we need
1509                                            // it for several key decisions
1510    
1511                                            lastLetter = currentLetter;
1512    
1513                                            if (!foundEol) {
1514                                                    dataBuffer.Position++;
1515                                            }
1516    
1517                                    } // end else
1518                            } while (hasMoreData && !foundEol);
1519    
1520                            columnBuffer.Position = 0;
1521    
1522                            dataBuffer.LineStart = dataBuffer.Position + 1;
1523                    }
1524    
1525                    rawBuffer.Position = 0;
1526                    rawRecord = "";
1527    
1528                    return skippedLine;
1529            }
1530    
1531            /**
1532             * Closes and releases all related resources.
1533             */
1534            public void close() {
1535                    if (!closed) {
1536                            close(true);
1537    
1538                            closed = true;
1539                    }
1540            }
1541    
1542            /**
1543             * 
1544             */
1545            private void close(boolean closing) {
1546                    if (!closed) {
1547                            if (closing) {
1548                                    charset = null;
1549                                    headersHolder.Headers = null;
1550                                    headersHolder.IndexByName = null;
1551                                    dataBuffer.Buffer = null;
1552                                    columnBuffer.Buffer = null;
1553                                    rawBuffer.Buffer = null;
1554                            }
1555    
1556                            try {
1557                                    if (initialized) {
1558                                            inputStream.close();
1559                                    }
1560                            } catch (Exception e) {
1561                                    // just eat the exception
1562                            }
1563    
1564                            inputStream = null;
1565    
1566                            closed = true;
1567                    }
1568            }
1569    
1570            /**
1571             * @exception IOException
1572             *                Thrown if this object has already been closed.
1573             */
1574            private void checkClosed() throws IOException {
1575                    if (closed) {
1576                            throw new IOException(
1577                                            "This instance of the CsvReader class has already been closed.");
1578                    }
1579            }
1580    
1581            /**
1582             * 
1583             */
1584            protected void finalize() {
1585                    close(false);
1586            }
1587    
1588            private class ComplexEscape {
1589                    private static final int UNICODE = 1;
1590    
1591                    private static final int OCTAL = 2;
1592    
1593                    private static final int DECIMAL = 3;
1594    
1595                    private static final int HEX = 4;
1596            }
1597    
1598            private static char hexToDec(char hex) {
1599                    char result;
1600    
1601                    if (hex >= 'a') {
1602                            result = (char) (hex - 'a' + 10);
1603                    } else if (hex >= 'A') {
1604                            result = (char) (hex - 'A' + 10);
1605                    } else {
1606                            result = (char) (hex - '0');
1607                    }
1608    
1609                    return result;
1610            }
1611    
1612            private class DataBuffer {
1613                    public char[] Buffer;
1614    
1615                    public int Position;
1616    
1617                    // / <summary>
1618                    // / How much usable data has been read into the stream,
1619                    // / which will not always be as long as Buffer.Length.
1620                    // / </summary>
1621                    public int Count;
1622    
1623                    // / <summary>
1624                    // / The position of the cursor in the buffer when the
1625                    // / current column was started or the last time data
1626                    // / was moved out to the column buffer.
1627                    // / </summary>
1628                    public int ColumnStart;
1629    
1630                    public int LineStart;
1631    
1632                    public DataBuffer() {
1633                            Buffer = new char[StaticSettings.MAX_BUFFER_SIZE];
1634                            Position = 0;
1635                            Count = 0;
1636                            ColumnStart = 0;
1637                            LineStart = 0;
1638                    }
1639            }
1640    
1641            private class ColumnBuffer {
1642                    public char[] Buffer;
1643    
1644                    public int Position;
1645    
1646                    public ColumnBuffer() {
1647                            Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE];
1648                            Position = 0;
1649                    }
1650            }
1651    
1652            private class RawRecordBuffer {
1653                    public char[] Buffer;
1654    
1655                    public int Position;
1656    
1657                    public RawRecordBuffer() {
1658                            Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE
1659                                            * StaticSettings.INITIAL_COLUMN_COUNT];
1660                            Position = 0;
1661                    }
1662            }
1663    
1664            private class Letters {
1665                    public static final char LF = '\n';
1666    
1667                    public static final char CR = '\r';
1668    
1669                    public static final char QUOTE = '"';
1670    
1671                    public static final char COMMA = ',';
1672    
1673                    public static final char SPACE = ' ';
1674    
1675                    public static final char TAB = '\t';
1676    
1677                    public static final char POUND = '#';
1678    
1679                    public static final char BACKSLASH = '\\';
1680    
1681                    public static final char NULL = '\0';
1682    
1683                    public static final char BACKSPACE = '\b';
1684    
1685                    public static final char FORM_FEED = '\f';
1686    
1687                    public static final char ESCAPE = '\u001B'; // ASCII/ANSI escape
1688    
1689                    public static final char VERTICAL_TAB = '\u000B';
1690    
1691                    public static final char ALERT = '\u0007';
1692            }
1693    
1694            private class UserSettings {
1695                    // having these as publicly accessible members will prevent
1696                    // the overhead of the method call that exists on properties
1697                    public boolean CaseSensitive;
1698    
1699                    public char TextQualifier;
1700    
1701                    public boolean TrimWhitespace;
1702    
1703                    public boolean UseTextQualifier;
1704    
1705                    public char Delimiter;
1706    
1707                    public char RecordDelimiter;
1708    
1709                    public char Comment;
1710    
1711                    public boolean UseComments;
1712    
1713                    public int EscapeMode;
1714    
1715                    public boolean SafetySwitch;
1716    
1717                    public boolean SkipEmptyRecords;
1718    
1719                    public boolean CaptureRawRecord;
1720    
1721                    public UserSettings() {
1722                            CaseSensitive = true;
1723                            TextQualifier = Letters.QUOTE;
1724                            TrimWhitespace = true;
1725                            UseTextQualifier = true;
1726                            Delimiter = Letters.COMMA;
1727                            RecordDelimiter = Letters.NULL;
1728                            Comment = Letters.POUND;
1729                            UseComments = false;
1730                            EscapeMode = CsvReader.ESCAPE_MODE_DOUBLED;
1731                            SafetySwitch = true;
1732                            SkipEmptyRecords = true;
1733                            CaptureRawRecord = true;
1734                    }
1735            }
1736    
1737            private class HeadersHolder {
1738                    public String[] Headers;
1739    
1740                    public int Length;
1741    
1742                    public HashMap<String, Integer> IndexByName;
1743    
1744                    public HeadersHolder() {
1745                            Headers = null;
1746                            Length = 0;
1747                            IndexByName = new HashMap<String, Integer>();
1748                    }
1749            }
1750    
1751            private class StaticSettings {
1752                    // these are static instead of final so they can be changed in unit test
1753                    // isn't visible outside this class and is only accessed once during
1754                    // CsvReader construction
1755                    public static final int MAX_BUFFER_SIZE = 1024;
1756    
1757                    public static final int MAX_FILE_BUFFER_SIZE = 4 * 1024;
1758    
1759                    public static final int INITIAL_COLUMN_COUNT = 10;
1760    
1761                    public static final int INITIAL_COLUMN_BUFFER_SIZE = 50;
1762            }
1763    }