1 /*
2  * This file is part of ELKI:
3  * Environment for Developing KDD-Applications Supported by Index-Structures
4  *
5  * Copyright (C) 2018
6  * ELKI Development Team
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Affero General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU Affero General Public License for more details.
17  *
18  * You should have received a copy of the GNU Affero General Public License
19  * along with this program. If not, see <http://www.gnu.org/licenses/>.
20  */
21 package de.lmu.ifi.dbs.elki.utilities.io;
22 
23 import java.io.IOException;
24 import java.util.regex.Matcher;
25 import java.util.regex.Pattern;
26 
27 /**
28  * Reader that will tokenize the input data as desired.
29  *
30  * @author Erich Schubert
31  * @since 0.7.0
32  *
33  * @composed - - - Tokenizer
34  */
35 public class TokenizedReader extends BufferedLineReader {
36   /**
37    * Comment pattern.
38    */
39   private Matcher comment = null;
40 
41   /**
42    * String tokenizer.
43    */
44   protected Tokenizer tokenizer;
45 
46   /**
47    * Constructor.
48    *
49    * @param colSep Column separator
50    * @param quoteChars Quote character
51    * @param comment Comment pattern
52    */
TokenizedReader(Pattern colSep, String quoteChars, Pattern comment)53   public TokenizedReader(Pattern colSep, String quoteChars, Pattern comment) {
54     super();
55     this.tokenizer = new Tokenizer(colSep, quoteChars);
56     this.comment = comment.matcher("");
57   }
58 
59   /**
60    * Read the next line into the tokenizer.
61    *
62    * @return The next line, or {@code null}.
63    */
nextLineExceptComments()64   public boolean nextLineExceptComments() throws IOException {
65     while(nextLine()) {
66       if(comment == null || !comment.reset(buf).matches()) {
67         tokenizer.initialize(buf, 0, buf.length());
68         return true;
69       }
70     }
71     return false;
72   }
73 
74   /**
75    * Cleanup the internal state of the tokenized reader.
76    *
77    * This also closes the input stream, but the TokenizerReader can still be
78    * applied to a new stream using any of the {@link #reset} methods.
79    */
80   @Override
reset()81   public void reset() {
82     super.reset();
83     if(comment != null) {
84       comment.reset("");
85     }
86     tokenizer.cleanup();
87   }
88 
89   @Override
close()90   public void close() throws IOException {
91     reset();
92     super.close();
93   }
94 
95   /**
96    * Get the tokenizer of the reader.
97    *
98    * @return Tokenizer
99    */
getTokenizer()100   public Tokenizer getTokenizer() {
101     return tokenizer;
102   }
103 }