1 #ifndef INC_CharScanner_hpp__
2 #define INC_CharScanner_hpp__
3
4 /**
5 * <b>SOFTWARE RIGHTS</b>
6 * <p>
7 * ANTLR 2.6.0 MageLang Institute, 1999
8 * <p>
9 * $Id: CharScanner.hpp,v 1.2 2003/05/29 06:31:42 ronniemaor Exp $
10 * <p>
11 * We reserve no legal rights to the ANTLR--it is fully in the
12 * public domain. An individual or company may do whatever
13 * they wish with source code distributed with ANTLR or the
14 * code generated by ANTLR, including the incorporation of
15 * ANTLR, or its output, into commercial software.
16 * <p>
17 * We encourage users to develop software with ANTLR. However,
18 * we do ask that credit is given to us for developing
19 * ANTLR. By "credit", we mean that if you use ANTLR or
20 * incorporate any source code into one of your programs
21 * (commercial product, research project, or otherwise) that
22 * you acknowledge this fact somewhere in the documentation,
23 * research report, etc... If you like ANTLR and have
24 * developed a nice tool with the output, please mention that
25 * you developed it using ANTLR. In addition, we ask that the
26 * headers remain intact in our source code. As long as these
27 * guidelines are kept, we expect to continue enhancing this
28 * system and expect to make other tools available as they are
29 * completed.
30 * <p>
31 * The ANTLR gang:
32 * @version ANTLR 2.6.0 MageLang Institute, 1999
33 * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a>
34 * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a>
35 * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a>
36 */
37
38 #include "antlr/config.hpp"
39 #include "antlr/TokenStream.hpp"
40 #include "antlr/RecognitionException.hpp"
41 #include "antlr/InputBuffer.hpp"
42 #include "antlr/BitSet.hpp"
43 #include "antlr/LexerSharedInputState.hpp"
44 #include <map>
45 #include <cstdio> // for EOF
46
47 ANTLR_BEGIN_NAMESPACE(xparam_antlr)
48
49 class CharScanner;
50
51 class CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
52 private:
53 const CharScanner* scanner;
54 public:
55 #ifdef NO_TEMPLATE_PARTS
56 CharScannerLiteralsLess(); // not really used
57 #endif
58 CharScannerLiteralsLess(const CharScanner* theScanner);
59 bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
60 private:
61 // CharScannerLiteralsLess(const CharScannerLiteralsLess&);
62 // CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
63 };
64
65 class CharScanner : public TokenStream {
66 private:
67 #ifndef NO_STATIC_CONSTS
68 static const int NO_CHAR = 0;
69 #else
70 enum {
71 NO_CHAR = 0
72 };
73 #endif
74
75 public:
76 #ifndef NO_STATIC_CONSTS
77 static const int EOF_CHAR = EOF;
78 #else
79 enum {
80 EOF_CHAR = EOF
81 };
82 #endif
83
84 protected:
85 ANTLR_USE_NAMESPACE(std)string text; // text of current token
86
87 bool saveConsumedInput; // does consume() save characters?
88
89 typedef RefToken (*factory_type)();
90 factory_type tokenFactory; // what kind of tokens to create?
91
92 bool caseSensitive;
93 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
94
95 RefToken _returnToken; // used to return tokens w/o using return val
96
97 // Input chars
98 LexerSharedInputState inputState;
99
100 /** Used during filter mode to indicate that path is desired.
101 * A subsequent scan error will report an error as usual if acceptPath=true;
102 */
103 bool commitToPath;
104
105 public:
106 CharScanner();
107
108 CharScanner(InputBuffer& cb);
109 CharScanner(InputBuffer* cb);
110
111 CharScanner(const LexerSharedInputState& state);
112
113 virtual ~CharScanner();
114
115 virtual void append(char c);
116
117 virtual void append(const ANTLR_USE_NAMESPACE(std)string& s);
118
119 virtual void commit();
120
121 virtual void consume();
122
123 /** Consume chars until one matches the given char */
124 virtual void consumeUntil(int c);
125
126 /** Consume chars until one matches the given set */
127 virtual void consumeUntil(const BitSet& set);
128
129 virtual bool getCaseSensitive() const;
130
131 virtual bool getCaseSensitiveLiterals() const=0;
132
133 virtual int getColumn() const;
134
135 virtual void setColumn(int c);
136
137 virtual bool getCommitToPath() const;
138
139 virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const;
140
141 virtual InputBuffer& getInputBuffer();
142
143 virtual LexerSharedInputState getInputState();
144
145 virtual int getLine() const;
146
147 /** return a copy of the current text buffer */
148 virtual const ANTLR_USE_NAMESPACE(std)string& getText() const;
149
150 virtual RefToken getTokenObject() const;
151
152 virtual int LA(int i);
153
154 protected:
155 virtual RefToken makeToken(int t);
156
157 public:
158 virtual int mark();
159
160 virtual void match(int c);
161
162 virtual void match(const BitSet& b);
163
164 virtual void match(const ANTLR_USE_NAMESPACE(std)string& s);
165
166 virtual void matchNot(int c);
167
168 virtual void matchRange(int c1, int c2);
169
170 virtual void newline();
171
172 virtual void tab();
173
174 void panic();
175
176 void panic(const ANTLR_USE_NAMESPACE(std)string& s);
177
178 /** Report exception errors caught in nextToken() */
179 virtual void reportError(const RecognitionException& e);
180
181 /** Parser error-reporting function can be overridden in subclass */
182 virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
183
184 /** Parser warning-reporting function can be overridden in subclass */
185 virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
186
187 virtual void resetText();
188
189 virtual void rewind(int pos);
190
191 virtual void setCaseSensitive(bool t);
192
193 virtual void setCommitToPath(bool commit);
194
195 virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f);
196
197 virtual void setInputState(LexerSharedInputState state);
198
199 virtual void setLine(int l);
200
201 virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s);
202
203 virtual void setTokenObjectFactory(factory_type factory);
204
205 // Test the token text against the literals table
206 // Override this method to perform a different literals test
207 virtual int testLiteralsTable(int ttype) const;
208
209 // Test the text passed in against the literals table
210 // Override this method to perform a different literals test
211 // This is used primarily when you want to test a portion of
212 // a token
213 virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& text,int ttype) const;
214
215 // Override this method to get more specific case handling
216 virtual int toLower(int c) const;
217
218 protected:
219 class Tracer {
220 private:
221 CharScanner* parser;
222 ANTLR_USE_NAMESPACE(std)string text;
223 public:
Tracer(CharScanner * p,const ANTLR_USE_NAMESPACE (std)string & t)224 Tracer(CharScanner* p,const ANTLR_USE_NAMESPACE(std)string& t)
225 : parser(p), text(t) { parser->traceIn(text); }
~Tracer()226 ~Tracer()
227 { parser->traceOut(text); }
228 };
229
230 int traceDepth;
231 public:
232 virtual void traceIndent();
233 virtual void traceIn(const ANTLR_USE_NAMESPACE(std)string& rname);
234 virtual void traceOut(const ANTLR_USE_NAMESPACE(std)string& rname);
235
236 /* This method is called by YourLexer::nextToken() when the lexer has
237 * hit EOF condition. EOF is NOT a character.
238 * This method is not called if EOF is reached during
239 * syntactic predicate evaluation or during evaluation
240 * of normal lexical rules, which presumably would be
241 * an IOException. This traps the "normal" EOF condition.
242 *
243 * uponEOF() is called after the complete evaluation of
244 * the previous token and only if your parser asks
245 * for another token beyond that last non-EOF token.
246 *
247 * You might want to throw token or char stream exceptions
248 * like: "Heh, premature eof" or a retry stream exception
249 * ("I found the end of this file, go back to referencing file").
250 */
251 virtual void uponEOF();
252 };
253
LA(int i)254 inline int CharScanner::LA(int i)
255 {
256 if ( caseSensitive ) {
257 return inputState->getInput().LA(i);
258 } else {
259 return toLower(inputState->getInput().LA(i));
260 }
261 }
262
263 ANTLR_END_NAMESPACE
264
265 #endif //INC_CharScanner_hpp__
266