1 #ifndef INC_CharScanner_hpp__
2 #define INC_CharScanner_hpp__
3 
4 /**
5  * <b>SOFTWARE RIGHTS</b>
6  * <p>
7  * ANTLR 2.6.0 MageLang Insitute, 1999
8  * <p>
9  * $Id: CharScanner.hpp,v 1.2 2003/05/29 06:31:42 ronniemaor Exp $
10  * <p>
11  * We reserve no legal rights to the ANTLR--it is fully in the
12  * public domain. An individual or company may do whatever
13  * they wish with source code distributed with ANTLR or the
14  * code generated by ANTLR, including the incorporation of
15  * ANTLR, or its output, into commerical software.
16  * <p>
17  * We encourage users to develop software with ANTLR. However,
18  * we do ask that credit is given to us for developing
19  * ANTLR. By "credit", we mean that if you use ANTLR or
20  * incorporate any source code into one of your programs
21  * (commercial product, research project, or otherwise) that
22  * you acknowledge this fact somewhere in the documentation,
23  * research report, etc... If you like ANTLR and have
24  * developed a nice tool with the output, please mention that
25  * you developed it using ANTLR. In addition, we ask that the
26  * headers remain intact in our source code. As long as these
27  * guidelines are kept, we expect to continue enhancing this
28  * system and expect to make other tools available as they are
29  * completed.
30  * <p>
31  * The ANTLR gang:
32  * @version ANTLR 2.6.0 MageLang Insitute, 1999
33  * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a>
34  * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a>
35  * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a>
36  */
37 
38 #include "antlr/config.hpp"
39 #include "antlr/TokenStream.hpp"
40 #include "antlr/RecognitionException.hpp"
41 #include "antlr/InputBuffer.hpp"
42 #include "antlr/BitSet.hpp"
43 #include "antlr/LexerSharedInputState.hpp"
44 #include <map>
45 #include <cstdio>  // for EOF
46 
47 ANTLR_BEGIN_NAMESPACE(xparam_antlr)
48 
49 class CharScanner;
50 
51 class CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
52 private:
53 	const CharScanner* scanner;
54 public:
55 #ifdef NO_TEMPLATE_PARTS
56 	CharScannerLiteralsLess(); // not really used
57 #endif
58 	CharScannerLiteralsLess(const CharScanner* theScanner);
59 	bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
60 private:
61 //	CharScannerLiteralsLess(const CharScannerLiteralsLess&);
62 //	CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
63 };
64 
65 class CharScanner : public TokenStream {
66 private:
67 #ifndef NO_STATIC_CONSTS
68 	static const int NO_CHAR = 0;
69 #else
70 	enum {
71 		NO_CHAR = 0
72 	};
73 #endif
74 
75 public:
76 #ifndef NO_STATIC_CONSTS
77 	static const int EOF_CHAR = EOF;
78 #else
79 	enum {
80 		EOF_CHAR = EOF
81 	};
82 #endif
83 
84 protected:
85 	ANTLR_USE_NAMESPACE(std)string text;		// text of current token
86 
87 	bool saveConsumedInput; // does consume() save characters?
88 
89 	typedef RefToken (*factory_type)();
90 	factory_type tokenFactory; // what kind of tokens to create?
91 
92 	bool caseSensitive;
93 	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
94 
95 	RefToken _returnToken; // used to return tokens w/o using return val
96 
97 	// Input chars
98 	LexerSharedInputState inputState;
99 
100 	/** Used during filter mode to indicate that path is desired.
101 	 *  A subsequent scan error will report an error as usual if acceptPath=true;
102 	 */
103 	bool commitToPath;
104 
105 public:
106 	CharScanner();
107 
108 	CharScanner(InputBuffer& cb);
109 	CharScanner(InputBuffer* cb);
110 
111 	CharScanner(const LexerSharedInputState& state);
112 
113 	virtual ~CharScanner();
114 
115 	virtual void append(char c);
116 
117 	virtual void append(const ANTLR_USE_NAMESPACE(std)string& s);
118 
119 	virtual void commit();
120 
121 	virtual void consume();
122 
123 	/** Consume chars until one matches the given char */
124 	virtual void consumeUntil(int c);
125 
126 	/** Consume chars until one matches the given set */
127 	virtual void consumeUntil(const BitSet& set);
128 
129 	virtual bool getCaseSensitive() const;
130 
131 	virtual bool getCaseSensitiveLiterals() const=0;
132 
133 	virtual int getColumn() const;
134 
135 	virtual void setColumn(int c);
136 
137 	virtual bool getCommitToPath() const;
138 
139 	virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const;
140 
141 	virtual InputBuffer& getInputBuffer();
142 
143 	virtual LexerSharedInputState getInputState();
144 
145 	virtual int getLine() const;
146 
147 	/** return a copy of the current text buffer */
148 	virtual const ANTLR_USE_NAMESPACE(std)string& getText() const;
149 
150 	virtual RefToken getTokenObject() const;
151 
152 	virtual int LA(int i);
153 
154 protected:
155 	virtual RefToken makeToken(int t);
156 
157 public:
158 	virtual int mark();
159 
160 	virtual void match(int c);
161 
162 	virtual void match(const BitSet& b);
163 
164 	virtual void match(const ANTLR_USE_NAMESPACE(std)string& s);
165 
166 	virtual void matchNot(int c);
167 
168 	virtual void matchRange(int c1, int c2);
169 
170 	virtual void newline();
171 
172 	virtual void tab();
173 
174 	void panic();
175 
176 	void panic(const ANTLR_USE_NAMESPACE(std)string& s);
177 
178 	/** Report exception errors caught in nextToken() */
179 	virtual void reportError(const RecognitionException& e);
180 
181 	/** Parser error-reporting function can be overridden in subclass */
182 	virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
183 
184 	/** Parser warning-reporting function can be overridden in subclass */
185 	virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
186 
187 	virtual void resetText();
188 
189 	virtual void rewind(int pos);
190 
191 	virtual void setCaseSensitive(bool t);
192 
193 	virtual void setCommitToPath(bool commit);
194 
195 	virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f);
196 
197 	virtual void setInputState(LexerSharedInputState state);
198 
199 	virtual void setLine(int l);
200 
201 	virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s);
202 
203 	virtual void setTokenObjectFactory(factory_type factory);
204 
205 	// Test the token text against the literals table
206 	// Override this method to perform a different literals test
207 	virtual int testLiteralsTable(int ttype) const;
208 
209 	// Test the text passed in against the literals table
210 	// Override this method to perform a different literals test
211 	// This is used primarily when you want to test a portion of
212 	// a token
213 	virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& text,int ttype) const;
214 
215 	// Override this method to get more specific case handling
216 	virtual int toLower(int c) const;
217 
218 protected:
219 	class Tracer {
220 	private:
221 		CharScanner* parser;
222 		ANTLR_USE_NAMESPACE(std)string text;
223 	public:
Tracer(CharScanner * p,const ANTLR_USE_NAMESPACE (std)string & t)224 		Tracer(CharScanner* p,const ANTLR_USE_NAMESPACE(std)string& t)
225 			: parser(p), text(t) { parser->traceIn(text); }
~Tracer()226 		~Tracer()
227 			{ parser->traceOut(text); }
228 	};
229 
230 	int traceDepth;
231 public:
232 	virtual void traceIndent();
233 	virtual void traceIn(const ANTLR_USE_NAMESPACE(std)string& rname);
234 	virtual void traceOut(const ANTLR_USE_NAMESPACE(std)string& rname);
235 
236 	/* This method is called by YourLexer::nextToken() when the lexer has
237 	*  hit EOF condition.  EOF is NOT a character.
238 	*  This method is not called if EOF is reached during
239 	*  syntactic predicate evaluation or during evaluation
240 	*  of normal lexical rules, which presumably would be
241 	*  an IOException.  This traps the "normal" EOF condition.
242 	*
243 	*  uponEOF() is called after the complete evaluation of
244 	*  the previous token and only if your parser asks
245 	*  for another token beyond that last non-EOF token.
246 	*
247 	*  You might want to throw token or char stream exceptions
248 	*  like: "Heh, premature eof" or a retry stream exception
249 	*  ("I found the end of this file, go back to referencing file").
250 	*/
251 	virtual void uponEOF();
252 };
253 
LA(int i)254 inline int CharScanner::LA(int i)
255 {
256 	if ( caseSensitive ) {
257 		return inputState->getInput().LA(i);
258 	} else {
259 		return toLower(inputState->getInput().LA(i));
260 	}
261 }
262 
263 ANTLR_END_NAMESPACE
264 
265 #endif //INC_CharScanner_hpp__
266