1 /* ANTLRParser.h
2  *
3  * Define the generic ANTLRParser superclass, which is subclassed to
4  * define an actual parser.
5  *
6  * Before entry into this file: ANTLRTokenType must be set.
7  *
8  * SOFTWARE RIGHTS
9  *
10  * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
11  * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
12  * company may do whatever they wish with source code distributed with
13  * PCCTS or the code generated by PCCTS, including the incorporation of
14  * PCCTS, or its output, into commerical software.
15  *
16  * We encourage users to develop software with PCCTS.  However, we do ask
17  * that credit is given to us for developing PCCTS.  By "credit",
18  * we mean that if you incorporate our source code into one of your
19  * programs (commercial product, research project, or otherwise) that you
20  * acknowledge this fact somewhere in the documentation, research report,
21  * etc...  If you like PCCTS and have developed a nice tool with the
22  * output, please mention that you developed it using PCCTS.  In
23  * addition, we ask that this header remain intact in our source code.
24  * As long as these guidelines are kept, we expect to continue enhancing
25  * this system and expect to make other tools available as they are
26  * completed.
27  *
28  * ANTLR 1.33
29  * Terence Parr
30  * Parr Research Corporation
31  * with Purdue University and AHPCRC, University of Minnesota
32  * 1989-1998
33  */
34 
35 #ifndef APARSER_H_GATE
36 #define APARSER_H_GATE
37 
38 #include "pcctscfg.h"
39 
40 #include "pccts_stdio.h"
41 #include "pccts_setjmp.h"
42 
43 PCCTS_NAMESPACE_STD
44 
45 #include ATOKEN_H
46 #include ATOKENBUFFER_H
47 
/* Defining ZZCAN_GUESS forces ZZINF_LOOK to be defined as well
 * (it is left alone if the user already defined it).
 */
48 #ifdef ZZCAN_GUESS
49 #ifndef ZZINF_LOOK
50 #define ZZINF_LOOK
51 #endif
52 #endif
53 
54 
/* NLA reads the token_type cache at index lap masked with (LLk-1); it is only
 * usable where the parser members token_type, lap and LLk are in scope.
 * NOTE(review): the mask equals lap % LLk only when LLk is a power of two --
 * confirm that the constructor enforces this.
 */
55 #define NLA			(token_type[lap&(LLk-1)])/* --> next LA */
56 
/* One word of an ANTLR-generated bit set. */
57 typedef unsigned char SetWordType;
58 
59 /* Define external bit set stuff (for SetWordType) */
/* EXT_WORDSIZE is the number of bits per set word computed as sizeof(char)*8;
 * EXT_LOGWORDSIZE (3) is log2 of that value when it is 8, so the two constants
 * agree only for 8-bit chars.
 */
60 #define EXT_WORDSIZE	(sizeof(char)*8)
61 #define EXT_LOGWORDSIZE	3
62 
63            /* s y n t a c t i c  p r e d i c a t e  s t u f f */
64 
/* User hooks around syntactic-predicate ("guess") processing.  Each hook
 * expands to nothing unless the user defines it before this header is read.
 *
 *   zzUSER_GUESS_HOOK(seqFrozen,zzrv)   expanded by zzGUESS right after setjmp
 *   zzUSER_GUESS_DONE_HOOK(seqFrozen)   expanded by zzGUESS_DONE
 */
65 #ifndef zzUSER_GUESS_HOOK
66 #define zzUSER_GUESS_HOOK(seqFrozen,zzrv)
67 #endif
68 
69 #ifndef zzUSER_GUESS_DONE_HOOK
70 #define zzUSER_GUESS_DONE_HOOK(seqFrozen)
71 #endif
72 
73 /* MR14 Add zzUSER_GUESS_FAIL_HOOK and related code */
74 
/* zzUSER_GUESS_FAIL_HOOK_INTERNAL forwards to the user hook.  Nothing in this
 * header expands it.
 * NOTE(review): the argument "SeqFrozen" is not declared anywhere in this
 * header.  That is harmless with the default empty hook below, but a
 * user-supplied hook that uses its argument would need such a name in scope;
 * possibly zzGuessSeqFrozen was intended -- confirm.
 */
75 #define zzUSER_GUESS_FAIL_HOOK_INTERNAL zzUSER_GUESS_FAIL_HOOK(SeqFrozen)
76 #ifndef zzUSER_GUESS_FAIL_HOOK
77 #define zzUSER_GUESS_FAIL_HOOK(zzGuessSeq)
78 #endif
79 
80 
/* A jmp_buf wrapped in a struct.  Used for the guess_start member of both
 * ANTLRParserState and ANTLRParser.
 * NOTE(review): presumably wrapped so the buffer can be copied by plain
 * struct assignment (jmp_buf itself is an array type) -- confirm against
 * saveState()/restoreState().
 */
81 typedef struct _zzjmp_buf {
82 			jmp_buf state;
83 		} zzjmp_buf;
84 
85 /* these need to be macros not member functions */
/* Why macros: zzGUESS calls setjmp(), and the function that called setjmp()
 * must still be active when the matching longjmp() (see guess_fail()) runs,
 * so the call has to be expanded directly inside the rule function.
 *
 *   zzGUESS_BLOCK     declares the locals the other macros use: the saved
 *                     parser state (zzst), the setjmp result (zzrv), the input
 *                     mark (_marker) and this guess's sequence number
 *                     (zzGuessSeqFrozen).
 *   zzNON_GUESS_MODE  prefix that guards the following statement with
 *                     "if ( !guessing )".
 *   zzGUESS_FAIL      calls guess_fail(); the expansion already ends in ';'.
 *   zzGUESS_DONE      sets zzrv to 1, rewinds inputTokens to _marker, calls
 *                     guess_done(&zzst), then expands the user "done" hook.
 *   zzGUESS           in order: saveState(&zzst); guessing = 1; bump
 *                     zzGuessSeq and freeze its value; mark the input;
 *                     setjmp into guess_start.state; expand the user hook;
 *                     and, when setjmp returned non-zero (i.e. we arrived via
 *                     longjmp), expand zzGUESS_DONE.
 */
86 #define zzGUESS_BLOCK		ANTLRParserState zzst; int zzrv; int _marker; int zzGuessSeqFrozen;
87 #define zzNON_GUESS_MODE	if ( !guessing )
88 #define zzGUESS_FAIL		guess_fail();
89 #define zzGUESS_DONE		{zzrv=1; inputTokens->rewind(_marker); guess_done(&zzst);zzUSER_GUESS_DONE_HOOK(zzGuessSeqFrozen) }
90 #define zzGUESS				saveState(&zzst); \
91 							guessing = 1; \
92                             zzGuessSeqFrozen = ++zzGuessSeq; \
93 							_marker = inputTokens->mark(); \
94 							zzrv = setjmp(guess_start.state); \
95                             zzUSER_GUESS_HOOK(zzGuessSeqFrozen,zzrv) \
96 						    if ( zzrv ) zzGUESS_DONE
97 
/* Rule-tracing support.
 *
 * zzTRACEdata is expanded inside zzRULE.  Without zzTRACE_RULES it is empty;
 * with zzTRACE_RULES (and unless the user supplied their own definition) it
 * declares the local zzTracePrevRuleName.
 */
98 #ifndef zzTRACE_RULES
99 #define zzTRACEdata
100 #else
101 #ifndef zzTRACEdata
102 #define zzTRACEdata     const ANTLRChar *zzTracePrevRuleName;
103 #endif
104 #endif
105 
/* zzTRACEIN(r) remembers the current rule name in zzTracePrevRuleName and then
 * calls tracein(r); zzTRACEOUT(r) calls traceout(r) and then restores the
 * remembered name.  Both can be overridden by defining them beforehand.
 * NOTE(review): both refer to zzTracePrevRuleName, which the default
 * zzTRACEdata only declares when zzTRACE_RULES is defined -- presumably the
 * generated code uses these macros only in that configuration; confirm.
 */
106 #ifndef zzTRACEIN
107 #define zzTRACEIN(r)	zzTracePrevRuleName=traceCurrentRuleName;tracein(r);
108 #endif
109 #ifndef zzTRACEOUT
110 #define zzTRACEOUT(r)	traceout(r);traceCurrentRuleName=zzTracePrevRuleName;
111 #endif
112 
113                   /* a n t l r  p a r s e r  d e f */
114 
/* Snapshot of parser state taken around a guess.  zzGUESS_BLOCK declares one
 * of these (zzst); zzGUESS passes it to saveState() and zzGUESS_DONE passes it
 * to guess_done().  Every field has a same-named member in ANTLRParser.
 * NOTE(review): which members saveState()/restoreState() actually copy is not
 * visible in this header -- confirm in the implementation file.
 */
115 struct ANTLRParserState {
116 	/* class variables */
117 	zzjmp_buf guess_start;
118 	int guessing;
119 
	/* infinite-lookahead positions (mirror ANTLRParser::inf_labase/inf_last) */
120 	int inf_labase;
121 	int inf_last;
122 
123 	int dirty;
124 
	/* trace settings (mirror the MR10 trace members of ANTLRParser) */
125     int             traceOptionValue;       // MR10
126     int             traceGuessOptionValue;  // MR10
127     const ANTLRChar *traceCurrentRuleName;  // MR10
128     int             traceDepth;             // MR10
129 
130 };
131 
132 /* notes:
133  *
134  * multiple inheritance is a cool way to include what stuff is needed
135  * in this structure (like guess stuff).  however, i'm not convinced that
136  * multiple inheritance works correctly on all platforms.  not that
137  * much space is used--just include all possibly useful members.
138  *
139  * the class should also be a template with arguments for the lookahead
140  * depth and so on.  that way, more than one parser can be defined (as
141  * each will probably have different lookahead requirements).  however,
142  * am i sure that templates work?  no, i'm not sure.
143  *
144  * no attributes are maintained and, hence, the 'asp' variable is not
145  * needed.  $i can still be referenced, but it refers to the token
146  * associated with that rule element.  question: where are the tokens
147  * stored if not on the software stack?  answer: in local variables
148  * created and assigned to by antlr.
149  */
/* Generic parser base class; per the file header it is subclassed to define
 * an actual parser.  It holds the lookahead cache (token_type, lap, labase,
 * LLk), the guess-mode state used by the zzGUESS* macros (guess_start,
 * guessing, zzGuessSeq), the trace settings, and declarations of the
 * matching, error-reporting and resynchronisation helpers.  Only the small
 * inline members are defined here; the remaining member functions are
 * declared only.
 */
150 class ANTLRParser {
151 protected:
152 	/* class variables */
153 	static SetWordType bitmask[sizeof(SetWordType)*8];
154 	static char eMsgBuffer[500];
155 
156 protected:
157 	int LLk;					// number of lookahead symbols (old LL_K)
158 	int demand_look;
159 	ANTLRTokenType eofToken;			// when do I stop during resynch()s
160 	int bsetsize;           			// size of bitsets created by ANTLR in
161         								// units of SetWordType
162 
163 	ANTLRTokenBuffer *inputTokens;	//place to get input tokens
164 
165 	zzjmp_buf guess_start;		// where to jump back to upon failure
166 	int guessing;				// if guessing (using (...)? predicate)
167 
168 	// infinite lookahead stuff
169 	int can_use_inf_look;		// set by subclass (generated by ANTLR)
170 	int inf_lap;
171 	int inf_labase;
172 	int inf_last;
173 	int *_inf_line;
174 
175 	const ANTLRChar **token_tbl; // pointer to table of token type strings MR20 const
176 
177 	int dirty;					// used during demand lookahead
178 
179 	ANTLRTokenType *token_type;		// fast reference cache of token.getType()
180 //	ANTLRLightweightToken **token;	// the token with all its attributes
181 	int lap;
182 	int labase;
	// stillToFetch exists only in ZZDEFER_FETCH builds
183 #ifdef ZZDEFER_FETCH
184 	int stillToFetch;                               // MR19 V.H. Simonis
185 #endif
186 
187 private:
188 	void fill_inf_look();
189 
190 protected:
	// guess_fail() does not return: after the trace callback it longjmp()s
	// to the setjmp() that zzGUESS performed on guess_start.state, which
	// then sees a return value of 1.
guess_fail()191 	virtual void guess_fail() {                         // MR9 27-Sep-97 make virtual
192         traceGuessFail();                               // MR10
193         longjmp(guess_start.state, 1); }                // MR9
	// guess_done() hands the saved state back to restoreState().
guess_done(ANTLRParserState * st)194 	virtual void guess_done(ANTLRParserState *st) {     // MR9 27-Sep-97 make virtual
195          restoreState(st); }                            // MR9
196 	virtual int guess(ANTLRParserState *);              // MR9 27-Sep-97 make virtual
197 	void look(int);
	// The match helpers below are what the zzmatch*/zzsetmatch* macros call;
	// those macros treat a zero return as "no match".
198     int _match(ANTLRTokenType, ANTLRChar **, ANTLRTokenType *,
199 			   _ANTLRTokenPtr *, SetWordType **);
200     int _setmatch(SetWordType *, ANTLRChar **, ANTLRTokenType *,
201 			   _ANTLRTokenPtr *, SetWordType **);
202     int _match_wsig(ANTLRTokenType);
203     int _setmatch_wsig(SetWordType *);
204     virtual void consume();
205     void resynch(SetWordType *wd,SetWordType mask);
206 	void prime_lookahead();
207 	virtual void tracein(const ANTLRChar *r);              // MR10
208 	virtual void traceout(const ANTLRChar *r);             // MR10
	// Bit-set index helpers: MODWORD gives the bit position inside a set
	// word, DIVWORD the index of the word.
MODWORD(unsigned x)209 	static unsigned MODWORD(unsigned x) {return x & (EXT_WORDSIZE-1);}	// x % EXT_WORDSIZE // MR9
DIVWORD(unsigned x)210 	static unsigned DIVWORD(unsigned x) {return x >> EXT_LOGWORDSIZE;}	// x / EXT_WORDSIZE // MR9
211 	int set_deg(SetWordType *);
212 	int set_el(ANTLRTokenType, SetWordType *);
213 	virtual void edecode(SetWordType *);				// MR1
214 	virtual void FAIL(int k, ...);					    // MR1
215     int                 traceOptionValue;                           // MR10
216     int                 traceGuessOptionValue;                      // MR10
217     const ANTLRChar     *traceCurrentRuleName;                      // MR10
218     int                 traceDepth;                                 // MR10
219     void                traceReset();                               // MR10
220     virtual void        traceGuessFail();                           // MR10
221     virtual void        traceGuessDone(const ANTLRParserState *);   // MR10
	// zzGuessSeq is pre-incremented by zzGUESS each time a guess starts.
222     int                 zzGuessSeq;                                 // MR10
223 
224 public:
225 	ANTLRParser(ANTLRTokenBuffer *,
226 				int k=1,
227 				int use_inf_look=0,
228 				int demand_look=0,
229 				int bsetsize=1);
230 	virtual ~ANTLRParser();
231 
232 	virtual void init();
233 
	// LA(i): token type of the i-th lookahead symbol, read from the
	// token_type cache at (lap+i-1) masked with (LLk-1).  In ZZDEFER_FETCH
	// builds undeferFetch() is called first.
	// NOTE(review): i is presumably 1-based (LT(1) is used elsewhere in this
	// header) and the mask assumes LLk is a power of two -- confirm.
LA(int i)234 	ANTLRTokenType LA(int i)
235 	{
236 //
237 //  MR14 demand look will always be 0 for C++ mode
238 //
239 ////	return demand_look ? token_type[(labase+(i)-1)&(LLk-1)] :
240 ////						token_type[(lap+(i)-1)&(LLk-1)];
241 
242 // MR19 V.H. Simonis Defer fetch feature
243 
244 #ifdef ZZDEFER_FETCH
245       undeferFetch();
246 #endif
247 	  return token_type[(lap+(i)-1)&(LLk-1)];
248 	}
249 	_ANTLRTokenPtr LT(int i);
250 
setEofToken(ANTLRTokenType t)251 	void setEofToken(ANTLRTokenType t)	{ eofToken = t; }
getEofToken()252 	ANTLRTokenType getEofToken() const  { return eofToken; }    // MR14
253 
	// Both calls forward to the token buffer; inputTokens is dereferenced
	// without a null check here (getLexer() below does check it).
noGarbageCollectTokens()254 	void noGarbageCollectTokens()	{ inputTokens->noGarbageCollectTokens(); }
garbageCollectTokens()255 	void garbageCollectTokens()		{ inputTokens->garbageCollectTokens(); }
256 
257     virtual void syn(_ANTLRTokenPtr tok, ANTLRChar *egroup,
258 					 SetWordType *eset, ANTLRTokenType etok, int k);
259 	virtual void saveState(ANTLRParserState *);     // MR9 27-Sep-97 make virtual
260 	virtual void restoreState(ANTLRParserState *);  // MR9 27-Sep-97 make virtual
261 
262 	virtual void panic(const char *msg); // MR20 const
263 	static char *eMsgd(char *,int);
264 	static char *eMsg(char *,char *);
265 	static char *eMsg2(char *,char *,char *);
266 
267 	void consumeUntil(SetWordType *st);
268 	void consumeUntilToken(int t);
269 
270 	virtual int _setmatch_wdfltsig(SetWordType *tokensWanted,
271 					 ANTLRTokenType tokenTypeOfSet,
272 					 SetWordType *whatFollows);
273 	virtual int _match_wdfltsig(ANTLRTokenType tokenWanted,
274 					 SetWordType *whatFollows);
275 
276 	const ANTLRChar * parserTokenName(int tok);			// MR1
277 
	// traceOptionValueDefault is a public data member.
278     int                 traceOptionValueDefault;        // MR11
279     int                 traceOption(int delta);         // MR11
280     int                 traceGuessOption(int delta);    // MR11
281 
282 //  MR8  5-Aug-97   S.Bochnak@microtool.com.pl
283 //  MR8             Move resynch static local variable
284 //  MR8               to class instance
285 
	// syntaxErrCount is a public data member.
286     int                 syntaxErrCount;                      // MR12
	// getLexer() returns 0 when there is no token buffer.
getLexer()287     ANTLRTokenStream   *getLexer() const {                   // MR12
288       return inputTokens ? inputTokens->getLexer() : 0; }    // MR12
289 protected:                                              // MR8
290     int     resynchConsumed;                            // MR8
291     char    *zzFAILtext; // workarea required by zzFAIL // MR9
	// Declared unconditionally, although LA() only calls undeferFetch()
	// in ZZDEFER_FETCH builds.
292     void    undeferFetch();                             // MR19 V.H. Simonis
293     int     isDeferFetchEnabled();                      // MR19 V.H. Simonis
294 };
295 
/* Matching macros for use inside rule functions.
 *
 * zzmatch/zzsetmatch pass the addresses of the zzMissText, zzMissTok,
 * zzBadTok and zzMissSet locals (declared by zzRULE) to _match()/_setmatch()
 * and "goto fail" when the call returns zero, so the enclosing function must
 * provide a label named fail.
 *
 * The *_wsig variants call zzGUESS_FAIL on a mismatch while guessing;
 * otherwise they set _signal to MismatchedToken and jump to the given handler
 * label.  They expect a variable named _signal in scope.
 *
 * Every macro here expands to a bare if statement (the *_wsig ones to a
 * nested if/else) that already carries its own terminating ';' or '}'.
 * NOTE(review): using one of them as the unbraced body of an outer if that
 * has an else would bind that else to the macro's if -- check call sites.
 */
296 #define zzmatch(_t)							\
297 	if ( !_match((ANTLRTokenType)_t, &zzMissText, &zzMissTok, \
298 				 (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet) ) goto fail;
299 
300 #define zzmatch_wsig(_t,handler)						\
301 	if ( !_match_wsig((ANTLRTokenType)_t) ) if ( guessing ) zzGUESS_FAIL else {_signal=MismatchedToken; goto handler;}
302 
303 #define zzsetmatch(_ts)							\
304 	if ( !_setmatch(_ts, &zzMissText, &zzMissTok, \
305 				 (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet) ) goto fail;
306 
307 #define zzsetmatch_wsig(_ts, handler)				\
308 	if ( !_setmatch_wsig(_ts) ) if ( guessing ) zzGUESS_FAIL else {_signal=MismatchedToken; goto handler;}
309 
310 /* For the dflt signal matchers, a FALSE indicates that an error occurred
311  * just like the other matchers, but in this case, the routine has already
312  * recovered--we do NOT want to consume another token.  However, when
313  * the match was successful, we do want to consume hence _signal=0 so that
314  * a token is consumed by the "if (!_signal) consume(); _signal=NoSignal;"
315  * preamble.
316  */
/* Unlike the macros above, these two neither jump nor call zzGUESS_FAIL: on a
 * zero return they only set _signal to MismatchedToken.
 */
317 #define zzsetmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) \
318 	if ( !_setmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) ) \
319 		_signal = MismatchedToken;
320 
321 #define zzmatch_wdfltsig(tokenWanted, whatFollows) \
322 	if ( !_match_wdfltsig(tokenWanted, whatFollows) ) _signal = MismatchedToken;
323 
324 
325 //  MR1  10-Apr-97 	zzfailed_pred() macro does not backtrack
326 //  MR1			  in guess mode.
327 //  MR1			Identification and correction due to J. Lilley
328 
//  zzfailed_pred(_p): action taken when a semantic predicate fails; _p is
//  passed to a "%s" conversion, so it must be a C string.  While guessing it
//  expands zzGUESS_FAIL (guess_fail() longjmp()s and does not return).
//  Otherwise it writes a "semantic error" line to stdout using the line
//  number of LT(1); nothing else is done in that branch, so control continues
//  after the macro.  Users can replace it by defining zzfailed_pred before
//  this header is included.
329 #ifndef zzfailed_pred
330 #define zzfailed_pred(_p) \
331   if (guessing) { \
332     zzGUESS_FAIL; \
333   } else { \
334     fprintf(stdout,"line %d: semantic error; failed predicate: '%s'\n", \
335 	LT(1)->getLine(), _p); \
336   }
337 #endif
338 
/* zzRULE declares the per-rule locals used for error reporting:
 *   zzMissSet, zzMissTok, zzBadTok, zzMissText -- whose addresses zzmatch and
 *                                                 zzsetmatch pass to the
 *                                                 matchers
 *   zzBadText, zzErrk, zzpf                    -- not referenced by any macro
 *                                                 in this header
 * plus whatever zzTRACEdata expands to (a saved rule name when tracing is
 * enabled, nothing otherwise).  The expansion is a sequence of declarations,
 * so it has to appear where declarations are allowed.
 */
339 #define zzRULE \
340 		SetWordType *zzMissSet=NULL; ANTLRTokenType zzMissTok=(ANTLRTokenType)0;	\
341 		_ANTLRTokenPtr zzBadTok=NULL; ANTLRChar *zzBadText=(ANTLRChar *)"";	\
342 		int zzErrk=1,zzpf=0; \
343         zzTRACEdata \
344 		ANTLRChar *zzMissText=(ANTLRChar *)"";
345 
346 #endif
347 
348         /* S t a n d a r d  E x c e p t i o n  S i g n a l s */
349 
/* Values stored in _signal by the matching macros and by user code.
 * Note: these definitions follow the #endif that closes the APARSER_H_GATE
 * include guard, so they are processed again on every inclusion of this
 * header; each repeat is a token-identical redefinition.
 */
350 #define NoSignal			0
351 #define MismatchedToken		1
352 #define NoViableAlt			2
353 #define NoSemViableAlt		3
354 
355 /* MR7  Allow more control over signalling                                  */
356 /*        by adding "Unwind" and "SetSignal"                                */
357 
/* setSignal/suppressSignal/exportSignal assign through the pointer
 * _retsignal and (except exportSignal, which only reads it) to _signal; both
 * names must be in scope where the macros are used.  The expansions are bare
 * assignment expressions without enclosing parentheses, so they are meant to
 * be used as whole statements.
 */
358 #define Unwind              4
359 #define setSignal(newValue) *_retsignal=_signal=(newValue)
360 #define suppressSignal       *_retsignal=_signal=0
361 #define exportSignal        *_retsignal=_signal
362