1 /* ANTLRParser.h 2 * 3 * Define the generic ANTLRParser superclass, which is subclassed to 4 * define an actual parser. 5 * 6 * Before entry into this file: ANTLRTokenType must be set. 7 * 8 * SOFTWARE RIGHTS 9 * 10 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool 11 * Set (PCCTS) -- PCCTS is in the public domain. An individual or 12 * company may do whatever they wish with source code distributed with 13 * PCCTS or the code generated by PCCTS, including the incorporation of 14 * PCCTS, or its output, into commerical software. 15 * 16 * We encourage users to develop software with PCCTS. However, we do ask 17 * that credit is given to us for developing PCCTS. By "credit", 18 * we mean that if you incorporate our source code into one of your 19 * programs (commercial product, research project, or otherwise) that you 20 * acknowledge this fact somewhere in the documentation, research report, 21 * etc... If you like PCCTS and have developed a nice tool with the 22 * output, please mention that you developed it using PCCTS. In 23 * addition, we ask that this header remain intact in our source code. 24 * As long as these guidelines are kept, we expect to continue enhancing 25 * this system and expect to make other tools available as they are 26 * completed. 27 * 28 * ANTLR 1.33 29 * Terence Parr 30 * Parr Research Corporation 31 * with Purdue University and AHPCRC, University of Minnesota 32 * 1989-1998 33 */ 34 35 #ifndef APARSER_H_GATE 36 #define APARSER_H_GATE 37 38 #include "pcctscfg.h" 39 40 #include "pccts_stdio.h" 41 #include "pccts_setjmp.h" 42 43 PCCTS_NAMESPACE_STD 44 45 #include ATOKEN_H 46 #include ATOKENBUFFER_H 47 48 #ifdef ZZCAN_GUESS 49 #ifndef ZZINF_LOOK 50 #define ZZINF_LOOK 51 #endif 52 #endif 53 54 55 #define NLA (token_type[lap&(LLk-1)])/* --> next LA */ 56 57 typedef unsigned char SetWordType; 58 59 /* Define external bit set stuff (for SetWordType) */ 60 #define EXT_WORDSIZE (sizeof(char)*8) 61 #define EXT_LOGWORDSIZE 3 62 63 /* s y n t a c t i c p r e d i c a t e s t u f f */ 64 65 #ifndef zzUSER_GUESS_HOOK 66 #define zzUSER_GUESS_HOOK(seqFrozen,zzrv) 67 #endif 68 69 #ifndef zzUSER_GUESS_DONE_HOOK 70 #define zzUSER_GUESS_DONE_HOOK(seqFrozen) 71 #endif 72 73 /* MR14 Add zzUSER_GUESS_FAIL_HOOK and related code */ 74 75 #define zzUSER_GUESS_FAIL_HOOK_INTERNAL zzUSER_GUESS_FAIL_HOOK(SeqFrozen) 76 #ifndef zzUSER_GUESS_FAIL_HOOK 77 #define zzUSER_GUESS_FAIL_HOOK(zzGuessSeq) 78 #endif 79 80 81 typedef struct _zzjmp_buf { 82 jmp_buf state; 83 } zzjmp_buf; 84 85 /* these need to be macros not member functions */ 86 #define zzGUESS_BLOCK ANTLRParserState zzst; int zzrv; int _marker; int zzGuessSeqFrozen; 87 #define zzNON_GUESS_MODE if ( !guessing ) 88 #define zzGUESS_FAIL guess_fail(); 89 #define zzGUESS_DONE {zzrv=1; inputTokens->rewind(_marker); guess_done(&zzst);zzUSER_GUESS_DONE_HOOK(zzGuessSeqFrozen) } 90 #define zzGUESS saveState(&zzst); \ 91 guessing = 1; \ 92 zzGuessSeqFrozen = ++zzGuessSeq; \ 93 _marker = inputTokens->mark(); \ 94 zzrv = setjmp(guess_start.state); \ 95 zzUSER_GUESS_HOOK(zzGuessSeqFrozen,zzrv) \ 96 if ( zzrv ) zzGUESS_DONE 97 98 #ifndef zzTRACE_RULES 99 #define zzTRACEdata 100 #else 101 #ifndef zzTRACEdata 102 #define zzTRACEdata const ANTLRChar *zzTracePrevRuleName; 103 #endif 104 #endif 105 106 #ifndef zzTRACEIN 107 #define zzTRACEIN(r) zzTracePrevRuleName=traceCurrentRuleName;tracein(r); 108 #endif 109 #ifndef zzTRACEOUT 110 #define zzTRACEOUT(r) traceout(r);traceCurrentRuleName=zzTracePrevRuleName; 111 #endif 112 113 /* a n t l r p a r s e r d e f */ 114 115 struct ANTLRParserState { 116 /* class variables */ 117 zzjmp_buf guess_start; 118 int guessing; 119 120 int inf_labase; 121 int inf_last; 122 123 int dirty; 124 125 int traceOptionValue; // MR10 126 int traceGuessOptionValue; // MR10 127 const ANTLRChar *traceCurrentRuleName; // MR10 128 int traceDepth; // MR10 129 130 }; 131 132 /* notes: 133 * 134 * multiple inheritance is a cool way to include what stuff is needed 135 * in this structure (like guess stuff). however, i'm not convinced that 136 * multiple inheritance works correctly on all platforms. not that 137 * much space is used--just include all possibly useful members. 138 * 139 * the class should also be a template with arguments for the lookahead 140 * depth and so on. that way, more than one parser can be defined (as 141 * each will probably have different lookahead requirements). however, 142 * am i sure that templates work? no, i'm not sure. 143 * 144 * no attributes are maintained and, hence, the 'asp' variable is not 145 * needed. $i can still be referenced, but it refers to the token 146 * associated with that rule element. question: where are the token's 147 * stored if not on the software stack? in local variables created 148 * and assigned to by antlr. 149 */ 150 class ANTLRParser { 151 protected: 152 /* class variables */ 153 static SetWordType bitmask[sizeof(SetWordType)*8]; 154 static char eMsgBuffer[500]; 155 156 protected: 157 int LLk; // number of lookahead symbols (old LL_K) 158 int demand_look; 159 ANTLRTokenType eofToken; // when do I stop during resynch()s 160 int bsetsize; // size of bitsets created by ANTLR in 161 // units of SetWordType 162 163 ANTLRTokenBuffer *inputTokens; //place to get input tokens 164 165 zzjmp_buf guess_start; // where to jump back to upon failure 166 int guessing; // if guessing (using (...)? predicate) 167 168 // infinite lookahead stuff 169 int can_use_inf_look; // set by subclass (generated by ANTLR) 170 int inf_lap; 171 int inf_labase; 172 int inf_last; 173 int *_inf_line; 174 175 const ANTLRChar **token_tbl; // pointer to table of token type strings MR20 const 176 177 int dirty; // used during demand lookahead 178 179 ANTLRTokenType *token_type; // fast reference cache of token.getType() 180 // ANTLRLightweightToken **token; // the token with all its attributes 181 int lap; 182 int labase; 183 #ifdef ZZDEFER_FETCH 184 int stillToFetch; // MR19 V.H. Simonis 185 #endif 186 187 private: 188 void fill_inf_look(); 189 190 protected: guess_fail()191 virtual void guess_fail() { // MR9 27-Sep-97 make virtual 192 traceGuessFail(); // MR10 193 longjmp(guess_start.state, 1); } // MR9 guess_done(ANTLRParserState * st)194 virtual void guess_done(ANTLRParserState *st) { // MR9 27-Sep-97 make virtual 195 restoreState(st); } // MR9 196 virtual int guess(ANTLRParserState *); // MR9 27-Sep-97 make virtual 197 void look(int); 198 int _match(ANTLRTokenType, ANTLRChar **, ANTLRTokenType *, 199 _ANTLRTokenPtr *, SetWordType **); 200 int _setmatch(SetWordType *, ANTLRChar **, ANTLRTokenType *, 201 _ANTLRTokenPtr *, SetWordType **); 202 int _match_wsig(ANTLRTokenType); 203 int _setmatch_wsig(SetWordType *); 204 virtual void consume(); 205 void resynch(SetWordType *wd,SetWordType mask); 206 void prime_lookahead(); 207 virtual void tracein(const ANTLRChar *r); // MR10 208 virtual void traceout(const ANTLRChar *r); // MR10 MODWORD(unsigned x)209 static unsigned MODWORD(unsigned x) {return x & (EXT_WORDSIZE-1);} // x % EXT_WORDSIZE // MR9 DIVWORD(unsigned x)210 static unsigned DIVWORD(unsigned x) {return x >> EXT_LOGWORDSIZE;} // x / EXT_WORDSIZE // MR9 211 int set_deg(SetWordType *); 212 int set_el(ANTLRTokenType, SetWordType *); 213 virtual void edecode(SetWordType *); // MR1 214 virtual void FAIL(int k, ...); // MR1 215 int traceOptionValue; // MR10 216 int traceGuessOptionValue; // MR10 217 const ANTLRChar *traceCurrentRuleName; // MR10 218 int traceDepth; // MR10 219 void traceReset(); // MR10 220 virtual void traceGuessFail(); // MR10 221 virtual void traceGuessDone(const ANTLRParserState *); // MR10 222 int zzGuessSeq; // MR10 223 224 public: 225 ANTLRParser(ANTLRTokenBuffer *, 226 int k=1, 227 int use_inf_look=0, 228 int demand_look=0, 229 int bsetsize=1); 230 virtual ~ANTLRParser(); 231 232 virtual void init(); 233 LA(int i)234 ANTLRTokenType LA(int i) 235 { 236 // 237 // MR14 demand look will always be 0 for C++ mode 238 // 239 //// return demand_look ? token_type[(labase+(i)-1)&(LLk-1)] : 240 //// token_type[(lap+(i)-1)&(LLk-1)]; 241 242 // MR19 V.H. Simonis Defer fetch feature 243 244 #ifdef ZZDEFER_FETCH 245 undeferFetch(); 246 #endif 247 return token_type[(lap+(i)-1)&(LLk-1)]; 248 } 249 _ANTLRTokenPtr LT(int i); 250 setEofToken(ANTLRTokenType t)251 void setEofToken(ANTLRTokenType t) { eofToken = t; } getEofToken()252 ANTLRTokenType getEofToken() const { return eofToken; } // MR14 253 noGarbageCollectTokens()254 void noGarbageCollectTokens() { inputTokens->noGarbageCollectTokens(); } garbageCollectTokens()255 void garbageCollectTokens() { inputTokens->garbageCollectTokens(); } 256 257 virtual void syn(_ANTLRTokenPtr tok, ANTLRChar *egroup, 258 SetWordType *eset, ANTLRTokenType etok, int k); 259 virtual void saveState(ANTLRParserState *); // MR9 27-Sep-97 make virtual 260 virtual void restoreState(ANTLRParserState *); // MR9 27-Sep-97 make virtual 261 262 virtual void panic(const char *msg); // MR20 const 263 static char *eMsgd(char *,int); 264 static char *eMsg(char *,char *); 265 static char *eMsg2(char *,char *,char *); 266 267 void consumeUntil(SetWordType *st); 268 void consumeUntilToken(int t); 269 270 virtual int _setmatch_wdfltsig(SetWordType *tokensWanted, 271 ANTLRTokenType tokenTypeOfSet, 272 SetWordType *whatFollows); 273 virtual int _match_wdfltsig(ANTLRTokenType tokenWanted, 274 SetWordType *whatFollows); 275 276 const ANTLRChar * parserTokenName(int tok); // MR1 277 278 int traceOptionValueDefault; // MR11 279 int traceOption(int delta); // MR11 280 int traceGuessOption(int delta); // MR11 281 282 // MR8 5-Aug-97 S.Bochnak@microtool.com.pl 283 // MR8 Move resynch static local variable 284 // MR8 to class instance 285 286 int syntaxErrCount; // MR12 getLexer()287 ANTLRTokenStream *getLexer() const { // MR12 288 return inputTokens ? inputTokens->getLexer() : 0; } // MR12 289 protected: // MR8 290 int resynchConsumed; // MR8 291 char *zzFAILtext; // workarea required by zzFAIL // MR9 292 void undeferFetch(); // MR19 V.H. Simonis 293 int isDeferFetchEnabled(); // MR19 V.H. Simonis 294 }; 295 296 #define zzmatch(_t) \ 297 if ( !_match((ANTLRTokenType)_t, &zzMissText, &zzMissTok, \ 298 (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet) ) goto fail; 299 300 #define zzmatch_wsig(_t,handler) \ 301 if ( !_match_wsig((ANTLRTokenType)_t) ) if ( guessing ) zzGUESS_FAIL else {_signal=MismatchedToken; goto handler;} 302 303 #define zzsetmatch(_ts) \ 304 if ( !_setmatch(_ts, &zzMissText, &zzMissTok, \ 305 (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet) ) goto fail; 306 307 #define zzsetmatch_wsig(_ts, handler) \ 308 if ( !_setmatch_wsig(_ts) ) if ( guessing ) zzGUESS_FAIL else {_signal=MismatchedToken; goto handler;} 309 310 /* For the dflt signal matchers, a FALSE indicates that an error occurred 311 * just like the other matchers, but in this case, the routine has already 312 * recovered--we do NOT want to consume another token. However, when 313 * the match was successful, we do want to consume hence _signal=0 so that 314 * a token is consumed by the "if (!_signal) consume(); _signal=NoSignal;" 315 * preamble. 316 */ 317 #define zzsetmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) \ 318 if ( !_setmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) ) \ 319 _signal = MismatchedToken; 320 321 #define zzmatch_wdfltsig(tokenWanted, whatFollows) \ 322 if ( !_match_wdfltsig(tokenWanted, whatFollows) ) _signal = MismatchedToken; 323 324 325 // MR1 10-Apr-97 zzfailed_pred() macro does not backtrack 326 // MR1 in guess mode. 327 // MR1 Identification and correction due to J. Lilley 328 329 #ifndef zzfailed_pred 330 #define zzfailed_pred(_p) \ 331 if (guessing) { \ 332 zzGUESS_FAIL; \ 333 } else { \ 334 fprintf(stdout,"line %d: semantic error; failed predicate: '%s'\n", \ 335 LT(1)->getLine(), _p); \ 336 } 337 #endif 338 339 #define zzRULE \ 340 SetWordType *zzMissSet=NULL; ANTLRTokenType zzMissTok=(ANTLRTokenType)0; \ 341 _ANTLRTokenPtr zzBadTok=NULL; ANTLRChar *zzBadText=(ANTLRChar *)""; \ 342 int zzErrk=1,zzpf=0; \ 343 zzTRACEdata \ 344 ANTLRChar *zzMissText=(ANTLRChar *)""; 345 346 #endif 347 348 /* S t a n d a r d E x c e p t i o n S i g n a l s */ 349 350 #define NoSignal 0 351 #define MismatchedToken 1 352 #define NoViableAlt 2 353 #define NoSemViableAlt 3 354 355 /* MR7 Allow more control over signalling */ 356 /* by adding "Unwind" and "SetSignal" */ 357 358 #define Unwind 4 359 #define setSignal(newValue) *_retsignal=_signal=(newValue) 360 #define suppressSignal *_retsignal=_signal=0 361 #define exportSignal *_retsignal=_signal 362