1 /* 2 ** This file contains all sources (including headers) to the LEMON 3 ** LALR(1) parser generator. The sources have been combined into a 4 ** single file to make it easy to include LEMON in the source tree 5 ** and Makefile of another program. 6 ** 7 ** The author of this program disclaims copyright. 8 */ 9 #include <stdio.h> 10 #include <stdarg.h> 11 #include <string.h> 12 #include <ctype.h> 13 #include <stdlib.h> 14 #include <assert.h> 15 16 #ifndef __WIN32__ 17 # if defined(_WIN32) || defined(WIN32) 18 # define __WIN32__ 19 # endif 20 #endif 21 22 #ifdef __WIN32__ 23 extern int access(); 24 #else 25 #include <unistd.h> 26 #endif 27 28 /* #define PRIVATE static */ 29 #define PRIVATE 30 31 #ifdef TEST 32 #define MAXRHS 5 /* Set low to exercise exception code */ 33 #else 34 #define MAXRHS 1000 35 #endif 36 37 static int showPrecedenceConflict = 0; 38 static const char **made_files = NULL; 39 static int made_files_count = 0; 40 static int successful_exit = 0; 41 static void LemonAtExit(void) 42 { 43 /* if we failed, delete (most) files we made, to unconfuse build tools. */ 44 int i; 45 for (i = 0; i < made_files_count; i++) { 46 if (!successful_exit) { 47 remove(made_files[i]); 48 } 49 } 50 free(made_files); 51 made_files_count = 0; 52 made_files = NULL; 53 } 54 55 static char *msort(char*,char**,int(*)(const char*,const char*)); 56 57 /* 58 ** Compilers are getting increasingly pedantic about type conversions 59 ** as C evolves ever closer to Ada.... To work around the latest problems 60 ** we have to define the following variant of strlen(). 61 */ 62 #define lemonStrlen(X) ((int)strlen(X)) 63 64 /* a few forward declarations... */ 65 struct rule; 66 struct lemon; 67 struct action; 68 69 static struct action *Action_new(void); 70 static struct action *Action_sort(struct action *); 71 72 /********** From the file "build.h" ************************************/ 73 void FindRulePrecedences(); 74 void FindFirstSets(); 75 void FindStates(); 76 void FindLinks(); 77 void FindFollowSets(); 78 void FindActions(); 79 80 /********* From the file "configlist.h" *********************************/ 81 void Configlist_init(void); 82 struct config *Configlist_add(struct rule *, int); 83 struct config *Configlist_addbasis(struct rule *, int); 84 void Configlist_closure(struct lemon *); 85 void Configlist_sort(void); 86 void Configlist_sortbasis(void); 87 struct config *Configlist_return(void); 88 struct config *Configlist_basis(void); 89 void Configlist_eat(struct config *); 90 void Configlist_reset(void); 91 92 /********* From the file "error.h" ***************************************/ 93 void ErrorMsg(const char *, int,const char *, ...); 94 95 /****** From the file "option.h" ******************************************/ 96 enum option_type { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR, 97 OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR}; 98 struct s_options { 99 enum option_type type; 100 const char *label; 101 char *arg; 102 const char *message; 103 }; 104 int OptInit(char**,struct s_options*,FILE*); 105 int OptNArgs(void); 106 char *OptArg(int); 107 void OptErr(int); 108 void OptPrint(void); 109 110 /******** From the file "parse.h" *****************************************/ 111 void Parse(struct lemon *lemp); 112 113 /********* From the file "plink.h" ***************************************/ 114 struct plink *Plink_new(void); 115 void Plink_add(struct plink **, struct config *); 116 void Plink_copy(struct plink **, struct plink *); 117 void Plink_delete(struct plink *); 118 119 /********** From the file "report.h" *************************************/ 120 void Reprint(struct lemon *); 121 void ReportOutput(struct lemon *); 122 void ReportTable(struct lemon *, int); 123 void ReportHeader(struct lemon *); 124 void CompressTables(struct lemon *); 125 void ResortStates(struct lemon *); 126 127 /********** From the file "set.h" ****************************************/ 128 void SetSize(int); /* All sets will be of size N */ 129 char *SetNew(void); /* A new set for element 0..N */ 130 void SetFree(char*); /* Deallocate a set */ 131 132 char *SetNew(void); /* A new set for element 0..N */ 133 int SetAdd(char*,int); /* Add element to a set */ 134 int SetUnion(char *,char *); /* A <- A U B, thru element N */ 135 #define SetFind(X,Y) (X[Y]) /* True if Y is in set X */ 136 137 /********** From the file "struct.h" *************************************/ 138 /* 139 ** Principal data structures for the LEMON parser generator. 140 */ 141 142 typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean; 143 144 /* Symbols (terminals and nonterminals) of the grammar are stored 145 ** in the following: */ 146 enum symbol_type { 147 TERMINAL, 148 NONTERMINAL, 149 MULTITERMINAL 150 }; 151 enum e_assoc { 152 LEFT, 153 RIGHT, 154 NONE, 155 UNK 156 }; 157 struct symbol { 158 const char *name; /* Name of the symbol */ 159 int index; /* Index number for this symbol */ 160 enum symbol_type type; /* Symbols are all either TERMINALS or NTs */ 161 struct rule *rule; /* Linked list of rules of this (if an NT) */ 162 struct symbol *fallback; /* fallback token in case this token doesn't parse */ 163 int prec; /* Precedence if defined (-1 otherwise) */ 164 enum e_assoc assoc; /* Associativity if precedence is defined */ 165 char *firstset; /* First-set for all rules of this symbol */ 166 Boolean lambda; /* True if NT and can generate an empty string */ 167 int useCnt; /* Number of times used */ 168 char *destructor; /* Code which executes whenever this symbol is 169 ** popped from the stack during error processing */ 170 int destLineno; /* Line number for start of destructor */ 171 char *datatype; /* The data type of information held by this 172 ** object. Only used if type==NONTERMINAL */ 173 int dtnum; /* The data type number. In the parser, the value 174 ** stack is a union. The .yy%d element of this 175 ** union is the correct data type for this object */ 176 /* The following fields are used by MULTITERMINALs only */ 177 int nsubsym; /* Number of constituent symbols in the MULTI */ 178 struct symbol **subsym; /* Array of constituent symbols */ 179 }; 180 181 /* Each production rule in the grammar is stored in the following 182 ** structure. */ 183 struct rule { 184 struct symbol *lhs; /* Left-hand side of the rule */ 185 const char *lhsalias; /* Alias for the LHS (NULL if none) */ 186 int lhsStart; /* True if left-hand side is the start symbol */ 187 int ruleline; /* Line number for the rule */ 188 int nrhs; /* Number of RHS symbols */ 189 struct symbol **rhs; /* The RHS symbols */ 190 const char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ 191 int line; /* Line number at which code begins */ 192 const char *code; /* The code executed when this rule is reduced */ 193 struct symbol *precsym; /* Precedence symbol for this rule */ 194 int index; /* An index number for this rule */ 195 Boolean canReduce; /* True if this rule is ever reduced */ 196 struct rule *nextlhs; /* Next rule with the same LHS */ 197 struct rule *next; /* Next rule in the global list */ 198 }; 199 200 /* A configuration is a production rule of the grammar together with 201 ** a mark (dot) showing how much of that rule has been processed so far. 202 ** Configurations also contain a follow-set which is a list of terminal 203 ** symbols which are allowed to immediately follow the end of the rule. 204 ** Every configuration is recorded as an instance of the following: */ 205 enum cfgstatus { 206 COMPLETE, 207 INCOMPLETE 208 }; 209 struct config { 210 struct rule *rp; /* The rule upon which the configuration is based */ 211 int dot; /* The parse point */ 212 char *fws; /* Follow-set for this configuration only */ 213 struct plink *fplp; /* Follow-set forward propagation links */ 214 struct plink *bplp; /* Follow-set backwards propagation links */ 215 struct state *stp; /* Pointer to state which contains this */ 216 enum cfgstatus status; /* used during followset and shift computations */ 217 struct config *next; /* Next configuration in the state */ 218 struct config *bp; /* The next basis configuration */ 219 }; 220 221 enum e_action { 222 SHIFT, 223 ACCEPT, 224 REDUCE, 225 ERROR, 226 SSCONFLICT, /* A shift/shift conflict */ 227 SRCONFLICT, /* Was a reduce, but part of a conflict */ 228 RRCONFLICT, /* Was a reduce, but part of a conflict */ 229 SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ 230 RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ 231 NOT_USED /* Deleted by compression */ 232 }; 233 234 /* Every shift or reduce operation is stored as one of the following */ 235 struct action { 236 struct symbol *sp; /* The look-ahead symbol */ 237 enum e_action type; 238 union { 239 struct state *stp; /* The new state, if a shift */ 240 struct rule *rp; /* The rule, if a reduce */ 241 } x; 242 struct action *next; /* Next action for this state */ 243 struct action *collide; /* Next action with the same hash */ 244 }; 245 246 /* Each state of the generated parser's finite state machine 247 ** is encoded as an instance of the following structure. */ 248 struct state { 249 struct config *bp; /* The basis configurations for this state */ 250 struct config *cfp; /* All configurations in this set */ 251 int statenum; /* Sequential number for this state */ 252 struct action *ap; /* Array of actions for this state */ 253 int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */ 254 int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */ 255 int iDflt; /* Default action */ 256 }; 257 #define NO_OFFSET (-2147483647) 258 259 /* A followset propagation link indicates that the contents of one 260 ** configuration followset should be propagated to another whenever 261 ** the first changes. */ 262 struct plink { 263 struct config *cfp; /* The configuration to which linked */ 264 struct plink *next; /* The next propagate link */ 265 }; 266 267 /* The state vector for the entire parser generator is recorded as 268 ** follows. (LEMON uses no global variables and makes little use of 269 ** static variables. Fields in the following structure can be thought 270 ** of as begin global variables in the program.) */ 271 struct lemon { 272 struct state **sorted; /* Table of states sorted by state number */ 273 struct rule *rule; /* List of all rules */ 274 int nstate; /* Number of states */ 275 int nrule; /* Number of rules */ 276 int nsymbol; /* Number of terminal and nonterminal symbols */ 277 int nterminal; /* Number of terminal symbols */ 278 struct symbol **symbols; /* Sorted array of pointers to symbols */ 279 int errorcnt; /* Number of errors */ 280 struct symbol *errsym; /* The error symbol */ 281 struct symbol *wildcard; /* Token that matches anything */ 282 char *name; /* Name of the generated parser */ 283 char *arg; /* Declaration of the 3th argument to parser */ 284 char *tokentype; /* Type of terminal symbols in the parser stack */ 285 char *vartype; /* The default type of non-terminal symbols */ 286 char *start; /* Name of the start symbol for the grammar */ 287 char *stacksize; /* Size of the parser stack */ 288 char *include; /* Code to put at the start of the C file */ 289 char *error; /* Code to execute when an error is seen */ 290 char *overflow; /* Code to execute on a stack overflow */ 291 char *failure; /* Code to execute on parser failure */ 292 char *accept; /* Code to execute when the parser excepts */ 293 char *extracode; /* Code appended to the generated file */ 294 char *tokendest; /* Code to execute to destroy token data */ 295 char *vardest; /* Code for the default non-terminal destructor */ 296 char *filename; /* Name of the input file */ 297 char *outname; /* Name of the current output file */ 298 char *tokenprefix; /* A prefix added to token names in the .h file */ 299 int nconflict; /* Number of parsing conflicts */ 300 int tablesize; /* Size of the parse tables */ 301 int basisflag; /* Print only basis configurations */ 302 int has_fallback; /* True if any %fallback is seen in the grammar */ 303 int nolinenosflag; /* True if #line statements should not be printed */ 304 char *argv0; /* Name of the program */ 305 }; 306 307 #define MemoryCheck(X) if((X)==0){ \ 308 extern void memory_error(); \ 309 memory_error(); \ 310 } 311 312 /**************** From the file "table.h" *********************************/ 313 /* 314 ** All code in this file has been automatically generated 315 ** from a specification in the file 316 ** "table.q" 317 ** by the associative array code building program "aagen". 318 ** Do not edit this file! Instead, edit the specification 319 ** file, then rerun aagen. 320 */ 321 /* 322 ** Code for processing tables in the LEMON parser generator. 323 */ 324 /* Routines for handling a strings */ 325 326 const char *Strsafe(const char *); 327 328 void Strsafe_init(void); 329 int Strsafe_insert(const char *); 330 const char *Strsafe_find(const char *); 331 332 /* Routines for handling symbols of the grammar */ 333 334 struct symbol *Symbol_new(const char *); 335 int Symbolcmpp(const void *, const void *); 336 void Symbol_init(void); 337 int Symbol_insert(struct symbol *, const char *); 338 struct symbol *Symbol_find(const char *); 339 struct symbol *Symbol_Nth(int); 340 int Symbol_count(void); 341 struct symbol **Symbol_arrayof(void); 342 343 /* Routines to manage the state table */ 344 345 int Configcmp(const char *, const char *); 346 struct state *State_new(void); 347 void State_init(void); 348 int State_insert(struct state *, struct config *); 349 struct state *State_find(struct config *); 350 struct state **State_arrayof(/* */); 351 352 /* Routines used for efficiency in Configlist_add */ 353 354 void Configtable_init(void); 355 int Configtable_insert(struct config *); 356 struct config *Configtable_find(struct config *); 357 void Configtable_clear(int(*)(struct config *)); 358 359 /****************** From the file "action.c" *******************************/ 360 /* 361 ** Routines processing parser actions in the LEMON parser generator. 362 */ 363 364 /* Allocate a new parser action */ 365 static struct action *Action_new(void){ 366 static struct action *freelist = 0; 367 struct action *newaction; 368 369 if( freelist==0 ){ 370 int i; 371 int amt = 100; 372 freelist = (struct action *)calloc(amt, sizeof(struct action)); 373 if( freelist==0 ){ 374 fprintf(stderr,"Unable to allocate memory for a new parser action."); 375 exit(1); 376 } 377 for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1]; 378 freelist[amt-1].next = 0; 379 } 380 newaction = freelist; 381 freelist = freelist->next; 382 return newaction; 383 } 384 385 /* Compare two actions for sorting purposes. Return negative, zero, or 386 ** positive if the first action is less than, equal to, or greater than 387 ** the first 388 */ 389 static int actioncmp( 390 struct action *ap1, 391 struct action *ap2 392 ){ 393 int rc; 394 rc = ap1->sp->index - ap2->sp->index; 395 if( rc==0 ){ 396 rc = (int)ap1->type - (int)ap2->type; 397 } 398 if( rc==0 && ap1->type==REDUCE ){ 399 rc = ap1->x.rp->index - ap2->x.rp->index; 400 } 401 if( rc==0 ){ 402 rc = (int) (ap2 - ap1); 403 } 404 return rc; 405 } 406 407 /* Sort parser actions */ 408 static struct action *Action_sort( 409 struct action *ap 410 ){ 411 ap = (struct action *)msort((char *)ap,(char **)&ap->next, 412 (int(*)(const char*,const char*))actioncmp); 413 return ap; 414 } 415 416 void Action_add( 417 struct action **app, 418 enum e_action type, 419 struct symbol *sp, 420 char *arg 421 ){ 422 struct action *newaction; 423 newaction = Action_new(); 424 newaction->next = *app; 425 *app = newaction; 426 newaction->type = type; 427 newaction->sp = sp; 428 if( type==SHIFT ){ 429 newaction->x.stp = (struct state *)arg; 430 }else{ 431 newaction->x.rp = (struct rule *)arg; 432 } 433 } 434 /********************** New code to implement the "acttab" module ***********/ 435 /* 436 ** This module implements routines use to construct the yy_action[] table. 437 */ 438 439 /* 440 ** The state of the yy_action table under construction is an instance of 441 ** the following structure. 442 ** 443 ** The yy_action table maps the pair (state_number, lookahead) into an 444 ** action_number. The table is an array of integers pairs. The state_number 445 ** determines an initial offset into the yy_action array. The lookahead 446 ** value is then added to this initial offset to get an index X into the 447 ** yy_action array. If the aAction[X].lookahead equals the value of the 448 ** of the lookahead input, then the value of the action_number output is 449 ** aAction[X].action. If the lookaheads do not match then the 450 ** default action for the state_number is returned. 451 ** 452 ** All actions associated with a single state_number are first entered 453 ** into aLookahead[] using multiple calls to acttab_action(). Then the 454 ** actions for that single state_number are placed into the aAction[] 455 ** array with a single call to acttab_insert(). The acttab_insert() call 456 ** also resets the aLookahead[] array in preparation for the next 457 ** state number. 458 */ 459 struct lookahead_action { 460 int lookahead; /* Value of the lookahead token */ 461 int action; /* Action to take on the given lookahead */ 462 }; 463 typedef struct acttab acttab; 464 struct acttab { 465 int nAction; /* Number of used slots in aAction[] */ 466 int nActionAlloc; /* Slots allocated for aAction[] */ 467 struct lookahead_action 468 *aAction, /* The yy_action[] table under construction */ 469 *aLookahead; /* A single new transaction set */ 470 int mnLookahead; /* Minimum aLookahead[].lookahead */ 471 int mnAction; /* Action associated with mnLookahead */ 472 int mxLookahead; /* Maximum aLookahead[].lookahead */ 473 int nLookahead; /* Used slots in aLookahead[] */ 474 int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ 475 }; 476 477 /* Return the number of entries in the yy_action table */ 478 #define acttab_size(X) ((X)->nAction) 479 480 /* The value for the N-th entry in yy_action */ 481 #define acttab_yyaction(X,N) ((X)->aAction[N].action) 482 483 /* The value for the N-th entry in yy_lookahead */ 484 #define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead) 485 486 /* Free all memory associated with the given acttab */ 487 void acttab_free(acttab *p){ 488 free( p->aAction ); 489 free( p->aLookahead ); 490 free( p ); 491 } 492 493 /* Allocate a new acttab structure */ 494 acttab *acttab_alloc(void){ 495 acttab *p = (acttab *) calloc( 1, sizeof(*p) ); 496 if( p==0 ){ 497 fprintf(stderr,"Unable to allocate memory for a new acttab."); 498 exit(1); 499 } 500 memset(p, 0, sizeof(*p)); 501 return p; 502 } 503 504 /* Add a new action to the current transaction set. 505 ** 506 ** This routine is called once for each lookahead for a particular 507 ** state. 508 */ 509 void acttab_action(acttab *p, int lookahead, int action){ 510 if( p->nLookahead>=p->nLookaheadAlloc ){ 511 p->nLookaheadAlloc += 25; 512 p->aLookahead = (struct lookahead_action *) realloc( p->aLookahead, 513 sizeof(p->aLookahead[0])*p->nLookaheadAlloc ); 514 if( p->aLookahead==0 ){ 515 fprintf(stderr,"malloc failed\n"); 516 exit(1); 517 } 518 } 519 if( p->nLookahead==0 ){ 520 p->mxLookahead = lookahead; 521 p->mnLookahead = lookahead; 522 p->mnAction = action; 523 }else{ 524 if( p->mxLookahead<lookahead ) p->mxLookahead = lookahead; 525 if( p->mnLookahead>lookahead ){ 526 p->mnLookahead = lookahead; 527 p->mnAction = action; 528 } 529 } 530 p->aLookahead[p->nLookahead].lookahead = lookahead; 531 p->aLookahead[p->nLookahead].action = action; 532 p->nLookahead++; 533 } 534 535 /* 536 ** Add the transaction set built up with prior calls to acttab_action() 537 ** into the current action table. Then reset the transaction set back 538 ** to an empty set in preparation for a new round of acttab_action() calls. 539 ** 540 ** Return the offset into the action table of the new transaction. 541 */ 542 int acttab_insert(acttab *p){ 543 int i, j, k, n; 544 assert( p->nLookahead>0 ); 545 546 /* Make sure we have enough space to hold the expanded action table 547 ** in the worst case. The worst case occurs if the transaction set 548 ** must be appended to the current action table 549 */ 550 n = p->mxLookahead + 1; 551 if( p->nAction + n >= p->nActionAlloc ){ 552 int oldAlloc = p->nActionAlloc; 553 p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; 554 p->aAction = (struct lookahead_action *) realloc( p->aAction, 555 sizeof(p->aAction[0])*p->nActionAlloc); 556 if( p->aAction==0 ){ 557 fprintf(stderr,"malloc failed\n"); 558 exit(1); 559 } 560 for(i=oldAlloc; i<p->nActionAlloc; i++){ 561 p->aAction[i].lookahead = -1; 562 p->aAction[i].action = -1; 563 } 564 } 565 566 /* Scan the existing action table looking for an offset that is a 567 ** duplicate of the current transaction set. Fall out of the loop 568 ** if and when the duplicate is found. 569 ** 570 ** i is the index in p->aAction[] where p->mnLookahead is inserted. 571 */ 572 for(i=p->nAction-1; i>=0; i--){ 573 if( p->aAction[i].lookahead==p->mnLookahead ){ 574 /* All lookaheads and actions in the aLookahead[] transaction 575 ** must match against the candidate aAction[i] entry. */ 576 if( p->aAction[i].action!=p->mnAction ) continue; 577 for(j=0; j<p->nLookahead; j++){ 578 k = p->aLookahead[j].lookahead - p->mnLookahead + i; 579 if( k<0 || k>=p->nAction ) break; 580 if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break; 581 if( p->aLookahead[j].action!=p->aAction[k].action ) break; 582 } 583 if( j<p->nLookahead ) continue; 584 585 /* No possible lookahead value that is not in the aLookahead[] 586 ** transaction is allowed to match aAction[i] */ 587 n = 0; 588 for(j=0; j<p->nAction; j++){ 589 if( p->aAction[j].lookahead<0 ) continue; 590 if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++; 591 } 592 if( n==p->nLookahead ){ 593 break; /* An exact match is found at offset i */ 594 } 595 } 596 } 597 598 /* If no existing offsets exactly match the current transaction, find an 599 ** an empty offset in the aAction[] table in which we can add the 600 ** aLookahead[] transaction. 601 */ 602 if( i<0 ){ 603 /* Look for holes in the aAction[] table that fit the current 604 ** aLookahead[] transaction. Leave i set to the offset of the hole. 605 ** If no holes are found, i is left at p->nAction, which means the 606 ** transaction will be appended. */ 607 for(i=0; i<p->nActionAlloc - p->mxLookahead; i++){ 608 if( p->aAction[i].lookahead<0 ){ 609 for(j=0; j<p->nLookahead; j++){ 610 k = p->aLookahead[j].lookahead - p->mnLookahead + i; 611 if( k<0 ) break; 612 if( p->aAction[k].lookahead>=0 ) break; 613 } 614 if( j<p->nLookahead ) continue; 615 for(j=0; j<p->nAction; j++){ 616 if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break; 617 } 618 if( j==p->nAction ){ 619 break; /* Fits in empty slots */ 620 } 621 } 622 } 623 } 624 /* Insert transaction set at index i. */ 625 for(j=0; j<p->nLookahead; j++){ 626 k = p->aLookahead[j].lookahead - p->mnLookahead + i; 627 p->aAction[k] = p->aLookahead[j]; 628 if( k>=p->nAction ) p->nAction = k+1; 629 } 630 p->nLookahead = 0; 631 632 /* Return the offset that is added to the lookahead in order to get the 633 ** index into yy_action of the action */ 634 return i - p->mnLookahead; 635 } 636 637 /********************** From the file "build.c" *****************************/ 638 /* 639 ** Routines to construction the finite state machine for the LEMON 640 ** parser generator. 641 */ 642 643 /* Find a precedence symbol of every rule in the grammar. 644 ** 645 ** Those rules which have a precedence symbol coded in the input 646 ** grammar using the "[symbol]" construct will already have the 647 ** rp->precsym field filled. Other rules take as their precedence 648 ** symbol the first RHS symbol with a defined precedence. If there 649 ** are not RHS symbols with a defined precedence, the precedence 650 ** symbol field is left blank. 651 */ 652 void FindRulePrecedences(struct lemon *xp) 653 { 654 struct rule *rp; 655 for(rp=xp->rule; rp; rp=rp->next){ 656 if( rp->precsym==0 ){ 657 int i, j; 658 for(i=0; i<rp->nrhs && rp->precsym==0; i++){ 659 struct symbol *sp = rp->rhs[i]; 660 if( sp->type==MULTITERMINAL ){ 661 for(j=0; j<sp->nsubsym; j++){ 662 if( sp->subsym[j]->prec>=0 ){ 663 rp->precsym = sp->subsym[j]; 664 break; 665 } 666 } 667 }else if( sp->prec>=0 ){ 668 rp->precsym = rp->rhs[i]; 669 } 670 } 671 } 672 } 673 return; 674 } 675 676 /* Find all nonterminals which will generate the empty string. 677 ** Then go back and compute the first sets of every nonterminal. 678 ** The first set is the set of all terminal symbols which can begin 679 ** a string generated by that nonterminal. 680 */ 681 void FindFirstSets(struct lemon *lemp) 682 { 683 int i, j; 684 struct rule *rp; 685 int progress; 686 687 for(i=0; i<lemp->nsymbol; i++){ 688 lemp->symbols[i]->lambda = LEMON_FALSE; 689 } 690 for(i=lemp->nterminal; i<lemp->nsymbol; i++){ 691 lemp->symbols[i]->firstset = SetNew(); 692 } 693 694 /* First compute all lambdas */ 695 do{ 696 progress = 0; 697 for(rp=lemp->rule; rp; rp=rp->next){ 698 if( rp->lhs->lambda ) continue; 699 for(i=0; i<rp->nrhs; i++){ 700 struct symbol *sp = rp->rhs[i]; 701 if( sp->type!=TERMINAL || sp->lambda==LEMON_FALSE ) break; 702 } 703 if( i==rp->nrhs ){ 704 rp->lhs->lambda = LEMON_TRUE; 705 progress = 1; 706 } 707 } 708 }while( progress ); 709 710 /* Now compute all first sets */ 711 do{ 712 struct symbol *s1, *s2; 713 progress = 0; 714 for(rp=lemp->rule; rp; rp=rp->next){ 715 s1 = rp->lhs; 716 for(i=0; i<rp->nrhs; i++){ 717 s2 = rp->rhs[i]; 718 if( s2->type==TERMINAL ){ 719 progress += SetAdd(s1->firstset,s2->index); 720 break; 721 }else if( s2->type==MULTITERMINAL ){ 722 for(j=0; j<s2->nsubsym; j++){ 723 progress += SetAdd(s1->firstset,s2->subsym[j]->index); 724 } 725 break; 726 }else if( s1==s2 ){ 727 if( s1->lambda==LEMON_FALSE ) break; 728 }else{ 729 progress += SetUnion(s1->firstset,s2->firstset); 730 if( s2->lambda==LEMON_FALSE ) break; 731 } 732 } 733 } 734 }while( progress ); 735 return; 736 } 737 738 /* Compute all LR(0) states for the grammar. Links 739 ** are added to between some states so that the LR(1) follow sets 740 ** can be computed later. 741 */ 742 PRIVATE struct state *getstate(struct lemon *); /* forward reference */ 743 void FindStates(struct lemon *lemp) 744 { 745 struct symbol *sp; 746 struct rule *rp; 747 748 Configlist_init(); 749 750 /* Find the start symbol */ 751 if( lemp->start ){ 752 sp = Symbol_find(lemp->start); 753 if( sp==0 ){ 754 ErrorMsg(lemp->filename,0, 755 "The specified start symbol \"%s\" is not \ 756 in a nonterminal of the grammar. \"%s\" will be used as the start \ 757 symbol instead.",lemp->start,lemp->rule->lhs->name); 758 lemp->errorcnt++; 759 sp = lemp->rule->lhs; 760 } 761 }else{ 762 sp = lemp->rule->lhs; 763 } 764 765 /* Make sure the start symbol doesn't occur on the right-hand side of 766 ** any rule. Report an error if it does. (YACC would generate a new 767 ** start symbol in this case.) */ 768 for(rp=lemp->rule; rp; rp=rp->next){ 769 int i; 770 for(i=0; i<rp->nrhs; i++){ 771 if( rp->rhs[i]==sp ){ /* FIX ME: Deal with multiterminals */ 772 ErrorMsg(lemp->filename,0, 773 "The start symbol \"%s\" occurs on the \ 774 right-hand side of a rule. This will result in a parser which \ 775 does not work properly.",sp->name); 776 lemp->errorcnt++; 777 } 778 } 779 } 780 781 /* The basis configuration set for the first state 782 ** is all rules which have the start symbol as their 783 ** left-hand side */ 784 for(rp=sp->rule; rp; rp=rp->nextlhs){ 785 struct config *newcfp; 786 rp->lhsStart = 1; 787 newcfp = Configlist_addbasis(rp,0); 788 SetAdd(newcfp->fws,0); 789 } 790 791 /* Compute the first state. All other states will be 792 ** computed automatically during the computation of the first one. 793 ** The returned pointer to the first state is not used. */ 794 (void)getstate(lemp); 795 return; 796 } 797 798 /* Return a pointer to a state which is described by the configuration 799 ** list which has been built from calls to Configlist_add. 800 */ 801 PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */ 802 PRIVATE struct state *getstate(struct lemon *lemp) 803 { 804 struct config *cfp, *bp; 805 struct state *stp; 806 807 /* Extract the sorted basis of the new state. The basis was constructed 808 ** by prior calls to "Configlist_addbasis()". */ 809 Configlist_sortbasis(); 810 bp = Configlist_basis(); 811 812 /* Get a state with the same basis */ 813 stp = State_find(bp); 814 if( stp ){ 815 /* A state with the same basis already exists! Copy all the follow-set 816 ** propagation links from the state under construction into the 817 ** preexisting state, then return a pointer to the preexisting state */ 818 struct config *x, *y; 819 for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){ 820 Plink_copy(&y->bplp,x->bplp); 821 Plink_delete(x->fplp); 822 x->fplp = x->bplp = 0; 823 } 824 cfp = Configlist_return(); 825 Configlist_eat(cfp); 826 }else{ 827 /* This really is a new state. Construct all the details */ 828 Configlist_closure(lemp); /* Compute the configuration closure */ 829 Configlist_sort(); /* Sort the configuration closure */ 830 cfp = Configlist_return(); /* Get a pointer to the config list */ 831 stp = State_new(); /* A new state structure */ 832 MemoryCheck(stp); 833 stp->bp = bp; /* Remember the configuration basis */ 834 stp->cfp = cfp; /* Remember the configuration closure */ 835 stp->statenum = lemp->nstate++; /* Every state gets a sequence number */ 836 stp->ap = 0; /* No actions, yet. */ 837 State_insert(stp,stp->bp); /* Add to the state table */ 838 buildshifts(lemp,stp); /* Recursively compute successor states */ 839 } 840 return stp; 841 } 842 843 /* 844 ** Return true if two symbols are the same. 845 */ 846 int same_symbol(struct symbol *a, struct symbol *b) 847 { 848 int i; 849 if( a==b ) return 1; 850 if( a->type!=MULTITERMINAL ) return 0; 851 if( b->type!=MULTITERMINAL ) return 0; 852 if( a->nsubsym!=b->nsubsym ) return 0; 853 for(i=0; i<a->nsubsym; i++){ 854 if( a->subsym[i]!=b->subsym[i] ) return 0; 855 } 856 return 1; 857 } 858 859 /* Construct all successor states to the given state. A "successor" 860 ** state is any state which can be reached by a shift action. 861 */ 862 PRIVATE void buildshifts(struct lemon *lemp, struct state *stp) 863 { 864 struct config *cfp; /* For looping thru the config closure of "stp" */ 865 struct config *bcfp; /* For the inner loop on config closure of "stp" */ 866 struct config *newcfg; /* */ 867 struct symbol *sp; /* Symbol following the dot in configuration "cfp" */ 868 struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */ 869 struct state *newstp; /* A pointer to a successor state */ 870 871 /* Each configuration becomes complete after it contibutes to a successor 872 ** state. Initially, all configurations are incomplete */ 873 for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE; 874 875 /* Loop through all configurations of the state "stp" */ 876 for(cfp=stp->cfp; cfp; cfp=cfp->next){ 877 if( cfp->status==COMPLETE ) continue; /* Already used by inner loop */ 878 if( cfp->dot>=cfp->rp->nrhs ) continue; /* Can't shift this config */ 879 Configlist_reset(); /* Reset the new config set */ 880 sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */ 881 882 /* For every configuration in the state "stp" which has the symbol "sp" 883 ** following its dot, add the same configuration to the basis set under 884 ** construction but with the dot shifted one symbol to the right. */ 885 for(bcfp=cfp; bcfp; bcfp=bcfp->next){ 886 if( bcfp->status==COMPLETE ) continue; /* Already used */ 887 if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */ 888 bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */ 889 if( !same_symbol(bsp,sp) ) continue; /* Must be same as for "cfp" */ 890 bcfp->status = COMPLETE; /* Mark this config as used */ 891 newcfg = Configlist_addbasis(bcfp->rp,bcfp->dot+1); 892 Plink_add(&newcfg->bplp,bcfp); 893 } 894 895 /* Get a pointer to the state described by the basis configuration set 896 ** constructed in the preceding loop */ 897 newstp = getstate(lemp); 898 899 /* The state "newstp" is reached from the state "stp" by a shift action 900 ** on the symbol "sp" */ 901 if( sp->type==MULTITERMINAL ){ 902 int i; 903 for(i=0; i<sp->nsubsym; i++){ 904 Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp); 905 } 906 }else{ 907 Action_add(&stp->ap,SHIFT,sp,(char *)newstp); 908 } 909 } 910 } 911 912 /* 913 ** Construct the propagation links 914 */ 915 void FindLinks(struct lemon *lemp) 916 { 917 int i; 918 struct config *cfp, *other; 919 struct state *stp; 920 struct plink *plp; 921 922 /* Housekeeping detail: 923 ** Add to every propagate link a pointer back to the state to 924 ** which the link is attached. */ 925 for(i=0; i<lemp->nstate; i++){ 926 stp = lemp->sorted[i]; 927 for(cfp=stp->cfp; cfp; cfp=cfp->next){ 928 cfp->stp = stp; 929 } 930 } 931 932 /* Convert all backlinks into forward links. Only the forward 933 ** links are used in the follow-set computation. */ 934 for(i=0; i<lemp->nstate; i++){ 935 stp = lemp->sorted[i]; 936 for(cfp=stp->cfp; cfp; cfp=cfp->next){ 937 for(plp=cfp->bplp; plp; plp=plp->next){ 938 other = plp->cfp; 939 Plink_add(&other->fplp,cfp); 940 } 941 } 942 } 943 } 944 945 /* Compute all followsets. 946 ** 947 ** A followset is the set of all symbols which can come immediately 948 ** after a configuration. 949 */ 950 void FindFollowSets(struct lemon *lemp) 951 { 952 int i; 953 struct config *cfp; 954 struct plink *plp; 955 int progress; 956 int change; 957 958 for(i=0; i<lemp->nstate; i++){ 959 for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ 960 cfp->status = INCOMPLETE; 961 } 962 } 963 964 do{ 965 progress = 0; 966 for(i=0; i<lemp->nstate; i++){ 967 for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ 968 if( cfp->status==COMPLETE ) continue; 969 for(plp=cfp->fplp; plp; plp=plp->next){ 970 change = SetUnion(plp->cfp->fws,cfp->fws); 971 if( change ){ 972 plp->cfp->status = INCOMPLETE; 973 progress = 1; 974 } 975 } 976 cfp->status = COMPLETE; 977 } 978 } 979 }while( progress ); 980 } 981 982 static int resolve_conflict(struct action *,struct action *, struct symbol *); 983 984 /* Compute the reduce actions, and resolve conflicts. 985 */ 986 void FindActions(struct lemon *lemp) 987 { 988 int i,j; 989 struct config *cfp; 990 struct state *stp; 991 struct symbol *sp; 992 struct rule *rp; 993 994 /* Add all of the reduce actions 995 ** A reduce action is added for each element of the followset of 996 ** a configuration which has its dot at the extreme right. 997 */ 998 for(i=0; i<lemp->nstate; i++){ /* Loop over all states */ 999 stp = lemp->sorted[i]; 1000 for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */ 1001 if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? */ 1002 for(j=0; j<lemp->nterminal; j++){ 1003 if( SetFind(cfp->fws,j) ){ 1004 /* Add a reduce action to the state "stp" which will reduce by the 1005 ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */ 1006 Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp); 1007 } 1008 } 1009 } 1010 } 1011 } 1012 1013 /* Add the accepting token */ 1014 if( lemp->start ){ 1015 sp = Symbol_find(lemp->start); 1016 if( sp==0 ) sp = lemp->rule->lhs; 1017 }else{ 1018 sp = lemp->rule->lhs; 1019 } 1020 /* Add to the first state (which is always the starting state of the 1021 ** finite state machine) an action to ACCEPT if the lookahead is the 1022 ** start nonterminal. */ 1023 Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0); 1024 1025 /* Resolve conflicts */ 1026 for(i=0; i<lemp->nstate; i++){ 1027 struct action *ap, *nap; 1028 struct state *stp; 1029 stp = lemp->sorted[i]; 1030 /* assert( stp->ap ); */ 1031 stp->ap = Action_sort(stp->ap); 1032 for(ap=stp->ap; ap && ap->next; ap=ap->next){ 1033 for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){ 1034 /* The two actions "ap" and "nap" have the same lookahead. 1035 ** Figure out which one should be used */ 1036 lemp->nconflict += resolve_conflict(ap,nap,lemp->errsym); 1037 } 1038 } 1039 } 1040 1041 /* Report an error for each rule that can never be reduced. */ 1042 for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE; 1043 for(i=0; i<lemp->nstate; i++){ 1044 struct action *ap; 1045 for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ 1046 if( ap->type==REDUCE ) ap->x.rp->canReduce = LEMON_TRUE; 1047 } 1048 } 1049 for(rp=lemp->rule; rp; rp=rp->next){ 1050 if( rp->canReduce ) continue; 1051 ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n"); 1052 lemp->errorcnt++; 1053 } 1054 } 1055 1056 /* Resolve a conflict between the two given actions. If the 1057 ** conflict can't be resolved, return non-zero. 1058 ** 1059 ** NO LONGER TRUE: 1060 ** To resolve a conflict, first look to see if either action 1061 ** is on an error rule. In that case, take the action which 1062 ** is not associated with the error rule. If neither or both 1063 ** actions are associated with an error rule, then try to 1064 ** use precedence to resolve the conflict. 1065 ** 1066 ** If either action is a SHIFT, then it must be apx. This 1067 ** function won't work if apx->type==REDUCE and apy->type==SHIFT. 1068 */ 1069 static int resolve_conflict( 1070 struct action *apx, 1071 struct action *apy, 1072 struct symbol *errsym /* The error symbol (if defined. NULL otherwise) */ 1073 ){ 1074 struct symbol *spx, *spy; 1075 int errcnt = 0; 1076 assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */ 1077 if( apx->type==SHIFT && apy->type==SHIFT ){ 1078 apy->type = SSCONFLICT; 1079 errcnt++; 1080 } 1081 if( apx->type==SHIFT && apy->type==REDUCE ){ 1082 spx = apx->sp; 1083 spy = apy->x.rp->precsym; 1084 if( spy==0 || spx->prec<0 || spy->prec<0 ){ 1085 /* Not enough precedence information. */ 1086 apy->type = SRCONFLICT; 1087 errcnt++; 1088 }else if( spx->prec>spy->prec ){ /* higher precedence wins */ 1089 apy->type = RD_RESOLVED; 1090 }else if( spx->prec<spy->prec ){ 1091 apx->type = SH_RESOLVED; 1092 }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */ 1093 apy->type = RD_RESOLVED; /* associativity */ 1094 }else if( spx->prec==spy->prec && spx->assoc==LEFT ){ /* to break tie */ 1095 apx->type = SH_RESOLVED; 1096 }else{ 1097 assert( spx->prec==spy->prec && spx->assoc==NONE ); 1098 apy->type = SRCONFLICT; 1099 errcnt++; 1100 } 1101 }else if( apx->type==REDUCE && apy->type==REDUCE ){ 1102 spx = apx->x.rp->precsym; 1103 spy = apy->x.rp->precsym; 1104 if( spx==0 || spy==0 || spx->prec<0 || 1105 spy->prec<0 || spx->prec==spy->prec ){ 1106 apy->type = RRCONFLICT; 1107 errcnt++; 1108 }else if( spx->prec>spy->prec ){ 1109 apy->type = RD_RESOLVED; 1110 }else if( spx->prec<spy->prec ){ 1111 apx->type = RD_RESOLVED; 1112 } 1113 }else{ 1114 assert( 1115 apx->type==SH_RESOLVED || 1116 apx->type==RD_RESOLVED || 1117 apx->type==SSCONFLICT || 1118 apx->type==SRCONFLICT || 1119 apx->type==RRCONFLICT || 1120 apy->type==SH_RESOLVED || 1121 apy->type==RD_RESOLVED || 1122 apy->type==SSCONFLICT || 1123 apy->type==SRCONFLICT || 1124 apy->type==RRCONFLICT 1125 ); 1126 /* The REDUCE/SHIFT case cannot happen because SHIFTs come before 1127 ** REDUCEs on the list. If we reach this point it must be because 1128 ** the parser conflict had already been resolved. */ 1129 } 1130 return errcnt; 1131 } 1132 /********************* From the file "configlist.c" *************************/ 1133 /* 1134 ** Routines to processing a configuration list and building a state 1135 ** in the LEMON parser generator. 1136 */ 1137 1138 static struct config *freelist = 0; /* List of free configurations */ 1139 static struct config *current = 0; /* Top of list of configurations */ 1140 static struct config **currentend = 0; /* Last on list of configs */ 1141 static struct config *basis = 0; /* Top of list of basis configs */ 1142 static struct config **basisend = 0; /* End of list of basis configs */ 1143 1144 /* Return a pointer to a new configuration */ 1145 PRIVATE struct config *newconfig(){ 1146 struct config *newcfg; 1147 if( freelist==0 ){ 1148 int i; 1149 int amt = 3; 1150 freelist = (struct config *)calloc( amt, sizeof(struct config) ); 1151 if( freelist==0 ){ 1152 fprintf(stderr,"Unable to allocate memory for a new configuration."); 1153 exit(1); 1154 } 1155 for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1]; 1156 freelist[amt-1].next = 0; 1157 } 1158 newcfg = freelist; 1159 freelist = freelist->next; 1160 return newcfg; 1161 } 1162 1163 /* The configuration "old" is no longer used */ 1164 PRIVATE void deleteconfig(struct config *old) 1165 { 1166 old->next = freelist; 1167 freelist = old; 1168 } 1169 1170 /* Initialized the configuration list builder */ 1171 void Configlist_init(){ 1172 current = 0; 1173 currentend = ¤t; 1174 basis = 0; 1175 basisend = &basis; 1176 Configtable_init(); 1177 return; 1178 } 1179 1180 /* Initialized the configuration list builder */ 1181 void Configlist_reset(){ 1182 current = 0; 1183 currentend = ¤t; 1184 basis = 0; 1185 basisend = &basis; 1186 Configtable_clear(0); 1187 return; 1188 } 1189 1190 /* Add another configuration to the configuration list */ 1191 struct config *Configlist_add( 1192 struct rule *rp, /* The rule */ 1193 int dot /* Index into the RHS of the rule where the dot goes */ 1194 ){ 1195 struct config *cfp, model; 1196 1197 assert( currentend!=0 ); 1198 model.rp = rp; 1199 model.dot = dot; 1200 cfp = Configtable_find(&model); 1201 if( cfp==0 ){ 1202 cfp = newconfig(); 1203 cfp->rp = rp; 1204 cfp->dot = dot; 1205 cfp->fws = SetNew(); 1206 cfp->stp = 0; 1207 cfp->fplp = cfp->bplp = 0; 1208 cfp->next = 0; 1209 cfp->bp = 0; 1210 *currentend = cfp; 1211 currentend = &cfp->next; 1212 Configtable_insert(cfp); 1213 } 1214 return cfp; 1215 } 1216 1217 /* Add a basis configuration to the configuration list */ 1218 struct config *Configlist_addbasis(struct rule *rp, int dot) 1219 { 1220 struct config *cfp, model; 1221 1222 assert( basisend!=0 ); 1223 assert( currentend!=0 ); 1224 model.rp = rp; 1225 model.dot = dot; 1226 cfp = Configtable_find(&model); 1227 if( cfp==0 ){ 1228 cfp = newconfig(); 1229 cfp->rp = rp; 1230 cfp->dot = dot; 1231 cfp->fws = SetNew(); 1232 cfp->stp = 0; 1233 cfp->fplp = cfp->bplp = 0; 1234 cfp->next = 0; 1235 cfp->bp = 0; 1236 *currentend = cfp; 1237 currentend = &cfp->next; 1238 *basisend = cfp; 1239 basisend = &cfp->bp; 1240 Configtable_insert(cfp); 1241 } 1242 return cfp; 1243 } 1244 1245 /* Compute the closure of the configuration list */ 1246 void Configlist_closure(struct lemon *lemp) 1247 { 1248 struct config *cfp, *newcfp; 1249 struct rule *rp, *newrp; 1250 struct symbol *sp, *xsp; 1251 int i, dot; 1252 1253 assert( currentend!=0 ); 1254 for(cfp=current; cfp; cfp=cfp->next){ 1255 rp = cfp->rp; 1256 dot = cfp->dot; 1257 if( dot>=rp->nrhs ) continue; 1258 sp = rp->rhs[dot]; 1259 if( sp->type==NONTERMINAL ){ 1260 if( sp->rule==0 && sp!=lemp->errsym ){ 1261 ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.", 1262 sp->name); 1263 lemp->errorcnt++; 1264 } 1265 for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){ 1266 newcfp = Configlist_add(newrp,0); 1267 for(i=dot+1; i<rp->nrhs; i++){ 1268 xsp = rp->rhs[i]; 1269 if( xsp->type==TERMINAL ){ 1270 SetAdd(newcfp->fws,xsp->index); 1271 break; 1272 }else if( xsp->type==MULTITERMINAL ){ 1273 int k; 1274 for(k=0; k<xsp->nsubsym; k++){ 1275 SetAdd(newcfp->fws, xsp->subsym[k]->index); 1276 } 1277 break; 1278 }else{ 1279 SetUnion(newcfp->fws,xsp->firstset); 1280 if( xsp->lambda==LEMON_FALSE ) break; 1281 } 1282 } 1283 if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp); 1284 } 1285 } 1286 } 1287 return; 1288 } 1289 1290 /* Sort the configuration list */ 1291 void Configlist_sort(){ 1292 current = (struct config *)msort((char *)current,(char **)&(current->next),Configcmp); 1293 currentend = 0; 1294 return; 1295 } 1296 1297 /* Sort the basis configuration list */ 1298 void Configlist_sortbasis(){ 1299 basis = (struct config *)msort((char *)current,(char **)&(current->bp),Configcmp); 1300 basisend = 0; 1301 return; 1302 } 1303 1304 /* Return a pointer to the head of the configuration list and 1305 ** reset the list */ 1306 struct config *Configlist_return(){ 1307 struct config *old; 1308 old = current; 1309 current = 0; 1310 currentend = 0; 1311 return old; 1312 } 1313 1314 /* Return a pointer to the head of the configuration list and 1315 ** reset the list */ 1316 struct config *Configlist_basis(){ 1317 struct config *old; 1318 old = basis; 1319 basis = 0; 1320 basisend = 0; 1321 return old; 1322 } 1323 1324 /* Free all elements of the given configuration list */ 1325 void Configlist_eat(struct config *cfp) 1326 { 1327 struct config *nextcfp; 1328 for(; cfp; cfp=nextcfp){ 1329 nextcfp = cfp->next; 1330 assert( cfp->fplp==0 ); 1331 assert( cfp->bplp==0 ); 1332 if( cfp->fws ) SetFree(cfp->fws); 1333 deleteconfig(cfp); 1334 } 1335 return; 1336 } 1337 /***************** From the file "error.c" *********************************/ 1338 /* 1339 ** Code for printing error message. 1340 */ 1341 1342 void ErrorMsg(const char *filename, int lineno, const char *format, ...){ 1343 va_list ap; 1344 fprintf(stderr, "%s:%d: ", filename, lineno); 1345 va_start(ap, format); 1346 vfprintf(stderr,format,ap); 1347 va_end(ap); 1348 fprintf(stderr, "\n"); 1349 } 1350 /**************** From the file "main.c" ************************************/ 1351 /* 1352 ** Main program file for the LEMON parser generator. 1353 */ 1354 1355 /* Report an out-of-memory condition and abort. This function 1356 ** is used mostly by the "MemoryCheck" macro in struct.h 1357 */ 1358 void memory_error(){ 1359 fprintf(stderr,"Out of memory. Aborting...\n"); 1360 exit(1); 1361 } 1362 1363 static int nDefine = 0; /* Number of -D options on the command line */ 1364 static char **azDefine = 0; /* Name of the -D macros */ 1365 1366 /* This routine is called with the argument to each -D command-line option. 1367 ** Add the macro defined to the azDefine array. 1368 */ 1369 static void handle_D_option(char *z){ 1370 char **paz; 1371 nDefine++; 1372 azDefine = (char **) realloc(azDefine, sizeof(azDefine[0])*nDefine); 1373 if( azDefine==0 ){ 1374 fprintf(stderr,"out of memory\n"); 1375 exit(1); 1376 } 1377 paz = &azDefine[nDefine-1]; 1378 *paz = (char *) malloc( lemonStrlen(z)+1 ); 1379 if( *paz==0 ){ 1380 fprintf(stderr,"out of memory\n"); 1381 exit(1); 1382 } 1383 strcpy(*paz, z); 1384 for(z=*paz; *z && *z!='='; z++){} 1385 *z = 0; 1386 } 1387 1388 static char *user_templatename = NULL; 1389 static void handle_T_option(char *z){ 1390 user_templatename = (char *) malloc( lemonStrlen(z)+1 ); 1391 if( user_templatename==0 ){ 1392 memory_error(); 1393 } 1394 strcpy(user_templatename, z); 1395 } 1396 1397 int local_out_dir = 0; 1398 1399 /* The main program. Parse the command line and do it... */ 1400 int main(int argc, char **argv) 1401 { 1402 static int version = 0; 1403 static int rpflag = 0; 1404 static int basisflag = 0; 1405 static int compress = 0; 1406 static int quiet = 0; 1407 static int statistics = 0; 1408 static int mhflag = 0; 1409 static int nolinenosflag = 0; 1410 static int noResort = 0; 1411 static struct s_options options[] = { 1412 {OPT_FLAG, "b", (char*)&basisflag, "Print only the basis in report."}, 1413 {OPT_FLAG, "c", (char*)&compress, "Don't compress the action table."}, 1414 {OPT_FLAG, "d", (char*)&local_out_dir, "Output files in the current directory."}, 1415 {OPT_FSTR, "D", (char*)handle_D_option, "Define an %ifdef macro."}, 1416 {OPT_FSTR, "T", (char*)handle_T_option, "Specify a template file."}, 1417 {OPT_FLAG, "g", (char*)&rpflag, "Print grammar without actions."}, 1418 {OPT_FLAG, "m", (char*)&mhflag, "Output a makeheaders compatible file."}, 1419 {OPT_FLAG, "l", (char*)&nolinenosflag, "Do not print #line statements."}, 1420 {OPT_FLAG, "p", (char*)&showPrecedenceConflict, 1421 "Show conflicts resolved by precedence rules"}, 1422 {OPT_FLAG, "q", (char*)&quiet, "(Quiet) Don't print the report file."}, 1423 {OPT_FLAG, "r", (char*)&noResort, "Do not sort or renumber states"}, 1424 {OPT_FLAG, "s", (char*)&statistics, 1425 "Print parser stats to standard output."}, 1426 {OPT_FLAG, "x", (char*)&version, "Print the version number."}, 1427 {OPT_FLAG,0,0,0} 1428 }; 1429 int i; 1430 int exitcode; 1431 struct lemon lem; 1432 1433 /*atexit(LemonAtExit);*/ 1434 1435 OptInit(argv,options,stderr); 1436 if( version ){ 1437 printf("Lemon version 1.0\n"); 1438 exit(0); 1439 } 1440 if( OptNArgs()!=1 ){ 1441 fprintf(stderr,"Exactly one filename argument is required.\n"); 1442 exit(1); 1443 } 1444 memset(&lem, 0, sizeof(lem)); 1445 lem.errorcnt = 0; 1446 1447 /* Initialize the machine */ 1448 Strsafe_init(); 1449 Symbol_init(); 1450 State_init(); 1451 lem.argv0 = argv[0]; 1452 lem.filename = OptArg(0); 1453 lem.basisflag = basisflag; 1454 lem.nolinenosflag = nolinenosflag; 1455 Symbol_new("$"); 1456 lem.errsym = Symbol_new("error"); 1457 lem.errsym->useCnt = 0; 1458 1459 /* Parse the input file */ 1460 Parse(&lem); 1461 if( lem.errorcnt ) exit(lem.errorcnt); 1462 if( lem.nrule==0 ){ 1463 fprintf(stderr,"Empty grammar.\n"); 1464 exit(1); 1465 } 1466 1467 /* Count and index the symbols of the grammar */ 1468 lem.nsymbol = Symbol_count(); 1469 Symbol_new("{default}"); 1470 lem.symbols = Symbol_arrayof(); 1471 for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i; 1472 qsort(lem.symbols,lem.nsymbol+1,sizeof(struct symbol*), Symbolcmpp); 1473 for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i; 1474 for(i=1; isupper(lem.symbols[i]->name[0]); i++); 1475 lem.nterminal = i; 1476 1477 /* Generate a reprint of the grammar, if requested on the command line */ 1478 if( rpflag ){ 1479 Reprint(&lem); 1480 }else{ 1481 /* Initialize the size for all follow and first sets */ 1482 SetSize(lem.nterminal+1); 1483 1484 /* Find the precedence for every production rule (that has one) */ 1485 FindRulePrecedences(&lem); 1486 1487 /* Compute the lambda-nonterminals and the first-sets for every 1488 ** nonterminal */ 1489 FindFirstSets(&lem); 1490 1491 /* Compute all LR(0) states. Also record follow-set propagation 1492 ** links so that the follow-set can be computed later */ 1493 lem.nstate = 0; 1494 FindStates(&lem); 1495 lem.sorted = State_arrayof(); 1496 1497 /* Tie up loose ends on the propagation links */ 1498 FindLinks(&lem); 1499 1500 /* Compute the follow set of every reducible configuration */ 1501 FindFollowSets(&lem); 1502 1503 /* Compute the action tables */ 1504 FindActions(&lem); 1505 1506 /* Compress the action tables */ 1507 if( compress==0 ) CompressTables(&lem); 1508 1509 /* Reorder and renumber the states so that states with fewer choices 1510 ** occur at the end. This is an optimization that helps make the 1511 ** generated parser tables smaller. */ 1512 if( noResort==0 ) ResortStates(&lem); 1513 1514 /* Generate a report of the parser generated. (the "y.output" file) */ 1515 if( !quiet ) ReportOutput(&lem); 1516 1517 /* Generate the source code for the parser */ 1518 ReportTable(&lem, mhflag); 1519 1520 /* Produce a header file for use by the scanner. (This step is 1521 ** omitted if the "-m" option is used because makeheaders will 1522 ** generate the file for us.) */ 1523 if( !mhflag ) ReportHeader(&lem); 1524 } 1525 if( statistics ){ 1526 printf("Parser statistics: %d terminals, %d nonterminals, %d rules\n", 1527 lem.nterminal, lem.nsymbol - lem.nterminal, lem.nrule); 1528 printf(" %d states, %d parser table entries, %d conflicts\n", 1529 lem.nstate, lem.tablesize, lem.nconflict); 1530 } 1531 if( lem.nconflict > 0 ){ 1532 fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict); 1533 } 1534 1535 /* return 0 on success, 1 on failure. */ 1536 exitcode = ((lem.errorcnt > 0) || (lem.nconflict > 0)) ? 1 : 0; 1537 successful_exit = (exitcode == 0); 1538 exit(exitcode); 1539 return (exitcode); 1540 } 1541 /******************** From the file "msort.c" *******************************/ 1542 /* 1543 ** A generic merge-sort program. 1544 ** 1545 ** USAGE: 1546 ** Let "ptr" be a pointer to some structure which is at the head of 1547 ** a null-terminated list. Then to sort the list call: 1548 ** 1549 ** ptr = msort(ptr,&(ptr->next),cmpfnc); 1550 ** 1551 ** In the above, "cmpfnc" is a pointer to a function which compares 1552 ** two instances of the structure and returns an integer, as in 1553 ** strcmp. The second argument is a pointer to the pointer to the 1554 ** second element of the linked list. This address is used to compute 1555 ** the offset to the "next" field within the structure. The offset to 1556 ** the "next" field must be constant for all structures in the list. 1557 ** 1558 ** The function returns a new pointer which is the head of the list 1559 ** after sorting. 1560 ** 1561 ** ALGORITHM: 1562 ** Merge-sort. 1563 */ 1564 1565 /* 1566 ** Return a pointer to the next structure in the linked list. 1567 */ 1568 #define NEXT(A) (*(char**)(((unsigned long)A)+offset)) 1569 1570 /* 1571 ** Inputs: 1572 ** a: A sorted, null-terminated linked list. (May be null). 1573 ** b: A sorted, null-terminated linked list. (May be null). 1574 ** cmp: A pointer to the comparison function. 1575 ** offset: Offset in the structure to the "next" field. 1576 ** 1577 ** Return Value: 1578 ** A pointer to the head of a sorted list containing the elements 1579 ** of both a and b. 1580 ** 1581 ** Side effects: 1582 ** The "next" pointers for elements in the lists a and b are 1583 ** changed. 1584 */ 1585 static char *merge( 1586 char *a, 1587 char *b, 1588 int (*cmp)(const char*,const char*), 1589 int offset 1590 ){ 1591 char *ptr, *head; 1592 1593 if( a==0 ){ 1594 head = b; 1595 }else if( b==0 ){ 1596 head = a; 1597 }else{ 1598 if( (*cmp)(a,b)<=0 ){ 1599 ptr = a; 1600 a = NEXT(a); 1601 }else{ 1602 ptr = b; 1603 b = NEXT(b); 1604 } 1605 head = ptr; 1606 while( a && b ){ 1607 if( (*cmp)(a,b)<=0 ){ 1608 NEXT(ptr) = a; 1609 ptr = a; 1610 a = NEXT(a); 1611 }else{ 1612 NEXT(ptr) = b; 1613 ptr = b; 1614 b = NEXT(b); 1615 } 1616 } 1617 if( a ) NEXT(ptr) = a; 1618 else NEXT(ptr) = b; 1619 } 1620 return head; 1621 } 1622 1623 /* 1624 ** Inputs: 1625 ** list: Pointer to a singly-linked list of structures. 1626 ** next: Pointer to pointer to the second element of the list. 1627 ** cmp: A comparison function. 1628 ** 1629 ** Return Value: 1630 ** A pointer to the head of a sorted list containing the elements 1631 ** orginally in list. 1632 ** 1633 ** Side effects: 1634 ** The "next" pointers for elements in list are changed. 1635 */ 1636 #define LISTSIZE 30 1637 static char *msort( 1638 char *list, 1639 char **next, 1640 int (*cmp)(const char*,const char*) 1641 ){ 1642 unsigned long offset; 1643 char *ep; 1644 char *set[LISTSIZE]; 1645 int i; 1646 offset = (unsigned long)next - (unsigned long)list; 1647 for(i=0; i<LISTSIZE; i++) set[i] = 0; 1648 while( list ){ 1649 ep = list; 1650 list = NEXT(list); 1651 NEXT(ep) = 0; 1652 for(i=0; i<LISTSIZE-1 && set[i]!=0; i++){ 1653 ep = merge(ep,set[i],cmp,offset); 1654 set[i] = 0; 1655 } 1656 set[i] = ep; 1657 } 1658 ep = 0; 1659 for(i=0; i<LISTSIZE; i++) if( set[i] ) ep = merge(set[i],ep,cmp,offset); 1660 return ep; 1661 } 1662 /************************ From the file "option.c" **************************/ 1663 static char **argv; 1664 static struct s_options *op; 1665 static FILE *errstream; 1666 1667 #define ISOPT(X) ((X)[0]=='-'||(X)[0]=='+'||strchr((X),'=')!=0) 1668 1669 /* 1670 ** Print the command line with a carrot pointing to the k-th character 1671 ** of the n-th field. 1672 */ 1673 static void errline(int n, int k, FILE *err) 1674 { 1675 int spcnt, i; 1676 if( argv[0] ) fprintf(err,"%s",argv[0]); 1677 spcnt = lemonStrlen(argv[0]) + 1; 1678 for(i=1; i<n && argv[i]; i++){ 1679 fprintf(err," %s",argv[i]); 1680 spcnt += lemonStrlen(argv[i])+1; 1681 } 1682 spcnt += k; 1683 for(; argv[i]; i++) fprintf(err," %s",argv[i]); 1684 if( spcnt<20 ){ 1685 fprintf(err,"\n%*s^-- here\n",spcnt,""); 1686 }else{ 1687 fprintf(err,"\n%*shere --^\n",spcnt-7,""); 1688 } 1689 } 1690 1691 /* 1692 ** Return the index of the N-th non-switch argument. Return -1 1693 ** if N is out of range. 1694 */ 1695 static int argindex(int n) 1696 { 1697 int i; 1698 int dashdash = 0; 1699 if( argv!=0 && *argv!=0 ){ 1700 for(i=1; argv[i]; i++){ 1701 if( dashdash || !ISOPT(argv[i]) ){ 1702 if( n==0 ) return i; 1703 n--; 1704 } 1705 if( strcmp(argv[i],"--")==0 ) dashdash = 1; 1706 } 1707 } 1708 return -1; 1709 } 1710 1711 static char emsg[] = "Command line syntax error: "; 1712 1713 /* 1714 ** Process a flag command line argument. 1715 */ 1716 static int handleflags(int i, FILE *err) 1717 { 1718 int v; 1719 int errcnt = 0; 1720 int j; 1721 for(j=0; op[j].label; j++){ 1722 if( strncmp(&argv[i][1],op[j].label,lemonStrlen(op[j].label))==0 ) break; 1723 } 1724 v = argv[i][0]=='-' ? 1 : 0; 1725 if( op[j].label==0 ){ 1726 if( err ){ 1727 fprintf(err,"%sundefined option.\n",emsg); 1728 errline(i,1,err); 1729 } 1730 errcnt++; 1731 }else if( op[j].type==OPT_FLAG ){ 1732 *((int*)op[j].arg) = v; 1733 }else if( op[j].type==OPT_FFLAG ){ 1734 (*(void(*)(int))(op[j].arg))(v); 1735 }else if( op[j].type==OPT_FSTR ){ 1736 (*(void(*)(char *))(op[j].arg))(&argv[i][2]); 1737 }else{ 1738 if( err ){ 1739 fprintf(err,"%smissing argument on switch.\n",emsg); 1740 errline(i,1,err); 1741 } 1742 errcnt++; 1743 } 1744 return errcnt; 1745 } 1746 1747 /* 1748 ** Process a command line switch which has an argument. 1749 */ 1750 static int handleswitch(int i, FILE *err) 1751 { 1752 int lv = 0; 1753 double dv = 0.0; 1754 char *sv = 0, *end; 1755 char *cp; 1756 int j; 1757 int errcnt = 0; 1758 cp = strchr(argv[i],'='); 1759 assert( cp!=0 ); 1760 *cp = 0; 1761 for(j=0; op[j].label; j++){ 1762 if( strcmp(argv[i],op[j].label)==0 ) break; 1763 } 1764 *cp = '='; 1765 if( op[j].label==0 ){ 1766 if( err ){ 1767 fprintf(err,"%sundefined option.\n",emsg); 1768 errline(i,0,err); 1769 } 1770 errcnt++; 1771 }else{ 1772 cp++; 1773 switch( op[j].type ){ 1774 case OPT_FLAG: 1775 case OPT_FFLAG: 1776 if( err ){ 1777 fprintf(err,"%soption requires an argument.\n",emsg); 1778 errline(i,0,err); 1779 } 1780 errcnt++; 1781 break; 1782 case OPT_DBL: 1783 case OPT_FDBL: 1784 dv = strtod(cp,&end); 1785 if( *end ){ 1786 if( err ){ 1787 fprintf(err,"%sillegal character in floating-point argument.\n",emsg); 1788 errline(i,((unsigned long)end)-(unsigned long)argv[i],err); 1789 } 1790 errcnt++; 1791 } 1792 break; 1793 case OPT_INT: 1794 case OPT_FINT: 1795 lv = strtol(cp,&end,0); 1796 if( *end ){ 1797 if( err ){ 1798 fprintf(err,"%sillegal character in integer argument.\n",emsg); 1799 errline(i,((unsigned long)end)-(unsigned long)argv[i],err); 1800 } 1801 errcnt++; 1802 } 1803 break; 1804 case OPT_STR: 1805 case OPT_FSTR: 1806 sv = cp; 1807 break; 1808 } 1809 switch( op[j].type ){ 1810 case OPT_FLAG: 1811 case OPT_FFLAG: 1812 break; 1813 case OPT_DBL: 1814 *(double*)(op[j].arg) = dv; 1815 break; 1816 case OPT_FDBL: 1817 (*(void(*)(double))(op[j].arg))(dv); 1818 break; 1819 case OPT_INT: 1820 *(int*)(op[j].arg) = lv; 1821 break; 1822 case OPT_FINT: 1823 (*(void(*)(int))(op[j].arg))((int)lv); 1824 break; 1825 case OPT_STR: 1826 *(char**)(op[j].arg) = sv; 1827 break; 1828 case OPT_FSTR: 1829 (*(void(*)(char *))(op[j].arg))(sv); 1830 break; 1831 } 1832 } 1833 return errcnt; 1834 } 1835 1836 int OptInit(char **a, struct s_options *o, FILE *err) 1837 { 1838 int errcnt = 0; 1839 argv = a; 1840 op = o; 1841 errstream = err; 1842 if( argv && *argv && op ){ 1843 int i; 1844 for(i=1; argv[i]; i++){ 1845 if( argv[i][0]=='+' || argv[i][0]=='-' ){ 1846 errcnt += handleflags(i,err); 1847 }else if( strchr(argv[i],'=') ){ 1848 errcnt += handleswitch(i,err); 1849 } 1850 } 1851 } 1852 if( errcnt>0 ){ 1853 fprintf(err,"Valid command line options for \"%s\" are:\n",*a); 1854 OptPrint(); 1855 exit(1); 1856 } 1857 return 0; 1858 } 1859 1860 int OptNArgs(){ 1861 int cnt = 0; 1862 int dashdash = 0; 1863 int i; 1864 if( argv!=0 && argv[0]!=0 ){ 1865 for(i=1; argv[i]; i++){ 1866 if( dashdash || !ISOPT(argv[i]) ) cnt++; 1867 if( strcmp(argv[i],"--")==0 ) dashdash = 1; 1868 } 1869 } 1870 return cnt; 1871 } 1872 1873 char *OptArg(int n) 1874 { 1875 int i; 1876 i = argindex(n); 1877 return i>=0 ? argv[i] : 0; 1878 } 1879 1880 void OptErr(int n) 1881 { 1882 int i; 1883 i = argindex(n); 1884 if( i>=0 ) errline(i,0,errstream); 1885 } 1886 1887 void OptPrint(){ 1888 int i; 1889 int max, len; 1890 max = 0; 1891 for(i=0; op[i].label; i++){ 1892 len = lemonStrlen(op[i].label) + 1; 1893 switch( op[i].type ){ 1894 case OPT_FLAG: 1895 case OPT_FFLAG: 1896 break; 1897 case OPT_INT: 1898 case OPT_FINT: 1899 len += 9; /* length of "<integer>" */ 1900 break; 1901 case OPT_DBL: 1902 case OPT_FDBL: 1903 len += 6; /* length of "<real>" */ 1904 break; 1905 case OPT_STR: 1906 case OPT_FSTR: 1907 len += 8; /* length of "<string>" */ 1908 break; 1909 } 1910 if( len>max ) max = len; 1911 } 1912 for(i=0; op[i].label; i++){ 1913 switch( op[i].type ){ 1914 case OPT_FLAG: 1915 case OPT_FFLAG: 1916 fprintf(errstream," -%-*s %s\n",max,op[i].label,op[i].message); 1917 break; 1918 case OPT_INT: 1919 case OPT_FINT: 1920 fprintf(errstream," %s=<integer>%*s %s\n",op[i].label, 1921 (int)(max-lemonStrlen(op[i].label)-9),"",op[i].message); 1922 break; 1923 case OPT_DBL: 1924 case OPT_FDBL: 1925 fprintf(errstream," %s=<real>%*s %s\n",op[i].label, 1926 (int)(max-lemonStrlen(op[i].label)-6),"",op[i].message); 1927 break; 1928 case OPT_STR: 1929 case OPT_FSTR: 1930 fprintf(errstream," %s=<string>%*s %s\n",op[i].label, 1931 (int)(max-lemonStrlen(op[i].label)-8),"",op[i].message); 1932 break; 1933 } 1934 } 1935 } 1936 /*********************** From the file "parse.c" ****************************/ 1937 /* 1938 ** Input file parser for the LEMON parser generator. 1939 */ 1940 1941 /* The state of the parser */ 1942 enum e_state { 1943 INITIALIZE, 1944 WAITING_FOR_DECL_OR_RULE, 1945 WAITING_FOR_DECL_KEYWORD, 1946 WAITING_FOR_DECL_ARG, 1947 WAITING_FOR_PRECEDENCE_SYMBOL, 1948 WAITING_FOR_ARROW, 1949 IN_RHS, 1950 LHS_ALIAS_1, 1951 LHS_ALIAS_2, 1952 LHS_ALIAS_3, 1953 RHS_ALIAS_1, 1954 RHS_ALIAS_2, 1955 PRECEDENCE_MARK_1, 1956 PRECEDENCE_MARK_2, 1957 RESYNC_AFTER_RULE_ERROR, 1958 RESYNC_AFTER_DECL_ERROR, 1959 WAITING_FOR_DESTRUCTOR_SYMBOL, 1960 WAITING_FOR_DATATYPE_SYMBOL, 1961 WAITING_FOR_FALLBACK_ID, 1962 WAITING_FOR_WILDCARD_ID 1963 }; 1964 struct pstate { 1965 char *filename; /* Name of the input file */ 1966 int tokenlineno; /* Linenumber at which current token starts */ 1967 int errorcnt; /* Number of errors so far */ 1968 char *tokenstart; /* Text of current token */ 1969 struct lemon *gp; /* Global state vector */ 1970 enum e_state state; /* The state of the parser */ 1971 struct symbol *fallback; /* The fallback token */ 1972 struct symbol *lhs; /* Left-hand side of current rule */ 1973 const char *lhsalias; /* Alias for the LHS */ 1974 int nrhs; /* Number of right-hand side symbols seen */ 1975 struct symbol *rhs[MAXRHS]; /* RHS symbols */ 1976 const char *alias[MAXRHS]; /* Aliases for each RHS symbol (or NULL) */ 1977 struct rule *prevrule; /* Previous rule parsed */ 1978 const char *declkeyword; /* Keyword of a declaration */ 1979 char **declargslot; /* Where the declaration argument should be put */ 1980 int insertLineMacro; /* Add #line before declaration insert */ 1981 int *decllinenoslot; /* Where to write declaration line number */ 1982 enum e_assoc declassoc; /* Assign this association to decl arguments */ 1983 int preccounter; /* Assign this precedence to decl arguments */ 1984 struct rule *firstrule; /* Pointer to first rule in the grammar */ 1985 struct rule *lastrule; /* Pointer to the most recently parsed rule */ 1986 }; 1987 1988 /* Parse a single token */ 1989 static void parseonetoken(struct pstate *psp) 1990 { 1991 const char *x; 1992 x = Strsafe(psp->tokenstart); /* Save the token permanently */ 1993 #if 0 1994 printf("%s:%d: Token=[%s] state=%d\n",psp->filename,psp->tokenlineno, 1995 x,psp->state); 1996 #endif 1997 switch( psp->state ){ 1998 case INITIALIZE: 1999 psp->prevrule = 0; 2000 psp->preccounter = 0; 2001 psp->firstrule = psp->lastrule = 0; 2002 psp->gp->nrule = 0; 2003 /* Fall thru to next case */ 2004 case WAITING_FOR_DECL_OR_RULE: 2005 if( x[0]=='%' ){ 2006 psp->state = WAITING_FOR_DECL_KEYWORD; 2007 }else if( islower(x[0]) ){ 2008 psp->lhs = Symbol_new(x); 2009 psp->nrhs = 0; 2010 psp->lhsalias = 0; 2011 psp->state = WAITING_FOR_ARROW; 2012 }else if( x[0]=='{' ){ 2013 if( psp->prevrule==0 ){ 2014 ErrorMsg(psp->filename,psp->tokenlineno, 2015 "There is no prior rule opon which to attach the code \ 2016 fragment which begins on this line."); 2017 psp->errorcnt++; 2018 }else if( psp->prevrule->code!=0 ){ 2019 ErrorMsg(psp->filename,psp->tokenlineno, 2020 "Code fragment beginning on this line is not the first \ 2021 to follow the previous rule."); 2022 psp->errorcnt++; 2023 }else{ 2024 psp->prevrule->line = psp->tokenlineno; 2025 psp->prevrule->code = &x[1]; 2026 } 2027 }else if( x[0]=='[' ){ 2028 psp->state = PRECEDENCE_MARK_1; 2029 }else{ 2030 ErrorMsg(psp->filename,psp->tokenlineno, 2031 "Token \"%s\" should be either \"%%\" or a nonterminal name.", 2032 x); 2033 psp->errorcnt++; 2034 } 2035 break; 2036 case PRECEDENCE_MARK_1: 2037 if( !isupper(x[0]) ){ 2038 ErrorMsg(psp->filename,psp->tokenlineno, 2039 "The precedence symbol must be a terminal."); 2040 psp->errorcnt++; 2041 }else if( psp->prevrule==0 ){ 2042 ErrorMsg(psp->filename,psp->tokenlineno, 2043 "There is no prior rule to assign precedence \"[%s]\".",x); 2044 psp->errorcnt++; 2045 }else if( psp->prevrule->precsym!=0 ){ 2046 ErrorMsg(psp->filename,psp->tokenlineno, 2047 "Precedence mark on this line is not the first \ 2048 to follow the previous rule."); 2049 psp->errorcnt++; 2050 }else{ 2051 psp->prevrule->precsym = Symbol_new(x); 2052 } 2053 psp->state = PRECEDENCE_MARK_2; 2054 break; 2055 case PRECEDENCE_MARK_2: 2056 if( x[0]!=']' ){ 2057 ErrorMsg(psp->filename,psp->tokenlineno, 2058 "Missing \"]\" on precedence mark."); 2059 psp->errorcnt++; 2060 } 2061 psp->state = WAITING_FOR_DECL_OR_RULE; 2062 break; 2063 case WAITING_FOR_ARROW: 2064 if( x[0]==':' && x[1]==':' && x[2]=='=' ){ 2065 psp->state = IN_RHS; 2066 }else if( x[0]=='(' ){ 2067 psp->state = LHS_ALIAS_1; 2068 }else{ 2069 ErrorMsg(psp->filename,psp->tokenlineno, 2070 "Expected to see a \":\" following the LHS symbol \"%s\".", 2071 psp->lhs->name); 2072 psp->errorcnt++; 2073 psp->state = RESYNC_AFTER_RULE_ERROR; 2074 } 2075 break; 2076 case LHS_ALIAS_1: 2077 if( isalpha(x[0]) ){ 2078 psp->lhsalias = x; 2079 psp->state = LHS_ALIAS_2; 2080 }else{ 2081 ErrorMsg(psp->filename,psp->tokenlineno, 2082 "\"%s\" is not a valid alias for the LHS \"%s\"\n", 2083 x,psp->lhs->name); 2084 psp->errorcnt++; 2085 psp->state = RESYNC_AFTER_RULE_ERROR; 2086 } 2087 break; 2088 case LHS_ALIAS_2: 2089 if( x[0]==')' ){ 2090 psp->state = LHS_ALIAS_3; 2091 }else{ 2092 ErrorMsg(psp->filename,psp->tokenlineno, 2093 "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); 2094 psp->errorcnt++; 2095 psp->state = RESYNC_AFTER_RULE_ERROR; 2096 } 2097 break; 2098 case LHS_ALIAS_3: 2099 if( x[0]==':' && x[1]==':' && x[2]=='=' ){ 2100 psp->state = IN_RHS; 2101 }else{ 2102 ErrorMsg(psp->filename,psp->tokenlineno, 2103 "Missing \"->\" following: \"%s(%s)\".", 2104 psp->lhs->name,psp->lhsalias); 2105 psp->errorcnt++; 2106 psp->state = RESYNC_AFTER_RULE_ERROR; 2107 } 2108 break; 2109 case IN_RHS: 2110 if( x[0]=='.' ){ 2111 struct rule *rp; 2112 rp = (struct rule *)calloc( sizeof(struct rule) + 2113 sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs, 1); 2114 if( rp==0 ){ 2115 ErrorMsg(psp->filename,psp->tokenlineno, 2116 "Can't allocate enough memory for this rule."); 2117 psp->errorcnt++; 2118 psp->prevrule = 0; 2119 }else{ 2120 int i; 2121 rp->ruleline = psp->tokenlineno; 2122 rp->rhs = (struct symbol**)&rp[1]; 2123 rp->rhsalias = (const char**)&(rp->rhs[psp->nrhs]); 2124 for(i=0; i<psp->nrhs; i++){ 2125 rp->rhs[i] = psp->rhs[i]; 2126 rp->rhsalias[i] = psp->alias[i]; 2127 } 2128 rp->lhs = psp->lhs; 2129 rp->lhsalias = psp->lhsalias; 2130 rp->nrhs = psp->nrhs; 2131 rp->code = 0; 2132 rp->precsym = 0; 2133 rp->index = psp->gp->nrule++; 2134 rp->nextlhs = rp->lhs->rule; 2135 rp->lhs->rule = rp; 2136 rp->next = 0; 2137 if( psp->firstrule==0 ){ 2138 psp->firstrule = psp->lastrule = rp; 2139 }else{ 2140 psp->lastrule->next = rp; 2141 psp->lastrule = rp; 2142 } 2143 psp->prevrule = rp; 2144 } 2145 psp->state = WAITING_FOR_DECL_OR_RULE; 2146 }else if( isalpha(x[0]) ){ 2147 if( psp->nrhs>=MAXRHS ){ 2148 ErrorMsg(psp->filename,psp->tokenlineno, 2149 "Too many symbols on RHS of rule beginning at \"%s\".", 2150 x); 2151 psp->errorcnt++; 2152 psp->state = RESYNC_AFTER_RULE_ERROR; 2153 }else{ 2154 psp->rhs[psp->nrhs] = Symbol_new(x); 2155 psp->alias[psp->nrhs] = 0; 2156 psp->nrhs++; 2157 } 2158 }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 ){ 2159 struct symbol *msp = psp->rhs[psp->nrhs-1]; 2160 if( msp->type!=MULTITERMINAL ){ 2161 struct symbol *origsp = msp; 2162 msp = (struct symbol *) calloc(1,sizeof(*msp)); 2163 memset(msp, 0, sizeof(*msp)); 2164 msp->type = MULTITERMINAL; 2165 msp->nsubsym = 1; 2166 msp->subsym = (struct symbol **) calloc(1,sizeof(struct symbol*)); 2167 msp->subsym[0] = origsp; 2168 msp->name = origsp->name; 2169 psp->rhs[psp->nrhs-1] = msp; 2170 } 2171 msp->nsubsym++; 2172 msp->subsym = (struct symbol **) realloc(msp->subsym, 2173 sizeof(struct symbol*)*msp->nsubsym); 2174 msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]); 2175 if( islower(x[1]) || islower(msp->subsym[0]->name[0]) ){ 2176 ErrorMsg(psp->filename,psp->tokenlineno, 2177 "Cannot form a compound containing a non-terminal"); 2178 psp->errorcnt++; 2179 } 2180 }else if( x[0]=='(' && psp->nrhs>0 ){ 2181 psp->state = RHS_ALIAS_1; 2182 }else{ 2183 ErrorMsg(psp->filename,psp->tokenlineno, 2184 "Illegal character on RHS of rule: \"%s\".",x); 2185 psp->errorcnt++; 2186 psp->state = RESYNC_AFTER_RULE_ERROR; 2187 } 2188 break; 2189 case RHS_ALIAS_1: 2190 if( isalpha(x[0]) ){ 2191 psp->alias[psp->nrhs-1] = x; 2192 psp->state = RHS_ALIAS_2; 2193 }else{ 2194 ErrorMsg(psp->filename,psp->tokenlineno, 2195 "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n", 2196 x,psp->rhs[psp->nrhs-1]->name); 2197 psp->errorcnt++; 2198 psp->state = RESYNC_AFTER_RULE_ERROR; 2199 } 2200 break; 2201 case RHS_ALIAS_2: 2202 if( x[0]==')' ){ 2203 psp->state = IN_RHS; 2204 }else{ 2205 ErrorMsg(psp->filename,psp->tokenlineno, 2206 "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); 2207 psp->errorcnt++; 2208 psp->state = RESYNC_AFTER_RULE_ERROR; 2209 } 2210 break; 2211 case WAITING_FOR_DECL_KEYWORD: 2212 if( isalpha(x[0]) ){ 2213 psp->declkeyword = x; 2214 psp->declargslot = 0; 2215 psp->decllinenoslot = 0; 2216 psp->insertLineMacro = 1; 2217 psp->state = WAITING_FOR_DECL_ARG; 2218 if( strcmp(x,"name")==0 ){ 2219 psp->declargslot = &(psp->gp->name); 2220 psp->insertLineMacro = 0; 2221 }else if( strcmp(x,"include")==0 ){ 2222 psp->declargslot = &(psp->gp->include); 2223 }else if( strcmp(x,"code")==0 ){ 2224 psp->declargslot = &(psp->gp->extracode); 2225 }else if( strcmp(x,"token_destructor")==0 ){ 2226 psp->declargslot = &psp->gp->tokendest; 2227 }else if( strcmp(x,"default_destructor")==0 ){ 2228 psp->declargslot = &psp->gp->vardest; 2229 }else if( strcmp(x,"token_prefix")==0 ){ 2230 psp->declargslot = &psp->gp->tokenprefix; 2231 psp->insertLineMacro = 0; 2232 }else if( strcmp(x,"syntax_error")==0 ){ 2233 psp->declargslot = &(psp->gp->error); 2234 }else if( strcmp(x,"parse_accept")==0 ){ 2235 psp->declargslot = &(psp->gp->accept); 2236 }else if( strcmp(x,"parse_failure")==0 ){ 2237 psp->declargslot = &(psp->gp->failure); 2238 }else if( strcmp(x,"stack_overflow")==0 ){ 2239 psp->declargslot = &(psp->gp->overflow); 2240 }else if( strcmp(x,"extra_argument")==0 ){ 2241 psp->declargslot = &(psp->gp->arg); 2242 psp->insertLineMacro = 0; 2243 }else if( strcmp(x,"token_type")==0 ){ 2244 psp->declargslot = &(psp->gp->tokentype); 2245 psp->insertLineMacro = 0; 2246 }else if( strcmp(x,"default_type")==0 ){ 2247 psp->declargslot = &(psp->gp->vartype); 2248 psp->insertLineMacro = 0; 2249 }else if( strcmp(x,"stack_size")==0 ){ 2250 psp->declargslot = &(psp->gp->stacksize); 2251 psp->insertLineMacro = 0; 2252 }else if( strcmp(x,"start_symbol")==0 ){ 2253 psp->declargslot = &(psp->gp->start); 2254 psp->insertLineMacro = 0; 2255 }else if( strcmp(x,"left")==0 ){ 2256 psp->preccounter++; 2257 psp->declassoc = LEFT; 2258 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; 2259 }else if( strcmp(x,"right")==0 ){ 2260 psp->preccounter++; 2261 psp->declassoc = RIGHT; 2262 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; 2263 }else if( strcmp(x,"nonassoc")==0 ){ 2264 psp->preccounter++; 2265 psp->declassoc = NONE; 2266 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; 2267 }else if( strcmp(x,"destructor")==0 ){ 2268 psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL; 2269 }else if( strcmp(x,"type")==0 ){ 2270 psp->state = WAITING_FOR_DATATYPE_SYMBOL; 2271 }else if( strcmp(x,"fallback")==0 ){ 2272 psp->fallback = 0; 2273 psp->state = WAITING_FOR_FALLBACK_ID; 2274 }else if( strcmp(x,"wildcard")==0 ){ 2275 psp->state = WAITING_FOR_WILDCARD_ID; 2276 }else{ 2277 ErrorMsg(psp->filename,psp->tokenlineno, 2278 "Unknown declaration keyword: \"%%%s\".",x); 2279 psp->errorcnt++; 2280 psp->state = RESYNC_AFTER_DECL_ERROR; 2281 } 2282 }else{ 2283 ErrorMsg(psp->filename,psp->tokenlineno, 2284 "Illegal declaration keyword: \"%s\".",x); 2285 psp->errorcnt++; 2286 psp->state = RESYNC_AFTER_DECL_ERROR; 2287 } 2288 break; 2289 case WAITING_FOR_DESTRUCTOR_SYMBOL: 2290 if( !isalpha(x[0]) ){ 2291 ErrorMsg(psp->filename,psp->tokenlineno, 2292 "Symbol name missing after %%destructor keyword"); 2293 psp->errorcnt++; 2294 psp->state = RESYNC_AFTER_DECL_ERROR; 2295 }else{ 2296 struct symbol *sp = Symbol_new(x); 2297 psp->declargslot = &sp->destructor; 2298 psp->decllinenoslot = &sp->destLineno; 2299 psp->insertLineMacro = 1; 2300 psp->state = WAITING_FOR_DECL_ARG; 2301 } 2302 break; 2303 case WAITING_FOR_DATATYPE_SYMBOL: 2304 if( !isalpha(x[0]) ){ 2305 ErrorMsg(psp->filename,psp->tokenlineno, 2306 "Symbol name missing after %%type keyword"); 2307 psp->errorcnt++; 2308 psp->state = RESYNC_AFTER_DECL_ERROR; 2309 }else{ 2310 struct symbol *sp = Symbol_find(x); 2311 if((sp) && (sp->datatype)){ 2312 ErrorMsg(psp->filename,psp->tokenlineno, 2313 "Symbol %%type \"%s\" already defined", x); 2314 psp->errorcnt++; 2315 psp->state = RESYNC_AFTER_DECL_ERROR; 2316 }else{ 2317 if (!sp){ 2318 sp = Symbol_new(x); 2319 } 2320 psp->declargslot = &sp->datatype; 2321 psp->insertLineMacro = 0; 2322 psp->state = WAITING_FOR_DECL_ARG; 2323 } 2324 } 2325 break; 2326 case WAITING_FOR_PRECEDENCE_SYMBOL: 2327 if( x[0]=='.' ){ 2328 psp->state = WAITING_FOR_DECL_OR_RULE; 2329 }else if( isupper(x[0]) ){ 2330 struct symbol *sp; 2331 sp = Symbol_new(x); 2332 if( sp->prec>=0 ){ 2333 ErrorMsg(psp->filename,psp->tokenlineno, 2334 "Symbol \"%s\" has already be given a precedence.",x); 2335 psp->errorcnt++; 2336 }else{ 2337 sp->prec = psp->preccounter; 2338 sp->assoc = psp->declassoc; 2339 } 2340 }else{ 2341 ErrorMsg(psp->filename,psp->tokenlineno, 2342 "Can't assign a precedence to \"%s\".",x); 2343 psp->errorcnt++; 2344 } 2345 break; 2346 case WAITING_FOR_DECL_ARG: 2347 if( x[0]=='{' || x[0]=='\"' || isalnum(x[0]) ){ 2348 const char *zOld, *zNew; 2349 char *zBuf, *z; 2350 int nOld, n, nLine, nNew, nBack; 2351 int addLineMacro; 2352 char zLine[50]; 2353 zNew = x; 2354 if( zNew[0]=='"' || zNew[0]=='{' ) zNew++; 2355 nNew = lemonStrlen(zNew); 2356 if( *psp->declargslot ){ 2357 zOld = *psp->declargslot; 2358 }else{ 2359 zOld = ""; 2360 } 2361 nOld = lemonStrlen(zOld); 2362 n = nOld + nNew + 20; 2363 addLineMacro = !psp->gp->nolinenosflag && psp->insertLineMacro && 2364 (psp->decllinenoslot==0 || psp->decllinenoslot[0]!=0); 2365 if( addLineMacro ){ 2366 for(z=psp->filename, nBack=0; *z; z++){ 2367 if( *z=='\\' ) nBack++; 2368 } 2369 sprintf(zLine, "#line %d ", psp->tokenlineno); 2370 nLine = lemonStrlen(zLine); 2371 n += nLine + lemonStrlen(psp->filename) + nBack; 2372 } 2373 *psp->declargslot = (char *) realloc(*psp->declargslot, n); 2374 zBuf = *psp->declargslot + nOld; 2375 if( addLineMacro ){ 2376 if( nOld && zBuf[-1]!='\n' ){ 2377 *(zBuf++) = '\n'; 2378 } 2379 memcpy(zBuf, zLine, nLine); 2380 zBuf += nLine; 2381 *(zBuf++) = '"'; 2382 for(z=psp->filename; *z; z++){ 2383 if( *z=='\\' ){ 2384 *(zBuf++) = '\\'; 2385 } 2386 *(zBuf++) = *z; 2387 } 2388 *(zBuf++) = '"'; 2389 *(zBuf++) = '\n'; 2390 } 2391 if( psp->decllinenoslot && psp->decllinenoslot[0]==0 ){ 2392 psp->decllinenoslot[0] = psp->tokenlineno; 2393 } 2394 memcpy(zBuf, zNew, nNew); 2395 zBuf += nNew; 2396 *zBuf = 0; 2397 psp->state = WAITING_FOR_DECL_OR_RULE; 2398 }else{ 2399 ErrorMsg(psp->filename,psp->tokenlineno, 2400 "Illegal argument to %%%s: %s",psp->declkeyword,x); 2401 psp->errorcnt++; 2402 psp->state = RESYNC_AFTER_DECL_ERROR; 2403 } 2404 break; 2405 case WAITING_FOR_FALLBACK_ID: 2406 if( x[0]=='.' ){ 2407 psp->state = WAITING_FOR_DECL_OR_RULE; 2408 }else if( !isupper(x[0]) ){ 2409 ErrorMsg(psp->filename, psp->tokenlineno, 2410 "%%fallback argument \"%s\" should be a token", x); 2411 psp->errorcnt++; 2412 }else{ 2413 struct symbol *sp = Symbol_new(x); 2414 if( psp->fallback==0 ){ 2415 psp->fallback = sp; 2416 }else if( sp->fallback ){ 2417 ErrorMsg(psp->filename, psp->tokenlineno, 2418 "More than one fallback assigned to token %s", x); 2419 psp->errorcnt++; 2420 }else{ 2421 sp->fallback = psp->fallback; 2422 psp->gp->has_fallback = 1; 2423 } 2424 } 2425 break; 2426 case WAITING_FOR_WILDCARD_ID: 2427 if( x[0]=='.' ){ 2428 psp->state = WAITING_FOR_DECL_OR_RULE; 2429 }else if( !isupper(x[0]) ){ 2430 ErrorMsg(psp->filename, psp->tokenlineno, 2431 "%%wildcard argument \"%s\" should be a token", x); 2432 psp->errorcnt++; 2433 }else{ 2434 struct symbol *sp = Symbol_new(x); 2435 if( psp->gp->wildcard==0 ){ 2436 psp->gp->wildcard = sp; 2437 }else{ 2438 ErrorMsg(psp->filename, psp->tokenlineno, 2439 "Extra wildcard to token: %s", x); 2440 psp->errorcnt++; 2441 } 2442 } 2443 break; 2444 case RESYNC_AFTER_RULE_ERROR: 2445 /* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; 2446 ** break; */ 2447 case RESYNC_AFTER_DECL_ERROR: 2448 if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; 2449 if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD; 2450 break; 2451 } 2452 } 2453 2454 /* Run the preprocessor over the input file text. The global variables 2455 ** azDefine[0] through azDefine[nDefine-1] contains the names of all defined 2456 ** macros. This routine looks for "%ifdef" and "%ifndef" and "%endif" and 2457 ** comments them out. Text in between is also commented out as appropriate. 2458 */ 2459 static void preprocess_input(char *z){ 2460 int i, j, k, n; 2461 int exclude = 0; 2462 int start = 0; 2463 int lineno = 1; 2464 int start_lineno = 1; 2465 for(i=0; z[i]; i++){ 2466 if( z[i]=='\n' ) lineno++; 2467 if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue; 2468 if( strncmp(&z[i],"%endif",6)==0 && isspace(z[i+6]) ){ 2469 if( exclude ){ 2470 exclude--; 2471 if( exclude==0 ){ 2472 for(j=start; j<i; j++) if( z[j]!='\n' ) z[j] = ' '; 2473 } 2474 } 2475 for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' '; 2476 }else if( (strncmp(&z[i],"%ifdef",6)==0 && isspace(z[i+6])) 2477 || (strncmp(&z[i],"%ifndef",7)==0 && isspace(z[i+7])) ){ 2478 if( exclude ){ 2479 exclude++; 2480 }else{ 2481 for(j=i+7; isspace(z[j]); j++){} 2482 for(n=0; z[j+n] && !isspace(z[j+n]); n++){} 2483 exclude = 1; 2484 for(k=0; k<nDefine; k++){ 2485 if( strncmp(azDefine[k],&z[j],n)==0 && lemonStrlen(azDefine[k])==n ){ 2486 exclude = 0; 2487 break; 2488 } 2489 } 2490 if( z[i+3]=='n' ) exclude = !exclude; 2491 if( exclude ){ 2492 start = i; 2493 start_lineno = lineno; 2494 } 2495 } 2496 for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' '; 2497 } 2498 } 2499 if( exclude ){ 2500 fprintf(stderr,"unterminated %%ifdef starting on line %d\n", start_lineno); 2501 exit(1); 2502 } 2503 } 2504 2505 /* In spite of its name, this function is really a scanner. It read 2506 ** in the entire input file (all at once) then tokenizes it. Each 2507 ** token is passed to the function "parseonetoken" which builds all 2508 ** the appropriate data structures in the global state vector "gp". 2509 */ 2510 void Parse(struct lemon *gp) 2511 { 2512 struct pstate ps; 2513 FILE *fp; 2514 char *filebuf; 2515 int filesize; 2516 int lineno; 2517 int c; 2518 char *cp, *nextcp; 2519 int startline = 0; 2520 2521 memset(&ps, '\0', sizeof(ps)); 2522 ps.gp = gp; 2523 ps.filename = gp->filename; 2524 ps.errorcnt = 0; 2525 ps.state = INITIALIZE; 2526 2527 /* Begin by reading the input file */ 2528 fp = fopen(ps.filename,"rb"); 2529 if( fp==0 ){ 2530 ErrorMsg(ps.filename,0,"Can't open this file for reading."); 2531 gp->errorcnt++; 2532 return; 2533 } 2534 fseek(fp,0,2); 2535 filesize = ftell(fp); 2536 rewind(fp); 2537 filebuf = (char *)malloc( filesize+1 ); 2538 if( filebuf==0 ){ 2539 ErrorMsg(ps.filename,0,"Can't allocate %d of memory to hold this file.", 2540 filesize+1); 2541 gp->errorcnt++; 2542 return; 2543 } 2544 if( fread(filebuf,1,filesize,fp)!=filesize ){ 2545 ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.", 2546 filesize); 2547 free(filebuf); 2548 gp->errorcnt++; 2549 return; 2550 } 2551 fclose(fp); 2552 filebuf[filesize] = 0; 2553 2554 /* Make an initial pass through the file to handle %ifdef and %ifndef */ 2555 preprocess_input(filebuf); 2556 2557 /* Now scan the text of the input file */ 2558 lineno = 1; 2559 for(cp=filebuf; (c= *cp)!=0; ){ 2560 if( c=='\n' ) lineno++; /* Keep track of the line number */ 2561 if( isspace(c) ){ cp++; continue; } /* Skip all white space */ 2562 if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */ 2563 cp+=2; 2564 while( (c= *cp)!=0 && c!='\n' ) cp++; 2565 continue; 2566 } 2567 if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */ 2568 cp+=2; 2569 while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){ 2570 if( c=='\n' ) lineno++; 2571 cp++; 2572 } 2573 if( c ) cp++; 2574 continue; 2575 } 2576 ps.tokenstart = cp; /* Mark the beginning of the token */ 2577 ps.tokenlineno = lineno; /* Linenumber on which token begins */ 2578 if( c=='\"' ){ /* String literals */ 2579 cp++; 2580 while( (c= *cp)!=0 && c!='\"' ){ 2581 if( c=='\n' ) lineno++; 2582 cp++; 2583 } 2584 if( c==0 ){ 2585 ErrorMsg(ps.filename,startline, 2586 "String starting on this line is not terminated before the end of the file."); 2587 ps.errorcnt++; 2588 nextcp = cp; 2589 }else{ 2590 nextcp = cp+1; 2591 } 2592 }else if( c=='{' ){ /* A block of C code */ 2593 int level; 2594 cp++; 2595 for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){ 2596 if( c=='\n' ) lineno++; 2597 else if( c=='{' ) level++; 2598 else if( c=='}' ) level--; 2599 else if( c=='/' && cp[1]=='*' ){ /* Skip comments */ 2600 int prevc; 2601 cp = &cp[2]; 2602 prevc = 0; 2603 while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){ 2604 if( c=='\n' ) lineno++; 2605 prevc = c; 2606 cp++; 2607 } 2608 }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */ 2609 cp = &cp[2]; 2610 while( (c= *cp)!=0 && c!='\n' ) cp++; 2611 if( c ) lineno++; 2612 }else if( c=='\'' || c=='\"' ){ /* String a character literals */ 2613 int startchar, prevc; 2614 startchar = c; 2615 prevc = 0; 2616 for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){ 2617 if( c=='\n' ) lineno++; 2618 if( prevc=='\\' ) prevc = 0; 2619 else prevc = c; 2620 } 2621 } 2622 } 2623 if( c==0 ){ 2624 ErrorMsg(ps.filename,ps.tokenlineno, 2625 "C code starting on this line is not terminated before the end of the file."); 2626 ps.errorcnt++; 2627 nextcp = cp; 2628 }else{ 2629 nextcp = cp+1; 2630 } 2631 }else if( isalnum(c) ){ /* Identifiers */ 2632 while( (c= *cp)!=0 && (isalnum(c) || c=='_') ) cp++; 2633 nextcp = cp; 2634 }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */ 2635 cp += 3; 2636 nextcp = cp; 2637 }else if( (c=='/' || c=='|') && isalpha(cp[1]) ){ 2638 cp += 2; 2639 while( (c = *cp)!=0 && (isalnum(c) || c=='_') ) cp++; 2640 nextcp = cp; 2641 }else{ /* All other (one character) operators */ 2642 cp++; 2643 nextcp = cp; 2644 } 2645 c = *cp; 2646 *cp = 0; /* Null terminate the token */ 2647 parseonetoken(&ps); /* Parse the token */ 2648 *cp = c; /* Restore the buffer */ 2649 cp = nextcp; 2650 } 2651 free(filebuf); /* Release the buffer after parsing */ 2652 gp->rule = ps.firstrule; 2653 gp->errorcnt = ps.errorcnt; 2654 } 2655 /*************************** From the file "plink.c" *********************/ 2656 /* 2657 ** Routines processing configuration follow-set propagation links 2658 ** in the LEMON parser generator. 2659 */ 2660 static struct plink *plink_freelist = 0; 2661 2662 /* Allocate a new plink */ 2663 struct plink *Plink_new(){ 2664 struct plink *newlink; 2665 2666 if( plink_freelist==0 ){ 2667 int i; 2668 int amt = 100; 2669 plink_freelist = (struct plink *)calloc( amt, sizeof(struct plink) ); 2670 if( plink_freelist==0 ){ 2671 fprintf(stderr, 2672 "Unable to allocate memory for a new follow-set propagation link.\n"); 2673 exit(1); 2674 } 2675 for(i=0; i<amt-1; i++) plink_freelist[i].next = &plink_freelist[i+1]; 2676 plink_freelist[amt-1].next = 0; 2677 } 2678 newlink = plink_freelist; 2679 plink_freelist = plink_freelist->next; 2680 return newlink; 2681 } 2682 2683 /* Add a plink to a plink list */ 2684 void Plink_add(struct plink **plpp, struct config *cfp) 2685 { 2686 struct plink *newlink; 2687 newlink = Plink_new(); 2688 newlink->next = *plpp; 2689 *plpp = newlink; 2690 newlink->cfp = cfp; 2691 } 2692 2693 /* Transfer every plink on the list "from" to the list "to" */ 2694 void Plink_copy(struct plink **to, struct plink *from) 2695 { 2696 struct plink *nextpl; 2697 while( from ){ 2698 nextpl = from->next; 2699 from->next = *to; 2700 *to = from; 2701 from = nextpl; 2702 } 2703 } 2704 2705 /* Delete every plink on the list */ 2706 void Plink_delete(struct plink *plp) 2707 { 2708 struct plink *nextpl; 2709 2710 while( plp ){ 2711 nextpl = plp->next; 2712 plp->next = plink_freelist; 2713 plink_freelist = plp; 2714 plp = nextpl; 2715 } 2716 } 2717 /*********************** From the file "report.c" **************************/ 2718 /* 2719 ** Procedures for generating reports and tables in the LEMON parser generator. 2720 */ 2721 2722 /* Generate a filename with the given suffix. Space to hold the 2723 ** name comes from malloc() and must be freed by the calling 2724 ** function. 2725 */ 2726 PRIVATE char *file_makename(struct lemon *lemp, const char *suffix) 2727 { 2728 char *name; 2729 char *cp; 2730 char *filename; 2731 2732 filename = lemp->filename; 2733 if (local_out_dir) { 2734 char *ptr; 2735 #ifdef __WIN32__ 2736 for (ptr = lemp->filename + strlen (lemp->filename) - 1; (ptr > lemp->filename) && (*ptr != '\\'); ptr--); 2737 #else 2738 for (ptr = lemp->filename + strlen (lemp->filename) - 1; (ptr > lemp->filename) && (*ptr != '/'); ptr--); 2739 #endif 2740 if (ptr > lemp->filename) 2741 filename = ptr + 1; 2742 } 2743 2744 name = (char*)malloc( lemonStrlen(filename) + lemonStrlen(suffix) + 5 ); 2745 if( name==0 ){ 2746 fprintf(stderr,"Can't allocate space for a filename.\n"); 2747 exit(1); 2748 } 2749 strcpy(name,filename); 2750 cp = strrchr(name,'.'); 2751 if( cp ) *cp = 0; 2752 strcat(name,suffix); 2753 return name; 2754 } 2755 2756 /* Open a file with a name based on the name of the input file, 2757 ** but with a different (specified) suffix, and return a pointer 2758 ** to the stream */ 2759 PRIVATE FILE *file_open( 2760 struct lemon *lemp, 2761 const char *suffix, 2762 const char *mode 2763 ){ 2764 FILE *fp; 2765 2766 if( lemp->outname ) free(lemp->outname); 2767 lemp->outname = file_makename(lemp, suffix); 2768 fp = fopen(lemp->outname,mode); 2769 if( fp==0 && *mode=='w' ){ 2770 fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname); 2771 lemp->errorcnt++; 2772 return 0; 2773 } 2774 2775 /* Add files we create to a list, so we can delete them if we fail. This 2776 ** is to keep makefiles from getting confused. We don't include .out files, 2777 ** though: this is debug information, and you don't want it deleted if there 2778 ** was an error you need to track down. 2779 */ 2780 if(( *mode=='w' ) && (strcmp(suffix, ".out") != 0)){ 2781 const char **ptr = (const char **) 2782 realloc(made_files, sizeof (const char **) * (made_files_count + 1)); 2783 const char *fname = Strsafe(lemp->outname); 2784 if ((ptr == NULL) || (fname == NULL)) { 2785 free(ptr); 2786 memory_error(); 2787 } 2788 made_files = ptr; 2789 made_files[made_files_count++] = fname; 2790 } 2791 return fp; 2792 } 2793 2794 /* Duplicate the input file without comments and without actions 2795 ** on rules */ 2796 void Reprint(struct lemon *lemp) 2797 { 2798 struct rule *rp; 2799 struct symbol *sp; 2800 int i, j, maxlen, len, ncolumns, skip; 2801 printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename); 2802 maxlen = 10; 2803 for(i=0; i<lemp->nsymbol; i++){ 2804 sp = lemp->symbols[i]; 2805 len = lemonStrlen(sp->name); 2806 if( len>maxlen ) maxlen = len; 2807 } 2808 ncolumns = 76/(maxlen+5); 2809 if( ncolumns<1 ) ncolumns = 1; 2810 skip = (lemp->nsymbol + ncolumns - 1)/ncolumns; 2811 for(i=0; i<skip; i++){ 2812 printf("//"); 2813 for(j=i; j<lemp->nsymbol; j+=skip){ 2814 sp = lemp->symbols[j]; 2815 assert( sp->index==j ); 2816 printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name); 2817 } 2818 printf("\n"); 2819 } 2820 for(rp=lemp->rule; rp; rp=rp->next){ 2821 printf("%s",rp->lhs->name); 2822 /* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */ 2823 printf(" ::="); 2824 for(i=0; i<rp->nrhs; i++){ 2825 sp = rp->rhs[i]; 2826 printf(" %s", sp->name); 2827 if( sp->type==MULTITERMINAL ){ 2828 for(j=1; j<sp->nsubsym; j++){ 2829 printf("|%s", sp->subsym[j]->name); 2830 } 2831 } 2832 /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */ 2833 } 2834 printf("."); 2835 if( rp->precsym ) printf(" [%s]",rp->precsym->name); 2836 /* if( rp->code ) printf("\n %s",rp->code); */ 2837 printf("\n"); 2838 } 2839 } 2840 2841 void ConfigPrint(FILE *fp, struct config *cfp) 2842 { 2843 struct rule *rp; 2844 struct symbol *sp; 2845 int i, j; 2846 rp = cfp->rp; 2847 fprintf(fp,"%s ::=",rp->lhs->name); 2848 for(i=0; i<=rp->nrhs; i++){ 2849 if( i==cfp->dot ) fprintf(fp," *"); 2850 if( i==rp->nrhs ) break; 2851 sp = rp->rhs[i]; 2852 fprintf(fp," %s", sp->name); 2853 if( sp->type==MULTITERMINAL ){ 2854 for(j=1; j<sp->nsubsym; j++){ 2855 fprintf(fp,"|%s",sp->subsym[j]->name); 2856 } 2857 } 2858 } 2859 } 2860 2861 /* #define TEST */ 2862 #if 0 2863 /* Print a set */ 2864 PRIVATE void SetPrint(out,set,lemp) 2865 FILE *out; 2866 char *set; 2867 struct lemon *lemp; 2868 { 2869 int i; 2870 char *spacer; 2871 spacer = ""; 2872 fprintf(out,"%12s[",""); 2873 for(i=0; i<lemp->nterminal; i++){ 2874 if( SetFind(set,i) ){ 2875 fprintf(out,"%s%s",spacer,lemp->symbols[i]->name); 2876 spacer = " "; 2877 } 2878 } 2879 fprintf(out,"]\n"); 2880 } 2881 2882 /* Print a plink chain */ 2883 PRIVATE void PlinkPrint(out,plp,tag) 2884 FILE *out; 2885 struct plink *plp; 2886 char *tag; 2887 { 2888 while( plp ){ 2889 fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->statenum); 2890 ConfigPrint(out,plp->cfp); 2891 fprintf(out,"\n"); 2892 plp = plp->next; 2893 } 2894 } 2895 #endif 2896 2897 /* Print an action to the given file descriptor. Return FALSE if 2898 ** nothing was actually printed. 2899 */ 2900 int PrintAction(struct action *ap, FILE *fp, int indent){ 2901 int result = 1; 2902 switch( ap->type ){ 2903 case SHIFT: 2904 fprintf(fp,"%*s shift %d",indent,ap->sp->name,ap->x.stp->statenum); 2905 break; 2906 case REDUCE: 2907 fprintf(fp,"%*s reduce %d",indent,ap->sp->name,ap->x.rp->index); 2908 break; 2909 case ACCEPT: 2910 fprintf(fp,"%*s accept",indent,ap->sp->name); 2911 break; 2912 case ERROR: 2913 fprintf(fp,"%*s error",indent,ap->sp->name); 2914 break; 2915 case SRCONFLICT: 2916 case RRCONFLICT: 2917 fprintf(fp,"%*s reduce %-3d ** Parsing conflict **", 2918 indent,ap->sp->name,ap->x.rp->index); 2919 break; 2920 case SSCONFLICT: 2921 fprintf(fp,"%*s shift %-3d ** Parsing conflict **", 2922 indent,ap->sp->name,ap->x.stp->statenum); 2923 break; 2924 case SH_RESOLVED: 2925 if( showPrecedenceConflict ){ 2926 fprintf(fp,"%*s shift %-3d -- dropped by precedence", 2927 indent,ap->sp->name,ap->x.stp->statenum); 2928 }else{ 2929 result = 0; 2930 } 2931 break; 2932 case RD_RESOLVED: 2933 if( showPrecedenceConflict ){ 2934 fprintf(fp,"%*s reduce %-3d -- dropped by precedence", 2935 indent,ap->sp->name,ap->x.rp->index); 2936 }else{ 2937 result = 0; 2938 } 2939 break; 2940 case NOT_USED: 2941 result = 0; 2942 break; 2943 } 2944 return result; 2945 } 2946 2947 /* Generate the "y.output" log file */ 2948 void ReportOutput(struct lemon *lemp) 2949 { 2950 int i; 2951 struct state *stp; 2952 struct config *cfp; 2953 struct action *ap; 2954 FILE *fp; 2955 2956 fp = file_open(lemp,".out","wb"); 2957 if( fp==0 ) return; 2958 for(i=0; i<lemp->nstate; i++){ 2959 stp = lemp->sorted[i]; 2960 fprintf(fp,"State %d:\n",stp->statenum); 2961 if( lemp->basisflag ) cfp=stp->bp; 2962 else cfp=stp->cfp; 2963 while( cfp ){ 2964 char buf[20]; 2965 if( cfp->dot==cfp->rp->nrhs ){ 2966 sprintf(buf,"(%d)",cfp->rp->index); 2967 fprintf(fp," %5s ",buf); 2968 }else{ 2969 fprintf(fp," "); 2970 } 2971 ConfigPrint(fp,cfp); 2972 fprintf(fp,"\n"); 2973 #if 0 2974 SetPrint(fp,cfp->fws,lemp); 2975 PlinkPrint(fp,cfp->fplp,"To "); 2976 PlinkPrint(fp,cfp->bplp,"From"); 2977 #endif 2978 if( lemp->basisflag ) cfp=cfp->bp; 2979 else cfp=cfp->next; 2980 } 2981 fprintf(fp,"\n"); 2982 for(ap=stp->ap; ap; ap=ap->next){ 2983 if( PrintAction(ap,fp,30) ) fprintf(fp,"\n"); 2984 } 2985 fprintf(fp,"\n"); 2986 } 2987 fprintf(fp, "----------------------------------------------------\n"); 2988 fprintf(fp, "Symbols:\n"); 2989 for(i=0; i<lemp->nsymbol; i++){ 2990 int j; 2991 struct symbol *sp; 2992 2993 sp = lemp->symbols[i]; 2994 fprintf(fp, " %3d: %s", i, sp->name); 2995 if( sp->type==NONTERMINAL ){ 2996 fprintf(fp, ":"); 2997 if( sp->lambda ){ 2998 fprintf(fp, " <lambda>"); 2999 } 3000 for(j=0; j<lemp->nterminal; j++){ 3001 if( sp->firstset && SetFind(sp->firstset, j) ){ 3002 fprintf(fp, " %s", lemp->symbols[j]->name); 3003 } 3004 } 3005 } 3006 fprintf(fp, "\n"); 3007 } 3008 fclose(fp); 3009 return; 3010 } 3011 3012 /* Search for the file "name" which is in the same directory as 3013 ** the exacutable */ 3014 PRIVATE char *pathsearch(char *argv0, char *name, int modemask) 3015 { 3016 const char *pathlist; 3017 char *pathbufptr; 3018 char *pathbuf; 3019 char *path,*cp; 3020 char c; 3021 3022 #ifdef __WIN32__ 3023 cp = strrchr(argv0,'\\'); 3024 #else 3025 cp = strrchr(argv0,'/'); 3026 #endif 3027 if( cp ){ 3028 c = *cp; 3029 *cp = 0; 3030 path = (char *)malloc( lemonStrlen(argv0) + lemonStrlen(name) + 2 ); 3031 if( path ) sprintf(path,"%s/%s",argv0,name); 3032 *cp = c; 3033 }else{ 3034 pathlist = getenv("PATH"); 3035 if( pathlist==0 ) pathlist = ".:/bin:/usr/bin"; 3036 pathbuf = (char *) malloc( lemonStrlen(pathlist) + 1 ); 3037 path = (char *)malloc( lemonStrlen(pathlist)+lemonStrlen(name)+2 ); 3038 if( (pathbuf != 0) && (path!=0) ){ 3039 pathbufptr = pathbuf; 3040 strcpy(pathbuf, pathlist); 3041 while( *pathbuf ){ 3042 cp = strchr(pathbuf,':'); 3043 if( cp==0 ) cp = &pathbuf[lemonStrlen(pathbuf)]; 3044 c = *cp; 3045 *cp = 0; 3046 sprintf(path,"%s/%s",pathbuf,name); 3047 *cp = c; 3048 if( c==0 ) pathbuf[0] = 0; 3049 else pathbuf = &cp[1]; 3050 if( access(path,modemask)==0 ) break; 3051 } 3052 free(pathbufptr); 3053 } 3054 } 3055 return path; 3056 } 3057 3058 /* Given an action, compute the integer value for that action 3059 ** which is to be put in the action table of the generated machine. 3060 ** Return negative if no action should be generated. 3061 */ 3062 PRIVATE int compute_action(struct lemon *lemp, struct action *ap) 3063 { 3064 int act; 3065 switch( ap->type ){ 3066 case SHIFT: act = ap->x.stp->statenum; break; 3067 case REDUCE: act = ap->x.rp->index + lemp->nstate; break; 3068 case ERROR: act = lemp->nstate + lemp->nrule; break; 3069 case ACCEPT: act = lemp->nstate + lemp->nrule + 1; break; 3070 default: act = -1; break; 3071 } 3072 return act; 3073 } 3074 3075 #define LINESIZE 1000 3076 /* The next cluster of routines are for reading the template file 3077 ** and writing the results to the generated parser */ 3078 /* The first function transfers data from "in" to "out" until 3079 ** a line is seen which begins with "%%". The line number is 3080 ** tracked. 3081 ** 3082 ** if name!=0, then any word that begin with "Parse" is changed to 3083 ** begin with *name instead. 3084 */ 3085 PRIVATE void tplt_xfer(char *name, FILE *in, FILE *out, int *lineno) 3086 { 3087 int i, iStart; 3088 char line[LINESIZE]; 3089 while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){ 3090 (*lineno)++; 3091 iStart = 0; 3092 if( name ){ 3093 for(i=0; line[i]; i++){ 3094 if( line[i]=='P' && strncmp(&line[i],"Parse",5)==0 3095 && (i==0 || !isalpha(line[i-1])) 3096 ){ 3097 if( i>iStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]); 3098 fprintf(out,"%s",name); 3099 i += 4; 3100 iStart = i+1; 3101 } 3102 } 3103 } 3104 fprintf(out,"%s",&line[iStart]); 3105 } 3106 } 3107 3108 /* The next function finds the template file and opens it, returning 3109 ** a pointer to the opened file. */ 3110 PRIVATE FILE *tplt_open(struct lemon *lemp) 3111 { 3112 static char templatename[] = "lempar.c"; 3113 char buf[1000]; 3114 FILE *in; 3115 char *tpltname; 3116 char *cp; 3117 3118 /* first, see if user specified a template filename on the command line. */ 3119 if (user_templatename != 0) { 3120 if( access(user_templatename,004)==-1 ){ 3121 fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", 3122 user_templatename); 3123 lemp->errorcnt++; 3124 return 0; 3125 } 3126 in = fopen(user_templatename,"rb"); 3127 if( in==0 ){ 3128 fprintf(stderr,"Can't open the template file \"%s\".\n",user_templatename); 3129 lemp->errorcnt++; 3130 return 0; 3131 } 3132 return in; 3133 } 3134 3135 cp = strrchr(lemp->filename,'.'); 3136 if( cp ){ 3137 sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename); 3138 }else{ 3139 sprintf(buf,"%s.lt",lemp->filename); 3140 } 3141 if( access(buf,004)==0 ){ 3142 tpltname = buf; 3143 }else if( access(templatename,004)==0 ){ 3144 tpltname = templatename; 3145 }else{ 3146 tpltname = pathsearch(lemp->argv0,templatename,0); 3147 } 3148 if( tpltname==0 ){ 3149 fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", 3150 templatename); 3151 lemp->errorcnt++; 3152 return 0; 3153 } 3154 in = fopen(tpltname,"rb"); 3155 if( in==0 ){ 3156 fprintf(stderr,"Can't open the template file \"%s\".\n",templatename); 3157 lemp->errorcnt++; 3158 return 0; 3159 } 3160 return in; 3161 } 3162 3163 /* Print a #line directive line to the output file. */ 3164 PRIVATE void tplt_linedir(FILE *out, int lineno, char *filename) 3165 { 3166 fprintf(out,"#line %d \"",lineno); 3167 while( *filename ){ 3168 if( *filename == '\\' ) putc('\\',out); 3169 putc(*filename,out); 3170 filename++; 3171 } 3172 fprintf(out,"\"\n"); 3173 } 3174 3175 /* Print a string to the file and keep the linenumber up to date */ 3176 PRIVATE void tplt_print(FILE *out, struct lemon *lemp, char *str, int *lineno) 3177 { 3178 if( str==0 ) return; 3179 while( *str ){ 3180 putc(*str,out); 3181 if( *str=='\n' ) (*lineno)++; 3182 str++; 3183 } 3184 if( str[-1]!='\n' ){ 3185 putc('\n',out); 3186 (*lineno)++; 3187 } 3188 if (!lemp->nolinenosflag) { 3189 (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); 3190 } 3191 return; 3192 } 3193 3194 /* 3195 ** The following routine emits code for the destructor for the 3196 ** symbol sp 3197 */ 3198 void emit_destructor_code( 3199 FILE *out, 3200 struct symbol *sp, 3201 struct lemon *lemp, 3202 int *lineno 3203 ){ 3204 char *cp = 0; 3205 3206 if( sp->type==TERMINAL ){ 3207 cp = lemp->tokendest; 3208 if( cp==0 ) return; 3209 fprintf(out,"{\n"); (*lineno)++; 3210 }else if( sp->destructor ){ 3211 cp = sp->destructor; 3212 fprintf(out,"{\n"); (*lineno)++; 3213 if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,sp->destLineno,lemp->filename); } 3214 }else if( lemp->vardest ){ 3215 cp = lemp->vardest; 3216 if( cp==0 ) return; 3217 fprintf(out,"{\n"); (*lineno)++; 3218 }else{ 3219 assert( 0 ); /* Cannot happen */ 3220 } 3221 for(; *cp; cp++){ 3222 if( *cp=='$' && cp[1]=='$' ){ 3223 fprintf(out,"(yypminor->yy%d)",sp->dtnum); 3224 cp++; 3225 continue; 3226 } 3227 if( *cp=='\n' ) (*lineno)++; 3228 fputc(*cp,out); 3229 } 3230 fprintf(out,"\n"); (*lineno)++; 3231 if (!lemp->nolinenosflag) { 3232 (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); 3233 } 3234 fprintf(out,"}\n"); (*lineno)++; 3235 return; 3236 } 3237 3238 /* 3239 ** Return TRUE (non-zero) if the given symbol has a destructor. 3240 */ 3241 int has_destructor(struct symbol *sp, struct lemon *lemp) 3242 { 3243 int ret; 3244 if( sp->type==TERMINAL ){ 3245 ret = lemp->tokendest!=0; 3246 }else{ 3247 ret = lemp->vardest!=0 || sp->destructor!=0; 3248 } 3249 return ret; 3250 } 3251 3252 /* 3253 ** Append text to a dynamically allocated string. If zText is 0 then 3254 ** reset the string to be empty again. Always return the complete text 3255 ** of the string (which is overwritten with each call). 3256 ** 3257 ** n bytes of zText are stored. If n==0 then all of zText up to the first 3258 ** \000 terminator is stored. zText can contain up to two instances of 3259 ** %d. The values of p1 and p2 are written into the first and second 3260 ** %d. 3261 ** 3262 ** If n==-1, then the previous character is overwritten. 3263 */ 3264 PRIVATE char *append_str(const char *zText, int n, int p1, int p2){ 3265 static char empty[1] = { 0 }; 3266 static char *z = 0; 3267 static int alloced = 0; 3268 static int used = 0; 3269 int c; 3270 char zInt[40]; 3271 if( zText==0 ){ 3272 used = 0; 3273 return z; 3274 } 3275 if( n<=0 ){ 3276 if( n<0 ){ 3277 used += n; 3278 assert( used>=0 ); 3279 } 3280 n = lemonStrlen(zText); 3281 } 3282 if( n+sizeof(zInt)*2+used >= alloced ){ 3283 alloced = n + sizeof(zInt)*2 + used + 200; 3284 z = (char *) realloc(z, alloced); 3285 } 3286 if( z==0 ) return empty; 3287 while( n-- > 0 ){ 3288 c = *(zText++); 3289 if( c=='%' && n>0 && zText[0]=='d' ){ 3290 sprintf(zInt, "%d", p1); 3291 p1 = p2; 3292 strcpy(&z[used], zInt); 3293 used += lemonStrlen(&z[used]); 3294 zText++; 3295 n--; 3296 }else{ 3297 z[used++] = c; 3298 } 3299 } 3300 z[used] = 0; 3301 return z; 3302 } 3303 3304 /* 3305 ** zCode is a string that is the action associated with a rule. Expand 3306 ** the symbols in this string so that the refer to elements of the parser 3307 ** stack. 3308 */ 3309 PRIVATE void translate_code(struct lemon *lemp, struct rule *rp){ 3310 char *cp, *xp; 3311 int i; 3312 char lhsused = 0; /* True if the LHS element has been used */ 3313 char used[MAXRHS]; /* True for each RHS element which is used */ 3314 3315 for(i=0; i<rp->nrhs; i++) used[i] = 0; 3316 lhsused = 0; 3317 3318 if( rp->code==0 ){ 3319 static char newlinestr[2] = { '\n', '\0' }; 3320 rp->code = newlinestr; 3321 rp->line = rp->ruleline; 3322 } 3323 3324 append_str(0,0,0,0); 3325 3326 /* This const cast is wrong but harmless, if we're careful. */ 3327 for(cp=(char *)rp->code; *cp; cp++){ 3328 if( isalpha(*cp) && (cp==rp->code || (!isalnum(cp[-1]) && cp[-1]!='_')) ){ 3329 char saved; 3330 for(xp= &cp[1]; isalnum(*xp) || *xp=='_'; xp++); 3331 saved = *xp; 3332 *xp = 0; 3333 if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){ 3334 append_str("yygotominor.yy%d",0,rp->lhs->dtnum,0); 3335 cp = xp; 3336 lhsused = 1; 3337 }else{ 3338 for(i=0; i<rp->nrhs; i++){ 3339 if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){ 3340 if( cp!=rp->code && cp[-1]=='@' ){ 3341 /* If the argument is of the form @X then substituted 3342 ** the token number of X, not the value of X */ 3343 append_str("yymsp[%d].major",-1,i-rp->nrhs+1,0); 3344 }else{ 3345 struct symbol *sp = rp->rhs[i]; 3346 int dtnum; 3347 if( sp->type==MULTITERMINAL ){ 3348 dtnum = sp->subsym[0]->dtnum; 3349 }else{ 3350 dtnum = sp->dtnum; 3351 } 3352 append_str("yymsp[%d].minor.yy%d",0,i-rp->nrhs+1, dtnum); 3353 } 3354 cp = xp; 3355 used[i] = 1; 3356 break; 3357 } 3358 } 3359 } 3360 *xp = saved; 3361 } 3362 append_str(cp, 1, 0, 0); 3363 } /* End loop */ 3364 3365 /* Check to make sure the LHS has been used */ 3366 if( rp->lhsalias && !lhsused ){ 3367 ErrorMsg(lemp->filename,rp->ruleline, 3368 "Label \"%s\" for \"%s(%s)\" is never used.", 3369 rp->lhsalias,rp->lhs->name,rp->lhsalias); 3370 lemp->errorcnt++; 3371 } 3372 3373 /* Generate destructor code for RHS symbols which are not used in the 3374 ** reduce code */ 3375 for(i=0; i<rp->nrhs; i++){ 3376 if( rp->rhsalias[i] && !used[i] ){ 3377 ErrorMsg(lemp->filename,rp->ruleline, 3378 "Label %s for \"%s(%s)\" is never used.", 3379 rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]); 3380 lemp->errorcnt++; 3381 }else if( rp->rhsalias[i]==0 ){ 3382 if( has_destructor(rp->rhs[i],lemp) ){ 3383 append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0, 3384 rp->rhs[i]->index,i-rp->nrhs+1); 3385 }else{ 3386 /* No destructor defined for this term */ 3387 } 3388 } 3389 } 3390 if( rp->code ){ 3391 cp = append_str(0,0,0,0); 3392 rp->code = Strsafe(cp?cp:""); 3393 } 3394 } 3395 3396 /* 3397 ** Generate code which executes when the rule "rp" is reduced. Write 3398 ** the code to "out". Make sure lineno stays up-to-date. 3399 */ 3400 PRIVATE void emit_code( 3401 FILE *out, 3402 struct rule *rp, 3403 struct lemon *lemp, 3404 int *lineno 3405 ){ 3406 const char *cp; 3407 3408 /* Generate code to do the reduce action */ 3409 if( rp->code ){ 3410 if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,rp->line,lemp->filename); } 3411 fprintf(out,"{%s",rp->code); 3412 for(cp=rp->code; *cp; cp++){ 3413 if( *cp=='\n' ) (*lineno)++; 3414 } /* End loop */ 3415 fprintf(out,"}\n"); (*lineno)++; 3416 if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); } 3417 } /* End if( rp->code ) */ 3418 3419 return; 3420 } 3421 3422 /* 3423 ** Print the definition of the union used for the parser's data stack. 3424 ** This union contains fields for every possible data type for tokens 3425 ** and nonterminals. In the process of computing and printing this 3426 ** union, also set the ".dtnum" field of every terminal and nonterminal 3427 ** symbol. 3428 */ 3429 void print_stack_union( 3430 FILE *out, /* The output stream */ 3431 struct lemon *lemp, /* The main info structure for this parser */ 3432 int *plineno, /* Pointer to the line number */ 3433 int mhflag /* True if generating makeheaders output */ 3434 ){ 3435 int lineno = *plineno; /* The line number of the output */ 3436 char **types; /* A hash table of datatypes */ 3437 int arraysize; /* Size of the "types" array */ 3438 int maxdtlength; /* Maximum length of any ".datatype" field. */ 3439 char *stddt; /* Standardized name for a datatype */ 3440 int i,j; /* Loop counters */ 3441 int hash; /* For hashing the name of a type */ 3442 const char *name; /* Name of the parser */ 3443 3444 /* Allocate and initialize types[] and allocate stddt[] */ 3445 arraysize = lemp->nsymbol * 2; 3446 types = (char**)calloc( arraysize, sizeof(char*) ); 3447 for(i=0; i<arraysize; i++) types[i] = 0; 3448 maxdtlength = 0; 3449 if( lemp->vartype ){ 3450 maxdtlength = lemonStrlen(lemp->vartype); 3451 } 3452 for(i=0; i<lemp->nsymbol; i++){ 3453 int len; 3454 struct symbol *sp = lemp->symbols[i]; 3455 if( sp->datatype==0 ) continue; 3456 len = lemonStrlen(sp->datatype); 3457 if( len>maxdtlength ) maxdtlength = len; 3458 } 3459 stddt = (char*)malloc( maxdtlength*2 + 1 ); 3460 if( types==0 || stddt==0 ){ 3461 fprintf(stderr,"Out of memory.\n"); 3462 exit(1); 3463 } 3464 3465 /* Build a hash table of datatypes. The ".dtnum" field of each symbol 3466 ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is 3467 ** used for terminal symbols. If there is no %default_type defined then 3468 ** 0 is also used as the .dtnum value for nonterminals which do not specify 3469 ** a datatype using the %type directive. 3470 */ 3471 for(i=0; i<lemp->nsymbol; i++){ 3472 struct symbol *sp = lemp->symbols[i]; 3473 char *cp; 3474 if( sp==lemp->errsym ){ 3475 sp->dtnum = arraysize+1; 3476 continue; 3477 } 3478 if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){ 3479 sp->dtnum = 0; 3480 continue; 3481 } 3482 cp = sp->datatype; 3483 if( cp==0 ) cp = lemp->vartype; 3484 j = 0; 3485 while( isspace(*cp) ) cp++; 3486 while( *cp ) stddt[j++] = *cp++; 3487 while( j>0 && isspace(stddt[j-1]) ) j--; 3488 stddt[j] = 0; 3489 if( lemp->tokentype && strcmp(stddt, lemp->tokentype)==0 ){ 3490 sp->dtnum = 0; 3491 continue; 3492 } 3493 hash = 0; 3494 for(j=0; stddt[j]; j++){ 3495 hash = hash*53 + stddt[j]; 3496 } 3497 hash = (hash & 0x7fffffff)%arraysize; 3498 while( types[hash] ){ 3499 if( strcmp(types[hash],stddt)==0 ){ 3500 sp->dtnum = hash + 1; 3501 break; 3502 } 3503 hash++; 3504 if( hash>=arraysize ) hash = 0; 3505 } 3506 if( types[hash]==0 ){ 3507 sp->dtnum = hash + 1; 3508 types[hash] = (char*)malloc( lemonStrlen(stddt)+1 ); 3509 if( types[hash]==0 ){ 3510 fprintf(stderr,"Out of memory.\n"); 3511 exit(1); 3512 } 3513 strcpy(types[hash],stddt); 3514 } 3515 } 3516 3517 /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */ 3518 name = lemp->name ? lemp->name : "Parse"; 3519 lineno = *plineno; 3520 if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; } 3521 fprintf(out,"#define %sTOKENTYPE %s\n",name, 3522 lemp->tokentype?lemp->tokentype:"void*"); lineno++; 3523 if( mhflag ){ fprintf(out,"#endif\n"); lineno++; } 3524 fprintf(out,"typedef union {\n"); lineno++; 3525 fprintf(out," int yyinit;\n"); lineno++; 3526 fprintf(out," %sTOKENTYPE yy0;\n",name); lineno++; 3527 for(i=0; i<arraysize; i++){ 3528 if( types[i]==0 ) continue; 3529 fprintf(out," %s yy%d;\n",types[i],i+1); lineno++; 3530 free(types[i]); 3531 } 3532 if( lemp->errsym->useCnt ){ 3533 fprintf(out," int yy%d;\n",lemp->errsym->dtnum); lineno++; 3534 } 3535 free(stddt); 3536 free(types); 3537 fprintf(out,"} YYMINORTYPE;\n"); lineno++; 3538 *plineno = lineno; 3539 } 3540 3541 /* 3542 ** Return the name of a C datatype able to represent values between 3543 ** lwr and upr, inclusive. 3544 */ 3545 static const char *minimum_size_type(int lwr, int upr){ 3546 if( lwr>=0 ){ 3547 if( upr<=255 ){ 3548 return "unsigned char"; 3549 }else if( upr<65535 ){ 3550 return "unsigned short int"; 3551 }else{ 3552 return "unsigned int"; 3553 } 3554 }else if( lwr>=-127 && upr<=127 ){ 3555 return "signed char"; 3556 }else if( lwr>=-32767 && upr<32767 ){ 3557 return "short"; 3558 }else{ 3559 return "int"; 3560 } 3561 } 3562 3563 /* 3564 ** Each state contains a set of token transaction and a set of 3565 ** nonterminal transactions. Each of these sets makes an instance 3566 ** of the following structure. An array of these structures is used 3567 ** to order the creation of entries in the yy_action[] table. 3568 */ 3569 struct axset { 3570 struct state *stp; /* A pointer to a state */ 3571 int isTkn; /* True to use tokens. False for non-terminals */ 3572 int nAction; /* Number of actions */ 3573 int iOrder; /* Original order of action sets */ 3574 }; 3575 3576 /* 3577 ** Compare to axset structures for sorting purposes 3578 */ 3579 static int axset_compare(const void *a, const void *b){ 3580 struct axset *p1 = (struct axset*)a; 3581 struct axset *p2 = (struct axset*)b; 3582 int c; 3583 c = p2->nAction - p1->nAction; 3584 if( c==0 ){ 3585 c = p2->iOrder - p1->iOrder; 3586 } 3587 assert( c!=0 || p1==p2 ); 3588 return c; 3589 } 3590 3591 /* 3592 ** Write text on "out" that describes the rule "rp". 3593 */ 3594 static void writeRuleText(FILE *out, struct rule *rp){ 3595 int j; 3596 fprintf(out,"%s ::=", rp->lhs->name); 3597 for(j=0; j<rp->nrhs; j++){ 3598 struct symbol *sp = rp->rhs[j]; 3599 fprintf(out," %s", sp->name); 3600 if( sp->type==MULTITERMINAL ){ 3601 int k; 3602 for(k=1; k<sp->nsubsym; k++){ 3603 fprintf(out,"|%s",sp->subsym[k]->name); 3604 } 3605 } 3606 } 3607 } 3608 3609 3610 /* Generate C source code for the parser */ 3611 void ReportTable( 3612 struct lemon *lemp, 3613 int mhflag /* Output in makeheaders format if true */ 3614 ){ 3615 FILE *out, *in; 3616 char line[LINESIZE]; 3617 int lineno; 3618 struct state *stp; 3619 struct action *ap; 3620 struct rule *rp; 3621 struct acttab *pActtab; 3622 int i, j, n; 3623 const char *name; 3624 int mnTknOfst, mxTknOfst; 3625 int mnNtOfst, mxNtOfst; 3626 struct axset *ax; 3627 3628 in = tplt_open(lemp); 3629 if( in==0 ) return; 3630 out = file_open(lemp,".c","wb"); 3631 if( out==0 ){ 3632 fclose(in); 3633 return; 3634 } 3635 lineno = 1; 3636 tplt_xfer(lemp->name,in,out,&lineno); 3637 3638 /* Generate the include code, if any */ 3639 tplt_print(out,lemp,lemp->include,&lineno); 3640 if( mhflag ){ 3641 char *name = file_makename(lemp, ".h"); 3642 fprintf(out,"#include \"%s\"\n", name); lineno++; 3643 free(name); 3644 } 3645 tplt_xfer(lemp->name,in,out,&lineno); 3646 3647 /* Generate #defines for all tokens */ 3648 if( mhflag ){ 3649 const char *prefix; 3650 fprintf(out,"#if INTERFACE\n"); lineno++; 3651 if( lemp->tokenprefix ) prefix = lemp->tokenprefix; 3652 else prefix = ""; 3653 for(i=1; i<lemp->nterminal; i++){ 3654 fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); 3655 lineno++; 3656 } 3657 fprintf(out,"#endif\n"); lineno++; 3658 } 3659 tplt_xfer(lemp->name,in,out,&lineno); 3660 3661 /* Generate the defines */ 3662 fprintf(out,"#define YYCODETYPE %s\n", 3663 minimum_size_type(0, lemp->nsymbol+1)); lineno++; 3664 fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1); lineno++; 3665 fprintf(out,"#define YYACTIONTYPE %s\n", 3666 minimum_size_type(0, lemp->nstate+lemp->nrule+5)); lineno++; 3667 if( lemp->wildcard ){ 3668 fprintf(out,"#define YYWILDCARD %d\n", 3669 lemp->wildcard->index); lineno++; 3670 } 3671 print_stack_union(out,lemp,&lineno,mhflag); 3672 fprintf(out, "#ifndef YYSTACKDEPTH\n"); lineno++; 3673 if( lemp->stacksize ){ 3674 fprintf(out,"#define YYSTACKDEPTH %s\n",lemp->stacksize); lineno++; 3675 }else{ 3676 fprintf(out,"#define YYSTACKDEPTH 100\n"); lineno++; 3677 } 3678 fprintf(out, "#endif\n"); lineno++; 3679 if( mhflag ){ 3680 fprintf(out,"#if INTERFACE\n"); lineno++; 3681 } 3682 name = lemp->name ? lemp->name : "Parse"; 3683 if( lemp->arg && lemp->arg[0] ){ 3684 int i; 3685 i = lemonStrlen(lemp->arg); 3686 while( i>=1 && isspace(lemp->arg[i-1]) ) i--; 3687 while( i>=1 && (isalnum(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--; 3688 fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg); lineno++; 3689 fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg); lineno++; 3690 fprintf(out,"#define %sARG_FETCH %s = yypParser->%s\n", 3691 name,lemp->arg,&lemp->arg[i]); lineno++; 3692 fprintf(out,"#define %sARG_STORE yypParser->%s = %s\n", 3693 name,&lemp->arg[i],&lemp->arg[i]); lineno++; 3694 }else{ 3695 fprintf(out,"#define %sARG_SDECL\n",name); lineno++; 3696 fprintf(out,"#define %sARG_PDECL\n",name); lineno++; 3697 fprintf(out,"#define %sARG_FETCH\n",name); lineno++; 3698 fprintf(out,"#define %sARG_STORE\n",name); lineno++; 3699 } 3700 if( mhflag ){ 3701 fprintf(out,"#endif\n"); lineno++; 3702 } 3703 fprintf(out,"#define YYNSTATE %d\n",lemp->nstate); lineno++; 3704 fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++; 3705 if( lemp->errsym->useCnt ){ 3706 fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++; 3707 fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++; 3708 } 3709 if( lemp->has_fallback ){ 3710 fprintf(out,"#define YYFALLBACK 1\n"); lineno++; 3711 } 3712 tplt_xfer(lemp->name,in,out,&lineno); 3713 3714 /* Generate the action table and its associates: 3715 ** 3716 ** yy_action[] A single table containing all actions. 3717 ** yy_lookahead[] A table containing the lookahead for each entry in 3718 ** yy_action. Used to detect hash collisions. 3719 ** yy_shift_ofst[] For each state, the offset into yy_action for 3720 ** shifting terminals. 3721 ** yy_reduce_ofst[] For each state, the offset into yy_action for 3722 ** shifting non-terminals after a reduce. 3723 ** yy_default[] Default action for each state. 3724 */ 3725 3726 /* Compute the actions on all states and count them up */ 3727 ax = (struct axset *) calloc(lemp->nstate*2, sizeof(ax[0])); 3728 if( ax==0 ){ 3729 fprintf(stderr,"malloc failed\n"); 3730 exit(1); 3731 } 3732 for(i=0; i<lemp->nstate; i++){ 3733 stp = lemp->sorted[i]; 3734 ax[i*2].stp = stp; 3735 ax[i*2].isTkn = 1; 3736 ax[i*2].nAction = stp->nTknAct; 3737 ax[i*2+1].stp = stp; 3738 ax[i*2+1].isTkn = 0; 3739 ax[i*2+1].nAction = stp->nNtAct; 3740 } 3741 mxTknOfst = mnTknOfst = 0; 3742 mxNtOfst = mnNtOfst = 0; 3743 3744 /* Compute the action table. In order to try to keep the size of the 3745 ** action table to a minimum, the heuristic of placing the largest action 3746 ** sets first is used. 3747 */ 3748 for(i=0; i<lemp->nstate*2; i++) ax[i].iOrder = i; 3749 qsort(ax, lemp->nstate*2, sizeof(ax[0]), axset_compare); 3750 pActtab = acttab_alloc(); 3751 for(i=0; i<lemp->nstate*2 && ax[i].nAction>0; i++){ 3752 stp = ax[i].stp; 3753 if( ax[i].isTkn ){ 3754 for(ap=stp->ap; ap; ap=ap->next){ 3755 int action; 3756 if( ap->sp->index>=lemp->nterminal ) continue; 3757 action = compute_action(lemp, ap); 3758 if( action<0 ) continue; 3759 acttab_action(pActtab, ap->sp->index, action); 3760 } 3761 stp->iTknOfst = acttab_insert(pActtab); 3762 if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst; 3763 if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst; 3764 }else{ 3765 for(ap=stp->ap; ap; ap=ap->next){ 3766 int action; 3767 if( ap->sp->index<lemp->nterminal ) continue; 3768 if( ap->sp->index==lemp->nsymbol ) continue; 3769 action = compute_action(lemp, ap); 3770 if( action<0 ) continue; 3771 acttab_action(pActtab, ap->sp->index, action); 3772 } 3773 stp->iNtOfst = acttab_insert(pActtab); 3774 if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst; 3775 if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; 3776 } 3777 } 3778 free(ax); 3779 3780 /* Output the yy_action table */ 3781 n = acttab_size(pActtab); 3782 fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++; 3783 fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++; 3784 for(i=j=0; i<n; i++){ 3785 int action = acttab_yyaction(pActtab, i); 3786 if( action<0 ) action = lemp->nstate + lemp->nrule + 2; 3787 if( j==0 ) fprintf(out," /* %5d */ ", i); 3788 fprintf(out, " %4d,", action); 3789 if( j==9 || i==n-1 ){ 3790 fprintf(out, "\n"); lineno++; 3791 j = 0; 3792 }else{ 3793 j++; 3794 } 3795 } 3796 fprintf(out, "};\n"); lineno++; 3797 3798 /* Output the yy_lookahead table */ 3799 fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++; 3800 for(i=j=0; i<n; i++){ 3801 int la = acttab_yylookahead(pActtab, i); 3802 if( la<0 ) la = lemp->nsymbol; 3803 if( j==0 ) fprintf(out," /* %5d */ ", i); 3804 fprintf(out, " %4d,", la); 3805 if( j==9 || i==n-1 ){ 3806 fprintf(out, "\n"); lineno++; 3807 j = 0; 3808 }else{ 3809 j++; 3810 } 3811 } 3812 fprintf(out, "};\n"); lineno++; 3813 3814 /* Output the yy_shift_ofst[] table */ 3815 fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", mnTknOfst-1); lineno++; 3816 n = lemp->nstate; 3817 while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--; 3818 fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n-1); lineno++; 3819 fprintf(out, "#define YY_SHIFT_MIN (%d)\n", mnTknOfst); lineno++; 3820 fprintf(out, "#define YY_SHIFT_MAX (%d)\n", mxTknOfst); lineno++; 3821 fprintf(out, "static const %s yy_shift_ofst[] = {\n", 3822 minimum_size_type(mnTknOfst-1, mxTknOfst)); lineno++; 3823 for(i=j=0; i<n; i++){ 3824 int ofst; 3825 stp = lemp->sorted[i]; 3826 ofst = stp->iTknOfst; 3827 if( ofst==NO_OFFSET ) ofst = mnTknOfst - 1; 3828 if( j==0 ) fprintf(out," /* %5d */ ", i); 3829 fprintf(out, " %4d,", ofst); 3830 if( j==9 || i==n-1 ){ 3831 fprintf(out, "\n"); lineno++; 3832 j = 0; 3833 }else{ 3834 j++; 3835 } 3836 } 3837 fprintf(out, "};\n"); lineno++; 3838 3839 /* Output the yy_reduce_ofst[] table */ 3840 fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++; 3841 n = lemp->nstate; 3842 while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--; 3843 fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); lineno++; 3844 fprintf(out, "#define YY_REDUCE_MIN (%d)\n", mnNtOfst); lineno++; 3845 fprintf(out, "#define YY_REDUCE_MAX (%d)\n", mxNtOfst); lineno++; 3846 fprintf(out, "static const %s yy_reduce_ofst[] = {\n", 3847 minimum_size_type(mnNtOfst-1, mxNtOfst)); lineno++; 3848 for(i=j=0; i<n; i++){ 3849 int ofst; 3850 stp = lemp->sorted[i]; 3851 ofst = stp->iNtOfst; 3852 if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1; 3853 if( j==0 ) fprintf(out," /* %5d */ ", i); 3854 fprintf(out, " %4d,", ofst); 3855 if( j==9 || i==n-1 ){ 3856 fprintf(out, "\n"); lineno++; 3857 j = 0; 3858 }else{ 3859 j++; 3860 } 3861 } 3862 fprintf(out, "};\n"); lineno++; 3863 3864 /* Output the default action table */ 3865 fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++; 3866 n = lemp->nstate; 3867 for(i=j=0; i<n; i++){ 3868 stp = lemp->sorted[i]; 3869 if( j==0 ) fprintf(out," /* %5d */ ", i); 3870 fprintf(out, " %4d,", stp->iDflt); 3871 if( j==9 || i==n-1 ){ 3872 fprintf(out, "\n"); lineno++; 3873 j = 0; 3874 }else{ 3875 j++; 3876 } 3877 } 3878 fprintf(out, "};\n"); lineno++; 3879 tplt_xfer(lemp->name,in,out,&lineno); 3880 3881 /* Generate the table of fallback tokens. 3882 */ 3883 if( lemp->has_fallback ){ 3884 int mx = lemp->nterminal - 1; 3885 while( mx>0 && lemp->symbols[mx]->fallback==0 ){ mx--; } 3886 for(i=0; i<=mx; i++){ 3887 struct symbol *p = lemp->symbols[i]; 3888 if( p->fallback==0 ){ 3889 fprintf(out, " 0, /* %10s => nothing */\n", p->name); 3890 }else{ 3891 fprintf(out, " %3d, /* %10s => %s */\n", p->fallback->index, 3892 p->name, p->fallback->name); 3893 } 3894 lineno++; 3895 } 3896 } 3897 tplt_xfer(lemp->name, in, out, &lineno); 3898 3899 /* Generate a table containing the symbolic name of every symbol 3900 */ 3901 for(i=0; i<lemp->nsymbol; i++){ 3902 sprintf(line,"\"%s\",",lemp->symbols[i]->name); 3903 fprintf(out," %-15s",line); 3904 if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; } 3905 } 3906 if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; } 3907 tplt_xfer(lemp->name,in,out,&lineno); 3908 3909 /* Generate a table containing a text string that describes every 3910 ** rule in the rule set of the grammar. This information is used 3911 ** when tracing REDUCE actions. 3912 */ 3913 for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ 3914 assert( rp->index==i ); 3915 fprintf(out," /* %3d */ \"", i); 3916 writeRuleText(out, rp); 3917 fprintf(out,"\",\n"); lineno++; 3918 } 3919 tplt_xfer(lemp->name,in,out,&lineno); 3920 3921 /* Generate code which executes every time a symbol is popped from 3922 ** the stack while processing errors or while destroying the parser. 3923 ** (In other words, generate the %destructor actions) 3924 */ 3925 if( lemp->tokendest ){ 3926 int once = 1; 3927 for(i=0; i<lemp->nsymbol; i++){ 3928 struct symbol *sp = lemp->symbols[i]; 3929 if( sp==0 || sp->type!=TERMINAL ) continue; 3930 if( once ){ 3931 fprintf(out, " /* TERMINAL Destructor */\n"); lineno++; 3932 once = 0; 3933 } 3934 fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; 3935 } 3936 for(i=0; i<lemp->nsymbol && lemp->symbols[i]->type!=TERMINAL; i++); 3937 if( i<lemp->nsymbol ){ 3938 emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); 3939 fprintf(out," break;\n"); lineno++; 3940 } 3941 } 3942 if( lemp->vardest ){ 3943 struct symbol *dflt_sp = 0; 3944 int once = 1; 3945 for(i=0; i<lemp->nsymbol; i++){ 3946 struct symbol *sp = lemp->symbols[i]; 3947 if( sp==0 || sp->type==TERMINAL || 3948 sp->index<=0 || sp->destructor!=0 ) continue; 3949 if( once ){ 3950 fprintf(out, " /* Default NON-TERMINAL Destructor */\n"); lineno++; 3951 once = 0; 3952 } 3953 fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; 3954 dflt_sp = sp; 3955 } 3956 if( dflt_sp!=0 ){ 3957 emit_destructor_code(out,dflt_sp,lemp,&lineno); 3958 } 3959 fprintf(out," break;\n"); lineno++; 3960 } 3961 for(i=0; i<lemp->nsymbol; i++){ 3962 struct symbol *sp = lemp->symbols[i]; 3963 if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue; 3964 fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; 3965 3966 /* Combine duplicate destructors into a single case */ 3967 for(j=i+1; j<lemp->nsymbol; j++){ 3968 struct symbol *sp2 = lemp->symbols[j]; 3969 if( sp2 && sp2->type!=TERMINAL && sp2->destructor 3970 && sp2->dtnum==sp->dtnum 3971 && strcmp(sp->destructor,sp2->destructor)==0 ){ 3972 fprintf(out," case %d: /* %s */\n", 3973 sp2->index, sp2->name); lineno++; 3974 sp2->destructor = 0; 3975 } 3976 } 3977 3978 emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); 3979 fprintf(out," break;\n"); lineno++; 3980 } 3981 tplt_xfer(lemp->name,in,out,&lineno); 3982 3983 /* Generate code which executes whenever the parser stack overflows */ 3984 tplt_print(out,lemp,lemp->overflow,&lineno); 3985 tplt_xfer(lemp->name,in,out,&lineno); 3986 3987 /* Generate the table of rule information 3988 ** 3989 ** Note: This code depends on the fact that rules are number 3990 ** sequentually beginning with 0. 3991 */ 3992 for(rp=lemp->rule; rp; rp=rp->next){ 3993 fprintf(out," { %d, %d },\n",rp->lhs->index,rp->nrhs); lineno++; 3994 } 3995 tplt_xfer(lemp->name,in,out,&lineno); 3996 3997 /* Generate code which execution during each REDUCE action */ 3998 for(rp=lemp->rule; rp; rp=rp->next){ 3999 translate_code(lemp, rp); 4000 } 4001 /* First output rules other than the default: rule */ 4002 for(rp=lemp->rule; rp; rp=rp->next){ 4003 struct rule *rp2; /* Other rules with the same action */ 4004 if( rp->code==0 ) continue; 4005 if( rp->code[0]=='\n' && rp->code[1]==0 ) continue; /* Will be default: */ 4006 fprintf(out," case %d: /* ", rp->index); 4007 writeRuleText(out, rp); 4008 fprintf(out, " */\n"); lineno++; 4009 for(rp2=rp->next; rp2; rp2=rp2->next){ 4010 if( rp2->code==rp->code ){ 4011 fprintf(out," case %d: /* ", rp2->index); 4012 writeRuleText(out, rp2); 4013 fprintf(out," */ yytestcase(yyruleno==%d);\n", rp2->index); lineno++; 4014 rp2->code = 0; 4015 } 4016 } 4017 emit_code(out,rp,lemp,&lineno); 4018 fprintf(out," break;\n"); lineno++; 4019 rp->code = 0; 4020 } 4021 /* Finally, output the default: rule. We choose as the default: all 4022 ** empty actions. */ 4023 fprintf(out," default:\n"); lineno++; 4024 for(rp=lemp->rule; rp; rp=rp->next){ 4025 if( rp->code==0 ) continue; 4026 assert( rp->code[0]=='\n' && rp->code[1]==0 ); 4027 fprintf(out," /* (%d) ", rp->index); 4028 writeRuleText(out, rp); 4029 fprintf(out, " */ yytestcase(yyruleno==%d);\n", rp->index); lineno++; 4030 } 4031 fprintf(out," break;\n"); lineno++; 4032 tplt_xfer(lemp->name,in,out,&lineno); 4033 4034 /* Generate code which executes if a parse fails */ 4035 tplt_print(out,lemp,lemp->failure,&lineno); 4036 tplt_xfer(lemp->name,in,out,&lineno); 4037 4038 /* Generate code which executes when a syntax error occurs */ 4039 tplt_print(out,lemp,lemp->error,&lineno); 4040 tplt_xfer(lemp->name,in,out,&lineno); 4041 4042 /* Generate code which executes when the parser accepts its input */ 4043 tplt_print(out,lemp,lemp->accept,&lineno); 4044 tplt_xfer(lemp->name,in,out,&lineno); 4045 4046 /* Append any addition code the user desires */ 4047 tplt_print(out,lemp,lemp->extracode,&lineno); 4048 4049 fclose(in); 4050 fclose(out); 4051 return; 4052 } 4053 4054 /* Generate a header file for the parser */ 4055 void ReportHeader(struct lemon *lemp) 4056 { 4057 FILE *out, *in; 4058 const char *prefix; 4059 char line[LINESIZE]; 4060 char pattern[LINESIZE]; 4061 int i; 4062 4063 if( lemp->tokenprefix ) prefix = lemp->tokenprefix; 4064 else prefix = ""; 4065 in = file_open(lemp,".h","rb"); 4066 if( in ){ 4067 for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){ 4068 sprintf(pattern,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); 4069 if( strcmp(line,pattern) ) break; 4070 } 4071 fclose(in); 4072 if( i==lemp->nterminal ){ 4073 /* No change in the file. Don't rewrite it. */ 4074 return; 4075 } 4076 } 4077 out = file_open(lemp,".h","wb"); 4078 if( out ){ 4079 for(i=1; i<lemp->nterminal; i++){ 4080 fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); 4081 } 4082 fclose(out); 4083 } 4084 return; 4085 } 4086 4087 /* Reduce the size of the action tables, if possible, by making use 4088 ** of defaults. 4089 ** 4090 ** In this version, we take the most frequent REDUCE action and make 4091 ** it the default. Except, there is no default if the wildcard token 4092 ** is a possible look-ahead. 4093 */ 4094 void CompressTables(struct lemon *lemp) 4095 { 4096 struct state *stp; 4097 struct action *ap, *ap2; 4098 struct rule *rp, *rp2, *rbest; 4099 int nbest, n; 4100 int i; 4101 int usesWildcard; 4102 4103 for(i=0; i<lemp->nstate; i++){ 4104 stp = lemp->sorted[i]; 4105 nbest = 0; 4106 rbest = 0; 4107 usesWildcard = 0; 4108 4109 for(ap=stp->ap; ap; ap=ap->next){ 4110 if( ap->type==SHIFT && ap->sp==lemp->wildcard ){ 4111 usesWildcard = 1; 4112 } 4113 if( ap->type!=REDUCE ) continue; 4114 rp = ap->x.rp; 4115 if( rp->lhsStart ) continue; 4116 if( rp==rbest ) continue; 4117 n = 1; 4118 for(ap2=ap->next; ap2; ap2=ap2->next){ 4119 if( ap2->type!=REDUCE ) continue; 4120 rp2 = ap2->x.rp; 4121 if( rp2==rbest ) continue; 4122 if( rp2==rp ) n++; 4123 } 4124 if( n>nbest ){ 4125 nbest = n; 4126 rbest = rp; 4127 } 4128 } 4129 4130 /* Do not make a default if the number of rules to default 4131 ** is not at least 1 or if the wildcard token is a possible 4132 ** lookahead. 4133 */ 4134 if( nbest<1 || usesWildcard ) continue; 4135 4136 4137 /* Combine matching REDUCE actions into a single default */ 4138 for(ap=stp->ap; ap; ap=ap->next){ 4139 if( ap->type==REDUCE && ap->x.rp==rbest ) break; 4140 } 4141 assert( ap ); 4142 ap->sp = Symbol_new("{default}"); 4143 for(ap=ap->next; ap; ap=ap->next){ 4144 if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED; 4145 } 4146 stp->ap = Action_sort(stp->ap); 4147 } 4148 } 4149 4150 4151 /* 4152 ** Compare two states for sorting purposes. The smaller state is the 4153 ** one with the most non-terminal actions. If they have the same number 4154 ** of non-terminal actions, then the smaller is the one with the most 4155 ** token actions. 4156 */ 4157 static int stateResortCompare(const void *a, const void *b){ 4158 const struct state *pA = *(const struct state**)a; 4159 const struct state *pB = *(const struct state**)b; 4160 int n; 4161 4162 n = pB->nNtAct - pA->nNtAct; 4163 if( n==0 ){ 4164 n = pB->nTknAct - pA->nTknAct; 4165 if( n==0 ){ 4166 n = pB->statenum - pA->statenum; 4167 } 4168 } 4169 assert( n!=0 ); 4170 return n; 4171 } 4172 4173 4174 /* 4175 ** Renumber and resort states so that states with fewer choices 4176 ** occur at the end. Except, keep state 0 as the first state. 4177 */ 4178 void ResortStates(struct lemon *lemp) 4179 { 4180 int i; 4181 struct state *stp; 4182 struct action *ap; 4183 4184 for(i=0; i<lemp->nstate; i++){ 4185 stp = lemp->sorted[i]; 4186 stp->nTknAct = stp->nNtAct = 0; 4187 stp->iDflt = lemp->nstate + lemp->nrule; 4188 stp->iTknOfst = NO_OFFSET; 4189 stp->iNtOfst = NO_OFFSET; 4190 for(ap=stp->ap; ap; ap=ap->next){ 4191 if( compute_action(lemp,ap)>=0 ){ 4192 if( ap->sp->index<lemp->nterminal ){ 4193 stp->nTknAct++; 4194 }else if( ap->sp->index<lemp->nsymbol ){ 4195 stp->nNtAct++; 4196 }else{ 4197 stp->iDflt = compute_action(lemp, ap); 4198 } 4199 } 4200 } 4201 } 4202 qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]), 4203 stateResortCompare); 4204 for(i=0; i<lemp->nstate; i++){ 4205 lemp->sorted[i]->statenum = i; 4206 } 4207 } 4208 4209 4210 /***************** From the file "set.c" ************************************/ 4211 /* 4212 ** Set manipulation routines for the LEMON parser generator. 4213 */ 4214 4215 static int size = 0; 4216 4217 /* Set the set size */ 4218 void SetSize(int n) 4219 { 4220 size = n+1; 4221 } 4222 4223 /* Allocate a new set */ 4224 char *SetNew(){ 4225 char *s; 4226 s = (char*)calloc( size, 1); 4227 if( s==0 ){ 4228 extern void memory_error(); 4229 memory_error(); 4230 } 4231 return s; 4232 } 4233 4234 /* Deallocate a set */ 4235 void SetFree(char *s) 4236 { 4237 free(s); 4238 } 4239 4240 /* Add a new element to the set. Return TRUE if the element was added 4241 ** and FALSE if it was already there. */ 4242 int SetAdd(char *s, int e) 4243 { 4244 int rv; 4245 assert( e>=0 && e<size ); 4246 rv = s[e]; 4247 s[e] = 1; 4248 return !rv; 4249 } 4250 4251 /* Add every element of s2 to s1. Return TRUE if s1 changes. */ 4252 int SetUnion(char *s1, char *s2) 4253 { 4254 int i, progress; 4255 progress = 0; 4256 for(i=0; i<size; i++){ 4257 if( s2[i]==0 ) continue; 4258 if( s1[i]==0 ){ 4259 progress = 1; 4260 s1[i] = 1; 4261 } 4262 } 4263 return progress; 4264 } 4265 /********************** From the file "table.c" ****************************/ 4266 /* 4267 ** All code in this file has been automatically generated 4268 ** from a specification in the file 4269 ** "table.q" 4270 ** by the associative array code building program "aagen". 4271 ** Do not edit this file! Instead, edit the specification 4272 ** file, then rerun aagen. 4273 */ 4274 /* 4275 ** Code for processing tables in the LEMON parser generator. 4276 */ 4277 4278 PRIVATE int strhash(const char *x) 4279 { 4280 int h = 0; 4281 while( *x) h = h*13 + *(x++); 4282 return h; 4283 } 4284 4285 /* Works like strdup, sort of. Save a string in malloced memory, but 4286 ** keep strings in a table so that the same string is not in more 4287 ** than one place. 4288 */ 4289 const char *Strsafe(const char *y) 4290 { 4291 const char *z; 4292 char *cpy; 4293 4294 if( y==0 ) return 0; 4295 z = Strsafe_find(y); 4296 if( z==0 && (cpy=(char *)malloc( lemonStrlen(y)+1 ))!=0 ){ 4297 strcpy(cpy,y); 4298 z = cpy; 4299 Strsafe_insert(z); 4300 } 4301 MemoryCheck(z); 4302 return z; 4303 } 4304 4305 /* There is one instance of the following structure for each 4306 ** associative array of type "x1". 4307 */ 4308 struct s_x1 { 4309 int size; /* The number of available slots. */ 4310 /* Must be a power of 2 greater than or */ 4311 /* equal to 1 */ 4312 int count; /* Number of currently slots filled */ 4313 struct s_x1node *tbl; /* The data stored here */ 4314 struct s_x1node **ht; /* Hash table for lookups */ 4315 }; 4316 4317 /* There is one instance of this structure for every data element 4318 ** in an associative array of type "x1". 4319 */ 4320 typedef struct s_x1node { 4321 const char *data; /* The data */ 4322 struct s_x1node *next; /* Next entry with the same hash */ 4323 struct s_x1node **from; /* Previous link */ 4324 } x1node; 4325 4326 /* There is only one instance of the array, which is the following */ 4327 static struct s_x1 *x1a; 4328 4329 /* Allocate a new associative array */ 4330 void Strsafe_init(){ 4331 if( x1a ) return; 4332 x1a = (struct s_x1*)malloc( sizeof(struct s_x1) ); 4333 if( x1a ){ 4334 x1a->size = 1024; 4335 x1a->count = 0; 4336 x1a->tbl = (x1node*)malloc( 4337 (sizeof(x1node) + sizeof(x1node*))*1024 ); 4338 if( x1a->tbl==0 ){ 4339 free(x1a); 4340 x1a = 0; 4341 }else{ 4342 int i; 4343 x1a->ht = (x1node**)&(x1a->tbl[1024]); 4344 for(i=0; i<1024; i++) x1a->ht[i] = 0; 4345 } 4346 } 4347 } 4348 /* Insert a new record into the array. Return TRUE if successful. 4349 ** Prior data with the same key is NOT overwritten */ 4350 int Strsafe_insert(const char *data) 4351 { 4352 x1node *np; 4353 int h; 4354 int ph; 4355 4356 if( x1a==0 ) return 0; 4357 ph = strhash(data); 4358 h = ph & (x1a->size-1); 4359 np = x1a->ht[h]; 4360 while( np ){ 4361 if( strcmp(np->data,data)==0 ){ 4362 /* An existing entry with the same key is found. */ 4363 /* Fail because overwrite is not allows. */ 4364 return 0; 4365 } 4366 np = np->next; 4367 } 4368 if( x1a->count>=x1a->size ){ 4369 /* Need to make the hash table bigger */ 4370 int i,size; 4371 struct s_x1 array; 4372 array.size = size = x1a->size*2; 4373 array.count = x1a->count; 4374 array.tbl = (x1node*)malloc( 4375 (sizeof(x1node) + sizeof(x1node*))*size ); 4376 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 4377 array.ht = (x1node**)&(array.tbl[size]); 4378 for(i=0; i<size; i++) array.ht[i] = 0; 4379 for(i=0; i<x1a->count; i++){ 4380 x1node *oldnp, *newnp; 4381 oldnp = &(x1a->tbl[i]); 4382 h = strhash(oldnp->data) & (size-1); 4383 newnp = &(array.tbl[i]); 4384 if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 4385 newnp->next = array.ht[h]; 4386 newnp->data = oldnp->data; 4387 newnp->from = &(array.ht[h]); 4388 array.ht[h] = newnp; 4389 } 4390 free(x1a->tbl); 4391 *x1a = array; 4392 } 4393 /* Insert the new data */ 4394 h = ph & (x1a->size-1); 4395 np = &(x1a->tbl[x1a->count++]); 4396 np->data = data; 4397 if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next); 4398 np->next = x1a->ht[h]; 4399 x1a->ht[h] = np; 4400 np->from = &(x1a->ht[h]); 4401 return 1; 4402 } 4403 4404 /* Return a pointer to data assigned to the given key. Return NULL 4405 ** if no such key. */ 4406 const char *Strsafe_find(const char *key) 4407 { 4408 int h; 4409 x1node *np; 4410 4411 if( x1a==0 ) return 0; 4412 h = strhash(key) & (x1a->size-1); 4413 np = x1a->ht[h]; 4414 while( np ){ 4415 if( strcmp(np->data,key)==0 ) break; 4416 np = np->next; 4417 } 4418 return np ? np->data : 0; 4419 } 4420 4421 /* Return a pointer to the (terminal or nonterminal) symbol "x". 4422 ** Create a new symbol if this is the first time "x" has been seen. 4423 */ 4424 struct symbol *Symbol_new(const char *x) 4425 { 4426 struct symbol *sp; 4427 4428 sp = Symbol_find(x); 4429 if( sp==0 ){ 4430 sp = (struct symbol *)calloc(1, sizeof(struct symbol) ); 4431 MemoryCheck(sp); 4432 sp->name = Strsafe(x); 4433 sp->type = isupper(*x) ? TERMINAL : NONTERMINAL; 4434 sp->rule = 0; 4435 sp->fallback = 0; 4436 sp->prec = -1; 4437 sp->assoc = UNK; 4438 sp->firstset = 0; 4439 sp->lambda = LEMON_FALSE; 4440 sp->destructor = 0; 4441 sp->destLineno = 0; 4442 sp->datatype = 0; 4443 sp->useCnt = 0; 4444 Symbol_insert(sp,sp->name); 4445 } 4446 sp->useCnt++; 4447 return sp; 4448 } 4449 4450 /* Compare two symbols for working purposes 4451 ** 4452 ** Symbols that begin with upper case letters (terminals or tokens) 4453 ** must sort before symbols that begin with lower case letters 4454 ** (non-terminals). Other than that, the order does not matter. 4455 ** 4456 ** We find experimentally that leaving the symbols in their original 4457 ** order (the order they appeared in the grammar file) gives the 4458 ** smallest parser tables in SQLite. 4459 */ 4460 int Symbolcmpp(const void *_a, const void *_b) 4461 { 4462 const struct symbol **a = (const struct symbol **) _a; 4463 const struct symbol **b = (const struct symbol **) _b; 4464 int i1 = (**a).index + 10000000*((**a).name[0]>'Z'); 4465 int i2 = (**b).index + 10000000*((**b).name[0]>'Z'); 4466 assert( i1!=i2 || strcmp((**a).name,(**b).name)==0 ); 4467 return i1-i2; 4468 } 4469 4470 /* There is one instance of the following structure for each 4471 ** associative array of type "x2". 4472 */ 4473 struct s_x2 { 4474 int size; /* The number of available slots. */ 4475 /* Must be a power of 2 greater than or */ 4476 /* equal to 1 */ 4477 int count; /* Number of currently slots filled */ 4478 struct s_x2node *tbl; /* The data stored here */ 4479 struct s_x2node **ht; /* Hash table for lookups */ 4480 }; 4481 4482 /* There is one instance of this structure for every data element 4483 ** in an associative array of type "x2". 4484 */ 4485 typedef struct s_x2node { 4486 struct symbol *data; /* The data */ 4487 const char *key; /* The key */ 4488 struct s_x2node *next; /* Next entry with the same hash */ 4489 struct s_x2node **from; /* Previous link */ 4490 } x2node; 4491 4492 /* There is only one instance of the array, which is the following */ 4493 static struct s_x2 *x2a; 4494 4495 /* Allocate a new associative array */ 4496 void Symbol_init(){ 4497 if( x2a ) return; 4498 x2a = (struct s_x2*)malloc( sizeof(struct s_x2) ); 4499 if( x2a ){ 4500 x2a->size = 128; 4501 x2a->count = 0; 4502 x2a->tbl = (x2node*)malloc( 4503 (sizeof(x2node) + sizeof(x2node*))*128 ); 4504 if( x2a->tbl==0 ){ 4505 free(x2a); 4506 x2a = 0; 4507 }else{ 4508 int i; 4509 x2a->ht = (x2node**)&(x2a->tbl[128]); 4510 for(i=0; i<128; i++) x2a->ht[i] = 0; 4511 } 4512 } 4513 } 4514 /* Insert a new record into the array. Return TRUE if successful. 4515 ** Prior data with the same key is NOT overwritten */ 4516 int Symbol_insert(struct symbol *data, const char *key) 4517 { 4518 x2node *np; 4519 int h; 4520 int ph; 4521 4522 if( x2a==0 ) return 0; 4523 ph = strhash(key); 4524 h = ph & (x2a->size-1); 4525 np = x2a->ht[h]; 4526 while( np ){ 4527 if( strcmp(np->key,key)==0 ){ 4528 /* An existing entry with the same key is found. */ 4529 /* Fail because overwrite is not allows. */ 4530 return 0; 4531 } 4532 np = np->next; 4533 } 4534 if( x2a->count>=x2a->size ){ 4535 /* Need to make the hash table bigger */ 4536 int i,size; 4537 struct s_x2 array; 4538 array.size = size = x2a->size*2; 4539 array.count = x2a->count; 4540 array.tbl = (x2node*)malloc( 4541 (sizeof(x2node) + sizeof(x2node*))*size ); 4542 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 4543 array.ht = (x2node**)&(array.tbl[size]); 4544 for(i=0; i<size; i++) array.ht[i] = 0; 4545 for(i=0; i<x2a->count; i++){ 4546 x2node *oldnp, *newnp; 4547 oldnp = &(x2a->tbl[i]); 4548 h = strhash(oldnp->key) & (size-1); 4549 newnp = &(array.tbl[i]); 4550 if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 4551 newnp->next = array.ht[h]; 4552 newnp->key = oldnp->key; 4553 newnp->data = oldnp->data; 4554 newnp->from = &(array.ht[h]); 4555 array.ht[h] = newnp; 4556 } 4557 free(x2a->tbl); 4558 *x2a = array; 4559 } 4560 /* Insert the new data */ 4561 h = ph & (x2a->size-1); 4562 np = &(x2a->tbl[x2a->count++]); 4563 np->key = key; 4564 np->data = data; 4565 if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next); 4566 np->next = x2a->ht[h]; 4567 x2a->ht[h] = np; 4568 np->from = &(x2a->ht[h]); 4569 return 1; 4570 } 4571 4572 /* Return a pointer to data assigned to the given key. Return NULL 4573 ** if no such key. */ 4574 struct symbol *Symbol_find(const char *key) 4575 { 4576 int h; 4577 x2node *np; 4578 4579 if( x2a==0 ) return 0; 4580 h = strhash(key) & (x2a->size-1); 4581 np = x2a->ht[h]; 4582 while( np ){ 4583 if( strcmp(np->key,key)==0 ) break; 4584 np = np->next; 4585 } 4586 return np ? np->data : 0; 4587 } 4588 4589 /* Return the n-th data. Return NULL if n is out of range. */ 4590 struct symbol *Symbol_Nth(int n) 4591 { 4592 struct symbol *data; 4593 if( x2a && n>0 && n<=x2a->count ){ 4594 data = x2a->tbl[n-1].data; 4595 }else{ 4596 data = 0; 4597 } 4598 return data; 4599 } 4600 4601 /* Return the size of the array */ 4602 int Symbol_count() 4603 { 4604 return x2a ? x2a->count : 0; 4605 } 4606 4607 /* Return an array of pointers to all data in the table. 4608 ** The array is obtained from malloc. Return NULL if memory allocation 4609 ** problems, or if the array is empty. */ 4610 struct symbol **Symbol_arrayof() 4611 { 4612 struct symbol **array; 4613 int i,size; 4614 if( x2a==0 ) return 0; 4615 size = x2a->count; 4616 array = (struct symbol **)calloc(size, sizeof(struct symbol *)); 4617 if( array ){ 4618 for(i=0; i<size; i++) array[i] = x2a->tbl[i].data; 4619 } 4620 return array; 4621 } 4622 4623 /* Compare two configurations */ 4624 int Configcmp(const char *_a,const char *_b) 4625 { 4626 const struct config *a = (struct config *) _a; 4627 const struct config *b = (struct config *) _b; 4628 int x; 4629 x = a->rp->index - b->rp->index; 4630 if( x==0 ) x = a->dot - b->dot; 4631 return x; 4632 } 4633 4634 /* Compare two states */ 4635 PRIVATE int statecmp(struct config *a, struct config *b) 4636 { 4637 int rc; 4638 for(rc=0; rc==0 && a && b; a=a->bp, b=b->bp){ 4639 rc = a->rp->index - b->rp->index; 4640 if( rc==0 ) rc = a->dot - b->dot; 4641 } 4642 if( rc==0 ){ 4643 if( a ) rc = 1; 4644 if( b ) rc = -1; 4645 } 4646 return rc; 4647 } 4648 4649 /* Hash a state */ 4650 PRIVATE int statehash(struct config *a) 4651 { 4652 int h=0; 4653 while( a ){ 4654 h = h*571 + a->rp->index*37 + a->dot; 4655 a = a->bp; 4656 } 4657 return h; 4658 } 4659 4660 /* Allocate a new state structure */ 4661 struct state *State_new() 4662 { 4663 struct state *newstate; 4664 newstate = (struct state *)calloc(1, sizeof(struct state) ); 4665 MemoryCheck(newstate); 4666 return newstate; 4667 } 4668 4669 /* There is one instance of the following structure for each 4670 ** associative array of type "x3". 4671 */ 4672 struct s_x3 { 4673 int size; /* The number of available slots. */ 4674 /* Must be a power of 2 greater than or */ 4675 /* equal to 1 */ 4676 int count; /* Number of currently slots filled */ 4677 struct s_x3node *tbl; /* The data stored here */ 4678 struct s_x3node **ht; /* Hash table for lookups */ 4679 }; 4680 4681 /* There is one instance of this structure for every data element 4682 ** in an associative array of type "x3". 4683 */ 4684 typedef struct s_x3node { 4685 struct state *data; /* The data */ 4686 struct config *key; /* The key */ 4687 struct s_x3node *next; /* Next entry with the same hash */ 4688 struct s_x3node **from; /* Previous link */ 4689 } x3node; 4690 4691 /* There is only one instance of the array, which is the following */ 4692 static struct s_x3 *x3a; 4693 4694 /* Allocate a new associative array */ 4695 void State_init(){ 4696 if( x3a ) return; 4697 x3a = (struct s_x3*)malloc( sizeof(struct s_x3) ); 4698 if( x3a ){ 4699 x3a->size = 128; 4700 x3a->count = 0; 4701 x3a->tbl = (x3node*)malloc( 4702 (sizeof(x3node) + sizeof(x3node*))*128 ); 4703 if( x3a->tbl==0 ){ 4704 free(x3a); 4705 x3a = 0; 4706 }else{ 4707 int i; 4708 x3a->ht = (x3node**)&(x3a->tbl[128]); 4709 for(i=0; i<128; i++) x3a->ht[i] = 0; 4710 } 4711 } 4712 } 4713 /* Insert a new record into the array. Return TRUE if successful. 4714 ** Prior data with the same key is NOT overwritten */ 4715 int State_insert(struct state *data, struct config *key) 4716 { 4717 x3node *np; 4718 int h; 4719 int ph; 4720 4721 if( x3a==0 ) return 0; 4722 ph = statehash(key); 4723 h = ph & (x3a->size-1); 4724 np = x3a->ht[h]; 4725 while( np ){ 4726 if( statecmp(np->key,key)==0 ){ 4727 /* An existing entry with the same key is found. */ 4728 /* Fail because overwrite is not allows. */ 4729 return 0; 4730 } 4731 np = np->next; 4732 } 4733 if( x3a->count>=x3a->size ){ 4734 /* Need to make the hash table bigger */ 4735 int i,size; 4736 struct s_x3 array; 4737 array.size = size = x3a->size*2; 4738 array.count = x3a->count; 4739 array.tbl = (x3node*)malloc( 4740 (sizeof(x3node) + sizeof(x3node*))*size ); 4741 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 4742 array.ht = (x3node**)&(array.tbl[size]); 4743 for(i=0; i<size; i++) array.ht[i] = 0; 4744 for(i=0; i<x3a->count; i++){ 4745 x3node *oldnp, *newnp; 4746 oldnp = &(x3a->tbl[i]); 4747 h = statehash(oldnp->key) & (size-1); 4748 newnp = &(array.tbl[i]); 4749 if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 4750 newnp->next = array.ht[h]; 4751 newnp->key = oldnp->key; 4752 newnp->data = oldnp->data; 4753 newnp->from = &(array.ht[h]); 4754 array.ht[h] = newnp; 4755 } 4756 free(x3a->tbl); 4757 *x3a = array; 4758 } 4759 /* Insert the new data */ 4760 h = ph & (x3a->size-1); 4761 np = &(x3a->tbl[x3a->count++]); 4762 np->key = key; 4763 np->data = data; 4764 if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next); 4765 np->next = x3a->ht[h]; 4766 x3a->ht[h] = np; 4767 np->from = &(x3a->ht[h]); 4768 return 1; 4769 } 4770 4771 /* Return a pointer to data assigned to the given key. Return NULL 4772 ** if no such key. */ 4773 struct state *State_find(struct config *key) 4774 { 4775 int h; 4776 x3node *np; 4777 4778 if( x3a==0 ) return 0; 4779 h = statehash(key) & (x3a->size-1); 4780 np = x3a->ht[h]; 4781 while( np ){ 4782 if( statecmp(np->key,key)==0 ) break; 4783 np = np->next; 4784 } 4785 return np ? np->data : 0; 4786 } 4787 4788 /* Return an array of pointers to all data in the table. 4789 ** The array is obtained from malloc. Return NULL if memory allocation 4790 ** problems, or if the array is empty. */ 4791 struct state **State_arrayof() 4792 { 4793 struct state **array; 4794 int i,size; 4795 if( x3a==0 ) return 0; 4796 size = x3a->count; 4797 array = (struct state **)malloc( sizeof(struct state *)*size ); 4798 if( array ){ 4799 for(i=0; i<size; i++) array[i] = x3a->tbl[i].data; 4800 } 4801 return array; 4802 } 4803 4804 /* Hash a configuration */ 4805 PRIVATE int confighash(struct config *a) 4806 { 4807 int h=0; 4808 h = h*571 + a->rp->index*37 + a->dot; 4809 return h; 4810 } 4811 4812 /* There is one instance of the following structure for each 4813 ** associative array of type "x4". 4814 */ 4815 struct s_x4 { 4816 int size; /* The number of available slots. */ 4817 /* Must be a power of 2 greater than or */ 4818 /* equal to 1 */ 4819 int count; /* Number of currently slots filled */ 4820 struct s_x4node *tbl; /* The data stored here */ 4821 struct s_x4node **ht; /* Hash table for lookups */ 4822 }; 4823 4824 /* There is one instance of this structure for every data element 4825 ** in an associative array of type "x4". 4826 */ 4827 typedef struct s_x4node { 4828 struct config *data; /* The data */ 4829 struct s_x4node *next; /* Next entry with the same hash */ 4830 struct s_x4node **from; /* Previous link */ 4831 } x4node; 4832 4833 /* There is only one instance of the array, which is the following */ 4834 static struct s_x4 *x4a; 4835 4836 /* Allocate a new associative array */ 4837 void Configtable_init(){ 4838 if( x4a ) return; 4839 x4a = (struct s_x4*)malloc( sizeof(struct s_x4) ); 4840 if( x4a ){ 4841 x4a->size = 64; 4842 x4a->count = 0; 4843 x4a->tbl = (x4node*)malloc( 4844 (sizeof(x4node) + sizeof(x4node*))*64 ); 4845 if( x4a->tbl==0 ){ 4846 free(x4a); 4847 x4a = 0; 4848 }else{ 4849 int i; 4850 x4a->ht = (x4node**)&(x4a->tbl[64]); 4851 for(i=0; i<64; i++) x4a->ht[i] = 0; 4852 } 4853 } 4854 } 4855 /* Insert a new record into the array. Return TRUE if successful. 4856 ** Prior data with the same key is NOT overwritten */ 4857 int Configtable_insert(struct config *data) 4858 { 4859 x4node *np; 4860 int h; 4861 int ph; 4862 4863 if( x4a==0 ) return 0; 4864 ph = confighash(data); 4865 h = ph & (x4a->size-1); 4866 np = x4a->ht[h]; 4867 while( np ){ 4868 if( Configcmp((const char *) np->data,(const char *) data)==0 ){ 4869 /* An existing entry with the same key is found. */ 4870 /* Fail because overwrite is not allows. */ 4871 return 0; 4872 } 4873 np = np->next; 4874 } 4875 if( x4a->count>=x4a->size ){ 4876 /* Need to make the hash table bigger */ 4877 int i,size; 4878 struct s_x4 array; 4879 array.size = size = x4a->size*2; 4880 array.count = x4a->count; 4881 array.tbl = (x4node*)malloc( 4882 (sizeof(x4node) + sizeof(x4node*))*size ); 4883 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 4884 array.ht = (x4node**)&(array.tbl[size]); 4885 for(i=0; i<size; i++) array.ht[i] = 0; 4886 for(i=0; i<x4a->count; i++){ 4887 x4node *oldnp, *newnp; 4888 oldnp = &(x4a->tbl[i]); 4889 h = confighash(oldnp->data) & (size-1); 4890 newnp = &(array.tbl[i]); 4891 if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 4892 newnp->next = array.ht[h]; 4893 newnp->data = oldnp->data; 4894 newnp->from = &(array.ht[h]); 4895 array.ht[h] = newnp; 4896 } 4897 free(x4a->tbl); 4898 *x4a = array; 4899 } 4900 /* Insert the new data */ 4901 h = ph & (x4a->size-1); 4902 np = &(x4a->tbl[x4a->count++]); 4903 np->data = data; 4904 if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next); 4905 np->next = x4a->ht[h]; 4906 x4a->ht[h] = np; 4907 np->from = &(x4a->ht[h]); 4908 return 1; 4909 } 4910 4911 /* Return a pointer to data assigned to the given key. Return NULL 4912 ** if no such key. */ 4913 struct config *Configtable_find(struct config *key) 4914 { 4915 int h; 4916 x4node *np; 4917 4918 if( x4a==0 ) return 0; 4919 h = confighash(key) & (x4a->size-1); 4920 np = x4a->ht[h]; 4921 while( np ){ 4922 if( Configcmp((const char *) np->data,(const char *) key)==0 ) break; 4923 np = np->next; 4924 } 4925 return np ? np->data : 0; 4926 } 4927 4928 /* Remove all data from the table. Pass each data to the function "f" 4929 ** as it is removed. ("f" may be null to avoid this step.) */ 4930 void Configtable_clear(int(*f)(struct config *)) 4931 { 4932 int i; 4933 if( x4a==0 || x4a->count==0 ) return; 4934 if( f ) for(i=0; i<x4a->count; i++) (*f)(x4a->tbl[i].data); 4935 for(i=0; i<x4a->size; i++) x4a->ht[i] = 0; 4936 x4a->count = 0; 4937 return; 4938 } 4939