1 /*
2 ** This file contains all sources (including headers) to the LEMON
3 ** LALR(1) parser generator.  The sources have been combined into a
4 ** single file to make it easy to include LEMON in the source tree
5 ** and Makefile of another program.
6 **
7 ** The authors of this program disclaim copyright.
8 **
9 ** Modified to add "-o" and "-h" command line options.  Olly Betts 2005-02-14
10 ** Modified to fix a number of compiler warnings.  Olly Betts 2007-02-20
11 **
12 ** Synced with upstream CVS rev 1.69:
13 ** http://www.sqlite.org/cvstrac/fileview?f=sqlite/tool/lemon.c&v=1.69
14 */
15 #include <stdio.h>
16 #include <stdarg.h>
17 #include <string.h>
18 #include <ctype.h>
19 #include <stdlib.h>
20 #include <assert.h>
21 
22 #ifndef __WIN32__
23 #   if defined(_WIN32) || defined(WIN32)
24 #	define __WIN32__
25 #   endif
26 #endif
27 
28 #ifdef __WIN32__
29 extern int access();
30 #else
31 #include <unistd.h>
32 #endif
33 
34 #define PRIVATE static
35 
36 #ifdef TEST
37 #define MAXRHS 5       /* Set low to exercise exception code */
38 #else
39 #define MAXRHS 1000
40 #endif
41 
42 static char *msort(char*,char**,int(*)(const char*,const char*));
43 
44 /*
45 ** Compilers are getting increasingly pedantic about type conversions
46 ** as C evolves ever closer to Ada....  To work around the latest problems
47 ** we have to define the following variant of strlen().
48 */
49 #define lemonStrlen(X)   ((int)strlen(X))
50 
51 static struct action *Action_new(void);
52 static struct action *Action_sort(struct action *);
53 
54 /********** From the file "build.h" ************************************/
55 void FindRulePrecedences();
56 void FindFirstSets();
57 void FindStates();
58 void FindLinks();
59 void FindFollowSets();
60 void FindActions();
61 
62 /********* From the file "configlist.h" *********************************/
63 void Configlist_init(/* void */);
64 struct config *Configlist_add(/* struct rule *, int */);
65 struct config *Configlist_addbasis(/* struct rule *, int */);
66 void Configlist_closure(/* void */);
67 void Configlist_sort(/* void */);
68 void Configlist_sortbasis(/* void */);
69 struct config *Configlist_return(/* void */);
70 struct config *Configlist_basis(/* void */);
71 void Configlist_eat(/* struct config * */);
72 void Configlist_reset(/* void */);
73 
74 /********* From the file "error.h" ***************************************/
75 void ErrorMsg(const char *, int,const char *, ...);
76 
77 /****** From the file "option.h" ******************************************/
78 struct s_options {
79   enum { OPT_FLAG=1,  OPT_INT,  OPT_DBL,  OPT_STR,
80          OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR} type;
81   char *label;
82   void *arg;
83   void(*func)();
84   char *message;
85 };
86 int    OptInit(/* char**,struct s_options*,FILE* */);
87 int    OptNArgs(/* void */);
88 char  *OptArg(/* int */);
89 void   OptErr(/* int */);
90 void   OptPrint(/* void */);
91 
92 /******** From the file "parse.h" *****************************************/
93 void Parse(/* struct lemon *lemp */);
94 
95 /********* From the file "plink.h" ***************************************/
96 struct plink *Plink_new(/* void */);
97 void Plink_add(/* struct plink **, struct config * */);
98 void Plink_copy(/* struct plink **, struct plink * */);
99 void Plink_delete(/* struct plink * */);
100 
101 /********** From the file "report.h" *************************************/
102 void Reprint(/* struct lemon * */);
103 void ReportOutput(/* struct lemon * */);
104 void ReportTable(/* struct lemon * */);
105 void ReportHeader(/* struct lemon * */);
106 void CompressTables(/* struct lemon * */);
107 void ResortStates(/* struct lemon * */);
108 
109 /********** From the file "set.h" ****************************************/
110 void  SetSize(/* int N */);             /* All sets will be of size N */
111 char *SetNew(/* void */);               /* A new set for element 0..N */
112 void  SetFree(/* char* */);             /* Deallocate a set */
113 
114 int SetAdd(/* char*,int */);            /* Add element to a set */
115 int SetUnion(/* char *A,char *B */);    /* A <- A U B, thru element N */
116 
117 #define SetFind(X,Y) (X[Y])       /* True if Y is in set X */
118 
119 /********** From the file "struct.h" *************************************/
120 /*
121 ** Principal data structures for the LEMON parser generator.
122 */
123 
124 typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean;
125 
126 /* Symbols (terminals and nonterminals) of the grammar are stored
127 ** in the following: */
128 struct symbol {
129   char *name;              /* Name of the symbol */
130   int index;               /* Index number for this symbol */
131   enum {
132     TERMINAL,
133     NONTERMINAL,
134     MULTITERMINAL
135   } type;                  /* Symbols are all either TERMINALS or NTs */
136   struct rule *rule;       /* Linked list of rules of this (if an NT) */
137   struct symbol *fallback; /* fallback token in case this token doesn't parse */
138   int prec;                /* Precedence if defined (-1 otherwise) */
139   enum e_assoc {
140     LEFT,
141     RIGHT,
142     NONE,
143     UNK
144   } assoc;                 /* Associativity if precedence is defined */
145   char *firstset;          /* First-set for all rules of this symbol */
146   Boolean lambda;          /* True if NT and can generate an empty string */
147   int useCnt;              /* Number of times used */
148   char *destructor;        /* Code which executes whenever this symbol is
149                            ** popped from the stack during error processing */
150   int destLineno;          /* Line number for start of destructor */
151   char *datatype;          /* The data type of information held by this
152                            ** object. Only used if type==NONTERMINAL */
153   int dtnum;               /* The data type number.  In the parser, the value
154                            ** stack is a union.  The .yy%d element of this
155                            ** union is the correct data type for this object */
156   /* The following fields are used by MULTITERMINALs only */
157   int nsubsym;             /* Number of constituent symbols in the MULTI */
158   struct symbol **subsym;  /* Array of constituent symbols */
159 };
160 
161 /* Each production rule in the grammar is stored in the following
162 ** structure.  */
163 struct rule {
164   struct symbol *lhs;      /* Left-hand side of the rule */
165   char *lhsalias;          /* Alias for the LHS (NULL if none) */
166   int lhsStart;            /* True if left-hand side is the start symbol */
167   int ruleline;            /* Line number for the rule */
168   int nrhs;                /* Number of RHS symbols */
169   struct symbol **rhs;     /* The RHS symbols */
170   char **rhsalias;         /* An alias for each RHS symbol (NULL if none) */
171   int line;                /* Line number at which code begins */
172   char *code;              /* The code executed when this rule is reduced */
173   struct symbol *precsym;  /* Precedence symbol for this rule */
174   int index;               /* An index number for this rule */
175   Boolean canReduce;       /* True if this rule is ever reduced */
176   struct rule *nextlhs;    /* Next rule with the same LHS */
177   struct rule *next;       /* Next rule in the global list */
178 };
179 
180 /* A configuration is a production rule of the grammar together with
181 ** a mark (dot) showing how much of that rule has been processed so far.
182 ** Configurations also contain a follow-set which is a list of terminal
183 ** symbols which are allowed to immediately follow the end of the rule.
184 ** Every configuration is recorded as an instance of the following: */
185 struct config {
186   struct rule *rp;         /* The rule upon which the configuration is based */
187   int dot;                 /* The parse point */
188   char *fws;               /* Follow-set for this configuration only */
189   struct plink *fplp;      /* Follow-set forward propagation links */
190   struct plink *bplp;      /* Follow-set backwards propagation links */
191   struct state *stp;       /* Pointer to state which contains this */
192   enum {
193     COMPLETE,              /* The status is used during followset and */
194     INCOMPLETE             /*    shift computations */
195   } status;
196   struct config *next;     /* Next configuration in the state */
197   struct config *bp;       /* The next basis configuration */
198 };
199 
200 /* Every shift or reduce operation is stored as one of the following */
201 struct action {
202   struct symbol *sp;       /* The look-ahead symbol */
203   enum e_action {
204     SHIFT,
205     ACCEPT,
206     REDUCE,
207     ERROR,
208     SSCONFLICT,              /* A shift/shift conflict */
209     SRCONFLICT,              /* Was a reduce, but part of a conflict */
210     RRCONFLICT,              /* Was a reduce, but part of a conflict */
211     SH_RESOLVED,             /* Was a shift.  Precedence resolved conflict */
212     RD_RESOLVED,             /* Was reduce.  Precedence resolved conflict */
213     NOT_USED                 /* Deleted by compression */
214   } type;
215   union {
216     struct state *stp;     /* The new state, if a shift */
217     struct rule *rp;       /* The rule, if a reduce */
218   } x;
219   struct action *next;     /* Next action for this state */
220   struct action *collide;  /* Next action with the same hash */
221 };
222 
223 /* Each state of the generated parser's finite state machine
224 ** is encoded as an instance of the following structure. */
225 struct state {
226   struct config *bp;       /* The basis configurations for this state */
227   struct config *cfp;      /* All configurations in this set */
228   int statenum;            /* Sequential number for this state */
229   struct action *ap;       /* Array of actions for this state */
230   int nTknAct, nNtAct;     /* Number of actions on terminals and nonterminals */
231   int iTknOfst, iNtOfst;   /* yy_action[] offset for terminals and nonterms */
232   int iDflt;               /* Default action */
233 };
234 #define NO_OFFSET (-2147483647)
235 
236 /* A followset propagation link indicates that the contents of one
237 ** configuration followset should be propagated to another whenever
238 ** the first changes. */
239 struct plink {
240   struct config *cfp;      /* The configuration to which linked */
241   struct plink *next;      /* The next propagate link */
242 };
243 
244 /* The state vector for the entire parser generator is recorded as
245 ** follows.  (LEMON uses no global variables and makes little use of
246 ** static variables.  Fields in the following structure can be thought
247 ** of as begin global variables in the program.) */
248 struct lemon {
249   struct state **sorted;   /* Table of states sorted by state number */
250   struct rule *rule;       /* List of all rules */
251   int nstate;              /* Number of states */
252   int nrule;               /* Number of rules */
253   int nsymbol;             /* Number of terminal and nonterminal symbols */
254   int nterminal;           /* Number of terminal symbols */
255   struct symbol **symbols; /* Sorted array of pointers to symbols */
256   int errorcnt;            /* Number of errors */
257   struct symbol *errsym;   /* The error symbol */
258   struct symbol *wildcard; /* Token that matches anything */
259   char *name;              /* Name of the generated parser */
260   char *arg;               /* Declaration of the 3th argument to parser */
261   char *tokentype;         /* Type of terminal symbols in the parser stack */
262   char *vartype;           /* The default type of non-terminal symbols */
263   char *start;             /* Name of the start symbol for the grammar */
264   char *stacksize;         /* Size of the parser stack */
265   char *include;           /* Code to put at the start of the C file */
266   char *error;             /* Code to execute when an error is seen */
267   char *overflow;          /* Code to execute on a stack overflow */
268   char *failure;           /* Code to execute on parser failure */
269   char *accept;            /* Code to execute when the parser excepts */
270   char *extracode;         /* Code appended to the generated file */
271   char *tokendest;         /* Code to execute to destroy token data */
272   char *vardest;           /* Code for the default non-terminal destructor */
273   char *filename;          /* Name of the input file */
274   char *outname;           /* Name of the current output file */
275   char *tokenprefix;       /* A prefix added to token names in the .h file */
276   int nconflict;           /* Number of parsing conflicts */
277   int tablesize;           /* Size of the parse tables */
278   int basisflag;           /* Print only basis configurations */
279   int has_fallback;        /* True if any %fallback is seen in the grammar */
280   int nolinenosflag;       /* True if #line statements should not be printed */
281   char *argv0;             /* Name of the program */
282 };
283 
284 #define MemoryCheck(X) if((X)==0){ \
285   extern void memory_error(); \
286   memory_error(); \
287 }
288 
289 /**************** From the file "table.h" *********************************/
290 /*
291 ** All code in this file has been automatically generated
292 ** from a specification in the file
293 **              "table.q"
294 ** by the associative array code building program "aagen".
295 ** Do not edit this file!  Instead, edit the specification
296 ** file, then rerun aagen.
297 */
298 /*
299 ** Code for processing tables in the LEMON parser generator.
300 */
301 
302 /* Routines for handling a strings */
303 
304 char *Strsafe();
305 
306 void Strsafe_init(/* void */);
307 int Strsafe_insert(/* char * */);
308 char *Strsafe_find(/* char * */);
309 
310 /* Routines for handling symbols of the grammar */
311 
312 struct symbol *Symbol_new();
313 int Symbolcmpp(const void *void_a, const void *void_b);
314 void Symbol_init(/* void */);
315 int Symbol_insert(/* struct symbol *, char * */);
316 struct symbol *Symbol_find(/* char * */);
317 struct symbol *Symbol_Nth(/* int */);
318 int Symbol_count(/*  */);
319 struct symbol **Symbol_arrayof(/*  */);
320 
321 /* Routines to manage the state table */
322 
323 int Configcmp(/* struct config *, struct config * */);
324 struct state *State_new();
325 void State_init(/* void */);
326 int State_insert(/* struct state *, struct config * */);
327 struct state *State_find(/* struct config * */);
328 struct state **State_arrayof(/*  */);
329 
330 /* Routines used for efficiency in Configlist_add */
331 
332 void Configtable_init(/* void */);
333 int Configtable_insert(/* struct config * */);
334 struct config *Configtable_find(/* struct config * */);
335 void Configtable_clear(/* int(*)(struct config *) */);
336 /****************** From the file "action.c" *******************************/
337 /*
338 ** Routines processing parser actions in the LEMON parser generator.
339 */
340 
341 /* Allocate a new parser action */
Action_new(void)342 static struct action *Action_new(void){
343   static struct action *freelist = 0;
344   struct action *new;
345 
346   if( freelist==0 ){
347     int i;
348     int amt = 100;
349     freelist = (struct action *)calloc(amt, sizeof(struct action));
350     if( freelist==0 ){
351       fprintf(stderr,"Unable to allocate memory for a new parser action.");
352       exit(1);
353     }
354     for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
355     freelist[amt-1].next = 0;
356   }
357   new = freelist;
358   freelist = freelist->next;
359   return new;
360 }
361 
362 /* Compare two actions for sorting purposes.  Return negative, zero, or
363 ** positive if the first action is less than, equal to, or greater than
364 ** the first
365 */
actioncmp(const char * char_ap1,const char * char_ap2)366 static int actioncmp(
367   const char *char_ap1,
368   const char *char_ap2
369 ){
370   const struct action *ap1 = (const struct action *)char_ap1;
371   const struct action *ap2 = (const struct action *)char_ap2;
372   int rc;
373   rc = ap1->sp->index - ap2->sp->index;
374   if( rc==0 ){
375     rc = (int)ap1->type - (int)ap2->type;
376   }
377   if( rc==0 && ap1->type==REDUCE ){
378     rc = ap1->x.rp->index - ap2->x.rp->index;
379   }
380   return rc;
381 }
382 
383 /* Sort parser actions */
Action_sort(struct action * ap)384 static struct action *Action_sort(
385   struct action *ap
386 ){
387   /* Cast to "char **" via "void *" to avoid aliasing problems. */
388   ap = (struct action *)msort((char *)ap,(char **)(void *)&ap->next,actioncmp);
389   return ap;
390 }
391 
Action_add(app,type,sp,arg)392 void Action_add(app,type,sp,arg)
393 struct action **app;
394 enum e_action type;
395 struct symbol *sp;
396 char *arg;
397 {
398   struct action *new;
399   new = Action_new();
400   new->next = *app;
401   *app = new;
402   new->type = type;
403   new->sp = sp;
404   if( type==SHIFT ){
405     new->x.stp = (struct state *)arg;
406   }else{
407     new->x.rp = (struct rule *)arg;
408   }
409 }
410 /********************** New code to implement the "acttab" module ***********/
411 /*
412 ** This module implements routines use to construct the yy_action[] table.
413 */
414 
415 /*
416 ** The state of the yy_action table under construction is an instance of
417 ** the following structure
418 */
419 typedef struct acttab acttab;
420 struct acttab {
421   int nAction;                 /* Number of used slots in aAction[] */
422   int nActionAlloc;            /* Slots allocated for aAction[] */
423   struct {
424     int lookahead;             /* Value of the lookahead token */
425     int action;                /* Action to take on the given lookahead */
426   } *aAction,                  /* The yy_action[] table under construction */
427     *aLookahead;               /* A single new transaction set */
428   int mnLookahead;             /* Minimum aLookahead[].lookahead */
429   int mnAction;                /* Action associated with mnLookahead */
430   int mxLookahead;             /* Maximum aLookahead[].lookahead */
431   int nLookahead;              /* Used slots in aLookahead[] */
432   int nLookaheadAlloc;         /* Slots allocated in aLookahead[] */
433 };
434 
435 /* Return the number of entries in the yy_action table */
436 #define acttab_size(X) ((X)->nAction)
437 
438 /* The value for the N-th entry in yy_action */
439 #define acttab_yyaction(X,N)  ((X)->aAction[N].action)
440 
441 /* The value for the N-th entry in yy_lookahead */
442 #define acttab_yylookahead(X,N)  ((X)->aAction[N].lookahead)
443 
444 /* Free all memory associated with the given acttab */
acttab_free(acttab * p)445 void acttab_free(acttab *p){
446   free( p->aAction );
447   free( p->aLookahead );
448   free( p );
449 }
450 
451 /* Allocate a new acttab structure */
acttab_alloc(void)452 acttab *acttab_alloc(void){
453   acttab *p = calloc( 1, sizeof(*p) );
454   if( p==0 ){
455     fprintf(stderr,"Unable to allocate memory for a new acttab.");
456     exit(1);
457   }
458   memset(p, 0, sizeof(*p));
459   return p;
460 }
461 
462 /* Add a new action to the current transaction set
463 */
acttab_action(acttab * p,int lookahead,int action)464 void acttab_action(acttab *p, int lookahead, int action){
465   if( p->nLookahead>=p->nLookaheadAlloc ){
466     p->nLookaheadAlloc += 25;
467     p->aLookahead = realloc( p->aLookahead,
468                              sizeof(p->aLookahead[0])*p->nLookaheadAlloc );
469     if( p->aLookahead==0 ){
470       fprintf(stderr,"malloc failed\n");
471       exit(1);
472     }
473   }
474   if( p->nLookahead==0 ){
475     p->mxLookahead = lookahead;
476     p->mnLookahead = lookahead;
477     p->mnAction = action;
478   }else{
479     if( p->mxLookahead<lookahead ) p->mxLookahead = lookahead;
480     if( p->mnLookahead>lookahead ){
481       p->mnLookahead = lookahead;
482       p->mnAction = action;
483     }
484   }
485   p->aLookahead[p->nLookahead].lookahead = lookahead;
486   p->aLookahead[p->nLookahead].action = action;
487   p->nLookahead++;
488 }
489 
490 /*
491 ** Add the transaction set built up with prior calls to acttab_action()
492 ** into the current action table.  Then reset the transaction set back
493 ** to an empty set in preparation for a new round of acttab_action() calls.
494 **
495 ** Return the offset into the action table of the new transaction.
496 */
acttab_insert(acttab * p)497 int acttab_insert(acttab *p){
498   int i, j, k, n;
499   assert( p->nLookahead>0 );
500 
501   /* Make sure we have enough space to hold the expanded action table
502   ** in the worst case.  The worst case occurs if the transaction set
503   ** must be appended to the current action table
504   */
505   n = p->mxLookahead + 1;
506   if( p->nAction + n >= p->nActionAlloc ){
507     int oldAlloc = p->nActionAlloc;
508     p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20;
509     p->aAction = realloc( p->aAction,
510                           sizeof(p->aAction[0])*p->nActionAlloc);
511     if( p->aAction==0 ){
512       fprintf(stderr,"malloc failed\n");
513       exit(1);
514     }
515     for(i=oldAlloc; i<p->nActionAlloc; i++){
516       p->aAction[i].lookahead = -1;
517       p->aAction[i].action = -1;
518     }
519   }
520 
521   /* Scan the existing action table looking for an offset where we can
522   ** insert the current transaction set.  Fall out of the loop when that
523   ** offset is found.  In the worst case, we fall out of the loop when
524   ** i reaches p->nAction, which means we append the new transaction set.
525   **
526   ** i is the index in p->aAction[] where p->mnLookahead is inserted.
527   */
528   for(i=0; i<p->nAction+p->mnLookahead; i++){
529     if( p->aAction[i].lookahead<0 ){
530       for(j=0; j<p->nLookahead; j++){
531         k = p->aLookahead[j].lookahead - p->mnLookahead + i;
532         if( k<0 ) break;
533         if( p->aAction[k].lookahead>=0 ) break;
534       }
535       if( j<p->nLookahead ) continue;
536       for(j=0; j<p->nAction; j++){
537         if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break;
538       }
539       if( j==p->nAction ){
540         break;  /* Fits in empty slots */
541       }
542     }else if( p->aAction[i].lookahead==p->mnLookahead ){
543       if( p->aAction[i].action!=p->mnAction ) continue;
544       for(j=0; j<p->nLookahead; j++){
545         k = p->aLookahead[j].lookahead - p->mnLookahead + i;
546         if( k<0 || k>=p->nAction ) break;
547         if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break;
548         if( p->aLookahead[j].action!=p->aAction[k].action ) break;
549       }
550       if( j<p->nLookahead ) continue;
551       n = 0;
552       for(j=0; j<p->nAction; j++){
553         if( p->aAction[j].lookahead<0 ) continue;
554         if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++;
555       }
556       if( n==p->nLookahead ){
557         break;  /* Same as a prior transaction set */
558       }
559     }
560   }
561   /* Insert transaction set at index i. */
562   for(j=0; j<p->nLookahead; j++){
563     k = p->aLookahead[j].lookahead - p->mnLookahead + i;
564     p->aAction[k] = p->aLookahead[j];
565     if( k>=p->nAction ) p->nAction = k+1;
566   }
567   p->nLookahead = 0;
568 
569   /* Return the offset that is added to the lookahead in order to get the
570   ** index into yy_action of the action */
571   return i - p->mnLookahead;
572 }
573 
574 /********************** From the file "build.c" *****************************/
575 /*
576 ** Routines to construction the finite state machine for the LEMON
577 ** parser generator.
578 */
579 
580 /* Find a precedence symbol of every rule in the grammar.
581 **
582 ** Those rules which have a precedence symbol coded in the input
583 ** grammar using the "[symbol]" construct will already have the
584 ** rp->precsym field filled.  Other rules take as their precedence
585 ** symbol the first RHS symbol with a defined precedence.  If there
586 ** are not RHS symbols with a defined precedence, the precedence
587 ** symbol field is left blank.
588 */
FindRulePrecedences(xp)589 void FindRulePrecedences(xp)
590 struct lemon *xp;
591 {
592   struct rule *rp;
593   for(rp=xp->rule; rp; rp=rp->next){
594     if( rp->precsym==0 ){
595       int i, j;
596       for(i=0; i<rp->nrhs && rp->precsym==0; i++){
597         struct symbol *sp = rp->rhs[i];
598         if( sp->type==MULTITERMINAL ){
599           for(j=0; j<sp->nsubsym; j++){
600             if( sp->subsym[j]->prec>=0 ){
601               rp->precsym = sp->subsym[j];
602               break;
603             }
604           }
605         }else if( sp->prec>=0 ){
606           rp->precsym = rp->rhs[i];
607 	}
608       }
609     }
610   }
611   return;
612 }
613 
614 /* Find all nonterminals which will generate the empty string.
615 ** Then go back and compute the first sets of every nonterminal.
616 ** The first set is the set of all terminal symbols which can begin
617 ** a string generated by that nonterminal.
618 */
FindFirstSets(lemp)619 void FindFirstSets(lemp)
620 struct lemon *lemp;
621 {
622   int i, j;
623   struct rule *rp;
624   int progress;
625 
626   for(i=0; i<lemp->nsymbol; i++){
627     lemp->symbols[i]->lambda = LEMON_FALSE;
628   }
629   for(i=lemp->nterminal; i<lemp->nsymbol; i++){
630     lemp->symbols[i]->firstset = SetNew();
631   }
632 
633   /* First compute all lambdas */
634   do{
635     progress = 0;
636     for(rp=lemp->rule; rp; rp=rp->next){
637       if( rp->lhs->lambda ) continue;
638       for(i=0; i<rp->nrhs; i++){
639          struct symbol *sp = rp->rhs[i];
640          if( sp->type!=TERMINAL || sp->lambda==LEMON_FALSE ) break;
641       }
642       if( i==rp->nrhs ){
643         rp->lhs->lambda = LEMON_TRUE;
644         progress = 1;
645       }
646     }
647   }while( progress );
648 
649   /* Now compute all first sets */
650   do{
651     struct symbol *s1, *s2;
652     progress = 0;
653     for(rp=lemp->rule; rp; rp=rp->next){
654       s1 = rp->lhs;
655       for(i=0; i<rp->nrhs; i++){
656         s2 = rp->rhs[i];
657         if( s2->type==TERMINAL ){
658           progress += SetAdd(s1->firstset,s2->index);
659           break;
660         }else if( s2->type==MULTITERMINAL ){
661           for(j=0; j<s2->nsubsym; j++){
662             progress += SetAdd(s1->firstset,s2->subsym[j]->index);
663           }
664           break;
665 	}else if( s1==s2 ){
666           if( s1->lambda==LEMON_FALSE ) break;
667 	}else{
668           progress += SetUnion(s1->firstset,s2->firstset);
669           if( s2->lambda==LEMON_FALSE ) break;
670 	}
671       }
672     }
673   }while( progress );
674   return;
675 }
676 
677 /* Compute all LR(0) states for the grammar.  Links
678 ** are added to between some states so that the LR(1) follow sets
679 ** can be computed later.
680 */
681 PRIVATE struct state *getstate(/* struct lemon * */);  /* forward reference */
FindStates(lemp)682 void FindStates(lemp)
683 struct lemon *lemp;
684 {
685   struct symbol *sp;
686   struct rule *rp;
687 
688   Configlist_init();
689 
690   /* Find the start symbol */
691   if( lemp->start ){
692     sp = Symbol_find(lemp->start);
693     if( sp==0 ){
694       ErrorMsg(lemp->filename,0,
695 "The specified start symbol \"%s\" is not "
696 "in a nonterminal of the grammar.  \"%s\" will be used as the start "
697 "symbol instead.",lemp->start,lemp->rule->lhs->name);
698       lemp->errorcnt++;
699       sp = lemp->rule->lhs;
700     }
701   }else{
702     sp = lemp->rule->lhs;
703   }
704 
705   /* Make sure the start symbol doesn't occur on the right-hand side of
706   ** any rule.  Report an error if it does.  (YACC would generate a new
707   ** start symbol in this case.) */
708   for(rp=lemp->rule; rp; rp=rp->next){
709     int i;
710     for(i=0; i<rp->nrhs; i++){
711       if( rp->rhs[i]==sp ){   /* FIX ME:  Deal with multiterminals */
712         ErrorMsg(lemp->filename,0,
713 "The start symbol \"%s\" occurs on the "
714 "right-hand side of a rule. This will result in a parser which "
715 "does not work properly.",sp->name);
716         lemp->errorcnt++;
717       }
718     }
719   }
720 
721   /* The basis configuration set for the first state
722   ** is all rules which have the start symbol as their
723   ** left-hand side */
724   for(rp=sp->rule; rp; rp=rp->nextlhs){
725     struct config *newcfp;
726     rp->lhsStart = 1;
727     newcfp = Configlist_addbasis(rp,0);
728     SetAdd(newcfp->fws,0);
729   }
730 
731   /* Compute the first state.  All other states will be
732   ** computed automatically during the computation of the first one.
733   ** The returned pointer to the first state is not used. */
734   (void)getstate(lemp);
735   return;
736 }
737 
738 /* Return a pointer to a state which is described by the configuration
739 ** list which has been built from calls to Configlist_add.
740 */
741 PRIVATE void buildshifts(/* struct lemon *, struct state * */); /* Forwd ref */
getstate(lemp)742 PRIVATE struct state *getstate(lemp)
743 struct lemon *lemp;
744 {
745   struct config *cfp, *bp;
746   struct state *stp;
747 
748   /* Extract the sorted basis of the new state.  The basis was constructed
749   ** by prior calls to "Configlist_addbasis()". */
750   Configlist_sortbasis();
751   bp = Configlist_basis();
752 
753   /* Get a state with the same basis */
754   stp = State_find(bp);
755   if( stp ){
756     /* A state with the same basis already exists!  Copy all the follow-set
757     ** propagation links from the state under construction into the
758     ** preexisting state, then return a pointer to the preexisting state */
759     struct config *x, *y;
760     for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){
761       Plink_copy(&y->bplp,x->bplp);
762       Plink_delete(x->fplp);
763       x->fplp = x->bplp = 0;
764     }
765     cfp = Configlist_return();
766     Configlist_eat(cfp);
767   }else{
768     /* This really is a new state.  Construct all the details */
769     Configlist_closure(lemp);    /* Compute the configuration closure */
770     Configlist_sort();           /* Sort the configuration closure */
771     cfp = Configlist_return();   /* Get a pointer to the config list */
772     stp = State_new();           /* A new state structure */
773     MemoryCheck(stp);
774     stp->bp = bp;                /* Remember the configuration basis */
775     stp->cfp = cfp;              /* Remember the configuration closure */
776     stp->statenum = lemp->nstate++; /* Every state gets a sequence number */
777     stp->ap = 0;                 /* No actions, yet. */
778     State_insert(stp,stp->bp);   /* Add to the state table */
779     buildshifts(lemp,stp);       /* Recursively compute successor states */
780   }
781   return stp;
782 }
783 
784 /*
785 ** Return true if two symbols are the same.
786 */
same_symbol(a,b)787 int same_symbol(a,b)
788 struct symbol *a;
789 struct symbol *b;
790 {
791   int i;
792   if( a==b ) return 1;
793   if( a->type!=MULTITERMINAL ) return 0;
794   if( b->type!=MULTITERMINAL ) return 0;
795   if( a->nsubsym!=b->nsubsym ) return 0;
796   for(i=0; i<a->nsubsym; i++){
797     if( a->subsym[i]!=b->subsym[i] ) return 0;
798   }
799   return 1;
800 }
801 
802 /* Construct all successor states to the given state.  A "successor"
803 ** state is any state which can be reached by a shift action.
804 */
buildshifts(lemp,stp)805 PRIVATE void buildshifts(lemp,stp)
806 struct lemon *lemp;
807 struct state *stp;     /* The state from which successors are computed */
808 {
809   struct config *cfp;  /* For looping thru the config closure of "stp" */
810   struct config *bcfp; /* For the inner loop on config closure of "stp" */
811   struct config *new;  /* */
812   struct symbol *sp;   /* Symbol following the dot in configuration "cfp" */
813   struct symbol *bsp;  /* Symbol following the dot in configuration "bcfp" */
814   struct state *newstp; /* A pointer to a successor state */
815 
816   /* Each configuration becomes complete after it contibutes to a successor
817   ** state.  Initially, all configurations are incomplete */
818   for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE;
819 
820   /* Loop through all configurations of the state "stp" */
821   for(cfp=stp->cfp; cfp; cfp=cfp->next){
822     if( cfp->status==COMPLETE ) continue;    /* Already used by inner loop */
823     if( cfp->dot>=cfp->rp->nrhs ) continue;  /* Can't shift this config */
824     Configlist_reset();                      /* Reset the new config set */
825     sp = cfp->rp->rhs[cfp->dot];             /* Symbol after the dot */
826 
827     /* For every configuration in the state "stp" which has the symbol "sp"
828     ** following its dot, add the same configuration to the basis set under
829     ** construction but with the dot shifted one symbol to the right. */
830     for(bcfp=cfp; bcfp; bcfp=bcfp->next){
831       if( bcfp->status==COMPLETE ) continue;    /* Already used */
832       if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */
833       bsp = bcfp->rp->rhs[bcfp->dot];           /* Get symbol after dot */
834       if( !same_symbol(bsp,sp) ) continue;      /* Must be same as for "cfp" */
835       bcfp->status = COMPLETE;                  /* Mark this config as used */
836       new = Configlist_addbasis(bcfp->rp,bcfp->dot+1);
837       Plink_add(&new->bplp,bcfp);
838     }
839 
840     /* Get a pointer to the state described by the basis configuration set
841     ** constructed in the preceding loop */
842     newstp = getstate(lemp);
843 
844     /* The state "newstp" is reached from the state "stp" by a shift action
845     ** on the symbol "sp" */
846     if( sp->type==MULTITERMINAL ){
847       int i;
848       for(i=0; i<sp->nsubsym; i++){
849         Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp);
850       }
851     }else{
852       Action_add(&stp->ap,SHIFT,sp,(char *)newstp);
853     }
854   }
855 }
856 
857 /*
858 ** Construct the propagation links
859 */
FindLinks(lemp)860 void FindLinks(lemp)
861 struct lemon *lemp;
862 {
863   int i;
864   struct config *cfp, *other;
865   struct state *stp;
866   struct plink *plp;
867 
868   /* Housekeeping detail:
869   ** Add to every propagate link a pointer back to the state to
870   ** which the link is attached. */
871   for(i=0; i<lemp->nstate; i++){
872     stp = lemp->sorted[i];
873     for(cfp=stp->cfp; cfp; cfp=cfp->next){
874       cfp->stp = stp;
875     }
876   }
877 
878   /* Convert all backlinks into forward links.  Only the forward
879   ** links are used in the follow-set computation. */
880   for(i=0; i<lemp->nstate; i++){
881     stp = lemp->sorted[i];
882     for(cfp=stp->cfp; cfp; cfp=cfp->next){
883       for(plp=cfp->bplp; plp; plp=plp->next){
884         other = plp->cfp;
885         Plink_add(&other->fplp,cfp);
886       }
887     }
888   }
889 }
890 
891 /* Compute all followsets.
892 **
893 ** A followset is the set of all symbols which can come immediately
894 ** after a configuration.
895 */
FindFollowSets(lemp)896 void FindFollowSets(lemp)
897 struct lemon *lemp;
898 {
899   int i;
900   struct config *cfp;
901   struct plink *plp;
902   int progress;
903   int change;
904 
905   for(i=0; i<lemp->nstate; i++){
906     for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
907       cfp->status = INCOMPLETE;
908     }
909   }
910 
911   do{
912     progress = 0;
913     for(i=0; i<lemp->nstate; i++){
914       for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
915         if( cfp->status==COMPLETE ) continue;
916         for(plp=cfp->fplp; plp; plp=plp->next){
917           change = SetUnion(plp->cfp->fws,cfp->fws);
918           if( change ){
919             plp->cfp->status = INCOMPLETE;
920             progress = 1;
921 	  }
922 	}
923         cfp->status = COMPLETE;
924       }
925     }
926   }while( progress );
927 }
928 
929 static int resolve_conflict();
930 
931 /* Compute the reduce actions, and resolve conflicts.
932 */
FindActions(lemp)933 void FindActions(lemp)
934 struct lemon *lemp;
935 {
936   int i,j;
937   struct config *cfp;
938   struct state *stp;
939   struct symbol *sp;
940   struct rule *rp;
941 
942   /* Add all of the reduce actions
943   ** A reduce action is added for each element of the followset of
944   ** a configuration which has its dot at the extreme right.
945   */
946   for(i=0; i<lemp->nstate; i++){   /* Loop over all states */
947     stp = lemp->sorted[i];
948     for(cfp=stp->cfp; cfp; cfp=cfp->next){  /* Loop over all configurations */
949       if( cfp->rp->nrhs==cfp->dot ){        /* Is dot at extreme right? */
950         for(j=0; j<lemp->nterminal; j++){
951           if( SetFind(cfp->fws,j) ){
952             /* Add a reduce action to the state "stp" which will reduce by the
953             ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */
954             Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp);
955           }
956 	}
957       }
958     }
959   }
960 
961   /* Add the accepting token */
962   if( lemp->start ){
963     sp = Symbol_find(lemp->start);
964     if( sp==0 ) sp = lemp->rule->lhs;
965   }else{
966     sp = lemp->rule->lhs;
967   }
968   /* Add to the first state (which is always the starting state of the
969   ** finite state machine) an action to ACCEPT if the lookahead is the
970   ** start nonterminal.  */
971   Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0);
972 
973   /* Resolve conflicts */
974   for(i=0; i<lemp->nstate; i++){
975     struct action *ap, *nap;
976     struct state *stp;
977     stp = lemp->sorted[i];
978     /* assert( stp->ap ); */
979     stp->ap = Action_sort(stp->ap);
980     for(ap=stp->ap; ap && ap->next; ap=ap->next){
981       for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){
982          /* The two actions "ap" and "nap" have the same lookahead.
983          ** Figure out which one should be used */
984          lemp->nconflict += resolve_conflict(ap,nap);
985       }
986     }
987   }
988 
989   /* Report an error for each rule that can never be reduced. */
990   for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE;
991   for(i=0; i<lemp->nstate; i++){
992     struct action *ap;
993     for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){
994       if( ap->type==REDUCE ) ap->x.rp->canReduce = LEMON_TRUE;
995     }
996   }
997   for(rp=lemp->rule; rp; rp=rp->next){
998     if( rp->canReduce ) continue;
999     ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.");
1000     lemp->errorcnt++;
1001   }
1002 }
1003 
1004 /* Resolve a conflict between the two given actions.  If the
1005 ** conflict can't be resolved, return non-zero.
1006 **
1007 ** NO LONGER TRUE:
1008 **   To resolve a conflict, first look to see if either action
1009 **   is on an error rule.  In that case, take the action which
1010 **   is not associated with the error rule.  If neither or both
1011 **   actions are associated with an error rule, then try to
1012 **   use precedence to resolve the conflict.
1013 **
1014 ** If either action is a SHIFT, then it must be apx.  This
1015 ** function won't work if apx->type==REDUCE and apy->type==SHIFT.
1016 */
resolve_conflict(apx,apy)1017 static int resolve_conflict(apx,apy)
1018 struct action *apx;
1019 struct action *apy;
1020 {
1021   struct symbol *spx, *spy;
1022   int errcnt = 0;
1023   assert( apx->sp==apy->sp );  /* Otherwise there would be no conflict */
1024   if( apx->type==SHIFT && apy->type==SHIFT ){
1025     apy->type = SSCONFLICT;
1026     errcnt++;
1027   }
1028   if( apx->type==SHIFT && apy->type==REDUCE ){
1029     spx = apx->sp;
1030     spy = apy->x.rp->precsym;
1031     if( spy==0 || spx->prec<0 || spy->prec<0 ){
1032       /* Not enough precedence information. */
1033       apy->type = SRCONFLICT;
1034       errcnt++;
1035     }else if( spx->prec>spy->prec ){    /* Lower precedence wins */
1036       apy->type = RD_RESOLVED;
1037     }else if( spx->prec<spy->prec ){
1038       apx->type = SH_RESOLVED;
1039     }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */
1040       apy->type = RD_RESOLVED;                             /* associativity */
1041     }else if( spx->prec==spy->prec && spx->assoc==LEFT ){  /* to break tie */
1042       apx->type = SH_RESOLVED;
1043     }else{
1044       assert( spx->prec==spy->prec && spx->assoc==NONE );
1045       apy->type = SRCONFLICT;
1046       errcnt++;
1047     }
1048   }else if( apx->type==REDUCE && apy->type==REDUCE ){
1049     spx = apx->x.rp->precsym;
1050     spy = apy->x.rp->precsym;
1051     if( spx==0 || spy==0 || spx->prec<0 ||
1052     spy->prec<0 || spx->prec==spy->prec ){
1053       apy->type = RRCONFLICT;
1054       errcnt++;
1055     }else if( spx->prec>spy->prec ){
1056       apy->type = RD_RESOLVED;
1057     }else if( spx->prec<spy->prec ){
1058       apx->type = RD_RESOLVED;
1059     }
1060   }else{
1061     assert(
1062       apx->type==SH_RESOLVED ||
1063       apx->type==RD_RESOLVED ||
1064       apx->type==SSCONFLICT ||
1065       apx->type==SRCONFLICT ||
1066       apx->type==RRCONFLICT ||
1067       apy->type==SH_RESOLVED ||
1068       apy->type==RD_RESOLVED ||
1069       apy->type==SSCONFLICT ||
1070       apy->type==SRCONFLICT ||
1071       apy->type==RRCONFLICT
1072     );
1073     /* The REDUCE/SHIFT case cannot happen because SHIFTs come before
1074     ** REDUCEs on the list.  If we reach this point it must be because
1075     ** the parser conflict had already been resolved. */
1076   }
1077   return errcnt;
1078 }
1079 /********************* From the file "configlist.c" *************************/
1080 /*
1081 ** Routines to processing a configuration list and building a state
1082 ** in the LEMON parser generator.
1083 */
1084 
1085 static struct config *freelist = 0;      /* List of free configurations */
1086 static struct config *current = 0;       /* Top of list of configurations */
1087 static struct config **currentend = 0;   /* Last on list of configs */
1088 static struct config *basis = 0;         /* Top of list of basis configs */
1089 static struct config **basisend = 0;     /* End of list of basis configs */
1090 
1091 /* Return a pointer to a new configuration */
newconfig()1092 PRIVATE struct config *newconfig(){
1093   struct config *new;
1094   if( freelist==0 ){
1095     int i;
1096     int amt = 3;
1097     freelist = (struct config *)calloc( amt, sizeof(struct config) );
1098     if( freelist==0 ){
1099       fprintf(stderr,"Unable to allocate memory for a new configuration.");
1100       exit(1);
1101     }
1102     for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
1103     freelist[amt-1].next = 0;
1104   }
1105   new = freelist;
1106   freelist = freelist->next;
1107   return new;
1108 }
1109 
1110 /* The configuration "old" is no longer used */
deleteconfig(old)1111 PRIVATE void deleteconfig(old)
1112 struct config *old;
1113 {
1114   old->next = freelist;
1115   freelist = old;
1116 }
1117 
1118 /* Initialized the configuration list builder */
Configlist_init()1119 void Configlist_init(){
1120   current = 0;
1121   currentend = &current;
1122   basis = 0;
1123   basisend = &basis;
1124   Configtable_init();
1125   return;
1126 }
1127 
1128 /* Initialized the configuration list builder */
Configlist_reset()1129 void Configlist_reset(){
1130   current = 0;
1131   currentend = &current;
1132   basis = 0;
1133   basisend = &basis;
1134   Configtable_clear(0);
1135   return;
1136 }
1137 
1138 /* Add another configuration to the configuration list */
Configlist_add(rp,dot)1139 struct config *Configlist_add(rp,dot)
1140 struct rule *rp;    /* The rule */
1141 int dot;            /* Index into the RHS of the rule where the dot goes */
1142 {
1143   struct config *cfp, model;
1144 
1145   assert( currentend!=0 );
1146   model.rp = rp;
1147   model.dot = dot;
1148   cfp = Configtable_find(&model);
1149   if( cfp==0 ){
1150     cfp = newconfig();
1151     cfp->rp = rp;
1152     cfp->dot = dot;
1153     cfp->fws = SetNew();
1154     cfp->stp = 0;
1155     cfp->fplp = cfp->bplp = 0;
1156     cfp->next = 0;
1157     cfp->bp = 0;
1158     *currentend = cfp;
1159     currentend = &cfp->next;
1160     Configtable_insert(cfp);
1161   }
1162   return cfp;
1163 }
1164 
1165 /* Add a basis configuration to the configuration list */
Configlist_addbasis(rp,dot)1166 struct config *Configlist_addbasis(rp,dot)
1167 struct rule *rp;
1168 int dot;
1169 {
1170   struct config *cfp, model;
1171 
1172   assert( basisend!=0 );
1173   assert( currentend!=0 );
1174   model.rp = rp;
1175   model.dot = dot;
1176   cfp = Configtable_find(&model);
1177   if( cfp==0 ){
1178     cfp = newconfig();
1179     cfp->rp = rp;
1180     cfp->dot = dot;
1181     cfp->fws = SetNew();
1182     cfp->stp = 0;
1183     cfp->fplp = cfp->bplp = 0;
1184     cfp->next = 0;
1185     cfp->bp = 0;
1186     *currentend = cfp;
1187     currentend = &cfp->next;
1188     *basisend = cfp;
1189     basisend = &cfp->bp;
1190     Configtable_insert(cfp);
1191   }
1192   return cfp;
1193 }
1194 
1195 /* Compute the closure of the configuration list */
Configlist_closure(lemp)1196 void Configlist_closure(lemp)
1197 struct lemon *lemp;
1198 {
1199   struct config *cfp, *newcfp;
1200   struct rule *rp, *newrp;
1201   struct symbol *sp, *xsp;
1202   int i, dot;
1203 
1204   assert( currentend!=0 );
1205   for(cfp=current; cfp; cfp=cfp->next){
1206     rp = cfp->rp;
1207     dot = cfp->dot;
1208     if( dot>=rp->nrhs ) continue;
1209     sp = rp->rhs[dot];
1210     if( sp->type==NONTERMINAL ){
1211       if( sp->rule==0 && sp!=lemp->errsym ){
1212         ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.",
1213           sp->name);
1214         lemp->errorcnt++;
1215       }
1216       for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){
1217         newcfp = Configlist_add(newrp,0);
1218         for(i=dot+1; i<rp->nrhs; i++){
1219           xsp = rp->rhs[i];
1220           if( xsp->type==TERMINAL ){
1221             SetAdd(newcfp->fws,xsp->index);
1222             break;
1223           }else if( xsp->type==MULTITERMINAL ){
1224             int k;
1225             for(k=0; k<xsp->nsubsym; k++){
1226               SetAdd(newcfp->fws, xsp->subsym[k]->index);
1227             }
1228             break;
1229 	  }else{
1230             SetUnion(newcfp->fws,xsp->firstset);
1231             if( xsp->lambda==LEMON_FALSE ) break;
1232 	  }
1233 	}
1234         if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp);
1235       }
1236     }
1237   }
1238   return;
1239 }
1240 
1241 /* Sort the configuration list */
Configlist_sort()1242 void Configlist_sort(){
1243   /* Cast to "char **" via "void *" to avoid aliasing problems. */
1244   current = (struct config *)msort((char *)current,(char **)(void *)&(current->next),Configcmp);
1245   currentend = 0;
1246   return;
1247 }
1248 
1249 /* Sort the basis configuration list */
Configlist_sortbasis()1250 void Configlist_sortbasis(){
1251   /* Cast to "char **" via "void *" to avoid aliasing problems. */
1252   basis = (struct config *)msort((char *)current,(char **)(void *)&(current->bp),Configcmp);
1253   basisend = 0;
1254   return;
1255 }
1256 
1257 /* Return a pointer to the head of the configuration list and
1258 ** reset the list */
Configlist_return()1259 struct config *Configlist_return(){
1260   struct config *old;
1261   old = current;
1262   current = 0;
1263   currentend = 0;
1264   return old;
1265 }
1266 
1267 /* Return a pointer to the head of the configuration list and
1268 ** reset the list */
Configlist_basis()1269 struct config *Configlist_basis(){
1270   struct config *old;
1271   old = basis;
1272   basis = 0;
1273   basisend = 0;
1274   return old;
1275 }
1276 
1277 /* Free all elements of the given configuration list */
Configlist_eat(cfp)1278 void Configlist_eat(cfp)
1279 struct config *cfp;
1280 {
1281   struct config *nextcfp;
1282   for(; cfp; cfp=nextcfp){
1283     nextcfp = cfp->next;
1284     assert( cfp->fplp==0 );
1285     assert( cfp->bplp==0 );
1286     if( cfp->fws ) SetFree(cfp->fws);
1287     deleteconfig(cfp);
1288   }
1289   return;
1290 }
1291 /***************** From the file "error.c" *********************************/
1292 /*
1293 ** Code for printing error message.
1294 */
1295 
1296 /* Find a good place to break "msg" so that its length is at least "min"
1297 ** but no more than "max".  Make the point as close to max as possible.
1298 */
findbreak(msg,min,max)1299 static int findbreak(msg,min,max)
1300 char *msg;
1301 int min;
1302 int max;
1303 {
1304   int i,spot;
1305   char c;
1306   for(i=spot=min; i<=max; i++){
1307     c = msg[i];
1308     if( c=='\t' ) msg[i] = ' ';
1309     if( c=='\n' ){ msg[i] = ' '; spot = i; break; }
1310     if( c==0 ){ spot = i; break; }
1311     if( c=='-' && i<max-1 ) spot = i+1;
1312     if( c==' ' ) spot = i;
1313   }
1314   return spot;
1315 }
1316 
1317 /*
1318 ** The error message is split across multiple lines if necessary.  The
1319 ** splits occur at a space, if there is a space available near the end
1320 ** of the line.
1321 */
1322 #define ERRMSGSIZE  10000 /* Hope this is big enough.  No way to error check */
1323 #define LINEWIDTH      79 /* Max width of any output line */
1324 #define PREFIXLIMIT    60 /* Max width of the prefix on each line */
ErrorMsg(const char * filename,int lineno,const char * format,...)1325 void ErrorMsg(const char *filename, int lineno, const char *format, ...){
1326   char errmsg[ERRMSGSIZE];
1327   char prefix[PREFIXLIMIT+10];
1328   int errmsgsize;
1329   int prefixsize;
1330   int availablewidth;
1331   va_list ap;
1332   int end, restart, base;
1333 
1334   va_start(ap, format);
1335   /* Prepare a prefix to be prepended to every output line */
1336   if( lineno>0 ){
1337     sprintf(prefix,"%.*s:%d: ",PREFIXLIMIT-10,filename,lineno);
1338   }else{
1339     sprintf(prefix,"%.*s: ",PREFIXLIMIT-10,filename);
1340   }
1341   prefixsize = lemonStrlen(prefix);
1342   availablewidth = LINEWIDTH - prefixsize;
1343 
1344   /* Generate the error message */
1345   vsprintf(errmsg,format,ap);
1346   va_end(ap);
1347   errmsgsize = lemonStrlen(errmsg);
1348   /* Remove trailing '\n's from the error message. */
1349   while( errmsgsize>0 && errmsg[errmsgsize-1]=='\n' ){
1350      errmsg[--errmsgsize] = 0;
1351   }
1352 
1353   /* Print the error message */
1354   base = 0;
1355   while( errmsg[base]!=0 ){
1356     end = restart = findbreak(&errmsg[base],0,availablewidth);
1357     restart += base;
1358     while( errmsg[restart]==' ' ) restart++;
1359     fprintf(stdout,"%s%.*s\n",prefix,end,&errmsg[base]);
1360     base = restart;
1361   }
1362 }
1363 /**************** From the file "main.c" ************************************/
1364 /*
1365 ** Main program file for the LEMON parser generator.
1366 */
1367 
1368 /* Report an out-of-memory condition and abort.  This function
1369 ** is used mostly by the "MemoryCheck" macro in struct.h
1370 */
memory_error()1371 void memory_error(){
1372   fprintf(stderr,"Out of memory.  Aborting...\n");
1373   exit(1);
1374 }
1375 
1376 static int nDefine = 0;      /* Number of -D options on the command line */
1377 static char **azDefine = 0;  /* Name of the -D macros */
1378 
1379 /* This routine is called with the argument to each -D command-line option.
1380 ** Add the macro defined to the azDefine array.
1381 */
handle_D_option(char * z)1382 static void handle_D_option(char *z){
1383   char **paz;
1384   nDefine++;
1385   azDefine = realloc(azDefine, sizeof(azDefine[0])*nDefine);
1386   if( azDefine==0 ){
1387     fprintf(stderr,"out of memory\n");
1388     exit(1);
1389   }
1390   paz = &azDefine[nDefine-1];
1391   *paz = malloc( lemonStrlen(z)+1 );
1392   if( *paz==0 ){
1393     fprintf(stderr,"out of memory\n");
1394     exit(1);
1395   }
1396   strcpy(*paz, z);
1397   for(z=*paz; *z && *z!='='; z++){}
1398   *z = 0;
1399 }
1400 
1401 static char *output_filename = 0;  /* Output filename from -o */
1402 
1403 /* This routine is called with the argument to any -o command-line option.
1404 */
handle_o_option(char * z)1405 static void handle_o_option(char *z){
1406   output_filename = malloc( strlen(z)+1 );
1407   if( output_filename==0 ){
1408     fprintf(stderr,"out of memory\n");
1409     exit(1);
1410   }
1411   strcpy(output_filename, z);
1412 }
1413 
1414 static char *output_header_filename = 0;  /* Output filename from -h */
1415 
1416 /* This routine is called with the argument to any -h command-line option.
1417 */
handle_h_option(char * z)1418 static void handle_h_option(char *z){
1419   output_header_filename = malloc( strlen(z)+1 );
1420   if( output_header_filename==0 ){
1421     fprintf(stderr,"out of memory\n");
1422     exit(1);
1423   }
1424   strcpy(output_header_filename, z);
1425 }
1426 
1427 
1428 /* The main program.  Parse the command line and do it... */
main(argc,argv)1429 int main(argc,argv)
1430 int argc;
1431 char **argv;
1432 {
1433   static int version = 0;
1434   static int rpflag = 0;
1435   static int basisflag = 0;
1436   static int compress = 0;
1437   static int quiet = 0;
1438   static int statistics = 0;
1439   static int mhflag = 0;
1440   static int nolinenosflag = 0;
1441   static struct s_options options[] = {
1442     {OPT_FLAG, "b", (void*)&basisflag, 0, "Print only the basis in report."},
1443     {OPT_FLAG, "c", (void*)&compress, 0, "Don't compress the action table."},
1444     {OPT_FSTR, "D", 0, handle_D_option, "Define an %ifdef macro."},
1445     {OPT_FLAG, "g", (void*)&rpflag, 0, "Print grammar without actions."},
1446     {OPT_FLAG, "m", (char*)&mhflag, 0, "Output a makeheaders compatible file."},
1447     {OPT_FLAG, "l", (char*)&nolinenosflag, 0, "Do not print #line statements."},
1448     {OPT_FLAG, "q", (void*)&quiet, 0, "(Quiet) Don't print the report file."},
1449     {OPT_FLAG, "s", (void*)&statistics, 0,
1450                                    "Print parser stats to standard output."},
1451     {OPT_FLAG, "x", (void*)&version, 0, "Print the version number."},
1452     {OPT_FSTR, "o", 0, handle_o_option, "Specify output filename."},
1453     {OPT_FSTR, "h", 0, handle_h_option, "Specify output header filename."},
1454     {OPT_FLAG,0,0,0,0}
1455   };
1456   int i;
1457   struct lemon lem;
1458 
1459   (void)argc; /* Suppress "unused argument" warning. */
1460   OptInit(argv,options,stderr);
1461   if( version ){
1462      printf("Lemon version 1.0 (patched for Xapian)\n");
1463      exit(0);
1464   }
1465   if( OptNArgs()!=1 ){
1466     fprintf(stderr,"Exactly one filename argument is required.\n");
1467     exit(1);
1468   }
1469   memset(&lem, 0, sizeof(lem));
1470   lem.errorcnt = 0;
1471 
1472   /* Initialize the machine */
1473   Strsafe_init();
1474   Symbol_init();
1475   State_init();
1476   lem.argv0 = argv[0];
1477   lem.filename = OptArg(0);
1478   lem.basisflag = basisflag;
1479   lem.nolinenosflag = nolinenosflag;
1480   Symbol_new("$");
1481   lem.errsym = Symbol_new("error");
1482   lem.errsym->useCnt = 0;
1483 
1484   /* Parse the input file */
1485   Parse(&lem);
1486   if( lem.errorcnt ) exit(lem.errorcnt);
1487   if( lem.nrule==0 ){
1488     fprintf(stderr,"Empty grammar.\n");
1489     exit(1);
1490   }
1491 
1492   /* Count and index the symbols of the grammar */
1493   lem.nsymbol = Symbol_count();
1494   Symbol_new("{default}");
1495   lem.symbols = Symbol_arrayof();
1496   for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
1497   qsort(lem.symbols,lem.nsymbol+1,sizeof(struct symbol*),
1498         Symbolcmpp);
1499   for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
1500   for(i=1; isupper(lem.symbols[i]->name[0]); i++);
1501   lem.nterminal = i;
1502 
1503   /* Generate a reprint of the grammar, if requested on the command line */
1504   if( rpflag ){
1505     Reprint(&lem);
1506   }else{
1507     /* Initialize the size for all follow and first sets */
1508     SetSize(lem.nterminal+1);
1509 
1510     /* Find the precedence for every production rule (that has one) */
1511     FindRulePrecedences(&lem);
1512 
1513     /* Compute the lambda-nonterminals and the first-sets for every
1514     ** nonterminal */
1515     FindFirstSets(&lem);
1516 
1517     /* Compute all LR(0) states.  Also record follow-set propagation
1518     ** links so that the follow-set can be computed later */
1519     lem.nstate = 0;
1520     FindStates(&lem);
1521     lem.sorted = State_arrayof();
1522 
1523     /* Tie up loose ends on the propagation links */
1524     FindLinks(&lem);
1525 
1526     /* Compute the follow set of every reducible configuration */
1527     FindFollowSets(&lem);
1528 
1529     /* Compute the action tables */
1530     FindActions(&lem);
1531 
1532     /* Compress the action tables */
1533     if( compress==0 ) CompressTables(&lem);
1534 
1535     /* Reorder and renumber the states so that states with fewer choices
1536     ** occur at the end. */
1537     ResortStates(&lem);
1538 
1539     /* Generate a report of the parser generated.  (the "y.output" file) */
1540     if( !quiet ) ReportOutput(&lem);
1541 
1542     /* Generate the source code for the parser */
1543     ReportTable(&lem, mhflag);
1544 
1545     /* Produce a header file for use by the scanner.  (This step is
1546     ** omitted if the "-m" option is used because makeheaders will
1547     ** generate the file for us.) */
1548     if( !mhflag ) ReportHeader(&lem);
1549   }
1550   if( statistics ){
1551     printf("Parser statistics: %d terminals, %d nonterminals, %d rules\n",
1552       lem.nterminal, lem.nsymbol - lem.nterminal, lem.nrule);
1553     printf("                   %d states, %d parser table entries, %d conflicts\n",
1554       lem.nstate, lem.tablesize, lem.nconflict);
1555   }
1556   if( lem.nconflict ){
1557     fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict);
1558   }
1559   exit(lem.errorcnt + lem.nconflict);
1560   return (lem.errorcnt + lem.nconflict);
1561 }
1562 /******************** From the file "msort.c" *******************************/
1563 /*
1564 ** A generic merge-sort program.
1565 **
1566 ** USAGE:
1567 ** Let "ptr" be a pointer to some structure which is at the head of
1568 ** a null-terminated list.  Then to sort the list call:
1569 **
1570 **     ptr = msort(ptr,&(ptr->next),cmpfnc);
1571 **
1572 ** In the above, "cmpfnc" is a pointer to a function which compares
1573 ** two instances of the structure and returns an integer, as in
1574 ** strcmp.  The second argument is a pointer to the pointer to the
1575 ** second element of the linked list.  This address is used to compute
1576 ** the offset to the "next" field within the structure.  The offset to
1577 ** the "next" field must be constant for all structures in the list.
1578 **
1579 ** The function returns a new pointer which is the head of the list
1580 ** after sorting.
1581 **
1582 ** ALGORITHM:
1583 ** Merge-sort.
1584 */
1585 
1586 /*
1587 ** Return a pointer to the next structure in the linked list.
1588 */
1589 #define NEXT(A) (*(char**)(((char*)A)+offset))
1590 
1591 /*
1592 ** Inputs:
1593 **   a:       A sorted, null-terminated linked list.  (May be null).
1594 **   b:       A sorted, null-terminated linked list.  (May be null).
1595 **   cmp:     A pointer to the comparison function.
1596 **   offset:  Offset in the structure to the "next" field.
1597 **
1598 ** Return Value:
1599 **   A pointer to the head of a sorted list containing the elements
1600 **   of both a and b.
1601 **
1602 ** Side effects:
1603 **   The "next" pointers for elements in the lists a and b are
1604 **   changed.
1605 */
merge(char * a,char * b,int (* cmp)(const char *,const char *),int offset)1606 static char *merge(
1607   char *a,
1608   char *b,
1609   int (*cmp)(const char*,const char*),
1610   int offset
1611 ){
1612   char *ptr, *head;
1613 
1614   if( a==0 ){
1615     head = b;
1616   }else if( b==0 ){
1617     head = a;
1618   }else{
1619     if( (*cmp)(a,b)<0 ){
1620       ptr = a;
1621       a = NEXT(a);
1622     }else{
1623       ptr = b;
1624       b = NEXT(b);
1625     }
1626     head = ptr;
1627     while( a && b ){
1628       if( (*cmp)(a,b)<0 ){
1629         NEXT(ptr) = a;
1630         ptr = a;
1631         a = NEXT(a);
1632       }else{
1633         NEXT(ptr) = b;
1634         ptr = b;
1635         b = NEXT(b);
1636       }
1637     }
1638     if( a ) NEXT(ptr) = a;
1639     else    NEXT(ptr) = b;
1640   }
1641   return head;
1642 }
1643 
1644 /*
1645 ** Inputs:
1646 **   list:      Pointer to a singly-linked list of structures.
1647 **   next:      Pointer to pointer to the second element of the list.
1648 **   cmp:       A comparison function.
1649 **
1650 ** Return Value:
1651 **   A pointer to the head of a sorted list containing the elements
1652 **   orginally in list.
1653 **
1654 ** Side effects:
1655 **   The "next" pointers for elements in list are changed.
1656 */
1657 #define LISTSIZE 30
msort(char * list,char ** next,int (* cmp)(const char *,const char *))1658 static char *msort(
1659   char *list,
1660   char **next,
1661   int (*cmp)(const char*,const char*)
1662 ){
1663   unsigned long offset;
1664   char *ep;
1665   char *set[LISTSIZE];
1666   int i;
1667   offset = (unsigned long)next - (unsigned long)list;
1668   for(i=0; i<LISTSIZE; i++) set[i] = 0;
1669   while( list ){
1670     ep = list;
1671     list = NEXT(list);
1672     NEXT(ep) = 0;
1673     for(i=0; i<LISTSIZE-1 && set[i]!=0; i++){
1674       ep = merge(ep,set[i],cmp,offset);
1675       set[i] = 0;
1676     }
1677     set[i] = ep;
1678   }
1679   ep = 0;
1680   for(i=0; i<LISTSIZE; i++) if( set[i] ) ep = merge(ep,set[i],cmp,offset);
1681   return ep;
1682 }
1683 /************************ From the file "option.c" **************************/
1684 static char **argv;
1685 static struct s_options *op;
1686 static FILE *errstream;
1687 
1688 #define ISOPT(X) ((X)[0]=='-'||(X)[0]=='+'||strchr((X),'=')!=0)
1689 
1690 /*
1691 ** Print the command line with a carrot pointing to the k-th character
1692 ** of the n-th field.
1693 */
errline(n,k,err)1694 static void errline(n,k,err)
1695 int n;
1696 int k;
1697 FILE *err;
1698 {
1699   int spcnt, i;
1700   if( argv[0] ) fprintf(err,"%s",argv[0]);
1701   spcnt = lemonStrlen(argv[0]) + 1;
1702   for(i=1; i<n && argv[i]; i++){
1703     fprintf(err," %s",argv[i]);
1704     spcnt += lemonStrlen(argv[i])+1;
1705   }
1706   spcnt += k;
1707   for(; argv[i]; i++) fprintf(err," %s",argv[i]);
1708   if( spcnt<20 ){
1709     fprintf(err,"\n%*s^-- here\n",spcnt,"");
1710   }else{
1711     fprintf(err,"\n%*shere --^\n",spcnt-7,"");
1712   }
1713 }
1714 
1715 /*
1716 ** Return the index of the N-th non-switch argument.  Return -1
1717 ** if N is out of range.
1718 */
argindex(n)1719 static int argindex(n)
1720 int n;
1721 {
1722   int i;
1723   int dashdash = 0;
1724   if( argv!=0 && *argv!=0 ){
1725     for(i=1; argv[i]; i++){
1726       if( dashdash || !ISOPT(argv[i]) ){
1727         if( n==0 ) return i;
1728         n--;
1729       }
1730       if( strcmp(argv[i],"--")==0 ) dashdash = 1;
1731     }
1732   }
1733   return -1;
1734 }
1735 
1736 static char emsg[] = "Command line syntax error: ";
1737 
1738 /*
1739 ** Process a flag command line argument.
1740 */
handleflags(i,err)1741 static int handleflags(i,err)
1742 int i;
1743 FILE *err;
1744 {
1745   int v;
1746   int errcnt = 0;
1747   int j;
1748   for(j=0; op[j].label; j++){
1749     if( strncmp(&argv[i][1],op[j].label,lemonStrlen(op[j].label))==0 ) break;
1750   }
1751   v = argv[i][0]=='-' ? 1 : 0;
1752   if( op[j].label==0 ){
1753     if( err ){
1754       fprintf(err,"%sundefined option.\n",emsg);
1755       errline(i,1,err);
1756     }
1757     errcnt++;
1758   }else if( op[j].type==OPT_FLAG ){
1759     *((int*)op[j].arg) = v;
1760   }else if( op[j].type==OPT_FFLAG ){
1761     (op[j].func)(v);
1762   }else if( op[j].type==OPT_FSTR ){
1763     (op[j].func)(&argv[i][2]);
1764   }else{
1765     if( err ){
1766       fprintf(err,"%smissing argument on switch.\n",emsg);
1767       errline(i,1,err);
1768     }
1769     errcnt++;
1770   }
1771   return errcnt;
1772 }
1773 
1774 /*
1775 ** Process a command line switch which has an argument.
1776 */
handleswitch(i,err)1777 static int handleswitch(i,err)
1778 int i;
1779 FILE *err;
1780 {
1781   int lv = 0;
1782   double dv = 0.0;
1783   char *sv = 0, *end;
1784   char *cp;
1785   int j;
1786   int errcnt = 0;
1787   cp = strchr(argv[i],'=');
1788   assert( cp!=0 );
1789   *cp = 0;
1790   for(j=0; op[j].label; j++){
1791     if( strcmp(argv[i],op[j].label)==0 ) break;
1792   }
1793   *cp = '=';
1794   if( op[j].label==0 ){
1795     if( err ){
1796       fprintf(err,"%sundefined option.\n",emsg);
1797       errline(i,0,err);
1798     }
1799     errcnt++;
1800   }else{
1801     cp++;
1802     switch( op[j].type ){
1803       case OPT_FLAG:
1804       case OPT_FFLAG:
1805         if( err ){
1806           fprintf(err,"%soption requires an argument.\n",emsg);
1807           errline(i,0,err);
1808         }
1809         errcnt++;
1810         break;
1811       case OPT_DBL:
1812       case OPT_FDBL:
1813         dv = strtod(cp,&end);
1814         if( *end ){
1815           if( err ){
1816             fprintf(err,"%sillegal character in floating-point argument.\n",emsg);
1817             errline(i,((unsigned long)end)-(unsigned long)argv[i],err);
1818           }
1819           errcnt++;
1820         }
1821         break;
1822       case OPT_INT:
1823       case OPT_FINT:
1824         lv = strtol(cp,&end,0);
1825         if( *end ){
1826           if( err ){
1827             fprintf(err,"%sillegal character in integer argument.\n",emsg);
1828             errline(i,((unsigned long)end)-(unsigned long)argv[i],err);
1829           }
1830           errcnt++;
1831         }
1832         break;
1833       case OPT_STR:
1834       case OPT_FSTR:
1835         sv = cp;
1836         break;
1837     }
1838     switch( op[j].type ){
1839       case OPT_FLAG:
1840       case OPT_FFLAG:
1841         break;
1842       case OPT_DBL:
1843         *(double*)(op[j].arg) = dv;
1844         break;
1845       case OPT_FDBL:
1846         (op[j].func)(dv);
1847         break;
1848       case OPT_INT:
1849         *(int*)(op[j].arg) = lv;
1850         break;
1851       case OPT_FINT:
1852         (op[j].func)((int)lv);
1853         break;
1854       case OPT_STR:
1855         *(char**)(op[j].arg) = sv;
1856         break;
1857       case OPT_FSTR:
1858         (op[j].func)(sv);
1859         break;
1860     }
1861   }
1862   return errcnt;
1863 }
1864 
OptInit(a,o,err)1865 int OptInit(a,o,err)
1866 char **a;
1867 struct s_options *o;
1868 FILE *err;
1869 {
1870   int errcnt = 0;
1871   argv = a;
1872   op = o;
1873   errstream = err;
1874   if( argv && *argv && op ){
1875     int i;
1876     for(i=1; argv[i]; i++){
1877       if( argv[i][0]=='+' || argv[i][0]=='-' ){
1878         errcnt += handleflags(i,err);
1879       }else if( strchr(argv[i],'=') ){
1880         errcnt += handleswitch(i,err);
1881       }
1882     }
1883   }
1884   if( errcnt>0 ){
1885     fprintf(err,"Valid command line options for \"%s\" are:\n",*a);
1886     OptPrint();
1887     exit(1);
1888   }
1889   return 0;
1890 }
1891 
OptNArgs()1892 int OptNArgs(){
1893   int cnt = 0;
1894   int dashdash = 0;
1895   int i;
1896   if( argv!=0 && argv[0]!=0 ){
1897     for(i=1; argv[i]; i++){
1898       if( dashdash || !ISOPT(argv[i]) ) cnt++;
1899       if( strcmp(argv[i],"--")==0 ) dashdash = 1;
1900     }
1901   }
1902   return cnt;
1903 }
1904 
OptArg(n)1905 char *OptArg(n)
1906 int n;
1907 {
1908   int i;
1909   i = argindex(n);
1910   return i>=0 ? argv[i] : 0;
1911 }
1912 
OptErr(n)1913 void OptErr(n)
1914 int n;
1915 {
1916   int i;
1917   i = argindex(n);
1918   if( i>=0 ) errline(i,0,errstream);
1919 }
1920 
OptPrint()1921 void OptPrint(){
1922   int i;
1923   int max, len;
1924   max = 0;
1925   for(i=0; op[i].label; i++){
1926     len = lemonStrlen(op[i].label) + 1;
1927     switch( op[i].type ){
1928       case OPT_FLAG:
1929       case OPT_FFLAG:
1930         break;
1931       case OPT_INT:
1932       case OPT_FINT:
1933         len += 9;       /* length of "<integer>" */
1934         break;
1935       case OPT_DBL:
1936       case OPT_FDBL:
1937         len += 6;       /* length of "<real>" */
1938         break;
1939       case OPT_STR:
1940       case OPT_FSTR:
1941         len += 8;       /* length of "<string>" */
1942         break;
1943     }
1944     if( len>max ) max = len;
1945   }
1946   for(i=0; op[i].label; i++){
1947     switch( op[i].type ){
1948       case OPT_FLAG:
1949       case OPT_FFLAG:
1950         fprintf(errstream,"  -%-*s  %s\n",max,op[i].label,op[i].message);
1951         break;
1952       case OPT_INT:
1953       case OPT_FINT:
1954         fprintf(errstream,"  %s=<integer>%*s  %s\n",op[i].label,
1955           (int)(max-lemonStrlen(op[i].label)-9),"",op[i].message);
1956         break;
1957       case OPT_DBL:
1958       case OPT_FDBL:
1959         fprintf(errstream,"  %s=<real>%*s  %s\n",op[i].label,
1960           (int)(max-lemonStrlen(op[i].label)-6),"",op[i].message);
1961         break;
1962       case OPT_STR:
1963       case OPT_FSTR:
1964         fprintf(errstream,"  %s=<string>%*s  %s\n",op[i].label,
1965           (int)(max-lemonStrlen(op[i].label)-8),"",op[i].message);
1966         break;
1967     }
1968   }
1969 }
1970 /*********************** From the file "parse.c" ****************************/
1971 /*
1972 ** Input file parser for the LEMON parser generator.
1973 */
1974 
1975 /* The state of the parser */
1976 struct pstate {
1977   char *filename;       /* Name of the input file */
1978   int tokenlineno;      /* Linenumber at which current token starts */
1979   int errorcnt;         /* Number of errors so far */
1980   char *tokenstart;     /* Text of current token */
1981   struct lemon *gp;     /* Global state vector */
1982   enum e_state {
1983     INITIALIZE,
1984     WAITING_FOR_DECL_OR_RULE,
1985     WAITING_FOR_DECL_KEYWORD,
1986     WAITING_FOR_DECL_ARG,
1987     WAITING_FOR_PRECEDENCE_SYMBOL,
1988     WAITING_FOR_ARROW,
1989     IN_RHS,
1990     LHS_ALIAS_1,
1991     LHS_ALIAS_2,
1992     LHS_ALIAS_3,
1993     RHS_ALIAS_1,
1994     RHS_ALIAS_2,
1995     PRECEDENCE_MARK_1,
1996     PRECEDENCE_MARK_2,
1997     RESYNC_AFTER_RULE_ERROR,
1998     RESYNC_AFTER_DECL_ERROR,
1999     WAITING_FOR_DESTRUCTOR_SYMBOL,
2000     WAITING_FOR_DATATYPE_SYMBOL,
2001     WAITING_FOR_FALLBACK_ID,
2002     WAITING_FOR_WILDCARD_ID
2003   } state;                   /* The state of the parser */
2004   struct symbol *fallback;   /* The fallback token */
2005   struct symbol *lhs;        /* Left-hand side of current rule */
2006   char *lhsalias;            /* Alias for the LHS */
2007   int nrhs;                  /* Number of right-hand side symbols seen */
2008   struct symbol *rhs[MAXRHS];  /* RHS symbols */
2009   char *alias[MAXRHS];       /* Aliases for each RHS symbol (or NULL) */
2010   struct rule *prevrule;     /* Previous rule parsed */
2011   char *declkeyword;         /* Keyword of a declaration */
2012   char **declargslot;        /* Where the declaration argument should be put */
2013   int insertLineMacro;       /* Add #line before declaration insert */
2014   int *decllinenoslot;       /* Where to write declaration line number */
2015   enum e_assoc declassoc;    /* Assign this association to decl arguments */
2016   int preccounter;           /* Assign this precedence to decl arguments */
2017   struct rule *firstrule;    /* Pointer to first rule in the grammar */
2018   struct rule *lastrule;     /* Pointer to the most recently parsed rule */
2019 };
2020 
2021 /* Parse a single token */
parseonetoken(psp)2022 static void parseonetoken(psp)
2023 struct pstate *psp;
2024 {
2025   char *x;
2026   x = Strsafe(psp->tokenstart);     /* Save the token permanently */
2027 #if 0
2028   printf("%s:%d: Token=[%s] state=%d\n",psp->filename,psp->tokenlineno,
2029     x,psp->state);
2030 #endif
2031   switch( psp->state ){
2032     case INITIALIZE:
2033       psp->prevrule = 0;
2034       psp->preccounter = 0;
2035       psp->firstrule = psp->lastrule = 0;
2036       psp->gp->nrule = 0;
2037       /* Fall thru to next case */
2038     case WAITING_FOR_DECL_OR_RULE:
2039       if( x[0]=='%' ){
2040         psp->state = WAITING_FOR_DECL_KEYWORD;
2041       }else if( islower(x[0]) ){
2042         psp->lhs = Symbol_new(x);
2043         psp->nrhs = 0;
2044         psp->lhsalias = 0;
2045         psp->state = WAITING_FOR_ARROW;
2046       }else if( x[0]=='{' ){
2047         if( psp->prevrule==0 ){
2048           ErrorMsg(psp->filename,psp->tokenlineno,
2049 "There is no prior rule upon which to attach the code "
2050 "fragment which begins on this line.");
2051           psp->errorcnt++;
2052 	}else if( psp->prevrule->code!=0 ){
2053           ErrorMsg(psp->filename,psp->tokenlineno,
2054 "Code fragment beginning on this line is not the first "
2055 "to follow the previous rule.");
2056           psp->errorcnt++;
2057         }else{
2058           psp->prevrule->line = psp->tokenlineno;
2059           psp->prevrule->code = &x[1];
2060 	}
2061       }else if( x[0]=='[' ){
2062         psp->state = PRECEDENCE_MARK_1;
2063       }else{
2064         ErrorMsg(psp->filename,psp->tokenlineno,
2065           "Token \"%s\" should be either \"%%\" or a nonterminal name.",
2066           x);
2067         psp->errorcnt++;
2068       }
2069       break;
2070     case PRECEDENCE_MARK_1:
2071       if( !isupper(x[0]) ){
2072         ErrorMsg(psp->filename,psp->tokenlineno,
2073           "The precedence symbol must be a terminal.");
2074         psp->errorcnt++;
2075       }else if( psp->prevrule==0 ){
2076         ErrorMsg(psp->filename,psp->tokenlineno,
2077           "There is no prior rule to assign precedence \"[%s]\".",x);
2078         psp->errorcnt++;
2079       }else if( psp->prevrule->precsym!=0 ){
2080         ErrorMsg(psp->filename,psp->tokenlineno,
2081 "Precedence mark on this line is not the first "
2082 "to follow the previous rule.");
2083         psp->errorcnt++;
2084       }else{
2085         psp->prevrule->precsym = Symbol_new(x);
2086       }
2087       psp->state = PRECEDENCE_MARK_2;
2088       break;
2089     case PRECEDENCE_MARK_2:
2090       if( x[0]!=']' ){
2091         ErrorMsg(psp->filename,psp->tokenlineno,
2092           "Missing \"]\" on precedence mark.");
2093         psp->errorcnt++;
2094       }
2095       psp->state = WAITING_FOR_DECL_OR_RULE;
2096       break;
2097     case WAITING_FOR_ARROW:
2098       if( x[0]==':' && x[1]==':' && x[2]=='=' ){
2099         psp->state = IN_RHS;
2100       }else if( x[0]=='(' ){
2101         psp->state = LHS_ALIAS_1;
2102       }else{
2103         ErrorMsg(psp->filename,psp->tokenlineno,
2104           "Expected to see a \":\" following the LHS symbol \"%s\".",
2105           psp->lhs->name);
2106         psp->errorcnt++;
2107         psp->state = RESYNC_AFTER_RULE_ERROR;
2108       }
2109       break;
2110     case LHS_ALIAS_1:
2111       if( isalpha(x[0]) ){
2112         psp->lhsalias = x;
2113         psp->state = LHS_ALIAS_2;
2114       }else{
2115         ErrorMsg(psp->filename,psp->tokenlineno,
2116           "\"%s\" is not a valid alias for the LHS \"%s\"",
2117           x,psp->lhs->name);
2118         psp->errorcnt++;
2119         psp->state = RESYNC_AFTER_RULE_ERROR;
2120       }
2121       break;
2122     case LHS_ALIAS_2:
2123       if( x[0]==')' ){
2124         psp->state = LHS_ALIAS_3;
2125       }else{
2126         ErrorMsg(psp->filename,psp->tokenlineno,
2127           "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias);
2128         psp->errorcnt++;
2129         psp->state = RESYNC_AFTER_RULE_ERROR;
2130       }
2131       break;
2132     case LHS_ALIAS_3:
2133       if( x[0]==':' && x[1]==':' && x[2]=='=' ){
2134         psp->state = IN_RHS;
2135       }else{
2136         ErrorMsg(psp->filename,psp->tokenlineno,
2137           "Missing \"->\" following: \"%s(%s)\".",
2138            psp->lhs->name,psp->lhsalias);
2139         psp->errorcnt++;
2140         psp->state = RESYNC_AFTER_RULE_ERROR;
2141       }
2142       break;
2143     case IN_RHS:
2144       if( x[0]=='.' ){
2145         struct rule *rp;
2146         rp = (struct rule *)calloc( sizeof(struct rule) +
2147              sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs, 1);
2148         if( rp==0 ){
2149           ErrorMsg(psp->filename,psp->tokenlineno,
2150             "Can't allocate enough memory for this rule.");
2151           psp->errorcnt++;
2152           psp->prevrule = 0;
2153 	}else{
2154           int i;
2155           rp->ruleline = psp->tokenlineno;
2156           rp->rhs = (struct symbol**)&rp[1];
2157           rp->rhsalias = (char**)&(rp->rhs[psp->nrhs]);
2158           for(i=0; i<psp->nrhs; i++){
2159             rp->rhs[i] = psp->rhs[i];
2160             rp->rhsalias[i] = psp->alias[i];
2161 	  }
2162           rp->lhs = psp->lhs;
2163           rp->lhsalias = psp->lhsalias;
2164           rp->nrhs = psp->nrhs;
2165           rp->code = 0;
2166           rp->precsym = 0;
2167           rp->index = psp->gp->nrule++;
2168           rp->nextlhs = rp->lhs->rule;
2169           rp->lhs->rule = rp;
2170           rp->next = 0;
2171           if( psp->firstrule==0 ){
2172             psp->firstrule = psp->lastrule = rp;
2173 	  }else{
2174             psp->lastrule->next = rp;
2175             psp->lastrule = rp;
2176 	  }
2177           psp->prevrule = rp;
2178 	}
2179         psp->state = WAITING_FOR_DECL_OR_RULE;
2180       }else if( isalpha(x[0]) ){
2181         if( psp->nrhs>=MAXRHS ){
2182           ErrorMsg(psp->filename,psp->tokenlineno,
2183             "Too many symbols on RHS of rule beginning at \"%s\".",
2184             x);
2185           psp->errorcnt++;
2186           psp->state = RESYNC_AFTER_RULE_ERROR;
2187 	}else{
2188           psp->rhs[psp->nrhs] = Symbol_new(x);
2189           psp->alias[psp->nrhs] = 0;
2190           psp->nrhs++;
2191 	}
2192       }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 ){
2193         struct symbol *msp = psp->rhs[psp->nrhs-1];
2194         if( msp->type!=MULTITERMINAL ){
2195           struct symbol *origsp = msp;
2196           msp = calloc(1,sizeof(*msp));
2197           memset(msp, 0, sizeof(*msp));
2198           msp->type = MULTITERMINAL;
2199           msp->nsubsym = 1;
2200           msp->subsym = calloc(1,sizeof(struct symbol*));
2201           msp->subsym[0] = origsp;
2202           msp->name = origsp->name;
2203           psp->rhs[psp->nrhs-1] = msp;
2204         }
2205         msp->nsubsym++;
2206         msp->subsym = realloc(msp->subsym, sizeof(struct symbol*)*msp->nsubsym);
2207         msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]);
2208         if( islower(x[1]) || islower(msp->subsym[0]->name[0]) ){
2209           ErrorMsg(psp->filename,psp->tokenlineno,
2210             "Cannot form a compound containing a non-terminal");
2211           psp->errorcnt++;
2212         }
2213       }else if( x[0]=='(' && psp->nrhs>0 ){
2214         psp->state = RHS_ALIAS_1;
2215       }else{
2216         ErrorMsg(psp->filename,psp->tokenlineno,
2217           "Illegal character on RHS of rule: \"%s\".",x);
2218         psp->errorcnt++;
2219         psp->state = RESYNC_AFTER_RULE_ERROR;
2220       }
2221       break;
2222     case RHS_ALIAS_1:
2223       if( isalpha(x[0]) ){
2224         psp->alias[psp->nrhs-1] = x;
2225         psp->state = RHS_ALIAS_2;
2226       }else{
2227         ErrorMsg(psp->filename,psp->tokenlineno,
2228           "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n",
2229           x,psp->rhs[psp->nrhs-1]->name);
2230         psp->errorcnt++;
2231         psp->state = RESYNC_AFTER_RULE_ERROR;
2232       }
2233       break;
2234     case RHS_ALIAS_2:
2235       if( x[0]==')' ){
2236         psp->state = IN_RHS;
2237       }else{
2238         ErrorMsg(psp->filename,psp->tokenlineno,
2239           "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias);
2240         psp->errorcnt++;
2241         psp->state = RESYNC_AFTER_RULE_ERROR;
2242       }
2243       break;
2244     case WAITING_FOR_DECL_KEYWORD:
2245       if( isalpha(x[0]) ){
2246         psp->declkeyword = x;
2247         psp->declargslot = 0;
2248         psp->decllinenoslot = 0;
2249         psp->insertLineMacro = 1;
2250         psp->state = WAITING_FOR_DECL_ARG;
2251         if( strcmp(x,"name")==0 ){
2252           psp->declargslot = &(psp->gp->name);
2253           psp->insertLineMacro = 0;
2254 	}else if( strcmp(x,"include")==0 ){
2255           psp->declargslot = &(psp->gp->include);
2256 	}else if( strcmp(x,"code")==0 ){
2257           psp->declargslot = &(psp->gp->extracode);
2258 	}else if( strcmp(x,"token_destructor")==0 ){
2259           psp->declargslot = &psp->gp->tokendest;
2260 	}else if( strcmp(x,"default_destructor")==0 ){
2261           psp->declargslot = &psp->gp->vardest;
2262 	}else if( strcmp(x,"token_prefix")==0 ){
2263           psp->declargslot = &psp->gp->tokenprefix;
2264           psp->insertLineMacro = 0;
2265 	}else if( strcmp(x,"syntax_error")==0 ){
2266           psp->declargslot = &(psp->gp->error);
2267 	}else if( strcmp(x,"parse_accept")==0 ){
2268           psp->declargslot = &(psp->gp->accept);
2269 	}else if( strcmp(x,"parse_failure")==0 ){
2270           psp->declargslot = &(psp->gp->failure);
2271 	}else if( strcmp(x,"stack_overflow")==0 ){
2272           psp->declargslot = &(psp->gp->overflow);
2273         }else if( strcmp(x,"extra_argument")==0 ){
2274           psp->declargslot = &(psp->gp->arg);
2275           psp->insertLineMacro = 0;
2276         }else if( strcmp(x,"token_type")==0 ){
2277           psp->declargslot = &(psp->gp->tokentype);
2278           psp->insertLineMacro = 0;
2279         }else if( strcmp(x,"default_type")==0 ){
2280           psp->declargslot = &(psp->gp->vartype);
2281           psp->insertLineMacro = 0;
2282         }else if( strcmp(x,"stack_size")==0 ){
2283           psp->declargslot = &(psp->gp->stacksize);
2284           psp->insertLineMacro = 0;
2285         }else if( strcmp(x,"start_symbol")==0 ){
2286           psp->declargslot = &(psp->gp->start);
2287           psp->insertLineMacro = 0;
2288         }else if( strcmp(x,"left")==0 ){
2289           psp->preccounter++;
2290           psp->declassoc = LEFT;
2291           psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2292         }else if( strcmp(x,"right")==0 ){
2293           psp->preccounter++;
2294           psp->declassoc = RIGHT;
2295           psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2296         }else if( strcmp(x,"nonassoc")==0 ){
2297           psp->preccounter++;
2298           psp->declassoc = NONE;
2299           psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2300 	}else if( strcmp(x,"destructor")==0 ){
2301           psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL;
2302 	}else if( strcmp(x,"type")==0 ){
2303           psp->state = WAITING_FOR_DATATYPE_SYMBOL;
2304         }else if( strcmp(x,"fallback")==0 ){
2305           psp->fallback = 0;
2306           psp->state = WAITING_FOR_FALLBACK_ID;
2307         }else if( strcmp(x,"wildcard")==0 ){
2308           psp->state = WAITING_FOR_WILDCARD_ID;
2309         }else{
2310           ErrorMsg(psp->filename,psp->tokenlineno,
2311             "Unknown declaration keyword: \"%%%s\".",x);
2312           psp->errorcnt++;
2313           psp->state = RESYNC_AFTER_DECL_ERROR;
2314 	}
2315       }else{
2316         ErrorMsg(psp->filename,psp->tokenlineno,
2317           "Illegal declaration keyword: \"%s\".",x);
2318         psp->errorcnt++;
2319         psp->state = RESYNC_AFTER_DECL_ERROR;
2320       }
2321       break;
2322     case WAITING_FOR_DESTRUCTOR_SYMBOL:
2323       if( !isalpha(x[0]) ){
2324         ErrorMsg(psp->filename,psp->tokenlineno,
2325           "Symbol name missing after %%destructor keyword");
2326         psp->errorcnt++;
2327         psp->state = RESYNC_AFTER_DECL_ERROR;
2328       }else{
2329         struct symbol *sp = Symbol_new(x);
2330         psp->declargslot = &sp->destructor;
2331         psp->decllinenoslot = &sp->destLineno;
2332         psp->insertLineMacro = 1;
2333         psp->state = WAITING_FOR_DECL_ARG;
2334       }
2335       break;
2336     case WAITING_FOR_DATATYPE_SYMBOL:
2337       if( !isalpha(x[0]) ){
2338         ErrorMsg(psp->filename,psp->tokenlineno,
2339           "Symbol name missing after %%destructor keyword");
2340         psp->errorcnt++;
2341         psp->state = RESYNC_AFTER_DECL_ERROR;
2342       }else{
2343         struct symbol *sp = Symbol_new(x);
2344         psp->declargslot = &sp->datatype;
2345         psp->insertLineMacro = 0;
2346         psp->state = WAITING_FOR_DECL_ARG;
2347       }
2348       break;
2349     case WAITING_FOR_PRECEDENCE_SYMBOL:
2350       if( x[0]=='.' ){
2351         psp->state = WAITING_FOR_DECL_OR_RULE;
2352       }else if( isupper(x[0]) ){
2353         struct symbol *sp;
2354         sp = Symbol_new(x);
2355         if( sp->prec>=0 ){
2356           ErrorMsg(psp->filename,psp->tokenlineno,
2357             "Symbol \"%s\" has already be given a precedence.",x);
2358           psp->errorcnt++;
2359 	}else{
2360           sp->prec = psp->preccounter;
2361           sp->assoc = psp->declassoc;
2362 	}
2363       }else{
2364         ErrorMsg(psp->filename,psp->tokenlineno,
2365           "Can't assign a precedence to \"%s\".",x);
2366         psp->errorcnt++;
2367       }
2368       break;
2369     case WAITING_FOR_DECL_ARG:
2370       if( x[0]=='{' || x[0]=='\"' || isalnum(x[0]) ){
2371         char *zOld, *zNew, *zBuf, *z;
2372         int nOld, n, nLine, nNew, nBack;
2373         int addLineMacro;
2374         char zLine[50];
2375         zNew = x;
2376         if( zNew[0]=='"' || zNew[0]=='{' ) zNew++;
2377         nNew = lemonStrlen(zNew);
2378         if( *psp->declargslot ){
2379           zOld = *psp->declargslot;
2380         }else{
2381           zOld = "";
2382         }
2383         nOld = lemonStrlen(zOld);
2384         n = nOld + nNew + 20;
2385         addLineMacro = !psp->gp->nolinenosflag && psp->insertLineMacro &&
2386                         (psp->decllinenoslot==0 || psp->decllinenoslot[0]!=0);
2387         if( addLineMacro ){
2388           for(z=psp->filename, nBack=0; *z; z++){
2389             if( *z=='\\' ) nBack++;
2390           }
2391           sprintf(zLine, "#line %d ", psp->tokenlineno);
2392           nLine = lemonStrlen(zLine);
2393           n += nLine + lemonStrlen(psp->filename) + nBack;
2394         }
2395         *psp->declargslot = zBuf = realloc(*psp->declargslot, n);
2396         zBuf += nOld;
2397         if( addLineMacro ){
2398           if( nOld && zBuf[-1]!='\n' ){
2399             *(zBuf++) = '\n';
2400           }
2401           memcpy(zBuf, zLine, nLine);
2402           zBuf += nLine;
2403           *(zBuf++) = '"';
2404           for(z=psp->filename; *z; z++){
2405             if( *z=='\\' ){
2406               *(zBuf++) = '\\';
2407             }
2408             *(zBuf++) = *z;
2409           }
2410           *(zBuf++) = '"';
2411           *(zBuf++) = '\n';
2412         }
2413         if( psp->decllinenoslot && psp->decllinenoslot[0]==0 ){
2414           psp->decllinenoslot[0] = psp->tokenlineno;
2415         }
2416         memcpy(zBuf, zNew, nNew);
2417         zBuf += nNew;
2418         *zBuf = 0;
2419         psp->state = WAITING_FOR_DECL_OR_RULE;
2420       }else{
2421         ErrorMsg(psp->filename,psp->tokenlineno,
2422           "Illegal argument to %%%s: %s",psp->declkeyword,x);
2423         psp->errorcnt++;
2424         psp->state = RESYNC_AFTER_DECL_ERROR;
2425       }
2426       break;
2427     case WAITING_FOR_FALLBACK_ID:
2428       if( x[0]=='.' ){
2429         psp->state = WAITING_FOR_DECL_OR_RULE;
2430       }else if( !isupper(x[0]) ){
2431         ErrorMsg(psp->filename, psp->tokenlineno,
2432           "%%fallback argument \"%s\" should be a token", x);
2433         psp->errorcnt++;
2434       }else{
2435         struct symbol *sp = Symbol_new(x);
2436         if( psp->fallback==0 ){
2437           psp->fallback = sp;
2438         }else if( sp->fallback ){
2439           ErrorMsg(psp->filename, psp->tokenlineno,
2440             "More than one fallback assigned to token %s", x);
2441           psp->errorcnt++;
2442         }else{
2443           sp->fallback = psp->fallback;
2444           psp->gp->has_fallback = 1;
2445         }
2446       }
2447       break;
2448     case WAITING_FOR_WILDCARD_ID:
2449       if( x[0]=='.' ){
2450         psp->state = WAITING_FOR_DECL_OR_RULE;
2451       }else if( !isupper(x[0]) ){
2452         ErrorMsg(psp->filename, psp->tokenlineno,
2453           "%%wildcard argument \"%s\" should be a token", x);
2454         psp->errorcnt++;
2455       }else{
2456         struct symbol *sp = Symbol_new(x);
2457         if( psp->gp->wildcard==0 ){
2458           psp->gp->wildcard = sp;
2459         }else{
2460           ErrorMsg(psp->filename, psp->tokenlineno,
2461             "Extra wildcard to token: %s", x);
2462           psp->errorcnt++;
2463         }
2464       }
2465       break;
2466     case RESYNC_AFTER_RULE_ERROR:
2467 /*      if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
2468 **      break; */
2469     case RESYNC_AFTER_DECL_ERROR:
2470       if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
2471       if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD;
2472       break;
2473   }
2474 }
2475 
2476 /* Run the preprocessor over the input file text.  The global variables
2477 ** azDefine[0] through azDefine[nDefine-1] contains the names of all defined
2478 ** macros.  This routine looks for "%ifdef" and "%ifndef" and "%endif" and
2479 ** comments them out.  Text in between is also commented out as appropriate.
2480 */
preprocess_input(char * z)2481 static void preprocess_input(char *z){
2482   int i, j, k, n;
2483   int exclude = 0;
2484   int start = 0;
2485   int lineno = 1;
2486   int start_lineno = 1;
2487   for(i=0; z[i]; i++){
2488     if( z[i]=='\n' ) lineno++;
2489     if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue;
2490     if( strncmp(&z[i],"%endif",6)==0 && isspace(z[i+6]) ){
2491       if( exclude ){
2492         exclude--;
2493         if( exclude==0 ){
2494           for(j=start; j<i; j++) if( z[j]!='\n' ) z[j] = ' ';
2495         }
2496       }
2497       for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' ';
2498     }else if( (strncmp(&z[i],"%ifdef",6)==0 && isspace(z[i+6]))
2499           || (strncmp(&z[i],"%ifndef",7)==0 && isspace(z[i+7])) ){
2500       if( exclude ){
2501         exclude++;
2502       }else{
2503         for(j=i+7; isspace(z[j]); j++){}
2504         for(n=0; z[j+n] && !isspace(z[j+n]); n++){}
2505         exclude = 1;
2506         for(k=0; k<nDefine; k++){
2507           if( strncmp(azDefine[k],&z[j],n)==0 && lemonStrlen(azDefine[k])==n ){
2508             exclude = 0;
2509             break;
2510           }
2511         }
2512         if( z[i+3]=='n' ) exclude = !exclude;
2513         if( exclude ){
2514           start = i;
2515           start_lineno = lineno;
2516         }
2517       }
2518       for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' ';
2519     }
2520   }
2521   if( exclude ){
2522     fprintf(stderr,"unterminated %%ifdef starting on line %d\n", start_lineno);
2523     exit(1);
2524   }
2525 }
2526 
2527 /* In spite of its name, this function is really a scanner.  It read
2528 ** in the entire input file (all at once) then tokenizes it.  Each
2529 ** token is passed to the function "parseonetoken" which builds all
2530 ** the appropriate data structures in the global state vector "gp".
2531 */
Parse(gp)2532 void Parse(gp)
2533 struct lemon *gp;
2534 {
2535   struct pstate ps;
2536   FILE *fp;
2537   char *filebuf;
2538   int filesize;
2539   int lineno;
2540   int c;
2541   char *cp, *nextcp;
2542   int startline = 0;
2543 
2544   memset(&ps, '\0', sizeof(ps));
2545   ps.gp = gp;
2546   ps.filename = gp->filename;
2547   ps.errorcnt = 0;
2548   ps.state = INITIALIZE;
2549 
2550   /* Begin by reading the input file */
2551   fp = fopen(ps.filename,"rb");
2552   if( fp==0 ){
2553     ErrorMsg(ps.filename,0,"Can't open this file for reading.");
2554     gp->errorcnt++;
2555     return;
2556   }
2557   fseek(fp,0,2);
2558   filesize = ftell(fp);
2559   if( filesize==-1 ){
2560     ErrorMsg(ps.filename,0,"Couldn't read size of this file.");
2561     fclose(fp);
2562     gp->errorcnt++;
2563     return;
2564   }
2565   rewind(fp);
2566   filebuf = (char *)malloc( filesize+1 );
2567   if( filebuf==0 ){
2568     ErrorMsg(ps.filename,0,"Can't allocate %d of memory to hold this file.",
2569       filesize+1);
2570     fclose(fp);
2571     gp->errorcnt++;
2572     return;
2573   }
2574   if( fread(filebuf,1,filesize,fp)!=(size_t)filesize ){
2575     ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.",
2576       filesize);
2577     fclose(fp);
2578     free(filebuf);
2579     gp->errorcnt++;
2580     return;
2581   }
2582   fclose(fp);
2583   filebuf[filesize] = 0;
2584 
2585   /* Make an initial pass through the file to handle %ifdef and %ifndef */
2586   preprocess_input(filebuf);
2587 
2588   /* Now scan the text of the input file */
2589   lineno = 1;
2590   for(cp=filebuf; (c= *cp)!=0; ){
2591     if( c=='\n' ) lineno++;              /* Keep track of the line number */
2592     if( isspace(c) ){ cp++; continue; }  /* Skip all white space */
2593     if( c=='/' && cp[1]=='/' ){          /* Skip C++ style comments */
2594       cp+=2;
2595       while( (c= *cp)!=0 && c!='\n' ) cp++;
2596       continue;
2597     }
2598     if( c=='/' && cp[1]=='*' ){          /* Skip C style comments */
2599       cp+=2;
2600       while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){
2601         if( c=='\n' ) lineno++;
2602         cp++;
2603       }
2604       if( c ) cp++;
2605       continue;
2606     }
2607     ps.tokenstart = cp;                /* Mark the beginning of the token */
2608     ps.tokenlineno = lineno;           /* Linenumber on which token begins */
2609     if( c=='\"' ){                     /* String literals */
2610       cp++;
2611       while( (c= *cp)!=0 && c!='\"' ){
2612         if( c=='\n' ) lineno++;
2613         cp++;
2614       }
2615       if( c==0 ){
2616         ErrorMsg(ps.filename,startline,
2617 "String starting on this line is not terminated before the end of the file.");
2618         ps.errorcnt++;
2619         nextcp = cp;
2620       }else{
2621         nextcp = cp+1;
2622       }
2623     }else if( c=='{' ){               /* A block of C code */
2624       int level;
2625       cp++;
2626       for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){
2627         if( c=='\n' ) lineno++;
2628         else if( c=='{' ) level++;
2629         else if( c=='}' ) level--;
2630         else if( c=='/' && cp[1]=='*' ){  /* Skip comments */
2631           int prevc;
2632           cp = &cp[2];
2633           prevc = 0;
2634           while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){
2635             if( c=='\n' ) lineno++;
2636             prevc = c;
2637             cp++;
2638 	  }
2639 	}else if( c=='/' && cp[1]=='/' ){  /* Skip C++ style comments too */
2640           cp = &cp[2];
2641           while( (c= *cp)!=0 && c!='\n' ) cp++;
2642           if( c ) lineno++;
2643 	}else if( c=='\'' || c=='\"' ){    /* String a character literals */
2644           int startchar, prevc;
2645           startchar = c;
2646           prevc = 0;
2647           for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){
2648             if( c=='\n' ) lineno++;
2649             if( prevc=='\\' ) prevc = 0;
2650             else              prevc = c;
2651 	  }
2652 	}
2653       }
2654       if( c==0 ){
2655         ErrorMsg(ps.filename,ps.tokenlineno,
2656 "C code starting on this line is not terminated before the end of the file.");
2657         ps.errorcnt++;
2658         nextcp = cp;
2659       }else{
2660         nextcp = cp+1;
2661       }
2662     }else if( isalnum(c) ){          /* Identifiers */
2663       while( (c= *cp)!=0 && (isalnum(c) || c=='_') ) cp++;
2664       nextcp = cp;
2665     }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */
2666       cp += 3;
2667       nextcp = cp;
2668     }else if( (c=='/' || c=='|') && isalpha(cp[1]) ){
2669       cp += 2;
2670       while( (c = *cp)!=0 && (isalnum(c) || c=='_') ) cp++;
2671       nextcp = cp;
2672     }else{                          /* All other (one character) operators */
2673       cp++;
2674       nextcp = cp;
2675     }
2676     c = *cp;
2677     *cp = 0;                        /* Null terminate the token */
2678     parseonetoken(&ps);             /* Parse the token */
2679     *cp = c;                        /* Restore the buffer */
2680     cp = nextcp;
2681   }
2682   free(filebuf);                    /* Release the buffer after parsing */
2683   gp->rule = ps.firstrule;
2684   gp->errorcnt = ps.errorcnt;
2685 }
2686 /*************************** From the file "plink.c" *********************/
2687 /*
2688 ** Routines processing configuration follow-set propagation links
2689 ** in the LEMON parser generator.
2690 */
2691 static struct plink *plink_freelist = 0;
2692 
2693 /* Allocate a new plink */
Plink_new()2694 struct plink *Plink_new(){
2695   struct plink *new;
2696 
2697   if( plink_freelist==0 ){
2698     int i;
2699     int amt = 100;
2700     plink_freelist = (struct plink *)calloc( amt, sizeof(struct plink) );
2701     if( plink_freelist==0 ){
2702       fprintf(stderr,
2703       "Unable to allocate memory for a new follow-set propagation link.\n");
2704       exit(1);
2705     }
2706     for(i=0; i<amt-1; i++) plink_freelist[i].next = &plink_freelist[i+1];
2707     plink_freelist[amt-1].next = 0;
2708   }
2709   new = plink_freelist;
2710   plink_freelist = plink_freelist->next;
2711   return new;
2712 }
2713 
2714 /* Add a plink to a plink list */
Plink_add(plpp,cfp)2715 void Plink_add(plpp,cfp)
2716 struct plink **plpp;
2717 struct config *cfp;
2718 {
2719   struct plink *new;
2720   new = Plink_new();
2721   new->next = *plpp;
2722   *plpp = new;
2723   new->cfp = cfp;
2724 }
2725 
2726 /* Transfer every plink on the list "from" to the list "to" */
Plink_copy(to,from)2727 void Plink_copy(to,from)
2728 struct plink **to;
2729 struct plink *from;
2730 {
2731   struct plink *nextpl;
2732   while( from ){
2733     nextpl = from->next;
2734     from->next = *to;
2735     *to = from;
2736     from = nextpl;
2737   }
2738 }
2739 
2740 /* Delete every plink on the list */
Plink_delete(plp)2741 void Plink_delete(plp)
2742 struct plink *plp;
2743 {
2744   struct plink *nextpl;
2745 
2746   while( plp ){
2747     nextpl = plp->next;
2748     plp->next = plink_freelist;
2749     plink_freelist = plp;
2750     plp = nextpl;
2751   }
2752 }
2753 /*********************** From the file "report.c" **************************/
2754 /*
2755 ** Procedures for generating reports and tables in the LEMON parser generator.
2756 */
2757 
2758 /* Generate a filename with the given suffix.  Space to hold the
2759 ** name comes from malloc() and must be freed by the calling
2760 ** function.
2761 */
file_makename(lemp,suffix)2762 PRIVATE char *file_makename(lemp,suffix)
2763 struct lemon *lemp;
2764 char *suffix;
2765 {
2766   char *name;
2767   char *cp;
2768 
2769   name = malloc( lemonStrlen(lemp->filename) + lemonStrlen(suffix) + 5 );
2770   if( name==0 ){
2771     fprintf(stderr,"Can't allocate space for a filename.\n");
2772     exit(1);
2773   }
2774   strcpy(name,lemp->filename);
2775   cp = strrchr(name,'.');
2776   if( cp ) *cp = 0;
2777   strcat(name,suffix);
2778   return name;
2779 }
2780 
2781 /* Open a file with a name based on the name of the input file,
2782 ** but with a different (specified) suffix, and return a pointer
2783 ** to the stream */
file_open(lemp,suffix,mode)2784 PRIVATE FILE *file_open(lemp,suffix,mode)
2785 struct lemon *lemp;
2786 char *suffix;
2787 char *mode;
2788 {
2789   FILE *fp;
2790 
2791   if( lemp->outname ) free(lemp->outname);
2792   lemp->outname = file_makename(lemp, suffix);
2793   fp = fopen(lemp->outname,mode);
2794   if( fp==0 && *mode=='w' ){
2795     fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname);
2796     lemp->errorcnt++;
2797     return 0;
2798   }
2799   return fp;
2800 }
2801 
2802 /* Duplicate the input file without comments and without actions
2803 ** on rules */
Reprint(lemp)2804 void Reprint(lemp)
2805 struct lemon *lemp;
2806 {
2807   struct rule *rp;
2808   struct symbol *sp;
2809   int i, j, maxlen, len, ncolumns, skip;
2810   printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename);
2811   maxlen = 10;
2812   for(i=0; i<lemp->nsymbol; i++){
2813     sp = lemp->symbols[i];
2814     len = lemonStrlen(sp->name);
2815     if( len>maxlen ) maxlen = len;
2816   }
2817   ncolumns = 76/(maxlen+5);
2818   if( ncolumns<1 ) ncolumns = 1;
2819   skip = (lemp->nsymbol + ncolumns - 1)/ncolumns;
2820   for(i=0; i<skip; i++){
2821     printf("//");
2822     for(j=i; j<lemp->nsymbol; j+=skip){
2823       sp = lemp->symbols[j];
2824       assert( sp->index==j );
2825       printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name);
2826     }
2827     printf("\n");
2828   }
2829   for(rp=lemp->rule; rp; rp=rp->next){
2830     printf("%s",rp->lhs->name);
2831     /*    if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */
2832     printf(" ::=");
2833     for(i=0; i<rp->nrhs; i++){
2834       sp = rp->rhs[i];
2835       printf(" %s", sp->name);
2836       if( sp->type==MULTITERMINAL ){
2837         for(j=1; j<sp->nsubsym; j++){
2838           printf("|%s", sp->subsym[j]->name);
2839         }
2840       }
2841       /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */
2842     }
2843     printf(".");
2844     if( rp->precsym ) printf(" [%s]",rp->precsym->name);
2845     /* if( rp->code ) printf("\n    %s",rp->code); */
2846     printf("\n");
2847   }
2848 }
2849 
ConfigPrint(fp,cfp)2850 void ConfigPrint(fp,cfp)
2851 FILE *fp;
2852 struct config *cfp;
2853 {
2854   struct rule *rp;
2855   struct symbol *sp;
2856   int i, j;
2857   rp = cfp->rp;
2858   fprintf(fp,"%s ::=",rp->lhs->name);
2859   for(i=0; i<=rp->nrhs; i++){
2860     if( i==cfp->dot ) fprintf(fp," *");
2861     if( i==rp->nrhs ) break;
2862     sp = rp->rhs[i];
2863     fprintf(fp," %s", sp->name);
2864     if( sp->type==MULTITERMINAL ){
2865       for(j=1; j<sp->nsubsym; j++){
2866         fprintf(fp,"|%s",sp->subsym[j]->name);
2867       }
2868     }
2869   }
2870 }
2871 
2872 /* #define TEST */
2873 #if 0
2874 /* Print a set */
2875 PRIVATE void SetPrint(out,set,lemp)
2876 FILE *out;
2877 char *set;
2878 struct lemon *lemp;
2879 {
2880   int i;
2881   char *spacer;
2882   spacer = "";
2883   fprintf(out,"%12s[","");
2884   for(i=0; i<lemp->nterminal; i++){
2885     if( SetFind(set,i) ){
2886       fprintf(out,"%s%s",spacer,lemp->symbols[i]->name);
2887       spacer = " ";
2888     }
2889   }
2890   fprintf(out,"]\n");
2891 }
2892 
2893 /* Print a plink chain */
2894 PRIVATE void PlinkPrint(out,plp,tag)
2895 FILE *out;
2896 struct plink *plp;
2897 char *tag;
2898 {
2899   while( plp ){
2900     fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->statenum);
2901     ConfigPrint(out,plp->cfp);
2902     fprintf(out,"\n");
2903     plp = plp->next;
2904   }
2905 }
2906 #endif
2907 
2908 /* Print an action to the given file descriptor.  Return FALSE if
2909 ** nothing was actually printed.
2910 */
PrintAction(struct action * ap,FILE * fp,int indent)2911 int PrintAction(struct action *ap, FILE *fp, int indent){
2912   int result = 1;
2913   switch( ap->type ){
2914     case SHIFT:
2915       fprintf(fp,"%*s shift  %d",indent,ap->sp->name,ap->x.stp->statenum);
2916       break;
2917     case REDUCE:
2918       fprintf(fp,"%*s reduce %d",indent,ap->sp->name,ap->x.rp->index);
2919       break;
2920     case ACCEPT:
2921       fprintf(fp,"%*s accept",indent,ap->sp->name);
2922       break;
2923     case ERROR:
2924       fprintf(fp,"%*s error",indent,ap->sp->name);
2925       break;
2926     case SRCONFLICT:
2927     case RRCONFLICT:
2928       fprintf(fp,"%*s reduce %-3d ** Parsing conflict **",
2929         indent,ap->sp->name,ap->x.rp->index);
2930       break;
2931     case SSCONFLICT:
2932       fprintf(fp,"%*s shift  %d ** Parsing conflict **",
2933         indent,ap->sp->name,ap->x.stp->statenum);
2934       break;
2935     case SH_RESOLVED:
2936     case RD_RESOLVED:
2937     case NOT_USED:
2938       result = 0;
2939       break;
2940   }
2941   return result;
2942 }
2943 
2944 /* Generate the "y.output" log file */
ReportOutput(lemp)2945 void ReportOutput(lemp)
2946 struct lemon *lemp;
2947 {
2948   int i;
2949   struct state *stp;
2950   struct config *cfp;
2951   struct action *ap;
2952   FILE *fp;
2953 
2954   fp = file_open(lemp,".out","wb");
2955   if( fp==0 ) return;
2956   for(i=0; i<lemp->nstate; i++){
2957     stp = lemp->sorted[i];
2958     fprintf(fp,"State %d:\n",stp->statenum);
2959     if( lemp->basisflag ) cfp=stp->bp;
2960     else                  cfp=stp->cfp;
2961     while( cfp ){
2962       char buf[20];
2963       if( cfp->dot==cfp->rp->nrhs ){
2964         sprintf(buf,"(%d)",cfp->rp->index);
2965         fprintf(fp,"    %5s ",buf);
2966       }else{
2967         fprintf(fp,"          ");
2968       }
2969       ConfigPrint(fp,cfp);
2970       fprintf(fp,"\n");
2971 #if 0
2972       SetPrint(fp,cfp->fws,lemp);
2973       PlinkPrint(fp,cfp->fplp,"To  ");
2974       PlinkPrint(fp,cfp->bplp,"From");
2975 #endif
2976       if( lemp->basisflag ) cfp=cfp->bp;
2977       else                  cfp=cfp->next;
2978     }
2979     fprintf(fp,"\n");
2980     for(ap=stp->ap; ap; ap=ap->next){
2981       if( PrintAction(ap,fp,30) ) fprintf(fp,"\n");
2982     }
2983     fprintf(fp,"\n");
2984   }
2985   fprintf(fp, "----------------------------------------------------\n");
2986   fprintf(fp, "Symbols:\n");
2987   for(i=0; i<lemp->nsymbol; i++){
2988     int j;
2989     struct symbol *sp;
2990 
2991     sp = lemp->symbols[i];
2992     fprintf(fp, "  %3d: %s", i, sp->name);
2993     if( sp->type==NONTERMINAL ){
2994       fprintf(fp, ":");
2995       if( sp->lambda ){
2996         fprintf(fp, " <lambda>");
2997       }
2998       for(j=0; j<lemp->nterminal; j++){
2999         if( sp->firstset && SetFind(sp->firstset, j) ){
3000           fprintf(fp, " %s", lemp->symbols[j]->name);
3001         }
3002       }
3003     }
3004     fprintf(fp, "\n");
3005   }
3006   fclose(fp);
3007   return;
3008 }
3009 
3010 /* Search for the file "name" which is in the same directory as
3011 ** the executable */
pathsearch(argv0,name,modemask)3012 PRIVATE char *pathsearch(argv0,name,modemask)
3013 char *argv0;
3014 char *name;
3015 int modemask;
3016 {
3017   char *pathlist;
3018   char *path,*cp;
3019   char c;
3020 
3021 #ifdef __WIN32__
3022   cp = strrchr(argv0,'\\');
3023 #else
3024   cp = strrchr(argv0,'/');
3025 #endif
3026   if( cp ){
3027     c = *cp;
3028     *cp = 0;
3029     path = (char *)malloc( lemonStrlen(argv0) + lemonStrlen(name) + 2 );
3030     if( path ) sprintf(path,"%s/%s",argv0,name);
3031     *cp = c;
3032   }else{
3033     extern char *getenv();
3034     pathlist = getenv("PATH");
3035     if( pathlist==0 ) pathlist = ".:/bin:/usr/bin";
3036     path = (char *)malloc( lemonStrlen(pathlist)+lemonStrlen(name)+2 );
3037     if( path!=0 ){
3038       while( *pathlist ){
3039         cp = strchr(pathlist,':');
3040         if( cp==0 ) cp = &pathlist[lemonStrlen(pathlist)];
3041         c = *cp;
3042         *cp = 0;
3043         sprintf(path,"%s/%s",pathlist,name);
3044         *cp = c;
3045         if( c==0 ) pathlist = "";
3046         else pathlist = &cp[1];
3047         if( access(path,modemask)==0 ) break;
3048       }
3049     }
3050   }
3051   return path;
3052 }
3053 
3054 /* Given an action, compute the integer value for that action
3055 ** which is to be put in the action table of the generated machine.
3056 ** Return negative if no action should be generated.
3057 */
compute_action(lemp,ap)3058 PRIVATE int compute_action(lemp,ap)
3059 struct lemon *lemp;
3060 struct action *ap;
3061 {
3062   int act;
3063   switch( ap->type ){
3064     case SHIFT:  act = ap->x.stp->statenum;            break;
3065     case REDUCE: act = ap->x.rp->index + lemp->nstate; break;
3066     case ERROR:  act = lemp->nstate + lemp->nrule;     break;
3067     case ACCEPT: act = lemp->nstate + lemp->nrule + 1; break;
3068     default:     act = -1; break;
3069   }
3070   return act;
3071 }
3072 
3073 #define LINESIZE 1000
3074 /* The next cluster of routines are for reading the template file
3075 ** and writing the results to the generated parser */
3076 /* The first function transfers data from "in" to "out" until
3077 ** a line is seen which begins with "%%".  The line number is
3078 ** tracked.
3079 **
3080 ** if name!=0, then any word that begin with "Parse" is changed to
3081 ** begin with *name instead.
3082 */
tplt_xfer(name,in,out,lineno)3083 PRIVATE void tplt_xfer(name,in,out,lineno)
3084 char *name;
3085 FILE *in;
3086 FILE *out;
3087 int *lineno;
3088 {
3089   int i, iStart;
3090   char line[LINESIZE];
3091   while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){
3092     (*lineno)++;
3093     iStart = 0;
3094     if( name ){
3095       for(i=0; line[i]; i++){
3096         if( line[i]=='P' && strncmp(&line[i],"Parse",5)==0
3097           && (i==0 || !isalpha(line[i-1]))
3098         ){
3099           if( i>iStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]);
3100           fprintf(out,"%s",name);
3101           i += 4;
3102           iStart = i+1;
3103         }
3104       }
3105     }
3106     fprintf(out,"%s",&line[iStart]);
3107   }
3108 }
3109 
3110 /* The next function finds the template file and opens it, returning
3111 ** a pointer to the opened file. */
tplt_open(lemp)3112 PRIVATE FILE *tplt_open(lemp)
3113 struct lemon *lemp;
3114 {
3115   static char templatename[] = "lempar.c";
3116   char buf[1000];
3117   FILE *in;
3118   char *tpltname;
3119   char *cp;
3120   char *to_free = NULL;
3121 
3122   cp = strrchr(lemp->filename,'.');
3123   if( cp ){
3124     sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename);
3125   }else{
3126     sprintf(buf,"%s.lt",lemp->filename);
3127   }
3128   if( access(buf,004)==0 ){
3129     tpltname = buf;
3130   }else if( access(templatename,004)==0 ){
3131     tpltname = templatename;
3132   }else{
3133     tpltname = pathsearch(lemp->argv0,templatename,0);
3134     to_free = tpltname;
3135   }
3136   if( tpltname==0 ){
3137     fprintf(stderr,"Can't find the parser driver template file \"%s\".\n",
3138     templatename);
3139     lemp->errorcnt++;
3140     return 0;
3141   }
3142   in = fopen(tpltname,"rb");
3143   if (to_free) free(to_free);
3144   if( in==0 ){
3145     fprintf(stderr,"Can't open the template file \"%s\".\n",templatename);
3146     lemp->errorcnt++;
3147     return 0;
3148   }
3149   return in;
3150 }
3151 
3152 /* Print a #line directive line to the output file. */
tplt_linedir(out,lineno,filename)3153 PRIVATE void tplt_linedir(out,lineno,filename)
3154 FILE *out;
3155 int lineno;
3156 char *filename;
3157 {
3158   fprintf(out,"#line %d \"",lineno);
3159   while( *filename ){
3160     if( *filename == '\\' ) putc('\\',out);
3161     putc(*filename,out);
3162     filename++;
3163   }
3164   fprintf(out,"\"\n");
3165 }
3166 
3167 /* Print a string to the file and keep the linenumber up to date */
tplt_print(out,lemp,str,lineno)3168 PRIVATE void tplt_print(out,lemp,str,lineno)
3169 FILE *out;
3170 struct lemon *lemp;
3171 char *str;
3172 int *lineno;
3173 {
3174   if( str==0 ) return;
3175   while( *str ){
3176     putc(*str,out);
3177     if( *str=='\n' ) (*lineno)++;
3178     str++;
3179   }
3180   if( str[-1]!='\n' ){
3181     putc('\n',out);
3182     (*lineno)++;
3183   }
3184   if (!lemp->nolinenosflag) {
3185     (*lineno)++; tplt_linedir(out,*lineno,lemp->outname);
3186   }
3187   return;
3188 }
3189 
3190 /*
3191 ** The following routine emits code for the destructor for the
3192 ** symbol sp
3193 */
emit_destructor_code(out,sp,lemp,lineno)3194 void emit_destructor_code(out,sp,lemp,lineno)
3195 FILE *out;
3196 struct symbol *sp;
3197 struct lemon *lemp;
3198 int *lineno;
3199 {
3200  char *cp = 0;
3201 
3202  if( sp->type==TERMINAL ){
3203    cp = lemp->tokendest;
3204    if( cp==0 ) return;
3205    fprintf(out,"{\n"); (*lineno)++;
3206  }else if( sp->destructor ){
3207    cp = sp->destructor;
3208    fprintf(out,"{\n"); (*lineno)++;
3209    if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,sp->destLineno,lemp->filename); }
3210  }else if( lemp->vardest ){
3211    cp = lemp->vardest;
3212    if( cp==0 ) return;
3213    fprintf(out,"{\n"); (*lineno)++;
3214  }else{
3215    assert( 0 );  /* Cannot happen */
3216  }
3217  for(; *cp; cp++){
3218    if( *cp=='$' && cp[1]=='$' ){
3219      fprintf(out,"(yypminor->yy%d)",sp->dtnum);
3220      cp++;
3221      continue;
3222    }
3223    if( *cp=='\n' ) (*lineno)++;
3224    fputc(*cp,out);
3225  }
3226  fprintf(out,"\n"); (*lineno)++;
3227  if (!lemp->nolinenosflag) {
3228    (*lineno)++; tplt_linedir(out,*lineno,lemp->outname);
3229  }
3230  fprintf(out,"}\n"); (*lineno)++;
3231  return;
3232 }
3233 
3234 /*
3235 ** Return TRUE (non-zero) if the given symbol has a destructor.
3236 */
has_destructor(sp,lemp)3237 int has_destructor(sp, lemp)
3238 struct symbol *sp;
3239 struct lemon *lemp;
3240 {
3241   int ret;
3242   if( sp->type==TERMINAL ){
3243     ret = lemp->tokendest!=0;
3244   }else{
3245     ret = lemp->vardest!=0 || sp->destructor!=0;
3246   }
3247   return ret;
3248 }
3249 
3250 /*
3251 ** Append text to a dynamically allocated string.  If zText is 0 then
3252 ** reset the string to be empty again.  Always return the complete text
3253 ** of the string (which is overwritten with each call).
3254 **
3255 ** n bytes of zText are stored.  If n==0 then all of zText up to the first
3256 ** \000 terminator is stored.  zText can contain up to two instances of
3257 ** %d.  The values of p1 and p2 are written into the first and second
3258 ** %d.
3259 **
3260 ** If n==-1, then the previous character is overwritten.
3261 */
append_str(char * zText,int n,int p1,int p2)3262 PRIVATE char *append_str(char *zText, int n, int p1, int p2){
3263   static char *z = 0;
3264   static int alloced = 0;
3265   static int used = 0;
3266   int c;
3267   char zInt[40];
3268 
3269   if( zText==0 ){
3270     used = 0;
3271     return z;
3272   }
3273   if( n<=0 ){
3274     if( n<0 ){
3275       used += n;
3276       assert( used>=0 );
3277     }
3278     n = lemonStrlen(zText);
3279   }
3280   if( n+sizeof(zInt)*2+used >= (size_t)alloced ){
3281     alloced = n + sizeof(zInt)*2 + used + 200;
3282     z = realloc(z,  alloced);
3283   }
3284   if( z==0 ){
3285     fprintf(stderr,"Out of memory.\n");
3286     exit(1);
3287   }
3288   while( n-- > 0 ){
3289     c = *(zText++);
3290     if( c=='%' && n>0 && zText[0]=='d' ){
3291       sprintf(zInt, "%d", p1);
3292       p1 = p2;
3293       strcpy(&z[used], zInt);
3294       used += lemonStrlen(&z[used]);
3295       zText++;
3296       n--;
3297     }else{
3298       z[used++] = c;
3299     }
3300   }
3301   z[used] = 0;
3302   return z;
3303 }
3304 
3305 /*
3306 ** zCode is a string that is the action associated with a rule.  Expand
3307 ** the symbols in this string so that the refer to elements of the parser
3308 ** stack.
3309 */
translate_code(struct lemon * lemp,struct rule * rp)3310 PRIVATE void translate_code(struct lemon *lemp, struct rule *rp){
3311   char *cp, *xp;
3312   int i;
3313   char lhsused = 0;    /* True if the LHS element has been used */
3314   char used[MAXRHS];   /* True for each RHS element which is used */
3315 
3316   for(i=0; i<rp->nrhs; i++) used[i] = 0;
3317   lhsused = 0;
3318 
3319   if( rp->code==0 ){
3320     rp->code = "\n";
3321     rp->line = rp->ruleline;
3322   }
3323 
3324   append_str(0,0,0,0);
3325   for(cp=rp->code; *cp; cp++){
3326     if( isalpha(*cp) && (cp==rp->code || (!isalnum(cp[-1]) && cp[-1]!='_')) ){
3327       char saved;
3328       for(xp= &cp[1]; isalnum(*xp) || *xp=='_'; xp++);
3329       saved = *xp;
3330       *xp = 0;
3331       if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){
3332         append_str("yygotominor.yy%d",0,rp->lhs->dtnum,0);
3333         cp = xp;
3334         lhsused = 1;
3335       }else{
3336         for(i=0; i<rp->nrhs; i++){
3337           if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){
3338             if( cp!=rp->code && cp[-1]=='@' ){
3339               /* If the argument is of the form @X then substituted
3340               ** the token number of X, not the value of X */
3341               append_str("yymsp[%d].major",-1,i-rp->nrhs+1,0);
3342             }else{
3343               struct symbol *sp = rp->rhs[i];
3344               int dtnum;
3345               if( sp->type==MULTITERMINAL ){
3346                 dtnum = sp->subsym[0]->dtnum;
3347               }else{
3348                 dtnum = sp->dtnum;
3349               }
3350               append_str("yymsp[%d].minor.yy%d",0,i-rp->nrhs+1, dtnum);
3351             }
3352             cp = xp;
3353             used[i] = 1;
3354             break;
3355           }
3356         }
3357       }
3358       *xp = saved;
3359     }
3360     append_str(cp, 1, 0, 0);
3361   } /* End loop */
3362 
3363   /* Check to make sure the LHS has been used */
3364   if( rp->lhsalias && !lhsused ){
3365     ErrorMsg(lemp->filename,rp->ruleline,
3366       "Label \"%s\" for \"%s(%s)\" is never used.",
3367         rp->lhsalias,rp->lhs->name,rp->lhsalias);
3368     lemp->errorcnt++;
3369   }
3370 
3371   /* Generate destructor code for RHS symbols which are not used in the
3372   ** reduce code */
3373   for(i=0; i<rp->nrhs; i++){
3374     if( rp->rhsalias[i] && !used[i] ){
3375       ErrorMsg(lemp->filename,rp->ruleline,
3376         "Label %s for \"%s(%s)\" is never used.",
3377         rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]);
3378       lemp->errorcnt++;
3379     }else if( rp->rhsalias[i]==0 ){
3380       if( has_destructor(rp->rhs[i],lemp) ){
3381         append_str("  yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0,
3382            rp->rhs[i]->index,i-rp->nrhs+1);
3383       }else{
3384         /* No destructor defined for this term */
3385       }
3386     }
3387   }
3388   if( rp->code ){
3389     cp = append_str(0,0,0,0);
3390     rp->code = Strsafe(cp?cp:"");
3391   }
3392 }
3393 
3394 /*
3395 ** Generate code which executes when the rule "rp" is reduced.  Write
3396 ** the code to "out".  Make sure lineno stays up-to-date.
3397 */
emit_code(out,rp,lemp,lineno)3398 PRIVATE void emit_code(out,rp,lemp,lineno)
3399 FILE *out;
3400 struct rule *rp;
3401 struct lemon *lemp;
3402 int *lineno;
3403 {
3404  char *cp;
3405 
3406  /* Generate code to do the reduce action */
3407  if( rp->code ){
3408    if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,rp->line,lemp->filename); }
3409    fprintf(out,"{%s",rp->code);
3410    for(cp=rp->code; *cp; cp++){
3411      if( *cp=='\n' ) (*lineno)++;
3412    } /* End loop */
3413    fprintf(out,"}\n"); (*lineno)++;
3414    if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); }
3415  } /* End if( rp->code ) */
3416 
3417  return;
3418 }
3419 
3420 /*
3421 ** Print the definition of the union used for the parser's data stack.
3422 ** This union contains fields for every possible data type for tokens
3423 ** and nonterminals.  In the process of computing and printing this
3424 ** union, also set the ".dtnum" field of every terminal and nonterminal
3425 ** symbol.
3426 */
print_stack_union(out,lemp,plineno,mhflag)3427 void print_stack_union(out,lemp,plineno,mhflag)
3428 FILE *out;                  /* The output stream */
3429 struct lemon *lemp;         /* The main info structure for this parser */
3430 int *plineno;               /* Pointer to the line number */
3431 int mhflag;                 /* True if generating makeheaders output */
3432 {
3433   int lineno = *plineno;    /* The line number of the output */
3434   char **types;             /* A hash table of datatypes */
3435   int arraysize;            /* Size of the "types" array */
3436   int maxdtlength;          /* Maximum length of any ".datatype" field. */
3437   char *stddt;              /* Standardized name for a datatype */
3438   int i,j;                  /* Loop counters */
3439   int hash;                 /* For hashing the name of a type */
3440   char *name;               /* Name of the parser */
3441 
3442   /* Allocate and initialize types[] and allocate stddt[] */
3443   arraysize = lemp->nsymbol * 2;
3444   types = (char**)calloc( arraysize, sizeof(char*) );
3445   for(i=0; i<arraysize; i++) types[i] = 0;
3446   maxdtlength = 0;
3447   if( lemp->vartype ){
3448     maxdtlength = lemonStrlen(lemp->vartype);
3449   }
3450   for(i=0; i<lemp->nsymbol; i++){
3451     int len;
3452     struct symbol *sp = lemp->symbols[i];
3453     if( sp->datatype==0 ) continue;
3454     len = lemonStrlen(sp->datatype);
3455     if( len>maxdtlength ) maxdtlength = len;
3456   }
3457   stddt = (char*)malloc( maxdtlength*2 + 1 );
3458   if( types==0 || stddt==0 ){
3459     fprintf(stderr,"Out of memory.\n");
3460     exit(1);
3461   }
3462 
3463   /* Build a hash table of datatypes. The ".dtnum" field of each symbol
3464   ** is filled in with the hash index plus 1.  A ".dtnum" value of 0 is
3465   ** used for terminal symbols.  If there is no %default_type defined then
3466   ** 0 is also used as the .dtnum value for nonterminals which do not specify
3467   ** a datatype using the %type directive.
3468   */
3469   for(i=0; i<lemp->nsymbol; i++){
3470     struct symbol *sp = lemp->symbols[i];
3471     char *cp;
3472     if( sp==lemp->errsym ){
3473       sp->dtnum = arraysize+1;
3474       continue;
3475     }
3476     if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){
3477       sp->dtnum = 0;
3478       continue;
3479     }
3480     cp = sp->datatype;
3481     if( cp==0 ) cp = lemp->vartype;
3482     j = 0;
3483     while( isspace(*cp) ) cp++;
3484     while( *cp ) stddt[j++] = *cp++;
3485     while( j>0 && isspace(stddt[j-1]) ) j--;
3486     stddt[j] = 0;
3487     if( lemp->tokentype && strcmp(stddt, lemp->tokentype)==0 ){
3488       sp->dtnum = 0;
3489       continue;
3490     }
3491     hash = 0;
3492     for(j=0; stddt[j]; j++){
3493       hash = hash*53 + stddt[j];
3494     }
3495     hash = (hash & 0x7fffffff)%arraysize;
3496     while( types[hash] ){
3497       if( strcmp(types[hash],stddt)==0 ){
3498         sp->dtnum = hash + 1;
3499         break;
3500       }
3501       hash++;
3502       if( hash>=arraysize ) hash = 0;
3503     }
3504     if( types[hash]==0 ){
3505       sp->dtnum = hash + 1;
3506       types[hash] = (char*)malloc( lemonStrlen(stddt)+1 );
3507       if( types[hash]==0 ){
3508         fprintf(stderr,"Out of memory.\n");
3509         exit(1);
3510       }
3511       strcpy(types[hash],stddt);
3512     }
3513   }
3514 
3515   /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */
3516   name = lemp->name ? lemp->name : "Parse";
3517   lineno = *plineno;
3518   if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; }
3519   fprintf(out,"#define %sTOKENTYPE %s\n",name,
3520     lemp->tokentype?lemp->tokentype:"void*");  lineno++;
3521   if( mhflag ){ fprintf(out,"#endif\n"); lineno++; }
3522   fprintf(out,"typedef union {\n"); lineno++;
3523   fprintf(out,"  int yyinit;\n"); lineno++;
3524   fprintf(out,"  %sTOKENTYPE yy0;\n",name); lineno++;
3525   for(i=0; i<arraysize; i++){
3526     if( types[i]==0 ) continue;
3527     fprintf(out,"  %s yy%d;\n",types[i],i+1); lineno++;
3528     free(types[i]);
3529   }
3530   if( lemp->errsym->useCnt ){
3531     fprintf(out,"  int yy%d;\n",lemp->errsym->dtnum); lineno++;
3532   }
3533   free(stddt);
3534   free(types);
3535   fprintf(out,"} YYMINORTYPE;\n"); lineno++;
3536   *plineno = lineno;
3537 }
3538 
3539 /*
3540 ** Return the name of a C datatype able to represent values between
3541 ** lwr and upr, inclusive.
3542 */
minimum_size_type(int lwr,int upr)3543 static const char *minimum_size_type(int lwr, int upr){
3544   if( lwr>=0 ){
3545     if( upr<=255 ){
3546       return "unsigned char";
3547     }else if( upr<65535 ){
3548       return "unsigned short int";
3549     }else{
3550       return "unsigned int";
3551     }
3552   }else if( lwr>=-127 && upr<=127 ){
3553     return "signed char";
3554   }else if( lwr>=-32767 && upr<32767 ){
3555     return "short";
3556   }else{
3557     return "int";
3558   }
3559 }
3560 
3561 /*
3562 ** Each state contains a set of token transaction and a set of
3563 ** nonterminal transactions.  Each of these sets makes an instance
3564 ** of the following structure.  An array of these structures is used
3565 ** to order the creation of entries in the yy_action[] table.
3566 */
3567 struct axset {
3568   struct state *stp;   /* A pointer to a state */
3569   int isTkn;           /* True to use tokens.  False for non-terminals */
3570   int nAction;         /* Number of actions */
3571 };
3572 
3573 /*
3574 ** Compare to axset structures for sorting purposes
3575 */
axset_compare(const void * a,const void * b)3576 static int axset_compare(const void *a, const void *b){
3577   struct axset *p1 = (struct axset*)a;
3578   struct axset *p2 = (struct axset*)b;
3579   return p2->nAction - p1->nAction;
3580 }
3581 
3582 /*
3583 ** Write text on "out" that describes the rule "rp".
3584 */
writeRuleText(FILE * out,struct rule * rp)3585 static void writeRuleText(FILE *out, struct rule *rp){
3586   int j;
3587   fprintf(out,"%s ::=", rp->lhs->name);
3588   for(j=0; j<rp->nrhs; j++){
3589     struct symbol *sp = rp->rhs[j];
3590     fprintf(out," %s", sp->name);
3591     if( sp->type==MULTITERMINAL ){
3592       int k;
3593       for(k=1; k<sp->nsubsym; k++){
3594         fprintf(out,"|%s",sp->subsym[k]->name);
3595       }
3596     }
3597   }
3598 }
3599 
3600 
3601 /* Generate C source code for the parser */
ReportTable(lemp,mhflag)3602 void ReportTable(lemp, mhflag)
3603 struct lemon *lemp;
3604 int mhflag;     /* Output in makeheaders format if true */
3605 {
3606   FILE *out, *in;
3607   char line[LINESIZE];
3608   int  lineno;
3609   struct state *stp;
3610   struct action *ap;
3611   struct rule *rp;
3612   struct acttab *pActtab;
3613   int i, j, n;
3614   char *name;
3615   int mnTknOfst, mxTknOfst;
3616   int mnNtOfst, mxNtOfst;
3617   struct axset *ax;
3618 
3619   in = tplt_open(lemp);
3620   if( in==0 ) return;
3621   if( output_filename!=0 ){
3622     char *tmp = lemp->filename;
3623     char *ext = strrchr(output_filename, '.');
3624     if( ext==0 ) ext = ".c";
3625     lemp->filename = output_filename;
3626     out = file_open(lemp,ext,"wb");
3627     lemp->filename = tmp;
3628   }else{
3629     out = file_open(lemp,".c","wb");
3630   }
3631   if( out==0 ){
3632     fclose(in);
3633     return;
3634   }
3635   lineno = 1;
3636   tplt_xfer(lemp->name,in,out,&lineno);
3637 
3638   /* Generate the include code, if any */
3639   tplt_print(out,lemp,lemp->include,&lineno);
3640   if( mhflag ){
3641     char *name = file_makename(lemp, ".h");
3642     fprintf(out,"#include \"%s\"\n", name); lineno++;
3643     free(name);
3644   }
3645   tplt_xfer(lemp->name,in,out,&lineno);
3646 
3647   /* Generate #defines for all tokens */
3648   if( mhflag ){
3649     char *prefix;
3650     fprintf(out,"#if INTERFACE\n"); lineno++;
3651     if( lemp->tokenprefix ) prefix = lemp->tokenprefix;
3652     else                    prefix = "";
3653     for(i=1; i<lemp->nterminal; i++){
3654       fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
3655       lineno++;
3656     }
3657     fprintf(out,"#endif\n"); lineno++;
3658   }
3659   tplt_xfer(lemp->name,in,out,&lineno);
3660 
3661   /* Generate the defines */
3662   fprintf(out,"#define YYCODETYPE %s\n",
3663     minimum_size_type(0, lemp->nsymbol+1)); lineno++;
3664   fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1);  lineno++;
3665   fprintf(out,"#define YYACTIONTYPE %s\n",
3666     minimum_size_type(0, lemp->nstate+lemp->nrule+5));  lineno++;
3667   if( lemp->wildcard ){
3668     fprintf(out,"#define YYWILDCARD %d\n",
3669        lemp->wildcard->index); lineno++;
3670   }
3671   print_stack_union(out,lemp,&lineno,mhflag);
3672   fprintf(out, "#ifndef YYSTACKDEPTH\n"); lineno++;
3673   if( lemp->stacksize ){
3674     fprintf(out,"#define YYSTACKDEPTH %s\n",lemp->stacksize);  lineno++;
3675   }else{
3676     fprintf(out,"#define YYSTACKDEPTH 100\n");  lineno++;
3677   }
3678   fprintf(out, "#endif\n"); lineno++;
3679   if( mhflag ){
3680     fprintf(out,"#if INTERFACE\n"); lineno++;
3681   }
3682   name = lemp->name ? lemp->name : "Parse";
3683   if( lemp->arg && lemp->arg[0] ){
3684     int i;
3685     i = lemonStrlen(lemp->arg);
3686     while( i>=1 && isspace(lemp->arg[i-1]) ) i--;
3687     while( i>=1 && (isalnum(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--;
3688     fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg);  lineno++;
3689     fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg);  lineno++;
3690     fprintf(out,"#define %sARG_FETCH %s = yypParser->%s\n",
3691                  name,lemp->arg,&lemp->arg[i]);  lineno++;
3692     fprintf(out,"#define %sARG_STORE yypParser->%s = %s\n",
3693                  name,&lemp->arg[i],&lemp->arg[i]);  lineno++;
3694   }else{
3695     fprintf(out,"#define %sARG_SDECL\n",name);  lineno++;
3696     fprintf(out,"#define %sARG_PDECL\n",name);  lineno++;
3697     fprintf(out,"#define %sARG_FETCH\n",name); lineno++;
3698     fprintf(out,"#define %sARG_STORE\n",name); lineno++;
3699   }
3700   if( mhflag ){
3701     fprintf(out,"#endif\n"); lineno++;
3702   }
3703   fprintf(out,"#define YYNSTATE %d\n",lemp->nstate);  lineno++;
3704   fprintf(out,"#define YYNRULE %d\n",lemp->nrule);  lineno++;
3705   if( lemp->errsym->useCnt ){
3706     fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index);  lineno++;
3707     fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum);  lineno++;
3708   }
3709   if( lemp->has_fallback ){
3710     fprintf(out,"#define YYFALLBACK 1\n");  lineno++;
3711   }
3712   tplt_xfer(lemp->name,in,out,&lineno);
3713 
3714   /* Generate the action table and its associates:
3715   **
3716   **  yy_action[]        A single table containing all actions.
3717   **  yy_lookahead[]     A table containing the lookahead for each entry in
3718   **                     yy_action.  Used to detect hash collisions.
3719   **  yy_shift_ofst[]    For each state, the offset into yy_action for
3720   **                     shifting terminals.
3721   **  yy_reduce_ofst[]   For each state, the offset into yy_action for
3722   **                     shifting non-terminals after a reduce.
3723   **  yy_default[]       Default action for each state.
3724   */
3725 
3726   /* Compute the actions on all states and count them up */
3727   ax = calloc(lemp->nstate*2, sizeof(ax[0]));
3728   if( ax==0 ){
3729     fprintf(stderr,"malloc failed\n");
3730     exit(1);
3731   }
3732   for(i=0; i<lemp->nstate; i++){
3733     stp = lemp->sorted[i];
3734     ax[i*2].stp = stp;
3735     ax[i*2].isTkn = 1;
3736     ax[i*2].nAction = stp->nTknAct;
3737     ax[i*2+1].stp = stp;
3738     ax[i*2+1].isTkn = 0;
3739     ax[i*2+1].nAction = stp->nNtAct;
3740   }
3741   mxTknOfst = mnTknOfst = 0;
3742   mxNtOfst = mnNtOfst = 0;
3743 
3744   /* Compute the action table.  In order to try to keep the size of the
3745   ** action table to a minimum, the heuristic of placing the largest action
3746   ** sets first is used.
3747   */
3748   qsort(ax, lemp->nstate*2, sizeof(ax[0]), axset_compare);
3749   pActtab = acttab_alloc();
3750   for(i=0; i<lemp->nstate*2 && ax[i].nAction>0; i++){
3751     stp = ax[i].stp;
3752     if( ax[i].isTkn ){
3753       for(ap=stp->ap; ap; ap=ap->next){
3754         int action;
3755         if( ap->sp->index>=lemp->nterminal ) continue;
3756         action = compute_action(lemp, ap);
3757         if( action<0 ) continue;
3758         acttab_action(pActtab, ap->sp->index, action);
3759       }
3760       stp->iTknOfst = acttab_insert(pActtab);
3761       if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst;
3762       if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst;
3763     }else{
3764       for(ap=stp->ap; ap; ap=ap->next){
3765         int action;
3766         if( ap->sp->index<lemp->nterminal ) continue;
3767         if( ap->sp->index==lemp->nsymbol ) continue;
3768         action = compute_action(lemp, ap);
3769         if( action<0 ) continue;
3770         acttab_action(pActtab, ap->sp->index, action);
3771       }
3772       stp->iNtOfst = acttab_insert(pActtab);
3773       if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst;
3774       if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst;
3775     }
3776   }
3777   free(ax);
3778 
3779   /* Output the yy_action table */
3780   fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++;
3781   n = acttab_size(pActtab);
3782   for(i=j=0; i<n; i++){
3783     int action = acttab_yyaction(pActtab, i);
3784     if( action<0 ) action = lemp->nstate + lemp->nrule + 2;
3785     if( j==0 ) fprintf(out," /* %5d */ ", i);
3786     fprintf(out, " %4d,", action);
3787     if( j==9 || i==n-1 ){
3788       fprintf(out, "\n"); lineno++;
3789       j = 0;
3790     }else{
3791       j++;
3792     }
3793   }
3794   fprintf(out, "};\n"); lineno++;
3795 
3796   /* Output the yy_lookahead table */
3797   fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++;
3798   for(i=j=0; i<n; i++){
3799     int la = acttab_yylookahead(pActtab, i);
3800     if( la<0 ) la = lemp->nsymbol;
3801     if( j==0 ) fprintf(out," /* %5d */ ", i);
3802     fprintf(out, " %4d,", la);
3803     if( j==9 || i==n-1 ){
3804       fprintf(out, "\n"); lineno++;
3805       j = 0;
3806     }else{
3807       j++;
3808     }
3809   }
3810   fprintf(out, "};\n"); lineno++;
3811 
3812   /* Output the yy_shift_ofst[] table */
3813   fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", mnTknOfst-1); lineno++;
3814   n = lemp->nstate;
3815   while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--;
3816   fprintf(out, "#define YY_SHIFT_MAX %d\n", n-1); lineno++;
3817   fprintf(out, "static const %s yy_shift_ofst[] = {\n",
3818           minimum_size_type(mnTknOfst-1, mxTknOfst)); lineno++;
3819   for(i=j=0; i<n; i++){
3820     int ofst;
3821     stp = lemp->sorted[i];
3822     ofst = stp->iTknOfst;
3823     if( ofst==NO_OFFSET ) ofst = mnTknOfst - 1;
3824     if( j==0 ) fprintf(out," /* %5d */ ", i);
3825     fprintf(out, " %4d,", ofst);
3826     if( j==9 || i==n-1 ){
3827       fprintf(out, "\n"); lineno++;
3828       j = 0;
3829     }else{
3830       j++;
3831     }
3832   }
3833   fprintf(out, "};\n"); lineno++;
3834 
3835   /* Output the yy_reduce_ofst[] table */
3836   fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++;
3837   n = lemp->nstate;
3838   while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--;
3839   fprintf(out, "#define YY_REDUCE_MAX %d\n", n-1); lineno++;
3840   fprintf(out, "static const %s yy_reduce_ofst[] = {\n",
3841           minimum_size_type(mnNtOfst-1, mxNtOfst)); lineno++;
3842   for(i=j=0; i<n; i++){
3843     int ofst;
3844     stp = lemp->sorted[i];
3845     ofst = stp->iNtOfst;
3846     if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1;
3847     if( j==0 ) fprintf(out," /* %5d */ ", i);
3848     fprintf(out, " %4d,", ofst);
3849     if( j==9 || i==n-1 ){
3850       fprintf(out, "\n"); lineno++;
3851       j = 0;
3852     }else{
3853       j++;
3854     }
3855   }
3856   fprintf(out, "};\n"); lineno++;
3857 
3858   /* Output the default action table */
3859   fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++;
3860   n = lemp->nstate;
3861   for(i=j=0; i<n; i++){
3862     stp = lemp->sorted[i];
3863     if( j==0 ) fprintf(out," /* %5d */ ", i);
3864     fprintf(out, " %4d,", stp->iDflt);
3865     if( j==9 || i==n-1 ){
3866       fprintf(out, "\n"); lineno++;
3867       j = 0;
3868     }else{
3869       j++;
3870     }
3871   }
3872   fprintf(out, "};\n"); lineno++;
3873   tplt_xfer(lemp->name,in,out,&lineno);
3874 
3875   /* Generate the table of fallback tokens.
3876   */
3877   if( lemp->has_fallback ){
3878     int mx = lemp->nterminal - 1;
3879     while( mx>0 && lemp->symbols[mx]->fallback==0 ){ mx--; }
3880     for(i=0; i<=mx; i++){
3881       struct symbol *p = lemp->symbols[i];
3882       if( p->fallback==0 ){
3883         fprintf(out, "    0,  /* %10s => nothing */\n", p->name);
3884       }else{
3885         fprintf(out, "  %3d,  /* %10s => %s */\n", p->fallback->index,
3886           p->name, p->fallback->name);
3887       }
3888       lineno++;
3889     }
3890   }
3891   tplt_xfer(lemp->name, in, out, &lineno);
3892 
3893   /* Generate a table containing the symbolic name of every symbol
3894   */
3895   for(i=0; i<lemp->nsymbol; i++){
3896     sprintf(line,"\"%s\",",lemp->symbols[i]->name);
3897     fprintf(out,"  %-15s",line);
3898     if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; }
3899   }
3900   if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; }
3901   tplt_xfer(lemp->name,in,out,&lineno);
3902 
3903   /* Generate a table containing a text string that describes every
3904   ** rule in the rule set of the grammar.  This information is used
3905   ** when tracing REDUCE actions.
3906   */
3907   for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){
3908     assert( rp->index==i );
3909     fprintf(out," /* %3d */ \"", i);
3910     writeRuleText(out, rp);
3911     fprintf(out,"\",\n"); lineno++;
3912   }
3913   tplt_xfer(lemp->name,in,out,&lineno);
3914 
3915   /* Generate code which executes every time a symbol is popped from
3916   ** the stack while processing errors or while destroying the parser.
3917   ** (In other words, generate the %destructor actions)
3918   */
3919   if( lemp->tokendest ){
3920     int once = 1;
3921     for(i=0; i<lemp->nsymbol; i++){
3922       struct symbol *sp = lemp->symbols[i];
3923       if( sp==0 || sp->type!=TERMINAL ) continue;
3924       if( once ){
3925         fprintf(out, "      /* TERMINAL Destructor */\n"); lineno++;
3926         once = 0;
3927       }
3928       fprintf(out,"    case %d: /* %s */\n", sp->index, sp->name); lineno++;
3929     }
3930     for(i=0; i<lemp->nsymbol && lemp->symbols[i]->type!=TERMINAL; i++);
3931     if( i<lemp->nsymbol ){
3932       emit_destructor_code(out,lemp->symbols[i],lemp,&lineno);
3933       fprintf(out,"      break;\n"); lineno++;
3934     }
3935   }
3936   if( lemp->vardest ){
3937     struct symbol *dflt_sp = 0;
3938     int once = 1;
3939     for(i=0; i<lemp->nsymbol; i++){
3940       struct symbol *sp = lemp->symbols[i];
3941       if( sp==0 || sp->type==TERMINAL ||
3942           sp->index<=0 || sp->destructor!=0 ) continue;
3943       if( once ){
3944         fprintf(out, "      /* Default NON-TERMINAL Destructor */\n"); lineno++;
3945         once = 0;
3946       }
3947       fprintf(out,"    case %d: /* %s */\n", sp->index, sp->name); lineno++;
3948       dflt_sp = sp;
3949     }
3950     if( dflt_sp!=0 ){
3951       emit_destructor_code(out,dflt_sp,lemp,&lineno);
3952     }
3953     fprintf(out,"      break;\n"); lineno++;
3954   }
3955   for(i=0; i<lemp->nsymbol; i++){
3956     struct symbol *sp = lemp->symbols[i];
3957     if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue;
3958     fprintf(out,"    case %d: /* %s */\n", sp->index, sp->name); lineno++;
3959 
3960     /* Combine duplicate destructors into a single case */
3961     for(j=i+1; j<lemp->nsymbol; j++){
3962       struct symbol *sp2 = lemp->symbols[j];
3963       if( sp2 && sp2->type!=TERMINAL && sp2->destructor
3964           && sp2->dtnum==sp->dtnum
3965           && strcmp(sp->destructor,sp2->destructor)==0 ){
3966          fprintf(out,"    case %d: /* %s */\n",
3967                  sp2->index, sp2->name); lineno++;
3968          sp2->destructor = 0;
3969       }
3970     }
3971 
3972     emit_destructor_code(out,lemp->symbols[i],lemp,&lineno);
3973     fprintf(out,"      break;\n"); lineno++;
3974   }
3975   tplt_xfer(lemp->name,in,out,&lineno);
3976 
3977   /* Generate code which executes whenever the parser stack overflows */
3978   tplt_print(out,lemp,lemp->overflow,&lineno);
3979   tplt_xfer(lemp->name,in,out,&lineno);
3980 
3981   /* Generate the table of rule information
3982   **
3983   ** Note: This code depends on the fact that rules are number
3984   ** sequentually beginning with 0.
3985   */
3986   for(rp=lemp->rule; rp; rp=rp->next){
3987     fprintf(out,"  { %d, %d },\n",rp->lhs->index,rp->nrhs); lineno++;
3988   }
3989   tplt_xfer(lemp->name,in,out,&lineno);
3990 
3991   /* Generate code which execution during each REDUCE action */
3992   for(rp=lemp->rule; rp; rp=rp->next){
3993     translate_code(lemp, rp);
3994   }
3995   /* First output rules other than the default: rule */
3996   for(rp=lemp->rule; rp; rp=rp->next){
3997     struct rule *rp2;               /* Other rules with the same action */
3998     if( rp->code==0 ) continue;
3999     if( rp->code[0]=='\n' && rp->code[1]==0 ) continue; /* Will be default: */
4000     fprintf(out,"      case %d: /* ", rp->index);
4001     writeRuleText(out, rp);
4002     fprintf(out, " */\n"); lineno++;
4003     for(rp2=rp->next; rp2; rp2=rp2->next){
4004       if( rp2->code==rp->code ){
4005         fprintf(out,"      case %d: /* ", rp2->index);
4006         writeRuleText(out, rp2);
4007         fprintf(out," */ yytestcase(yyruleno==%d);\n", rp2->index); lineno++;
4008         rp2->code = 0;
4009       }
4010     }
4011     emit_code(out,rp,lemp,&lineno);
4012     fprintf(out,"        break;\n"); lineno++;
4013     rp->code = 0;
4014   }
4015   /* Finally, output the default: rule.  We choose as the default: all
4016   ** empty actions. */
4017   fprintf(out,"      default:\n"); lineno++;
4018   for(rp=lemp->rule; rp; rp=rp->next){
4019     if( rp->code==0 ) continue;
4020     assert( rp->code[0]=='\n' && rp->code[1]==0 );
4021     fprintf(out,"      /* (%d) ", rp->index);
4022     writeRuleText(out, rp);
4023     fprintf(out, " */ yytestcase(yyruleno==%d);\n", rp->index); lineno++;
4024   }
4025   fprintf(out,"        break;\n"); lineno++;
4026   tplt_xfer(lemp->name,in,out,&lineno);
4027 
4028   /* Generate code which executes if a parse fails */
4029   tplt_print(out,lemp,lemp->failure,&lineno);
4030   tplt_xfer(lemp->name,in,out,&lineno);
4031 
4032   /* Generate code which executes when a syntax error occurs */
4033   tplt_print(out,lemp,lemp->error,&lineno);
4034   tplt_xfer(lemp->name,in,out,&lineno);
4035 
4036   /* Generate code which executes when the parser accepts its input */
4037   tplt_print(out,lemp,lemp->accept,&lineno);
4038   tplt_xfer(lemp->name,in,out,&lineno);
4039 
4040   /* Append any addition code the user desires */
4041   tplt_print(out,lemp,lemp->extracode,&lineno);
4042 
4043   fclose(in);
4044   fclose(out);
4045   return;
4046 }
4047 
4048 /* Generate a header file for the parser */
ReportHeader(lemp)4049 void ReportHeader(lemp)
4050 struct lemon *lemp;
4051 {
4052   FILE *out, *in;
4053   char *prefix;
4054   char line[LINESIZE];
4055   char pattern[LINESIZE];
4056   int i;
4057 
4058   if( lemp->tokenprefix ) prefix = lemp->tokenprefix;
4059   else                    prefix = "";
4060   if( output_header_filename!=0 ){
4061     char *tmp = lemp->filename;
4062     char *ext = strrchr(output_header_filename, '.');
4063     if( ext==0 ) ext = ".h";
4064     lemp->filename = output_header_filename;
4065     in = file_open(lemp,ext,"rb");
4066     lemp->filename = tmp;
4067   }else{
4068     in = file_open(lemp,".h","rb");
4069   }
4070   if( in ){
4071     int nextChar;
4072     for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){
4073       sprintf(pattern,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
4074       if( strcmp(line,pattern) ) break;
4075     }
4076     nextChar = fgetc(in);
4077     fclose(in);
4078     if( i==lemp->nterminal && nextChar==EOF ){
4079       /* No change in the file.  Don't rewrite it. */
4080       return;
4081     }
4082   }
4083   if( output_header_filename!=0 ){
4084     char *tmp = lemp->filename;
4085     char *ext = strrchr(output_header_filename, '.');
4086     if( ext==0 ) ext = ".h";
4087     lemp->filename = output_header_filename;
4088     out = file_open(lemp,ext,"wb");
4089     lemp->filename = tmp;
4090   }else{
4091     out = file_open(lemp,".h","wb");
4092   }
4093   if( out ){
4094     for(i=1; i<lemp->nterminal; i++){
4095       fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
4096     }
4097     fclose(out);
4098   }
4099   return;
4100 }
4101 
4102 /* Reduce the size of the action tables, if possible, by making use
4103 ** of defaults.
4104 **
4105 ** In this version, we take the most frequent REDUCE action and make
4106 ** it the default.  Except, there is no default if the wildcard token
4107 ** is a possible look-ahead.
4108 */
CompressTables(lemp)4109 void CompressTables(lemp)
4110 struct lemon *lemp;
4111 {
4112   struct state *stp;
4113   struct action *ap, *ap2;
4114   struct rule *rp, *rp2, *rbest;
4115   int nbest, n;
4116   int i;
4117   int usesWildcard;
4118 
4119   for(i=0; i<lemp->nstate; i++){
4120     stp = lemp->sorted[i];
4121     nbest = 0;
4122     rbest = 0;
4123     usesWildcard = 0;
4124 
4125     for(ap=stp->ap; ap; ap=ap->next){
4126       if( ap->type==SHIFT && ap->sp==lemp->wildcard ){
4127         usesWildcard = 1;
4128       }
4129       if( ap->type!=REDUCE ) continue;
4130       rp = ap->x.rp;
4131       if( rp->lhsStart ) continue;
4132       if( rp==rbest ) continue;
4133       n = 1;
4134       for(ap2=ap->next; ap2; ap2=ap2->next){
4135         if( ap2->type!=REDUCE ) continue;
4136         rp2 = ap2->x.rp;
4137         if( rp2==rbest ) continue;
4138         if( rp2==rp ) n++;
4139       }
4140       if( n>nbest ){
4141         nbest = n;
4142         rbest = rp;
4143       }
4144     }
4145 
4146     /* Do not make a default if the number of rules to default
4147     ** is not at least 1 or if the wildcard token is a possible
4148     ** lookahead.
4149     */
4150     if( nbest<1 || usesWildcard ) continue;
4151 
4152 
4153     /* Combine matching REDUCE actions into a single default */
4154     for(ap=stp->ap; ap; ap=ap->next){
4155       if( ap->type==REDUCE && ap->x.rp==rbest ) break;
4156     }
4157     assert( ap );
4158     ap->sp = Symbol_new("{default}");
4159     for(ap=ap->next; ap; ap=ap->next){
4160       if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED;
4161     }
4162     stp->ap = Action_sort(stp->ap);
4163   }
4164 }
4165 
4166 
4167 /*
4168 ** Compare two states for sorting purposes.  The smaller state is the
4169 ** one with the most non-terminal actions.  If they have the same number
4170 ** of non-terminal actions, then the smaller is the one with the most
4171 ** token actions.
4172 */
stateResortCompare(const void * a,const void * b)4173 static int stateResortCompare(const void *a, const void *b){
4174   const struct state *pA = *(const struct state**)a;
4175   const struct state *pB = *(const struct state**)b;
4176   int n;
4177 
4178   n = pB->nNtAct - pA->nNtAct;
4179   if( n==0 ){
4180     n = pB->nTknAct - pA->nTknAct;
4181   }
4182   return n;
4183 }
4184 
4185 
4186 /*
4187 ** Renumber and resort states so that states with fewer choices
4188 ** occur at the end.  Except, keep state 0 as the first state.
4189 */
ResortStates(lemp)4190 void ResortStates(lemp)
4191 struct lemon *lemp;
4192 {
4193   int i;
4194   struct state *stp;
4195   struct action *ap;
4196 
4197   for(i=0; i<lemp->nstate; i++){
4198     stp = lemp->sorted[i];
4199     stp->nTknAct = stp->nNtAct = 0;
4200     stp->iDflt = lemp->nstate + lemp->nrule;
4201     stp->iTknOfst = NO_OFFSET;
4202     stp->iNtOfst = NO_OFFSET;
4203     for(ap=stp->ap; ap; ap=ap->next){
4204       if( compute_action(lemp,ap)>=0 ){
4205         if( ap->sp->index<lemp->nterminal ){
4206           stp->nTknAct++;
4207         }else if( ap->sp->index<lemp->nsymbol ){
4208           stp->nNtAct++;
4209         }else{
4210           stp->iDflt = compute_action(lemp, ap);
4211         }
4212       }
4213     }
4214   }
4215   qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]),
4216         stateResortCompare);
4217   for(i=0; i<lemp->nstate; i++){
4218     lemp->sorted[i]->statenum = i;
4219   }
4220 }
4221 
4222 
4223 /***************** From the file "set.c" ************************************/
4224 /*
4225 ** Set manipulation routines for the LEMON parser generator.
4226 */
4227 
4228 static int size = 0;
4229 
4230 /* Set the set size */
SetSize(n)4231 void SetSize(n)
4232 int n;
4233 {
4234   size = n+1;
4235 }
4236 
4237 /* Allocate a new set */
SetNew()4238 char *SetNew(){
4239   char *s;
4240   s = (char*)calloc( size, 1);
4241   if( s==0 ){
4242     extern void memory_error();
4243     memory_error();
4244   }
4245   return s;
4246 }
4247 
4248 /* Deallocate a set */
SetFree(s)4249 void SetFree(s)
4250 char *s;
4251 {
4252   free(s);
4253 }
4254 
4255 /* Add a new element to the set.  Return TRUE if the element was added
4256 ** and FALSE if it was already there. */
SetAdd(s,e)4257 int SetAdd(s,e)
4258 char *s;
4259 int e;
4260 {
4261   int rv;
4262   assert( e>=0 && e<size );
4263   rv = s[e];
4264   s[e] = 1;
4265   return !rv;
4266 }
4267 
4268 /* Add every element of s2 to s1.  Return TRUE if s1 changes. */
SetUnion(s1,s2)4269 int SetUnion(s1,s2)
4270 char *s1;
4271 char *s2;
4272 {
4273   int i, progress;
4274   progress = 0;
4275   for(i=0; i<size; i++){
4276     if( s2[i]==0 ) continue;
4277     if( s1[i]==0 ){
4278       progress = 1;
4279       s1[i] = 1;
4280     }
4281   }
4282   return progress;
4283 }
4284 /********************** From the file "table.c" ****************************/
4285 /*
4286 ** All code in this file has been automatically generated
4287 ** from a specification in the file
4288 **              "table.q"
4289 ** by the associative array code building program "aagen".
4290 ** Do not edit this file!  Instead, edit the specification
4291 ** file, then rerun aagen.
4292 */
4293 /*
4294 ** Code for processing tables in the LEMON parser generator.
4295 */
4296 
strhash(x)4297 PRIVATE int strhash(x)
4298 char *x;
4299 {
4300   int h = 0;
4301   while( *x) h = h*13 + *(x++);
4302   return h;
4303 }
4304 
4305 /* Works like strdup, sort of.  Save a string in malloced memory, but
4306 ** keep strings in a table so that the same string is not in more
4307 ** than one place.
4308 */
Strsafe(y)4309 char *Strsafe(y)
4310 char *y;
4311 {
4312   char *z;
4313 
4314   if( y==0 ) return 0;
4315   z = Strsafe_find(y);
4316   if( z==0 && (z=malloc( lemonStrlen(y)+1 ))!=0 ){
4317     strcpy(z,y);
4318     Strsafe_insert(z);
4319   }
4320   MemoryCheck(z);
4321   return z;
4322 }
4323 
4324 /* There is one instance of the following structure for each
4325 ** associative array of type "x1".
4326 */
4327 struct s_x1 {
4328   int size;               /* The number of available slots. */
4329                           /*   Must be a power of 2 greater than or */
4330                           /*   equal to 1 */
4331   int count;              /* Number of currently slots filled */
4332   struct s_x1node *tbl;  /* The data stored here */
4333   struct s_x1node **ht;  /* Hash table for lookups */
4334 };
4335 
4336 /* There is one instance of this structure for every data element
4337 ** in an associative array of type "x1".
4338 */
4339 typedef struct s_x1node {
4340   char *data;                  /* The data */
4341   struct s_x1node *next;   /* Next entry with the same hash */
4342   struct s_x1node **from;  /* Previous link */
4343 } x1node;
4344 
4345 /* There is only one instance of the array, which is the following */
4346 static struct s_x1 *x1a;
4347 
4348 /* Allocate a new associative array */
Strsafe_init()4349 void Strsafe_init(){
4350   if( x1a ) return;
4351   x1a = (struct s_x1*)malloc( sizeof(struct s_x1) );
4352   if( x1a ){
4353     x1a->size = 1024;
4354     x1a->count = 0;
4355     x1a->tbl = (x1node*)malloc(
4356       (sizeof(x1node) + sizeof(x1node*))*1024 );
4357     if( x1a->tbl==0 ){
4358       free(x1a);
4359       x1a = 0;
4360     }else{
4361       int i;
4362       x1a->ht = (x1node**)&(x1a->tbl[1024]);
4363       for(i=0; i<1024; i++) x1a->ht[i] = 0;
4364     }
4365   }
4366 }
4367 /* Insert a new record into the array.  Return TRUE if successful.
4368 ** Prior data with the same key is NOT overwritten */
Strsafe_insert(data)4369 int Strsafe_insert(data)
4370 char *data;
4371 {
4372   x1node *np;
4373   int h;
4374   int ph;
4375 
4376   if( x1a==0 ) return 0;
4377   ph = strhash(data);
4378   h = ph & (x1a->size-1);
4379   np = x1a->ht[h];
4380   while( np ){
4381     if( strcmp(np->data,data)==0 ){
4382       /* An existing entry with the same key is found. */
4383       /* Fail because overwrite is not allows. */
4384       return 0;
4385     }
4386     np = np->next;
4387   }
4388   if( x1a->count>=x1a->size ){
4389     /* Need to make the hash table bigger */
4390     int i,size;
4391     struct s_x1 array;
4392     array.size = size = x1a->size*2;
4393     array.count = x1a->count;
4394     array.tbl = (x1node*)malloc(
4395       (sizeof(x1node) + sizeof(x1node*))*size );
4396     if( array.tbl==0 ) return 0;  /* Fail due to malloc failure */
4397     array.ht = (x1node**)&(array.tbl[size]);
4398     for(i=0; i<size; i++) array.ht[i] = 0;
4399     for(i=0; i<x1a->count; i++){
4400       x1node *oldnp, *newnp;
4401       oldnp = &(x1a->tbl[i]);
4402       h = strhash(oldnp->data) & (size-1);
4403       newnp = &(array.tbl[i]);
4404       if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4405       newnp->next = array.ht[h];
4406       newnp->data = oldnp->data;
4407       newnp->from = &(array.ht[h]);
4408       array.ht[h] = newnp;
4409     }
4410     free(x1a->tbl);
4411     *x1a = array;
4412   }
4413   /* Insert the new data */
4414   h = ph & (x1a->size-1);
4415   np = &(x1a->tbl[x1a->count++]);
4416   np->data = data;
4417   if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next);
4418   np->next = x1a->ht[h];
4419   x1a->ht[h] = np;
4420   np->from = &(x1a->ht[h]);
4421   return 1;
4422 }
4423 
4424 /* Return a pointer to data assigned to the given key.  Return NULL
4425 ** if no such key. */
Strsafe_find(key)4426 char *Strsafe_find(key)
4427 char *key;
4428 {
4429   int h;
4430   x1node *np;
4431 
4432   if( x1a==0 ) return 0;
4433   h = strhash(key) & (x1a->size-1);
4434   np = x1a->ht[h];
4435   while( np ){
4436     if( strcmp(np->data,key)==0 ) break;
4437     np = np->next;
4438   }
4439   return np ? np->data : 0;
4440 }
4441 
4442 /* Return a pointer to the (terminal or nonterminal) symbol "x".
4443 ** Create a new symbol if this is the first time "x" has been seen.
4444 */
Symbol_new(x)4445 struct symbol *Symbol_new(x)
4446 char *x;
4447 {
4448   struct symbol *sp;
4449 
4450   sp = Symbol_find(x);
4451   if( sp==0 ){
4452     sp = (struct symbol *)calloc(1, sizeof(struct symbol) );
4453     MemoryCheck(sp);
4454     sp->name = Strsafe(x);
4455     sp->type = isupper(*x) ? TERMINAL : NONTERMINAL;
4456     sp->rule = 0;
4457     sp->fallback = 0;
4458     sp->prec = -1;
4459     sp->assoc = UNK;
4460     sp->firstset = 0;
4461     sp->lambda = LEMON_FALSE;
4462     sp->destructor = 0;
4463     sp->destLineno = 0;
4464     sp->datatype = 0;
4465     sp->useCnt = 0;
4466     Symbol_insert(sp,sp->name);
4467   }
4468   sp->useCnt++;
4469   return sp;
4470 }
4471 
4472 /* Compare two symbols for working purposes
4473 **
4474 ** Symbols that begin with upper case letters (terminals or tokens)
4475 ** must sort before symbols that begin with lower case letters
4476 ** (non-terminals).  Other than that, the order does not matter.
4477 **
4478 ** We find experimentally that leaving the symbols in their original
4479 ** order (the order they appeared in the grammar file) gives the
4480 ** smallest parser tables in SQLite.
4481 */
Symbolcmpp(const void * void_a,const void * void_b)4482 int Symbolcmpp(const void *void_a, const void *void_b){
4483   struct symbol *a = *(struct symbol **)void_a;
4484   struct symbol *b = *(struct symbol **)void_b;
4485   int i1 = a->index + 10000000*(a->name[0]>'Z');
4486   int i2 = b->index + 10000000*(b->name[0]>'Z');
4487   return i1-i2;
4488 }
4489 
4490 /* There is one instance of the following structure for each
4491 ** associative array of type "x2".
4492 */
4493 struct s_x2 {
4494   int size;               /* The number of available slots. */
4495                           /*   Must be a power of 2 greater than or */
4496                           /*   equal to 1 */
4497   int count;              /* Number of currently slots filled */
4498   struct s_x2node *tbl;  /* The data stored here */
4499   struct s_x2node **ht;  /* Hash table for lookups */
4500 };
4501 
4502 /* There is one instance of this structure for every data element
4503 ** in an associative array of type "x2".
4504 */
4505 typedef struct s_x2node {
4506   struct symbol *data;                  /* The data */
4507   char *key;                   /* The key */
4508   struct s_x2node *next;   /* Next entry with the same hash */
4509   struct s_x2node **from;  /* Previous link */
4510 } x2node;
4511 
4512 /* There is only one instance of the array, which is the following */
4513 static struct s_x2 *x2a;
4514 
4515 /* Allocate a new associative array */
Symbol_init()4516 void Symbol_init(){
4517   if( x2a ) return;
4518   x2a = (struct s_x2*)malloc( sizeof(struct s_x2) );
4519   if( x2a ){
4520     x2a->size = 128;
4521     x2a->count = 0;
4522     x2a->tbl = (x2node*)malloc(
4523       (sizeof(x2node) + sizeof(x2node*))*128 );
4524     if( x2a->tbl==0 ){
4525       free(x2a);
4526       x2a = 0;
4527     }else{
4528       int i;
4529       x2a->ht = (x2node**)&(x2a->tbl[128]);
4530       for(i=0; i<128; i++) x2a->ht[i] = 0;
4531     }
4532   }
4533 }
4534 /* Insert a new record into the array.  Return TRUE if successful.
4535 ** Prior data with the same key is NOT overwritten */
Symbol_insert(data,key)4536 int Symbol_insert(data,key)
4537 struct symbol *data;
4538 char *key;
4539 {
4540   x2node *np;
4541   int h;
4542   int ph;
4543 
4544   if( x2a==0 ) return 0;
4545   ph = strhash(key);
4546   h = ph & (x2a->size-1);
4547   np = x2a->ht[h];
4548   while( np ){
4549     if( strcmp(np->key,key)==0 ){
4550       /* An existing entry with the same key is found. */
4551       /* Fail because overwrite is not allows. */
4552       return 0;
4553     }
4554     np = np->next;
4555   }
4556   if( x2a->count>=x2a->size ){
4557     /* Need to make the hash table bigger */
4558     int i,size;
4559     struct s_x2 array;
4560     array.size = size = x2a->size*2;
4561     array.count = x2a->count;
4562     array.tbl = (x2node*)malloc(
4563       (sizeof(x2node) + sizeof(x2node*))*size );
4564     if( array.tbl==0 ) return 0;  /* Fail due to malloc failure */
4565     array.ht = (x2node**)&(array.tbl[size]);
4566     for(i=0; i<size; i++) array.ht[i] = 0;
4567     for(i=0; i<x2a->count; i++){
4568       x2node *oldnp, *newnp;
4569       oldnp = &(x2a->tbl[i]);
4570       h = strhash(oldnp->key) & (size-1);
4571       newnp = &(array.tbl[i]);
4572       if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4573       newnp->next = array.ht[h];
4574       newnp->key = oldnp->key;
4575       newnp->data = oldnp->data;
4576       newnp->from = &(array.ht[h]);
4577       array.ht[h] = newnp;
4578     }
4579     free(x2a->tbl);
4580     *x2a = array;
4581   }
4582   /* Insert the new data */
4583   h = ph & (x2a->size-1);
4584   np = &(x2a->tbl[x2a->count++]);
4585   np->key = key;
4586   np->data = data;
4587   if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next);
4588   np->next = x2a->ht[h];
4589   x2a->ht[h] = np;
4590   np->from = &(x2a->ht[h]);
4591   return 1;
4592 }
4593 
4594 /* Return a pointer to data assigned to the given key.  Return NULL
4595 ** if no such key. */
Symbol_find(key)4596 struct symbol *Symbol_find(key)
4597 char *key;
4598 {
4599   int h;
4600   x2node *np;
4601 
4602   if( x2a==0 ) return 0;
4603   h = strhash(key) & (x2a->size-1);
4604   np = x2a->ht[h];
4605   while( np ){
4606     if( strcmp(np->key,key)==0 ) break;
4607     np = np->next;
4608   }
4609   return np ? np->data : 0;
4610 }
4611 
4612 /* Return the n-th data.  Return NULL if n is out of range. */
Symbol_Nth(n)4613 struct symbol *Symbol_Nth(n)
4614 int n;
4615 {
4616   struct symbol *data;
4617   if( x2a && n>0 && n<=x2a->count ){
4618     data = x2a->tbl[n-1].data;
4619   }else{
4620     data = 0;
4621   }
4622   return data;
4623 }
4624 
4625 /* Return the size of the array */
Symbol_count()4626 int Symbol_count()
4627 {
4628   return x2a ? x2a->count : 0;
4629 }
4630 
4631 /* Return an array of pointers to all data in the table.
4632 ** The array is obtained from malloc.  Return NULL if memory allocation
4633 ** problems, or if the array is empty. */
Symbol_arrayof()4634 struct symbol **Symbol_arrayof()
4635 {
4636   struct symbol **array;
4637   int i,size;
4638   if( x2a==0 ) return 0;
4639   size = x2a->count;
4640   array = (struct symbol **)calloc(size, sizeof(struct symbol *));
4641   if( array ){
4642     for(i=0; i<size; i++) array[i] = x2a->tbl[i].data;
4643   }
4644   return array;
4645 }
4646 
4647 /* Compare two configurations */
Configcmp(a,b)4648 int Configcmp(a,b)
4649 struct config *a;
4650 struct config *b;
4651 {
4652   int x;
4653   x = a->rp->index - b->rp->index;
4654   if( x==0 ) x = a->dot - b->dot;
4655   return x;
4656 }
4657 
4658 /* Compare two states */
statecmp(a,b)4659 PRIVATE int statecmp(a,b)
4660 struct config *a;
4661 struct config *b;
4662 {
4663   int rc;
4664   for(rc=0; rc==0 && a && b;  a=a->bp, b=b->bp){
4665     rc = a->rp->index - b->rp->index;
4666     if( rc==0 ) rc = a->dot - b->dot;
4667   }
4668   if( rc==0 ){
4669     if( a ) rc = 1;
4670     if( b ) rc = -1;
4671   }
4672   return rc;
4673 }
4674 
4675 /* Hash a state */
statehash(a)4676 PRIVATE int statehash(a)
4677 struct config *a;
4678 {
4679   int h=0;
4680   while( a ){
4681     h = h*571 + a->rp->index*37 + a->dot;
4682     a = a->bp;
4683   }
4684   return h;
4685 }
4686 
4687 /* Allocate a new state structure */
State_new()4688 struct state *State_new()
4689 {
4690   struct state *new;
4691   new = (struct state *)calloc(1, sizeof(struct state) );
4692   MemoryCheck(new);
4693   return new;
4694 }
4695 
4696 /* There is one instance of the following structure for each
4697 ** associative array of type "x3".
4698 */
4699 struct s_x3 {
4700   int size;               /* The number of available slots. */
4701                           /*   Must be a power of 2 greater than or */
4702                           /*   equal to 1 */
4703   int count;              /* Number of currently slots filled */
4704   struct s_x3node *tbl;  /* The data stored here */
4705   struct s_x3node **ht;  /* Hash table for lookups */
4706 };
4707 
4708 /* There is one instance of this structure for every data element
4709 ** in an associative array of type "x3".
4710 */
4711 typedef struct s_x3node {
4712   struct state *data;                  /* The data */
4713   struct config *key;                   /* The key */
4714   struct s_x3node *next;   /* Next entry with the same hash */
4715   struct s_x3node **from;  /* Previous link */
4716 } x3node;
4717 
4718 /* There is only one instance of the array, which is the following */
4719 static struct s_x3 *x3a;
4720 
4721 /* Allocate a new associative array */
State_init()4722 void State_init(){
4723   if( x3a ) return;
4724   x3a = (struct s_x3*)malloc( sizeof(struct s_x3) );
4725   if( x3a ){
4726     x3a->size = 128;
4727     x3a->count = 0;
4728     x3a->tbl = (x3node*)malloc(
4729       (sizeof(x3node) + sizeof(x3node*))*128 );
4730     if( x3a->tbl==0 ){
4731       free(x3a);
4732       x3a = 0;
4733     }else{
4734       int i;
4735       x3a->ht = (x3node**)&(x3a->tbl[128]);
4736       for(i=0; i<128; i++) x3a->ht[i] = 0;
4737     }
4738   }
4739 }
4740 /* Insert a new record into the array.  Return TRUE if successful.
4741 ** Prior data with the same key is NOT overwritten */
State_insert(data,key)4742 int State_insert(data,key)
4743 struct state *data;
4744 struct config *key;
4745 {
4746   x3node *np;
4747   int h;
4748   int ph;
4749 
4750   if( x3a==0 ) return 0;
4751   ph = statehash(key);
4752   h = ph & (x3a->size-1);
4753   np = x3a->ht[h];
4754   while( np ){
4755     if( statecmp(np->key,key)==0 ){
4756       /* An existing entry with the same key is found. */
4757       /* Fail because overwrite is not allows. */
4758       return 0;
4759     }
4760     np = np->next;
4761   }
4762   if( x3a->count>=x3a->size ){
4763     /* Need to make the hash table bigger */
4764     int i,size;
4765     struct s_x3 array;
4766     array.size = size = x3a->size*2;
4767     array.count = x3a->count;
4768     array.tbl = (x3node*)malloc(
4769       (sizeof(x3node) + sizeof(x3node*))*size );
4770     if( array.tbl==0 ) return 0;  /* Fail due to malloc failure */
4771     array.ht = (x3node**)&(array.tbl[size]);
4772     for(i=0; i<size; i++) array.ht[i] = 0;
4773     for(i=0; i<x3a->count; i++){
4774       x3node *oldnp, *newnp;
4775       oldnp = &(x3a->tbl[i]);
4776       h = statehash(oldnp->key) & (size-1);
4777       newnp = &(array.tbl[i]);
4778       if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4779       newnp->next = array.ht[h];
4780       newnp->key = oldnp->key;
4781       newnp->data = oldnp->data;
4782       newnp->from = &(array.ht[h]);
4783       array.ht[h] = newnp;
4784     }
4785     free(x3a->tbl);
4786     *x3a = array;
4787   }
4788   /* Insert the new data */
4789   h = ph & (x3a->size-1);
4790   np = &(x3a->tbl[x3a->count++]);
4791   np->key = key;
4792   np->data = data;
4793   if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next);
4794   np->next = x3a->ht[h];
4795   x3a->ht[h] = np;
4796   np->from = &(x3a->ht[h]);
4797   return 1;
4798 }
4799 
4800 /* Return a pointer to data assigned to the given key.  Return NULL
4801 ** if no such key. */
State_find(key)4802 struct state *State_find(key)
4803 struct config *key;
4804 {
4805   int h;
4806   x3node *np;
4807 
4808   if( x3a==0 ) return 0;
4809   h = statehash(key) & (x3a->size-1);
4810   np = x3a->ht[h];
4811   while( np ){
4812     if( statecmp(np->key,key)==0 ) break;
4813     np = np->next;
4814   }
4815   return np ? np->data : 0;
4816 }
4817 
4818 /* Return an array of pointers to all data in the table.
4819 ** The array is obtained from malloc.  Return NULL if memory allocation
4820 ** problems, or if the array is empty. */
State_arrayof()4821 struct state **State_arrayof()
4822 {
4823   struct state **array;
4824   int i,size;
4825   if( x3a==0 ) return 0;
4826   size = x3a->count;
4827   array = (struct state **)malloc( sizeof(struct state *)*size );
4828   if( array ){
4829     for(i=0; i<size; i++) array[i] = x3a->tbl[i].data;
4830   }
4831   return array;
4832 }
4833 
4834 /* Hash a configuration */
confighash(a)4835 PRIVATE int confighash(a)
4836 struct config *a;
4837 {
4838   int h=0;
4839   h = h*571 + a->rp->index*37 + a->dot;
4840   return h;
4841 }
4842 
4843 /* There is one instance of the following structure for each
4844 ** associative array of type "x4".
4845 */
4846 struct s_x4 {
4847   int size;               /* The number of available slots. */
4848                           /*   Must be a power of 2 greater than or */
4849                           /*   equal to 1 */
4850   int count;              /* Number of currently slots filled */
4851   struct s_x4node *tbl;  /* The data stored here */
4852   struct s_x4node **ht;  /* Hash table for lookups */
4853 };
4854 
4855 /* There is one instance of this structure for every data element
4856 ** in an associative array of type "x4".
4857 */
4858 typedef struct s_x4node {
4859   struct config *data;                  /* The data */
4860   struct s_x4node *next;   /* Next entry with the same hash */
4861   struct s_x4node **from;  /* Previous link */
4862 } x4node;
4863 
4864 /* There is only one instance of the array, which is the following */
4865 static struct s_x4 *x4a;
4866 
4867 /* Allocate a new associative array */
Configtable_init()4868 void Configtable_init(){
4869   if( x4a ) return;
4870   x4a = (struct s_x4*)malloc( sizeof(struct s_x4) );
4871   if( x4a ){
4872     x4a->size = 64;
4873     x4a->count = 0;
4874     x4a->tbl = (x4node*)malloc(
4875       (sizeof(x4node) + sizeof(x4node*))*64 );
4876     if( x4a->tbl==0 ){
4877       free(x4a);
4878       x4a = 0;
4879     }else{
4880       int i;
4881       x4a->ht = (x4node**)&(x4a->tbl[64]);
4882       for(i=0; i<64; i++) x4a->ht[i] = 0;
4883     }
4884   }
4885 }
4886 /* Insert a new record into the array.  Return TRUE if successful.
4887 ** Prior data with the same key is NOT overwritten */
Configtable_insert(data)4888 int Configtable_insert(data)
4889 struct config *data;
4890 {
4891   x4node *np;
4892   int h;
4893   int ph;
4894 
4895   if( x4a==0 ) return 0;
4896   ph = confighash(data);
4897   h = ph & (x4a->size-1);
4898   np = x4a->ht[h];
4899   while( np ){
4900     if( Configcmp(np->data,data)==0 ){
4901       /* An existing entry with the same key is found. */
4902       /* Fail because overwrite is not allows. */
4903       return 0;
4904     }
4905     np = np->next;
4906   }
4907   if( x4a->count>=x4a->size ){
4908     /* Need to make the hash table bigger */
4909     int i,size;
4910     struct s_x4 array;
4911     array.size = size = x4a->size*2;
4912     array.count = x4a->count;
4913     array.tbl = (x4node*)malloc(
4914       (sizeof(x4node) + sizeof(x4node*))*size );
4915     if( array.tbl==0 ) return 0;  /* Fail due to malloc failure */
4916     array.ht = (x4node**)&(array.tbl[size]);
4917     for(i=0; i<size; i++) array.ht[i] = 0;
4918     for(i=0; i<x4a->count; i++){
4919       x4node *oldnp, *newnp;
4920       oldnp = &(x4a->tbl[i]);
4921       h = confighash(oldnp->data) & (size-1);
4922       newnp = &(array.tbl[i]);
4923       if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4924       newnp->next = array.ht[h];
4925       newnp->data = oldnp->data;
4926       newnp->from = &(array.ht[h]);
4927       array.ht[h] = newnp;
4928     }
4929     free(x4a->tbl);
4930     *x4a = array;
4931   }
4932   /* Insert the new data */
4933   h = ph & (x4a->size-1);
4934   np = &(x4a->tbl[x4a->count++]);
4935   np->data = data;
4936   if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next);
4937   np->next = x4a->ht[h];
4938   x4a->ht[h] = np;
4939   np->from = &(x4a->ht[h]);
4940   return 1;
4941 }
4942 
4943 /* Return a pointer to data assigned to the given key.  Return NULL
4944 ** if no such key. */
Configtable_find(key)4945 struct config *Configtable_find(key)
4946 struct config *key;
4947 {
4948   int h;
4949   x4node *np;
4950 
4951   if( x4a==0 ) return 0;
4952   h = confighash(key) & (x4a->size-1);
4953   np = x4a->ht[h];
4954   while( np ){
4955     if( Configcmp(np->data,key)==0 ) break;
4956     np = np->next;
4957   }
4958   return np ? np->data : 0;
4959 }
4960 
4961 /* Remove all data from the table.  Pass each data to the function "f"
4962 ** as it is removed.  ("f" may be null to avoid this step.) */
4963 void Configtable_clear(f)
4964 int(*f)(/* struct config * */);
4965 {
4966   int i;
4967   if( x4a==0 || x4a->count==0 ) return;
4968   if( f ) for(i=0; i<x4a->count; i++) (*f)(x4a->tbl[i].data);
4969   for(i=0; i<x4a->size; i++) x4a->ht[i] = 0;
4970   x4a->count = 0;
4971   return;
4972 }
4973