1 /*
2 ** This file contains all sources (including headers) to the LEMON
3 ** LALR(1) parser generator.  The sources have been combined into a
4 ** single file to make it easy to include LEMON in the source tree
5 ** and Makefile of another program.
6 **
7 ** The author of this program disclaims copyright.
8 */
9 #include <stdio.h>
10 #include <stdarg.h>
11 #include <string.h>
12 #include <ctype.h>
13 #include <stdlib.h>
14 #include <assert.h>
15 
16 #ifndef __WIN32__
17 #   if defined(_WIN32) || defined(WIN32)
18 #	define __WIN32__
19 #   endif
20 #endif
21 
22 #ifdef __WIN32__
23 extern int access();
24 #else
25 #include <unistd.h>
26 #endif
27 
28 /* #define PRIVATE static */
29 #define PRIVATE
30 
31 #ifdef TEST
32 #define MAXRHS 5       /* Set low to exercise exception code */
33 #else
34 #define MAXRHS 1000
35 #endif
36 
37 static int showPrecedenceConflict = 0;
38 static const char **made_files = NULL;
39 static int made_files_count = 0;
40 static int successful_exit = 0;
LemonAtExit(void)41 static void LemonAtExit(void)
42 {
43     /* if we failed, delete (most) files we made, to unconfuse build tools. */
44     int i;
45     for (i = 0; i < made_files_count; i++) {
46         if (!successful_exit) {
47             remove(made_files[i]);
48         }
49     }
50     free(made_files);
51     made_files_count = 0;
52     made_files = NULL;
53 }
54 
55 static char *msort(char*,char**,int(*)(const char*,const char*));
56 
57 /*
58 ** Compilers are getting increasingly pedantic about type conversions
59 ** as C evolves ever closer to Ada....  To work around the latest problems
60 ** we have to define the following variant of strlen().
61 */
62 #define lemonStrlen(X)   ((int)strlen(X))
63 
64 /* a few forward declarations... */
65 struct rule;
66 struct lemon;
67 struct action;
68 
69 static struct action *Action_new(void);
70 static struct action *Action_sort(struct action *);
71 
72 /********** From the file "build.h" ************************************/
73 void FindRulePrecedences();
74 void FindFirstSets();
75 void FindStates();
76 void FindLinks();
77 void FindFollowSets();
78 void FindActions();
79 
80 /********* From the file "configlist.h" *********************************/
81 void Configlist_init(void);
82 struct config *Configlist_add(struct rule *, int);
83 struct config *Configlist_addbasis(struct rule *, int);
84 void Configlist_closure(struct lemon *);
85 void Configlist_sort(void);
86 void Configlist_sortbasis(void);
87 struct config *Configlist_return(void);
88 struct config *Configlist_basis(void);
89 void Configlist_eat(struct config *);
90 void Configlist_reset(void);
91 
92 /********* From the file "error.h" ***************************************/
93 void ErrorMsg(const char *, int,const char *, ...);
94 
95 /****** From the file "option.h" ******************************************/
96 enum option_type { OPT_FLAG=1,  OPT_INT,  OPT_DBL,  OPT_STR,
97          OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR};
98 struct s_options {
99   enum option_type type;
100   const char *label;
101   char *arg;
102   const char *message;
103 };
104 int    OptInit(char**,struct s_options*,FILE*);
105 int    OptNArgs(void);
106 char  *OptArg(int);
107 void   OptErr(int);
108 void   OptPrint(void);
109 
110 /******** From the file "parse.h" *****************************************/
111 void Parse(struct lemon *lemp);
112 
113 /********* From the file "plink.h" ***************************************/
114 struct plink *Plink_new(void);
115 void Plink_add(struct plink **, struct config *);
116 void Plink_copy(struct plink **, struct plink *);
117 void Plink_delete(struct plink *);
118 
119 /********** From the file "report.h" *************************************/
120 void Reprint(struct lemon *);
121 void ReportOutput(struct lemon *);
122 void ReportTable(struct lemon *, int);
123 void ReportHeader(struct lemon *);
124 void CompressTables(struct lemon *);
125 void ResortStates(struct lemon *);
126 
127 /********** From the file "set.h" ****************************************/
128 void  SetSize(int);             /* All sets will be of size N */
129 char *SetNew(void);               /* A new set for element 0..N */
130 void  SetFree(char*);             /* Deallocate a set */
131 
132 char *SetNew(void);               /* A new set for element 0..N */
133 int SetAdd(char*,int);            /* Add element to a set */
134 int SetUnion(char *,char *);    /* A <- A U B, thru element N */
135 #define SetFind(X,Y) (X[Y])       /* True if Y is in set X */
136 
137 /********** From the file "struct.h" *************************************/
138 /*
139 ** Principal data structures for the LEMON parser generator.
140 */
141 
142 typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean;
143 
144 /* Symbols (terminals and nonterminals) of the grammar are stored
145 ** in the following: */
146 enum symbol_type {
147   TERMINAL,
148   NONTERMINAL,
149   MULTITERMINAL
150 };
151 enum e_assoc {
152     LEFT,
153     RIGHT,
154     NONE,
155     UNK
156 };
157 struct symbol {
158   const char *name;        /* Name of the symbol */
159   int index;               /* Index number for this symbol */
160   enum symbol_type type;   /* Symbols are all either TERMINALS or NTs */
161   struct rule *rule;       /* Linked list of rules of this (if an NT) */
162   struct symbol *fallback; /* fallback token in case this token doesn't parse */
163   int prec;                /* Precedence if defined (-1 otherwise) */
164   enum e_assoc assoc;      /* Associativity if precedence is defined */
165   char *firstset;          /* First-set for all rules of this symbol */
166   Boolean lambda;          /* True if NT and can generate an empty string */
167   int useCnt;              /* Number of times used */
168   char *destructor;        /* Code which executes whenever this symbol is
169                            ** popped from the stack during error processing */
170   int destLineno;          /* Line number for start of destructor */
171   char *datatype;          /* The data type of information held by this
172                            ** object. Only used if type==NONTERMINAL */
173   int dtnum;               /* The data type number.  In the parser, the value
174                            ** stack is a union.  The .yy%d element of this
175                            ** union is the correct data type for this object */
176   /* The following fields are used by MULTITERMINALs only */
177   int nsubsym;             /* Number of constituent symbols in the MULTI */
178   struct symbol **subsym;  /* Array of constituent symbols */
179 };
180 
181 /* Each production rule in the grammar is stored in the following
182 ** structure.  */
183 struct rule {
184   struct symbol *lhs;      /* Left-hand side of the rule */
185   const char *lhsalias;    /* Alias for the LHS (NULL if none) */
186   int lhsStart;            /* True if left-hand side is the start symbol */
187   int ruleline;            /* Line number for the rule */
188   int nrhs;                /* Number of RHS symbols */
189   struct symbol **rhs;     /* The RHS symbols */
190   const char **rhsalias;   /* An alias for each RHS symbol (NULL if none) */
191   int line;                /* Line number at which code begins */
192   const char *code;        /* The code executed when this rule is reduced */
193   struct symbol *precsym;  /* Precedence symbol for this rule */
194   int index;               /* An index number for this rule */
195   Boolean canReduce;       /* True if this rule is ever reduced */
196   struct rule *nextlhs;    /* Next rule with the same LHS */
197   struct rule *next;       /* Next rule in the global list */
198 };
199 
200 /* A configuration is a production rule of the grammar together with
201 ** a mark (dot) showing how much of that rule has been processed so far.
202 ** Configurations also contain a follow-set which is a list of terminal
203 ** symbols which are allowed to immediately follow the end of the rule.
204 ** Every configuration is recorded as an instance of the following: */
205 enum cfgstatus {
206   COMPLETE,
207   INCOMPLETE
208 };
209 struct config {
210   struct rule *rp;         /* The rule upon which the configuration is based */
211   int dot;                 /* The parse point */
212   char *fws;               /* Follow-set for this configuration only */
213   struct plink *fplp;      /* Follow-set forward propagation links */
214   struct plink *bplp;      /* Follow-set backwards propagation links */
215   struct state *stp;       /* Pointer to state which contains this */
216   enum cfgstatus status;   /* used during followset and shift computations */
217   struct config *next;     /* Next configuration in the state */
218   struct config *bp;       /* The next basis configuration */
219 };
220 
221 enum e_action {
222   SHIFT,
223   ACCEPT,
224   REDUCE,
225   ERROR,
226   SSCONFLICT,              /* A shift/shift conflict */
227   SRCONFLICT,              /* Was a reduce, but part of a conflict */
228   RRCONFLICT,              /* Was a reduce, but part of a conflict */
229   SH_RESOLVED,             /* Was a shift.  Precedence resolved conflict */
230   RD_RESOLVED,             /* Was reduce.  Precedence resolved conflict */
231   NOT_USED                 /* Deleted by compression */
232 };
233 
234 /* Every shift or reduce operation is stored as one of the following */
235 struct action {
236   struct symbol *sp;       /* The look-ahead symbol */
237   enum e_action type;
238   union {
239     struct state *stp;     /* The new state, if a shift */
240     struct rule *rp;       /* The rule, if a reduce */
241   } x;
242   struct action *next;     /* Next action for this state */
243   struct action *collide;  /* Next action with the same hash */
244 };
245 
246 /* Each state of the generated parser's finite state machine
247 ** is encoded as an instance of the following structure. */
248 struct state {
249   struct config *bp;       /* The basis configurations for this state */
250   struct config *cfp;      /* All configurations in this set */
251   int statenum;            /* Sequential number for this state */
252   struct action *ap;       /* Array of actions for this state */
253   int nTknAct, nNtAct;     /* Number of actions on terminals and nonterminals */
254   int iTknOfst, iNtOfst;   /* yy_action[] offset for terminals and nonterms */
255   int iDflt;               /* Default action */
256 };
257 #define NO_OFFSET (-2147483647)
258 
259 /* A followset propagation link indicates that the contents of one
260 ** configuration followset should be propagated to another whenever
261 ** the first changes. */
262 struct plink {
263   struct config *cfp;      /* The configuration to which linked */
264   struct plink *next;      /* The next propagate link */
265 };
266 
267 /* The state vector for the entire parser generator is recorded as
268 ** follows.  (LEMON uses no global variables and makes little use of
269 ** static variables.  Fields in the following structure can be thought
270 ** of as begin global variables in the program.) */
271 struct lemon {
272   struct state **sorted;   /* Table of states sorted by state number */
273   struct rule *rule;       /* List of all rules */
274   int nstate;              /* Number of states */
275   int nrule;               /* Number of rules */
276   int nsymbol;             /* Number of terminal and nonterminal symbols */
277   int nterminal;           /* Number of terminal symbols */
278   struct symbol **symbols; /* Sorted array of pointers to symbols */
279   int errorcnt;            /* Number of errors */
280   struct symbol *errsym;   /* The error symbol */
281   struct symbol *wildcard; /* Token that matches anything */
282   char *name;              /* Name of the generated parser */
283   char *arg;               /* Declaration of the 3th argument to parser */
284   char *tokentype;         /* Type of terminal symbols in the parser stack */
285   char *vartype;           /* The default type of non-terminal symbols */
286   char *start;             /* Name of the start symbol for the grammar */
287   char *stacksize;         /* Size of the parser stack */
288   char *include;           /* Code to put at the start of the C file */
289   char *error;             /* Code to execute when an error is seen */
290   char *overflow;          /* Code to execute on a stack overflow */
291   char *failure;           /* Code to execute on parser failure */
292   char *accept;            /* Code to execute when the parser excepts */
293   char *extracode;         /* Code appended to the generated file */
294   char *tokendest;         /* Code to execute to destroy token data */
295   char *vardest;           /* Code for the default non-terminal destructor */
296   char *filename;          /* Name of the input file */
297   char *outname;           /* Name of the current output file */
298   char *tokenprefix;       /* A prefix added to token names in the .h file */
299   int nconflict;           /* Number of parsing conflicts */
300   int tablesize;           /* Size of the parse tables */
301   int basisflag;           /* Print only basis configurations */
302   int has_fallback;        /* True if any %fallback is seen in the grammar */
303   int nolinenosflag;       /* True if #line statements should not be printed */
304   char *argv0;             /* Name of the program */
305 };
306 
307 #define MemoryCheck(X) if((X)==0){ \
308   extern void memory_error(); \
309   memory_error(); \
310 }
311 
312 /**************** From the file "table.h" *********************************/
313 /*
314 ** All code in this file has been automatically generated
315 ** from a specification in the file
316 **              "table.q"
317 ** by the associative array code building program "aagen".
318 ** Do not edit this file!  Instead, edit the specification
319 ** file, then rerun aagen.
320 */
321 /*
322 ** Code for processing tables in the LEMON parser generator.
323 */
324 /* Routines for handling a strings */
325 
326 const char *Strsafe(const char *);
327 
328 void Strsafe_init(void);
329 int Strsafe_insert(const char *);
330 const char *Strsafe_find(const char *);
331 
332 /* Routines for handling symbols of the grammar */
333 
334 struct symbol *Symbol_new(const char *);
335 int Symbolcmpp(const void *, const void *);
336 void Symbol_init(void);
337 int Symbol_insert(struct symbol *, const char *);
338 struct symbol *Symbol_find(const char *);
339 struct symbol *Symbol_Nth(int);
340 int Symbol_count(void);
341 struct symbol **Symbol_arrayof(void);
342 
343 /* Routines to manage the state table */
344 
345 int Configcmp(const char *, const char *);
346 struct state *State_new(void);
347 void State_init(void);
348 int State_insert(struct state *, struct config *);
349 struct state *State_find(struct config *);
350 struct state **State_arrayof(/*  */);
351 
352 /* Routines used for efficiency in Configlist_add */
353 
354 void Configtable_init(void);
355 int Configtable_insert(struct config *);
356 struct config *Configtable_find(struct config *);
357 void Configtable_clear(int(*)(struct config *));
358 
359 /****************** From the file "action.c" *******************************/
360 /*
361 ** Routines processing parser actions in the LEMON parser generator.
362 */
363 
364 /* Allocate a new parser action */
Action_new(void)365 static struct action *Action_new(void){
366   static struct action *freelist = 0;
367   struct action *newaction;
368 
369   if( freelist==0 ){
370     int i;
371     int amt = 100;
372     freelist = (struct action *)calloc(amt, sizeof(struct action));
373     if( freelist==0 ){
374       fprintf(stderr,"Unable to allocate memory for a new parser action.");
375       exit(1);
376     }
377     for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
378     freelist[amt-1].next = 0;
379   }
380   newaction = freelist;
381   freelist = freelist->next;
382   return newaction;
383 }
384 
385 /* Compare two actions for sorting purposes.  Return negative, zero, or
386 ** positive if the first action is less than, equal to, or greater than
387 ** the first
388 */
actioncmp(struct action * ap1,struct action * ap2)389 static int actioncmp(
390   struct action *ap1,
391   struct action *ap2
392 ){
393   int rc;
394   rc = ap1->sp->index - ap2->sp->index;
395   if( rc==0 ){
396     rc = (int)ap1->type - (int)ap2->type;
397   }
398   if( rc==0 && ap1->type==REDUCE ){
399     rc = ap1->x.rp->index - ap2->x.rp->index;
400   }
401   if( rc==0 ){
402     rc = (int) (ap2 - ap1);
403   }
404   return rc;
405 }
406 
407 /* Sort parser actions */
Action_sort(struct action * ap)408 static struct action *Action_sort(
409   struct action *ap
410 ){
411   ap = (struct action *)msort((char *)ap,(char **)&ap->next,
412                               (int(*)(const char*,const char*))actioncmp);
413   return ap;
414 }
415 
Action_add(struct action ** app,enum e_action type,struct symbol * sp,char * arg)416 void Action_add(
417   struct action **app,
418   enum e_action type,
419   struct symbol *sp,
420   char *arg
421 ){
422   struct action *newaction;
423   newaction = Action_new();
424   newaction->next = *app;
425   *app = newaction;
426   newaction->type = type;
427   newaction->sp = sp;
428   if( type==SHIFT ){
429     newaction->x.stp = (struct state *)arg;
430   }else{
431     newaction->x.rp = (struct rule *)arg;
432   }
433 }
434 /********************** New code to implement the "acttab" module ***********/
435 /*
436 ** This module implements routines use to construct the yy_action[] table.
437 */
438 
439 /*
440 ** The state of the yy_action table under construction is an instance of
441 ** the following structure.
442 **
443 ** The yy_action table maps the pair (state_number, lookahead) into an
444 ** action_number.  The table is an array of integers pairs.  The state_number
445 ** determines an initial offset into the yy_action array.  The lookahead
446 ** value is then added to this initial offset to get an index X into the
447 ** yy_action array. If the aAction[X].lookahead equals the value of the
448 ** of the lookahead input, then the value of the action_number output is
449 ** aAction[X].action.  If the lookaheads do not match then the
450 ** default action for the state_number is returned.
451 **
452 ** All actions associated with a single state_number are first entered
453 ** into aLookahead[] using multiple calls to acttab_action().  Then the
454 ** actions for that single state_number are placed into the aAction[]
455 ** array with a single call to acttab_insert().  The acttab_insert() call
456 ** also resets the aLookahead[] array in preparation for the next
457 ** state number.
458 */
459 struct lookahead_action {
460   int lookahead;             /* Value of the lookahead token */
461   int action;                /* Action to take on the given lookahead */
462 };
463 typedef struct acttab acttab;
464 struct acttab {
465   int nAction;                 /* Number of used slots in aAction[] */
466   int nActionAlloc;            /* Slots allocated for aAction[] */
467   struct lookahead_action
468     *aAction,                  /* The yy_action[] table under construction */
469     *aLookahead;               /* A single new transaction set */
470   int mnLookahead;             /* Minimum aLookahead[].lookahead */
471   int mnAction;                /* Action associated with mnLookahead */
472   int mxLookahead;             /* Maximum aLookahead[].lookahead */
473   int nLookahead;              /* Used slots in aLookahead[] */
474   int nLookaheadAlloc;         /* Slots allocated in aLookahead[] */
475 };
476 
477 /* Return the number of entries in the yy_action table */
478 #define acttab_size(X) ((X)->nAction)
479 
480 /* The value for the N-th entry in yy_action */
481 #define acttab_yyaction(X,N)  ((X)->aAction[N].action)
482 
483 /* The value for the N-th entry in yy_lookahead */
484 #define acttab_yylookahead(X,N)  ((X)->aAction[N].lookahead)
485 
486 /* Free all memory associated with the given acttab */
acttab_free(acttab * p)487 void acttab_free(acttab *p){
488   free( p->aAction );
489   free( p->aLookahead );
490   free( p );
491 }
492 
493 /* Allocate a new acttab structure */
acttab_alloc(void)494 acttab *acttab_alloc(void){
495   acttab *p = (acttab *) calloc( 1, sizeof(*p) );
496   if( p==0 ){
497     fprintf(stderr,"Unable to allocate memory for a new acttab.");
498     exit(1);
499   }
500   memset(p, 0, sizeof(*p));
501   return p;
502 }
503 
504 /* Add a new action to the current transaction set.
505 **
506 ** This routine is called once for each lookahead for a particular
507 ** state.
508 */
acttab_action(acttab * p,int lookahead,int action)509 void acttab_action(acttab *p, int lookahead, int action){
510   if( p->nLookahead>=p->nLookaheadAlloc ){
511     p->nLookaheadAlloc += 25;
512     p->aLookahead = (struct lookahead_action *) realloc( p->aLookahead,
513                              sizeof(p->aLookahead[0])*p->nLookaheadAlloc );
514     if( p->aLookahead==0 ){
515       fprintf(stderr,"malloc failed\n");
516       exit(1);
517     }
518   }
519   if( p->nLookahead==0 ){
520     p->mxLookahead = lookahead;
521     p->mnLookahead = lookahead;
522     p->mnAction = action;
523   }else{
524     if( p->mxLookahead<lookahead ) p->mxLookahead = lookahead;
525     if( p->mnLookahead>lookahead ){
526       p->mnLookahead = lookahead;
527       p->mnAction = action;
528     }
529   }
530   p->aLookahead[p->nLookahead].lookahead = lookahead;
531   p->aLookahead[p->nLookahead].action = action;
532   p->nLookahead++;
533 }
534 
535 /*
536 ** Add the transaction set built up with prior calls to acttab_action()
537 ** into the current action table.  Then reset the transaction set back
538 ** to an empty set in preparation for a new round of acttab_action() calls.
539 **
540 ** Return the offset into the action table of the new transaction.
541 */
acttab_insert(acttab * p)542 int acttab_insert(acttab *p){
543   int i, j, k, n;
544   assert( p->nLookahead>0 );
545 
546   /* Make sure we have enough space to hold the expanded action table
547   ** in the worst case.  The worst case occurs if the transaction set
548   ** must be appended to the current action table
549   */
550   n = p->mxLookahead + 1;
551   if( p->nAction + n >= p->nActionAlloc ){
552     int oldAlloc = p->nActionAlloc;
553     p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20;
554     p->aAction = (struct lookahead_action *) realloc( p->aAction,
555                           sizeof(p->aAction[0])*p->nActionAlloc);
556     if( p->aAction==0 ){
557       fprintf(stderr,"malloc failed\n");
558       exit(1);
559     }
560     for(i=oldAlloc; i<p->nActionAlloc; i++){
561       p->aAction[i].lookahead = -1;
562       p->aAction[i].action = -1;
563     }
564   }
565 
566   /* Scan the existing action table looking for an offset that is a
567   ** duplicate of the current transaction set.  Fall out of the loop
568   ** if and when the duplicate is found.
569   **
570   ** i is the index in p->aAction[] where p->mnLookahead is inserted.
571   */
572   for(i=p->nAction-1; i>=0; i--){
573     if( p->aAction[i].lookahead==p->mnLookahead ){
574       /* All lookaheads and actions in the aLookahead[] transaction
575       ** must match against the candidate aAction[i] entry. */
576       if( p->aAction[i].action!=p->mnAction ) continue;
577       for(j=0; j<p->nLookahead; j++){
578         k = p->aLookahead[j].lookahead - p->mnLookahead + i;
579         if( k<0 || k>=p->nAction ) break;
580         if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break;
581         if( p->aLookahead[j].action!=p->aAction[k].action ) break;
582       }
583       if( j<p->nLookahead ) continue;
584 
585       /* No possible lookahead value that is not in the aLookahead[]
586       ** transaction is allowed to match aAction[i] */
587       n = 0;
588       for(j=0; j<p->nAction; j++){
589         if( p->aAction[j].lookahead<0 ) continue;
590         if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++;
591       }
592       if( n==p->nLookahead ){
593         break;  /* An exact match is found at offset i */
594       }
595     }
596   }
597 
598   /* If no existing offsets exactly match the current transaction, find an
599   ** an empty offset in the aAction[] table in which we can add the
600   ** aLookahead[] transaction.
601   */
602   if( i<0 ){
603     /* Look for holes in the aAction[] table that fit the current
604     ** aLookahead[] transaction.  Leave i set to the offset of the hole.
605     ** If no holes are found, i is left at p->nAction, which means the
606     ** transaction will be appended. */
607     for(i=0; i<p->nActionAlloc - p->mxLookahead; i++){
608       if( p->aAction[i].lookahead<0 ){
609         for(j=0; j<p->nLookahead; j++){
610           k = p->aLookahead[j].lookahead - p->mnLookahead + i;
611           if( k<0 ) break;
612           if( p->aAction[k].lookahead>=0 ) break;
613         }
614         if( j<p->nLookahead ) continue;
615         for(j=0; j<p->nAction; j++){
616           if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break;
617         }
618         if( j==p->nAction ){
619           break;  /* Fits in empty slots */
620         }
621       }
622     }
623   }
624   /* Insert transaction set at index i. */
625   for(j=0; j<p->nLookahead; j++){
626     k = p->aLookahead[j].lookahead - p->mnLookahead + i;
627     p->aAction[k] = p->aLookahead[j];
628     if( k>=p->nAction ) p->nAction = k+1;
629   }
630   p->nLookahead = 0;
631 
632   /* Return the offset that is added to the lookahead in order to get the
633   ** index into yy_action of the action */
634   return i - p->mnLookahead;
635 }
636 
637 /********************** From the file "build.c" *****************************/
638 /*
639 ** Routines to construction the finite state machine for the LEMON
640 ** parser generator.
641 */
642 
643 /* Find a precedence symbol of every rule in the grammar.
644 **
645 ** Those rules which have a precedence symbol coded in the input
646 ** grammar using the "[symbol]" construct will already have the
647 ** rp->precsym field filled.  Other rules take as their precedence
648 ** symbol the first RHS symbol with a defined precedence.  If there
649 ** are not RHS symbols with a defined precedence, the precedence
650 ** symbol field is left blank.
651 */
FindRulePrecedences(struct lemon * xp)652 void FindRulePrecedences(struct lemon *xp)
653 {
654   struct rule *rp;
655   for(rp=xp->rule; rp; rp=rp->next){
656     if( rp->precsym==0 ){
657       int i, j;
658       for(i=0; i<rp->nrhs && rp->precsym==0; i++){
659         struct symbol *sp = rp->rhs[i];
660         if( sp->type==MULTITERMINAL ){
661           for(j=0; j<sp->nsubsym; j++){
662             if( sp->subsym[j]->prec>=0 ){
663               rp->precsym = sp->subsym[j];
664               break;
665             }
666           }
667         }else if( sp->prec>=0 ){
668           rp->precsym = rp->rhs[i];
669 	}
670       }
671     }
672   }
673   return;
674 }
675 
676 /* Find all nonterminals which will generate the empty string.
677 ** Then go back and compute the first sets of every nonterminal.
678 ** The first set is the set of all terminal symbols which can begin
679 ** a string generated by that nonterminal.
680 */
FindFirstSets(struct lemon * lemp)681 void FindFirstSets(struct lemon *lemp)
682 {
683   int i, j;
684   struct rule *rp;
685   int progress;
686 
687   for(i=0; i<lemp->nsymbol; i++){
688     lemp->symbols[i]->lambda = LEMON_FALSE;
689   }
690   for(i=lemp->nterminal; i<lemp->nsymbol; i++){
691     lemp->symbols[i]->firstset = SetNew();
692   }
693 
694   /* First compute all lambdas */
695   do{
696     progress = 0;
697     for(rp=lemp->rule; rp; rp=rp->next){
698       if( rp->lhs->lambda ) continue;
699       for(i=0; i<rp->nrhs; i++){
700          struct symbol *sp = rp->rhs[i];
701          if( sp->type!=TERMINAL || sp->lambda==LEMON_FALSE ) break;
702       }
703       if( i==rp->nrhs ){
704         rp->lhs->lambda = LEMON_TRUE;
705         progress = 1;
706       }
707     }
708   }while( progress );
709 
710   /* Now compute all first sets */
711   do{
712     struct symbol *s1, *s2;
713     progress = 0;
714     for(rp=lemp->rule; rp; rp=rp->next){
715       s1 = rp->lhs;
716       for(i=0; i<rp->nrhs; i++){
717         s2 = rp->rhs[i];
718         if( s2->type==TERMINAL ){
719           progress += SetAdd(s1->firstset,s2->index);
720           break;
721         }else if( s2->type==MULTITERMINAL ){
722           for(j=0; j<s2->nsubsym; j++){
723             progress += SetAdd(s1->firstset,s2->subsym[j]->index);
724           }
725           break;
726 	}else if( s1==s2 ){
727           if( s1->lambda==LEMON_FALSE ) break;
728 	}else{
729           progress += SetUnion(s1->firstset,s2->firstset);
730           if( s2->lambda==LEMON_FALSE ) break;
731 	}
732       }
733     }
734   }while( progress );
735   return;
736 }
737 
738 /* Compute all LR(0) states for the grammar.  Links
739 ** are added to between some states so that the LR(1) follow sets
740 ** can be computed later.
741 */
742 PRIVATE struct state *getstate(struct lemon *);  /* forward reference */
FindStates(struct lemon * lemp)743 void FindStates(struct lemon *lemp)
744 {
745   struct symbol *sp;
746   struct rule *rp;
747 
748   Configlist_init();
749 
750   /* Find the start symbol */
751   if( lemp->start ){
752     sp = Symbol_find(lemp->start);
753     if( sp==0 ){
754       ErrorMsg(lemp->filename,0,
755 "The specified start symbol \"%s\" is not \
756 in a nonterminal of the grammar.  \"%s\" will be used as the start \
757 symbol instead.",lemp->start,lemp->rule->lhs->name);
758       lemp->errorcnt++;
759       sp = lemp->rule->lhs;
760     }
761   }else{
762     sp = lemp->rule->lhs;
763   }
764 
765   /* Make sure the start symbol doesn't occur on the right-hand side of
766   ** any rule.  Report an error if it does.  (YACC would generate a new
767   ** start symbol in this case.) */
768   for(rp=lemp->rule; rp; rp=rp->next){
769     int i;
770     for(i=0; i<rp->nrhs; i++){
771       if( rp->rhs[i]==sp ){   /* FIX ME:  Deal with multiterminals */
772         ErrorMsg(lemp->filename,0,
773 "The start symbol \"%s\" occurs on the \
774 right-hand side of a rule. This will result in a parser which \
775 does not work properly.",sp->name);
776         lemp->errorcnt++;
777       }
778     }
779   }
780 
781   /* The basis configuration set for the first state
782   ** is all rules which have the start symbol as their
783   ** left-hand side */
784   for(rp=sp->rule; rp; rp=rp->nextlhs){
785     struct config *newcfp;
786     rp->lhsStart = 1;
787     newcfp = Configlist_addbasis(rp,0);
788     SetAdd(newcfp->fws,0);
789   }
790 
791   /* Compute the first state.  All other states will be
792   ** computed automatically during the computation of the first one.
793   ** The returned pointer to the first state is not used. */
794   (void)getstate(lemp);
795   return;
796 }
797 
798 /* Return a pointer to a state which is described by the configuration
799 ** list which has been built from calls to Configlist_add.
800 */
801 PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */
getstate(struct lemon * lemp)802 PRIVATE struct state *getstate(struct lemon *lemp)
803 {
804   struct config *cfp, *bp;
805   struct state *stp;
806 
807   /* Extract the sorted basis of the new state.  The basis was constructed
808   ** by prior calls to "Configlist_addbasis()". */
809   Configlist_sortbasis();
810   bp = Configlist_basis();
811 
812   /* Get a state with the same basis */
813   stp = State_find(bp);
814   if( stp ){
815     /* A state with the same basis already exists!  Copy all the follow-set
816     ** propagation links from the state under construction into the
817     ** preexisting state, then return a pointer to the preexisting state */
818     struct config *x, *y;
819     for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){
820       Plink_copy(&y->bplp,x->bplp);
821       Plink_delete(x->fplp);
822       x->fplp = x->bplp = 0;
823     }
824     cfp = Configlist_return();
825     Configlist_eat(cfp);
826   }else{
827     /* This really is a new state.  Construct all the details */
828     Configlist_closure(lemp);    /* Compute the configuration closure */
829     Configlist_sort();           /* Sort the configuration closure */
830     cfp = Configlist_return();   /* Get a pointer to the config list */
831     stp = State_new();           /* A new state structure */
832     MemoryCheck(stp);
833     stp->bp = bp;                /* Remember the configuration basis */
834     stp->cfp = cfp;              /* Remember the configuration closure */
835     stp->statenum = lemp->nstate++; /* Every state gets a sequence number */
836     stp->ap = 0;                 /* No actions, yet. */
837     State_insert(stp,stp->bp);   /* Add to the state table */
838     buildshifts(lemp,stp);       /* Recursively compute successor states */
839   }
840   return stp;
841 }
842 
843 /*
844 ** Return true if two symbols are the same.
845 */
same_symbol(struct symbol * a,struct symbol * b)846 int same_symbol(struct symbol *a, struct symbol *b)
847 {
848   int i;
849   if( a==b ) return 1;
850   if( a->type!=MULTITERMINAL ) return 0;
851   if( b->type!=MULTITERMINAL ) return 0;
852   if( a->nsubsym!=b->nsubsym ) return 0;
853   for(i=0; i<a->nsubsym; i++){
854     if( a->subsym[i]!=b->subsym[i] ) return 0;
855   }
856   return 1;
857 }
858 
859 /* Construct all successor states to the given state.  A "successor"
860 ** state is any state which can be reached by a shift action.
861 */
buildshifts(struct lemon * lemp,struct state * stp)862 PRIVATE void buildshifts(struct lemon *lemp, struct state *stp)
863 {
864   struct config *cfp;  /* For looping thru the config closure of "stp" */
865   struct config *bcfp; /* For the inner loop on config closure of "stp" */
866   struct config *newcfg;  /* */
867   struct symbol *sp;   /* Symbol following the dot in configuration "cfp" */
868   struct symbol *bsp;  /* Symbol following the dot in configuration "bcfp" */
869   struct state *newstp; /* A pointer to a successor state */
870 
871   /* Each configuration becomes complete after it contibutes to a successor
872   ** state.  Initially, all configurations are incomplete */
873   for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE;
874 
875   /* Loop through all configurations of the state "stp" */
876   for(cfp=stp->cfp; cfp; cfp=cfp->next){
877     if( cfp->status==COMPLETE ) continue;    /* Already used by inner loop */
878     if( cfp->dot>=cfp->rp->nrhs ) continue;  /* Can't shift this config */
879     Configlist_reset();                      /* Reset the new config set */
880     sp = cfp->rp->rhs[cfp->dot];             /* Symbol after the dot */
881 
882     /* For every configuration in the state "stp" which has the symbol "sp"
883     ** following its dot, add the same configuration to the basis set under
884     ** construction but with the dot shifted one symbol to the right. */
885     for(bcfp=cfp; bcfp; bcfp=bcfp->next){
886       if( bcfp->status==COMPLETE ) continue;    /* Already used */
887       if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */
888       bsp = bcfp->rp->rhs[bcfp->dot];           /* Get symbol after dot */
889       if( !same_symbol(bsp,sp) ) continue;      /* Must be same as for "cfp" */
890       bcfp->status = COMPLETE;                  /* Mark this config as used */
891       newcfg = Configlist_addbasis(bcfp->rp,bcfp->dot+1);
892       Plink_add(&newcfg->bplp,bcfp);
893     }
894 
895     /* Get a pointer to the state described by the basis configuration set
896     ** constructed in the preceding loop */
897     newstp = getstate(lemp);
898 
899     /* The state "newstp" is reached from the state "stp" by a shift action
900     ** on the symbol "sp" */
901     if( sp->type==MULTITERMINAL ){
902       int i;
903       for(i=0; i<sp->nsubsym; i++){
904         Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp);
905       }
906     }else{
907       Action_add(&stp->ap,SHIFT,sp,(char *)newstp);
908     }
909   }
910 }
911 
912 /*
913 ** Construct the propagation links
914 */
FindLinks(struct lemon * lemp)915 void FindLinks(struct lemon *lemp)
916 {
917   int i;
918   struct config *cfp, *other;
919   struct state *stp;
920   struct plink *plp;
921 
922   /* Housekeeping detail:
923   ** Add to every propagate link a pointer back to the state to
924   ** which the link is attached. */
925   for(i=0; i<lemp->nstate; i++){
926     stp = lemp->sorted[i];
927     for(cfp=stp->cfp; cfp; cfp=cfp->next){
928       cfp->stp = stp;
929     }
930   }
931 
932   /* Convert all backlinks into forward links.  Only the forward
933   ** links are used in the follow-set computation. */
934   for(i=0; i<lemp->nstate; i++){
935     stp = lemp->sorted[i];
936     for(cfp=stp->cfp; cfp; cfp=cfp->next){
937       for(plp=cfp->bplp; plp; plp=plp->next){
938         other = plp->cfp;
939         Plink_add(&other->fplp,cfp);
940       }
941     }
942   }
943 }
944 
945 /* Compute all followsets.
946 **
947 ** A followset is the set of all symbols which can come immediately
948 ** after a configuration.
949 */
FindFollowSets(struct lemon * lemp)950 void FindFollowSets(struct lemon *lemp)
951 {
952   int i;
953   struct config *cfp;
954   struct plink *plp;
955   int progress;
956   int change;
957 
958   for(i=0; i<lemp->nstate; i++){
959     for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
960       cfp->status = INCOMPLETE;
961     }
962   }
963 
964   do{
965     progress = 0;
966     for(i=0; i<lemp->nstate; i++){
967       for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
968         if( cfp->status==COMPLETE ) continue;
969         for(plp=cfp->fplp; plp; plp=plp->next){
970           change = SetUnion(plp->cfp->fws,cfp->fws);
971           if( change ){
972             plp->cfp->status = INCOMPLETE;
973             progress = 1;
974 	  }
975 	}
976         cfp->status = COMPLETE;
977       }
978     }
979   }while( progress );
980 }
981 
982 static int resolve_conflict(struct action *,struct action *, struct symbol *);
983 
984 /* Compute the reduce actions, and resolve conflicts.
985 */
FindActions(struct lemon * lemp)986 void FindActions(struct lemon *lemp)
987 {
988   int i,j;
989   struct config *cfp;
990   struct state *stp;
991   struct symbol *sp;
992   struct rule *rp;
993 
994   /* Add all of the reduce actions
995   ** A reduce action is added for each element of the followset of
996   ** a configuration which has its dot at the extreme right.
997   */
998   for(i=0; i<lemp->nstate; i++){   /* Loop over all states */
999     stp = lemp->sorted[i];
1000     for(cfp=stp->cfp; cfp; cfp=cfp->next){  /* Loop over all configurations */
1001       if( cfp->rp->nrhs==cfp->dot ){        /* Is dot at extreme right? */
1002         for(j=0; j<lemp->nterminal; j++){
1003           if( SetFind(cfp->fws,j) ){
1004             /* Add a reduce action to the state "stp" which will reduce by the
1005             ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */
1006             Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp);
1007           }
1008 	}
1009       }
1010     }
1011   }
1012 
1013   /* Add the accepting token */
1014   if( lemp->start ){
1015     sp = Symbol_find(lemp->start);
1016     if( sp==0 ) sp = lemp->rule->lhs;
1017   }else{
1018     sp = lemp->rule->lhs;
1019   }
1020   /* Add to the first state (which is always the starting state of the
1021   ** finite state machine) an action to ACCEPT if the lookahead is the
1022   ** start nonterminal.  */
1023   Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0);
1024 
1025   /* Resolve conflicts */
1026   for(i=0; i<lemp->nstate; i++){
1027     struct action *ap, *nap;
1028     struct state *stp;
1029     stp = lemp->sorted[i];
1030     /* assert( stp->ap ); */
1031     stp->ap = Action_sort(stp->ap);
1032     for(ap=stp->ap; ap && ap->next; ap=ap->next){
1033       for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){
1034          /* The two actions "ap" and "nap" have the same lookahead.
1035          ** Figure out which one should be used */
1036          lemp->nconflict += resolve_conflict(ap,nap,lemp->errsym);
1037       }
1038     }
1039   }
1040 
1041   /* Report an error for each rule that can never be reduced. */
1042   for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE;
1043   for(i=0; i<lemp->nstate; i++){
1044     struct action *ap;
1045     for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){
1046       if( ap->type==REDUCE ) ap->x.rp->canReduce = LEMON_TRUE;
1047     }
1048   }
1049   for(rp=lemp->rule; rp; rp=rp->next){
1050     if( rp->canReduce ) continue;
1051     ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n");
1052     lemp->errorcnt++;
1053   }
1054 }
1055 
1056 /* Resolve a conflict between the two given actions.  If the
1057 ** conflict can't be resolved, return non-zero.
1058 **
1059 ** NO LONGER TRUE:
1060 **   To resolve a conflict, first look to see if either action
1061 **   is on an error rule.  In that case, take the action which
1062 **   is not associated with the error rule.  If neither or both
1063 **   actions are associated with an error rule, then try to
1064 **   use precedence to resolve the conflict.
1065 **
1066 ** If either action is a SHIFT, then it must be apx.  This
1067 ** function won't work if apx->type==REDUCE and apy->type==SHIFT.
1068 */
resolve_conflict(struct action * apx,struct action * apy,struct symbol * errsym)1069 static int resolve_conflict(
1070   struct action *apx,
1071   struct action *apy,
1072   struct symbol *errsym   /* The error symbol (if defined.  NULL otherwise) */
1073 ){
1074   struct symbol *spx, *spy;
1075   int errcnt = 0;
1076   assert( apx->sp==apy->sp );  /* Otherwise there would be no conflict */
1077   if( apx->type==SHIFT && apy->type==SHIFT ){
1078     apy->type = SSCONFLICT;
1079     errcnt++;
1080   }
1081   if( apx->type==SHIFT && apy->type==REDUCE ){
1082     spx = apx->sp;
1083     spy = apy->x.rp->precsym;
1084     if( spy==0 || spx->prec<0 || spy->prec<0 ){
1085       /* Not enough precedence information. */
1086       apy->type = SRCONFLICT;
1087       errcnt++;
1088     }else if( spx->prec>spy->prec ){    /* higher precedence wins */
1089       apy->type = RD_RESOLVED;
1090     }else if( spx->prec<spy->prec ){
1091       apx->type = SH_RESOLVED;
1092     }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */
1093       apy->type = RD_RESOLVED;                             /* associativity */
1094     }else if( spx->prec==spy->prec && spx->assoc==LEFT ){  /* to break tie */
1095       apx->type = SH_RESOLVED;
1096     }else{
1097       assert( spx->prec==spy->prec && spx->assoc==NONE );
1098       apy->type = SRCONFLICT;
1099       errcnt++;
1100     }
1101   }else if( apx->type==REDUCE && apy->type==REDUCE ){
1102     spx = apx->x.rp->precsym;
1103     spy = apy->x.rp->precsym;
1104     if( spx==0 || spy==0 || spx->prec<0 ||
1105     spy->prec<0 || spx->prec==spy->prec ){
1106       apy->type = RRCONFLICT;
1107       errcnt++;
1108     }else if( spx->prec>spy->prec ){
1109       apy->type = RD_RESOLVED;
1110     }else if( spx->prec<spy->prec ){
1111       apx->type = RD_RESOLVED;
1112     }
1113   }else{
1114     assert(
1115       apx->type==SH_RESOLVED ||
1116       apx->type==RD_RESOLVED ||
1117       apx->type==SSCONFLICT ||
1118       apx->type==SRCONFLICT ||
1119       apx->type==RRCONFLICT ||
1120       apy->type==SH_RESOLVED ||
1121       apy->type==RD_RESOLVED ||
1122       apy->type==SSCONFLICT ||
1123       apy->type==SRCONFLICT ||
1124       apy->type==RRCONFLICT
1125     );
1126     /* The REDUCE/SHIFT case cannot happen because SHIFTs come before
1127     ** REDUCEs on the list.  If we reach this point it must be because
1128     ** the parser conflict had already been resolved. */
1129   }
1130   return errcnt;
1131 }
1132 /********************* From the file "configlist.c" *************************/
1133 /*
1134 ** Routines to processing a configuration list and building a state
1135 ** in the LEMON parser generator.
1136 */
1137 
1138 static struct config *freelist = 0;      /* List of free configurations */
1139 static struct config *current = 0;       /* Top of list of configurations */
1140 static struct config **currentend = 0;   /* Last on list of configs */
1141 static struct config *basis = 0;         /* Top of list of basis configs */
1142 static struct config **basisend = 0;     /* End of list of basis configs */
1143 
1144 /* Return a pointer to a new configuration */
newconfig()1145 PRIVATE struct config *newconfig(){
1146   struct config *newcfg;
1147   if( freelist==0 ){
1148     int i;
1149     int amt = 3;
1150     freelist = (struct config *)calloc( amt, sizeof(struct config) );
1151     if( freelist==0 ){
1152       fprintf(stderr,"Unable to allocate memory for a new configuration.");
1153       exit(1);
1154     }
1155     for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
1156     freelist[amt-1].next = 0;
1157   }
1158   newcfg = freelist;
1159   freelist = freelist->next;
1160   return newcfg;
1161 }
1162 
1163 /* The configuration "old" is no longer used */
deleteconfig(struct config * old)1164 PRIVATE void deleteconfig(struct config *old)
1165 {
1166   old->next = freelist;
1167   freelist = old;
1168 }
1169 
1170 /* Initialized the configuration list builder */
Configlist_init()1171 void Configlist_init(){
1172   current = 0;
1173   currentend = &current;
1174   basis = 0;
1175   basisend = &basis;
1176   Configtable_init();
1177   return;
1178 }
1179 
1180 /* Initialized the configuration list builder */
Configlist_reset()1181 void Configlist_reset(){
1182   current = 0;
1183   currentend = &current;
1184   basis = 0;
1185   basisend = &basis;
1186   Configtable_clear(0);
1187   return;
1188 }
1189 
1190 /* Add another configuration to the configuration list */
Configlist_add(struct rule * rp,int dot)1191 struct config *Configlist_add(
1192   struct rule *rp,    /* The rule */
1193   int dot             /* Index into the RHS of the rule where the dot goes */
1194 ){
1195   struct config *cfp, model;
1196 
1197   assert( currentend!=0 );
1198   model.rp = rp;
1199   model.dot = dot;
1200   cfp = Configtable_find(&model);
1201   if( cfp==0 ){
1202     cfp = newconfig();
1203     cfp->rp = rp;
1204     cfp->dot = dot;
1205     cfp->fws = SetNew();
1206     cfp->stp = 0;
1207     cfp->fplp = cfp->bplp = 0;
1208     cfp->next = 0;
1209     cfp->bp = 0;
1210     *currentend = cfp;
1211     currentend = &cfp->next;
1212     Configtable_insert(cfp);
1213   }
1214   return cfp;
1215 }
1216 
1217 /* Add a basis configuration to the configuration list */
Configlist_addbasis(struct rule * rp,int dot)1218 struct config *Configlist_addbasis(struct rule *rp, int dot)
1219 {
1220   struct config *cfp, model;
1221 
1222   assert( basisend!=0 );
1223   assert( currentend!=0 );
1224   model.rp = rp;
1225   model.dot = dot;
1226   cfp = Configtable_find(&model);
1227   if( cfp==0 ){
1228     cfp = newconfig();
1229     cfp->rp = rp;
1230     cfp->dot = dot;
1231     cfp->fws = SetNew();
1232     cfp->stp = 0;
1233     cfp->fplp = cfp->bplp = 0;
1234     cfp->next = 0;
1235     cfp->bp = 0;
1236     *currentend = cfp;
1237     currentend = &cfp->next;
1238     *basisend = cfp;
1239     basisend = &cfp->bp;
1240     Configtable_insert(cfp);
1241   }
1242   return cfp;
1243 }
1244 
1245 /* Compute the closure of the configuration list */
Configlist_closure(struct lemon * lemp)1246 void Configlist_closure(struct lemon *lemp)
1247 {
1248   struct config *cfp, *newcfp;
1249   struct rule *rp, *newrp;
1250   struct symbol *sp, *xsp;
1251   int i, dot;
1252 
1253   assert( currentend!=0 );
1254   for(cfp=current; cfp; cfp=cfp->next){
1255     rp = cfp->rp;
1256     dot = cfp->dot;
1257     if( dot>=rp->nrhs ) continue;
1258     sp = rp->rhs[dot];
1259     if( sp->type==NONTERMINAL ){
1260       if( sp->rule==0 && sp!=lemp->errsym ){
1261         ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.",
1262           sp->name);
1263         lemp->errorcnt++;
1264       }
1265       for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){
1266         newcfp = Configlist_add(newrp,0);
1267         for(i=dot+1; i<rp->nrhs; i++){
1268           xsp = rp->rhs[i];
1269           if( xsp->type==TERMINAL ){
1270             SetAdd(newcfp->fws,xsp->index);
1271             break;
1272           }else if( xsp->type==MULTITERMINAL ){
1273             int k;
1274             for(k=0; k<xsp->nsubsym; k++){
1275               SetAdd(newcfp->fws, xsp->subsym[k]->index);
1276             }
1277             break;
1278 	  }else{
1279             SetUnion(newcfp->fws,xsp->firstset);
1280             if( xsp->lambda==LEMON_FALSE ) break;
1281 	  }
1282 	}
1283         if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp);
1284       }
1285     }
1286   }
1287   return;
1288 }
1289 
1290 /* Sort the configuration list */
Configlist_sort()1291 void Configlist_sort(){
1292   current = (struct config *)msort((char *)current,(char **)&(current->next),Configcmp);
1293   currentend = 0;
1294   return;
1295 }
1296 
1297 /* Sort the basis configuration list */
Configlist_sortbasis()1298 void Configlist_sortbasis(){
1299   basis = (struct config *)msort((char *)current,(char **)&(current->bp),Configcmp);
1300   basisend = 0;
1301   return;
1302 }
1303 
1304 /* Return a pointer to the head of the configuration list and
1305 ** reset the list */
Configlist_return()1306 struct config *Configlist_return(){
1307   struct config *old;
1308   old = current;
1309   current = 0;
1310   currentend = 0;
1311   return old;
1312 }
1313 
1314 /* Return a pointer to the head of the configuration list and
1315 ** reset the list */
Configlist_basis()1316 struct config *Configlist_basis(){
1317   struct config *old;
1318   old = basis;
1319   basis = 0;
1320   basisend = 0;
1321   return old;
1322 }
1323 
1324 /* Free all elements of the given configuration list */
Configlist_eat(struct config * cfp)1325 void Configlist_eat(struct config *cfp)
1326 {
1327   struct config *nextcfp;
1328   for(; cfp; cfp=nextcfp){
1329     nextcfp = cfp->next;
1330     assert( cfp->fplp==0 );
1331     assert( cfp->bplp==0 );
1332     if( cfp->fws ) SetFree(cfp->fws);
1333     deleteconfig(cfp);
1334   }
1335   return;
1336 }
1337 /***************** From the file "error.c" *********************************/
1338 /*
1339 ** Code for printing error message.
1340 */
1341 
ErrorMsg(const char * filename,int lineno,const char * format,...)1342 void ErrorMsg(const char *filename, int lineno, const char *format, ...){
1343   va_list ap;
1344   fprintf(stderr, "%s:%d: ", filename, lineno);
1345   va_start(ap, format);
1346   vfprintf(stderr,format,ap);
1347   va_end(ap);
1348   fprintf(stderr, "\n");
1349 }
1350 /**************** From the file "main.c" ************************************/
1351 /*
1352 ** Main program file for the LEMON parser generator.
1353 */
1354 
1355 /* Report an out-of-memory condition and abort.  This function
1356 ** is used mostly by the "MemoryCheck" macro in struct.h
1357 */
memory_error()1358 void memory_error(){
1359   fprintf(stderr,"Out of memory.  Aborting...\n");
1360   exit(1);
1361 }
1362 
1363 static int nDefine = 0;      /* Number of -D options on the command line */
1364 static char **azDefine = 0;  /* Name of the -D macros */
1365 
1366 /* This routine is called with the argument to each -D command-line option.
1367 ** Add the macro defined to the azDefine array.
1368 */
handle_D_option(char * z)1369 static void handle_D_option(char *z){
1370   char **paz;
1371   nDefine++;
1372   azDefine = (char **) realloc(azDefine, sizeof(azDefine[0])*nDefine);
1373   if( azDefine==0 ){
1374     fprintf(stderr,"out of memory\n");
1375     exit(1);
1376   }
1377   paz = &azDefine[nDefine-1];
1378   *paz = (char *) malloc( lemonStrlen(z)+1 );
1379   if( *paz==0 ){
1380     fprintf(stderr,"out of memory\n");
1381     exit(1);
1382   }
1383   strcpy(*paz, z);
1384   for(z=*paz; *z && *z!='='; z++){}
1385   *z = 0;
1386 }
1387 
1388 static char *user_templatename = NULL;
handle_T_option(char * z)1389 static void handle_T_option(char *z){
1390   user_templatename = (char *) malloc( lemonStrlen(z)+1 );
1391   if( user_templatename==0 ){
1392     memory_error();
1393   }
1394   strcpy(user_templatename, z);
1395 }
1396 
1397 int local_out_dir = 0;
1398 
1399 /* The main program.  Parse the command line and do it... */
main(int argc,char ** argv)1400 int main(int argc, char **argv)
1401 {
1402   static int version = 0;
1403   static int rpflag = 0;
1404   static int basisflag = 0;
1405   static int compress = 0;
1406   static int quiet = 0;
1407   static int statistics = 0;
1408   static int mhflag = 0;
1409   static int nolinenosflag = 0;
1410   static int noResort = 0;
1411   static struct s_options options[] = {
1412     {OPT_FLAG, "b", (char*)&basisflag, "Print only the basis in report."},
1413     {OPT_FLAG, "c", (char*)&compress, "Don't compress the action table."},
1414     {OPT_FLAG, "d", (char*)&local_out_dir, "Output files in the current directory."},
1415     {OPT_FSTR, "D", (char*)handle_D_option, "Define an %ifdef macro."},
1416     {OPT_FSTR, "T", (char*)handle_T_option, "Specify a template file."},
1417     {OPT_FLAG, "g", (char*)&rpflag, "Print grammar without actions."},
1418     {OPT_FLAG, "m", (char*)&mhflag, "Output a makeheaders compatible file."},
1419     {OPT_FLAG, "l", (char*)&nolinenosflag, "Do not print #line statements."},
1420     {OPT_FLAG, "p", (char*)&showPrecedenceConflict,
1421                     "Show conflicts resolved by precedence rules"},
1422     {OPT_FLAG, "q", (char*)&quiet, "(Quiet) Don't print the report file."},
1423     {OPT_FLAG, "r", (char*)&noResort, "Do not sort or renumber states"},
1424     {OPT_FLAG, "s", (char*)&statistics,
1425                                    "Print parser stats to standard output."},
1426     {OPT_FLAG, "x", (char*)&version, "Print the version number."},
1427     {OPT_FLAG,0,0,0}
1428   };
1429   int i;
1430   int exitcode;
1431   struct lemon lem;
1432 
1433   /*atexit(LemonAtExit);*/
1434 
1435   OptInit(argv,options,stderr);
1436   if( version ){
1437      printf("Lemon version 1.0\n");
1438      exit(0);
1439   }
1440   if( OptNArgs()!=1 ){
1441     fprintf(stderr,"Exactly one filename argument is required.\n");
1442     exit(1);
1443   }
1444   memset(&lem, 0, sizeof(lem));
1445   lem.errorcnt = 0;
1446 
1447   /* Initialize the machine */
1448   Strsafe_init();
1449   Symbol_init();
1450   State_init();
1451   lem.argv0 = argv[0];
1452   lem.filename = OptArg(0);
1453   lem.basisflag = basisflag;
1454   lem.nolinenosflag = nolinenosflag;
1455   Symbol_new("$");
1456   lem.errsym = Symbol_new("error");
1457   lem.errsym->useCnt = 0;
1458 
1459   /* Parse the input file */
1460   Parse(&lem);
1461   if( lem.errorcnt ) exit(lem.errorcnt);
1462   if( lem.nrule==0 ){
1463     fprintf(stderr,"Empty grammar.\n");
1464     exit(1);
1465   }
1466 
1467   /* Count and index the symbols of the grammar */
1468   lem.nsymbol = Symbol_count();
1469   Symbol_new("{default}");
1470   lem.symbols = Symbol_arrayof();
1471   for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
1472   qsort(lem.symbols,lem.nsymbol+1,sizeof(struct symbol*), Symbolcmpp);
1473   for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
1474   for(i=1; isupper(lem.symbols[i]->name[0]); i++);
1475   lem.nterminal = i;
1476 
1477   /* Generate a reprint of the grammar, if requested on the command line */
1478   if( rpflag ){
1479     Reprint(&lem);
1480   }else{
1481     /* Initialize the size for all follow and first sets */
1482     SetSize(lem.nterminal+1);
1483 
1484     /* Find the precedence for every production rule (that has one) */
1485     FindRulePrecedences(&lem);
1486 
1487     /* Compute the lambda-nonterminals and the first-sets for every
1488     ** nonterminal */
1489     FindFirstSets(&lem);
1490 
1491     /* Compute all LR(0) states.  Also record follow-set propagation
1492     ** links so that the follow-set can be computed later */
1493     lem.nstate = 0;
1494     FindStates(&lem);
1495     lem.sorted = State_arrayof();
1496 
1497     /* Tie up loose ends on the propagation links */
1498     FindLinks(&lem);
1499 
1500     /* Compute the follow set of every reducible configuration */
1501     FindFollowSets(&lem);
1502 
1503     /* Compute the action tables */
1504     FindActions(&lem);
1505 
1506     /* Compress the action tables */
1507     if( compress==0 ) CompressTables(&lem);
1508 
1509     /* Reorder and renumber the states so that states with fewer choices
1510     ** occur at the end.  This is an optimization that helps make the
1511     ** generated parser tables smaller. */
1512     if( noResort==0 ) ResortStates(&lem);
1513 
1514     /* Generate a report of the parser generated.  (the "y.output" file) */
1515     if( !quiet ) ReportOutput(&lem);
1516 
1517     /* Generate the source code for the parser */
1518     ReportTable(&lem, mhflag);
1519 
1520     /* Produce a header file for use by the scanner.  (This step is
1521     ** omitted if the "-m" option is used because makeheaders will
1522     ** generate the file for us.) */
1523     if( !mhflag ) ReportHeader(&lem);
1524   }
1525   if( statistics ){
1526     printf("Parser statistics: %d terminals, %d nonterminals, %d rules\n",
1527       lem.nterminal, lem.nsymbol - lem.nterminal, lem.nrule);
1528     printf("                   %d states, %d parser table entries, %d conflicts\n",
1529       lem.nstate, lem.tablesize, lem.nconflict);
1530   }
1531   if( lem.nconflict > 0 ){
1532     fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict);
1533   }
1534 
1535   /* return 0 on success, 1 on failure. */
1536   exitcode = ((lem.errorcnt > 0) || (lem.nconflict > 0)) ? 1 : 0;
1537   successful_exit = (exitcode == 0);
1538   exit(exitcode);
1539   return (exitcode);
1540 }
1541 /******************** From the file "msort.c" *******************************/
1542 /*
1543 ** A generic merge-sort program.
1544 **
1545 ** USAGE:
1546 ** Let "ptr" be a pointer to some structure which is at the head of
1547 ** a null-terminated list.  Then to sort the list call:
1548 **
1549 **     ptr = msort(ptr,&(ptr->next),cmpfnc);
1550 **
1551 ** In the above, "cmpfnc" is a pointer to a function which compares
1552 ** two instances of the structure and returns an integer, as in
1553 ** strcmp.  The second argument is a pointer to the pointer to the
1554 ** second element of the linked list.  This address is used to compute
1555 ** the offset to the "next" field within the structure.  The offset to
1556 ** the "next" field must be constant for all structures in the list.
1557 **
1558 ** The function returns a new pointer which is the head of the list
1559 ** after sorting.
1560 **
1561 ** ALGORITHM:
1562 ** Merge-sort.
1563 */
1564 
1565 /*
1566 ** Return a pointer to the next structure in the linked list.
1567 */
1568 #define NEXT(A) (*(char**)(((unsigned long)A)+offset))
1569 
1570 /*
1571 ** Inputs:
1572 **   a:       A sorted, null-terminated linked list.  (May be null).
1573 **   b:       A sorted, null-terminated linked list.  (May be null).
1574 **   cmp:     A pointer to the comparison function.
1575 **   offset:  Offset in the structure to the "next" field.
1576 **
1577 ** Return Value:
1578 **   A pointer to the head of a sorted list containing the elements
1579 **   of both a and b.
1580 **
1581 ** Side effects:
1582 **   The "next" pointers for elements in the lists a and b are
1583 **   changed.
1584 */
merge(char * a,char * b,int (* cmp)(const char *,const char *),int offset)1585 static char *merge(
1586   char *a,
1587   char *b,
1588   int (*cmp)(const char*,const char*),
1589   int offset
1590 ){
1591   char *ptr, *head;
1592 
1593   if( a==0 ){
1594     head = b;
1595   }else if( b==0 ){
1596     head = a;
1597   }else{
1598     if( (*cmp)(a,b)<=0 ){
1599       ptr = a;
1600       a = NEXT(a);
1601     }else{
1602       ptr = b;
1603       b = NEXT(b);
1604     }
1605     head = ptr;
1606     while( a && b ){
1607       if( (*cmp)(a,b)<=0 ){
1608         NEXT(ptr) = a;
1609         ptr = a;
1610         a = NEXT(a);
1611       }else{
1612         NEXT(ptr) = b;
1613         ptr = b;
1614         b = NEXT(b);
1615       }
1616     }
1617     if( a ) NEXT(ptr) = a;
1618     else    NEXT(ptr) = b;
1619   }
1620   return head;
1621 }
1622 
1623 /*
1624 ** Inputs:
1625 **   list:      Pointer to a singly-linked list of structures.
1626 **   next:      Pointer to pointer to the second element of the list.
1627 **   cmp:       A comparison function.
1628 **
1629 ** Return Value:
1630 **   A pointer to the head of a sorted list containing the elements
1631 **   orginally in list.
1632 **
1633 ** Side effects:
1634 **   The "next" pointers for elements in list are changed.
1635 */
1636 #define LISTSIZE 30
msort(char * list,char ** next,int (* cmp)(const char *,const char *))1637 static char *msort(
1638   char *list,
1639   char **next,
1640   int (*cmp)(const char*,const char*)
1641 ){
1642   unsigned long offset;
1643   char *ep;
1644   char *set[LISTSIZE];
1645   int i;
1646   offset = (unsigned long)next - (unsigned long)list;
1647   for(i=0; i<LISTSIZE; i++) set[i] = 0;
1648   while( list ){
1649     ep = list;
1650     list = NEXT(list);
1651     NEXT(ep) = 0;
1652     for(i=0; i<LISTSIZE-1 && set[i]!=0; i++){
1653       ep = merge(ep,set[i],cmp,offset);
1654       set[i] = 0;
1655     }
1656     set[i] = ep;
1657   }
1658   ep = 0;
1659   for(i=0; i<LISTSIZE; i++) if( set[i] ) ep = merge(set[i],ep,cmp,offset);
1660   return ep;
1661 }
1662 /************************ From the file "option.c" **************************/
1663 static char **argv;
1664 static struct s_options *op;
1665 static FILE *errstream;
1666 
1667 #define ISOPT(X) ((X)[0]=='-'||(X)[0]=='+'||strchr((X),'=')!=0)
1668 
1669 /*
1670 ** Print the command line with a carrot pointing to the k-th character
1671 ** of the n-th field.
1672 */
errline(int n,int k,FILE * err)1673 static void errline(int n, int k, FILE *err)
1674 {
1675   int spcnt, i;
1676   if( argv[0] ) fprintf(err,"%s",argv[0]);
1677   spcnt = lemonStrlen(argv[0]) + 1;
1678   for(i=1; i<n && argv[i]; i++){
1679     fprintf(err," %s",argv[i]);
1680     spcnt += lemonStrlen(argv[i])+1;
1681   }
1682   spcnt += k;
1683   for(; argv[i]; i++) fprintf(err," %s",argv[i]);
1684   if( spcnt<20 ){
1685     fprintf(err,"\n%*s^-- here\n",spcnt,"");
1686   }else{
1687     fprintf(err,"\n%*shere --^\n",spcnt-7,"");
1688   }
1689 }
1690 
1691 /*
1692 ** Return the index of the N-th non-switch argument.  Return -1
1693 ** if N is out of range.
1694 */
argindex(int n)1695 static int argindex(int n)
1696 {
1697   int i;
1698   int dashdash = 0;
1699   if( argv!=0 && *argv!=0 ){
1700     for(i=1; argv[i]; i++){
1701       if( dashdash || !ISOPT(argv[i]) ){
1702         if( n==0 ) return i;
1703         n--;
1704       }
1705       if( strcmp(argv[i],"--")==0 ) dashdash = 1;
1706     }
1707   }
1708   return -1;
1709 }
1710 
1711 static char emsg[] = "Command line syntax error: ";
1712 
1713 /*
1714 ** Process a flag command line argument.
1715 */
handleflags(int i,FILE * err)1716 static int handleflags(int i, FILE *err)
1717 {
1718   int v;
1719   int errcnt = 0;
1720   int j;
1721   for(j=0; op[j].label; j++){
1722     if( strncmp(&argv[i][1],op[j].label,lemonStrlen(op[j].label))==0 ) break;
1723   }
1724   v = argv[i][0]=='-' ? 1 : 0;
1725   if( op[j].label==0 ){
1726     if( err ){
1727       fprintf(err,"%sundefined option.\n",emsg);
1728       errline(i,1,err);
1729     }
1730     errcnt++;
1731   }else if( op[j].type==OPT_FLAG ){
1732     *((int*)op[j].arg) = v;
1733   }else if( op[j].type==OPT_FFLAG ){
1734     (*(void(*)(int))(op[j].arg))(v);
1735   }else if( op[j].type==OPT_FSTR ){
1736     (*(void(*)(char *))(op[j].arg))(&argv[i][2]);
1737   }else{
1738     if( err ){
1739       fprintf(err,"%smissing argument on switch.\n",emsg);
1740       errline(i,1,err);
1741     }
1742     errcnt++;
1743   }
1744   return errcnt;
1745 }
1746 
1747 /*
1748 ** Process a command line switch which has an argument.
1749 */
handleswitch(int i,FILE * err)1750 static int handleswitch(int i, FILE *err)
1751 {
1752   int lv = 0;
1753   double dv = 0.0;
1754   char *sv = 0, *end;
1755   char *cp;
1756   int j;
1757   int errcnt = 0;
1758   cp = strchr(argv[i],'=');
1759   assert( cp!=0 );
1760   *cp = 0;
1761   for(j=0; op[j].label; j++){
1762     if( strcmp(argv[i],op[j].label)==0 ) break;
1763   }
1764   *cp = '=';
1765   if( op[j].label==0 ){
1766     if( err ){
1767       fprintf(err,"%sundefined option.\n",emsg);
1768       errline(i,0,err);
1769     }
1770     errcnt++;
1771   }else{
1772     cp++;
1773     switch( op[j].type ){
1774       case OPT_FLAG:
1775       case OPT_FFLAG:
1776         if( err ){
1777           fprintf(err,"%soption requires an argument.\n",emsg);
1778           errline(i,0,err);
1779         }
1780         errcnt++;
1781         break;
1782       case OPT_DBL:
1783       case OPT_FDBL:
1784         dv = strtod(cp,&end);
1785         if( *end ){
1786           if( err ){
1787             fprintf(err,"%sillegal character in floating-point argument.\n",emsg);
1788             errline(i,((unsigned long)end)-(unsigned long)argv[i],err);
1789           }
1790           errcnt++;
1791         }
1792         break;
1793       case OPT_INT:
1794       case OPT_FINT:
1795         lv = strtol(cp,&end,0);
1796         if( *end ){
1797           if( err ){
1798             fprintf(err,"%sillegal character in integer argument.\n",emsg);
1799             errline(i,((unsigned long)end)-(unsigned long)argv[i],err);
1800           }
1801           errcnt++;
1802         }
1803         break;
1804       case OPT_STR:
1805       case OPT_FSTR:
1806         sv = cp;
1807         break;
1808     }
1809     switch( op[j].type ){
1810       case OPT_FLAG:
1811       case OPT_FFLAG:
1812         break;
1813       case OPT_DBL:
1814         *(double*)(op[j].arg) = dv;
1815         break;
1816       case OPT_FDBL:
1817         (*(void(*)(double))(op[j].arg))(dv);
1818         break;
1819       case OPT_INT:
1820         *(int*)(op[j].arg) = lv;
1821         break;
1822       case OPT_FINT:
1823         (*(void(*)(int))(op[j].arg))((int)lv);
1824         break;
1825       case OPT_STR:
1826         *(char**)(op[j].arg) = sv;
1827         break;
1828       case OPT_FSTR:
1829         (*(void(*)(char *))(op[j].arg))(sv);
1830         break;
1831     }
1832   }
1833   return errcnt;
1834 }
1835 
OptInit(char ** a,struct s_options * o,FILE * err)1836 int OptInit(char **a, struct s_options *o, FILE *err)
1837 {
1838   int errcnt = 0;
1839   argv = a;
1840   op = o;
1841   errstream = err;
1842   if( argv && *argv && op ){
1843     int i;
1844     for(i=1; argv[i]; i++){
1845       if( argv[i][0]=='+' || argv[i][0]=='-' ){
1846         errcnt += handleflags(i,err);
1847       }else if( strchr(argv[i],'=') ){
1848         errcnt += handleswitch(i,err);
1849       }
1850     }
1851   }
1852   if( errcnt>0 ){
1853     fprintf(err,"Valid command line options for \"%s\" are:\n",*a);
1854     OptPrint();
1855     exit(1);
1856   }
1857   return 0;
1858 }
1859 
OptNArgs()1860 int OptNArgs(){
1861   int cnt = 0;
1862   int dashdash = 0;
1863   int i;
1864   if( argv!=0 && argv[0]!=0 ){
1865     for(i=1; argv[i]; i++){
1866       if( dashdash || !ISOPT(argv[i]) ) cnt++;
1867       if( strcmp(argv[i],"--")==0 ) dashdash = 1;
1868     }
1869   }
1870   return cnt;
1871 }
1872 
OptArg(int n)1873 char *OptArg(int n)
1874 {
1875   int i;
1876   i = argindex(n);
1877   return i>=0 ? argv[i] : 0;
1878 }
1879 
OptErr(int n)1880 void OptErr(int n)
1881 {
1882   int i;
1883   i = argindex(n);
1884   if( i>=0 ) errline(i,0,errstream);
1885 }
1886 
OptPrint()1887 void OptPrint(){
1888   int i;
1889   int max, len;
1890   max = 0;
1891   for(i=0; op[i].label; i++){
1892     len = lemonStrlen(op[i].label) + 1;
1893     switch( op[i].type ){
1894       case OPT_FLAG:
1895       case OPT_FFLAG:
1896         break;
1897       case OPT_INT:
1898       case OPT_FINT:
1899         len += 9;       /* length of "<integer>" */
1900         break;
1901       case OPT_DBL:
1902       case OPT_FDBL:
1903         len += 6;       /* length of "<real>" */
1904         break;
1905       case OPT_STR:
1906       case OPT_FSTR:
1907         len += 8;       /* length of "<string>" */
1908         break;
1909     }
1910     if( len>max ) max = len;
1911   }
1912   for(i=0; op[i].label; i++){
1913     switch( op[i].type ){
1914       case OPT_FLAG:
1915       case OPT_FFLAG:
1916         fprintf(errstream,"  -%-*s  %s\n",max,op[i].label,op[i].message);
1917         break;
1918       case OPT_INT:
1919       case OPT_FINT:
1920         fprintf(errstream,"  %s=<integer>%*s  %s\n",op[i].label,
1921           (int)(max-lemonStrlen(op[i].label)-9),"",op[i].message);
1922         break;
1923       case OPT_DBL:
1924       case OPT_FDBL:
1925         fprintf(errstream,"  %s=<real>%*s  %s\n",op[i].label,
1926           (int)(max-lemonStrlen(op[i].label)-6),"",op[i].message);
1927         break;
1928       case OPT_STR:
1929       case OPT_FSTR:
1930         fprintf(errstream,"  %s=<string>%*s  %s\n",op[i].label,
1931           (int)(max-lemonStrlen(op[i].label)-8),"",op[i].message);
1932         break;
1933     }
1934   }
1935 }
1936 /*********************** From the file "parse.c" ****************************/
1937 /*
1938 ** Input file parser for the LEMON parser generator.
1939 */
1940 
1941 /* The state of the parser */
1942 enum e_state {
1943   INITIALIZE,
1944   WAITING_FOR_DECL_OR_RULE,
1945   WAITING_FOR_DECL_KEYWORD,
1946   WAITING_FOR_DECL_ARG,
1947   WAITING_FOR_PRECEDENCE_SYMBOL,
1948   WAITING_FOR_ARROW,
1949   IN_RHS,
1950   LHS_ALIAS_1,
1951   LHS_ALIAS_2,
1952   LHS_ALIAS_3,
1953   RHS_ALIAS_1,
1954   RHS_ALIAS_2,
1955   PRECEDENCE_MARK_1,
1956   PRECEDENCE_MARK_2,
1957   RESYNC_AFTER_RULE_ERROR,
1958   RESYNC_AFTER_DECL_ERROR,
1959   WAITING_FOR_DESTRUCTOR_SYMBOL,
1960   WAITING_FOR_DATATYPE_SYMBOL,
1961   WAITING_FOR_FALLBACK_ID,
1962   WAITING_FOR_WILDCARD_ID
1963 };
1964 struct pstate {
1965   char *filename;       /* Name of the input file */
1966   int tokenlineno;      /* Linenumber at which current token starts */
1967   int errorcnt;         /* Number of errors so far */
1968   char *tokenstart;     /* Text of current token */
1969   struct lemon *gp;     /* Global state vector */
1970   enum e_state state;        /* The state of the parser */
1971   struct symbol *fallback;   /* The fallback token */
1972   struct symbol *lhs;        /* Left-hand side of current rule */
1973   const char *lhsalias;      /* Alias for the LHS */
1974   int nrhs;                  /* Number of right-hand side symbols seen */
1975   struct symbol *rhs[MAXRHS];  /* RHS symbols */
1976   const char *alias[MAXRHS]; /* Aliases for each RHS symbol (or NULL) */
1977   struct rule *prevrule;     /* Previous rule parsed */
1978   const char *declkeyword;   /* Keyword of a declaration */
1979   char **declargslot;        /* Where the declaration argument should be put */
1980   int insertLineMacro;       /* Add #line before declaration insert */
1981   int *decllinenoslot;       /* Where to write declaration line number */
1982   enum e_assoc declassoc;    /* Assign this association to decl arguments */
1983   int preccounter;           /* Assign this precedence to decl arguments */
1984   struct rule *firstrule;    /* Pointer to first rule in the grammar */
1985   struct rule *lastrule;     /* Pointer to the most recently parsed rule */
1986 };
1987 
1988 /* Parse a single token */
parseonetoken(struct pstate * psp)1989 static void parseonetoken(struct pstate *psp)
1990 {
1991   const char *x;
1992   x = Strsafe(psp->tokenstart);     /* Save the token permanently */
1993 #if 0
1994   printf("%s:%d: Token=[%s] state=%d\n",psp->filename,psp->tokenlineno,
1995     x,psp->state);
1996 #endif
1997   switch( psp->state ){
1998     case INITIALIZE:
1999       psp->prevrule = 0;
2000       psp->preccounter = 0;
2001       psp->firstrule = psp->lastrule = 0;
2002       psp->gp->nrule = 0;
2003       /* Fall thru to next case */
2004     case WAITING_FOR_DECL_OR_RULE:
2005       if( x[0]=='%' ){
2006         psp->state = WAITING_FOR_DECL_KEYWORD;
2007       }else if( islower(x[0]) ){
2008         psp->lhs = Symbol_new(x);
2009         psp->nrhs = 0;
2010         psp->lhsalias = 0;
2011         psp->state = WAITING_FOR_ARROW;
2012       }else if( x[0]=='{' ){
2013         if( psp->prevrule==0 ){
2014           ErrorMsg(psp->filename,psp->tokenlineno,
2015 "There is no prior rule opon which to attach the code \
2016 fragment which begins on this line.");
2017           psp->errorcnt++;
2018 	}else if( psp->prevrule->code!=0 ){
2019           ErrorMsg(psp->filename,psp->tokenlineno,
2020 "Code fragment beginning on this line is not the first \
2021 to follow the previous rule.");
2022           psp->errorcnt++;
2023         }else{
2024           psp->prevrule->line = psp->tokenlineno;
2025           psp->prevrule->code = &x[1];
2026 	}
2027       }else if( x[0]=='[' ){
2028         psp->state = PRECEDENCE_MARK_1;
2029       }else{
2030         ErrorMsg(psp->filename,psp->tokenlineno,
2031           "Token \"%s\" should be either \"%%\" or a nonterminal name.",
2032           x);
2033         psp->errorcnt++;
2034       }
2035       break;
2036     case PRECEDENCE_MARK_1:
2037       if( !isupper(x[0]) ){
2038         ErrorMsg(psp->filename,psp->tokenlineno,
2039           "The precedence symbol must be a terminal.");
2040         psp->errorcnt++;
2041       }else if( psp->prevrule==0 ){
2042         ErrorMsg(psp->filename,psp->tokenlineno,
2043           "There is no prior rule to assign precedence \"[%s]\".",x);
2044         psp->errorcnt++;
2045       }else if( psp->prevrule->precsym!=0 ){
2046         ErrorMsg(psp->filename,psp->tokenlineno,
2047 "Precedence mark on this line is not the first \
2048 to follow the previous rule.");
2049         psp->errorcnt++;
2050       }else{
2051         psp->prevrule->precsym = Symbol_new(x);
2052       }
2053       psp->state = PRECEDENCE_MARK_2;
2054       break;
2055     case PRECEDENCE_MARK_2:
2056       if( x[0]!=']' ){
2057         ErrorMsg(psp->filename,psp->tokenlineno,
2058           "Missing \"]\" on precedence mark.");
2059         psp->errorcnt++;
2060       }
2061       psp->state = WAITING_FOR_DECL_OR_RULE;
2062       break;
2063     case WAITING_FOR_ARROW:
2064       if( x[0]==':' && x[1]==':' && x[2]=='=' ){
2065         psp->state = IN_RHS;
2066       }else if( x[0]=='(' ){
2067         psp->state = LHS_ALIAS_1;
2068       }else{
2069         ErrorMsg(psp->filename,psp->tokenlineno,
2070           "Expected to see a \":\" following the LHS symbol \"%s\".",
2071           psp->lhs->name);
2072         psp->errorcnt++;
2073         psp->state = RESYNC_AFTER_RULE_ERROR;
2074       }
2075       break;
2076     case LHS_ALIAS_1:
2077       if( isalpha(x[0]) ){
2078         psp->lhsalias = x;
2079         psp->state = LHS_ALIAS_2;
2080       }else{
2081         ErrorMsg(psp->filename,psp->tokenlineno,
2082           "\"%s\" is not a valid alias for the LHS \"%s\"\n",
2083           x,psp->lhs->name);
2084         psp->errorcnt++;
2085         psp->state = RESYNC_AFTER_RULE_ERROR;
2086       }
2087       break;
2088     case LHS_ALIAS_2:
2089       if( x[0]==')' ){
2090         psp->state = LHS_ALIAS_3;
2091       }else{
2092         ErrorMsg(psp->filename,psp->tokenlineno,
2093           "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias);
2094         psp->errorcnt++;
2095         psp->state = RESYNC_AFTER_RULE_ERROR;
2096       }
2097       break;
2098     case LHS_ALIAS_3:
2099       if( x[0]==':' && x[1]==':' && x[2]=='=' ){
2100         psp->state = IN_RHS;
2101       }else{
2102         ErrorMsg(psp->filename,psp->tokenlineno,
2103           "Missing \"->\" following: \"%s(%s)\".",
2104            psp->lhs->name,psp->lhsalias);
2105         psp->errorcnt++;
2106         psp->state = RESYNC_AFTER_RULE_ERROR;
2107       }
2108       break;
2109     case IN_RHS:
2110       if( x[0]=='.' ){
2111         struct rule *rp;
2112         rp = (struct rule *)calloc( sizeof(struct rule) +
2113              sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs, 1);
2114         if( rp==0 ){
2115           ErrorMsg(psp->filename,psp->tokenlineno,
2116             "Can't allocate enough memory for this rule.");
2117           psp->errorcnt++;
2118           psp->prevrule = 0;
2119 	}else{
2120           int i;
2121           rp->ruleline = psp->tokenlineno;
2122           rp->rhs = (struct symbol**)&rp[1];
2123           rp->rhsalias = (const char**)&(rp->rhs[psp->nrhs]);
2124           for(i=0; i<psp->nrhs; i++){
2125             rp->rhs[i] = psp->rhs[i];
2126             rp->rhsalias[i] = psp->alias[i];
2127 	  }
2128           rp->lhs = psp->lhs;
2129           rp->lhsalias = psp->lhsalias;
2130           rp->nrhs = psp->nrhs;
2131           rp->code = 0;
2132           rp->precsym = 0;
2133           rp->index = psp->gp->nrule++;
2134           rp->nextlhs = rp->lhs->rule;
2135           rp->lhs->rule = rp;
2136           rp->next = 0;
2137           if( psp->firstrule==0 ){
2138             psp->firstrule = psp->lastrule = rp;
2139 	  }else{
2140             psp->lastrule->next = rp;
2141             psp->lastrule = rp;
2142 	  }
2143           psp->prevrule = rp;
2144 	}
2145         psp->state = WAITING_FOR_DECL_OR_RULE;
2146       }else if( isalpha(x[0]) ){
2147         if( psp->nrhs>=MAXRHS ){
2148           ErrorMsg(psp->filename,psp->tokenlineno,
2149             "Too many symbols on RHS of rule beginning at \"%s\".",
2150             x);
2151           psp->errorcnt++;
2152           psp->state = RESYNC_AFTER_RULE_ERROR;
2153 	}else{
2154           psp->rhs[psp->nrhs] = Symbol_new(x);
2155           psp->alias[psp->nrhs] = 0;
2156           psp->nrhs++;
2157 	}
2158       }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 ){
2159         struct symbol *msp = psp->rhs[psp->nrhs-1];
2160         if( msp->type!=MULTITERMINAL ){
2161           struct symbol *origsp = msp;
2162           msp = (struct symbol *) calloc(1,sizeof(*msp));
2163           memset(msp, 0, sizeof(*msp));
2164           msp->type = MULTITERMINAL;
2165           msp->nsubsym = 1;
2166           msp->subsym = (struct symbol **) calloc(1,sizeof(struct symbol*));
2167           msp->subsym[0] = origsp;
2168           msp->name = origsp->name;
2169           psp->rhs[psp->nrhs-1] = msp;
2170         }
2171         msp->nsubsym++;
2172         msp->subsym = (struct symbol **) realloc(msp->subsym,
2173           sizeof(struct symbol*)*msp->nsubsym);
2174         msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]);
2175         if( islower(x[1]) || islower(msp->subsym[0]->name[0]) ){
2176           ErrorMsg(psp->filename,psp->tokenlineno,
2177             "Cannot form a compound containing a non-terminal");
2178           psp->errorcnt++;
2179         }
2180       }else if( x[0]=='(' && psp->nrhs>0 ){
2181         psp->state = RHS_ALIAS_1;
2182       }else{
2183         ErrorMsg(psp->filename,psp->tokenlineno,
2184           "Illegal character on RHS of rule: \"%s\".",x);
2185         psp->errorcnt++;
2186         psp->state = RESYNC_AFTER_RULE_ERROR;
2187       }
2188       break;
2189     case RHS_ALIAS_1:
2190       if( isalpha(x[0]) ){
2191         psp->alias[psp->nrhs-1] = x;
2192         psp->state = RHS_ALIAS_2;
2193       }else{
2194         ErrorMsg(psp->filename,psp->tokenlineno,
2195           "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n",
2196           x,psp->rhs[psp->nrhs-1]->name);
2197         psp->errorcnt++;
2198         psp->state = RESYNC_AFTER_RULE_ERROR;
2199       }
2200       break;
2201     case RHS_ALIAS_2:
2202       if( x[0]==')' ){
2203         psp->state = IN_RHS;
2204       }else{
2205         ErrorMsg(psp->filename,psp->tokenlineno,
2206           "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias);
2207         psp->errorcnt++;
2208         psp->state = RESYNC_AFTER_RULE_ERROR;
2209       }
2210       break;
2211     case WAITING_FOR_DECL_KEYWORD:
2212       if( isalpha(x[0]) ){
2213         psp->declkeyword = x;
2214         psp->declargslot = 0;
2215         psp->decllinenoslot = 0;
2216         psp->insertLineMacro = 1;
2217         psp->state = WAITING_FOR_DECL_ARG;
2218         if( strcmp(x,"name")==0 ){
2219           psp->declargslot = &(psp->gp->name);
2220           psp->insertLineMacro = 0;
2221 	}else if( strcmp(x,"include")==0 ){
2222           psp->declargslot = &(psp->gp->include);
2223 	}else if( strcmp(x,"code")==0 ){
2224           psp->declargslot = &(psp->gp->extracode);
2225 	}else if( strcmp(x,"token_destructor")==0 ){
2226           psp->declargslot = &psp->gp->tokendest;
2227 	}else if( strcmp(x,"default_destructor")==0 ){
2228           psp->declargslot = &psp->gp->vardest;
2229 	}else if( strcmp(x,"token_prefix")==0 ){
2230           psp->declargslot = &psp->gp->tokenprefix;
2231           psp->insertLineMacro = 0;
2232 	}else if( strcmp(x,"syntax_error")==0 ){
2233           psp->declargslot = &(psp->gp->error);
2234 	}else if( strcmp(x,"parse_accept")==0 ){
2235           psp->declargslot = &(psp->gp->accept);
2236 	}else if( strcmp(x,"parse_failure")==0 ){
2237           psp->declargslot = &(psp->gp->failure);
2238 	}else if( strcmp(x,"stack_overflow")==0 ){
2239           psp->declargslot = &(psp->gp->overflow);
2240         }else if( strcmp(x,"extra_argument")==0 ){
2241           psp->declargslot = &(psp->gp->arg);
2242           psp->insertLineMacro = 0;
2243         }else if( strcmp(x,"token_type")==0 ){
2244           psp->declargslot = &(psp->gp->tokentype);
2245           psp->insertLineMacro = 0;
2246         }else if( strcmp(x,"default_type")==0 ){
2247           psp->declargslot = &(psp->gp->vartype);
2248           psp->insertLineMacro = 0;
2249         }else if( strcmp(x,"stack_size")==0 ){
2250           psp->declargslot = &(psp->gp->stacksize);
2251           psp->insertLineMacro = 0;
2252         }else if( strcmp(x,"start_symbol")==0 ){
2253           psp->declargslot = &(psp->gp->start);
2254           psp->insertLineMacro = 0;
2255         }else if( strcmp(x,"left")==0 ){
2256           psp->preccounter++;
2257           psp->declassoc = LEFT;
2258           psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2259         }else if( strcmp(x,"right")==0 ){
2260           psp->preccounter++;
2261           psp->declassoc = RIGHT;
2262           psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2263         }else if( strcmp(x,"nonassoc")==0 ){
2264           psp->preccounter++;
2265           psp->declassoc = NONE;
2266           psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2267 	}else if( strcmp(x,"destructor")==0 ){
2268           psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL;
2269 	}else if( strcmp(x,"type")==0 ){
2270           psp->state = WAITING_FOR_DATATYPE_SYMBOL;
2271         }else if( strcmp(x,"fallback")==0 ){
2272           psp->fallback = 0;
2273           psp->state = WAITING_FOR_FALLBACK_ID;
2274         }else if( strcmp(x,"wildcard")==0 ){
2275           psp->state = WAITING_FOR_WILDCARD_ID;
2276         }else{
2277           ErrorMsg(psp->filename,psp->tokenlineno,
2278             "Unknown declaration keyword: \"%%%s\".",x);
2279           psp->errorcnt++;
2280           psp->state = RESYNC_AFTER_DECL_ERROR;
2281 	}
2282       }else{
2283         ErrorMsg(psp->filename,psp->tokenlineno,
2284           "Illegal declaration keyword: \"%s\".",x);
2285         psp->errorcnt++;
2286         psp->state = RESYNC_AFTER_DECL_ERROR;
2287       }
2288       break;
2289     case WAITING_FOR_DESTRUCTOR_SYMBOL:
2290       if( !isalpha(x[0]) ){
2291         ErrorMsg(psp->filename,psp->tokenlineno,
2292           "Symbol name missing after %%destructor keyword");
2293         psp->errorcnt++;
2294         psp->state = RESYNC_AFTER_DECL_ERROR;
2295       }else{
2296         struct symbol *sp = Symbol_new(x);
2297         psp->declargslot = &sp->destructor;
2298         psp->decllinenoslot = &sp->destLineno;
2299         psp->insertLineMacro = 1;
2300         psp->state = WAITING_FOR_DECL_ARG;
2301       }
2302       break;
2303     case WAITING_FOR_DATATYPE_SYMBOL:
2304       if( !isalpha(x[0]) ){
2305         ErrorMsg(psp->filename,psp->tokenlineno,
2306           "Symbol name missing after %%type keyword");
2307         psp->errorcnt++;
2308         psp->state = RESYNC_AFTER_DECL_ERROR;
2309       }else{
2310         struct symbol *sp = Symbol_find(x);
2311         if((sp) && (sp->datatype)){
2312           ErrorMsg(psp->filename,psp->tokenlineno,
2313             "Symbol %%type \"%s\" already defined", x);
2314           psp->errorcnt++;
2315           psp->state = RESYNC_AFTER_DECL_ERROR;
2316         }else{
2317           if (!sp){
2318             sp = Symbol_new(x);
2319           }
2320           psp->declargslot = &sp->datatype;
2321           psp->insertLineMacro = 0;
2322           psp->state = WAITING_FOR_DECL_ARG;
2323         }
2324       }
2325       break;
2326     case WAITING_FOR_PRECEDENCE_SYMBOL:
2327       if( x[0]=='.' ){
2328         psp->state = WAITING_FOR_DECL_OR_RULE;
2329       }else if( isupper(x[0]) ){
2330         struct symbol *sp;
2331         sp = Symbol_new(x);
2332         if( sp->prec>=0 ){
2333           ErrorMsg(psp->filename,psp->tokenlineno,
2334             "Symbol \"%s\" has already be given a precedence.",x);
2335           psp->errorcnt++;
2336 	}else{
2337           sp->prec = psp->preccounter;
2338           sp->assoc = psp->declassoc;
2339 	}
2340       }else{
2341         ErrorMsg(psp->filename,psp->tokenlineno,
2342           "Can't assign a precedence to \"%s\".",x);
2343         psp->errorcnt++;
2344       }
2345       break;
2346     case WAITING_FOR_DECL_ARG:
2347       if( x[0]=='{' || x[0]=='\"' || isalnum(x[0]) ){
2348         const char *zOld, *zNew;
2349         char *zBuf, *z;
2350         int nOld, n, nLine, nNew, nBack;
2351         int addLineMacro;
2352         char zLine[50];
2353         zNew = x;
2354         if( zNew[0]=='"' || zNew[0]=='{' ) zNew++;
2355         nNew = lemonStrlen(zNew);
2356         if( *psp->declargslot ){
2357           zOld = *psp->declargslot;
2358         }else{
2359           zOld = "";
2360         }
2361         nOld = lemonStrlen(zOld);
2362         n = nOld + nNew + 20;
2363         addLineMacro = !psp->gp->nolinenosflag && psp->insertLineMacro &&
2364                         (psp->decllinenoslot==0 || psp->decllinenoslot[0]!=0);
2365         if( addLineMacro ){
2366           for(z=psp->filename, nBack=0; *z; z++){
2367             if( *z=='\\' ) nBack++;
2368           }
2369           sprintf(zLine, "#line %d ", psp->tokenlineno);
2370           nLine = lemonStrlen(zLine);
2371           n += nLine + lemonStrlen(psp->filename) + nBack;
2372         }
2373         *psp->declargslot = (char *) realloc(*psp->declargslot, n);
2374         zBuf = *psp->declargslot + nOld;
2375         if( addLineMacro ){
2376           if( nOld && zBuf[-1]!='\n' ){
2377             *(zBuf++) = '\n';
2378           }
2379           memcpy(zBuf, zLine, nLine);
2380           zBuf += nLine;
2381           *(zBuf++) = '"';
2382           for(z=psp->filename; *z; z++){
2383             if( *z=='\\' ){
2384               *(zBuf++) = '\\';
2385             }
2386             *(zBuf++) = *z;
2387           }
2388           *(zBuf++) = '"';
2389           *(zBuf++) = '\n';
2390         }
2391         if( psp->decllinenoslot && psp->decllinenoslot[0]==0 ){
2392           psp->decllinenoslot[0] = psp->tokenlineno;
2393         }
2394         memcpy(zBuf, zNew, nNew);
2395         zBuf += nNew;
2396         *zBuf = 0;
2397         psp->state = WAITING_FOR_DECL_OR_RULE;
2398       }else{
2399         ErrorMsg(psp->filename,psp->tokenlineno,
2400           "Illegal argument to %%%s: %s",psp->declkeyword,x);
2401         psp->errorcnt++;
2402         psp->state = RESYNC_AFTER_DECL_ERROR;
2403       }
2404       break;
2405     case WAITING_FOR_FALLBACK_ID:
2406       if( x[0]=='.' ){
2407         psp->state = WAITING_FOR_DECL_OR_RULE;
2408       }else if( !isupper(x[0]) ){
2409         ErrorMsg(psp->filename, psp->tokenlineno,
2410           "%%fallback argument \"%s\" should be a token", x);
2411         psp->errorcnt++;
2412       }else{
2413         struct symbol *sp = Symbol_new(x);
2414         if( psp->fallback==0 ){
2415           psp->fallback = sp;
2416         }else if( sp->fallback ){
2417           ErrorMsg(psp->filename, psp->tokenlineno,
2418             "More than one fallback assigned to token %s", x);
2419           psp->errorcnt++;
2420         }else{
2421           sp->fallback = psp->fallback;
2422           psp->gp->has_fallback = 1;
2423         }
2424       }
2425       break;
2426     case WAITING_FOR_WILDCARD_ID:
2427       if( x[0]=='.' ){
2428         psp->state = WAITING_FOR_DECL_OR_RULE;
2429       }else if( !isupper(x[0]) ){
2430         ErrorMsg(psp->filename, psp->tokenlineno,
2431           "%%wildcard argument \"%s\" should be a token", x);
2432         psp->errorcnt++;
2433       }else{
2434         struct symbol *sp = Symbol_new(x);
2435         if( psp->gp->wildcard==0 ){
2436           psp->gp->wildcard = sp;
2437         }else{
2438           ErrorMsg(psp->filename, psp->tokenlineno,
2439             "Extra wildcard to token: %s", x);
2440           psp->errorcnt++;
2441         }
2442       }
2443       break;
2444     case RESYNC_AFTER_RULE_ERROR:
2445 /*      if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
2446 **      break; */
2447     case RESYNC_AFTER_DECL_ERROR:
2448       if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
2449       if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD;
2450       break;
2451   }
2452 }
2453 
2454 /* Run the preprocessor over the input file text.  The global variables
2455 ** azDefine[0] through azDefine[nDefine-1] contains the names of all defined
2456 ** macros.  This routine looks for "%ifdef" and "%ifndef" and "%endif" and
2457 ** comments them out.  Text in between is also commented out as appropriate.
2458 */
preprocess_input(char * z)2459 static void preprocess_input(char *z){
2460   int i, j, k, n;
2461   int exclude = 0;
2462   int start = 0;
2463   int lineno = 1;
2464   int start_lineno = 1;
2465   for(i=0; z[i]; i++){
2466     if( z[i]=='\n' ) lineno++;
2467     if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue;
2468     if( strncmp(&z[i],"%endif",6)==0 && isspace(z[i+6]) ){
2469       if( exclude ){
2470         exclude--;
2471         if( exclude==0 ){
2472           for(j=start; j<i; j++) if( z[j]!='\n' ) z[j] = ' ';
2473         }
2474       }
2475       for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' ';
2476     }else if( (strncmp(&z[i],"%ifdef",6)==0 && isspace(z[i+6]))
2477           || (strncmp(&z[i],"%ifndef",7)==0 && isspace(z[i+7])) ){
2478       if( exclude ){
2479         exclude++;
2480       }else{
2481         for(j=i+7; isspace(z[j]); j++){}
2482         for(n=0; z[j+n] && !isspace(z[j+n]); n++){}
2483         exclude = 1;
2484         for(k=0; k<nDefine; k++){
2485           if( strncmp(azDefine[k],&z[j],n)==0 && lemonStrlen(azDefine[k])==n ){
2486             exclude = 0;
2487             break;
2488           }
2489         }
2490         if( z[i+3]=='n' ) exclude = !exclude;
2491         if( exclude ){
2492           start = i;
2493           start_lineno = lineno;
2494         }
2495       }
2496       for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' ';
2497     }
2498   }
2499   if( exclude ){
2500     fprintf(stderr,"unterminated %%ifdef starting on line %d\n", start_lineno);
2501     exit(1);
2502   }
2503 }
2504 
2505 /* In spite of its name, this function is really a scanner.  It read
2506 ** in the entire input file (all at once) then tokenizes it.  Each
2507 ** token is passed to the function "parseonetoken" which builds all
2508 ** the appropriate data structures in the global state vector "gp".
2509 */
Parse(struct lemon * gp)2510 void Parse(struct lemon *gp)
2511 {
2512   struct pstate ps;
2513   FILE *fp;
2514   char *filebuf;
2515   int filesize;
2516   int lineno;
2517   int c;
2518   char *cp, *nextcp;
2519   int startline = 0;
2520 
2521   memset(&ps, '\0', sizeof(ps));
2522   ps.gp = gp;
2523   ps.filename = gp->filename;
2524   ps.errorcnt = 0;
2525   ps.state = INITIALIZE;
2526 
2527   /* Begin by reading the input file */
2528   fp = fopen(ps.filename,"rb");
2529   if( fp==0 ){
2530     ErrorMsg(ps.filename,0,"Can't open this file for reading.");
2531     gp->errorcnt++;
2532     return;
2533   }
2534   fseek(fp,0,2);
2535   filesize = ftell(fp);
2536   rewind(fp);
2537   filebuf = (char *)malloc( filesize+1 );
2538   if( filebuf==0 ){
2539     ErrorMsg(ps.filename,0,"Can't allocate %d of memory to hold this file.",
2540       filesize+1);
2541     gp->errorcnt++;
2542     return;
2543   }
2544   if( fread(filebuf,1,filesize,fp)!=filesize ){
2545     ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.",
2546       filesize);
2547     free(filebuf);
2548     gp->errorcnt++;
2549     return;
2550   }
2551   fclose(fp);
2552   filebuf[filesize] = 0;
2553 
2554   /* Make an initial pass through the file to handle %ifdef and %ifndef */
2555   preprocess_input(filebuf);
2556 
2557   /* Now scan the text of the input file */
2558   lineno = 1;
2559   for(cp=filebuf; (c= *cp)!=0; ){
2560     if( c=='\n' ) lineno++;              /* Keep track of the line number */
2561     if( isspace(c) ){ cp++; continue; }  /* Skip all white space */
2562     if( c=='/' && cp[1]=='/' ){          /* Skip C++ style comments */
2563       cp+=2;
2564       while( (c= *cp)!=0 && c!='\n' ) cp++;
2565       continue;
2566     }
2567     if( c=='/' && cp[1]=='*' ){          /* Skip C style comments */
2568       cp+=2;
2569       while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){
2570         if( c=='\n' ) lineno++;
2571         cp++;
2572       }
2573       if( c ) cp++;
2574       continue;
2575     }
2576     ps.tokenstart = cp;                /* Mark the beginning of the token */
2577     ps.tokenlineno = lineno;           /* Linenumber on which token begins */
2578     if( c=='\"' ){                     /* String literals */
2579       cp++;
2580       while( (c= *cp)!=0 && c!='\"' ){
2581         if( c=='\n' ) lineno++;
2582         cp++;
2583       }
2584       if( c==0 ){
2585         ErrorMsg(ps.filename,startline,
2586 "String starting on this line is not terminated before the end of the file.");
2587         ps.errorcnt++;
2588         nextcp = cp;
2589       }else{
2590         nextcp = cp+1;
2591       }
2592     }else if( c=='{' ){               /* A block of C code */
2593       int level;
2594       cp++;
2595       for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){
2596         if( c=='\n' ) lineno++;
2597         else if( c=='{' ) level++;
2598         else if( c=='}' ) level--;
2599         else if( c=='/' && cp[1]=='*' ){  /* Skip comments */
2600           int prevc;
2601           cp = &cp[2];
2602           prevc = 0;
2603           while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){
2604             if( c=='\n' ) lineno++;
2605             prevc = c;
2606             cp++;
2607 	  }
2608 	}else if( c=='/' && cp[1]=='/' ){  /* Skip C++ style comments too */
2609           cp = &cp[2];
2610           while( (c= *cp)!=0 && c!='\n' ) cp++;
2611           if( c ) lineno++;
2612 	}else if( c=='\'' || c=='\"' ){    /* String a character literals */
2613           int startchar, prevc;
2614           startchar = c;
2615           prevc = 0;
2616           for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){
2617             if( c=='\n' ) lineno++;
2618             if( prevc=='\\' ) prevc = 0;
2619             else              prevc = c;
2620 	  }
2621 	}
2622       }
2623       if( c==0 ){
2624         ErrorMsg(ps.filename,ps.tokenlineno,
2625 "C code starting on this line is not terminated before the end of the file.");
2626         ps.errorcnt++;
2627         nextcp = cp;
2628       }else{
2629         nextcp = cp+1;
2630       }
2631     }else if( isalnum(c) ){          /* Identifiers */
2632       while( (c= *cp)!=0 && (isalnum(c) || c=='_') ) cp++;
2633       nextcp = cp;
2634     }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */
2635       cp += 3;
2636       nextcp = cp;
2637     }else if( (c=='/' || c=='|') && isalpha(cp[1]) ){
2638       cp += 2;
2639       while( (c = *cp)!=0 && (isalnum(c) || c=='_') ) cp++;
2640       nextcp = cp;
2641     }else{                          /* All other (one character) operators */
2642       cp++;
2643       nextcp = cp;
2644     }
2645     c = *cp;
2646     *cp = 0;                        /* Null terminate the token */
2647     parseonetoken(&ps);             /* Parse the token */
2648     *cp = c;                        /* Restore the buffer */
2649     cp = nextcp;
2650   }
2651   free(filebuf);                    /* Release the buffer after parsing */
2652   gp->rule = ps.firstrule;
2653   gp->errorcnt = ps.errorcnt;
2654 }
2655 /*************************** From the file "plink.c" *********************/
2656 /*
2657 ** Routines processing configuration follow-set propagation links
2658 ** in the LEMON parser generator.
2659 */
2660 static struct plink *plink_freelist = 0;
2661 
2662 /* Allocate a new plink */
Plink_new()2663 struct plink *Plink_new(){
2664   struct plink *newlink;
2665 
2666   if( plink_freelist==0 ){
2667     int i;
2668     int amt = 100;
2669     plink_freelist = (struct plink *)calloc( amt, sizeof(struct plink) );
2670     if( plink_freelist==0 ){
2671       fprintf(stderr,
2672       "Unable to allocate memory for a new follow-set propagation link.\n");
2673       exit(1);
2674     }
2675     for(i=0; i<amt-1; i++) plink_freelist[i].next = &plink_freelist[i+1];
2676     plink_freelist[amt-1].next = 0;
2677   }
2678   newlink = plink_freelist;
2679   plink_freelist = plink_freelist->next;
2680   return newlink;
2681 }
2682 
2683 /* Add a plink to a plink list */
Plink_add(struct plink ** plpp,struct config * cfp)2684 void Plink_add(struct plink **plpp, struct config *cfp)
2685 {
2686   struct plink *newlink;
2687   newlink = Plink_new();
2688   newlink->next = *plpp;
2689   *plpp = newlink;
2690   newlink->cfp = cfp;
2691 }
2692 
2693 /* Transfer every plink on the list "from" to the list "to" */
Plink_copy(struct plink ** to,struct plink * from)2694 void Plink_copy(struct plink **to, struct plink *from)
2695 {
2696   struct plink *nextpl;
2697   while( from ){
2698     nextpl = from->next;
2699     from->next = *to;
2700     *to = from;
2701     from = nextpl;
2702   }
2703 }
2704 
2705 /* Delete every plink on the list */
Plink_delete(struct plink * plp)2706 void Plink_delete(struct plink *plp)
2707 {
2708   struct plink *nextpl;
2709 
2710   while( plp ){
2711     nextpl = plp->next;
2712     plp->next = plink_freelist;
2713     plink_freelist = plp;
2714     plp = nextpl;
2715   }
2716 }
2717 /*********************** From the file "report.c" **************************/
2718 /*
2719 ** Procedures for generating reports and tables in the LEMON parser generator.
2720 */
2721 
2722 /* Generate a filename with the given suffix.  Space to hold the
2723 ** name comes from malloc() and must be freed by the calling
2724 ** function.
2725 */
file_makename(struct lemon * lemp,const char * suffix)2726 PRIVATE char *file_makename(struct lemon *lemp, const char *suffix)
2727 {
2728   char *name;
2729   char *cp;
2730   char *filename;
2731 
2732   filename = lemp->filename;
2733   if (local_out_dir) {
2734     char *ptr;
2735 #ifdef __WIN32__
2736     for (ptr = lemp->filename + strlen (lemp->filename) - 1; (ptr > lemp->filename) && (*ptr != '\\'); ptr--);
2737 #else
2738     for (ptr = lemp->filename + strlen (lemp->filename) - 1; (ptr > lemp->filename) && (*ptr != '/'); ptr--);
2739 #endif
2740     if (ptr > lemp->filename)
2741      filename = ptr + 1;
2742   }
2743 
2744   name = (char*)malloc( lemonStrlen(filename) + lemonStrlen(suffix) + 5 );
2745   if( name==0 ){
2746     fprintf(stderr,"Can't allocate space for a filename.\n");
2747     exit(1);
2748   }
2749   strcpy(name,filename);
2750   cp = strrchr(name,'.');
2751   if( cp ) *cp = 0;
2752   strcat(name,suffix);
2753   return name;
2754 }
2755 
2756 /* Open a file with a name based on the name of the input file,
2757 ** but with a different (specified) suffix, and return a pointer
2758 ** to the stream */
file_open(struct lemon * lemp,const char * suffix,const char * mode)2759 PRIVATE FILE *file_open(
2760   struct lemon *lemp,
2761   const char *suffix,
2762   const char *mode
2763 ){
2764   FILE *fp;
2765 
2766   if( lemp->outname ) free(lemp->outname);
2767   lemp->outname = file_makename(lemp, suffix);
2768   fp = fopen(lemp->outname,mode);
2769   if( fp==0 && *mode=='w' ){
2770     fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname);
2771     lemp->errorcnt++;
2772     return 0;
2773   }
2774 
2775   /* Add files we create to a list, so we can delete them if we fail. This
2776   ** is to keep makefiles from getting confused. We don't include .out files,
2777   ** though: this is debug information, and you don't want it deleted if there
2778   ** was an error you need to track down.
2779   */
2780   if(( *mode=='w' ) && (strcmp(suffix, ".out") != 0)){
2781     const char **ptr = (const char **)
2782         realloc(made_files, sizeof (const char **) * (made_files_count + 1));
2783     const char *fname = Strsafe(lemp->outname);
2784     if ((ptr == NULL) || (fname == NULL)) {
2785         free(ptr);
2786         memory_error();
2787     }
2788     made_files = ptr;
2789     made_files[made_files_count++] = fname;
2790   }
2791   return fp;
2792 }
2793 
2794 /* Duplicate the input file without comments and without actions
2795 ** on rules */
Reprint(struct lemon * lemp)2796 void Reprint(struct lemon *lemp)
2797 {
2798   struct rule *rp;
2799   struct symbol *sp;
2800   int i, j, maxlen, len, ncolumns, skip;
2801   printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename);
2802   maxlen = 10;
2803   for(i=0; i<lemp->nsymbol; i++){
2804     sp = lemp->symbols[i];
2805     len = lemonStrlen(sp->name);
2806     if( len>maxlen ) maxlen = len;
2807   }
2808   ncolumns = 76/(maxlen+5);
2809   if( ncolumns<1 ) ncolumns = 1;
2810   skip = (lemp->nsymbol + ncolumns - 1)/ncolumns;
2811   for(i=0; i<skip; i++){
2812     printf("//");
2813     for(j=i; j<lemp->nsymbol; j+=skip){
2814       sp = lemp->symbols[j];
2815       assert( sp->index==j );
2816       printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name);
2817     }
2818     printf("\n");
2819   }
2820   for(rp=lemp->rule; rp; rp=rp->next){
2821     printf("%s",rp->lhs->name);
2822     /*    if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */
2823     printf(" ::=");
2824     for(i=0; i<rp->nrhs; i++){
2825       sp = rp->rhs[i];
2826       printf(" %s", sp->name);
2827       if( sp->type==MULTITERMINAL ){
2828         for(j=1; j<sp->nsubsym; j++){
2829           printf("|%s", sp->subsym[j]->name);
2830         }
2831       }
2832       /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */
2833     }
2834     printf(".");
2835     if( rp->precsym ) printf(" [%s]",rp->precsym->name);
2836     /* if( rp->code ) printf("\n    %s",rp->code); */
2837     printf("\n");
2838   }
2839 }
2840 
ConfigPrint(FILE * fp,struct config * cfp)2841 void ConfigPrint(FILE *fp, struct config *cfp)
2842 {
2843   struct rule *rp;
2844   struct symbol *sp;
2845   int i, j;
2846   rp = cfp->rp;
2847   fprintf(fp,"%s ::=",rp->lhs->name);
2848   for(i=0; i<=rp->nrhs; i++){
2849     if( i==cfp->dot ) fprintf(fp," *");
2850     if( i==rp->nrhs ) break;
2851     sp = rp->rhs[i];
2852     fprintf(fp," %s", sp->name);
2853     if( sp->type==MULTITERMINAL ){
2854       for(j=1; j<sp->nsubsym; j++){
2855         fprintf(fp,"|%s",sp->subsym[j]->name);
2856       }
2857     }
2858   }
2859 }
2860 
2861 /* #define TEST */
2862 #if 0
2863 /* Print a set */
2864 PRIVATE void SetPrint(out,set,lemp)
2865 FILE *out;
2866 char *set;
2867 struct lemon *lemp;
2868 {
2869   int i;
2870   char *spacer;
2871   spacer = "";
2872   fprintf(out,"%12s[","");
2873   for(i=0; i<lemp->nterminal; i++){
2874     if( SetFind(set,i) ){
2875       fprintf(out,"%s%s",spacer,lemp->symbols[i]->name);
2876       spacer = " ";
2877     }
2878   }
2879   fprintf(out,"]\n");
2880 }
2881 
2882 /* Print a plink chain */
2883 PRIVATE void PlinkPrint(out,plp,tag)
2884 FILE *out;
2885 struct plink *plp;
2886 char *tag;
2887 {
2888   while( plp ){
2889     fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->statenum);
2890     ConfigPrint(out,plp->cfp);
2891     fprintf(out,"\n");
2892     plp = plp->next;
2893   }
2894 }
2895 #endif
2896 
2897 /* Print an action to the given file descriptor.  Return FALSE if
2898 ** nothing was actually printed.
2899 */
PrintAction(struct action * ap,FILE * fp,int indent)2900 int PrintAction(struct action *ap, FILE *fp, int indent){
2901   int result = 1;
2902   switch( ap->type ){
2903     case SHIFT:
2904       fprintf(fp,"%*s shift  %d",indent,ap->sp->name,ap->x.stp->statenum);
2905       break;
2906     case REDUCE:
2907       fprintf(fp,"%*s reduce %d",indent,ap->sp->name,ap->x.rp->index);
2908       break;
2909     case ACCEPT:
2910       fprintf(fp,"%*s accept",indent,ap->sp->name);
2911       break;
2912     case ERROR:
2913       fprintf(fp,"%*s error",indent,ap->sp->name);
2914       break;
2915     case SRCONFLICT:
2916     case RRCONFLICT:
2917       fprintf(fp,"%*s reduce %-3d ** Parsing conflict **",
2918         indent,ap->sp->name,ap->x.rp->index);
2919       break;
2920     case SSCONFLICT:
2921       fprintf(fp,"%*s shift  %-3d ** Parsing conflict **",
2922         indent,ap->sp->name,ap->x.stp->statenum);
2923       break;
2924     case SH_RESOLVED:
2925       if( showPrecedenceConflict ){
2926         fprintf(fp,"%*s shift  %-3d -- dropped by precedence",
2927                 indent,ap->sp->name,ap->x.stp->statenum);
2928       }else{
2929         result = 0;
2930       }
2931       break;
2932     case RD_RESOLVED:
2933       if( showPrecedenceConflict ){
2934         fprintf(fp,"%*s reduce %-3d -- dropped by precedence",
2935                 indent,ap->sp->name,ap->x.rp->index);
2936       }else{
2937         result = 0;
2938       }
2939       break;
2940     case NOT_USED:
2941       result = 0;
2942       break;
2943   }
2944   return result;
2945 }
2946 
2947 /* Generate the "y.output" log file */
ReportOutput(struct lemon * lemp)2948 void ReportOutput(struct lemon *lemp)
2949 {
2950   int i;
2951   struct state *stp;
2952   struct config *cfp;
2953   struct action *ap;
2954   FILE *fp;
2955 
2956   fp = file_open(lemp,".out","wb");
2957   if( fp==0 ) return;
2958   for(i=0; i<lemp->nstate; i++){
2959     stp = lemp->sorted[i];
2960     fprintf(fp,"State %d:\n",stp->statenum);
2961     if( lemp->basisflag ) cfp=stp->bp;
2962     else                  cfp=stp->cfp;
2963     while( cfp ){
2964       char buf[20];
2965       if( cfp->dot==cfp->rp->nrhs ){
2966         sprintf(buf,"(%d)",cfp->rp->index);
2967         fprintf(fp,"    %5s ",buf);
2968       }else{
2969         fprintf(fp,"          ");
2970       }
2971       ConfigPrint(fp,cfp);
2972       fprintf(fp,"\n");
2973 #if 0
2974       SetPrint(fp,cfp->fws,lemp);
2975       PlinkPrint(fp,cfp->fplp,"To  ");
2976       PlinkPrint(fp,cfp->bplp,"From");
2977 #endif
2978       if( lemp->basisflag ) cfp=cfp->bp;
2979       else                  cfp=cfp->next;
2980     }
2981     fprintf(fp,"\n");
2982     for(ap=stp->ap; ap; ap=ap->next){
2983       if( PrintAction(ap,fp,30) ) fprintf(fp,"\n");
2984     }
2985     fprintf(fp,"\n");
2986   }
2987   fprintf(fp, "----------------------------------------------------\n");
2988   fprintf(fp, "Symbols:\n");
2989   for(i=0; i<lemp->nsymbol; i++){
2990     int j;
2991     struct symbol *sp;
2992 
2993     sp = lemp->symbols[i];
2994     fprintf(fp, "  %3d: %s", i, sp->name);
2995     if( sp->type==NONTERMINAL ){
2996       fprintf(fp, ":");
2997       if( sp->lambda ){
2998         fprintf(fp, " <lambda>");
2999       }
3000       for(j=0; j<lemp->nterminal; j++){
3001         if( sp->firstset && SetFind(sp->firstset, j) ){
3002           fprintf(fp, " %s", lemp->symbols[j]->name);
3003         }
3004       }
3005     }
3006     fprintf(fp, "\n");
3007   }
3008   fclose(fp);
3009   return;
3010 }
3011 
3012 /* Search for the file "name" which is in the same directory as
3013 ** the exacutable */
pathsearch(char * argv0,char * name,int modemask)3014 PRIVATE char *pathsearch(char *argv0, char *name, int modemask)
3015 {
3016   const char *pathlist;
3017   char *pathbufptr;
3018   char *pathbuf;
3019   char *path,*cp;
3020   char c;
3021 
3022 #ifdef __WIN32__
3023   cp = strrchr(argv0,'\\');
3024 #else
3025   cp = strrchr(argv0,'/');
3026 #endif
3027   if( cp ){
3028     c = *cp;
3029     *cp = 0;
3030     path = (char *)malloc( lemonStrlen(argv0) + lemonStrlen(name) + 2 );
3031     if( path ) sprintf(path,"%s/%s",argv0,name);
3032     *cp = c;
3033   }else{
3034     pathlist = getenv("PATH");
3035     if( pathlist==0 ) pathlist = ".:/bin:/usr/bin";
3036     pathbuf = (char *) malloc( lemonStrlen(pathlist) + 1 );
3037     path = (char *)malloc( lemonStrlen(pathlist)+lemonStrlen(name)+2 );
3038     if( (pathbuf != 0) && (path!=0) ){
3039       pathbufptr = pathbuf;
3040       strcpy(pathbuf, pathlist);
3041       while( *pathbuf ){
3042         cp = strchr(pathbuf,':');
3043         if( cp==0 ) cp = &pathbuf[lemonStrlen(pathbuf)];
3044         c = *cp;
3045         *cp = 0;
3046         sprintf(path,"%s/%s",pathbuf,name);
3047         *cp = c;
3048         if( c==0 ) pathbuf[0] = 0;
3049         else pathbuf = &cp[1];
3050         if( access(path,modemask)==0 ) break;
3051       }
3052       free(pathbufptr);
3053     }
3054   }
3055   return path;
3056 }
3057 
3058 /* Given an action, compute the integer value for that action
3059 ** which is to be put in the action table of the generated machine.
3060 ** Return negative if no action should be generated.
3061 */
compute_action(struct lemon * lemp,struct action * ap)3062 PRIVATE int compute_action(struct lemon *lemp, struct action *ap)
3063 {
3064   int act;
3065   switch( ap->type ){
3066     case SHIFT:  act = ap->x.stp->statenum;            break;
3067     case REDUCE: act = ap->x.rp->index + lemp->nstate; break;
3068     case ERROR:  act = lemp->nstate + lemp->nrule;     break;
3069     case ACCEPT: act = lemp->nstate + lemp->nrule + 1; break;
3070     default:     act = -1; break;
3071   }
3072   return act;
3073 }
3074 
3075 #define LINESIZE 1000
3076 /* The next cluster of routines are for reading the template file
3077 ** and writing the results to the generated parser */
3078 /* The first function transfers data from "in" to "out" until
3079 ** a line is seen which begins with "%%".  The line number is
3080 ** tracked.
3081 **
3082 ** if name!=0, then any word that begin with "Parse" is changed to
3083 ** begin with *name instead.
3084 */
tplt_xfer(char * name,FILE * in,FILE * out,int * lineno)3085 PRIVATE void tplt_xfer(char *name, FILE *in, FILE *out, int *lineno)
3086 {
3087   int i, iStart;
3088   char line[LINESIZE];
3089   while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){
3090     (*lineno)++;
3091     iStart = 0;
3092     if( name ){
3093       for(i=0; line[i]; i++){
3094         if( line[i]=='P' && strncmp(&line[i],"Parse",5)==0
3095           && (i==0 || !isalpha(line[i-1]))
3096         ){
3097           if( i>iStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]);
3098           fprintf(out,"%s",name);
3099           i += 4;
3100           iStart = i+1;
3101         }
3102       }
3103     }
3104     fprintf(out,"%s",&line[iStart]);
3105   }
3106 }
3107 
3108 /* The next function finds the template file and opens it, returning
3109 ** a pointer to the opened file. */
tplt_open(struct lemon * lemp)3110 PRIVATE FILE *tplt_open(struct lemon *lemp)
3111 {
3112   static char templatename[] = "lempar.c";
3113   char buf[1000];
3114   FILE *in;
3115   char *tpltname;
3116   char *cp;
3117 
3118   /* first, see if user specified a template filename on the command line. */
3119   if (user_templatename != 0) {
3120     if( access(user_templatename,004)==-1 ){
3121       fprintf(stderr,"Can't find the parser driver template file \"%s\".\n",
3122         user_templatename);
3123       lemp->errorcnt++;
3124       return 0;
3125     }
3126     in = fopen(user_templatename,"rb");
3127     if( in==0 ){
3128       fprintf(stderr,"Can't open the template file \"%s\".\n",user_templatename);
3129       lemp->errorcnt++;
3130       return 0;
3131     }
3132     return in;
3133   }
3134 
3135   cp = strrchr(lemp->filename,'.');
3136   if( cp ){
3137     sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename);
3138   }else{
3139     sprintf(buf,"%s.lt",lemp->filename);
3140   }
3141   if( access(buf,004)==0 ){
3142     tpltname = buf;
3143   }else if( access(templatename,004)==0 ){
3144     tpltname = templatename;
3145   }else{
3146     tpltname = pathsearch(lemp->argv0,templatename,0);
3147   }
3148   if( tpltname==0 ){
3149     fprintf(stderr,"Can't find the parser driver template file \"%s\".\n",
3150     templatename);
3151     lemp->errorcnt++;
3152     return 0;
3153   }
3154   in = fopen(tpltname,"rb");
3155   if( in==0 ){
3156     fprintf(stderr,"Can't open the template file \"%s\".\n",templatename);
3157     lemp->errorcnt++;
3158     return 0;
3159   }
3160   return in;
3161 }
3162 
3163 /* Print a #line directive line to the output file. */
tplt_linedir(FILE * out,int lineno,char * filename)3164 PRIVATE void tplt_linedir(FILE *out, int lineno, char *filename)
3165 {
3166   fprintf(out,"#line %d \"",lineno);
3167   while( *filename ){
3168     if( *filename == '\\' ) putc('\\',out);
3169     putc(*filename,out);
3170     filename++;
3171   }
3172   fprintf(out,"\"\n");
3173 }
3174 
3175 /* Print a string to the file and keep the linenumber up to date */
tplt_print(FILE * out,struct lemon * lemp,char * str,int * lineno)3176 PRIVATE void tplt_print(FILE *out, struct lemon *lemp, char *str, int *lineno)
3177 {
3178   if( str==0 ) return;
3179   while( *str ){
3180     putc(*str,out);
3181     if( *str=='\n' ) (*lineno)++;
3182     str++;
3183   }
3184   if( str[-1]!='\n' ){
3185     putc('\n',out);
3186     (*lineno)++;
3187   }
3188   if (!lemp->nolinenosflag) {
3189     (*lineno)++; tplt_linedir(out,*lineno,lemp->outname);
3190   }
3191   return;
3192 }
3193 
3194 /*
3195 ** The following routine emits code for the destructor for the
3196 ** symbol sp
3197 */
emit_destructor_code(FILE * out,struct symbol * sp,struct lemon * lemp,int * lineno)3198 void emit_destructor_code(
3199   FILE *out,
3200   struct symbol *sp,
3201   struct lemon *lemp,
3202   int *lineno
3203 ){
3204  char *cp = 0;
3205 
3206  if( sp->type==TERMINAL ){
3207    cp = lemp->tokendest;
3208    if( cp==0 ) return;
3209    fprintf(out,"{\n"); (*lineno)++;
3210  }else if( sp->destructor ){
3211    cp = sp->destructor;
3212    fprintf(out,"{\n"); (*lineno)++;
3213    if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,sp->destLineno,lemp->filename); }
3214  }else if( lemp->vardest ){
3215    cp = lemp->vardest;
3216    if( cp==0 ) return;
3217    fprintf(out,"{\n"); (*lineno)++;
3218  }else{
3219    assert( 0 );  /* Cannot happen */
3220  }
3221  for(; *cp; cp++){
3222    if( *cp=='$' && cp[1]=='$' ){
3223      fprintf(out,"(yypminor->yy%d)",sp->dtnum);
3224      cp++;
3225      continue;
3226    }
3227    if( *cp=='\n' ) (*lineno)++;
3228    fputc(*cp,out);
3229  }
3230  fprintf(out,"\n"); (*lineno)++;
3231  if (!lemp->nolinenosflag) {
3232    (*lineno)++; tplt_linedir(out,*lineno,lemp->outname);
3233  }
3234  fprintf(out,"}\n"); (*lineno)++;
3235  return;
3236 }
3237 
3238 /*
3239 ** Return TRUE (non-zero) if the given symbol has a destructor.
3240 */
has_destructor(struct symbol * sp,struct lemon * lemp)3241 int has_destructor(struct symbol *sp, struct lemon *lemp)
3242 {
3243   int ret;
3244   if( sp->type==TERMINAL ){
3245     ret = lemp->tokendest!=0;
3246   }else{
3247     ret = lemp->vardest!=0 || sp->destructor!=0;
3248   }
3249   return ret;
3250 }
3251 
3252 /*
3253 ** Append text to a dynamically allocated string.  If zText is 0 then
3254 ** reset the string to be empty again.  Always return the complete text
3255 ** of the string (which is overwritten with each call).
3256 **
3257 ** n bytes of zText are stored.  If n==0 then all of zText up to the first
3258 ** \000 terminator is stored.  zText can contain up to two instances of
3259 ** %d.  The values of p1 and p2 are written into the first and second
3260 ** %d.
3261 **
3262 ** If n==-1, then the previous character is overwritten.
3263 */
append_str(const char * zText,int n,int p1,int p2)3264 PRIVATE char *append_str(const char *zText, int n, int p1, int p2){
3265   static char empty[1] = { 0 };
3266   static char *z = 0;
3267   static int alloced = 0;
3268   static int used = 0;
3269   int c;
3270   char zInt[40];
3271   if( zText==0 ){
3272     used = 0;
3273     return z;
3274   }
3275   if( n<=0 ){
3276     if( n<0 ){
3277       used += n;
3278       assert( used>=0 );
3279     }
3280     n = lemonStrlen(zText);
3281   }
3282   if( n+sizeof(zInt)*2+used >= alloced ){
3283     alloced = n + sizeof(zInt)*2 + used + 200;
3284     z = (char *) realloc(z,  alloced);
3285   }
3286   if( z==0 ) return empty;
3287   while( n-- > 0 ){
3288     c = *(zText++);
3289     if( c=='%' && n>0 && zText[0]=='d' ){
3290       sprintf(zInt, "%d", p1);
3291       p1 = p2;
3292       strcpy(&z[used], zInt);
3293       used += lemonStrlen(&z[used]);
3294       zText++;
3295       n--;
3296     }else{
3297       z[used++] = c;
3298     }
3299   }
3300   z[used] = 0;
3301   return z;
3302 }
3303 
3304 /*
3305 ** zCode is a string that is the action associated with a rule.  Expand
3306 ** the symbols in this string so that the refer to elements of the parser
3307 ** stack.
3308 */
translate_code(struct lemon * lemp,struct rule * rp)3309 PRIVATE void translate_code(struct lemon *lemp, struct rule *rp){
3310   char *cp, *xp;
3311   int i;
3312   char lhsused = 0;    /* True if the LHS element has been used */
3313   char used[MAXRHS];   /* True for each RHS element which is used */
3314 
3315   for(i=0; i<rp->nrhs; i++) used[i] = 0;
3316   lhsused = 0;
3317 
3318   if( rp->code==0 ){
3319     static char newlinestr[2] = { '\n', '\0' };
3320     rp->code = newlinestr;
3321     rp->line = rp->ruleline;
3322   }
3323 
3324   append_str(0,0,0,0);
3325 
3326   /* This const cast is wrong but harmless, if we're careful. */
3327   for(cp=(char *)rp->code; *cp; cp++){
3328     if( isalpha(*cp) && (cp==rp->code || (!isalnum(cp[-1]) && cp[-1]!='_')) ){
3329       char saved;
3330       for(xp= &cp[1]; isalnum(*xp) || *xp=='_'; xp++);
3331       saved = *xp;
3332       *xp = 0;
3333       if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){
3334         append_str("yygotominor.yy%d",0,rp->lhs->dtnum,0);
3335         cp = xp;
3336         lhsused = 1;
3337       }else{
3338         for(i=0; i<rp->nrhs; i++){
3339           if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){
3340             if( cp!=rp->code && cp[-1]=='@' ){
3341               /* If the argument is of the form @X then substituted
3342               ** the token number of X, not the value of X */
3343               append_str("yymsp[%d].major",-1,i-rp->nrhs+1,0);
3344             }else{
3345               struct symbol *sp = rp->rhs[i];
3346               int dtnum;
3347               if( sp->type==MULTITERMINAL ){
3348                 dtnum = sp->subsym[0]->dtnum;
3349               }else{
3350                 dtnum = sp->dtnum;
3351               }
3352               append_str("yymsp[%d].minor.yy%d",0,i-rp->nrhs+1, dtnum);
3353             }
3354             cp = xp;
3355             used[i] = 1;
3356             break;
3357           }
3358         }
3359       }
3360       *xp = saved;
3361     }
3362     append_str(cp, 1, 0, 0);
3363   } /* End loop */
3364 
3365   /* Check to make sure the LHS has been used */
3366   if( rp->lhsalias && !lhsused ){
3367     ErrorMsg(lemp->filename,rp->ruleline,
3368       "Label \"%s\" for \"%s(%s)\" is never used.",
3369         rp->lhsalias,rp->lhs->name,rp->lhsalias);
3370     lemp->errorcnt++;
3371   }
3372 
3373   /* Generate destructor code for RHS symbols which are not used in the
3374   ** reduce code */
3375   for(i=0; i<rp->nrhs; i++){
3376     if( rp->rhsalias[i] && !used[i] ){
3377       ErrorMsg(lemp->filename,rp->ruleline,
3378         "Label %s for \"%s(%s)\" is never used.",
3379         rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]);
3380       lemp->errorcnt++;
3381     }else if( rp->rhsalias[i]==0 ){
3382       if( has_destructor(rp->rhs[i],lemp) ){
3383         append_str("  yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0,
3384            rp->rhs[i]->index,i-rp->nrhs+1);
3385       }else{
3386         /* No destructor defined for this term */
3387       }
3388     }
3389   }
3390   if( rp->code ){
3391     cp = append_str(0,0,0,0);
3392     rp->code = Strsafe(cp?cp:"");
3393   }
3394 }
3395 
3396 /*
3397 ** Generate code which executes when the rule "rp" is reduced.  Write
3398 ** the code to "out".  Make sure lineno stays up-to-date.
3399 */
emit_code(FILE * out,struct rule * rp,struct lemon * lemp,int * lineno)3400 PRIVATE void emit_code(
3401   FILE *out,
3402   struct rule *rp,
3403   struct lemon *lemp,
3404   int *lineno
3405 ){
3406  const char *cp;
3407 
3408  /* Generate code to do the reduce action */
3409  if( rp->code ){
3410    if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,rp->line,lemp->filename); }
3411    fprintf(out,"{%s",rp->code);
3412    for(cp=rp->code; *cp; cp++){
3413      if( *cp=='\n' ) (*lineno)++;
3414    } /* End loop */
3415    fprintf(out,"}\n"); (*lineno)++;
3416    if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); }
3417  } /* End if( rp->code ) */
3418 
3419  return;
3420 }
3421 
3422 /*
3423 ** Print the definition of the union used for the parser's data stack.
3424 ** This union contains fields for every possible data type for tokens
3425 ** and nonterminals.  In the process of computing and printing this
3426 ** union, also set the ".dtnum" field of every terminal and nonterminal
3427 ** symbol.
3428 */
print_stack_union(FILE * out,struct lemon * lemp,int * plineno,int mhflag)3429 void print_stack_union(
3430   FILE *out,                  /* The output stream */
3431   struct lemon *lemp,         /* The main info structure for this parser */
3432   int *plineno,               /* Pointer to the line number */
3433   int mhflag                  /* True if generating makeheaders output */
3434 ){
3435   int lineno = *plineno;    /* The line number of the output */
3436   char **types;             /* A hash table of datatypes */
3437   int arraysize;            /* Size of the "types" array */
3438   int maxdtlength;          /* Maximum length of any ".datatype" field. */
3439   char *stddt;              /* Standardized name for a datatype */
3440   int i,j;                  /* Loop counters */
3441   int hash;                 /* For hashing the name of a type */
3442   const char *name;         /* Name of the parser */
3443 
3444   /* Allocate and initialize types[] and allocate stddt[] */
3445   arraysize = lemp->nsymbol * 2;
3446   types = (char**)calloc( arraysize, sizeof(char*) );
3447   for(i=0; i<arraysize; i++) types[i] = 0;
3448   maxdtlength = 0;
3449   if( lemp->vartype ){
3450     maxdtlength = lemonStrlen(lemp->vartype);
3451   }
3452   for(i=0; i<lemp->nsymbol; i++){
3453     int len;
3454     struct symbol *sp = lemp->symbols[i];
3455     if( sp->datatype==0 ) continue;
3456     len = lemonStrlen(sp->datatype);
3457     if( len>maxdtlength ) maxdtlength = len;
3458   }
3459   stddt = (char*)malloc( maxdtlength*2 + 1 );
3460   if( types==0 || stddt==0 ){
3461     fprintf(stderr,"Out of memory.\n");
3462     exit(1);
3463   }
3464 
3465   /* Build a hash table of datatypes. The ".dtnum" field of each symbol
3466   ** is filled in with the hash index plus 1.  A ".dtnum" value of 0 is
3467   ** used for terminal symbols.  If there is no %default_type defined then
3468   ** 0 is also used as the .dtnum value for nonterminals which do not specify
3469   ** a datatype using the %type directive.
3470   */
3471   for(i=0; i<lemp->nsymbol; i++){
3472     struct symbol *sp = lemp->symbols[i];
3473     char *cp;
3474     if( sp==lemp->errsym ){
3475       sp->dtnum = arraysize+1;
3476       continue;
3477     }
3478     if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){
3479       sp->dtnum = 0;
3480       continue;
3481     }
3482     cp = sp->datatype;
3483     if( cp==0 ) cp = lemp->vartype;
3484     j = 0;
3485     while( isspace(*cp) ) cp++;
3486     while( *cp ) stddt[j++] = *cp++;
3487     while( j>0 && isspace(stddt[j-1]) ) j--;
3488     stddt[j] = 0;
3489     if( lemp->tokentype && strcmp(stddt, lemp->tokentype)==0 ){
3490       sp->dtnum = 0;
3491       continue;
3492     }
3493     hash = 0;
3494     for(j=0; stddt[j]; j++){
3495       hash = hash*53 + stddt[j];
3496     }
3497     hash = (hash & 0x7fffffff)%arraysize;
3498     while( types[hash] ){
3499       if( strcmp(types[hash],stddt)==0 ){
3500         sp->dtnum = hash + 1;
3501         break;
3502       }
3503       hash++;
3504       if( hash>=arraysize ) hash = 0;
3505     }
3506     if( types[hash]==0 ){
3507       sp->dtnum = hash + 1;
3508       types[hash] = (char*)malloc( lemonStrlen(stddt)+1 );
3509       if( types[hash]==0 ){
3510         fprintf(stderr,"Out of memory.\n");
3511         exit(1);
3512       }
3513       strcpy(types[hash],stddt);
3514     }
3515   }
3516 
3517   /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */
3518   name = lemp->name ? lemp->name : "Parse";
3519   lineno = *plineno;
3520   if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; }
3521   fprintf(out,"#define %sTOKENTYPE %s\n",name,
3522     lemp->tokentype?lemp->tokentype:"void*");  lineno++;
3523   if( mhflag ){ fprintf(out,"#endif\n"); lineno++; }
3524   fprintf(out,"typedef union {\n"); lineno++;
3525   fprintf(out,"  int yyinit;\n"); lineno++;
3526   fprintf(out,"  %sTOKENTYPE yy0;\n",name); lineno++;
3527   for(i=0; i<arraysize; i++){
3528     if( types[i]==0 ) continue;
3529     fprintf(out,"  %s yy%d;\n",types[i],i+1); lineno++;
3530     free(types[i]);
3531   }
3532   if( lemp->errsym->useCnt ){
3533     fprintf(out,"  int yy%d;\n",lemp->errsym->dtnum); lineno++;
3534   }
3535   free(stddt);
3536   free(types);
3537   fprintf(out,"} YYMINORTYPE;\n"); lineno++;
3538   *plineno = lineno;
3539 }
3540 
3541 /*
3542 ** Return the name of a C datatype able to represent values between
3543 ** lwr and upr, inclusive.
3544 */
minimum_size_type(int lwr,int upr)3545 static const char *minimum_size_type(int lwr, int upr){
3546   if( lwr>=0 ){
3547     if( upr<=255 ){
3548       return "unsigned char";
3549     }else if( upr<65535 ){
3550       return "unsigned short int";
3551     }else{
3552       return "unsigned int";
3553     }
3554   }else if( lwr>=-127 && upr<=127 ){
3555     return "signed char";
3556   }else if( lwr>=-32767 && upr<32767 ){
3557     return "short";
3558   }else{
3559     return "int";
3560   }
3561 }
3562 
3563 /*
3564 ** Each state contains a set of token transaction and a set of
3565 ** nonterminal transactions.  Each of these sets makes an instance
3566 ** of the following structure.  An array of these structures is used
3567 ** to order the creation of entries in the yy_action[] table.
3568 */
3569 struct axset {
3570   struct state *stp;   /* A pointer to a state */
3571   int isTkn;           /* True to use tokens.  False for non-terminals */
3572   int nAction;         /* Number of actions */
3573   int iOrder;          /* Original order of action sets */
3574 };
3575 
3576 /*
3577 ** Compare to axset structures for sorting purposes
3578 */
axset_compare(const void * a,const void * b)3579 static int axset_compare(const void *a, const void *b){
3580   struct axset *p1 = (struct axset*)a;
3581   struct axset *p2 = (struct axset*)b;
3582   int c;
3583   c = p2->nAction - p1->nAction;
3584   if( c==0 ){
3585     c = p2->iOrder - p1->iOrder;
3586   }
3587   assert( c!=0 || p1==p2 );
3588   return c;
3589 }
3590 
3591 /*
3592 ** Write text on "out" that describes the rule "rp".
3593 */
writeRuleText(FILE * out,struct rule * rp)3594 static void writeRuleText(FILE *out, struct rule *rp){
3595   int j;
3596   fprintf(out,"%s ::=", rp->lhs->name);
3597   for(j=0; j<rp->nrhs; j++){
3598     struct symbol *sp = rp->rhs[j];
3599     fprintf(out," %s", sp->name);
3600     if( sp->type==MULTITERMINAL ){
3601       int k;
3602       for(k=1; k<sp->nsubsym; k++){
3603         fprintf(out,"|%s",sp->subsym[k]->name);
3604       }
3605     }
3606   }
3607 }
3608 
3609 
3610 /* Generate C source code for the parser */
ReportTable(struct lemon * lemp,int mhflag)3611 void ReportTable(
3612   struct lemon *lemp,
3613   int mhflag     /* Output in makeheaders format if true */
3614 ){
3615   FILE *out, *in;
3616   char line[LINESIZE];
3617   int  lineno;
3618   struct state *stp;
3619   struct action *ap;
3620   struct rule *rp;
3621   struct acttab *pActtab;
3622   int i, j, n;
3623   const char *name;
3624   int mnTknOfst, mxTknOfst;
3625   int mnNtOfst, mxNtOfst;
3626   struct axset *ax;
3627 
3628   in = tplt_open(lemp);
3629   if( in==0 ) return;
3630   out = file_open(lemp,".c","wb");
3631   if( out==0 ){
3632     fclose(in);
3633     return;
3634   }
3635   lineno = 1;
3636   tplt_xfer(lemp->name,in,out,&lineno);
3637 
3638   /* Generate the include code, if any */
3639   tplt_print(out,lemp,lemp->include,&lineno);
3640   if( mhflag ){
3641     char *name = file_makename(lemp, ".h");
3642     fprintf(out,"#include \"%s\"\n", name); lineno++;
3643     free(name);
3644   }
3645   tplt_xfer(lemp->name,in,out,&lineno);
3646 
3647   /* Generate #defines for all tokens */
3648   if( mhflag ){
3649     const char *prefix;
3650     fprintf(out,"#if INTERFACE\n"); lineno++;
3651     if( lemp->tokenprefix ) prefix = lemp->tokenprefix;
3652     else                    prefix = "";
3653     for(i=1; i<lemp->nterminal; i++){
3654       fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
3655       lineno++;
3656     }
3657     fprintf(out,"#endif\n"); lineno++;
3658   }
3659   tplt_xfer(lemp->name,in,out,&lineno);
3660 
3661   /* Generate the defines */
3662   fprintf(out,"#define YYCODETYPE %s\n",
3663     minimum_size_type(0, lemp->nsymbol+1)); lineno++;
3664   fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1);  lineno++;
3665   fprintf(out,"#define YYACTIONTYPE %s\n",
3666     minimum_size_type(0, lemp->nstate+lemp->nrule+5));  lineno++;
3667   if( lemp->wildcard ){
3668     fprintf(out,"#define YYWILDCARD %d\n",
3669        lemp->wildcard->index); lineno++;
3670   }
3671   print_stack_union(out,lemp,&lineno,mhflag);
3672   fprintf(out, "#ifndef YYSTACKDEPTH\n"); lineno++;
3673   if( lemp->stacksize ){
3674     fprintf(out,"#define YYSTACKDEPTH %s\n",lemp->stacksize);  lineno++;
3675   }else{
3676     fprintf(out,"#define YYSTACKDEPTH 100\n");  lineno++;
3677   }
3678   fprintf(out, "#endif\n"); lineno++;
3679   if( mhflag ){
3680     fprintf(out,"#if INTERFACE\n"); lineno++;
3681   }
3682   name = lemp->name ? lemp->name : "Parse";
3683   if( lemp->arg && lemp->arg[0] ){
3684     int i;
3685     i = lemonStrlen(lemp->arg);
3686     while( i>=1 && isspace(lemp->arg[i-1]) ) i--;
3687     while( i>=1 && (isalnum(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--;
3688     fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg);  lineno++;
3689     fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg);  lineno++;
3690     fprintf(out,"#define %sARG_FETCH %s = yypParser->%s\n",
3691                  name,lemp->arg,&lemp->arg[i]);  lineno++;
3692     fprintf(out,"#define %sARG_STORE yypParser->%s = %s\n",
3693                  name,&lemp->arg[i],&lemp->arg[i]);  lineno++;
3694   }else{
3695     fprintf(out,"#define %sARG_SDECL\n",name);  lineno++;
3696     fprintf(out,"#define %sARG_PDECL\n",name);  lineno++;
3697     fprintf(out,"#define %sARG_FETCH\n",name); lineno++;
3698     fprintf(out,"#define %sARG_STORE\n",name); lineno++;
3699   }
3700   if( mhflag ){
3701     fprintf(out,"#endif\n"); lineno++;
3702   }
3703   fprintf(out,"#define YYNSTATE %d\n",lemp->nstate);  lineno++;
3704   fprintf(out,"#define YYNRULE %d\n",lemp->nrule);  lineno++;
3705   if( lemp->errsym->useCnt ){
3706     fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index);  lineno++;
3707     fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum);  lineno++;
3708   }
3709   if( lemp->has_fallback ){
3710     fprintf(out,"#define YYFALLBACK 1\n");  lineno++;
3711   }
3712   tplt_xfer(lemp->name,in,out,&lineno);
3713 
3714   /* Generate the action table and its associates:
3715   **
3716   **  yy_action[]        A single table containing all actions.
3717   **  yy_lookahead[]     A table containing the lookahead for each entry in
3718   **                     yy_action.  Used to detect hash collisions.
3719   **  yy_shift_ofst[]    For each state, the offset into yy_action for
3720   **                     shifting terminals.
3721   **  yy_reduce_ofst[]   For each state, the offset into yy_action for
3722   **                     shifting non-terminals after a reduce.
3723   **  yy_default[]       Default action for each state.
3724   */
3725 
3726   /* Compute the actions on all states and count them up */
3727   ax = (struct axset *) calloc(lemp->nstate*2, sizeof(ax[0]));
3728   if( ax==0 ){
3729     fprintf(stderr,"malloc failed\n");
3730     exit(1);
3731   }
3732   for(i=0; i<lemp->nstate; i++){
3733     stp = lemp->sorted[i];
3734     ax[i*2].stp = stp;
3735     ax[i*2].isTkn = 1;
3736     ax[i*2].nAction = stp->nTknAct;
3737     ax[i*2+1].stp = stp;
3738     ax[i*2+1].isTkn = 0;
3739     ax[i*2+1].nAction = stp->nNtAct;
3740   }
3741   mxTknOfst = mnTknOfst = 0;
3742   mxNtOfst = mnNtOfst = 0;
3743 
3744   /* Compute the action table.  In order to try to keep the size of the
3745   ** action table to a minimum, the heuristic of placing the largest action
3746   ** sets first is used.
3747   */
3748   for(i=0; i<lemp->nstate*2; i++) ax[i].iOrder = i;
3749   qsort(ax, lemp->nstate*2, sizeof(ax[0]), axset_compare);
3750   pActtab = acttab_alloc();
3751   for(i=0; i<lemp->nstate*2 && ax[i].nAction>0; i++){
3752     stp = ax[i].stp;
3753     if( ax[i].isTkn ){
3754       for(ap=stp->ap; ap; ap=ap->next){
3755         int action;
3756         if( ap->sp->index>=lemp->nterminal ) continue;
3757         action = compute_action(lemp, ap);
3758         if( action<0 ) continue;
3759         acttab_action(pActtab, ap->sp->index, action);
3760       }
3761       stp->iTknOfst = acttab_insert(pActtab);
3762       if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst;
3763       if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst;
3764     }else{
3765       for(ap=stp->ap; ap; ap=ap->next){
3766         int action;
3767         if( ap->sp->index<lemp->nterminal ) continue;
3768         if( ap->sp->index==lemp->nsymbol ) continue;
3769         action = compute_action(lemp, ap);
3770         if( action<0 ) continue;
3771         acttab_action(pActtab, ap->sp->index, action);
3772       }
3773       stp->iNtOfst = acttab_insert(pActtab);
3774       if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst;
3775       if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst;
3776     }
3777   }
3778   free(ax);
3779 
3780   /* Output the yy_action table */
3781   n = acttab_size(pActtab);
3782   fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++;
3783   fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++;
3784   for(i=j=0; i<n; i++){
3785     int action = acttab_yyaction(pActtab, i);
3786     if( action<0 ) action = lemp->nstate + lemp->nrule + 2;
3787     if( j==0 ) fprintf(out," /* %5d */ ", i);
3788     fprintf(out, " %4d,", action);
3789     if( j==9 || i==n-1 ){
3790       fprintf(out, "\n"); lineno++;
3791       j = 0;
3792     }else{
3793       j++;
3794     }
3795   }
3796   fprintf(out, "};\n"); lineno++;
3797 
3798   /* Output the yy_lookahead table */
3799   fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++;
3800   for(i=j=0; i<n; i++){
3801     int la = acttab_yylookahead(pActtab, i);
3802     if( la<0 ) la = lemp->nsymbol;
3803     if( j==0 ) fprintf(out," /* %5d */ ", i);
3804     fprintf(out, " %4d,", la);
3805     if( j==9 || i==n-1 ){
3806       fprintf(out, "\n"); lineno++;
3807       j = 0;
3808     }else{
3809       j++;
3810     }
3811   }
3812   fprintf(out, "};\n"); lineno++;
3813 
3814   /* Output the yy_shift_ofst[] table */
3815   fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", mnTknOfst-1); lineno++;
3816   n = lemp->nstate;
3817   while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--;
3818   fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n-1); lineno++;
3819   fprintf(out, "#define YY_SHIFT_MIN   (%d)\n", mnTknOfst); lineno++;
3820   fprintf(out, "#define YY_SHIFT_MAX   (%d)\n", mxTknOfst); lineno++;
3821   fprintf(out, "static const %s yy_shift_ofst[] = {\n",
3822           minimum_size_type(mnTknOfst-1, mxTknOfst)); lineno++;
3823   for(i=j=0; i<n; i++){
3824     int ofst;
3825     stp = lemp->sorted[i];
3826     ofst = stp->iTknOfst;
3827     if( ofst==NO_OFFSET ) ofst = mnTknOfst - 1;
3828     if( j==0 ) fprintf(out," /* %5d */ ", i);
3829     fprintf(out, " %4d,", ofst);
3830     if( j==9 || i==n-1 ){
3831       fprintf(out, "\n"); lineno++;
3832       j = 0;
3833     }else{
3834       j++;
3835     }
3836   }
3837   fprintf(out, "};\n"); lineno++;
3838 
3839   /* Output the yy_reduce_ofst[] table */
3840   fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++;
3841   n = lemp->nstate;
3842   while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--;
3843   fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); lineno++;
3844   fprintf(out, "#define YY_REDUCE_MIN   (%d)\n", mnNtOfst); lineno++;
3845   fprintf(out, "#define YY_REDUCE_MAX   (%d)\n", mxNtOfst); lineno++;
3846   fprintf(out, "static const %s yy_reduce_ofst[] = {\n",
3847           minimum_size_type(mnNtOfst-1, mxNtOfst)); lineno++;
3848   for(i=j=0; i<n; i++){
3849     int ofst;
3850     stp = lemp->sorted[i];
3851     ofst = stp->iNtOfst;
3852     if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1;
3853     if( j==0 ) fprintf(out," /* %5d */ ", i);
3854     fprintf(out, " %4d,", ofst);
3855     if( j==9 || i==n-1 ){
3856       fprintf(out, "\n"); lineno++;
3857       j = 0;
3858     }else{
3859       j++;
3860     }
3861   }
3862   fprintf(out, "};\n"); lineno++;
3863 
3864   /* Output the default action table */
3865   fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++;
3866   n = lemp->nstate;
3867   for(i=j=0; i<n; i++){
3868     stp = lemp->sorted[i];
3869     if( j==0 ) fprintf(out," /* %5d */ ", i);
3870     fprintf(out, " %4d,", stp->iDflt);
3871     if( j==9 || i==n-1 ){
3872       fprintf(out, "\n"); lineno++;
3873       j = 0;
3874     }else{
3875       j++;
3876     }
3877   }
3878   fprintf(out, "};\n"); lineno++;
3879   tplt_xfer(lemp->name,in,out,&lineno);
3880 
3881   /* Generate the table of fallback tokens.
3882   */
3883   if( lemp->has_fallback ){
3884     int mx = lemp->nterminal - 1;
3885     while( mx>0 && lemp->symbols[mx]->fallback==0 ){ mx--; }
3886     for(i=0; i<=mx; i++){
3887       struct symbol *p = lemp->symbols[i];
3888       if( p->fallback==0 ){
3889         fprintf(out, "    0,  /* %10s => nothing */\n", p->name);
3890       }else{
3891         fprintf(out, "  %3d,  /* %10s => %s */\n", p->fallback->index,
3892           p->name, p->fallback->name);
3893       }
3894       lineno++;
3895     }
3896   }
3897   tplt_xfer(lemp->name, in, out, &lineno);
3898 
3899   /* Generate a table containing the symbolic name of every symbol
3900   */
3901   for(i=0; i<lemp->nsymbol; i++){
3902     sprintf(line,"\"%s\",",lemp->symbols[i]->name);
3903     fprintf(out,"  %-15s",line);
3904     if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; }
3905   }
3906   if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; }
3907   tplt_xfer(lemp->name,in,out,&lineno);
3908 
3909   /* Generate a table containing a text string that describes every
3910   ** rule in the rule set of the grammar.  This information is used
3911   ** when tracing REDUCE actions.
3912   */
3913   for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){
3914     assert( rp->index==i );
3915     fprintf(out," /* %3d */ \"", i);
3916     writeRuleText(out, rp);
3917     fprintf(out,"\",\n"); lineno++;
3918   }
3919   tplt_xfer(lemp->name,in,out,&lineno);
3920 
3921   /* Generate code which executes every time a symbol is popped from
3922   ** the stack while processing errors or while destroying the parser.
3923   ** (In other words, generate the %destructor actions)
3924   */
3925   if( lemp->tokendest ){
3926     int once = 1;
3927     for(i=0; i<lemp->nsymbol; i++){
3928       struct symbol *sp = lemp->symbols[i];
3929       if( sp==0 || sp->type!=TERMINAL ) continue;
3930       if( once ){
3931         fprintf(out, "      /* TERMINAL Destructor */\n"); lineno++;
3932         once = 0;
3933       }
3934       fprintf(out,"    case %d: /* %s */\n", sp->index, sp->name); lineno++;
3935     }
3936     for(i=0; i<lemp->nsymbol && lemp->symbols[i]->type!=TERMINAL; i++);
3937     if( i<lemp->nsymbol ){
3938       emit_destructor_code(out,lemp->symbols[i],lemp,&lineno);
3939       fprintf(out,"      break;\n"); lineno++;
3940     }
3941   }
3942   if( lemp->vardest ){
3943     struct symbol *dflt_sp = 0;
3944     int once = 1;
3945     for(i=0; i<lemp->nsymbol; i++){
3946       struct symbol *sp = lemp->symbols[i];
3947       if( sp==0 || sp->type==TERMINAL ||
3948           sp->index<=0 || sp->destructor!=0 ) continue;
3949       if( once ){
3950         fprintf(out, "      /* Default NON-TERMINAL Destructor */\n"); lineno++;
3951         once = 0;
3952       }
3953       fprintf(out,"    case %d: /* %s */\n", sp->index, sp->name); lineno++;
3954       dflt_sp = sp;
3955     }
3956     if( dflt_sp!=0 ){
3957       emit_destructor_code(out,dflt_sp,lemp,&lineno);
3958     }
3959     fprintf(out,"      break;\n"); lineno++;
3960   }
3961   for(i=0; i<lemp->nsymbol; i++){
3962     struct symbol *sp = lemp->symbols[i];
3963     if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue;
3964     fprintf(out,"    case %d: /* %s */\n", sp->index, sp->name); lineno++;
3965 
3966     /* Combine duplicate destructors into a single case */
3967     for(j=i+1; j<lemp->nsymbol; j++){
3968       struct symbol *sp2 = lemp->symbols[j];
3969       if( sp2 && sp2->type!=TERMINAL && sp2->destructor
3970           && sp2->dtnum==sp->dtnum
3971           && strcmp(sp->destructor,sp2->destructor)==0 ){
3972          fprintf(out,"    case %d: /* %s */\n",
3973                  sp2->index, sp2->name); lineno++;
3974          sp2->destructor = 0;
3975       }
3976     }
3977 
3978     emit_destructor_code(out,lemp->symbols[i],lemp,&lineno);
3979     fprintf(out,"      break;\n"); lineno++;
3980   }
3981   tplt_xfer(lemp->name,in,out,&lineno);
3982 
3983   /* Generate code which executes whenever the parser stack overflows */
3984   tplt_print(out,lemp,lemp->overflow,&lineno);
3985   tplt_xfer(lemp->name,in,out,&lineno);
3986 
3987   /* Generate the table of rule information
3988   **
3989   ** Note: This code depends on the fact that rules are number
3990   ** sequentually beginning with 0.
3991   */
3992   for(rp=lemp->rule; rp; rp=rp->next){
3993     fprintf(out,"  { %d, %d },\n",rp->lhs->index,rp->nrhs); lineno++;
3994   }
3995   tplt_xfer(lemp->name,in,out,&lineno);
3996 
3997   /* Generate code which execution during each REDUCE action */
3998   for(rp=lemp->rule; rp; rp=rp->next){
3999     translate_code(lemp, rp);
4000   }
4001   /* First output rules other than the default: rule */
4002   for(rp=lemp->rule; rp; rp=rp->next){
4003     struct rule *rp2;               /* Other rules with the same action */
4004     if( rp->code==0 ) continue;
4005     if( rp->code[0]=='\n' && rp->code[1]==0 ) continue; /* Will be default: */
4006     fprintf(out,"      case %d: /* ", rp->index);
4007     writeRuleText(out, rp);
4008     fprintf(out, " */\n"); lineno++;
4009     for(rp2=rp->next; rp2; rp2=rp2->next){
4010       if( rp2->code==rp->code ){
4011         fprintf(out,"      case %d: /* ", rp2->index);
4012         writeRuleText(out, rp2);
4013         fprintf(out," */ yytestcase(yyruleno==%d);\n", rp2->index); lineno++;
4014         rp2->code = 0;
4015       }
4016     }
4017     emit_code(out,rp,lemp,&lineno);
4018     fprintf(out,"        break;\n"); lineno++;
4019     rp->code = 0;
4020   }
4021   /* Finally, output the default: rule.  We choose as the default: all
4022   ** empty actions. */
4023   fprintf(out,"      default:\n"); lineno++;
4024   for(rp=lemp->rule; rp; rp=rp->next){
4025     if( rp->code==0 ) continue;
4026     assert( rp->code[0]=='\n' && rp->code[1]==0 );
4027     fprintf(out,"      /* (%d) ", rp->index);
4028     writeRuleText(out, rp);
4029     fprintf(out, " */ yytestcase(yyruleno==%d);\n", rp->index); lineno++;
4030   }
4031   fprintf(out,"        break;\n"); lineno++;
4032   tplt_xfer(lemp->name,in,out,&lineno);
4033 
4034   /* Generate code which executes if a parse fails */
4035   tplt_print(out,lemp,lemp->failure,&lineno);
4036   tplt_xfer(lemp->name,in,out,&lineno);
4037 
4038   /* Generate code which executes when a syntax error occurs */
4039   tplt_print(out,lemp,lemp->error,&lineno);
4040   tplt_xfer(lemp->name,in,out,&lineno);
4041 
4042   /* Generate code which executes when the parser accepts its input */
4043   tplt_print(out,lemp,lemp->accept,&lineno);
4044   tplt_xfer(lemp->name,in,out,&lineno);
4045 
4046   /* Append any addition code the user desires */
4047   tplt_print(out,lemp,lemp->extracode,&lineno);
4048 
4049   fclose(in);
4050   fclose(out);
4051   return;
4052 }
4053 
4054 /* Generate a header file for the parser */
ReportHeader(struct lemon * lemp)4055 void ReportHeader(struct lemon *lemp)
4056 {
4057   FILE *out, *in;
4058   const char *prefix;
4059   char line[LINESIZE];
4060   char pattern[LINESIZE];
4061   int i;
4062 
4063   if( lemp->tokenprefix ) prefix = lemp->tokenprefix;
4064   else                    prefix = "";
4065   in = file_open(lemp,".h","rb");
4066   if( in ){
4067     for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){
4068       sprintf(pattern,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
4069       if( strcmp(line,pattern) ) break;
4070     }
4071     fclose(in);
4072     if( i==lemp->nterminal ){
4073       /* No change in the file.  Don't rewrite it. */
4074       return;
4075     }
4076   }
4077   out = file_open(lemp,".h","wb");
4078   if( out ){
4079     for(i=1; i<lemp->nterminal; i++){
4080       fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
4081     }
4082     fclose(out);
4083   }
4084   return;
4085 }
4086 
4087 /* Reduce the size of the action tables, if possible, by making use
4088 ** of defaults.
4089 **
4090 ** In this version, we take the most frequent REDUCE action and make
4091 ** it the default.  Except, there is no default if the wildcard token
4092 ** is a possible look-ahead.
4093 */
CompressTables(struct lemon * lemp)4094 void CompressTables(struct lemon *lemp)
4095 {
4096   struct state *stp;
4097   struct action *ap, *ap2;
4098   struct rule *rp, *rp2, *rbest;
4099   int nbest, n;
4100   int i;
4101   int usesWildcard;
4102 
4103   for(i=0; i<lemp->nstate; i++){
4104     stp = lemp->sorted[i];
4105     nbest = 0;
4106     rbest = 0;
4107     usesWildcard = 0;
4108 
4109     for(ap=stp->ap; ap; ap=ap->next){
4110       if( ap->type==SHIFT && ap->sp==lemp->wildcard ){
4111         usesWildcard = 1;
4112       }
4113       if( ap->type!=REDUCE ) continue;
4114       rp = ap->x.rp;
4115       if( rp->lhsStart ) continue;
4116       if( rp==rbest ) continue;
4117       n = 1;
4118       for(ap2=ap->next; ap2; ap2=ap2->next){
4119         if( ap2->type!=REDUCE ) continue;
4120         rp2 = ap2->x.rp;
4121         if( rp2==rbest ) continue;
4122         if( rp2==rp ) n++;
4123       }
4124       if( n>nbest ){
4125         nbest = n;
4126         rbest = rp;
4127       }
4128     }
4129 
4130     /* Do not make a default if the number of rules to default
4131     ** is not at least 1 or if the wildcard token is a possible
4132     ** lookahead.
4133     */
4134     if( nbest<1 || usesWildcard ) continue;
4135 
4136 
4137     /* Combine matching REDUCE actions into a single default */
4138     for(ap=stp->ap; ap; ap=ap->next){
4139       if( ap->type==REDUCE && ap->x.rp==rbest ) break;
4140     }
4141     assert( ap );
4142     ap->sp = Symbol_new("{default}");
4143     for(ap=ap->next; ap; ap=ap->next){
4144       if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED;
4145     }
4146     stp->ap = Action_sort(stp->ap);
4147   }
4148 }
4149 
4150 
4151 /*
4152 ** Compare two states for sorting purposes.  The smaller state is the
4153 ** one with the most non-terminal actions.  If they have the same number
4154 ** of non-terminal actions, then the smaller is the one with the most
4155 ** token actions.
4156 */
stateResortCompare(const void * a,const void * b)4157 static int stateResortCompare(const void *a, const void *b){
4158   const struct state *pA = *(const struct state**)a;
4159   const struct state *pB = *(const struct state**)b;
4160   int n;
4161 
4162   n = pB->nNtAct - pA->nNtAct;
4163   if( n==0 ){
4164     n = pB->nTknAct - pA->nTknAct;
4165     if( n==0 ){
4166       n = pB->statenum - pA->statenum;
4167     }
4168   }
4169   assert( n!=0 );
4170   return n;
4171 }
4172 
4173 
4174 /*
4175 ** Renumber and resort states so that states with fewer choices
4176 ** occur at the end.  Except, keep state 0 as the first state.
4177 */
ResortStates(struct lemon * lemp)4178 void ResortStates(struct lemon *lemp)
4179 {
4180   int i;
4181   struct state *stp;
4182   struct action *ap;
4183 
4184   for(i=0; i<lemp->nstate; i++){
4185     stp = lemp->sorted[i];
4186     stp->nTknAct = stp->nNtAct = 0;
4187     stp->iDflt = lemp->nstate + lemp->nrule;
4188     stp->iTknOfst = NO_OFFSET;
4189     stp->iNtOfst = NO_OFFSET;
4190     for(ap=stp->ap; ap; ap=ap->next){
4191       if( compute_action(lemp,ap)>=0 ){
4192         if( ap->sp->index<lemp->nterminal ){
4193           stp->nTknAct++;
4194         }else if( ap->sp->index<lemp->nsymbol ){
4195           stp->nNtAct++;
4196         }else{
4197           stp->iDflt = compute_action(lemp, ap);
4198         }
4199       }
4200     }
4201   }
4202   qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]),
4203         stateResortCompare);
4204   for(i=0; i<lemp->nstate; i++){
4205     lemp->sorted[i]->statenum = i;
4206   }
4207 }
4208 
4209 
4210 /***************** From the file "set.c" ************************************/
4211 /*
4212 ** Set manipulation routines for the LEMON parser generator.
4213 */
4214 
4215 static int size = 0;
4216 
4217 /* Set the set size */
SetSize(int n)4218 void SetSize(int n)
4219 {
4220   size = n+1;
4221 }
4222 
4223 /* Allocate a new set */
SetNew()4224 char *SetNew(){
4225   char *s;
4226   s = (char*)calloc( size, 1);
4227   if( s==0 ){
4228     extern void memory_error();
4229     memory_error();
4230   }
4231   return s;
4232 }
4233 
4234 /* Deallocate a set */
SetFree(char * s)4235 void SetFree(char *s)
4236 {
4237   free(s);
4238 }
4239 
4240 /* Add a new element to the set.  Return TRUE if the element was added
4241 ** and FALSE if it was already there. */
SetAdd(char * s,int e)4242 int SetAdd(char *s, int e)
4243 {
4244   int rv;
4245   assert( e>=0 && e<size );
4246   rv = s[e];
4247   s[e] = 1;
4248   return !rv;
4249 }
4250 
4251 /* Add every element of s2 to s1.  Return TRUE if s1 changes. */
SetUnion(char * s1,char * s2)4252 int SetUnion(char *s1, char *s2)
4253 {
4254   int i, progress;
4255   progress = 0;
4256   for(i=0; i<size; i++){
4257     if( s2[i]==0 ) continue;
4258     if( s1[i]==0 ){
4259       progress = 1;
4260       s1[i] = 1;
4261     }
4262   }
4263   return progress;
4264 }
4265 /********************** From the file "table.c" ****************************/
4266 /*
4267 ** All code in this file has been automatically generated
4268 ** from a specification in the file
4269 **              "table.q"
4270 ** by the associative array code building program "aagen".
4271 ** Do not edit this file!  Instead, edit the specification
4272 ** file, then rerun aagen.
4273 */
4274 /*
4275 ** Code for processing tables in the LEMON parser generator.
4276 */
4277 
strhash(const char * x)4278 PRIVATE int strhash(const char *x)
4279 {
4280   int h = 0;
4281   while( *x) h = h*13 + *(x++);
4282   return h;
4283 }
4284 
4285 /* Works like strdup, sort of.  Save a string in malloced memory, but
4286 ** keep strings in a table so that the same string is not in more
4287 ** than one place.
4288 */
Strsafe(const char * y)4289 const char *Strsafe(const char *y)
4290 {
4291   const char *z;
4292   char *cpy;
4293 
4294   if( y==0 ) return 0;
4295   z = Strsafe_find(y);
4296   if( z==0 && (cpy=(char *)malloc( lemonStrlen(y)+1 ))!=0 ){
4297     strcpy(cpy,y);
4298     z = cpy;
4299     Strsafe_insert(z);
4300   }
4301   MemoryCheck(z);
4302   return z;
4303 }
4304 
4305 /* There is one instance of the following structure for each
4306 ** associative array of type "x1".
4307 */
4308 struct s_x1 {
4309   int size;               /* The number of available slots. */
4310                           /*   Must be a power of 2 greater than or */
4311                           /*   equal to 1 */
4312   int count;              /* Number of currently slots filled */
4313   struct s_x1node *tbl;  /* The data stored here */
4314   struct s_x1node **ht;  /* Hash table for lookups */
4315 };
4316 
4317 /* There is one instance of this structure for every data element
4318 ** in an associative array of type "x1".
4319 */
4320 typedef struct s_x1node {
4321   const char *data;        /* The data */
4322   struct s_x1node *next;   /* Next entry with the same hash */
4323   struct s_x1node **from;  /* Previous link */
4324 } x1node;
4325 
4326 /* There is only one instance of the array, which is the following */
4327 static struct s_x1 *x1a;
4328 
4329 /* Allocate a new associative array */
Strsafe_init()4330 void Strsafe_init(){
4331   if( x1a ) return;
4332   x1a = (struct s_x1*)malloc( sizeof(struct s_x1) );
4333   if( x1a ){
4334     x1a->size = 1024;
4335     x1a->count = 0;
4336     x1a->tbl = (x1node*)malloc(
4337       (sizeof(x1node) + sizeof(x1node*))*1024 );
4338     if( x1a->tbl==0 ){
4339       free(x1a);
4340       x1a = 0;
4341     }else{
4342       int i;
4343       x1a->ht = (x1node**)&(x1a->tbl[1024]);
4344       for(i=0; i<1024; i++) x1a->ht[i] = 0;
4345     }
4346   }
4347 }
4348 /* Insert a new record into the array.  Return TRUE if successful.
4349 ** Prior data with the same key is NOT overwritten */
Strsafe_insert(const char * data)4350 int Strsafe_insert(const char *data)
4351 {
4352   x1node *np;
4353   int h;
4354   int ph;
4355 
4356   if( x1a==0 ) return 0;
4357   ph = strhash(data);
4358   h = ph & (x1a->size-1);
4359   np = x1a->ht[h];
4360   while( np ){
4361     if( strcmp(np->data,data)==0 ){
4362       /* An existing entry with the same key is found. */
4363       /* Fail because overwrite is not allows. */
4364       return 0;
4365     }
4366     np = np->next;
4367   }
4368   if( x1a->count>=x1a->size ){
4369     /* Need to make the hash table bigger */
4370     int i,size;
4371     struct s_x1 array;
4372     array.size = size = x1a->size*2;
4373     array.count = x1a->count;
4374     array.tbl = (x1node*)malloc(
4375       (sizeof(x1node) + sizeof(x1node*))*size );
4376     if( array.tbl==0 ) return 0;  /* Fail due to malloc failure */
4377     array.ht = (x1node**)&(array.tbl[size]);
4378     for(i=0; i<size; i++) array.ht[i] = 0;
4379     for(i=0; i<x1a->count; i++){
4380       x1node *oldnp, *newnp;
4381       oldnp = &(x1a->tbl[i]);
4382       h = strhash(oldnp->data) & (size-1);
4383       newnp = &(array.tbl[i]);
4384       if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4385       newnp->next = array.ht[h];
4386       newnp->data = oldnp->data;
4387       newnp->from = &(array.ht[h]);
4388       array.ht[h] = newnp;
4389     }
4390     free(x1a->tbl);
4391     *x1a = array;
4392   }
4393   /* Insert the new data */
4394   h = ph & (x1a->size-1);
4395   np = &(x1a->tbl[x1a->count++]);
4396   np->data = data;
4397   if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next);
4398   np->next = x1a->ht[h];
4399   x1a->ht[h] = np;
4400   np->from = &(x1a->ht[h]);
4401   return 1;
4402 }
4403 
4404 /* Return a pointer to data assigned to the given key.  Return NULL
4405 ** if no such key. */
Strsafe_find(const char * key)4406 const char *Strsafe_find(const char *key)
4407 {
4408   int h;
4409   x1node *np;
4410 
4411   if( x1a==0 ) return 0;
4412   h = strhash(key) & (x1a->size-1);
4413   np = x1a->ht[h];
4414   while( np ){
4415     if( strcmp(np->data,key)==0 ) break;
4416     np = np->next;
4417   }
4418   return np ? np->data : 0;
4419 }
4420 
4421 /* Return a pointer to the (terminal or nonterminal) symbol "x".
4422 ** Create a new symbol if this is the first time "x" has been seen.
4423 */
Symbol_new(const char * x)4424 struct symbol *Symbol_new(const char *x)
4425 {
4426   struct symbol *sp;
4427 
4428   sp = Symbol_find(x);
4429   if( sp==0 ){
4430     sp = (struct symbol *)calloc(1, sizeof(struct symbol) );
4431     MemoryCheck(sp);
4432     sp->name = Strsafe(x);
4433     sp->type = isupper(*x) ? TERMINAL : NONTERMINAL;
4434     sp->rule = 0;
4435     sp->fallback = 0;
4436     sp->prec = -1;
4437     sp->assoc = UNK;
4438     sp->firstset = 0;
4439     sp->lambda = LEMON_FALSE;
4440     sp->destructor = 0;
4441     sp->destLineno = 0;
4442     sp->datatype = 0;
4443     sp->useCnt = 0;
4444     Symbol_insert(sp,sp->name);
4445   }
4446   sp->useCnt++;
4447   return sp;
4448 }
4449 
4450 /* Compare two symbols for working purposes
4451 **
4452 ** Symbols that begin with upper case letters (terminals or tokens)
4453 ** must sort before symbols that begin with lower case letters
4454 ** (non-terminals).  Other than that, the order does not matter.
4455 **
4456 ** We find experimentally that leaving the symbols in their original
4457 ** order (the order they appeared in the grammar file) gives the
4458 ** smallest parser tables in SQLite.
4459 */
Symbolcmpp(const void * _a,const void * _b)4460 int Symbolcmpp(const void *_a, const void *_b)
4461 {
4462   const struct symbol **a = (const struct symbol **) _a;
4463   const struct symbol **b = (const struct symbol **) _b;
4464   int i1 = (**a).index + 10000000*((**a).name[0]>'Z');
4465   int i2 = (**b).index + 10000000*((**b).name[0]>'Z');
4466   assert( i1!=i2 || strcmp((**a).name,(**b).name)==0 );
4467   return i1-i2;
4468 }
4469 
4470 /* There is one instance of the following structure for each
4471 ** associative array of type "x2".
4472 */
4473 struct s_x2 {
4474   int size;               /* The number of available slots. */
4475                           /*   Must be a power of 2 greater than or */
4476                           /*   equal to 1 */
4477   int count;              /* Number of currently slots filled */
4478   struct s_x2node *tbl;  /* The data stored here */
4479   struct s_x2node **ht;  /* Hash table for lookups */
4480 };
4481 
4482 /* There is one instance of this structure for every data element
4483 ** in an associative array of type "x2".
4484 */
4485 typedef struct s_x2node {
4486   struct symbol *data;     /* The data */
4487   const char *key;         /* The key */
4488   struct s_x2node *next;   /* Next entry with the same hash */
4489   struct s_x2node **from;  /* Previous link */
4490 } x2node;
4491 
4492 /* There is only one instance of the array, which is the following */
4493 static struct s_x2 *x2a;
4494 
4495 /* Allocate a new associative array */
Symbol_init()4496 void Symbol_init(){
4497   if( x2a ) return;
4498   x2a = (struct s_x2*)malloc( sizeof(struct s_x2) );
4499   if( x2a ){
4500     x2a->size = 128;
4501     x2a->count = 0;
4502     x2a->tbl = (x2node*)malloc(
4503       (sizeof(x2node) + sizeof(x2node*))*128 );
4504     if( x2a->tbl==0 ){
4505       free(x2a);
4506       x2a = 0;
4507     }else{
4508       int i;
4509       x2a->ht = (x2node**)&(x2a->tbl[128]);
4510       for(i=0; i<128; i++) x2a->ht[i] = 0;
4511     }
4512   }
4513 }
4514 /* Insert a new record into the array.  Return TRUE if successful.
4515 ** Prior data with the same key is NOT overwritten */
Symbol_insert(struct symbol * data,const char * key)4516 int Symbol_insert(struct symbol *data, const char *key)
4517 {
4518   x2node *np;
4519   int h;
4520   int ph;
4521 
4522   if( x2a==0 ) return 0;
4523   ph = strhash(key);
4524   h = ph & (x2a->size-1);
4525   np = x2a->ht[h];
4526   while( np ){
4527     if( strcmp(np->key,key)==0 ){
4528       /* An existing entry with the same key is found. */
4529       /* Fail because overwrite is not allows. */
4530       return 0;
4531     }
4532     np = np->next;
4533   }
4534   if( x2a->count>=x2a->size ){
4535     /* Need to make the hash table bigger */
4536     int i,size;
4537     struct s_x2 array;
4538     array.size = size = x2a->size*2;
4539     array.count = x2a->count;
4540     array.tbl = (x2node*)malloc(
4541       (sizeof(x2node) + sizeof(x2node*))*size );
4542     if( array.tbl==0 ) return 0;  /* Fail due to malloc failure */
4543     array.ht = (x2node**)&(array.tbl[size]);
4544     for(i=0; i<size; i++) array.ht[i] = 0;
4545     for(i=0; i<x2a->count; i++){
4546       x2node *oldnp, *newnp;
4547       oldnp = &(x2a->tbl[i]);
4548       h = strhash(oldnp->key) & (size-1);
4549       newnp = &(array.tbl[i]);
4550       if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4551       newnp->next = array.ht[h];
4552       newnp->key = oldnp->key;
4553       newnp->data = oldnp->data;
4554       newnp->from = &(array.ht[h]);
4555       array.ht[h] = newnp;
4556     }
4557     free(x2a->tbl);
4558     *x2a = array;
4559   }
4560   /* Insert the new data */
4561   h = ph & (x2a->size-1);
4562   np = &(x2a->tbl[x2a->count++]);
4563   np->key = key;
4564   np->data = data;
4565   if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next);
4566   np->next = x2a->ht[h];
4567   x2a->ht[h] = np;
4568   np->from = &(x2a->ht[h]);
4569   return 1;
4570 }
4571 
4572 /* Return a pointer to data assigned to the given key.  Return NULL
4573 ** if no such key. */
Symbol_find(const char * key)4574 struct symbol *Symbol_find(const char *key)
4575 {
4576   int h;
4577   x2node *np;
4578 
4579   if( x2a==0 ) return 0;
4580   h = strhash(key) & (x2a->size-1);
4581   np = x2a->ht[h];
4582   while( np ){
4583     if( strcmp(np->key,key)==0 ) break;
4584     np = np->next;
4585   }
4586   return np ? np->data : 0;
4587 }
4588 
4589 /* Return the n-th data.  Return NULL if n is out of range. */
Symbol_Nth(int n)4590 struct symbol *Symbol_Nth(int n)
4591 {
4592   struct symbol *data;
4593   if( x2a && n>0 && n<=x2a->count ){
4594     data = x2a->tbl[n-1].data;
4595   }else{
4596     data = 0;
4597   }
4598   return data;
4599 }
4600 
4601 /* Return the size of the array */
Symbol_count()4602 int Symbol_count()
4603 {
4604   return x2a ? x2a->count : 0;
4605 }
4606 
4607 /* Return an array of pointers to all data in the table.
4608 ** The array is obtained from malloc.  Return NULL if memory allocation
4609 ** problems, or if the array is empty. */
Symbol_arrayof()4610 struct symbol **Symbol_arrayof()
4611 {
4612   struct symbol **array;
4613   int i,size;
4614   if( x2a==0 ) return 0;
4615   size = x2a->count;
4616   array = (struct symbol **)calloc(size, sizeof(struct symbol *));
4617   if( array ){
4618     for(i=0; i<size; i++) array[i] = x2a->tbl[i].data;
4619   }
4620   return array;
4621 }
4622 
4623 /* Compare two configurations */
Configcmp(const char * _a,const char * _b)4624 int Configcmp(const char *_a,const char *_b)
4625 {
4626   const struct config *a = (struct config *) _a;
4627   const struct config *b = (struct config *) _b;
4628   int x;
4629   x = a->rp->index - b->rp->index;
4630   if( x==0 ) x = a->dot - b->dot;
4631   return x;
4632 }
4633 
4634 /* Compare two states */
statecmp(struct config * a,struct config * b)4635 PRIVATE int statecmp(struct config *a, struct config *b)
4636 {
4637   int rc;
4638   for(rc=0; rc==0 && a && b;  a=a->bp, b=b->bp){
4639     rc = a->rp->index - b->rp->index;
4640     if( rc==0 ) rc = a->dot - b->dot;
4641   }
4642   if( rc==0 ){
4643     if( a ) rc = 1;
4644     if( b ) rc = -1;
4645   }
4646   return rc;
4647 }
4648 
4649 /* Hash a state */
statehash(struct config * a)4650 PRIVATE int statehash(struct config *a)
4651 {
4652   int h=0;
4653   while( a ){
4654     h = h*571 + a->rp->index*37 + a->dot;
4655     a = a->bp;
4656   }
4657   return h;
4658 }
4659 
4660 /* Allocate a new state structure */
State_new()4661 struct state *State_new()
4662 {
4663   struct state *newstate;
4664   newstate = (struct state *)calloc(1, sizeof(struct state) );
4665   MemoryCheck(newstate);
4666   return newstate;
4667 }
4668 
4669 /* There is one instance of the following structure for each
4670 ** associative array of type "x3".
4671 */
4672 struct s_x3 {
4673   int size;               /* The number of available slots. */
4674                           /*   Must be a power of 2 greater than or */
4675                           /*   equal to 1 */
4676   int count;              /* Number of currently slots filled */
4677   struct s_x3node *tbl;  /* The data stored here */
4678   struct s_x3node **ht;  /* Hash table for lookups */
4679 };
4680 
4681 /* There is one instance of this structure for every data element
4682 ** in an associative array of type "x3".
4683 */
4684 typedef struct s_x3node {
4685   struct state *data;                  /* The data */
4686   struct config *key;                   /* The key */
4687   struct s_x3node *next;   /* Next entry with the same hash */
4688   struct s_x3node **from;  /* Previous link */
4689 } x3node;
4690 
4691 /* There is only one instance of the array, which is the following */
4692 static struct s_x3 *x3a;
4693 
4694 /* Allocate a new associative array */
State_init()4695 void State_init(){
4696   if( x3a ) return;
4697   x3a = (struct s_x3*)malloc( sizeof(struct s_x3) );
4698   if( x3a ){
4699     x3a->size = 128;
4700     x3a->count = 0;
4701     x3a->tbl = (x3node*)malloc(
4702       (sizeof(x3node) + sizeof(x3node*))*128 );
4703     if( x3a->tbl==0 ){
4704       free(x3a);
4705       x3a = 0;
4706     }else{
4707       int i;
4708       x3a->ht = (x3node**)&(x3a->tbl[128]);
4709       for(i=0; i<128; i++) x3a->ht[i] = 0;
4710     }
4711   }
4712 }
4713 /* Insert a new record into the array.  Return TRUE if successful.
4714 ** Prior data with the same key is NOT overwritten */
State_insert(struct state * data,struct config * key)4715 int State_insert(struct state *data, struct config *key)
4716 {
4717   x3node *np;
4718   int h;
4719   int ph;
4720 
4721   if( x3a==0 ) return 0;
4722   ph = statehash(key);
4723   h = ph & (x3a->size-1);
4724   np = x3a->ht[h];
4725   while( np ){
4726     if( statecmp(np->key,key)==0 ){
4727       /* An existing entry with the same key is found. */
4728       /* Fail because overwrite is not allows. */
4729       return 0;
4730     }
4731     np = np->next;
4732   }
4733   if( x3a->count>=x3a->size ){
4734     /* Need to make the hash table bigger */
4735     int i,size;
4736     struct s_x3 array;
4737     array.size = size = x3a->size*2;
4738     array.count = x3a->count;
4739     array.tbl = (x3node*)malloc(
4740       (sizeof(x3node) + sizeof(x3node*))*size );
4741     if( array.tbl==0 ) return 0;  /* Fail due to malloc failure */
4742     array.ht = (x3node**)&(array.tbl[size]);
4743     for(i=0; i<size; i++) array.ht[i] = 0;
4744     for(i=0; i<x3a->count; i++){
4745       x3node *oldnp, *newnp;
4746       oldnp = &(x3a->tbl[i]);
4747       h = statehash(oldnp->key) & (size-1);
4748       newnp = &(array.tbl[i]);
4749       if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4750       newnp->next = array.ht[h];
4751       newnp->key = oldnp->key;
4752       newnp->data = oldnp->data;
4753       newnp->from = &(array.ht[h]);
4754       array.ht[h] = newnp;
4755     }
4756     free(x3a->tbl);
4757     *x3a = array;
4758   }
4759   /* Insert the new data */
4760   h = ph & (x3a->size-1);
4761   np = &(x3a->tbl[x3a->count++]);
4762   np->key = key;
4763   np->data = data;
4764   if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next);
4765   np->next = x3a->ht[h];
4766   x3a->ht[h] = np;
4767   np->from = &(x3a->ht[h]);
4768   return 1;
4769 }
4770 
4771 /* Return a pointer to data assigned to the given key.  Return NULL
4772 ** if no such key. */
State_find(struct config * key)4773 struct state *State_find(struct config *key)
4774 {
4775   int h;
4776   x3node *np;
4777 
4778   if( x3a==0 ) return 0;
4779   h = statehash(key) & (x3a->size-1);
4780   np = x3a->ht[h];
4781   while( np ){
4782     if( statecmp(np->key,key)==0 ) break;
4783     np = np->next;
4784   }
4785   return np ? np->data : 0;
4786 }
4787 
4788 /* Return an array of pointers to all data in the table.
4789 ** The array is obtained from malloc.  Return NULL if memory allocation
4790 ** problems, or if the array is empty. */
State_arrayof()4791 struct state **State_arrayof()
4792 {
4793   struct state **array;
4794   int i,size;
4795   if( x3a==0 ) return 0;
4796   size = x3a->count;
4797   array = (struct state **)malloc( sizeof(struct state *)*size );
4798   if( array ){
4799     for(i=0; i<size; i++) array[i] = x3a->tbl[i].data;
4800   }
4801   return array;
4802 }
4803 
4804 /* Hash a configuration */
confighash(struct config * a)4805 PRIVATE int confighash(struct config *a)
4806 {
4807   int h=0;
4808   h = h*571 + a->rp->index*37 + a->dot;
4809   return h;
4810 }
4811 
4812 /* There is one instance of the following structure for each
4813 ** associative array of type "x4".
4814 */
4815 struct s_x4 {
4816   int size;               /* The number of available slots. */
4817                           /*   Must be a power of 2 greater than or */
4818                           /*   equal to 1 */
4819   int count;              /* Number of currently slots filled */
4820   struct s_x4node *tbl;  /* The data stored here */
4821   struct s_x4node **ht;  /* Hash table for lookups */
4822 };
4823 
4824 /* There is one instance of this structure for every data element
4825 ** in an associative array of type "x4".
4826 */
4827 typedef struct s_x4node {
4828   struct config *data;                  /* The data */
4829   struct s_x4node *next;   /* Next entry with the same hash */
4830   struct s_x4node **from;  /* Previous link */
4831 } x4node;
4832 
4833 /* There is only one instance of the array, which is the following */
4834 static struct s_x4 *x4a;
4835 
4836 /* Allocate a new associative array */
Configtable_init()4837 void Configtable_init(){
4838   if( x4a ) return;
4839   x4a = (struct s_x4*)malloc( sizeof(struct s_x4) );
4840   if( x4a ){
4841     x4a->size = 64;
4842     x4a->count = 0;
4843     x4a->tbl = (x4node*)malloc(
4844       (sizeof(x4node) + sizeof(x4node*))*64 );
4845     if( x4a->tbl==0 ){
4846       free(x4a);
4847       x4a = 0;
4848     }else{
4849       int i;
4850       x4a->ht = (x4node**)&(x4a->tbl[64]);
4851       for(i=0; i<64; i++) x4a->ht[i] = 0;
4852     }
4853   }
4854 }
4855 /* Insert a new record into the array.  Return TRUE if successful.
4856 ** Prior data with the same key is NOT overwritten */
Configtable_insert(struct config * data)4857 int Configtable_insert(struct config *data)
4858 {
4859   x4node *np;
4860   int h;
4861   int ph;
4862 
4863   if( x4a==0 ) return 0;
4864   ph = confighash(data);
4865   h = ph & (x4a->size-1);
4866   np = x4a->ht[h];
4867   while( np ){
4868     if( Configcmp((const char *) np->data,(const char *) data)==0 ){
4869       /* An existing entry with the same key is found. */
4870       /* Fail because overwrite is not allows. */
4871       return 0;
4872     }
4873     np = np->next;
4874   }
4875   if( x4a->count>=x4a->size ){
4876     /* Need to make the hash table bigger */
4877     int i,size;
4878     struct s_x4 array;
4879     array.size = size = x4a->size*2;
4880     array.count = x4a->count;
4881     array.tbl = (x4node*)malloc(
4882       (sizeof(x4node) + sizeof(x4node*))*size );
4883     if( array.tbl==0 ) return 0;  /* Fail due to malloc failure */
4884     array.ht = (x4node**)&(array.tbl[size]);
4885     for(i=0; i<size; i++) array.ht[i] = 0;
4886     for(i=0; i<x4a->count; i++){
4887       x4node *oldnp, *newnp;
4888       oldnp = &(x4a->tbl[i]);
4889       h = confighash(oldnp->data) & (size-1);
4890       newnp = &(array.tbl[i]);
4891       if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4892       newnp->next = array.ht[h];
4893       newnp->data = oldnp->data;
4894       newnp->from = &(array.ht[h]);
4895       array.ht[h] = newnp;
4896     }
4897     free(x4a->tbl);
4898     *x4a = array;
4899   }
4900   /* Insert the new data */
4901   h = ph & (x4a->size-1);
4902   np = &(x4a->tbl[x4a->count++]);
4903   np->data = data;
4904   if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next);
4905   np->next = x4a->ht[h];
4906   x4a->ht[h] = np;
4907   np->from = &(x4a->ht[h]);
4908   return 1;
4909 }
4910 
4911 /* Return a pointer to data assigned to the given key.  Return NULL
4912 ** if no such key. */
Configtable_find(struct config * key)4913 struct config *Configtable_find(struct config *key)
4914 {
4915   int h;
4916   x4node *np;
4917 
4918   if( x4a==0 ) return 0;
4919   h = confighash(key) & (x4a->size-1);
4920   np = x4a->ht[h];
4921   while( np ){
4922     if( Configcmp((const char *) np->data,(const char *) key)==0 ) break;
4923     np = np->next;
4924   }
4925   return np ? np->data : 0;
4926 }
4927 
4928 /* Remove all data from the table.  Pass each data to the function "f"
4929 ** as it is removed.  ("f" may be null to avoid this step.) */
Configtable_clear(int (* f)(struct config *))4930 void Configtable_clear(int(*f)(struct config *))
4931 {
4932   int i;
4933   if( x4a==0 || x4a->count==0 ) return;
4934   if( f ) for(i=0; i<x4a->count; i++) (*f)(x4a->tbl[i].data);
4935   for(i=0; i<x4a->size; i++) x4a->ht[i] = 0;
4936   x4a->count = 0;
4937   return;
4938 }
4939