1 #include <stdio.h>
2 #include <stdarg.h>
3 #include <string.h>
4 #include <ctype.h>
5 #include <stdlib.h>
6 #include <unistd.h>
7 
8 #include "lemon_fsm.h"
9 #include "lemon_assert.h"
10 #include "lemon_error.h"
11 #include "lemon_memory.h"
12 ////#include "lemon_msort.h"
13 //#include "lemon_option.h"
14 //#include "lemon_structs.h"
15 #include "lemon_action.h"
16 ////#include "lemon_string.h"
17 #include "lemon_set.h"
18 //#include "lemon_report.h"
19 #include "lemon_symbol.h"
20 #include "lemon_plink.h"
21 #include "lemon_config_list.h"
22 #include "lemon_state_table.h"
23 ////#include "lemon_fsm.h"
24 //#include "lemon_parse.h"
25 
26 /*
27 ** Routines to construct the finite state machine for the LEMON
28 ** parser generator.
29 */
30 
31 /* Find a precedence symbol of every rule in the grammar.
32 **
33 ** Those rules which have a precedence symbol coded in the input
34 ** grammar using the "[symbol]" construct will already have the
35 ** rp->precsym field filled.  Other rules take as their precedence
36 ** symbol the first RHS symbol with a defined precedence.  If there
37 ** are not RHS symbols with a defined precedence, the precedence
38 ** symbol field is left blank.
39 */
FindRulePrecedences(struct lemon * xp)40 void FindRulePrecedences(struct lemon *xp) {
41 	struct rule *rp;
42 	for(rp=xp->rule; rp; rp=rp->next){
43 		if (rp->precsym==0) {
44 			int i;
45 			for(i=0; i<rp->nrhs; i++){
46 				if (rp->rhs[i]->prec>=0) {
47 					rp->precsym = rp->rhs[i];
48 					break;
49 	}
50 			}
51 		}
52 	}
53 	return;
54 }
55 
56 /* Find all nonterminals which will generate the empty string.
57 ** Then go back and compute the first sets of every nonterminal.
58 ** The first set is the set of all terminal symbols which can begin
59 ** a string generated by that nonterminal.
60 */
FindFirstSets(struct lemon * lemp)61 void FindFirstSets(struct lemon *lemp) {
62 	int i;
63 	struct rule *rp;
64 	int progress;
65 
66 	for (i=0; i<lemp->nsymbol; i++) {
67 		lemp->symbols[i]->lambda = B_FALSE;
68 	}
69 	for (i=lemp->nterminal; i<lemp->nsymbol; i++) {
70 		lemp->symbols[i]->firstset = SetNew();
71 	}
72 
73 	/* First compute all lambdas */
74 	do {
75 		progress = 0;
76 		for (rp=lemp->rule; rp; rp=rp->next) {
77 			if (rp->lhs->lambda)  continue;
78 			for(i=0; i<rp->nrhs; i++){
79 				 if (rp->rhs[i]->lambda==B_FALSE)  break;
80 			}
81 			if (i==rp->nrhs) {
82 				rp->lhs->lambda = B_TRUE;
83 				progress = 1;
84 			}
85 		}
86 	} while (progress);
87 
88 	/* Now compute all first sets */
89 	do {
90 		struct symbol *s1, *s2;
91 		progress = 0;
92 		for(rp=lemp->rule; rp; rp=rp->next){
93 			s1 = rp->lhs;
94 			for(i=0; i<rp->nrhs; i++){
95 				s2 = rp->rhs[i];
96 				if (s2->type==TERMINAL) {
97 					progress += SetAdd(s1->firstset,s2->index);
98 					break;
99 				} else if (s1==s2) {
100 					if (s1->lambda==B_FALSE)  break;
101 				} else {
102 					progress += SetUnion(s1->firstset,s2->firstset);
103 					if (s2->lambda==B_FALSE)  break;
104 				}
105 			}
106 		}
107 	} while (progress);
108 	return;
109 }
110 
111 /* Compute all LR(0) states for the grammar.  Links
112 ** are added to between some states so that the LR(1) follow sets
113 ** can be computed later.
114 */
115 static struct state *getstate(/* struct lemon * */);  /* forward reference */
116 
FindStates(struct lemon * lemp)117 void FindStates(struct lemon *lemp) {
118 	struct symbol *sp;
119 	struct rule *rp;
120 
121 	Configlist_init();
122 
123 	/* Find the start symbol */
124 	if (lemp->start) {
125 		sp = Symbol_find(lemp->start);
126 		if (sp==0) {
127 			ErrorMsg(lemp->filename,0,
128 				"The specified start symbol \"%s\" is not \
129 				in a nonterminal of the grammar.  \"%s\" will be used as the start \
130 				symbol instead.",lemp->start,lemp->rule->lhs->name);
131 			lemp->errorcnt++;
132 			sp = lemp->rule->lhs;
133 		}
134 	} else {
135 		sp = lemp->rule->lhs;
136 	}
137 
138 	/* Make sure the start symbol doesn't occur on the right-hand side of
139 	** any rule.  Report an error if it does.  (YACC would generate a new
140 	** start symbol in this case.) */
141 	for(rp=lemp->rule; rp; rp=rp->next){
142 		int i;
143 		for(i=0; i<rp->nrhs; i++){
144 			if (rp->rhs[i]==sp) {
145 				ErrorMsg(lemp->filename,0,
146 					"The start symbol \"%s\" occurs on the \
147 					right-hand side of a rule. This will result in a parser which \
148 					does not work properly.",sp->name);
149 				lemp->errorcnt++;
150 			}
151 		}
152 	}
153 
154 	/* The basis configuration set for the first state
155 	** is all rules which have the start symbol as their
156 	** left-hand side */
157 	for(rp=sp->rule; rp; rp=rp->nextlhs){
158 		struct config *newcfp;
159 		newcfp = Configlist_addbasis(rp,0);
160 		SetAdd(newcfp->fws,0);
161 	}
162 
163 	/* Compute the first state.  All other states will be
164 	** computed automatically during the computation of the first one.
165 	** The returned pointer to the first state is not used. */
166 	(void)getstate(lemp);
167 	return;
168 }
169 
170 /* Return a pointer to a state which is described by the configuration
171 ** list which has been built from calls to Configlist_add.
172 */
173 static void buildshifts(struct lemon *, struct state *); /* Forward ref */
174 
getstate(struct lemon * lemp)175 static struct state *getstate(struct lemon *lemp) {
176 	struct config *cfp, *bp;
177 	struct state *stp;
178 
179 	/* Extract the sorted basis of the new state.  The basis was constructed
180 	** by prior calls to "Configlist_addbasis()". */
181 	Configlist_sortbasis();
182 	bp = Configlist_basis();
183 
184 	/* Get a state with the same basis */
185 	stp = State_find(bp);
186 	if (stp) {
187 		/* A state with the same basis already exists!  Copy all the follow-set
188 		** propagation links from the state under construction into the
189 		** preexisting state, then return a pointer to the preexisting state */
190 		struct config *x, *y;
191 		for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){
192 			Plink_copy(&y->bplp,x->bplp);
193 			Plink_delete(x->fplp);
194 			x->fplp = x->bplp = 0;
195 		}
196 		cfp = Configlist_return();
197 		Configlist_eat(cfp);
198 	} else {
199 		/* This really is a new state.  Construct all the details */
200 		Configlist_closure(lemp);    /* Compute the configuration closure */
201 		Configlist_sort();           /* Sort the configuration closure */
202 		cfp = Configlist_return();   /* Get a pointer to the config list */
203 		stp = State_new();           /* A new state structure */
204 		MemoryCheck(stp);
205 		stp->bp = bp;                /* Remember the configuration basis */
206 		stp->cfp = cfp;              /* Remember the configuration closure */
207 		stp->index = lemp->nstate++; /* Every state gets a sequence number */
208 		stp->ap = 0;                 /* No actions, yet. */
209 		State_insert(stp,stp->bp);   /* Add to the state table */
210 		buildshifts(lemp,stp);       /* Recursively compute successor states */
211 	}
212 	return stp;
213 }
214 
215 /* Construct all successor states to the given state.  A "successor"
216 ** state is any state which can be reached by a shift action.
217 */
buildshifts(struct lemon * lemp,struct state * stp)218 static void buildshifts(
219 	struct lemon *lemp,
220 	struct state *stp)     /* The state from which successors are computed */
221 {
222 	struct config *cfp;  /* For looping thru the config closure of "stp" */
223 	struct config *bcfp; /* For the inner loop on config closure of "stp" */
224 	struct config *new;  /* */
225 	struct symbol *sp;   /* Symbol following the dot in configuration "cfp" */
226 	struct symbol *bsp;  /* Symbol following the dot in configuration "bcfp" */
227 	struct state *newstp; /* A pointer to a successor state */
228 
229 	/* Each configuration becomes complete after it contibutes to a successor
230 	** state.  Initially, all configurations are incomplete */
231 	for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE;
232 
233 	/* Loop through all configurations of the state "stp" */
234 	for(cfp=stp->cfp; cfp; cfp=cfp->next){
235 		if (cfp->status==COMPLETE)  continue;    /* Already used by inner loop */
236 		if (cfp->dot>=cfp->rp->nrhs)  continue;  /* Can't shift this config */
237 		Configlist_reset();                      /* Reset the new config set */
238 		sp = cfp->rp->rhs[cfp->dot];             /* Symbol after the dot */
239 
240 		/* For every configuration in the state "stp" which has the symbol "sp"
241 		** following its dot, add the same configuration to the basis set under
242 		** construction but with the dot shifted one symbol to the right. */
243 		for(bcfp=cfp; bcfp; bcfp=bcfp->next){
244 			if (bcfp->status==COMPLETE)  continue;    /* Already used */
245 			if (bcfp->dot>=bcfp->rp->nrhs)  continue; /* Can't shift this one */
246 			bsp = bcfp->rp->rhs[bcfp->dot];           /* Get symbol after dot */
247 			if (bsp!=sp)  continue;                   /* Must be same as for "cfp" */
248 			bcfp->status = COMPLETE;                  /* Mark this config as used */
249 			new = Configlist_addbasis(bcfp->rp,bcfp->dot+1);
250 			Plink_add(&new->bplp,bcfp);
251 		}
252 
253 		/* Get a pointer to the state described by the basis configuration set
254 		** constructed in the preceding loop */
255 		newstp = getstate(lemp);
256 
257 		/* The state "newstp" is reached from the state "stp" by a shift action
258 		** on the symbol "sp" */
259 		Action_add(&stp->ap,SHIFT,sp,(char *)newstp);
260 	}
261 }
262 
263 /*
264 ** Construct the propagation links
265 */
FindLinks(struct lemon * lemp)266 void FindLinks(struct lemon *lemp) {
267 	int i;
268 	struct config *cfp, *other;
269 	struct state *stp;
270 	struct plink *plp;
271 
272 	/* Housekeeping detail:
273 	** Add to every propagate link a pointer back to the state to
274 	** which the link is attached. */
275 	for(i=0; i<lemp->nstate; i++){
276 		stp = lemp->sorted[i];
277 		for(cfp=stp->cfp; cfp; cfp=cfp->next){
278 			cfp->stp = stp;
279 		}
280 	}
281 
282 	/* Convert all backlinks into forward links.  Only the forward
283 	** links are used in the follow-set computation. */
284 	for(i=0; i<lemp->nstate; i++){
285 		stp = lemp->sorted[i];
286 		for(cfp=stp->cfp; cfp; cfp=cfp->next){
287 			for(plp=cfp->bplp; plp; plp=plp->next){
288 				other = plp->cfp;
289 				Plink_add(&other->fplp,cfp);
290 			}
291 		}
292 	}
293 }
294 
295 /* Compute all followsets.
296 **
297 ** A followset is the set of all symbols which can come immediately
298 ** after a configuration.
299 */
FindFollowSets(struct lemon * lemp)300 void FindFollowSets(struct lemon *lemp) {
301 	int i;
302 	struct config *cfp;
303 	struct plink *plp;
304 	int progress;
305 	int change;
306 
307 	for(i=0; i<lemp->nstate; i++){
308 		for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
309 			cfp->status = INCOMPLETE;
310 		}
311 	}
312 
313 	do {
314 		progress = 0;
315 		for (i=0; i<lemp->nstate; i++){
316 			for (cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next) {
317 				if (cfp->status==COMPLETE)  continue;
318 				for (plp=cfp->fplp; plp; plp=plp->next) {
319 					change = SetUnion(plp->cfp->fws,cfp->fws);
320 					if (change) {
321 						plp->cfp->status = INCOMPLETE;
322 						progress = 1;
323 					}
324 				}
325 				cfp->status = COMPLETE;
326 			}
327 		}
328 	} while (progress);
329 }
330 
331 static int resolve_conflict();
332 
333 /* Compute the reduce actions, and resolve conflicts.
334 */
FindActions(struct lemon * lemp)335 void FindActions(struct lemon *lemp) {
336 	int i,j;
337 	struct config *cfp;
338 	struct state *stp;
339 	struct symbol *sp;
340 	struct rule *rp;
341 
342 	/* Add all of the reduce actions
343 	** A reduce action is added for each element of the followset of
344 	** a configuration which has its dot at the extreme right.
345 	*/
346 	for(i=0; i<lemp->nstate; i++){   /* Loop over all states */
347 		stp = lemp->sorted[i];
348 		for(cfp=stp->cfp; cfp; cfp=cfp->next){  /* Loop over all configurations */
349 			if (cfp->rp->nrhs==cfp->dot) {        /* Is dot at extreme right? */
350 				for(j=0; j<lemp->nterminal; j++){
351 					if (SetFind(cfp->fws,j)) {
352 						/* Add a reduce action to the state "stp" which will reduce by the
353 						** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */
354 						Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp);
355 					}
356 				}
357 			}
358 		}
359 	}
360 
361 	/* Add the accepting token */
362 	if (lemp->start) {
363 		sp = Symbol_find(lemp->start);
364 		if (sp==0)  sp = lemp->rule->lhs;
365 	} else {
366 		sp = lemp->rule->lhs;
367 	}
368 	/* Add to the first state (which is always the starting state of the
369 	** finite state machine) an action to ACCEPT if the lookahead is the
370 	** start nonterminal.  */
371 	Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0);
372 
373 	/* Resolve conflicts */
374 	for(i=0; i<lemp->nstate; i++){
375 		struct action *ap, *nap;
376 		struct state *stp;
377 		stp = lemp->sorted[i];
378 		assert (stp->ap) ;
379 		stp->ap = Action_sort(stp->ap);
380 		for(ap=stp->ap; ap && ap->next; ap=ap->next){
381 			for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){
382 				 /* The two actions "ap" and "nap" have the same lookahead.
383 				 ** Figure out which one should be used */
384 				 lemp->nconflict += resolve_conflict(ap,nap,lemp->errsym);
385 			}
386 		}
387 	}
388 
389 	/* Report an error for each rule that can never be reduced. */
390 	for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = B_FALSE;
391 	for(i=0; i<lemp->nstate; i++){
392 		struct action *ap;
393 		for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){
394 			if (ap->type==REDUCE)  ap->x.rp->canReduce = B_TRUE;
395 		}
396 	}
397 	for(rp=lemp->rule; rp; rp=rp->next){
398 		if (rp->canReduce)  continue;
399 		ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n");
400 		lemp->errorcnt++;
401 	}
402 }
403 
404 /* Resolve a conflict between the two given actions.  If the
405 ** conflict can't be resolve, return non-zero.
406 **
407 ** NO LONGER TRUE:
408 **   To resolve a conflict, first look to see if either action
409 **   is on an error rule.  In that case, take the action which
410 **   is not associated with the error rule.  If neither or both
411 **   actions are associated with an error rule, then try to
412 **   use precedence to resolve the conflict.
413 **
414 ** If either action is a SHIFT, then it must be apx.  This
415 ** function won't work if apx->type==REDUCE and apy->type==SHIFT.
416 */
resolve_conflict(struct action * apx,struct action * apy,struct symbol * errsym)417 static int resolve_conflict(
418 	struct action *apx,
419 	struct action *apy,
420 	struct symbol *errsym)   /* The error symbol (if defined.  NULL otherwise) */
421 {
422 	struct symbol *spx, *spy;
423 	int errcnt = 0;
424 	assert (apx->sp==apy->sp) ;  /* Otherwise there would be no conflict */
425 	if (apx->type==SHIFT && apy->type==REDUCE) {
426 		spx = apx->sp;
427 		spy = apy->x.rp->precsym;
428 		if (spy==0 || spx->prec<0 || spy->prec<0) {
429 			/* Not enough precedence information. */
430 			apy->type = CONFLICT;
431 			errcnt++;
432 		} else if (spx->prec>spy->prec) {    /* Lower precedence wins */
433 			apy->type = RD_RESOLVED;
434 		} else if (spx->prec<spy->prec) {
435 			apx->type = SH_RESOLVED;
436 		} else if (spx->prec==spy->prec && spx->assoc==RIGHT) { /* Use operator */
437 			apy->type = RD_RESOLVED;                             /* associativity */
438 		} else if (spx->prec==spy->prec && spx->assoc==LEFT) {  /* to break tie */
439 			apx->type = SH_RESOLVED;
440 		} else {
441 			assert (spx->prec==spy->prec && spx->assoc==NONE) ;
442 			apy->type = CONFLICT;
443 			errcnt++;
444 		}
445 	} else if (apx->type==REDUCE && apy->type==REDUCE) {
446 		spx = apx->x.rp->precsym;
447 		spy = apy->x.rp->precsym;
448 		if (spx==0 || spy==0 || spx->prec<0 ||
449 		spy->prec<0 || spx->prec==spy->prec) {
450 			apy->type = CONFLICT;
451 			errcnt++;
452 		} else if (spx->prec>spy->prec) {
453 			apy->type = RD_RESOLVED;
454 		} else if (spx->prec<spy->prec) {
455 			apx->type = RD_RESOLVED;
456 		}
457 	} else {
458 		assert (
459 			apx->type==SH_RESOLVED ||
460 			apx->type==RD_RESOLVED ||
461 			apx->type==CONFLICT ||
462 			apy->type==SH_RESOLVED ||
463 			apy->type==RD_RESOLVED ||
464 			apy->type==CONFLICT
465 	 ) ;
466 		/* The REDUCE/SHIFT case cannot happen because SHIFTs come before
467 		** REDUCEs on the list.  If we reach this point it must be because
468 		** the parser conflict had already been resolved. */
469 	}
470 	return errcnt;
471 }
472