1 #include <stdio.h>
2 #include <stdarg.h>
3 #include <string.h>
4 #include <ctype.h>
5 #include <stdlib.h>
6 #include <unistd.h>
7
8 #include "lemon_fsm.h"
9 #include "lemon_assert.h"
10 #include "lemon_error.h"
11 #include "lemon_memory.h"
12 ////#include "lemon_msort.h"
13 //#include "lemon_option.h"
14 //#include "lemon_structs.h"
15 #include "lemon_action.h"
16 ////#include "lemon_string.h"
17 #include "lemon_set.h"
18 //#include "lemon_report.h"
19 #include "lemon_symbol.h"
20 #include "lemon_plink.h"
21 #include "lemon_config_list.h"
22 #include "lemon_state_table.h"
23 ////#include "lemon_fsm.h"
24 //#include "lemon_parse.h"
25
26 /*
27 ** Routines to construct the finite state machine for the LEMON
28 ** parser generator.
29 */
30
31 /* Find a precedence symbol of every rule in the grammar.
32 **
33 ** Those rules which have a precedence symbol coded in the input
34 ** grammar using the "[symbol]" construct will already have the
35 ** rp->precsym field filled. Other rules take as their precedence
36 ** symbol the first RHS symbol with a defined precedence. If there
37 ** are not RHS symbols with a defined precedence, the precedence
38 ** symbol field is left blank.
39 */
FindRulePrecedences(struct lemon * xp)40 void FindRulePrecedences(struct lemon *xp) {
41 struct rule *rp;
42 for(rp=xp->rule; rp; rp=rp->next){
43 if (rp->precsym==0) {
44 int i;
45 for(i=0; i<rp->nrhs; i++){
46 if (rp->rhs[i]->prec>=0) {
47 rp->precsym = rp->rhs[i];
48 break;
49 }
50 }
51 }
52 }
53 return;
54 }
55
56 /* Find all nonterminals which will generate the empty string.
57 ** Then go back and compute the first sets of every nonterminal.
58 ** The first set is the set of all terminal symbols which can begin
59 ** a string generated by that nonterminal.
60 */
FindFirstSets(struct lemon * lemp)61 void FindFirstSets(struct lemon *lemp) {
62 int i;
63 struct rule *rp;
64 int progress;
65
66 for (i=0; i<lemp->nsymbol; i++) {
67 lemp->symbols[i]->lambda = B_FALSE;
68 }
69 for (i=lemp->nterminal; i<lemp->nsymbol; i++) {
70 lemp->symbols[i]->firstset = SetNew();
71 }
72
73 /* First compute all lambdas */
74 do {
75 progress = 0;
76 for (rp=lemp->rule; rp; rp=rp->next) {
77 if (rp->lhs->lambda) continue;
78 for(i=0; i<rp->nrhs; i++){
79 if (rp->rhs[i]->lambda==B_FALSE) break;
80 }
81 if (i==rp->nrhs) {
82 rp->lhs->lambda = B_TRUE;
83 progress = 1;
84 }
85 }
86 } while (progress);
87
88 /* Now compute all first sets */
89 do {
90 struct symbol *s1, *s2;
91 progress = 0;
92 for(rp=lemp->rule; rp; rp=rp->next){
93 s1 = rp->lhs;
94 for(i=0; i<rp->nrhs; i++){
95 s2 = rp->rhs[i];
96 if (s2->type==TERMINAL) {
97 progress += SetAdd(s1->firstset,s2->index);
98 break;
99 } else if (s1==s2) {
100 if (s1->lambda==B_FALSE) break;
101 } else {
102 progress += SetUnion(s1->firstset,s2->firstset);
103 if (s2->lambda==B_FALSE) break;
104 }
105 }
106 }
107 } while (progress);
108 return;
109 }
110
111 /* Compute all LR(0) states for the grammar. Links
112 ** are added to between some states so that the LR(1) follow sets
113 ** can be computed later.
114 */
115 static struct state *getstate(/* struct lemon * */); /* forward reference */
116
FindStates(struct lemon * lemp)117 void FindStates(struct lemon *lemp) {
118 struct symbol *sp;
119 struct rule *rp;
120
121 Configlist_init();
122
123 /* Find the start symbol */
124 if (lemp->start) {
125 sp = Symbol_find(lemp->start);
126 if (sp==0) {
127 ErrorMsg(lemp->filename,0,
128 "The specified start symbol \"%s\" is not \
129 in a nonterminal of the grammar. \"%s\" will be used as the start \
130 symbol instead.",lemp->start,lemp->rule->lhs->name);
131 lemp->errorcnt++;
132 sp = lemp->rule->lhs;
133 }
134 } else {
135 sp = lemp->rule->lhs;
136 }
137
138 /* Make sure the start symbol doesn't occur on the right-hand side of
139 ** any rule. Report an error if it does. (YACC would generate a new
140 ** start symbol in this case.) */
141 for(rp=lemp->rule; rp; rp=rp->next){
142 int i;
143 for(i=0; i<rp->nrhs; i++){
144 if (rp->rhs[i]==sp) {
145 ErrorMsg(lemp->filename,0,
146 "The start symbol \"%s\" occurs on the \
147 right-hand side of a rule. This will result in a parser which \
148 does not work properly.",sp->name);
149 lemp->errorcnt++;
150 }
151 }
152 }
153
154 /* The basis configuration set for the first state
155 ** is all rules which have the start symbol as their
156 ** left-hand side */
157 for(rp=sp->rule; rp; rp=rp->nextlhs){
158 struct config *newcfp;
159 newcfp = Configlist_addbasis(rp,0);
160 SetAdd(newcfp->fws,0);
161 }
162
163 /* Compute the first state. All other states will be
164 ** computed automatically during the computation of the first one.
165 ** The returned pointer to the first state is not used. */
166 (void)getstate(lemp);
167 return;
168 }
169
170 /* Return a pointer to a state which is described by the configuration
171 ** list which has been built from calls to Configlist_add.
172 */
173 static void buildshifts(struct lemon *, struct state *); /* Forward ref */
174
getstate(struct lemon * lemp)175 static struct state *getstate(struct lemon *lemp) {
176 struct config *cfp, *bp;
177 struct state *stp;
178
179 /* Extract the sorted basis of the new state. The basis was constructed
180 ** by prior calls to "Configlist_addbasis()". */
181 Configlist_sortbasis();
182 bp = Configlist_basis();
183
184 /* Get a state with the same basis */
185 stp = State_find(bp);
186 if (stp) {
187 /* A state with the same basis already exists! Copy all the follow-set
188 ** propagation links from the state under construction into the
189 ** preexisting state, then return a pointer to the preexisting state */
190 struct config *x, *y;
191 for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){
192 Plink_copy(&y->bplp,x->bplp);
193 Plink_delete(x->fplp);
194 x->fplp = x->bplp = 0;
195 }
196 cfp = Configlist_return();
197 Configlist_eat(cfp);
198 } else {
199 /* This really is a new state. Construct all the details */
200 Configlist_closure(lemp); /* Compute the configuration closure */
201 Configlist_sort(); /* Sort the configuration closure */
202 cfp = Configlist_return(); /* Get a pointer to the config list */
203 stp = State_new(); /* A new state structure */
204 MemoryCheck(stp);
205 stp->bp = bp; /* Remember the configuration basis */
206 stp->cfp = cfp; /* Remember the configuration closure */
207 stp->index = lemp->nstate++; /* Every state gets a sequence number */
208 stp->ap = 0; /* No actions, yet. */
209 State_insert(stp,stp->bp); /* Add to the state table */
210 buildshifts(lemp,stp); /* Recursively compute successor states */
211 }
212 return stp;
213 }
214
215 /* Construct all successor states to the given state. A "successor"
216 ** state is any state which can be reached by a shift action.
217 */
buildshifts(struct lemon * lemp,struct state * stp)218 static void buildshifts(
219 struct lemon *lemp,
220 struct state *stp) /* The state from which successors are computed */
221 {
222 struct config *cfp; /* For looping thru the config closure of "stp" */
223 struct config *bcfp; /* For the inner loop on config closure of "stp" */
224 struct config *new; /* */
225 struct symbol *sp; /* Symbol following the dot in configuration "cfp" */
226 struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */
227 struct state *newstp; /* A pointer to a successor state */
228
229 /* Each configuration becomes complete after it contibutes to a successor
230 ** state. Initially, all configurations are incomplete */
231 for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE;
232
233 /* Loop through all configurations of the state "stp" */
234 for(cfp=stp->cfp; cfp; cfp=cfp->next){
235 if (cfp->status==COMPLETE) continue; /* Already used by inner loop */
236 if (cfp->dot>=cfp->rp->nrhs) continue; /* Can't shift this config */
237 Configlist_reset(); /* Reset the new config set */
238 sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */
239
240 /* For every configuration in the state "stp" which has the symbol "sp"
241 ** following its dot, add the same configuration to the basis set under
242 ** construction but with the dot shifted one symbol to the right. */
243 for(bcfp=cfp; bcfp; bcfp=bcfp->next){
244 if (bcfp->status==COMPLETE) continue; /* Already used */
245 if (bcfp->dot>=bcfp->rp->nrhs) continue; /* Can't shift this one */
246 bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */
247 if (bsp!=sp) continue; /* Must be same as for "cfp" */
248 bcfp->status = COMPLETE; /* Mark this config as used */
249 new = Configlist_addbasis(bcfp->rp,bcfp->dot+1);
250 Plink_add(&new->bplp,bcfp);
251 }
252
253 /* Get a pointer to the state described by the basis configuration set
254 ** constructed in the preceding loop */
255 newstp = getstate(lemp);
256
257 /* The state "newstp" is reached from the state "stp" by a shift action
258 ** on the symbol "sp" */
259 Action_add(&stp->ap,SHIFT,sp,(char *)newstp);
260 }
261 }
262
263 /*
264 ** Construct the propagation links
265 */
FindLinks(struct lemon * lemp)266 void FindLinks(struct lemon *lemp) {
267 int i;
268 struct config *cfp, *other;
269 struct state *stp;
270 struct plink *plp;
271
272 /* Housekeeping detail:
273 ** Add to every propagate link a pointer back to the state to
274 ** which the link is attached. */
275 for(i=0; i<lemp->nstate; i++){
276 stp = lemp->sorted[i];
277 for(cfp=stp->cfp; cfp; cfp=cfp->next){
278 cfp->stp = stp;
279 }
280 }
281
282 /* Convert all backlinks into forward links. Only the forward
283 ** links are used in the follow-set computation. */
284 for(i=0; i<lemp->nstate; i++){
285 stp = lemp->sorted[i];
286 for(cfp=stp->cfp; cfp; cfp=cfp->next){
287 for(plp=cfp->bplp; plp; plp=plp->next){
288 other = plp->cfp;
289 Plink_add(&other->fplp,cfp);
290 }
291 }
292 }
293 }
294
295 /* Compute all followsets.
296 **
297 ** A followset is the set of all symbols which can come immediately
298 ** after a configuration.
299 */
FindFollowSets(struct lemon * lemp)300 void FindFollowSets(struct lemon *lemp) {
301 int i;
302 struct config *cfp;
303 struct plink *plp;
304 int progress;
305 int change;
306
307 for(i=0; i<lemp->nstate; i++){
308 for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
309 cfp->status = INCOMPLETE;
310 }
311 }
312
313 do {
314 progress = 0;
315 for (i=0; i<lemp->nstate; i++){
316 for (cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next) {
317 if (cfp->status==COMPLETE) continue;
318 for (plp=cfp->fplp; plp; plp=plp->next) {
319 change = SetUnion(plp->cfp->fws,cfp->fws);
320 if (change) {
321 plp->cfp->status = INCOMPLETE;
322 progress = 1;
323 }
324 }
325 cfp->status = COMPLETE;
326 }
327 }
328 } while (progress);
329 }
330
331 static int resolve_conflict();
332
333 /* Compute the reduce actions, and resolve conflicts.
334 */
FindActions(struct lemon * lemp)335 void FindActions(struct lemon *lemp) {
336 int i,j;
337 struct config *cfp;
338 struct state *stp;
339 struct symbol *sp;
340 struct rule *rp;
341
342 /* Add all of the reduce actions
343 ** A reduce action is added for each element of the followset of
344 ** a configuration which has its dot at the extreme right.
345 */
346 for(i=0; i<lemp->nstate; i++){ /* Loop over all states */
347 stp = lemp->sorted[i];
348 for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */
349 if (cfp->rp->nrhs==cfp->dot) { /* Is dot at extreme right? */
350 for(j=0; j<lemp->nterminal; j++){
351 if (SetFind(cfp->fws,j)) {
352 /* Add a reduce action to the state "stp" which will reduce by the
353 ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */
354 Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp);
355 }
356 }
357 }
358 }
359 }
360
361 /* Add the accepting token */
362 if (lemp->start) {
363 sp = Symbol_find(lemp->start);
364 if (sp==0) sp = lemp->rule->lhs;
365 } else {
366 sp = lemp->rule->lhs;
367 }
368 /* Add to the first state (which is always the starting state of the
369 ** finite state machine) an action to ACCEPT if the lookahead is the
370 ** start nonterminal. */
371 Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0);
372
373 /* Resolve conflicts */
374 for(i=0; i<lemp->nstate; i++){
375 struct action *ap, *nap;
376 struct state *stp;
377 stp = lemp->sorted[i];
378 assert (stp->ap) ;
379 stp->ap = Action_sort(stp->ap);
380 for(ap=stp->ap; ap && ap->next; ap=ap->next){
381 for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){
382 /* The two actions "ap" and "nap" have the same lookahead.
383 ** Figure out which one should be used */
384 lemp->nconflict += resolve_conflict(ap,nap,lemp->errsym);
385 }
386 }
387 }
388
389 /* Report an error for each rule that can never be reduced. */
390 for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = B_FALSE;
391 for(i=0; i<lemp->nstate; i++){
392 struct action *ap;
393 for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){
394 if (ap->type==REDUCE) ap->x.rp->canReduce = B_TRUE;
395 }
396 }
397 for(rp=lemp->rule; rp; rp=rp->next){
398 if (rp->canReduce) continue;
399 ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n");
400 lemp->errorcnt++;
401 }
402 }
403
404 /* Resolve a conflict between the two given actions. If the
405 ** conflict can't be resolve, return non-zero.
406 **
407 ** NO LONGER TRUE:
408 ** To resolve a conflict, first look to see if either action
409 ** is on an error rule. In that case, take the action which
410 ** is not associated with the error rule. If neither or both
411 ** actions are associated with an error rule, then try to
412 ** use precedence to resolve the conflict.
413 **
414 ** If either action is a SHIFT, then it must be apx. This
415 ** function won't work if apx->type==REDUCE and apy->type==SHIFT.
416 */
resolve_conflict(struct action * apx,struct action * apy,struct symbol * errsym)417 static int resolve_conflict(
418 struct action *apx,
419 struct action *apy,
420 struct symbol *errsym) /* The error symbol (if defined. NULL otherwise) */
421 {
422 struct symbol *spx, *spy;
423 int errcnt = 0;
424 assert (apx->sp==apy->sp) ; /* Otherwise there would be no conflict */
425 if (apx->type==SHIFT && apy->type==REDUCE) {
426 spx = apx->sp;
427 spy = apy->x.rp->precsym;
428 if (spy==0 || spx->prec<0 || spy->prec<0) {
429 /* Not enough precedence information. */
430 apy->type = CONFLICT;
431 errcnt++;
432 } else if (spx->prec>spy->prec) { /* Lower precedence wins */
433 apy->type = RD_RESOLVED;
434 } else if (spx->prec<spy->prec) {
435 apx->type = SH_RESOLVED;
436 } else if (spx->prec==spy->prec && spx->assoc==RIGHT) { /* Use operator */
437 apy->type = RD_RESOLVED; /* associativity */
438 } else if (spx->prec==spy->prec && spx->assoc==LEFT) { /* to break tie */
439 apx->type = SH_RESOLVED;
440 } else {
441 assert (spx->prec==spy->prec && spx->assoc==NONE) ;
442 apy->type = CONFLICT;
443 errcnt++;
444 }
445 } else if (apx->type==REDUCE && apy->type==REDUCE) {
446 spx = apx->x.rp->precsym;
447 spy = apy->x.rp->precsym;
448 if (spx==0 || spy==0 || spx->prec<0 ||
449 spy->prec<0 || spx->prec==spy->prec) {
450 apy->type = CONFLICT;
451 errcnt++;
452 } else if (spx->prec>spy->prec) {
453 apy->type = RD_RESOLVED;
454 } else if (spx->prec<spy->prec) {
455 apx->type = RD_RESOLVED;
456 }
457 } else {
458 assert (
459 apx->type==SH_RESOLVED ||
460 apx->type==RD_RESOLVED ||
461 apx->type==CONFLICT ||
462 apy->type==SH_RESOLVED ||
463 apy->type==RD_RESOLVED ||
464 apy->type==CONFLICT
465 ) ;
466 /* The REDUCE/SHIFT case cannot happen because SHIFTs come before
467 ** REDUCEs on the list. If we reach this point it must be because
468 ** the parser conflict had already been resolved. */
469 }
470 return errcnt;
471 }
472