xref: /original-bsd/old/yacc/y1.c (revision 5f5c18da)
1 #ifndef lint
2 static char sccsid[] = "@(#)y1.c	4.2	(Berkeley)	05/10/89";
3 #endif not lint
4 
5 # include "dextern"
6 # include "pathnames.h"
7 
8 	/* variables used locally */
9 
10 	/* lookahead computations */
11 
12 int tbitset;  /* size of lookahead sets */
13 struct looksets lkst [ LSETSIZE ];
14 int nlset = 0; /* next lookahead set index */
15 int nolook = 0; /* flag to suppress lookahead computations */
16 struct looksets clset;  /* temporary storage for lookahead computations */
17 
18 	/* working set computations */
19 
20 struct wset wsets[ WSETSIZE ];
21 struct wset *cwp;
22 
23 	/* state information */
24 
25 int nstate = 0;		/* number of states */
26 struct item *pstate[NSTATES+2];	/* pointers to the descriptions of the states */
27 int tystate[NSTATES];	/* contains type information about the states */
28 int indgo[NSTATES];		/* index to the stored goto table */
29 int tstates[ NTERMS ]; /* states generated by terminal gotos */
30 int ntstates[ NNONTERM ]; /* states generated by nonterminal gotos */
31 int mstates[ NSTATES ]; /* chain of overflows of term/nonterm generation lists  */
32 
33 	/* storage for the actions in the parser */
34 
35 int amem[ACTSIZE];	/* action table storage */
36 int *memp = amem;	/* next free action table position */
37 
38 	/* other storage areas */
39 
40 int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */
41 int lineno= 1; /* current input line number */
42 int fatfl = 1;  	/* if on, error is fatal */
43 int nerrors = 0;	/* number of errors */
44 
45 	/* storage for information about the nonterminals */
46 
47 int **pres[NNONTERM+2];  /* vector of pointers to productions yielding each nonterminal */
48 struct looksets *pfirst[NNONTERM+2];  /* vector of pointers to first sets for each nonterminal */
49 int pempty[NNONTERM+1];  /* vector of nonterminals nontrivially deriving e */
50 
51 main(argc,argv) int argc; char *argv[]; {
52 
53 	setup(argc,argv); /* initialize and read productions */
54 	tbitset = NWORDS(ntokens);
55 	cpres(); /* make table of which productions yield a given nonterminal */
56 	cempty(); /* make a table of which nonterminals can match the empty string */
57 	cpfir(); /* make a table of firsts of nonterminals */
58 	stagen(); /* generate the states */
59 	output();  /* write the states and the tables */
60 	go2out();
61 	hideprod();
62 	summary();
63 	callopt();
64 	others();
65 	exit(0);
66 	}
67 
68 others(){ /* put out other arrays, copy the parsers */
69 	register c, i, j;
70 
71 	finput = fopen( _PATH_PARSER, "r" );
72 	if( finput == NULL ) error( "cannot find parser %s", _PATH_PARSER);
73 
74 	warray( "yyr1", levprd, nprod );
75 
76 	aryfil( temp1, nprod, 0 );
77 	PLOOP(1,i)temp1[i] = prdptr[i+1]-prdptr[i]-2;
78 	warray( "yyr2", temp1, nprod );
79 
80 	aryfil( temp1, nstate, -1000 );
81 	TLOOP(i){
82 		for( j=tstates[i]; j!=0; j=mstates[j] ){
83 			temp1[j] = tokset[i].value;
84 			}
85 		}
86 	NTLOOP(i){
87 		for( j=ntstates[i]; j!=0; j=mstates[j] ){
88 			temp1[j] = -i;
89 			}
90 		}
91 	warray( "yychk", temp1, nstate );
92 
93 	warray( "yydef", defact, nstate );
94 
95 	/* copy parser text */
96 
97 	while( (c=getc(finput) ) != EOF ){
98 		if( c == '$' ){
99 			if( (c=getc(finput)) != 'A' ) putc( '$', ftable );
100 			else { /* copy actions */
101 				faction = fopen( ACTNAME, "r" );
102 				if( faction == NULL ) error( "cannot reopen action tempfile" );
103 				while( (c=getc(faction) ) != EOF ) putc( c, ftable );
104 				fclose(faction);
105 				ZAPFILE(ACTNAME);
106 				c = getc(finput);
107 				}
108 			}
109 		putc( c, ftable );
110 		}
111 
112 	fclose( ftable );
113 	}
114 
115 char *chcopy( p, q )  char *p, *q; {
116 	/* copies string q into p, returning next free char ptr */
117 	while( *p = *q++ ) ++p;
118 	return( p );
119 	}
120 
121 # define ISIZE 400
122 char *writem(pp) int *pp; { /* creates output string for item pointed to by pp */
123 	int i,*p;
124 	static char sarr[ISIZE];
125 	char *q;
126 
127 	for( p=pp; *p>0 ; ++p ) ;
128 	p = prdptr[-*p];
129 	q = chcopy( sarr, nontrst[*p-NTBASE].name );
130 	q = chcopy( q, " : " );
131 
132 	for(;;){
133 		*q++ = ++p==pp ? '_' : ' ';
134 		*q = '\0';
135 		if((i = *p) <= 0) break;
136 		q = chcopy( q, symnam(i) );
137 		if( q> &sarr[ISIZE-30] ) error( "item too big" );
138 		}
139 
140 	if( (i = *pp) < 0 ){ /* an item calling for a reduction */
141 		q = chcopy( q, "    (" );
142 		sprintf( q, "%d)", -i );
143 		}
144 
145 	return( sarr );
146 	}
147 
148 char *symnam(i){ /* return a pointer to the name of symbol i */
149 	char *cp;
150 
151 	cp = (i>=NTBASE) ? nontrst[i-NTBASE].name : tokset[i].name ;
152 	if( *cp == ' ' ) ++cp;
153 	return( cp );
154 	}
155 
156 struct wset *zzcwp = wsets;
157 int zzgoent = 0;
158 int zzgobest = 0;
159 int zzacent = 0;
160 int zzexcp = 0;
161 int zzclose = 0;
162 int zzsrconf = 0;
163 int * zzmemsz = mem0;
164 int zzrrconf = 0;
165 
166 summary(){ /* output the summary on the tty */
167 
168 	if( foutput!=NULL ){
169 		fprintf( foutput, "\n%d/%d terminals, %d/%d nonterminals\n", ntokens, NTERMS,
170 			    nnonter, NNONTERM );
171 		fprintf( foutput, "%d/%d grammar rules, %d/%d states\n", nprod, NPROD, nstate, NSTATES );
172 		fprintf( foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n", zzsrconf, zzrrconf );
173 		fprintf( foutput, "%d/%d working sets used\n", zzcwp-wsets,  WSETSIZE );
174 		fprintf( foutput, "memory: states,etc. %d/%d, parser %d/%d\n", zzmemsz-mem0, MEMSIZE,
175 			    memp-amem, ACTSIZE );
176 		fprintf( foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE );
177 		fprintf( foutput, "%d extra closures\n", zzclose - 2*nstate );
178 		fprintf( foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp );
179 		fprintf( foutput, "%d goto entries\n", zzgoent );
180 		fprintf( foutput, "%d entries saved by goto default\n", zzgobest );
181 		}
182 	if( zzsrconf!=0 || zzrrconf!=0 ){
183 		  fprintf( stdout,"\nconflicts: ");
184 		  if( zzsrconf )fprintf( stdout, "%d shift/reduce" , zzsrconf );
185 		  if( zzsrconf && zzrrconf )fprintf( stdout, ", " );
186 		  if( zzrrconf )fprintf( stdout, "%d reduce/reduce" , zzrrconf );
187 		  fprintf( stdout, "\n" );
188 		  }
189 
190 	fclose( ftemp );
191 	if( fdefine != NULL ) fclose( fdefine );
192 	}
193 
194 /* VARARGS1 */
195 error(s,a1) char *s; { /* write out error comment */
196 
197 	++nerrors;
198 	fprintf( stderr, "\n fatal error: ");
199 	fprintf( stderr, s,a1);
200 	fprintf( stderr, ", line %d\n", lineno );
201 	if( !fatfl ) return;
202 	summary();
203 	exit(1);
204 	}
205 
206 aryfil( v, n, c ) int *v,n,c; { /* set elements 0 through n-1 to c */
207 	int i;
208 	for( i=0; i<n; ++i ) v[i] = c;
209 	}
210 
211 setunion( a, b ) register *a, *b; {
212 	/* set a to the union of a and b */
213 	/* return 1 if b is not a subset of a, 0 otherwise */
214 	register i, x, sub;
215 
216 	sub = 0;
217 	SETLOOP(i){
218 		*a = (x = *a)|*b++;
219 		if( *a++ != x ) sub = 1;
220 		}
221 	return( sub );
222 	}
223 
224 prlook( p ) struct looksets *p;{
225 	register j, *pp;
226 	pp = p->lset;
227 	if( pp == 0 ) fprintf( foutput, "\tNULL");
228 	else {
229 		fprintf( foutput, " { " );
230 		TLOOP(j) {
231 			if( BIT(pp,j) ) fprintf( foutput,  "%s ", symnam(j) );
232 			}
233 		fprintf( foutput,  "}" );
234 		}
235 	}
236 
237 cpres(){ /* compute an array with the beginnings of  productions yielding given nonterminals
238 	The array pres points to these lists */
239 	/* the array pyield has the lists: the total size is only NPROD+1 */
240 	register **pmem;
241 	register c, j, i;
242 	static int * pyield[NPROD];
243 
244 	pmem = pyield;
245 
246 	NTLOOP(i){
247 		c = i+NTBASE;
248 		pres[i] = pmem;
249 		fatfl = 0;  /* make undefined  symbols  nonfatal */
250 		PLOOP(0,j){
251 			if (*prdptr[j] == c) *pmem++ =  prdptr[j]+1;
252 			}
253 		if(pres[i] == pmem){
254 			error("nonterminal %s not defined!", nontrst[i].name);
255 			}
256 		}
257 	pres[i] = pmem;
258 	fatfl = 1;
259 	if( nerrors ){
260 		summary();
261 		exit(1);
262 		}
263 	if( pmem != &pyield[nprod] ) error( "internal Yacc error: pyield %d", pmem-&pyield[nprod] );
264 	}
265 
266 int indebug = 0;
267 cpfir() {
268 	/* compute an array with the first of nonterminals */
269 	register *p, **s, i, **t, ch, changes;
270 
271 	zzcwp = &wsets[nnonter];
272 	NTLOOP(i){
273 		aryfil( wsets[i].ws.lset, tbitset, 0 );
274 		t = pres[i+1];
275 		for( s=pres[i]; s<t; ++s ){ /* initially fill the sets */
276 			for( p = *s; (ch = *p) > 0 ; ++p ) {
277 				if( ch < NTBASE ) {
278 					SETBIT( wsets[i].ws.lset, ch );
279 					break;
280 					}
281 				else if( !pempty[ch-NTBASE] ) break;
282 				}
283 			}
284 		}
285 
286 	/* now, reflect transitivity */
287 
288 	changes = 1;
289 	while( changes ){
290 		changes = 0;
291 		NTLOOP(i){
292 			t = pres[i+1];
293 			for( s=pres[i]; s<t; ++s ){
294 				for( p = *s; ( ch = (*p-NTBASE) ) >= 0; ++p ) {
295 					changes |= setunion( wsets[i].ws.lset, wsets[ch].ws.lset );
296 					if( !pempty[ch] ) break;
297 					}
298 				}
299 			}
300 		}
301 
302 	NTLOOP(i) pfirst[i] = flset( &wsets[i].ws );
303 	if( !indebug ) return;
304 	if( (foutput!=NULL) ){
305 		NTLOOP(i) {
306 			fprintf( foutput,  "\n%s: ", nontrst[i].name );
307 			prlook( pfirst[i] );
308 			fprintf( foutput,  " %d\n", pempty[i] );
309 			}
310 		}
311 	}
312 
313 state(c){ /* sorts last state,and sees if it equals earlier ones. returns state number */
314 	int size1,size2;
315 	register i;
316 	struct item *p1, *p2, *k, *l, *q1, *q2;
317 	p1 = pstate[nstate];
318 	p2 = pstate[nstate+1];
319 	if(p1==p2) return(0); /* null state */
320 	/* sort the items */
321 	for(k=p2-1;k>p1;k--) {	/* make k the biggest */
322 		for(l=k-1;l>=p1;--l)if( l->pitem > k->pitem ){
323 			int *s;
324 			struct looksets *ss;
325 			s = k->pitem;
326 			k->pitem = l->pitem;
327 			l->pitem = s;
328 			ss = k->look;
329 			k->look = l->look;
330 			l->look = ss;
331 			}
332 		}
333 	size1 = p2 - p1; /* size of state */
334 
335 	for( i= (c>=NTBASE)?ntstates[c-NTBASE]:tstates[c]; i != 0; i = mstates[i] ) {
336 		/* get ith state */
337 		q1 = pstate[i];
338 		q2 = pstate[i+1];
339 		size2 = q2 - q1;
340 		if (size1 != size2) continue;
341 		k=p1;
342 		for(l=q1;l<q2;l++) {
343 			if( l->pitem != k->pitem ) break;
344 			++k;
345 			}
346 		if (l != q2) continue;
347 		/* found it */
348 		pstate[nstate+1] = pstate[nstate]; /* delete last state */
349 		/* fix up lookaheads */
350 		if( nolook ) return(i);
351 		for( l=q1,k=p1; l<q2; ++l,++k ){
352 			int s;
353 			SETLOOP(s) clset.lset[s] = l->look->lset[s];
354 			if( setunion( clset.lset, k->look->lset ) ) {
355 				tystate[i] = MUSTDO;
356 				/* register the new set */
357 				l->look = flset( &clset );
358 				}
359 			}
360 		return (i);
361 		}
362 	/* state is new */
363 	if( nolook ) error( "yacc state/nolook error" );
364 	pstate[nstate+2] = p2;
365 	if(nstate+1 >= NSTATES) error("too many states" );
366 	if( c >= NTBASE ){
367 		mstates[ nstate ] = ntstates[ c-NTBASE ];
368 		ntstates[ c-NTBASE ] = nstate;
369 		}
370 	else {
371 		mstates[ nstate ] = tstates[ c ];
372 		tstates[ c ] = nstate;
373 		}
374 	tystate[nstate]=MUSTDO;
375 	return(nstate++);
376 	}
377 
378 int pidebug = 0; /* debugging flag for putitem */
379 putitem( ptr, lptr )  int *ptr;  struct looksets *lptr; {
380 	register struct item *j;
381 
382 	if( pidebug && (foutput!=NULL) ) {
383 		fprintf( foutput, "putitem(%s), state %d\n", writem(ptr), nstate );
384 		}
385 	j = pstate[nstate+1];
386 	j->pitem = ptr;
387 	if( !nolook ) j->look = flset( lptr );
388 	pstate[nstate+1] = ++j;
389 	if( (int *)j > zzmemsz ){
390 		zzmemsz = (int *)j;
391 		if( zzmemsz >=  &mem0[MEMSIZE] ) error( "out of state space" );
392 		}
393 	}
394 
395 cempty(){ /* mark nonterminals which derive the empty string */
396 	/* also, look for nonterminals which don't derive any token strings */
397 
398 # define EMPTY 1
399 # define WHOKNOWS 0
400 # define OK 1
401 
402 	register i, *p;
403 
404 	/* first, use the array pempty to detect productions that can never be reduced */
405 	/* set pempty to WHONOWS */
406 	aryfil( pempty, nnonter+1, WHOKNOWS );
407 
408 	/* now, look at productions, marking nonterminals which derive something */
409 
410 	more:
411 	PLOOP(0,i){
412 		if( pempty[ *prdptr[i] - NTBASE ] ) continue;
413 		for( p=prdptr[i]+1; *p>=0; ++p ){
414 			if( *p>=NTBASE && pempty[ *p-NTBASE ] == WHOKNOWS ) break;
415 			}
416 		if( *p < 0 ){ /* production can be derived */
417 			pempty[ *prdptr[i]-NTBASE ] = OK;
418 			goto more;
419 			}
420 		}
421 
422 	/* now, look at the nonterminals, to see if they are all OK */
423 
424 	NTLOOP(i){
425 		/* the added production rises or falls as the start symbol ... */
426 		if( i == 0 ) continue;
427 		if( pempty[ i ] != OK ) {
428 			fatfl = 0;
429 			error( "nonterminal %s never derives any token string", nontrst[i].name );
430 			}
431 		}
432 
433 	if( nerrors ){
434 		summary();
435 		exit(1);
436 		}
437 
438 	/* now, compute the pempty array, to see which nonterminals derive the empty string */
439 
440 	/* set pempty to WHOKNOWS */
441 
442 	aryfil( pempty, nnonter+1, WHOKNOWS );
443 
444 	/* loop as long as we keep finding empty nonterminals */
445 
446 again:
447 	PLOOP(1,i){
448 		if( pempty[ *prdptr[i]-NTBASE ]==WHOKNOWS ){ /* not known to be empty */
449 			for( p=prdptr[i]+1; *p>=NTBASE && pempty[*p-NTBASE]==EMPTY ; ++p ) ;
450 			if( *p < 0 ){ /* we have a nontrivially empty nonterminal */
451 				pempty[*prdptr[i]-NTBASE] = EMPTY;
452 				goto again; /* got one ... try for another */
453 				}
454 			}
455 		}
456 
457 	}
458 
459 int gsdebug = 0;
460 stagen(){ /* generate the states */
461 
462 	int i, j;
463 	register c;
464 	register struct wset *p, *q;
465 
466 	/* initialize */
467 
468 	nstate = 0;
469 	/* THIS IS FUNNY from the standpoint of portability */
470 	/* it represents the magic moment when the mem0 array, which has
471 	/* been holding the productions, starts to hold item pointers, of a
472 	/* different type... */
473 	/* someday, alloc should be used to allocate all this stuff... for now, we
474 	/* accept that if pointers don't fit in integers, there is a problem... */
475 
476 	pstate[0] = pstate[1] = (struct item *)mem;
477 	aryfil( clset.lset, tbitset, 0 );
478 	putitem( prdptr[0]+1, &clset );
479 	tystate[0] = MUSTDO;
480 	nstate = 1;
481 	pstate[2] = pstate[1];
482 
483 	aryfil( amem, ACTSIZE, 0 );
484 
485 	/* now, the main state generation loop */
486 
487 	more:
488 	SLOOP(i){
489 		if( tystate[i] != MUSTDO ) continue;
490 		tystate[i] = DONE;
491 		aryfil( temp1, nnonter+1, 0 );
492 		/* take state i, close it, and do gotos */
493 		closure(i);
494 		WSLOOP(wsets,p){ /* generate goto's */
495 			if( p->flag ) continue;
496 			p->flag = 1;
497 			c = *(p->pitem);
498 			if( c <= 1 ) {
499 				if( pstate[i+1]-pstate[i] <= p-wsets ) tystate[i] = MUSTLOOKAHEAD;
500 				continue;
501 				}
502 			/* do a goto on c */
503 			WSLOOP(p,q){
504 				if( c == *(q->pitem) ){ /* this item contributes to the goto */
505 					putitem( q->pitem + 1, &q->ws );
506 					q->flag = 1;
507 					}
508 				}
509 			if( c < NTBASE ) {
510 				state(c);  /* register new state */
511 				}
512 			else {
513 				temp1[c-NTBASE] = state(c);
514 				}
515 			}
516 		if( gsdebug && (foutput!=NULL) ){
517 			fprintf( foutput,  "%d: ", i );
518 			NTLOOP(j) {
519 				if( temp1[j] ) fprintf( foutput,  "%s %d, ", nontrst[j].name, temp1[j] );
520 				}
521 			fprintf( foutput, "\n");
522 			}
523 		indgo[i] = apack( &temp1[1], nnonter-1 ) - 1;
524 		goto more; /* we have done one goto; do some more */
525 		}
526 	/* no more to do... stop */
527 	}
528 
529 int cldebug = 0; /* debugging flag for closure */
530 closure(i){ /* generate the closure of state i */
531 
532 	int c, ch, work, k;
533 	register struct wset *u, *v;
534 	int *pi;
535 	int **s, **t;
536 	struct item *q;
537 	register struct item *p;
538 
539 	++zzclose;
540 
541 	/* first, copy kernel of state i to wsets */
542 
543 	cwp = wsets;
544 	ITMLOOP(i,p,q){
545 		cwp->pitem = p->pitem;
546 		cwp->flag = 1;    /* this item must get closed */
547 		SETLOOP(k) cwp->ws.lset[k] = p->look->lset[k];
548 		WSBUMP(cwp);
549 		}
550 
551 	/* now, go through the loop, closing each item */
552 
553 	work = 1;
554 	while( work ){
555 		work = 0;
556 		WSLOOP(wsets,u){
557 
558 			if( u->flag == 0 ) continue;
559 			c = *(u->pitem);  /* dot is before c */
560 
561 			if( c < NTBASE ){
562 				u->flag = 0;
563 				continue;  /* only interesting case is where . is before nonterminal */
564 				}
565 
566 			/* compute the lookahead */
567 			aryfil( clset.lset, tbitset, 0 );
568 
569 			/* find items involving c */
570 
571 			WSLOOP(u,v){
572 				if( v->flag == 1 && *(pi=v->pitem) == c ){
573 					v->flag = 0;
574 					if( nolook ) continue;
575 					while( (ch= *++pi)>0 ){
576 						if( ch < NTBASE ){ /* terminal symbol */
577 							SETBIT( clset.lset, ch );
578 							break;
579 							}
580 						/* nonterminal symbol */
581 						setunion( clset.lset, pfirst[ch-NTBASE]->lset );
582 						if( !pempty[ch-NTBASE] ) break;
583 						}
584 					if( ch<=0 ) setunion( clset.lset, v->ws.lset );
585 					}
586 				}
587 
588 			/*  now loop over productions derived from c */
589 
590 			c -= NTBASE; /* c is now nonterminal number */
591 
592 			t = pres[c+1];
593 			for( s=pres[c]; s<t; ++s ){
594 				/* put these items into the closure */
595 				WSLOOP(wsets,v){ /* is the item there */
596 					if( v->pitem == *s ){ /* yes, it is there */
597 						if( nolook ) goto nexts;
598 						if( setunion( v->ws.lset, clset.lset ) ) v->flag = work = 1;
599 						goto nexts;
600 						}
601 					}
602 
603 				/*  not there; make a new entry */
604 				if( cwp-wsets+1 >= WSETSIZE ) error( "working set overflow" );
605 				cwp->pitem = *s;
606 				cwp->flag = 1;
607 				if( !nolook ){
608 					work = 1;
609 					SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
610 					}
611 				WSBUMP(cwp);
612 
613 			nexts: ;
614 				}
615 
616 			}
617 		}
618 
619 	/* have computed closure; flags are reset; return */
620 
621 	if( cwp > zzcwp ) zzcwp = cwp;
622 	if( cldebug && (foutput!=NULL) ){
623 		fprintf( foutput, "\nState %d, nolook = %d\n", i, nolook );
624 		WSLOOP(wsets,u){
625 			if( u->flag ) fprintf( foutput, "flag set!\n");
626 			u->flag = 0;
627 			fprintf( foutput, "\t%s", writem(u->pitem));
628 			prlook( &u->ws );
629 			fprintf( foutput,  "\n" );
630 			}
631 		}
632 	}
633 
634 struct looksets *flset( p )   struct looksets *p; {
635 	/* decide if the lookahead set pointed to by p is known */
636 	/* return pointer to a perminent location for the set */
637 
638 	register struct looksets *q;
639 	int j, *w;
640 	register *u, *v;
641 
642 	for( q = &lkst[nlset]; q-- > lkst; ){
643 		u = p->lset;
644 		v = q->lset;
645 		w = & v[tbitset];
646 		while( v<w) if( *u++ != *v++ ) goto more;
647 		/* we have matched */
648 		return( q );
649 		more: ;
650 		}
651 	/* add a new one */
652 	q = &lkst[nlset++];
653 	if( nlset >= LSETSIZE )error("too many lookahead sets" );
654 	SETLOOP(j){
655 		q->lset[j] = p->lset[j];
656 		}
657 	return( q );
658 	}
659