xref: /original-bsd/usr.bin/pascal/eyacc/ey2.c (revision 2301fdfb)
1 /*
2  * Copyright (c) 1979 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  */
6 
7 #ifndef lint
8 static char sccsid[] = "@(#)ey2.c	5.1 (Berkeley) 04/29/85";
9 #endif not lint
10 
11 # include "ey.h"
/*
 * Token codes returned by gettok().  gettok() returns any other single
 * character as itself, so these codes are all above 256.
 */
12 # define IDENTIFIER 257
13 # define MARK 258
14 # define TERM 259
15 # define LEFT 260
16 # define BINARY 261
17 # define RIGHT 262
18 # define PREC 263
19 # define LCURLY 264
20 # define C_IDENTIFIER 265  /* name followed by colon */
21 # define NUMBER 266
22 
23 FILE	*copen();
24 
25 setup(argc,argv) int argc; char *argv[];
26 {	int i,j,lev,t;
27 	int c;
28 
29 	foutput = stdout;
30 	i = 1;
31 	while( argc >= 2  && argv[1][0] == '-' ) {
32 		while( *++(argv[1]) ){
33 			switch( *argv[1] ){
34 			case 'v':
35 			case 'V':
36 				foutput = copen("y.output", 'w' );
37 				if( foutput == 0 ) error( "cannot open y.output");
38 				continue;
39 			case 'o':
40 			case 'O':
41 				oflag = 1;
42 				continue;
43 			case 'r':
44 			case 'R':
45 				oflag = 1;
46 				rflag = 1;
47 				continue;
48 			default:  error( "illegal option: %c", *argv[1]);
49 				}
50 			}
51 		argv++;
52 		argc--;
53 		}
54 
55 	ftable = copen( oflag ? "yacc.tmp" : "y.tab.c" , 'w' );
56 	if( ftable==0 ) error( "cannot open table file" );
57 	if( argc > 1 ) { cin = copen( argv[1], 'r' );
58 	if( cin == 0 ) error( "cannot open input" );
59 	}
60 	settab();
61 	fprintf( cout , "#\n");
62 	ctokn = "$end";
63 	defin(0);  /* eof */
64 	extval = 0400;  /* beginning of assigned values */
65 	ctokn = "error";
66 	defin(0);
67 	ctokn = "$accept";
68 	defin(1);
69 	mem=mem0;
70 	cnamp = cnames;
71 	lev=0;
72 	i=0;
73 
74 	while( ( t = gettok() ) != EOF ) {
75 		switch( t ){
76 			case IDENTIFIER:	j = chfind(0);
77 					trmlev[j] = lev;
78 					continue;
79 			case ',':
80 			case ';':		continue;
81 			case TERM:		lev=0; continue;
82 			case LEFT:		lev=(++i<<3)|01; continue;
83 			case BINARY:	lev=(++i<<3)|02; continue;
84 			case RIGHT:	lev=(++i<<3)|03; continue;
85 			case MARK:
86 					defout();
87 					if( rflag ){ /* RATFOR */
88 						fprintf( cout ,  "define yyerrok yyerrf = 0\n" );
89 						fprintf( cout ,  "define yyclearin yychar = -1\n" );
90 						fprintf( cout ,  "subroutine yyactr(yyprdn)\n");
91 						fprintf( cout ,  "common/yycomn/yylval,yyval,yypv,yyvalv(150)\n" );
92 						fprintf( cout ,  "common/yylcom/yychar,yyerrf,yydebu\n" );
93 						fprintf( cout ,  "integer yychar, yyerrf, yydebu\n" );
94 						fprintf( cout ,  "integer yyprdn,yyval,yylval,yypv,yyvalv\n" );
95 						}
96 					else {
97 						fprintf( cout ,  "#define yyclearin yychar = -1\n" );
98 						fprintf( cout ,  "#define yyerrok yyerrflag = 0\n" );
99 						fprintf( cout ,  "extern int yychar, yyerrflag;\n" );
100 						fprintf( cout , "\nint yyval 0;\nint *yypv;\nint yylval 0;");
101 						fprintf( cout , "\nyyactr(__np__){\n");
102 						}
103 					break;
104 			case LCURLY:	defout();
105 					cpycode();
106 					continue;
107 			case NUMBER:
108 				trmset[j].value = numbval;
109 				if( j < ndefout && j>2 )
110 					error("please define type # of %s earlier", trmset[j].name );
111 				continue;
112 			default:	error("bad precedence syntax, input %d", t );
113 			}
114 		break;
115 		}
116 	prdptr[0]=mem;
117 	/* added production */
118 	*mem++ = NTBASE;
119 	*mem++ = NTBASE+1;
120 	*mem++ = 1;
121 	*mem++ = 0;
122 	prdptr[1]=mem;
123 	i=0;
124 
125 	/* i is 0 when a rule can begin, 1 otherwise */
126 
127 	for(;;) switch( t=gettok() ) {
128 	case C_IDENTIFIER:		if( mem == prdptr[1] ) {  /* first time */
129 						if( rflag ){
130 							fprintf( cout ,  "goto 1000\n" );
131 							}
132 						else fprintf( cout , "\nswitch(__np__){\n");
133 						}
134 				if( i != 0 ) error( "previous rule not terminated" );
135 				*mem = chfind(1);
136 				if( *mem < NTBASE )error( "token illegal on lhs of grammar rule" );
137 				i=1;
138 				++mem;
139 				continue;
140 	case IDENTIFIER:
141 			*mem=chfind(1);
142 			if(*mem < NTBASE)levprd[nprod]=trmlev[*mem];
143 			mem++;
144 			if(i==0) error("missing :");
145 			continue;
146 	case '=':		levprd[nprod] |= 04;
147 				if( i==0 ) error("semicolon preceeds action");
148 			fprintf( cout ,  rflag?"\n%d ":"\ncase %d:", nprod );
149 			cpyact();
150 			fprintf( cout ,  rflag ? " return" : " break;" );
151 	case '|':
152 	case ';':		if(i){
153 				*mem++ = -nprod;
154 				prdptr[++nprod] = mem;
155 				levprd[nprod]=0;
156 				i=0;}
157 			if (t=='|'){i=1;*mem++ = *prdptr[nprod-1];}
158 			continue;
159 	case 0:		/* End Of File */
160 	case EOF:
161 	case MARK:	if( i != 0 ) error( "rule not terminated before %%%% or EOF" );
162 			settab();
163 			finact();
164 			/* copy the programs which follow the rules */
165 			if( t == MARK ){
166 				while (( c=fgetc( cin)) != EOF ) fputc(c,cout);
167 				}
168 			return;
169 	case PREC:
170 		if( i==0 ) error( "%%prec must appear inside rule" );
171 		if( gettok()!=IDENTIFIER)error("illegal %%prec syntax" );
172 		j=chfind(2);
173 		if(j>=NTBASE)error("nonterminal %s illegal after %%prec", nontrst[j-NTBASE].name);
174 		levprd[nprod]=trmlev[j];
175 		continue;
176 	case LCURLY:
177 		if( i!=0 ) error( "%%{ appears within a rule" );
178 		cpycode();
179 		continue;
180 	default: error( "syntax error, input %d", t  );
181 	}
182 }
183 
184 finact(){
185 	/* finish action routine */
186 	register i;
187 
188 	if( rflag ){
189 
190 		fprintf( cout ,  "\n1000 goto(" );
191 		for( i=1; i<nprod; ++i ){
192 			fprintf( cout ,  "%d,", (levprd[i]&04)==0?999:i );
193 			}
194 		fprintf( cout ,  "999),yyprdn\n" );
195 		fprintf( cout ,  "999 return\nend\n" );
196 		fprintf( cout ,  "define YYERRCODE %d\n", trmset[2].value );
197 		}
198 	else {
199 		fprintf( cout ,  "\n}\n}\n" );
200 		fprintf( cout ,  "int yyerrval %d;\n", trmset[2].value );
201 		}
202 	}
203 defin(t) {
204 /*	define ctokn to be a terminal if t=0
205 	or a nonterminal if t=1		*/
206 	char *cp,*p;
207 	int c;
208 
209 
210         if (t) {
211           if( ++nnonter >= ntlim ) error("too many nonterminals, limit %d",ntlim);
212 	  nontrst[nnonter].name = ctokn;
213 	  return( NTBASE + nnonter );
214           }
215         else {
216           if( ++nterms >= tlim ) error("too many terminals, limit %d",tlim );
217           trmset[nterms].name = ctokn;
218 	if( ctokn[0]==' ' && ctokn[2]=='\0' ) /* single character literal */
219 		trmset[nterms].value = ctokn[1];
220 	else if ( ctokn[0]==' ' && ctokn[1]=='\\' ) { /* escape sequence */
221 		if( ctokn[3] == '\0' ){ /* single character escape sequence */
222 			switch ( ctokn[2] ){
223 				 /* character which is escaped */
224 			case 'n': trmset[nterms].value = '\n'; break;
225 			case 'r': trmset[nterms].value = '\r'; break;
226 			case 'b': trmset[nterms].value = '\b'; break;
227 			case 't': trmset[nterms].value = '\t'; break;
228 			case '\'': trmset[nterms].value = '\''; break;
229 			case '"': trmset[nterms].value = '"'; break;
230 			case '\\': trmset[nterms].value = '\\'; break;
231 			default: error( "invalid escape" );
232 				}
233 			}
234 		else if( ctokn[2] <= '7' && ctokn[2]>='0' ){ /* \nnn sequence */
235 			if( ctokn[3]<'0' || ctokn[3] > '7' || ctokn[4]<'0' ||
236 				ctokn[4]>'7' || ctokn[5] != '\0' ) error("illegal \\nnn construction" );
237 			trmset[nterms].value = 64*(ctokn[2]-'0')+8*(ctokn[3]-'0')+ctokn[4]-'0';
238 			if( trmset[nterms].value == 0 ) error( "'\\000' is illegal" );
239 			}
240 		}
241 	else {
242 		trmset[nterms].value = extval++;
243 
244 		}
245 	trmlev[nterms] = 0;
246 	return( nterms );
247           }
248 }
249 
250 defout(){ /* write out the defines (at the end of the declaration section) */
251 
252 	_REGISTER int i, c;
253 	_REGISTER char *cp;
254 
255 	for( i=ndefout; i<=nterms; ++i ){
256 
257 		cp = trmset[i].name;
258 		if( *cp == ' ' ) ++cp;  /* literals */
259 
260 		for( ; (c= *cp)!='\0'; ++cp ){
261 
262 			if( c>='a' && c<='z' ||
263 			    c>='A' && c<='Z' ||
264 			    c>='0' && c<='9' ||
265 			    c=='_' )  ; /* VOID */
266 			else goto nodef;
267 			}
268 
269 		/* define it */
270 
271 		fprintf( cout ,  "%c define %s %d\n", rflag?' ':'#', trmset[i].name, trmset[i].value );
272 
273 	nodef:	;
274 		}
275 
276 	ndefout = nterms+1;
277 
278 	}
279 
280 chstash( c ){
281   /* put character away into cnames */
282   if( cnamp >= &cnames[cnamsz] ) error("too many characters in id's and literals" );
283   else *cnamp++ = c;
284   }
285 
286 int gettok() {
287 	int j, base;
288 	static int peekline; /* number of '\n' seen in lookahead */
289 	auto int c, match, reserve;
290 
291 begin:
292 	reserve = 0;
293         if( peekc>=0 ) {
294 		c = peekc;
295 		lineno += peekline;
296 		peekc = -1;
297 		peekline = 0;
298 		}
299         else c = fgetc( cin);
300         while( c==' ' || c=='\n' || c=='\t' || c == '\014'){
301           if( c == '\n' ) ++lineno;
302           c=fgetc( cin);
303           }
304 	if (c=='/')
305 		{if (fgetc( cin)!='*')error("illegal /");
306 		c=fgetc( cin);
307 		while(c != EOF) {
308 			if( c == '\n' ) ++lineno;
309 			if (c=='*')
310 				{if((c=fgetc( cin))=='/')break;}
311 			else c=fgetc( cin);}
312 		if (!c) return(0);
313 		goto begin;}
314 	j=0;
315 	switch(c){
316 	case '"':
317 	case '\'':	match = c;
318 			ctokn = cnamp;
319 			chstash( ' ' );
320 			while(1){
321 				c = fgetc( cin);
322 				if( c == '\n' || c == '\0' )
323 					error("illegal or missing ' or \"");
324 				if( c == '\\' ){
325 					c = fgetc( cin);
326 					chstash( '\\' );
327 					}
328 				else if( c == match ) break;
329 				chstash( c );
330 				}
331 			break;
332 	case '%':
333 	case '\\':	switch(c=fgetc( cin))
334 		{case '0':	return(TERM);
335 		case '<':	return(LEFT);
336 		case '2':	return(BINARY);
337 		case '>':	return(RIGHT);
338 		case '%':
339 		case '\\':	return(MARK);
340 		case '=':	return(PREC);
341 		case '{':	return(LCURLY);
342 		default:	reserve = 1;
343 		}
344 	default:	if( c >= '0' && c <= '9' ){ /* number */
345 				numbval = c-'0' ;
346 				base = (c=='0') ? 8 : 10 ;
347 				for( c=fgetc( cin); c>='0' && c<='9'; c=fgetc( cin) ){
348 					numbval = numbval*base + c - '0';
349 					}
350 				peekc = c;
351 				return(NUMBER);
352 				}
353 			else if( (c>='a'&&c<='z')||(c>='A'&&c<='Z')||c=='_'||c=='.'||c=='$'){
354 				ctokn = cnamp;
355 				while(	(c>='a'&&c<='z') ||
356 					(c>='A'&&c<='Z') ||
357 					(c>='0'&&c<='9') ||
358 					c=='_' || c=='.' || c=='$' ) {
359 					chstash( c );
360 					if( peekc>=0 ) { c = peekc; peekc = -1; }
361 					else c = fgetc( cin);
362 					}
363 				}
364 			else return(c);
365 
366 			peekc=c;
367 			}
368 	chstash( '\0' );
369 
370 	if( reserve ){ /* find a reserved word */
371 		if( compare("term")) return( TERM );
372 		if( compare("TERM")) return( TERM );
373 		if( compare("token")) return( TERM );
374 		if( compare("TOKEN")) return( TERM );
375 		if( compare("left")) return( LEFT );
376 		if( compare("LEFT")) return( LEFT );
377 		if( compare("nonassoc")) return( BINARY );
378 		if( compare("NONASSOC")) return( BINARY );
379 		if( compare("binary")) return( BINARY );
380 		if( compare("BINARY")) return( BINARY );
381 		if( compare("right")) return( RIGHT );
382 		if( compare("RIGHT")) return( RIGHT );
383 		if( compare("prec")) return( PREC );
384 		if( compare("PREC")) return( PREC );
385 		error("invalid escape, or illegal reserved word: %s", ctokn );
386 		}
387 
388 	/* look ahead to distinguish IDENTIFIER from C_IDENTIFIER */
389 
390   look:
391 	while( peekc==' ' || peekc=='\t' || peekc == '\n' || peekc == '\014' )
392 	{
393 		if( peekc == '\n' ) ++peekline;
394 		peekc = fgetc( cin);
395 	}
396 
397 	if( peekc != ':' ) return( IDENTIFIER );
398 	peekc = -1;
399 	lineno += peekline;
400 	peekline = 0;
401 	return( C_IDENTIFIER );
402 }
403 chfind(t)
404 
405 {	int i,j;
406 
407 	if (ctokn[0]==' ')t=0;
408 	for(i=1;i<=nterms;i++)
409 		if(compare(trmset[i].name)){
410 			cnamp = ctokn;
411 			return( i );
412 			}
413 	for(i=1;i<=nnonter;i++)
414 		if(compare(nontrst[i].name)) {
415 			cnamp = ctokn;
416 			return( i+NTBASE );
417 			}
418 	/* cannot find name */
419 	if( t>1 && ctokn[0] != ' ' )
420 		error( "%s should have been defined earlier", ctokn );
421 	return( defin( t ) );
422 	}
423 
424 cpycode(){ /* copies code between \{ and \} */
425 
426 	int c;
427 	c = fgetc( cin);
428 	if( c == '\n' ) {
429 		c = fgetc( cin);
430 		lineno++;
431 		}
432 	while( c != EOF ){
433 		if( c=='\\' )
434 			if( (c=fgetc( cin)) == '}' ) return;
435 			else fputc('\\',cout);
436 		if( c=='%' )
437 			if( (c=fgetc( cin)) == '}' ) return;
438 			else fputc('%',cout);
439 		fputc( c, cout );
440 		if( c == '\n' ) ++lineno;
441 		c = fgetc( cin);
442 		}
443 	error("eof before %%}");
444 	}
445 
446 cpyact(){ /* copy C action to the next ; or closing } */
447 	int brac, c, match, *i, j, s;
448 
449 	brac = 0;
450 
451 loop:
452 	c = fgetc( cin);
453 swt:
454 	switch( c ){
455 
456 case ';':
457 		if( brac == 0 ){
458 			fputc( c, cout );
459 			return;
460 			}
461 		goto lcopy;
462 
463 case '{':
464 		brac++;
465 		goto lcopy;
466 
467 case '$':
468 		s = 1;
469 		c = fgetc( cin);
470 		if( c == '$' ){
471 			fprintf( cout , "yyval");
472 			goto loop;
473 			}
474 		if( c == '-' ){
475 			s = -s;
476 			c = fgetc( cin);
477 			}
478 		if( c>='0' && c <= '9' ){
479 			j=0;
480 			while( c>='0' && c<= '9' ){
481 				j= j*10+c-'0';
482 				c = fgetc( cin);
483 				}
484 			if( rflag ) fprintf( cout ,  "yyvalv(yypv%c%d)", s==1?'+':'-', j );
485 			else fprintf( cout , "yypv[%d]", s*j );
486 			goto swt;
487 			}
488 		fputc( '$' , cout);
489 		if( s<0 ) fputc('-', cout);
490 		goto swt;
491 
492 case '}':
493 		brac--;
494 		if( brac == 0 ){
495 			fputc( c , cout);
496 			return;
497 			}
498 		goto lcopy;
499 
500 case '/':	/* look for comments */
501 		fputc( c ,cout);
502 		c = fgetc( cin);
503 		if( c != '*' ) goto swt;
504 
505 		/* it really is a comment */
506 
507 		fputc( c , cout);
508 		while( (c=fgetc( cin)) != EOF ){
509 			if( c=='*' ){
510 				fputc( c , cout);
511 				if( (c=fgetc( cin)) == '/' ) goto lcopy;
512 				}
513 			fputc( c , cout);
514 			}
515 		error( "EOF inside comment" );
516 
517 case '\'':	/* character constant */
518 		match = '\'';
519 		goto string;
520 
521 case '"':	/* character string */
522 		match = '"';
523 
524 	string:
525 
526 		fputc( c , cout);
527 		while( (c=fgetc( cin)) != EOF ){
528 
529 			if( c=='\\' ){
530 				fputc( c , cout);
531 				c=fgetc( cin);
532 				}
533 			else if( c==match ) goto lcopy;
534 			fputc( c , cout);
535 			}
536 		error( "EOF in string or character constant" );
537 
538 case '\0':
539 		error("action does not terminate");
540 case '\n':	++lineno;
541 		goto lcopy;
542 
543 		}
544 
545 lcopy:
546 	fputc( c , cout);
547 	goto loop;
548 	}
549