xref: /original-bsd/usr.bin/pascal/eyacc/ey2.c (revision c3e32dec)
1 /*-
2  * Copyright (c) 1979, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * %sccs.include.proprietary.c%
6  */
7 
8 #ifndef lint
9 static char sccsid[] = "@(#)ey2.c	8.1 (Berkeley) 06/06/93";
10 #endif /* not lint */
11 
12 # include "ey.h"
/*
 * Token codes handed back by gettok().  Any other return value from
 * gettok() is the input character itself.
 */
#define IDENTIFIER	257	/* name or quoted literal */
#define MARK		258	/* %% or \\ */
#define TERM		259	/* %0, %term, %token */
#define LEFT		260	/* %<, %left */
#define BINARY		261	/* %2, %binary, %nonassoc */
#define RIGHT		262	/* %>, %right */
#define PREC		263	/* %=, %prec */
#define LCURLY		264	/* %{ */
#define C_IDENTIFIER	265	/* name followed by colon */
#define NUMBER		266	/* digit string; value left in numbval */

/* defined elsewhere; callers here treat a 0 return as "could not open" */
FILE	*copen();
25 
26 setup(argc,argv) int argc; char *argv[];
27 {	int i,j,lev,t;
28 	int c;
29 
30 	foutput = stdout;
31 	i = 1;
32 	while( argc >= 2  && argv[1][0] == '-' ) {
33 		while( *++(argv[1]) ){
34 			switch( *argv[1] ){
35 			case 'v':
36 			case 'V':
37 				foutput = copen("y.output", 'w' );
38 				if( foutput == 0 ) error( "cannot open y.output");
39 				continue;
40 			case 'o':
41 			case 'O':
42 				oflag = 1;
43 				continue;
44 			case 'r':
45 			case 'R':
46 				oflag = 1;
47 				rflag = 1;
48 				continue;
49 			default:  error( "illegal option: %c", *argv[1]);
50 				}
51 			}
52 		argv++;
53 		argc--;
54 		}
55 
56 	ftable = copen( oflag ? "yacc.tmp" : "y.tab.c" , 'w' );
57 	if( ftable==0 ) error( "cannot open table file" );
58 	if( argc > 1 ) { cin = copen( argv[1], 'r' );
59 	if( cin == 0 ) error( "cannot open input" );
60 	}
61 	settab();
62 	fprintf( cout , "#\n");
63 	ctokn = "$end";
64 	defin(0);  /* eof */
65 	extval = 0400;  /* beginning of assigned values */
66 	ctokn = "error";
67 	defin(0);
68 	ctokn = "$accept";
69 	defin(1);
70 	mem=mem0;
71 	cnamp = cnames;
72 	lev=0;
73 	i=0;
74 
75 	while( ( t = gettok() ) != EOF ) {
76 		switch( t ){
77 			case IDENTIFIER:	j = chfind(0);
78 					trmlev[j] = lev;
79 					continue;
80 			case ',':
81 			case ';':		continue;
82 			case TERM:		lev=0; continue;
83 			case LEFT:		lev=(++i<<3)|01; continue;
84 			case BINARY:	lev=(++i<<3)|02; continue;
85 			case RIGHT:	lev=(++i<<3)|03; continue;
86 			case MARK:
87 					defout();
88 					if( rflag ){ /* RATFOR */
89 						fprintf( cout ,  "define yyerrok yyerrf = 0\n" );
90 						fprintf( cout ,  "define yyclearin yychar = -1\n" );
91 						fprintf( cout ,  "subroutine yyactr(yyprdn)\n");
92 						fprintf( cout ,  "common/yycomn/yylval,yyval,yypv,yyvalv(150)\n" );
93 						fprintf( cout ,  "common/yylcom/yychar,yyerrf,yydebu\n" );
94 						fprintf( cout ,  "integer yychar, yyerrf, yydebu\n" );
95 						fprintf( cout ,  "integer yyprdn,yyval,yylval,yypv,yyvalv\n" );
96 						}
97 					else {
98 						fprintf( cout ,  "#define yyclearin yychar = -1\n" );
99 						fprintf( cout ,  "#define yyerrok yyerrflag = 0\n" );
100 						fprintf( cout ,  "extern int yychar, yyerrflag;\n" );
101 						fprintf( cout , "\nint yyval 0;\nint *yypv;\nint yylval 0;");
102 						fprintf( cout , "\nyyactr(__np__){\n");
103 						}
104 					break;
105 			case LCURLY:	defout();
106 					cpycode();
107 					continue;
108 			case NUMBER:
109 				trmset[j].value = numbval;
110 				if( j < ndefout && j>2 )
111 					error("please define type # of %s earlier", trmset[j].name );
112 				continue;
113 			default:	error("bad precedence syntax, input %d", t );
114 			}
115 		break;
116 		}
117 	prdptr[0]=mem;
118 	/* added production */
119 	*mem++ = NTBASE;
120 	*mem++ = NTBASE+1;
121 	*mem++ = 1;
122 	*mem++ = 0;
123 	prdptr[1]=mem;
124 	i=0;
125 
126 	/* i is 0 when a rule can begin, 1 otherwise */
127 
128 	for(;;) switch( t=gettok() ) {
129 	case C_IDENTIFIER:		if( mem == prdptr[1] ) {  /* first time */
130 						if( rflag ){
131 							fprintf( cout ,  "goto 1000\n" );
132 							}
133 						else fprintf( cout , "\nswitch(__np__){\n");
134 						}
135 				if( i != 0 ) error( "previous rule not terminated" );
136 				*mem = chfind(1);
137 				if( *mem < NTBASE )error( "token illegal on lhs of grammar rule" );
138 				i=1;
139 				++mem;
140 				continue;
141 	case IDENTIFIER:
142 			*mem=chfind(1);
143 			if(*mem < NTBASE)levprd[nprod]=trmlev[*mem];
144 			mem++;
145 			if(i==0) error("missing :");
146 			continue;
147 	case '=':		levprd[nprod] |= 04;
148 				if( i==0 ) error("semicolon preceeds action");
149 			fprintf( cout ,  rflag?"\n%d ":"\ncase %d:", nprod );
150 			cpyact();
151 			fprintf( cout ,  rflag ? " return" : " break;" );
152 	case '|':
153 	case ';':		if(i){
154 				*mem++ = -nprod;
155 				prdptr[++nprod] = mem;
156 				levprd[nprod]=0;
157 				i=0;}
158 			if (t=='|'){i=1;*mem++ = *prdptr[nprod-1];}
159 			continue;
160 	case 0:		/* End Of File */
161 	case EOF:
162 	case MARK:	if( i != 0 ) error( "rule not terminated before %%%% or EOF" );
163 			settab();
164 			finact();
165 			/* copy the programs which follow the rules */
166 			if( t == MARK ){
167 				while (( c=fgetc( cin)) != EOF ) fputc(c,cout);
168 				}
169 			return;
170 	case PREC:
171 		if( i==0 ) error( "%%prec must appear inside rule" );
172 		if( gettok()!=IDENTIFIER)error("illegal %%prec syntax" );
173 		j=chfind(2);
174 		if(j>=NTBASE)error("nonterminal %s illegal after %%prec", nontrst[j-NTBASE].name);
175 		levprd[nprod]=trmlev[j];
176 		continue;
177 	case LCURLY:
178 		if( i!=0 ) error( "%%{ appears within a rule" );
179 		cpycode();
180 		continue;
181 	default: error( "syntax error, input %d", t  );
182 	}
183 }
184 
185 finact(){
186 	/* finish action routine */
187 	register i;
188 
189 	if( rflag ){
190 
191 		fprintf( cout ,  "\n1000 goto(" );
192 		for( i=1; i<nprod; ++i ){
193 			fprintf( cout ,  "%d,", (levprd[i]&04)==0?999:i );
194 			}
195 		fprintf( cout ,  "999),yyprdn\n" );
196 		fprintf( cout ,  "999 return\nend\n" );
197 		fprintf( cout ,  "define YYERRCODE %d\n", trmset[2].value );
198 		}
199 	else {
200 		fprintf( cout ,  "\n}\n}\n" );
201 		fprintf( cout ,  "int yyerrval %d;\n", trmset[2].value );
202 		}
203 	}
204 defin(t) {
205 /*	define ctokn to be a terminal if t=0
206 	or a nonterminal if t=1		*/
207 	char *cp,*p;
208 	int c;
209 
210 
211         if (t) {
212           if( ++nnonter >= ntlim ) error("too many nonterminals, limit %d",ntlim);
213 	  nontrst[nnonter].name = ctokn;
214 	  return( NTBASE + nnonter );
215           }
216         else {
217           if( ++nterms >= tlim ) error("too many terminals, limit %d",tlim );
218           trmset[nterms].name = ctokn;
219 	if( ctokn[0]==' ' && ctokn[2]=='\0' ) /* single character literal */
220 		trmset[nterms].value = ctokn[1];
221 	else if ( ctokn[0]==' ' && ctokn[1]=='\\' ) { /* escape sequence */
222 		if( ctokn[3] == '\0' ){ /* single character escape sequence */
223 			switch ( ctokn[2] ){
224 				 /* character which is escaped */
225 			case 'n': trmset[nterms].value = '\n'; break;
226 			case 'r': trmset[nterms].value = '\r'; break;
227 			case 'b': trmset[nterms].value = '\b'; break;
228 			case 't': trmset[nterms].value = '\t'; break;
229 			case '\'': trmset[nterms].value = '\''; break;
230 			case '"': trmset[nterms].value = '"'; break;
231 			case '\\': trmset[nterms].value = '\\'; break;
232 			default: error( "invalid escape" );
233 				}
234 			}
235 		else if( ctokn[2] <= '7' && ctokn[2]>='0' ){ /* \nnn sequence */
236 			if( ctokn[3]<'0' || ctokn[3] > '7' || ctokn[4]<'0' ||
237 				ctokn[4]>'7' || ctokn[5] != '\0' ) error("illegal \\nnn construction" );
238 			trmset[nterms].value = 64*(ctokn[2]-'0')+8*(ctokn[3]-'0')+ctokn[4]-'0';
239 			if( trmset[nterms].value == 0 ) error( "'\\000' is illegal" );
240 			}
241 		}
242 	else {
243 		trmset[nterms].value = extval++;
244 
245 		}
246 	trmlev[nterms] = 0;
247 	return( nterms );
248           }
249 }
250 
251 defout(){ /* write out the defines (at the end of the declaration section) */
252 
253 	_REGISTER int i, c;
254 	_REGISTER char *cp;
255 
256 	for( i=ndefout; i<=nterms; ++i ){
257 
258 		cp = trmset[i].name;
259 		if( *cp == ' ' ) ++cp;  /* literals */
260 
261 		for( ; (c= *cp)!='\0'; ++cp ){
262 
263 			if( c>='a' && c<='z' ||
264 			    c>='A' && c<='Z' ||
265 			    c>='0' && c<='9' ||
266 			    c=='_' )  ; /* VOID */
267 			else goto nodef;
268 			}
269 
270 		/* define it */
271 
272 		fprintf( cout ,  "%c define %s %d\n", rflag?' ':'#', trmset[i].name, trmset[i].value );
273 
274 	nodef:	;
275 		}
276 
277 	ndefout = nterms+1;
278 
279 	}
280 
281 chstash( c ){
282   /* put character away into cnames */
283   if( cnamp >= &cnames[cnamsz] ) error("too many characters in id's and literals" );
284   else *cnamp++ = c;
285   }
286 
287 int gettok() {
288 	int j, base;
289 	static int peekline; /* number of '\n' seen in lookahead */
290 	auto int c, match, reserve;
291 
292 begin:
293 	reserve = 0;
294         if( peekc>=0 ) {
295 		c = peekc;
296 		lineno += peekline;
297 		peekc = -1;
298 		peekline = 0;
299 		}
300         else c = fgetc( cin);
301         while( c==' ' || c=='\n' || c=='\t' || c == '\014'){
302           if( c == '\n' ) ++lineno;
303           c=fgetc( cin);
304           }
305 	if (c=='/')
306 		{if (fgetc( cin)!='*')error("illegal /");
307 		c=fgetc( cin);
308 		while(c != EOF) {
309 			if( c == '\n' ) ++lineno;
310 			if (c=='*')
311 				{if((c=fgetc( cin))=='/')break;}
312 			else c=fgetc( cin);}
313 		if (!c) return(0);
314 		goto begin;}
315 	j=0;
316 	switch(c){
317 	case '"':
318 	case '\'':	match = c;
319 			ctokn = cnamp;
320 			chstash( ' ' );
321 			while(1){
322 				c = fgetc( cin);
323 				if( c == '\n' || c == '\0' )
324 					error("illegal or missing ' or \"");
325 				if( c == '\\' ){
326 					c = fgetc( cin);
327 					chstash( '\\' );
328 					}
329 				else if( c == match ) break;
330 				chstash( c );
331 				}
332 			break;
333 	case '%':
334 	case '\\':	switch(c=fgetc( cin))
335 		{case '0':	return(TERM);
336 		case '<':	return(LEFT);
337 		case '2':	return(BINARY);
338 		case '>':	return(RIGHT);
339 		case '%':
340 		case '\\':	return(MARK);
341 		case '=':	return(PREC);
342 		case '{':	return(LCURLY);
343 		default:	reserve = 1;
344 		}
345 	default:	if( c >= '0' && c <= '9' ){ /* number */
346 				numbval = c-'0' ;
347 				base = (c=='0') ? 8 : 10 ;
348 				for( c=fgetc( cin); c>='0' && c<='9'; c=fgetc( cin) ){
349 					numbval = numbval*base + c - '0';
350 					}
351 				peekc = c;
352 				return(NUMBER);
353 				}
354 			else if( (c>='a'&&c<='z')||(c>='A'&&c<='Z')||c=='_'||c=='.'||c=='$'){
355 				ctokn = cnamp;
356 				while(	(c>='a'&&c<='z') ||
357 					(c>='A'&&c<='Z') ||
358 					(c>='0'&&c<='9') ||
359 					c=='_' || c=='.' || c=='$' ) {
360 					chstash( c );
361 					if( peekc>=0 ) { c = peekc; peekc = -1; }
362 					else c = fgetc( cin);
363 					}
364 				}
365 			else return(c);
366 
367 			peekc=c;
368 			}
369 	chstash( '\0' );
370 
371 	if( reserve ){ /* find a reserved word */
372 		if( compare("term")) return( TERM );
373 		if( compare("TERM")) return( TERM );
374 		if( compare("token")) return( TERM );
375 		if( compare("TOKEN")) return( TERM );
376 		if( compare("left")) return( LEFT );
377 		if( compare("LEFT")) return( LEFT );
378 		if( compare("nonassoc")) return( BINARY );
379 		if( compare("NONASSOC")) return( BINARY );
380 		if( compare("binary")) return( BINARY );
381 		if( compare("BINARY")) return( BINARY );
382 		if( compare("right")) return( RIGHT );
383 		if( compare("RIGHT")) return( RIGHT );
384 		if( compare("prec")) return( PREC );
385 		if( compare("PREC")) return( PREC );
386 		error("invalid escape, or illegal reserved word: %s", ctokn );
387 		}
388 
389 	/* look ahead to distinguish IDENTIFIER from C_IDENTIFIER */
390 
391   look:
392 	while( peekc==' ' || peekc=='\t' || peekc == '\n' || peekc == '\014' )
393 	{
394 		if( peekc == '\n' ) ++peekline;
395 		peekc = fgetc( cin);
396 	}
397 
398 	if( peekc != ':' ) return( IDENTIFIER );
399 	peekc = -1;
400 	lineno += peekline;
401 	peekline = 0;
402 	return( C_IDENTIFIER );
403 }
404 chfind(t)
405 
406 {	int i,j;
407 
408 	if (ctokn[0]==' ')t=0;
409 	for(i=1;i<=nterms;i++)
410 		if(compare(trmset[i].name)){
411 			cnamp = ctokn;
412 			return( i );
413 			}
414 	for(i=1;i<=nnonter;i++)
415 		if(compare(nontrst[i].name)) {
416 			cnamp = ctokn;
417 			return( i+NTBASE );
418 			}
419 	/* cannot find name */
420 	if( t>1 && ctokn[0] != ' ' )
421 		error( "%s should have been defined earlier", ctokn );
422 	return( defin( t ) );
423 	}
424 
425 cpycode(){ /* copies code between \{ and \} */
426 
427 	int c;
428 	c = fgetc( cin);
429 	if( c == '\n' ) {
430 		c = fgetc( cin);
431 		lineno++;
432 		}
433 	while( c != EOF ){
434 		if( c=='\\' )
435 			if( (c=fgetc( cin)) == '}' ) return;
436 			else fputc('\\',cout);
437 		if( c=='%' )
438 			if( (c=fgetc( cin)) == '}' ) return;
439 			else fputc('%',cout);
440 		fputc( c, cout );
441 		if( c == '\n' ) ++lineno;
442 		c = fgetc( cin);
443 		}
444 	error("eof before %%}");
445 	}
446 
447 cpyact(){ /* copy C action to the next ; or closing } */
448 	int brac, c, match, *i, j, s;
449 
450 	brac = 0;
451 
452 loop:
453 	c = fgetc( cin);
454 swt:
455 	switch( c ){
456 
457 case ';':
458 		if( brac == 0 ){
459 			fputc( c, cout );
460 			return;
461 			}
462 		goto lcopy;
463 
464 case '{':
465 		brac++;
466 		goto lcopy;
467 
468 case '$':
469 		s = 1;
470 		c = fgetc( cin);
471 		if( c == '$' ){
472 			fprintf( cout , "yyval");
473 			goto loop;
474 			}
475 		if( c == '-' ){
476 			s = -s;
477 			c = fgetc( cin);
478 			}
479 		if( c>='0' && c <= '9' ){
480 			j=0;
481 			while( c>='0' && c<= '9' ){
482 				j= j*10+c-'0';
483 				c = fgetc( cin);
484 				}
485 			if( rflag ) fprintf( cout ,  "yyvalv(yypv%c%d)", s==1?'+':'-', j );
486 			else fprintf( cout , "yypv[%d]", s*j );
487 			goto swt;
488 			}
489 		fputc( '$' , cout);
490 		if( s<0 ) fputc('-', cout);
491 		goto swt;
492 
493 case '}':
494 		brac--;
495 		if( brac == 0 ){
496 			fputc( c , cout);
497 			return;
498 			}
499 		goto lcopy;
500 
501 case '/':	/* look for comments */
502 		fputc( c ,cout);
503 		c = fgetc( cin);
504 		if( c != '*' ) goto swt;
505 
506 		/* it really is a comment */
507 
508 		fputc( c , cout);
509 		while( (c=fgetc( cin)) != EOF ){
510 			if( c=='*' ){
511 				fputc( c , cout);
512 				if( (c=fgetc( cin)) == '/' ) goto lcopy;
513 				}
514 			fputc( c , cout);
515 			}
516 		error( "EOF inside comment" );
517 
518 case '\'':	/* character constant */
519 		match = '\'';
520 		goto string;
521 
522 case '"':	/* character string */
523 		match = '"';
524 
525 	string:
526 
527 		fputc( c , cout);
528 		while( (c=fgetc( cin)) != EOF ){
529 
530 			if( c=='\\' ){
531 				fputc( c , cout);
532 				c=fgetc( cin);
533 				}
534 			else if( c==match ) goto lcopy;
535 			fputc( c , cout);
536 			}
537 		error( "EOF in string or character constant" );
538 
539 case '\0':
540 		error("action does not terminate");
541 case '\n':	++lineno;
542 		goto lcopy;
543 
544 		}
545 
546 lcopy:
547 	fputc( c , cout);
548 	goto loop;
549 	}
550