xref: /original-bsd/old/lex/parser.y (revision abb30312)
/*
 * Token and operator-precedence declarations for the lex rule grammar.
 * Every %left line declares left-associative symbols; under yacc's
 * precedence rules each later %left line binds more tightly than the
 * ones before it, so '*' '+' '?' bind tightest here.
 * CAT is not a token yylex() returns; it only names the precedence
 * level given to juxtaposition through "%prec CAT" in the rules section.
 */
1 %token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS
2 %left SCON '/' NEWE
3 %left '|'
4 %left '$' '^'
5 %left CHAR CCL NCCL '(' '.' STR NULLS
6 %left ITER
7 %left CAT
8 %left '*' '+' '?'
9 
10 %{
/* SCCS identification string; compiled out when running lint. */
11 #ifndef lint
12 static char sccsid[] = "@(#)parser.y	4.3 (Berkeley) 06/24/90";
13 #endif
14 
/* Presumably declares the globals, limits and helpers used below
 * (sect, dp, ccptr, mn0/mn1/mn2, ...) - not visible in this file. */
15 # include "ldefs.c"
16 %}
17 %%
18 %{
/*
 * Scratch variables used by the rule actions below: i, j, k and g hold
 * parse-tree node indices and loop counters, p walks strings.
 * yylex() declares its own locals named p, i, j and k.
 */
19 int i;
20 int j,k;
21 int g;
22 char *p;
23 %}
/*
 * acc: the first rule in the grammar; it accepts one complete lexinput.
 * In DEBUG builds with `debug` set it dumps section two via sect2dump().
 */
24 acc	:	lexinput
25 	={
26 # ifdef DEBUG
27 		if(debug) sect2dump();
28 # endif
29 	}
30 	;
/*
 * lexinput: the overall shape of a lex source file.
 *   defns delim prods end  - definitions, "%%", rules, optional "%%"
 *                            (no action attached).
 *   defns delim end        - no rules at all: phead2() is called unless
 *                            funcflag is already set, then funcflag is
 *                            set to TRUE.
 *   error                  - syntax error; in DEBUG builds with `debug`
 *                            set both sections are dumped.
 */
31 lexinput:	defns delim prods end
32 	|	defns delim end
33 	={
34 		if(!funcflag)phead2();
35 		funcflag = TRUE;
36 	}
37 	| error
38 	={
39 # ifdef DEBUG
40 		if(debug) {
41 			sect1dump();
42 			sect2dump();
43 			}
44 # endif
45 		}
46 	;
/* end: an optional trailing delimiter (the second alternative is empty). */
47 end:		delim | ;
/*
 * defns: zero or more "name translation" pairs from the definitions
 * section ($2 is the name, $3 its substitution text).
 * Both strings are copied back to back into the area addressed by dp;
 * def[dptr] points at the name copy and subs[dptr] at the substitution
 * copy.  The entry after the last pair is kept zero in both tables.
 * Note that the DEFSIZE and DEFCHAR limits are tested only after the
 * strings have been copied and the table slot has been written.
 */
48 defns:	defns STR STR
49 	={	scopy($2,dp);
50 		def[dptr] = dp;
51 		dp += slength($2) + 1;
52 		scopy($3,dp);
53 		subs[dptr++] = dp;
54 		if(dptr >= DEFSIZE)
55 			error("Too many definitions");
56 		dp += slength($3) + 1;
57 		if(dp >= dchar+DEFCHAR)
58 			error("Definitions too long");
59 		subs[dptr]=def[dptr]=0;	/* for lookup - require ending null */
60 	}
61 	|
62 	;
/*
 * delim: a section delimiter token.  Advances the global section
 * counter `sect`; in DEBUG builds, when leaving DEFSECTION with `debug`
 * set, section one is dumped first.
 */
63 delim:	DELIM
64 	={
65 # ifdef DEBUG
66 		if(sect == DEFSECTION && debug) sect1dump();
67 # endif
68 		sect++;
69 		}
70 	;
/*
 * prods: one or more rules (pr).  Each further rule is joined to the
 * tree built so far with an RNEWE node; a single rule is passed up as is.
 */
71 prods:	prods pr
72 	={	$$ = mn2(RNEWE,$1,$2);
73 		}
74 	|	pr
75 	={	$$ = $1;}
76 	;
/*
 * pr: one rule - a regular expression r terminated by NEWE.
 * The expression is concatenated (RCAT) with a final node numbered
 * casecount: S1FINAL when divflg is TRUE, FINAL otherwise.  divflg is
 * then cleared and casecount advanced for the next rule.
 * "error NEWE" matches a malformed rule up to its NEWE; in DEBUG builds
 * with `debug` set section two is dumped.
 * Note: unlike the other rules this one has no terminating ';' before
 * the next rule begins.
 */
77 pr:	r NEWE
78 	={
79 		if(divflg == TRUE)
80 			i = mn1(S1FINAL,casecount);
81 		else i = mn1(FINAL,casecount);
82 		$$ = mn2(RCAT,$1,i);
83 		divflg = FALSE;
84 		casecount++;
85 		}
86 	| error NEWE
87 	={
88 # ifdef DEBUG
89 		if(debug) sect2dump();
90 # endif
91 		}
/*
 * r: a regular expression.  Each alternative builds a parse-tree node
 * with mn0/mn1/mn2 (node with zero, one or two operands) and yields
 * that node as $$.  dupl() is used wherever the same operand has to
 * appear more than once in the tree.
 */
92 r:	CHAR
93 	={	$$ = mn0($1); }
94 	| STR
95 	={
		/* first character is a leaf; every following character is
		 * attached with an RSTR node whose left operand is the chain so far */
96 		p = (char *)$1;
97 		i = mn0(*p++);
98 		while(*p)
99 			i = mn2(RSTR,i,*p++);
100 		$$ = i;
101 		}
102 	| '.'
103 	={	symbol['\n'] = 0;
		/* The class string for '.' (every code 1..NCH-1 except '\n')
		 * is built only once; psave remembers where it lives in ccl. */
104 		if(psave == FALSE){
105 			p = ccptr;
106 			psave = ccptr;
107 			for(i=1;i<'\n';i++){
108 				symbol[i] = 1;
109 				*ccptr++ = i;
110 				}
111 			for(i='\n'+1;i<NCH;i++){
112 				symbol[i] = 1;
113 				*ccptr++ = i;
114 				}
115 			*ccptr++ = 0;
116 			if(ccptr > ccl+CCLSIZE)
117 				error("Too many large character classes");
118 			}
119 		else
120 			p = psave;
121 		$$ = mn1(RCCL,p);
		/* NOTE(review): symbol[] is filled only on the first '.';
		 * on later uses cclinter(1) runs with whatever symbol[] then
		 * holds (only symbol['\n'] is reset) - confirm this is intended. */
122 		cclinter(1);
123 		}
124 	| CCL
125 	={	$$ = mn1(RCCL,$1); }
126 	| NCCL
127 	={	$$ = mn1(RNCCL,$1); }
128 	| r '*'
129 	={	$$ = mn1(STAR,$1); }
130 	| r '+'
131 	={	$$ = mn1(PLUS,$1); }
132 	| r '?'
133 	={	$$ = mn1(QUEST,$1); }
134 	| r '|' r
135 	={	$$ = mn2(BAR,$1,$3); }
136 	| r r %prec CAT
137 	={	$$ = mn2(RCAT,$1,$2); }
138 	| r '/' r
		/* First '/' (or '$') of a rule: an S2FINAL node numbered
		 * -casecount is appended to the left operand and a DIV node is
		 * built.  When divflg is already set the slash degrades to plain
		 * concatenation and a warning is issued. */
139 	={	if(!divflg){
140 			j = mn1(S2FINAL,-casecount);
141 			i = mn2(RCAT,$1,j);
142 			$$ = mn2(DIV,i,$3);
143 			}
144 		else {
145 			$$ = mn2(RCAT,$1,$3);
146 			warning("Extra slash removed");
147 			}
148 		divflg = TRUE;
149 		}
150 	| r ITER ',' ITER '}'
		/* Bounded repetition: $2 and $4 are the two counts, swapped if
		 * needed so that $2 <= $4.  j first becomes $2 concatenated
		 * copies of r; then, for every count i in $2+1..$4, a fresh
		 * chain g of i copies is built and OR-ed onto j with BAR.
		 * When the warning branch is taken $$ is not assigned here.
		 * NOTE(review): with $2 == 0 the tree still starts from one
		 * copy of r, so the empty match does not appear to be
		 * generated - confirm. */
151 	={	if($2 > $4){
152 			i = $2;
153 			$2 = $4;
154 			$4 = i;
155 			}
156 		if($4 <= 0)
157 			warning("Iteration range must be positive");
158 		else {
159 			j = $1;
160 			for(k = 2; k<=$2;k++)
161 				j = mn2(RCAT,j,dupl($1));
162 			for(i = $2+1; i<=$4; i++){
163 				g = dupl($1);
164 				for(k=2;k<=i;k++)
165 					g = mn2(RCAT,g,dupl($1));
166 				j = mn2(BAR,j,g);
167 				}
168 			$$ = j;
169 			}
170 	}
171 	| r ITER '}'
172 	={
		/* exact count: 0 yields an RNULLS leaf, otherwise $2 copies of r
		 * concatenated */
173 		if($2 < 0)warning("Can't have negative iteration");
174 		else if($2 == 0) $$ = mn0(RNULLS);
175 		else {
176 			j = $1;
177 			for(k=2;k<=$2;k++)
178 				j = mn2(RCAT,j,dupl($1));
179 			$$ = j;
180 			}
181 		}
182 	| r ITER ',' '}'
183 	={
184 				/* from n to infinity */
185 		if($2 < 0)warning("Can't have negative iteration");
186 		else if($2 == 0) $$ = mn1(STAR,$1);
187 		else if($2 == 1)$$ = mn1(PLUS,$1);
188 		else {		/* >= 2 iterations minimum */
			/* $2-1 concatenated copies followed by one PLUS copy */
189 			j = $1;
190 			for(k=2;k<$2;k++)
191 				j = mn2(RCAT,j,dupl($1));
192 			k = mn1(PLUS,dupl($1));
193 			$$ = mn2(RCAT,j,k);
194 			}
195 		}
196 	| SCON r
		/* $1 is the start-condition list pointer yylex() stored in yylval */
197 	={	$$ = mn2(RSCON,$2,$1); }
198 	| '^' r
199 	={	$$ = mn1(CARAT,$2); }
200 	| r '$'
		/* handled like "r / newline": same S2FINAL/DIV construction and
		 * the same divflg bookkeeping as the '/' alternative, but without
		 * a warning when divflg is already set */
201 	={	i = mn0('\n');
202 		if(!divflg){
203 			j = mn1(S2FINAL,-casecount);
204 			k = mn2(RCAT,$1,j);
205 			$$ = mn2(DIV,k,i);
206 			}
207 		else $$ = mn2(RCAT,$1,i);
208 		divflg = TRUE;
209 		}
210 	| '(' r ')'
211 	={	$$ = $2; }
212 	|	NULLS
213 	={	$$ = mn0(RNULLS); }
214 	;
215 %%
/*
 * yylex - token source for the grammar above.
 *
 * What is scanned depends on the global `sect`:
 *   DEFSECTION  - input is read a line at a time with getl().  Lines
 *                 beginning with '%' are directives handled right here
 *                 (table sizes, %t character set, %r/%c language, %{ %}
 *                 code, %s start conditions); "%%" returns DELIM.  Lines
 *                 beginning with blank or tab are copied to fout.  Any
 *                 other line is a definition: its name is returned as a
 *                 STR, and the following call returns the rest of the
 *                 line as a second STR (`prev` tells the two calls apart).
 *   RULESECTION - input is read a character at a time with gch() and
 *                 turned into the operator, CHAR, STR, CCL/NCCL, ITER,
 *                 SCON, NULLS, NEWE and DELIM tokens of the grammar.
 *                 Action text is copied out with cpyact().
 *   otherwise   - ptail() is called, the remaining input is copied to
 *                 fout and 0 is returned.
 *
 * Returns the token code, passed through freturn().  yylval carries the
 * character for CHAR, the count for ITER, and a pointer cast to int for
 * STR, CCL, NCCL and SCON.
 * NOTE(review): those (int) casts only round-trip where a pointer fits
 * in an int.
 * NOTE(review): apart from the quoted-string case, the loops that store
 * into token[] below do not check TOKENSIZE.
 */
216 yylex(){
217 	register char *p;
218 	register int c, i;
219 	char  *t, *xp;
220 	int n, j, k, x;
221 	static int sectbegin;
222 	static char token[TOKENSIZE];
223 	static int iter;
224 
225 # ifdef DEBUG
226 	yylval = 0;
227 # endif
228 
229 	if(sect == DEFSECTION) {		/* definitions section */
230 		while(!eof) {
231 			if(prev == '\n'){		/* next char is at beginning of line */
232 				getl(p=buf);
233 				switch(*p){
234 				case '%':
235 					switch(c= *(p+1)){
236 					case '%':
						/* end of definitions: emit the start of the
						 * generated scanner, check that the parse-tree
						 * arrays will fit by allocating and freeing their
						 * combined size, then allocate them one by one */
237 						lgate();
238 						if(!ratfor)fprintf(fout,"# ");
239 						fprintf(fout,"define YYNEWLINE %d\n",ctable['\n']);
240 						if(!ratfor)fprintf(fout,"yylex(){\nint nstr; extern int yyprevious;\n");
241 						sectbegin = TRUE;
242 						i = treesize*(sizeof(*name)+sizeof(*left)+
243 							sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA;
244 						p = myalloc(i,1);
245 						if(p == 0)
246 							error("Too little core for parse tree");
247 						free(p);
248 						name = (int *)myalloc(treesize,sizeof(*name));
249 						left = (int *)myalloc(treesize,sizeof(*left));
250 						right = (int *)myalloc(treesize,sizeof(*right));
251 						nullstr = (char *)myalloc(treesize,sizeof(*nullstr));
252 						parent = (int *)myalloc(treesize,sizeof(*parent));
253 						if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0)
254 							error("Too little core for parse tree");
255 						return(freturn(DELIM));
					/* The size directives below all skip to the first
					 * digit on the line and convert it with siconv(). */
256 					case 'p': case 'P':	/* has overridden number of positions */
257 						while(*p && !digit(*p))p++;
258 						maxpos = siconv(p);
259 # ifdef DEBUG
260 						if (debug) printf("positions (%%p) now %d\n",maxpos);
261 # endif
262 						if(report == 2)report = 1;
263 						continue;
264 					case 'n': case 'N':	/* has overridden number of states */
265 						while(*p && !digit(*p))p++;
266 						nstates = siconv(p);
267 # ifdef DEBUG
268 						if(debug)printf( " no. states (%%n) now %d\n",nstates);
269 # endif
270 						if(report == 2)report = 1;
271 						continue;
272 					case 'e': case 'E':		/* has overridden number of tree nodes */
273 						while(*p && !digit(*p))p++;
274 						treesize = siconv(p);
275 # ifdef DEBUG
276 						if (debug) printf("treesize (%%e) now %d\n",treesize);
277 # endif
278 						if(report == 2)report = 1;
279 						continue;
280 					case 'o': case 'O':
281 						while (*p && !digit(*p))p++;
282 						outsize = siconv(p);
283 						if (report ==2) report=1;
284 						continue;
285 					case 'a': case 'A':		/* has overridden number of transitions */
286 						while(*p && !digit(*p))p++;
287 						if(report == 2)report = 1;
288 						ntrans = siconv(p);
289 # ifdef DEBUG
290 						if (debug)printf("N. trans (%%a) now %d\n",ntrans);
291 # endif
292 						continue;
293 					case 'k': case 'K': /* overridden packed char classes */
294 						while (*p && !digit(*p))p++;
295 						if (report==2) report=1;
296 						free(pchar);
297 						pchlen = siconv(p);
298 # ifdef DEBUG
299 						if (debug) printf( "Size classes (%%k) now %d\n",pchlen);
300 # endif
301 						pchar=pcptr=myalloc(pchlen, sizeof(*pchar));
302 						continue;
303 					case 't': case 'T': 	/* character set specifier */
304 						ZCH = atoi(p+2);
305 						if (ZCH < NCH) ZCH = NCH;
306 						if (ZCH > 2*NCH) error("ch table needs redeclaration");
307 						chset = TRUE;
308 						for(i = 0; i<ZCH; i++)
309 							ctable[i] = 0;
						/* each following line, up to a closing %T or %t,
						 * is "code characters...": every listed character
						 * is mapped to that code in ctable[] */
						/* NOTE(review): n == ZCH is accepted, and ZCH may be
						 * 2*NCH, the size of chused[] below - confirm bound. */
310 						while(getl(p) && scomp(p,"%T") != 0 && scomp(p,"%t") != 0){
311 							if((n = siconv(p)) <= 0 || n > ZCH){
312 								warning("Character value %d out of range",n);
313 								continue;
314 								}
315 							while(!space(*p) && *p) p++;
316 							while(space(*p)) p++;
317 							t = p;
318 							while(*t){
319 								c = ctrans(&t);
320 								if(ctable[c]){
321 									if (printable(c))
322 										warning("Character '%c' used twice",c);
323 									else
324 										warning("Character %o used twice",c);
325 									}
326 								else ctable[c] = n;
327 								t++;
328 								}
329 							p = buf;
330 							}
						/* give every still-unmapped character a code */
						/* NOTE(review): the inner loop advances kr while
						 * chused[kr] is 0, i.e. it stops on a code that is
						 * already marked used - confirm this is intended. */
331 						{
332 						char chused[2*NCH]; int kr;
333 						for(i=0; i<ZCH; i++)
334 							chused[i]=0;
335 						for(i=0; i<NCH; i++)
336 							chused[ctable[i]]=1;
337 						for(kr=i=1; i<NCH; i++)
338 							if (ctable[i]==0)
339 								{
340 								while (chused[kr] == 0)
341 									kr++;
342 								ctable[i]=kr;
343 								chused[kr]=1;
344 								}
345 						}
346 						lgate();
347 						continue;
348 					case 'r': case 'R':
349 						c = 'r';
						/* falls through: 'R' is normalised to 'r' first */
350 					case 'c': case 'C':
351 						if(lgatflg)
352 							error("Too late for language specifier");
353 						ratfor = (c == 'r');
354 						continue;
355 					case '{':
						/* copy lines to fout up to the closing %} */
356 						lgate();
357 						while(getl(p) && scomp(p,"%}") != 0)
358 							fprintf(fout, "%s\n",p);
359 						if(p[0] == '%') continue;
360 						error("Premature eof");
361 					case 's': case 'S':		/* start conditions */
						/* skip the directive word, then split the rest of
						 * the line on blank, tab and comma; each name gets a
						 * define with value 2*sptr and is stored in sname[] */
362 						lgate();
363 						while(*p && index(*p," \t,") < 0) p++;
364 						n = TRUE;
365 						while(n){
366 							while(*p && index(*p," \t,") >= 0) p++;
367 							t = p;
368 							while(*p && index(*p," \t,") < 0)p++;
369 							if(!*p) n = FALSE;
370 							*p++ = 0;
371 							if (*t == 0) continue;
372 							i = sptr*2;
373 							if(!ratfor)fprintf(fout,"# ");
374 							fprintf(fout,"define %s %d\n",t,i);
375 							scopy(t,sp);
376 							sname[sptr++] = sp;
377 							sname[sptr] = 0;	/* required by lookup */
378 							if(sptr >= STARTSIZE)
379 								error("Too many start conditions");
380 							sp += slength(sp) + 1;
381 							if(sp >= schar+STARTCHAR)
382 								error("Start conditions too long");
383 							}
384 						continue;
385 					default:
386 						warning("Invalid request %s",p);
387 						continue;
388 						}	/* end of switch after seeing '%' */
389 				case ' ': case '\t':		/* must be code */
390 					lgate();
391 					fprintf(fout, "%s\n",p);
392 					continue;
393 				default:		/* definition */
					/* cut the line at the first white space: the name is
					 * returned now, bptr remembers where the translation
					 * starts, and prev (no longer '\n') routes the next
					 * call to the else branch below */
394 					while(*p && !space(*p)) p++;
395 					if(*p == 0)
396 						continue;
397 					prev = *p;
398 					*p = 0;
399 					bptr = p+1;
400 					yylval = (int)buf;
401 					if(digit(buf[0]))
402 						warning("Substitution strings may not begin with digits");
403 					return(freturn(STR));
404 					}
405 				}
406 			/* still sect 1, but prev != '\n' */
407 			else {
408 				p = bptr;
409 				while(*p && space(*p)) p++;
410 				if(*p == 0)
411 					warning("No translation given - null string assumed");
412 				scopy(p,token);
413 				yylval = (int)token;
414 				prev = '\n';
415 				return(freturn(STR));
416 				}
417 			}
418 		/* end of section one processing */
419 		}
420 	else if(sect == RULESECTION){		/* rules and actions */
421 		while(!eof){
422 			switch(c=gch()){
423 			case '\0':
424 				return(freturn(0));
425 			case '\n':
426 				if(prev == '\n') continue;
427 				x = NEWE;
428 				break;
429 			case ' ':
430 			case '\t':
				/* white space starts action text.  Right after the %%
				 * (sectbegin) it is copied without a case label;
				 * otherwise a label for casecount is written first. */
431 				if(sectbegin == TRUE){
432 					cpyact();
433 					while((c=gch()) && c != '\n');
434 					continue;
435 					}
436 				if(!funcflag)phead2();
437 				funcflag = TRUE;
438 				if(ratfor)fprintf(fout,"%d\n",30000+casecount);
439 				else fprintf(fout,"case %d:\n",casecount);
440 				if(cpyact()){
441 					if(ratfor)fprintf(fout,"goto 30997\n");
442 					else fprintf(fout,"break;\n");
443 					}
444 				while((c=gch()) && c != '\n');
445 				if(peek == ' ' || peek == '\t' || sectbegin == TRUE){
446 					warning("Executable statements should occur right after %%");
447 					continue;
448 					}
449 				x = NEWE;
450 				break;
451 			case '%':
452 				if(prev != '\n') goto character;
453 				if(peek == '{'){	/* included code */
454 					getl(buf);
455 					while(!eof && getl(buf) && scomp("%}",buf) != 0)
456 						fprintf(fout,"%s\n",buf);
457 					continue;
458 					}
459 				if(peek == '%'){
460 					c = gch();
461 					c = gch();
462 					x = DELIM;
463 					break;
464 					}
465 				goto character;
466 			case '|':
				/* '|' followed by white space: write a case label for
				 * the current rule number, advance casecount and go on
				 * scanning without returning a token */
467 				if(peek == ' ' || peek == '\t' || peek == '\n'){
468 					if(ratfor)fprintf(fout,"%d\n",30000+casecount++);
469 					else fprintf(fout,"case %d:\n",casecount++);
470 					continue;
471 					}
472 				x = '|';
473 				break;
474 			case '$':
				/* an operator only when followed by newline, blank, tab,
				 * '|' or '/'; otherwise an ordinary character */
475 				if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){
476 					x = c;
477 					break;
478 					}
479 				goto character;
480 			case '^':
481 				if(prev != '\n' && scon != TRUE) goto character;	/* valid only at line begin */
482 				x = c;
483 				break;
484 			case '?':
485 			case '+':
486 			case '.':
487 			case '*':
488 			case '(':
489 			case ')':
490 			case ',':
491 			case '/':
492 				x = c;
493 				break;
494 			case '}':
495 				iter = FALSE;
496 				x = c;
497 				break;
498 			case '{':	/* either iteration or definition */
499 				if(digit(c=gch())){	/* iteration */
500 					iter = TRUE;
				/* also entered by goto from the `character` label for
				 * the second count of an iteration */
501 				ieval:
502 					i = 0;
503 					while(digit(c)){
504 						token[i++] = c;
505 						c = gch();
506 						}
507 					token[i] = 0;
508 					yylval = siconv(token);
509 					munput('c',c);
510 					x = ITER;
511 					break;
512 					}
513 				else {		/* definition */
					/* collect the name up to '}' and push its
					 * substitution text back onto the input */
514 					i = 0;
515 					while(c && c!='}'){
516 						token[i++] = c;
517 						c = gch();
518 						}
519 					token[i] = 0;
520 					i = lookup(token,def);
521 					if(i < 0)
522 						warning("Definition %s not found",token);
523 					else
524 						munput('s',subs[i]);
525 					continue;
526 					}
527 			case '<':		/* start condition ? */
528 				if(prev != '\n')		/* not at line begin, not start */
529 					goto character;
				/* append index+1 of every named condition at slptr,
				 * terminated by a 0 byte; t marks where this list starts */
530 				t = slptr;
531 				do {
532 					i = 0;
533 					c = gch();
534 					while(c != ',' && c && c != '>'){
535 						token[i++] = c;
536 						c = gch();
537 						}
538 					token[i] = 0;
539 					if(i == 0)
540 						goto character;
541 					i = lookup(token,sname);
542 					if(i < 0) {
543 						warning("Undefined start condition %s",token);
544 						continue;
545 						}
546 					*slptr++ = i+1;
547 					} while(c && c != '>');
548 				*slptr++ = 0;
549 				/* check if previous value re-usable */
550 				for (xp=slist; xp<t; )
551 					{
552 					if (strcmp(xp, t)==0)
553 						break;
554 					while (*xp++);
555 					}
556 				if (xp<t)
557 					{
558 					/* re-use previous pointer to string */
559 					slptr=t;
560 					t=xp;
561 					}
562 				if(slptr > slist+STARTSIZE)		/* note not packed ! */
563 					error("Too many start conditions used");
564 				yylval = (int)t;
565 				x = SCON;
566 				break;
567 			case '"':
568 				i = 0;
569 				while((c=gch()) && c != '"' && c != '\n'){
570 					if(c == '\\') c = usescape(c=gch());
571 					token[i++] = c;
					/* Stop once the buffer is full so the terminating
					 * NUL written below still lands inside token[]
					 * (the test used to be i > TOKENSIZE, which let
					 * token[TOKENSIZE] be written). */
572 					if(i >= TOKENSIZE){
573 						warning("String too long");
574 						i = TOKENSIZE-1;
575 						break;
576 						}
577 					}
578 				if(c == '\n') {
579 					yyline--;
580 					warning("Non-terminated string");
581 					yyline++;
582 					}
583 				token[i] = 0;
				/* "" is NULLS, a one-character string is a CHAR,
				 * anything longer a STR */
584 				if(i == 0)x = NULLS;
585 				else if(i == 1){
586 					yylval = token[0];
587 					x = CHAR;
588 					}
589 				else {
590 					yylval = (int)token;
591 					x = STR;
592 					}
593 				break;
594 			case '[':
				/* character class: mark members in symbol[], with a
				 * leading '^' selecting NCCL instead of CCL */
595 				for(i=1;i<NCH;i++) symbol[i] = 0;
596 				x = CCL;
597 				if((c = gch()) == '^'){
598 					x = NCCL;
599 					c = gch();
600 					}
601 				while(c != ']' && c){
602 					if(c == '\\') c = usescape(c=gch());
603 					symbol[c] = 1;
604 					j = c;
605 					if((c=gch()) == '-' && peek != ']'){		/* range specified */
606 						c = gch();
607 						if(c == '\\') c = usescape(c=gch());
608 						k = c;
609 						if(j > k) {
610 							n = j;
611 							j = k;
612 							k = n;
613 							}
614 						if(!(('A' <= j && k <= 'Z') ||
615 						     ('a' <= j && k <= 'z') ||
616 						     ('0' <= j && k <= '9')))
617 							warning("Non-portable Character Class");
618 						for(n=j+1;n<=k;n++)
619 							symbol[n] = 1;		/* implementation dependent */
620 						c = gch();
621 						}
622 					}
623 				/* try to pack ccl's */
624 				i = 0;
625 				for(j=0;j<NCH;j++)
626 					if(symbol[j])token[i++] = j;
627 				token[i] = 0;
628 				p = ccptr;
629 				if(optim){
630 					p = ccl;
631 					while(p <ccptr && scomp(token,p) != 0)p++;
632 					}
633 				if(p < ccptr)	/* found it */
634 					yylval = (int)p;
635 				else {
636 					yylval = (int)ccptr;
637 					scopy(token,ccptr);
638 					ccptr += slength(token) + 1;
639 					if(ccptr >= ccl+CCLSIZE)
640 						error("Too many large character classes");
641 					}
642 				cclinter(x==CCL);
643 				break;
644 			case '\\':
645 				c = usescape(c=gch());
				/* falls through: the escaped value is an ordinary character */
646 			default:
647 			character:
648 				if(iter){	/* second part of an iteration */
649 					iter = FALSE;
650 					if('0' <= c && c <= '9')
651 						goto ieval;
652 					}
				/* gather a run of alpha() characters into one STR; if
				 * the run is followed by '?', '*' or '+' the last
				 * character is pushed back so the operator applies to
				 * it alone */
653 				if(alpha(peek)){
654 					i = 0;
655 					yylval = (int)token;
656 					token[i++] = c;
657 					while(alpha(peek))
658 						token[i++] = gch();
659 					if(peek == '?' || peek == '*' || peek == '+')
660 						munput('c',token[--i]);
661 					token[i] = 0;
662 					if(i == 1){
663 						yylval = token[0];
664 						x = CHAR;
665 						}
666 					else x = STR;
667 					}
668 				else {
669 					yylval = c;
670 					x = CHAR;
671 					}
672 				}
			/* common exit for every `break` above: remember whether the
			 * token was a start condition, clear sectbegin */
673 			scon = FALSE;
674 			if(x == SCON)scon = TRUE;
675 			sectbegin = FALSE;
676 			return(freturn(x));
677 			}
678 		}
679 	/* section three */
680 	ptail();
681 # ifdef DEBUG
682 	if(debug)
683 		fprintf(fout,"\n/*this comes from section three - debug */\n");
684 # endif
685 	while(getl(buf) && !eof)
686 		fprintf(fout,"%s\n",buf);
687 	return(freturn(0));
688 	}
689 /* end of yylex */
690 # ifdef DEBUG
/*
 * freturn - tracing pass-through for yylex() return values; this
 * function is compiled only when DEBUG is defined.
 *
 * i is the token code about to be returned.  When yydebug is set the
 * token is printed (through allprint() if it is below NCH, as a number
 * otherwise) followed by yylval, formatted according to the token:
 * strpt() for STR/CCL/NCCL, allprint() for CHAR, decimal for anything
 * else.  Returns i unchanged.
 */
691 freturn(i)
692   int i; {
693 	if(yydebug) {
694 		printf("now return ");
695 		if(i < NCH) allprint(i);
696 		else printf("%d",i);
697 		printf("   yylval = ");
698 		switch(i){
699 			case STR: case CCL: case NCCL:
700 				strpt(yylval);
701 				break;
702 			case CHAR:
703 				allprint(yylval);
704 				break;
705 			default:
706 				printf("%d",yylval);
707 				break;
708 			}
709 		putchar('\n');
710 		}
711 	return(i);
712 	}
713 # endif
714