xref: /original-bsd/old/lex/parser.y (revision 8251a00e)
/*
 * Token and precedence declarations for the lex-specification grammar.
 * The %left lines run from lowest precedence (first) to highest (last).
 * CAT is never returned by yylex; it is only named through %prec on the
 * concatenation alternative of rule r.
 */
1 %token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS
2 %left SCON '/' NEWE
3 %left '|'
4 %left '$' '^'
5 %left CHAR CCL NCCL '(' '.' STR NULLS
6 %left ITER
7 %left CAT
8 %left '*' '+' '?'
9 
10 %{
/* SCCS identification string; left out when checking with lint. */
11 #ifndef lint
12 static char sccsid[] = "@(#)parser.y	4.1 (Berkeley) 08/11/83";
13 #endif
14 
/* Shared declarations used by the actions and by yylex below. */
15 # include "ldefs.c"
16 %}
17 %%
18 %{
/*
 * Scratch variables used by the grammar actions: i, j, k and g hold
 * values returned by mn0/mn1/mn2/dupl or loop counters, p walks strings.
 */
19 int i;
20 int j,k;
21 int g;
22 char *p;
23 %}
/*
 * acc: the first rule listed, hence the top of the grammar.  After a
 * whole lexinput has been reduced it calls sect2dump(), but only in
 * DEBUG builds with the debug flag set.
 */
24 acc	:	lexinput
25 	={
26 # ifdef DEBUG
27 		if(debug) sect2dump();
28 # endif
29 	}
30 	;
/*
 * lexinput: the overall shape of a lex source file.
 *   defns delim prods end - definitions, delimiter, rules (no action here)
 *   defns delim end       - no rules at all: phead2() is called here if
 *                           funcflag is not yet set, then funcflag is set
 *   error                 - syntax error; DEBUG builds dump both sections
 */
31 lexinput:	defns delim prods end
32 	|	defns delim end
33 	={
34 		if(!funcflag)phead2();
35 		funcflag = TRUE;
36 	}
37 	| error
38 	={
39 # ifdef DEBUG
40 		if(debug) {
41 			sect1dump();
42 			sect2dump();
43 			}
44 # endif
45 		}
46 	;
/* end: an optional closing delimiter; the empty alternative allows none. */
47 end:		delim | ;
/*
 * defns: zero or more "name substitution" pairs, each a pair of STR tokens.
 * Both strings are copied into the character pool at dp; def[] records
 * where the name starts and subs[] where the substitution starts.  The
 * slot after the last entry is zeroed in both tables because lookup
 * needs a terminating null entry.
 * NOTE(review): both limits are tested only after the corresponding
 * copy/store has been made - confirm that error() does not return.
 */
48 defns:	defns STR STR
49 	={	scopy($2,dp);
50 		def[dptr] = dp;
51 		dp += slength($2) + 1;
52 		scopy($3,dp);
53 		subs[dptr++] = dp;
54 		if(dptr >= DEFSIZE)
55 			error("Too many definitions");
56 		dp += slength($3) + 1;
57 		if(dp >= dchar+DEFCHAR)
58 			error("Definitions too long");
59 		subs[dptr]=def[dptr]=0;	/* for lookup - require ending null */
60 	}
61 	|
62 	;
/*
 * delim: a DELIM token.  Advances sect to the next section; in DEBUG
 * builds with debug set, section 1 is dumped first when the section
 * being left is DEFSECTION.
 */
63 delim:	DELIM
64 	={
65 # ifdef DEBUG
66 		if(sect == DEFSECTION && debug) sect1dump();
67 # endif
68 		sect++;
69 		}
70 	;
/*
 * prods: one or more rules.  Each further rule is joined to the ones
 * already seen by an RNEWE node built with mn2(); a single rule is
 * passed up unchanged.
 */
71 prods:	prods pr
72 	={	$$ = mn2(RNEWE,$1,$2);
73 		}
74 	|	pr
75 	={	$$ = $1;}
76 	;
/*
 * pr: one rule, i.e. an expression r terminated by NEWE.
 * A final node numbered casecount is appended to the expression with
 * RCAT: S1FINAL if divflg was set while parsing r (by the '/' or '$'
 * alternatives), FINAL otherwise.  divflg is then cleared and casecount
 * advanced for the next rule.
 * The error alternative skips to the next NEWE; DEBUG builds dump
 * section 2 when debug is set.
 * Note that this rule is not closed by a ';' - the next rule name
 * follows directly.
 */
77 pr:	r NEWE
78 	={
79 		if(divflg == TRUE)
80 			i = mn1(S1FINAL,casecount);
81 		else i = mn1(FINAL,casecount);
82 		$$ = mn2(RCAT,$1,i);
83 		divflg = FALSE;
84 		casecount++;
85 		}
86 	| error NEWE
87 	={
88 # ifdef DEBUG
89 		if(debug) sect2dump();
90 # endif
91 		}
/*
 * r: a regular expression.  Every alternative leaves in $$ the value
 * returned by one of the node builders mn0/mn1/mn2 (leaf, one operand,
 * two operands) or passes an operand through unchanged.  dupl() is used
 * wherever an operand has to appear more than once.
 */
92 r:	CHAR
93 	={	$$ = mn0($1); }
94 	| STR
95 	={
		/* first character becomes a leaf; each later one is added with RSTR */
96 		p = $1;
97 		i = mn0(*p++);
98 		while(*p)
99 			i = mn2(RSTR,i,*p++);
100 		$$ = i;
101 		}
102 	| '.'
103 	={	symbol['\n'] = 0;
		/*
		 * '.' is a class holding every character code from 1 to NCH-1
		 * except '\n'.  The class string is built at ccptr the first
		 * time only; psave remembers it for later uses.
		 */
104 		if(psave == FALSE){
105 			p = ccptr;
106 			psave = ccptr;
107 			for(i=1;i<'\n';i++){
108 				symbol[i] = 1;
109 				*ccptr++ = i;
110 				}
111 			for(i='\n'+1;i<NCH;i++){
112 				symbol[i] = 1;
113 				*ccptr++ = i;
114 				}
115 			*ccptr++ = 0;
116 			if(ccptr > ccl+CCLSIZE)
117 				error("Too many large character classes");
118 			}
119 		else
120 			p = psave;
121 		$$ = mn1(RCCL,p);
122 		cclinter(1);
123 		}
124 	| CCL
125 	={	$$ = mn1(RCCL,$1); }
126 	| NCCL
127 	={	$$ = mn1(RNCCL,$1); }
128 	| r '*'
129 	={	$$ = mn1(STAR,$1); }
130 	| r '+'
131 	={	$$ = mn1(PLUS,$1); }
132 	| r '?'
133 	={	$$ = mn1(QUEST,$1); }
134 	| r '|' r
135 	={	$$ = mn2(BAR,$1,$3); }
136 	| r r %prec CAT
137 	={	$$ = mn2(RCAT,$1,$2); }
138 	| r '/' r
	/*
	 * First '/' of a rule: an S2FINAL node carrying -casecount is
	 * appended to the left operand and the result is joined to the
	 * right operand by DIV.  A further '/' in the same rule (divflg
	 * already set) is treated as plain concatenation, with a warning.
	 */
139 	={	if(!divflg){
140 			j = mn1(S2FINAL,-casecount);
141 			i = mn2(RCAT,$1,j);
142 			$$ = mn2(DIV,i,$3);
143 			}
144 		else {
145 			$$ = mn2(RCAT,$1,$3);
146 			warning("Extra slash removed");
147 			}
148 		divflg = TRUE;
149 		}
150 	| r ITER ',' ITER '}'
	/*
	 * Bounded repetition.  The two counts are put in ascending order,
	 * then the tree is built as an alternation (BAR) of $2 copies,
	 * $2+1 copies, ... up to $4 copies of the operand.
	 * NOTE(review): j starts as one copy of $1 even when $2 is 0, so a
	 * lower bound of zero does not appear to yield an empty alternative.
	 */
151 	={	if($2 > $4){
152 			i = $2;
153 			$2 = $4;
154 			$4 = i;
155 			}
156 		if($4 <= 0)
157 			warning("Iteration range must be positive");
158 		else {
159 			j = $1;
160 			for(k = 2; k<=$2;k++)
161 				j = mn2(RCAT,j,dupl($1));
162 			for(i = $2+1; i<=$4; i++){
163 				g = dupl($1);
164 				for(k=2;k<=i;k++)
165 					g = mn2(RCAT,g,dupl($1));
166 				j = mn2(BAR,j,g);
167 				}
168 			$$ = j;
169 			}
170 	}
171 	| r ITER '}'
172 	={
		/* exact count: 0 gives an RNULLS leaf, n gives n concatenated copies */
173 		if($2 < 0)warning("Can't have negative iteration");
174 		else if($2 == 0) $$ = mn0(RNULLS);
175 		else {
176 			j = $1;
177 			for(k=2;k<=$2;k++)
178 				j = mn2(RCAT,j,dupl($1));
179 			$$ = j;
180 			}
181 		}
182 	| r ITER ',' '}'
183 	={
184 				/* from n to infinity */
185 		if($2 < 0)warning("Can't have negative iteration");
186 		else if($2 == 0) $$ = mn1(STAR,$1);
187 		else if($2 == 1)$$ = mn1(PLUS,$1);
188 		else {		/* >= 2 iterations minimum */
			/* n-1 concatenated copies followed by PLUS of one more copy */
189 			j = $1;
190 			for(k=2;k<$2;k++)
191 				j = mn2(RCAT,j,dupl($1));
192 			k = mn1(PLUS,dupl($1));
193 			$$ = mn2(RCAT,j,k);
194 			}
195 		}
196 	| SCON r
197 	={	$$ = mn2(RSCON,$2,$1); }
198 	| '^' r
199 	={	$$ = mn1(CARAT,$2); }
200 	| r '$'
	/*
	 * Handled like "r / newline": a '\n' leaf takes the place of the
	 * right operand of the '/' alternative above.
	 */
201 	={	i = mn0('\n');
202 		if(!divflg){
203 			j = mn1(S2FINAL,-casecount);
204 			k = mn2(RCAT,$1,j);
205 			$$ = mn2(DIV,k,i);
206 			}
207 		else $$ = mn2(RCAT,$1,i);
208 		divflg = TRUE;
209 		}
210 	| '(' r ')'
211 	={	$$ = $2; }
212 	|	NULLS
213 	={	$$ = mn0(RNULLS); }
214 	;
215 %%
/*
 * yylex - lexical analyzer feeding the lex-specification grammar above.
 *
 * What is scanned depends on the global sect:
 *   DEFSECTION  - whole lines are read with getl().  Lines starting with
 *                 '%' are requests handled right here; lines starting
 *                 with white space are copied to fout; anything else is
 *                 a definition, returned as two STR tokens (the name,
 *                 then - on the next call - its translation).  "%%"
 *                 returns DELIM.
 *   RULESECTION - characters are read with gch() and turned into the
 *                 tokens of rule r; white space calls cpyact() and ends
 *                 the rule with NEWE.
 *   otherwise   - ptail() is called, the remaining input lines are
 *                 copied to fout and 0 is returned.
 *
 * The value of a token, if it has one, is left in yylval.  Every return
 * passes the token code through freturn().
 */
216 yylex(){
217 	register char *p;
218 	register int c, i;
219 	char  *t, *xp;
220 	int n, j, k, x;
221 	static int sectbegin;		/* TRUE from "%%" until the first rule token is returned */
222 	static char token[TOKENSIZE];	/* text of the token being built */
223 	static int iter;		/* TRUE while inside {n,m}: next digits are a second ITER */
224 
225 # ifdef DEBUG
226 	yylval = 0;
227 # endif
228 
229 	if(sect == DEFSECTION) {		/* definitions section */
230 		while(!eof) {
231 			if(prev == '\n'){		/* next char is at beginning of line */
232 				getl(p=buf);
233 				switch(*p){
234 				case '%':
235 					switch(c= *(p+1)){
236 					case '%':
237 						lgate();
238 						if(!ratfor)fprintf(fout,"# ");
239 						fprintf(fout,"define YYNEWLINE %d\n",ctable['\n']);
240 						if(!ratfor)fprintf(fout,"yylex(){\nint nstr; extern int yyprevious;\n");
241 						sectbegin = TRUE;
						/* probe: one allocation as large as all five tree arrays
						 * together, released at once, before allocating each */
242 						i = treesize*(sizeof(*name)+sizeof(*left)+
243 							sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA;
244 						c = myalloc(i,1);
245 						if(c == 0)
246 							error("Too little core for parse tree");
247 						p = c;
248 						cfree(p,i,1);
249 						name = myalloc(treesize,sizeof(*name));
250 						left = myalloc(treesize,sizeof(*left));
251 						right = myalloc(treesize,sizeof(*right));
252 						nullstr = myalloc(treesize,sizeof(*nullstr));
253 						parent = myalloc(treesize,sizeof(*parent));
254 						if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0)
255 							error("Too little core for parse tree");
256 						return(freturn(DELIM));
257 					case 'p': case 'P':	/* has overridden number of positions */
258 						while(*p && !digit(*p))p++;
259 						maxpos = siconv(p);
260 # ifdef DEBUG
261 						if (debug) printf("positions (%%p) now %d\n",maxpos);
262 # endif
263 						if(report == 2)report = 1;
264 						continue;
265 					case 'n': case 'N':	/* has overridden number of states */
266 						while(*p && !digit(*p))p++;
267 						nstates = siconv(p);
268 # ifdef DEBUG
269 						if(debug)printf( " no. states (%%n) now %d\n",nstates);
270 # endif
271 						if(report == 2)report = 1;
272 						continue;
273 					case 'e': case 'E':		/* has overridden number of tree nodes */
274 						while(*p && !digit(*p))p++;
275 						treesize = siconv(p);
276 # ifdef DEBUG
277 						if (debug) printf("treesize (%%e) now %d\n",treesize);
278 # endif
279 						if(report == 2)report = 1;
280 						continue;
281 					case 'o': case 'O':	/* sets outsize from the number on the line */
282 						while (*p && !digit(*p))p++;
283 						outsize = siconv(p);
284 						if (report ==2) report=1;
285 						continue;
286 					case 'a': case 'A':		/* has overridden number of transitions */
287 						while(*p && !digit(*p))p++;
288 						if(report == 2)report = 1;
289 						ntrans = siconv(p);
290 # ifdef DEBUG
291 						if (debug)printf("N. trans (%%a) now %d\n",ntrans);
292 # endif
293 						continue;
294 					case 'k': case 'K': /* overridden packed char classes */
295 						while (*p && !digit(*p))p++;
296 						if (report==2) report=1;
297 						cfree(pchar, pchlen, sizeof(*pchar));
298 						pchlen = siconv(p);
299 # ifdef DEBUG
300 						if (debug) printf( "Size classes (%%k) now %d\n",pchlen);
301 # endif
302 						pchar=pcptr=myalloc(pchlen, sizeof(*pchar));
303 						continue;
304 					case 't': case 'T': 	/* character set specifier */
305 						ZCH = atoi(p+2);
306 						if (ZCH < NCH) ZCH = NCH;
307 						if (ZCH > 2*NCH) error("ch table needs redeclaration");
308 						chset = TRUE;
309 						for(i = 0; i<ZCH; i++)
310 							ctable[i] = 0;
						/* each line up to the closing %T: a code number, then the
						 * characters that are to be given that code */
311 						while(getl(p) && scomp(p,"%T") != 0 && scomp(p,"%t") != 0){
312 							if((n = siconv(p)) <= 0 || n > ZCH){
313 								warning("Character value %d out of range",n);
314 								continue;
315 								}
316 							while(!space(*p) && *p) p++;
317 							while(space(*p)) p++;
318 							t = p;
319 							while(*t){
320 								c = ctrans(&t);
321 								if(ctable[c]){
322 									if (printable(c))
323 										warning("Character '%c' used twice",c);
324 									else
325 										warning("Character %o used twice",c);
326 									}
327 								else ctable[c] = n;
328 								t++;
329 								}
330 							p = buf;
331 							}
						/* give a code to every character the table left unassigned */
332 						{
333 						char chused[2*NCH]; int kr;
334 						for(i=0; i<ZCH; i++)
335 							chused[i]=0;
336 						for(i=0; i<NCH; i++)
337 							chused[ctable[i]]=1;
						/* NOTE(review): the scan below advances kr while chused[kr]
						 * is 0, so it stops on a code already marked used; confirm
						 * whether the test was meant to be != 0. */
338 						for(kr=i=1; i<NCH; i++)
339 							if (ctable[i]==0)
340 								{
341 								while (chused[kr] == 0)
342 									kr++;
343 								ctable[i]=kr;
344 								chused[kr]=1;
345 								}
346 						}
347 						lgate();
348 						continue;
349 					case 'r': case 'R':
350 						c = 'r';	/* fold 'R' to 'r', then fall into the shared code */
351 					case 'c': case 'C':
352 						if(lgatflg)
353 							error("Too late for language specifier");
354 						ratfor = (c == 'r');
355 						continue;
356 					case '{':	/* copy lines to fout up to the closing %} */
357 						lgate();
358 						while(getl(p) && scomp(p,"%}") != 0)
359 							fprintf(fout, "%s\n",p);
360 						if(p[0] == '%') continue;
361 						error("Premature eof");
362 					case 's': case 'S':		/* start conditions */
363 						lgate();
						/* skip the request word, then take each name separated by
						 * blanks, tabs or commas; name number sptr is written out
						 * as a define with value sptr*2 */
364 						while(*p && index(*p," \t,") < 0) p++;
365 						n = TRUE;
366 						while(n){
367 							while(*p && index(*p," \t,") >= 0) p++;
368 							t = p;
369 							while(*p && index(*p," \t,") < 0)p++;
370 							if(!*p) n = FALSE;
371 							*p++ = 0;
372 							if (*t == 0) continue;
373 							i = sptr*2;
374 							if(!ratfor)fprintf(fout,"# ");
375 							fprintf(fout,"define %s %d\n",t,i);
376 							scopy(t,sp);
377 							sname[sptr++] = sp;
378 							sname[sptr] = 0;	/* required by lookup */
379 							if(sptr >= STARTSIZE)
380 								error("Too many start conditions");
381 							sp += slength(sp) + 1;
382 							if(sp >= schar+STARTCHAR)
383 								error("Start conditions too long");
384 							}
385 						continue;
386 					default:
387 						warning("Invalid request %s",p);
388 						continue;
389 						}	/* end of switch after seeing '%' */
390 				case ' ': case '\t':		/* must be code */
391 					lgate();
392 					fprintf(fout, "%s\n",p);
393 					continue;
394 				default:		/* definition */
					/* cut the name off at the first white space; the rest of the
					 * line stays at bptr and is returned by the next call */
395 					while(*p && !space(*p)) p++;
396 					if(*p == 0)
397 						continue;
398 					prev = *p;
399 					*p = 0;
400 					bptr = p+1;
401 					yylval = buf;
402 					if(digit(buf[0]))
403 						warning("Substitution strings may not begin with digits");
404 					return(freturn(STR));
405 					}
406 				}
407 			/* still sect 1, but prev != '\n' */
408 			else {
409 				p = bptr;
410 				while(*p && space(*p)) p++;
411 				if(*p == 0)
412 					warning("No translation given - null string assumed");
413 				scopy(p,token);
414 				yylval = token;
415 				prev = '\n';
416 				return(freturn(STR));
417 				}
418 			}
419 		/* end of section one processing */
420 		}
421 	else if(sect == RULESECTION){		/* rules and actions */
422 		while(!eof){
423 			switch(c=gch()){
424 			case '\0':
425 				return(freturn(0));
426 			case '\n':
427 				if(prev == '\n') continue;
428 				x = NEWE;
429 				break;
430 			case ' ':
431 			case '\t':
				/* white space before any rule has been seen: cpyact() is
				 * called without writing a case label first */
432 				if(sectbegin == TRUE){
433 					cpyact();
434 					while((c=gch()) && c != '\n');
435 					continue;
436 					}
				/* white space after an expression: write the case label for
				 * this rule, call cpyact(), then skip to the end of the line */
437 				if(!funcflag)phead2();
438 				funcflag = TRUE;
439 				if(ratfor)fprintf(fout,"%d\n",30000+casecount);
440 				else fprintf(fout,"case %d:\n",casecount);
441 				if(cpyact()){
442 					if(ratfor)fprintf(fout,"goto 30997\n");
443 					else fprintf(fout,"break;\n");
444 					}
445 				while((c=gch()) && c != '\n');
446 				if(peek == ' ' || peek == '\t' || sectbegin == TRUE){
447 					warning("Executable statements should occur right after %%");
448 					continue;
449 					}
450 				x = NEWE;
451 				break;
452 			case '%':
453 				if(prev != '\n') goto character;
454 				if(peek == '{'){	/* included code */
455 					getl(buf);
456 					while(!eof && getl(buf) && scomp("%}",buf) != 0)
457 						fprintf(fout,"%s\n",buf);
458 					continue;
459 					}
460 				if(peek == '%'){
461 					c = gch();
462 					c = gch();
463 					x = DELIM;
464 					break;
465 					}
466 				goto character;
467 			case '|':
				/* '|' followed by white space: only a case label is written
				 * and casecount advanced; no token is returned */
468 				if(peek == ' ' || peek == '\t' || peek == '\n'){
469 					if(ratfor)fprintf(fout,"%d\n",30000+casecount++);
470 					else fprintf(fout,"case %d:\n",casecount++);
471 					continue;
472 					}
473 				x = '|';
474 				break;
475 			case '$':
				/* an operator only when followed by white space, '|' or '/' */
476 				if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){
477 					x = c;
478 					break;
479 					}
480 				goto character;
481 			case '^':
482 				if(prev != '\n' && scon != TRUE) goto character;	/* valid only at line begin */
483 				x = c;
484 				break;
485 			case '?':
486 			case '+':
487 			case '.':
488 			case '*':
489 			case '(':
490 			case ')':
491 			case ',':
492 			case '/':
493 				x = c;
494 				break;
495 			case '}':
496 				iter = FALSE;
497 				x = c;
498 				break;
499 			case '{':	/* either iteration or definition */
500 				if(digit(c=gch())){	/* iteration */
501 					iter = TRUE;
502 				ieval:
					/* collect the digits, push the character after them back
					 * with munput() and return the number as ITER */
503 					i = 0;
504 					while(digit(c)){
505 						token[i++] = c;
506 						c = gch();
507 						}
508 					token[i] = 0;
509 					yylval = siconv(token);
510 					munput('c',c);
511 					x = ITER;
512 					break;
513 					}
514 				else {		/* definition */
					/* look the name up in def[]; when found its substitution
					 * text is pushed back with munput() and scanning goes on */
515 					i = 0;
516 					while(c && c!='}'){
517 						token[i++] = c;
518 						c = gch();
519 						}
520 					token[i] = 0;
521 					i = lookup(token,def);
522 					if(i < 0)
523 						warning("Definition %s not found",token);
524 					else
525 						munput('s',subs[i]);
526 					continue;
527 					}
528 			case '<':		/* start condition ? */
529 				if(prev != '\n')		/* not at line begin, not start */
530 					goto character;
				/* append index+1 of every listed condition at slptr, ending
				 * the list with a 0 */
531 				t = slptr;
532 				do {
533 					i = 0;
534 					c = gch();
535 					while(c != ',' && c && c != '>'){
536 						token[i++] = c;
537 						c = gch();
538 						}
539 					token[i] = 0;
540 					if(i == 0)
541 						goto character;
542 					i = lookup(token,sname);
543 					if(i < 0) {
544 						warning("Undefined start condition %s",token);
545 						continue;
546 						}
547 					*slptr++ = i+1;
548 					} while(c && c != '>');
549 				*slptr++ = 0;
550 				/* check if previous value re-usable */
551 				for (xp=slist; xp<t; )
552 					{
553 					if (strcmp(xp, t)==0)
554 						break;
555 					while (*xp++);
556 					}
557 				if (xp<t)
558 					{
559 					/* re-use previous pointer to string */
560 					slptr=t;
561 					t=xp;
562 					}
563 				if(slptr > slist+STARTSIZE)		/* note not packed ! */
564 					error("Too many start conditions used");
565 				yylval = t;
566 				x = SCON;
567 				break;
568 			case '"':
				/* quoted string: empty gives NULLS, one character gives CHAR,
				 * anything longer gives STR */
569 				i = 0;
570 				while((c=gch()) && c != '"' && c != '\n'){
571 					if(c == '\\') c = usescape(c=gch());
572 					token[i++] = c;
					/* >= (not >): token[] has TOKENSIZE bytes and one must stay
					 * free for the terminating NUL stored after the loop */
573 					if(i >= TOKENSIZE){
574 						warning("String too long");
575 						i = TOKENSIZE-1;
576 						break;
577 						}
578 					}
579 				if(c == '\n') {
					/* report the error against the line the string began on */
580 					yyline--;
581 					warning("Non-terminated string");
582 					yyline++;
583 					}
584 				token[i] = 0;
585 				if(i == 0)x = NULLS;
586 				else if(i == 1){
587 					yylval = token[0];
588 					x = CHAR;
589 					}
590 				else {
591 					yylval = token;
592 					x = STR;
593 					}
594 				break;
595 			case '[':
				/* character class: mark the members in symbol[], then turn
				 * the marks into a string of member characters */
596 				for(i=1;i<NCH;i++) symbol[i] = 0;
597 				x = CCL;
598 				if((c = gch()) == '^'){
599 					x = NCCL;
600 					c = gch();
601 					}
602 				while(c != ']' && c){
603 					if(c == '\\') c = usescape(c=gch());
604 					symbol[c] = 1;
605 					j = c;
606 					if((c=gch()) == '-' && peek != ']'){		/* range specified */
607 						c = gch();
608 						if(c == '\\') c = usescape(c=gch());
609 						k = c;
610 						if(j > k) {
611 							n = j;
612 							j = k;
613 							k = n;
614 							}
615 						if(!(('A' <= j && k <= 'Z') ||
616 						     ('a' <= j && k <= 'z') ||
617 						     ('0' <= j && k <= '9')))
618 							warning("Non-portable Character Class");
619 						for(n=j+1;n<=k;n++)
620 							symbol[n] = 1;		/* implementation dependent */
621 						c = gch();
622 						}
623 					}
624 				/* try to pack ccl's */
625 				i = 0;
626 				for(j=0;j<NCH;j++)
627 					if(symbol[j])token[i++] = j;
628 				token[i] = 0;
629 				p = ccptr;
630 				if(optim){
					/* search the stored classes for one identical to this one */
631 					p = ccl;
632 					while(p <ccptr && scomp(token,p) != 0)p++;
633 					}
634 				if(p < ccptr)	/* found it */
635 					yylval = p;
636 				else {
637 					yylval = ccptr;
638 					scopy(token,ccptr);
639 					ccptr += slength(token) + 1;
640 					if(ccptr >= ccl+CCLSIZE)
641 						error("Too many large character classes");
642 					}
643 				cclinter(x==CCL);
644 				break;
645 			case '\\':
646 				c = usescape(c=gch());
				/* no break: the escaped character is handled as ordinary text */
647 			default:
648 			character:
649 				if(iter){	/* second part of an iteration */
650 					iter = FALSE;
651 					if('0' <= c && c <= '9')
652 						goto ieval;
653 					}
654 				if(alpha(peek)){
					/* gather a run of letters into one STR; when a '?', '*' or
					 * '+' follows, the last letter is pushed back so that the
					 * operator applies to that letter alone */
655 					i = 0;
656 					yylval = token;
657 					token[i++] = c;
658 					while(alpha(peek))
659 						token[i++] = gch();
660 					if(peek == '?' || peek == '*' || peek == '+')
661 						munput('c',token[--i]);
662 					token[i] = 0;
663 					if(i == 1){
664 						yylval = token[0];
665 						x = CHAR;
666 						}
667 					else x = STR;
668 					}
669 				else {
670 					yylval = c;
671 					x = CHAR;
672 					}
673 				}
			/* scon records whether the token just returned was SCON; the
			 * '^' case above tests it */
674 			scon = FALSE;
675 			if(x == SCON)scon = TRUE;
676 			sectbegin = FALSE;
677 			return(freturn(x));
678 			}
679 		}
680 	/* section three */
681 	ptail();
682 # ifdef DEBUG
683 	if(debug)
684 		fprintf(fout,"\n/*this comes from section three - debug */\n");
685 # endif
686 	while(getl(buf) && !eof)
687 		fprintf(fout,"%s\n",buf);
688 	return(freturn(0));
689 	}
690 /* end of yylex */
691 # ifdef DEBUG
/*
 * freturn - DEBUG-only tracing hook through which yylex returns tokens.
 * With yydebug set it prints the token (via allprint() when the code is
 * below NCH, as a number otherwise) followed by yylval in the form that
 * suits the token: strpt() for STR/CCL/NCCL, allprint() for CHAR, a
 * plain number for anything else.  The token code is returned unchanged.
 */
692 freturn(i)
693   int i;
694 {
695 	if(!yydebug)
696 		return(i);
697 	printf("now return ");
698 	if(i >= NCH)
699 		printf("%d",i);
700 	else
701 		allprint(i);
702 	printf("   yylval = ");
703 	if(i == STR ||
704 	   i == CCL ||
705 	   i == NCCL)
706 		strpt(yylval);
707 	else if(i == CHAR)
708 		allprint(yylval);
709 	else
710 		printf("%d",yylval);
711 	putchar('\n');
712 	return(i);
713 }
714 # endif
715