1 /*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Robert Paul Corbett.
7 *
8 * %sccs.include.redist.c%
9 */
10
#ifndef lint
/* SCCS identification string for this file; omitted when `lint' is defined. */
static char sccsid[] = "@(#)reader.c 5.7 (Berkeley) 01/20/91";
#endif /* not lint */
14
15 #include "defs.h"
16
/* The line size must be a positive integer. One hundred was chosen */
/* because few lines in Yacc input grammars exceed 100 characters. */
/* Note that if a line exceeds LINESIZE characters, the line buffer */
/* will be expanded to accommodate it. */
21
#define LINESIZE 100

/* Scratch buffer that cachec() appends to, the index of its next free */
/* slot, and its allocated size in bytes. */
char *cache;
int cinc, cache_size;

/* Table of distinct <tag> strings built by get_tag(): entries in use, */
/* allocated capacity, and the table itself. */
int ntags, tagmax;
char **tag_table;

/* saw_eof is set by get_line() once the input is exhausted; unionized */
/* is set by copy_union() after a %union has been copied. */
char saw_eof, unionized;
/* line holds the current input line (terminated by '\n', not NUL; 0 at */
/* end of input); cptr is the scanning position inside it. */
char *cptr, *line;
/* Allocated size of line in bytes. */
int linesize;

/* Start symbol, set by declare_start() or advance_to_start(). */
bucket *goal;
/* Precedence level; declare_tokens() increments it for every */
/* declaration other than %token. */
int prec;
/* Counter used by insert_empty_rule() to build "$$N" symbol names. */
int gensym;
/* Set by copy_action(); cleared by end_rule() and add_symbol(). */
char last_was_action;

/* Allocated length of pitem, and the right-hand-side symbols of all */
/* rules; a null entry terminates each rule. */
int maxitems;
bucket **pitem;

/* Allocated length of plhs (and of rprec/rassoc), and the left-hand-side */
/* symbol of each rule. */
int maxrules;
bucket **plhs;

/* Size of, and pointer to, the packed symbol names built by pack_names(). */
int name_pool_size;
char *name_pool;

/* printf format used to emit #line directives (line number, file name). */
char line_format[] = "#line %d \"%s\"\n";
49
50
cachec(c)51 cachec(c)
52 int c;
53 {
54 assert(cinc >= 0);
55 if (cinc >= cache_size)
56 {
57 cache_size += 256;
58 cache = REALLOC(cache, cache_size);
59 if (cache == 0) no_space();
60 }
61 cache[cinc] = c;
62 ++cinc;
63 }
64
65
get_line()66 get_line()
67 {
68 register FILE *f = input_file;
69 register int c;
70 register int i;
71
72 if (saw_eof || (c = getc(f)) == EOF)
73 {
74 if (line) { FREE(line); line = 0; }
75 cptr = 0;
76 saw_eof = 1;
77 return;
78 }
79
80 if (line == 0 || linesize != (LINESIZE + 1))
81 {
82 if (line) FREE(line);
83 linesize = LINESIZE + 1;
84 line = MALLOC(linesize);
85 if (line == 0) no_space();
86 }
87
88 i = 0;
89 ++lineno;
90 for (;;)
91 {
92 line[i] = c;
93 if (c == '\n') { cptr = line; return; }
94 if (++i >= linesize)
95 {
96 linesize += LINESIZE;
97 line = REALLOC(line, linesize);
98 if (line == 0) no_space();
99 }
100 c = getc(f);
101 if (c == EOF)
102 {
103 line[i] = '\n';
104 saw_eof = 1;
105 cptr = line;
106 return;
107 }
108 }
109 }
110
111
112 char *
dup_line()113 dup_line()
114 {
115 register char *p, *s, *t;
116
117 if (line == 0) return (0);
118 s = line;
119 while (*s != '\n') ++s;
120 p = MALLOC(s - line + 1);
121 if (p == 0) no_space();
122
123 s = line;
124 t = p;
125 while ((*t++ = *s++) != '\n') continue;
126 return (p);
127 }
128
129
skip_comment()130 skip_comment()
131 {
132 register char *s;
133
134 int st_lineno = lineno;
135 char *st_line = dup_line();
136 char *st_cptr = st_line + (cptr - line);
137
138 s = cptr + 2;
139 for (;;)
140 {
141 if (*s == '*' && s[1] == '/')
142 {
143 cptr = s + 2;
144 FREE(st_line);
145 return;
146 }
147 if (*s == '\n')
148 {
149 get_line();
150 if (line == 0)
151 unterminated_comment(st_lineno, st_line, st_cptr);
152 s = cptr;
153 }
154 else
155 ++s;
156 }
157 }
158
159
160 int
nextc()161 nextc()
162 {
163 register char *s;
164
165 if (line == 0)
166 {
167 get_line();
168 if (line == 0)
169 return (EOF);
170 }
171
172 s = cptr;
173 for (;;)
174 {
175 switch (*s)
176 {
177 case '\n':
178 get_line();
179 if (line == 0) return (EOF);
180 s = cptr;
181 break;
182
183 case ' ':
184 case '\t':
185 case '\f':
186 case '\r':
187 case '\v':
188 case ',':
189 case ';':
190 ++s;
191 break;
192
193 case '\\':
194 cptr = s;
195 return ('%');
196
197 case '/':
198 if (s[1] == '*')
199 {
200 cptr = s;
201 skip_comment();
202 s = cptr;
203 break;
204 }
205 else if (s[1] == '/')
206 {
207 get_line();
208 if (line == 0) return (EOF);
209 s = cptr;
210 break;
211 }
212 /* fall through */
213
214 default:
215 cptr = s;
216 return (*s);
217 }
218 }
219 }
220
221
222 int
keyword()223 keyword()
224 {
225 register int c;
226 char *t_cptr = cptr;
227
228 c = *++cptr;
229 if (isalpha(c))
230 {
231 cinc = 0;
232 for (;;)
233 {
234 if (isalpha(c))
235 {
236 if (isupper(c)) c = tolower(c);
237 cachec(c);
238 }
239 else if (isdigit(c) || c == '_' || c == '.' || c == '$')
240 cachec(c);
241 else
242 break;
243 c = *++cptr;
244 }
245 cachec(NUL);
246
247 if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0)
248 return (TOKEN);
249 if (strcmp(cache, "type") == 0)
250 return (TYPE);
251 if (strcmp(cache, "left") == 0)
252 return (LEFT);
253 if (strcmp(cache, "right") == 0)
254 return (RIGHT);
255 if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0)
256 return (NONASSOC);
257 if (strcmp(cache, "start") == 0)
258 return (START);
259 if (strcmp(cache, "union") == 0)
260 return (UNION);
261 if (strcmp(cache, "ident") == 0)
262 return (IDENT);
263 }
264 else
265 {
266 ++cptr;
267 if (c == '{')
268 return (TEXT);
269 if (c == '%' || c == '\\')
270 return (MARK);
271 if (c == '<')
272 return (LEFT);
273 if (c == '>')
274 return (RIGHT);
275 if (c == '0')
276 return (TOKEN);
277 if (c == '2')
278 return (NONASSOC);
279 }
280 syntax_error(lineno, line, t_cptr);
281 /*NOTREACHED*/
282 }
283
284
copy_ident()285 copy_ident()
286 {
287 register int c;
288 register FILE *f = output_file;
289
290 c = nextc();
291 if (c == EOF) unexpected_EOF();
292 if (c != '"') syntax_error(lineno, line, cptr);
293 ++outline;
294 fprintf(f, "#ident \"");
295 for (;;)
296 {
297 c = *++cptr;
298 if (c == '\n')
299 {
300 fprintf(f, "\"\n");
301 return;
302 }
303 putc(c, f);
304 if (c == '"')
305 {
306 putc('\n', f);
307 ++cptr;
308 return;
309 }
310 }
311 }
312
313
/*
 * copy_text - copy a %{ ... %} section verbatim to text_file.
 *
 * Called with cptr just past the opening "%{" (keyword() has consumed
 * it), which is why the saved error position is cptr - 2.  Copying stops
 * at "%}" or "\}"; cptr is then left just past it.  String and character
 * literals and C comments are copied without being examined for the
 * terminator.  A "//" comment is rewritten as a C comment, and any
 * star-slash inside it is emitted as "* /".  Running out of input calls
 * unterminated_text(), unterminated_string() or unterminated_comment()
 * with the position at which the construct began.
 */
copy_text()
{
    register int c;
    int quote;
    register FILE *f = text_file;
    /* Nonzero while the output line in progress has not been ended. */
    int need_newline = 0;
    int t_lineno = lineno;
    char *t_line = dup_line();
    char *t_cptr = t_line + (cptr - line - 2);

    /* Nothing after "%{" on this line: start copying at the next one. */
    if (*cptr == '\n')
    {
        get_line();
        if (line == 0)
            unterminated_text(t_lineno, t_line, t_cptr);
    }
    if (!lflag) fprintf(f, line_format, lineno, input_file_name);

loop:
    c = *cptr++;
    switch (c)
    {
    case '\n':
    next_line:
        putc('\n', f);
        need_newline = 0;
        get_line();
        if (line) goto loop;
        unterminated_text(t_lineno, t_line, t_cptr);

    case '\'':
    case '"':
        {
            /* Remember where the literal starts for diagnostics. */
            int s_lineno = lineno;
            char *s_line = dup_line();
            char *s_cptr = s_line + (cptr - line - 1);

            quote = c;
            putc(c, f);
            for (;;)
            {
                c = *cptr++;
                putc(c, f);
                if (c == quote)
                {
                    need_newline = 1;
                    FREE(s_line);
                    goto loop;
                }
                if (c == '\n')
                    unterminated_string(s_lineno, s_line, s_cptr);
                if (c == '\\')
                {
                    /* Copy the escaped character; an escaped newline */
                    /* continues the literal on the next input line. */
                    c = *cptr++;
                    putc(c, f);
                    if (c == '\n')
                    {
                        get_line();
                        if (line == 0)
                            unterminated_string(s_lineno, s_line, s_cptr);
                    }
                }
            }
        }

    case '/':
        putc(c, f);
        need_newline = 1;
        c = *cptr;
        if (c == '/')
        {
            /* "//" comment: emit it as a C comment instead. */
            putc('*', f);
            while ((c = *++cptr) != '\n')
            {
                if (c == '*' && cptr[1] == '/')
                    fprintf(f, "* ");
                else
                    putc(c, f);
            }
            fprintf(f, "*/");
            goto next_line;
        }
        if (c == '*')
        {
            /* Remember where the comment starts for diagnostics. */
            int c_lineno = lineno;
            char *c_line = dup_line();
            char *c_cptr = c_line + (cptr - line - 1);

            putc('*', f);
            ++cptr;
            for (;;)
            {
                c = *cptr++;
                putc(c, f);
                if (c == '*' && *cptr == '/')
                {
                    putc('/', f);
                    ++cptr;
                    FREE(c_line);
                    goto loop;
                }
                if (c == '\n')
                {
                    get_line();
                    if (line == 0)
                        unterminated_comment(c_lineno, c_line, c_cptr);
                }
            }
        }
        need_newline = 1;
        goto loop;

    case '%':
    case '\\':
        if (*cptr == '}')
        {
            /* End of the section; finish a partial output line. */
            if (need_newline) putc('\n', f);
            ++cptr;
            FREE(t_line);
            return;
        }
        /* fall through */

    default:
        putc(c, f);
        need_newline = 1;
        goto loop;
    }
}
443
444
/*
 * copy_union - copy a %union declaration to text_file as
 * "typedef union { ... } YYSTYPE;".
 *
 * When dflag is set the same text, up to and including the closing
 * brace, is also written to union_file.  Called with cptr just past the
 * "%union" keyword, which is why the saved error position is cptr - 6.
 * A second %union is reported through over_unionized().  Copying stops
 * at the '}' that returns the brace depth to zero; string and character
 * literals and comments are copied without counting their braces.  A
 * "//" comment is rewritten as a C comment.
 */
copy_union()
{
    register int c;
    int quote;
    int depth;
    int u_lineno = lineno;
    char *u_line = dup_line();
    char *u_cptr = u_line + (cptr - line - 6);

    if (unionized) over_unionized(cptr - 6);
    unionized = 1;

    if (!lflag)
        fprintf(text_file, line_format, lineno, input_file_name);

    fprintf(text_file, "typedef union");
    if (dflag) fprintf(union_file, "typedef union");

    depth = 0;
loop:
    /* Every character is echoed before it is interpreted. */
    c = *cptr++;
    putc(c, text_file);
    if (dflag) putc(c, union_file);
    switch (c)
    {
    case '\n':
    next_line:
        get_line();
        if (line == 0) unterminated_union(u_lineno, u_line, u_cptr);
        goto loop;

    case '{':
        ++depth;
        goto loop;

    case '}':
        if (--depth == 0)
        {
            /* Only text_file receives the typedef name here. */
            fprintf(text_file, " YYSTYPE;\n");
            FREE(u_line);
            return;
        }
        goto loop;

    case '\'':
    case '"':
        {
            /* Remember where the literal starts for diagnostics. */
            int s_lineno = lineno;
            char *s_line = dup_line();
            char *s_cptr = s_line + (cptr - line - 1);

            quote = c;
            for (;;)
            {
                c = *cptr++;
                putc(c, text_file);
                if (dflag) putc(c, union_file);
                if (c == quote)
                {
                    FREE(s_line);
                    goto loop;
                }
                if (c == '\n')
                    unterminated_string(s_lineno, s_line, s_cptr);
                if (c == '\\')
                {
                    /* An escaped newline continues the literal. */
                    c = *cptr++;
                    putc(c, text_file);
                    if (dflag) putc(c, union_file);
                    if (c == '\n')
                    {
                        get_line();
                        if (line == 0)
                            unterminated_string(s_lineno, s_line, s_cptr);
                    }
                }
            }
        }

    case '/':
        c = *cptr;
        if (c == '/')
        {
            /* "//" comment: emit it as a C comment; a star-slash */
            /* inside it is written as "* /". */
            putc('*', text_file);
            if (dflag) putc('*', union_file);
            while ((c = *++cptr) != '\n')
            {
                if (c == '*' && cptr[1] == '/')
                {
                    fprintf(text_file, "* ");
                    if (dflag) fprintf(union_file, "* ");
                }
                else
                {
                    putc(c, text_file);
                    if (dflag) putc(c, union_file);
                }
            }
            fprintf(text_file, "*/\n");
            if (dflag) fprintf(union_file, "*/\n");
            goto next_line;
        }
        if (c == '*')
        {
            /* Remember where the comment starts for diagnostics. */
            int c_lineno = lineno;
            char *c_line = dup_line();
            char *c_cptr = c_line + (cptr - line - 1);

            putc('*', text_file);
            if (dflag) putc('*', union_file);
            ++cptr;
            for (;;)
            {
                c = *cptr++;
                putc(c, text_file);
                if (dflag) putc(c, union_file);
                if (c == '*' && *cptr == '/')
                {
                    putc('/', text_file);
                    if (dflag) putc('/', union_file);
                    ++cptr;
                    FREE(c_line);
                    goto loop;
                }
                if (c == '\n')
                {
                    get_line();
                    if (line == 0)
                        unterminated_comment(c_lineno, c_line, c_cptr);
                }
            }
        }
        goto loop;

    default:
        goto loop;
    }
}
583
584
/*
 * hexval - return the value (0..15) of the hexadecimal digit c, or -1 if
 * c is not a hexadecimal digit.  Both letter cases are accepted.
 */
int
hexval(c)
int c;
{
    int digit = -1;

    if ('0' <= c && c <= '9')
        digit = c - '0';
    else if ('A' <= c && c <= 'F')
        digit = 10 + (c - 'A');
    else if ('a' <= c && c <= 'f')
        digit = 10 + (c - 'a');

    return (digit);
}
597
598
599 bucket *
get_literal()600 get_literal()
601 {
602 register int c, quote;
603 register int i;
604 register int n;
605 register char *s;
606 register bucket *bp;
607 int s_lineno = lineno;
608 char *s_line = dup_line();
609 char *s_cptr = s_line + (cptr - line);
610
611 quote = *cptr++;
612 cinc = 0;
613 for (;;)
614 {
615 c = *cptr++;
616 if (c == quote) break;
617 if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr);
618 if (c == '\\')
619 {
620 char *c_cptr = cptr - 1;
621
622 c = *cptr++;
623 switch (c)
624 {
625 case '\n':
626 get_line();
627 if (line == 0) unterminated_string(s_lineno, s_line, s_cptr);
628 continue;
629
630 case '0': case '1': case '2': case '3':
631 case '4': case '5': case '6': case '7':
632 n = c - '0';
633 c = *cptr;
634 if (IS_OCTAL(c))
635 {
636 n = (n << 3) + (c - '0');
637 c = *++cptr;
638 if (IS_OCTAL(c))
639 {
640 n = (n << 3) + (c - '0');
641 ++cptr;
642 }
643 }
644 if (n > MAXCHAR) illegal_character(c_cptr);
645 c = n;
646 break;
647
648 case 'x':
649 c = *cptr++;
650 n = hexval(c);
651 if (n < 0 || n >= 16)
652 illegal_character(c_cptr);
653 for (;;)
654 {
655 c = *cptr;
656 i = hexval(c);
657 if (i < 0 || i >= 16) break;
658 ++cptr;
659 n = (n << 4) + i;
660 if (n > MAXCHAR) illegal_character(c_cptr);
661 }
662 c = n;
663 break;
664
665 case 'a': c = 7; break;
666 case 'b': c = '\b'; break;
667 case 'f': c = '\f'; break;
668 case 'n': c = '\n'; break;
669 case 'r': c = '\r'; break;
670 case 't': c = '\t'; break;
671 case 'v': c = '\v'; break;
672 }
673 }
674 cachec(c);
675 }
676 FREE(s_line);
677
678 n = cinc;
679 s = MALLOC(n);
680 if (s == 0) no_space();
681
682 for (i = 0; i < n; ++i)
683 s[i] = cache[i];
684
685 cinc = 0;
686 if (n == 1)
687 cachec('\'');
688 else
689 cachec('"');
690
691 for (i = 0; i < n; ++i)
692 {
693 c = ((unsigned char *)s)[i];
694 if (c == '\\' || c == cache[0])
695 {
696 cachec('\\');
697 cachec(c);
698 }
699 else if (isprint(c))
700 cachec(c);
701 else
702 {
703 cachec('\\');
704 switch (c)
705 {
706 case 7: cachec('a'); break;
707 case '\b': cachec('b'); break;
708 case '\f': cachec('f'); break;
709 case '\n': cachec('n'); break;
710 case '\r': cachec('r'); break;
711 case '\t': cachec('t'); break;
712 case '\v': cachec('v'); break;
713 default:
714 cachec(((c >> 6) & 7) + '0');
715 cachec(((c >> 3) & 7) + '0');
716 cachec((c & 7) + '0');
717 break;
718 }
719 }
720 }
721
722 if (n == 1)
723 cachec('\'');
724 else
725 cachec('"');
726
727 cachec(NUL);
728 bp = lookup(cache);
729 bp->class = TERM;
730 if (n == 1 && bp->value == UNDEFINED)
731 bp->value = *(unsigned char *)s;
732 FREE(s);
733
734 return (bp);
735 }
736
737
/*
 * is_reserved - return 1 if name is a symbol name reserved for internal
 * use, 0 otherwise.
 *
 * The reserved names are ".", "$accept", "$end", and "$$" followed by
 * one or more decimal digits (the form insert_empty_rule() generates).
 *
 * Characters are converted to unsigned char before being passed to
 * isdigit(), so names containing bytes with the high bit set are handled
 * safely.
 */
int
is_reserved(name)
char *name;
{
    char *s;

    if (strcmp(name, ".") == 0 ||
        strcmp(name, "$accept") == 0 ||
        strcmp(name, "$end") == 0)
        return (1);

    if (name[0] == '$' && name[1] == '$' &&
        isdigit((unsigned char) name[2]))
    {
        s = name + 3;
        while (isdigit((unsigned char) *s)) ++s;
        if (*s == '\0') return (1);
    }

    return (0);
}
758
759
760 bucket *
get_name()761 get_name()
762 {
763 register int c;
764
765 cinc = 0;
766 for (c = *cptr; IS_IDENT(c); c = *++cptr)
767 cachec(c);
768 cachec(NUL);
769
770 if (is_reserved(cache)) used_reserved(cache);
771
772 return (lookup(cache));
773 }
774
775
776 int
get_number()777 get_number()
778 {
779 register int c;
780 register int n;
781
782 n = 0;
783 for (c = *cptr; isdigit(c); c = *++cptr)
784 n = 10*n + (c - '0');
785
786 return (n);
787 }
788
789
790 char *
get_tag()791 get_tag()
792 {
793 register int c;
794 register int i;
795 register char *s;
796 int t_lineno = lineno;
797 char *t_line = dup_line();
798 char *t_cptr = t_line + (cptr - line);
799
800 ++cptr;
801 c = nextc();
802 if (c == EOF) unexpected_EOF();
803 if (!isalpha(c) && c != '_' && c != '$')
804 illegal_tag(t_lineno, t_line, t_cptr);
805
806 cinc = 0;
807 do { cachec(c); c = *++cptr; } while (IS_IDENT(c));
808 cachec(NUL);
809
810 c = nextc();
811 if (c == EOF) unexpected_EOF();
812 if (c != '>')
813 illegal_tag(t_lineno, t_line, t_cptr);
814 ++cptr;
815
816 for (i = 0; i < ntags; ++i)
817 {
818 if (strcmp(cache, tag_table[i]) == 0)
819 return (tag_table[i]);
820 }
821
822 if (ntags >= tagmax)
823 {
824 tagmax += 16;
825 tag_table = (char **)
826 (tag_table ? REALLOC(tag_table, tagmax*sizeof(char *))
827 : MALLOC(tagmax*sizeof(char *)));
828 if (tag_table == 0) no_space();
829 }
830
831 s = MALLOC(cinc);
832 if (s == 0) no_space();
833 strcpy(s, cache);
834 tag_table[ntags] = s;
835 ++ntags;
836 FREE(t_line);
837 return (s);
838 }
839
840
declare_tokens(assoc)841 declare_tokens(assoc)
842 int assoc;
843 {
844 register int c;
845 register bucket *bp;
846 int value;
847 char *tag = 0;
848
849 if (assoc != TOKEN) ++prec;
850
851 c = nextc();
852 if (c == EOF) unexpected_EOF();
853 if (c == '<')
854 {
855 tag = get_tag();
856 c = nextc();
857 if (c == EOF) unexpected_EOF();
858 }
859
860 for (;;)
861 {
862 if (isalpha(c) || c == '_' || c == '.' || c == '$')
863 bp = get_name();
864 else if (c == '\'' || c == '"')
865 bp = get_literal();
866 else
867 return;
868
869 if (bp == goal) tokenized_start(bp->name);
870 bp->class = TERM;
871
872 if (tag)
873 {
874 if (bp->tag && tag != bp->tag)
875 retyped_warning(bp->name);
876 bp->tag = tag;
877 }
878
879 if (assoc != TOKEN)
880 {
881 if (bp->prec && prec != bp->prec)
882 reprec_warning(bp->name);
883 bp->assoc = assoc;
884 bp->prec = prec;
885 }
886
887 c = nextc();
888 if (c == EOF) unexpected_EOF();
889 value = UNDEFINED;
890 if (isdigit(c))
891 {
892 value = get_number();
893 if (bp->value != UNDEFINED && value != bp->value)
894 revalued_warning(bp->name);
895 bp->value = value;
896 c = nextc();
897 if (c == EOF) unexpected_EOF();
898 }
899 }
900 }
901
902
declare_types()903 declare_types()
904 {
905 register int c;
906 register bucket *bp;
907 char *tag;
908
909 c = nextc();
910 if (c == EOF) unexpected_EOF();
911 if (c != '<') syntax_error(lineno, line, cptr);
912 tag = get_tag();
913
914 for (;;)
915 {
916 c = nextc();
917 if (isalpha(c) || c == '_' || c == '.' || c == '$')
918 bp = get_name();
919 else if (c == '\'' || c == '"')
920 bp = get_literal();
921 else
922 return;
923
924 if (bp->tag && tag != bp->tag)
925 retyped_warning(bp->name);
926 bp->tag = tag;
927 }
928 }
929
930
declare_start()931 declare_start()
932 {
933 register int c;
934 register bucket *bp;
935
936 c = nextc();
937 if (c == EOF) unexpected_EOF();
938 if (!isalpha(c) && c != '_' && c != '.' && c != '$')
939 syntax_error(lineno, line, cptr);
940 bp = get_name();
941 if (bp->class == TERM)
942 terminal_start(bp->name);
943 if (goal && goal != bp)
944 restarted_warning();
945 goal = bp;
946 }
947
948
read_declarations()949 read_declarations()
950 {
951 register int c, k;
952
953 cache_size = 256;
954 cache = MALLOC(cache_size);
955 if (cache == 0) no_space();
956
957 for (;;)
958 {
959 c = nextc();
960 if (c == EOF) unexpected_EOF();
961 if (c != '%') syntax_error(lineno, line, cptr);
962 switch (k = keyword())
963 {
964 case MARK:
965 return;
966
967 case IDENT:
968 copy_ident();
969 break;
970
971 case TEXT:
972 copy_text();
973 break;
974
975 case UNION:
976 copy_union();
977 break;
978
979 case TOKEN:
980 case LEFT:
981 case RIGHT:
982 case NONASSOC:
983 declare_tokens(k);
984 break;
985
986 case TYPE:
987 declare_types();
988 break;
989
990 case START:
991 declare_start();
992 break;
993 }
994 }
995 }
996
997
initialize_grammar()998 initialize_grammar()
999 {
1000 nitems = 4;
1001 maxitems = 300;
1002 pitem = (bucket **) MALLOC(maxitems*sizeof(bucket *));
1003 if (pitem == 0) no_space();
1004 pitem[0] = 0;
1005 pitem[1] = 0;
1006 pitem[2] = 0;
1007 pitem[3] = 0;
1008
1009 nrules = 3;
1010 maxrules = 100;
1011 plhs = (bucket **) MALLOC(maxrules*sizeof(bucket *));
1012 if (plhs == 0) no_space();
1013 plhs[0] = 0;
1014 plhs[1] = 0;
1015 plhs[2] = 0;
1016 rprec = (short *) MALLOC(maxrules*sizeof(short));
1017 if (rprec == 0) no_space();
1018 rprec[0] = 0;
1019 rprec[1] = 0;
1020 rprec[2] = 0;
1021 rassoc = (char *) MALLOC(maxrules*sizeof(char));
1022 if (rassoc == 0) no_space();
1023 rassoc[0] = TOKEN;
1024 rassoc[1] = TOKEN;
1025 rassoc[2] = TOKEN;
1026 }
1027
1028
expand_items()1029 expand_items()
1030 {
1031 maxitems += 300;
1032 pitem = (bucket **) REALLOC(pitem, maxitems*sizeof(bucket *));
1033 if (pitem == 0) no_space();
1034 }
1035
1036
expand_rules()1037 expand_rules()
1038 {
1039 maxrules += 100;
1040 plhs = (bucket **) REALLOC(plhs, maxrules*sizeof(bucket *));
1041 if (plhs == 0) no_space();
1042 rprec = (short *) REALLOC(rprec, maxrules*sizeof(short));
1043 if (rprec == 0) no_space();
1044 rassoc = (char *) REALLOC(rassoc, maxrules*sizeof(char));
1045 if (rassoc == 0) no_space();
1046 }
1047
1048
advance_to_start()1049 advance_to_start()
1050 {
1051 register int c;
1052 register bucket *bp;
1053 char *s_cptr;
1054 int s_lineno;
1055
1056 for (;;)
1057 {
1058 c = nextc();
1059 if (c != '%') break;
1060 s_cptr = cptr;
1061 switch (keyword())
1062 {
1063 case MARK:
1064 no_grammar();
1065
1066 case TEXT:
1067 copy_text();
1068 break;
1069
1070 case START:
1071 declare_start();
1072 break;
1073
1074 default:
1075 syntax_error(lineno, line, s_cptr);
1076 }
1077 }
1078
1079 c = nextc();
1080 if (!isalpha(c) && c != '_' && c != '.' && c != '_')
1081 syntax_error(lineno, line, cptr);
1082 bp = get_name();
1083 if (goal == 0)
1084 {
1085 if (bp->class == TERM)
1086 terminal_start(bp->name);
1087 goal = bp;
1088 }
1089
1090 s_lineno = lineno;
1091 c = nextc();
1092 if (c == EOF) unexpected_EOF();
1093 if (c != ':') syntax_error(lineno, line, cptr);
1094 start_rule(bp, s_lineno);
1095 ++cptr;
1096 }
1097
1098
start_rule(bp,s_lineno)1099 start_rule(bp, s_lineno)
1100 register bucket *bp;
1101 int s_lineno;
1102 {
1103 if (bp->class == TERM)
1104 terminal_lhs(s_lineno);
1105 bp->class = NONTERM;
1106 if (nrules >= maxrules)
1107 expand_rules();
1108 plhs[nrules] = bp;
1109 rprec[nrules] = UNDEFINED;
1110 rassoc[nrules] = TOKEN;
1111 }
1112
1113
end_rule()1114 end_rule()
1115 {
1116 register int i;
1117
1118 if (!last_was_action && plhs[nrules]->tag)
1119 {
1120 for (i = nitems - 1; pitem[i]; --i) continue;
1121 if (pitem[i+1] == 0 || pitem[i+1]->tag != plhs[nrules]->tag)
1122 default_action_warning();
1123 }
1124
1125 last_was_action = 0;
1126 if (nitems >= maxitems) expand_items();
1127 pitem[nitems] = 0;
1128 ++nitems;
1129 ++nrules;
1130 }
1131
1132
insert_empty_rule()1133 insert_empty_rule()
1134 {
1135 register bucket *bp, **bpp;
1136
1137 assert(cache);
1138 sprintf(cache, "$$%d", ++gensym);
1139 bp = make_bucket(cache);
1140 last_symbol->next = bp;
1141 last_symbol = bp;
1142 bp->tag = plhs[nrules]->tag;
1143 bp->class = NONTERM;
1144
1145 if ((nitems += 2) > maxitems)
1146 expand_items();
1147 bpp = pitem + nitems - 1;
1148 *bpp-- = bp;
1149 while (bpp[0] = bpp[-1]) --bpp;
1150
1151 if (++nrules >= maxrules)
1152 expand_rules();
1153 plhs[nrules] = plhs[nrules-1];
1154 plhs[nrules-1] = bp;
1155 rprec[nrules] = rprec[nrules-1];
1156 rprec[nrules-1] = 0;
1157 rassoc[nrules] = rassoc[nrules-1];
1158 rassoc[nrules-1] = TOKEN;
1159 }
1160
1161
add_symbol()1162 add_symbol()
1163 {
1164 register int c;
1165 register bucket *bp;
1166 int s_lineno = lineno;
1167
1168 c = *cptr;
1169 if (c == '\'' || c == '"')
1170 bp = get_literal();
1171 else
1172 bp = get_name();
1173
1174 c = nextc();
1175 if (c == ':')
1176 {
1177 end_rule();
1178 start_rule(bp, s_lineno);
1179 ++cptr;
1180 return;
1181 }
1182
1183 if (last_was_action)
1184 insert_empty_rule();
1185 last_was_action = 0;
1186
1187 if (++nitems > maxitems)
1188 expand_items();
1189 pitem[nitems-1] = bp;
1190 }
1191
1192
copy_action()1193 copy_action()
1194 {
1195 register int c;
1196 register int i, n;
1197 int depth;
1198 int quote;
1199 char *tag;
1200 register FILE *f = action_file;
1201 int a_lineno = lineno;
1202 char *a_line = dup_line();
1203 char *a_cptr = a_line + (cptr - line);
1204
1205 if (last_was_action)
1206 insert_empty_rule();
1207 last_was_action = 1;
1208
1209 fprintf(f, "case %d:\n", nrules - 2);
1210 if (!lflag)
1211 fprintf(f, line_format, lineno, input_file_name);
1212 if (*cptr == '=') ++cptr;
1213
1214 n = 0;
1215 for (i = nitems - 1; pitem[i]; --i) ++n;
1216
1217 depth = 0;
1218 loop:
1219 c = *cptr;
1220 if (c == '$')
1221 {
1222 if (cptr[1] == '<')
1223 {
1224 int d_lineno = lineno;
1225 char *d_line = dup_line();
1226 char *d_cptr = d_line + (cptr - line);
1227
1228 ++cptr;
1229 tag = get_tag();
1230 c = *cptr;
1231 if (c == '$')
1232 {
1233 fprintf(f, "yyval.%s", tag);
1234 ++cptr;
1235 FREE(d_line);
1236 goto loop;
1237 }
1238 else if (isdigit(c))
1239 {
1240 i = get_number();
1241 if (i > n) dollar_warning(d_lineno, i);
1242 fprintf(f, "yyvsp[%d].%s", i - n, tag);
1243 FREE(d_line);
1244 goto loop;
1245 }
1246 else if (c == '-' && isdigit(cptr[1]))
1247 {
1248 ++cptr;
1249 i = -get_number() - n;
1250 fprintf(f, "yyvsp[%d].%s", i, tag);
1251 FREE(d_line);
1252 goto loop;
1253 }
1254 else
1255 dollar_error(d_lineno, d_line, d_cptr);
1256 }
1257 else if (cptr[1] == '$')
1258 {
1259 if (ntags)
1260 {
1261 tag = plhs[nrules]->tag;
1262 if (tag == 0) untyped_lhs();
1263 fprintf(f, "yyval.%s", tag);
1264 }
1265 else
1266 fprintf(f, "yyval");
1267 cptr += 2;
1268 goto loop;
1269 }
1270 else if (isdigit(cptr[1]))
1271 {
1272 ++cptr;
1273 i = get_number();
1274 if (ntags)
1275 {
1276 if (i <= 0 || i > n)
1277 unknown_rhs(i);
1278 tag = pitem[nitems + i - n - 1]->tag;
1279 if (tag == 0) untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1280 fprintf(f, "yyvsp[%d].%s", i - n, tag);
1281 }
1282 else
1283 {
1284 if (i > n)
1285 dollar_warning(lineno, i);
1286 fprintf(f, "yyvsp[%d]", i - n);
1287 }
1288 goto loop;
1289 }
1290 else if (cptr[1] == '-')
1291 {
1292 cptr += 2;
1293 i = get_number();
1294 if (ntags)
1295 unknown_rhs(-i);
1296 fprintf(f, "yyvsp[%d]", -i - n);
1297 goto loop;
1298 }
1299 }
1300 if (isalpha(c) || c == '_' || c == '$')
1301 {
1302 do
1303 {
1304 putc(c, f);
1305 c = *++cptr;
1306 } while (isalnum(c) || c == '_' || c == '$');
1307 goto loop;
1308 }
1309 putc(c, f);
1310 ++cptr;
1311 switch (c)
1312 {
1313 case '\n':
1314 next_line:
1315 get_line();
1316 if (line) goto loop;
1317 unterminated_action(a_lineno, a_line, a_cptr);
1318
1319 case ';':
1320 if (depth > 0) goto loop;
1321 fprintf(f, "\nbreak;\n");
1322 return;
1323
1324 case '{':
1325 ++depth;
1326 goto loop;
1327
1328 case '}':
1329 if (--depth > 0) goto loop;
1330 fprintf(f, "\nbreak;\n");
1331 return;
1332
1333 case '\'':
1334 case '"':
1335 {
1336 int s_lineno = lineno;
1337 char *s_line = dup_line();
1338 char *s_cptr = s_line + (cptr - line - 1);
1339
1340 quote = c;
1341 for (;;)
1342 {
1343 c = *cptr++;
1344 putc(c, f);
1345 if (c == quote)
1346 {
1347 FREE(s_line);
1348 goto loop;
1349 }
1350 if (c == '\n')
1351 unterminated_string(s_lineno, s_line, s_cptr);
1352 if (c == '\\')
1353 {
1354 c = *cptr++;
1355 putc(c, f);
1356 if (c == '\n')
1357 {
1358 get_line();
1359 if (line == 0)
1360 unterminated_string(s_lineno, s_line, s_cptr);
1361 }
1362 }
1363 }
1364 }
1365
1366 case '/':
1367 c = *cptr;
1368 if (c == '/')
1369 {
1370 putc('*', f);
1371 while ((c = *++cptr) != '\n')
1372 {
1373 if (c == '*' && cptr[1] == '/')
1374 fprintf(f, "* ");
1375 else
1376 putc(c, f);
1377 }
1378 fprintf(f, "*/\n");
1379 goto next_line;
1380 }
1381 if (c == '*')
1382 {
1383 int c_lineno = lineno;
1384 char *c_line = dup_line();
1385 char *c_cptr = c_line + (cptr - line - 1);
1386
1387 putc('*', f);
1388 ++cptr;
1389 for (;;)
1390 {
1391 c = *cptr++;
1392 putc(c, f);
1393 if (c == '*' && *cptr == '/')
1394 {
1395 putc('/', f);
1396 ++cptr;
1397 FREE(c_line);
1398 goto loop;
1399 }
1400 if (c == '\n')
1401 {
1402 get_line();
1403 if (line == 0)
1404 unterminated_comment(c_lineno, c_line, c_cptr);
1405 }
1406 }
1407 }
1408 goto loop;
1409
1410 default:
1411 goto loop;
1412 }
1413 }
1414
1415
1416 int
mark_symbol()1417 mark_symbol()
1418 {
1419 register int c;
1420 register bucket *bp;
1421
1422 c = cptr[1];
1423 if (c == '%' || c == '\\')
1424 {
1425 cptr += 2;
1426 return (1);
1427 }
1428
1429 if (c == '=')
1430 cptr += 2;
1431 else if ((c == 'p' || c == 'P') &&
1432 ((c = cptr[2]) == 'r' || c == 'R') &&
1433 ((c = cptr[3]) == 'e' || c == 'E') &&
1434 ((c = cptr[4]) == 'c' || c == 'C') &&
1435 ((c = cptr[5], !IS_IDENT(c))))
1436 cptr += 5;
1437 else
1438 syntax_error(lineno, line, cptr);
1439
1440 c = nextc();
1441 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1442 bp = get_name();
1443 else if (c == '\'' || c == '"')
1444 bp = get_literal();
1445 else
1446 {
1447 syntax_error(lineno, line, cptr);
1448 /*NOTREACHED*/
1449 }
1450
1451 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1452 prec_redeclared();
1453
1454 rprec[nrules] = bp->prec;
1455 rassoc[nrules] = bp->assoc;
1456 return (0);
1457 }
1458
1459
read_grammar()1460 read_grammar()
1461 {
1462 register int c;
1463
1464 initialize_grammar();
1465 advance_to_start();
1466
1467 for (;;)
1468 {
1469 c = nextc();
1470 if (c == EOF) break;
1471 if (isalpha(c) || c == '_' || c == '.' || c == '$' || c == '\'' ||
1472 c == '"')
1473 add_symbol();
1474 else if (c == '{' || c == '=')
1475 copy_action();
1476 else if (c == '|')
1477 {
1478 end_rule();
1479 start_rule(plhs[nrules-1], 0);
1480 ++cptr;
1481 }
1482 else if (c == '%')
1483 {
1484 if (mark_symbol()) break;
1485 }
1486 else
1487 syntax_error(lineno, line, cptr);
1488 }
1489 end_rule();
1490 }
1491
1492
free_tags()1493 free_tags()
1494 {
1495 register int i;
1496
1497 if (tag_table == 0) return;
1498
1499 for (i = 0; i < ntags; ++i)
1500 {
1501 assert(tag_table[i]);
1502 FREE(tag_table[i]);
1503 }
1504 FREE(tag_table);
1505 }
1506
1507
pack_names()1508 pack_names()
1509 {
1510 register bucket *bp;
1511 register char *p, *s, *t;
1512
1513 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1514 for (bp = first_symbol; bp; bp = bp->next)
1515 name_pool_size += strlen(bp->name) + 1;
1516 name_pool = MALLOC(name_pool_size);
1517 if (name_pool == 0) no_space();
1518
1519 strcpy(name_pool, "$accept");
1520 strcpy(name_pool+8, "$end");
1521 t = name_pool + 13;
1522 for (bp = first_symbol; bp; bp = bp->next)
1523 {
1524 p = t;
1525 s = bp->name;
1526 while (*t++ = *s++) continue;
1527 FREE(bp->name);
1528 bp->name = p;
1529 }
1530 }
1531
1532
check_symbols()1533 check_symbols()
1534 {
1535 register bucket *bp;
1536
1537 if (goal->class == UNKNOWN)
1538 undefined_goal(goal->name);
1539
1540 for (bp = first_symbol; bp; bp = bp->next)
1541 {
1542 if (bp->class == UNKNOWN)
1543 {
1544 undefined_symbol_warning(bp->name);
1545 bp->class = TERM;
1546 }
1547 }
1548 }
1549
1550
/*
 * pack_symbols - number the symbols and build the symbol_name,
 * symbol_value, symbol_prec and symbol_assoc arrays.
 *
 * Index 0 is "$end" and index start_symbol (== ntokens) is "$accept";
 * terminals occupy 1 .. ntokens-1, the goal symbol start_symbol+1, and
 * the remaining nonterminals follow in list order.  Terminals without
 * an explicit value are given the lowest unused values from 257 upward.
 */
pack_symbols()
{
    register bucket *bp;
    register bucket **v;
    register int i, j, k, n;

    /* Count symbols; the two extra slots are for $end and $accept. */
    nsyms = 2;
    ntokens = 1;
    for (bp = first_symbol; bp; bp = bp->next)
    {
        ++nsyms;
        if (bp->class == TERM) ++ntokens;
    }
    start_symbol = ntokens;
    nvars = nsyms - ntokens;

    symbol_name = (char **) MALLOC(nsyms*sizeof(char *));
    if (symbol_name == 0) no_space();
    symbol_value = (short *) MALLOC(nsyms*sizeof(short));
    if (symbol_value == 0) no_space();
    symbol_prec = (short *) MALLOC(nsyms*sizeof(short));
    if (symbol_prec == 0) no_space();
    symbol_assoc = MALLOC(nsyms);
    if (symbol_assoc == 0) no_space();

    /* v is a temporary table: terminals in v[1 .. ntokens-1], */
    /* everything else in v[start_symbol+1 .. nsyms-1]. */
    v = (bucket **) MALLOC(nsyms*sizeof(bucket *));
    if (v == 0) no_space();

    v[0] = 0;
    v[start_symbol] = 0;

    i = 1;
    j = start_symbol + 1;
    for (bp = first_symbol; bp; bp = bp->next)
    {
        if (bp->class == TERM)
            v[i++] = bp;
        else
            v[j++] = bp;
    }
    assert(i == ntokens && j == nsyms);

    for (i = 1; i < ntokens; ++i)
        v[i]->index = i;

    /* The goal gets the first nonterminal index; the others follow. */
    /* i equals ntokens here, so ++i starts at start_symbol + 1. */
    goal->index = start_symbol + 1;
    k = start_symbol + 2;
    while (++i < nsyms)
        if (v[i] != goal)
        {
            v[i]->index = k;
            ++k;
        }

    /* Nonterminal values: 0 for the goal, then 1, 2, ... */
    goal->value = 0;
    k = 1;
    for (i = start_symbol + 1; i < nsyms; ++i)
    {
        if (v[i] != goal)
        {
            v[i]->value = k;
            ++k;
        }
    }

    /* Collect the token values above 256 into symbol_value[0 .. k-1], */
    /* kept in ascending order by insertion; the array serves as */
    /* scratch space here and is filled in properly below. */
    k = 0;
    for (i = 1; i < ntokens; ++i)
    {
        n = v[i]->value;
        if (n > 256)
        {
            for (j = k++; j > 0 && symbol_value[j-1] > n; --j)
                symbol_value[j] = symbol_value[j-1];
            symbol_value[j] = n;
        }
    }

    /* NOTE(review): assumes at least one terminal exists, so that v[1] */
    /* is valid; presumably the symbol table is created with a */
    /* predefined token -- confirm in create_symbol_table(). */
    if (v[1]->value == UNDEFINED)
        v[1]->value = 256;

    /* Give the remaining valueless tokens the next free numbers, */
    /* skipping any that appear in the sorted list collected above. */
    j = 0;
    n = 257;
    for (i = 2; i < ntokens; ++i)
    {
        if (v[i]->value == UNDEFINED)
        {
            while (j < k && n == symbol_value[j])
            {
                while (++j < k && n == symbol_value[j]) continue;
                ++n;
            }
            v[i]->value = n;
            ++n;
        }
    }

    /* name_pool + 8 is "$end" and name_pool is "$accept". */
    symbol_name[0] = name_pool + 8;
    symbol_value[0] = 0;
    symbol_prec[0] = 0;
    symbol_assoc[0] = TOKEN;
    for (i = 1; i < ntokens; ++i)
    {
        symbol_name[i] = v[i]->name;
        symbol_value[i] = v[i]->value;
        symbol_prec[i] = v[i]->prec;
        symbol_assoc[i] = v[i]->assoc;
    }
    symbol_name[start_symbol] = name_pool;
    symbol_value[start_symbol] = -1;
    symbol_prec[start_symbol] = 0;
    symbol_assoc[start_symbol] = TOKEN;
    /* i equals start_symbol here; nonterminals are stored by index, */
    /* which places the goal first. */
    for (++i; i < nsyms; ++i)
    {
        k = v[i]->index;
        symbol_name[k] = v[i]->name;
        symbol_value[k] = v[i]->value;
        symbol_prec[k] = v[i]->prec;
        symbol_assoc[k] = v[i]->assoc;
    }

    FREE(v);
}
1673
1674
pack_grammar()1675 pack_grammar()
1676 {
1677 register int i, j;
1678 int assoc, prec;
1679
1680 ritem = (short *) MALLOC(nitems*sizeof(short));
1681 if (ritem == 0) no_space();
1682 rlhs = (short *) MALLOC(nrules*sizeof(short));
1683 if (rlhs == 0) no_space();
1684 rrhs = (short *) MALLOC((nrules+1)*sizeof(short));
1685 if (rrhs == 0) no_space();
1686 rprec = (short *) REALLOC(rprec, nrules*sizeof(short));
1687 if (rprec == 0) no_space();
1688 rassoc = REALLOC(rassoc, nrules);
1689 if (rassoc == 0) no_space();
1690
1691 ritem[0] = -1;
1692 ritem[1] = goal->index;
1693 ritem[2] = 0;
1694 ritem[3] = -2;
1695 rlhs[0] = 0;
1696 rlhs[1] = 0;
1697 rlhs[2] = start_symbol;
1698 rrhs[0] = 0;
1699 rrhs[1] = 0;
1700 rrhs[2] = 1;
1701
1702 j = 4;
1703 for (i = 3; i < nrules; ++i)
1704 {
1705 rlhs[i] = plhs[i]->index;
1706 rrhs[i] = j;
1707 assoc = TOKEN;
1708 prec = 0;
1709 while (pitem[j])
1710 {
1711 ritem[j] = pitem[j]->index;
1712 if (pitem[j]->class == TERM)
1713 {
1714 prec = pitem[j]->prec;
1715 assoc = pitem[j]->assoc;
1716 }
1717 ++j;
1718 }
1719 ritem[j] = -i;
1720 ++j;
1721 if (rprec[i] == UNDEFINED)
1722 {
1723 rprec[i] = prec;
1724 rassoc[i] = assoc;
1725 }
1726 }
1727 rrhs[i] = j;
1728
1729 FREE(plhs);
1730 FREE(pitem);
1731 }
1732
1733
print_grammar()1734 print_grammar()
1735 {
1736 register int i, j, k;
1737 int spacing;
1738 register FILE *f = verbose_file;
1739
1740 if (!vflag) return;
1741
1742 k = 1;
1743 for (i = 2; i < nrules; ++i)
1744 {
1745 if (rlhs[i] != rlhs[i-1])
1746 {
1747 if (i != 2) fprintf(f, "\n");
1748 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
1749 spacing = strlen(symbol_name[rlhs[i]]) + 1;
1750 }
1751 else
1752 {
1753 fprintf(f, "%4d ", i - 2);
1754 j = spacing;
1755 while (--j >= 0) putc(' ', f);
1756 putc('|', f);
1757 }
1758
1759 while (ritem[k] >= 0)
1760 {
1761 fprintf(f, " %s", symbol_name[ritem[k]]);
1762 ++k;
1763 }
1764 ++k;
1765 putc('\n', f);
1766 }
1767 }
1768
1769
/*
 * reader - top-level driver for reading the grammar file.
 *
 * The calls below run in a fixed order: the declarations and rules are
 * read into bucket-based tables, the symbols are checked and numbered,
 * and the grammar is packed into the integer arrays built by
 * pack_symbols() and pack_grammar().
 */
reader()
{
    /* NOTE(review): write_section, create_symbol_table, */
    /* free_symbol_table and free_symbols are defined elsewhere. */
    write_section(banner);
    create_symbol_table();
    read_declarations();
    read_grammar();
    free_symbol_table();
    /* The tag strings are no longer needed once the actions are copied. */
    free_tags();
    /* pack_names() must precede pack_symbols(), which reads name_pool. */
    pack_names();
    check_symbols();
    pack_symbols();
    /* pack_grammar() uses the symbol indices assigned by pack_symbols(). */
    pack_grammar();
    free_symbols();
    /* Listing is produced only when vflag is set. */
    print_grammar();
}
1785