1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Robert Paul Corbett. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #ifndef lint 12 static char sccsid[] = "@(#)reader.c 5.7 (Berkeley) 01/20/91"; 13 #endif /* not lint */ 14 15 #include "defs.h" 16 17 /* The line size must be a positive integer. One hundred was chosen */ 18 /* because few lines in Yacc input grammars exceed 100 characters. */ 19 /* Note that if a line exceeds LINESIZE characters, the line buffer */ 20 /* will be expanded to accomodate it. */ 21 22 #define LINESIZE 100 23 24 char *cache; 25 int cinc, cache_size; 26 27 int ntags, tagmax; 28 char **tag_table; 29 30 char saw_eof, unionized; 31 char *cptr, *line; 32 int linesize; 33 34 bucket *goal; 35 int prec; 36 int gensym; 37 char last_was_action; 38 39 int maxitems; 40 bucket **pitem; 41 42 int maxrules; 43 bucket **plhs; 44 45 int name_pool_size; 46 char *name_pool; 47 48 char line_format[] = "#line %d \"%s\"\n"; 49 50 51 cachec(c) 52 int c; 53 { 54 assert(cinc >= 0); 55 if (cinc >= cache_size) 56 { 57 cache_size += 256; 58 cache = REALLOC(cache, cache_size); 59 if (cache == 0) no_space(); 60 } 61 cache[cinc] = c; 62 ++cinc; 63 } 64 65 66 get_line() 67 { 68 register FILE *f = input_file; 69 register int c; 70 register int i; 71 72 if (saw_eof || (c = getc(f)) == EOF) 73 { 74 if (line) { FREE(line); line = 0; } 75 cptr = 0; 76 saw_eof = 1; 77 return; 78 } 79 80 if (line == 0 || linesize != (LINESIZE + 1)) 81 { 82 if (line) FREE(line); 83 linesize = LINESIZE + 1; 84 line = MALLOC(linesize); 85 if (line == 0) no_space(); 86 } 87 88 i = 0; 89 ++lineno; 90 for (;;) 91 { 92 line[i] = c; 93 if (c == '\n') { cptr = line; return; } 94 if (++i >= linesize) 95 { 96 linesize += LINESIZE; 97 line = REALLOC(line, linesize); 98 if (line == 0) no_space(); 99 } 100 c = getc(f); 101 if (c == EOF) 102 { 103 line[i] = '\n'; 104 saw_eof = 1; 105 cptr = line; 106 return; 107 } 108 } 109 } 110 111 112 char * 113 dup_line() 114 { 115 register char *p, *s, *t; 116 117 if (line == 0) return (0); 118 s = line; 119 while (*s != '\n') ++s; 120 p = MALLOC(s - line + 1); 121 if (p == 0) no_space(); 122 123 s = line; 124 t = p; 125 while ((*t++ = *s++) != '\n') continue; 126 return (p); 127 } 128 129 130 skip_comment() 131 { 132 register char *s; 133 134 int st_lineno = lineno; 135 char *st_line = dup_line(); 136 char *st_cptr = st_line + (cptr - line); 137 138 s = cptr + 2; 139 for (;;) 140 { 141 if (*s == '*' && s[1] == '/') 142 { 143 cptr = s + 2; 144 FREE(st_line); 145 return; 146 } 147 if (*s == '\n') 148 { 149 get_line(); 150 if (line == 0) 151 unterminated_comment(st_lineno, st_line, st_cptr); 152 s = cptr; 153 } 154 else 155 ++s; 156 } 157 } 158 159 160 int 161 nextc() 162 { 163 register char *s; 164 165 if (line == 0) 166 { 167 get_line(); 168 if (line == 0) 169 return (EOF); 170 } 171 172 s = cptr; 173 for (;;) 174 { 175 switch (*s) 176 { 177 case '\n': 178 get_line(); 179 if (line == 0) return (EOF); 180 s = cptr; 181 break; 182 183 case ' ': 184 case '\t': 185 case '\f': 186 case '\r': 187 case '\v': 188 case ',': 189 case ';': 190 ++s; 191 break; 192 193 case '\\': 194 cptr = s; 195 return ('%'); 196 197 case '/': 198 if (s[1] == '*') 199 { 200 cptr = s; 201 skip_comment(); 202 s = cptr; 203 break; 204 } 205 else if (s[1] == '/') 206 { 207 get_line(); 208 if (line == 0) return (EOF); 209 s = cptr; 210 break; 211 } 212 /* fall through */ 213 214 default: 215 cptr = s; 216 return (*s); 217 } 218 } 219 } 220 221 222 int 223 keyword() 224 { 225 register int c; 226 char *t_cptr = cptr; 227 228 c = *++cptr; 229 if (isalpha(c)) 230 { 231 cinc = 0; 232 for (;;) 233 { 234 if (isalpha(c)) 235 { 236 if (isupper(c)) c = tolower(c); 237 cachec(c); 238 } 239 else if (isdigit(c) || c == '_' || c == '.' || c == '$') 240 cachec(c); 241 else 242 break; 243 c = *++cptr; 244 } 245 cachec(NUL); 246 247 if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0) 248 return (TOKEN); 249 if (strcmp(cache, "type") == 0) 250 return (TYPE); 251 if (strcmp(cache, "left") == 0) 252 return (LEFT); 253 if (strcmp(cache, "right") == 0) 254 return (RIGHT); 255 if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0) 256 return (NONASSOC); 257 if (strcmp(cache, "start") == 0) 258 return (START); 259 if (strcmp(cache, "union") == 0) 260 return (UNION); 261 if (strcmp(cache, "ident") == 0) 262 return (IDENT); 263 } 264 else 265 { 266 ++cptr; 267 if (c == '{') 268 return (TEXT); 269 if (c == '%' || c == '\\') 270 return (MARK); 271 if (c == '<') 272 return (LEFT); 273 if (c == '>') 274 return (RIGHT); 275 if (c == '0') 276 return (TOKEN); 277 if (c == '2') 278 return (NONASSOC); 279 } 280 syntax_error(lineno, line, t_cptr); 281 /*NOTREACHED*/ 282 } 283 284 285 copy_ident() 286 { 287 register int c; 288 register FILE *f = output_file; 289 290 c = nextc(); 291 if (c == EOF) unexpected_EOF(); 292 if (c != '"') syntax_error(lineno, line, cptr); 293 ++outline; 294 fprintf(f, "#ident \""); 295 for (;;) 296 { 297 c = *++cptr; 298 if (c == '\n') 299 { 300 fprintf(f, "\"\n"); 301 return; 302 } 303 putc(c, f); 304 if (c == '"') 305 { 306 putc('\n', f); 307 ++cptr; 308 return; 309 } 310 } 311 } 312 313 314 copy_text() 315 { 316 register int c; 317 int quote; 318 register FILE *f = text_file; 319 int need_newline = 0; 320 int t_lineno = lineno; 321 char *t_line = dup_line(); 322 char *t_cptr = t_line + (cptr - line - 2); 323 324 if (*cptr == '\n') 325 { 326 get_line(); 327 if (line == 0) 328 unterminated_text(t_lineno, t_line, t_cptr); 329 } 330 if (!lflag) fprintf(f, line_format, lineno, input_file_name); 331 332 loop: 333 c = *cptr++; 334 switch (c) 335 { 336 case '\n': 337 next_line: 338 putc('\n', f); 339 need_newline = 0; 340 get_line(); 341 if (line) goto loop; 342 unterminated_text(t_lineno, t_line, t_cptr); 343 344 case '\'': 345 case '"': 346 { 347 int s_lineno = lineno; 348 char *s_line = dup_line(); 349 char *s_cptr = s_line + (cptr - line - 1); 350 351 quote = c; 352 putc(c, f); 353 for (;;) 354 { 355 c = *cptr++; 356 putc(c, f); 357 if (c == quote) 358 { 359 need_newline = 1; 360 FREE(s_line); 361 goto loop; 362 } 363 if (c == '\n') 364 unterminated_string(s_lineno, s_line, s_cptr); 365 if (c == '\\') 366 { 367 c = *cptr++; 368 putc(c, f); 369 if (c == '\n') 370 { 371 get_line(); 372 if (line == 0) 373 unterminated_string(s_lineno, s_line, s_cptr); 374 } 375 } 376 } 377 } 378 379 case '/': 380 putc(c, f); 381 need_newline = 1; 382 c = *cptr; 383 if (c == '/') 384 { 385 putc('*', f); 386 while ((c = *++cptr) != '\n') 387 { 388 if (c == '*' && cptr[1] == '/') 389 fprintf(f, "* "); 390 else 391 putc(c, f); 392 } 393 fprintf(f, "*/"); 394 goto next_line; 395 } 396 if (c == '*') 397 { 398 int c_lineno = lineno; 399 char *c_line = dup_line(); 400 char *c_cptr = c_line + (cptr - line - 1); 401 402 putc('*', f); 403 ++cptr; 404 for (;;) 405 { 406 c = *cptr++; 407 putc(c, f); 408 if (c == '*' && *cptr == '/') 409 { 410 putc('/', f); 411 ++cptr; 412 FREE(c_line); 413 goto loop; 414 } 415 if (c == '\n') 416 { 417 get_line(); 418 if (line == 0) 419 unterminated_comment(c_lineno, c_line, c_cptr); 420 } 421 } 422 } 423 need_newline = 1; 424 goto loop; 425 426 case '%': 427 case '\\': 428 if (*cptr == '}') 429 { 430 if (need_newline) putc('\n', f); 431 ++cptr; 432 FREE(t_line); 433 return; 434 } 435 /* fall through */ 436 437 default: 438 putc(c, f); 439 need_newline = 1; 440 goto loop; 441 } 442 } 443 444 445 copy_union() 446 { 447 register int c; 448 int quote; 449 int depth; 450 int u_lineno = lineno; 451 char *u_line = dup_line(); 452 char *u_cptr = u_line + (cptr - line - 6); 453 454 if (unionized) over_unionized(cptr - 6); 455 unionized = 1; 456 457 if (!lflag) 458 fprintf(text_file, line_format, lineno, input_file_name); 459 460 fprintf(text_file, "typedef union"); 461 if (dflag) fprintf(union_file, "typedef union"); 462 463 depth = 0; 464 loop: 465 c = *cptr++; 466 putc(c, text_file); 467 if (dflag) putc(c, union_file); 468 switch (c) 469 { 470 case '\n': 471 next_line: 472 get_line(); 473 if (line == 0) unterminated_union(u_lineno, u_line, u_cptr); 474 goto loop; 475 476 case '{': 477 ++depth; 478 goto loop; 479 480 case '}': 481 if (--depth == 0) 482 { 483 fprintf(text_file, " YYSTYPE;\n"); 484 FREE(u_line); 485 return; 486 } 487 goto loop; 488 489 case '\'': 490 case '"': 491 { 492 int s_lineno = lineno; 493 char *s_line = dup_line(); 494 char *s_cptr = s_line + (cptr - line - 1); 495 496 quote = c; 497 for (;;) 498 { 499 c = *cptr++; 500 putc(c, text_file); 501 if (dflag) putc(c, union_file); 502 if (c == quote) 503 { 504 FREE(s_line); 505 goto loop; 506 } 507 if (c == '\n') 508 unterminated_string(s_lineno, s_line, s_cptr); 509 if (c == '\\') 510 { 511 c = *cptr++; 512 putc(c, text_file); 513 if (dflag) putc(c, union_file); 514 if (c == '\n') 515 { 516 get_line(); 517 if (line == 0) 518 unterminated_string(s_lineno, s_line, s_cptr); 519 } 520 } 521 } 522 } 523 524 case '/': 525 c = *cptr; 526 if (c == '/') 527 { 528 putc('*', text_file); 529 if (dflag) putc('*', union_file); 530 while ((c = *++cptr) != '\n') 531 { 532 if (c == '*' && cptr[1] == '/') 533 { 534 fprintf(text_file, "* "); 535 if (dflag) fprintf(union_file, "* "); 536 } 537 else 538 { 539 putc(c, text_file); 540 if (dflag) putc(c, union_file); 541 } 542 } 543 fprintf(text_file, "*/\n"); 544 if (dflag) fprintf(union_file, "*/\n"); 545 goto next_line; 546 } 547 if (c == '*') 548 { 549 int c_lineno = lineno; 550 char *c_line = dup_line(); 551 char *c_cptr = c_line + (cptr - line - 1); 552 553 putc('*', text_file); 554 if (dflag) putc('*', union_file); 555 ++cptr; 556 for (;;) 557 { 558 c = *cptr++; 559 putc(c, text_file); 560 if (dflag) putc(c, union_file); 561 if (c == '*' && *cptr == '/') 562 { 563 putc('/', text_file); 564 if (dflag) putc('/', union_file); 565 ++cptr; 566 FREE(c_line); 567 goto loop; 568 } 569 if (c == '\n') 570 { 571 get_line(); 572 if (line == 0) 573 unterminated_comment(c_lineno, c_line, c_cptr); 574 } 575 } 576 } 577 goto loop; 578 579 default: 580 goto loop; 581 } 582 } 583 584 585 int 586 hexval(c) 587 int c; 588 { 589 if (c >= '0' && c <= '9') 590 return (c - '0'); 591 if (c >= 'A' && c <= 'F') 592 return (c - 'A' + 10); 593 if (c >= 'a' && c <= 'f') 594 return (c - 'a' + 10); 595 return (-1); 596 } 597 598 599 bucket * 600 get_literal() 601 { 602 register int c, quote; 603 register int i; 604 register int n; 605 register char *s; 606 register bucket *bp; 607 int s_lineno = lineno; 608 char *s_line = dup_line(); 609 char *s_cptr = s_line + (cptr - line); 610 611 quote = *cptr++; 612 cinc = 0; 613 for (;;) 614 { 615 c = *cptr++; 616 if (c == quote) break; 617 if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr); 618 if (c == '\\') 619 { 620 char *c_cptr = cptr - 1; 621 622 c = *cptr++; 623 switch (c) 624 { 625 case '\n': 626 get_line(); 627 if (line == 0) unterminated_string(s_lineno, s_line, s_cptr); 628 continue; 629 630 case '0': case '1': case '2': case '3': 631 case '4': case '5': case '6': case '7': 632 n = c - '0'; 633 c = *cptr; 634 if (IS_OCTAL(c)) 635 { 636 n = (n << 3) + (c - '0'); 637 c = *++cptr; 638 if (IS_OCTAL(c)) 639 { 640 n = (n << 3) + (c - '0'); 641 ++cptr; 642 } 643 } 644 if (n > MAXCHAR) illegal_character(c_cptr); 645 c = n; 646 break; 647 648 case 'x': 649 c = *cptr++; 650 n = hexval(c); 651 if (n < 0 || n >= 16) 652 illegal_character(c_cptr); 653 for (;;) 654 { 655 c = *cptr; 656 i = hexval(c); 657 if (i < 0 || i >= 16) break; 658 ++cptr; 659 n = (n << 4) + i; 660 if (n > MAXCHAR) illegal_character(c_cptr); 661 } 662 c = n; 663 break; 664 665 case 'a': c = 7; break; 666 case 'b': c = '\b'; break; 667 case 'f': c = '\f'; break; 668 case 'n': c = '\n'; break; 669 case 'r': c = '\r'; break; 670 case 't': c = '\t'; break; 671 case 'v': c = '\v'; break; 672 } 673 } 674 cachec(c); 675 } 676 FREE(s_line); 677 678 n = cinc; 679 s = MALLOC(n); 680 if (s == 0) no_space(); 681 682 for (i = 0; i < n; ++i) 683 s[i] = cache[i]; 684 685 cinc = 0; 686 if (n == 1) 687 cachec('\''); 688 else 689 cachec('"'); 690 691 for (i = 0; i < n; ++i) 692 { 693 c = ((unsigned char *)s)[i]; 694 if (c == '\\' || c == cache[0]) 695 { 696 cachec('\\'); 697 cachec(c); 698 } 699 else if (isprint(c)) 700 cachec(c); 701 else 702 { 703 cachec('\\'); 704 switch (c) 705 { 706 case 7: cachec('a'); break; 707 case '\b': cachec('b'); break; 708 case '\f': cachec('f'); break; 709 case '\n': cachec('n'); break; 710 case '\r': cachec('r'); break; 711 case '\t': cachec('t'); break; 712 case '\v': cachec('v'); break; 713 default: 714 cachec(((c >> 6) & 7) + '0'); 715 cachec(((c >> 3) & 7) + '0'); 716 cachec((c & 7) + '0'); 717 break; 718 } 719 } 720 } 721 722 if (n == 1) 723 cachec('\''); 724 else 725 cachec('"'); 726 727 cachec(NUL); 728 bp = lookup(cache); 729 bp->class = TERM; 730 if (n == 1 && bp->value == UNDEFINED) 731 bp->value = *(unsigned char *)s; 732 FREE(s); 733 734 return (bp); 735 } 736 737 738 int 739 is_reserved(name) 740 char *name; 741 { 742 char *s; 743 744 if (strcmp(name, ".") == 0 || 745 strcmp(name, "$accept") == 0 || 746 strcmp(name, "$end") == 0) 747 return (1); 748 749 if (name[0] == '$' && name[1] == '$' && isdigit(name[2])) 750 { 751 s = name + 3; 752 while (isdigit(*s)) ++s; 753 if (*s == NUL) return (1); 754 } 755 756 return (0); 757 } 758 759 760 bucket * 761 get_name() 762 { 763 register int c; 764 765 cinc = 0; 766 for (c = *cptr; IS_IDENT(c); c = *++cptr) 767 cachec(c); 768 cachec(NUL); 769 770 if (is_reserved(cache)) used_reserved(cache); 771 772 return (lookup(cache)); 773 } 774 775 776 int 777 get_number() 778 { 779 register int c; 780 register int n; 781 782 n = 0; 783 for (c = *cptr; isdigit(c); c = *++cptr) 784 n = 10*n + (c - '0'); 785 786 return (n); 787 } 788 789 790 char * 791 get_tag() 792 { 793 register int c; 794 register int i; 795 register char *s; 796 int t_lineno = lineno; 797 char *t_line = dup_line(); 798 char *t_cptr = t_line + (cptr - line); 799 800 ++cptr; 801 c = nextc(); 802 if (c == EOF) unexpected_EOF(); 803 if (!isalpha(c) && c != '_' && c != '$') 804 illegal_tag(t_lineno, t_line, t_cptr); 805 806 cinc = 0; 807 do { cachec(c); c = *++cptr; } while (IS_IDENT(c)); 808 cachec(NUL); 809 810 c = nextc(); 811 if (c == EOF) unexpected_EOF(); 812 if (c != '>') 813 illegal_tag(t_lineno, t_line, t_cptr); 814 ++cptr; 815 816 for (i = 0; i < ntags; ++i) 817 { 818 if (strcmp(cache, tag_table[i]) == 0) 819 return (tag_table[i]); 820 } 821 822 if (ntags >= tagmax) 823 { 824 tagmax += 16; 825 tag_table = (char **) 826 (tag_table ? REALLOC(tag_table, tagmax*sizeof(char *)) 827 : MALLOC(tagmax*sizeof(char *))); 828 if (tag_table == 0) no_space(); 829 } 830 831 s = MALLOC(cinc); 832 if (s == 0) no_space(); 833 strcpy(s, cache); 834 tag_table[ntags] = s; 835 ++ntags; 836 FREE(t_line); 837 return (s); 838 } 839 840 841 declare_tokens(assoc) 842 int assoc; 843 { 844 register int c; 845 register bucket *bp; 846 int value; 847 char *tag = 0; 848 849 if (assoc != TOKEN) ++prec; 850 851 c = nextc(); 852 if (c == EOF) unexpected_EOF(); 853 if (c == '<') 854 { 855 tag = get_tag(); 856 c = nextc(); 857 if (c == EOF) unexpected_EOF(); 858 } 859 860 for (;;) 861 { 862 if (isalpha(c) || c == '_' || c == '.' || c == '$') 863 bp = get_name(); 864 else if (c == '\'' || c == '"') 865 bp = get_literal(); 866 else 867 return; 868 869 if (bp == goal) tokenized_start(bp->name); 870 bp->class = TERM; 871 872 if (tag) 873 { 874 if (bp->tag && tag != bp->tag) 875 retyped_warning(bp->name); 876 bp->tag = tag; 877 } 878 879 if (assoc != TOKEN) 880 { 881 if (bp->prec && prec != bp->prec) 882 reprec_warning(bp->name); 883 bp->assoc = assoc; 884 bp->prec = prec; 885 } 886 887 c = nextc(); 888 if (c == EOF) unexpected_EOF(); 889 value = UNDEFINED; 890 if (isdigit(c)) 891 { 892 value = get_number(); 893 if (bp->value != UNDEFINED && value != bp->value) 894 revalued_warning(bp->name); 895 bp->value = value; 896 c = nextc(); 897 if (c == EOF) unexpected_EOF(); 898 } 899 } 900 } 901 902 903 declare_types() 904 { 905 register int c; 906 register bucket *bp; 907 char *tag; 908 909 c = nextc(); 910 if (c == EOF) unexpected_EOF(); 911 if (c != '<') syntax_error(lineno, line, cptr); 912 tag = get_tag(); 913 914 for (;;) 915 { 916 c = nextc(); 917 if (isalpha(c) || c == '_' || c == '.' || c == '$') 918 bp = get_name(); 919 else if (c == '\'' || c == '"') 920 bp = get_literal(); 921 else 922 return; 923 924 if (bp->tag && tag != bp->tag) 925 retyped_warning(bp->name); 926 bp->tag = tag; 927 } 928 } 929 930 931 declare_start() 932 { 933 register int c; 934 register bucket *bp; 935 936 c = nextc(); 937 if (c == EOF) unexpected_EOF(); 938 if (!isalpha(c) && c != '_' && c != '.' && c != '$') 939 syntax_error(lineno, line, cptr); 940 bp = get_name(); 941 if (bp->class == TERM) 942 terminal_start(bp->name); 943 if (goal && goal != bp) 944 restarted_warning(); 945 goal = bp; 946 } 947 948 949 read_declarations() 950 { 951 register int c, k; 952 953 cache_size = 256; 954 cache = MALLOC(cache_size); 955 if (cache == 0) no_space(); 956 957 for (;;) 958 { 959 c = nextc(); 960 if (c == EOF) unexpected_EOF(); 961 if (c != '%') syntax_error(lineno, line, cptr); 962 switch (k = keyword()) 963 { 964 case MARK: 965 return; 966 967 case IDENT: 968 copy_ident(); 969 break; 970 971 case TEXT: 972 copy_text(); 973 break; 974 975 case UNION: 976 copy_union(); 977 break; 978 979 case TOKEN: 980 case LEFT: 981 case RIGHT: 982 case NONASSOC: 983 declare_tokens(k); 984 break; 985 986 case TYPE: 987 declare_types(); 988 break; 989 990 case START: 991 declare_start(); 992 break; 993 } 994 } 995 } 996 997 998 initialize_grammar() 999 { 1000 nitems = 4; 1001 maxitems = 300; 1002 pitem = (bucket **) MALLOC(maxitems*sizeof(bucket *)); 1003 if (pitem == 0) no_space(); 1004 pitem[0] = 0; 1005 pitem[1] = 0; 1006 pitem[2] = 0; 1007 pitem[3] = 0; 1008 1009 nrules = 3; 1010 maxrules = 100; 1011 plhs = (bucket **) MALLOC(maxrules*sizeof(bucket *)); 1012 if (plhs == 0) no_space(); 1013 plhs[0] = 0; 1014 plhs[1] = 0; 1015 plhs[2] = 0; 1016 rprec = (short *) MALLOC(maxrules*sizeof(short)); 1017 if (rprec == 0) no_space(); 1018 rprec[0] = 0; 1019 rprec[1] = 0; 1020 rprec[2] = 0; 1021 rassoc = (char *) MALLOC(maxrules*sizeof(char)); 1022 if (rassoc == 0) no_space(); 1023 rassoc[0] = TOKEN; 1024 rassoc[1] = TOKEN; 1025 rassoc[2] = TOKEN; 1026 } 1027 1028 1029 expand_items() 1030 { 1031 maxitems += 300; 1032 pitem = (bucket **) REALLOC(pitem, maxitems*sizeof(bucket *)); 1033 if (pitem == 0) no_space(); 1034 } 1035 1036 1037 expand_rules() 1038 { 1039 maxrules += 100; 1040 plhs = (bucket **) REALLOC(plhs, maxrules*sizeof(bucket *)); 1041 if (plhs == 0) no_space(); 1042 rprec = (short *) REALLOC(rprec, maxrules*sizeof(short)); 1043 if (rprec == 0) no_space(); 1044 rassoc = (char *) REALLOC(rassoc, maxrules*sizeof(char)); 1045 if (rassoc == 0) no_space(); 1046 } 1047 1048 1049 advance_to_start() 1050 { 1051 register int c; 1052 register bucket *bp; 1053 char *s_cptr; 1054 int s_lineno; 1055 1056 for (;;) 1057 { 1058 c = nextc(); 1059 if (c != '%') break; 1060 s_cptr = cptr; 1061 switch (keyword()) 1062 { 1063 case MARK: 1064 no_grammar(); 1065 1066 case TEXT: 1067 copy_text(); 1068 break; 1069 1070 case START: 1071 declare_start(); 1072 break; 1073 1074 default: 1075 syntax_error(lineno, line, s_cptr); 1076 } 1077 } 1078 1079 c = nextc(); 1080 if (!isalpha(c) && c != '_' && c != '.' && c != '_') 1081 syntax_error(lineno, line, cptr); 1082 bp = get_name(); 1083 if (goal == 0) 1084 { 1085 if (bp->class == TERM) 1086 terminal_start(bp->name); 1087 goal = bp; 1088 } 1089 1090 s_lineno = lineno; 1091 c = nextc(); 1092 if (c == EOF) unexpected_EOF(); 1093 if (c != ':') syntax_error(lineno, line, cptr); 1094 start_rule(bp, s_lineno); 1095 ++cptr; 1096 } 1097 1098 1099 start_rule(bp, s_lineno) 1100 register bucket *bp; 1101 int s_lineno; 1102 { 1103 if (bp->class == TERM) 1104 terminal_lhs(s_lineno); 1105 bp->class = NONTERM; 1106 if (nrules >= maxrules) 1107 expand_rules(); 1108 plhs[nrules] = bp; 1109 rprec[nrules] = UNDEFINED; 1110 rassoc[nrules] = TOKEN; 1111 } 1112 1113 1114 end_rule() 1115 { 1116 register int i; 1117 1118 if (!last_was_action && plhs[nrules]->tag) 1119 { 1120 for (i = nitems - 1; pitem[i]; --i) continue; 1121 if (pitem[i+1] == 0 || pitem[i+1]->tag != plhs[nrules]->tag) 1122 default_action_warning(); 1123 } 1124 1125 last_was_action = 0; 1126 if (nitems >= maxitems) expand_items(); 1127 pitem[nitems] = 0; 1128 ++nitems; 1129 ++nrules; 1130 } 1131 1132 1133 insert_empty_rule() 1134 { 1135 register bucket *bp, **bpp; 1136 1137 assert(cache); 1138 sprintf(cache, "$$%d", ++gensym); 1139 bp = make_bucket(cache); 1140 last_symbol->next = bp; 1141 last_symbol = bp; 1142 bp->tag = plhs[nrules]->tag; 1143 bp->class = NONTERM; 1144 1145 if ((nitems += 2) > maxitems) 1146 expand_items(); 1147 bpp = pitem + nitems - 1; 1148 *bpp-- = bp; 1149 while (bpp[0] = bpp[-1]) --bpp; 1150 1151 if (++nrules >= maxrules) 1152 expand_rules(); 1153 plhs[nrules] = plhs[nrules-1]; 1154 plhs[nrules-1] = bp; 1155 rprec[nrules] = rprec[nrules-1]; 1156 rprec[nrules-1] = 0; 1157 rassoc[nrules] = rassoc[nrules-1]; 1158 rassoc[nrules-1] = TOKEN; 1159 } 1160 1161 1162 add_symbol() 1163 { 1164 register int c; 1165 register bucket *bp; 1166 int s_lineno = lineno; 1167 1168 c = *cptr; 1169 if (c == '\'' || c == '"') 1170 bp = get_literal(); 1171 else 1172 bp = get_name(); 1173 1174 c = nextc(); 1175 if (c == ':') 1176 { 1177 end_rule(); 1178 start_rule(bp, s_lineno); 1179 ++cptr; 1180 return; 1181 } 1182 1183 if (last_was_action) 1184 insert_empty_rule(); 1185 last_was_action = 0; 1186 1187 if (++nitems > maxitems) 1188 expand_items(); 1189 pitem[nitems-1] = bp; 1190 } 1191 1192 1193 copy_action() 1194 { 1195 register int c; 1196 register int i, n; 1197 int depth; 1198 int quote; 1199 char *tag; 1200 register FILE *f = action_file; 1201 int a_lineno = lineno; 1202 char *a_line = dup_line(); 1203 char *a_cptr = a_line + (cptr - line); 1204 1205 if (last_was_action) 1206 insert_empty_rule(); 1207 last_was_action = 1; 1208 1209 fprintf(f, "case %d:\n", nrules - 2); 1210 if (!lflag) 1211 fprintf(f, line_format, lineno, input_file_name); 1212 if (*cptr == '=') ++cptr; 1213 1214 n = 0; 1215 for (i = nitems - 1; pitem[i]; --i) ++n; 1216 1217 depth = 0; 1218 loop: 1219 c = *cptr; 1220 if (c == '$') 1221 { 1222 if (cptr[1] == '<') 1223 { 1224 int d_lineno = lineno; 1225 char *d_line = dup_line(); 1226 char *d_cptr = d_line + (cptr - line); 1227 1228 ++cptr; 1229 tag = get_tag(); 1230 c = *cptr; 1231 if (c == '$') 1232 { 1233 fprintf(f, "yyval.%s", tag); 1234 ++cptr; 1235 FREE(d_line); 1236 goto loop; 1237 } 1238 else if (isdigit(c)) 1239 { 1240 i = get_number(); 1241 if (i > n) dollar_warning(d_lineno, i); 1242 fprintf(f, "yyvsp[%d].%s", i - n, tag); 1243 FREE(d_line); 1244 goto loop; 1245 } 1246 else if (c == '-' && isdigit(cptr[1])) 1247 { 1248 ++cptr; 1249 i = -get_number() - n; 1250 fprintf(f, "yyvsp[%d].%s", i, tag); 1251 FREE(d_line); 1252 goto loop; 1253 } 1254 else 1255 dollar_error(d_lineno, d_line, d_cptr); 1256 } 1257 else if (cptr[1] == '$') 1258 { 1259 if (ntags) 1260 { 1261 tag = plhs[nrules]->tag; 1262 if (tag == 0) untyped_lhs(); 1263 fprintf(f, "yyval.%s", tag); 1264 } 1265 else 1266 fprintf(f, "yyval"); 1267 cptr += 2; 1268 goto loop; 1269 } 1270 else if (isdigit(cptr[1])) 1271 { 1272 ++cptr; 1273 i = get_number(); 1274 if (ntags) 1275 { 1276 if (i <= 0 || i > n) 1277 unknown_rhs(i); 1278 tag = pitem[nitems + i - n - 1]->tag; 1279 if (tag == 0) untyped_rhs(i, pitem[nitems + i - n - 1]->name); 1280 fprintf(f, "yyvsp[%d].%s", i - n, tag); 1281 } 1282 else 1283 { 1284 if (i > n) 1285 dollar_warning(lineno, i); 1286 fprintf(f, "yyvsp[%d]", i - n); 1287 } 1288 goto loop; 1289 } 1290 else if (cptr[1] == '-') 1291 { 1292 cptr += 2; 1293 i = get_number(); 1294 if (ntags) 1295 unknown_rhs(-i); 1296 fprintf(f, "yyvsp[%d]", -i - n); 1297 goto loop; 1298 } 1299 } 1300 if (isalpha(c) || c == '_' || c == '$') 1301 { 1302 do 1303 { 1304 putc(c, f); 1305 c = *++cptr; 1306 } while (isalnum(c) || c == '_' || c == '$'); 1307 goto loop; 1308 } 1309 putc(c, f); 1310 ++cptr; 1311 switch (c) 1312 { 1313 case '\n': 1314 next_line: 1315 get_line(); 1316 if (line) goto loop; 1317 unterminated_action(a_lineno, a_line, a_cptr); 1318 1319 case ';': 1320 if (depth > 0) goto loop; 1321 fprintf(f, "\nbreak;\n"); 1322 return; 1323 1324 case '{': 1325 ++depth; 1326 goto loop; 1327 1328 case '}': 1329 if (--depth > 0) goto loop; 1330 fprintf(f, "\nbreak;\n"); 1331 return; 1332 1333 case '\'': 1334 case '"': 1335 { 1336 int s_lineno = lineno; 1337 char *s_line = dup_line(); 1338 char *s_cptr = s_line + (cptr - line - 1); 1339 1340 quote = c; 1341 for (;;) 1342 { 1343 c = *cptr++; 1344 putc(c, f); 1345 if (c == quote) 1346 { 1347 FREE(s_line); 1348 goto loop; 1349 } 1350 if (c == '\n') 1351 unterminated_string(s_lineno, s_line, s_cptr); 1352 if (c == '\\') 1353 { 1354 c = *cptr++; 1355 putc(c, f); 1356 if (c == '\n') 1357 { 1358 get_line(); 1359 if (line == 0) 1360 unterminated_string(s_lineno, s_line, s_cptr); 1361 } 1362 } 1363 } 1364 } 1365 1366 case '/': 1367 c = *cptr; 1368 if (c == '/') 1369 { 1370 putc('*', f); 1371 while ((c = *++cptr) != '\n') 1372 { 1373 if (c == '*' && cptr[1] == '/') 1374 fprintf(f, "* "); 1375 else 1376 putc(c, f); 1377 } 1378 fprintf(f, "*/\n"); 1379 goto next_line; 1380 } 1381 if (c == '*') 1382 { 1383 int c_lineno = lineno; 1384 char *c_line = dup_line(); 1385 char *c_cptr = c_line + (cptr - line - 1); 1386 1387 putc('*', f); 1388 ++cptr; 1389 for (;;) 1390 { 1391 c = *cptr++; 1392 putc(c, f); 1393 if (c == '*' && *cptr == '/') 1394 { 1395 putc('/', f); 1396 ++cptr; 1397 FREE(c_line); 1398 goto loop; 1399 } 1400 if (c == '\n') 1401 { 1402 get_line(); 1403 if (line == 0) 1404 unterminated_comment(c_lineno, c_line, c_cptr); 1405 } 1406 } 1407 } 1408 goto loop; 1409 1410 default: 1411 goto loop; 1412 } 1413 } 1414 1415 1416 int 1417 mark_symbol() 1418 { 1419 register int c; 1420 register bucket *bp; 1421 1422 c = cptr[1]; 1423 if (c == '%' || c == '\\') 1424 { 1425 cptr += 2; 1426 return (1); 1427 } 1428 1429 if (c == '=') 1430 cptr += 2; 1431 else if ((c == 'p' || c == 'P') && 1432 ((c = cptr[2]) == 'r' || c == 'R') && 1433 ((c = cptr[3]) == 'e' || c == 'E') && 1434 ((c = cptr[4]) == 'c' || c == 'C') && 1435 ((c = cptr[5], !IS_IDENT(c)))) 1436 cptr += 5; 1437 else 1438 syntax_error(lineno, line, cptr); 1439 1440 c = nextc(); 1441 if (isalpha(c) || c == '_' || c == '.' || c == '$') 1442 bp = get_name(); 1443 else if (c == '\'' || c == '"') 1444 bp = get_literal(); 1445 else 1446 { 1447 syntax_error(lineno, line, cptr); 1448 /*NOTREACHED*/ 1449 } 1450 1451 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules]) 1452 prec_redeclared(); 1453 1454 rprec[nrules] = bp->prec; 1455 rassoc[nrules] = bp->assoc; 1456 return (0); 1457 } 1458 1459 1460 read_grammar() 1461 { 1462 register int c; 1463 1464 initialize_grammar(); 1465 advance_to_start(); 1466 1467 for (;;) 1468 { 1469 c = nextc(); 1470 if (c == EOF) break; 1471 if (isalpha(c) || c == '_' || c == '.' || c == '$' || c == '\'' || 1472 c == '"') 1473 add_symbol(); 1474 else if (c == '{' || c == '=') 1475 copy_action(); 1476 else if (c == '|') 1477 { 1478 end_rule(); 1479 start_rule(plhs[nrules-1], 0); 1480 ++cptr; 1481 } 1482 else if (c == '%') 1483 { 1484 if (mark_symbol()) break; 1485 } 1486 else 1487 syntax_error(lineno, line, cptr); 1488 } 1489 end_rule(); 1490 } 1491 1492 1493 free_tags() 1494 { 1495 register int i; 1496 1497 if (tag_table == 0) return; 1498 1499 for (i = 0; i < ntags; ++i) 1500 { 1501 assert(tag_table[i]); 1502 FREE(tag_table[i]); 1503 } 1504 FREE(tag_table); 1505 } 1506 1507 1508 pack_names() 1509 { 1510 register bucket *bp; 1511 register char *p, *s, *t; 1512 1513 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */ 1514 for (bp = first_symbol; bp; bp = bp->next) 1515 name_pool_size += strlen(bp->name) + 1; 1516 name_pool = MALLOC(name_pool_size); 1517 if (name_pool == 0) no_space(); 1518 1519 strcpy(name_pool, "$accept"); 1520 strcpy(name_pool+8, "$end"); 1521 t = name_pool + 13; 1522 for (bp = first_symbol; bp; bp = bp->next) 1523 { 1524 p = t; 1525 s = bp->name; 1526 while (*t++ = *s++) continue; 1527 FREE(bp->name); 1528 bp->name = p; 1529 } 1530 } 1531 1532 1533 check_symbols() 1534 { 1535 register bucket *bp; 1536 1537 if (goal->class == UNKNOWN) 1538 undefined_goal(goal->name); 1539 1540 for (bp = first_symbol; bp; bp = bp->next) 1541 { 1542 if (bp->class == UNKNOWN) 1543 { 1544 undefined_symbol_warning(bp->name); 1545 bp->class = TERM; 1546 } 1547 } 1548 } 1549 1550 1551 pack_symbols() 1552 { 1553 register bucket *bp; 1554 register bucket **v; 1555 register int i, j, k, n; 1556 1557 nsyms = 2; 1558 ntokens = 1; 1559 for (bp = first_symbol; bp; bp = bp->next) 1560 { 1561 ++nsyms; 1562 if (bp->class == TERM) ++ntokens; 1563 } 1564 start_symbol = ntokens; 1565 nvars = nsyms - ntokens; 1566 1567 symbol_name = (char **) MALLOC(nsyms*sizeof(char *)); 1568 if (symbol_name == 0) no_space(); 1569 symbol_value = (short *) MALLOC(nsyms*sizeof(short)); 1570 if (symbol_value == 0) no_space(); 1571 symbol_prec = (short *) MALLOC(nsyms*sizeof(short)); 1572 if (symbol_prec == 0) no_space(); 1573 symbol_assoc = MALLOC(nsyms); 1574 if (symbol_assoc == 0) no_space(); 1575 1576 v = (bucket **) MALLOC(nsyms*sizeof(bucket *)); 1577 if (v == 0) no_space(); 1578 1579 v[0] = 0; 1580 v[start_symbol] = 0; 1581 1582 i = 1; 1583 j = start_symbol + 1; 1584 for (bp = first_symbol; bp; bp = bp->next) 1585 { 1586 if (bp->class == TERM) 1587 v[i++] = bp; 1588 else 1589 v[j++] = bp; 1590 } 1591 assert(i == ntokens && j == nsyms); 1592 1593 for (i = 1; i < ntokens; ++i) 1594 v[i]->index = i; 1595 1596 goal->index = start_symbol + 1; 1597 k = start_symbol + 2; 1598 while (++i < nsyms) 1599 if (v[i] != goal) 1600 { 1601 v[i]->index = k; 1602 ++k; 1603 } 1604 1605 goal->value = 0; 1606 k = 1; 1607 for (i = start_symbol + 1; i < nsyms; ++i) 1608 { 1609 if (v[i] != goal) 1610 { 1611 v[i]->value = k; 1612 ++k; 1613 } 1614 } 1615 1616 k = 0; 1617 for (i = 1; i < ntokens; ++i) 1618 { 1619 n = v[i]->value; 1620 if (n > 256) 1621 { 1622 for (j = k++; j > 0 && symbol_value[j-1] > n; --j) 1623 symbol_value[j] = symbol_value[j-1]; 1624 symbol_value[j] = n; 1625 } 1626 } 1627 1628 if (v[1]->value == UNDEFINED) 1629 v[1]->value = 256; 1630 1631 j = 0; 1632 n = 257; 1633 for (i = 2; i < ntokens; ++i) 1634 { 1635 if (v[i]->value == UNDEFINED) 1636 { 1637 while (j < k && n == symbol_value[j]) 1638 { 1639 while (++j < k && n == symbol_value[j]) continue; 1640 ++n; 1641 } 1642 v[i]->value = n; 1643 ++n; 1644 } 1645 } 1646 1647 symbol_name[0] = name_pool + 8; 1648 symbol_value[0] = 0; 1649 symbol_prec[0] = 0; 1650 symbol_assoc[0] = TOKEN; 1651 for (i = 1; i < ntokens; ++i) 1652 { 1653 symbol_name[i] = v[i]->name; 1654 symbol_value[i] = v[i]->value; 1655 symbol_prec[i] = v[i]->prec; 1656 symbol_assoc[i] = v[i]->assoc; 1657 } 1658 symbol_name[start_symbol] = name_pool; 1659 symbol_value[start_symbol] = -1; 1660 symbol_prec[start_symbol] = 0; 1661 symbol_assoc[start_symbol] = TOKEN; 1662 for (++i; i < nsyms; ++i) 1663 { 1664 k = v[i]->index; 1665 symbol_name[k] = v[i]->name; 1666 symbol_value[k] = v[i]->value; 1667 symbol_prec[k] = v[i]->prec; 1668 symbol_assoc[k] = v[i]->assoc; 1669 } 1670 1671 FREE(v); 1672 } 1673 1674 1675 pack_grammar() 1676 { 1677 register int i, j; 1678 int assoc, prec; 1679 1680 ritem = (short *) MALLOC(nitems*sizeof(short)); 1681 if (ritem == 0) no_space(); 1682 rlhs = (short *) MALLOC(nrules*sizeof(short)); 1683 if (rlhs == 0) no_space(); 1684 rrhs = (short *) MALLOC((nrules+1)*sizeof(short)); 1685 if (rrhs == 0) no_space(); 1686 rprec = (short *) REALLOC(rprec, nrules*sizeof(short)); 1687 if (rprec == 0) no_space(); 1688 rassoc = REALLOC(rassoc, nrules); 1689 if (rassoc == 0) no_space(); 1690 1691 ritem[0] = -1; 1692 ritem[1] = goal->index; 1693 ritem[2] = 0; 1694 ritem[3] = -2; 1695 rlhs[0] = 0; 1696 rlhs[1] = 0; 1697 rlhs[2] = start_symbol; 1698 rrhs[0] = 0; 1699 rrhs[1] = 0; 1700 rrhs[2] = 1; 1701 1702 j = 4; 1703 for (i = 3; i < nrules; ++i) 1704 { 1705 rlhs[i] = plhs[i]->index; 1706 rrhs[i] = j; 1707 assoc = TOKEN; 1708 prec = 0; 1709 while (pitem[j]) 1710 { 1711 ritem[j] = pitem[j]->index; 1712 if (pitem[j]->class == TERM) 1713 { 1714 prec = pitem[j]->prec; 1715 assoc = pitem[j]->assoc; 1716 } 1717 ++j; 1718 } 1719 ritem[j] = -i; 1720 ++j; 1721 if (rprec[i] == UNDEFINED) 1722 { 1723 rprec[i] = prec; 1724 rassoc[i] = assoc; 1725 } 1726 } 1727 rrhs[i] = j; 1728 1729 FREE(plhs); 1730 FREE(pitem); 1731 } 1732 1733 1734 print_grammar() 1735 { 1736 register int i, j, k; 1737 int spacing; 1738 register FILE *f = verbose_file; 1739 1740 if (!vflag) return; 1741 1742 k = 1; 1743 for (i = 2; i < nrules; ++i) 1744 { 1745 if (rlhs[i] != rlhs[i-1]) 1746 { 1747 if (i != 2) fprintf(f, "\n"); 1748 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]); 1749 spacing = strlen(symbol_name[rlhs[i]]) + 1; 1750 } 1751 else 1752 { 1753 fprintf(f, "%4d ", i - 2); 1754 j = spacing; 1755 while (--j >= 0) putc(' ', f); 1756 putc('|', f); 1757 } 1758 1759 while (ritem[k] >= 0) 1760 { 1761 fprintf(f, " %s", symbol_name[ritem[k]]); 1762 ++k; 1763 } 1764 ++k; 1765 putc('\n', f); 1766 } 1767 } 1768 1769 1770 reader() 1771 { 1772 write_section(banner); 1773 create_symbol_table(); 1774 read_declarations(); 1775 read_grammar(); 1776 free_symbol_table(); 1777 free_tags(); 1778 pack_names(); 1779 check_symbols(); 1780 pack_symbols(); 1781 pack_grammar(); 1782 free_symbols(); 1783 print_grammar(); 1784 } 1785