1 /* 2 * Copyright (c) 1983 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms are permitted 6 * provided that the above copyright notice and this paragraph are 7 * duplicated in all such forms and that any documentation, 8 * advertising materials, and other materials related to such 9 * distribution and use acknowledge that the software was developed 10 * by the University of California, Berkeley. The name of the 11 * University may not be used to endorse or promote products derived 12 * from this software without specific prior written permission. 13 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 14 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 15 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 16 */ 17 18 #ifndef lint 19 static char sccsid[] = "@(#)scanner.c 5.2 (Berkeley) 05/23/89"; 20 #endif /* not lint */ 21 22 /* 23 * Debugger scanner. 24 */ 25 26 #include "defs.h" 27 #include "scanner.h" 28 #include "main.h" 29 #include "keywords.h" 30 #include "tree.h" 31 #include "symbols.h" 32 #include "names.h" 33 #include "y.tab.h" 34 35 #ifndef public 36 typedef int Token; 37 38 #define MAXLINESIZE 10240 39 40 #endif 41 42 public String initfile = ".dbxinit"; 43 44 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; 45 46 private Charclass class[256 + 1]; 47 private Charclass *lexclass = class + 1; 48 49 #define isdigit(c) (lexclass[c] == NUM) 50 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) 51 #define ishexdigit(c) ( \ 52 isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ 53 ) 54 55 public boolean chkalias; 56 public char scanner_linebuf[MAXLINESIZE]; 57 58 private File in; 59 private char *curchar, *prevchar; 60 61 #define MAXINCLDEPTH 10 62 63 private struct { 64 File savefile; 65 Filename savefn; 66 int savelineno; 67 } inclinfo[MAXINCLDEPTH]; 68 69 private unsigned int curinclindex; 70 71 private Token getident(); 72 private Token getnum(); 73 private Token getstring(); 74 private Boolean eofinput(); 75 private char charcon(); 76 77 private enterlexclass(class, s) 78 Charclass class; 79 String s; 80 { 81 register char *p; 82 83 for (p = s; *p != '\0'; p++) { 84 lexclass[*p] = class; 85 } 86 } 87 88 public scanner_init() 89 { 90 register Integer i; 91 92 for (i = 0; i < 257; i++) { 93 class[i] = OTHER; 94 } 95 enterlexclass(WHITE, " \t"); 96 enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); 97 enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); 98 enterlexclass(NUM, "0123456789"); 99 in = stdin; 100 errfilename = nil; 101 errlineno = 0; 102 curchar = scanner_linebuf; 103 scanner_linebuf[0] = '\0'; 104 chkalias = true; 105 } 106 107 /* 108 * Read a single token. 109 * 110 * The input is line buffered. Tokens cannot cross line boundaries. 111 * 112 * There are two "modes" of operation: one as in a compiler, 113 * and one for reading shell-like syntax. In the first mode 114 * there is the additional choice of doing alias processing. 115 */ 116 117 private Boolean shellmode; 118 119 public Token yylex() 120 { 121 register int c; 122 register char *p; 123 register Token t; 124 String line; 125 integer n; 126 127 p = curchar; 128 if (*p == '\0') { 129 do { 130 if (isterm(in)) { 131 printf("(%s) ", cmdname); 132 } 133 fflush(stdout); 134 line = fgets(scanner_linebuf, MAXLINESIZE, in); 135 } while (line == nil and not eofinput()); 136 if (line == nil) { 137 c = EOF; 138 } else { 139 p = scanner_linebuf; 140 while (lexclass[*p] == WHITE) { 141 p++; 142 } 143 shellmode = false; 144 } 145 chkalias = true; 146 } else { 147 while (lexclass[*p] == WHITE) { 148 p++; 149 } 150 } 151 curchar = p; 152 prevchar = curchar; 153 c = *p; 154 if (lexclass[c] == ALPHA) { 155 t = getident(chkalias); 156 } else if (lexclass[c] == NUM) { 157 if (shellmode) { 158 t = getident(chkalias); 159 } else { 160 t = getnum(); 161 } 162 } else { 163 ++curchar; 164 switch (c) { 165 case '\n': 166 t = '\n'; 167 if (errlineno != 0) { 168 errlineno++; 169 } 170 break; 171 172 case '"': 173 case '\'': 174 t = getstring(c); 175 break; 176 177 case '.': 178 if (shellmode) { 179 --curchar; 180 t = getident(chkalias); 181 } else if (isdigit(*curchar)) { 182 --curchar; 183 t = getnum(); 184 } else { 185 t = '.'; 186 } 187 break; 188 189 case '-': 190 if (shellmode) { 191 --curchar; 192 t = getident(chkalias); 193 } else if (*curchar == '>') { 194 ++curchar; 195 t = ARROW; 196 } else { 197 t = '-'; 198 } 199 break; 200 201 case '#': 202 if (not isterm(in)) { 203 *p = '\0'; 204 curchar = p; 205 t = '\n'; 206 ++errlineno; 207 } else { 208 t = '#'; 209 } 210 break; 211 212 case '\\': 213 if (*(p+1) == '\n') { 214 n = MAXLINESIZE - (p - &scanner_linebuf[0]); 215 if (n > 1) { 216 if (fgets(p, n, in) == nil) { 217 t = 0; 218 } else { 219 curchar = p; 220 t = yylex(); 221 } 222 } else { 223 t = '\\'; 224 } 225 } else { 226 t = '\\'; 227 } 228 break; 229 230 case EOF: 231 t = 0; 232 break; 233 234 default: 235 if (shellmode and index("!&*<>()[]", c) == nil) { 236 --curchar; 237 t = getident(chkalias); 238 } else { 239 t = c; 240 } 241 break; 242 } 243 } 244 chkalias = false; 245 # ifdef LEXDEBUG 246 if (lexdebug) { 247 fprintf(stderr, "yylex returns "); 248 print_token(stderr, t); 249 fprintf(stderr, "\n"); 250 } 251 # endif 252 return t; 253 } 254 255 /* 256 * Put the given string before the current character 257 * in the current line, thus inserting it into the input stream. 258 */ 259 260 public insertinput (s) 261 String s; 262 { 263 register char *p, *q; 264 int need, avail, shift; 265 266 q = s; 267 need = strlen(q); 268 avail = curchar - &scanner_linebuf[0]; 269 if (need <= avail) { 270 curchar = &scanner_linebuf[avail - need]; 271 p = curchar; 272 while (*q != '\0') { 273 *p++ = *q++; 274 } 275 } else { 276 p = curchar; 277 while (*p != '\0') { 278 ++p; 279 } 280 shift = need - avail; 281 if (p + shift >= &scanner_linebuf[MAXLINESIZE]) { 282 error("alias expansion too large"); 283 } 284 for (;;) { 285 *(p + shift) = *p; 286 if (p == curchar) { 287 break; 288 } 289 --p; 290 } 291 p = &scanner_linebuf[0]; 292 while (*q != '\0') { 293 *p++ = *q++; 294 } 295 curchar = &scanner_linebuf[0]; 296 } 297 } 298 299 /* 300 * Get the actuals for a macro call. 301 */ 302 303 private String movetochar (str, c) 304 String str; 305 char c; 306 { 307 register char *p; 308 309 while (*p != c) { 310 if (*p == '\0') { 311 error("missing ')' in macro call"); 312 } else if (*p == ')') { 313 error("not enough parameters in macro call"); 314 } else if (*p == ',') { 315 error("too many parameters in macro call"); 316 } 317 ++p; 318 } 319 return p; 320 } 321 322 private String *getactuals (n) 323 integer n; 324 { 325 String *a; 326 register char *p; 327 int i; 328 329 a = newarr(String, n); 330 p = curchar; 331 while (*p != '(') { 332 if (lexclass[*p] != WHITE) { 333 error("missing actuals for macro"); 334 } 335 ++p; 336 } 337 ++p; 338 for (i = 0; i < n - 1; i++) { 339 a[i] = p; 340 p = movetochar(p, ','); 341 *p = '\0'; 342 ++p; 343 } 344 a[n-1] = p; 345 p = movetochar(p, ')'); 346 *p = '\0'; 347 curchar = p + 1; 348 return a; 349 } 350 351 /* 352 * Do command macro expansion, assuming curchar points to the beginning 353 * of the actuals, and we are not in shell mode. 354 */ 355 356 private expand (pl, str) 357 List pl; 358 String str; 359 { 360 char buf[4096], namebuf[100]; 361 register char *p, *q, *r; 362 String *actual; 363 Name n; 364 integer i; 365 boolean match; 366 367 if (pl == nil) { 368 insertinput(str); 369 } else { 370 actual = getactuals(list_size(pl)); 371 p = buf; 372 q = str; 373 while (*q != '\0') { 374 if (p >= &buf[4096]) { 375 error("alias expansion too large"); 376 } 377 if (lexclass[*q] == ALPHA) { 378 r = namebuf; 379 do { 380 *r++ = *q++; 381 } while (isalnum(*q)); 382 *r = '\0'; 383 i = 0; 384 match = false; 385 foreach(Name, n, pl) 386 if (streq(ident(n), namebuf)) { 387 match = true; 388 break; 389 } 390 ++i; 391 endfor 392 if (match) { 393 r = actual[i]; 394 } else { 395 r = namebuf; 396 } 397 while (*r != '\0') { 398 *p++ = *r++; 399 } 400 } else { 401 *p++ = *q++; 402 } 403 } 404 *p = '\0'; 405 insertinput(buf); 406 } 407 } 408 409 /* 410 * Parser error handling. 411 */ 412 413 public yyerror(s) 414 String s; 415 { 416 register char *p; 417 register integer start; 418 419 if (streq(s, "syntax error")) { 420 beginerrmsg(); 421 p = prevchar; 422 start = p - &scanner_linebuf[0]; 423 if (p > &scanner_linebuf[0]) { 424 while (lexclass[*p] == WHITE and p > &scanner_linebuf[0]) { 425 --p; 426 } 427 } 428 fprintf(stderr, "%s", scanner_linebuf); 429 if (start != 0) { 430 fprintf(stderr, "%*c", start, ' '); 431 } 432 if (p == &scanner_linebuf[0]) { 433 fprintf(stderr, "^ unrecognized command"); 434 } else { 435 fprintf(stderr, "^ syntax error"); 436 } 437 enderrmsg(); 438 } else { 439 error(s); 440 } 441 } 442 443 /* 444 * Eat the current line. 445 */ 446 447 public gobble () 448 { 449 curchar = scanner_linebuf; 450 scanner_linebuf[0] = '\0'; 451 } 452 453 /* 454 * Scan an identifier. 455 * 456 * If chkalias is true, check first to see if it's an alias. 457 * Otherwise, check to see if it's a keyword. 458 */ 459 460 private Token getident (chkalias) 461 boolean chkalias; 462 { 463 char buf[1024]; 464 register char *p, *q; 465 register Token t; 466 List pl; 467 String str; 468 469 p = curchar; 470 q = buf; 471 if (shellmode) { 472 do { 473 *q++ = *p++; 474 } while (index(" \t\n!&<>*[]()'\"", *p) == nil); 475 } else { 476 do { 477 *q++ = *p++; 478 } while (isalnum(*p)); 479 } 480 curchar = p; 481 *q = '\0'; 482 yylval.y_name = identname(buf, false); 483 if (chkalias) { 484 if (findalias(yylval.y_name, &pl, &str)) { 485 expand(pl, str); 486 while (lexclass[*curchar] == WHITE) { 487 ++curchar; 488 } 489 if (pl == nil) { 490 t = getident(false); 491 } else { 492 t = getident(true); 493 } 494 } else if (shellmode) { 495 t = NAME; 496 } else { 497 t = findkeyword(yylval.y_name, NAME); 498 } 499 } else if (shellmode) { 500 t = NAME; 501 } else { 502 t = findkeyword(yylval.y_name, NAME); 503 } 504 return t; 505 } 506 507 /* 508 * Scan a number. 509 */ 510 511 private Token getnum() 512 { 513 char buf[1024]; 514 register Char *p, *q; 515 register Token t; 516 Integer base; 517 518 p = curchar; 519 q = buf; 520 if (*p == '0') { 521 if (*(p+1) == 'x') { 522 p += 2; 523 base = 16; 524 } else if (*(p+1) == 't') { 525 base = 10; 526 } else if (varIsSet("$hexin")) { 527 base = 16; 528 } else { 529 base = 8; 530 } 531 } else if (varIsSet("$hexin")) { 532 base = 16; 533 } else if (varIsSet("$octin")) { 534 base = 8; 535 } else { 536 base = 10; 537 } 538 if (base == 16) { 539 do { 540 *q++ = *p++; 541 } while (ishexdigit(*p)); 542 } else { 543 do { 544 *q++ = *p++; 545 } while (isdigit(*p)); 546 } 547 if (*p == '.') { 548 do { 549 *q++ = *p++; 550 } while (isdigit(*p)); 551 if (*p == 'e' or *p == 'E') { 552 p++; 553 if (*p == '+' or *p == '-' or isdigit(*p)) { 554 *q++ = 'e'; 555 do { 556 *q++ = *p++; 557 } while (isdigit(*p)); 558 } 559 } 560 *q = '\0'; 561 yylval.y_real = atof(buf); 562 t = REAL; 563 } else { 564 *q = '\0'; 565 switch (base) { 566 case 10: 567 yylval.y_int = atol(buf); 568 break; 569 570 case 8: 571 yylval.y_int = octal(buf); 572 break; 573 574 case 16: 575 yylval.y_int = hex(buf); 576 break; 577 578 default: 579 badcaseval(base); 580 } 581 t = INT; 582 } 583 curchar = p; 584 return t; 585 } 586 587 /* 588 * Convert a string of octal digits to an integer. 589 */ 590 591 private int octal(s) 592 String s; 593 { 594 register Char *p; 595 register Integer n; 596 597 n = 0; 598 for (p = s; *p != '\0'; p++) { 599 n = 8*n + (*p - '0'); 600 } 601 return n; 602 } 603 604 /* 605 * Convert a string of hexadecimal digits to an integer. 606 */ 607 608 private int hex(s) 609 String s; 610 { 611 register Char *p; 612 register Integer n; 613 614 n = 0; 615 for (p = s; *p != '\0'; p++) { 616 n *= 16; 617 if (*p >= 'a' and *p <= 'f') { 618 n += (*p - 'a' + 10); 619 } else if (*p >= 'A' and *p <= 'F') { 620 n += (*p - 'A' + 10); 621 } else { 622 n += (*p - '0'); 623 } 624 } 625 return n; 626 } 627 628 /* 629 * Scan a string. 630 */ 631 632 private Token getstring (quote) 633 char quote; 634 { 635 register char *p, *q; 636 char buf[MAXLINESIZE]; 637 boolean endofstring; 638 Token t; 639 640 p = curchar; 641 q = buf; 642 endofstring = false; 643 while (not endofstring) { 644 if (*p == '\\' and *(p+1) == '\n') { 645 if (fgets(scanner_linebuf, MAXLINESIZE, in) == nil) { 646 error("non-terminated string"); 647 } 648 p = &scanner_linebuf[0] - 1; 649 } else if (*p == '\n' or *p == '\0') { 650 error("non-terminated string"); 651 endofstring = true; 652 } else if (*p == quote) { 653 endofstring = true; 654 } else { 655 curchar = p; 656 *q++ = charcon(p); 657 p = curchar; 658 } 659 p++; 660 } 661 curchar = p; 662 *q = '\0'; 663 if (quote == '\'' and buf[1] == '\0') { 664 yylval.y_char = buf[0]; 665 t = CHAR; 666 } else { 667 yylval.y_string = strdup(buf); 668 t = STRING; 669 } 670 return t; 671 } 672 673 /* 674 * Process a character constant. 675 * Watch out for backslashes. 676 */ 677 678 private char charcon (s) 679 String s; 680 { 681 register char *p, *q; 682 char c, buf[10]; 683 684 p = s; 685 if (*p == '\\') { 686 ++p; 687 switch (*p) { 688 case '\\': 689 c = '\\'; 690 break; 691 692 case 'n': 693 c = '\n'; 694 break; 695 696 case 'r': 697 c = '\r'; 698 break; 699 700 case 't': 701 c = '\t'; 702 break; 703 704 case '\'': 705 case '"': 706 c = *p; 707 break; 708 709 default: 710 if (isdigit(*p)) { 711 q = buf; 712 do { 713 *q++ = *p++; 714 } while (isdigit(*p)); 715 *q = '\0'; 716 c = (char) octal(buf); 717 } 718 --p; 719 break; 720 } 721 curchar = p; 722 } else { 723 c = *p; 724 } 725 return c; 726 } 727 728 /* 729 * Input file management routines. 730 */ 731 732 public setinput(filename) 733 Filename filename; 734 { 735 File f; 736 737 f = fopen(filename, "r"); 738 if (f == nil) { 739 error("can't open %s", filename); 740 } else { 741 if (curinclindex >= MAXINCLDEPTH) { 742 error("unreasonable input nesting on \"%s\"", filename); 743 } 744 inclinfo[curinclindex].savefile = in; 745 inclinfo[curinclindex].savefn = errfilename; 746 inclinfo[curinclindex].savelineno = errlineno; 747 curinclindex++; 748 in = f; 749 errfilename = filename; 750 errlineno = 1; 751 } 752 } 753 754 private Boolean eofinput() 755 { 756 register Boolean b; 757 758 if (curinclindex == 0) { 759 if (isterm(in)) { 760 putchar('\n'); 761 clearerr(in); 762 b = false; 763 } else { 764 b = true; 765 } 766 } else { 767 fclose(in); 768 --curinclindex; 769 in = inclinfo[curinclindex].savefile; 770 errfilename = inclinfo[curinclindex].savefn; 771 errlineno = inclinfo[curinclindex].savelineno; 772 b = false; 773 } 774 return b; 775 } 776 777 /* 778 * Pop the current input. Return whether successful. 779 */ 780 781 public Boolean popinput() 782 { 783 Boolean b; 784 785 if (curinclindex == 0) { 786 b = false; 787 } else { 788 b = (Boolean) (not eofinput()); 789 } 790 return b; 791 } 792 793 /* 794 * Return whether we are currently reading from standard input. 795 */ 796 797 public Boolean isstdin() 798 { 799 return (Boolean) (in == stdin); 800 } 801 802 /* 803 * Send the current line to the shell. 804 */ 805 806 public shellline() 807 { 808 register char *p; 809 810 p = curchar; 811 while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { 812 ++p; 813 } 814 shell(p); 815 if (*p == '\0' and isterm(in)) { 816 putchar('\n'); 817 } 818 erecover(); 819 } 820 821 /* 822 * Read the rest of the current line in "shell mode". 823 */ 824 825 public beginshellmode() 826 { 827 shellmode = true; 828 } 829 830 /* 831 * Print out a token for debugging. 832 */ 833 834 public print_token(f, t) 835 File f; 836 Token t; 837 { 838 if (t == '\n') { 839 fprintf(f, "char '\\n'"); 840 } else if (t == EOF) { 841 fprintf(f, "EOF"); 842 } else if (t < 256) { 843 fprintf(f, "char '%c'", t); 844 } else { 845 fprintf(f, "\"%s\"", keywdstring(t)); 846 } 847 } 848