1 /* 2 * Copyright (c) 1983 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 */ 7 8 #ifndef lint 9 static char sccsid[] = "@(#)scanner.c 5.3 (Berkeley) 06/01/90"; 10 #endif /* not lint */ 11 12 /* 13 * Debugger scanner. 14 */ 15 16 #include "defs.h" 17 #include "scanner.h" 18 #include "main.h" 19 #include "keywords.h" 20 #include "tree.h" 21 #include "symbols.h" 22 #include "names.h" 23 #include "y.tab.h" 24 25 #ifndef public 26 typedef int Token; 27 28 #define MAXLINESIZE 10240 29 30 #endif 31 32 public String initfile = ".dbxinit"; 33 34 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; 35 36 private Charclass class[256 + 1]; 37 private Charclass *lexclass = class + 1; 38 39 #define isdigit(c) (lexclass[c] == NUM) 40 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) 41 #define ishexdigit(c) ( \ 42 isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ 43 ) 44 45 public boolean chkalias; 46 public char scanner_linebuf[MAXLINESIZE]; 47 48 private File in; 49 private char *curchar, *prevchar; 50 51 #define MAXINCLDEPTH 10 52 53 private struct { 54 File savefile; 55 Filename savefn; 56 int savelineno; 57 } inclinfo[MAXINCLDEPTH]; 58 59 private unsigned int curinclindex; 60 61 private Token getident(); 62 private Token getnum(); 63 private Token getstring(); 64 private Boolean eofinput(); 65 private char charcon(); 66 67 private enterlexclass(class, s) 68 Charclass class; 69 String s; 70 { 71 register char *p; 72 73 for (p = s; *p != '\0'; p++) { 74 lexclass[*p] = class; 75 } 76 } 77 78 public scanner_init() 79 { 80 register Integer i; 81 82 for (i = 0; i < 257; i++) { 83 class[i] = OTHER; 84 } 85 enterlexclass(WHITE, " \t"); 86 enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); 87 enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); 88 enterlexclass(NUM, "0123456789"); 89 in = stdin; 90 errfilename = nil; 91 errlineno = 0; 92 curchar = scanner_linebuf; 93 scanner_linebuf[0] = '\0'; 94 chkalias = true; 95 } 96 97 /* 98 * Read a single token. 99 * 100 * The input is line buffered. Tokens cannot cross line boundaries. 101 * 102 * There are two "modes" of operation: one as in a compiler, 103 * and one for reading shell-like syntax. In the first mode 104 * there is the additional choice of doing alias processing. 105 */ 106 107 private Boolean shellmode; 108 109 public Token yylex() 110 { 111 register int c; 112 register char *p; 113 register Token t; 114 String line; 115 integer n; 116 117 p = curchar; 118 if (*p == '\0') { 119 do { 120 if (isterm(in)) { 121 printf("(%s) ", cmdname); 122 } 123 fflush(stdout); 124 line = fgets(scanner_linebuf, MAXLINESIZE, in); 125 } while (line == nil and not eofinput()); 126 if (line == nil) { 127 c = EOF; 128 } else { 129 p = scanner_linebuf; 130 while (lexclass[*p] == WHITE) { 131 p++; 132 } 133 shellmode = false; 134 } 135 chkalias = true; 136 } else { 137 while (lexclass[*p] == WHITE) { 138 p++; 139 } 140 } 141 curchar = p; 142 prevchar = curchar; 143 c = *p; 144 if (lexclass[c] == ALPHA) { 145 t = getident(chkalias); 146 } else if (lexclass[c] == NUM) { 147 if (shellmode) { 148 t = getident(chkalias); 149 } else { 150 t = getnum(); 151 } 152 } else { 153 ++curchar; 154 switch (c) { 155 case '\n': 156 t = '\n'; 157 if (errlineno != 0) { 158 errlineno++; 159 } 160 break; 161 162 case '"': 163 case '\'': 164 t = getstring(c); 165 break; 166 167 case '.': 168 if (shellmode) { 169 --curchar; 170 t = getident(chkalias); 171 } else if (isdigit(*curchar)) { 172 --curchar; 173 t = getnum(); 174 } else { 175 t = '.'; 176 } 177 break; 178 179 case '-': 180 if (shellmode) { 181 --curchar; 182 t = getident(chkalias); 183 } else if (*curchar == '>') { 184 ++curchar; 185 t = ARROW; 186 } else { 187 t = '-'; 188 } 189 break; 190 191 case '#': 192 if (not isterm(in)) { 193 *p = '\0'; 194 curchar = p; 195 t = '\n'; 196 ++errlineno; 197 } else { 198 t = '#'; 199 } 200 break; 201 202 case '\\': 203 if (*(p+1) == '\n') { 204 n = MAXLINESIZE - (p - &scanner_linebuf[0]); 205 if (n > 1) { 206 if (fgets(p, n, in) == nil) { 207 t = 0; 208 } else { 209 curchar = p; 210 t = yylex(); 211 } 212 } else { 213 t = '\\'; 214 } 215 } else { 216 t = '\\'; 217 } 218 break; 219 220 case EOF: 221 t = 0; 222 break; 223 224 default: 225 if (shellmode and index("!&*<>()[]", c) == nil) { 226 --curchar; 227 t = getident(chkalias); 228 } else { 229 t = c; 230 } 231 break; 232 } 233 } 234 chkalias = false; 235 # ifdef LEXDEBUG 236 if (lexdebug) { 237 fprintf(stderr, "yylex returns "); 238 print_token(stderr, t); 239 fprintf(stderr, "\n"); 240 } 241 # endif 242 return t; 243 } 244 245 /* 246 * Put the given string before the current character 247 * in the current line, thus inserting it into the input stream. 248 */ 249 250 public insertinput (s) 251 String s; 252 { 253 register char *p, *q; 254 int need, avail, shift; 255 256 q = s; 257 need = strlen(q); 258 avail = curchar - &scanner_linebuf[0]; 259 if (need <= avail) { 260 curchar = &scanner_linebuf[avail - need]; 261 p = curchar; 262 while (*q != '\0') { 263 *p++ = *q++; 264 } 265 } else { 266 p = curchar; 267 while (*p != '\0') { 268 ++p; 269 } 270 shift = need - avail; 271 if (p + shift >= &scanner_linebuf[MAXLINESIZE]) { 272 error("alias expansion too large"); 273 } 274 for (;;) { 275 *(p + shift) = *p; 276 if (p == curchar) { 277 break; 278 } 279 --p; 280 } 281 p = &scanner_linebuf[0]; 282 while (*q != '\0') { 283 *p++ = *q++; 284 } 285 curchar = &scanner_linebuf[0]; 286 } 287 } 288 289 /* 290 * Get the actuals for a macro call. 291 */ 292 293 private String movetochar (str, c) 294 String str; 295 char c; 296 { 297 register char *p; 298 299 while (*p != c) { 300 if (*p == '\0') { 301 error("missing ')' in macro call"); 302 } else if (*p == ')') { 303 error("not enough parameters in macro call"); 304 } else if (*p == ',') { 305 error("too many parameters in macro call"); 306 } 307 ++p; 308 } 309 return p; 310 } 311 312 private String *getactuals (n) 313 integer n; 314 { 315 String *a; 316 register char *p; 317 int i; 318 319 a = newarr(String, n); 320 p = curchar; 321 while (*p != '(') { 322 if (lexclass[*p] != WHITE) { 323 error("missing actuals for macro"); 324 } 325 ++p; 326 } 327 ++p; 328 for (i = 0; i < n - 1; i++) { 329 a[i] = p; 330 p = movetochar(p, ','); 331 *p = '\0'; 332 ++p; 333 } 334 a[n-1] = p; 335 p = movetochar(p, ')'); 336 *p = '\0'; 337 curchar = p + 1; 338 return a; 339 } 340 341 /* 342 * Do command macro expansion, assuming curchar points to the beginning 343 * of the actuals, and we are not in shell mode. 344 */ 345 346 private expand (pl, str) 347 List pl; 348 String str; 349 { 350 char buf[4096], namebuf[100]; 351 register char *p, *q, *r; 352 String *actual; 353 Name n; 354 integer i; 355 boolean match; 356 357 if (pl == nil) { 358 insertinput(str); 359 } else { 360 actual = getactuals(list_size(pl)); 361 p = buf; 362 q = str; 363 while (*q != '\0') { 364 if (p >= &buf[4096]) { 365 error("alias expansion too large"); 366 } 367 if (lexclass[*q] == ALPHA) { 368 r = namebuf; 369 do { 370 *r++ = *q++; 371 } while (isalnum(*q)); 372 *r = '\0'; 373 i = 0; 374 match = false; 375 foreach(Name, n, pl) 376 if (streq(ident(n), namebuf)) { 377 match = true; 378 break; 379 } 380 ++i; 381 endfor 382 if (match) { 383 r = actual[i]; 384 } else { 385 r = namebuf; 386 } 387 while (*r != '\0') { 388 *p++ = *r++; 389 } 390 } else { 391 *p++ = *q++; 392 } 393 } 394 *p = '\0'; 395 insertinput(buf); 396 } 397 } 398 399 /* 400 * Parser error handling. 401 */ 402 403 public yyerror(s) 404 String s; 405 { 406 register char *p; 407 register integer start; 408 409 if (streq(s, "syntax error")) { 410 beginerrmsg(); 411 p = prevchar; 412 start = p - &scanner_linebuf[0]; 413 if (p > &scanner_linebuf[0]) { 414 while (lexclass[*p] == WHITE and p > &scanner_linebuf[0]) { 415 --p; 416 } 417 } 418 fprintf(stderr, "%s", scanner_linebuf); 419 if (start != 0) { 420 fprintf(stderr, "%*c", start, ' '); 421 } 422 if (p == &scanner_linebuf[0]) { 423 fprintf(stderr, "^ unrecognized command"); 424 } else { 425 fprintf(stderr, "^ syntax error"); 426 } 427 enderrmsg(); 428 } else { 429 error(s); 430 } 431 } 432 433 /* 434 * Eat the current line. 435 */ 436 437 public gobble () 438 { 439 curchar = scanner_linebuf; 440 scanner_linebuf[0] = '\0'; 441 } 442 443 /* 444 * Scan an identifier. 445 * 446 * If chkalias is true, check first to see if it's an alias. 447 * Otherwise, check to see if it's a keyword. 448 */ 449 450 private Token getident (chkalias) 451 boolean chkalias; 452 { 453 char buf[1024]; 454 register char *p, *q; 455 register Token t; 456 List pl; 457 String str; 458 459 p = curchar; 460 q = buf; 461 if (shellmode) { 462 do { 463 *q++ = *p++; 464 } while (index(" \t\n!&<>*[]()'\"", *p) == nil); 465 } else { 466 do { 467 *q++ = *p++; 468 } while (isalnum(*p)); 469 } 470 curchar = p; 471 *q = '\0'; 472 yylval.y_name = identname(buf, false); 473 if (chkalias) { 474 if (findalias(yylval.y_name, &pl, &str)) { 475 expand(pl, str); 476 while (lexclass[*curchar] == WHITE) { 477 ++curchar; 478 } 479 if (pl == nil) { 480 t = getident(false); 481 } else { 482 t = getident(true); 483 } 484 } else if (shellmode) { 485 t = NAME; 486 } else { 487 t = findkeyword(yylval.y_name, NAME); 488 } 489 } else if (shellmode) { 490 t = NAME; 491 } else { 492 t = findkeyword(yylval.y_name, NAME); 493 } 494 return t; 495 } 496 497 /* 498 * Scan a number. 499 */ 500 501 private Token getnum() 502 { 503 char buf[1024]; 504 register Char *p, *q; 505 register Token t; 506 Integer base; 507 508 p = curchar; 509 q = buf; 510 if (*p == '0') { 511 if (*(p+1) == 'x') { 512 p += 2; 513 base = 16; 514 } else if (*(p+1) == 't') { 515 base = 10; 516 } else if (varIsSet("$hexin")) { 517 base = 16; 518 } else { 519 base = 8; 520 } 521 } else if (varIsSet("$hexin")) { 522 base = 16; 523 } else if (varIsSet("$octin")) { 524 base = 8; 525 } else { 526 base = 10; 527 } 528 if (base == 16) { 529 do { 530 *q++ = *p++; 531 } while (ishexdigit(*p)); 532 } else { 533 do { 534 *q++ = *p++; 535 } while (isdigit(*p)); 536 } 537 if (*p == '.') { 538 do { 539 *q++ = *p++; 540 } while (isdigit(*p)); 541 if (*p == 'e' or *p == 'E') { 542 p++; 543 if (*p == '+' or *p == '-' or isdigit(*p)) { 544 *q++ = 'e'; 545 do { 546 *q++ = *p++; 547 } while (isdigit(*p)); 548 } 549 } 550 *q = '\0'; 551 yylval.y_real = atof(buf); 552 t = REAL; 553 } else { 554 *q = '\0'; 555 switch (base) { 556 case 10: 557 yylval.y_int = atol(buf); 558 break; 559 560 case 8: 561 yylval.y_int = octal(buf); 562 break; 563 564 case 16: 565 yylval.y_int = hex(buf); 566 break; 567 568 default: 569 badcaseval(base); 570 } 571 t = INT; 572 } 573 curchar = p; 574 return t; 575 } 576 577 /* 578 * Convert a string of octal digits to an integer. 579 */ 580 581 private int octal(s) 582 String s; 583 { 584 register Char *p; 585 register Integer n; 586 587 n = 0; 588 for (p = s; *p != '\0'; p++) { 589 n = 8*n + (*p - '0'); 590 } 591 return n; 592 } 593 594 /* 595 * Convert a string of hexadecimal digits to an integer. 596 */ 597 598 private int hex(s) 599 String s; 600 { 601 register Char *p; 602 register Integer n; 603 604 n = 0; 605 for (p = s; *p != '\0'; p++) { 606 n *= 16; 607 if (*p >= 'a' and *p <= 'f') { 608 n += (*p - 'a' + 10); 609 } else if (*p >= 'A' and *p <= 'F') { 610 n += (*p - 'A' + 10); 611 } else { 612 n += (*p - '0'); 613 } 614 } 615 return n; 616 } 617 618 /* 619 * Scan a string. 620 */ 621 622 private Token getstring (quote) 623 char quote; 624 { 625 register char *p, *q; 626 char buf[MAXLINESIZE]; 627 boolean endofstring; 628 Token t; 629 630 p = curchar; 631 q = buf; 632 endofstring = false; 633 while (not endofstring) { 634 if (*p == '\\' and *(p+1) == '\n') { 635 if (fgets(scanner_linebuf, MAXLINESIZE, in) == nil) { 636 error("non-terminated string"); 637 } 638 p = &scanner_linebuf[0] - 1; 639 } else if (*p == '\n' or *p == '\0') { 640 error("non-terminated string"); 641 endofstring = true; 642 } else if (*p == quote) { 643 endofstring = true; 644 } else { 645 curchar = p; 646 *q++ = charcon(p); 647 p = curchar; 648 } 649 p++; 650 } 651 curchar = p; 652 *q = '\0'; 653 if (quote == '\'' and buf[1] == '\0') { 654 yylval.y_char = buf[0]; 655 t = CHAR; 656 } else { 657 yylval.y_string = strdup(buf); 658 t = STRING; 659 } 660 return t; 661 } 662 663 /* 664 * Process a character constant. 665 * Watch out for backslashes. 666 */ 667 668 private char charcon (s) 669 String s; 670 { 671 register char *p, *q; 672 char c, buf[10]; 673 674 p = s; 675 if (*p == '\\') { 676 ++p; 677 switch (*p) { 678 case '\\': 679 c = '\\'; 680 break; 681 682 case 'n': 683 c = '\n'; 684 break; 685 686 case 'r': 687 c = '\r'; 688 break; 689 690 case 't': 691 c = '\t'; 692 break; 693 694 case '\'': 695 case '"': 696 c = *p; 697 break; 698 699 default: 700 if (isdigit(*p)) { 701 q = buf; 702 do { 703 *q++ = *p++; 704 } while (isdigit(*p)); 705 *q = '\0'; 706 c = (char) octal(buf); 707 } 708 --p; 709 break; 710 } 711 curchar = p; 712 } else { 713 c = *p; 714 } 715 return c; 716 } 717 718 /* 719 * Input file management routines. 720 */ 721 722 public setinput(filename) 723 Filename filename; 724 { 725 File f; 726 727 f = fopen(filename, "r"); 728 if (f == nil) { 729 error("can't open %s", filename); 730 } else { 731 if (curinclindex >= MAXINCLDEPTH) { 732 error("unreasonable input nesting on \"%s\"", filename); 733 } 734 inclinfo[curinclindex].savefile = in; 735 inclinfo[curinclindex].savefn = errfilename; 736 inclinfo[curinclindex].savelineno = errlineno; 737 curinclindex++; 738 in = f; 739 errfilename = filename; 740 errlineno = 1; 741 } 742 } 743 744 private Boolean eofinput() 745 { 746 register Boolean b; 747 748 if (curinclindex == 0) { 749 if (isterm(in)) { 750 putchar('\n'); 751 clearerr(in); 752 b = false; 753 } else { 754 b = true; 755 } 756 } else { 757 fclose(in); 758 --curinclindex; 759 in = inclinfo[curinclindex].savefile; 760 errfilename = inclinfo[curinclindex].savefn; 761 errlineno = inclinfo[curinclindex].savelineno; 762 b = false; 763 } 764 return b; 765 } 766 767 /* 768 * Pop the current input. Return whether successful. 769 */ 770 771 public Boolean popinput() 772 { 773 Boolean b; 774 775 if (curinclindex == 0) { 776 b = false; 777 } else { 778 b = (Boolean) (not eofinput()); 779 } 780 return b; 781 } 782 783 /* 784 * Return whether we are currently reading from standard input. 785 */ 786 787 public Boolean isstdin() 788 { 789 return (Boolean) (in == stdin); 790 } 791 792 /* 793 * Send the current line to the shell. 794 */ 795 796 public shellline() 797 { 798 register char *p; 799 800 p = curchar; 801 while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { 802 ++p; 803 } 804 shell(p); 805 if (*p == '\0' and isterm(in)) { 806 putchar('\n'); 807 } 808 erecover(); 809 } 810 811 /* 812 * Read the rest of the current line in "shell mode". 813 */ 814 815 public beginshellmode() 816 { 817 shellmode = true; 818 } 819 820 /* 821 * Print out a token for debugging. 822 */ 823 824 public print_token(f, t) 825 File f; 826 Token t; 827 { 828 if (t == '\n') { 829 fprintf(f, "char '\\n'"); 830 } else if (t == EOF) { 831 fprintf(f, "EOF"); 832 } else if (t < 256) { 833 fprintf(f, "char '%c'", t); 834 } else { 835 fprintf(f, "\"%s\"", keywdstring(t)); 836 } 837 } 838