1 /* 2 * Copyright (c) 1983 Regents of the University of California. 3 * All rights reserved. The Berkeley software License Agreement 4 * specifies the terms and conditions for redistribution. 5 */ 6 7 #ifndef lint 8 static char sccsid[] = "@(#)scanner.c 5.1 (Berkeley) 05/31/85"; 9 #endif not lint 10 11 static char rcsid[] = "$Header: scanner.c,v 1.5 84/12/26 10:42:05 linton Exp $"; 12 13 /* 14 * Debugger scanner. 15 */ 16 17 #include "defs.h" 18 #include "scanner.h" 19 #include "main.h" 20 #include "keywords.h" 21 #include "tree.h" 22 #include "symbols.h" 23 #include "names.h" 24 #include "y.tab.h" 25 26 #ifndef public 27 typedef int Token; 28 29 #define MAXLINESIZE 10240 30 31 #endif 32 33 public String initfile = ".dbxinit"; 34 35 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; 36 37 private Charclass class[256 + 1]; 38 private Charclass *lexclass = class + 1; 39 40 #define isdigit(c) (lexclass[c] == NUM) 41 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) 42 #define ishexdigit(c) ( \ 43 isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ 44 ) 45 46 public boolean chkalias; 47 public char scanner_linebuf[MAXLINESIZE]; 48 49 private File in; 50 private char *curchar, *prevchar; 51 52 #define MAXINCLDEPTH 10 53 54 private struct { 55 File savefile; 56 Filename savefn; 57 int savelineno; 58 } inclinfo[MAXINCLDEPTH]; 59 60 private unsigned int curinclindex; 61 62 private Token getident(); 63 private Token getnum(); 64 private Token getstring(); 65 private Boolean eofinput(); 66 private char charcon(); 67 68 private enterlexclass(class, s) 69 Charclass class; 70 String s; 71 { 72 register char *p; 73 74 for (p = s; *p != '\0'; p++) { 75 lexclass[*p] = class; 76 } 77 } 78 79 public scanner_init() 80 { 81 register Integer i; 82 83 for (i = 0; i < 257; i++) { 84 class[i] = OTHER; 85 } 86 enterlexclass(WHITE, " \t"); 87 enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); 88 enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); 89 enterlexclass(NUM, "0123456789"); 90 in = stdin; 91 errfilename = nil; 92 errlineno = 0; 93 curchar = scanner_linebuf; 94 scanner_linebuf[0] = '\0'; 95 chkalias = true; 96 } 97 98 /* 99 * Read a single token. 100 * 101 * The input is line buffered. Tokens cannot cross line boundaries. 102 * 103 * There are two "modes" of operation: one as in a compiler, 104 * and one for reading shell-like syntax. In the first mode 105 * there is the additional choice of doing alias processing. 106 */ 107 108 private Boolean shellmode; 109 110 public Token yylex() 111 { 112 register int c; 113 register char *p; 114 register Token t; 115 String line; 116 integer n; 117 118 p = curchar; 119 if (*p == '\0') { 120 do { 121 if (isterm(in)) { 122 printf("(%s) ", cmdname); 123 } 124 fflush(stdout); 125 line = fgets(scanner_linebuf, MAXLINESIZE, in); 126 } while (line == nil and not eofinput()); 127 if (line == nil) { 128 c = EOF; 129 } else { 130 p = scanner_linebuf; 131 while (lexclass[*p] == WHITE) { 132 p++; 133 } 134 shellmode = false; 135 } 136 chkalias = true; 137 } else { 138 while (lexclass[*p] == WHITE) { 139 p++; 140 } 141 } 142 curchar = p; 143 prevchar = curchar; 144 c = *p; 145 if (lexclass[c] == ALPHA) { 146 t = getident(chkalias); 147 } else if (lexclass[c] == NUM) { 148 if (shellmode) { 149 t = getident(chkalias); 150 } else { 151 t = getnum(); 152 } 153 } else { 154 ++curchar; 155 switch (c) { 156 case '\n': 157 t = '\n'; 158 if (errlineno != 0) { 159 errlineno++; 160 } 161 break; 162 163 case '"': 164 case '\'': 165 t = getstring(c); 166 break; 167 168 case '.': 169 if (shellmode) { 170 --curchar; 171 t = getident(chkalias); 172 } else if (isdigit(*curchar)) { 173 --curchar; 174 t = getnum(); 175 } else { 176 t = '.'; 177 } 178 break; 179 180 case '-': 181 if (shellmode) { 182 --curchar; 183 t = getident(chkalias); 184 } else if (*curchar == '>') { 185 ++curchar; 186 t = ARROW; 187 } else { 188 t = '-'; 189 } 190 break; 191 192 case '#': 193 if (not isterm(in)) { 194 *p = '\0'; 195 curchar = p; 196 t = '\n'; 197 ++errlineno; 198 } else { 199 t = '#'; 200 } 201 break; 202 203 case '\\': 204 if (*(p+1) == '\n') { 205 n = MAXLINESIZE - (p - &scanner_linebuf[0]); 206 if (n > 1) { 207 if (fgets(p, n, in) == nil) { 208 t = 0; 209 } else { 210 curchar = p; 211 t = yylex(); 212 } 213 } else { 214 t = '\\'; 215 } 216 } else { 217 t = '\\'; 218 } 219 break; 220 221 case EOF: 222 t = 0; 223 break; 224 225 default: 226 if (shellmode and index("!&*<>()[]", c) == nil) { 227 --curchar; 228 t = getident(chkalias); 229 } else { 230 t = c; 231 } 232 break; 233 } 234 } 235 chkalias = false; 236 # ifdef LEXDEBUG 237 if (lexdebug) { 238 fprintf(stderr, "yylex returns "); 239 print_token(stderr, t); 240 fprintf(stderr, "\n"); 241 } 242 # endif 243 return t; 244 } 245 246 /* 247 * Put the given string before the current character 248 * in the current line, thus inserting it into the input stream. 249 */ 250 251 public insertinput (s) 252 String s; 253 { 254 register char *p, *q; 255 int need, avail, shift; 256 257 q = s; 258 need = strlen(q); 259 avail = curchar - &scanner_linebuf[0]; 260 if (need <= avail) { 261 curchar = &scanner_linebuf[avail - need]; 262 p = curchar; 263 while (*q != '\0') { 264 *p++ = *q++; 265 } 266 } else { 267 p = curchar; 268 while (*p != '\0') { 269 ++p; 270 } 271 shift = need - avail; 272 if (p + shift >= &scanner_linebuf[MAXLINESIZE]) { 273 error("alias expansion too large"); 274 } 275 for (;;) { 276 *(p + shift) = *p; 277 if (p == curchar) { 278 break; 279 } 280 --p; 281 } 282 p = &scanner_linebuf[0]; 283 while (*q != '\0') { 284 *p++ = *q++; 285 } 286 curchar = &scanner_linebuf[0]; 287 } 288 } 289 290 /* 291 * Get the actuals for a macro call. 292 */ 293 294 private String movetochar (str, c) 295 String str; 296 char c; 297 { 298 register char *p; 299 300 while (*p != c) { 301 if (*p == '\0') { 302 error("missing ')' in macro call"); 303 } else if (*p == ')') { 304 error("not enough parameters in macro call"); 305 } else if (*p == ',') { 306 error("too many parameters in macro call"); 307 } 308 ++p; 309 } 310 return p; 311 } 312 313 private String *getactuals (n) 314 integer n; 315 { 316 String *a; 317 register char *p; 318 int i; 319 320 a = newarr(String, n); 321 p = curchar; 322 while (*p != '(') { 323 if (lexclass[*p] != WHITE) { 324 error("missing actuals for macro"); 325 } 326 ++p; 327 } 328 ++p; 329 for (i = 0; i < n - 1; i++) { 330 a[i] = p; 331 p = movetochar(p, ','); 332 *p = '\0'; 333 ++p; 334 } 335 a[n-1] = p; 336 p = movetochar(p, ')'); 337 *p = '\0'; 338 curchar = p + 1; 339 return a; 340 } 341 342 /* 343 * Do command macro expansion, assuming curchar points to the beginning 344 * of the actuals, and we are not in shell mode. 345 */ 346 347 private expand (pl, str) 348 List pl; 349 String str; 350 { 351 char buf[4096], namebuf[100]; 352 register char *p, *q, *r; 353 String *actual; 354 Name n; 355 integer i; 356 boolean match; 357 358 if (pl == nil) { 359 insertinput(str); 360 } else { 361 actual = getactuals(list_size(pl)); 362 p = buf; 363 q = str; 364 while (*q != '\0') { 365 if (p >= &buf[4096]) { 366 error("alias expansion too large"); 367 } 368 if (lexclass[*q] == ALPHA) { 369 r = namebuf; 370 do { 371 *r++ = *q++; 372 } while (isalnum(*q)); 373 *r = '\0'; 374 i = 0; 375 match = false; 376 foreach(Name, n, pl) 377 if (streq(ident(n), namebuf)) { 378 match = true; 379 break; 380 } 381 ++i; 382 endfor 383 if (match) { 384 r = actual[i]; 385 } else { 386 r = namebuf; 387 } 388 while (*r != '\0') { 389 *p++ = *r++; 390 } 391 } else { 392 *p++ = *q++; 393 } 394 } 395 *p = '\0'; 396 insertinput(buf); 397 } 398 } 399 400 /* 401 * Parser error handling. 402 */ 403 404 public yyerror(s) 405 String s; 406 { 407 register char *p; 408 register integer start; 409 410 if (streq(s, "syntax error")) { 411 beginerrmsg(); 412 p = prevchar; 413 start = p - &scanner_linebuf[0]; 414 if (p > &scanner_linebuf[0]) { 415 while (lexclass[*p] == WHITE and p > &scanner_linebuf[0]) { 416 --p; 417 } 418 } 419 fprintf(stderr, "%s", scanner_linebuf); 420 if (start != 0) { 421 fprintf(stderr, "%*c", start, ' '); 422 } 423 if (p == &scanner_linebuf[0]) { 424 fprintf(stderr, "^ unrecognized command"); 425 } else { 426 fprintf(stderr, "^ syntax error"); 427 } 428 enderrmsg(); 429 } else { 430 error(s); 431 } 432 } 433 434 /* 435 * Eat the current line. 436 */ 437 438 public gobble () 439 { 440 curchar = scanner_linebuf; 441 scanner_linebuf[0] = '\0'; 442 } 443 444 /* 445 * Scan an identifier. 446 * 447 * If chkalias is true, check first to see if it's an alias. 448 * Otherwise, check to see if it's a keyword. 449 */ 450 451 private Token getident (chkalias) 452 boolean chkalias; 453 { 454 char buf[1024]; 455 register char *p, *q; 456 register Token t; 457 List pl; 458 String str; 459 460 p = curchar; 461 q = buf; 462 if (shellmode) { 463 do { 464 *q++ = *p++; 465 } while (index(" \t\n!&<>*[]()'\"", *p) == nil); 466 } else { 467 do { 468 *q++ = *p++; 469 } while (isalnum(*p)); 470 } 471 curchar = p; 472 *q = '\0'; 473 yylval.y_name = identname(buf, false); 474 if (chkalias) { 475 if (findalias(yylval.y_name, &pl, &str)) { 476 expand(pl, str); 477 while (lexclass[*curchar] == WHITE) { 478 ++curchar; 479 } 480 if (pl == nil) { 481 t = getident(false); 482 } else { 483 t = getident(true); 484 } 485 } else if (shellmode) { 486 t = NAME; 487 } else { 488 t = findkeyword(yylval.y_name, NAME); 489 } 490 } else if (shellmode) { 491 t = NAME; 492 } else { 493 t = findkeyword(yylval.y_name, NAME); 494 } 495 return t; 496 } 497 498 /* 499 * Scan a number. 500 */ 501 502 private Token getnum() 503 { 504 char buf[1024]; 505 register Char *p, *q; 506 register Token t; 507 Integer base; 508 509 p = curchar; 510 q = buf; 511 if (*p == '0') { 512 if (*(p+1) == 'x') { 513 p += 2; 514 base = 16; 515 } else if (*(p+1) == 't') { 516 base = 10; 517 } else if (varIsSet("$hexin")) { 518 base = 16; 519 } else { 520 base = 8; 521 } 522 } else if (varIsSet("$hexin")) { 523 base = 16; 524 } else if (varIsSet("$octin")) { 525 base = 8; 526 } else { 527 base = 10; 528 } 529 if (base == 16) { 530 do { 531 *q++ = *p++; 532 } while (ishexdigit(*p)); 533 } else { 534 do { 535 *q++ = *p++; 536 } while (isdigit(*p)); 537 } 538 if (*p == '.') { 539 do { 540 *q++ = *p++; 541 } while (isdigit(*p)); 542 if (*p == 'e' or *p == 'E') { 543 p++; 544 if (*p == '+' or *p == '-' or isdigit(*p)) { 545 *q++ = 'e'; 546 do { 547 *q++ = *p++; 548 } while (isdigit(*p)); 549 } 550 } 551 *q = '\0'; 552 yylval.y_real = atof(buf); 553 t = REAL; 554 } else { 555 *q = '\0'; 556 switch (base) { 557 case 10: 558 yylval.y_int = atol(buf); 559 break; 560 561 case 8: 562 yylval.y_int = octal(buf); 563 break; 564 565 case 16: 566 yylval.y_int = hex(buf); 567 break; 568 569 default: 570 badcaseval(base); 571 } 572 t = INT; 573 } 574 curchar = p; 575 return t; 576 } 577 578 /* 579 * Convert a string of octal digits to an integer. 580 */ 581 582 private int octal(s) 583 String s; 584 { 585 register Char *p; 586 register Integer n; 587 588 n = 0; 589 for (p = s; *p != '\0'; p++) { 590 n = 8*n + (*p - '0'); 591 } 592 return n; 593 } 594 595 /* 596 * Convert a string of hexadecimal digits to an integer. 597 */ 598 599 private int hex(s) 600 String s; 601 { 602 register Char *p; 603 register Integer n; 604 605 n = 0; 606 for (p = s; *p != '\0'; p++) { 607 n *= 16; 608 if (*p >= 'a' and *p <= 'f') { 609 n += (*p - 'a' + 10); 610 } else if (*p >= 'A' and *p <= 'F') { 611 n += (*p - 'A' + 10); 612 } else { 613 n += (*p - '0'); 614 } 615 } 616 return n; 617 } 618 619 /* 620 * Scan a string. 621 */ 622 623 private Token getstring (quote) 624 char quote; 625 { 626 register char *p, *q; 627 char buf[MAXLINESIZE]; 628 boolean endofstring; 629 Token t; 630 631 p = curchar; 632 q = buf; 633 endofstring = false; 634 while (not endofstring) { 635 if (*p == '\\' and *(p+1) == '\n') { 636 if (fgets(scanner_linebuf, MAXLINESIZE, in) == nil) { 637 error("non-terminated string"); 638 } 639 p = &scanner_linebuf[0] - 1; 640 } else if (*p == '\n' or *p == '\0') { 641 error("non-terminated string"); 642 endofstring = true; 643 } else if (*p == quote) { 644 endofstring = true; 645 } else { 646 curchar = p; 647 *q++ = charcon(p); 648 p = curchar; 649 } 650 p++; 651 } 652 curchar = p; 653 *q = '\0'; 654 if (quote == '\'' and buf[1] == '\0') { 655 yylval.y_char = buf[0]; 656 t = CHAR; 657 } else { 658 yylval.y_string = strdup(buf); 659 t = STRING; 660 } 661 return t; 662 } 663 664 /* 665 * Process a character constant. 666 * Watch out for backslashes. 667 */ 668 669 private char charcon (s) 670 String s; 671 { 672 register char *p, *q; 673 char c, buf[10]; 674 675 p = s; 676 if (*p == '\\') { 677 ++p; 678 switch (*p) { 679 case '\\': 680 c = '\\'; 681 break; 682 683 case 'n': 684 c = '\n'; 685 break; 686 687 case 'r': 688 c = '\r'; 689 break; 690 691 case 't': 692 c = '\t'; 693 break; 694 695 case '\'': 696 case '"': 697 c = *p; 698 break; 699 700 default: 701 if (isdigit(*p)) { 702 q = buf; 703 do { 704 *q++ = *p++; 705 } while (isdigit(*p)); 706 *q = '\0'; 707 c = (char) octal(buf); 708 } 709 --p; 710 break; 711 } 712 curchar = p; 713 } else { 714 c = *p; 715 } 716 return c; 717 } 718 719 /* 720 * Input file management routines. 721 */ 722 723 public setinput(filename) 724 Filename filename; 725 { 726 File f; 727 728 f = fopen(filename, "r"); 729 if (f == nil) { 730 error("can't open %s", filename); 731 } else { 732 if (curinclindex >= MAXINCLDEPTH) { 733 error("unreasonable input nesting on \"%s\"", filename); 734 } 735 inclinfo[curinclindex].savefile = in; 736 inclinfo[curinclindex].savefn = errfilename; 737 inclinfo[curinclindex].savelineno = errlineno; 738 curinclindex++; 739 in = f; 740 errfilename = filename; 741 errlineno = 1; 742 } 743 } 744 745 private Boolean eofinput() 746 { 747 register Boolean b; 748 749 if (curinclindex == 0) { 750 if (isterm(in)) { 751 putchar('\n'); 752 clearerr(in); 753 b = false; 754 } else { 755 b = true; 756 } 757 } else { 758 fclose(in); 759 --curinclindex; 760 in = inclinfo[curinclindex].savefile; 761 errfilename = inclinfo[curinclindex].savefn; 762 errlineno = inclinfo[curinclindex].savelineno; 763 b = false; 764 } 765 return b; 766 } 767 768 /* 769 * Pop the current input. Return whether successful. 770 */ 771 772 public Boolean popinput() 773 { 774 Boolean b; 775 776 if (curinclindex == 0) { 777 b = false; 778 } else { 779 b = (Boolean) (not eofinput()); 780 } 781 return b; 782 } 783 784 /* 785 * Return whether we are currently reading from standard input. 786 */ 787 788 public Boolean isstdin() 789 { 790 return (Boolean) (in == stdin); 791 } 792 793 /* 794 * Send the current line to the shell. 795 */ 796 797 public shellline() 798 { 799 register char *p; 800 801 p = curchar; 802 while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { 803 ++p; 804 } 805 shell(p); 806 if (*p == '\0' and isterm(in)) { 807 putchar('\n'); 808 } 809 erecover(); 810 } 811 812 /* 813 * Read the rest of the current line in "shell mode". 814 */ 815 816 public beginshellmode() 817 { 818 shellmode = true; 819 } 820 821 /* 822 * Print out a token for debugging. 823 */ 824 825 public print_token(f, t) 826 File f; 827 Token t; 828 { 829 if (t == '\n') { 830 fprintf(f, "char '\\n'"); 831 } else if (t == EOF) { 832 fprintf(f, "EOF"); 833 } else if (t < 256) { 834 fprintf(f, "char '%c'", t); 835 } else { 836 fprintf(f, "\"%s\"", keywdstring(t)); 837 } 838 } 839