1 /* Copyright (c) 1982 Regents of the University of California */ 2 3 static char sccsid[] = "@(#)scanner.c 1.2 12/15/82"; 4 5 /* 6 * Debugger scanner. 7 */ 8 9 #include "defs.h" 10 #include "scanner.h" 11 #include "main.h" 12 #include "keywords.h" 13 #include "tree.h" 14 #include "symbols.h" 15 #include "names.h" 16 #include "y.tab.h" 17 18 #ifndef public 19 typedef int Token; 20 #endif 21 22 public String initfile = ".dbxinit"; 23 24 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; 25 26 private Charclass class[256 + 1]; 27 private Charclass *lexclass = class + 1; 28 29 #define isdigit(c) (lexclass[c] == NUM) 30 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) 31 #define ishexdigit(c) ( \ 32 isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ 33 ) 34 35 #define MAXLINESIZE 1024 36 37 private File in; 38 private Char linebuf[MAXLINESIZE]; 39 private Char *curchar; 40 41 #define MAXINCLDEPTH 10 42 43 private struct { 44 File savefile; 45 Filename savefn; 46 int savelineno; 47 } inclinfo[MAXINCLDEPTH]; 48 49 private unsigned int curinclindex; 50 51 private Boolean firsttoken = true; 52 private Boolean firstinit = true; 53 54 private Token getident(); 55 private Token getnum(); 56 private Token getstring(); 57 private Boolean eofinput(); 58 private Char charcon(); 59 private Char charlookup(); 60 61 private enterlexclass(class, s) 62 Charclass class; 63 String s; 64 { 65 register char *p; 66 67 for (p = s; *p != '\0'; p++) { 68 lexclass[*p] = class; 69 } 70 } 71 72 public scanner_init() 73 { 74 register Integer i; 75 76 for (i = 0; i < 257; i++) { 77 class[i] = OTHER; 78 } 79 enterlexclass(WHITE, " \t"); 80 enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); 81 enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); 82 enterlexclass(NUM, "0123456789"); 83 in = stdin; 84 errfilename = nil; 85 errlineno = 0; 86 curchar = linebuf; 87 linebuf[0] = '\0'; 88 if (runfirst) { 89 firstinit = false; 90 firsttoken = false; 91 } else if (firstinit and isterm(in)) { 92 firstinit = false; 93 printf("> "); 94 fflush(stdout); 95 } 96 } 97 98 /* 99 * Read a single token. 100 * 101 * Input is line buffered. 102 * 103 * There are two "modes" of operation: one as in a compiler, 104 * and one for reading shell-like syntax. 105 */ 106 107 private Boolean shellmode; 108 109 public Token yylex() 110 { 111 register int c; 112 register char *p; 113 register Token t; 114 String line; 115 116 p = curchar; 117 if (*p == '\0') { 118 do { 119 if (isterm(in)) { 120 if (firsttoken) { 121 firsttoken = false; 122 } else { 123 printf("> "); 124 fflush(stdout); 125 } 126 } 127 line = fgets(linebuf, MAXLINESIZE, in); 128 } while (line == nil and not eofinput()); 129 if (line == nil) { 130 c = EOF; 131 } else { 132 p = linebuf; 133 while (lexclass[*p] == WHITE) { 134 p++; 135 } 136 shellmode = false; 137 } 138 } else { 139 while (lexclass[*p] == WHITE) { 140 p++; 141 } 142 } 143 curchar = p; 144 c = *p; 145 if (lexclass[c] == ALPHA) { 146 t = getident(); 147 } else if (lexclass[c] == NUM) { 148 t = getnum(); 149 } else { 150 ++curchar; 151 switch (c) { 152 case '\n': 153 t = '\n'; 154 if (errlineno != 0) { 155 errlineno++; 156 } 157 break; 158 159 case '"': 160 case '\'': 161 t = getstring(); 162 break; 163 164 case '.': 165 if (shellmode) { 166 --curchar; 167 t = getident(); 168 } else if (isdigit(*curchar)) { 169 --curchar; 170 t = getnum(); 171 } else { 172 t = '.'; 173 } 174 break; 175 176 case '<': 177 if (not shellmode and *curchar == '<') { 178 ++curchar; 179 t = LFORMER; 180 } else { 181 t = '<'; 182 } 183 break; 184 185 case '>': 186 if (not shellmode and *curchar == '>') { 187 ++curchar; 188 t = RFORMER; 189 } else { 190 t = '>'; 191 } 192 break; 193 194 case '#': 195 if (*curchar == '^') { 196 ++curchar; 197 t = ABSTRACTION; 198 } else { 199 t = '#'; 200 } 201 break; 202 203 case '-': 204 if (shellmode) { 205 --curchar; 206 t = getident(); 207 } else if (*curchar == '>') { 208 ++curchar; 209 t = ARROW; 210 } else { 211 t = '-'; 212 } 213 break; 214 215 case EOF: 216 t = 0; 217 break; 218 219 default: 220 if (shellmode and index("!&*()[]", c) == nil) { 221 --curchar; 222 t = getident(); 223 } else { 224 t = c; 225 } 226 break; 227 } 228 } 229 # ifdef LEXDEBUG 230 if (lexdebug) { 231 fprintf(stderr, "yylex returns "); 232 print_token(stderr, t); 233 fprintf(stderr, "\n"); 234 } 235 # endif 236 return t; 237 } 238 239 /* 240 * Parser error handling. 241 */ 242 243 public yyerror(s) 244 String s; 245 { 246 register Char *p, *tokenbegin, *tokenend; 247 register Integer len; 248 249 if (streq(s, "syntax error")) { 250 beginerrmsg(); 251 tokenend = curchar - 1; 252 tokenbegin = tokenend; 253 while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) { 254 --tokenbegin; 255 } 256 len = tokenend - tokenbegin + 1; 257 p = tokenbegin; 258 if (p > &linebuf[0]) { 259 while (lexclass[*p] == WHITE and p > &linebuf[0]) { 260 --p; 261 } 262 } 263 if (p == &linebuf[0]) { 264 fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin); 265 } else { 266 fprintf(stderr, "syntax error"); 267 if (len != 0) { 268 fprintf(stderr, " on \"%.*s\"", len, tokenbegin); 269 } 270 } 271 enderrmsg(); 272 } else { 273 error(s); 274 } 275 } 276 277 /* 278 * Eat the current line. 279 */ 280 281 public gobble() 282 { 283 curchar = linebuf; 284 linebuf[0] = '\0'; 285 } 286 287 /* 288 * Scan an identifier and check to see if it's a keyword. 289 */ 290 291 private Token getident() 292 { 293 char buf[256]; 294 register Char *p, *q; 295 register Token t; 296 297 p = curchar; 298 q = buf; 299 if (shellmode) { 300 do { 301 *q++ = *p++; 302 } while (index(" \t\n!&<>*[]()", *p) == nil); 303 } else { 304 do { 305 *q++ = *p++; 306 } while (isalnum(*p)); 307 } 308 curchar = p; 309 *q = '\0'; 310 yylval.y_name = identname(buf, false); 311 if (not shellmode) { 312 t = findkeyword(yylval.y_name); 313 if (t == nil) { 314 t = NAME; 315 } 316 } else { 317 t = NAME; 318 } 319 return t; 320 } 321 322 /* 323 * Scan a number. 324 */ 325 326 private Token getnum() 327 { 328 char buf[256]; 329 register Char *p, *q; 330 register Token t; 331 Integer base; 332 333 p = curchar; 334 q = buf; 335 if (*p == '0') { 336 if (*(p+1) == 'x') { 337 p += 2; 338 base = 16; 339 } else { 340 base = 8; 341 } 342 } else { 343 base = 10; 344 } 345 if (base == 16) { 346 do { 347 *q++ = *p++; 348 } while (ishexdigit(*p)); 349 } else { 350 do { 351 *q++ = *p++; 352 } while (isdigit(*p)); 353 } 354 if (*p == '.') { 355 do { 356 *q++ = *p++; 357 } while (isdigit(*p)); 358 if (*p == 'e' or *p == 'E') { 359 p++; 360 if (*p == '+' or *p == '-' or isdigit(*p)) { 361 *q++ = 'e'; 362 do { 363 *q++ = *p++; 364 } while (isdigit(*p)); 365 } 366 } 367 *q = '\0'; 368 yylval.y_real = atof(buf); 369 t = REAL; 370 } else { 371 *q = '\0'; 372 switch (base) { 373 case 10: 374 yylval.y_int = atol(buf); 375 break; 376 377 case 8: 378 yylval.y_int = octal(buf); 379 break; 380 381 case 16: 382 yylval.y_int = hex(buf); 383 break; 384 385 default: 386 badcaseval(base); 387 } 388 t = INT; 389 } 390 curchar = p; 391 return t; 392 } 393 394 /* 395 * Convert a string of octal digits to an integer. 396 */ 397 398 private int octal(s) 399 String s; 400 { 401 register Char *p; 402 register Integer n; 403 404 n = 0; 405 for (p = s; *p != '\0'; p++) { 406 n = 8*n + (*p - '0'); 407 } 408 return n; 409 } 410 411 /* 412 * Convert a string of hexadecimal digits to an integer. 413 */ 414 415 private int hex(s) 416 String s; 417 { 418 register Char *p; 419 register Integer n; 420 421 n = 0; 422 for (p = s; *p != '\0'; p++) { 423 n *= 16; 424 if (*p >= 'a' and *p <= 'f') { 425 n += (*p - 'a' + 10); 426 } else if (*p >= 'A' and *p <= 'F') { 427 n += (*p - 'A' + 10); 428 } else { 429 n += (*p - '0'); 430 } 431 } 432 return n; 433 } 434 435 /* 436 * Scan a string. 437 */ 438 439 private Token getstring() 440 { 441 char buf[256]; 442 register Char *p, *q; 443 Boolean endofstring; 444 445 p = curchar; 446 q = buf; 447 endofstring = false; 448 while (not endofstring) { 449 if (*p == '\n' or *p == '\0') { 450 error("non-terminated string"); 451 endofstring = true; 452 } else if (*p == '"') { 453 if (*(p+1) != '"') { 454 endofstring = true; 455 } else { 456 *q++ = *p; 457 } 458 } else { 459 *q++ = charcon(*p); 460 } 461 p++; 462 } 463 curchar = p; 464 *q = '\0'; 465 yylval.y_string = strdup(buf); 466 return STRING; 467 } 468 469 /* 470 * Process a character constant. 471 * Watch out for backslashes. 472 */ 473 474 private Char charcon(ch) 475 Char ch; 476 { 477 Char c, buf[10], *p, *q; 478 479 p = curchar; 480 if (ch == '\\') { 481 if (*p != '\\') { 482 q = buf; 483 do { 484 *q++ = *p++; 485 } while (*p != '\\' and *p != '\n' and *p != '\0'); 486 if (*p != '\\') { 487 ungetc(*p, in); 488 error("non-terminated character constant"); 489 } 490 *q = '\0'; 491 if (isdigit(buf[0])) { 492 c = (Char) octal(buf); 493 } else { 494 c = charlookup(buf); 495 } 496 curchar = p; 497 } else { 498 c = '\\'; 499 } 500 } else { 501 c = ch; 502 } 503 return c; 504 } 505 506 /* 507 * Do a lookup for a ASCII character name. 508 */ 509 510 private String ascii[] = { 511 "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", 512 "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI", 513 "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", 514 "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", 515 "SP", nil 516 }; 517 518 private char charlookup(s) 519 String s; 520 { 521 register int i; 522 523 for (i = 0; ascii[i] != NULL; i++) { 524 if (streq(s, ascii[i])) { 525 return i; 526 } 527 } 528 if (streq(s, "DEL")) { 529 return 0177; 530 } 531 error("unknown ascii name \"%s\"", s); 532 return '?'; 533 } 534 535 /* 536 * Input file management routines. 537 */ 538 539 public setinput(filename) 540 Filename filename; 541 { 542 File f; 543 544 f = fopen(filename, "r"); 545 if (f == nil) { 546 error("can't open %s", filename); 547 } else { 548 if (curinclindex >= MAXINCLDEPTH) { 549 error("unreasonable input nesting on \"%s\"", filename); 550 } 551 inclinfo[curinclindex].savefile = in; 552 inclinfo[curinclindex].savefn = errfilename; 553 inclinfo[curinclindex].savelineno = errlineno; 554 curinclindex++; 555 in = f; 556 errfilename = filename; 557 errlineno = 1; 558 } 559 } 560 561 private Boolean eofinput() 562 { 563 register Boolean b; 564 565 if (curinclindex == 0) { 566 if (isterm(in)) { 567 putchar('\n'); 568 b = false; 569 } else { 570 b = true; 571 } 572 } else { 573 fclose(in); 574 --curinclindex; 575 in = inclinfo[curinclindex].savefile; 576 errfilename = inclinfo[curinclindex].savefn; 577 errlineno = inclinfo[curinclindex].savelineno; 578 b = false; 579 } 580 return b; 581 } 582 583 /* 584 * Pop the current input. Return whether successful. 585 */ 586 587 public Boolean popinput() 588 { 589 Boolean b; 590 591 if (curinclindex == 0) { 592 b = false; 593 } else { 594 b = (Boolean) (not eofinput()); 595 } 596 return b; 597 } 598 599 /* 600 * Return whether we are currently reading from standard input. 601 */ 602 603 public Boolean isstdin() 604 { 605 return (Boolean) (in == stdin); 606 } 607 608 /* 609 * Send the current line to the shell. 610 */ 611 612 public shellline() 613 { 614 register char *p; 615 616 p = curchar; 617 while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { 618 ++p; 619 } 620 shell(p); 621 if (*p == '\0' and isterm(in)) { 622 putchar('\n'); 623 } 624 erecover(); 625 } 626 627 /* 628 * Read the rest of the current line in "shell mode". 629 */ 630 631 public beginshellmode() 632 { 633 shellmode = true; 634 } 635 636 /* 637 * Print out a token for debugging. 638 */ 639 640 public print_token(f, t) 641 File f; 642 Token t; 643 { 644 if (t == '\n') { 645 fprintf(f, "char '\\n'"); 646 } else if (t == EOF) { 647 fprintf(f, "EOF"); 648 } else if (t < 256) { 649 fprintf(f, "char '%c'", t); 650 } else { 651 fprintf(f, "\"%s\"", keywdstring(t)); 652 } 653 } 654