1 /* Copyright (c) 1982 Regents of the University of California */ 2 3 static char sccsid[] = "@(#)scanner.c 1.8 08/05/83"; 4 5 /* 6 * Debugger scanner. 7 */ 8 9 #include "defs.h" 10 #include "scanner.h" 11 #include "main.h" 12 #include "keywords.h" 13 #include "tree.h" 14 #include "symbols.h" 15 #include "names.h" 16 #include "y.tab.h" 17 18 #ifndef public 19 typedef int Token; 20 #endif 21 22 public String initfile = ".dbxinit"; 23 24 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; 25 26 private Charclass class[256 + 1]; 27 private Charclass *lexclass = class + 1; 28 29 #define isdigit(c) (lexclass[c] == NUM) 30 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) 31 #define ishexdigit(c) ( \ 32 isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ 33 ) 34 35 #define MAXLINESIZE 1024 36 37 private File in; 38 private Char linebuf[MAXLINESIZE]; 39 private Char *curchar; 40 41 #define MAXINCLDEPTH 10 42 43 private struct { 44 File savefile; 45 Filename savefn; 46 int savelineno; 47 } inclinfo[MAXINCLDEPTH]; 48 49 private unsigned int curinclindex; 50 51 private Token getident(); 52 private Token getnum(); 53 private Token getstring(); 54 private Boolean eofinput(); 55 private Char charcon(); 56 private Char charlookup(); 57 58 private enterlexclass(class, s) 59 Charclass class; 60 String s; 61 { 62 register char *p; 63 64 for (p = s; *p != '\0'; p++) { 65 lexclass[*p] = class; 66 } 67 } 68 69 public scanner_init() 70 { 71 register Integer i; 72 73 for (i = 0; i < 257; i++) { 74 class[i] = OTHER; 75 } 76 enterlexclass(WHITE, " \t"); 77 enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); 78 enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); 79 enterlexclass(NUM, "0123456789"); 80 in = stdin; 81 errfilename = nil; 82 errlineno = 0; 83 curchar = linebuf; 84 linebuf[0] = '\0'; 85 } 86 87 /* 88 * Read a single token. 89 * 90 * Input is line buffered. 91 * 92 * There are two "modes" of operation: one as in a compiler, 93 * and one for reading shell-like syntax. 94 */ 95 96 private Boolean shellmode; 97 98 public Token yylex() 99 { 100 register int c; 101 register char *p; 102 register Token t; 103 String line; 104 105 p = curchar; 106 if (*p == '\0') { 107 do { 108 if (isterm(in)) { 109 printf("(%s) ", cmdname); 110 fflush(stdout); 111 } 112 line = fgets(linebuf, MAXLINESIZE, in); 113 } while (line == nil and not eofinput()); 114 if (line == nil) { 115 c = EOF; 116 } else { 117 p = linebuf; 118 while (lexclass[*p] == WHITE) { 119 p++; 120 } 121 shellmode = false; 122 } 123 } else { 124 while (lexclass[*p] == WHITE) { 125 p++; 126 } 127 } 128 curchar = p; 129 c = *p; 130 if (lexclass[c] == ALPHA) { 131 t = getident(); 132 } else if (lexclass[c] == NUM) { 133 if (shellmode) { 134 t = getident(); 135 } else { 136 t = getnum(); 137 } 138 } else { 139 ++curchar; 140 switch (c) { 141 case '\n': 142 t = '\n'; 143 if (errlineno != 0) { 144 errlineno++; 145 } 146 break; 147 148 case '"': 149 case '\'': 150 t = getstring(); 151 break; 152 153 case '.': 154 if (shellmode) { 155 --curchar; 156 t = getident(); 157 } else if (isdigit(*curchar)) { 158 --curchar; 159 t = getnum(); 160 } else { 161 t = '.'; 162 } 163 break; 164 165 case '<': 166 if (not shellmode and *curchar == '<') { 167 ++curchar; 168 t = LFORMER; 169 } else { 170 t = '<'; 171 } 172 break; 173 174 case '>': 175 if (not shellmode and *curchar == '>') { 176 ++curchar; 177 t = RFORMER; 178 } else { 179 t = '>'; 180 } 181 break; 182 183 case '#': 184 if (*curchar == '^') { 185 ++curchar; 186 t = ABSTRACTION; 187 } else { 188 t = '#'; 189 } 190 break; 191 192 case '-': 193 if (shellmode) { 194 --curchar; 195 t = getident(); 196 } else if (*curchar == '>') { 197 ++curchar; 198 t = ARROW; 199 } else { 200 t = '-'; 201 } 202 break; 203 204 case EOF: 205 t = 0; 206 break; 207 208 default: 209 if (shellmode and index("!&*()[]", c) == nil) { 210 --curchar; 211 t = getident(); 212 } else { 213 t = c; 214 } 215 break; 216 } 217 } 218 # ifdef LEXDEBUG 219 if (lexdebug) { 220 fprintf(stderr, "yylex returns "); 221 print_token(stderr, t); 222 fprintf(stderr, "\n"); 223 } 224 # endif 225 return t; 226 } 227 228 /* 229 * Parser error handling. 230 */ 231 232 public yyerror(s) 233 String s; 234 { 235 register Char *p, *tokenbegin, *tokenend; 236 register Integer len; 237 238 if (streq(s, "syntax error")) { 239 beginerrmsg(); 240 tokenend = curchar - 1; 241 tokenbegin = tokenend; 242 while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) { 243 --tokenbegin; 244 } 245 len = tokenend - tokenbegin + 1; 246 p = tokenbegin; 247 if (p > &linebuf[0]) { 248 while (lexclass[*p] == WHITE and p > &linebuf[0]) { 249 --p; 250 } 251 } 252 if (p == &linebuf[0]) { 253 fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin); 254 } else { 255 fprintf(stderr, "syntax error"); 256 if (len != 0) { 257 fprintf(stderr, " on \"%.*s\"", len, tokenbegin); 258 } 259 } 260 enderrmsg(); 261 } else { 262 error(s); 263 } 264 } 265 266 /* 267 * Eat the current line. 268 */ 269 270 public gobble() 271 { 272 curchar = linebuf; 273 linebuf[0] = '\0'; 274 } 275 276 /* 277 * Scan an identifier and check to see if it's a keyword. 278 */ 279 280 private Token getident() 281 { 282 char buf[256]; 283 register Char *p, *q; 284 register Token t; 285 286 p = curchar; 287 q = buf; 288 if (shellmode) { 289 do { 290 *q++ = *p++; 291 } while (index(" \t\n!&<>*[]()", *p) == nil); 292 } else { 293 do { 294 *q++ = *p++; 295 } while (isalnum(*p)); 296 } 297 curchar = p; 298 *q = '\0'; 299 yylval.y_name = identname(buf, false); 300 if (not shellmode) { 301 t = findkeyword(yylval.y_name); 302 if (t == nil) { 303 t = NAME; 304 } 305 } else { 306 t = NAME; 307 } 308 return t; 309 } 310 311 /* 312 * Scan a number. 313 */ 314 315 private Token getnum() 316 { 317 char buf[256]; 318 register Char *p, *q; 319 register Token t; 320 Integer base; 321 322 p = curchar; 323 q = buf; 324 if (*p == '0') { 325 if (*(p+1) == 'x') { 326 p += 2; 327 base = 16; 328 } else { 329 base = 8; 330 } 331 } else { 332 base = 10; 333 } 334 if (base == 16) { 335 do { 336 *q++ = *p++; 337 } while (ishexdigit(*p)); 338 } else { 339 do { 340 *q++ = *p++; 341 } while (isdigit(*p)); 342 } 343 if (*p == '.') { 344 do { 345 *q++ = *p++; 346 } while (isdigit(*p)); 347 if (*p == 'e' or *p == 'E') { 348 p++; 349 if (*p == '+' or *p == '-' or isdigit(*p)) { 350 *q++ = 'e'; 351 do { 352 *q++ = *p++; 353 } while (isdigit(*p)); 354 } 355 } 356 *q = '\0'; 357 yylval.y_real = atof(buf); 358 t = REAL; 359 } else { 360 *q = '\0'; 361 switch (base) { 362 case 10: 363 yylval.y_int = atol(buf); 364 break; 365 366 case 8: 367 yylval.y_int = octal(buf); 368 break; 369 370 case 16: 371 yylval.y_int = hex(buf); 372 break; 373 374 default: 375 badcaseval(base); 376 } 377 t = INT; 378 } 379 curchar = p; 380 return t; 381 } 382 383 /* 384 * Convert a string of octal digits to an integer. 385 */ 386 387 private int octal(s) 388 String s; 389 { 390 register Char *p; 391 register Integer n; 392 393 n = 0; 394 for (p = s; *p != '\0'; p++) { 395 n = 8*n + (*p - '0'); 396 } 397 return n; 398 } 399 400 /* 401 * Convert a string of hexadecimal digits to an integer. 402 */ 403 404 private int hex(s) 405 String s; 406 { 407 register Char *p; 408 register Integer n; 409 410 n = 0; 411 for (p = s; *p != '\0'; p++) { 412 n *= 16; 413 if (*p >= 'a' and *p <= 'f') { 414 n += (*p - 'a' + 10); 415 } else if (*p >= 'A' and *p <= 'F') { 416 n += (*p - 'A' + 10); 417 } else { 418 n += (*p - '0'); 419 } 420 } 421 return n; 422 } 423 424 /* 425 * Scan a string. 426 */ 427 428 private Token getstring() 429 { 430 char buf[256]; 431 register Char *p, *q; 432 Boolean endofstring; 433 434 p = curchar; 435 q = buf; 436 endofstring = false; 437 while (not endofstring) { 438 if (*p == '\n' or *p == '\0') { 439 error("non-terminated string"); 440 endofstring = true; 441 } else if (*p == '"' or *p == '\'') { 442 if (*(p+1) != *p) { 443 endofstring = true; 444 } else { 445 *q++ = *p; 446 } 447 } else { 448 curchar = p; 449 *q++ = charcon(p); 450 p = curchar; 451 } 452 p++; 453 } 454 curchar = p; 455 *q = '\0'; 456 yylval.y_string = strdup(buf); 457 return STRING; 458 } 459 460 /* 461 * Process a character constant. 462 * Watch out for backslashes. 463 */ 464 465 private Char charcon(p) 466 char *p; 467 { 468 char c, buf[10], *q; 469 470 if (*p == '\\') { 471 ++p; 472 if (*p != '\\') { 473 q = buf; 474 do { 475 *q++ = *p++; 476 } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0'); 477 *q = '\0'; 478 if (isdigit(buf[0])) { 479 c = (Char) octal(buf); 480 } else { 481 c = charlookup(buf); 482 } 483 curchar = p - 1; 484 } else { 485 c = '\\'; 486 } 487 } else { 488 c = *p; 489 } 490 return c; 491 } 492 493 /* 494 * Do a lookup for a ASCII character name. 495 */ 496 497 private String ascii[] = { 498 "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", 499 "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI", 500 "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", 501 "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", 502 "SP", nil 503 }; 504 505 private char charlookup(s) 506 String s; 507 { 508 register int i; 509 510 for (i = 0; ascii[i] != NULL; i++) { 511 if (streq(s, ascii[i])) { 512 return i; 513 } 514 } 515 if (streq(s, "DEL")) { 516 return 0177; 517 } 518 error("unknown ascii name \"%s\"", s); 519 return '?'; 520 } 521 522 /* 523 * Input file management routines. 524 */ 525 526 public setinput(filename) 527 Filename filename; 528 { 529 File f; 530 531 f = fopen(filename, "r"); 532 if (f == nil) { 533 error("can't open %s", filename); 534 } else { 535 if (curinclindex >= MAXINCLDEPTH) { 536 error("unreasonable input nesting on \"%s\"", filename); 537 } 538 inclinfo[curinclindex].savefile = in; 539 inclinfo[curinclindex].savefn = errfilename; 540 inclinfo[curinclindex].savelineno = errlineno; 541 curinclindex++; 542 in = f; 543 errfilename = filename; 544 errlineno = 1; 545 } 546 } 547 548 private Boolean eofinput() 549 { 550 register Boolean b; 551 552 if (curinclindex == 0) { 553 if (isterm(in)) { 554 putchar('\n'); 555 clearerr(in); 556 b = false; 557 } else { 558 b = true; 559 } 560 } else { 561 fclose(in); 562 --curinclindex; 563 in = inclinfo[curinclindex].savefile; 564 errfilename = inclinfo[curinclindex].savefn; 565 errlineno = inclinfo[curinclindex].savelineno; 566 b = false; 567 } 568 return b; 569 } 570 571 /* 572 * Pop the current input. Return whether successful. 573 */ 574 575 public Boolean popinput() 576 { 577 Boolean b; 578 579 if (curinclindex == 0) { 580 b = false; 581 } else { 582 b = (Boolean) (not eofinput()); 583 } 584 return b; 585 } 586 587 /* 588 * Return whether we are currently reading from standard input. 589 */ 590 591 public Boolean isstdin() 592 { 593 return (Boolean) (in == stdin); 594 } 595 596 /* 597 * Send the current line to the shell. 598 */ 599 600 public shellline() 601 { 602 register char *p; 603 604 p = curchar; 605 while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { 606 ++p; 607 } 608 shell(p); 609 if (*p == '\0' and isterm(in)) { 610 putchar('\n'); 611 } 612 erecover(); 613 } 614 615 /* 616 * Read the rest of the current line in "shell mode". 617 */ 618 619 public beginshellmode() 620 { 621 shellmode = true; 622 } 623 624 /* 625 * Print out a token for debugging. 626 */ 627 628 public print_token(f, t) 629 File f; 630 Token t; 631 { 632 if (t == '\n') { 633 fprintf(f, "char '\\n'"); 634 } else if (t == EOF) { 635 fprintf(f, "EOF"); 636 } else if (t < 256) { 637 fprintf(f, "char '%c'", t); 638 } else { 639 fprintf(f, "\"%s\"", keywdstring(t)); 640 } 641 } 642