1 /*
2 * Copyright (c) 1983 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * %sccs.include.redist.c%
6 */
7
8 #ifndef lint
9 static char sccsid[] = "@(#)scanner.c 5.3 (Berkeley) 06/01/90";
10 #endif /* not lint */
11
12 /*
13 * Debugger scanner.
14 */
15
16 #include "defs.h"
17 #include "scanner.h"
18 #include "main.h"
19 #include "keywords.h"
20 #include "tree.h"
21 #include "symbols.h"
22 #include "names.h"
23 #include "y.tab.h"
24
#ifndef public
/* Token code type used by the scanner routines in this file. */
typedef int Token;

/* Size in bytes of scanner_linebuf, the buffer holding the current line. */
#define MAXLINESIZE 10240

#endif

/* Initialization file name; not referenced elsewhere in this file. */
public String initfile = ".dbxinit";

/* Lexical class assigned to each input character. */
typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;

/*
 * Character classification table.  lexclass points one element into
 * class, so subscripts -1 through 255 are valid on lexclass; presumably
 * the extra leading slot exists so that EOF (-1) can be classified --
 * TODO confirm.  The table is filled in by scanner_init().
 */
private Charclass class[256 + 1];
private Charclass *lexclass = class + 1;

/*
 * Character tests driven by the lexclass table above rather than by
 * <ctype.h>.  Each macro evaluates its argument more than once, and
 * ishexdigit does not parenthesize it, so pass only simple,
 * side-effect-free expressions.
 */
#define isdigit(c) (lexclass[c] == NUM)
#define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
#define ishexdigit(c) ( \
    isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
)

/*
 * chkalias: passed by yylex to getident to request alias lookup; yylex
 * sets it when a new line is read and clears it after every token.
 * scanner_linebuf: the line currently being scanned.
 */
public boolean chkalias;
public char scanner_linebuf[MAXLINESIZE];

/*
 * in: the stream lines are read from.
 * curchar: next character of scanner_linebuf to be scanned.
 * prevchar: where the most recent token started (set by yylex, read by
 * yyerror).
 */
private File in;
private char *curchar, *prevchar;

/* Maximum number of nested input files that setinput will stack. */
#define MAXINCLDEPTH 10

/*
 * Stack of suspended inputs: setinput saves the current stream, error
 * file name and error line number here, and eofinput restores them.
 */
private struct {
    File savefile;
    Filename savefn;
    int savelineno;
} inclinfo[MAXINCLDEPTH];

/* Number of entries currently saved in inclinfo. */
private unsigned int curinclindex;

/* Forward declarations of routines defined later in this file. */
private Token getident();
private Token getnum();
private Token getstring();
private Boolean eofinput();
private char charcon();
66
enterlexclass(class,s)67 private enterlexclass(class, s)
68 Charclass class;
69 String s;
70 {
71 register char *p;
72
73 for (p = s; *p != '\0'; p++) {
74 lexclass[*p] = class;
75 }
76 }
77
scanner_init()78 public scanner_init()
79 {
80 register Integer i;
81
82 for (i = 0; i < 257; i++) {
83 class[i] = OTHER;
84 }
85 enterlexclass(WHITE, " \t");
86 enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
87 enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
88 enterlexclass(NUM, "0123456789");
89 in = stdin;
90 errfilename = nil;
91 errlineno = 0;
92 curchar = scanner_linebuf;
93 scanner_linebuf[0] = '\0';
94 chkalias = true;
95 }
96
/*
 * Read a single token.
 *
 * The input is line buffered.  Tokens cannot cross line boundaries.
 *
 * There are two "modes" of operation:  one as in a compiler,
 * and one for reading shell-like syntax.  In the first mode
 * there is the additional choice of doing alias processing.
 *
 * Returns the token code: either the character itself, a code set by
 * getident/getnum/getstring, ARROW for "->", or 0 at end of input.
 * On return curchar points just past the token and prevchar at its start.
 */

/* True while the rest of the current line is scanned with shell syntax. */
private Boolean shellmode;

public Token yylex()
{
    register int c;
    register char *p;
    register Token t;
    String line;
    integer n;

    p = curchar;
    if (*p == '\0') {
        /*
         * The current line is used up: read another one.  When fgets
         * fails, eofinput() decides whether reading can go on (it pops a
         * nested input file, or clears EOF on a terminal) or whether input
         * is really finished.
         */
        do {
            if (isterm(in)) {
                printf("(%s) ", cmdname);
            }
            fflush(stdout);
            line = fgets(scanner_linebuf, MAXLINESIZE, in);
        } while (line == nil and not eofinput());
        if (line == nil) {
            /*
             * NOTE(review): this value is overwritten by "c = *p" below.
             * p still points at the '\0' that ended the previous line, so
             * the switch takes its default case with c == 0 and 0 appears
             * to be what is returned at end of input.  curchar is also
             * advanced one past that '\0' -- confirm this is harmless.
             */
            c = EOF;
        } else {
            p = scanner_linebuf;
            while (lexclass[*p] == WHITE) {
                p++;
            }
            /* Every new line starts out in normal (non-shell) mode. */
            shellmode = false;
        }
        /* The first token scanned after a read gets alias checking. */
        chkalias = true;
    } else {
        while (lexclass[*p] == WHITE) {
            p++;
        }
    }
    curchar = p;
    prevchar = curchar;
    c = *p;
    if (lexclass[c] == ALPHA) {
        t = getident(chkalias);
    } else if (lexclass[c] == NUM) {
        /* In shell mode a leading digit simply starts a word. */
        if (shellmode) {
            t = getident(chkalias);
        } else {
            t = getnum();
        }
    } else {
        /* Step over c; cases that rescan from c back curchar up again. */
        ++curchar;
        switch (c) {
            case '\n':
                t = '\n';
                /* Line counting is only active when errlineno is nonzero. */
                if (errlineno != 0) {
                    errlineno++;
                }
                break;

            case '"':
            case '\'':
                t = getstring(c);
                break;

            case '.':
                if (shellmode) {
                    --curchar;
                    t = getident(chkalias);
                } else if (isdigit(*curchar)) {
                    /* ".digits": let getnum scan it starting at the dot. */
                    --curchar;
                    t = getnum();
                } else {
                    t = '.';
                }
                break;

            case '-':
                if (shellmode) {
                    --curchar;
                    t = getident(chkalias);
                } else if (*curchar == '>') {
                    ++curchar;
                    t = ARROW;
                } else {
                    t = '-';
                }
                break;

            case '#':
                /*
                 * When input is not a terminal, '#' ends the line: the
                 * buffer is cut at the '#' and a newline token is returned.
                 * On a terminal '#' is an ordinary token.
                 */
                if (not isterm(in)) {
                    *p = '\0';
                    curchar = p;
                    t = '\n';
                    ++errlineno;
                } else {
                    t = '#';
                }
                break;

            case '\\':
                /*
                 * Backslash-newline continues the line: the next line is
                 * read into the buffer over the backslash (if more than one
                 * byte of room remains) and scanning restarts from there.
                 */
                if (*(p+1) == '\n') {
                    n = MAXLINESIZE - (p - &scanner_linebuf[0]);
                    if (n > 1) {
                        if (fgets(p, n, in) == nil) {
                            t = 0;
                        } else {
                            curchar = p;
                            t = yylex();
                        }
                    } else {
                        t = '\\';
                    }
                } else {
                    t = '\\';
                }
                break;

            case EOF:
                t = 0;
                break;

            default:
                /*
                 * In shell mode any character other than the listed
                 * metacharacters begins a word; otherwise the character
                 * is its own token.
                 */
                if (shellmode and index("!&*<>()[]", c) == nil) {
                    --curchar;
                    t = getident(chkalias);
                } else {
                    t = c;
                }
                break;
        }
    }
    /* Only the first token after a line is read is alias-checked. */
    chkalias = false;
#   ifdef LEXDEBUG
        if (lexdebug) {
            fprintf(stderr, "yylex returns ");
            print_token(stderr, t);
            fprintf(stderr, "\n");
        }
#   endif
    return t;
}
244
245 /*
246 * Put the given string before the current character
247 * in the current line, thus inserting it into the input stream.
248 */
249
insertinput(s)250 public insertinput (s)
251 String s;
252 {
253 register char *p, *q;
254 int need, avail, shift;
255
256 q = s;
257 need = strlen(q);
258 avail = curchar - &scanner_linebuf[0];
259 if (need <= avail) {
260 curchar = &scanner_linebuf[avail - need];
261 p = curchar;
262 while (*q != '\0') {
263 *p++ = *q++;
264 }
265 } else {
266 p = curchar;
267 while (*p != '\0') {
268 ++p;
269 }
270 shift = need - avail;
271 if (p + shift >= &scanner_linebuf[MAXLINESIZE]) {
272 error("alias expansion too large");
273 }
274 for (;;) {
275 *(p + shift) = *p;
276 if (p == curchar) {
277 break;
278 }
279 --p;
280 }
281 p = &scanner_linebuf[0];
282 while (*q != '\0') {
283 *p++ = *q++;
284 }
285 curchar = &scanner_linebuf[0];
286 }
287 }
288
289 /*
290 * Get the actuals for a macro call.
291 */
292
movetochar(str,c)293 private String movetochar (str, c)
294 String str;
295 char c;
296 {
297 register char *p;
298
299 while (*p != c) {
300 if (*p == '\0') {
301 error("missing ')' in macro call");
302 } else if (*p == ')') {
303 error("not enough parameters in macro call");
304 } else if (*p == ',') {
305 error("too many parameters in macro call");
306 }
307 ++p;
308 }
309 return p;
310 }
311
getactuals(n)312 private String *getactuals (n)
313 integer n;
314 {
315 String *a;
316 register char *p;
317 int i;
318
319 a = newarr(String, n);
320 p = curchar;
321 while (*p != '(') {
322 if (lexclass[*p] != WHITE) {
323 error("missing actuals for macro");
324 }
325 ++p;
326 }
327 ++p;
328 for (i = 0; i < n - 1; i++) {
329 a[i] = p;
330 p = movetochar(p, ',');
331 *p = '\0';
332 ++p;
333 }
334 a[n-1] = p;
335 p = movetochar(p, ')');
336 *p = '\0';
337 curchar = p + 1;
338 return a;
339 }
340
341 /*
342 * Do command macro expansion, assuming curchar points to the beginning
343 * of the actuals, and we are not in shell mode.
344 */
345
expand(pl,str)346 private expand (pl, str)
347 List pl;
348 String str;
349 {
350 char buf[4096], namebuf[100];
351 register char *p, *q, *r;
352 String *actual;
353 Name n;
354 integer i;
355 boolean match;
356
357 if (pl == nil) {
358 insertinput(str);
359 } else {
360 actual = getactuals(list_size(pl));
361 p = buf;
362 q = str;
363 while (*q != '\0') {
364 if (p >= &buf[4096]) {
365 error("alias expansion too large");
366 }
367 if (lexclass[*q] == ALPHA) {
368 r = namebuf;
369 do {
370 *r++ = *q++;
371 } while (isalnum(*q));
372 *r = '\0';
373 i = 0;
374 match = false;
375 foreach(Name, n, pl)
376 if (streq(ident(n), namebuf)) {
377 match = true;
378 break;
379 }
380 ++i;
381 endfor
382 if (match) {
383 r = actual[i];
384 } else {
385 r = namebuf;
386 }
387 while (*r != '\0') {
388 *p++ = *r++;
389 }
390 } else {
391 *p++ = *q++;
392 }
393 }
394 *p = '\0';
395 insertinput(buf);
396 }
397 }
398
399 /*
400 * Parser error handling.
401 */
402
yyerror(s)403 public yyerror(s)
404 String s;
405 {
406 register char *p;
407 register integer start;
408
409 if (streq(s, "syntax error")) {
410 beginerrmsg();
411 p = prevchar;
412 start = p - &scanner_linebuf[0];
413 if (p > &scanner_linebuf[0]) {
414 while (lexclass[*p] == WHITE and p > &scanner_linebuf[0]) {
415 --p;
416 }
417 }
418 fprintf(stderr, "%s", scanner_linebuf);
419 if (start != 0) {
420 fprintf(stderr, "%*c", start, ' ');
421 }
422 if (p == &scanner_linebuf[0]) {
423 fprintf(stderr, "^ unrecognized command");
424 } else {
425 fprintf(stderr, "^ syntax error");
426 }
427 enderrmsg();
428 } else {
429 error(s);
430 }
431 }
432
433 /*
434 * Eat the current line.
435 */
436
gobble()437 public gobble ()
438 {
439 curchar = scanner_linebuf;
440 scanner_linebuf[0] = '\0';
441 }
442
443 /*
444 * Scan an identifier.
445 *
446 * If chkalias is true, check first to see if it's an alias.
447 * Otherwise, check to see if it's a keyword.
448 */
449
getident(chkalias)450 private Token getident (chkalias)
451 boolean chkalias;
452 {
453 char buf[1024];
454 register char *p, *q;
455 register Token t;
456 List pl;
457 String str;
458
459 p = curchar;
460 q = buf;
461 if (shellmode) {
462 do {
463 *q++ = *p++;
464 } while (index(" \t\n!&<>*[]()'\"", *p) == nil);
465 } else {
466 do {
467 *q++ = *p++;
468 } while (isalnum(*p));
469 }
470 curchar = p;
471 *q = '\0';
472 yylval.y_name = identname(buf, false);
473 if (chkalias) {
474 if (findalias(yylval.y_name, &pl, &str)) {
475 expand(pl, str);
476 while (lexclass[*curchar] == WHITE) {
477 ++curchar;
478 }
479 if (pl == nil) {
480 t = getident(false);
481 } else {
482 t = getident(true);
483 }
484 } else if (shellmode) {
485 t = NAME;
486 } else {
487 t = findkeyword(yylval.y_name, NAME);
488 }
489 } else if (shellmode) {
490 t = NAME;
491 } else {
492 t = findkeyword(yylval.y_name, NAME);
493 }
494 return t;
495 }
496
497 /*
498 * Scan a number.
499 */
500
getnum()501 private Token getnum()
502 {
503 char buf[1024];
504 register Char *p, *q;
505 register Token t;
506 Integer base;
507
508 p = curchar;
509 q = buf;
510 if (*p == '0') {
511 if (*(p+1) == 'x') {
512 p += 2;
513 base = 16;
514 } else if (*(p+1) == 't') {
515 base = 10;
516 } else if (varIsSet("$hexin")) {
517 base = 16;
518 } else {
519 base = 8;
520 }
521 } else if (varIsSet("$hexin")) {
522 base = 16;
523 } else if (varIsSet("$octin")) {
524 base = 8;
525 } else {
526 base = 10;
527 }
528 if (base == 16) {
529 do {
530 *q++ = *p++;
531 } while (ishexdigit(*p));
532 } else {
533 do {
534 *q++ = *p++;
535 } while (isdigit(*p));
536 }
537 if (*p == '.') {
538 do {
539 *q++ = *p++;
540 } while (isdigit(*p));
541 if (*p == 'e' or *p == 'E') {
542 p++;
543 if (*p == '+' or *p == '-' or isdigit(*p)) {
544 *q++ = 'e';
545 do {
546 *q++ = *p++;
547 } while (isdigit(*p));
548 }
549 }
550 *q = '\0';
551 yylval.y_real = atof(buf);
552 t = REAL;
553 } else {
554 *q = '\0';
555 switch (base) {
556 case 10:
557 yylval.y_int = atol(buf);
558 break;
559
560 case 8:
561 yylval.y_int = octal(buf);
562 break;
563
564 case 16:
565 yylval.y_int = hex(buf);
566 break;
567
568 default:
569 badcaseval(base);
570 }
571 t = INT;
572 }
573 curchar = p;
574 return t;
575 }
576
577 /*
578 * Convert a string of octal digits to an integer.
579 */
580
octal(s)581 private int octal(s)
582 String s;
583 {
584 register Char *p;
585 register Integer n;
586
587 n = 0;
588 for (p = s; *p != '\0'; p++) {
589 n = 8*n + (*p - '0');
590 }
591 return n;
592 }
593
594 /*
595 * Convert a string of hexadecimal digits to an integer.
596 */
597
hex(s)598 private int hex(s)
599 String s;
600 {
601 register Char *p;
602 register Integer n;
603
604 n = 0;
605 for (p = s; *p != '\0'; p++) {
606 n *= 16;
607 if (*p >= 'a' and *p <= 'f') {
608 n += (*p - 'a' + 10);
609 } else if (*p >= 'A' and *p <= 'F') {
610 n += (*p - 'A' + 10);
611 } else {
612 n += (*p - '0');
613 }
614 }
615 return n;
616 }
617
618 /*
619 * Scan a string.
620 */
621
getstring(quote)622 private Token getstring (quote)
623 char quote;
624 {
625 register char *p, *q;
626 char buf[MAXLINESIZE];
627 boolean endofstring;
628 Token t;
629
630 p = curchar;
631 q = buf;
632 endofstring = false;
633 while (not endofstring) {
634 if (*p == '\\' and *(p+1) == '\n') {
635 if (fgets(scanner_linebuf, MAXLINESIZE, in) == nil) {
636 error("non-terminated string");
637 }
638 p = &scanner_linebuf[0] - 1;
639 } else if (*p == '\n' or *p == '\0') {
640 error("non-terminated string");
641 endofstring = true;
642 } else if (*p == quote) {
643 endofstring = true;
644 } else {
645 curchar = p;
646 *q++ = charcon(p);
647 p = curchar;
648 }
649 p++;
650 }
651 curchar = p;
652 *q = '\0';
653 if (quote == '\'' and buf[1] == '\0') {
654 yylval.y_char = buf[0];
655 t = CHAR;
656 } else {
657 yylval.y_string = strdup(buf);
658 t = STRING;
659 }
660 return t;
661 }
662
663 /*
664 * Process a character constant.
665 * Watch out for backslashes.
666 */
667
charcon(s)668 private char charcon (s)
669 String s;
670 {
671 register char *p, *q;
672 char c, buf[10];
673
674 p = s;
675 if (*p == '\\') {
676 ++p;
677 switch (*p) {
678 case '\\':
679 c = '\\';
680 break;
681
682 case 'n':
683 c = '\n';
684 break;
685
686 case 'r':
687 c = '\r';
688 break;
689
690 case 't':
691 c = '\t';
692 break;
693
694 case '\'':
695 case '"':
696 c = *p;
697 break;
698
699 default:
700 if (isdigit(*p)) {
701 q = buf;
702 do {
703 *q++ = *p++;
704 } while (isdigit(*p));
705 *q = '\0';
706 c = (char) octal(buf);
707 }
708 --p;
709 break;
710 }
711 curchar = p;
712 } else {
713 c = *p;
714 }
715 return c;
716 }
717
718 /*
719 * Input file management routines.
720 */
721
setinput(filename)722 public setinput(filename)
723 Filename filename;
724 {
725 File f;
726
727 f = fopen(filename, "r");
728 if (f == nil) {
729 error("can't open %s", filename);
730 } else {
731 if (curinclindex >= MAXINCLDEPTH) {
732 error("unreasonable input nesting on \"%s\"", filename);
733 }
734 inclinfo[curinclindex].savefile = in;
735 inclinfo[curinclindex].savefn = errfilename;
736 inclinfo[curinclindex].savelineno = errlineno;
737 curinclindex++;
738 in = f;
739 errfilename = filename;
740 errlineno = 1;
741 }
742 }
743
eofinput()744 private Boolean eofinput()
745 {
746 register Boolean b;
747
748 if (curinclindex == 0) {
749 if (isterm(in)) {
750 putchar('\n');
751 clearerr(in);
752 b = false;
753 } else {
754 b = true;
755 }
756 } else {
757 fclose(in);
758 --curinclindex;
759 in = inclinfo[curinclindex].savefile;
760 errfilename = inclinfo[curinclindex].savefn;
761 errlineno = inclinfo[curinclindex].savelineno;
762 b = false;
763 }
764 return b;
765 }
766
767 /*
768 * Pop the current input. Return whether successful.
769 */
770
popinput()771 public Boolean popinput()
772 {
773 Boolean b;
774
775 if (curinclindex == 0) {
776 b = false;
777 } else {
778 b = (Boolean) (not eofinput());
779 }
780 return b;
781 }
782
783 /*
784 * Return whether we are currently reading from standard input.
785 */
786
isstdin()787 public Boolean isstdin()
788 {
789 return (Boolean) (in == stdin);
790 }
791
792 /*
793 * Send the current line to the shell.
794 */
795
shellline()796 public shellline()
797 {
798 register char *p;
799
800 p = curchar;
801 while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
802 ++p;
803 }
804 shell(p);
805 if (*p == '\0' and isterm(in)) {
806 putchar('\n');
807 }
808 erecover();
809 }
810
811 /*
812 * Read the rest of the current line in "shell mode".
813 */
814
beginshellmode()815 public beginshellmode()
816 {
817 shellmode = true;
818 }
819
820 /*
821 * Print out a token for debugging.
822 */
823
print_token(f,t)824 public print_token(f, t)
825 File f;
826 Token t;
827 {
828 if (t == '\n') {
829 fprintf(f, "char '\\n'");
830 } else if (t == EOF) {
831 fprintf(f, "EOF");
832 } else if (t < 256) {
833 fprintf(f, "char '%c'", t);
834 } else {
835 fprintf(f, "\"%s\"", keywdstring(t));
836 }
837 }
838