xref: /original-bsd/old/dbx/scanner.c (revision 331bfa8d)
1 /*
2  * Copyright (c) 1983 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  */
7 
8 #ifndef lint
9 static char sccsid[] = "@(#)scanner.c	5.3 (Berkeley) 06/01/90";
10 #endif /* not lint */
11 
12 /*
13  * Debugger scanner.
14  */
15 
16 #include "defs.h"
17 #include "scanner.h"
18 #include "main.h"
19 #include "keywords.h"
20 #include "tree.h"
21 #include "symbols.h"
22 #include "names.h"
23 #include "y.tab.h"
24 
/*
 * Declarations shared with the rest of the debugger.  They are only
 * compiled here when "public" has not been defined as a macro.
 * NOTE(review): presumably the included headers normally provide these;
 * confirm against the build.
 */
#ifndef public
/* Token codes returned by yylex are plain ints. */
typedef int Token;

/* Size, in bytes, of the scanner's line buffer (scanner_linebuf). */
#define MAXLINESIZE 10240

#endif
31 
32 public String initfile = ".dbxinit";
33 
34 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
35 
36 private Charclass class[256 + 1];
37 private Charclass *lexclass = class + 1;
38 
39 #define isdigit(c) (lexclass[c] == NUM)
40 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
41 #define ishexdigit(c) ( \
42     isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
43 )
44 
45 public boolean chkalias;
46 public char scanner_linebuf[MAXLINESIZE];
47 
48 private File in;
49 private char *curchar, *prevchar;
50 
/* Maximum number of input files that may be nested (see setinput). */
#define MAXINCLDEPTH 10

/*
 * Stack of suspended inputs.  Each entry records the stream, the file
 * name used in error messages and the line number that were current
 * when a nested input file was opened.
 */
private struct {
    File savefile;
    Filename savefn;
    int savelineno;
} inclinfo[MAXINCLDEPTH];

/* Number of entries of inclinfo in use; 0 means no nested input. */
private unsigned int curinclindex;
60 
61 private Token getident();
62 private Token getnum();
63 private Token getstring();
64 private Boolean eofinput();
65 private char charcon();
66 
67 private enterlexclass(class, s)
68 Charclass class;
69 String s;
70 {
71     register char *p;
72 
73     for (p = s; *p != '\0'; p++) {
74 	lexclass[*p] = class;
75     }
76 }
77 
78 public scanner_init()
79 {
80     register Integer i;
81 
82     for (i = 0; i < 257; i++) {
83 	class[i] = OTHER;
84     }
85     enterlexclass(WHITE, " \t");
86     enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
87     enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
88     enterlexclass(NUM, "0123456789");
89     in = stdin;
90     errfilename = nil;
91     errlineno = 0;
92     curchar = scanner_linebuf;
93     scanner_linebuf[0] = '\0';
94     chkalias = true;
95 }
96 
97 /*
98  * Read a single token.
99  *
100  * The input is line buffered.  Tokens cannot cross line boundaries.
101  *
102  * There are two "modes" of operation:  one as in a compiler,
103  * and one for reading shell-like syntax.  In the first mode
104  * there is the additional choice of doing alias processing.
105  */
106 
107 private Boolean shellmode;
108 
109 public Token yylex()
110 {
111     register int c;
112     register char *p;
113     register Token t;
114     String line;
115     integer n;
116 
117     p = curchar;
118     if (*p == '\0') {
119 	do {
120 	    if (isterm(in)) {
121 		printf("(%s) ", cmdname);
122 	    }
123 	    fflush(stdout);
124 	    line = fgets(scanner_linebuf, MAXLINESIZE, in);
125 	} while (line == nil and not eofinput());
126 	if (line == nil) {
127 	    c = EOF;
128 	} else {
129 	    p = scanner_linebuf;
130 	    while (lexclass[*p] == WHITE) {
131 		p++;
132 	    }
133 	    shellmode = false;
134 	}
135 	chkalias = true;
136     } else {
137 	while (lexclass[*p] == WHITE) {
138 	    p++;
139 	}
140     }
141     curchar = p;
142     prevchar = curchar;
143     c = *p;
144     if (lexclass[c] == ALPHA) {
145 	t = getident(chkalias);
146     } else if (lexclass[c] == NUM) {
147 	if (shellmode) {
148 	    t = getident(chkalias);
149 	} else {
150 	    t = getnum();
151 	}
152     } else {
153 	++curchar;
154 	switch (c) {
155 	    case '\n':
156 		t = '\n';
157 		if (errlineno != 0) {
158 		    errlineno++;
159 		}
160 		break;
161 
162 	    case '"':
163 	    case '\'':
164 		t = getstring(c);
165 		break;
166 
167 	    case '.':
168 		if (shellmode) {
169 		    --curchar;
170 		    t = getident(chkalias);
171 		} else if (isdigit(*curchar)) {
172 		    --curchar;
173 		    t = getnum();
174 		} else {
175 		    t = '.';
176 		}
177 		break;
178 
179 	    case '-':
180 		if (shellmode) {
181 		    --curchar;
182 		    t = getident(chkalias);
183 		} else if (*curchar == '>') {
184 		    ++curchar;
185 		    t = ARROW;
186 		} else {
187 		    t = '-';
188 		}
189 		break;
190 
191 	    case '#':
192 		if (not isterm(in)) {
193 		    *p = '\0';
194 		    curchar = p;
195 		    t = '\n';
196 		    ++errlineno;
197 		} else {
198 		    t = '#';
199 		}
200 		break;
201 
202 	    case '\\':
203 		if (*(p+1) == '\n') {
204 		    n = MAXLINESIZE - (p - &scanner_linebuf[0]);
205 		    if (n > 1) {
206 			if (fgets(p, n, in) == nil) {
207 			    t = 0;
208 			} else {
209 			    curchar = p;
210 			    t = yylex();
211 			}
212 		    } else {
213 			t = '\\';
214 		    }
215 		} else {
216 		    t = '\\';
217 		}
218 		break;
219 
220 	    case EOF:
221 		t = 0;
222 		break;
223 
224 	    default:
225 		if (shellmode and index("!&*<>()[]", c) == nil) {
226 		    --curchar;
227 		    t = getident(chkalias);
228 		} else {
229 		    t = c;
230 		}
231 		break;
232 	}
233     }
234     chkalias = false;
235 #   ifdef LEXDEBUG
236 	if (lexdebug) {
237 	    fprintf(stderr, "yylex returns ");
238 	    print_token(stderr, t);
239 	    fprintf(stderr, "\n");
240 	}
241 #   endif
242     return t;
243 }
244 
245 /*
246  * Put the given string before the current character
247  * in the current line, thus inserting it into the input stream.
248  */
249 
250 public insertinput (s)
251 String s;
252 {
253     register char *p, *q;
254     int need, avail, shift;
255 
256     q = s;
257     need = strlen(q);
258     avail = curchar - &scanner_linebuf[0];
259     if (need <= avail) {
260 	curchar = &scanner_linebuf[avail - need];
261 	p = curchar;
262 	while (*q != '\0') {
263 	    *p++ = *q++;
264 	}
265     } else {
266 	p = curchar;
267 	while (*p != '\0') {
268 	    ++p;
269 	}
270 	shift = need - avail;
271 	if (p + shift >= &scanner_linebuf[MAXLINESIZE]) {
272 	    error("alias expansion too large");
273 	}
274 	for (;;) {
275 	    *(p + shift) = *p;
276 	    if (p == curchar) {
277 		break;
278 	    }
279 	    --p;
280 	}
281 	p = &scanner_linebuf[0];
282 	while (*q != '\0') {
283 	    *p++ = *q++;
284 	}
285 	curchar = &scanner_linebuf[0];
286     }
287 }
288 
289 /*
290  * Get the actuals for a macro call.
291  */
292 
293 private String movetochar (str, c)
294 String str;
295 char c;
296 {
297     register char *p;
298 
299     while (*p != c) {
300 	if (*p == '\0') {
301 	    error("missing ')' in macro call");
302 	} else if (*p == ')') {
303 	    error("not enough parameters in macro call");
304 	} else if (*p == ',') {
305 	    error("too many parameters in macro call");
306 	}
307 	++p;
308     }
309     return p;
310 }
311 
312 private String *getactuals (n)
313 integer n;
314 {
315     String *a;
316     register char *p;
317     int i;
318 
319     a = newarr(String, n);
320     p = curchar;
321     while (*p != '(') {
322 	if (lexclass[*p] != WHITE) {
323 	    error("missing actuals for macro");
324 	}
325 	++p;
326     }
327     ++p;
328     for (i = 0; i < n - 1; i++) {
329 	a[i] = p;
330 	p = movetochar(p, ',');
331 	*p = '\0';
332 	++p;
333     }
334     a[n-1] = p;
335     p = movetochar(p, ')');
336     *p = '\0';
337     curchar = p + 1;
338     return a;
339 }
340 
341 /*
342  * Do command macro expansion, assuming curchar points to the beginning
343  * of the actuals, and we are not in shell mode.
344  */
345 
346 private expand (pl, str)
347 List pl;
348 String str;
349 {
350     char buf[4096], namebuf[100];
351     register char *p, *q, *r;
352     String *actual;
353     Name n;
354     integer i;
355     boolean match;
356 
357     if (pl == nil) {
358 	insertinput(str);
359     } else {
360 	actual = getactuals(list_size(pl));
361 	p = buf;
362 	q = str;
363 	while (*q != '\0') {
364 	    if (p >= &buf[4096]) {
365 		error("alias expansion too large");
366 	    }
367 	    if (lexclass[*q] == ALPHA) {
368 		r = namebuf;
369 		do {
370 		    *r++ = *q++;
371 		} while (isalnum(*q));
372 		*r = '\0';
373 		i = 0;
374 		match = false;
375 		foreach(Name, n, pl)
376 		    if (streq(ident(n), namebuf)) {
377 			match = true;
378 			break;
379 		    }
380 		    ++i;
381 		endfor
382 		if (match) {
383 		    r = actual[i];
384 		} else {
385 		    r = namebuf;
386 		}
387 		while (*r != '\0') {
388 		    *p++ = *r++;
389 		}
390 	    } else {
391 		*p++ = *q++;
392 	    }
393 	}
394 	*p = '\0';
395 	insertinput(buf);
396     }
397 }
398 
399 /*
400  * Parser error handling.
401  */
402 
403 public yyerror(s)
404 String s;
405 {
406     register char *p;
407     register integer start;
408 
409     if (streq(s, "syntax error")) {
410 	beginerrmsg();
411 	p = prevchar;
412 	start = p - &scanner_linebuf[0];
413 	if (p > &scanner_linebuf[0]) {
414 	    while (lexclass[*p] == WHITE and p > &scanner_linebuf[0]) {
415 		--p;
416 	    }
417 	}
418 	fprintf(stderr, "%s", scanner_linebuf);
419 	if (start != 0) {
420 	    fprintf(stderr, "%*c", start, ' ');
421 	}
422 	if (p == &scanner_linebuf[0]) {
423 	    fprintf(stderr, "^ unrecognized command");
424 	} else {
425 	    fprintf(stderr, "^ syntax error");
426 	}
427 	enderrmsg();
428     } else {
429 	error(s);
430     }
431 }
432 
433 /*
434  * Eat the current line.
435  */
436 
437 public gobble ()
438 {
439     curchar = scanner_linebuf;
440     scanner_linebuf[0] = '\0';
441 }
442 
443 /*
444  * Scan an identifier.
445  *
446  * If chkalias is true, check first to see if it's an alias.
447  * Otherwise, check to see if it's a keyword.
448  */
449 
450 private Token getident (chkalias)
451 boolean chkalias;
452 {
453     char buf[1024];
454     register char *p, *q;
455     register Token t;
456     List pl;
457     String str;
458 
459     p = curchar;
460     q = buf;
461     if (shellmode) {
462 	do {
463 	    *q++ = *p++;
464 	} while (index(" \t\n!&<>*[]()'\"", *p) == nil);
465     } else {
466 	do {
467 	    *q++ = *p++;
468 	} while (isalnum(*p));
469     }
470     curchar = p;
471     *q = '\0';
472     yylval.y_name = identname(buf, false);
473     if (chkalias) {
474 	if (findalias(yylval.y_name, &pl, &str)) {
475 	    expand(pl, str);
476 	    while (lexclass[*curchar] == WHITE) {
477 		++curchar;
478 	    }
479 	    if (pl == nil) {
480 		t = getident(false);
481 	    } else {
482 		t = getident(true);
483 	    }
484 	} else if (shellmode) {
485 	    t = NAME;
486 	} else {
487 	    t = findkeyword(yylval.y_name, NAME);
488 	}
489     } else if (shellmode) {
490 	t = NAME;
491     } else {
492 	t = findkeyword(yylval.y_name, NAME);
493     }
494     return t;
495 }
496 
497 /*
498  * Scan a number.
499  */
500 
501 private Token getnum()
502 {
503     char buf[1024];
504     register Char *p, *q;
505     register Token t;
506     Integer base;
507 
508     p = curchar;
509     q = buf;
510     if (*p == '0') {
511 	if (*(p+1) == 'x') {
512 	    p += 2;
513 	    base = 16;
514 	} else if (*(p+1) == 't') {
515 	    base = 10;
516 	} else if (varIsSet("$hexin")) {
517 	    base = 16;
518 	} else {
519 	    base = 8;
520 	}
521     } else if (varIsSet("$hexin")) {
522 	base = 16;
523     } else if (varIsSet("$octin")) {
524 	base = 8;
525     } else {
526 	base = 10;
527     }
528     if (base == 16) {
529 	do {
530 	    *q++ = *p++;
531 	} while (ishexdigit(*p));
532     } else {
533 	do {
534 	    *q++ = *p++;
535 	} while (isdigit(*p));
536     }
537     if (*p == '.') {
538 	do {
539 	    *q++ = *p++;
540 	} while (isdigit(*p));
541 	if (*p == 'e' or *p == 'E') {
542 	    p++;
543 	    if (*p == '+' or *p == '-' or isdigit(*p)) {
544 		*q++ = 'e';
545 		do {
546 		    *q++ = *p++;
547 		} while (isdigit(*p));
548 	    }
549 	}
550 	*q = '\0';
551 	yylval.y_real = atof(buf);
552 	t = REAL;
553     } else {
554 	*q = '\0';
555 	switch (base) {
556 	    case 10:
557 		yylval.y_int = atol(buf);
558 		break;
559 
560 	    case 8:
561 		yylval.y_int = octal(buf);
562 		break;
563 
564 	    case 16:
565 		yylval.y_int = hex(buf);
566 		break;
567 
568 	    default:
569 		badcaseval(base);
570 	}
571 	t = INT;
572     }
573     curchar = p;
574     return t;
575 }
576 
577 /*
578  * Convert a string of octal digits to an integer.
579  */
580 
581 private int octal(s)
582 String s;
583 {
584     register Char *p;
585     register Integer n;
586 
587     n = 0;
588     for (p = s; *p != '\0'; p++) {
589 	n = 8*n + (*p - '0');
590     }
591     return n;
592 }
593 
594 /*
595  * Convert a string of hexadecimal digits to an integer.
596  */
597 
598 private int hex(s)
599 String s;
600 {
601     register Char *p;
602     register Integer n;
603 
604     n = 0;
605     for (p = s; *p != '\0'; p++) {
606 	n *= 16;
607 	if (*p >= 'a' and *p <= 'f') {
608 	    n += (*p - 'a' + 10);
609 	} else if (*p >= 'A' and *p <= 'F') {
610 	    n += (*p - 'A' + 10);
611 	} else {
612 	    n += (*p - '0');
613 	}
614     }
615     return n;
616 }
617 
618 /*
619  * Scan a string.
620  */
621 
622 private Token getstring (quote)
623 char quote;
624 {
625     register char *p, *q;
626     char buf[MAXLINESIZE];
627     boolean endofstring;
628     Token t;
629 
630     p = curchar;
631     q = buf;
632     endofstring = false;
633     while (not endofstring) {
634 	if (*p == '\\' and *(p+1) == '\n') {
635 	    if (fgets(scanner_linebuf, MAXLINESIZE, in) == nil) {
636 		error("non-terminated string");
637 	    }
638 	    p = &scanner_linebuf[0] - 1;
639 	} else if (*p == '\n' or *p == '\0') {
640 	    error("non-terminated string");
641 	    endofstring = true;
642 	} else if (*p == quote) {
643 	    endofstring = true;
644 	} else {
645 	    curchar = p;
646 	    *q++ = charcon(p);
647 	    p = curchar;
648 	}
649 	p++;
650     }
651     curchar = p;
652     *q = '\0';
653     if (quote == '\'' and buf[1] == '\0') {
654 	yylval.y_char = buf[0];
655 	t = CHAR;
656     } else {
657 	yylval.y_string = strdup(buf);
658 	t = STRING;
659     }
660     return t;
661 }
662 
663 /*
664  * Process a character constant.
665  * Watch out for backslashes.
666  */
667 
668 private char charcon (s)
669 String s;
670 {
671     register char *p, *q;
672     char c, buf[10];
673 
674     p = s;
675     if (*p == '\\') {
676 	++p;
677 	switch (*p) {
678 	    case '\\':
679 		c = '\\';
680 		break;
681 
682 	    case 'n':
683 		c = '\n';
684 		break;
685 
686 	    case 'r':
687 		c = '\r';
688 		break;
689 
690 	    case 't':
691 		c = '\t';
692 		break;
693 
694 	    case '\'':
695 	    case '"':
696 		c = *p;
697 		break;
698 
699 	    default:
700 		if (isdigit(*p)) {
701 		    q = buf;
702 		    do {
703 			*q++ = *p++;
704 		    } while (isdigit(*p));
705 		    *q = '\0';
706 		    c = (char) octal(buf);
707 		}
708 		--p;
709 		break;
710 	}
711 	curchar = p;
712     } else {
713 	c = *p;
714     }
715     return c;
716 }
717 
718 /*
719  * Input file management routines.
720  */
721 
722 public setinput(filename)
723 Filename filename;
724 {
725     File f;
726 
727     f = fopen(filename, "r");
728     if (f == nil) {
729 	error("can't open %s", filename);
730     } else {
731 	if (curinclindex >= MAXINCLDEPTH) {
732 	    error("unreasonable input nesting on \"%s\"", filename);
733 	}
734 	inclinfo[curinclindex].savefile = in;
735 	inclinfo[curinclindex].savefn = errfilename;
736 	inclinfo[curinclindex].savelineno = errlineno;
737 	curinclindex++;
738 	in = f;
739 	errfilename = filename;
740 	errlineno = 1;
741     }
742 }
743 
744 private Boolean eofinput()
745 {
746     register Boolean b;
747 
748     if (curinclindex == 0) {
749 	if (isterm(in)) {
750 	    putchar('\n');
751 	    clearerr(in);
752 	    b = false;
753 	} else {
754 	    b = true;
755 	}
756     } else {
757 	fclose(in);
758 	--curinclindex;
759 	in = inclinfo[curinclindex].savefile;
760 	errfilename = inclinfo[curinclindex].savefn;
761 	errlineno = inclinfo[curinclindex].savelineno;
762 	b = false;
763     }
764     return b;
765 }
766 
767 /*
768  * Pop the current input.  Return whether successful.
769  */
770 
771 public Boolean popinput()
772 {
773     Boolean b;
774 
775     if (curinclindex == 0) {
776 	b = false;
777     } else {
778 	b = (Boolean) (not eofinput());
779     }
780     return b;
781 }
782 
783 /*
784  * Return whether we are currently reading from standard input.
785  */
786 
787 public Boolean isstdin()
788 {
789     return (Boolean) (in == stdin);
790 }
791 
792 /*
793  * Send the current line to the shell.
794  */
795 
796 public shellline()
797 {
798     register char *p;
799 
800     p = curchar;
801     while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
802 	++p;
803     }
804     shell(p);
805     if (*p == '\0' and isterm(in)) {
806 	putchar('\n');
807     }
808     erecover();
809 }
810 
811 /*
812  * Read the rest of the current line in "shell mode".
813  */
814 
815 public beginshellmode()
816 {
817     shellmode = true;
818 }
819 
820 /*
821  * Print out a token for debugging.
822  */
823 
824 public print_token(f, t)
825 File f;
826 Token t;
827 {
828     if (t == '\n') {
829 	fprintf(f, "char '\\n'");
830     } else if (t == EOF) {
831 	fprintf(f, "EOF");
832     } else if (t < 256) {
833 	fprintf(f, "char '%c'", t);
834     } else {
835 	fprintf(f, "\"%s\"", keywdstring(t));
836     }
837 }
838