xref: /original-bsd/old/dbx/scanner.c (revision 2301fdfb)
1 /*
2  * Copyright (c) 1983 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  */
6 
#ifndef lint
/* SCCS identification string; left out when lint is defined. */
static char sccsid[] = "@(#)scanner.c	5.1 (Berkeley) 05/31/85";
#endif /* not lint */

/* RCS identification string for this revision of the scanner. */
static char rcsid[] = "$Header: scanner.c,v 1.5 84/12/26 10:42:05 linton Exp $";
12 
13 /*
14  * Debugger scanner.
15  */
16 
17 #include "defs.h"
18 #include "scanner.h"
19 #include "main.h"
20 #include "keywords.h"
21 #include "tree.h"
22 #include "symbols.h"
23 #include "names.h"
24 #include "y.tab.h"
25 
#ifndef public
/*
 * These declarations are compiled only when "public" is not already
 * defined as a macro.
 * NOTE(review): presumably an included header defines "public" and
 * supplies the same declarations itself -- confirm against the headers.
 */

/* Token codes are plain ints. */
typedef int Token;

/* Size in bytes of every line buffer declared with this constant. */
#define MAXLINESIZE 10240

#endif
32 
33 public String initfile = ".dbxinit";
34 
35 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
36 
37 private Charclass class[256 + 1];
38 private Charclass *lexclass = class + 1;
39 
40 #define isdigit(c) (lexclass[c] == NUM)
41 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
42 #define ishexdigit(c) ( \
43     isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
44 )
45 
46 public boolean chkalias;
47 public char scanner_linebuf[MAXLINESIZE];
48 
49 private File in;
50 private char *curchar, *prevchar;
51 
52 #define MAXINCLDEPTH 10
53 
54 private struct {
55     File savefile;
56     Filename savefn;
57     int savelineno;
58 } inclinfo[MAXINCLDEPTH];
59 
60 private unsigned int curinclindex;
61 
62 private Token getident();
63 private Token getnum();
64 private Token getstring();
65 private Boolean eofinput();
66 private char charcon();
67 
68 private enterlexclass(class, s)
69 Charclass class;
70 String s;
71 {
72     register char *p;
73 
74     for (p = s; *p != '\0'; p++) {
75 	lexclass[*p] = class;
76     }
77 }
78 
79 public scanner_init()
80 {
81     register Integer i;
82 
83     for (i = 0; i < 257; i++) {
84 	class[i] = OTHER;
85     }
86     enterlexclass(WHITE, " \t");
87     enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
88     enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
89     enterlexclass(NUM, "0123456789");
90     in = stdin;
91     errfilename = nil;
92     errlineno = 0;
93     curchar = scanner_linebuf;
94     scanner_linebuf[0] = '\0';
95     chkalias = true;
96 }
97 
98 /*
99  * Read a single token.
100  *
101  * The input is line buffered.  Tokens cannot cross line boundaries.
102  *
103  * There are two "modes" of operation:  one as in a compiler,
104  * and one for reading shell-like syntax.  In the first mode
105  * there is the additional choice of doing alias processing.
106  */
107 
108 private Boolean shellmode;
109 
110 public Token yylex()
111 {
112     register int c;
113     register char *p;
114     register Token t;
115     String line;
116     integer n;
117 
118     p = curchar;
119     if (*p == '\0') {
120 	do {
121 	    if (isterm(in)) {
122 		printf("(%s) ", cmdname);
123 	    }
124 	    fflush(stdout);
125 	    line = fgets(scanner_linebuf, MAXLINESIZE, in);
126 	} while (line == nil and not eofinput());
127 	if (line == nil) {
128 	    c = EOF;
129 	} else {
130 	    p = scanner_linebuf;
131 	    while (lexclass[*p] == WHITE) {
132 		p++;
133 	    }
134 	    shellmode = false;
135 	}
136 	chkalias = true;
137     } else {
138 	while (lexclass[*p] == WHITE) {
139 	    p++;
140 	}
141     }
142     curchar = p;
143     prevchar = curchar;
144     c = *p;
145     if (lexclass[c] == ALPHA) {
146 	t = getident(chkalias);
147     } else if (lexclass[c] == NUM) {
148 	if (shellmode) {
149 	    t = getident(chkalias);
150 	} else {
151 	    t = getnum();
152 	}
153     } else {
154 	++curchar;
155 	switch (c) {
156 	    case '\n':
157 		t = '\n';
158 		if (errlineno != 0) {
159 		    errlineno++;
160 		}
161 		break;
162 
163 	    case '"':
164 	    case '\'':
165 		t = getstring(c);
166 		break;
167 
168 	    case '.':
169 		if (shellmode) {
170 		    --curchar;
171 		    t = getident(chkalias);
172 		} else if (isdigit(*curchar)) {
173 		    --curchar;
174 		    t = getnum();
175 		} else {
176 		    t = '.';
177 		}
178 		break;
179 
180 	    case '-':
181 		if (shellmode) {
182 		    --curchar;
183 		    t = getident(chkalias);
184 		} else if (*curchar == '>') {
185 		    ++curchar;
186 		    t = ARROW;
187 		} else {
188 		    t = '-';
189 		}
190 		break;
191 
192 	    case '#':
193 		if (not isterm(in)) {
194 		    *p = '\0';
195 		    curchar = p;
196 		    t = '\n';
197 		    ++errlineno;
198 		} else {
199 		    t = '#';
200 		}
201 		break;
202 
203 	    case '\\':
204 		if (*(p+1) == '\n') {
205 		    n = MAXLINESIZE - (p - &scanner_linebuf[0]);
206 		    if (n > 1) {
207 			if (fgets(p, n, in) == nil) {
208 			    t = 0;
209 			} else {
210 			    curchar = p;
211 			    t = yylex();
212 			}
213 		    } else {
214 			t = '\\';
215 		    }
216 		} else {
217 		    t = '\\';
218 		}
219 		break;
220 
221 	    case EOF:
222 		t = 0;
223 		break;
224 
225 	    default:
226 		if (shellmode and index("!&*<>()[]", c) == nil) {
227 		    --curchar;
228 		    t = getident(chkalias);
229 		} else {
230 		    t = c;
231 		}
232 		break;
233 	}
234     }
235     chkalias = false;
236 #   ifdef LEXDEBUG
237 	if (lexdebug) {
238 	    fprintf(stderr, "yylex returns ");
239 	    print_token(stderr, t);
240 	    fprintf(stderr, "\n");
241 	}
242 #   endif
243     return t;
244 }
245 
246 /*
247  * Put the given string before the current character
248  * in the current line, thus inserting it into the input stream.
249  */
250 
251 public insertinput (s)
252 String s;
253 {
254     register char *p, *q;
255     int need, avail, shift;
256 
257     q = s;
258     need = strlen(q);
259     avail = curchar - &scanner_linebuf[0];
260     if (need <= avail) {
261 	curchar = &scanner_linebuf[avail - need];
262 	p = curchar;
263 	while (*q != '\0') {
264 	    *p++ = *q++;
265 	}
266     } else {
267 	p = curchar;
268 	while (*p != '\0') {
269 	    ++p;
270 	}
271 	shift = need - avail;
272 	if (p + shift >= &scanner_linebuf[MAXLINESIZE]) {
273 	    error("alias expansion too large");
274 	}
275 	for (;;) {
276 	    *(p + shift) = *p;
277 	    if (p == curchar) {
278 		break;
279 	    }
280 	    --p;
281 	}
282 	p = &scanner_linebuf[0];
283 	while (*q != '\0') {
284 	    *p++ = *q++;
285 	}
286 	curchar = &scanner_linebuf[0];
287     }
288 }
289 
290 /*
291  * Get the actuals for a macro call.
292  */
293 
294 private String movetochar (str, c)
295 String str;
296 char c;
297 {
298     register char *p;
299 
300     while (*p != c) {
301 	if (*p == '\0') {
302 	    error("missing ')' in macro call");
303 	} else if (*p == ')') {
304 	    error("not enough parameters in macro call");
305 	} else if (*p == ',') {
306 	    error("too many parameters in macro call");
307 	}
308 	++p;
309     }
310     return p;
311 }
312 
313 private String *getactuals (n)
314 integer n;
315 {
316     String *a;
317     register char *p;
318     int i;
319 
320     a = newarr(String, n);
321     p = curchar;
322     while (*p != '(') {
323 	if (lexclass[*p] != WHITE) {
324 	    error("missing actuals for macro");
325 	}
326 	++p;
327     }
328     ++p;
329     for (i = 0; i < n - 1; i++) {
330 	a[i] = p;
331 	p = movetochar(p, ',');
332 	*p = '\0';
333 	++p;
334     }
335     a[n-1] = p;
336     p = movetochar(p, ')');
337     *p = '\0';
338     curchar = p + 1;
339     return a;
340 }
341 
342 /*
343  * Do command macro expansion, assuming curchar points to the beginning
344  * of the actuals, and we are not in shell mode.
345  */
346 
347 private expand (pl, str)
348 List pl;
349 String str;
350 {
351     char buf[4096], namebuf[100];
352     register char *p, *q, *r;
353     String *actual;
354     Name n;
355     integer i;
356     boolean match;
357 
358     if (pl == nil) {
359 	insertinput(str);
360     } else {
361 	actual = getactuals(list_size(pl));
362 	p = buf;
363 	q = str;
364 	while (*q != '\0') {
365 	    if (p >= &buf[4096]) {
366 		error("alias expansion too large");
367 	    }
368 	    if (lexclass[*q] == ALPHA) {
369 		r = namebuf;
370 		do {
371 		    *r++ = *q++;
372 		} while (isalnum(*q));
373 		*r = '\0';
374 		i = 0;
375 		match = false;
376 		foreach(Name, n, pl)
377 		    if (streq(ident(n), namebuf)) {
378 			match = true;
379 			break;
380 		    }
381 		    ++i;
382 		endfor
383 		if (match) {
384 		    r = actual[i];
385 		} else {
386 		    r = namebuf;
387 		}
388 		while (*r != '\0') {
389 		    *p++ = *r++;
390 		}
391 	    } else {
392 		*p++ = *q++;
393 	    }
394 	}
395 	*p = '\0';
396 	insertinput(buf);
397     }
398 }
399 
400 /*
401  * Parser error handling.
402  */
403 
404 public yyerror(s)
405 String s;
406 {
407     register char *p;
408     register integer start;
409 
410     if (streq(s, "syntax error")) {
411 	beginerrmsg();
412 	p = prevchar;
413 	start = p - &scanner_linebuf[0];
414 	if (p > &scanner_linebuf[0]) {
415 	    while (lexclass[*p] == WHITE and p > &scanner_linebuf[0]) {
416 		--p;
417 	    }
418 	}
419 	fprintf(stderr, "%s", scanner_linebuf);
420 	if (start != 0) {
421 	    fprintf(stderr, "%*c", start, ' ');
422 	}
423 	if (p == &scanner_linebuf[0]) {
424 	    fprintf(stderr, "^ unrecognized command");
425 	} else {
426 	    fprintf(stderr, "^ syntax error");
427 	}
428 	enderrmsg();
429     } else {
430 	error(s);
431     }
432 }
433 
434 /*
435  * Eat the current line.
436  */
437 
438 public gobble ()
439 {
440     curchar = scanner_linebuf;
441     scanner_linebuf[0] = '\0';
442 }
443 
444 /*
445  * Scan an identifier.
446  *
447  * If chkalias is true, check first to see if it's an alias.
448  * Otherwise, check to see if it's a keyword.
449  */
450 
451 private Token getident (chkalias)
452 boolean chkalias;
453 {
454     char buf[1024];
455     register char *p, *q;
456     register Token t;
457     List pl;
458     String str;
459 
460     p = curchar;
461     q = buf;
462     if (shellmode) {
463 	do {
464 	    *q++ = *p++;
465 	} while (index(" \t\n!&<>*[]()'\"", *p) == nil);
466     } else {
467 	do {
468 	    *q++ = *p++;
469 	} while (isalnum(*p));
470     }
471     curchar = p;
472     *q = '\0';
473     yylval.y_name = identname(buf, false);
474     if (chkalias) {
475 	if (findalias(yylval.y_name, &pl, &str)) {
476 	    expand(pl, str);
477 	    while (lexclass[*curchar] == WHITE) {
478 		++curchar;
479 	    }
480 	    if (pl == nil) {
481 		t = getident(false);
482 	    } else {
483 		t = getident(true);
484 	    }
485 	} else if (shellmode) {
486 	    t = NAME;
487 	} else {
488 	    t = findkeyword(yylval.y_name, NAME);
489 	}
490     } else if (shellmode) {
491 	t = NAME;
492     } else {
493 	t = findkeyword(yylval.y_name, NAME);
494     }
495     return t;
496 }
497 
498 /*
499  * Scan a number.
500  */
501 
502 private Token getnum()
503 {
504     char buf[1024];
505     register Char *p, *q;
506     register Token t;
507     Integer base;
508 
509     p = curchar;
510     q = buf;
511     if (*p == '0') {
512 	if (*(p+1) == 'x') {
513 	    p += 2;
514 	    base = 16;
515 	} else if (*(p+1) == 't') {
516 	    base = 10;
517 	} else if (varIsSet("$hexin")) {
518 	    base = 16;
519 	} else {
520 	    base = 8;
521 	}
522     } else if (varIsSet("$hexin")) {
523 	base = 16;
524     } else if (varIsSet("$octin")) {
525 	base = 8;
526     } else {
527 	base = 10;
528     }
529     if (base == 16) {
530 	do {
531 	    *q++ = *p++;
532 	} while (ishexdigit(*p));
533     } else {
534 	do {
535 	    *q++ = *p++;
536 	} while (isdigit(*p));
537     }
538     if (*p == '.') {
539 	do {
540 	    *q++ = *p++;
541 	} while (isdigit(*p));
542 	if (*p == 'e' or *p == 'E') {
543 	    p++;
544 	    if (*p == '+' or *p == '-' or isdigit(*p)) {
545 		*q++ = 'e';
546 		do {
547 		    *q++ = *p++;
548 		} while (isdigit(*p));
549 	    }
550 	}
551 	*q = '\0';
552 	yylval.y_real = atof(buf);
553 	t = REAL;
554     } else {
555 	*q = '\0';
556 	switch (base) {
557 	    case 10:
558 		yylval.y_int = atol(buf);
559 		break;
560 
561 	    case 8:
562 		yylval.y_int = octal(buf);
563 		break;
564 
565 	    case 16:
566 		yylval.y_int = hex(buf);
567 		break;
568 
569 	    default:
570 		badcaseval(base);
571 	}
572 	t = INT;
573     }
574     curchar = p;
575     return t;
576 }
577 
578 /*
579  * Convert a string of octal digits to an integer.
580  */
581 
582 private int octal(s)
583 String s;
584 {
585     register Char *p;
586     register Integer n;
587 
588     n = 0;
589     for (p = s; *p != '\0'; p++) {
590 	n = 8*n + (*p - '0');
591     }
592     return n;
593 }
594 
595 /*
596  * Convert a string of hexadecimal digits to an integer.
597  */
598 
599 private int hex(s)
600 String s;
601 {
602     register Char *p;
603     register Integer n;
604 
605     n = 0;
606     for (p = s; *p != '\0'; p++) {
607 	n *= 16;
608 	if (*p >= 'a' and *p <= 'f') {
609 	    n += (*p - 'a' + 10);
610 	} else if (*p >= 'A' and *p <= 'F') {
611 	    n += (*p - 'A' + 10);
612 	} else {
613 	    n += (*p - '0');
614 	}
615     }
616     return n;
617 }
618 
619 /*
620  * Scan a string.
621  */
622 
623 private Token getstring (quote)
624 char quote;
625 {
626     register char *p, *q;
627     char buf[MAXLINESIZE];
628     boolean endofstring;
629     Token t;
630 
631     p = curchar;
632     q = buf;
633     endofstring = false;
634     while (not endofstring) {
635 	if (*p == '\\' and *(p+1) == '\n') {
636 	    if (fgets(scanner_linebuf, MAXLINESIZE, in) == nil) {
637 		error("non-terminated string");
638 	    }
639 	    p = &scanner_linebuf[0] - 1;
640 	} else if (*p == '\n' or *p == '\0') {
641 	    error("non-terminated string");
642 	    endofstring = true;
643 	} else if (*p == quote) {
644 	    endofstring = true;
645 	} else {
646 	    curchar = p;
647 	    *q++ = charcon(p);
648 	    p = curchar;
649 	}
650 	p++;
651     }
652     curchar = p;
653     *q = '\0';
654     if (quote == '\'' and buf[1] == '\0') {
655 	yylval.y_char = buf[0];
656 	t = CHAR;
657     } else {
658 	yylval.y_string = strdup(buf);
659 	t = STRING;
660     }
661     return t;
662 }
663 
664 /*
665  * Process a character constant.
666  * Watch out for backslashes.
667  */
668 
669 private char charcon (s)
670 String s;
671 {
672     register char *p, *q;
673     char c, buf[10];
674 
675     p = s;
676     if (*p == '\\') {
677 	++p;
678 	switch (*p) {
679 	    case '\\':
680 		c = '\\';
681 		break;
682 
683 	    case 'n':
684 		c = '\n';
685 		break;
686 
687 	    case 'r':
688 		c = '\r';
689 		break;
690 
691 	    case 't':
692 		c = '\t';
693 		break;
694 
695 	    case '\'':
696 	    case '"':
697 		c = *p;
698 		break;
699 
700 	    default:
701 		if (isdigit(*p)) {
702 		    q = buf;
703 		    do {
704 			*q++ = *p++;
705 		    } while (isdigit(*p));
706 		    *q = '\0';
707 		    c = (char) octal(buf);
708 		}
709 		--p;
710 		break;
711 	}
712 	curchar = p;
713     } else {
714 	c = *p;
715     }
716     return c;
717 }
718 
719 /*
720  * Input file management routines.
721  */
722 
723 public setinput(filename)
724 Filename filename;
725 {
726     File f;
727 
728     f = fopen(filename, "r");
729     if (f == nil) {
730 	error("can't open %s", filename);
731     } else {
732 	if (curinclindex >= MAXINCLDEPTH) {
733 	    error("unreasonable input nesting on \"%s\"", filename);
734 	}
735 	inclinfo[curinclindex].savefile = in;
736 	inclinfo[curinclindex].savefn = errfilename;
737 	inclinfo[curinclindex].savelineno = errlineno;
738 	curinclindex++;
739 	in = f;
740 	errfilename = filename;
741 	errlineno = 1;
742     }
743 }
744 
745 private Boolean eofinput()
746 {
747     register Boolean b;
748 
749     if (curinclindex == 0) {
750 	if (isterm(in)) {
751 	    putchar('\n');
752 	    clearerr(in);
753 	    b = false;
754 	} else {
755 	    b = true;
756 	}
757     } else {
758 	fclose(in);
759 	--curinclindex;
760 	in = inclinfo[curinclindex].savefile;
761 	errfilename = inclinfo[curinclindex].savefn;
762 	errlineno = inclinfo[curinclindex].savelineno;
763 	b = false;
764     }
765     return b;
766 }
767 
768 /*
769  * Pop the current input.  Return whether successful.
770  */
771 
772 public Boolean popinput()
773 {
774     Boolean b;
775 
776     if (curinclindex == 0) {
777 	b = false;
778     } else {
779 	b = (Boolean) (not eofinput());
780     }
781     return b;
782 }
783 
784 /*
785  * Return whether we are currently reading from standard input.
786  */
787 
788 public Boolean isstdin()
789 {
790     return (Boolean) (in == stdin);
791 }
792 
793 /*
794  * Send the current line to the shell.
795  */
796 
797 public shellline()
798 {
799     register char *p;
800 
801     p = curchar;
802     while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
803 	++p;
804     }
805     shell(p);
806     if (*p == '\0' and isterm(in)) {
807 	putchar('\n');
808     }
809     erecover();
810 }
811 
812 /*
813  * Read the rest of the current line in "shell mode".
814  */
815 
/*
 * Turn shell mode on.  yylex turns it off again whenever it reads a
 * new line.
 */
public beginshellmode()
{
    shellmode = true;
}
820 
821 /*
822  * Print out a token for debugging.
823  */
824 
825 public print_token(f, t)
826 File f;
827 Token t;
828 {
829     if (t == '\n') {
830 	fprintf(f, "char '\\n'");
831     } else if (t == EOF) {
832 	fprintf(f, "EOF");
833     } else if (t < 256) {
834 	fprintf(f, "char '%c'", t);
835     } else {
836 	fprintf(f, "\"%s\"", keywdstring(t));
837     }
838 }
839