xref: /original-bsd/old/dbx/scanner.c (revision dfa70498)
1 /*
2  * Copyright (c) 1983 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms are permitted
6  * provided that the above copyright notice and this paragraph are
7  * duplicated in all such forms and that any documentation,
8  * advertising materials, and other materials related to such
9  * distribution and use acknowledge that the software was developed
10  * by the University of California, Berkeley.  The name of the
11  * University may not be used to endorse or promote products derived
12  * from this software without specific prior written permission.
13  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16  */
17 
18 #ifndef lint
19 static char sccsid[] = "@(#)scanner.c	5.2 (Berkeley) 05/23/89";
20 #endif /* not lint */
21 
22 /*
23  * Debugger scanner.
24  */
25 
26 #include "defs.h"
27 #include "scanner.h"
28 #include "main.h"
29 #include "keywords.h"
30 #include "tree.h"
31 #include "symbols.h"
32 #include "names.h"
33 #include "y.tab.h"
34 
35 #ifndef public
36 typedef int Token;
37 
38 #define MAXLINESIZE 10240
39 
40 #endif
41 
42 public String initfile = ".dbxinit";
43 
44 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
45 
46 private Charclass class[256 + 1];
47 private Charclass *lexclass = class + 1;
48 
49 #define isdigit(c) (lexclass[c] == NUM)
50 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
51 #define ishexdigit(c) ( \
52     isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
53 )
54 
55 public boolean chkalias;
56 public char scanner_linebuf[MAXLINESIZE];
57 
58 private File in;
59 private char *curchar, *prevchar;
60 
61 #define MAXINCLDEPTH 10
62 
63 private struct {
64     File savefile;
65     Filename savefn;
66     int savelineno;
67 } inclinfo[MAXINCLDEPTH];
68 
69 private unsigned int curinclindex;
70 
71 private Token getident();
72 private Token getnum();
73 private Token getstring();
74 private Boolean eofinput();
75 private char charcon();
76 
77 private enterlexclass(class, s)
78 Charclass class;
79 String s;
80 {
81     register char *p;
82 
83     for (p = s; *p != '\0'; p++) {
84 	lexclass[*p] = class;
85     }
86 }
87 
88 public scanner_init()
89 {
90     register Integer i;
91 
92     for (i = 0; i < 257; i++) {
93 	class[i] = OTHER;
94     }
95     enterlexclass(WHITE, " \t");
96     enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
97     enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
98     enterlexclass(NUM, "0123456789");
99     in = stdin;
100     errfilename = nil;
101     errlineno = 0;
102     curchar = scanner_linebuf;
103     scanner_linebuf[0] = '\0';
104     chkalias = true;
105 }
106 
107 /*
108  * Read a single token.
109  *
110  * The input is line buffered.  Tokens cannot cross line boundaries.
111  *
112  * There are two "modes" of operation:  one as in a compiler,
113  * and one for reading shell-like syntax.  In the first mode
114  * there is the additional choice of doing alias processing.
115  */
116 
117 private Boolean shellmode;
118 
119 public Token yylex()
120 {
121     register int c;
122     register char *p;
123     register Token t;
124     String line;
125     integer n;
126 
127     p = curchar;
128     if (*p == '\0') {
129 	do {
130 	    if (isterm(in)) {
131 		printf("(%s) ", cmdname);
132 	    }
133 	    fflush(stdout);
134 	    line = fgets(scanner_linebuf, MAXLINESIZE, in);
135 	} while (line == nil and not eofinput());
136 	if (line == nil) {
137 	    c = EOF;
138 	} else {
139 	    p = scanner_linebuf;
140 	    while (lexclass[*p] == WHITE) {
141 		p++;
142 	    }
143 	    shellmode = false;
144 	}
145 	chkalias = true;
146     } else {
147 	while (lexclass[*p] == WHITE) {
148 	    p++;
149 	}
150     }
151     curchar = p;
152     prevchar = curchar;
153     c = *p;
154     if (lexclass[c] == ALPHA) {
155 	t = getident(chkalias);
156     } else if (lexclass[c] == NUM) {
157 	if (shellmode) {
158 	    t = getident(chkalias);
159 	} else {
160 	    t = getnum();
161 	}
162     } else {
163 	++curchar;
164 	switch (c) {
165 	    case '\n':
166 		t = '\n';
167 		if (errlineno != 0) {
168 		    errlineno++;
169 		}
170 		break;
171 
172 	    case '"':
173 	    case '\'':
174 		t = getstring(c);
175 		break;
176 
177 	    case '.':
178 		if (shellmode) {
179 		    --curchar;
180 		    t = getident(chkalias);
181 		} else if (isdigit(*curchar)) {
182 		    --curchar;
183 		    t = getnum();
184 		} else {
185 		    t = '.';
186 		}
187 		break;
188 
189 	    case '-':
190 		if (shellmode) {
191 		    --curchar;
192 		    t = getident(chkalias);
193 		} else if (*curchar == '>') {
194 		    ++curchar;
195 		    t = ARROW;
196 		} else {
197 		    t = '-';
198 		}
199 		break;
200 
201 	    case '#':
202 		if (not isterm(in)) {
203 		    *p = '\0';
204 		    curchar = p;
205 		    t = '\n';
206 		    ++errlineno;
207 		} else {
208 		    t = '#';
209 		}
210 		break;
211 
212 	    case '\\':
213 		if (*(p+1) == '\n') {
214 		    n = MAXLINESIZE - (p - &scanner_linebuf[0]);
215 		    if (n > 1) {
216 			if (fgets(p, n, in) == nil) {
217 			    t = 0;
218 			} else {
219 			    curchar = p;
220 			    t = yylex();
221 			}
222 		    } else {
223 			t = '\\';
224 		    }
225 		} else {
226 		    t = '\\';
227 		}
228 		break;
229 
230 	    case EOF:
231 		t = 0;
232 		break;
233 
234 	    default:
235 		if (shellmode and index("!&*<>()[]", c) == nil) {
236 		    --curchar;
237 		    t = getident(chkalias);
238 		} else {
239 		    t = c;
240 		}
241 		break;
242 	}
243     }
244     chkalias = false;
245 #   ifdef LEXDEBUG
246 	if (lexdebug) {
247 	    fprintf(stderr, "yylex returns ");
248 	    print_token(stderr, t);
249 	    fprintf(stderr, "\n");
250 	}
251 #   endif
252     return t;
253 }
254 
255 /*
256  * Put the given string before the current character
257  * in the current line, thus inserting it into the input stream.
258  */
259 
260 public insertinput (s)
261 String s;
262 {
263     register char *p, *q;
264     int need, avail, shift;
265 
266     q = s;
267     need = strlen(q);
268     avail = curchar - &scanner_linebuf[0];
269     if (need <= avail) {
270 	curchar = &scanner_linebuf[avail - need];
271 	p = curchar;
272 	while (*q != '\0') {
273 	    *p++ = *q++;
274 	}
275     } else {
276 	p = curchar;
277 	while (*p != '\0') {
278 	    ++p;
279 	}
280 	shift = need - avail;
281 	if (p + shift >= &scanner_linebuf[MAXLINESIZE]) {
282 	    error("alias expansion too large");
283 	}
284 	for (;;) {
285 	    *(p + shift) = *p;
286 	    if (p == curchar) {
287 		break;
288 	    }
289 	    --p;
290 	}
291 	p = &scanner_linebuf[0];
292 	while (*q != '\0') {
293 	    *p++ = *q++;
294 	}
295 	curchar = &scanner_linebuf[0];
296     }
297 }
298 
299 /*
300  * Get the actuals for a macro call.
301  */
302 
303 private String movetochar (str, c)
304 String str;
305 char c;
306 {
307     register char *p;
308 
309     while (*p != c) {
310 	if (*p == '\0') {
311 	    error("missing ')' in macro call");
312 	} else if (*p == ')') {
313 	    error("not enough parameters in macro call");
314 	} else if (*p == ',') {
315 	    error("too many parameters in macro call");
316 	}
317 	++p;
318     }
319     return p;
320 }
321 
322 private String *getactuals (n)
323 integer n;
324 {
325     String *a;
326     register char *p;
327     int i;
328 
329     a = newarr(String, n);
330     p = curchar;
331     while (*p != '(') {
332 	if (lexclass[*p] != WHITE) {
333 	    error("missing actuals for macro");
334 	}
335 	++p;
336     }
337     ++p;
338     for (i = 0; i < n - 1; i++) {
339 	a[i] = p;
340 	p = movetochar(p, ',');
341 	*p = '\0';
342 	++p;
343     }
344     a[n-1] = p;
345     p = movetochar(p, ')');
346     *p = '\0';
347     curchar = p + 1;
348     return a;
349 }
350 
351 /*
352  * Do command macro expansion, assuming curchar points to the beginning
353  * of the actuals, and we are not in shell mode.
354  */
355 
356 private expand (pl, str)
357 List pl;
358 String str;
359 {
360     char buf[4096], namebuf[100];
361     register char *p, *q, *r;
362     String *actual;
363     Name n;
364     integer i;
365     boolean match;
366 
367     if (pl == nil) {
368 	insertinput(str);
369     } else {
370 	actual = getactuals(list_size(pl));
371 	p = buf;
372 	q = str;
373 	while (*q != '\0') {
374 	    if (p >= &buf[4096]) {
375 		error("alias expansion too large");
376 	    }
377 	    if (lexclass[*q] == ALPHA) {
378 		r = namebuf;
379 		do {
380 		    *r++ = *q++;
381 		} while (isalnum(*q));
382 		*r = '\0';
383 		i = 0;
384 		match = false;
385 		foreach(Name, n, pl)
386 		    if (streq(ident(n), namebuf)) {
387 			match = true;
388 			break;
389 		    }
390 		    ++i;
391 		endfor
392 		if (match) {
393 		    r = actual[i];
394 		} else {
395 		    r = namebuf;
396 		}
397 		while (*r != '\0') {
398 		    *p++ = *r++;
399 		}
400 	    } else {
401 		*p++ = *q++;
402 	    }
403 	}
404 	*p = '\0';
405 	insertinput(buf);
406     }
407 }
408 
409 /*
410  * Parser error handling.
411  */
412 
413 public yyerror(s)
414 String s;
415 {
416     register char *p;
417     register integer start;
418 
419     if (streq(s, "syntax error")) {
420 	beginerrmsg();
421 	p = prevchar;
422 	start = p - &scanner_linebuf[0];
423 	if (p > &scanner_linebuf[0]) {
424 	    while (lexclass[*p] == WHITE and p > &scanner_linebuf[0]) {
425 		--p;
426 	    }
427 	}
428 	fprintf(stderr, "%s", scanner_linebuf);
429 	if (start != 0) {
430 	    fprintf(stderr, "%*c", start, ' ');
431 	}
432 	if (p == &scanner_linebuf[0]) {
433 	    fprintf(stderr, "^ unrecognized command");
434 	} else {
435 	    fprintf(stderr, "^ syntax error");
436 	}
437 	enderrmsg();
438     } else {
439 	error(s);
440     }
441 }
442 
443 /*
444  * Eat the current line.
445  */
446 
447 public gobble ()
448 {
449     curchar = scanner_linebuf;
450     scanner_linebuf[0] = '\0';
451 }
452 
453 /*
454  * Scan an identifier.
455  *
456  * If chkalias is true, check first to see if it's an alias.
457  * Otherwise, check to see if it's a keyword.
458  */
459 
460 private Token getident (chkalias)
461 boolean chkalias;
462 {
463     char buf[1024];
464     register char *p, *q;
465     register Token t;
466     List pl;
467     String str;
468 
469     p = curchar;
470     q = buf;
471     if (shellmode) {
472 	do {
473 	    *q++ = *p++;
474 	} while (index(" \t\n!&<>*[]()'\"", *p) == nil);
475     } else {
476 	do {
477 	    *q++ = *p++;
478 	} while (isalnum(*p));
479     }
480     curchar = p;
481     *q = '\0';
482     yylval.y_name = identname(buf, false);
483     if (chkalias) {
484 	if (findalias(yylval.y_name, &pl, &str)) {
485 	    expand(pl, str);
486 	    while (lexclass[*curchar] == WHITE) {
487 		++curchar;
488 	    }
489 	    if (pl == nil) {
490 		t = getident(false);
491 	    } else {
492 		t = getident(true);
493 	    }
494 	} else if (shellmode) {
495 	    t = NAME;
496 	} else {
497 	    t = findkeyword(yylval.y_name, NAME);
498 	}
499     } else if (shellmode) {
500 	t = NAME;
501     } else {
502 	t = findkeyword(yylval.y_name, NAME);
503     }
504     return t;
505 }
506 
507 /*
508  * Scan a number.
509  */
510 
511 private Token getnum()
512 {
513     char buf[1024];
514     register Char *p, *q;
515     register Token t;
516     Integer base;
517 
518     p = curchar;
519     q = buf;
520     if (*p == '0') {
521 	if (*(p+1) == 'x') {
522 	    p += 2;
523 	    base = 16;
524 	} else if (*(p+1) == 't') {
525 	    base = 10;
526 	} else if (varIsSet("$hexin")) {
527 	    base = 16;
528 	} else {
529 	    base = 8;
530 	}
531     } else if (varIsSet("$hexin")) {
532 	base = 16;
533     } else if (varIsSet("$octin")) {
534 	base = 8;
535     } else {
536 	base = 10;
537     }
538     if (base == 16) {
539 	do {
540 	    *q++ = *p++;
541 	} while (ishexdigit(*p));
542     } else {
543 	do {
544 	    *q++ = *p++;
545 	} while (isdigit(*p));
546     }
547     if (*p == '.') {
548 	do {
549 	    *q++ = *p++;
550 	} while (isdigit(*p));
551 	if (*p == 'e' or *p == 'E') {
552 	    p++;
553 	    if (*p == '+' or *p == '-' or isdigit(*p)) {
554 		*q++ = 'e';
555 		do {
556 		    *q++ = *p++;
557 		} while (isdigit(*p));
558 	    }
559 	}
560 	*q = '\0';
561 	yylval.y_real = atof(buf);
562 	t = REAL;
563     } else {
564 	*q = '\0';
565 	switch (base) {
566 	    case 10:
567 		yylval.y_int = atol(buf);
568 		break;
569 
570 	    case 8:
571 		yylval.y_int = octal(buf);
572 		break;
573 
574 	    case 16:
575 		yylval.y_int = hex(buf);
576 		break;
577 
578 	    default:
579 		badcaseval(base);
580 	}
581 	t = INT;
582     }
583     curchar = p;
584     return t;
585 }
586 
587 /*
588  * Convert a string of octal digits to an integer.
589  */
590 
591 private int octal(s)
592 String s;
593 {
594     register Char *p;
595     register Integer n;
596 
597     n = 0;
598     for (p = s; *p != '\0'; p++) {
599 	n = 8*n + (*p - '0');
600     }
601     return n;
602 }
603 
604 /*
605  * Convert a string of hexadecimal digits to an integer.
606  */
607 
608 private int hex(s)
609 String s;
610 {
611     register Char *p;
612     register Integer n;
613 
614     n = 0;
615     for (p = s; *p != '\0'; p++) {
616 	n *= 16;
617 	if (*p >= 'a' and *p <= 'f') {
618 	    n += (*p - 'a' + 10);
619 	} else if (*p >= 'A' and *p <= 'F') {
620 	    n += (*p - 'A' + 10);
621 	} else {
622 	    n += (*p - '0');
623 	}
624     }
625     return n;
626 }
627 
628 /*
629  * Scan a string.
630  */
631 
632 private Token getstring (quote)
633 char quote;
634 {
635     register char *p, *q;
636     char buf[MAXLINESIZE];
637     boolean endofstring;
638     Token t;
639 
640     p = curchar;
641     q = buf;
642     endofstring = false;
643     while (not endofstring) {
644 	if (*p == '\\' and *(p+1) == '\n') {
645 	    if (fgets(scanner_linebuf, MAXLINESIZE, in) == nil) {
646 		error("non-terminated string");
647 	    }
648 	    p = &scanner_linebuf[0] - 1;
649 	} else if (*p == '\n' or *p == '\0') {
650 	    error("non-terminated string");
651 	    endofstring = true;
652 	} else if (*p == quote) {
653 	    endofstring = true;
654 	} else {
655 	    curchar = p;
656 	    *q++ = charcon(p);
657 	    p = curchar;
658 	}
659 	p++;
660     }
661     curchar = p;
662     *q = '\0';
663     if (quote == '\'' and buf[1] == '\0') {
664 	yylval.y_char = buf[0];
665 	t = CHAR;
666     } else {
667 	yylval.y_string = strdup(buf);
668 	t = STRING;
669     }
670     return t;
671 }
672 
673 /*
674  * Process a character constant.
675  * Watch out for backslashes.
676  */
677 
678 private char charcon (s)
679 String s;
680 {
681     register char *p, *q;
682     char c, buf[10];
683 
684     p = s;
685     if (*p == '\\') {
686 	++p;
687 	switch (*p) {
688 	    case '\\':
689 		c = '\\';
690 		break;
691 
692 	    case 'n':
693 		c = '\n';
694 		break;
695 
696 	    case 'r':
697 		c = '\r';
698 		break;
699 
700 	    case 't':
701 		c = '\t';
702 		break;
703 
704 	    case '\'':
705 	    case '"':
706 		c = *p;
707 		break;
708 
709 	    default:
710 		if (isdigit(*p)) {
711 		    q = buf;
712 		    do {
713 			*q++ = *p++;
714 		    } while (isdigit(*p));
715 		    *q = '\0';
716 		    c = (char) octal(buf);
717 		}
718 		--p;
719 		break;
720 	}
721 	curchar = p;
722     } else {
723 	c = *p;
724     }
725     return c;
726 }
727 
728 /*
729  * Input file management routines.
730  */
731 
732 public setinput(filename)
733 Filename filename;
734 {
735     File f;
736 
737     f = fopen(filename, "r");
738     if (f == nil) {
739 	error("can't open %s", filename);
740     } else {
741 	if (curinclindex >= MAXINCLDEPTH) {
742 	    error("unreasonable input nesting on \"%s\"", filename);
743 	}
744 	inclinfo[curinclindex].savefile = in;
745 	inclinfo[curinclindex].savefn = errfilename;
746 	inclinfo[curinclindex].savelineno = errlineno;
747 	curinclindex++;
748 	in = f;
749 	errfilename = filename;
750 	errlineno = 1;
751     }
752 }
753 
754 private Boolean eofinput()
755 {
756     register Boolean b;
757 
758     if (curinclindex == 0) {
759 	if (isterm(in)) {
760 	    putchar('\n');
761 	    clearerr(in);
762 	    b = false;
763 	} else {
764 	    b = true;
765 	}
766     } else {
767 	fclose(in);
768 	--curinclindex;
769 	in = inclinfo[curinclindex].savefile;
770 	errfilename = inclinfo[curinclindex].savefn;
771 	errlineno = inclinfo[curinclindex].savelineno;
772 	b = false;
773     }
774     return b;
775 }
776 
777 /*
778  * Pop the current input.  Return whether successful.
779  */
780 
781 public Boolean popinput()
782 {
783     Boolean b;
784 
785     if (curinclindex == 0) {
786 	b = false;
787     } else {
788 	b = (Boolean) (not eofinput());
789     }
790     return b;
791 }
792 
793 /*
794  * Return whether we are currently reading from standard input.
795  */
796 
797 public Boolean isstdin()
798 {
799     return (Boolean) (in == stdin);
800 }
801 
802 /*
803  * Send the current line to the shell.
804  */
805 
806 public shellline()
807 {
808     register char *p;
809 
810     p = curchar;
811     while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
812 	++p;
813     }
814     shell(p);
815     if (*p == '\0' and isterm(in)) {
816 	putchar('\n');
817     }
818     erecover();
819 }
820 
821 /*
822  * Read the rest of the current line in "shell mode".
823  */
824 
825 public beginshellmode()
826 {
827     shellmode = true;
828 }
829 
830 /*
831  * Print out a token for debugging.
832  */
833 
834 public print_token(f, t)
835 File f;
836 Token t;
837 {
838     if (t == '\n') {
839 	fprintf(f, "char '\\n'");
840     } else if (t == EOF) {
841 	fprintf(f, "EOF");
842     } else if (t < 256) {
843 	fprintf(f, "char '%c'", t);
844     } else {
845 	fprintf(f, "\"%s\"", keywdstring(t));
846     }
847 }
848