xref: /original-bsd/old/dbx/scanner.c (revision f0fd5f8a)
1 /* Copyright (c) 1982 Regents of the University of California */
2 
3 static char sccsid[] = "@(#)scanner.c 1.2 12/15/82";
4 
5 /*
6  * Debugger scanner.
7  */
8 
9 #include "defs.h"
10 #include "scanner.h"
11 #include "main.h"
12 #include "keywords.h"
13 #include "tree.h"
14 #include "symbols.h"
15 #include "names.h"
16 #include "y.tab.h"
17 
18 #ifndef public
19 typedef int Token;
20 #endif
21 
22 public String initfile = ".dbxinit";
23 
24 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
25 
26 private Charclass class[256 + 1];
27 private Charclass *lexclass = class + 1;
28 
29 #define isdigit(c) (lexclass[c] == NUM)
30 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
31 #define ishexdigit(c) ( \
32     isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
33 )
34 
35 #define MAXLINESIZE 1024
36 
37 private File in;
38 private Char linebuf[MAXLINESIZE];
39 private Char *curchar;
40 
41 #define MAXINCLDEPTH 10
42 
43 private struct {
44     File savefile;
45     Filename savefn;
46     int savelineno;
47 } inclinfo[MAXINCLDEPTH];
48 
49 private unsigned int curinclindex;
50 
51 private Boolean firsttoken = true;
52 private Boolean firstinit = true;
53 
54 private Token getident();
55 private Token getnum();
56 private Token getstring();
57 private Boolean eofinput();
58 private Char charcon();
59 private Char charlookup();
60 
61 private enterlexclass(class, s)
62 Charclass class;
63 String s;
64 {
65     register char *p;
66 
67     for (p = s; *p != '\0'; p++) {
68 	lexclass[*p] = class;
69     }
70 }
71 
72 public scanner_init()
73 {
74     register Integer i;
75 
76     for (i = 0; i < 257; i++) {
77 	class[i] = OTHER;
78     }
79     enterlexclass(WHITE, " \t");
80     enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
81     enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
82     enterlexclass(NUM, "0123456789");
83     in = stdin;
84     errfilename = nil;
85     errlineno = 0;
86     curchar = linebuf;
87     linebuf[0] = '\0';
88     if (runfirst) {
89 	firstinit = false;
90 	firsttoken = false;
91     } else if (firstinit and isterm(in)) {
92 	firstinit = false;
93 	printf("> ");
94 	fflush(stdout);
95     }
96 }
97 
98 /*
99  * Read a single token.
100  *
101  * Input is line buffered.
102  *
103  * There are two "modes" of operation:  one as in a compiler,
104  * and one for reading shell-like syntax.
105  */
106 
107 private Boolean shellmode;
108 
109 public Token yylex()
110 {
111     register int c;
112     register char *p;
113     register Token t;
114     String line;
115 
116     p = curchar;
117     if (*p == '\0') {
118 	do {
119 	    if (isterm(in)) {
120 		if (firsttoken) {
121 		    firsttoken = false;
122 		} else {
123 		    printf("> ");
124 		    fflush(stdout);
125 		}
126 	    }
127 	    line = fgets(linebuf, MAXLINESIZE, in);
128 	} while (line == nil and not eofinput());
129 	if (line == nil) {
130 	    c = EOF;
131 	} else {
132 	    p = linebuf;
133 	    while (lexclass[*p] == WHITE) {
134 		p++;
135 	    }
136 	    shellmode = false;
137 	}
138     } else {
139 	while (lexclass[*p] == WHITE) {
140 	    p++;
141 	}
142     }
143     curchar = p;
144     c = *p;
145     if (lexclass[c] == ALPHA) {
146 	t = getident();
147     } else if (lexclass[c] == NUM) {
148 	t = getnum();
149     } else {
150 	++curchar;
151 	switch (c) {
152 	    case '\n':
153 		t = '\n';
154 		if (errlineno != 0) {
155 		    errlineno++;
156 		}
157 		break;
158 
159 	    case '"':
160 	    case '\'':
161 		t = getstring();
162 		break;
163 
164 	    case '.':
165 		if (shellmode) {
166 		    --curchar;
167 		    t = getident();
168 		} else if (isdigit(*curchar)) {
169 		    --curchar;
170 		    t = getnum();
171 		} else {
172 		    t = '.';
173 		}
174 		break;
175 
176 	    case '<':
177 		if (not shellmode and *curchar == '<') {
178 		    ++curchar;
179 		    t = LFORMER;
180 		} else {
181 		    t = '<';
182 		}
183 		break;
184 
185 	    case '>':
186 		if (not shellmode and *curchar == '>') {
187 		    ++curchar;
188 		    t = RFORMER;
189 		} else {
190 		    t = '>';
191 		}
192 		break;
193 
194 	    case '#':
195 		if (*curchar == '^') {
196 		    ++curchar;
197 		    t = ABSTRACTION;
198 		} else {
199 		    t = '#';
200 		}
201 		break;
202 
203 	    case '-':
204 		if (shellmode) {
205 		    --curchar;
206 		    t = getident();
207 		} else if (*curchar == '>') {
208 		    ++curchar;
209 		    t = ARROW;
210 		} else {
211 		    t = '-';
212 		}
213 		break;
214 
215 	    case EOF:
216 		t = 0;
217 		break;
218 
219 	    default:
220 		if (shellmode and index("!&*()[]", c) == nil) {
221 		    --curchar;
222 		    t = getident();
223 		} else {
224 		    t = c;
225 		}
226 		break;
227 	}
228     }
229 #   ifdef LEXDEBUG
230 	if (lexdebug) {
231 	    fprintf(stderr, "yylex returns ");
232 	    print_token(stderr, t);
233 	    fprintf(stderr, "\n");
234 	}
235 #   endif
236     return t;
237 }
238 
239 /*
240  * Parser error handling.
241  */
242 
243 public yyerror(s)
244 String s;
245 {
246     register Char *p, *tokenbegin, *tokenend;
247     register Integer len;
248 
249     if (streq(s, "syntax error")) {
250 	beginerrmsg();
251 	tokenend = curchar - 1;
252 	tokenbegin = tokenend;
253 	while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) {
254 	    --tokenbegin;
255 	}
256 	len = tokenend - tokenbegin + 1;
257 	p = tokenbegin;
258 	if (p > &linebuf[0]) {
259 	    while (lexclass[*p] == WHITE and p > &linebuf[0]) {
260 		--p;
261 	    }
262 	}
263 	if (p == &linebuf[0]) {
264 	    fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin);
265 	} else {
266 	    fprintf(stderr, "syntax error");
267 	    if (len != 0) {
268 		fprintf(stderr, " on \"%.*s\"", len, tokenbegin);
269 	    }
270 	}
271 	enderrmsg();
272     } else {
273 	error(s);
274     }
275 }
276 
277 /*
278  * Eat the current line.
279  */
280 
281 public gobble()
282 {
283     curchar = linebuf;
284     linebuf[0] = '\0';
285 }
286 
287 /*
288  * Scan an identifier and check to see if it's a keyword.
289  */
290 
291 private Token getident()
292 {
293     char buf[256];
294     register Char *p, *q;
295     register Token t;
296 
297     p = curchar;
298     q = buf;
299     if (shellmode) {
300 	do {
301 	    *q++ = *p++;
302 	} while (index(" \t\n!&<>*[]()", *p) == nil);
303     } else {
304 	do {
305 	    *q++ = *p++;
306 	} while (isalnum(*p));
307     }
308     curchar = p;
309     *q = '\0';
310     yylval.y_name = identname(buf, false);
311     if (not shellmode) {
312 	t = findkeyword(yylval.y_name);
313 	if (t == nil) {
314 	    t = NAME;
315 	}
316     } else {
317 	t = NAME;
318     }
319     return t;
320 }
321 
322 /*
323  * Scan a number.
324  */
325 
326 private Token getnum()
327 {
328     char buf[256];
329     register Char *p, *q;
330     register Token t;
331     Integer base;
332 
333     p = curchar;
334     q = buf;
335     if (*p == '0') {
336 	if (*(p+1) == 'x') {
337 	    p += 2;
338 	    base = 16;
339 	} else {
340 	    base = 8;
341 	}
342     } else {
343 	base = 10;
344     }
345     if (base == 16) {
346 	do {
347 	    *q++ = *p++;
348 	} while (ishexdigit(*p));
349     } else {
350 	do {
351 	    *q++ = *p++;
352 	} while (isdigit(*p));
353     }
354     if (*p == '.') {
355 	do {
356 	    *q++ = *p++;
357 	} while (isdigit(*p));
358 	if (*p == 'e' or *p == 'E') {
359 	    p++;
360 	    if (*p == '+' or *p == '-' or isdigit(*p)) {
361 		*q++ = 'e';
362 		do {
363 		    *q++ = *p++;
364 		} while (isdigit(*p));
365 	    }
366 	}
367 	*q = '\0';
368 	yylval.y_real = atof(buf);
369 	t = REAL;
370     } else {
371 	*q = '\0';
372 	switch (base) {
373 	    case 10:
374 		yylval.y_int = atol(buf);
375 		break;
376 
377 	    case 8:
378 		yylval.y_int = octal(buf);
379 		break;
380 
381 	    case 16:
382 		yylval.y_int = hex(buf);
383 		break;
384 
385 	    default:
386 		badcaseval(base);
387 	}
388 	t = INT;
389     }
390     curchar = p;
391     return t;
392 }
393 
394 /*
395  * Convert a string of octal digits to an integer.
396  */
397 
398 private int octal(s)
399 String s;
400 {
401     register Char *p;
402     register Integer n;
403 
404     n = 0;
405     for (p = s; *p != '\0'; p++) {
406 	n = 8*n + (*p - '0');
407     }
408     return n;
409 }
410 
411 /*
412  * Convert a string of hexadecimal digits to an integer.
413  */
414 
415 private int hex(s)
416 String s;
417 {
418     register Char *p;
419     register Integer n;
420 
421     n = 0;
422     for (p = s; *p != '\0'; p++) {
423 	n *= 16;
424 	if (*p >= 'a' and *p <= 'f') {
425 	    n += (*p - 'a' + 10);
426 	} else if (*p >= 'A' and *p <= 'F') {
427 	    n += (*p - 'A' + 10);
428 	} else {
429 	    n += (*p - '0');
430 	}
431     }
432     return n;
433 }
434 
435 /*
436  * Scan a string.
437  */
438 
439 private Token getstring()
440 {
441     char buf[256];
442     register Char *p, *q;
443     Boolean endofstring;
444 
445     p = curchar;
446     q = buf;
447     endofstring = false;
448     while (not endofstring) {
449 	if (*p == '\n' or *p == '\0') {
450 	    error("non-terminated string");
451 	    endofstring = true;
452 	} else if (*p == '"') {
453 	    if (*(p+1) != '"') {
454 		endofstring = true;
455 	    } else {
456 		*q++ = *p;
457 	    }
458 	} else {
459 	    *q++ = charcon(*p);
460 	}
461 	p++;
462     }
463     curchar = p;
464     *q = '\0';
465     yylval.y_string = strdup(buf);
466     return STRING;
467 }
468 
469 /*
470  * Process a character constant.
471  * Watch out for backslashes.
472  */
473 
474 private Char charcon(ch)
475 Char ch;
476 {
477     Char c, buf[10], *p, *q;
478 
479     p = curchar;
480     if (ch == '\\') {
481 	if (*p != '\\') {
482 	    q = buf;
483 	    do {
484 		*q++ = *p++;
485 	    } while (*p != '\\' and *p != '\n' and *p != '\0');
486 	    if (*p != '\\') {
487 		ungetc(*p, in);
488 		error("non-terminated character constant");
489 	    }
490 	    *q = '\0';
491 	    if (isdigit(buf[0])) {
492 		c = (Char) octal(buf);
493 	    } else {
494 		c = charlookup(buf);
495 	    }
496 	    curchar = p;
497 	} else {
498 	    c = '\\';
499 	}
500     } else {
501 	c = ch;
502     }
503     return c;
504 }
505 
506 /*
507  * Do a lookup for a ASCII character name.
508  */
509 
510 private String ascii[] = {
511     "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
512     "BS",  "HT",  "NL",  "VT",  "NP",  "CR",  "SO",  "SI",
513     "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
514     "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
515     "SP", nil
516 };
517 
518 private char charlookup(s)
519 String s;
520 {
521     register int i;
522 
523     for (i = 0; ascii[i] != NULL; i++) {
524 	if (streq(s, ascii[i])) {
525 	    return i;
526 	}
527     }
528     if (streq(s, "DEL")) {
529 	return 0177;
530     }
531     error("unknown ascii name \"%s\"", s);
532     return '?';
533 }
534 
535 /*
536  * Input file management routines.
537  */
538 
539 public setinput(filename)
540 Filename filename;
541 {
542     File f;
543 
544     f = fopen(filename, "r");
545     if (f == nil) {
546 	error("can't open %s", filename);
547     } else {
548 	if (curinclindex >= MAXINCLDEPTH) {
549 	    error("unreasonable input nesting on \"%s\"", filename);
550 	}
551 	inclinfo[curinclindex].savefile = in;
552 	inclinfo[curinclindex].savefn = errfilename;
553 	inclinfo[curinclindex].savelineno = errlineno;
554 	curinclindex++;
555 	in = f;
556 	errfilename = filename;
557 	errlineno = 1;
558     }
559 }
560 
561 private Boolean eofinput()
562 {
563     register Boolean b;
564 
565     if (curinclindex == 0) {
566 	if (isterm(in)) {
567 	    putchar('\n');
568 	    b = false;
569 	} else {
570 	    b = true;
571 	}
572     } else {
573 	fclose(in);
574 	--curinclindex;
575 	in = inclinfo[curinclindex].savefile;
576 	errfilename = inclinfo[curinclindex].savefn;
577 	errlineno = inclinfo[curinclindex].savelineno;
578 	b = false;
579     }
580     return b;
581 }
582 
583 /*
584  * Pop the current input.  Return whether successful.
585  */
586 
587 public Boolean popinput()
588 {
589     Boolean b;
590 
591     if (curinclindex == 0) {
592 	b = false;
593     } else {
594 	b = (Boolean) (not eofinput());
595     }
596     return b;
597 }
598 
599 /*
600  * Return whether we are currently reading from standard input.
601  */
602 
603 public Boolean isstdin()
604 {
605     return (Boolean) (in == stdin);
606 }
607 
608 /*
609  * Send the current line to the shell.
610  */
611 
612 public shellline()
613 {
614     register char *p;
615 
616     p = curchar;
617     while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
618 	++p;
619     }
620     shell(p);
621     if (*p == '\0' and isterm(in)) {
622 	putchar('\n');
623     }
624     erecover();
625 }
626 
627 /*
628  * Read the rest of the current line in "shell mode".
629  */
630 
631 public beginshellmode()
632 {
633     shellmode = true;
634 }
635 
636 /*
637  * Print out a token for debugging.
638  */
639 
640 public print_token(f, t)
641 File f;
642 Token t;
643 {
644     if (t == '\n') {
645 	fprintf(f, "char '\\n'");
646     } else if (t == EOF) {
647 	fprintf(f, "EOF");
648     } else if (t < 256) {
649 	fprintf(f, "char '%c'", t);
650     } else {
651 	fprintf(f, "\"%s\"", keywdstring(t));
652     }
653 }
654