xref: /original-bsd/old/dbx/scanner.c (revision e74403ba)
1 /* Copyright (c) 1982 Regents of the University of California */
2 
3 static char sccsid[] = "@(#)scanner.c 1.8 08/05/83";
4 
5 /*
6  * Debugger scanner.
7  */
8 
9 #include "defs.h"
10 #include "scanner.h"
11 #include "main.h"
12 #include "keywords.h"
13 #include "tree.h"
14 #include "symbols.h"
15 #include "names.h"
16 #include "y.tab.h"
17 
18 #ifndef public
19 typedef int Token;
20 #endif
21 
22 public String initfile = ".dbxinit";
23 
24 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
25 
26 private Charclass class[256 + 1];
27 private Charclass *lexclass = class + 1;
28 
29 #define isdigit(c) (lexclass[c] == NUM)
30 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
31 #define ishexdigit(c) ( \
32     isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
33 )
34 
35 #define MAXLINESIZE 1024
36 
37 private File in;
38 private Char linebuf[MAXLINESIZE];
39 private Char *curchar;
40 
41 #define MAXINCLDEPTH 10
42 
43 private struct {
44     File savefile;
45     Filename savefn;
46     int savelineno;
47 } inclinfo[MAXINCLDEPTH];
48 
49 private unsigned int curinclindex;
50 
51 private Token getident();
52 private Token getnum();
53 private Token getstring();
54 private Boolean eofinput();
55 private Char charcon();
56 private Char charlookup();
57 
58 private enterlexclass(class, s)
59 Charclass class;
60 String s;
61 {
62     register char *p;
63 
64     for (p = s; *p != '\0'; p++) {
65 	lexclass[*p] = class;
66     }
67 }
68 
69 public scanner_init()
70 {
71     register Integer i;
72 
73     for (i = 0; i < 257; i++) {
74 	class[i] = OTHER;
75     }
76     enterlexclass(WHITE, " \t");
77     enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
78     enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
79     enterlexclass(NUM, "0123456789");
80     in = stdin;
81     errfilename = nil;
82     errlineno = 0;
83     curchar = linebuf;
84     linebuf[0] = '\0';
85 }
86 
87 /*
88  * Read a single token.
89  *
90  * Input is line buffered.
91  *
92  * There are two "modes" of operation:  one as in a compiler,
93  * and one for reading shell-like syntax.
94  */
95 
96 private Boolean shellmode;
97 
98 public Token yylex()
99 {
100     register int c;
101     register char *p;
102     register Token t;
103     String line;
104 
105     p = curchar;
106     if (*p == '\0') {
107 	do {
108 	    if (isterm(in)) {
109 		printf("(%s) ", cmdname);
110 		fflush(stdout);
111 	    }
112 	    line = fgets(linebuf, MAXLINESIZE, in);
113 	} while (line == nil and not eofinput());
114 	if (line == nil) {
115 	    c = EOF;
116 	} else {
117 	    p = linebuf;
118 	    while (lexclass[*p] == WHITE) {
119 		p++;
120 	    }
121 	    shellmode = false;
122 	}
123     } else {
124 	while (lexclass[*p] == WHITE) {
125 	    p++;
126 	}
127     }
128     curchar = p;
129     c = *p;
130     if (lexclass[c] == ALPHA) {
131 	t = getident();
132     } else if (lexclass[c] == NUM) {
133 	if (shellmode) {
134 	    t = getident();
135 	} else {
136 	    t = getnum();
137 	}
138     } else {
139 	++curchar;
140 	switch (c) {
141 	    case '\n':
142 		t = '\n';
143 		if (errlineno != 0) {
144 		    errlineno++;
145 		}
146 		break;
147 
148 	    case '"':
149 	    case '\'':
150 		t = getstring();
151 		break;
152 
153 	    case '.':
154 		if (shellmode) {
155 		    --curchar;
156 		    t = getident();
157 		} else if (isdigit(*curchar)) {
158 		    --curchar;
159 		    t = getnum();
160 		} else {
161 		    t = '.';
162 		}
163 		break;
164 
165 	    case '<':
166 		if (not shellmode and *curchar == '<') {
167 		    ++curchar;
168 		    t = LFORMER;
169 		} else {
170 		    t = '<';
171 		}
172 		break;
173 
174 	    case '>':
175 		if (not shellmode and *curchar == '>') {
176 		    ++curchar;
177 		    t = RFORMER;
178 		} else {
179 		    t = '>';
180 		}
181 		break;
182 
183 	    case '#':
184 		if (*curchar == '^') {
185 		    ++curchar;
186 		    t = ABSTRACTION;
187 		} else {
188 		    t = '#';
189 		}
190 		break;
191 
192 	    case '-':
193 		if (shellmode) {
194 		    --curchar;
195 		    t = getident();
196 		} else if (*curchar == '>') {
197 		    ++curchar;
198 		    t = ARROW;
199 		} else {
200 		    t = '-';
201 		}
202 		break;
203 
204 	    case EOF:
205 		t = 0;
206 		break;
207 
208 	    default:
209 		if (shellmode and index("!&*()[]", c) == nil) {
210 		    --curchar;
211 		    t = getident();
212 		} else {
213 		    t = c;
214 		}
215 		break;
216 	}
217     }
218 #   ifdef LEXDEBUG
219 	if (lexdebug) {
220 	    fprintf(stderr, "yylex returns ");
221 	    print_token(stderr, t);
222 	    fprintf(stderr, "\n");
223 	}
224 #   endif
225     return t;
226 }
227 
228 /*
229  * Parser error handling.
230  */
231 
232 public yyerror(s)
233 String s;
234 {
235     register Char *p, *tokenbegin, *tokenend;
236     register Integer len;
237 
238     if (streq(s, "syntax error")) {
239 	beginerrmsg();
240 	tokenend = curchar - 1;
241 	tokenbegin = tokenend;
242 	while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) {
243 	    --tokenbegin;
244 	}
245 	len = tokenend - tokenbegin + 1;
246 	p = tokenbegin;
247 	if (p > &linebuf[0]) {
248 	    while (lexclass[*p] == WHITE and p > &linebuf[0]) {
249 		--p;
250 	    }
251 	}
252 	if (p == &linebuf[0]) {
253 	    fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin);
254 	} else {
255 	    fprintf(stderr, "syntax error");
256 	    if (len != 0) {
257 		fprintf(stderr, " on \"%.*s\"", len, tokenbegin);
258 	    }
259 	}
260 	enderrmsg();
261     } else {
262 	error(s);
263     }
264 }
265 
266 /*
267  * Eat the current line.
268  */
269 
270 public gobble()
271 {
272     curchar = linebuf;
273     linebuf[0] = '\0';
274 }
275 
276 /*
277  * Scan an identifier and check to see if it's a keyword.
278  */
279 
280 private Token getident()
281 {
282     char buf[256];
283     register Char *p, *q;
284     register Token t;
285 
286     p = curchar;
287     q = buf;
288     if (shellmode) {
289 	do {
290 	    *q++ = *p++;
291 	} while (index(" \t\n!&<>*[]()", *p) == nil);
292     } else {
293 	do {
294 	    *q++ = *p++;
295 	} while (isalnum(*p));
296     }
297     curchar = p;
298     *q = '\0';
299     yylval.y_name = identname(buf, false);
300     if (not shellmode) {
301 	t = findkeyword(yylval.y_name);
302 	if (t == nil) {
303 	    t = NAME;
304 	}
305     } else {
306 	t = NAME;
307     }
308     return t;
309 }
310 
311 /*
312  * Scan a number.
313  */
314 
315 private Token getnum()
316 {
317     char buf[256];
318     register Char *p, *q;
319     register Token t;
320     Integer base;
321 
322     p = curchar;
323     q = buf;
324     if (*p == '0') {
325 	if (*(p+1) == 'x') {
326 	    p += 2;
327 	    base = 16;
328 	} else {
329 	    base = 8;
330 	}
331     } else {
332 	base = 10;
333     }
334     if (base == 16) {
335 	do {
336 	    *q++ = *p++;
337 	} while (ishexdigit(*p));
338     } else {
339 	do {
340 	    *q++ = *p++;
341 	} while (isdigit(*p));
342     }
343     if (*p == '.') {
344 	do {
345 	    *q++ = *p++;
346 	} while (isdigit(*p));
347 	if (*p == 'e' or *p == 'E') {
348 	    p++;
349 	    if (*p == '+' or *p == '-' or isdigit(*p)) {
350 		*q++ = 'e';
351 		do {
352 		    *q++ = *p++;
353 		} while (isdigit(*p));
354 	    }
355 	}
356 	*q = '\0';
357 	yylval.y_real = atof(buf);
358 	t = REAL;
359     } else {
360 	*q = '\0';
361 	switch (base) {
362 	    case 10:
363 		yylval.y_int = atol(buf);
364 		break;
365 
366 	    case 8:
367 		yylval.y_int = octal(buf);
368 		break;
369 
370 	    case 16:
371 		yylval.y_int = hex(buf);
372 		break;
373 
374 	    default:
375 		badcaseval(base);
376 	}
377 	t = INT;
378     }
379     curchar = p;
380     return t;
381 }
382 
383 /*
384  * Convert a string of octal digits to an integer.
385  */
386 
387 private int octal(s)
388 String s;
389 {
390     register Char *p;
391     register Integer n;
392 
393     n = 0;
394     for (p = s; *p != '\0'; p++) {
395 	n = 8*n + (*p - '0');
396     }
397     return n;
398 }
399 
400 /*
401  * Convert a string of hexadecimal digits to an integer.
402  */
403 
404 private int hex(s)
405 String s;
406 {
407     register Char *p;
408     register Integer n;
409 
410     n = 0;
411     for (p = s; *p != '\0'; p++) {
412 	n *= 16;
413 	if (*p >= 'a' and *p <= 'f') {
414 	    n += (*p - 'a' + 10);
415 	} else if (*p >= 'A' and *p <= 'F') {
416 	    n += (*p - 'A' + 10);
417 	} else {
418 	    n += (*p - '0');
419 	}
420     }
421     return n;
422 }
423 
424 /*
425  * Scan a string.
426  */
427 
428 private Token getstring()
429 {
430     char buf[256];
431     register Char *p, *q;
432     Boolean endofstring;
433 
434     p = curchar;
435     q = buf;
436     endofstring = false;
437     while (not endofstring) {
438 	if (*p == '\n' or *p == '\0') {
439 	    error("non-terminated string");
440 	    endofstring = true;
441 	} else if (*p == '"' or *p == '\'') {
442 	    if (*(p+1) != *p) {
443 		endofstring = true;
444 	    } else {
445 		*q++ = *p;
446 	    }
447 	} else {
448 	    curchar = p;
449 	    *q++ = charcon(p);
450 	    p = curchar;
451 	}
452 	p++;
453     }
454     curchar = p;
455     *q = '\0';
456     yylval.y_string = strdup(buf);
457     return STRING;
458 }
459 
460 /*
461  * Process a character constant.
462  * Watch out for backslashes.
463  */
464 
465 private Char charcon(p)
466 char *p;
467 {
468     char c, buf[10], *q;
469 
470     if (*p == '\\') {
471 	++p;
472 	if (*p != '\\') {
473 	    q = buf;
474 	    do {
475 		*q++ = *p++;
476 	    } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0');
477 	    *q = '\0';
478 	    if (isdigit(buf[0])) {
479 		c = (Char) octal(buf);
480 	    } else {
481 		c = charlookup(buf);
482 	    }
483 	    curchar = p - 1;
484 	} else {
485 	    c = '\\';
486 	}
487     } else {
488 	c = *p;
489     }
490     return c;
491 }
492 
493 /*
494  * Do a lookup for a ASCII character name.
495  */
496 
497 private String ascii[] = {
498     "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
499     "BS",  "HT",  "NL",  "VT",  "NP",  "CR",  "SO",  "SI",
500     "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
501     "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
502     "SP", nil
503 };
504 
505 private char charlookup(s)
506 String s;
507 {
508     register int i;
509 
510     for (i = 0; ascii[i] != NULL; i++) {
511 	if (streq(s, ascii[i])) {
512 	    return i;
513 	}
514     }
515     if (streq(s, "DEL")) {
516 	return 0177;
517     }
518     error("unknown ascii name \"%s\"", s);
519     return '?';
520 }
521 
522 /*
523  * Input file management routines.
524  */
525 
526 public setinput(filename)
527 Filename filename;
528 {
529     File f;
530 
531     f = fopen(filename, "r");
532     if (f == nil) {
533 	error("can't open %s", filename);
534     } else {
535 	if (curinclindex >= MAXINCLDEPTH) {
536 	    error("unreasonable input nesting on \"%s\"", filename);
537 	}
538 	inclinfo[curinclindex].savefile = in;
539 	inclinfo[curinclindex].savefn = errfilename;
540 	inclinfo[curinclindex].savelineno = errlineno;
541 	curinclindex++;
542 	in = f;
543 	errfilename = filename;
544 	errlineno = 1;
545     }
546 }
547 
548 private Boolean eofinput()
549 {
550     register Boolean b;
551 
552     if (curinclindex == 0) {
553 	if (isterm(in)) {
554 	    putchar('\n');
555 	    clearerr(in);
556 	    b = false;
557 	} else {
558 	    b = true;
559 	}
560     } else {
561 	fclose(in);
562 	--curinclindex;
563 	in = inclinfo[curinclindex].savefile;
564 	errfilename = inclinfo[curinclindex].savefn;
565 	errlineno = inclinfo[curinclindex].savelineno;
566 	b = false;
567     }
568     return b;
569 }
570 
571 /*
572  * Pop the current input.  Return whether successful.
573  */
574 
575 public Boolean popinput()
576 {
577     Boolean b;
578 
579     if (curinclindex == 0) {
580 	b = false;
581     } else {
582 	b = (Boolean) (not eofinput());
583     }
584     return b;
585 }
586 
587 /*
588  * Return whether we are currently reading from standard input.
589  */
590 
591 public Boolean isstdin()
592 {
593     return (Boolean) (in == stdin);
594 }
595 
596 /*
597  * Send the current line to the shell.
598  */
599 
600 public shellline()
601 {
602     register char *p;
603 
604     p = curchar;
605     while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
606 	++p;
607     }
608     shell(p);
609     if (*p == '\0' and isterm(in)) {
610 	putchar('\n');
611     }
612     erecover();
613 }
614 
615 /*
616  * Read the rest of the current line in "shell mode".
617  */
618 
619 public beginshellmode()
620 {
621     shellmode = true;
622 }
623 
624 /*
625  * Print out a token for debugging.
626  */
627 
628 public print_token(f, t)
629 File f;
630 Token t;
631 {
632     if (t == '\n') {
633 	fprintf(f, "char '\\n'");
634     } else if (t == EOF) {
635 	fprintf(f, "EOF");
636     } else if (t < 256) {
637 	fprintf(f, "char '%c'", t);
638     } else {
639 	fprintf(f, "\"%s\"", keywdstring(t));
640     }
641 }
642