xref: /original-bsd/usr.bin/window/scanner.c (revision 823023b8)
1 /*
2  * Copyright (c) 1983 Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms are permitted
6  * provided that the above copyright notice and this paragraph are
7  * duplicated in all such forms and that any documentation,
8  * advertising materials, and other materials related to such
9  * distribution and use acknowledge that the software was developed
10  * by the University of California, Berkeley.  The name of the
11  * University may not be used to endorse or promote products derived
12  * from this software without specific prior written permission.
13  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16  */
17 
18 #ifndef lint
19 static char sccsid[] = "@(#)scanner.c	3.11 (Berkeley) 06/29/88";
20 #endif /* not lint */
21 
22 #include <stdio.h>
23 #include "value.h"
24 #include "token.h"
25 #include "context.h"
26 #include "string.h"
27 
28 s_getc()
29 {
30 	register c;
31 
32 	switch (cx.x_type) {
33 	case X_FILE:
34 		c = getc(cx.x_fp);
35 		if (cx.x_bol && c != EOF) {
36 			cx.x_bol = 0;
37 			cx.x_lineno++;
38 		}
39 		if (c == '\n')
40 			cx.x_bol = 1;
41 		return c;
42 	case X_BUF:
43 		if (*cx.x_bufp != 0)
44 			return *cx.x_bufp++ & 0xff;
45 		else
46 			return EOF;
47 	}
48 	/*NOTREACHED*/
49 }
50 
51 s_ungetc(c)
52 {
53 	if (c == EOF)
54 		return EOF;
55 	switch (cx.x_type) {
56 	case X_FILE:
57 		cx.x_bol = 0;
58 		return ungetc(c, cx.x_fp);
59 	case X_BUF:
60 		if (cx.x_bufp > cx.x_buf)
61 			return *--cx.x_bufp = c;
62 		else
63 			return EOF;
64 	}
65 	/*NOTREACHED*/
66 }
67 
/*
 * Scan the next token from the current input.  The token type is stored
 * in cx.x_token and also returned.
 *
 * The scanner is a state machine: every pass through `loop' reads one
 * character with s_getc() and dispatches on `state'.  State 0 is the
 * start of a token; a negative state means the token is complete.
 * A character that turns out not to belong to the token is handed back
 * with s_ungetc().
 *
 * Token values:
 *	T_NUM	cx.x_val.v_num is the value of a decimal, octal (leading
 *		0) or hexadecimal (leading 0x or 0X) number.
 *	T_CHAR	cx.x_val.v_num is a character not otherwise recognized.
 *	T_STR	cx.x_val.v_str is a copy, made with str_cpy(), of a word
 *		built from unquoted text, "..." and '...' sections and
 *		backslash escapes (decoded by s_gettok1()).
 * The words if, then, else, elsif and endif produce T_IF, T_THEN,
 * T_ELSE, T_ELSIF and T_ENDIF instead of T_STR.
 *
 * Words are collected in a local buffer of 100 bytes; characters that
 * do not fit (after the 99th) are dropped.  If str_cpy() returns 0,
 * p_memerror() is called and T_EOF is returned.
 */
68 s_gettok()
69 {
70 	char buf[100];			/* text of the word being collected */
71 	register char *p = buf;		/* next free position in buf */
72 	register c;			/* character just read */
73 	register state = 0;		/* current state, < 0 when done */
74 
75 loop:
76 	c = s_getc();
77 	switch (state) {
78 	case 0:				/* start of a token */
79 		switch (c) {
80 		case ' ':
81 		case '\t':
			/* blanks between tokens are skipped */
82 			break;
83 		case '\n':
84 		case ';':
85 			cx.x_token = T_EOL;
86 			state = -1;
87 			break;
88 		case '#':
89 			state = 1;
90 			break;
91 		case EOF:
92 			cx.x_token = T_EOF;
93 			state = -1;
94 			break;
95 		case 'a': case 'b': case 'c': case 'd': case 'e':
96 		case 'f': case 'g': case 'h': case 'i': case 'j':
97 		case 'k': case 'l': case 'm': case 'n': case 'o':
98 		case 'p': case 'q': case 'r': case 's': case 't':
99 		case 'u': case 'v': case 'w': case 'x': case 'y':
100 		case 'z':
101 		case 'A': case 'B': case 'C': case 'D': case 'E':
102 		case 'F': case 'G': case 'H': case 'I': case 'J':
103 		case 'K': case 'L': case 'M': case 'N': case 'O':
104 		case 'P': case 'Q': case 'R': case 'S': case 'T':
105 		case 'U': case 'V': case 'W': case 'X': case 'Y':
106 		case 'Z':
107 		case '_': case '.':
			/* first character of an unquoted word */
108 			*p++ = c;
109 			state = 2;
110 			break;
111 		case '"':
112 			state = 3;
113 			break;
114 		case '\'':
115 			state = 4;
116 			break;
117 		case '\\':
118 			switch (c = s_gettok1()) {
119 			case -1:	/* input ended after the backslash */
120 				break;
121 			case -2:	/* backslash-newline: just skip it */
122 				state = 0;
123 				break;
124 			default:	/* escaped character starts a word */
125 				*p++ = c;
126 				state = 2;
127 			}
128 			break;
129 		case '0':
			/* may be octal, hex, or a plain zero; see state 10 */
130 			cx.x_val.v_num = 0;
131 			state = 10;
132 			break;
133 		case '1': case '2': case '3': case '4':
134 		case '5': case '6': case '7': case '8': case '9':
135 			cx.x_val.v_num = c - '0';
136 			state = 11;
137 			break;
		/*
		 * The next seven characters may start a two character
		 * operator; states 20-26 look at the following character.
		 */
138 		case '>':
139 			state = 20;
140 			break;
141 		case '<':
142 			state = 21;
143 			break;
144 		case '=':
145 			state = 22;
146 			break;
147 		case '!':
148 			state = 23;
149 			break;
150 		case '&':
151 			state = 24;
152 			break;
153 		case '|':
154 			state = 25;
155 			break;
156 		case '$':
157 			state = 26;
158 			break;
		/* single character tokens */
159 		case '~':
160 			cx.x_token = T_COMP;
161 			state = -1;
162 			break;
163 		case '+':
164 			cx.x_token = T_PLUS;
165 			state = -1;
166 			break;
167 		case '-':
168 			cx.x_token = T_MINUS;
169 			state = -1;
170 			break;
171 		case '*':
172 			cx.x_token = T_MUL;
173 			state = -1;
174 			break;
175 		case '/':
176 			cx.x_token = T_DIV;
177 			state = -1;
178 			break;
179 		case '%':
180 			cx.x_token = T_MOD;
181 			state = -1;
182 			break;
183 		case '^':
184 			cx.x_token = T_XOR;
185 			state = -1;
186 			break;
187 		case '(':
188 			cx.x_token = T_LP;
189 			state = -1;
190 			break;
191 		case ')':
192 			cx.x_token = T_RP;
193 			state = -1;
194 			break;
195 		case ',':
196 			cx.x_token = T_COMMA;
197 			state = -1;
198 			break;
199 		case '?':
200 			cx.x_token = T_QUEST;
201 			state = -1;
202 			break;
203 		case ':':
204 			cx.x_token = T_COLON;
205 			state = -1;
206 			break;
207 		case '[':
208 			cx.x_token = T_LB;
209 			state = -1;
210 			break;
211 		case ']':
212 			cx.x_token = T_RB;
213 			state = -1;
214 			break;
215 		default:
			/* anything else is returned as itself */
216 			cx.x_val.v_num = c;
217 			cx.x_token = T_CHAR;
218 			state = -1;
219 			break;
220 		}
221 		break;
222 	case 1:				/* got #: skip to end of line */
223 		if (c == '\n' || c == EOF) {
			/*
			 * Hand the newline back so that state 0 turns it
			 * into T_EOL.  s_ungetc(EOF) does nothing; state 0
			 * simply reads again.
			 */
224 			(void) s_ungetc(c);
225 			state = 0;
226 		}
227 		break;
228 	case 2:				/* unquoted string */
229 		switch (c) {
230 		case 'a': case 'b': case 'c': case 'd': case 'e':
231 		case 'f': case 'g': case 'h': case 'i': case 'j':
232 		case 'k': case 'l': case 'm': case 'n': case 'o':
233 		case 'p': case 'q': case 'r': case 's': case 't':
234 		case 'u': case 'v': case 'w': case 'x': case 'y':
235 		case 'z':
236 		case 'A': case 'B': case 'C': case 'D': case 'E':
237 		case 'F': case 'G': case 'H': case 'I': case 'J':
238 		case 'K': case 'L': case 'M': case 'N': case 'O':
239 		case 'P': case 'Q': case 'R': case 'S': case 'T':
240 		case 'U': case 'V': case 'W': case 'X': case 'Y':
241 		case 'Z':
242 		case '_': case '.':
243 		case '0': case '1': case '2': case '3': case '4':
244 		case '5': case '6': case '7': case '8': case '9':
			/* keep one byte free for the terminating 0 */
245 			if (p < buf + sizeof buf - 1)
246 				*p++ = c;
247 			break;
		/* a quoted section continues the same word */
248 		case '"':
249 			state = 3;
250 			break;
251 		case '\'':
252 			state = 4;
253 			break;
254 		case '\\':
255 			switch (c = s_gettok1()) {
256 			case -2:
				/*
				 * Backslash-newline ends the word: the blank
				 * pushed back here is not a word character,
				 * so the next pass takes the default case.
				 */
257 				(void) s_ungetc(' ');	/* FALLTHROUGH */
258 			case -1:
259 				break;
260 			default:
261 				if (p < buf + sizeof buf - 1)
262 					*p++ = c;
263 			}
264 			break;
265 		default:
			/* c is not part of the word: save it for later */
266 			(void) s_ungetc(c);	/* FALLTHROUGH */
267 		case EOF:
268 			*p = 0;
269 			cx.x_token = T_STR;
			/*
			 * Keyword check.  Only the collected text is
			 * examined, so a word written with quotes or
			 * escapes is still taken as a keyword.
			 */
270 			switch (*buf) {
271 			case 'i':
272 				if (buf[1] == 'f' && buf[2] == 0)
273 					cx.x_token = T_IF;
274 				break;
275 			case 't':
276 				if (buf[1] == 'h' && buf[2] == 'e'
277 				    && buf[3] == 'n' && buf[4] == 0)
278 					cx.x_token = T_THEN;
279 				break;
280 			case 'e':
281 				if (buf[1] == 'n' && buf[2] == 'd'
282 				    && buf[3] == 'i' && buf[4] == 'f'
283 				    && buf[5] == 0)
284 					cx.x_token = T_ENDIF;
				/*
				 * "els" prefix; the else below pairs with
				 * the inner if ("elsif"), not with this one.
				 */
285 				else if (buf[1] == 'l' && buf[2] == 's')
286 					if (buf[3] == 'i' && buf[4] == 'f'
287 					    && buf[5] == 0)
288 						cx.x_token = T_ELSIF;
289 					else if (buf[3] == 'e' && buf[4] == 0)
290 						cx.x_token = T_ELSE;
291 				break;
292 			}
			/*
			 * Only a real string gets a copy of the text.  If
			 * str_cpy() returns 0 the error is reported and the
			 * token becomes T_EOF.
			 */
293 			if (cx.x_token == T_STR
294 			    && (cx.x_val.v_str = str_cpy(buf)) == 0) {
295 				p_memerror();
296 				cx.x_token = T_EOF;
297 			}
298 			state = -1;
299 			break;
300 		}
301 		break;
302 	case 3:				/* " quoted string */
303 		switch (c) {
304 		case '\n':
			/*
			 * Unterminated quote: a newline or EOF closes it.
			 * The newline itself is handed back.
			 */
305 			(void) s_ungetc(c);	/* FALLTHROUGH */
306 		case EOF:
307 		case '"':
			/* back to the unquoted part of the same word */
308 			state = 2;
309 			break;
310 		case '\\':
311 			switch (c = s_gettok1()) {
312 			case -1:
313 			case -2:	/* newlines are invisible */
314 				break;
315 			default:
316 				if (p < buf + sizeof buf - 1)
317 					*p++ = c;
318 			}
319 			break;
320 		default:
321 			if (p < buf + sizeof buf - 1)
322 				*p++ = c;
323 			break;
324 		}
325 		break;
326 	case 4:				/* ' quoted string */
		/* same as state 3, with ' as the closing character */
327 		switch (c) {
328 		case '\n':
329 			(void) s_ungetc(c);	/* FALLTHROUGH */
330 		case EOF:
331 		case '\'':
332 			state = 2;
333 			break;
334 		case '\\':
335 			switch (c = s_gettok1()) {
336 			case -1:
337 			case -2:	/* newlines are invisible */
338 				break;
339 			default:
340 				if (p < buf + sizeof buf - 1)
341 					*p++ = c;
342 			}
343 			break;
344 		default:
345 			if (p < buf + sizeof buf - 1)
346 				*p++ = c;
347 			break;
348 		}
349 		break;
350 	case 10:			/* got 0 */
351 		switch (c) {
352 		case 'x':
353 		case 'X':
			/* hex; with no digits following the value stays 0 */
354 			cx.x_val.v_num = 0;
355 			state = 12;
356 			break;
357 		case '0': case '1': case '2': case '3': case '4':
358 		case '5': case '6': case '7':
359 			cx.x_val.v_num = c - '0';
360 			state = 13;
361 			break;
362 		case '8': case '9':
			/* not an octal digit: continue as a decimal number */
363 			cx.x_val.v_num = c - '0';
364 			state = 11;
365 			break;
366 		default:
			/* a lone 0 */
367 			(void) s_ungetc(c);
368 			state = -1;
369 			cx.x_token = T_NUM;
370 		}
371 		break;
372 	case 11:			/* decimal number */
373 		switch (c) {
374 		case '0': case '1': case '2': case '3': case '4':
375 		case '5': case '6': case '7': case '8': case '9':
376 			cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
377 			break;
378 		default:
379 			(void) s_ungetc(c);
380 			state = -1;
381 			cx.x_token = T_NUM;
382 		}
383 		break;
384 	case 12:			/* hex number */
385 		switch (c) {
386 		case '0': case '1': case '2': case '3': case '4':
387 		case '5': case '6': case '7': case '8': case '9':
388 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
389 			break;
390 		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
391 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
392 			break;
393 		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
394 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
395 			break;
396 		default:
397 			(void) s_ungetc(c);
398 			state = -1;
399 			cx.x_token = T_NUM;
400 		}
401 		break;
402 	case 13:			/* octal number */
403 		switch (c) {
404 		case '0': case '1': case '2': case '3': case '4':
405 		case '5': case '6': case '7':
406 			cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
407 			break;
408 		default:
409 			(void) s_ungetc(c);
410 			state = -1;
411 			cx.x_token = T_NUM;
412 		}
413 		break;
	/*
	 * States 20-26: the first character of a possible two character
	 * operator has been read.  If c does not complete one, it is
	 * pushed back and the single character token is returned.
	 */
414 	case 20:			/* got > */
415 		switch (c) {
416 		case '=':
417 			cx.x_token = T_GE;
418 			state = -1;
419 			break;
420 		case '>':
421 			cx.x_token = T_RS;
422 			state = -1;
423 			break;
424 		default:
425 			(void) s_ungetc(c);
426 			cx.x_token = T_GT;
427 			state = -1;
428 		}
429 		break;
430 	case 21:			/* got < */
431 		switch (c) {
432 		case '=':
433 			cx.x_token = T_LE;
434 			state = -1;
435 			break;
436 		case '<':
437 			cx.x_token = T_LS;
438 			state = -1;
439 			break;
440 		default:
441 			(void) s_ungetc(c);
442 			cx.x_token = T_LT;
443 			state = -1;
444 		}
445 		break;
446 	case 22:			/* got = */
447 		switch (c) {
448 		case '=':
449 			cx.x_token = T_EQ;
450 			state = -1;
451 			break;
452 		default:
453 			(void) s_ungetc(c);
454 			cx.x_token = T_ASSIGN;
455 			state = -1;
456 		}
457 		break;
458 	case 23:			/* got ! */
459 		switch (c) {
460 		case '=':
461 			cx.x_token = T_NE;
462 			state = -1;
463 			break;
464 		default:
465 			(void) s_ungetc(c);
466 			cx.x_token = T_NOT;
467 			state = -1;
468 		}
469 		break;
470 	case 24:			/* got & */
471 		switch (c) {
472 		case '&':
473 			cx.x_token = T_ANDAND;
474 			state = -1;
475 			break;
476 		default:
477 			(void) s_ungetc(c);
478 			cx.x_token = T_AND;
479 			state = -1;
480 		}
481 		break;
482 	case 25:			/* got | */
483 		switch (c) {
484 		case '|':
485 			cx.x_token = T_OROR;
486 			state = -1;
487 			break;
488 		default:
489 			(void) s_ungetc(c);
490 			cx.x_token = T_OR;
491 			state = -1;
492 		}
493 		break;
494 	case 26:			/* got $ */
495 		switch (c) {
496 		case '?':
497 			cx.x_token = T_DQ;
498 			state = -1;
499 			break;
500 		default:
501 			(void) s_ungetc(c);
502 			cx.x_token = T_DOLLAR;
503 			state = -1;
504 		}
505 		break;
506 	default:
		/* no such state: internal error */
507 		abort();
508 	}
	/* a non-negative state means the token is not finished yet */
509 	if (state >= 0)
510 		goto loop;
511 	return cx.x_token;
512 }
513 
/*
 * Decode the rest of a backslash escape; the caller has already
 * consumed the backslash itself.
 *
 * Returns -1 if the input ends right after the backslash and -2 if the
 * backslash is followed by a newline.  Otherwise returns the value of
 * the escape: \b, \f, \n, \r and \t give the usual control characters,
 * one to three octal digits give the number they spell, and any other
 * character stands for itself.
 */
int
s_gettok1()
{
	register int ch;
	register int val;
	register int more;

	ch = s_getc();
	switch (ch) {
	case EOF:
		return -1;
	case '\n':
		return -2;
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	}
	if (ch < '0' || ch > '7')
		return ch;		/* ordinary character, taken literally */

	/* Octal escape: up to two more digits may follow the first. */
	val = ch - '0';
	for (more = 2; more > 0; more--) {
		ch = s_getc();
		if (ch < '0' || ch > '7') {
			/* not part of the number, leave it for the caller */
			(void) s_ungetc(ch);
			break;
		}
		val = val * 8 + ch - '0';
	}
	return val;
}
555