xref: /original-bsd/usr.bin/window/scanner.c (revision 4c3b28fe)
1 /*
2  * Copyright (c) 1983 Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms are permitted
6  * provided that the above copyright notice and this paragraph are
7  * duplicated in all such forms and that any documentation,
8  * advertising materials, and other materials related to such
9  * distribution and use acknowledge that the software was developed
10  * by the University of California, Berkeley.  The name of the
11  * University may not be used to endorse or promote products derived
12  * from this software without specific prior written permission.
13  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16  */
17 
18 #ifndef lint
19 static char sccsid[] = "@(#)scanner.c	3.12 (Berkeley) 10/13/89";
20 #endif /* not lint */
21 
22 #include "value.h"
23 #include "token.h"
24 #include "context.h"
25 #include "string.h"
26 
27 s_getc()
28 {
29 	register c;
30 
31 	switch (cx.x_type) {
32 	case X_FILE:
33 		c = getc(cx.x_fp);
34 		if (cx.x_bol && c != EOF) {
35 			cx.x_bol = 0;
36 			cx.x_lineno++;
37 		}
38 		if (c == '\n')
39 			cx.x_bol = 1;
40 		return c;
41 	case X_BUF:
42 		if (*cx.x_bufp != 0)
43 			return *cx.x_bufp++ & 0xff;
44 		else
45 			return EOF;
46 	}
47 	/*NOTREACHED*/
48 }
49 
50 s_ungetc(c)
51 {
52 	if (c == EOF)
53 		return EOF;
54 	switch (cx.x_type) {
55 	case X_FILE:
56 		cx.x_bol = 0;
57 		return ungetc(c, cx.x_fp);
58 	case X_BUF:
59 		if (cx.x_bufp > cx.x_buf)
60 			return *--cx.x_bufp = c;
61 		else
62 			return EOF;
63 	}
64 	/*NOTREACHED*/
65 }
66 
67 s_gettok()
68 {
69 	char buf[100];
70 	register char *p = buf;
71 	register c;
72 	register state = 0;
73 
74 loop:
75 	c = s_getc();
76 	switch (state) {
77 	case 0:
78 		switch (c) {
79 		case ' ':
80 		case '\t':
81 			break;
82 		case '\n':
83 		case ';':
84 			cx.x_token = T_EOL;
85 			state = -1;
86 			break;
87 		case '#':
88 			state = 1;
89 			break;
90 		case EOF:
91 			cx.x_token = T_EOF;
92 			state = -1;
93 			break;
94 		case 'a': case 'b': case 'c': case 'd': case 'e':
95 		case 'f': case 'g': case 'h': case 'i': case 'j':
96 		case 'k': case 'l': case 'm': case 'n': case 'o':
97 		case 'p': case 'q': case 'r': case 's': case 't':
98 		case 'u': case 'v': case 'w': case 'x': case 'y':
99 		case 'z':
100 		case 'A': case 'B': case 'C': case 'D': case 'E':
101 		case 'F': case 'G': case 'H': case 'I': case 'J':
102 		case 'K': case 'L': case 'M': case 'N': case 'O':
103 		case 'P': case 'Q': case 'R': case 'S': case 'T':
104 		case 'U': case 'V': case 'W': case 'X': case 'Y':
105 		case 'Z':
106 		case '_': case '.':
107 			*p++ = c;
108 			state = 2;
109 			break;
110 		case '"':
111 			state = 3;
112 			break;
113 		case '\'':
114 			state = 4;
115 			break;
116 		case '\\':
117 			switch (c = s_gettok1()) {
118 			case -1:
119 				break;
120 			case -2:
121 				state = 0;
122 				break;
123 			default:
124 				*p++ = c;
125 				state = 2;
126 			}
127 			break;
128 		case '0':
129 			cx.x_val.v_num = 0;
130 			state = 10;
131 			break;
132 		case '1': case '2': case '3': case '4':
133 		case '5': case '6': case '7': case '8': case '9':
134 			cx.x_val.v_num = c - '0';
135 			state = 11;
136 			break;
137 		case '>':
138 			state = 20;
139 			break;
140 		case '<':
141 			state = 21;
142 			break;
143 		case '=':
144 			state = 22;
145 			break;
146 		case '!':
147 			state = 23;
148 			break;
149 		case '&':
150 			state = 24;
151 			break;
152 		case '|':
153 			state = 25;
154 			break;
155 		case '$':
156 			state = 26;
157 			break;
158 		case '~':
159 			cx.x_token = T_COMP;
160 			state = -1;
161 			break;
162 		case '+':
163 			cx.x_token = T_PLUS;
164 			state = -1;
165 			break;
166 		case '-':
167 			cx.x_token = T_MINUS;
168 			state = -1;
169 			break;
170 		case '*':
171 			cx.x_token = T_MUL;
172 			state = -1;
173 			break;
174 		case '/':
175 			cx.x_token = T_DIV;
176 			state = -1;
177 			break;
178 		case '%':
179 			cx.x_token = T_MOD;
180 			state = -1;
181 			break;
182 		case '^':
183 			cx.x_token = T_XOR;
184 			state = -1;
185 			break;
186 		case '(':
187 			cx.x_token = T_LP;
188 			state = -1;
189 			break;
190 		case ')':
191 			cx.x_token = T_RP;
192 			state = -1;
193 			break;
194 		case ',':
195 			cx.x_token = T_COMMA;
196 			state = -1;
197 			break;
198 		case '?':
199 			cx.x_token = T_QUEST;
200 			state = -1;
201 			break;
202 		case ':':
203 			cx.x_token = T_COLON;
204 			state = -1;
205 			break;
206 		case '[':
207 			cx.x_token = T_LB;
208 			state = -1;
209 			break;
210 		case ']':
211 			cx.x_token = T_RB;
212 			state = -1;
213 			break;
214 		default:
215 			cx.x_val.v_num = c;
216 			cx.x_token = T_CHAR;
217 			state = -1;
218 			break;
219 		}
220 		break;
221 	case 1:				/* got # */
222 		if (c == '\n' || c == EOF) {
223 			(void) s_ungetc(c);
224 			state = 0;
225 		}
226 		break;
227 	case 2:				/* unquoted string */
228 		switch (c) {
229 		case 'a': case 'b': case 'c': case 'd': case 'e':
230 		case 'f': case 'g': case 'h': case 'i': case 'j':
231 		case 'k': case 'l': case 'm': case 'n': case 'o':
232 		case 'p': case 'q': case 'r': case 's': case 't':
233 		case 'u': case 'v': case 'w': case 'x': case 'y':
234 		case 'z':
235 		case 'A': case 'B': case 'C': case 'D': case 'E':
236 		case 'F': case 'G': case 'H': case 'I': case 'J':
237 		case 'K': case 'L': case 'M': case 'N': case 'O':
238 		case 'P': case 'Q': case 'R': case 'S': case 'T':
239 		case 'U': case 'V': case 'W': case 'X': case 'Y':
240 		case 'Z':
241 		case '_': case '.':
242 		case '0': case '1': case '2': case '3': case '4':
243 		case '5': case '6': case '7': case '8': case '9':
244 			if (p < buf + sizeof buf - 1)
245 				*p++ = c;
246 			break;
247 		case '"':
248 			state = 3;
249 			break;
250 		case '\'':
251 			state = 4;
252 			break;
253 		case '\\':
254 			switch (c = s_gettok1()) {
255 			case -2:
256 				(void) s_ungetc(' ');
257 			case -1:
258 				break;
259 			default:
260 				if (p < buf + sizeof buf - 1)
261 					*p++ = c;
262 			}
263 			break;
264 		default:
265 			(void) s_ungetc(c);
266 		case EOF:
267 			*p = 0;
268 			cx.x_token = T_STR;
269 			switch (*buf) {
270 			case 'i':
271 				if (buf[1] == 'f' && buf[2] == 0)
272 					cx.x_token = T_IF;
273 				break;
274 			case 't':
275 				if (buf[1] == 'h' && buf[2] == 'e'
276 				    && buf[3] == 'n' && buf[4] == 0)
277 					cx.x_token = T_THEN;
278 				break;
279 			case 'e':
280 				if (buf[1] == 'n' && buf[2] == 'd'
281 				    && buf[3] == 'i' && buf[4] == 'f'
282 				    && buf[5] == 0)
283 					cx.x_token = T_ENDIF;
284 				else if (buf[1] == 'l' && buf[2] == 's')
285 					if (buf[3] == 'i' && buf[4] == 'f'
286 					    && buf[5] == 0)
287 						cx.x_token = T_ELSIF;
288 					else if (buf[3] == 'e' && buf[4] == 0)
289 						cx.x_token = T_ELSE;
290 				break;
291 			}
292 			if (cx.x_token == T_STR
293 			    && (cx.x_val.v_str = str_cpy(buf)) == 0) {
294 				p_memerror();
295 				cx.x_token = T_EOF;
296 			}
297 			state = -1;
298 			break;
299 		}
300 		break;
301 	case 3:				/* " quoted string */
302 		switch (c) {
303 		case '\n':
304 			(void) s_ungetc(c);
305 		case EOF:
306 		case '"':
307 			state = 2;
308 			break;
309 		case '\\':
310 			switch (c = s_gettok1()) {
311 			case -1:
312 			case -2:	/* newlines are invisible */
313 				break;
314 			default:
315 				if (p < buf + sizeof buf - 1)
316 					*p++ = c;
317 			}
318 			break;
319 		default:
320 			if (p < buf + sizeof buf - 1)
321 				*p++ = c;
322 			break;
323 		}
324 		break;
325 	case 4:				/* ' quoted string */
326 		switch (c) {
327 		case '\n':
328 			(void) s_ungetc(c);
329 		case EOF:
330 		case '\'':
331 			state = 2;
332 			break;
333 		case '\\':
334 			switch (c = s_gettok1()) {
335 			case -1:
336 			case -2:	/* newlines are invisible */
337 				break;
338 			default:
339 				if (p < buf + sizeof buf - 1)
340 					*p++ = c;
341 			}
342 			break;
343 		default:
344 			if (p < buf + sizeof buf - 1)
345 				*p++ = c;
346 			break;
347 		}
348 		break;
349 	case 10:			/* got 0 */
350 		switch (c) {
351 		case 'x':
352 		case 'X':
353 			cx.x_val.v_num = 0;
354 			state = 12;
355 			break;
356 		case '0': case '1': case '2': case '3': case '4':
357 		case '5': case '6': case '7':
358 			cx.x_val.v_num = c - '0';
359 			state = 13;
360 			break;
361 		case '8': case '9':
362 			cx.x_val.v_num = c - '0';
363 			state = 11;
364 			break;
365 		default:
366 			(void) s_ungetc(c);
367 			state = -1;
368 			cx.x_token = T_NUM;
369 		}
370 		break;
371 	case 11:			/* decimal number */
372 		switch (c) {
373 		case '0': case '1': case '2': case '3': case '4':
374 		case '5': case '6': case '7': case '8': case '9':
375 			cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
376 			break;
377 		default:
378 			(void) s_ungetc(c);
379 			state = -1;
380 			cx.x_token = T_NUM;
381 		}
382 		break;
383 	case 12:			/* hex number */
384 		switch (c) {
385 		case '0': case '1': case '2': case '3': case '4':
386 		case '5': case '6': case '7': case '8': case '9':
387 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
388 			break;
389 		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
390 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
391 			break;
392 		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
393 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
394 			break;
395 		default:
396 			(void) s_ungetc(c);
397 			state = -1;
398 			cx.x_token = T_NUM;
399 		}
400 		break;
401 	case 13:			/* octal number */
402 		switch (c) {
403 		case '0': case '1': case '2': case '3': case '4':
404 		case '5': case '6': case '7':
405 			cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
406 			break;
407 		default:
408 			(void) s_ungetc(c);
409 			state = -1;
410 			cx.x_token = T_NUM;
411 		}
412 		break;
413 	case 20:			/* got > */
414 		switch (c) {
415 		case '=':
416 			cx.x_token = T_GE;
417 			state = -1;
418 			break;
419 		case '>':
420 			cx.x_token = T_RS;
421 			state = -1;
422 			break;
423 		default:
424 			(void) s_ungetc(c);
425 			cx.x_token = T_GT;
426 			state = -1;
427 		}
428 		break;
429 	case 21:			/* got < */
430 		switch (c) {
431 		case '=':
432 			cx.x_token = T_LE;
433 			state = -1;
434 			break;
435 		case '<':
436 			cx.x_token = T_LS;
437 			state = -1;
438 			break;
439 		default:
440 			(void) s_ungetc(c);
441 			cx.x_token = T_LT;
442 			state = -1;
443 		}
444 		break;
445 	case 22:			/* got = */
446 		switch (c) {
447 		case '=':
448 			cx.x_token = T_EQ;
449 			state = -1;
450 			break;
451 		default:
452 			(void) s_ungetc(c);
453 			cx.x_token = T_ASSIGN;
454 			state = -1;
455 		}
456 		break;
457 	case 23:			/* got ! */
458 		switch (c) {
459 		case '=':
460 			cx.x_token = T_NE;
461 			state = -1;
462 			break;
463 		default:
464 			(void) s_ungetc(c);
465 			cx.x_token = T_NOT;
466 			state = -1;
467 		}
468 		break;
469 	case 24:			/* got & */
470 		switch (c) {
471 		case '&':
472 			cx.x_token = T_ANDAND;
473 			state = -1;
474 			break;
475 		default:
476 			(void) s_ungetc(c);
477 			cx.x_token = T_AND;
478 			state = -1;
479 		}
480 		break;
481 	case 25:			/* got | */
482 		switch (c) {
483 		case '|':
484 			cx.x_token = T_OROR;
485 			state = -1;
486 			break;
487 		default:
488 			(void) s_ungetc(c);
489 			cx.x_token = T_OR;
490 			state = -1;
491 		}
492 		break;
493 	case 26:			/* got $ */
494 		switch (c) {
495 		case '?':
496 			cx.x_token = T_DQ;
497 			state = -1;
498 			break;
499 		default:
500 			(void) s_ungetc(c);
501 			cx.x_token = T_DOLLAR;
502 			state = -1;
503 		}
504 		break;
505 	default:
506 		abort();
507 	}
508 	if (state >= 0)
509 		goto loop;
510 	return cx.x_token;
511 }
512 
/*
 * Scan the remainder of a backslash escape; the backslash itself has
 * already been read.
 *
 * Returns -1 if the input ended right after the backslash and -2 if
 * the backslash was followed by a newline.  Otherwise returns the
 * character the escape stands for: \b, \f, \n, \r and \t map to the
 * usual control characters, one to three octal digits give the
 * character with that value, and any other character stands for itself.
 */
s_gettok1()
{
	register ch;
	register value;
	register ndigits;

	ch = s_getc();			/* got \ */
	if (ch == EOF)
		return -1;
	if (ch == '\n')
		return -2;
	if (ch < '0' || ch > '7') {
		switch (ch) {
		case 'b':
			return '\b';
		case 'f':
			return '\f';
		case 'n':
			return '\n';
		case 'r':
			return '\r';
		case 't':
			return '\t';
		}
		return ch;
	}
	/* got \[0-7]: gather up to two more octal digits */
	value = ch - '0';
	for (ndigits = 1; ndigits < 3; ndigits++) {
		ch = s_getc();
		if (ch < '0' || ch > '7') {
			(void) s_ungetc(ch);
			break;
		}
		value = value * 8 + (ch - '0');
	}
	return value;
}
554