xref: /dragonfly/usr.bin/window/scanner.c (revision d257a695)
1 /*	$NetBSD: scanner.c,v 1.9 2003/08/07 11:17:29 agc Exp $	*/
2 
3 /*
4  * Copyright (c) 1983, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Edward Wang at The University of California, Berkeley.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/cdefs.h>
36 #ifndef lint
37 #if 0
38 static char sccsid[] = "@(#)scanner.c	8.1 (Berkeley) 6/6/93";
39 #else
40 __RCSID("$NetBSD: scanner.c,v 1.9 2003/08/07 11:17:29 agc Exp $");
41 #endif
42 #endif /* not lint */
43 
44 #include <stdlib.h>
45 
46 #include "defs.h"
47 #include "token.h"
48 #include "context.h"
49 #include "window_string.h"
50 
/* Low-level scanner helpers, defined later in this file. */
/* Read the next input character from the current context (EOF at end). */
51 int	s_getc(void);
/* Decode the escape sequence that follows a backslash. */
52 int	s_gettok1(void);
/* Push one character back onto the current context's input. */
53 int	s_ungetc(int);
54 
55 int
56 s_getc(void)
57 {
58 	int c;
59 
60 	switch (cx.x_type) {
61 	case X_FILE:
62 		c = getc(cx.x_fp);
63 		if (cx.x_bol && c != EOF) {
64 			cx.x_bol = 0;
65 			cx.x_lineno++;
66 		}
67 		if (c == '\n')
68 			cx.x_bol = 1;
69 		return c;
70 	case X_BUF:
71 		if (*cx.x_bufp != 0)
72 			return *cx.x_bufp++ & 0xff;
73 		else
74 			return EOF;
75 	}
76 	/*NOTREACHED*/
77 	return(0);		/* XXX: placate gcc */
78 }
79 
80 int
81 s_ungetc(int c)
82 {
83 	if (c == EOF)
84 		return EOF;
85 	switch (cx.x_type) {
86 	case X_FILE:
87 		cx.x_bol = 0;
88 		return ungetc(c, cx.x_fp);
89 	case X_BUF:
90 		if (cx.x_bufp > cx.x_buf)
91 			return *--cx.x_bufp = c;
92 		else
93 			return EOF;
94 	}
95 	/*NOTREACHED*/
96 	return(0);		/* XXX: placate gcc */
97 }
98 
/*
 * Scan the next token from the current input context (cx).
 *
 * The token type is stored in cx.x_token and also returned.  T_NUM and
 * T_CHAR leave their value in cx.x_val.v_num; T_STR leaves a str_cpy()
 * copy of the text in cx.x_val.v_str.  If str_cpy() returns 0,
 * p_memerror() is called and T_EOF is returned instead.
 *
 * The scanner is a state machine fed one character per iteration by
 * s_getc(); lookahead is returned to the input with s_ungetc().
 *
 *	 0	between tokens
 *	 1	inside a # comment
 *	 2	unquoted string or keyword
 *	 3	inside a "..." string
 *	 4	inside a '...' string
 *	10	after a leading 0
 *	11	decimal number
 *	12	hexadecimal number
 *	13	octal number
 *	20-26	after the first character of >, <, =, !, &, |, $
 *	-1	token complete
 *
 * String text is collected in a local 100-byte buffer; characters that
 * do not fit are silently dropped.  Quoted pieces return to state 2 when
 * closed, so adjacent quoted and unquoted pieces form a single string,
 * and the keyword check (if, then, endif, elsif, else) is applied to the
 * collected text regardless of whether it was quoted.
 */
99 int
100 s_gettok(void)
101 {
	/* Text of the string token being collected (at most 99 chars + NUL). */
102 	char buf[100];
103 	char *p = buf;
104 	int c;
105 	int state = 0;
106 
107 loop:
108 	c = s_getc();
109 	switch (state) {
	/* Between tokens: skip blanks and dispatch on the first character. */
110 	case 0:
111 		switch (c) {
112 		case ' ':
113 		case '\t':
114 			break;
115 		case '\n':
116 		case ';':
117 			cx.x_token = T_EOL;
118 			state = -1;
119 			break;
120 		case '#':
121 			state = 1;
122 			break;
123 		case EOF:
124 			cx.x_token = T_EOF;
125 			state = -1;
126 			break;
127 		case 'a': case 'b': case 'c': case 'd': case 'e':
128 		case 'f': case 'g': case 'h': case 'i': case 'j':
129 		case 'k': case 'l': case 'm': case 'n': case 'o':
130 		case 'p': case 'q': case 'r': case 's': case 't':
131 		case 'u': case 'v': case 'w': case 'x': case 'y':
132 		case 'z':
133 		case 'A': case 'B': case 'C': case 'D': case 'E':
134 		case 'F': case 'G': case 'H': case 'I': case 'J':
135 		case 'K': case 'L': case 'M': case 'N': case 'O':
136 		case 'P': case 'Q': case 'R': case 'S': case 'T':
137 		case 'U': case 'V': case 'W': case 'X': case 'Y':
138 		case 'Z':
139 		case '_': case '.':
			/* buf is still empty here, so no bounds check is needed. */
140 			*p++ = c;
141 			state = 2;
142 			break;
143 		case '"':
144 			state = 3;
145 			break;
146 		case '\'':
147 			state = 4;
148 			break;
149 		case '\\':
			/*
			 * s_gettok1() returns -1 at EOF, -2 for an escaped
			 * newline (ignored between tokens), otherwise the
			 * escaped character, which starts an unquoted string.
			 */
150 			switch (c = s_gettok1()) {
151 			case -1:
152 				break;
153 			case -2:
154 				state = 0;
155 				break;
156 			default:
157 				*p++ = c;
158 				state = 2;
159 			}
160 			break;
161 		case '0':
162 			cx.x_val.v_num = 0;
163 			state = 10;
164 			break;
165 		case '1': case '2': case '3': case '4':
166 		case '5': case '6': case '7': case '8': case '9':
167 			cx.x_val.v_num = c - '0';
168 			state = 11;
169 			break;
		/* These may begin a two-character operator: look ahead. */
170 		case '>':
171 			state = 20;
172 			break;
173 		case '<':
174 			state = 21;
175 			break;
176 		case '=':
177 			state = 22;
178 			break;
179 		case '!':
180 			state = 23;
181 			break;
182 		case '&':
183 			state = 24;
184 			break;
185 		case '|':
186 			state = 25;
187 			break;
188 		case '$':
189 			state = 26;
190 			break;
		/* Single-character operators and punctuation. */
191 		case '~':
192 			cx.x_token = T_COMP;
193 			state = -1;
194 			break;
195 		case '+':
196 			cx.x_token = T_PLUS;
197 			state = -1;
198 			break;
199 		case '-':
200 			cx.x_token = T_MINUS;
201 			state = -1;
202 			break;
203 		case '*':
204 			cx.x_token = T_MUL;
205 			state = -1;
206 			break;
207 		case '/':
208 			cx.x_token = T_DIV;
209 			state = -1;
210 			break;
211 		case '%':
212 			cx.x_token = T_MOD;
213 			state = -1;
214 			break;
215 		case '^':
216 			cx.x_token = T_XOR;
217 			state = -1;
218 			break;
219 		case '(':
220 			cx.x_token = T_LP;
221 			state = -1;
222 			break;
223 		case ')':
224 			cx.x_token = T_RP;
225 			state = -1;
226 			break;
227 		case ',':
228 			cx.x_token = T_COMMA;
229 			state = -1;
230 			break;
231 		case '?':
232 			cx.x_token = T_QUEST;
233 			state = -1;
234 			break;
235 		case ':':
236 			cx.x_token = T_COLON;
237 			state = -1;
238 			break;
239 		case '[':
240 			cx.x_token = T_LB;
241 			state = -1;
242 			break;
243 		case ']':
244 			cx.x_token = T_RB;
245 			state = -1;
246 			break;
247 		default:
			/* Anything else is returned as T_CHAR with its code in v_num. */
248 			cx.x_val.v_num = c;
249 			cx.x_token = T_CHAR;
250 			state = -1;
251 			break;
252 		}
253 		break;
	/*
	 * Comment: discard everything up to the end of the line.  The
	 * newline is pushed back so that state 0 turns it into T_EOL
	 * (s_ungetc() ignores EOF, which is simply read again).
	 */
254 	case 1:				/* got # */
255 		if (c == '\n' || c == EOF) {
256 			(void) s_ungetc(c);
257 			state = 0;
258 		}
259 		break;
260 	case 2:				/* unquoted string */
261 		switch (c) {
262 		case 'a': case 'b': case 'c': case 'd': case 'e':
263 		case 'f': case 'g': case 'h': case 'i': case 'j':
264 		case 'k': case 'l': case 'm': case 'n': case 'o':
265 		case 'p': case 'q': case 'r': case 's': case 't':
266 		case 'u': case 'v': case 'w': case 'x': case 'y':
267 		case 'z':
268 		case 'A': case 'B': case 'C': case 'D': case 'E':
269 		case 'F': case 'G': case 'H': case 'I': case 'J':
270 		case 'K': case 'L': case 'M': case 'N': case 'O':
271 		case 'P': case 'Q': case 'R': case 'S': case 'T':
272 		case 'U': case 'V': case 'W': case 'X': case 'Y':
273 		case 'Z':
274 		case '_': case '.':
275 		case '0': case '1': case '2': case '3': case '4':
276 		case '5': case '6': case '7': case '8': case '9':
			/* Keep room for the terminating NUL; excess is dropped. */
277 			if (p < buf + sizeof buf - 1)
278 				*p++ = c;
279 			break;
280 		case '"':
281 			state = 3;
282 			break;
283 		case '\'':
284 			state = 4;
285 			break;
286 		case '\\':
			/*
			 * An escaped newline ends the string: a blank is pushed
			 * back so the next iteration takes the default branch.
			 * NOTE(review): for X_FILE input s_ungetc() clears
			 * cx.x_bol, which s_getc() had just set on reading the
			 * newline, so the following line appears not to be
			 * counted in cx.x_lineno -- confirm.
			 */
287 			switch (c = s_gettok1()) {
288 			case -2:
289 				(void) s_ungetc(' ');
				/* FALLTHROUGH */
290 			case -1:
291 				break;
292 			default:
293 				if (p < buf + sizeof buf - 1)
294 					*p++ = c;
295 			}
296 			break;
297 		default:
			/* The terminating character belongs to the next token. */
298 			(void) s_ungetc(c);
			/* FALLTHROUGH */
299 		case EOF:
			/* End of string: terminate it, then check for a keyword. */
300 			*p = 0;
301 			cx.x_token = T_STR;
302 			switch (*buf) {
303 			case 'i':
304 				if (buf[1] == 'f' && buf[2] == 0)
305 					cx.x_token = T_IF;
306 				break;
307 			case 't':
308 				if (buf[1] == 'h' && buf[2] == 'e'
309 				    && buf[3] == 'n' && buf[4] == 0)
310 					cx.x_token = T_THEN;
311 				break;
312 			case 'e':
313 				if (buf[1] == 'n' && buf[2] == 'd'
314 				    && buf[3] == 'i' && buf[4] == 'f'
315 				    && buf[5] == 0)
316 					cx.x_token = T_ENDIF;
317 				else {
318 					if (buf[1] == 'l' && buf[2] == 's') {
319 						if (buf[3] == 'i'
320 						&&  buf[4] == 'f'
321 						&&  buf[5] == 0)
322 							cx.x_token = T_ELSIF;
323 						else {
324 							if (buf[3] == 'e'
325 							&& buf[4] == 0)
326 								cx.x_token =
327 								    T_ELSE;
328 						}
329 					}
330 				}
331 				break;
332 			}
			/* Only plain strings need a copy of the text. */
333 			if (cx.x_token == T_STR
334 			    && (cx.x_val.v_str = str_cpy(buf)) == 0) {
335 				p_memerror();
336 				cx.x_token = T_EOF;
337 			}
338 			state = -1;
339 			break;
340 		}
341 		break;
	/*
	 * Quoted strings: a newline or EOF closes the string implicitly
	 * (the newline is pushed back to become T_EOL).  Closing returns
	 * to state 2 so that text directly following the quote is
	 * appended to the same string.
	 */
342 	case 3:				/* " quoted string */
343 		switch (c) {
344 		case '\n':
345 			(void) s_ungetc(c);
			/* FALLTHROUGH */
346 		case EOF:
347 		case '"':
348 			state = 2;
349 			break;
350 		case '\\':
351 			switch (c = s_gettok1()) {
352 			case -1:
353 			case -2:	/* newlines are invisible */
354 				break;
355 			default:
356 				if (p < buf + sizeof buf - 1)
357 					*p++ = c;
358 			}
359 			break;
360 		default:
361 			if (p < buf + sizeof buf - 1)
362 				*p++ = c;
363 			break;
364 		}
365 		break;
	/* Same as state 3, but terminated by a single quote. */
366 	case 4:				/* ' quoted string */
367 		switch (c) {
368 		case '\n':
369 			(void) s_ungetc(c);
			/* FALLTHROUGH */
370 		case EOF:
371 		case '\'':
372 			state = 2;
373 			break;
374 		case '\\':
375 			switch (c = s_gettok1()) {
376 			case -1:
377 			case -2:	/* newlines are invisible */
378 				break;
379 			default:
380 				if (p < buf + sizeof buf - 1)
381 					*p++ = c;
382 			}
383 			break;
384 		default:
385 			if (p < buf + sizeof buf - 1)
386 				*p++ = c;
387 			break;
388 		}
389 		break;
	/*
	 * Numbers.  A leading 0 selects the base: 0x/0X is hexadecimal,
	 * 0 followed by 0-7 is octal, 0 followed by 8 or 9 is read as
	 * decimal, and anything else leaves the value 0.
	 * NOTE(review): none of the number states checks for overflow of
	 * cx.x_val.v_num (its type is not visible here) -- confirm that
	 * this is acceptable.
	 */
390 	case 10:			/* got 0 */
391 		switch (c) {
392 		case 'x':
393 		case 'X':
394 			cx.x_val.v_num = 0;
395 			state = 12;
396 			break;
397 		case '0': case '1': case '2': case '3': case '4':
398 		case '5': case '6': case '7':
399 			cx.x_val.v_num = c - '0';
400 			state = 13;
401 			break;
402 		case '8': case '9':
403 			cx.x_val.v_num = c - '0';
404 			state = 11;
405 			break;
406 		default:
407 			(void) s_ungetc(c);
408 			state = -1;
409 			cx.x_token = T_NUM;
410 		}
411 		break;
412 	case 11:			/* decimal number */
413 		switch (c) {
414 		case '0': case '1': case '2': case '3': case '4':
415 		case '5': case '6': case '7': case '8': case '9':
416 			cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
417 			break;
418 		default:
419 			(void) s_ungetc(c);
420 			state = -1;
421 			cx.x_token = T_NUM;
422 		}
423 		break;
424 	case 12:			/* hex number */
425 		switch (c) {
426 		case '0': case '1': case '2': case '3': case '4':
427 		case '5': case '6': case '7': case '8': case '9':
428 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
429 			break;
430 		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
431 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
432 			break;
433 		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
434 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
435 			break;
436 		default:
437 			(void) s_ungetc(c);
438 			state = -1;
439 			cx.x_token = T_NUM;
440 		}
441 		break;
442 	case 13:			/* octal number */
443 		switch (c) {
444 		case '0': case '1': case '2': case '3': case '4':
445 		case '5': case '6': case '7':
446 			cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
447 			break;
448 		default:
449 			(void) s_ungetc(c);
450 			state = -1;
451 			cx.x_token = T_NUM;
452 		}
453 		break;
	/*
	 * Operators with one character of lookahead.  If the second
	 * character does not complete a two-character operator it is
	 * pushed back and the single-character token is returned.
	 */
454 	case 20:			/* got > */
455 		switch (c) {
456 		case '=':
457 			cx.x_token = T_GE;
458 			state = -1;
459 			break;
460 		case '>':
461 			cx.x_token = T_RS;
462 			state = -1;
463 			break;
464 		default:
465 			(void) s_ungetc(c);
466 			cx.x_token = T_GT;
467 			state = -1;
468 		}
469 		break;
470 	case 21:			/* got < */
471 		switch (c) {
472 		case '=':
473 			cx.x_token = T_LE;
474 			state = -1;
475 			break;
476 		case '<':
477 			cx.x_token = T_LS;
478 			state = -1;
479 			break;
480 		default:
481 			(void) s_ungetc(c);
482 			cx.x_token = T_LT;
483 			state = -1;
484 		}
485 		break;
486 	case 22:			/* got = */
487 		switch (c) {
488 		case '=':
489 			cx.x_token = T_EQ;
490 			state = -1;
491 			break;
492 		default:
493 			(void) s_ungetc(c);
494 			cx.x_token = T_ASSIGN;
495 			state = -1;
496 		}
497 		break;
498 	case 23:			/* got ! */
499 		switch (c) {
500 		case '=':
501 			cx.x_token = T_NE;
502 			state = -1;
503 			break;
504 		default:
505 			(void) s_ungetc(c);
506 			cx.x_token = T_NOT;
507 			state = -1;
508 		}
509 		break;
510 	case 24:			/* got & */
511 		switch (c) {
512 		case '&':
513 			cx.x_token = T_ANDAND;
514 			state = -1;
515 			break;
516 		default:
517 			(void) s_ungetc(c);
518 			cx.x_token = T_AND;
519 			state = -1;
520 		}
521 		break;
522 	case 25:			/* got | */
523 		switch (c) {
524 		case '|':
525 			cx.x_token = T_OROR;
526 			state = -1;
527 			break;
528 		default:
529 			(void) s_ungetc(c);
530 			cx.x_token = T_OR;
531 			state = -1;
532 		}
533 		break;
534 	case 26:			/* got $ */
535 		switch (c) {
536 		case '?':
537 			cx.x_token = T_DQ;
538 			state = -1;
539 			break;
540 		default:
541 			(void) s_ungetc(c);
542 			cx.x_token = T_DOLLAR;
543 			state = -1;
544 		}
545 		break;
546 	default:
		/* No transition above produces any other state. */
547 		abort();
548 	}
	/* A negative state means cx.x_token now holds a complete token. */
549 	if (state >= 0)
550 		goto loop;
551 	return cx.x_token;
552 }
553 
/*
 * Decode the escape sequence that follows a backslash already consumed
 * by the caller.
 *
 * Returns:
 *	-1	end of input directly after the backslash
 *	-2	the backslash escaped a newline
 *	else	the character the escape stands for: the usual control
 *		characters for b, f, n, r and t, the value of one to three
 *		octal digits, or the escaped character itself.
 */
int
s_gettok1(void)
{
	int ch;
	int value;
	int ndigits;

	ch = s_getc();
	if (ch == EOF)
		return -1;
	if (ch == '\n')
		return -2;

	if (ch < '0' || ch > '7') {
		/* Not an octal escape: translate the mnemonic, if it is one. */
		switch (ch) {
		case 'b':
			return '\b';
		case 'f':
			return '\f';
		case 'n':
			return '\n';
		case 'r':
			return '\r';
		case 't':
			return '\t';
		}
		return ch;
	}

	/*
	 * Octal escape: one digit has been read, accept up to two more.
	 * The first character that is not an octal digit goes back to the
	 * input and ends the number.
	 */
	value = ch - '0';
	for (ndigits = 1; ndigits < 3; ndigits++) {
		ch = s_getc();
		if (ch < '0' || ch > '7') {
			(void) s_ungetc(ch);
			break;
		}
		value = value * 8 + (ch - '0');
	}
	return value;
}
596