xref: /dragonfly/usr.bin/window/scanner.c (revision ec21d9fb)
1 /*	@(#)scanner.c	8.1 (Berkeley) 6/6/93	*/
2 /*	$NetBSD: scanner.c,v 1.9 2003/08/07 11:17:29 agc Exp $	*/
3 
4 /*
5  * Copyright (c) 1983, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Edward Wang at The University of California, Berkeley.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <stdlib.h>
37 
38 #include "defs.h"
39 #include "token.h"
40 #include "context.h"
41 #include "window_string.h"
42 
/*
 * Character-level helpers of the scanner, declared in the order in
 * which they are defined below.
 */
int	s_getc(void);		/* read one character from the current context */
int	s_ungetc(int c);	/* push one character back onto the context */
int	s_gettok1(void);	/* decode the character(s) following a backslash */
46 
47 int
48 s_getc(void)
49 {
50 	int c;
51 
52 	switch (cx.x_type) {
53 	case X_FILE:
54 		c = getc(cx.x_fp);
55 		if (cx.x_bol && c != EOF) {
56 			cx.x_bol = 0;
57 			cx.x_lineno++;
58 		}
59 		if (c == '\n')
60 			cx.x_bol = 1;
61 		return c;
62 	case X_BUF:
63 		if (*cx.x_bufp != 0)
64 			return *cx.x_bufp++ & 0xff;
65 		else
66 			return EOF;
67 	}
68 	/*NOTREACHED*/
69 	return(0);		/* XXX: placate gcc */
70 }
71 
72 int
73 s_ungetc(int c)
74 {
75 	if (c == EOF)
76 		return EOF;
77 	switch (cx.x_type) {
78 	case X_FILE:
79 		cx.x_bol = 0;
80 		return ungetc(c, cx.x_fp);
81 	case X_BUF:
82 		if (cx.x_bufp > cx.x_buf)
83 			return *--cx.x_bufp = c;
84 		else
85 			return EOF;
86 	}
87 	/*NOTREACHED*/
88 	return(0);		/* XXX: placate gcc */
89 }
90 
91 int
92 s_gettok(void)
93 {
94 	char buf[100];
95 	char *p = buf;
96 	int c;
97 	int state = 0;
98 
99 loop:
100 	c = s_getc();
101 	switch (state) {
102 	case 0:
103 		switch (c) {
104 		case ' ':
105 		case '\t':
106 			break;
107 		case '\n':
108 		case ';':
109 			cx.x_token = T_EOL;
110 			state = -1;
111 			break;
112 		case '#':
113 			state = 1;
114 			break;
115 		case EOF:
116 			cx.x_token = T_EOF;
117 			state = -1;
118 			break;
119 		case 'a': case 'b': case 'c': case 'd': case 'e':
120 		case 'f': case 'g': case 'h': case 'i': case 'j':
121 		case 'k': case 'l': case 'm': case 'n': case 'o':
122 		case 'p': case 'q': case 'r': case 's': case 't':
123 		case 'u': case 'v': case 'w': case 'x': case 'y':
124 		case 'z':
125 		case 'A': case 'B': case 'C': case 'D': case 'E':
126 		case 'F': case 'G': case 'H': case 'I': case 'J':
127 		case 'K': case 'L': case 'M': case 'N': case 'O':
128 		case 'P': case 'Q': case 'R': case 'S': case 'T':
129 		case 'U': case 'V': case 'W': case 'X': case 'Y':
130 		case 'Z':
131 		case '_': case '.':
132 			*p++ = c;
133 			state = 2;
134 			break;
135 		case '"':
136 			state = 3;
137 			break;
138 		case '\'':
139 			state = 4;
140 			break;
141 		case '\\':
142 			switch (c = s_gettok1()) {
143 			case -1:
144 				break;
145 			case -2:
146 				state = 0;
147 				break;
148 			default:
149 				*p++ = c;
150 				state = 2;
151 			}
152 			break;
153 		case '0':
154 			cx.x_val.v_num = 0;
155 			state = 10;
156 			break;
157 		case '1': case '2': case '3': case '4':
158 		case '5': case '6': case '7': case '8': case '9':
159 			cx.x_val.v_num = c - '0';
160 			state = 11;
161 			break;
162 		case '>':
163 			state = 20;
164 			break;
165 		case '<':
166 			state = 21;
167 			break;
168 		case '=':
169 			state = 22;
170 			break;
171 		case '!':
172 			state = 23;
173 			break;
174 		case '&':
175 			state = 24;
176 			break;
177 		case '|':
178 			state = 25;
179 			break;
180 		case '$':
181 			state = 26;
182 			break;
183 		case '~':
184 			cx.x_token = T_COMP;
185 			state = -1;
186 			break;
187 		case '+':
188 			cx.x_token = T_PLUS;
189 			state = -1;
190 			break;
191 		case '-':
192 			cx.x_token = T_MINUS;
193 			state = -1;
194 			break;
195 		case '*':
196 			cx.x_token = T_MUL;
197 			state = -1;
198 			break;
199 		case '/':
200 			cx.x_token = T_DIV;
201 			state = -1;
202 			break;
203 		case '%':
204 			cx.x_token = T_MOD;
205 			state = -1;
206 			break;
207 		case '^':
208 			cx.x_token = T_XOR;
209 			state = -1;
210 			break;
211 		case '(':
212 			cx.x_token = T_LP;
213 			state = -1;
214 			break;
215 		case ')':
216 			cx.x_token = T_RP;
217 			state = -1;
218 			break;
219 		case ',':
220 			cx.x_token = T_COMMA;
221 			state = -1;
222 			break;
223 		case '?':
224 			cx.x_token = T_QUEST;
225 			state = -1;
226 			break;
227 		case ':':
228 			cx.x_token = T_COLON;
229 			state = -1;
230 			break;
231 		case '[':
232 			cx.x_token = T_LB;
233 			state = -1;
234 			break;
235 		case ']':
236 			cx.x_token = T_RB;
237 			state = -1;
238 			break;
239 		default:
240 			cx.x_val.v_num = c;
241 			cx.x_token = T_CHAR;
242 			state = -1;
243 			break;
244 		}
245 		break;
246 	case 1:				/* got # */
247 		if (c == '\n' || c == EOF) {
248 			(void) s_ungetc(c);
249 			state = 0;
250 		}
251 		break;
252 	case 2:				/* unquoted string */
253 		switch (c) {
254 		case 'a': case 'b': case 'c': case 'd': case 'e':
255 		case 'f': case 'g': case 'h': case 'i': case 'j':
256 		case 'k': case 'l': case 'm': case 'n': case 'o':
257 		case 'p': case 'q': case 'r': case 's': case 't':
258 		case 'u': case 'v': case 'w': case 'x': case 'y':
259 		case 'z':
260 		case 'A': case 'B': case 'C': case 'D': case 'E':
261 		case 'F': case 'G': case 'H': case 'I': case 'J':
262 		case 'K': case 'L': case 'M': case 'N': case 'O':
263 		case 'P': case 'Q': case 'R': case 'S': case 'T':
264 		case 'U': case 'V': case 'W': case 'X': case 'Y':
265 		case 'Z':
266 		case '_': case '.':
267 		case '0': case '1': case '2': case '3': case '4':
268 		case '5': case '6': case '7': case '8': case '9':
269 			if (p < buf + sizeof buf - 1)
270 				*p++ = c;
271 			break;
272 		case '"':
273 			state = 3;
274 			break;
275 		case '\'':
276 			state = 4;
277 			break;
278 		case '\\':
279 			switch (c = s_gettok1()) {
280 			case -2:
281 				(void) s_ungetc(' ');
282 			case -1:
283 				break;
284 			default:
285 				if (p < buf + sizeof buf - 1)
286 					*p++ = c;
287 			}
288 			break;
289 		default:
290 			(void) s_ungetc(c);
291 			/* FALLTHROUGH */
292 		case EOF:
293 			*p = 0;
294 			cx.x_token = T_STR;
295 			switch (*buf) {
296 			case 'i':
297 				if (buf[1] == 'f' && buf[2] == 0)
298 					cx.x_token = T_IF;
299 				break;
300 			case 't':
301 				if (buf[1] == 'h' && buf[2] == 'e'
302 				    && buf[3] == 'n' && buf[4] == 0)
303 					cx.x_token = T_THEN;
304 				break;
305 			case 'e':
306 				if (buf[1] == 'n' && buf[2] == 'd'
307 				    && buf[3] == 'i' && buf[4] == 'f'
308 				    && buf[5] == 0)
309 					cx.x_token = T_ENDIF;
310 				else {
311 					if (buf[1] == 'l' && buf[2] == 's') {
312 						if (buf[3] == 'i'
313 						&&  buf[4] == 'f'
314 						&&  buf[5] == 0)
315 							cx.x_token = T_ELSIF;
316 						else {
317 							if (buf[3] == 'e'
318 							&& buf[4] == 0)
319 								cx.x_token =
320 								    T_ELSE;
321 						}
322 					}
323 				}
324 				break;
325 			}
326 			if (cx.x_token == T_STR
327 			    && (cx.x_val.v_str = str_cpy(buf)) == 0) {
328 				p_memerror();
329 				cx.x_token = T_EOF;
330 			}
331 			state = -1;
332 			break;
333 		}
334 		break;
335 	case 3:				/* " quoted string */
336 		switch (c) {
337 		case '\n':
338 			(void) s_ungetc(c);
339 			/* FALLTHROUGH */
340 		case EOF:
341 		case '"':
342 			state = 2;
343 			break;
344 		case '\\':
345 			switch (c = s_gettok1()) {
346 			case -1:
347 			case -2:	/* newlines are invisible */
348 				break;
349 			default:
350 				if (p < buf + sizeof buf - 1)
351 					*p++ = c;
352 			}
353 			break;
354 		default:
355 			if (p < buf + sizeof buf - 1)
356 				*p++ = c;
357 			break;
358 		}
359 		break;
360 	case 4:				/* ' quoted string */
361 		switch (c) {
362 		case '\n':
363 			(void) s_ungetc(c);
364 			/* FALLTHROUGH */
365 		case EOF:
366 		case '\'':
367 			state = 2;
368 			break;
369 		case '\\':
370 			switch (c = s_gettok1()) {
371 			case -1:
372 			case -2:	/* newlines are invisible */
373 				break;
374 			default:
375 				if (p < buf + sizeof buf - 1)
376 					*p++ = c;
377 			}
378 			break;
379 		default:
380 			if (p < buf + sizeof buf - 1)
381 				*p++ = c;
382 			break;
383 		}
384 		break;
385 	case 10:			/* got 0 */
386 		switch (c) {
387 		case 'x':
388 		case 'X':
389 			cx.x_val.v_num = 0;
390 			state = 12;
391 			break;
392 		case '0': case '1': case '2': case '3': case '4':
393 		case '5': case '6': case '7':
394 			cx.x_val.v_num = c - '0';
395 			state = 13;
396 			break;
397 		case '8': case '9':
398 			cx.x_val.v_num = c - '0';
399 			state = 11;
400 			break;
401 		default:
402 			(void) s_ungetc(c);
403 			state = -1;
404 			cx.x_token = T_NUM;
405 		}
406 		break;
407 	case 11:			/* decimal number */
408 		switch (c) {
409 		case '0': case '1': case '2': case '3': case '4':
410 		case '5': case '6': case '7': case '8': case '9':
411 			cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
412 			break;
413 		default:
414 			(void) s_ungetc(c);
415 			state = -1;
416 			cx.x_token = T_NUM;
417 		}
418 		break;
419 	case 12:			/* hex number */
420 		switch (c) {
421 		case '0': case '1': case '2': case '3': case '4':
422 		case '5': case '6': case '7': case '8': case '9':
423 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
424 			break;
425 		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
426 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
427 			break;
428 		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
429 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
430 			break;
431 		default:
432 			(void) s_ungetc(c);
433 			state = -1;
434 			cx.x_token = T_NUM;
435 		}
436 		break;
437 	case 13:			/* octal number */
438 		switch (c) {
439 		case '0': case '1': case '2': case '3': case '4':
440 		case '5': case '6': case '7':
441 			cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
442 			break;
443 		default:
444 			(void) s_ungetc(c);
445 			state = -1;
446 			cx.x_token = T_NUM;
447 		}
448 		break;
449 	case 20:			/* got > */
450 		switch (c) {
451 		case '=':
452 			cx.x_token = T_GE;
453 			state = -1;
454 			break;
455 		case '>':
456 			cx.x_token = T_RS;
457 			state = -1;
458 			break;
459 		default:
460 			(void) s_ungetc(c);
461 			cx.x_token = T_GT;
462 			state = -1;
463 		}
464 		break;
465 	case 21:			/* got < */
466 		switch (c) {
467 		case '=':
468 			cx.x_token = T_LE;
469 			state = -1;
470 			break;
471 		case '<':
472 			cx.x_token = T_LS;
473 			state = -1;
474 			break;
475 		default:
476 			(void) s_ungetc(c);
477 			cx.x_token = T_LT;
478 			state = -1;
479 		}
480 		break;
481 	case 22:			/* got = */
482 		switch (c) {
483 		case '=':
484 			cx.x_token = T_EQ;
485 			state = -1;
486 			break;
487 		default:
488 			(void) s_ungetc(c);
489 			cx.x_token = T_ASSIGN;
490 			state = -1;
491 		}
492 		break;
493 	case 23:			/* got ! */
494 		switch (c) {
495 		case '=':
496 			cx.x_token = T_NE;
497 			state = -1;
498 			break;
499 		default:
500 			(void) s_ungetc(c);
501 			cx.x_token = T_NOT;
502 			state = -1;
503 		}
504 		break;
505 	case 24:			/* got & */
506 		switch (c) {
507 		case '&':
508 			cx.x_token = T_ANDAND;
509 			state = -1;
510 			break;
511 		default:
512 			(void) s_ungetc(c);
513 			cx.x_token = T_AND;
514 			state = -1;
515 		}
516 		break;
517 	case 25:			/* got | */
518 		switch (c) {
519 		case '|':
520 			cx.x_token = T_OROR;
521 			state = -1;
522 			break;
523 		default:
524 			(void) s_ungetc(c);
525 			cx.x_token = T_OR;
526 			state = -1;
527 		}
528 		break;
529 	case 26:			/* got $ */
530 		switch (c) {
531 		case '?':
532 			cx.x_token = T_DQ;
533 			state = -1;
534 			break;
535 		default:
536 			(void) s_ungetc(c);
537 			cx.x_token = T_DOLLAR;
538 			state = -1;
539 		}
540 		break;
541 	default:
542 		abort();
543 	}
544 	if (state >= 0)
545 		goto loop;
546 	return cx.x_token;
547 }
548 
/*
 * Decode a backslash escape; the backslash itself has already been
 * consumed by the caller.
 *
 * Returns:
 *	-1	end of input directly after the backslash
 *	-2	the backslash was followed by a newline
 *	the control character for \b, \f, \n, \r and \t
 *	the numeric value of an octal escape of one to three digits
 *	otherwise the escaped character itself
 *
 * A character that ends an octal escape early is pushed back.
 */
int
s_gettok1(void)
{
	int ch;
	int value;
	int remaining;

	ch = s_getc();			/* character after the backslash */
	if (ch == EOF)
		return -1;
	if (ch == '\n')
		return -2;

	if (ch < '0' || ch > '7') {
		/* not octal: either a named escape or a literal character */
		switch (ch) {
		case 'b':
			return '\b';
		case 'f':
			return '\f';
		case 'n':
			return '\n';
		case 'r':
			return '\r';
		case 't':
			return '\t';
		}
		return ch;
	}

	/* octal escape: the first digit is in hand, up to two more may follow */
	value = ch - '0';
	for (remaining = 2; remaining > 0; remaining--) {
		ch = s_getc();
		if (ch < '0' || ch > '7') {
			(void) s_ungetc(ch);
			break;
		}
		value = value * 8 + ch - '0';
	}
	return value;
}
591