xref: /dragonfly/usr.bin/window/scanner.c (revision 6e278935)
1 /*	@(#)scanner.c	8.1 (Berkeley) 6/6/93	*/
2 /*	$NetBSD: scanner.c,v 1.9 2003/08/07 11:17:29 agc Exp $	*/
3 
4 /*
5  * Copyright (c) 1983, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Edward Wang at The University of California, Berkeley.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
#include <stdlib.h>
#include <string.h>

#include "defs.h"
#include "token.h"
#include "context.h"
#include "window_string.h"
42 
43 int	s_getc(void);
44 int	s_gettok1(void);
45 int	s_ungetc(int);
46 
47 int
48 s_getc(void)
49 {
50 	int c;
51 
52 	switch (cx.x_type) {
53 	case X_FILE:
54 		c = getc(cx.x_fp);
55 		if (cx.x_bol && c != EOF) {
56 			cx.x_bol = 0;
57 			cx.x_lineno++;
58 		}
59 		if (c == '\n')
60 			cx.x_bol = 1;
61 		return c;
62 	case X_BUF:
63 		if (*cx.x_bufp != 0)
64 			return *cx.x_bufp++ & 0xff;
65 		else
66 			return EOF;
67 	}
68 	/*NOTREACHED*/
69 	return(0);		/* XXX: placate gcc */
70 }
71 
72 int
73 s_ungetc(int c)
74 {
75 	if (c == EOF)
76 		return EOF;
77 	switch (cx.x_type) {
78 	case X_FILE:
79 		cx.x_bol = 0;
80 		return ungetc(c, cx.x_fp);
81 	case X_BUF:
82 		if (cx.x_bufp > cx.x_buf)
83 			return *--cx.x_bufp = c;
84 		else
85 			return EOF;
86 	}
87 	/*NOTREACHED*/
88 	return(0);		/* XXX: placate gcc */
89 }
90 
91 int
92 s_gettok(void)
93 {
94 	char buf[100];
95 	char *p = buf;
96 	int c;
97 	int state = 0;
98 
99 loop:
100 	c = s_getc();
101 	switch (state) {
102 	case 0:
103 		switch (c) {
104 		case ' ':
105 		case '\t':
106 			break;
107 		case '\n':
108 		case ';':
109 			cx.x_token = T_EOL;
110 			state = -1;
111 			break;
112 		case '#':
113 			state = 1;
114 			break;
115 		case EOF:
116 			cx.x_token = T_EOF;
117 			state = -1;
118 			break;
119 		case 'a': case 'b': case 'c': case 'd': case 'e':
120 		case 'f': case 'g': case 'h': case 'i': case 'j':
121 		case 'k': case 'l': case 'm': case 'n': case 'o':
122 		case 'p': case 'q': case 'r': case 's': case 't':
123 		case 'u': case 'v': case 'w': case 'x': case 'y':
124 		case 'z':
125 		case 'A': case 'B': case 'C': case 'D': case 'E':
126 		case 'F': case 'G': case 'H': case 'I': case 'J':
127 		case 'K': case 'L': case 'M': case 'N': case 'O':
128 		case 'P': case 'Q': case 'R': case 'S': case 'T':
129 		case 'U': case 'V': case 'W': case 'X': case 'Y':
130 		case 'Z':
131 		case '_': case '.':
132 			*p++ = c;
133 			state = 2;
134 			break;
135 		case '"':
136 			state = 3;
137 			break;
138 		case '\'':
139 			state = 4;
140 			break;
141 		case '\\':
142 			switch (c = s_gettok1()) {
143 			case -1:
144 				break;
145 			case -2:
146 				state = 0;
147 				break;
148 			default:
149 				*p++ = c;
150 				state = 2;
151 			}
152 			break;
153 		case '0':
154 			cx.x_val.v_num = 0;
155 			state = 10;
156 			break;
157 		case '1': case '2': case '3': case '4':
158 		case '5': case '6': case '7': case '8': case '9':
159 			cx.x_val.v_num = c - '0';
160 			state = 11;
161 			break;
162 		case '>':
163 			state = 20;
164 			break;
165 		case '<':
166 			state = 21;
167 			break;
168 		case '=':
169 			state = 22;
170 			break;
171 		case '!':
172 			state = 23;
173 			break;
174 		case '&':
175 			state = 24;
176 			break;
177 		case '|':
178 			state = 25;
179 			break;
180 		case '$':
181 			state = 26;
182 			break;
183 		case '~':
184 			cx.x_token = T_COMP;
185 			state = -1;
186 			break;
187 		case '+':
188 			cx.x_token = T_PLUS;
189 			state = -1;
190 			break;
191 		case '-':
192 			cx.x_token = T_MINUS;
193 			state = -1;
194 			break;
195 		case '*':
196 			cx.x_token = T_MUL;
197 			state = -1;
198 			break;
199 		case '/':
200 			cx.x_token = T_DIV;
201 			state = -1;
202 			break;
203 		case '%':
204 			cx.x_token = T_MOD;
205 			state = -1;
206 			break;
207 		case '^':
208 			cx.x_token = T_XOR;
209 			state = -1;
210 			break;
211 		case '(':
212 			cx.x_token = T_LP;
213 			state = -1;
214 			break;
215 		case ')':
216 			cx.x_token = T_RP;
217 			state = -1;
218 			break;
219 		case ',':
220 			cx.x_token = T_COMMA;
221 			state = -1;
222 			break;
223 		case '?':
224 			cx.x_token = T_QUEST;
225 			state = -1;
226 			break;
227 		case ':':
228 			cx.x_token = T_COLON;
229 			state = -1;
230 			break;
231 		case '[':
232 			cx.x_token = T_LB;
233 			state = -1;
234 			break;
235 		case ']':
236 			cx.x_token = T_RB;
237 			state = -1;
238 			break;
239 		default:
240 			cx.x_val.v_num = c;
241 			cx.x_token = T_CHAR;
242 			state = -1;
243 			break;
244 		}
245 		break;
246 	case 1:				/* got # */
247 		if (c == '\n' || c == EOF) {
248 			(void) s_ungetc(c);
249 			state = 0;
250 		}
251 		break;
252 	case 2:				/* unquoted string */
253 		switch (c) {
254 		case 'a': case 'b': case 'c': case 'd': case 'e':
255 		case 'f': case 'g': case 'h': case 'i': case 'j':
256 		case 'k': case 'l': case 'm': case 'n': case 'o':
257 		case 'p': case 'q': case 'r': case 's': case 't':
258 		case 'u': case 'v': case 'w': case 'x': case 'y':
259 		case 'z':
260 		case 'A': case 'B': case 'C': case 'D': case 'E':
261 		case 'F': case 'G': case 'H': case 'I': case 'J':
262 		case 'K': case 'L': case 'M': case 'N': case 'O':
263 		case 'P': case 'Q': case 'R': case 'S': case 'T':
264 		case 'U': case 'V': case 'W': case 'X': case 'Y':
265 		case 'Z':
266 		case '_': case '.':
267 		case '0': case '1': case '2': case '3': case '4':
268 		case '5': case '6': case '7': case '8': case '9':
269 			if (p < buf + sizeof buf - 1)
270 				*p++ = c;
271 			break;
272 		case '"':
273 			state = 3;
274 			break;
275 		case '\'':
276 			state = 4;
277 			break;
278 		case '\\':
279 			switch (c = s_gettok1()) {
280 			case -2:
281 				(void) s_ungetc(' ');
282 			case -1:
283 				break;
284 			default:
285 				if (p < buf + sizeof buf - 1)
286 					*p++ = c;
287 			}
288 			break;
289 		default:
290 			(void) s_ungetc(c);
291 		case EOF:
292 			*p = 0;
293 			cx.x_token = T_STR;
294 			switch (*buf) {
295 			case 'i':
296 				if (buf[1] == 'f' && buf[2] == 0)
297 					cx.x_token = T_IF;
298 				break;
299 			case 't':
300 				if (buf[1] == 'h' && buf[2] == 'e'
301 				    && buf[3] == 'n' && buf[4] == 0)
302 					cx.x_token = T_THEN;
303 				break;
304 			case 'e':
305 				if (buf[1] == 'n' && buf[2] == 'd'
306 				    && buf[3] == 'i' && buf[4] == 'f'
307 				    && buf[5] == 0)
308 					cx.x_token = T_ENDIF;
309 				else {
310 					if (buf[1] == 'l' && buf[2] == 's') {
311 						if (buf[3] == 'i'
312 						&&  buf[4] == 'f'
313 						&&  buf[5] == 0)
314 							cx.x_token = T_ELSIF;
315 						else {
316 							if (buf[3] == 'e'
317 							&& buf[4] == 0)
318 								cx.x_token =
319 								    T_ELSE;
320 						}
321 					}
322 				}
323 				break;
324 			}
325 			if (cx.x_token == T_STR
326 			    && (cx.x_val.v_str = str_cpy(buf)) == 0) {
327 				p_memerror();
328 				cx.x_token = T_EOF;
329 			}
330 			state = -1;
331 			break;
332 		}
333 		break;
334 	case 3:				/* " quoted string */
335 		switch (c) {
336 		case '\n':
337 			(void) s_ungetc(c);
338 		case EOF:
339 		case '"':
340 			state = 2;
341 			break;
342 		case '\\':
343 			switch (c = s_gettok1()) {
344 			case -1:
345 			case -2:	/* newlines are invisible */
346 				break;
347 			default:
348 				if (p < buf + sizeof buf - 1)
349 					*p++ = c;
350 			}
351 			break;
352 		default:
353 			if (p < buf + sizeof buf - 1)
354 				*p++ = c;
355 			break;
356 		}
357 		break;
358 	case 4:				/* ' quoted string */
359 		switch (c) {
360 		case '\n':
361 			(void) s_ungetc(c);
362 		case EOF:
363 		case '\'':
364 			state = 2;
365 			break;
366 		case '\\':
367 			switch (c = s_gettok1()) {
368 			case -1:
369 			case -2:	/* newlines are invisible */
370 				break;
371 			default:
372 				if (p < buf + sizeof buf - 1)
373 					*p++ = c;
374 			}
375 			break;
376 		default:
377 			if (p < buf + sizeof buf - 1)
378 				*p++ = c;
379 			break;
380 		}
381 		break;
382 	case 10:			/* got 0 */
383 		switch (c) {
384 		case 'x':
385 		case 'X':
386 			cx.x_val.v_num = 0;
387 			state = 12;
388 			break;
389 		case '0': case '1': case '2': case '3': case '4':
390 		case '5': case '6': case '7':
391 			cx.x_val.v_num = c - '0';
392 			state = 13;
393 			break;
394 		case '8': case '9':
395 			cx.x_val.v_num = c - '0';
396 			state = 11;
397 			break;
398 		default:
399 			(void) s_ungetc(c);
400 			state = -1;
401 			cx.x_token = T_NUM;
402 		}
403 		break;
404 	case 11:			/* decimal number */
405 		switch (c) {
406 		case '0': case '1': case '2': case '3': case '4':
407 		case '5': case '6': case '7': case '8': case '9':
408 			cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
409 			break;
410 		default:
411 			(void) s_ungetc(c);
412 			state = -1;
413 			cx.x_token = T_NUM;
414 		}
415 		break;
416 	case 12:			/* hex number */
417 		switch (c) {
418 		case '0': case '1': case '2': case '3': case '4':
419 		case '5': case '6': case '7': case '8': case '9':
420 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
421 			break;
422 		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
423 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
424 			break;
425 		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
426 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
427 			break;
428 		default:
429 			(void) s_ungetc(c);
430 			state = -1;
431 			cx.x_token = T_NUM;
432 		}
433 		break;
434 	case 13:			/* octal number */
435 		switch (c) {
436 		case '0': case '1': case '2': case '3': case '4':
437 		case '5': case '6': case '7':
438 			cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
439 			break;
440 		default:
441 			(void) s_ungetc(c);
442 			state = -1;
443 			cx.x_token = T_NUM;
444 		}
445 		break;
446 	case 20:			/* got > */
447 		switch (c) {
448 		case '=':
449 			cx.x_token = T_GE;
450 			state = -1;
451 			break;
452 		case '>':
453 			cx.x_token = T_RS;
454 			state = -1;
455 			break;
456 		default:
457 			(void) s_ungetc(c);
458 			cx.x_token = T_GT;
459 			state = -1;
460 		}
461 		break;
462 	case 21:			/* got < */
463 		switch (c) {
464 		case '=':
465 			cx.x_token = T_LE;
466 			state = -1;
467 			break;
468 		case '<':
469 			cx.x_token = T_LS;
470 			state = -1;
471 			break;
472 		default:
473 			(void) s_ungetc(c);
474 			cx.x_token = T_LT;
475 			state = -1;
476 		}
477 		break;
478 	case 22:			/* got = */
479 		switch (c) {
480 		case '=':
481 			cx.x_token = T_EQ;
482 			state = -1;
483 			break;
484 		default:
485 			(void) s_ungetc(c);
486 			cx.x_token = T_ASSIGN;
487 			state = -1;
488 		}
489 		break;
490 	case 23:			/* got ! */
491 		switch (c) {
492 		case '=':
493 			cx.x_token = T_NE;
494 			state = -1;
495 			break;
496 		default:
497 			(void) s_ungetc(c);
498 			cx.x_token = T_NOT;
499 			state = -1;
500 		}
501 		break;
502 	case 24:			/* got & */
503 		switch (c) {
504 		case '&':
505 			cx.x_token = T_ANDAND;
506 			state = -1;
507 			break;
508 		default:
509 			(void) s_ungetc(c);
510 			cx.x_token = T_AND;
511 			state = -1;
512 		}
513 		break;
514 	case 25:			/* got | */
515 		switch (c) {
516 		case '|':
517 			cx.x_token = T_OROR;
518 			state = -1;
519 			break;
520 		default:
521 			(void) s_ungetc(c);
522 			cx.x_token = T_OR;
523 			state = -1;
524 		}
525 		break;
526 	case 26:			/* got $ */
527 		switch (c) {
528 		case '?':
529 			cx.x_token = T_DQ;
530 			state = -1;
531 			break;
532 		default:
533 			(void) s_ungetc(c);
534 			cx.x_token = T_DOLLAR;
535 			state = -1;
536 		}
537 		break;
538 	default:
539 		abort();
540 	}
541 	if (state >= 0)
542 		goto loop;
543 	return cx.x_token;
544 }
545 
/*
 * Decode the character(s) following a backslash that the caller has
 * already consumed.
 *
 * Returns:
 *   -1  if the input ended right after the backslash;
 *   -2  if the backslash was followed by a newline;
 *   the control character for \b, \f, \n, \r and \t;
 *   the value of one to three octal digits for \N, \NN, \NNN;
 *   otherwise the escaped character itself.
 */
int
s_gettok1(void)
{
	int ch;
	int value;
	int ndigits;

	ch = s_getc();			/* character after the backslash */
	if (ch == EOF)
		return -1;
	switch (ch) {
	case '\n':
		return -2;
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	default:
		break;
	}
	if (ch < '0' || ch > '7')
		return ch;

	/* Octal escape: one digit is in hand, accept up to two more. */
	value = ch - '0';
	for (ndigits = 1; ndigits < 3; ndigits++) {
		ch = s_getc();
		if (ch < '0' || ch > '7') {
			(void) s_ungetc(ch);
			break;
		}
		value = value * 8 + ch - '0';
	}
	return value;
}
588