xref: /dragonfly/usr.bin/window/scanner.c (revision 21c1c48a)
1 /*
2  * Copyright (c) 1983, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Edward Wang at The University of California, Berkeley.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  * @(#)scanner.c	8.1 (Berkeley) 6/6/93
37  * $FreeBSD: src/usr.bin/window/scanner.c,v 1.2.14.2 2001/05/17 09:46:29 obrien Exp $
38  * $DragonFly: src/usr.bin/window/scanner.c,v 1.2 2003/06/17 04:29:34 dillon Exp $
39  */
40 
41 #include <ctype.h>
42 #include <stdlib.h>
43 
44 #include "value.h"
45 #include "token.h"
46 #include "context.h"
47 #include "string.h"
48 #include "mystring.h"
49 
50 s_getc()
51 {
52 	register c;
53 
54 	switch (cx.x_type) {
55 	case X_FILE:
56 		c = getc(cx.x_fp);
57 		if (cx.x_bol && c != EOF) {
58 			cx.x_bol = 0;
59 			cx.x_lineno++;
60 		}
61 		if (c == '\n')
62 			cx.x_bol = 1;
63 		return c;
64 	case X_BUF:
65 		if (*cx.x_bufp != 0)
66 			return *cx.x_bufp++ & 0xff;
67 		else
68 			return EOF;
69 	}
70 	/*NOTREACHED*/
71 }
72 
73 s_ungetc(c)
74 {
75 	if (c == EOF)
76 		return EOF;
77 	switch (cx.x_type) {
78 	case X_FILE:
79 		cx.x_bol = 0;
80 		return ungetc(c, cx.x_fp);
81 	case X_BUF:
82 		if (cx.x_bufp > cx.x_buf)
83 			return *--cx.x_bufp = c;
84 		else
85 			return EOF;
86 	}
87 	/*NOTREACHED*/
88 }
89 
90 s_gettok()
91 {
92 	char buf[100];
93 	register char *p = buf;
94 	register c;
95 	register state = 0;
96 
97 loop:
98 	c = s_getc();
99 	switch (state) {
100 	case 0:
101 		switch (c) {
102 		case ' ':
103 		case '\t':
104 			break;
105 		case '\n':
106 		case ';':
107 			cx.x_token = T_EOL;
108 			state = -1;
109 			break;
110 		case '#':
111 			state = 1;
112 			break;
113 		case EOF:
114 			cx.x_token = T_EOF;
115 			state = -1;
116 			break;
117 		case '"':
118 			state = 3;
119 			break;
120 		case '\'':
121 			state = 4;
122 			break;
123 		case '\\':
124 			switch (c = s_gettok1()) {
125 			case -1:
126 				break;
127 			case -2:
128 				state = 0;
129 				break;
130 			default:
131 				*p++ = c;
132 				state = 2;
133 			}
134 			break;
135 		case '0':
136 			cx.x_val.v_num = 0;
137 			state = 10;
138 			break;
139 		case '1': case '2': case '3': case '4':
140 		case '5': case '6': case '7': case '8': case '9':
141 			cx.x_val.v_num = c - '0';
142 			state = 11;
143 			break;
144 		case '>':
145 			state = 20;
146 			break;
147 		case '<':
148 			state = 21;
149 			break;
150 		case '=':
151 			state = 22;
152 			break;
153 		case '!':
154 			state = 23;
155 			break;
156 		case '&':
157 			state = 24;
158 			break;
159 		case '|':
160 			state = 25;
161 			break;
162 		case '$':
163 			state = 26;
164 			break;
165 		case '~':
166 			cx.x_token = T_COMP;
167 			state = -1;
168 			break;
169 		case '+':
170 			cx.x_token = T_PLUS;
171 			state = -1;
172 			break;
173 		case '-':
174 			cx.x_token = T_MINUS;
175 			state = -1;
176 			break;
177 		case '*':
178 			cx.x_token = T_MUL;
179 			state = -1;
180 			break;
181 		case '/':
182 			cx.x_token = T_DIV;
183 			state = -1;
184 			break;
185 		case '%':
186 			cx.x_token = T_MOD;
187 			state = -1;
188 			break;
189 		case '^':
190 			cx.x_token = T_XOR;
191 			state = -1;
192 			break;
193 		case '(':
194 			cx.x_token = T_LP;
195 			state = -1;
196 			break;
197 		case ')':
198 			cx.x_token = T_RP;
199 			state = -1;
200 			break;
201 		case ',':
202 			cx.x_token = T_COMMA;
203 			state = -1;
204 			break;
205 		case '?':
206 			cx.x_token = T_QUEST;
207 			state = -1;
208 			break;
209 		case ':':
210 			cx.x_token = T_COLON;
211 			state = -1;
212 			break;
213 		case '[':
214 			cx.x_token = T_LB;
215 			state = -1;
216 			break;
217 		case ']':
218 			cx.x_token = T_RB;
219 			state = -1;
220 			break;
221 		default:
222 			if (isalpha(c) || c == '_' || c == '.') {
223 				*p++ = c;
224 				state = 2;
225 				break;
226 			}
227 			cx.x_val.v_num = c;
228 			cx.x_token = T_CHAR;
229 			state = -1;
230 			break;
231 		}
232 		break;
233 	case 1:				/* got # */
234 		if (c == '\n' || c == EOF) {
235 			(void) s_ungetc(c);
236 			state = 0;
237 		}
238 		break;
239 	case 2:				/* unquoted string */
240 		switch (c) {
241 		case '"':
242 			state = 3;
243 			break;
244 		case '\'':
245 			state = 4;
246 			break;
247 		case '\\':
248 			switch (c = s_gettok1()) {
249 			case -2:
250 				(void) s_ungetc(' ');
251 			case -1:
252 				break;
253 			default:
254 				if (p < buf + sizeof buf - 1)
255 					*p++ = c;
256 			}
257 			break;
258 		default:
259 			if (isalnum(c) || c == '_' || c == '.') {
260 				if (p < buf + sizeof buf - 1)
261 					*p++ = c;
262 				break;
263 			}
264 			(void) s_ungetc(c);
265 		case EOF:
266 			*p = 0;
267 			cx.x_token = T_STR;
268 			switch (*buf) {
269 			case 'i':
270 				if (buf[1] == 'f' && buf[2] == 0)
271 					cx.x_token = T_IF;
272 				break;
273 			case 't':
274 				if (buf[1] == 'h' && buf[2] == 'e'
275 				    && buf[3] == 'n' && buf[4] == 0)
276 					cx.x_token = T_THEN;
277 				break;
278 			case 'e':
279 				if (buf[1] == 'n' && buf[2] == 'd'
280 				    && buf[3] == 'i' && buf[4] == 'f'
281 				    && buf[5] == 0)
282 					cx.x_token = T_ENDIF;
283 				else if (buf[1] == 'l' && buf[2] == 's')
284 					if (buf[3] == 'i' && buf[4] == 'f'
285 					    && buf[5] == 0)
286 						cx.x_token = T_ELSIF;
287 					else if (buf[3] == 'e' && buf[4] == 0)
288 						cx.x_token = T_ELSE;
289 				break;
290 			}
291 			if (cx.x_token == T_STR
292 			    && (cx.x_val.v_str = str_cpy(buf)) == 0) {
293 				p_memerror();
294 				cx.x_token = T_EOF;
295 			}
296 			state = -1;
297 			break;
298 		}
299 		break;
300 	case 3:				/* " quoted string */
301 		switch (c) {
302 		case '\n':
303 			(void) s_ungetc(c);
304 		case EOF:
305 		case '"':
306 			state = 2;
307 			break;
308 		case '\\':
309 			switch (c = s_gettok1()) {
310 			case -1:
311 			case -2:	/* newlines are invisible */
312 				break;
313 			default:
314 				if (p < buf + sizeof buf - 1)
315 					*p++ = c;
316 			}
317 			break;
318 		default:
319 			if (p < buf + sizeof buf - 1)
320 				*p++ = c;
321 			break;
322 		}
323 		break;
324 	case 4:				/* ' quoted string */
325 		switch (c) {
326 		case '\n':
327 			(void) s_ungetc(c);
328 		case EOF:
329 		case '\'':
330 			state = 2;
331 			break;
332 		case '\\':
333 			switch (c = s_gettok1()) {
334 			case -1:
335 			case -2:	/* newlines are invisible */
336 				break;
337 			default:
338 				if (p < buf + sizeof buf - 1)
339 					*p++ = c;
340 			}
341 			break;
342 		default:
343 			if (p < buf + sizeof buf - 1)
344 				*p++ = c;
345 			break;
346 		}
347 		break;
348 	case 10:			/* got 0 */
349 		switch (c) {
350 		case 'x':
351 		case 'X':
352 			cx.x_val.v_num = 0;
353 			state = 12;
354 			break;
355 		case '0': case '1': case '2': case '3': case '4':
356 		case '5': case '6': case '7':
357 			cx.x_val.v_num = c - '0';
358 			state = 13;
359 			break;
360 		case '8': case '9':
361 			cx.x_val.v_num = c - '0';
362 			state = 11;
363 			break;
364 		default:
365 			(void) s_ungetc(c);
366 			state = -1;
367 			cx.x_token = T_NUM;
368 		}
369 		break;
370 	case 11:			/* decimal number */
371 		switch (c) {
372 		case '0': case '1': case '2': case '3': case '4':
373 		case '5': case '6': case '7': case '8': case '9':
374 			cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
375 			break;
376 		default:
377 			(void) s_ungetc(c);
378 			state = -1;
379 			cx.x_token = T_NUM;
380 		}
381 		break;
382 	case 12:			/* hex number */
383 		switch (c) {
384 		case '0': case '1': case '2': case '3': case '4':
385 		case '5': case '6': case '7': case '8': case '9':
386 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
387 			break;
388 		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
389 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
390 			break;
391 		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
392 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
393 			break;
394 		default:
395 			(void) s_ungetc(c);
396 			state = -1;
397 			cx.x_token = T_NUM;
398 		}
399 		break;
400 	case 13:			/* octal number */
401 		switch (c) {
402 		case '0': case '1': case '2': case '3': case '4':
403 		case '5': case '6': case '7':
404 			cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
405 			break;
406 		default:
407 			(void) s_ungetc(c);
408 			state = -1;
409 			cx.x_token = T_NUM;
410 		}
411 		break;
412 	case 20:			/* got > */
413 		switch (c) {
414 		case '=':
415 			cx.x_token = T_GE;
416 			state = -1;
417 			break;
418 		case '>':
419 			cx.x_token = T_RS;
420 			state = -1;
421 			break;
422 		default:
423 			(void) s_ungetc(c);
424 			cx.x_token = T_GT;
425 			state = -1;
426 		}
427 		break;
428 	case 21:			/* got < */
429 		switch (c) {
430 		case '=':
431 			cx.x_token = T_LE;
432 			state = -1;
433 			break;
434 		case '<':
435 			cx.x_token = T_LS;
436 			state = -1;
437 			break;
438 		default:
439 			(void) s_ungetc(c);
440 			cx.x_token = T_LT;
441 			state = -1;
442 		}
443 		break;
444 	case 22:			/* got = */
445 		switch (c) {
446 		case '=':
447 			cx.x_token = T_EQ;
448 			state = -1;
449 			break;
450 		default:
451 			(void) s_ungetc(c);
452 			cx.x_token = T_ASSIGN;
453 			state = -1;
454 		}
455 		break;
456 	case 23:			/* got ! */
457 		switch (c) {
458 		case '=':
459 			cx.x_token = T_NE;
460 			state = -1;
461 			break;
462 		default:
463 			(void) s_ungetc(c);
464 			cx.x_token = T_NOT;
465 			state = -1;
466 		}
467 		break;
468 	case 24:			/* got & */
469 		switch (c) {
470 		case '&':
471 			cx.x_token = T_ANDAND;
472 			state = -1;
473 			break;
474 		default:
475 			(void) s_ungetc(c);
476 			cx.x_token = T_AND;
477 			state = -1;
478 		}
479 		break;
480 	case 25:			/* got | */
481 		switch (c) {
482 		case '|':
483 			cx.x_token = T_OROR;
484 			state = -1;
485 			break;
486 		default:
487 			(void) s_ungetc(c);
488 			cx.x_token = T_OR;
489 			state = -1;
490 		}
491 		break;
492 	case 26:			/* got $ */
493 		switch (c) {
494 		case '?':
495 			cx.x_token = T_DQ;
496 			state = -1;
497 			break;
498 		default:
499 			(void) s_ungetc(c);
500 			cx.x_token = T_DOLLAR;
501 			state = -1;
502 		}
503 		break;
504 	default:
505 		abort();
506 	}
507 	if (state >= 0)
508 		goto loop;
509 	return cx.x_token;
510 }
511 
/*
 * Decode the remainder of a backslash escape; the caller has already
 * consumed the backslash itself.
 *
 * Returns:
 *	-1	end of input directly after the backslash
 *	-2	backslash-newline
 *	otherwise the character the escape stands for: \b \f \n \r \t,
 *	a one to three digit octal value, or the escaped character
 *	itself for anything else.
 */
int
s_gettok1(void)
{
	int c;
	int n;
	int i;

	c = s_getc();			/* got \ */
	switch (c) {
	case EOF:
		return -1;
	case '\n':
		return -2;
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7':
		break;			/* octal escape, decoded below */
	default:
		return c;
	}

	/* got \[0-7]: accept at most two more octal digits */
	n = c - '0';
	for (i = 0; i < 2; i++) {
		c = s_getc();
		if (c < '0' || c > '7') {
			(void) s_ungetc(c);
			break;
		}
		n = n * 8 + c - '0';
	}
	return n;
}
553