xref: /dragonfly/usr.bin/window/scanner.c (revision 0cfebe3d)
1 /*
2  * Copyright (c) 1983, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Edward Wang at The University of California, Berkeley.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  * @(#)scanner.c	8.1 (Berkeley) 6/6/93
37  * $FreeBSD: src/usr.bin/window/scanner.c,v 1.2.14.2 2001/05/17 09:46:29 obrien Exp $
38  * $DragonFly: src/usr.bin/window/scanner.c,v 1.2 2003/06/17 04:29:34 dillon Exp $
39  */
40 
#include <ctype.h>
#include <stdlib.h>
#include "value.h"
#include "token.h"
#include "context.h"
#include "string.h"
#include "mystring.h"
47 
48 s_getc()
49 {
50 	register c;
51 
52 	switch (cx.x_type) {
53 	case X_FILE:
54 		c = getc(cx.x_fp);
55 		if (cx.x_bol && c != EOF) {
56 			cx.x_bol = 0;
57 			cx.x_lineno++;
58 		}
59 		if (c == '\n')
60 			cx.x_bol = 1;
61 		return c;
62 	case X_BUF:
63 		if (*cx.x_bufp != 0)
64 			return *cx.x_bufp++ & 0xff;
65 		else
66 			return EOF;
67 	}
68 	/*NOTREACHED*/
69 }
70 
71 s_ungetc(c)
72 {
73 	if (c == EOF)
74 		return EOF;
75 	switch (cx.x_type) {
76 	case X_FILE:
77 		cx.x_bol = 0;
78 		return ungetc(c, cx.x_fp);
79 	case X_BUF:
80 		if (cx.x_bufp > cx.x_buf)
81 			return *--cx.x_bufp = c;
82 		else
83 			return EOF;
84 	}
85 	/*NOTREACHED*/
86 }
87 
88 s_gettok()
89 {
90 	char buf[100];
91 	register char *p = buf;
92 	register c;
93 	register state = 0;
94 
95 loop:
96 	c = s_getc();
97 	switch (state) {
98 	case 0:
99 		switch (c) {
100 		case ' ':
101 		case '\t':
102 			break;
103 		case '\n':
104 		case ';':
105 			cx.x_token = T_EOL;
106 			state = -1;
107 			break;
108 		case '#':
109 			state = 1;
110 			break;
111 		case EOF:
112 			cx.x_token = T_EOF;
113 			state = -1;
114 			break;
115 		case '"':
116 			state = 3;
117 			break;
118 		case '\'':
119 			state = 4;
120 			break;
121 		case '\\':
122 			switch (c = s_gettok1()) {
123 			case -1:
124 				break;
125 			case -2:
126 				state = 0;
127 				break;
128 			default:
129 				*p++ = c;
130 				state = 2;
131 			}
132 			break;
133 		case '0':
134 			cx.x_val.v_num = 0;
135 			state = 10;
136 			break;
137 		case '1': case '2': case '3': case '4':
138 		case '5': case '6': case '7': case '8': case '9':
139 			cx.x_val.v_num = c - '0';
140 			state = 11;
141 			break;
142 		case '>':
143 			state = 20;
144 			break;
145 		case '<':
146 			state = 21;
147 			break;
148 		case '=':
149 			state = 22;
150 			break;
151 		case '!':
152 			state = 23;
153 			break;
154 		case '&':
155 			state = 24;
156 			break;
157 		case '|':
158 			state = 25;
159 			break;
160 		case '$':
161 			state = 26;
162 			break;
163 		case '~':
164 			cx.x_token = T_COMP;
165 			state = -1;
166 			break;
167 		case '+':
168 			cx.x_token = T_PLUS;
169 			state = -1;
170 			break;
171 		case '-':
172 			cx.x_token = T_MINUS;
173 			state = -1;
174 			break;
175 		case '*':
176 			cx.x_token = T_MUL;
177 			state = -1;
178 			break;
179 		case '/':
180 			cx.x_token = T_DIV;
181 			state = -1;
182 			break;
183 		case '%':
184 			cx.x_token = T_MOD;
185 			state = -1;
186 			break;
187 		case '^':
188 			cx.x_token = T_XOR;
189 			state = -1;
190 			break;
191 		case '(':
192 			cx.x_token = T_LP;
193 			state = -1;
194 			break;
195 		case ')':
196 			cx.x_token = T_RP;
197 			state = -1;
198 			break;
199 		case ',':
200 			cx.x_token = T_COMMA;
201 			state = -1;
202 			break;
203 		case '?':
204 			cx.x_token = T_QUEST;
205 			state = -1;
206 			break;
207 		case ':':
208 			cx.x_token = T_COLON;
209 			state = -1;
210 			break;
211 		case '[':
212 			cx.x_token = T_LB;
213 			state = -1;
214 			break;
215 		case ']':
216 			cx.x_token = T_RB;
217 			state = -1;
218 			break;
219 		default:
220 			if (isalpha(c) || c == '_' || c == '.') {
221 				*p++ = c;
222 				state = 2;
223 				break;
224 			}
225 			cx.x_val.v_num = c;
226 			cx.x_token = T_CHAR;
227 			state = -1;
228 			break;
229 		}
230 		break;
231 	case 1:				/* got # */
232 		if (c == '\n' || c == EOF) {
233 			(void) s_ungetc(c);
234 			state = 0;
235 		}
236 		break;
237 	case 2:				/* unquoted string */
238 		switch (c) {
239 		case '"':
240 			state = 3;
241 			break;
242 		case '\'':
243 			state = 4;
244 			break;
245 		case '\\':
246 			switch (c = s_gettok1()) {
247 			case -2:
248 				(void) s_ungetc(' ');
249 			case -1:
250 				break;
251 			default:
252 				if (p < buf + sizeof buf - 1)
253 					*p++ = c;
254 			}
255 			break;
256 		default:
257 			if (isalnum(c) || c == '_' || c == '.') {
258 				if (p < buf + sizeof buf - 1)
259 					*p++ = c;
260 				break;
261 			}
262 			(void) s_ungetc(c);
263 		case EOF:
264 			*p = 0;
265 			cx.x_token = T_STR;
266 			switch (*buf) {
267 			case 'i':
268 				if (buf[1] == 'f' && buf[2] == 0)
269 					cx.x_token = T_IF;
270 				break;
271 			case 't':
272 				if (buf[1] == 'h' && buf[2] == 'e'
273 				    && buf[3] == 'n' && buf[4] == 0)
274 					cx.x_token = T_THEN;
275 				break;
276 			case 'e':
277 				if (buf[1] == 'n' && buf[2] == 'd'
278 				    && buf[3] == 'i' && buf[4] == 'f'
279 				    && buf[5] == 0)
280 					cx.x_token = T_ENDIF;
281 				else if (buf[1] == 'l' && buf[2] == 's')
282 					if (buf[3] == 'i' && buf[4] == 'f'
283 					    && buf[5] == 0)
284 						cx.x_token = T_ELSIF;
285 					else if (buf[3] == 'e' && buf[4] == 0)
286 						cx.x_token = T_ELSE;
287 				break;
288 			}
289 			if (cx.x_token == T_STR
290 			    && (cx.x_val.v_str = str_cpy(buf)) == 0) {
291 				p_memerror();
292 				cx.x_token = T_EOF;
293 			}
294 			state = -1;
295 			break;
296 		}
297 		break;
298 	case 3:				/* " quoted string */
299 		switch (c) {
300 		case '\n':
301 			(void) s_ungetc(c);
302 		case EOF:
303 		case '"':
304 			state = 2;
305 			break;
306 		case '\\':
307 			switch (c = s_gettok1()) {
308 			case -1:
309 			case -2:	/* newlines are invisible */
310 				break;
311 			default:
312 				if (p < buf + sizeof buf - 1)
313 					*p++ = c;
314 			}
315 			break;
316 		default:
317 			if (p < buf + sizeof buf - 1)
318 				*p++ = c;
319 			break;
320 		}
321 		break;
322 	case 4:				/* ' quoted string */
323 		switch (c) {
324 		case '\n':
325 			(void) s_ungetc(c);
326 		case EOF:
327 		case '\'':
328 			state = 2;
329 			break;
330 		case '\\':
331 			switch (c = s_gettok1()) {
332 			case -1:
333 			case -2:	/* newlines are invisible */
334 				break;
335 			default:
336 				if (p < buf + sizeof buf - 1)
337 					*p++ = c;
338 			}
339 			break;
340 		default:
341 			if (p < buf + sizeof buf - 1)
342 				*p++ = c;
343 			break;
344 		}
345 		break;
346 	case 10:			/* got 0 */
347 		switch (c) {
348 		case 'x':
349 		case 'X':
350 			cx.x_val.v_num = 0;
351 			state = 12;
352 			break;
353 		case '0': case '1': case '2': case '3': case '4':
354 		case '5': case '6': case '7':
355 			cx.x_val.v_num = c - '0';
356 			state = 13;
357 			break;
358 		case '8': case '9':
359 			cx.x_val.v_num = c - '0';
360 			state = 11;
361 			break;
362 		default:
363 			(void) s_ungetc(c);
364 			state = -1;
365 			cx.x_token = T_NUM;
366 		}
367 		break;
368 	case 11:			/* decimal number */
369 		switch (c) {
370 		case '0': case '1': case '2': case '3': case '4':
371 		case '5': case '6': case '7': case '8': case '9':
372 			cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
373 			break;
374 		default:
375 			(void) s_ungetc(c);
376 			state = -1;
377 			cx.x_token = T_NUM;
378 		}
379 		break;
380 	case 12:			/* hex number */
381 		switch (c) {
382 		case '0': case '1': case '2': case '3': case '4':
383 		case '5': case '6': case '7': case '8': case '9':
384 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
385 			break;
386 		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
387 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
388 			break;
389 		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
390 			cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
391 			break;
392 		default:
393 			(void) s_ungetc(c);
394 			state = -1;
395 			cx.x_token = T_NUM;
396 		}
397 		break;
398 	case 13:			/* octal number */
399 		switch (c) {
400 		case '0': case '1': case '2': case '3': case '4':
401 		case '5': case '6': case '7':
402 			cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
403 			break;
404 		default:
405 			(void) s_ungetc(c);
406 			state = -1;
407 			cx.x_token = T_NUM;
408 		}
409 		break;
410 	case 20:			/* got > */
411 		switch (c) {
412 		case '=':
413 			cx.x_token = T_GE;
414 			state = -1;
415 			break;
416 		case '>':
417 			cx.x_token = T_RS;
418 			state = -1;
419 			break;
420 		default:
421 			(void) s_ungetc(c);
422 			cx.x_token = T_GT;
423 			state = -1;
424 		}
425 		break;
426 	case 21:			/* got < */
427 		switch (c) {
428 		case '=':
429 			cx.x_token = T_LE;
430 			state = -1;
431 			break;
432 		case '<':
433 			cx.x_token = T_LS;
434 			state = -1;
435 			break;
436 		default:
437 			(void) s_ungetc(c);
438 			cx.x_token = T_LT;
439 			state = -1;
440 		}
441 		break;
442 	case 22:			/* got = */
443 		switch (c) {
444 		case '=':
445 			cx.x_token = T_EQ;
446 			state = -1;
447 			break;
448 		default:
449 			(void) s_ungetc(c);
450 			cx.x_token = T_ASSIGN;
451 			state = -1;
452 		}
453 		break;
454 	case 23:			/* got ! */
455 		switch (c) {
456 		case '=':
457 			cx.x_token = T_NE;
458 			state = -1;
459 			break;
460 		default:
461 			(void) s_ungetc(c);
462 			cx.x_token = T_NOT;
463 			state = -1;
464 		}
465 		break;
466 	case 24:			/* got & */
467 		switch (c) {
468 		case '&':
469 			cx.x_token = T_ANDAND;
470 			state = -1;
471 			break;
472 		default:
473 			(void) s_ungetc(c);
474 			cx.x_token = T_AND;
475 			state = -1;
476 		}
477 		break;
478 	case 25:			/* got | */
479 		switch (c) {
480 		case '|':
481 			cx.x_token = T_OROR;
482 			state = -1;
483 			break;
484 		default:
485 			(void) s_ungetc(c);
486 			cx.x_token = T_OR;
487 			state = -1;
488 		}
489 		break;
490 	case 26:			/* got $ */
491 		switch (c) {
492 		case '?':
493 			cx.x_token = T_DQ;
494 			state = -1;
495 			break;
496 		default:
497 			(void) s_ungetc(c);
498 			cx.x_token = T_DOLLAR;
499 			state = -1;
500 		}
501 		break;
502 	default:
503 		abort();
504 	}
505 	if (state >= 0)
506 		goto loop;
507 	return cx.x_token;
508 }
509 
/*
 * Decode the remainder of a backslash escape; the caller has already
 * consumed the backslash itself.
 *
 * Returns:
 *	-1	end of input followed the backslash
 *	-2	a newline followed the backslash (line continuation)
 *	the control character for \b \f \n \r \t
 *	the numeric value of \d, \dd or \ddd (one to three octal digits;
 *	    the value is not reduced to a byte here, so it can be as
 *	    large as 0777)
 *	otherwise the escaped character itself
 */
int
s_gettok1(void)
{
	int c;
	int n;
	int i;

	c = s_getc();			/* got \ */
	switch (c) {
	case EOF:
		return -1;
	case '\n':
		return -2;
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7':
		break;
	default:
		return c;
	}

	/* Octal escape: take up to two more octal digits. */
	n = c - '0';
	for (i = 0; i < 2; i++) {
		c = s_getc();
		if (c < '0' || c > '7') {
			/* Not a digit: it belongs to whatever comes next. */
			(void) s_ungetc(c);
			break;
		}
		n = n * 8 + c - '0';
	}
	return n;
}