1 /*	$Id: lexer.c,v 1.9 2001/08/10 15:18:08 sandro Exp $	*/
2 
3 /*
4  * Copyright (c) 1997-2001 Sandro Sigala.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "config.h"
28 
29 #include <ctype.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 
34 #include "slisp.h"
35 #include "extern.h"
36 
/* Stream the lexer reads from; must be set before gettoken() is called. */
FILE *input_file;
/* Line counter: set to 1 by init_lex(), bumped by gettoken() on newlines. */
int lineno;

/*
 * Pushback stack for characters the lexer has read but not consumed.
 * lex_bufp points one past the most recently pushed character and is
 * reset to lex_buf by init_lex().
 *
 * xgetc() pops the most recently pushed character when the stack is
 * non-empty and otherwise reads from input_file.  xungetc() pushes a
 * character; it performs no bounds check against LEX_BUF_MAX.
 *
 * Both macros are fully parenthesized so they behave as ordinary
 * expressions wherever they are used.
 */
#define LEX_BUF_MAX 16
static int lex_buf[LEX_BUF_MAX];
static int *lex_bufp;
#define xgetc()		(lex_bufp > lex_buf ? *--lex_bufp : fgetc(input_file))
#define xungetc(c)	(*lex_bufp++ = (c))

/*
 * Text of the most recently scanned token, and the number of bytes
 * currently allocated for it.
 */
char *token_buffer;
static int token_buffer_max;
48 
49 void
init_lex(void)50 init_lex(void)
51 {
52 	lineno = 1;
53 	token_buffer_max = 10;
54 	token_buffer = (char *)xmalloc(token_buffer_max);
55 	lex_bufp = lex_buf;
56 }
57 
58 void
done_lex(void)59 done_lex(void)
60 {
61 	free(token_buffer);
62 }
63 
64 static char *
extend_buf(char * p)65 extend_buf(char *p)
66 {
67 	int off = p - token_buffer;
68 
69 	token_buffer_max += 10;
70 	token_buffer = (char *)xrealloc(token_buffer, token_buffer_max);
71 
72 	return token_buffer + off;
73 }
74 
75 int
gettoken(void)76 gettoken(void)
77 {
78 	char *p;
79 	int c;
80 
81 	for (;;) {
82 		c = xgetc();
83 		switch (c) {
84 		case '\n':
85 			++lineno;
86 			break;
87 
88 		case ' ': case '\f': case '\t': case '\v': case '\r':
89 			break;
90 
91 		case ';':
92 			/* Comment: ";".*"\n" */
93 			while ((c = xgetc()) != '\n')
94 				;
95 			++lineno;
96 			break;
97 
98 		case '?':
99 			/* Character: "?". */
100 			c = xgetc();
101 			sprintf(token_buffer, "%d", c);
102 			return INTEGER;
103 
104 		case '-':
105 			/* Minus sign: "-". */
106 			c = xgetc();
107 			if (!isdigit(c)) {
108 				xungetc(c);
109 				c = '-';
110 				goto got_id;
111 			}
112 			xungetc(c);
113 			c = '-';
114 			/* FALLTRHU */
115 
116 		case '0':
117 		case '1': case '2': case '3':
118 		case '4': case '5': case '6':
119 		case '7': case '8': case '9':
120 			/* Integer: [0-9]+ */
121 			p = token_buffer;
122 			do {
123 				if (p - token_buffer >= token_buffer_max)
124 					p = extend_buf(p);
125 				*p++ = c;
126 				c = xgetc();
127 			} while (isdigit(c));
128 			xungetc(c);
129 			*p = '\0';
130 			return INTEGER;
131 
132 		got_id:
133 		case '_': case '+': case '*': case '/': case '%':
134 		case '<': case '>': case '=': case '&':
135 		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
136 		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
137 		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
138 		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
139 		case 'y': case 'z':
140 		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
141 		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
142 		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
143 		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
144 		case 'Y': case 'Z':
145 			/* Identifier: [-/+*%<>=&a-zA-Z_][-/+*%<>=&a-zA-Z_0-9]* */
146 			p = token_buffer;
147 			do {
148 				if (p - token_buffer >= token_buffer_max)
149 					p = extend_buf(p);
150 				*p++ = c;
151 				c = xgetc();
152 			} while (isalnum(c) || strchr("_-+*/%<>=&", c) != NULL);
153 			xungetc(c);
154 			*p = '\0';
155 			return IDENTIFIER;
156 
157 		case '"':
158 			/* String: "\""([^"]|"\\".)*"\"" */
159 			p = token_buffer;
160 			while ((c = xgetc()) != '"' && c != EOF) {
161 				if (p - token_buffer >= token_buffer_max)
162 					p = extend_buf(p);
163 				if (c == '\\') {
164 					c = xgetc();
165 					switch (c) {
166 					case '\n': ++lineno; break;
167 					case 'a': *p++ = '\a'; break;
168 					case 'b': *p++ = '\b'; break;
169 					case 'f': *p++ = '\f'; break;
170 					case 'n': *p++ = '\n'; break;
171 					case 'r': *p++ = '\r'; break;
172 					case 't': *p++ = '\t'; break;
173 					case 'v': *p++ = '\v'; break;
174 					default: *p++ = c;
175 					}
176 				} else {
177 					if (c == '\n')
178 						++lineno;
179 					*p++ = c;
180 				}
181 			}
182 			*p = '\0';
183 			return STRING;
184 
185 		default:
186 			return c;
187 		}
188 	}
189 }
190