xref: /386bsd/usr/src/kernel/ddb/db_lex.c (revision a2142627)
1 /*
2  * Mach Operating System
3  * Copyright (c) 1991,1990 Carnegie Mellon University
4  * All Rights Reserved.
5  *
6  * Permission to use, copy, modify and distribute this software and its
7  * documentation is hereby granted, provided that both the copyright
8  * notice and this permission notice appear in all copies of the
9  * software, derivative works or modified versions, and any portions
10  * thereof, and that both notices appear in supporting documentation.
11  *
12  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
13  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15  *
16  * Carnegie Mellon requests users of this software to return to
17  *
18  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
19  *  School of Computer Science
20  *  Carnegie Mellon University
21  *  Pittsburgh PA 15213-3890
22  *
23  * any improvements or extensions that they make and grant Carnegie the
24  * rights to redistribute these changes.
25  */
26 /*
27  * HISTORY
28  * $Log: db_lex.c,v $
29  * Revision 1.1  1992/03/25  21:45:13  pace
30  * Initial revision
31  *
32  * Revision 2.3  91/02/05  17:06:36  mrt
33  * 	Changed to new Mach copyright
34  * 	[91/01/31  16:18:20  mrt]
35  *
36  * Revision 2.2  90/08/27  21:51:10  dbg
37  * 	Add 'dotdot' token.
38  * 	[90/08/22            dbg]
39  *
40  * 	Allow backslash to quote any character into an identifier.
41  * 	Allow colon in identifier for symbol table qualification.
42  * 	[90/08/16            dbg]
43  * 	Reduce lint.
44  * 	[90/08/07            dbg]
45  * 	Created.
46  * 	[90/07/25            dbg]
47  *
48  */
49 /*
50  *	Author: David B. Golub, Carnegie Mellon University
51  *	Date:	7/90
52  */
53 /*
54  * Lexical analyzer.
55  */
56 #include "db_lex.h"
57 
/*
 * Current input line and the scanner's position within it.
 * db_lp is the next character to hand out; db_endlp is one past the
 * last valid character (db_read_char reports end of input once
 * db_lp >= db_endlp).
 */
char	db_line[120];
char	*db_lp;
char	*db_endlp;
60 
61 int
db_read_line()62 db_read_line()
63 {
64 	int	i;
65 
66 	i = db_readline(db_line, sizeof(db_line));
67 	if (i == 0)
68 	    return (0);	/* EOI */
69 	db_lp = db_line;
70 	db_endlp = db_lp + i;
71 	return (i);
72 }
73 
74 void
db_flush_line()75 db_flush_line()
76 {
77 	db_lp = db_line;
78 	db_endlp = db_line;
79 }
80 
/* One-character pushback slot for the scanner; 0 means "empty". */
int	db_look_char = 0;
82 
83 int
db_read_char()84 db_read_char()
85 {
86 	int	c;
87 
88 	if (db_look_char != 0) {
89 	    c = db_look_char;
90 	    db_look_char = 0;
91 	}
92 	else if (db_lp >= db_endlp)
93 	    c = -1;
94 	else
95 	    c = *db_lp++;
96 	return (c);
97 }
98 
99 void
db_unread_char(c)100 db_unread_char(c)
101 {
102 	db_look_char = c;
103 }
104 
/* One-token pushback slot for db_read_token(); 0 means "empty". */
int	db_look_token = 0;
106 
107 void
db_unread_token(t)108 db_unread_token(t)
109 	int	t;
110 {
111 	db_look_token = t;
112 }
113 
114 int
db_read_token()115 db_read_token()
116 {
117 	int	t;
118 
119 	if (db_look_token) {
120 	    t = db_look_token;
121 	    db_look_token = 0;
122 	}
123 	else
124 	    t = db_lex();
125 	return (t);
126 }
127 
128 int	db_tok_number;
129 char	db_tok_string[TOK_STRING_SIZE];
130 
/* Radix used for numbers that carry no explicit 0o/0t/0x prefix. */
int	db_radix = 16;
132 
133 void
db_flush_lex()134 db_flush_lex()
135 {
136 	db_flush_line();
137 	db_look_char = 0;
138 	db_look_token = 0;
139 }
140 
141 int
db_lex()142 db_lex()
143 {
144 	int	c;
145 
146 	c = db_read_char();
147 	while (c <= ' ' || c > '~') {
148 	    if (c == '\n' || c == -1)
149 		return (tEOL);
150 	    c = db_read_char();
151 	}
152 
153 	if (c >= '0' && c <= '9') {
154 	    /* number */
155 	    int	r, digit;
156 
157 	    if (c > '0')
158 		r = db_radix;
159 	    else {
160 		c = db_read_char();
161 		if (c == 'O' || c == 'o')
162 		    r = 8;
163 		else if (c == 'T' || c == 't')
164 		    r = 10;
165 		else if (c == 'X' || c == 'x')
166 		    r = 16;
167 		else {
168 		    r = db_radix;
169 		    db_unread_char(c);
170 		}
171 		c = db_read_char();
172 	    }
173 	    db_tok_number = 0;
174 	    for (;;) {
175 		if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
176 		    digit = c - '0';
177 		else if (r == 16 && ((c >= 'A' && c <= 'F') ||
178 				     (c >= 'a' && c <= 'f'))) {
179 		    if (c >= 'a')
180 			digit = c - 'a' + 10;
181 		    else if (c >= 'A')
182 			digit = c - 'A' + 10;
183 		}
184 		else
185 		    break;
186 		db_tok_number = db_tok_number * r + digit;
187 		c = db_read_char();
188 	    }
189 	    if ((c >= '0' && c <= '9') ||
190 		(c >= 'A' && c <= 'Z') ||
191 		(c >= 'a' && c <= 'z') ||
192 		(c == '_'))
193 	    {
194 		db_error("Bad character in number\n");
195 		db_flush_lex();
196 		return (tEOF);
197 	    }
198 	    db_unread_char(c);
199 	    return (tNUMBER);
200 	}
201 	if ((c >= 'A' && c <= 'Z') ||
202 	    (c >= 'a' && c <= 'z') ||
203 	    c == '_' || c == '\\')
204 	{
205 	    /* string */
206 	    char *cp;
207 
208 	    cp = db_tok_string;
209 	    if (c == '\\') {
210 		c = db_read_char();
211 		if (c == '\n' || c == -1)
212 		    db_error("Bad escape\n");
213 	    }
214 	    *cp++ = c;
215 	    while (1) {
216 		c = db_read_char();
217 		if ((c >= 'A' && c <= 'Z') ||
218 		    (c >= 'a' && c <= 'z') ||
219 		    (c >= '0' && c <= '9') ||
220 		    c == '_' || c == '\\' || c == ':')
221 		{
222 		    if (c == '\\') {
223 			c = db_read_char();
224 			if (c == '\n' || c == -1)
225 			    db_error("Bad escape\n");
226 		    }
227 		    *cp++ = c;
228 		    if (cp == db_tok_string+sizeof(db_tok_string)) {
229 			db_error("String too long\n");
230 			db_flush_lex();
231 			return (tEOF);
232 		    }
233 		    continue;
234 		}
235 		else {
236 		    *cp = '\0';
237 		    break;
238 		}
239 	    }
240 	    db_unread_char(c);
241 	    return (tIDENT);
242 	}
243 
244 	switch (c) {
245 	    case '+':
246 		return (tPLUS);
247 	    case '-':
248 		return (tMINUS);
249 	    case '.':
250 		c = db_read_char();
251 		if (c == '.')
252 		    return (tDOTDOT);
253 		db_unread_char(c);
254 		return (tDOT);
255 	    case '*':
256 		return (tSTAR);
257 	    case '/':
258 		return (tSLASH);
259 	    case '=':
260 		return (tEQ);
261 	    case '%':
262 		return (tPCT);
263 	    case '#':
264 		return (tHASH);
265 	    case '(':
266 		return (tLPAREN);
267 	    case ')':
268 		return (tRPAREN);
269 	    case ',':
270 		return (tCOMMA);
271 	    case '"':
272 		return (tDITTO);
273 	    case '$':
274 		return (tDOLLAR);
275 	    case '!':
276 		return (tEXCL);
277 	    case '<':
278 		c = db_read_char();
279 		if (c == '<')
280 		    return (tSHIFT_L);
281 		db_unread_char(c);
282 		break;
283 	    case '>':
284 		c = db_read_char();
285 		if (c == '>')
286 		    return (tSHIFT_R);
287 		db_unread_char(c);
288 		break;
289 	    case -1:
290 		return (tEOF);
291 	}
292 	db_printf("Bad character\n");
293 	db_flush_lex();
294 	return (tEOF);
295 }
296