xref: /openbsd/sys/ddb/db_lex.c (revision 5abbae66)
1 /*	$OpenBSD: db_lex.c,v 1.15 2020/10/15 03:14:00 deraadt Exp $	*/
2 /*	$NetBSD: db_lex.c,v 1.8 1996/02/05 01:57:05 christos Exp $	*/
3 
4 /*
5  * Mach Operating System
6  * Copyright (c) 1993,1992,1991,1990 Carnegie Mellon University
7  * All Rights Reserved.
8  *
9  * Permission to use, copy, modify and distribute this software and its
10  * documentation is hereby granted, provided that both the copyright
11  * notice and this permission notice appear in all copies of the
12  * software, derivative works or modified versions, and any portions
13  * thereof, and that both notices appear in supporting documentation.
14  *
15  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
16  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
17  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
18  *
19  * Carnegie Mellon requests users of this software to return to
20  *
21  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
22  *  School of Computer Science
23  *  Carnegie Mellon University
24  *  Pittsburgh PA 15213-3890
25  *
26  * any improvements or extensions that they make and grant Carnegie Mellon
27  * the rights to redistribute these changes.
28  *
29  *	Author: David B. Golub, Carnegie Mellon University
30  *	Date:	7/90
31  */
32 
33 /*
34  * Lexical analyzer.
35  */
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 
39 #include <machine/db_machdep.h>
40 
41 #include <ddb/db_lex.h>
42 #include <ddb/db_output.h>
43 #include <ddb/db_command.h>
44 #include <ddb/db_extern.h>
45 #include <ddb/db_var.h>
46 
47 char	db_line[120];
48 char *	db_lp, *db_endlp;
49 
50 db_expr_t db_tok_number;
51 char	db_tok_string[TOK_STRING_SIZE];
52 
53 void db_flush_line(void);
54 int db_read_char(void);
55 void db_unread_char(int);
56 
57 int
db_read_line(void)58 db_read_line(void)
59 {
60 	int	i;
61 
62 	i = db_readline(db_line, sizeof(db_line));
63 	if (i == 0)
64 		return (0);	/* EOI */
65 	db_lp = db_line;
66 	db_endlp = db_lp + i;
67 	return (i);
68 }
69 
70 void
db_flush_line(void)71 db_flush_line(void)
72 {
73 	db_lp = db_line;
74 	db_endlp = db_line;
75 }
76 
77 int	db_look_char = 0;
78 
79 int
db_read_char(void)80 db_read_char(void)
81 {
82 	int	c;
83 
84 	if (db_look_char != 0) {
85 		c = db_look_char;
86 		db_look_char = 0;
87 	} else if (db_lp >= db_endlp)
88 		c = -1;
89 	else
90 		c = *db_lp++;
91 	return (c);
92 }
93 
94 void
db_unread_char(int c)95 db_unread_char(int c)
96 {
97 	db_look_char = c;
98 }
99 
/*
 * One-token pushback slot shared by db_read_token() and
 * db_unread_token().  Zero means "empty".
 */
int	db_look_token = 0;

/*
 * Push token `t' back so that the next db_read_token() returns it
 * without calling the lexer.  Only one token can be held; a later call
 * replaces an earlier one.
 */
void
db_unread_token(int t)
{
	db_look_token = t;
}
107 
108 int
db_read_token(void)109 db_read_token(void)
110 {
111 	int	t;
112 
113 	if (db_look_token) {
114 		t = db_look_token;
115 		db_look_token = 0;
116 	} else
117 		t = db_lex();
118 	return (t);
119 }
120 
121 void
db_flush_lex(void)122 db_flush_lex(void)
123 {
124 	db_flush_line();
125 	db_look_char = 0;
126 	db_look_token = 0;
127 }
128 
129 int
db_lex(void)130 db_lex(void)
131 {
132 	int	c;
133 
134 	c = db_read_char();
135 	while (c <= ' ' || c > '~') {
136 		if (c == '\n' || c == -1)
137 			return (tEOL);
138 		c = db_read_char();
139 	}
140 
141 	if (c >= '0' && c <= '9') {
142 		/* number */
143 		int	r, digit = 0;
144 
145 		if (c > '0')
146 			r = db_radix;
147 		else {
148 			c = db_read_char();
149 			if (c == 'O' || c == 'o')
150 				r = 8;
151 			else if (c == 'T' || c == 't')
152 				r = 10;
153 			else if (c == 'X' || c == 'x')
154 				r = 16;
155 			else {
156 				r = db_radix;
157 				db_unread_char(c);
158 			}
159 			c = db_read_char();
160 		}
161 		db_tok_number = 0;
162 		for (;;) {
163 			if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
164 				digit = c - '0';
165 			else if (r == 16 && ((c >= 'A' && c <= 'F') ||
166 			    (c >= 'a' && c <= 'f'))) {
167 				if (c >= 'a')
168 					digit = c - 'a' + 10;
169 				else if (c >= 'A')
170 					digit = c - 'A' + 10;
171 			} else
172 				break;
173 			db_tok_number = db_tok_number * r + digit;
174 			c = db_read_char();
175 		}
176 		if ((c >= '0' && c <= '9') ||
177 		    (c >= 'A' && c <= 'Z') ||
178 		    (c >= 'a' && c <= 'z') ||
179 		    (c == '_')) {
180 			db_error("Bad character in number\n");
181 			/*NOTREACHED*/
182 		}
183 		db_unread_char(c);
184 		return (tNUMBER);
185 	}
186 	if ((c >= 'A' && c <= 'Z') ||
187 	    (c >= 'a' && c <= 'z') ||
188 	    c == '_' || c == '\\') {
189 		/* string */
190 		char *cp;
191 
192 		cp = db_tok_string;
193 		if (c == '\\') {
194 			c = db_read_char();
195 			if (c == '\n' || c == -1) {
196 				db_error("Bad escape\n");
197 				/*NOTREACHED*/
198 			}
199 		}
200 		*cp++ = c;
201 		while (1) {
202 			c = db_read_char();
203 			if ((c >= 'A' && c <= 'Z') ||
204 			    (c >= 'a' && c <= 'z') ||
205 			    (c >= '0' && c <= '9') ||
206 			    c == '_' || c == '\\' || c == ':') {
207 				if (c == '\\') {
208 					c = db_read_char();
209 					if (c == '\n' || c == -1) {
210 						db_error("Bad escape\n");
211 						/*NOTREACHED*/
212 					}
213 				}
214 				*cp++ = c;
215 				if (cp == db_tok_string+sizeof(db_tok_string)) {
216 					db_error("String too long\n");
217 					/*NOTREACHED*/
218 				}
219 				continue;
220 			} else {
221 				*cp = '\0';
222 				break;
223 			}
224 		}
225 		db_unread_char(c);
226 		return (tIDENT);
227 	}
228 
229 	switch (c) {
230 	case '+':
231 		return (tPLUS);
232 	case '-':
233 		return (tMINUS);
234 	case '.':
235 		c = db_read_char();
236 		if (c == '.')
237 			return (tDOTDOT);
238 		db_unread_char(c);
239 		return (tDOT);
240 	case '*':
241 		return (tSTAR);
242 	case '/':
243 		return (tSLASH);
244 	case '=':
245 		return (tEQ);
246 	case '%':
247 		return (tPCT);
248 	case '#':
249 		return (tHASH);
250 	case '(':
251 		return (tLPAREN);
252 	case ')':
253 		return (tRPAREN);
254 	case ',':
255 		return (tCOMMA);
256 	case '"':
257 		return (tDITTO);
258 	case '$':
259 		return (tDOLLAR);
260 	case '!':
261 		return (tEXCL);
262 	case '<':
263 		c = db_read_char();
264 		if (c == '<')
265 			return (tSHIFT_L);
266 		db_unread_char(c);
267 		break;
268 	case '>':
269 		c = db_read_char();
270 		if (c == '>')
271 			return (tSHIFT_R);
272 		db_unread_char(c);
273 		break;
274 	case -1:
275 		return (tEOF);
276 	}
277 	db_printf("Bad character\n");
278 	db_flush_lex();
279 	return (tEOF);
280 }
281