1 /*****************************************************************************
2 * Written by Chris Dunlap <cdunlap@llnl.gov>.
3 * Copyright (C) 2007-2018 Lawrence Livermore National Security, LLC.
4 * Copyright (C) 2001-2007 The Regents of the University of California.
5 * UCRL-CODE-2002-009.
6 *
7 * This file is part of ConMan: The Console Manager.
8 * For details, see <https://dun.github.io/conman/>.
9 *
10 * ConMan is free software: you can redistribute it and/or modify it under
11 * the terms of the GNU General Public License as published by the Free
12 * Software Foundation, either version 3 of the License, or (at your option)
13 * any later version.
14 *
15 * ConMan is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 * for more details.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with ConMan. If not, see <http://www.gnu.org/licenses/>.
22 *****************************************************************************
23 * Refer to "lex.h" for documentation on public functions.
24 *****************************************************************************/
25
26
27 #if HAVE_CONFIG_H
28 # include <config.h>
29 #endif /* HAVE_CONFIG_H */
30
31 #include <assert.h>
32 #include <ctype.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include "lex.h"
37
38
39 /*******************\
40 ** Out of Memory **
41 \*******************/
42
/*
 *  Selects what out_of_memory() means for this file:
 *    - with WITH_OOMF defined, any macro of that name is discarded and an
 *      external function returning (void *) is declared instead;
 *    - otherwise, unless a macro of that name already exists, it becomes a
 *      macro that simply evaluates to NULL.
 */
#if defined(WITH_OOMF)
#  undef out_of_memory
extern void * out_of_memory(void);
#elif !defined(out_of_memory)
#  define out_of_memory() (NULL)
#endif /* WITH_OOMF */
51
52
53 /***************\
54 ** Constants **
55 \***************/
56
/*  Sentinel stored in a live lexer's (magic) member (debug builds only)
 *    and checked by the assertions at the top of each public function.
 */
#define LEX_MAGIC 0xDEADBEEF
58
59
60 /****************\
61 ** Data Types **
62 \****************/
63
64 struct lexer_state {
65 char *pos; /* current ptr in buffer */
66 char **toks; /* array of recognized strings */
67 int numtoks; /* number of strings in toks[] */
68 char text[LEX_MAX_STR]; /* tmp buffer for lexed strings */
69 int prev; /* prev token returned by lex_next() */
70 int line; /* current line number in buffer */
71 int gotEOL; /* true if next token is on new line */
72 #ifndef NDEBUG
73 unsigned int magic; /* sentinel for asserting validity */
74 #endif /* NDEBUG */
75 };
76
77
78 /****************\
79 ** Prototypes **
80 \****************/
81
/*  Forward declarations of the file-local helpers defined further below.
 *  validate_sorted_tokens() exists only in builds where assert() is active.
 */
static int lookup_token(char *str, char *toks[], int numtoks);
#if !defined(NDEBUG)
static int validate_sorted_tokens(char *toks[]);
#endif /* !NDEBUG */
86
87
88 /************\
89 ** Macros **
90 \************/
91
/*  Smaller of (x) and (y).  Function-like macro: each argument may be
 *    evaluated twice, so do not pass expressions with side effects.
 */
#if !defined(MIN)
#  define MIN(x,y) (((x) <= (y)) ? (x) : (y))
#endif /* !MIN */
95
96
97 /***************\
98 ** Functions **
99 \***************/
100
lex_create(void * buf,char * toks[])101 Lex lex_create(void *buf, char *toks[])
102 {
103 Lex l;
104
105 assert(buf != NULL);
106 assert(validate_sorted_tokens(toks) >= 0);
107
108 if (!(l = (Lex) malloc(sizeof(struct lexer_state)))) {
109 return(out_of_memory());
110 }
111 l->pos = buf;
112 l->toks = toks;
113 if (!toks) {
114 l->numtoks = 0;
115 }
116 else {
117 int n;
118 for (n=0; toks[n] != NULL; n++) {;}
119 l->numtoks = n;
120 }
121 l->text[0] = '\0';
122 l->prev = 0;
123 l->line = 0;
124 l->gotEOL = 1;
125 assert((l->magic = LEX_MAGIC)); /* set magic via assert abuse */
126 return(l);
127 }
128
129
/*
 *  Releases the lexer (l).  The buffer and token table supplied to
 *    lex_create() are not touched.
 */
void lex_destroy(Lex l)
{
    assert(l != NULL);
    assert(l->magic == LEX_MAGIC);

#ifndef NDEBUG
    l->magic = 1;                       /* overwrite sentinel before free */
#endif /* !NDEBUG */
    free(l);
}
139
140
/*
 *  Scans forward in the lexer (l) and returns the next token, which is also
 *    remembered for lex_prev().  The token's text is left in l->text.
 *
 *  Spaces, tabs, vertical tabs, form feeds, '#' comments (up to but not
 *    including the end of line), and backslash-newline continuations are
 *    skipped.  The possible results are:
 *    - LEX_EOF  at the terminating NUL (the position is not advanced);
 *    - LEX_EOL  for CR, LF, or CR/LF;
 *    - LEX_STR  for a single- or double-quoted string closed on the same
 *               line (text excludes the quotes);
 *    - LEX_ERR  for a quoted string with no closing quote on its line;
 *    - the value from lookup_token() for a word  [A-Za-z_][A-Za-z0-9_]*;
 *    - LEX_INT  for an integer  [-+]?[0-9]+;
 *    - the character itself for anything else.
 *  Text longer than (LEX_MAX_STR - 1) chars is silently truncated, although
 *    the whole lexeme is still consumed from the buffer.
 *
 *  Chars are converted to (unsigned char) before being handed to the
 *    <ctype.h> classifiers: passing a negative value other than EOF to them
 *    is undefined, and plain char may be signed.
 */
int lex_next(Lex l)
{
    char *p;
    int len;

    assert(l != NULL);
    assert(l->magic == LEX_MAGIC);

    /*  The line count is bumped here, on the call after an EOL token was
     *    returned, rather than when the EOL itself was scanned.
     */
    if (l->gotEOL) {
        l->line++;
        l->gotEOL = 0;
    }

    for (;;) {
        switch (*l->pos) {
        case '\0':                      /* EOF */
            l->text[0] = '\0';
            return(l->prev = LEX_EOF);
        case ' ':                       /* ignore whitespace */
        case '\t':
        case '\v':
        case '\f':
            l->pos++;
            break;
        case '#':                       /* ignore comments */
            do {
                l->pos++;
            } while (*l->pos && (*l->pos != '\n') && (*l->pos != '\r'));
            break;
        case '\r':                      /* EOL: CR, LF, CR/LF */
            if (*(l->pos+1) == '\n') {
                l->pos++;               /* CR/LF: step onto the LF */
            }
            /* fall through */
        case '\n':
            l->text[0] = *l->pos++;
            l->text[1] = '\0';
            l->gotEOL = 1;              /* line++ is deferred to next call */
            return(l->prev = LEX_EOL);
        case '"':
        case '\'':
            /*  Look for the matching quote, stopping at end of line/buffer.
             */
            for (p = l->pos + 1;
                    *p && (*p != *l->pos) && (*p != '\r') && (*p != '\n');
                    p++) {
                ;
            }
            if (*p == *l->pos) {        /* valid string */
                len = (int) MIN(p - l->pos - 1, LEX_MAX_STR - 1);
                memcpy(l->text, l->pos + 1, (size_t) len);
                l->text[len] = '\0';
                l->pos = p + 1;         /* resume after the closing quote */
                return(l->prev = LEX_STR);
            }
            else {                      /* unmatched quote */
                l->text[0] = '\0';
                l->pos = p;             /* resume at the EOL or EOF char */
                return(l->prev = LEX_ERR);
            }
        case '\\':
            if (*(l->pos+1) == '\n') {  /* ignore EOL, continue to next line */
                l->pos += 2;
                l->line++;
                break;
            }
            else if ((*(l->pos+1) == '\r') && (*(l->pos+2) == '\n')) {
                l->pos += 3;
                l->line++;
                break;
            }
            /* fall through: a lone backslash is a single-character token */
        default:
            if (isalpha((unsigned char) *l->pos) || (*l->pos == '_')) {
                /* word: [A-Za-z_][A-Za-z0-9_]* */
                for (p = l->pos + 1;
                        *p && (isalnum((unsigned char) *p) || (*p == '_'));
                        p++) {
                    ;
                }
                len = (int) MIN(p - l->pos, LEX_MAX_STR - 1);
                memcpy(l->text, l->pos, (size_t) len);
                l->text[len] = '\0';
                l->pos = p;
                return(l->prev = lookup_token(l->text, l->toks, l->numtoks));
            }
            else if (isdigit((unsigned char) *l->pos)
                    || (((*l->pos == '-') || (*l->pos == '+'))
                        && isdigit((unsigned char) *(l->pos+1)))) {
                /* integer: [-+]?[0-9]+ */
                for (p = l->pos + 1; *p && isdigit((unsigned char) *p); p++) {
                    ;
                }
                len = (int) MIN(p - l->pos, LEX_MAX_STR - 1);
                memcpy(l->text, l->pos, (size_t) len);
                l->text[len] = '\0';
                l->pos = p;
                return(l->prev = LEX_INT);
            }
            /*  NOTE(review): where plain char is signed, a byte >= 0x80
             *    yields a negative token value here; confirm against the
             *    token numbering in "lex.h" that this cannot collide.
             */
            l->text[0] = *l->pos++;     /* single-character token */
            l->text[1] = '\0';
            return(l->prev = l->text[0]);
        }
    }
}
233
234
/*
 *  Returns the token most recently produced by lex_next() on the lexer (l),
 *    or 0 if lex_next() has not been called since lex_create().
 */
int lex_prev(Lex l)
{
    assert(l != NULL);
    assert(l->magic == LEX_MAGIC);

    return l->prev;
}
241
242
/*
 *  Returns the line number currently recorded by the lexer (l); this is 0
 *    until the first call to lex_next().
 */
int lex_line(Lex l)
{
    assert(l != NULL);
    assert(l->magic == LEX_MAGIC);

    return l->line;
}
249
250
/*
 *  Returns the text of the token most recently scanned by the lexer (l).
 *  The string lives inside the lexer itself: it is overwritten by the next
 *    call to lex_next() and becomes invalid after lex_destroy().
 */
const char * lex_text(Lex l)
{
    assert(l != NULL);
    assert(l->magic == LEX_MAGIC);

    return l->text;
}
257
258
/*
 *  Maps the token (tok) back to its string in the token table that was
 *    given to lex_create().
 *  Returns that string, or NULL if the lexer has no token table or (tok)
 *    does not fall within [LEX_TOK_OFFSET, LEX_TOK_OFFSET + numtoks).
 */
const char * lex_tok_to_str(Lex l, int tok)
{
    int idx;

    assert(l != NULL);
    assert(l->magic == LEX_MAGIC);
    assert(l->toks != NULL);
    assert(l->toks[l->numtoks] == NULL);

    /*  Same checks again for builds where assert() is compiled out.
     */
    if ((l == NULL) || (l->toks == NULL)) {
        return NULL;
    }
    idx = tok - LEX_TOK_OFFSET;
    if ((idx < 0) || (idx >= l->numtoks)) {
        return NULL;
    }
    return l->toks[idx];
}
277
278
#if ! HAVE_STRCASECMP
/*
 *  Fallback for platforms without strcasecmp().
 *  Compares the strings (s1) and (s2), ignoring the case of the chars.
 *  Returns an integer less than, equal to, or greater than zero if (s1) is
 *    found to be less than, equal to, or greater than (s2), respectively.
 *
 *  To order strings the same way strcasecmp() does, chars are compared as
 *    (unsigned char) after folding to lower case.  Folding to upper case
 *    would sort the chars lying between 'Z' and 'a' (such as '_') on the
 *    other side of the letters, so a token table sorted for one routine
 *    would appear unsorted to the other.  The conversion to (unsigned char)
 *    also keeps negative char values away from tolower(), for which they
 *    are undefined.
 */
static int xstrcasecmp(const char *s1, const char *s2)
{
    const unsigned char *p = (const unsigned char *) s1;
    const unsigned char *q = (const unsigned char *) s2;

    while (*p && (tolower(*p) == tolower(*q))) {
        p++;
        q++;
    }
    return(tolower(*p) - tolower(*q));
}
#else
#  define xstrcasecmp strcasecmp
#endif /* !HAVE_STRCASECMP */
295
296
#ifndef NDEBUG
/*
 *  Checks that the NULL-terminated array of strings (toks) is in
 *    non-descending order according to xstrcasecmp().
 *  Returns 0 if every adjacent pair is in order (an empty or one-element
 *    array trivially qualifies); returns -1 if some pair is out of order
 *    or if (toks) itself is NULL.
 */
static int validate_sorted_tokens(char *toks[])
{
    int i;

    if (toks == NULL) {
        return -1;
    }
    if (toks[0] == NULL) {
        return 0;
    }
    for (i = 1; toks[i] != NULL; i++) {
        if (xstrcasecmp(toks[i - 1], toks[i]) > 0) {
            return -1;
        }
    }
    return 0;
}
#endif /* !NDEBUG */
318
319
lookup_token(char * str,char * toks[],int numtoks)320 static int lookup_token(char *str, char *toks[], int numtoks)
321 {
322 /* Determines if and where the string (str) is in the NULL-terminated array
323 * of (numtoks) sorted strings (toks).
324 * Returns the token corresponding to the matched string in the array (toks),
325 * or the generic string token if no match is found.
326 */
327 int low, middle, high;
328 int x;
329
330 if (toks) {
331 low = 0;
332 high = numtoks - 1;
333 while (low <= high) {
334 middle = (low + high) / 2;
335 x = xstrcasecmp(str, toks[middle]);
336 if (x < 0)
337 high = middle - 1;
338 else if (x > 0)
339 low = middle + 1;
340 else /* token found, whoohoo! */
341 return(middle + LEX_TOK_OFFSET);
342 }
343 }
344 return(LEX_STR); /* token not found; doh! */
345 }
346
347
/*
 *  Encodes the string (str) in place by setting the high bit (0x80) of
 *    every single-quote and double-quote char; all other chars are left
 *    alone.  In builds with assertions enabled, every char is expected to
 *    arrive with its high bit clear.
 *  Returns (str), or NULL if (str) is NULL.
 */
char * lex_encode(char *str)
{
    char *cur;

    if (str == NULL) {
        return NULL;
    }
    cur = str;
    while (*cur != '\0') {
        assert((*cur & 0x80) == 0);
        switch (*cur) {
        case '\'':
        case '"':
            *cur |= 0x80;               /* flag the quote char */
            break;
        default:
            break;
        }
        cur++;
    }
    return str;
}
361
362
/*
 *  Decodes the string (str) in place by clearing the high bit (0x80) of
 *    every char, whether or not that bit was set by lex_encode().
 *  Returns (str), or NULL if (str) is NULL.
 */
char * lex_decode(char *str)
{
    char *cur;

    if (str == NULL) {
        return NULL;
    }
    for (cur = str; *cur != '\0'; ++cur) {
        *cur = *cur & 0x7F;
    }
    return str;
}
374
375
lex_parse_test(char * buf,char * toks[])376 void lex_parse_test(char *buf, char *toks[])
377 {
378 Lex l;
379 int tok;
380 int newline = 1;
381 const char *p;
382
383 if (!buf || !(l = lex_create(buf, toks)))
384 return;
385
386 while ((tok = lex_next(l)) != LEX_EOF) {
387 assert(lex_prev(l) == tok);
388 if (newline) {
389 printf("%3d: ", lex_line(l));
390 newline = 0;
391 }
392 switch(tok) {
393 case LEX_ERR:
394 printf("ERR\n");
395 newline = 1;
396 break;
397 case LEX_EOL:
398 printf("EOL\n");
399 newline = 1;
400 break;
401 case LEX_INT:
402 printf("INT(%d) ", atoi(lex_text(l)));
403 break;
404 case LEX_STR:
405 printf("STR(%s) ", lex_text(l));
406 break;
407 default:
408 if (tok < LEX_TOK_OFFSET)
409 printf("CHR(%c) ", lex_text(l)[0]);
410 else if ((p = lex_tok_to_str(l, tok)))
411 printf("TOK(%d:%s) ", tok, p);
412 else
413 printf("\nINTERNAL ERROR: line=%d, tok=%d, str=\"%s\"\n",
414 lex_line(l), lex_prev(l), lex_text(l));
415 break;
416 }
417 }
418 lex_destroy(l);
419 return;
420 }
421