1 /*****************************************************************************
2  *  Written by Chris Dunlap <cdunlap@llnl.gov>.
3  *  Copyright (C) 2007-2018 Lawrence Livermore National Security, LLC.
4  *  Copyright (C) 2001-2007 The Regents of the University of California.
5  *  UCRL-CODE-2002-009.
6  *
7  *  This file is part of ConMan: The Console Manager.
8  *  For details, see <https://dun.github.io/conman/>.
9  *
10  *  ConMan is free software: you can redistribute it and/or modify it under
11  *  the terms of the GNU General Public License as published by the Free
12  *  Software Foundation, either version 3 of the License, or (at your option)
13  *  any later version.
14  *
15  *  ConMan is distributed in the hope that it will be useful, but WITHOUT
16  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18  *  for more details.
19  *
20  *  You should have received a copy of the GNU General Public License along
21  *  with ConMan.  If not, see <http://www.gnu.org/licenses/>.
22  *****************************************************************************
23  *  Refer to "lex.h" for documentation on public functions.
24  *****************************************************************************/
25 
26 
27 #if HAVE_CONFIG_H
28 #  include <config.h>
29 #endif /* HAVE_CONFIG_H */
30 
31 #include <assert.h>
32 #include <ctype.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include "lex.h"
37 
38 
39 /*******************\
40 **  Out of Memory  **
41 \*******************/
42 
/*  Select the out-of-memory handler used when an allocation fails:
 *    - if WITH_OOMF is defined, any existing macro is dropped and an
 *      externally-defined out_of_memory() function is declared;
 *    - otherwise, unless a macro named out_of_memory already exists,
 *      out_of_memory() expands to (NULL).
 */
#if defined(WITH_OOMF)
#  undef out_of_memory
   extern void * out_of_memory(void);
#elif !defined(out_of_memory)
#  define out_of_memory() (NULL)
#endif /* WITH_OOMF */
51 
52 
53 /***************\
54 **  Constants  **
55 \***************/
56 
57 #define LEX_MAGIC 0xDEADBEEF
58 
59 
60 /****************\
61 **  Data Types  **
62 \****************/
63 
64 struct lexer_state {
65     char          *pos;                 /* current ptr in buffer             */
66     char         **toks;                /* array of recognized strings       */
67     int            numtoks;             /* number of strings in toks[]       */
68     char           text[LEX_MAX_STR];   /* tmp buffer for lexed strings      */
69     int            prev;                /* prev token returned by lex_next() */
70     int            line;                /* current line number in buffer     */
71     int            gotEOL;              /* true if next token is on new line */
72 #ifndef NDEBUG
73     unsigned int   magic;               /* sentinel for asserting validity   */
74 #endif /* NDEBUG */
75 };
76 
77 
78 /****************\
79 **  Prototypes  **
80 \****************/
81 
82 #ifndef NDEBUG
83 static int validate_sorted_tokens(char *toks[]);
84 #endif /* !NDEBUG */
85 static int lookup_token(char *str, char *toks[], int numtoks);
86 
87 
88 /************\
89 **  Macros  **
90 \************/
91 
/*  Smaller of (x) and (y), unless a MIN macro has already been provided.
 *  Both arguments may be evaluated more than once, so they must not have
 *    side effects.
 */
#if !defined(MIN)
#  define MIN(x,y) (((x) <= (y)) ? (x) : (y))
#endif /* !MIN */
95 
96 
97 /***************\
98 **  Functions  **
99 \***************/
100 
lex_create(void * buf,char * toks[])101 Lex lex_create(void *buf, char *toks[])
102 {
103     Lex l;
104 
105     assert(buf != NULL);
106     assert(validate_sorted_tokens(toks) >= 0);
107 
108     if (!(l = (Lex) malloc(sizeof(struct lexer_state)))) {
109         return(out_of_memory());
110     }
111     l->pos = buf;
112     l->toks = toks;
113     if (!toks) {
114         l->numtoks = 0;
115     }
116     else {
117         int n;
118         for (n=0; toks[n] != NULL; n++) {;}
119         l->numtoks = n;
120     }
121     l->text[0] = '\0';
122     l->prev = 0;
123     l->line = 0;
124     l->gotEOL = 1;
125     assert((l->magic = LEX_MAGIC));     /* set magic via assert abuse */
126     return(l);
127 }
128 
129 
void lex_destroy(Lex l)
{
/*  Releases the lexer (l).  The buffer and token array it was created with
 *    are not freed here.
 */
    assert(l != NULL);
    assert(l->magic == LEX_MAGIC);

#ifndef NDEBUG
    /*  Overwrite the sentinel so a stale handle trips the magic assertions.
     */
    l->magic = 1;
#endif /* !NDEBUG */
    free(l);
}
139 
140 
int lex_next(Lex l)
{
/*  Scans the next token from the lexer's buffer, stores its text in
 *    (l->text), records it in (l->prev), and returns it.
 *  Returns LEX_EOF at the terminating NUL, LEX_EOL at a CR, LF, or CR/LF,
 *    LEX_STR for a quoted string, LEX_ERR for an unmatched quote, LEX_INT
 *    for an integer, the result of lookup_token() for a word, or the
 *    character itself for any other single character.
 *  Spaces, tabs, VT, FF, '#' comments, and backslash-newline line
 *    continuations are skipped.
 *  Token text longer than (LEX_MAX_STR - 1) bytes is silently truncated.
 *  Characters are converted to unsigned char before being handed to the
 *    <ctype.h> classifiers: passing a negative char value (e.g., a byte
 *    whose high bit was set by lex_encode()) is undefined behavior.
 */
    char *p;
    int len;

    assert(l != NULL);
    assert(l->magic == LEX_MAGIC);

    if (l->gotEOL) {                    /* deferred line count increment */
        l->line++;
        l->gotEOL = 0;
    }

    for (;;) {
        switch (*l->pos) {
        case '\0':                      /* EOF; pos is not advanced */
            l->text[0] = '\0';
            return(l->prev = LEX_EOF);
        case ' ':                       /* ignore whitespace */
        case '\t':
        case '\v':
        case '\f':
            l->pos++;
            break;
        case '#':                       /* ignore comments up to the EOL */
            do {
                l->pos++;
            } while (*l->pos && (*l->pos != '\n') && (*l->pos != '\r'));
            break;
        case '\r':                      /* EOL: CR, LF, CR/LF */
            if (*(l->pos+1) == '\n')
                l->pos++;
            /* fall-thru... whee! */
        case '\n':
            l->text[0] = *l->pos++;
            l->text[1] = '\0';
            l->gotEOL = 1;              /* line count is bumped on next call */
            return(l->prev = LEX_EOL);
        case '"':
        case '\'':
            /*  Find the matching close quote; a string may not span lines.
             */
            for (p=l->pos+1; *p && *p!=*l->pos && *p!='\r' && *p!='\n'; p++){;}
            if (*p == *l->pos) {        /* valid string */
                len = MIN(p - l->pos - 1, LEX_MAX_STR - 1);
                memcpy(l->text, l->pos + 1, len);
                l->text[len] = '\0';
                l->pos = p + 1;
                return(l->prev = LEX_STR);
            }
            else {                      /* unmatched quote */
                l->text[0] = '\0';
                l->pos = p;             /* resume at the EOL or EOF */
                return(l->prev = LEX_ERR);
            }
        case '\\':
            if (*(l->pos+1) == '\n') {  /* ignore EOL, continue to next line */
                l->pos += 2;
                l->line++;
                break;
            }
            else if ((*(l->pos+1) == '\r') && (*(l->pos+2) == '\n')) {
                l->pos += 3;
                l->line++;
                break;
            }
            /* fall-thru... whee! */
        default:
            if (isalpha((unsigned char) *l->pos) || (*l->pos == '_')) {
                /* word: [A-Za-z_][A-Za-z0-9_]* */
                for (p=l->pos+1;
                        *p && (isalnum((unsigned char) *p) || *p=='_'); p++) {;}
                len = MIN(p - l->pos, LEX_MAX_STR - 1);
                memcpy(l->text, l->pos, len);
                l->text[len] = '\0';
                l->pos = p;
                return(l->prev = lookup_token(l->text, l->toks, l->numtoks));
            }
            else if (isdigit((unsigned char) *l->pos)
              || (((*l->pos == '-') || (*l->pos == '+'))
              && isdigit((unsigned char) *(l->pos+1)))) {
                /* integer: [-+]?[0-9]+ */
                for (p=l->pos+1; *p && isdigit((unsigned char) *p); p++) {;}
                len = MIN(p - l->pos, LEX_MAX_STR - 1);
                memcpy(l->text, l->pos, len);
                l->text[len] = '\0';
                l->pos = p;
                return(l->prev = LEX_INT);
            }
            /*  NOTE(review): where char is signed, a byte >= 0x80 yields a
             *    negative token value here; confirm this cannot collide
             *    with the LEX_* constants declared in "lex.h".
             */
            l->text[0] = *l->pos++;     /* single-character token */
            l->text[1] = '\0';
            return(l->prev = l->text[0]);
        }
    }
}
233 
234 
int lex_prev(Lex l)
{
/*  Returns the token most recently returned by lex_next() for lexer (l);
 *    this is 0 if lex_next() has not been called since lex_create().
 */
    int last_token;

    assert(l != NULL);
    assert(l->magic == LEX_MAGIC);

    last_token = l->prev;
    return(last_token);
}
241 
242 
int lex_line(Lex l)
{
/*  Returns the current line number tracked by lexer (l).
 */
    int line_number;

    assert(l != NULL);
    assert(l->magic == LEX_MAGIC);

    line_number = l->line;
    return(line_number);
}
249 
250 
const char * lex_text(Lex l)
{
/*  Returns the text of the most recently lexed token.  The string lives in
 *    the lexer's internal buffer, so it is overwritten by the next call to
 *    lex_next() and becomes invalid once the lexer is destroyed.
 */
    const char *token_text;

    assert(l != NULL);
    assert(l->magic == LEX_MAGIC);

    token_text = l->text;
    return(token_text);
}
257 
258 
const char * lex_tok_to_str(Lex l, int tok)
{
/*  Maps the token (tok) back to its string in the lexer's token array.
 *  Returns the matching string, or NULL if the lexer or its token array is
 *    NULL, or if (tok) does not correspond to an entry in that array.
 */
    int idx;

    assert(l != NULL);
    assert(l->magic == LEX_MAGIC);
    assert(l->toks != NULL);
    assert(l->toks[l->numtoks] == NULL);

    /*  The assertions vanish under NDEBUG, so guard again for release.
     */
    if (l == NULL) {
        return(NULL);
    }
    if (l->toks == NULL) {
        return(NULL);
    }

    idx = tok - LEX_TOK_OFFSET;         /* tokens start at LEX_TOK_OFFSET */
    if ((idx < 0) || (idx >= l->numtoks)) {
        return(NULL);
    }
    return((const char *) l->toks[idx]);
}
277 
278 
#if ! HAVE_STRCASECMP
static int xstrcasecmp(const char *s1, const char *s2)
{
/*  Compares the two strings (s1) and (s2), ignoring the case of the chars.
 *  Returns an integer less than, equal to, or greater than zero if (s1) is
 *    found to be less than, equal to, or greater than (s2), respectively.
 *  Bytes are compared as unsigned char after folding to lower case, which
 *    matches how POSIX specifies strcasecmp(); this keeps the ordering of
 *    characters such as '_' relative to letters identical whether or not
 *    the system strcasecmp() is in use.  Going through unsigned char also
 *    avoids passing negative values to tolower(), which is undefined.
 */
    const unsigned char *p = (const unsigned char *) s1;
    const unsigned char *q = (const unsigned char *) s2;

    while (*p && (tolower(*p) == tolower(*q))) {
        p++;
        q++;
    }
    return(tolower(*p) - tolower(*q));
}
#else
#  define xstrcasecmp strcasecmp
#endif /* !HAVE_STRCASECMP */
295 
296 
#ifndef NDEBUG
static int validate_sorted_tokens(char *toks[])
{
/*  Checks that the NULL-terminated array of strings (toks) is in
 *    non-descending order according to xstrcasecmp().
 *  Returns 0 if the array is sorted (an empty or single-element array
 *    counts as sorted); returns -1 if it is unsorted or if (toks) is NULL.
 */
    size_t i;

    if (toks == NULL) {
        return(-1);
    }
    if (toks[0] == NULL) {
        return(0);
    }
    /*  Compare each string against its predecessor until the terminator.
     */
    for (i = 1; toks[i] != NULL; i++) {
        if (xstrcasecmp(toks[i - 1], toks[i]) > 0) {
            return(-1);
        }
    }
    return(0);
}
#endif /* !NDEBUG */
318 
319 
lookup_token(char * str,char * toks[],int numtoks)320 static int lookup_token(char *str, char *toks[], int numtoks)
321 {
322 /*  Determines if and where the string (str) is in the NULL-terminated array
323  *    of (numtoks) sorted strings (toks).
324  *  Returns the token corresponding to the matched string in the array (toks),
325  *    or the generic string token if no match is found.
326  */
327     int low, middle, high;
328     int x;
329 
330     if (toks) {
331         low = 0;
332         high = numtoks - 1;
333         while (low <= high) {
334             middle = (low + high) / 2;
335             x = xstrcasecmp(str, toks[middle]);
336             if (x < 0)
337                 high = middle - 1;
338             else if (x > 0)
339                 low = middle + 1;
340             else                        /* token found, whoohoo! */
341                 return(middle + LEX_TOK_OFFSET);
342         }
343     }
344     return(LEX_STR);                    /* token not found; doh! */
345 }
346 
347 
char * lex_encode(char *str)
{
/*  Encodes the string (str) in place by setting the high bit of every
 *    single-quote and double-quote character.
 *  With assertions enabled, every input character must already have its
 *    high bit clear.
 *  Returns (str), or NULL if (str) is NULL.
 */
    size_t i;

    if (str == NULL) {
        return(NULL);
    }
    for (i = 0; str[i] != '\0'; i++) {
        assert((str[i] & 0x80) == 0);
        switch (str[i]) {
        case '\'':
        case '"':
            str[i] |= 0x80;             /* mark the quote as encoded */
            break;
        default:
            break;
        }
    }
    return(str);
}
361 
362 
char * lex_decode(char *str)
{
/*  Decodes the string (str) in place by clearing the high bit of every
 *    character, undoing the marking applied by lex_encode().
 *  Returns (str), or NULL if (str) is NULL.
 */
    char *cursor = str;

    if (str == NULL) {
        return(NULL);
    }
    while (*cursor != '\0') {
        *cursor = (char) (*cursor & 0x7F);
        cursor++;
    }
    return(str);
}
374 
375 
lex_parse_test(char * buf,char * toks[])376 void lex_parse_test(char *buf, char *toks[])
377 {
378     Lex l;
379     int tok;
380     int newline = 1;
381     const char *p;
382 
383     if (!buf || !(l = lex_create(buf, toks)))
384         return;
385 
386     while ((tok = lex_next(l)) != LEX_EOF) {
387         assert(lex_prev(l) == tok);
388         if (newline) {
389             printf("%3d: ", lex_line(l));
390             newline = 0;
391         }
392         switch(tok) {
393         case LEX_ERR:
394             printf("ERR\n");
395             newline = 1;
396             break;
397         case LEX_EOL:
398             printf("EOL\n");
399             newline = 1;
400             break;
401         case LEX_INT:
402             printf("INT(%d) ", atoi(lex_text(l)));
403             break;
404         case LEX_STR:
405             printf("STR(%s) ", lex_text(l));
406             break;
407         default:
408             if (tok < LEX_TOK_OFFSET)
409                 printf("CHR(%c) ", lex_text(l)[0]);
410             else if ((p = lex_tok_to_str(l, tok)))
411                 printf("TOK(%d:%s) ", tok, p);
412             else
413                 printf("\nINTERNAL ERROR: line=%d, tok=%d, str=\"%s\"\n",
414                     lex_line(l), lex_prev(l), lex_text(l));
415             break;
416         }
417     }
418     lex_destroy(l);
419     return;
420 }
421