1 /*
2 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3 *
4 * Permission to use, copy, modify, and/or distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
10 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
13 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
14 * PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 /* $Id: lex.c,v 1.15 2022/06/25 12:14:18 jsg Exp $ */
18
19 /*! \file */
20
21 #include <stdlib.h>
22
23 #include <isc/buffer.h>
24
25 #include <isc/lex.h>
26
27 #include <errno.h>
28 #include <string.h>
29 #include <isc/util.h>
30
31 #include "unix/errno2result.h"
32
/*
 * One input source (a stdio stream or an isc_buffer_t) on a lexer's stack
 * of sources.
 */
typedef struct inputsource {
	/*
	 * Sticky status: isc_lex_gettoken() returns this without reading
	 * anything once it is not ISC_R_SUCCESS.
	 */
	isc_result_t			result;
	/* Nonzero: 'input' is a FILE *; zero: it is used as an isc_buffer_t *. */
	int			is_file;
	/* Nonzero: isc_lex_close() calls fclose() on a file input. */
	int			need_close;
	/* Set when the input has been read to its end. */
	int			at_eof;
	/*
	 * Copy of the lexer's last_was_eol taken when this source was
	 * created; copied back into the lexer by isc_lex_close().
	 */
	int			last_was_eol;
	/*
	 * Characters read from 'input'.  Un-reading is done by moving this
	 * buffer's 'current' offset back.
	 */
	isc_buffer_t *			pushback;
	/*
	 * Consumed length of 'pushback' recorded while no token had started
	 * yet; isc_lex_getlasttokentext() uses it as the offset of the
	 * token text.
	 */
	unsigned int			ignored;
	/* The underlying input object; see 'is_file'. */
	void *				input;
	/* strdup()ed copy of the source name; freed by isc_lex_close(). */
	char *				name;
	/* Current line number; starts at 1 and follows '\n' characters. */
	unsigned long			line;
	/*
	 * Value of 'line' at the start of the last isc_lex_gettoken() call;
	 * restored by isc_lex_ungettoken().
	 */
	unsigned long			saved_line;
	/* Linkage in the owning lexer's 'sources' list. */
	ISC_LINK(struct inputsource)	link;
} inputsource;
47
/*
 * Lexer state shared by all of its input sources.
 */
struct isc_lex {
	/* Unlocked. */
	/*
	 * Capacity of 'data' in characters, not counting the terminating
	 * NUL ('data' is allocated with max_token + 1 bytes).
	 */
	size_t				max_token;
	/* Buffer in which the text of string tokens is accumulated. */
	char *				data;
	/* Bitmask of ISC_LEXCOMMENT_* styles that are recognised. */
	unsigned int			comments;
	/*
	 * Set to 1 at creation; isc_lex_gettoken() only looks for comments
	 * while this is nonzero.
	 */
	int			comment_ok;
	/*
	 * Maintained by the start state of isc_lex_gettoken(): 1 after a
	 * newline, 0 after any other character.
	 */
	int			last_was_eol;
	/*
	 * Initialised to 0.  Saved into saved_paren_count at the start of
	 * isc_lex_gettoken() and restored by isc_lex_ungettoken().
	 */
	unsigned int			paren_count;
	unsigned int			saved_paren_count;
	/*
	 * Indexed by character value; a nonzero entry makes that character
	 * a 'special' token and a delimiter for strings.
	 */
	isc_lexspecials_t		specials;
	/* Stack of input sources; the head is the one currently read. */
	LIST(struct inputsource)	sources;
};
60
61 static inline isc_result_t
grow_data(isc_lex_t * lex,size_t * remainingp,char ** currp,char ** prevp)62 grow_data(isc_lex_t *lex, size_t *remainingp, char **currp, char **prevp) {
63 char *tmp;
64
65 tmp = malloc(lex->max_token * 2 + 1);
66 if (tmp == NULL)
67 return (ISC_R_NOMEMORY);
68 memmove(tmp, lex->data, lex->max_token + 1);
69 *currp = tmp + (*currp - lex->data);
70 if (*prevp != NULL)
71 *prevp = tmp + (*prevp - lex->data);
72 free(lex->data);
73 lex->data = tmp;
74 *remainingp += lex->max_token;
75 lex->max_token *= 2;
76 return (ISC_R_SUCCESS);
77 }
78
79 isc_result_t
isc_lex_create(size_t max_token,isc_lex_t ** lexp)80 isc_lex_create(size_t max_token, isc_lex_t **lexp) {
81 isc_lex_t *lex;
82
83 /*
84 * Create a lexer.
85 */
86 REQUIRE(lexp != NULL && *lexp == NULL);
87
88 if (max_token == 0U)
89 max_token = 1;
90
91 lex = malloc(sizeof(*lex));
92 if (lex == NULL)
93 return (ISC_R_NOMEMORY);
94 lex->data = malloc(max_token + 1);
95 if (lex->data == NULL) {
96 free(lex);
97 return (ISC_R_NOMEMORY);
98 }
99 lex->max_token = max_token;
100 lex->comments = 0;
101 lex->comment_ok = 1;
102 lex->last_was_eol = 1;
103 lex->paren_count = 0;
104 lex->saved_paren_count = 0;
105 memset(lex->specials, 0, 256);
106 INIT_LIST(lex->sources);
107
108 *lexp = lex;
109
110 return (ISC_R_SUCCESS);
111 }
112
113 void
isc_lex_destroy(isc_lex_t ** lexp)114 isc_lex_destroy(isc_lex_t **lexp) {
115 isc_lex_t *lex;
116
117 /*
118 * Destroy the lexer.
119 */
120
121 REQUIRE(lexp != NULL);
122 lex = *lexp;
123
124 while (!EMPTY(lex->sources))
125 RUNTIME_CHECK(isc_lex_close(lex) == ISC_R_SUCCESS);
126 if (lex->data != NULL)
127 free(lex->data);
128 free(lex);
129
130 *lexp = NULL;
131 }
132
133 void
isc_lex_setcomments(isc_lex_t * lex,unsigned int comments)134 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments) {
135 /*
136 * Set allowed lexer commenting styles.
137 */
138
139 lex->comments = comments;
140 }
141
142 void
isc_lex_setspecials(isc_lex_t * lex,isc_lexspecials_t specials)143 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials) {
144 /*
145 * The characters in 'specials' are returned as tokens. Along with
146 * whitespace, they delimit strings and numbers.
147 */
148
149 memmove(lex->specials, specials, 256);
150 }
151
152 static inline isc_result_t
new_source(isc_lex_t * lex,int is_file,int need_close,void * input,const char * name)153 new_source(isc_lex_t *lex, int is_file, int need_close,
154 void *input, const char *name)
155 {
156 inputsource *source;
157 isc_result_t result;
158
159 source = malloc(sizeof(*source));
160 if (source == NULL)
161 return (ISC_R_NOMEMORY);
162 source->result = ISC_R_SUCCESS;
163 source->is_file = is_file;
164 source->need_close = need_close;
165 source->at_eof = 0;
166 source->last_was_eol = lex->last_was_eol;
167 source->input = input;
168 source->name = strdup(name);
169 if (source->name == NULL) {
170 free(source);
171 return (ISC_R_NOMEMORY);
172 }
173 source->pushback = NULL;
174 result = isc_buffer_allocate(&source->pushback,
175 (unsigned int)lex->max_token);
176 if (result != ISC_R_SUCCESS) {
177 free(source->name);
178 free(source);
179 return (result);
180 }
181 source->ignored = 0;
182 source->line = 1;
183 ISC_LIST_INITANDPREPEND(lex->sources, source, link);
184
185 return (ISC_R_SUCCESS);
186 }
187
188 isc_result_t
isc_lex_openfile(isc_lex_t * lex,const char * filename)189 isc_lex_openfile(isc_lex_t *lex, const char *filename) {
190 isc_result_t result = ISC_R_SUCCESS;
191 FILE *stream = NULL;
192
193 /*
194 * Open 'filename' and make it the current input source for 'lex'.
195 */
196
197 if ((stream = fopen(filename, "r")) == NULL)
198 return (isc__errno2result(errno));
199
200 result = new_source(lex, 1, 1, stream, filename);
201 if (result != ISC_R_SUCCESS)
202 (void)fclose(stream);
203 return (result);
204 }
205
206 isc_result_t
isc_lex_close(isc_lex_t * lex)207 isc_lex_close(isc_lex_t *lex) {
208 inputsource *source;
209
210 /*
211 * Close the most recently opened object (i.e. file or buffer).
212 */
213
214 source = HEAD(lex->sources);
215 if (source == NULL)
216 return (ISC_R_NOMORE);
217
218 ISC_LIST_UNLINK(lex->sources, source, link);
219 lex->last_was_eol = source->last_was_eol;
220 if (source->is_file) {
221 if (source->need_close)
222 (void)fclose((FILE *)(source->input));
223 }
224 free(source->name);
225 isc_buffer_free(&source->pushback);
226 free(source);
227
228 return (ISC_R_SUCCESS);
229 }
230
/*
 * States of the scanner in isc_lex_gettoken().
 */
typedef enum {
	/* Between tokens: skipping whitespace and choosing the token kind. */
	lexstate_start,
	/* Accumulating an unquoted string. */
	lexstate_string,
	/* Saw '/': the next character decides whether a comment begins. */
	lexstate_maybecomment,
	/* Inside a C-style comment. */
	lexstate_ccomment,
	/* Inside a C-style comment, just after a '*'. */
	lexstate_ccommentend,
	/* Discarding the rest of the line (shell or C++ style comment). */
	lexstate_eatline,
	/* Inside a double-quoted string. */
	lexstate_qstring
} lexstate;
240
241 static void
pushback(inputsource * source,int c)242 pushback(inputsource *source, int c) {
243 REQUIRE(source->pushback->current > 0);
244 if (c == EOF) {
245 source->at_eof = 0;
246 return;
247 }
248 source->pushback->current--;
249 if (c == '\n')
250 source->line--;
251 }
252
253 static isc_result_t
pushandgrow(inputsource * source,int c)254 pushandgrow(inputsource *source, int c) {
255 if (isc_buffer_availablelength(source->pushback) == 0) {
256 isc_buffer_t *tbuf = NULL;
257 unsigned int oldlen;
258 isc_region_t used;
259 isc_result_t result;
260
261 oldlen = isc_buffer_length(source->pushback);
262 result = isc_buffer_allocate(&tbuf, oldlen * 2);
263 if (result != ISC_R_SUCCESS)
264 return (result);
265 isc_buffer_usedregion(source->pushback, &used);
266 result = isc_buffer_copyregion(tbuf, &used);
267 INSIST(result == ISC_R_SUCCESS);
268 tbuf->current = source->pushback->current;
269 isc_buffer_free(&source->pushback);
270 source->pushback = tbuf;
271 }
272 isc_buffer_putuint8(source->pushback, (uint8_t)c);
273 return (ISC_R_SUCCESS);
274 }
275
/*
 * Read the next token from the lexer's current (most recently opened)
 * input source into '*tokenp'.
 *
 * 'options' is a bitmask; the bits examined here are:
 *
 *	ISC_LEXOPT_NOMORE	with no source open, return a token of type
 *				isc_tokentype_nomore instead of failing with
 *				ISC_R_NOMORE;
 *	ISC_LEXOPT_EOF		at end of input, return a token of type
 *				isc_tokentype_eof instead of failing with
 *				ISC_R_EOF;
 *	ISC_LEXOPT_QSTRING	recognise double-quoted strings;
 *	ISC_LEXOPT_QSTRINGMULTILINE
 *				allow an unescaped newline inside a quoted
 *				string.
 *
 * The token types produced are nomore, eof, special, string and qstring.
 * For string and qstring tokens the text region points into lex->data,
 * which is reused from its start on every call.
 *
 * Returns ISC_R_SUCCESS when a token was stored.  Failures visible here:
 * ISC_R_NOMORE, ISC_R_EOF, ISC_R_IOERROR (read error on a file source),
 * ISC_R_UNEXPECTEDEND (end of input inside a C comment or quoted string),
 * ISC_R_UNBALANCEDQUOTES, and whatever pushandgrow() or grow_data()
 * return on failure.  Read and pushback-growth failures are remembered in
 * source->result and returned again by every later call on that source.
 */
isc_result_t
isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp) {
	inputsource *source;
	int c;
	int done = 0;
	int no_comments = 0;
	int escaped = 0;
	lexstate state = lexstate_start;
	lexstate saved_state = lexstate_start;
	isc_buffer_t *buffer;
	FILE *stream;
	char *curr, *prev;
	size_t remaining;
	isc_result_t result;

	/*
	 * Get the next token.
	 */

	source = HEAD(lex->sources);
	REQUIRE(tokenp != NULL);

	if (source == NULL) {
		if ((options & ISC_LEXOPT_NOMORE) != 0) {
			tokenp->type = isc_tokentype_nomore;
			return (ISC_R_SUCCESS);
		}
		return (ISC_R_NOMORE);
	}

	/* An earlier failure on this source is sticky. */
	if (source->result != ISC_R_SUCCESS)
		return (source->result);

	/* Remember what isc_lex_ungettoken() has to restore. */
	lex->saved_paren_count = lex->paren_count;
	source->saved_line = source->line;

	/* Already at end of input and nothing left to re-read. */
	if (isc_buffer_remaininglength(source->pushback) == 0 &&
	    source->at_eof)
	{
		if ((options & ISC_LEXOPT_EOF) != 0) {
			tokenp->type = isc_tokentype_eof;
			return (ISC_R_SUCCESS);
		}
		return (ISC_R_EOF);
	}

	/*
	 * NOTE(review): presumably this drops the part of the pushback
	 * buffer consumed by earlier tokens -- confirm against
	 * isc_buffer_compact().
	 */
	isc_buffer_compact(source->pushback);

	/* Start with an empty token text. */
	curr = lex->data;
	*curr = '\0';

	prev = NULL;
	remaining = lex->max_token;

	/*
	 * Lock the stream once for the whole token so that getc_unlocked()
	 * can be used below; it is unlocked again at the 'done' label.
	 */
	if (source->is_file)
		flockfile(source->input);

	/* Each pass through the loop handles one character (or EOF). */
	do {
		/*
		 * Nothing left to re-read: fetch one more character from
		 * the underlying input.
		 */
		if (isc_buffer_remaininglength(source->pushback) == 0) {
			if (source->is_file) {
				stream = source->input;

				c = getc_unlocked(stream);
				if (c == EOF) {
					if (ferror(stream)) {
						source->result = ISC_R_IOERROR;
						result = source->result;
						goto done;
					}
					source->at_eof = 1;
				}
			} else {
				buffer = source->input;

				if (buffer->current == buffer->used) {
					c = EOF;
					source->at_eof = 1;
				} else {
					c = *((unsigned char *)buffer->base +
					      buffer->current);
					buffer->current++;
				}
			}
			/*
			 * Every real character goes through the pushback
			 * buffer so that it can be un-read later.
			 */
			if (c != EOF) {
				source->result = pushandgrow(source, c);
				if (source->result != ISC_R_SUCCESS) {
					result = source->result;
					goto done;
				}
			}
		}

		if (!source->at_eof) {
			if (state == lexstate_start)
				/* Token has not started yet. */
				source->ignored =
				   isc_buffer_consumedlength(source->pushback);
			c = isc_buffer_getuint8(source->pushback);
		} else {
			c = EOF;
		}

		/* pushback() undoes this if the newline is un-read. */
		if (c == '\n')
			source->line++;

		/*
		 * Look for the start of a comment.  no_comments is set while
		 * already inside a comment or a quoted string.
		 */
		if (lex->comment_ok && !no_comments) {
			if (c == '/' &&
			    (lex->comments &
			     (ISC_LEXCOMMENT_C|
			      ISC_LEXCOMMENT_CPLUSPLUS)) != 0) {
				/* The next character decides. */
				saved_state = state;
				state = lexstate_maybecomment;
				no_comments = 1;
				continue;
			} else if (c == '#' &&
				   ((lex->comments & ISC_LEXCOMMENT_SHELL)
				    != 0)) {
				saved_state = state;
				state = lexstate_eatline;
				no_comments = 1;
				continue;
			}
		}

		/*
		 * Jumping here re-runs the state machine on the current 'c'
		 * without reading another character and without repeating
		 * the comment check above.
		 */
	no_read:
		/* INSIST(c == EOF || (c >= 0 && c <= 255)); */
		switch (state) {
		case lexstate_start:
			if (c == EOF) {
				lex->last_was_eol = 0;
				if ((options & ISC_LEXOPT_EOF) == 0) {
					result = ISC_R_EOF;
					goto done;
				}
				tokenp->type = isc_tokentype_eof;
				done = 1;
			} else if (c == ' ' || c == '\t') {
				/* Skip whitespace between tokens. */
				lex->last_was_eol = 0;
			} else if (c == '\n') {
				/* Newlines are skipped, not returned. */
				lex->last_was_eol = 1;
			} else if (c == '\r') {
				lex->last_was_eol = 0;
			} else if (c == '"' &&
				   (options & ISC_LEXOPT_QSTRING) != 0) {
				/* The opening quote is not part of the text. */
				lex->last_was_eol = 0;
				no_comments = 1;
				state = lexstate_qstring;
			} else if (lex->specials[c]) {
				lex->last_was_eol = 0;
				tokenp->type = isc_tokentype_special;
				tokenp->value.as_char = c;
				done = 1;
			} else {
				/*
				 * First character of a string: let the
				 * string state store it.
				 */
				lex->last_was_eol = 0;
				state = lexstate_string;
				goto no_read;
			}
			break;
		case lexstate_string:
			/*
			 * EOF needs to be checked before lex->specials[c]
			 * as lex->specials[EOF] is not a good idea.
			 *
			 * 'escaped' is only ever set in the quoted-string
			 * state, so it is always 0 when this is reached.
			 */
			if (c == '\r' || c == '\n' || c == EOF ||
			    (!escaped &&
			     (c == ' ' || c == '\t' || lex->specials[c]))) {
				/*
				 * The delimiter is not part of the string;
				 * un-read it for the next call.
				 */
				pushback(source, c);
				if (source->result != ISC_R_SUCCESS) {
					result = source->result;
					goto done;
				}
				tokenp->type = isc_tokentype_string;
				tokenp->value.as_textregion.base = lex->data;
				tokenp->value.as_textregion.length =
					(unsigned int)
					(lex->max_token - remaining);
				done = 1;
				continue;
			}
			if (remaining == 0U) {
				result = grow_data(lex, &remaining,
						   &curr, &prev);
				if (result != ISC_R_SUCCESS)
					goto done;
			}
			INSIST(remaining > 0U);
			/* Append and keep the text NUL-terminated. */
			*curr++ = c;
			*curr = '\0';
			remaining--;
			break;
		case lexstate_maybecomment:
			if (c == '*' &&
			    (lex->comments & ISC_LEXCOMMENT_C) != 0) {
				state = lexstate_ccomment;
				continue;
			} else if (c == '/' &&
			    (lex->comments & ISC_LEXCOMMENT_CPLUSPLUS) != 0) {
				state = lexstate_eatline;
				continue;
			}
			/*
			 * Not a comment after all: un-read this character
			 * and handle the '/' in the state we came from.
			 */
			pushback(source, c);
			c = '/';
			no_comments = 0;
			state = saved_state;
			goto no_read;
		case lexstate_ccomment:
			if (c == EOF) {
				result = ISC_R_UNEXPECTEDEND;
				goto done;
			}
			if (c == '*')
				state = lexstate_ccommentend;
			break;
		case lexstate_ccommentend:
			if (c == EOF) {
				result = ISC_R_UNEXPECTEDEND;
				goto done;
			}
			if (c == '/') {
				/*
				 * C-style comments become a single space.
				 * We do this to ensure that a comment will
				 * act as a delimiter for strings and
				 * numbers.
				 */
				c = ' ';
				no_comments = 0;
				state = saved_state;
				goto no_read;
			} else if (c != '*')
				/* A run of '*' stays in this state. */
				state = lexstate_ccomment;
			break;
		case lexstate_eatline:
			/*
			 * The comment ends at the newline or EOF, which is
			 * then handled by the state we came from.
			 */
			if ((c == '\n') || (c == EOF)) {
				no_comments = 0;
				state = saved_state;
				goto no_read;
			}
			break;
		case lexstate_qstring:
			if (c == EOF) {
				result = ISC_R_UNEXPECTEDEND;
				goto done;
			}
			if (c == '"') {
				if (escaped) {
					escaped = 0;
					/*
					 * Overwrite the preceding backslash.
					 */
					INSIST(prev != NULL);
					*prev = '"';
				} else {
					/* The closing quote ends the token. */
					tokenp->type = isc_tokentype_qstring;
					tokenp->value.as_textregion.base =
						lex->data;
					tokenp->value.as_textregion.length =
						(unsigned int)
						(lex->max_token - remaining);
					no_comments = 0;
					done = 1;
				}
			} else {
				/*
				 * An unescaped newline ends the string in
				 * error unless multi-line strings are
				 * allowed; the newline is un-read first.
				 */
				if (c == '\n' && !escaped &&
			    (options & ISC_LEXOPT_QSTRINGMULTILINE) == 0) {
					pushback(source, c);
					result = ISC_R_UNBALANCEDQUOTES;
					goto done;
				}
				/*
				 * A backslash escapes the next character
				 * unless it is itself escaped.
				 */
				if (c == '\\' && !escaped)
					escaped = 1;
				else
					escaped = 0;
				if (remaining == 0U) {
					result = grow_data(lex, &remaining,
							   &curr, &prev);
					if (result != ISC_R_SUCCESS)
						goto done;
				}
				INSIST(remaining > 0U);
				/*
				 * 'prev' remembers where this character is
				 * stored so that an escaping backslash can
				 * be overwritten by the quote that follows.
				 */
				prev = curr;
				*curr++ = c;
				*curr = '\0';
				remaining--;
			}
			break;
		default:
			FATAL_ERROR(__FILE__, __LINE__, "Unexpected state %d",
				    state);
			/* Does not return. */
		}

	} while (!done);

	result = ISC_R_SUCCESS;
	/* Common exit: release the stream lock taken before the loop. */
 done:
	if (source->is_file)
		funlockfile(source->input);
	return (result);
}
576
577 void
isc_lex_ungettoken(isc_lex_t * lex,isc_token_t * tokenp)578 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp) {
579 inputsource *source;
580 /*
581 * Unget the current token.
582 */
583
584 source = HEAD(lex->sources);
585 REQUIRE(source != NULL);
586 REQUIRE(tokenp != NULL);
587 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
588 tokenp->type == isc_tokentype_eof);
589
590 UNUSED(tokenp);
591
592 isc_buffer_first(source->pushback);
593 lex->paren_count = lex->saved_paren_count;
594 source->line = source->saved_line;
595 source->at_eof = 0;
596 }
597
598 void
isc_lex_getlasttokentext(isc_lex_t * lex,isc_token_t * tokenp,isc_region_t * r)599 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r)
600 {
601 inputsource *source;
602
603 source = HEAD(lex->sources);
604 REQUIRE(source != NULL);
605 REQUIRE(tokenp != NULL);
606 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
607 tokenp->type == isc_tokentype_eof);
608
609 UNUSED(tokenp);
610
611 INSIST(source->ignored <= isc_buffer_consumedlength(source->pushback));
612 r->base = (unsigned char *)isc_buffer_base(source->pushback) +
613 source->ignored;
614 r->length = isc_buffer_consumedlength(source->pushback) -
615 source->ignored;
616 }
617
618 char *
isc_lex_getsourcename(isc_lex_t * lex)619 isc_lex_getsourcename(isc_lex_t *lex) {
620 inputsource *source;
621
622 source = HEAD(lex->sources);
623
624 if (source == NULL)
625 return (NULL);
626
627 return (source->name);
628 }
629
630 unsigned long
isc_lex_getsourceline(isc_lex_t * lex)631 isc_lex_getsourceline(isc_lex_t *lex) {
632 inputsource *source;
633
634 source = HEAD(lex->sources);
635
636 if (source == NULL)
637 return (0);
638
639 return (source->line);
640 }
641