1 /* 2 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 3 * 4 * Permission to use, copy, modify, and/or distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 10 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 13 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 14 * PERFORMANCE OF THIS SOFTWARE. 15 */ 16 17 /* $Id: lex.c,v 1.8 2020/02/25 05:00:43 jsg Exp $ */ 18 19 /*! \file */ 20 21 #include <ctype.h> 22 #include <stdlib.h> 23 24 #include <isc/buffer.h> 25 26 #include <isc/lex.h> 27 28 #include <isc/parseint.h> 29 30 #include <errno.h> 31 #include <string.h> 32 #include <isc/util.h> 33 34 #include "unix/errno2result.h" 35 36 typedef struct inputsource { 37 isc_result_t result; 38 isc_boolean_t is_file; 39 isc_boolean_t need_close; 40 isc_boolean_t at_eof; 41 isc_boolean_t last_was_eol; 42 isc_buffer_t * pushback; 43 unsigned int ignored; 44 void * input; 45 char * name; 46 unsigned long line; 47 unsigned long saved_line; 48 ISC_LINK(struct inputsource) link; 49 } inputsource; 50 51 struct isc_lex { 52 /* Unlocked. */ 53 size_t max_token; 54 char * data; 55 unsigned int comments; 56 isc_boolean_t comment_ok; 57 isc_boolean_t last_was_eol; 58 unsigned int paren_count; 59 unsigned int saved_paren_count; 60 isc_lexspecials_t specials; 61 LIST(struct inputsource) sources; 62 }; 63 64 static inline isc_result_t 65 grow_data(isc_lex_t *lex, size_t *remainingp, char **currp, char **prevp) { 66 char *tmp; 67 68 tmp = malloc(lex->max_token * 2 + 1); 69 if (tmp == NULL) 70 return (ISC_R_NOMEMORY); 71 memmove(tmp, lex->data, lex->max_token + 1); 72 *currp = tmp + (*currp - lex->data); 73 if (*prevp != NULL) 74 *prevp = tmp + (*prevp - lex->data); 75 free(lex->data); 76 lex->data = tmp; 77 *remainingp += lex->max_token; 78 lex->max_token *= 2; 79 return (ISC_R_SUCCESS); 80 } 81 82 isc_result_t 83 isc_lex_create(size_t max_token, isc_lex_t **lexp) { 84 isc_lex_t *lex; 85 86 /* 87 * Create a lexer. 88 */ 89 REQUIRE(lexp != NULL && *lexp == NULL); 90 91 if (max_token == 0U) 92 max_token = 1; 93 94 lex = malloc(sizeof(*lex)); 95 if (lex == NULL) 96 return (ISC_R_NOMEMORY); 97 lex->data = malloc(max_token + 1); 98 if (lex->data == NULL) { 99 free(lex); 100 return (ISC_R_NOMEMORY); 101 } 102 lex->max_token = max_token; 103 lex->comments = 0; 104 lex->comment_ok = ISC_TRUE; 105 lex->last_was_eol = ISC_TRUE; 106 lex->paren_count = 0; 107 lex->saved_paren_count = 0; 108 memset(lex->specials, 0, 256); 109 INIT_LIST(lex->sources); 110 111 *lexp = lex; 112 113 return (ISC_R_SUCCESS); 114 } 115 116 void 117 isc_lex_destroy(isc_lex_t **lexp) { 118 isc_lex_t *lex; 119 120 /* 121 * Destroy the lexer. 122 */ 123 124 REQUIRE(lexp != NULL); 125 lex = *lexp; 126 127 while (!EMPTY(lex->sources)) 128 RUNTIME_CHECK(isc_lex_close(lex) == ISC_R_SUCCESS); 129 if (lex->data != NULL) 130 free(lex->data); 131 free(lex); 132 133 *lexp = NULL; 134 } 135 136 void 137 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments) { 138 /* 139 * Set allowed lexer commenting styles. 140 */ 141 142 lex->comments = comments; 143 } 144 145 void 146 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials) { 147 /* 148 * The characters in 'specials' are returned as tokens. Along with 149 * whitespace, they delimit strings and numbers. 150 */ 151 152 memmove(lex->specials, specials, 256); 153 } 154 155 static inline isc_result_t 156 new_source(isc_lex_t *lex, isc_boolean_t is_file, isc_boolean_t need_close, 157 void *input, const char *name) 158 { 159 inputsource *source; 160 isc_result_t result; 161 162 source = malloc(sizeof(*source)); 163 if (source == NULL) 164 return (ISC_R_NOMEMORY); 165 source->result = ISC_R_SUCCESS; 166 source->is_file = is_file; 167 source->need_close = need_close; 168 source->at_eof = ISC_FALSE; 169 source->last_was_eol = lex->last_was_eol; 170 source->input = input; 171 source->name = strdup(name); 172 if (source->name == NULL) { 173 free(source); 174 return (ISC_R_NOMEMORY); 175 } 176 source->pushback = NULL; 177 result = isc_buffer_allocate(&source->pushback, 178 (unsigned int)lex->max_token); 179 if (result != ISC_R_SUCCESS) { 180 free(source->name); 181 free(source); 182 return (result); 183 } 184 source->ignored = 0; 185 source->line = 1; 186 ISC_LIST_INITANDPREPEND(lex->sources, source, link); 187 188 return (ISC_R_SUCCESS); 189 } 190 191 isc_result_t 192 isc_lex_openfile(isc_lex_t *lex, const char *filename) { 193 isc_result_t result = ISC_R_SUCCESS; 194 FILE *stream = NULL; 195 196 /* 197 * Open 'filename' and make it the current input source for 'lex'. 198 */ 199 200 if ((stream = fopen(filename, "r")) == NULL) 201 return (isc__errno2result(errno)); 202 203 result = new_source(lex, ISC_TRUE, ISC_TRUE, stream, filename); 204 if (result != ISC_R_SUCCESS) 205 (void)fclose(stream); 206 return (result); 207 } 208 209 isc_result_t 210 isc_lex_close(isc_lex_t *lex) { 211 inputsource *source; 212 213 /* 214 * Close the most recently opened object (i.e. file or buffer). 215 */ 216 217 source = HEAD(lex->sources); 218 if (source == NULL) 219 return (ISC_R_NOMORE); 220 221 ISC_LIST_UNLINK(lex->sources, source, link); 222 lex->last_was_eol = source->last_was_eol; 223 if (source->is_file) { 224 if (source->need_close) 225 (void)fclose((FILE *)(source->input)); 226 } 227 free(source->name); 228 isc_buffer_free(&source->pushback); 229 free(source); 230 231 return (ISC_R_SUCCESS); 232 } 233 234 typedef enum { 235 lexstate_start, 236 lexstate_crlf, 237 lexstate_string, 238 lexstate_number, 239 lexstate_maybecomment, 240 lexstate_ccomment, 241 lexstate_ccommentend, 242 lexstate_eatline, 243 lexstate_qstring 244 } lexstate; 245 246 #define IWSEOL (ISC_LEXOPT_INITIALWS | ISC_LEXOPT_EOL) 247 248 static void 249 pushback(inputsource *source, int c) { 250 REQUIRE(source->pushback->current > 0); 251 if (c == EOF) { 252 source->at_eof = ISC_FALSE; 253 return; 254 } 255 source->pushback->current--; 256 if (c == '\n') 257 source->line--; 258 } 259 260 static isc_result_t 261 pushandgrow(inputsource *source, int c) { 262 if (isc_buffer_availablelength(source->pushback) == 0) { 263 isc_buffer_t *tbuf = NULL; 264 unsigned int oldlen; 265 isc_region_t used; 266 isc_result_t result; 267 268 oldlen = isc_buffer_length(source->pushback); 269 result = isc_buffer_allocate(&tbuf, oldlen * 2); 270 if (result != ISC_R_SUCCESS) 271 return (result); 272 isc_buffer_usedregion(source->pushback, &used); 273 result = isc_buffer_copyregion(tbuf, &used); 274 INSIST(result == ISC_R_SUCCESS); 275 tbuf->current = source->pushback->current; 276 isc_buffer_free(&source->pushback); 277 source->pushback = tbuf; 278 } 279 isc_buffer_putuint8(source->pushback, (uint8_t)c); 280 return (ISC_R_SUCCESS); 281 } 282 283 isc_result_t 284 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp) { 285 inputsource *source; 286 int c; 287 isc_boolean_t done = ISC_FALSE; 288 isc_boolean_t no_comments = ISC_FALSE; 289 isc_boolean_t escaped = ISC_FALSE; 290 lexstate state = lexstate_start; 291 lexstate saved_state = lexstate_start; 292 isc_buffer_t *buffer; 293 FILE *stream; 294 char *curr, *prev; 295 size_t remaining; 296 uint32_t as_ulong; 297 unsigned int saved_options; 298 isc_result_t result; 299 300 /* 301 * Get the next token. 302 */ 303 304 source = HEAD(lex->sources); 305 REQUIRE(tokenp != NULL); 306 307 if (source == NULL) { 308 if ((options & ISC_LEXOPT_NOMORE) != 0) { 309 tokenp->type = isc_tokentype_nomore; 310 return (ISC_R_SUCCESS); 311 } 312 return (ISC_R_NOMORE); 313 } 314 315 if (source->result != ISC_R_SUCCESS) 316 return (source->result); 317 318 lex->saved_paren_count = lex->paren_count; 319 source->saved_line = source->line; 320 321 if (isc_buffer_remaininglength(source->pushback) == 0 && 322 source->at_eof) 323 { 324 if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 && 325 lex->paren_count != 0) { 326 lex->paren_count = 0; 327 return (ISC_R_UNBALANCED); 328 } 329 if ((options & ISC_LEXOPT_EOF) != 0) { 330 tokenp->type = isc_tokentype_eof; 331 return (ISC_R_SUCCESS); 332 } 333 return (ISC_R_EOF); 334 } 335 336 isc_buffer_compact(source->pushback); 337 338 saved_options = options; 339 if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 && lex->paren_count > 0) 340 options &= ~IWSEOL; 341 342 curr = lex->data; 343 *curr = '\0'; 344 345 prev = NULL; 346 remaining = lex->max_token; 347 348 if (source->is_file) 349 flockfile(source->input); 350 351 do { 352 if (isc_buffer_remaininglength(source->pushback) == 0) { 353 if (source->is_file) { 354 stream = source->input; 355 356 c = getc_unlocked(stream); 357 if (c == EOF) { 358 if (ferror(stream)) { 359 source->result = ISC_R_IOERROR; 360 result = source->result; 361 goto done; 362 } 363 source->at_eof = ISC_TRUE; 364 } 365 } else { 366 buffer = source->input; 367 368 if (buffer->current == buffer->used) { 369 c = EOF; 370 source->at_eof = ISC_TRUE; 371 } else { 372 c = *((unsigned char *)buffer->base + 373 buffer->current); 374 buffer->current++; 375 } 376 } 377 if (c != EOF) { 378 source->result = pushandgrow(source, c); 379 if (source->result != ISC_R_SUCCESS) { 380 result = source->result; 381 goto done; 382 } 383 } 384 } 385 386 if (!source->at_eof) { 387 if (state == lexstate_start) 388 /* Token has not started yet. */ 389 source->ignored = 390 isc_buffer_consumedlength(source->pushback); 391 c = isc_buffer_getuint8(source->pushback); 392 } else { 393 c = EOF; 394 } 395 396 if (c == '\n') 397 source->line++; 398 399 if (lex->comment_ok && !no_comments) { 400 if (!escaped && c == ';' && 401 ((lex->comments & ISC_LEXCOMMENT_DNSMASTERFILE) 402 != 0)) { 403 saved_state = state; 404 state = lexstate_eatline; 405 no_comments = ISC_TRUE; 406 continue; 407 } else if (c == '/' && 408 (lex->comments & 409 (ISC_LEXCOMMENT_C| 410 ISC_LEXCOMMENT_CPLUSPLUS)) != 0) { 411 saved_state = state; 412 state = lexstate_maybecomment; 413 no_comments = ISC_TRUE; 414 continue; 415 } else if (c == '#' && 416 ((lex->comments & ISC_LEXCOMMENT_SHELL) 417 != 0)) { 418 saved_state = state; 419 state = lexstate_eatline; 420 no_comments = ISC_TRUE; 421 continue; 422 } 423 } 424 425 no_read: 426 /* INSIST(c == EOF || (c >= 0 && c <= 255)); */ 427 switch (state) { 428 case lexstate_start: 429 if (c == EOF) { 430 lex->last_was_eol = ISC_FALSE; 431 if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 && 432 lex->paren_count != 0) { 433 lex->paren_count = 0; 434 result = ISC_R_UNBALANCED; 435 goto done; 436 } 437 if ((options & ISC_LEXOPT_EOF) == 0) { 438 result = ISC_R_EOF; 439 goto done; 440 } 441 tokenp->type = isc_tokentype_eof; 442 done = ISC_TRUE; 443 } else if (c == ' ' || c == '\t') { 444 if (lex->last_was_eol && 445 (options & ISC_LEXOPT_INITIALWS) 446 != 0) { 447 lex->last_was_eol = ISC_FALSE; 448 tokenp->type = isc_tokentype_initialws; 449 tokenp->value.as_char = c; 450 done = ISC_TRUE; 451 } 452 } else if (c == '\n') { 453 if ((options & ISC_LEXOPT_EOL) != 0) { 454 tokenp->type = isc_tokentype_eol; 455 done = ISC_TRUE; 456 } 457 lex->last_was_eol = ISC_TRUE; 458 } else if (c == '\r') { 459 if ((options & ISC_LEXOPT_EOL) != 0) 460 state = lexstate_crlf; 461 } else if (c == '"' && 462 (options & ISC_LEXOPT_QSTRING) != 0) { 463 lex->last_was_eol = ISC_FALSE; 464 no_comments = ISC_TRUE; 465 state = lexstate_qstring; 466 } else if (lex->specials[c]) { 467 lex->last_was_eol = ISC_FALSE; 468 if ((c == '(' || c == ')') && 469 (options & ISC_LEXOPT_DNSMULTILINE) != 0) { 470 if (c == '(') { 471 if (lex->paren_count == 0) 472 options &= ~IWSEOL; 473 lex->paren_count++; 474 } else { 475 if (lex->paren_count == 0) { 476 result = ISC_R_UNBALANCED; 477 goto done; 478 } 479 lex->paren_count--; 480 if (lex->paren_count == 0) 481 options = 482 saved_options; 483 } 484 continue; 485 } 486 tokenp->type = isc_tokentype_special; 487 tokenp->value.as_char = c; 488 done = ISC_TRUE; 489 } else if (isdigit((unsigned char)c) && 490 (options & ISC_LEXOPT_NUMBER) != 0) { 491 lex->last_was_eol = ISC_FALSE; 492 if ((options & ISC_LEXOPT_OCTAL) != 0 && 493 (c == '8' || c == '9')) 494 state = lexstate_string; 495 else 496 state = lexstate_number; 497 goto no_read; 498 } else { 499 lex->last_was_eol = ISC_FALSE; 500 state = lexstate_string; 501 goto no_read; 502 } 503 break; 504 case lexstate_crlf: 505 if (c != '\n') 506 pushback(source, c); 507 tokenp->type = isc_tokentype_eol; 508 done = ISC_TRUE; 509 lex->last_was_eol = ISC_TRUE; 510 break; 511 case lexstate_number: 512 if (c == EOF || !isdigit((unsigned char)c)) { 513 if (c == ' ' || c == '\t' || c == '\r' || 514 c == '\n' || c == EOF || 515 lex->specials[c]) { 516 int base; 517 if ((options & ISC_LEXOPT_OCTAL) != 0) 518 base = 8; 519 else if ((options & ISC_LEXOPT_CNUMBER) != 0) 520 base = 0; 521 else 522 base = 10; 523 pushback(source, c); 524 525 result = isc_parse_uint32(&as_ulong, 526 lex->data, 527 base); 528 if (result == ISC_R_SUCCESS) { 529 tokenp->type = 530 isc_tokentype_number; 531 tokenp->value.as_ulong = 532 as_ulong; 533 } else if (result == ISC_R_BADNUMBER) { 534 isc_tokenvalue_t *v; 535 536 tokenp->type = 537 isc_tokentype_string; 538 v = &(tokenp->value); 539 v->as_textregion.base = 540 lex->data; 541 v->as_textregion.length = 542 (unsigned int) 543 (lex->max_token - 544 remaining); 545 } else 546 goto done; 547 done = ISC_TRUE; 548 continue; 549 } else if (!(options & ISC_LEXOPT_CNUMBER) || 550 ((c != 'x' && c != 'X') || 551 (curr != &lex->data[1]) || 552 (lex->data[0] != '0'))) { 553 /* Above test supports hex numbers */ 554 state = lexstate_string; 555 } 556 } else if ((options & ISC_LEXOPT_OCTAL) != 0 && 557 (c == '8' || c == '9')) { 558 state = lexstate_string; 559 } 560 if (remaining == 0U) { 561 result = grow_data(lex, &remaining, 562 &curr, &prev); 563 if (result != ISC_R_SUCCESS) 564 goto done; 565 } 566 INSIST(remaining > 0U); 567 *curr++ = c; 568 *curr = '\0'; 569 remaining--; 570 break; 571 case lexstate_string: 572 /* 573 * EOF needs to be checked before lex->specials[c] 574 * as lex->specials[EOF] is not a good idea. 575 */ 576 if (c == '\r' || c == '\n' || c == EOF || 577 (!escaped && 578 (c == ' ' || c == '\t' || lex->specials[c]))) { 579 pushback(source, c); 580 if (source->result != ISC_R_SUCCESS) { 581 result = source->result; 582 goto done; 583 } 584 tokenp->type = isc_tokentype_string; 585 tokenp->value.as_textregion.base = lex->data; 586 tokenp->value.as_textregion.length = 587 (unsigned int) 588 (lex->max_token - remaining); 589 done = ISC_TRUE; 590 continue; 591 } 592 if ((options & ISC_LEXOPT_ESCAPE) != 0) 593 escaped = (!escaped && c == '\\') ? 594 ISC_TRUE : ISC_FALSE; 595 if (remaining == 0U) { 596 result = grow_data(lex, &remaining, 597 &curr, &prev); 598 if (result != ISC_R_SUCCESS) 599 goto done; 600 } 601 INSIST(remaining > 0U); 602 *curr++ = c; 603 *curr = '\0'; 604 remaining--; 605 break; 606 case lexstate_maybecomment: 607 if (c == '*' && 608 (lex->comments & ISC_LEXCOMMENT_C) != 0) { 609 state = lexstate_ccomment; 610 continue; 611 } else if (c == '/' && 612 (lex->comments & ISC_LEXCOMMENT_CPLUSPLUS) != 0) { 613 state = lexstate_eatline; 614 continue; 615 } 616 pushback(source, c); 617 c = '/'; 618 no_comments = ISC_FALSE; 619 state = saved_state; 620 goto no_read; 621 case lexstate_ccomment: 622 if (c == EOF) { 623 result = ISC_R_UNEXPECTEDEND; 624 goto done; 625 } 626 if (c == '*') 627 state = lexstate_ccommentend; 628 break; 629 case lexstate_ccommentend: 630 if (c == EOF) { 631 result = ISC_R_UNEXPECTEDEND; 632 goto done; 633 } 634 if (c == '/') { 635 /* 636 * C-style comments become a single space. 637 * We do this to ensure that a comment will 638 * act as a delimiter for strings and 639 * numbers. 640 */ 641 c = ' '; 642 no_comments = ISC_FALSE; 643 state = saved_state; 644 goto no_read; 645 } else if (c != '*') 646 state = lexstate_ccomment; 647 break; 648 case lexstate_eatline: 649 if ((c == '\n') || (c == EOF)) { 650 no_comments = ISC_FALSE; 651 state = saved_state; 652 goto no_read; 653 } 654 break; 655 case lexstate_qstring: 656 if (c == EOF) { 657 result = ISC_R_UNEXPECTEDEND; 658 goto done; 659 } 660 if (c == '"') { 661 if (escaped) { 662 escaped = ISC_FALSE; 663 /* 664 * Overwrite the preceding backslash. 665 */ 666 INSIST(prev != NULL); 667 *prev = '"'; 668 } else { 669 tokenp->type = isc_tokentype_qstring; 670 tokenp->value.as_textregion.base = 671 lex->data; 672 tokenp->value.as_textregion.length = 673 (unsigned int) 674 (lex->max_token - remaining); 675 no_comments = ISC_FALSE; 676 done = ISC_TRUE; 677 } 678 } else { 679 if (c == '\n' && !escaped && 680 (options & ISC_LEXOPT_QSTRINGMULTILINE) == 0) { 681 pushback(source, c); 682 result = ISC_R_UNBALANCEDQUOTES; 683 goto done; 684 } 685 if (c == '\\' && !escaped) 686 escaped = ISC_TRUE; 687 else 688 escaped = ISC_FALSE; 689 if (remaining == 0U) { 690 result = grow_data(lex, &remaining, 691 &curr, &prev); 692 if (result != ISC_R_SUCCESS) 693 goto done; 694 } 695 INSIST(remaining > 0U); 696 prev = curr; 697 *curr++ = c; 698 *curr = '\0'; 699 remaining--; 700 } 701 break; 702 default: 703 FATAL_ERROR(__FILE__, __LINE__, "Unexpected state %d", 704 state); 705 /* Does not return. */ 706 } 707 708 } while (!done); 709 710 result = ISC_R_SUCCESS; 711 done: 712 if (source->is_file) 713 funlockfile(source->input); 714 return (result); 715 } 716 717 void 718 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp) { 719 inputsource *source; 720 /* 721 * Unget the current token. 722 */ 723 724 source = HEAD(lex->sources); 725 REQUIRE(source != NULL); 726 REQUIRE(tokenp != NULL); 727 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 || 728 tokenp->type == isc_tokentype_eof); 729 730 UNUSED(tokenp); 731 732 isc_buffer_first(source->pushback); 733 lex->paren_count = lex->saved_paren_count; 734 source->line = source->saved_line; 735 source->at_eof = ISC_FALSE; 736 } 737 738 void 739 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r) 740 { 741 inputsource *source; 742 743 source = HEAD(lex->sources); 744 REQUIRE(source != NULL); 745 REQUIRE(tokenp != NULL); 746 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 || 747 tokenp->type == isc_tokentype_eof); 748 749 UNUSED(tokenp); 750 751 INSIST(source->ignored <= isc_buffer_consumedlength(source->pushback)); 752 r->base = (unsigned char *)isc_buffer_base(source->pushback) + 753 source->ignored; 754 r->length = isc_buffer_consumedlength(source->pushback) - 755 source->ignored; 756 } 757 758 char * 759 isc_lex_getsourcename(isc_lex_t *lex) { 760 inputsource *source; 761 762 source = HEAD(lex->sources); 763 764 if (source == NULL) 765 return (NULL); 766 767 return (source->name); 768 } 769 770 unsigned long 771 isc_lex_getsourceline(isc_lex_t *lex) { 772 inputsource *source; 773 774 source = HEAD(lex->sources); 775 776 if (source == NULL) 777 return (0); 778 779 return (source->line); 780 } 781