%top{
/*-------------------------------------------------------------------------
 *
 * psqlscanslash.l
 *	  lexical scanner for psql backslash commands
 *
 * XXX Avoid creating backtracking cases --- see the backend lexer for info.
 *
 * See fe_utils/psqlscan_int.h for additional commentary.
 *
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/bin/psql/psqlscanslash.l
 *
 *-------------------------------------------------------------------------
 */
#include "postgres_fe.h"

#include "psqlscanslash.h"
#include "common/logging.h"
#include "fe_utils/conditional.h"

#include "libpq-fe.h"
}

%{
#include "fe_utils/psqlscan_int.h"

#define PQmblenBounded(s, e)  strnlen(s, PQmblen(s, e))

/*
 * yylex's first argument needs a YYSTYPE typedef.  This lexer never looks
 * at that argument, so a plain int is enough.
 */
typedef int YYSTYPE;

/*
 * yyextra carries a pointer back to the PsqlScanState that owns this
 * scanner.
 */
#define YY_EXTRA_TYPE PsqlScanState

/*
 * Per-call scratch state shared between psql_scan_slash_option() and the
 * lexer actions.  None of it has to survive across calls.  Keeping it in
 * file statics is a bit of a hack, but stuffing it into PsqlScanStateData
 * would be klugy too.
 */
static enum slash_option_type option_type;
static char *option_quote;
static int	unquoted_option_chars;
static int	backtick_start_offset;


/* Return values from yylex() */
#define LEXRES_EOL			0	/* end of input */
#define LEXRES_OK			1	/* OK completion of backslash argument */


static void evaluate_backtick(PsqlScanState state);

/* Copy the current token to the current output buffer */
#define ECHO psqlscan_emit(cur_state, yytext, yyleng)

/*
 * Work around a bug in flex 2.5.35: it emits a couple of functions that
 * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
 * this would cause warnings.  Providing our own declarations should be
 * harmless even when the bug gets fixed.
 */
extern int	slash_yyget_column(yyscan_t yyscanner);
extern void slash_yyset_column(int column_no, yyscan_t yyscanner);

/* LCOV_EXCL_START */

%}

/* Except for the prefix, these options should match psqlscan.l */
%option reentrant
%option bison-bridge
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="slash_yy"

/*
 * OK, here is a short description of lex/flex rules behavior.
 * The longest pattern which matches an input string is always chosen.
 * For equal-length patterns, the first occurring in the rules list is chosen.
 * INITIAL is the starting state, to which all non-conditional rules apply.
 * Exclusive states change parsing rules while the state is active.  When in
 * an exclusive state, only those rules defined for that state apply.
 */

/* Exclusive states for lexing backslash commands */
%x xslashcmd
%x xslashargstart
%x xslasharg
%x xslashquote
%x xslashbackquote
%x xslashdquote
%x xslashwholeline
%x xslashend

/*
 * Assorted character class definitions that should match psqlscan.l.
 */
space			[ \t\n\r\f]
quote			'
xeoctesc		[\\][0-7]{1,3}
xehexesc		[\\]x[0-9A-Fa-f]{1,2}
xqdouble		{quote}{quote}
dquote			\"
variable_char	[A-Za-z\200-\377_0-9]

other			.

%%

%{
		/* Convenience locals, visible to every rule action in yylex() */
		PsqlScanState cur_state = yyextra;
		PQExpBuffer output_buf = cur_state->output_buf;

		/*
		 * Force flex into the state indicated by start_state.  This has a
		 * couple of purposes: it lets some of the functions below set a new
		 * starting state without ugly direct access to flex variables, and it
		 * allows us to transition from one flex lexer to another so that we
		 * can lex different parts of the source string using separate lexers.
		 */
		BEGIN(cur_state->start_state);
%}

	/*
	 * Being called in the INITIAL state is not really expected here; if it
	 * happens anyway, just copy the input to output_buf until EOF.
	 */

{other}|\n		{ ECHO; }

	/*
	 * Exclusive lexer states to handle backslash command lexing
	 */

<xslashcmd>{
	/* the command name runs up to whitespace or a backslash */

{space}|"\\"	{
					/* leave the terminator unread and report the name */
					yyless(0);
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

{other}			{ ECHO; }

}

<xslashargstart>{
	/*
	 * Skip whitespace ahead of the argument, then hand over to xslasharg.
	 * "|" is special only as the very first character of an argument, so
	 * it is tested for here.
	 */

{space}+		{ }

"|"				{
					if (option_type != OT_FILEPIPE)
					{
						/* an ordinary character: let xslasharg rescan it */
						yyless(0);
						BEGIN(xslasharg);
					}
					else
					{
						/* pipe target: take the rest like the whole-line case */
						ECHO;
						BEGIN(xslashwholeline);
					}
				}

{other}			{
					/* start of the argument proper; rescan it in xslasharg */
					yyless(0);
					BEGIN(xslasharg);
				}

}

<xslasharg>{
	/*
	 * Default processing of text in a slash command's argument.
	 *
	 * Note: unquoted_option_chars counts the number of characters at the
	 * end of the argument that were not subject to any form of quoting.
	 * psql_scan_slash_option needs this to strip trailing semicolons safely.
	 */

{space}|"\\"	{
					/*
					 * Unquoted space is end of arg; do not eat.  Likewise
					 * backslash is end of command or next command, do not eat
					 *
					 * XXX this means we can't conveniently accept options
					 * that include unquoted backslashes; therefore, option
					 * processing that encourages use of backslashes is rather
					 * broken.
					 */
					yyless(0);
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

{quote}			{
					/* opening single quote: not copied to the output */
					*option_quote = '\'';
					unquoted_option_chars = 0;
					BEGIN(xslashquote);
				}

"`"				{
					/* remember where the shell command text will start */
					backtick_start_offset = output_buf->len;
					*option_quote = '`';
					unquoted_option_chars = 0;
					BEGIN(xslashbackquote);
				}

{dquote}		{
					/* opening double quote: it stays in the output */
					ECHO;
					*option_quote = '"';
					unquoted_option_chars = 0;
					BEGIN(xslashdquote);
				}

:{variable_char}+	{
					/* Possible psql variable substitution */
					bool		can_substitute =
						(cur_state->callbacks->get_variable != NULL);
					char	   *value = NULL;

					if (can_substitute)
					{
						char	   *varname;

						/* skip the leading colon to get the variable name */
						varname = psqlscan_extract_substring(cur_state,
															 yytext + 1,
															 yyleng - 1);
						value = cur_state->callbacks->get_variable(varname,
																   PQUOTE_PLAIN,
																   cur_state->cb_passthrough);
						free(varname);
					}

					/*
					 * The variable value is just emitted without any
					 * further examination.  This is consistent with the
					 * pre-8.0 code behavior, if not with the way that
					 * variables are handled outside backslash commands.
					 * Note that we needn't guard against recursion here.
					 * Without a value, the reference is copied as written.
					 */
					if (value != NULL)
					{
						appendPQExpBufferStr(output_buf, value);
						free(value);
					}
					else
						ECHO;

					/* the quote marker is set only when a lookup was tried */
					if (can_substitute)
						*option_quote = ':';
					unquoted_option_chars = 0;
				}

:'{variable_char}+'	{
					psqlscan_escape_variable(cur_state, yytext, yyleng,
											 PQUOTE_SQL_LITERAL);
					*option_quote = ':';
					unquoted_option_chars = 0;
				}


:\"{variable_char}+\"	{
					psqlscan_escape_variable(cur_state, yytext, yyleng,
											 PQUOTE_SQL_IDENT);
					*option_quote = ':';
					unquoted_option_chars = 0;
				}

:\{\?{variable_char}+\}	{
					psqlscan_test_variable(cur_state, yytext, yyleng);
				}

:'{variable_char}*	{
					/* Incomplete reference: keep only the colon */
					yyless(1);
					ECHO;
					unquoted_option_chars++;
				}

:\"{variable_char}*	{
					/* Incomplete reference: keep only the colon */
					yyless(1);
					ECHO;
					unquoted_option_chars++;
				}

:\{\?{variable_char}*	{
					/* Incomplete reference: keep only the colon */
					yyless(1);
					ECHO;
					unquoted_option_chars++;
				}

:\{		{
					/* Incomplete reference: keep only the colon */
					yyless(1);
					ECHO;
					unquoted_option_chars++;
				}

{other}			{
					/* ordinary unquoted character */
					ECHO;
					unquoted_option_chars++;
				}

}

<xslashquote>{
	/*
	 * single-quoted text: copy literally except for '' and backslash
	 * sequences
	 */

{quote}			{ BEGIN(xslasharg); }

{xqdouble}		{ appendPQExpBufferChar(output_buf, '\''); }

"\\n"			{ appendPQExpBufferChar(output_buf, '\n'); }
"\\t"			{ appendPQExpBufferChar(output_buf, '\t'); }
"\\b"			{ appendPQExpBufferChar(output_buf, '\b'); }
"\\r"			{ appendPQExpBufferChar(output_buf, '\r'); }
"\\f"			{ appendPQExpBufferChar(output_buf, '\f'); }

{xeoctesc}		{
					/* backslash followed by one to three octal digits */
					long		code = strtol(yytext + 1, NULL, 8);

					appendPQExpBufferChar(output_buf, (char) code);
				}

{xehexesc}		{
					/* backslash, 'x', then one or two hex digits */
					long		code = strtol(yytext + 2, NULL, 16);

					appendPQExpBufferChar(output_buf, (char) code);
				}

"\\".			{
					/* any other escaped character: emit the byte after the backslash */
					psqlscan_emit(cur_state, yytext + 1, 1);
				}

{other}|\n		{ ECHO; }

}

<xslashbackquote>{
	/*
	 * backticked text: copy everything until next backquote (expanding
	 * variable references, but doing nought else), then evaluate.
	 */

"`"				{
					/* In an inactive \if branch, don't evaluate the command */
					if (cur_state->cb_passthrough == NULL ||
						conditional_active((ConditionalStack) cur_state->cb_passthrough))
					{
						evaluate_backtick(cur_state);
					}
					BEGIN(xslasharg);
				}

:{variable_char}+	{
					/* Possible psql variable substitution */
					char	   *value = NULL;

					if (cur_state->callbacks->get_variable != NULL)
					{
						char	   *varname;

						/* skip the leading colon to get the variable name */
						varname = psqlscan_extract_substring(cur_state,
															 yytext + 1,
															 yyleng - 1);
						value = cur_state->callbacks->get_variable(varname,
																   PQUOTE_PLAIN,
																   cur_state->cb_passthrough);
						free(varname);
					}

					/* without a value, the reference is copied as written */
					if (value != NULL)
					{
						appendPQExpBufferStr(output_buf, value);
						free(value);
					}
					else
						ECHO;
				}

:'{variable_char}+'	{
					psqlscan_escape_variable(cur_state, yytext, yyleng,
											 PQUOTE_SHELL_ARG);
				}

:'{variable_char}*	{
					/* Incomplete reference: keep only the colon */
					yyless(1);
					ECHO;
				}

{other}|\n		{ ECHO; }

}

<xslashdquote>{
	/* double-quoted text: copy verbatim, including the double quotes */

{dquote}		{
					ECHO;
					BEGIN(xslasharg);
				}

{other}|\n		{ ECHO; }

}

<xslashwholeline>{
	/* copy everything until end of input line */
	/* but suppress leading whitespace */

{space}+		{
					/* whitespace is kept only once something has been output */
					if (output_buf->len > 0)
						ECHO;
				}

{other}			{ ECHO; }

}

<xslashend>{
	/* at end of command, eat a double backslash, but not anything else */

"\\\\"			{
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

{other}|\n		{
					/* anything else is left unread */
					yyless(0);
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

}

<<EOF>>			{
					if (cur_state->buffer_stack == NULL)
					{
						cur_state->start_state = YY_START;
						return LEXRES_EOL;		/* end of input reached */
					}

					/*
					 * We were expanding a variable, so pop the inclusion
					 * stack and keep lexing
					 */
					psqlscan_pop_buffer_stack(cur_state);
					psqlscan_select_top_buffer(cur_state);
				}

%%

/* LCOV_EXCL_STOP */

/*
 * Scan the command name of a psql backslash command.  This should be called
 * after psql_scan() returns PSCAN_BACKSLASH.  It is assumed that the input
 * has been consumed through the leading backslash.
 *
 * The return value is a malloc'd copy of the command name, as parsed off
 * from the input.
 */
char *
psql_scan_slash_command(PsqlScanState state)
{
	PQExpBufferData cmdbuf;

	/* A scan must already be in progress */
	Assert(state->scanbufhandle != NULL);

	/* Collect the name in a private buffer whose data is handed back */
	initPQExpBuffer(&cmdbuf);
	state->output_buf = &cmdbuf;

	/* Read from the top of the buffer stack if there is one */
	yy_switch_to_buffer(state->buffer_stack != NULL ?
						state->buffer_stack->buf :
						state->scanbufhandle,
						state->scanner);

	/*
	 * Set lexer start state.  Note that this is sufficient to switch
	 * state->scanner over to using the tables in this lexer file.
	 */
	state->start_state = xslashcmd;

	/* Run the lexer; no errors are possible in this lex state */
	yylex(NULL, state->scanner);

	/*
	 * In case the caller returns to using the regular SQL lexer, reselect the
	 * appropriate initial state.
	 */
	psql_scan_reselect_sql_lexer(state);

	return cmdbuf.data;
}

/*
 * Parse off the next argument for a backslash command, and return it as a
 * malloc'd string.  If there are no more arguments, returns NULL.
529 * 530 * type tells what processing, if any, to perform on the option string; 531 * for example, if it's a SQL identifier, we want to downcase any unquoted 532 * letters. 533 * 534 * if quote is not NULL, *quote is set to 0 if no quoting was found, else 535 * the last quote symbol used in the argument. 536 * 537 * if semicolon is true, unquoted trailing semicolon(s) that would otherwise 538 * be taken as part of the option string will be stripped. 539 * 540 * NOTE: the only possible syntax errors for backslash options are unmatched 541 * quotes, which are detected when we run out of input. Therefore, on a 542 * syntax error we just throw away the string and return NULL; there is no 543 * need to worry about flushing remaining input. 544 */ 545 char * 546 psql_scan_slash_option(PsqlScanState state, 547 enum slash_option_type type, 548 char *quote, 549 bool semicolon) 550 { 551 PQExpBufferData mybuf; 552 int lexresult PG_USED_FOR_ASSERTS_ONLY; 553 int final_state; 554 char local_quote; 555 556 /* Must be scanning already */ 557 Assert(state->scanbufhandle != NULL); 558 559 if (quote == NULL) 560 quote = &local_quote; 561 *quote = 0; 562 563 /* Build a local buffer that we'll return the data of */ 564 initPQExpBuffer(&mybuf); 565 566 /* Set up static variables that will be used by yylex */ 567 option_type = type; 568 option_quote = quote; 569 unquoted_option_chars = 0; 570 571 /* Set current output target */ 572 state->output_buf = &mybuf; 573 574 /* Set input source */ 575 if (state->buffer_stack != NULL) 576 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); 577 else 578 yy_switch_to_buffer(state->scanbufhandle, state->scanner); 579 580 /* Set lexer start state */ 581 if (type == OT_WHOLE_LINE) 582 state->start_state = xslashwholeline; 583 else 584 state->start_state = xslashargstart; 585 586 /* And lex. */ 587 lexresult = yylex(NULL, state->scanner); 588 589 /* Save final state for a moment... 
*/ 590 final_state = state->start_state; 591 592 /* 593 * In case the caller returns to using the regular SQL lexer, reselect the 594 * appropriate initial state. 595 */ 596 psql_scan_reselect_sql_lexer(state); 597 598 /* 599 * Check the lex result: we should have gotten back either LEXRES_OK 600 * or LEXRES_EOL (the latter indicating end of string). If we were inside 601 * a quoted string, as indicated by final_state, EOL is an error. 602 */ 603 Assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK); 604 605 switch (final_state) 606 { 607 case xslashargstart: 608 /* empty arg */ 609 break; 610 case xslasharg: 611 /* Strip any unquoted trailing semi-colons if requested */ 612 if (semicolon) 613 { 614 while (unquoted_option_chars-- > 0 && 615 mybuf.len > 0 && 616 mybuf.data[mybuf.len - 1] == ';') 617 { 618 mybuf.data[--mybuf.len] = '\0'; 619 } 620 } 621 622 /* 623 * If SQL identifier processing was requested, then we strip out 624 * excess double quotes and optionally downcase unquoted letters. 625 */ 626 if (type == OT_SQLID || type == OT_SQLIDHACK) 627 { 628 dequote_downcase_identifier(mybuf.data, 629 (type != OT_SQLIDHACK), 630 state->encoding); 631 /* update mybuf.len for possible shortening */ 632 mybuf.len = strlen(mybuf.data); 633 } 634 break; 635 case xslashquote: 636 case xslashbackquote: 637 case xslashdquote: 638 /* must have hit EOL inside quotes */ 639 pg_log_error("unterminated quoted string"); 640 termPQExpBuffer(&mybuf); 641 return NULL; 642 case xslashwholeline: 643 /* always okay */ 644 break; 645 default: 646 /* can't get here */ 647 fprintf(stderr, "invalid YY_START\n"); 648 exit(1); 649 } 650 651 /* 652 * An unquoted empty argument isn't possible unless we are at end of 653 * command. Return NULL instead. 654 */ 655 if (mybuf.len == 0 && *quote == 0) 656 { 657 termPQExpBuffer(&mybuf); 658 return NULL; 659 } 660 661 /* Else return the completed string. 
*/ 662 return mybuf.data; 663 } 664 665 /* 666 * Eat up any unused \\ to complete a backslash command. 667 */ 668 void 669 psql_scan_slash_command_end(PsqlScanState state) 670 { 671 /* Must be scanning already */ 672 Assert(state->scanbufhandle != NULL); 673 674 /* Set current output target */ 675 state->output_buf = NULL; /* we won't output anything */ 676 677 /* Set input source */ 678 if (state->buffer_stack != NULL) 679 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); 680 else 681 yy_switch_to_buffer(state->scanbufhandle, state->scanner); 682 683 /* Set lexer start state */ 684 state->start_state = xslashend; 685 686 /* And lex. */ 687 yylex(NULL, state->scanner); 688 689 /* There are no possible errors in this lex state... */ 690 691 /* 692 * We expect the caller to return to using the regular SQL lexer, so 693 * reselect the appropriate initial state. 694 */ 695 psql_scan_reselect_sql_lexer(state); 696 } 697 698 /* 699 * Fetch current paren nesting depth 700 */ 701 int 702 psql_scan_get_paren_depth(PsqlScanState state) 703 { 704 return state->paren_depth; 705 } 706 707 /* 708 * Set paren nesting depth 709 */ 710 void 711 psql_scan_set_paren_depth(PsqlScanState state, int depth) 712 { 713 Assert(depth >= 0); 714 state->paren_depth = depth; 715 } 716 717 /* 718 * De-quote and optionally downcase a SQL identifier. 719 * 720 * The string at *str is modified in-place; it can become shorter, 721 * but not longer. 722 * 723 * If downcase is true then non-quoted letters are folded to lower case. 724 * Ideally this behavior will match the backend's downcase_identifier(); 725 * but note that it could differ if LC_CTYPE is different in the frontend. 726 * 727 * Note that a string like FOO"BAR"BAZ will be converted to fooBARbaz; 728 * this is somewhat inconsistent with the SQL spec, which would have us 729 * parse it as several identifiers. 
But for psql's purposes, we want a 730 * string like "foo"."bar" to be treated as one option, so there's little 731 * choice; this routine doesn't get to change the token boundaries. 732 */ 733 void 734 dequote_downcase_identifier(char *str, bool downcase, int encoding) 735 { 736 bool inquotes = false; 737 char *cp = str; 738 739 while (*cp) 740 { 741 if (*cp == '"') 742 { 743 if (inquotes && cp[1] == '"') 744 { 745 /* Keep the first quote, remove the second */ 746 cp++; 747 } 748 else 749 inquotes = !inquotes; 750 /* Collapse out quote at *cp */ 751 memmove(cp, cp + 1, strlen(cp)); 752 /* do not advance cp */ 753 } 754 else 755 { 756 if (downcase && !inquotes) 757 *cp = pg_tolower((unsigned char) *cp); 758 cp += PQmblenBounded(cp, encoding); 759 } 760 } 761 } 762 763 /* 764 * Evaluate a backticked substring of a slash command's argument. 765 * 766 * The portion of output_buf starting at backtick_start_offset is evaluated 767 * as a shell command and then replaced by the command's output. 
768 */ 769 static void 770 evaluate_backtick(PsqlScanState state) 771 { 772 PQExpBuffer output_buf = state->output_buf; 773 char *cmd = output_buf->data + backtick_start_offset; 774 PQExpBufferData cmd_output; 775 FILE *fd; 776 bool error = false; 777 char buf[512]; 778 size_t result; 779 780 initPQExpBuffer(&cmd_output); 781 782 fd = popen(cmd, "r"); 783 if (!fd) 784 { 785 pg_log_error("%s: %m", cmd); 786 error = true; 787 } 788 789 if (!error) 790 { 791 do 792 { 793 result = fread(buf, 1, sizeof(buf), fd); 794 if (ferror(fd)) 795 { 796 pg_log_error("%s: %m", cmd); 797 error = true; 798 break; 799 } 800 appendBinaryPQExpBuffer(&cmd_output, buf, result); 801 } while (!feof(fd)); 802 } 803 804 if (fd && pclose(fd) == -1) 805 { 806 pg_log_error("%s: %m", cmd); 807 error = true; 808 } 809 810 if (PQExpBufferDataBroken(cmd_output)) 811 { 812 pg_log_error("%s: out of memory", cmd); 813 error = true; 814 } 815 816 /* Now done with cmd, delete it from output_buf */ 817 output_buf->len = backtick_start_offset; 818 output_buf->data[output_buf->len] = '\0'; 819 820 /* If no error, transfer result to output_buf */ 821 if (!error) 822 { 823 /* strip any trailing newline (but only one) */ 824 if (cmd_output.len > 0 && 825 cmd_output.data[cmd_output.len - 1] == '\n') 826 cmd_output.len--; 827 appendBinaryPQExpBuffer(output_buf, cmd_output.data, cmd_output.len); 828 } 829 830 termPQExpBuffer(&cmd_output); 831 } 832