%top{
/*-------------------------------------------------------------------------
 *
 * psqlscanslash.l
 *	  lexical scanner for psql backslash commands
 *
 * XXX Avoid creating backtracking cases --- see the backend lexer for info.
 *
 * See fe_utils/psqlscan_int.h for additional commentary.
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/bin/psql/psqlscanslash.l
 *
 *-------------------------------------------------------------------------
 */
#include "postgres_fe.h"

#include "psqlscanslash.h"
#include "common/logging.h"
#include "fe_utils/conditional.h"

#include "libpq-fe.h"
}

%{
#include "fe_utils/psqlscan_int.h"

/*
 * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
 * doesn't presently make use of that argument, so just declare it as int.
 */
typedef int YYSTYPE;

/*
 * Set the type of yyextra; we use it as a pointer back to the containing
 * PsqlScanState.
 */
#define YY_EXTRA_TYPE PsqlScanState

/*
 * These variables do not need to be saved across calls.  Yeah, it's a bit
 * of a hack, but putting them into PsqlScanStateData would be klugy too.
 *
 * option_type: copied from psql_scan_slash_option's "type" argument; the
 *		"|" rule in xslashargstart tests it against OT_FILEPIPE.
 * option_quote: where the xslasharg rules store the most recent quote
 *		symbol; psql_scan_slash_option points it at the caller's "quote"
 *		output (or at a local dummy when the caller passed NULL).
 * unquoted_option_chars: see the note at the head of the xslasharg rules.
 * backtick_start_offset: output_buf->len as of the opening backtick;
 *		evaluate_backtick treats the buffer contents from there on as the
 *		shell command.
 */
static enum slash_option_type option_type;
static char *option_quote;
static int	unquoted_option_chars;
static int	backtick_start_offset;


/* Return values from yylex() */
#define LEXRES_EOL			0	/* end of input */
#define LEXRES_OK			1	/* OK completion of backslash argument */


static void evaluate_backtick(PsqlScanState state);

/*
 * ECHO sends the current token text to psqlscan_emit.  It expands to a
 * reference to cur_state, so it is only usable inside yylex, where that
 * local is declared.
 */
#define ECHO psqlscan_emit(cur_state, yytext, yyleng)

/*
 * Work around a bug in flex 2.5.35: it emits a couple of functions that
 * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
 * this would cause warnings.  Providing our own declarations should be
 * harmless even when the bug gets fixed.
 */
extern int	slash_yyget_column(yyscan_t yyscanner);
extern void slash_yyset_column(int column_no, yyscan_t yyscanner);

/* LCOV_EXCL_START */

%}

/* Except for the prefix, these options should match psqlscan.l */
%option reentrant
%option bison-bridge
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="slash_yy"

/*
 * OK, here is a short description of lex/flex rules behavior.
 * The longest pattern which matches an input string is always chosen.
 * For equal-length patterns, the first occurring in the rules list is chosen.
 * INITIAL is the starting state, to which all non-conditional rules apply.
 * Exclusive states change parsing rules while the state is active.  When in
 * an exclusive state, only those rules defined for that state apply.
 */

/*
 * Exclusive states for lexing backslash commands
 *
 * xslashcmd		command name; start state set by psql_scan_slash_command
 * xslashargstart	before an argument; start state set by
 *					psql_scan_slash_option (unless OT_WHOLE_LINE)
 * xslasharg		inside an argument, outside any quotes
 * xslashquote		inside '...' within an argument
 * xslashbackquote	inside `...` within an argument
 * xslashdquote		inside "..." within an argument
 * xslashwholeline	rest of line taken as one argument (OT_WHOLE_LINE, or
 *					after a leading "|" when the type is OT_FILEPIPE)
 * xslashend		start state set by psql_scan_slash_command_end
 */
%x xslashcmd
%x xslashargstart
%x xslasharg
%x xslashquote
%x xslashbackquote
%x xslashdquote
%x xslashwholeline
%x xslashend

/*
 * Assorted character class definitions that should match psqlscan.l.
 */
space			[ \t\n\r\f]
quote			'
xeoctesc		[\\][0-7]{1,3}
xehexesc		[\\]x[0-9A-Fa-f]{1,2}
xqdouble		{quote}{quote}
dquote			\"
variable_char	[A-Za-z\200-\377_0-9]

other			.

%%

%{
		/* Declare some local variables inside yylex(), for convenience */
		PsqlScanState cur_state = yyextra;
		PQExpBuffer output_buf = cur_state->output_buf;

		/*
		 * Force flex into the state indicated by start_state.  This has a
		 * couple of purposes: it lets some of the functions below set a new
		 * starting state without ugly direct access to flex variables, and it
		 * allows us to transition from one flex lexer to another so that we
		 * can lex different parts of the source string using separate lexers.
		 */
		BEGIN(cur_state->start_state);
%}

	/*
	 * We don't really expect to be invoked in the INITIAL state in this
	 * lexer; but if we are, just spit data to the output_buf until EOF.
	 */

{other}|\n		{ ECHO; }

	/*
	 * Exclusive lexer states to handle backslash command lexing
	 */

<xslashcmd>{
	/* command name ends at whitespace or backslash; eat all else */

{space}|"\\"	{
					/*
					 * Push the terminator back unread, record the state we
					 * stopped in, and hand control back to the caller.
					 */
					yyless(0);
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

{other}			{ ECHO; }

}

<xslashargstart>{
	/*
	 * Discard any whitespace before argument, then go to xslasharg state.
	 * An exception is that "|" is only special at start of argument, so we
	 * check for it here.
	 */

{space}+		{ }

"|"				{
					if (option_type == OT_FILEPIPE)
					{
						/* treat like whole-string case */
						ECHO;
						BEGIN(xslashwholeline);
					}
					else
					{
						/* vertical bar is not special otherwise */
						yyless(0);
						BEGIN(xslasharg);
					}
				}

{other}			{
					/* first argument character: rescan it in xslasharg */
					yyless(0);
					BEGIN(xslasharg);
				}

}

<xslasharg>{
	/*
	 * Default processing of text in a slash command's argument.
	 *
	 * Note: unquoted_option_chars counts the number of characters at the
	 * end of the argument that were not subject to any form of quoting.
	 * psql_scan_slash_option needs this to strip trailing semicolons safely.
	 */

{space}|"\\"	{
					/*
					 * Unquoted space is end of arg; do not eat.  Likewise
					 * backslash is end of command or next command, do not eat
					 *
					 * XXX this means we can't conveniently accept options
					 * that include unquoted backslashes; therefore, option
					 * processing that encourages use of backslashes is rather
					 * broken.
					 */
					yyless(0);
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

{quote}			{
					/* opening single quote: the quote itself is not emitted */
					*option_quote = '\'';
					unquoted_option_chars = 0;
					BEGIN(xslashquote);
				}

"`"				{
					/*
					 * Opening backtick: remember where the command text will
					 * start in output_buf, for evaluate_backtick's benefit.
					 */
					backtick_start_offset = output_buf->len;
					*option_quote = '`';
					unquoted_option_chars = 0;
					BEGIN(xslashbackquote);
				}

{dquote}		{
					/* opening double quote: unlike ' and `, it is emitted */
					ECHO;
					*option_quote = '"';
					unquoted_option_chars = 0;
					BEGIN(xslashdquote);
				}

:{variable_char}+	{
					/* Possible psql variable substitution */
					if (cur_state->callbacks->get_variable == NULL)
						ECHO;
					else
					{
						char	   *varname;
						char	   *value;

						/* yytext + 1 skips the leading colon */
						varname = psqlscan_extract_substring(cur_state,
															 yytext + 1,
															 yyleng - 1);
						value = cur_state->callbacks->get_variable(varname,
																   PQUOTE_PLAIN,
																   cur_state->cb_passthrough);
						free(varname);

						/*
						 * The variable value is just emitted without any
						 * further examination.  This is consistent with the
						 * pre-8.0 code behavior, if not with the way that
						 * variables are handled outside backslash commands.
						 * Note that we needn't guard against recursion here.
						 */
						if (value)
						{
							appendPQExpBufferStr(output_buf, value);
							free(value);
						}
						else
							ECHO;	/* no value returned: emit text as-is */

						*option_quote = ':';
					}
					unquoted_option_chars = 0;
				}

:'{variable_char}+'	{
					/* :'name' -- request SQL-literal quoting of the value */
					psqlscan_escape_variable(cur_state, yytext, yyleng,
											 PQUOTE_SQL_LITERAL);
					*option_quote = ':';
					unquoted_option_chars = 0;
				}


:\"{variable_char}+\"	{
					/* :"name" -- request SQL-identifier quoting of the value */
					psqlscan_escape_variable(cur_state, yytext, yyleng,
											 PQUOTE_SQL_IDENT);
					*option_quote = ':';
					unquoted_option_chars = 0;
				}

:\{\?{variable_char}+\}	{
					/*
					 * :{?name} -- handled entirely by psqlscan_test_variable.
					 * Note that, unlike the substitution rules above, this
					 * action leaves *option_quote and unquoted_option_chars
					 * untouched.
					 */
					psqlscan_test_variable(cur_state, yytext, yyleng);
				}

	/*
	 * The next four rules match incomplete forms of the quoted-variable
	 * syntaxes above.  Each keeps only the colon (yyless(1)), emits it as an
	 * ordinary unquoted character, and lets the rest be rescanned.
	 */

:'{variable_char}*	{
					/* Throw back everything but the colon */
					yyless(1);
					unquoted_option_chars++;
					ECHO;
				}

:\"{variable_char}*	{
					/* Throw back everything but the colon */
					yyless(1);
					unquoted_option_chars++;
					ECHO;
				}

:\{\?{variable_char}*	{
					/* Throw back everything but the colon */
					yyless(1);
					unquoted_option_chars++;
					ECHO;
				}

:\{		{
					/* Throw back everything but the colon */
					yyless(1);
					unquoted_option_chars++;
					ECHO;
				}

{other}			{
					/* any other character is plain unquoted argument text */
					unquoted_option_chars++;
					ECHO;
				}

}

<xslashquote>{
	/*
	 * single-quoted text: copy literally except for '' and backslash
	 * sequences
	 */

{quote}			{ BEGIN(xslasharg); }

{xqdouble}		{ appendPQExpBufferChar(output_buf, '\''); }

"\\n"			{ appendPQExpBufferChar(output_buf, '\n'); }
"\\t"			{ appendPQExpBufferChar(output_buf, '\t'); }
"\\b"			{ appendPQExpBufferChar(output_buf, '\b'); }
"\\r"			{ appendPQExpBufferChar(output_buf, '\r'); }
"\\f"			{ appendPQExpBufferChar(output_buf, '\f'); }

{xeoctesc}		{
					/* octal case: yytext + 1 skips the backslash */
					appendPQExpBufferChar(output_buf,
										  (char) strtol(yytext + 1, NULL, 8));
				}

{xehexesc}		{
					/* hex case: yytext + 2 skips the backslash and the 'x' */
					appendPQExpBufferChar(output_buf,
										  (char) strtol(yytext + 2, NULL, 16));
				}

"\\".			{
					/* any other backslash pair: emit just the second char */
					psqlscan_emit(cur_state, yytext + 1, 1);
				}

{other}|\n		{ ECHO; }

}

<xslashbackquote>{
	/*
	 * backticked text: copy everything until next backquote (expanding
	 * variable references, but doing nought else), then evaluate.
	 */

"`"				{
					/* In an inactive \if branch, don't evaluate the command */
					if (cur_state->cb_passthrough == NULL ||
						conditional_active((ConditionalStack) cur_state->cb_passthrough))
						evaluate_backtick(cur_state);
					BEGIN(xslasharg);
				}

:{variable_char}+	{
					/* Possible psql variable substitution */
					if (cur_state->callbacks->get_variable == NULL)
						ECHO;
					else
					{
						char	   *varname;
						char	   *value;

						/* yytext + 1 skips the leading colon */
						varname = psqlscan_extract_substring(cur_state,
															 yytext + 1,
															 yyleng - 1);
						value = cur_state->callbacks->get_variable(varname,
																   PQUOTE_PLAIN,
																   cur_state->cb_passthrough);
						free(varname);

						if (value)
						{
							appendPQExpBufferStr(output_buf, value);
							free(value);
						}
						else
							ECHO;	/* no value returned: emit text as-is */
					}
				}

:'{variable_char}+'	{
					/* :'name' -- request shell-argument quoting of the value */
					psqlscan_escape_variable(cur_state, yytext, yyleng,
											 PQUOTE_SHELL_ARG);
				}

:'{variable_char}*	{
					/* Throw back everything but the colon */
					yyless(1);
					ECHO;
				}

{other}|\n		{ ECHO; }

}

<xslashdquote>{
	/* double-quoted text: copy verbatim, including the double quotes */

{dquote}		{
					ECHO;
					BEGIN(xslasharg);
				}

{other}|\n		{ ECHO; }

}

<xslashwholeline>{
	/* copy everything until end of input line */
	/* but suppress leading whitespace */

{space}+		{
					/* whitespace is kept only once something was output */
					if (output_buf->len > 0)
						ECHO;
				}

{other}			{ ECHO; }

}

<xslashend>{
	/* at end of command, eat a double backslash, but not anything else */

"\\\\"			{
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

{other}|\n		{
					/* anything else is pushed back unread */
					yyless(0);
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

}

	/* This rule carries no start-condition prefix; it is the sole EOF handler. */

<<EOF>>			{
					if (cur_state->buffer_stack == NULL)
					{
						cur_state->start_state = YY_START;
						return LEXRES_EOL;		/* end of input reached */
					}

					/*
					 * We were expanding a variable, so pop the inclusion
					 * stack and keep lexing
					 */
					psqlscan_pop_buffer_stack(cur_state);
					psqlscan_select_top_buffer(cur_state);
				}

%%

/* LCOV_EXCL_STOP */

/*
 * Scan the command name of a psql backslash command.  This should be called
 * after psql_scan() returns PSCAN_BACKSLASH.  It is assumed that the input
 * has been consumed through the leading backslash.
 *
 * The return value is a malloc'd copy of the command name, as parsed off
 * from the input.
 *
 * The name is collected by running yylex in the xslashcmd state, which
 * stops (without consuming it) at the first whitespace or backslash, or at
 * end of input.  yylex's return code is deliberately ignored here.
 */
char *
psql_scan_slash_command(PsqlScanState state)
{
	PQExpBufferData mybuf;

	/* Must be scanning already */
	Assert(state->scanbufhandle != NULL);

	/* Build a local buffer that we'll return the data of */
	initPQExpBuffer(&mybuf);

	/* Set current output target */
	state->output_buf = &mybuf;

	/* Set input source: top of the buffer stack if any, else main buffer */
	if (state->buffer_stack != NULL)
		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
	else
		yy_switch_to_buffer(state->scanbufhandle, state->scanner);

	/*
	 * Set lexer start state.  Note that this is sufficient to switch
	 * state->scanner over to using the tables in this lexer file.
	 */
	state->start_state = xslashcmd;

	/* And lex. */
	yylex(NULL, state->scanner);

	/* There are no possible errors in this lex state... */

	/*
	 * In case the caller returns to using the regular SQL lexer, reselect the
	 * appropriate initial state.
	 */
	psql_scan_reselect_sql_lexer(state);

	/* Ownership of the buffer's storage passes to the caller */
	return mybuf.data;
}

/*
 * Parse off the next argument for a backslash command, and return it as a
 * malloc'd string.  If there are no more arguments, returns NULL.
527 * 528 * type tells what processing, if any, to perform on the option string; 529 * for example, if it's a SQL identifier, we want to downcase any unquoted 530 * letters. 531 * 532 * if quote is not NULL, *quote is set to 0 if no quoting was found, else 533 * the last quote symbol used in the argument. 534 * 535 * if semicolon is true, unquoted trailing semicolon(s) that would otherwise 536 * be taken as part of the option string will be stripped. 537 * 538 * NOTE: the only possible syntax errors for backslash options are unmatched 539 * quotes, which are detected when we run out of input. Therefore, on a 540 * syntax error we just throw away the string and return NULL; there is no 541 * need to worry about flushing remaining input. 542 */ 543 char * 544 psql_scan_slash_option(PsqlScanState state, 545 enum slash_option_type type, 546 char *quote, 547 bool semicolon) 548 { 549 PQExpBufferData mybuf; 550 int lexresult PG_USED_FOR_ASSERTS_ONLY; 551 int final_state; 552 char local_quote; 553 554 /* Must be scanning already */ 555 Assert(state->scanbufhandle != NULL); 556 557 if (quote == NULL) 558 quote = &local_quote; 559 *quote = 0; 560 561 /* Build a local buffer that we'll return the data of */ 562 initPQExpBuffer(&mybuf); 563 564 /* Set up static variables that will be used by yylex */ 565 option_type = type; 566 option_quote = quote; 567 unquoted_option_chars = 0; 568 569 /* Set current output target */ 570 state->output_buf = &mybuf; 571 572 /* Set input source */ 573 if (state->buffer_stack != NULL) 574 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); 575 else 576 yy_switch_to_buffer(state->scanbufhandle, state->scanner); 577 578 /* Set lexer start state */ 579 if (type == OT_WHOLE_LINE) 580 state->start_state = xslashwholeline; 581 else 582 state->start_state = xslashargstart; 583 584 /* And lex. */ 585 lexresult = yylex(NULL, state->scanner); 586 587 /* Save final state for a moment... 
*/ 588 final_state = state->start_state; 589 590 /* 591 * In case the caller returns to using the regular SQL lexer, reselect the 592 * appropriate initial state. 593 */ 594 psql_scan_reselect_sql_lexer(state); 595 596 /* 597 * Check the lex result: we should have gotten back either LEXRES_OK 598 * or LEXRES_EOL (the latter indicating end of string). If we were inside 599 * a quoted string, as indicated by final_state, EOL is an error. 600 */ 601 Assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK); 602 603 switch (final_state) 604 { 605 case xslashargstart: 606 /* empty arg */ 607 break; 608 case xslasharg: 609 /* Strip any unquoted trailing semi-colons if requested */ 610 if (semicolon) 611 { 612 while (unquoted_option_chars-- > 0 && 613 mybuf.len > 0 && 614 mybuf.data[mybuf.len - 1] == ';') 615 { 616 mybuf.data[--mybuf.len] = '\0'; 617 } 618 } 619 620 /* 621 * If SQL identifier processing was requested, then we strip out 622 * excess double quotes and optionally downcase unquoted letters. 623 */ 624 if (type == OT_SQLID || type == OT_SQLIDHACK) 625 { 626 dequote_downcase_identifier(mybuf.data, 627 (type != OT_SQLIDHACK), 628 state->encoding); 629 /* update mybuf.len for possible shortening */ 630 mybuf.len = strlen(mybuf.data); 631 } 632 break; 633 case xslashquote: 634 case xslashbackquote: 635 case xslashdquote: 636 /* must have hit EOL inside quotes */ 637 pg_log_error("unterminated quoted string"); 638 termPQExpBuffer(&mybuf); 639 return NULL; 640 case xslashwholeline: 641 /* always okay */ 642 break; 643 default: 644 /* can't get here */ 645 fprintf(stderr, "invalid YY_START\n"); 646 exit(1); 647 } 648 649 /* 650 * An unquoted empty argument isn't possible unless we are at end of 651 * command. Return NULL instead. 652 */ 653 if (mybuf.len == 0 && *quote == 0) 654 { 655 termPQExpBuffer(&mybuf); 656 return NULL; 657 } 658 659 /* Else return the completed string. 
*/ 660 return mybuf.data; 661 } 662 663 /* 664 * Eat up any unused \\ to complete a backslash command. 665 */ 666 void 667 psql_scan_slash_command_end(PsqlScanState state) 668 { 669 /* Must be scanning already */ 670 Assert(state->scanbufhandle != NULL); 671 672 /* Set current output target */ 673 state->output_buf = NULL; /* we won't output anything */ 674 675 /* Set input source */ 676 if (state->buffer_stack != NULL) 677 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); 678 else 679 yy_switch_to_buffer(state->scanbufhandle, state->scanner); 680 681 /* Set lexer start state */ 682 state->start_state = xslashend; 683 684 /* And lex. */ 685 yylex(NULL, state->scanner); 686 687 /* There are no possible errors in this lex state... */ 688 689 /* 690 * We expect the caller to return to using the regular SQL lexer, so 691 * reselect the appropriate initial state. 692 */ 693 psql_scan_reselect_sql_lexer(state); 694 } 695 696 /* 697 * Fetch current paren nesting depth 698 */ 699 int 700 psql_scan_get_paren_depth(PsqlScanState state) 701 { 702 return state->paren_depth; 703 } 704 705 /* 706 * Set paren nesting depth 707 */ 708 void 709 psql_scan_set_paren_depth(PsqlScanState state, int depth) 710 { 711 Assert(depth >= 0); 712 state->paren_depth = depth; 713 } 714 715 /* 716 * De-quote and optionally downcase a SQL identifier. 717 * 718 * The string at *str is modified in-place; it can become shorter, 719 * but not longer. 720 * 721 * If downcase is true then non-quoted letters are folded to lower case. 722 * Ideally this behavior will match the backend's downcase_identifier(); 723 * but note that it could differ if LC_CTYPE is different in the frontend. 724 * 725 * Note that a string like FOO"BAR"BAZ will be converted to fooBARbaz; 726 * this is somewhat inconsistent with the SQL spec, which would have us 727 * parse it as several identifiers. 
But for psql's purposes, we want a 728 * string like "foo"."bar" to be treated as one option, so there's little 729 * choice; this routine doesn't get to change the token boundaries. 730 */ 731 void 732 dequote_downcase_identifier(char *str, bool downcase, int encoding) 733 { 734 bool inquotes = false; 735 char *cp = str; 736 737 while (*cp) 738 { 739 if (*cp == '"') 740 { 741 if (inquotes && cp[1] == '"') 742 { 743 /* Keep the first quote, remove the second */ 744 cp++; 745 } 746 else 747 inquotes = !inquotes; 748 /* Collapse out quote at *cp */ 749 memmove(cp, cp + 1, strlen(cp)); 750 /* do not advance cp */ 751 } 752 else 753 { 754 if (downcase && !inquotes) 755 *cp = pg_tolower((unsigned char) *cp); 756 cp += PQmblenBounded(cp, encoding); 757 } 758 } 759 } 760 761 /* 762 * Evaluate a backticked substring of a slash command's argument. 763 * 764 * The portion of output_buf starting at backtick_start_offset is evaluated 765 * as a shell command and then replaced by the command's output. 
766 */ 767 static void 768 evaluate_backtick(PsqlScanState state) 769 { 770 PQExpBuffer output_buf = state->output_buf; 771 char *cmd = output_buf->data + backtick_start_offset; 772 PQExpBufferData cmd_output; 773 FILE *fd; 774 bool error = false; 775 char buf[512]; 776 size_t result; 777 778 initPQExpBuffer(&cmd_output); 779 780 fd = popen(cmd, "r"); 781 if (!fd) 782 { 783 pg_log_error("%s: %m", cmd); 784 error = true; 785 } 786 787 if (!error) 788 { 789 do 790 { 791 result = fread(buf, 1, sizeof(buf), fd); 792 if (ferror(fd)) 793 { 794 pg_log_error("%s: %m", cmd); 795 error = true; 796 break; 797 } 798 appendBinaryPQExpBuffer(&cmd_output, buf, result); 799 } while (!feof(fd)); 800 } 801 802 if (fd && pclose(fd) == -1) 803 { 804 pg_log_error("%s: %m", cmd); 805 error = true; 806 } 807 808 if (PQExpBufferDataBroken(cmd_output)) 809 { 810 pg_log_error("%s: out of memory", cmd); 811 error = true; 812 } 813 814 /* Now done with cmd, delete it from output_buf */ 815 output_buf->len = backtick_start_offset; 816 output_buf->data[output_buf->len] = '\0'; 817 818 /* If no error, transfer result to output_buf */ 819 if (!error) 820 { 821 /* strip any trailing newline (but only one) */ 822 if (cmd_output.len > 0 && 823 cmd_output.data[cmd_output.len - 1] == '\n') 824 cmd_output.len--; 825 appendBinaryPQExpBuffer(output_buf, cmd_output.data, cmd_output.len); 826 } 827 828 termPQExpBuffer(&cmd_output); 829 } 830