%top{
/*-------------------------------------------------------------------------
 *
 * psqlscanslash.l
 *	  lexical scanner for psql backslash commands
 *
 * XXX Avoid creating backtracking cases --- see the backend lexer for info.
 *
 * See fe_utils/psqlscan_int.h for additional commentary.
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/bin/psql/psqlscanslash.l
 *
 *-------------------------------------------------------------------------
 */
#include "postgres_fe.h"

#include "psqlscanslash.h"
#include "conditional.h"

#include "libpq-fe.h"
}

%{
#include "fe_utils/psqlscan_int.h"

/*
 * Byte length of the character at s as reported by PQmblen() for encoding e,
 * clamped by strnlen() so that it never exceeds the number of bytes left
 * before the string's terminating NUL.  Note that s is evaluated twice, so
 * the argument must not have side effects.
 */
#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e))

/*
 * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
 * doesn't presently make use of that argument, so just declare it as int.
 */
typedef int YYSTYPE;

/*
 * Set the type of yyextra; we use it as a pointer back to the containing
 * PsqlScanState.
 */
#define YY_EXTRA_TYPE PsqlScanState

/*
 * These variables do not need to be saved across calls.  Yeah, it's a bit
 * of a hack, but putting them into PsqlScanStateData would be klugy too.
 *
 * They are (re)initialized by psql_scan_slash_option() before each yylex()
 * call and are only meaningful while that call is in progress.
 */
static enum slash_option_type option_type;	/* option type being scanned */
static char *option_quote;		/* where to report the last quote symbol */
static int	unquoted_option_chars;	/* count of trailing unquoted chars */
static int	backtick_start_offset;	/* output_buf offset of backtick text */


/* Return values from yylex() */
#define LEXRES_EOL			0	/* end of input */
#define LEXRES_OK			1	/* OK completion of backslash argument */


static void evaluate_backtick(PsqlScanState state);

/*
 * Replace flex's default ECHO: hand the matched text to psqlscan_emit().
 * This relies on the local variable cur_state that is declared at the top
 * of yylex() in the rules section.
 */
#define ECHO psqlscan_emit(cur_state, yytext, yyleng)

/*
 * Work around a bug in flex 2.5.35: it emits a couple of functions that
 * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
 * this would cause warnings.  Providing our own declarations should be
 * harmless even when the bug gets fixed.
 */
extern int	slash_yyget_column(yyscan_t yyscanner);
extern void slash_yyset_column(int column_no, yyscan_t yyscanner);

%}

/* Except for the prefix, these options should match psqlscan.l */
%option reentrant
%option bison-bridge
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="slash_yy"

/*
 * OK, here is a short description of lex/flex rules behavior.
 * The longest pattern which matches an input string is always chosen.
 * For equal-length patterns, the first occurring in the rules list is chosen.
 * INITIAL is the starting state, to which all non-conditional rules apply.
 * Exclusive states change parsing rules while the state is active.  When in
 * an exclusive state, only those rules defined for that state apply.
 */

/*
 * Exclusive states for lexing backslash commands:
 *
 *	xslashcmd		scanning the command name (psql_scan_slash_command)
 *	xslashargstart	skipping whitespace ahead of an argument
 *	xslasharg		inside the unquoted part of an argument
 *	xslashquote		inside a single-quoted part of an argument
 *	xslashbackquote	inside a backticked part of an argument
 *	xslashdquote	inside a double-quoted part of an argument
 *	xslashwholeline	copying the rest of the input as one argument
 *	xslashend		finishing a command (psql_scan_slash_command_end)
 */
%x xslashcmd
%x xslashargstart
%x xslasharg
%x xslashquote
%x xslashbackquote
%x xslashdquote
%x xslashwholeline
%x xslashend

/*
 * Assorted character class definitions that should match psqlscan.l.
 */
space			[ \t\n\r\f]
quote			'
xeoctesc		[\\][0-7]{1,3}
xehexesc		[\\]x[0-9A-Fa-f]{1,2}
xqdouble		{quote}{quote}
dquote			\"
variable_char	[A-Za-z\200-\377_0-9]

other			.

%%

%{
		/* Declare some local variables inside yylex(), for convenience */
		PsqlScanState cur_state = yyextra;
		PQExpBuffer output_buf = cur_state->output_buf;

		/*
		 * Force flex into the state indicated by start_state.
This has a couple of purposes: it lets some of the functions below set a
		 * new starting state without ugly direct access to flex variables, and
		 * it allows us to transition from one flex lexer to another so that we
		 * can lex different parts of the source string using separate lexers.
		 */
		BEGIN(cur_state->start_state);
%}

	/*
	 * We don't really expect to be invoked in the INITIAL state in this
	 * lexer; but if we are, just spit data to the output_buf until EOF.
	 */

{other}|\n		{ ECHO; }

	/*
	 * Exclusive lexer states to handle backslash command lexing
	 */

<xslashcmd>{
	/* command name ends at whitespace or backslash; eat all else */

{space}|"\\"	{
					/* push the delimiter back; it is not part of the name */
					yyless(0);
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

{other}			{ ECHO; }

}

<xslashargstart>{
	/*
	 * Discard any whitespace before argument, then go to xslasharg state.
	 * An exception is that "|" is only special at start of argument, so we
	 * check for it here.
	 */

{space}+		{ }

"|"				{
					if (option_type == OT_FILEPIPE)
					{
						/* treat like whole-string case */
						ECHO;
						BEGIN(xslashwholeline);
					}
					else
					{
						/* vertical bar is not special otherwise */
						yyless(0);
						BEGIN(xslasharg);
					}
				}

{other}			{
					/* first real character: rescan it as argument text */
					yyless(0);
					BEGIN(xslasharg);
				}

}

<xslasharg>{
	/*
	 * Default processing of text in a slash command's argument.
	 *
	 * Note: unquoted_option_chars counts the number of characters at the
	 * end of the argument that were not subject to any form of quoting.
	 * psql_scan_slash_option needs this to strip trailing semicolons safely.
	 */

{space}|"\\"	{
					/*
					 * Unquoted space is end of arg; do not eat.  Likewise
					 * backslash is end of command or next command, do not eat
					 *
					 * XXX this means we can't conveniently accept options
					 * that include unquoted backslashes; therefore, option
					 * processing that encourages use of backslashes is rather
					 * broken.
					 */
					yyless(0);
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

{quote}			{
					/* opening single quote: the quote itself is not emitted */
					*option_quote = '\'';
					unquoted_option_chars = 0;
					BEGIN(xslashquote);
				}

"`"				{
					/* remember where the backticked command text will start */
					backtick_start_offset = output_buf->len;
					*option_quote = '`';
					unquoted_option_chars = 0;
					BEGIN(xslashbackquote);
				}

{dquote}		{
					/* unlike the other quote styles, the quote mark is kept */
					ECHO;
					*option_quote = '"';
					unquoted_option_chars = 0;
					BEGIN(xslashdquote);
				}

:{variable_char}+	{
					/* Possible psql variable substitution */
					if (cur_state->callbacks->get_variable == NULL)
						ECHO;
					else
					{
						char	   *varname;
						char	   *value;

						/* look the name up without the leading colon */
						varname = psqlscan_extract_substring(cur_state,
															 yytext + 1,
															 yyleng - 1);
						value = cur_state->callbacks->get_variable(varname,
																   PQUOTE_PLAIN,
																   cur_state->cb_passthrough);
						free(varname);

						/*
						 * The variable value is just emitted without any
						 * further examination.  This is consistent with the
						 * pre-8.0 code behavior, if not with the way that
						 * variables are handled outside backslash commands.
						 * Note that we needn't guard against recursion here.
						 */
						if (value)
						{
							appendPQExpBufferStr(output_buf, value);
							free(value);
						}
						else
							ECHO;	/* no value returned: keep the text as-is */

						*option_quote = ':';
					}
					/* this text does not count as trailing unquoted chars */
					unquoted_option_chars = 0;
				}

:'{variable_char}+'	{
					/* :'name' --- expansion requested as PQUOTE_SQL_LITERAL */
					psqlscan_escape_variable(cur_state, yytext, yyleng,
											 PQUOTE_SQL_LITERAL);
					*option_quote = ':';
					unquoted_option_chars = 0;
				}


:\"{variable_char}+\"	{
					/* :"name" --- expansion requested as PQUOTE_SQL_IDENT */
					psqlscan_escape_variable(cur_state, yytext, yyleng,
											 PQUOTE_SQL_IDENT);
					*option_quote = ':';
					unquoted_option_chars = 0;
				}

:'{variable_char}*	{
					/* Throw back everything but the colon */
					yyless(1);
					unquoted_option_chars++;
					ECHO;
				}

:\"{variable_char}*	{
					/* Throw back everything but the colon */
					yyless(1);
					unquoted_option_chars++;
					ECHO;
				}

{other}			{
					/* ordinary unquoted character */
					unquoted_option_chars++;
					ECHO;
				}

}

<xslashquote>{
	/*
	 * single-quoted text: copy literally except for '' and backslash
	 * sequences
	 */

{quote}			{ BEGIN(xslasharg); }

{xqdouble}		{ appendPQExpBufferChar(output_buf, '\''); }

"\\n"			{ appendPQExpBufferChar(output_buf, '\n'); }
"\\t"			{ appendPQExpBufferChar(output_buf, '\t'); }
"\\b"			{ appendPQExpBufferChar(output_buf, '\b'); }
"\\r"			{ appendPQExpBufferChar(output_buf, '\r'); }
"\\f"			{ appendPQExpBufferChar(output_buf, '\f'); }

{xeoctesc}		{
					/* octal case: digits start right after the backslash */
					appendPQExpBufferChar(output_buf,
										  (char) strtol(yytext + 1, NULL, 8));
				}

{xehexesc}		{
					/* hex case: digits start after the backslash and 'x' */
					appendPQExpBufferChar(output_buf,
										  (char) strtol(yytext + 2, NULL, 16));
				}

	/* any other backslash pair: emit what follows the backslash */
"\\".			{ psqlscan_emit(cur_state, yytext + 1, 1); }

{other}|\n		{ ECHO; }

}

<xslashbackquote>{
	/*
	 * backticked text: copy everything until next backquote (expanding
	 * variable references, but doing nought else), then evaluate.
	 */

"`"				{
					/* In an inactive \if branch, don't evaluate the command */
					if (cur_state->cb_passthrough == NULL ||
						conditional_active((ConditionalStack) cur_state->cb_passthrough))
						evaluate_backtick(cur_state);
					BEGIN(xslasharg);
				}

:{variable_char}+	{
					/* Possible psql variable substitution */
					if (cur_state->callbacks->get_variable == NULL)
						ECHO;
					else
					{
						char	   *varname;
						char	   *value;

						/* look the name up without the leading colon */
						varname = psqlscan_extract_substring(cur_state,
															 yytext + 1,
															 yyleng - 1);
						value = cur_state->callbacks->get_variable(varname,
																   PQUOTE_PLAIN,
																   cur_state->cb_passthrough);
						free(varname);

						if (value)
						{
							appendPQExpBufferStr(output_buf, value);
							free(value);
						}
						else
							ECHO;	/* no value returned: keep the text as-is */
					}
				}

:'{variable_char}+'	{
					/* :'name' --- expansion requested as PQUOTE_SHELL_ARG */
					psqlscan_escape_variable(cur_state, yytext, yyleng,
											 PQUOTE_SHELL_ARG);
				}

:'{variable_char}*	{
					/* Throw back everything but the colon */
					yyless(1);
					ECHO;
				}

{other}|\n		{ ECHO; }

}

<xslashdquote>{
	/* double-quoted text: copy verbatim, including the double quotes */

{dquote}		{
					ECHO;
					BEGIN(xslasharg);
				}

{other}|\n		{ ECHO; }

}

<xslashwholeline>{
	/* copy everything until end of input line */
	/* but suppress leading whitespace */

{space}+		{
					/* whitespace is kept only once something was emitted */
					if (output_buf->len > 0)
						ECHO;
				}

{other}			{ ECHO; }

}

<xslashend>{
	/* at end of command, eat a double backslash, but not anything else */

"\\\\"			{
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

{other}|\n		{
					/* leave the character for whichever lexer runs next */
					yyless(0);
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

}

	/*
	 * This <<EOF>> rule carries no start-condition qualifier.
	 * NOTE(review): per the flex manual an unqualified <<EOF>> rule applies
	 * to every start condition lacking its own EOF action --- confirm.
	 */
<<EOF>>			{
					if (cur_state->buffer_stack == NULL)
					{
						cur_state->start_state = YY_START;
						return LEXRES_EOL;		/* end of input reached */
					}

					/*
					 * We were expanding a variable, so pop the inclusion
					 * stack and
keep lexing
					 */
					psqlscan_pop_buffer_stack(cur_state);
					psqlscan_select_top_buffer(cur_state);
				}

%%

/*
 * psql_scan_slash_command
 *
 * Lex the name of a psql backslash command.  Call this once psql_scan() has
 * returned PSCAN_BACKSLASH; the input is assumed to have been consumed up to
 * and including the leading backslash.
 *
 * Returns the command name, as read from the input, in malloc'd storage.
 */
char *
psql_scan_slash_command(PsqlScanState state)
{
	PQExpBufferData cmdbuf;

	/* A scan must already be in progress */
	Assert(state->scanbufhandle != NULL);

	/* Collect output in a local buffer whose data we hand back */
	initPQExpBuffer(&cmdbuf);
	state->output_buf = &cmdbuf;

	/* Read from the top stacked buffer if there is one, else the main one */
	if (state->buffer_stack == NULL)
		yy_switch_to_buffer(state->scanbufhandle, state->scanner);
	else
		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);

	/*
	 * Choosing the start state is all it takes to make state->scanner use
	 * the tables of this lexer file.
	 */
	state->start_state = xslashcmd;

	/* Run the lexer; this state cannot produce any errors */
	(void) yylex(NULL, state->scanner);

	/*
	 * The caller may go back to the regular SQL lexer next, so put its
	 * initial state back in place.
	 */
	psql_scan_reselect_sql_lexer(state);

	return cmdbuf.data;
}

/*
 * Parse off the next argument of a backslash command and return it as a
 * malloc'd string, or NULL when no arguments remain.
 *
 * type selects the processing, if any, applied to the option string; for
 * instance a SQL identifier gets its unquoted letters downcased.
 *
 * if quote is not NULL, *quote is set to 0 if no quoting was found, else
 * the last quote symbol used in the argument.
513 * 514 * if semicolon is true, unquoted trailing semicolon(s) that would otherwise 515 * be taken as part of the option string will be stripped. 516 * 517 * NOTE: the only possible syntax errors for backslash options are unmatched 518 * quotes, which are detected when we run out of input. Therefore, on a 519 * syntax error we just throw away the string and return NULL; there is no 520 * need to worry about flushing remaining input. 521 */ 522 char * 523 psql_scan_slash_option(PsqlScanState state, 524 enum slash_option_type type, 525 char *quote, 526 bool semicolon) 527 { 528 PQExpBufferData mybuf; 529 int lexresult PG_USED_FOR_ASSERTS_ONLY; 530 int final_state; 531 char local_quote; 532 533 /* Must be scanning already */ 534 Assert(state->scanbufhandle != NULL); 535 536 if (quote == NULL) 537 quote = &local_quote; 538 *quote = 0; 539 540 /* Build a local buffer that we'll return the data of */ 541 initPQExpBuffer(&mybuf); 542 543 /* Set up static variables that will be used by yylex */ 544 option_type = type; 545 option_quote = quote; 546 unquoted_option_chars = 0; 547 548 /* Set current output target */ 549 state->output_buf = &mybuf; 550 551 /* Set input source */ 552 if (state->buffer_stack != NULL) 553 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); 554 else 555 yy_switch_to_buffer(state->scanbufhandle, state->scanner); 556 557 /* Set lexer start state */ 558 if (type == OT_WHOLE_LINE) 559 state->start_state = xslashwholeline; 560 else 561 state->start_state = xslashargstart; 562 563 /* And lex. */ 564 lexresult = yylex(NULL, state->scanner); 565 566 /* Save final state for a moment... */ 567 final_state = state->start_state; 568 569 /* 570 * In case the caller returns to using the regular SQL lexer, reselect the 571 * appropriate initial state. 572 */ 573 psql_scan_reselect_sql_lexer(state); 574 575 /* 576 * Check the lex result: we should have gotten back either LEXRES_OK 577 * or LEXRES_EOL (the latter indicating end of string). 
If we were inside 578 * a quoted string, as indicated by final_state, EOL is an error. 579 */ 580 Assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK); 581 582 switch (final_state) 583 { 584 case xslashargstart: 585 /* empty arg */ 586 break; 587 case xslasharg: 588 /* Strip any unquoted trailing semi-colons if requested */ 589 if (semicolon) 590 { 591 while (unquoted_option_chars-- > 0 && 592 mybuf.len > 0 && 593 mybuf.data[mybuf.len - 1] == ';') 594 { 595 mybuf.data[--mybuf.len] = '\0'; 596 } 597 } 598 599 /* 600 * If SQL identifier processing was requested, then we strip out 601 * excess double quotes and optionally downcase unquoted letters. 602 */ 603 if (type == OT_SQLID || type == OT_SQLIDHACK) 604 { 605 dequote_downcase_identifier(mybuf.data, 606 (type != OT_SQLIDHACK), 607 state->encoding); 608 /* update mybuf.len for possible shortening */ 609 mybuf.len = strlen(mybuf.data); 610 } 611 break; 612 case xslashquote: 613 case xslashbackquote: 614 case xslashdquote: 615 /* must have hit EOL inside quotes */ 616 state->callbacks->write_error("unterminated quoted string\n"); 617 termPQExpBuffer(&mybuf); 618 return NULL; 619 case xslashwholeline: 620 /* always okay */ 621 break; 622 default: 623 /* can't get here */ 624 fprintf(stderr, "invalid YY_START\n"); 625 exit(1); 626 } 627 628 /* 629 * An unquoted empty argument isn't possible unless we are at end of 630 * command. Return NULL instead. 631 */ 632 if (mybuf.len == 0 && *quote == 0) 633 { 634 termPQExpBuffer(&mybuf); 635 return NULL; 636 } 637 638 /* Else return the completed string. */ 639 return mybuf.data; 640 } 641 642 /* 643 * Eat up any unused \\ to complete a backslash command. 
644 */ 645 void 646 psql_scan_slash_command_end(PsqlScanState state) 647 { 648 /* Must be scanning already */ 649 Assert(state->scanbufhandle != NULL); 650 651 /* Set current output target */ 652 state->output_buf = NULL; /* we won't output anything */ 653 654 /* Set input source */ 655 if (state->buffer_stack != NULL) 656 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); 657 else 658 yy_switch_to_buffer(state->scanbufhandle, state->scanner); 659 660 /* Set lexer start state */ 661 state->start_state = xslashend; 662 663 /* And lex. */ 664 yylex(NULL, state->scanner); 665 666 /* There are no possible errors in this lex state... */ 667 668 /* 669 * We expect the caller to return to using the regular SQL lexer, so 670 * reselect the appropriate initial state. 671 */ 672 psql_scan_reselect_sql_lexer(state); 673 } 674 675 /* 676 * Fetch current paren nesting depth 677 */ 678 int 679 psql_scan_get_paren_depth(PsqlScanState state) 680 { 681 return state->paren_depth; 682 } 683 684 /* 685 * Set paren nesting depth 686 */ 687 void 688 psql_scan_set_paren_depth(PsqlScanState state, int depth) 689 { 690 Assert(depth >= 0); 691 state->paren_depth = depth; 692 } 693 694 /* 695 * De-quote and optionally downcase a SQL identifier. 696 * 697 * The string at *str is modified in-place; it can become shorter, 698 * but not longer. 699 * 700 * If downcase is true then non-quoted letters are folded to lower case. 701 * Ideally this behavior will match the backend's downcase_identifier(); 702 * but note that it could differ if LC_CTYPE is different in the frontend. 703 * 704 * Note that a string like FOO"BAR"BAZ will be converted to fooBARbaz; 705 * this is somewhat inconsistent with the SQL spec, which would have us 706 * parse it as several identifiers. But for psql's purposes, we want a 707 * string like "foo"."bar" to be treated as one option, so there's little 708 * choice; this routine doesn't get to change the token boundaries. 
709 */ 710 void 711 dequote_downcase_identifier(char *str, bool downcase, int encoding) 712 { 713 bool inquotes = false; 714 char *cp = str; 715 716 while (*cp) 717 { 718 if (*cp == '"') 719 { 720 if (inquotes && cp[1] == '"') 721 { 722 /* Keep the first quote, remove the second */ 723 cp++; 724 } 725 else 726 inquotes = !inquotes; 727 /* Collapse out quote at *cp */ 728 memmove(cp, cp + 1, strlen(cp)); 729 /* do not advance cp */ 730 } 731 else 732 { 733 if (downcase && !inquotes) 734 *cp = pg_tolower((unsigned char) *cp); 735 cp += PQmblenBounded(cp, encoding); 736 } 737 } 738 } 739 740 /* 741 * Evaluate a backticked substring of a slash command's argument. 742 * 743 * The portion of output_buf starting at backtick_start_offset is evaluated 744 * as a shell command and then replaced by the command's output. 745 */ 746 static void 747 evaluate_backtick(PsqlScanState state) 748 { 749 PQExpBuffer output_buf = state->output_buf; 750 char *cmd = output_buf->data + backtick_start_offset; 751 PQExpBufferData cmd_output; 752 FILE *fd; 753 bool error = false; 754 char buf[512]; 755 size_t result; 756 757 initPQExpBuffer(&cmd_output); 758 759 fd = popen(cmd, "r"); 760 if (!fd) 761 { 762 state->callbacks->write_error("%s: %s\n", cmd, strerror(errno)); 763 error = true; 764 } 765 766 if (!error) 767 { 768 do 769 { 770 result = fread(buf, 1, sizeof(buf), fd); 771 if (ferror(fd)) 772 { 773 state->callbacks->write_error("%s: %s\n", cmd, strerror(errno)); 774 error = true; 775 break; 776 } 777 appendBinaryPQExpBuffer(&cmd_output, buf, result); 778 } while (!feof(fd)); 779 } 780 781 if (fd && pclose(fd) == -1) 782 { 783 state->callbacks->write_error("%s: %s\n", cmd, strerror(errno)); 784 error = true; 785 } 786 787 if (PQExpBufferDataBroken(cmd_output)) 788 { 789 state->callbacks->write_error("%s: out of memory\n", cmd); 790 error = true; 791 } 792 793 /* Now done with cmd, delete it from output_buf */ 794 output_buf->len = backtick_start_offset; 795 
output_buf->data[output_buf->len] = '\0'; 796 797 /* If no error, transfer result to output_buf */ 798 if (!error) 799 { 800 /* strip any trailing newline (but only one) */ 801 if (cmd_output.len > 0 && 802 cmd_output.data[cmd_output.len - 1] == '\n') 803 cmd_output.len--; 804 appendBinaryPQExpBuffer(output_buf, cmd_output.data, cmd_output.len); 805 } 806 807 termPQExpBuffer(&cmd_output); 808 } 809