%{
/*-------------------------------------------------------------------------
 *
 * exprscan.l
 *	  lexical scanner for pgbench backslash commands
 *
 * This lexer supports two operating modes:
 *
 * In INITIAL state, just parse off whitespace-separated words (this mode
 * is basically equivalent to strtok(), which is what we used to use).
 *
 * In EXPR state, lex for the simple expression syntax of exprparse.y.
 *
 * In either mode, stop upon hitting newline or end of string.
 *
 * Note that this lexer operates within the framework created by psqlscan.l:
 * it works on a PsqlScanState and relies on the psqlscan helper routines
 * declared in fe_utils/psqlscan_int.h.
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/bin/pgbench/exprscan.l
 *
 *-------------------------------------------------------------------------
 */

#include "fe_utils/psqlscan_int.h"

/*
 * Error-reporting context for the expression currently being lexed.
 * Filled in by expr_scanner_init() and consumed by expr_yyerror_more().
 */
static const char *expr_source = NULL;
static int	expr_lineno = 0;
static int	expr_start_offset = 0;
static const char *expr_command = NULL;

/* true if the most recent yylex() call stopped because it read a newline */
static bool last_was_newline = false;

/*
 * flex 2.5.35 emits these two functions without emitting declarations for
 * them, which trips -Wmissing-prototypes.  Declaring them ourselves is
 * harmless on flex versions that do not have the bug.
 */
extern int	expr_yyget_column(yyscan_t yyscanner);
extern void expr_yyset_column(int column_no, yyscan_t yyscanner);

/* LCOV_EXCL_START */

%}

/* Except for the prefix, these options should match psqlscan.l */
%option reentrant
%option bison-bridge
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="expr_yy"

/* Character classes */
alpha			[a-zA-Z\200-\377_]
digit			[0-9]
alnum			[A-Za-z\200-\377_0-9]
/* {space} + {nonspace} + {newline} should cover all characters */
space			[ \t\r\f\v]
nonspace		[^ \t\r\f\v\n]
newline			[\n]

/* Line continuation marker: backslash, optional CR, then newline */
continuation	\\\r?{newline}

/* Keywords, matched without regard to case */
and				[Aa][Nn][Dd]
or				[Oo][Rr]
not				[Nn][Oo][Tt]
case			[Cc][Aa][Ss][Ee]
when			[Ww][Hh][Ee][Nn]
then			[Tt][Hh][Ee][Nn]
else			[Ee][Ll][Ss][Ee]
end				[Ee][Nn][Dd]
true			[Tt][Rr][Uu][Ee]
false			[Ff][Aa][Ll][Ss][Ee]
null			[Nn][Uu][Ll][Ll]
is				[Ii][Ss]
isnull			[Ii][Ss][Nn][Uu][Ll][Ll]
notnull			[Nn][Oo][Tt][Nn][Uu][Ll][Ll]

/* Exclusive states */
%x EXPR

%%

%{
		/* Local variable available to every rule action inside yylex() */
		PsqlScanState cur_state = yyextra;

		/*
		 * Force flex into the state indicated by start_state.  This has a
		 * couple of purposes: it lets some of the functions below set a new
		 * starting state without ugly direct access to flex variables, and it
		 * allows us to transition from one flex lexer to another so that we
		 * can lex different parts of the source string using separate lexers.
*/
		BEGIN(cur_state->start_state);

		/* Reset was-newline flag */
		last_was_newline = false;
%}

	/* INITIAL state: split the input into whitespace-separated words */

{nonspace}+		{
					/* Found a word, emit and return it */
					psqlscan_emit(cur_state, yytext, yyleng);
					return 1;
				}

	/*
	 * We need this rule to avoid returning "word\" instead of recognizing
	 * a continuation marker just after a word:
	 */
{nonspace}+{continuation}	{
					/* Found "word\\\r?\n", emit and return just "word" */
					int		wordlen = yyleng - 2;
					if (yytext[wordlen] == '\r')
						wordlen--;
					Assert(yytext[wordlen] == '\\');
					psqlscan_emit(cur_state, yytext, wordlen);
					return 1;
				}

{space}+		{ /* ignore */ }

{continuation}	{ /* ignore */ }

{newline}		{
					/* report end of command */
					last_was_newline = true;
					return 0;
				}

	/* EXPR state: tokens for the expression grammar */

<EXPR>{

"+"				{ return '+'; }
"-"				{ return '-'; }
"*"				{ return '*'; }
"/"				{ return '/'; }
"%"				{ return '%'; } /* C version, also in Pg SQL */
"="				{ return '='; }
"<>"			{ return NE_OP; }
"!="			{ return NE_OP; } /* C version, also in Pg SQL */
"<="			{ return LE_OP; }
">="			{ return GE_OP; }
"<<"			{ return LS_OP; }
">>"			{ return RS_OP; }
"<"				{ return '<'; }
">"				{ return '>'; }
"|"				{ return '|'; }
"&"				{ return '&'; }
"#"				{ return '#'; }
"~"				{ return '~'; }

"("				{ return '('; }
")"				{ return ')'; }
","				{ return ','; }

{and}			{ return AND_OP; }
{or}			{ return OR_OP; }
{not}			{ return NOT_OP; }
{is}			{ return IS_OP; }
{isnull}		{ return ISNULL_OP; }
{notnull}		{ return NOTNULL_OP; }

{case}			{ return CASE_KW; }
{when}			{ return WHEN_KW; }
{then}			{ return THEN_KW; }
{else}			{ return ELSE_KW; }
{end}			{ return END_KW; }

:{alnum}+		{
					/* variable reference; hand back the name without the colon */
					yylval->str = pg_strdup(yytext + 1);
					return VARIABLE;
				}

{null}			{ return NULL_CONST; }
{true}			{
					yylval->bval = true;
					return BOOLEAN_CONST;
				}
{false}			{
					yylval->bval = false;
					return BOOLEAN_CONST;
				}
"9223372036854775808"	{
					/*
					 * Special handling for PG_INT64_MIN, which can't
					 * accurately be represented here, as the minus sign is
					 * lexed separately and INT64_MIN can't be represented as
					 * a positive integer.
					 */
					return MAXINT_PLUS_ONE_CONST;
				}
{digit}+		{
					/*
					 * As in the catch-all rule below, yytext must be copied
					 * because expr_yyerror_more keeps lexing; use pg_strdup
					 * like the other rules rather than unchecked strdup().
					 */
					if (!strtoint64(yytext, true, &yylval->ival))
						expr_yyerror_more(yyscanner, "bigint constant overflow",
										  pg_strdup(yytext));
					return INTEGER_CONST;
				}
{digit}+(\.{digit}*)?([eE][-+]?{digit}+)?	{
					if (!strtodouble(yytext, true, &yylval->dval))
						expr_yyerror_more(yyscanner, "double constant overflow",
										  pg_strdup(yytext));
					return DOUBLE_CONST;
				}
\.{digit}+([eE][-+]?{digit}+)?	{
					if (!strtodouble(yytext, true, &yylval->dval))
						expr_yyerror_more(yyscanner, "double constant overflow",
										  pg_strdup(yytext));
					return DOUBLE_CONST;
				}
{alpha}{alnum}*	{
					/* any other identifier is taken to be a function name */
					yylval->str = pg_strdup(yytext);
					return FUNCTION;
				}

{space}+		{ /* ignore */ }

{continuation}	{ /* ignore */ }

{newline}		{
					/* report end of command */
					last_was_newline = true;
					return 0;
				}

.				{
					/*
					 * must strdup yytext so that expr_yyerror_more doesn't
					 * change it while finding end of line
					 */
					expr_yyerror_more(yyscanner, "unexpected character",
									  pg_strdup(yytext));
					/* NOTREACHED, syntax_error calls exit() */
					return 0;
				}

}

<<EOF>>			{
					if (cur_state->buffer_stack == NULL)
						return 0;			/* end of input reached */

					/*
					 * We were expanding a variable, so pop the inclusion
					 * stack and keep lexing
					 */
					psqlscan_pop_buffer_stack(cur_state);
					psqlscan_select_top_buffer(cur_state);
				}

%%

/* LCOV_EXCL_STOP */

/*
 * Report a syntax error in the expression being lexed.
 *
 * "message" is the main error text; "more" is an optional detail string
 * (expr_yyerror passes NULL).  The error position is taken from the end of
 * the token that was current on entry, relative to the expression start
 * recorded by expr_scanner_init().
 *
 * The result is handed to syntax_error(), which according to the note in
 * the catch-all rule above calls exit().
 */
void
expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
{
	PsqlScanState state = yyget_extra(yyscanner);
	/* must be captured before we lex any further */
	int			error_detection_offset = expr_scanner_offset(state) - 1;
	YYSTYPE		lval;
	char	   *full_line;

	/*
	 * While parsing an expression, we may not have collected the whole line
	 * yet from the input source.  Lex till EOL so we can report whole line.
	 * (If we're at EOF, it's okay to call yylex() an extra time.)
	 */
	if (!last_was_newline)
	{
		while (yylex(&lval, yyscanner))
			 /* skip */ ;
	}

	/* Extract the line, trimming trailing newline if any */
	full_line = expr_scanner_get_substring(state,
										   expr_start_offset,
										   expr_scanner_offset(state),
										   true);

	syntax_error(expr_source, expr_lineno, full_line, expr_command,
				 message, more, error_detection_offset - expr_start_offset);
}

/*
 * Report a syntax error that has no additional detail string.
 */
void
expr_yyerror(yyscan_t yyscanner, const char *message)
{
	expr_yyerror_more(yyscanner, message, NULL);
}

/*
 * Collect a space-separated word from a backslash command and return it
 * in word_buf, along with its starting string offset in *offset.
 * Returns true if successful, false if at end of command.
308 */ 309 bool 310 expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset) 311 { 312 int lexresult; 313 YYSTYPE lval; 314 315 /* Must be scanning already */ 316 Assert(state->scanbufhandle != NULL); 317 318 /* Set current output target */ 319 state->output_buf = word_buf; 320 resetPQExpBuffer(word_buf); 321 322 /* Set input source */ 323 if (state->buffer_stack != NULL) 324 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); 325 else 326 yy_switch_to_buffer(state->scanbufhandle, state->scanner); 327 328 /* Set start state */ 329 state->start_state = INITIAL; 330 331 /* And lex. */ 332 lexresult = yylex(&lval, state->scanner); 333 334 /* 335 * Save start offset of word, if any. We could do this more efficiently, 336 * but for now this seems fine. 337 */ 338 if (lexresult) 339 *offset = expr_scanner_offset(state) - word_buf->len; 340 else 341 *offset = -1; 342 343 /* 344 * In case the caller returns to using the regular SQL lexer, reselect the 345 * appropriate initial state. 346 */ 347 psql_scan_reselect_sql_lexer(state); 348 349 return (bool) lexresult; 350 } 351 352 /* 353 * Prepare to lex an expression via expr_yyparse(). 354 * 355 * Returns the yyscan_t that is to be passed to expr_yyparse(). 356 * (This is just state->scanner, but callers don't need to know that.) 
357 */ 358 yyscan_t 359 expr_scanner_init(PsqlScanState state, 360 const char *source, int lineno, int start_offset, 361 const char *command) 362 { 363 /* Save error context info */ 364 expr_source = source; 365 expr_lineno = lineno; 366 expr_start_offset = start_offset; 367 expr_command = command; 368 369 /* Must be scanning already */ 370 Assert(state->scanbufhandle != NULL); 371 372 /* Set current output target */ 373 state->output_buf = NULL; 374 375 /* Set input source */ 376 if (state->buffer_stack != NULL) 377 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); 378 else 379 yy_switch_to_buffer(state->scanbufhandle, state->scanner); 380 381 /* Set start state */ 382 state->start_state = EXPR; 383 384 return state->scanner; 385 } 386 387 /* 388 * Finish lexing an expression. 389 */ 390 void 391 expr_scanner_finish(yyscan_t yyscanner) 392 { 393 PsqlScanState state = yyget_extra(yyscanner); 394 395 /* 396 * Reselect appropriate initial state for SQL lexer. 397 */ 398 psql_scan_reselect_sql_lexer(state); 399 } 400 401 /* 402 * Get offset from start of string to end of current lexer token. 403 * 404 * We rely on the knowledge that flex modifies the scan buffer by storing 405 * a NUL at the end of the current token (yytext). Note that this might 406 * not work quite right if we were parsing a sub-buffer, but since pgbench 407 * never invokes that functionality, it doesn't matter. 408 */ 409 int 410 expr_scanner_offset(PsqlScanState state) 411 { 412 return strlen(state->scanbuf); 413 } 414 415 /* 416 * Get a malloc'd copy of the lexer input string from start_offset 417 * to just before end_offset. If chomp is true, drop any trailing 418 * newline(s). 
419 */ 420 char * 421 expr_scanner_get_substring(PsqlScanState state, 422 int start_offset, int end_offset, 423 bool chomp) 424 { 425 char *result; 426 const char *scanptr = state->scanbuf + start_offset; 427 int slen = end_offset - start_offset; 428 429 Assert(slen >= 0); 430 Assert(end_offset <= strlen(state->scanbuf)); 431 432 if (chomp) 433 { 434 while (slen > 0 && 435 (scanptr[slen - 1] == '\n' || scanptr[slen - 1] == '\r')) 436 slen--; 437 } 438 439 result = (char *) pg_malloc(slen + 1); 440 memcpy(result, scanptr, slen); 441 result[slen] = '\0'; 442 443 return result; 444 } 445 446 /* 447 * Get the line number associated with the given string offset 448 * (which must not be past the end of where we've lexed to). 449 */ 450 int 451 expr_scanner_get_lineno(PsqlScanState state, int offset) 452 { 453 int lineno = 1; 454 const char *p = state->scanbuf; 455 456 while (*p && offset > 0) 457 { 458 if (*p == '\n') 459 lineno++; 460 p++, offset--; 461 } 462 return lineno; 463 } 464