%{
/*-------------------------------------------------------------------------
 *
 * exprscan.l
 *	  lexical scanner for pgbench backslash commands
 *
 * This lexer supports two operating modes:
 *
 * In INITIAL state, just parse off whitespace-separated words (this mode
 * is basically equivalent to strtok(), which is what we used to use).
 *
 * In EXPR state, lex for the simple expression syntax of exprparse.y.
 *
 * In either mode, stop upon hitting newline or end of string.
 *
 * Note that this lexer operates within the framework created by psqlscan.l:
 * it works on that lexer's PsqlScanState and relies on its helper routines
 * (psqlscan_emit(), psqlscan_pop_buffer_stack(),
 * psqlscan_select_top_buffer() and psql_scan_reselect_sql_lexer()).
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/bin/pgbench/exprscan.l
 *
 *-------------------------------------------------------------------------
 */

#include "fe_utils/psqlscan_int.h"

/*
 * Context information for reporting errors in expressions.
 *
 * These are filled in by expr_scanner_init() and read back by
 * expr_yyerror_more().  Being file-level statics, they describe only the
 * most recently initialized expression scan.
 */
static const char *expr_source = NULL;
static int	expr_lineno = 0;
static int	expr_start_offset = 0;
static const char *expr_command = NULL;

/*
 * Indicates whether last yylex() call read a newline.  It is cleared at the
 * start of every yylex() call and set by the {newline} rules;
 * expr_yyerror_more() consults it to decide whether the rest of the line
 * still has to be lexed.
 */
static bool last_was_newline = false;

/*
 * Work around a bug in flex 2.5.35: it emits a couple of functions that
 * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
 * this would cause warnings.  Providing our own declarations should be
 * harmless even when the bug gets fixed.
42 */ 43 extern int expr_yyget_column(yyscan_t yyscanner); 44 extern void expr_yyset_column(int column_no, yyscan_t yyscanner); 45 46 /* LCOV_EXCL_START */ 47 48 %} 49 50 /* Except for the prefix, these options should match psqlscan.l */ 51 %option reentrant 52 %option bison-bridge 53 %option 8bit 54 %option never-interactive 55 %option nodefault 56 %option noinput 57 %option nounput 58 %option noyywrap 59 %option warn 60 %option prefix="expr_yy" 61 62 /* Character classes */ 63 alpha [a-zA-Z\200-\377_] 64 digit [0-9] 65 alnum [A-Za-z\200-\377_0-9] 66 /* {space} + {nonspace} + {newline} should cover all characters */ 67 space [ \t\r\f\v] 68 nonspace [^ \t\r\f\v\n] 69 newline [\n] 70 71 /* Line continuation marker */ 72 continuation \\\r?{newline} 73 74 /* case insensitive keywords */ 75 and [Aa][Nn][Dd] 76 or [Oo][Rr] 77 not [Nn][Oo][Tt] 78 case [Cc][Aa][Ss][Ee] 79 when [Ww][Hh][Ee][Nn] 80 then [Tt][Hh][Ee][Nn] 81 else [Ee][Ll][Ss][Ee] 82 end [Ee][Nn][Dd] 83 true [Tt][Rr][Uu][Ee] 84 false [Ff][Aa][Ll][Ss][Ee] 85 null [Nn][Uu][Ll][Ll] 86 is [Ii][Ss] 87 isnull [Ii][Ss][Nn][Uu][Ll][Ll] 88 notnull [Nn][Oo][Tt][Nn][Uu][Ll][Ll] 89 90 /* Exclusive states */ 91 %x EXPR 92 93 %% 94 95 %{ 96 /* Declare some local variables inside yylex(), for convenience */ 97 PsqlScanState cur_state = yyextra; 98 99 /* 100 * Force flex into the state indicated by start_state. This has a 101 * couple of purposes: it lets some of the functions below set a new 102 * starting state without ugly direct access to flex variables, and it 103 * allows us to transition from one flex lexer to another so that we 104 * can lex different parts of the source string using separate lexers. 
105 */ 106 BEGIN(cur_state->start_state); 107 108 /* Reset was-newline flag */ 109 last_was_newline = false; 110 %} 111 112 /* INITIAL state */ 113 114 {nonspace}+ { 115 /* Found a word, emit and return it */ 116 psqlscan_emit(cur_state, yytext, yyleng); 117 return 1; 118 } 119 120 /* 121 * We need this rule to avoid returning "word\" instead of recognizing 122 * a continuation marker just after a word: 123 */ 124 {nonspace}+{continuation} { 125 /* Found "word\\\r?\n", emit and return just "word" */ 126 int wordlen = yyleng - 2; 127 if (yytext[wordlen] == '\r') 128 wordlen--; 129 Assert(yytext[wordlen] == '\\'); 130 psqlscan_emit(cur_state, yytext, wordlen); 131 return 1; 132 } 133 134 {space}+ { /* ignore */ } 135 136 {continuation} { /* ignore */ } 137 138 {newline} { 139 /* report end of command */ 140 last_was_newline = true; 141 return 0; 142 } 143 144 /* EXPR state */ 145 146 <EXPR>{ 147 148 "+" { return '+'; } 149 "-" { return '-'; } 150 "*" { return '*'; } 151 "/" { return '/'; } 152 "%" { return '%'; } /* C version, also in Pg SQL */ 153 "=" { return '='; } 154 "<>" { return NE_OP; } 155 "!=" { return NE_OP; } /* C version, also in Pg SQL */ 156 "<=" { return LE_OP; } 157 ">=" { return GE_OP; } 158 "<<" { return LS_OP; } 159 ">>" { return RS_OP; } 160 "<" { return '<'; } 161 ">" { return '>'; } 162 "|" { return '|'; } 163 "&" { return '&'; } 164 "#" { return '#'; } 165 "~" { return '~'; } 166 167 "(" { return '('; } 168 ")" { return ')'; } 169 "," { return ','; } 170 171 {and} { return AND_OP; } 172 {or} { return OR_OP; } 173 {not} { return NOT_OP; } 174 {is} { return IS_OP; } 175 {isnull} { return ISNULL_OP; } 176 {notnull} { return NOTNULL_OP; } 177 178 {case} { return CASE_KW; } 179 {when} { return WHEN_KW; } 180 {then} { return THEN_KW; } 181 {else} { return ELSE_KW; } 182 {end} { return END_KW; } 183 184 :{alnum}+ { 185 yylval->str = pg_strdup(yytext + 1); 186 return VARIABLE; 187 } 188 189 {null} { return NULL_CONST; } 190 {true} { 191 yylval->bval = 
true; 192 return BOOLEAN_CONST; 193 } 194 {false} { 195 yylval->bval = false; 196 return BOOLEAN_CONST; 197 } 198 {digit}+ { 199 yylval->ival = strtoint64(yytext); 200 return INTEGER_CONST; 201 } 202 {digit}+(\.{digit}*)?([eE][-+]?{digit}+)? { 203 yylval->dval = atof(yytext); 204 return DOUBLE_CONST; 205 } 206 \.{digit}+([eE][-+]?{digit}+)? { 207 yylval->dval = atof(yytext); 208 return DOUBLE_CONST; 209 } 210 {alpha}{alnum}* { 211 yylval->str = pg_strdup(yytext); 212 return FUNCTION; 213 } 214 215 {space}+ { /* ignore */ } 216 217 {continuation} { /* ignore */ } 218 219 {newline} { 220 /* report end of command */ 221 last_was_newline = true; 222 return 0; 223 } 224 225 . { 226 /* 227 * must strdup yytext so that expr_yyerror_more doesn't 228 * change it while finding end of line 229 */ 230 expr_yyerror_more(yyscanner, "unexpected character", 231 pg_strdup(yytext)); 232 /* NOTREACHED, syntax_error calls exit() */ 233 return 0; 234 } 235 236 } 237 238 <<EOF>> { 239 if (cur_state->buffer_stack == NULL) 240 return 0; /* end of input reached */ 241 242 /* 243 * We were expanding a variable, so pop the inclusion 244 * stack and keep lexing 245 */ 246 psqlscan_pop_buffer_stack(cur_state); 247 psqlscan_select_top_buffer(cur_state); 248 } 249 250 %% 251 252 /* LCOV_EXCL_STOP */ 253 254 void 255 expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more) 256 { 257 PsqlScanState state = yyget_extra(yyscanner); 258 int error_detection_offset = expr_scanner_offset(state) - 1; 259 YYSTYPE lval; 260 char *full_line; 261 262 /* 263 * While parsing an expression, we may not have collected the whole line 264 * yet from the input source. Lex till EOL so we can report whole line. 265 * (If we're at EOF, it's okay to call yylex() an extra time.) 
266 */ 267 if (!last_was_newline) 268 { 269 while (yylex(&lval, yyscanner)) 270 /* skip */ ; 271 } 272 273 /* Extract the line, trimming trailing newline if any */ 274 full_line = expr_scanner_get_substring(state, 275 expr_start_offset, 276 expr_scanner_offset(state), 277 true); 278 279 syntax_error(expr_source, expr_lineno, full_line, expr_command, 280 message, more, error_detection_offset - expr_start_offset); 281 } 282 283 void 284 expr_yyerror(yyscan_t yyscanner, const char *message) 285 { 286 expr_yyerror_more(yyscanner, message, NULL); 287 } 288 289 /* 290 * Collect a space-separated word from a backslash command and return it 291 * in word_buf, along with its starting string offset in *offset. 292 * Returns true if successful, false if at end of command. 293 */ 294 bool 295 expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset) 296 { 297 int lexresult; 298 YYSTYPE lval; 299 300 /* Must be scanning already */ 301 Assert(state->scanbufhandle != NULL); 302 303 /* Set current output target */ 304 state->output_buf = word_buf; 305 resetPQExpBuffer(word_buf); 306 307 /* Set input source */ 308 if (state->buffer_stack != NULL) 309 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); 310 else 311 yy_switch_to_buffer(state->scanbufhandle, state->scanner); 312 313 /* Set start state */ 314 state->start_state = INITIAL; 315 316 /* And lex. */ 317 lexresult = yylex(&lval, state->scanner); 318 319 /* 320 * Save start offset of word, if any. We could do this more efficiently, 321 * but for now this seems fine. 322 */ 323 if (lexresult) 324 *offset = expr_scanner_offset(state) - word_buf->len; 325 else 326 *offset = -1; 327 328 /* 329 * In case the caller returns to using the regular SQL lexer, reselect the 330 * appropriate initial state. 331 */ 332 psql_scan_reselect_sql_lexer(state); 333 334 return (bool) lexresult; 335 } 336 337 /* 338 * Prepare to lex an expression via expr_yyparse(). 
339 * 340 * Returns the yyscan_t that is to be passed to expr_yyparse(). 341 * (This is just state->scanner, but callers don't need to know that.) 342 */ 343 yyscan_t 344 expr_scanner_init(PsqlScanState state, 345 const char *source, int lineno, int start_offset, 346 const char *command) 347 { 348 /* Save error context info */ 349 expr_source = source; 350 expr_lineno = lineno; 351 expr_start_offset = start_offset; 352 expr_command = command; 353 354 /* Must be scanning already */ 355 Assert(state->scanbufhandle != NULL); 356 357 /* Set current output target */ 358 state->output_buf = NULL; 359 360 /* Set input source */ 361 if (state->buffer_stack != NULL) 362 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); 363 else 364 yy_switch_to_buffer(state->scanbufhandle, state->scanner); 365 366 /* Set start state */ 367 state->start_state = EXPR; 368 369 return state->scanner; 370 } 371 372 /* 373 * Finish lexing an expression. 374 */ 375 void 376 expr_scanner_finish(yyscan_t yyscanner) 377 { 378 PsqlScanState state = yyget_extra(yyscanner); 379 380 /* 381 * Reselect appropriate initial state for SQL lexer. 382 */ 383 psql_scan_reselect_sql_lexer(state); 384 } 385 386 /* 387 * Get offset from start of string to end of current lexer token. 388 * 389 * We rely on the knowledge that flex modifies the scan buffer by storing 390 * a NUL at the end of the current token (yytext). Note that this might 391 * not work quite right if we were parsing a sub-buffer, but since pgbench 392 * never invokes that functionality, it doesn't matter. 393 */ 394 int 395 expr_scanner_offset(PsqlScanState state) 396 { 397 return strlen(state->scanbuf); 398 } 399 400 /* 401 * Get a malloc'd copy of the lexer input string from start_offset 402 * to just before end_offset. If chomp is true, drop any trailing 403 * newline(s). 
404 */ 405 char * 406 expr_scanner_get_substring(PsqlScanState state, 407 int start_offset, int end_offset, 408 bool chomp) 409 { 410 char *result; 411 const char *scanptr = state->scanbuf + start_offset; 412 int slen = end_offset - start_offset; 413 414 Assert(slen >= 0); 415 Assert(end_offset <= strlen(state->scanbuf)); 416 417 if (chomp) 418 { 419 while (slen > 0 && 420 (scanptr[slen - 1] == '\n' || scanptr[slen - 1] == '\r')) 421 slen--; 422 } 423 424 result = (char *) pg_malloc(slen + 1); 425 memcpy(result, scanptr, slen); 426 result[slen] = '\0'; 427 428 return result; 429 } 430 431 /* 432 * Get the line number associated with the given string offset 433 * (which must not be past the end of where we've lexed to). 434 */ 435 int 436 expr_scanner_get_lineno(PsqlScanState state, int offset) 437 { 438 int lineno = 1; 439 const char *p = state->scanbuf; 440 441 while (*p && offset > 0) 442 { 443 if (*p == '\n') 444 lineno++; 445 p++, offset--; 446 } 447 return lineno; 448 } 449