1 %{
2 /*-------------------------------------------------------------------------
3 *
4 * exprscan.l
5 * lexical scanner for pgbench backslash commands
6 *
7 * This lexer supports two operating modes:
8 *
9 * In INITIAL state, just parse off whitespace-separated words (this mode
10 * is basically equivalent to strtok(), which is what we used to use).
11 *
12 * In EXPR state, lex for the simple expression syntax of exprparse.y.
13 *
14 * In either mode, stop upon hitting newline or end of string.
15 *
 * Note that this lexer operates within the framework created by psqlscan.l;
 * it works on that lexer's PsqlScanState and uses its buffer-handling helpers.
17 *
18 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
19 * Portions Copyright (c) 1994, Regents of the University of California
20 *
21 * src/bin/pgbench/exprscan.l
22 *
23 *-------------------------------------------------------------------------
24 */
25
26 #include "fe_utils/psqlscan_int.h"
27
/*
 * context information for reporting errors in expressions
 *
 * These four values are stored by expr_scanner_init() and read back by
 * expr_yyerror_more() when it builds its call to syntax_error().  They are
 * file-level statics rather than fields of the scanner state, so they
 * always describe the most recently initialized expression scan.
 */
static const char *expr_source = NULL;
static int expr_lineno = 0;
static int expr_start_offset = 0;
static const char *expr_command = NULL;

/*
 * indicates whether last yylex() call read a newline
 *
 * Cleared at the top of every yylex() call and set only by the {newline}
 * rules.  expr_yyerror_more() checks it to decide whether it still needs to
 * lex forward to the end of the line.
 */
static bool last_was_newline = false;
36
37 /*
38 * Work around a bug in flex 2.5.35: it emits a couple of functions that
39 * it forgets to emit declarations for. Since we use -Wmissing-prototypes,
40 * this would cause warnings. Providing our own declarations should be
41 * harmless even when the bug gets fixed.
42 */
43 extern int expr_yyget_column(yyscan_t yyscanner);
44 extern void expr_yyset_column(int column_no, yyscan_t yyscanner);
45
46 %}
47
48 /* Except for the prefix, these options should match psqlscan.l */
49 %option reentrant
50 %option bison-bridge
51 %option 8bit
52 %option never-interactive
53 %option nodefault
54 %option noinput
55 %option nounput
56 %option noyywrap
57 %option warn
58 %option prefix="expr_yy"
59
/* Character classes */
alpha [a-zA-Z_]
digit [0-9]
alnum [a-zA-Z0-9_]
/* {space} + {nonspace} + {newline} should cover all characters */
/* Note that {space} does not include newline; newline has its own class. */
space [ \t\r\f\v]
nonspace [^ \t\r\f\v\n]
newline [\n]

/* Line continuation marker */
/* That is: a backslash, an optional carriage return, then a newline. */
continuation \\\r?{newline}
71
72 /* Exclusive states */
73 %x EXPR
74
75 %%
76
%{
		/*
		 * Everything in this block runs at the start of each yylex() call,
		 * before any input is matched.
		 */

		/* Declare some local variables inside yylex(), for convenience */
		PsqlScanState cur_state = yyextra;

		/*
		 * Force flex into the state indicated by start_state.  This has a
		 * couple of purposes: it lets some of the functions below set a new
		 * starting state without ugly direct access to flex variables, and it
		 * allows us to transition from one flex lexer to another so that we
		 * can lex different parts of the source string using separate lexers.
		 *
		 * In this file, expr_lex_one_word() sets start_state to INITIAL and
		 * expr_scanner_init() sets it to EXPR.
		 */
		BEGIN(cur_state->start_state);

		/*
		 * Reset was-newline flag.  Only the {newline} rules set it again, so
		 * once yylex() returns it tells whether this call ended at a newline.
		 */
		last_was_newline = false;
%}
93
	/* INITIAL state */

{nonspace}+		{
					/*
					 * Found a word, emit and return it.  The nonzero result
					 * tells expr_lex_one_word() that a word was collected.
					 */
					psqlscan_emit(cur_state, yytext, yyleng);
					return 1;
				}

	/*
	 * We need this rule to avoid returning "word\" instead of recognizing
	 * a continuation marker just after a word:
	 */
{nonspace}+{continuation}	{
					/*
					 * Found "word\\\r?\n", emit and return just "word".
					 *
					 * When a word is directly followed by a continuation
					 * marker, this rule matches more text than the plain
					 * word rule above, and flex prefers the longest match.
					 *
					 * yyleng - 2 indexes the character just before the
					 * final newline: either the backslash itself or, for
					 * a CR-LF line ending, the carriage return.  In the
					 * latter case step back once more so that wordlen
					 * lands on the backslash, i.e. equals the word length.
					 */
					int		wordlen = yyleng - 2;
					if (yytext[wordlen] == '\r')
						wordlen--;
					Assert(yytext[wordlen] == '\\');
					psqlscan_emit(cur_state, yytext, wordlen);
					return 1;
				}

{space}+		{ /* ignore */ }

{continuation}	{ /* ignore */ }

{newline}		{
					/*
					 * report end of command: returning zero is what makes
					 * expr_lex_one_word() report "no more words"
					 */
					last_was_newline = true;
					return 0;
				}
125
	/* EXPR state */

<EXPR>{

"+"				{ return '+'; }
"-"				{ return '-'; }
"*"				{ return '*'; }
"/"				{ return '/'; }
"%"				{ return '%'; }
"("				{ return '('; }
")"				{ return ')'; }
","				{ return ','; }

:{alnum}+		{
					/* variable reference: keep the name, skip the colon */
					yylval->str = pg_strdup(yytext + 1);
					return VARIABLE;
				}
{digit}+		{
					/*
					 * A plain digit string is also matched, at the same
					 * length, by the first floating-point rule below.  On
					 * such a tie flex picks the rule listed first, so this
					 * rule must stay ahead of it for integers to be
					 * reported as INTEGER_CONST.
					 */
					yylval->ival = strtoint64(yytext);
					return INTEGER_CONST;
				}
{digit}+(\.{digit}*)?([eE][-+]?{digit}+)?	{
					/* digits, optional fraction, optional exponent */
					yylval->dval = atof(yytext);
					return DOUBLE_CONST;
				}
\.{digit}+([eE][-+]?{digit}+)?	{
					/* leading-dot form such as ".5", optional exponent */
					yylval->dval = atof(yytext);
					return DOUBLE_CONST;
				}
{alpha}{alnum}*	{
					/* any bare identifier is handed over as FUNCTION */
					yylval->str = pg_strdup(yytext);
					return FUNCTION;
				}

{space}+		{ /* ignore */ }

{continuation}	{ /* ignore */ }

{newline}		{
					/* report end of command */
					last_was_newline = true;
					return 0;
				}

.				{
					/*
					 * Catch-all for any character not matched above.
					 *
					 * must strdup yytext so that expr_yyerror_more doesn't
					 * change it while finding end of line
					 */
					expr_yyerror_more(yyscanner, "unexpected character",
									  pg_strdup(yytext));
					/* NOTREACHED, syntax_error calls exit() */
					return 0;
				}

}
182
<<EOF>>			{
					/*
					 * This rule carries no start-condition prefix, so it
					 * handles end of buffer in both INITIAL and EXPR state.
					 */
					if (cur_state->buffer_stack == NULL)
						return 0; /* end of input reached */

					/*
					 * We were expanding a variable, so pop the inclusion
					 * stack and keep lexing.  There is deliberately no
					 * return here: scanning resumes in the buffer that is
					 * selected below.
					 */
					psqlscan_pop_buffer_stack(cur_state);
					psqlscan_select_top_buffer(cur_state);
				}
194
195 %%
196
197 void
198 expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
199 {
200 PsqlScanState state = yyget_extra(yyscanner);
201 int error_detection_offset = expr_scanner_offset(state) - 1;
202 YYSTYPE lval;
203 char *full_line;
204 size_t l;
205
206 /*
207 * While parsing an expression, we may not have collected the whole line
208 * yet from the input source. Lex till EOL so we can report whole line.
209 * (If we're at EOF, it's okay to call yylex() an extra time.)
210 */
211 if (!last_was_newline)
212 {
213 while (yylex(&lval, yyscanner))
214 /* skip */ ;
215 }
216
217 full_line = expr_scanner_get_substring(state,
218 expr_start_offset,
219 expr_scanner_offset(state));
220 /* Trim trailing newline if any */
221 l = strlen(full_line);
222 while (l > 0 && full_line[l - 1] == '\n')
223 full_line[--l] = '\0';
224
225 syntax_error(expr_source, expr_lineno, full_line, expr_command,
226 message, more, error_detection_offset - expr_start_offset);
227 }
228
229 void
expr_yyerror(yyscan_t yyscanner,const char * message)230 expr_yyerror(yyscan_t yyscanner, const char *message)
231 {
232 expr_yyerror_more(yyscanner, message, NULL);
233 }
234
235 /*
236 * Collect a space-separated word from a backslash command and return it
237 * in word_buf, along with its starting string offset in *offset.
238 * Returns true if successful, false if at end of command.
239 */
240 bool
expr_lex_one_word(PsqlScanState state,PQExpBuffer word_buf,int * offset)241 expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
242 {
243 int lexresult;
244 YYSTYPE lval;
245
246 /* Must be scanning already */
247 Assert(state->scanbufhandle != NULL);
248
249 /* Set current output target */
250 state->output_buf = word_buf;
251 resetPQExpBuffer(word_buf);
252
253 /* Set input source */
254 if (state->buffer_stack != NULL)
255 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
256 else
257 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
258
259 /* Set start state */
260 state->start_state = INITIAL;
261
262 /* And lex. */
263 lexresult = yylex(&lval, state->scanner);
264
265 /*
266 * Save start offset of word, if any. We could do this more efficiently,
267 * but for now this seems fine.
268 */
269 if (lexresult)
270 *offset = expr_scanner_offset(state) - word_buf->len;
271 else
272 *offset = -1;
273
274 /*
275 * In case the caller returns to using the regular SQL lexer, reselect the
276 * appropriate initial state.
277 */
278 psql_scan_reselect_sql_lexer(state);
279
280 return (bool) lexresult;
281 }
282
283 /*
284 * Prepare to lex an expression via expr_yyparse().
285 *
286 * Returns the yyscan_t that is to be passed to expr_yyparse().
287 * (This is just state->scanner, but callers don't need to know that.)
288 */
289 yyscan_t
expr_scanner_init(PsqlScanState state,const char * source,int lineno,int start_offset,const char * command)290 expr_scanner_init(PsqlScanState state,
291 const char *source, int lineno, int start_offset,
292 const char *command)
293 {
294 /* Save error context info */
295 expr_source = source;
296 expr_lineno = lineno;
297 expr_start_offset = start_offset;
298 expr_command = command;
299
300 /* Must be scanning already */
301 Assert(state->scanbufhandle != NULL);
302
303 /* Set current output target */
304 state->output_buf = NULL;
305
306 /* Set input source */
307 if (state->buffer_stack != NULL)
308 yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
309 else
310 yy_switch_to_buffer(state->scanbufhandle, state->scanner);
311
312 /* Set start state */
313 state->start_state = EXPR;
314
315 return state->scanner;
316 }
317
318 /*
319 * Finish lexing an expression.
320 */
321 void
expr_scanner_finish(yyscan_t yyscanner)322 expr_scanner_finish(yyscan_t yyscanner)
323 {
324 PsqlScanState state = yyget_extra(yyscanner);
325
326 /*
327 * Reselect appropriate initial state for SQL lexer.
328 */
329 psql_scan_reselect_sql_lexer(state);
330 }
331
332 /*
333 * Get offset from start of string to end of current lexer token.
334 *
335 * We rely on the knowledge that flex modifies the scan buffer by storing
336 * a NUL at the end of the current token (yytext). Note that this might
337 * not work quite right if we were parsing a sub-buffer, but since pgbench
338 * never invokes that functionality, it doesn't matter.
339 */
340 int
expr_scanner_offset(PsqlScanState state)341 expr_scanner_offset(PsqlScanState state)
342 {
343 return strlen(state->scanbuf);
344 }
345
346 /*
347 * Get a malloc'd copy of the lexer input string from start_offset
348 * to just before end_offset.
349 */
350 char *
expr_scanner_get_substring(PsqlScanState state,int start_offset,int end_offset)351 expr_scanner_get_substring(PsqlScanState state,
352 int start_offset, int end_offset)
353 {
354 char *result;
355 int slen = end_offset - start_offset;
356
357 Assert(slen >= 0);
358 Assert(end_offset <= strlen(state->scanbuf));
359 result = (char *) pg_malloc(slen + 1);
360 memcpy(result, state->scanbuf + start_offset, slen);
361 result[slen] = '\0';
362
363 return result;
364 }
365
366 /*
367 * Get the line number associated with the given string offset
368 * (which must not be past the end of where we've lexed to).
369 */
370 int
expr_scanner_get_lineno(PsqlScanState state,int offset)371 expr_scanner_get_lineno(PsqlScanState state, int offset)
372 {
373 int lineno = 1;
374 const char *p = state->scanbuf;
375
376 while (*p && offset > 0)
377 {
378 if (*p == '\n')
379 lineno++;
380 p++, offset--;
381 }
382 return lineno;
383 }
384