1 %{
2 /*-------------------------------------------------------------------------
3  *
4  * exprscan.l
5  *	  lexical scanner for pgbench backslash commands
6  *
7  * This lexer supports two operating modes:
8  *
9  * In INITIAL state, just parse off whitespace-separated words (this mode
10  * is basically equivalent to strtok(), which is what we used to use).
11  *
12  * In EXPR state, lex for the simple expression syntax of exprparse.y.
13  *
14  * In either mode, stop upon hitting newline or end of string.
15  *
 * Note that this lexer operates within the framework created by psqlscan.l:
 * it shares that lexer's PsqlScanState and its buffer-stack handling.
 *
18  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
19  * Portions Copyright (c) 1994, Regents of the University of California
20  *
21  * src/bin/pgbench/exprscan.l
22  *
23  *-------------------------------------------------------------------------
24  */
25 
26 #include "fe_utils/psqlscan_int.h"
27 
/*
 * Context information for reporting errors in expressions.  These are
 * recorded by expr_scanner_init() and read back by expr_yyerror_more()
 * when it hands a syntax error to syntax_error().
 */
static const char *expr_source = NULL;
static int	expr_lineno = 0;
/* scan-buffer offset at which the text reported by expr_yyerror_more() begins */
static int	expr_start_offset = 0;
static const char *expr_command = NULL;

/*
 * Indicates whether last yylex() call read a newline.  The flag is cleared
 * at the start of every yylex() call and set only by the {newline} rules;
 * expr_yyerror_more() consults it to decide whether it still has to lex
 * forward to the end of the current line.
 */
static bool last_was_newline = false;
36 
37 /*
38  * Work around a bug in flex 2.5.35: it emits a couple of functions that
39  * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
40  * this would cause warnings.  Providing our own declarations should be
41  * harmless even when the bug gets fixed.
42  */
43 extern int	expr_yyget_column(yyscan_t yyscanner);
44 extern void expr_yyset_column(int column_no, yyscan_t yyscanner);
45 
46 /* LCOV_EXCL_START */
47 
48 %}
49 
50 /* Except for the prefix, these options should match psqlscan.l */
51 %option reentrant
52 %option bison-bridge
53 %option 8bit
54 %option never-interactive
55 %option nodefault
56 %option noinput
57 %option nounput
58 %option noyywrap
59 %option warn
60 %option prefix="expr_yy"
61 
/*
 * Character classes
 *
 * Besides ASCII letters and underscore, {alpha} and {alnum} accept every
 * byte in the range \200-\377, that is, any byte with the high bit set.
 */
alpha			[a-zA-Z\200-\377_]
digit			[0-9]
alnum			[A-Za-z\200-\377_0-9]
/* {space} + {nonspace} + {newline} should cover all characters */
space			[ \t\r\f\v]
nonspace		[^ \t\r\f\v\n]
newline			[\n]

/*
 * Line continuation marker: a backslash immediately followed by a newline,
 * with an optional carriage return in between.
 */
continuation	\\\r?{newline}
73 
74 /* case insensitive keywords */
75 and				[Aa][Nn][Dd]
76 or				[Oo][Rr]
77 not				[Nn][Oo][Tt]
78 case			[Cc][Aa][Ss][Ee]
79 when			[Ww][Hh][Ee][Nn]
80 then			[Tt][Hh][Ee][Nn]
81 else			[Ee][Ll][Ss][Ee]
82 end				[Ee][Nn][Dd]
83 true			[Tt][Rr][Uu][Ee]
84 false			[Ff][Aa][Ll][Ss][Ee]
85 null			[Nn][Uu][Ll][Ll]
86 is				[Ii][Ss]
87 isnull			[Ii][Ss][Nn][Uu][Ll][Ll]
88 notnull			[Nn][Oo][Tt][Nn][Uu][Ll][Ll]
89 
90 /* Exclusive states */
91 %x EXPR
92 
93 %%
94 
%{
		/* Local shorthand for the scan state; the rule actions below use it */
		PsqlScanState cur_state = yyextra;

		/* Every call starts with the was-newline flag cleared */
		last_was_newline = false;

		/*
		 * Put flex into the start condition named by start_state.  Doing
		 * this on each call lets the C functions below pick the lexing mode
		 * without touching flex internals directly, and it is also what
		 * makes it possible to hand the same source string from one flex
		 * lexer to another part-way through.
		 */
		BEGIN(cur_state->start_state);
%}
111 
112 	/* INITIAL state */
113 
{nonspace}+		{
					/* A complete word: pass it to the output buffer */
					psqlscan_emit(cur_state, yytext, yyleng);
					return 1;
				}

	/*
	 * Without this rule, a continuation marker glued onto the end of a
	 * word would be swallowed into the word, giving "word\":
	 */
{nonspace}+{continuation}	{
					/*
					 * Matched "word\\\r?\n".  The character just before the
					 * final newline is either the backslash itself or a
					 * carriage return following it; locate the backslash
					 * and emit only the text in front of it.
					 */
					int		backslash_pos = (yytext[yyleng - 2] == '\r') ?
						yyleng - 3 : yyleng - 2;

					Assert(yytext[backslash_pos] == '\\');
					psqlscan_emit(cur_state, yytext, backslash_pos);
					return 1;
				}

{space}+		{ /* skip whitespace between words */ }

{continuation}	{ /* skip a free-standing continuation marker */ }

{newline}		{
					/* a newline ends the command */
					last_was_newline = true;
					return 0;
				}
143 
144 	/* EXPR state */
145 
<EXPR>{

"+"				{ return '+'; }
"-"				{ return '-'; }
"*"				{ return '*'; }
"/"				{ return '/'; }
"%"				{ return '%'; } /* C version, also in Pg SQL */
"="				{ return '='; }
"<>"			{ return NE_OP; }
"!="			{ return NE_OP; } /* C version, also in Pg SQL */
"<="			{ return LE_OP; }
">="			{ return GE_OP; }
"<<"			{ return LS_OP; }
">>"			{ return RS_OP; }
"<"				{ return '<'; }
">"				{ return '>'; }
"|"				{ return '|'; }
"&"				{ return '&'; }
"#"				{ return '#'; }
"~"				{ return '~'; }

"("				{ return '('; }
")"				{ return ')'; }
","				{ return ','; }

{and}			{ return AND_OP; }
{or}			{ return OR_OP; }
{not}			{ return NOT_OP; }
{is}			{ return IS_OP; }
{isnull}		{ return ISNULL_OP; }
{notnull}		{ return NOTNULL_OP; }

{case}			{ return CASE_KW; }
{when}			{ return WHEN_KW; }
{then}			{ return THEN_KW; }
{else}			{ return ELSE_KW; }
{end}			{ return END_KW; }

:{alnum}+		{
					/* variable reference; the value excludes the leading ':' */
					yylval->str = pg_strdup(yytext + 1);
					return VARIABLE;
				}

{null}			{ return NULL_CONST; }
{true}			{
					yylval->bval = true;
					return BOOLEAN_CONST;
				}
{false}			{
					yylval->bval = false;
					return BOOLEAN_CONST;
				}
"9223372036854775808" {
					/*
					 * Special handling for PG_INT64_MIN, which can't
					 * accurately be represented here, as the minus sign is
					 * lexed separately and INT64_MIN can't be represented as
					 * a positive integer.
					 */
					return MAXINT_PLUS_ONE_CONST;
				}
{digit}+		{
					/*
					 * The token text must be copied before it is handed to
					 * expr_yyerror_more(), which may call yylex() again to
					 * find the end of the line and thereby change yytext.
					 * Use pg_strdup(), like the other rules in this state,
					 * rather than an unchecked strdup(): a NULL result
					 * would make the report silently omit the token.
					 */
					if (!strtoint64(yytext, true, &yylval->ival))
						expr_yyerror_more(yyscanner, "bigint constant overflow",
										  pg_strdup(yytext));
					return INTEGER_CONST;
				}
{digit}+(\.{digit}*)?([eE][-+]?{digit}+)?	{
					/* copy yytext with pg_strdup(), as for integer constants */
					if (!strtodouble(yytext, true, &yylval->dval))
						expr_yyerror_more(yyscanner, "double constant overflow",
										  pg_strdup(yytext));
					return DOUBLE_CONST;
				}
\.{digit}+([eE][-+]?{digit}+)?	{
					/* copy yytext with pg_strdup(), as for integer constants */
					if (!strtodouble(yytext, true, &yylval->dval))
						expr_yyerror_more(yyscanner, "double constant overflow",
										  pg_strdup(yytext));
					return DOUBLE_CONST;
				}
{alpha}{alnum}*	{
					/* any other identifier is taken to be a function name */
					yylval->str = pg_strdup(yytext);
					return FUNCTION;
				}

{space}+		{ /* ignore */ }

{continuation}	{ /* ignore */ }

{newline}		{
					/* report end of command */
					last_was_newline = true;
					return 0;
				}

.				{
					/*
					 * must strdup yytext so that expr_yyerror_more doesn't
					 * change it while finding end of line
					 */
					expr_yyerror_more(yyscanner, "unexpected character",
									  pg_strdup(yytext));
					/* NOTREACHED, syntax_error calls exit() */
					return 0;
				}

}
252 
<<EOF>>			{
					if (cur_state->buffer_stack != NULL)
					{
						/*
						 * This buffer was pushed for a variable expansion:
						 * drop it, go back to the buffer underneath, and
						 * carry on lexing.
						 */
						psqlscan_pop_buffer_stack(cur_state);
						psqlscan_select_top_buffer(cur_state);
					}
					else
						return 0;			/* genuinely out of input */
				}
264 
265 %%
266 
267 /* LCOV_EXCL_STOP */
268 
269 void
270 expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
271 {
272 	PsqlScanState state = yyget_extra(yyscanner);
273 	int			error_detection_offset = expr_scanner_offset(state) - 1;
274 	YYSTYPE		lval;
275 	char	   *full_line;
276 
277 	/*
278 	 * While parsing an expression, we may not have collected the whole line
279 	 * yet from the input source.  Lex till EOL so we can report whole line.
280 	 * (If we're at EOF, it's okay to call yylex() an extra time.)
281 	 */
282 	if (!last_was_newline)
283 	{
284 		while (yylex(&lval, yyscanner))
285 			 /* skip */ ;
286 	}
287 
288 	/* Extract the line, trimming trailing newline if any */
289 	full_line = expr_scanner_get_substring(state,
290 										   expr_start_offset,
291 										   expr_scanner_offset(state),
292 										   true);
293 
294 	syntax_error(expr_source, expr_lineno, full_line, expr_command,
295 				 message, more, error_detection_offset - expr_start_offset);
296 }
297 
298 void
299 expr_yyerror(yyscan_t yyscanner, const char *message)
300 {
301 	expr_yyerror_more(yyscanner, message, NULL);
302 }
303 
304 /*
305  * Collect a space-separated word from a backslash command and return it
306  * in word_buf, along with its starting string offset in *offset.
307  * Returns true if successful, false if at end of command.
308  */
309 bool
310 expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
311 {
312 	int			lexresult;
313 	YYSTYPE		lval;
314 
315 	/* Must be scanning already */
316 	Assert(state->scanbufhandle != NULL);
317 
318 	/* Set current output target */
319 	state->output_buf = word_buf;
320 	resetPQExpBuffer(word_buf);
321 
322 	/* Set input source */
323 	if (state->buffer_stack != NULL)
324 		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
325 	else
326 		yy_switch_to_buffer(state->scanbufhandle, state->scanner);
327 
328 	/* Set start state */
329 	state->start_state = INITIAL;
330 
331 	/* And lex. */
332 	lexresult = yylex(&lval, state->scanner);
333 
334 	/*
335 	 * Save start offset of word, if any.  We could do this more efficiently,
336 	 * but for now this seems fine.
337 	 */
338 	if (lexresult)
339 		*offset = expr_scanner_offset(state) - word_buf->len;
340 	else
341 		*offset = -1;
342 
343 	/*
344 	 * In case the caller returns to using the regular SQL lexer, reselect the
345 	 * appropriate initial state.
346 	 */
347 	psql_scan_reselect_sql_lexer(state);
348 
349 	return (bool) lexresult;
350 }
351 
352 /*
353  * Prepare to lex an expression via expr_yyparse().
354  *
355  * Returns the yyscan_t that is to be passed to expr_yyparse().
356  * (This is just state->scanner, but callers don't need to know that.)
357  */
358 yyscan_t
359 expr_scanner_init(PsqlScanState state,
360 				  const char *source, int lineno, int start_offset,
361 				  const char *command)
362 {
363 	/* Save error context info */
364 	expr_source = source;
365 	expr_lineno = lineno;
366 	expr_start_offset = start_offset;
367 	expr_command = command;
368 
369 	/* Must be scanning already */
370 	Assert(state->scanbufhandle != NULL);
371 
372 	/* Set current output target */
373 	state->output_buf = NULL;
374 
375 	/* Set input source */
376 	if (state->buffer_stack != NULL)
377 		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
378 	else
379 		yy_switch_to_buffer(state->scanbufhandle, state->scanner);
380 
381 	/* Set start state */
382 	state->start_state = EXPR;
383 
384 	return state->scanner;
385 }
386 
387 /*
388  * Finish lexing an expression.
389  */
390 void
391 expr_scanner_finish(yyscan_t yyscanner)
392 {
393 	PsqlScanState state = yyget_extra(yyscanner);
394 
395 	/*
396 	 * Reselect appropriate initial state for SQL lexer.
397 	 */
398 	psql_scan_reselect_sql_lexer(state);
399 }
400 
401 /*
402  * Get offset from start of string to end of current lexer token.
403  *
404  * We rely on the knowledge that flex modifies the scan buffer by storing
405  * a NUL at the end of the current token (yytext).  Note that this might
406  * not work quite right if we were parsing a sub-buffer, but since pgbench
407  * never invokes that functionality, it doesn't matter.
408  */
409 int
410 expr_scanner_offset(PsqlScanState state)
411 {
412 	return strlen(state->scanbuf);
413 }
414 
415 /*
416  * Get a malloc'd copy of the lexer input string from start_offset
417  * to just before end_offset.  If chomp is true, drop any trailing
418  * newline(s).
419  */
420 char *
421 expr_scanner_get_substring(PsqlScanState state,
422 						   int start_offset, int end_offset,
423 						   bool chomp)
424 {
425 	char	   *result;
426 	const char *scanptr = state->scanbuf + start_offset;
427 	int			slen = end_offset - start_offset;
428 
429 	Assert(slen >= 0);
430 	Assert(end_offset <= strlen(state->scanbuf));
431 
432 	if (chomp)
433 	{
434 		while (slen > 0 &&
435 			   (scanptr[slen - 1] == '\n' || scanptr[slen - 1] == '\r'))
436 			slen--;
437 	}
438 
439 	result = (char *) pg_malloc(slen + 1);
440 	memcpy(result, scanptr, slen);
441 	result[slen] = '\0';
442 
443 	return result;
444 }
445 
446 /*
447  * Get the line number associated with the given string offset
448  * (which must not be past the end of where we've lexed to).
449  */
450 int
451 expr_scanner_get_lineno(PsqlScanState state, int offset)
452 {
453 	int			lineno = 1;
454 	const char *p = state->scanbuf;
455 
456 	while (*p && offset > 0)
457 	{
458 		if (*p == '\n')
459 			lineno++;
460 		p++, offset--;
461 	}
462 	return lineno;
463 }
464