1 %{
2 /*-------------------------------------------------------------------------
3  *
4  * exprscan.l
5  *	  lexical scanner for pgbench backslash commands
6  *
7  * This lexer supports two operating modes:
8  *
9  * In INITIAL state, just parse off whitespace-separated words (this mode
10  * is basically equivalent to strtok(), which is what we used to use).
11  *
12  * In EXPR state, lex for the simple expression syntax of exprparse.y.
13  *
14  * In either mode, stop upon hitting newline or end of string.
15  *
16  * Note that this lexer operates within the framework created by psqlscan.l.
17  *
18  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
19  * Portions Copyright (c) 1994, Regents of the University of California
20  *
21  * src/bin/pgbench/exprscan.l
22  *
23  *-------------------------------------------------------------------------
24  */
25 
26 #include "fe_utils/psqlscan_int.h"
27 
28 /* context information for reporting errors in expressions */
29 static const char *expr_source = NULL;
30 static int	expr_lineno = 0;
31 static int	expr_start_offset = 0;
32 static const char *expr_command = NULL;
33 
34 /* indicates whether last yylex() call read a newline */
35 static bool last_was_newline = false;
36 
37 /*
38  * Work around a bug in flex 2.5.35: it emits a couple of functions that
39  * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
40  * this would cause warnings.  Providing our own declarations should be
41  * harmless even when the bug gets fixed.
42  */
43 extern int	expr_yyget_column(yyscan_t yyscanner);
44 extern void expr_yyset_column(int column_no, yyscan_t yyscanner);
45 
46 /* LCOV_EXCL_START */
47 
48 %}
49 
/* Scanner options; apart from the prefix, keep these in sync with psqlscan.l */
%option reentrant
%option bison-bridge
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="expr_yy"

/*
 * Character classes.  Bytes \200-\377 are treated as letters.
 */
alpha			[A-Za-z_\200-\377]
digit			[0-9]
alnum			[0-9A-Za-z_\200-\377]

/* Every input byte belongs to exactly one of {space}, {nonspace}, {newline} */
space			[ \f\r\t\v]
nonspace		[^ \f\r\t\v\n]
newline			[\n]

/* A backslash directly before the end of line continues the command */
continuation	\\\r?{newline}

/* Keywords, matched without regard to case */
and				[aA][nN][dD]
or				[oO][rR]
not				[nN][oO][tT]
case			[cC][aA][sS][eE]
when			[wW][hH][eE][nN]
then			[tT][hH][eE][nN]
else			[eE][lL][sS][eE]
end				[eE][nN][dD]
true			[tT][rR][uU][eE]
false			[fF][aA][lL][sS][eE]
null			[nN][uU][lL][lL]
is				[iI][sS]
isnull			[iI][sS][nN][uU][lL][lL]
notnull			[nN][oO][tT][nN][uU][lL][lL]

/* Exclusive start condition used while lexing expressions */
%x EXPR
92 
93 %%
94 
%{
		/* Shorthand for the shared scan state, available to every action */
		PsqlScanState cur_state = yyextra;

		/* A fresh yylex() call has not seen a newline yet */
		last_was_newline = false;

		/*
		 * Enter the start condition requested through start_state.  Doing
		 * this on every call lets the C functions below pick the lexing mode
		 * without touching flex internals, and lets several flex lexers take
		 * turns on different parts of the same source string.
		 */
		BEGIN(cur_state->start_state);
%}
111 
112 	/* INITIAL state */
113 
{nonspace}+		{
					/* A plain word: copy it to the output buffer */
					psqlscan_emit(cur_state, yytext, yyleng);
					return 1;
				}

	/*
	 * A word with a continuation marker glued to its end.  Without this
	 * rule the rule above would win and hand back "word\" with the
	 * backslash attached.
	 */
{nonspace}+{continuation}	{
					/*
					 * Matched "word\\\r?\n".  Find the backslash that starts
					 * the marker; everything before it is the word.
					 */
					int		wordlen = yyleng - 2;

					if (yytext[wordlen] == '\r')
						wordlen--;
					Assert(yytext[wordlen] == '\\');

					/*
					 * Give the marker back to the input so that the token
					 * ends right after the word.  expr_lex_one_word() works
					 * out the word's start as "end of token minus word
					 * length", which is only right if the token does not
					 * include the marker.  The returned marker is discarded
					 * by the {continuation} rule on the next call.
					 */
					yyless(wordlen);
					psqlscan_emit(cur_state, yytext, yyleng);
					return 1;
				}

{space}+		{ /* whitespace between words is skipped */ }

{continuation}	{ /* a continuation marker on its own is skipped */ }

{newline}		{
					/* an unescaped newline ends the command */
					last_was_newline = true;
					return 0;
				}
143 
144 	/* EXPR state */
145 
<EXPR>{

	/* arithmetic operators */
"+"				{ return '+'; }
"-"				{ return '-'; }
"*"				{ return '*'; }
"/"				{ return '/'; }
"%"				{ /* C version, also in Pg SQL */ return '%'; }

	/* comparison operators */
"="				{ return '='; }
"<>"			{ return NE_OP; }
"!="			{ /* C version, also in Pg SQL */ return NE_OP; }
"<="			{ return LE_OP; }
">="			{ return GE_OP; }
"<"				{ return '<'; }
">"				{ return '>'; }

	/* shift and bitwise operators */
"<<"			{ return LS_OP; }
">>"			{ return RS_OP; }
"|"				{ return '|'; }
"&"				{ return '&'; }
"#"				{ return '#'; }
"~"				{ return '~'; }

	/* punctuation */
"("				{ return '('; }
")"				{ return ')'; }
","				{ return ','; }

	/*
	 * Keywords.  These must stay ahead of the function-name rule below: a
	 * keyword and a function name of the same length tie, and flex then
	 * picks whichever rule comes first.
	 */
{and}			{ return AND_OP; }
{or}			{ return OR_OP; }
{not}			{ return NOT_OP; }
{is}			{ return IS_OP; }
{isnull}		{ return ISNULL_OP; }
{notnull}		{ return NOTNULL_OP; }

{case}			{ return CASE_KW; }
{when}			{ return WHEN_KW; }
{then}			{ return THEN_KW; }
{else}			{ return ELSE_KW; }
{end}			{ return END_KW; }

	/* :name is a variable reference; the colon is not part of the name */
:{alnum}+		{
					yylval->str = pg_strdup(&yytext[1]);
					return VARIABLE;
				}

	/* constants */
{null}			{ return NULL_CONST; }
{true}			{
					yylval->bval = true;
					return BOOLEAN_CONST;
				}
{false}			{
					yylval->bval = false;
					return BOOLEAN_CONST;
				}

	/*
	 * The integer rule precedes the first double rule on purpose: a bare
	 * digit string matches both with equal length, and the earlier rule wins.
	 */
{digit}+		{
					yylval->ival = strtoint64(yytext);
					return INTEGER_CONST;
				}
{digit}+(\.{digit}*)?([eE][-+]?{digit}+)?	{
					yylval->dval = atof(yytext);
					return DOUBLE_CONST;
				}
\.{digit}+([eE][-+]?{digit}+)?	{
					yylval->dval = atof(yytext);
					return DOUBLE_CONST;
				}

	/* any other identifier is taken to be a function name */
{alpha}{alnum}*	{
					yylval->str = pg_strdup(yytext);
					return FUNCTION;
				}

{space}+		{ /* whitespace is skipped */ }

{continuation}	{ /* continuation markers are skipped */ }

{newline}		{
					/* an unescaped newline ends the command */
					last_was_newline = true;
					return 0;
				}

.				{
					/*
					 * Copy yytext first: expr_yyerror_more keeps lexing to
					 * find the end of the line, which would overwrite it.
					 */
					char	   *offending = pg_strdup(yytext);

					expr_yyerror_more(yyscanner, "unexpected character",
									  offending);
					/* NOTREACHED, syntax_error calls exit() */
					return 0;
				}

}
237 
<<EOF>>			{
					if (cur_state->buffer_stack != NULL)
					{
						/*
						 * This buffer came from a variable expansion: drop
						 * it from the inclusion stack and carry on lexing
						 * in the buffer underneath.
						 */
						psqlscan_pop_buffer_stack(cur_state);
						psqlscan_select_top_buffer(cur_state);
					}
					else
						return 0;			/* genuine end of input */
				}
249 
250 %%
251 
252 /* LCOV_EXCL_STOP */
253 
254 void
255 expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
256 {
257 	PsqlScanState state = yyget_extra(yyscanner);
258 	int			error_detection_offset = expr_scanner_offset(state) - 1;
259 	YYSTYPE		lval;
260 	char	   *full_line;
261 
262 	/*
263 	 * While parsing an expression, we may not have collected the whole line
264 	 * yet from the input source.  Lex till EOL so we can report whole line.
265 	 * (If we're at EOF, it's okay to call yylex() an extra time.)
266 	 */
267 	if (!last_was_newline)
268 	{
269 		while (yylex(&lval, yyscanner))
270 			 /* skip */ ;
271 	}
272 
273 	/* Extract the line, trimming trailing newline if any */
274 	full_line = expr_scanner_get_substring(state,
275 										   expr_start_offset,
276 										   expr_scanner_offset(state),
277 										   true);
278 
279 	syntax_error(expr_source, expr_lineno, full_line, expr_command,
280 				 message, more, error_detection_offset - expr_start_offset);
281 }
282 
283 void
284 expr_yyerror(yyscan_t yyscanner, const char *message)
285 {
286 	expr_yyerror_more(yyscanner, message, NULL);
287 }
288 
289 /*
290  * Collect a space-separated word from a backslash command and return it
291  * in word_buf, along with its starting string offset in *offset.
292  * Returns true if successful, false if at end of command.
293  */
294 bool
295 expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
296 {
297 	int			lexresult;
298 	YYSTYPE		lval;
299 
300 	/* Must be scanning already */
301 	Assert(state->scanbufhandle != NULL);
302 
303 	/* Set current output target */
304 	state->output_buf = word_buf;
305 	resetPQExpBuffer(word_buf);
306 
307 	/* Set input source */
308 	if (state->buffer_stack != NULL)
309 		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
310 	else
311 		yy_switch_to_buffer(state->scanbufhandle, state->scanner);
312 
313 	/* Set start state */
314 	state->start_state = INITIAL;
315 
316 	/* And lex. */
317 	lexresult = yylex(&lval, state->scanner);
318 
319 	/*
320 	 * Save start offset of word, if any.  We could do this more efficiently,
321 	 * but for now this seems fine.
322 	 */
323 	if (lexresult)
324 		*offset = expr_scanner_offset(state) - word_buf->len;
325 	else
326 		*offset = -1;
327 
328 	/*
329 	 * In case the caller returns to using the regular SQL lexer, reselect the
330 	 * appropriate initial state.
331 	 */
332 	psql_scan_reselect_sql_lexer(state);
333 
334 	return (bool) lexresult;
335 }
336 
337 /*
338  * Prepare to lex an expression via expr_yyparse().
339  *
340  * Returns the yyscan_t that is to be passed to expr_yyparse().
341  * (This is just state->scanner, but callers don't need to know that.)
342  */
343 yyscan_t
344 expr_scanner_init(PsqlScanState state,
345 				  const char *source, int lineno, int start_offset,
346 				  const char *command)
347 {
348 	/* Save error context info */
349 	expr_source = source;
350 	expr_lineno = lineno;
351 	expr_start_offset = start_offset;
352 	expr_command = command;
353 
354 	/* Must be scanning already */
355 	Assert(state->scanbufhandle != NULL);
356 
357 	/* Set current output target */
358 	state->output_buf = NULL;
359 
360 	/* Set input source */
361 	if (state->buffer_stack != NULL)
362 		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
363 	else
364 		yy_switch_to_buffer(state->scanbufhandle, state->scanner);
365 
366 	/* Set start state */
367 	state->start_state = EXPR;
368 
369 	return state->scanner;
370 }
371 
372 /*
373  * Finish lexing an expression.
374  */
375 void
376 expr_scanner_finish(yyscan_t yyscanner)
377 {
378 	PsqlScanState state = yyget_extra(yyscanner);
379 
380 	/*
381 	 * Reselect appropriate initial state for SQL lexer.
382 	 */
383 	psql_scan_reselect_sql_lexer(state);
384 }
385 
386 /*
387  * Get offset from start of string to end of current lexer token.
388  *
389  * We rely on the knowledge that flex modifies the scan buffer by storing
390  * a NUL at the end of the current token (yytext).  Note that this might
391  * not work quite right if we were parsing a sub-buffer, but since pgbench
392  * never invokes that functionality, it doesn't matter.
393  */
394 int
395 expr_scanner_offset(PsqlScanState state)
396 {
397 	return strlen(state->scanbuf);
398 }
399 
400 /*
401  * Get a malloc'd copy of the lexer input string from start_offset
402  * to just before end_offset.  If chomp is true, drop any trailing
403  * newline(s).
404  */
405 char *
406 expr_scanner_get_substring(PsqlScanState state,
407 						   int start_offset, int end_offset,
408 						   bool chomp)
409 {
410 	char	   *result;
411 	const char *scanptr = state->scanbuf + start_offset;
412 	int			slen = end_offset - start_offset;
413 
414 	Assert(slen >= 0);
415 	Assert(end_offset <= strlen(state->scanbuf));
416 
417 	if (chomp)
418 	{
419 		while (slen > 0 &&
420 			   (scanptr[slen - 1] == '\n' || scanptr[slen - 1] == '\r'))
421 			slen--;
422 	}
423 
424 	result = (char *) pg_malloc(slen + 1);
425 	memcpy(result, scanptr, slen);
426 	result[slen] = '\0';
427 
428 	return result;
429 }
430 
431 /*
432  * Get the line number associated with the given string offset
433  * (which must not be past the end of where we've lexed to).
434  */
435 int
436 expr_scanner_get_lineno(PsqlScanState state, int offset)
437 {
438 	int			lineno = 1;
439 	const char *p = state->scanbuf;
440 
441 	while (*p && offset > 0)
442 	{
443 		if (*p == '\n')
444 			lineno++;
445 		p++, offset--;
446 	}
447 	return lineno;
448 }
449