1 %{
2 /*-------------------------------------------------------------------------
3  *
4  * exprscan.l
5  *	  lexical scanner for pgbench backslash commands
6  *
7  * This lexer supports two operating modes:
8  *
9  * In INITIAL state, just parse off whitespace-separated words (this mode
10  * is basically equivalent to strtok(), which is what we used to use).
11  *
12  * In EXPR state, lex for the simple expression syntax of exprparse.y.
13  *
14  * In either mode, stop upon hitting newline or end of string.
15  *
 * Note that this lexer operates within the framework created by psqlscan.l,
 * sharing its PsqlScanState structure and buffer-stack handling.
 *
18  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
19  * Portions Copyright (c) 1994, Regents of the University of California
20  *
21  * src/bin/pgbench/exprscan.l
22  *
23  *-------------------------------------------------------------------------
24  */
25 
26 #include "fe_utils/psqlscan_int.h"
27 
/*
 * Context information for reporting errors in expressions.  These are
 * filled in by expr_scanner_init() and handed to syntax_error() by
 * expr_yyerror_more().
 */
static const char *expr_source = NULL;	/* "source" given to expr_scanner_init */
static int	expr_lineno = 0;	/* "lineno" given to expr_scanner_init */
static int	expr_start_offset = 0;	/* scan-buffer offset where the reported line begins */
static const char *expr_command = NULL;	/* "command" given to expr_scanner_init */

/*
 * Indicates whether last yylex() call read a newline.  yylex() clears the
 * flag on entry and sets it when a {newline} rule fires; expr_yyerror_more()
 * consults it to decide whether it still has to lex to the end of the line.
 */
static bool last_was_newline = false;

/*
 * Work around a bug in flex 2.5.35: it emits a couple of functions that
 * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
 * this would cause warnings.  Providing our own declarations should be
 * harmless even when the bug gets fixed.
 */
extern int	expr_yyget_column(yyscan_t yyscanner);
extern void expr_yyset_column(int column_no, yyscan_t yyscanner);
45 
46 %}
47 
/*
 * Except for the prefix, these options should match psqlscan.l.
 * The prefix is what gives the generated functions their expr_yy names
 * (compare the expr_yyget_column/expr_yyset_column declarations above).
 */
%option reentrant
%option bison-bridge
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="expr_yy"

/* Character classes */
alpha			[a-zA-Z_]
digit			[0-9]
alnum			[a-zA-Z0-9_]
/* {space} + {nonspace} + {newline} should cover all characters */
space			[ \t\r\f\v]
nonspace		[^ \t\r\f\v\n]
newline			[\n]

/* Line continuation marker: backslash, optional carriage return, newline */
continuation	\\\r?{newline}

/*
 * Exclusive states.  EXPR is selected by expr_scanner_init() through
 * start_state; because it is exclusive, the untagged word-splitting rules
 * do not apply while it is active.
 */
%x EXPR
74 
75 %%
76 
%{
		/*
		 * Declare some local variables inside yylex(), for convenience.
		 * cur_state is the PsqlScanState attached to this scanner.
		 */
		PsqlScanState cur_state = yyextra;

		/*
		 * Force flex into the state indicated by start_state.  This has a
		 * couple of purposes: it lets some of the functions below set a new
		 * starting state without ugly direct access to flex variables, and it
		 * allows us to transition from one flex lexer to another so that we
		 * can lex different parts of the source string using separate lexers.
		 */
		BEGIN(cur_state->start_state);

		/*
		 * Reset was-newline flag; only the {newline} rules set it again, so
		 * after yylex() returns it describes this call alone.
		 */
		last_was_newline = false;
%}
93 
	/*
	 * INITIAL state: split the input into whitespace-separated words.
	 * A rule returns 1 after emitting a word and 0 at end of command;
	 * expr_lex_one_word() converts that into its boolean result.
	 */

{nonspace}+		{
					/* Found a word, emit and return it */
					psqlscan_emit(cur_state, yytext, yyleng);
					return 1;
				}

	/*
	 * We need this rule to avoid returning "word\" instead of recognizing
	 * a continuation marker just after a word.  It matches more text than
	 * the plain {nonspace}+ rule above, so flex's longest-match preference
	 * selects it whenever a continuation directly follows the word.
	 */
{nonspace}+{continuation}	{
					/* Found "word\\\r?\n", emit and return just "word" */
					/* yyleng - 2 indexes the character just before the newline */
					int		wordlen = yyleng - 2;
					/* for a CRLF continuation that is '\r'; step back to the backslash */
					if (yytext[wordlen] == '\r')
						wordlen--;
					Assert(yytext[wordlen] == '\\');
					/* wordlen now equals the length of the word without the backslash */
					psqlscan_emit(cur_state, yytext, wordlen);
					return 1;
				}

{space}+		{ /* ignore whitespace between words */ }

{continuation}	{ /* ignore a continuation that is not glued to a word */ }

{newline}		{
					/* report end of command */
					last_was_newline = true;
					return 0;
				}
125 
	/*
	 * EXPR state: tokenize the simple expression syntax of exprparse.y.
	 * Operators and punctuation are returned as their own character codes;
	 * the remaining tokens store their value in *yylval before returning
	 * the token type.
	 */

<EXPR>{

"+"				{ return '+'; }
"-"				{ return '-'; }
"*"				{ return '*'; }
"/"				{ return '/'; }
"%"				{ return '%'; }
"("				{ return '('; }
")"				{ return ')'; }
","				{ return ','; }

:{alnum}+		{
					/* variable reference: skip the leading ':' and copy the name */
					yylval->str = pg_strdup(yytext + 1);
					return VARIABLE;
				}
{digit}+		{
					/*
					 * Plain digit string.  The DOUBLE_CONST pattern below
					 * matches the same text at the same length; this rule is
					 * listed first, so it is the one flex chooses.
					 */
					yylval->ival = strtoint64(yytext);
					return INTEGER_CONST;
				}
{digit}+(\.{digit}*)?([eE][-+]?{digit}+)?	{
					/* digits followed by an optional fraction and/or exponent */
					yylval->dval = atof(yytext);
					return DOUBLE_CONST;
				}
\.{digit}+([eE][-+]?{digit}+)?	{
					/* fraction with no leading digits, optional exponent */
					yylval->dval = atof(yytext);
					return DOUBLE_CONST;
				}
{alpha}{alnum}*	{
					/* identifier: returned as FUNCTION with a copy of its name */
					yylval->str = pg_strdup(yytext);
					return FUNCTION;
				}

{space}+		{ /* ignore */ }

{continuation}	{ /* ignore */ }

{newline}		{
					/* report end of command */
					last_was_newline = true;
					return 0;
				}

.				{
					/*
					 * Any character not matched by the rules above is an
					 * error.
					 *
					 * must strdup yytext so that expr_yyerror_more doesn't
					 * change it while finding end of line
					 */
					expr_yyerror_more(yyscanner, "unexpected character",
									  pg_strdup(yytext));
					/* NOTREACHED, syntax_error calls exit() */
					return 0;
				}

}
182 
<<EOF>>			{
					/* an empty buffer stack means the main input is exhausted */
					if (cur_state->buffer_stack == NULL)
						return 0;			/* end of input reached */

					/*
					 * We were expanding a variable, so pop the inclusion
					 * stack and keep lexing
					 */
					psqlscan_pop_buffer_stack(cur_state);
					psqlscan_select_top_buffer(cur_state);
					/* no return here: scanning resumes in the buffer just selected */
				}
194 
195 %%
196 
197 void
198 expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
199 {
200 	PsqlScanState state = yyget_extra(yyscanner);
201 	int			error_detection_offset = expr_scanner_offset(state) - 1;
202 	YYSTYPE		lval;
203 	char	   *full_line;
204 	size_t		l;
205 
206 	/*
207 	 * While parsing an expression, we may not have collected the whole line
208 	 * yet from the input source.  Lex till EOL so we can report whole line.
209 	 * (If we're at EOF, it's okay to call yylex() an extra time.)
210 	 */
211 	if (!last_was_newline)
212 	{
213 		while (yylex(&lval, yyscanner))
214 			 /* skip */ ;
215 	}
216 
217 	full_line = expr_scanner_get_substring(state,
218 										   expr_start_offset,
219 										   expr_scanner_offset(state));
220 	/* Trim trailing newline if any */
221 	l = strlen(full_line);
222 	while (l > 0 && full_line[l - 1] == '\n')
223 		full_line[--l] = '\0';
224 
225 	syntax_error(expr_source, expr_lineno, full_line, expr_command,
226 				 message, more, error_detection_offset - expr_start_offset);
227 }
228 
229 void
expr_yyerror(yyscan_t yyscanner,const char * message)230 expr_yyerror(yyscan_t yyscanner, const char *message)
231 {
232 	expr_yyerror_more(yyscanner, message, NULL);
233 }
234 
235 /*
236  * Collect a space-separated word from a backslash command and return it
237  * in word_buf, along with its starting string offset in *offset.
238  * Returns true if successful, false if at end of command.
239  */
240 bool
expr_lex_one_word(PsqlScanState state,PQExpBuffer word_buf,int * offset)241 expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
242 {
243 	int			lexresult;
244 	YYSTYPE		lval;
245 
246 	/* Must be scanning already */
247 	Assert(state->scanbufhandle != NULL);
248 
249 	/* Set current output target */
250 	state->output_buf = word_buf;
251 	resetPQExpBuffer(word_buf);
252 
253 	/* Set input source */
254 	if (state->buffer_stack != NULL)
255 		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
256 	else
257 		yy_switch_to_buffer(state->scanbufhandle, state->scanner);
258 
259 	/* Set start state */
260 	state->start_state = INITIAL;
261 
262 	/* And lex. */
263 	lexresult = yylex(&lval, state->scanner);
264 
265 	/*
266 	 * Save start offset of word, if any.  We could do this more efficiently,
267 	 * but for now this seems fine.
268 	 */
269 	if (lexresult)
270 		*offset = expr_scanner_offset(state) - word_buf->len;
271 	else
272 		*offset = -1;
273 
274 	/*
275 	 * In case the caller returns to using the regular SQL lexer, reselect the
276 	 * appropriate initial state.
277 	 */
278 	psql_scan_reselect_sql_lexer(state);
279 
280 	return (bool) lexresult;
281 }
282 
283 /*
284  * Prepare to lex an expression via expr_yyparse().
285  *
286  * Returns the yyscan_t that is to be passed to expr_yyparse().
287  * (This is just state->scanner, but callers don't need to know that.)
288  */
289 yyscan_t
expr_scanner_init(PsqlScanState state,const char * source,int lineno,int start_offset,const char * command)290 expr_scanner_init(PsqlScanState state,
291 				  const char *source, int lineno, int start_offset,
292 				  const char *command)
293 {
294 	/* Save error context info */
295 	expr_source = source;
296 	expr_lineno = lineno;
297 	expr_start_offset = start_offset;
298 	expr_command = command;
299 
300 	/* Must be scanning already */
301 	Assert(state->scanbufhandle != NULL);
302 
303 	/* Set current output target */
304 	state->output_buf = NULL;
305 
306 	/* Set input source */
307 	if (state->buffer_stack != NULL)
308 		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
309 	else
310 		yy_switch_to_buffer(state->scanbufhandle, state->scanner);
311 
312 	/* Set start state */
313 	state->start_state = EXPR;
314 
315 	return state->scanner;
316 }
317 
318 /*
319  * Finish lexing an expression.
320  */
321 void
expr_scanner_finish(yyscan_t yyscanner)322 expr_scanner_finish(yyscan_t yyscanner)
323 {
324 	PsqlScanState state = yyget_extra(yyscanner);
325 
326 	/*
327 	 * Reselect appropriate initial state for SQL lexer.
328 	 */
329 	psql_scan_reselect_sql_lexer(state);
330 }
331 
332 /*
333  * Get offset from start of string to end of current lexer token.
334  *
335  * We rely on the knowledge that flex modifies the scan buffer by storing
336  * a NUL at the end of the current token (yytext).  Note that this might
337  * not work quite right if we were parsing a sub-buffer, but since pgbench
338  * never invokes that functionality, it doesn't matter.
339  */
340 int
expr_scanner_offset(PsqlScanState state)341 expr_scanner_offset(PsqlScanState state)
342 {
343 	return strlen(state->scanbuf);
344 }
345 
346 /*
347  * Get a malloc'd copy of the lexer input string from start_offset
348  * to just before end_offset.
349  */
350 char *
expr_scanner_get_substring(PsqlScanState state,int start_offset,int end_offset)351 expr_scanner_get_substring(PsqlScanState state,
352 						   int start_offset, int end_offset)
353 {
354 	char	   *result;
355 	int			slen = end_offset - start_offset;
356 
357 	Assert(slen >= 0);
358 	Assert(end_offset <= strlen(state->scanbuf));
359 	result = (char *) pg_malloc(slen + 1);
360 	memcpy(result, state->scanbuf + start_offset, slen);
361 	result[slen] = '\0';
362 
363 	return result;
364 }
365 
366 /*
367  * Get the line number associated with the given string offset
368  * (which must not be past the end of where we've lexed to).
369  */
370 int
expr_scanner_get_lineno(PsqlScanState state,int offset)371 expr_scanner_get_lineno(PsqlScanState state, int offset)
372 {
373 	int			lineno = 1;
374 	const char *p = state->scanbuf;
375 
376 	while (*p && offset > 0)
377 	{
378 		if (*p == '\n')
379 			lineno++;
380 		p++, offset--;
381 	}
382 	return lineno;
383 }
384