1 %{
2 /*-------------------------------------------------------------------------
3  *
4  * exprscan.l
5  *	  lexical scanner for pgbench backslash commands
6  *
7  * This lexer supports two operating modes:
8  *
9  * In INITIAL state, just parse off whitespace-separated words (this mode
10  * is basically equivalent to strtok(), which is what we used to use).
11  *
12  * In EXPR state, lex for the simple expression syntax of exprparse.y.
13  *
14  * In either mode, stop upon hitting newline or end of string.
15  *
 * Note that this lexer operates within the framework created by psqlscan.l.
17  *
18  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
19  * Portions Copyright (c) 1994, Regents of the University of California
20  *
21  * src/bin/pgbench/exprscan.l
22  *
23  *-------------------------------------------------------------------------
24  */
25 
#include "fe_utils/psqlscan_int.h"

#include <errno.h>
27 
/*
 * Context information for reporting errors in expressions.
 *
 * These are filled in by expr_scanner_init() and consulted by
 * expr_yyerror_more() when it builds the arguments for syntax_error().
 */
static const char *expr_source = NULL;
static int	expr_lineno = 0;
static int	expr_start_offset = 0;
static const char *expr_command = NULL;

/*
 * Indicates whether last yylex() call read a newline.
 *
 * The flag is cleared on every entry to yylex() and set by the newline
 * rules just before they return 0, so expr_yyerror_more() can tell whether
 * the rest of the line still has to be lexed.
 */
static bool last_was_newline = false;

/*
 * Work around a bug in flex 2.5.35: it emits a couple of functions that
 * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
 * this would cause warnings.  Providing our own declarations should be
 * harmless even when the bug gets fixed.
 */
extern int	expr_yyget_column(yyscan_t yyscanner);
extern void expr_yyset_column(int column_no, yyscan_t yyscanner);
45 
46 %}
47 
/* Except for the prefix, these options should match psqlscan.l */
%option reentrant
%option bison-bridge
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="expr_yy"

/*
 * Character classes
 *
 * {alpha} is what a function name may start with; {alnum} is what may
 * follow it, and also what a variable name after ':' is made of.
 */
alpha			[a-zA-Z_]
digit			[0-9]
alnum			[a-zA-Z0-9_]
/*
 * {space} + {nonspace} + {newline} should cover all characters.
 * Newline is deliberately kept out of {space}: both lexer states treat it
 * as the end of the command rather than as ignorable whitespace.
 */
space			[ \t\r\f\v]
nonspace		[^ \t\r\f\v\n]
newline			[\n]

/* Exclusive states: EXPR is entered to lex expression syntax */
%x EXPR
71 
72 %%
73 
%{
		/* Declare some local variables inside yylex(), for convenience */
		PsqlScanState cur_state = yyextra;

		/*
		 * Force flex into the state indicated by start_state.  This has a
		 * couple of purposes: it lets some of the functions below set a new
		 * starting state without ugly direct access to flex variables, and it
		 * allows us to transition from one flex lexer to another so that we
		 * can lex different parts of the source string using separate lexers.
		 */
		BEGIN(cur_state->start_state);

		/*
		 * Reset was-newline flag.  Only the newline rules below set it again,
		 * so after yylex() returns it tells whether this call ended on a
		 * newline.
		 */
		last_was_newline = false;
%}

	/* INITIAL state: split the input into whitespace-separated words */

{nonspace}+		{
					/* Found a word, emit and return it */
					psqlscan_emit(cur_state, yytext, yyleng);
					return 1;
				}

{space}+		{ /* ignore */ }

{newline}		{
					/* report end of command */
					last_was_newline = true;
					return 0;
				}

	/* EXPR state: tokens of the expression grammar */

<EXPR>{

"+"				{ return '+'; }
"-"				{ return '-'; }
"*"				{ return '*'; }
"/"				{ return '/'; }
"%"				{ return '%'; }
"("				{ return '('; }
")"				{ return ')'; }
","				{ return ','; }

:{alnum}+		{
					/* variable reference; skip the leading colon */
					yylval->str = pg_strdup(yytext + 1);
					return VARIABLE;
				}
{digit}+		{
					/*
					 * This rule must stay ahead of the floating-point rules:
					 * their first pattern also matches a plain digit string,
					 * and flex resolves equal-length matches in favor of the
					 * earlier rule.
					 */
					yylval->ival = strtoint64(yytext);
					return INTEGER_CONST;
				}
{digit}+(\.{digit}*)?([eE][-+]?{digit}+)?	|
\.{digit}+([eE][-+]?{digit}+)?	{
					/*
					 * Convert with strtod() rather than atof(): atof() has no
					 * way to report a range error, so a constant such as
					 * 1e999 would silently be accepted as infinity.
					 */
					errno = 0;
					yylval->dval = strtod(yytext, NULL);
					if (errno == ERANGE)
					{
						/*
						 * must strdup yytext so that expr_yyerror_more
						 * doesn't change it while finding end of line
						 */
						expr_yyerror_more(yyscanner,
										  "double constant out of range",
										  pg_strdup(yytext));
						/* NOTREACHED, syntax_error calls exit() */
						return 0;
					}
					return DOUBLE_CONST;
				}
{alpha}{alnum}*	{
					yylval->str = pg_strdup(yytext);
					return FUNCTION;
				}

{newline}		{
					/* report end of command */
					last_was_newline = true;
					return 0;
				}

{space}+		{ /* ignore */ }

.				{
					/*
					 * must strdup yytext so that expr_yyerror_more doesn't
					 * change it while finding end of line
					 */
					expr_yyerror_more(yyscanner, "unexpected character",
									  pg_strdup(yytext));
					/* NOTREACHED, syntax_error calls exit() */
					return 0;
				}

}

<<EOF>>			{
					if (cur_state->buffer_stack == NULL)
						return 0;			/* end of input reached */

					/*
					 * We were expanding a variable, so pop the inclusion
					 * stack and keep lexing
					 */
					psqlscan_pop_buffer_stack(cur_state);
					psqlscan_select_top_buffer(cur_state);
				}
173 
174 %%
175 
176 void
177 expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
178 {
179 	PsqlScanState state = yyget_extra(yyscanner);
180 	int			error_detection_offset = expr_scanner_offset(state) - 1;
181 	YYSTYPE		lval;
182 	char	   *full_line;
183 	size_t		l;
184 
185 	/*
186 	 * While parsing an expression, we may not have collected the whole line
187 	 * yet from the input source.  Lex till EOL so we can report whole line.
188 	 * (If we're at EOF, it's okay to call yylex() an extra time.)
189 	 */
190 	if (!last_was_newline)
191 	{
192 		while (yylex(&lval, yyscanner))
193 			 /* skip */ ;
194 	}
195 
196 	full_line = expr_scanner_get_substring(state,
197 										   expr_start_offset,
198 										   expr_scanner_offset(state));
199 	/* Trim trailing newline if any */
200 	l = strlen(full_line);
201 	while (l > 0 && full_line[l - 1] == '\n')
202 		full_line[--l] = '\0';
203 
204 	syntax_error(expr_source, expr_lineno, full_line, expr_command,
205 				 message, more, error_detection_offset - expr_start_offset);
206 }
207 
208 void
expr_yyerror(yyscan_t yyscanner,const char * message)209 expr_yyerror(yyscan_t yyscanner, const char *message)
210 {
211 	expr_yyerror_more(yyscanner, message, NULL);
212 }
213 
214 /*
215  * Collect a space-separated word from a backslash command and return it
216  * in word_buf, along with its starting string offset in *offset.
217  * Returns true if successful, false if at end of command.
218  */
219 bool
expr_lex_one_word(PsqlScanState state,PQExpBuffer word_buf,int * offset)220 expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
221 {
222 	int			lexresult;
223 	YYSTYPE		lval;
224 
225 	/* Must be scanning already */
226 	Assert(state->scanbufhandle != NULL);
227 
228 	/* Set current output target */
229 	state->output_buf = word_buf;
230 	resetPQExpBuffer(word_buf);
231 
232 	/* Set input source */
233 	if (state->buffer_stack != NULL)
234 		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
235 	else
236 		yy_switch_to_buffer(state->scanbufhandle, state->scanner);
237 
238 	/* Set start state */
239 	state->start_state = INITIAL;
240 
241 	/* And lex. */
242 	lexresult = yylex(&lval, state->scanner);
243 
244 	/*
245 	 * Save start offset of word, if any.  We could do this more efficiently,
246 	 * but for now this seems fine.
247 	 */
248 	if (lexresult)
249 		*offset = expr_scanner_offset(state) - word_buf->len;
250 	else
251 		*offset = -1;
252 
253 	/*
254 	 * In case the caller returns to using the regular SQL lexer, reselect the
255 	 * appropriate initial state.
256 	 */
257 	psql_scan_reselect_sql_lexer(state);
258 
259 	return (bool) lexresult;
260 }
261 
262 /*
263  * Prepare to lex an expression via expr_yyparse().
264  *
265  * Returns the yyscan_t that is to be passed to expr_yyparse().
266  * (This is just state->scanner, but callers don't need to know that.)
267  */
268 yyscan_t
expr_scanner_init(PsqlScanState state,const char * source,int lineno,int start_offset,const char * command)269 expr_scanner_init(PsqlScanState state,
270 				  const char *source, int lineno, int start_offset,
271 				  const char *command)
272 {
273 	/* Save error context info */
274 	expr_source = source;
275 	expr_lineno = lineno;
276 	expr_start_offset = start_offset;
277 	expr_command = command;
278 
279 	/* Must be scanning already */
280 	Assert(state->scanbufhandle != NULL);
281 
282 	/* Set current output target */
283 	state->output_buf = NULL;
284 
285 	/* Set input source */
286 	if (state->buffer_stack != NULL)
287 		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
288 	else
289 		yy_switch_to_buffer(state->scanbufhandle, state->scanner);
290 
291 	/* Set start state */
292 	state->start_state = EXPR;
293 
294 	return state->scanner;
295 }
296 
297 /*
298  * Finish lexing an expression.
299  */
300 void
expr_scanner_finish(yyscan_t yyscanner)301 expr_scanner_finish(yyscan_t yyscanner)
302 {
303 	PsqlScanState state = yyget_extra(yyscanner);
304 
305 	/*
306 	 * Reselect appropriate initial state for SQL lexer.
307 	 */
308 	psql_scan_reselect_sql_lexer(state);
309 }
310 
311 /*
312  * Get offset from start of string to end of current lexer token.
313  *
314  * We rely on the knowledge that flex modifies the scan buffer by storing
315  * a NUL at the end of the current token (yytext).  Note that this might
316  * not work quite right if we were parsing a sub-buffer, but since pgbench
317  * never invokes that functionality, it doesn't matter.
318  */
319 int
expr_scanner_offset(PsqlScanState state)320 expr_scanner_offset(PsqlScanState state)
321 {
322 	return strlen(state->scanbuf);
323 }
324 
325 /*
326  * Get a malloc'd copy of the lexer input string from start_offset
327  * to just before end_offset.
328  */
329 char *
expr_scanner_get_substring(PsqlScanState state,int start_offset,int end_offset)330 expr_scanner_get_substring(PsqlScanState state,
331 						   int start_offset, int end_offset)
332 {
333 	char	   *result;
334 	int			slen = end_offset - start_offset;
335 
336 	Assert(slen >= 0);
337 	Assert(end_offset <= strlen(state->scanbuf));
338 	result = (char *) pg_malloc(slen + 1);
339 	memcpy(result, state->scanbuf + start_offset, slen);
340 	result[slen] = '\0';
341 
342 	return result;
343 }
344 
345 /*
346  * Get the line number associated with the given string offset
347  * (which must not be past the end of where we've lexed to).
348  */
349 int
expr_scanner_get_lineno(PsqlScanState state,int offset)350 expr_scanner_get_lineno(PsqlScanState state, int offset)
351 {
352 	int			lineno = 1;
353 	const char *p = state->scanbuf;
354 
355 	while (*p && offset > 0)
356 	{
357 		if (*p == '\n')
358 			lineno++;
359 		p++, offset--;
360 	}
361 	return lineno;
362 }
363