1 /*-------------------------------------------------------------------------
2  *
3  * pl_scanner.c
4  *	  lexical scanning for PL/pgSQL
5  *
6  *
7  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  *
11  * IDENTIFICATION
12  *	  src/pl/plpgsql/src/pl_scanner.c
13  *
14  *-------------------------------------------------------------------------
15  */
16 #include "plpgsql.h"
17 
18 #include "mb/pg_wchar.h"
19 #include "parser/scanner.h"
20 
21 #include "pl_gram.h"			/* must be after parser/scanner.h */
22 
/*
 * PG_KEYWORD(name, token-value, category) expands to one brace-enclosed
 * initializer followed by a comma, so the keyword tables below can be
 * written as a plain list of PG_KEYWORD() lines with no separators.
 */
#define PG_KEYWORD(a,b,c) {a,b,c},


/*
 * Klugy flag to tell scanner how to look up identifiers.  It is reset to
 * IDENTIFIER_LOOKUP_NORMAL by plpgsql_scanner_init().
 */
IdentifierLookup plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
28 
29 /*
30  * A word about keywords:
31  *
32  * We keep reserved and unreserved keywords in separate arrays.  The
33  * reserved keywords are passed to the core scanner, so they will be
34  * recognized before (and instead of) any variable name.  Unreserved words
35  * are checked for separately, usually after determining that the identifier
36  * isn't a known variable name.  If plpgsql_IdentifierLookup is DECLARE then
37  * no variable names will be recognized, so the unreserved words always work.
38  * (Note in particular that this helps us avoid reserving keywords that are
39  * only needed in DECLARE sections.)
40  *
41  * In certain contexts it is desirable to prefer recognizing an unreserved
42  * keyword over recognizing a variable name.  In particular, at the start
43  * of a statement we should prefer unreserved keywords unless the statement
44  * looks like an assignment (i.e., first token is followed by ':=' or '[').
45  * This rule allows most statement-introducing keywords to be kept unreserved.
46  * (We still have to reserve initial keywords that might follow a block
47  * label, unfortunately, since the method used to determine if we are at
48  * start of statement doesn't recognize such cases.  We'd also have to
49  * reserve any keyword that could legitimately be followed by ':=' or '['.)
50  * Some additional cases are handled in pl_gram.y using tok_is_keyword().
51  *
52  * We try to avoid reserving more keywords than we have to; but there's
53  * little point in not reserving a word if it's reserved in the core grammar.
54  * Currently, the following words are reserved here but not in the core:
55  * BEGIN BY DECLARE EXECUTE FOREACH IF LOOP STRICT WHILE
56  */
57 
58 /*
59  * Lists of keyword (name, token-value, category) entries.
60  *
61  * !!WARNING!!: These lists must be sorted by ASCII name, because binary
62  *		 search is used to locate entries.
63  *
64  * Be careful not to put the same word in both lists.  Also be sure that
65  * pl_gram.y's unreserved_keyword production agrees with the second list.
66  */
67 
/*
 * Reserved keywords, sorted by ASCII name.  plpgsql_scanner_init() hands
 * this table (and its length) to the core scanner via scanner_init().
 */
static const ScanKeyword reserved_keywords[] = {
	PG_KEYWORD("all", K_ALL, RESERVED_KEYWORD)
	PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD)
	PG_KEYWORD("by", K_BY, RESERVED_KEYWORD)
	PG_KEYWORD("case", K_CASE, RESERVED_KEYWORD)
	PG_KEYWORD("declare", K_DECLARE, RESERVED_KEYWORD)
	PG_KEYWORD("else", K_ELSE, RESERVED_KEYWORD)
	PG_KEYWORD("end", K_END, RESERVED_KEYWORD)
	PG_KEYWORD("execute", K_EXECUTE, RESERVED_KEYWORD)
	PG_KEYWORD("for", K_FOR, RESERVED_KEYWORD)
	PG_KEYWORD("foreach", K_FOREACH, RESERVED_KEYWORD)
	PG_KEYWORD("from", K_FROM, RESERVED_KEYWORD)
	PG_KEYWORD("if", K_IF, RESERVED_KEYWORD)
	PG_KEYWORD("in", K_IN, RESERVED_KEYWORD)
	PG_KEYWORD("into", K_INTO, RESERVED_KEYWORD)
	PG_KEYWORD("loop", K_LOOP, RESERVED_KEYWORD)
	PG_KEYWORD("not", K_NOT, RESERVED_KEYWORD)
	PG_KEYWORD("null", K_NULL, RESERVED_KEYWORD)
	PG_KEYWORD("or", K_OR, RESERVED_KEYWORD)
	PG_KEYWORD("strict", K_STRICT, RESERVED_KEYWORD)
	PG_KEYWORD("then", K_THEN, RESERVED_KEYWORD)
	PG_KEYWORD("to", K_TO, RESERVED_KEYWORD)
	PG_KEYWORD("using", K_USING, RESERVED_KEYWORD)
	PG_KEYWORD("when", K_WHEN, RESERVED_KEYWORD)
	PG_KEYWORD("while", K_WHILE, RESERVED_KEYWORD)
};

/* Number of entries in reserved_keywords[] */
static const int num_reserved_keywords = lengthof(reserved_keywords);
96 
/*
 * Unreserved keywords, sorted by ASCII name.  plpgsql_yylex() searches this
 * table with ScanKeywordLookup(), and plpgsql_token_is_unreserved_keyword()
 * scans it linearly by token value.  Note that "elseif" and "elsif" both
 * map to K_ELSIF.
 */
static const ScanKeyword unreserved_keywords[] = {
	PG_KEYWORD("absolute", K_ABSOLUTE, UNRESERVED_KEYWORD)
	PG_KEYWORD("alias", K_ALIAS, UNRESERVED_KEYWORD)
	PG_KEYWORD("array", K_ARRAY, UNRESERVED_KEYWORD)
	PG_KEYWORD("assert", K_ASSERT, UNRESERVED_KEYWORD)
	PG_KEYWORD("backward", K_BACKWARD, UNRESERVED_KEYWORD)
	PG_KEYWORD("close", K_CLOSE, UNRESERVED_KEYWORD)
	PG_KEYWORD("collate", K_COLLATE, UNRESERVED_KEYWORD)
	PG_KEYWORD("column", K_COLUMN, UNRESERVED_KEYWORD)
	PG_KEYWORD("column_name", K_COLUMN_NAME, UNRESERVED_KEYWORD)
	PG_KEYWORD("constant", K_CONSTANT, UNRESERVED_KEYWORD)
	PG_KEYWORD("constraint", K_CONSTRAINT, UNRESERVED_KEYWORD)
	PG_KEYWORD("constraint_name", K_CONSTRAINT_NAME, UNRESERVED_KEYWORD)
	PG_KEYWORD("continue", K_CONTINUE, UNRESERVED_KEYWORD)
	PG_KEYWORD("current", K_CURRENT, UNRESERVED_KEYWORD)
	PG_KEYWORD("cursor", K_CURSOR, UNRESERVED_KEYWORD)
	PG_KEYWORD("datatype", K_DATATYPE, UNRESERVED_KEYWORD)
	PG_KEYWORD("debug", K_DEBUG, UNRESERVED_KEYWORD)
	PG_KEYWORD("default", K_DEFAULT, UNRESERVED_KEYWORD)
	PG_KEYWORD("detail", K_DETAIL, UNRESERVED_KEYWORD)
	PG_KEYWORD("diagnostics", K_DIAGNOSTICS, UNRESERVED_KEYWORD)
	PG_KEYWORD("dump", K_DUMP, UNRESERVED_KEYWORD)
	PG_KEYWORD("elseif", K_ELSIF, UNRESERVED_KEYWORD)
	PG_KEYWORD("elsif", K_ELSIF, UNRESERVED_KEYWORD)
	PG_KEYWORD("errcode", K_ERRCODE, UNRESERVED_KEYWORD)
	PG_KEYWORD("error", K_ERROR, UNRESERVED_KEYWORD)
	PG_KEYWORD("exception", K_EXCEPTION, UNRESERVED_KEYWORD)
	PG_KEYWORD("exit", K_EXIT, UNRESERVED_KEYWORD)
	PG_KEYWORD("fetch", K_FETCH, UNRESERVED_KEYWORD)
	PG_KEYWORD("first", K_FIRST, UNRESERVED_KEYWORD)
	PG_KEYWORD("forward", K_FORWARD, UNRESERVED_KEYWORD)
	PG_KEYWORD("get", K_GET, UNRESERVED_KEYWORD)
	PG_KEYWORD("hint", K_HINT, UNRESERVED_KEYWORD)
	PG_KEYWORD("import", K_IMPORT, UNRESERVED_KEYWORD)
	PG_KEYWORD("info", K_INFO, UNRESERVED_KEYWORD)
	PG_KEYWORD("insert", K_INSERT, UNRESERVED_KEYWORD)
	PG_KEYWORD("is", K_IS, UNRESERVED_KEYWORD)
	PG_KEYWORD("last", K_LAST, UNRESERVED_KEYWORD)
	PG_KEYWORD("log", K_LOG, UNRESERVED_KEYWORD)
	PG_KEYWORD("message", K_MESSAGE, UNRESERVED_KEYWORD)
	PG_KEYWORD("message_text", K_MESSAGE_TEXT, UNRESERVED_KEYWORD)
	PG_KEYWORD("move", K_MOVE, UNRESERVED_KEYWORD)
	PG_KEYWORD("next", K_NEXT, UNRESERVED_KEYWORD)
	PG_KEYWORD("no", K_NO, UNRESERVED_KEYWORD)
	PG_KEYWORD("notice", K_NOTICE, UNRESERVED_KEYWORD)
	PG_KEYWORD("open", K_OPEN, UNRESERVED_KEYWORD)
	PG_KEYWORD("option", K_OPTION, UNRESERVED_KEYWORD)
	PG_KEYWORD("perform", K_PERFORM, UNRESERVED_KEYWORD)
	PG_KEYWORD("pg_context", K_PG_CONTEXT, UNRESERVED_KEYWORD)
	PG_KEYWORD("pg_datatype_name", K_PG_DATATYPE_NAME, UNRESERVED_KEYWORD)
	PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT, UNRESERVED_KEYWORD)
	PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL, UNRESERVED_KEYWORD)
	PG_KEYWORD("pg_exception_hint", K_PG_EXCEPTION_HINT, UNRESERVED_KEYWORD)
	PG_KEYWORD("print_strict_params", K_PRINT_STRICT_PARAMS, UNRESERVED_KEYWORD)
	PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD)
	PG_KEYWORD("query", K_QUERY, UNRESERVED_KEYWORD)
	PG_KEYWORD("raise", K_RAISE, UNRESERVED_KEYWORD)
	PG_KEYWORD("relative", K_RELATIVE, UNRESERVED_KEYWORD)
	PG_KEYWORD("result_oid", K_RESULT_OID, UNRESERVED_KEYWORD)
	PG_KEYWORD("return", K_RETURN, UNRESERVED_KEYWORD)
	PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE, UNRESERVED_KEYWORD)
	PG_KEYWORD("reverse", K_REVERSE, UNRESERVED_KEYWORD)
	PG_KEYWORD("row_count", K_ROW_COUNT, UNRESERVED_KEYWORD)
	PG_KEYWORD("rowtype", K_ROWTYPE, UNRESERVED_KEYWORD)
	PG_KEYWORD("schema", K_SCHEMA, UNRESERVED_KEYWORD)
	PG_KEYWORD("schema_name", K_SCHEMA_NAME, UNRESERVED_KEYWORD)
	PG_KEYWORD("scroll", K_SCROLL, UNRESERVED_KEYWORD)
	PG_KEYWORD("slice", K_SLICE, UNRESERVED_KEYWORD)
	PG_KEYWORD("sqlstate", K_SQLSTATE, UNRESERVED_KEYWORD)
	PG_KEYWORD("stacked", K_STACKED, UNRESERVED_KEYWORD)
	PG_KEYWORD("table", K_TABLE, UNRESERVED_KEYWORD)
	PG_KEYWORD("table_name", K_TABLE_NAME, UNRESERVED_KEYWORD)
	PG_KEYWORD("type", K_TYPE, UNRESERVED_KEYWORD)
	PG_KEYWORD("use_column", K_USE_COLUMN, UNRESERVED_KEYWORD)
	PG_KEYWORD("use_variable", K_USE_VARIABLE, UNRESERVED_KEYWORD)
	PG_KEYWORD("variable_conflict", K_VARIABLE_CONFLICT, UNRESERVED_KEYWORD)
	PG_KEYWORD("warning", K_WARNING, UNRESERVED_KEYWORD)
};

/* Number of entries in unreserved_keywords[] */
static const int num_unreserved_keywords = lengthof(unreserved_keywords);
177 
/*
 * This macro must recognize all tokens that can immediately precede a
 * PL/pgSQL executable statement (that is, proc_sect or proc_stmt in the
 * grammar).  Fortunately, there are not very many, so hard-coding in this
 * fashion seems sufficient.
 *
 * Note: the argument is evaluated up to five times, so it must be free of
 * side effects.  plpgsql_yylex() passes plpgsql_yytoken, i.e. the code of
 * the token it returned most recently.
 */
#define AT_STMT_START(prev_token) \
	((prev_token) == ';' || \
	 (prev_token) == K_BEGIN || \
	 (prev_token) == K_THEN || \
	 (prev_token) == K_ELSE || \
	 (prev_token) == K_LOOP)
190 
191 
/*
 * Auxiliary data about a token (other than the token type).  One of these
 * accompanies every token code returned by internal_yylex() and every entry
 * of the pushback stack.
 */
typedef struct
{
	YYSTYPE		lval;			/* semantic information */
	YYLTYPE		lloc;			/* offset in scanbuf */
	int			leng;			/* length in bytes */
} TokenAuxData;
199 
200 /*
201  * Scanner working state.  At some point we might wish to fold all this
202  * into a YY_EXTRA struct.  For the moment, there is no need for plpgsql's
203  * lexer to be re-entrant, and the notational burden of passing a yyscanner
204  * pointer around is great enough to not want to do it without need.
205  */
206 
/* The stuff the core lexer needs; set up by plpgsql_scanner_init() */
static core_yyscan_t yyscanner = NULL;
static core_yy_extra_type core_yy;

/*
 * The original input string.  NULL whenever no scan is in progress
 * (plpgsql_scanner_finish() resets it).
 */
static const char *scanorig;

/* Current token's length (corresponds to plpgsql_yylval and plpgsql_yylloc) */
static int	plpgsql_yyleng;

/* Current token's code (corresponds to plpgsql_yylval and plpgsql_yylloc) */
static int	plpgsql_yytoken;

/*
 * Token pushback stack.  Entries are popped in LIFO order by
 * internal_yylex(); push_back_token() raises an error rather than exceed
 * MAX_PUSHBACKS pending entries.
 */
#define MAX_PUSHBACKS 4

static int	num_pushbacks;		/* number of valid stack entries */
static int	pushback_token[MAX_PUSHBACKS];
static TokenAuxData pushback_auxdata[MAX_PUSHBACKS];

/*
 * State for plpgsql_location_to_lineno().  cur_line_end points at the
 * newline ending the current line, or is NULL if no newline follows
 * cur_line_start.
 */
static const char *cur_line_start;
static const char *cur_line_end;
static int	cur_line_num;

/* Internal functions */
static int	internal_yylex(TokenAuxData *auxdata);
static void push_back_token(int token, TokenAuxData *auxdata);
static void location_lineno_init(void);
236 
237 
238 /*
239  * This is the yylex routine called from the PL/pgSQL grammar.
240  * It is a wrapper around the core lexer, with the ability to recognize
241  * PL/pgSQL variables and return them as special T_DATUM tokens.  If a
242  * word or compound word does not match any variable name, or if matching
243  * is turned off by plpgsql_IdentifierLookup, it is returned as
244  * T_WORD or T_CWORD respectively, or as an unreserved keyword if it
245  * matches one of those.
246  */
247 int
plpgsql_yylex(void)248 plpgsql_yylex(void)
249 {
250 	int			tok1;
251 	TokenAuxData aux1;
252 	const ScanKeyword *kw;
253 
254 	tok1 = internal_yylex(&aux1);
255 	if (tok1 == IDENT || tok1 == PARAM)
256 	{
257 		int			tok2;
258 		TokenAuxData aux2;
259 
260 		tok2 = internal_yylex(&aux2);
261 		if (tok2 == '.')
262 		{
263 			int			tok3;
264 			TokenAuxData aux3;
265 
266 			tok3 = internal_yylex(&aux3);
267 			if (tok3 == IDENT)
268 			{
269 				int			tok4;
270 				TokenAuxData aux4;
271 
272 				tok4 = internal_yylex(&aux4);
273 				if (tok4 == '.')
274 				{
275 					int			tok5;
276 					TokenAuxData aux5;
277 
278 					tok5 = internal_yylex(&aux5);
279 					if (tok5 == IDENT)
280 					{
281 						if (plpgsql_parse_tripword(aux1.lval.str,
282 												   aux3.lval.str,
283 												   aux5.lval.str,
284 												   &aux1.lval.wdatum,
285 												   &aux1.lval.cword))
286 							tok1 = T_DATUM;
287 						else
288 							tok1 = T_CWORD;
289 					}
290 					else
291 					{
292 						/* not A.B.C, so just process A.B */
293 						push_back_token(tok5, &aux5);
294 						push_back_token(tok4, &aux4);
295 						if (plpgsql_parse_dblword(aux1.lval.str,
296 												  aux3.lval.str,
297 												  &aux1.lval.wdatum,
298 												  &aux1.lval.cword))
299 							tok1 = T_DATUM;
300 						else
301 							tok1 = T_CWORD;
302 					}
303 				}
304 				else
305 				{
306 					/* not A.B.C, so just process A.B */
307 					push_back_token(tok4, &aux4);
308 					if (plpgsql_parse_dblword(aux1.lval.str,
309 											  aux3.lval.str,
310 											  &aux1.lval.wdatum,
311 											  &aux1.lval.cword))
312 						tok1 = T_DATUM;
313 					else
314 						tok1 = T_CWORD;
315 				}
316 			}
317 			else
318 			{
319 				/* not A.B, so just process A */
320 				push_back_token(tok3, &aux3);
321 				push_back_token(tok2, &aux2);
322 				if (plpgsql_parse_word(aux1.lval.str,
323 									   core_yy.scanbuf + aux1.lloc,
324 									   &aux1.lval.wdatum,
325 									   &aux1.lval.word))
326 					tok1 = T_DATUM;
327 				else if (!aux1.lval.word.quoted &&
328 						 (kw = ScanKeywordLookup(aux1.lval.word.ident,
329 												 unreserved_keywords,
330 												 num_unreserved_keywords)))
331 				{
332 					aux1.lval.keyword = kw->name;
333 					tok1 = kw->value;
334 				}
335 				else
336 					tok1 = T_WORD;
337 			}
338 		}
339 		else
340 		{
341 			/* not A.B, so just process A */
342 			push_back_token(tok2, &aux2);
343 
344 			/*
345 			 * If we are at start of statement, prefer unreserved keywords
346 			 * over variable names, unless the next token is assignment or
347 			 * '[', in which case prefer variable names.  (Note we need not
348 			 * consider '.' as the next token; that case was handled above,
349 			 * and we always prefer variable names in that case.)  If we are
350 			 * not at start of statement, always prefer variable names over
351 			 * unreserved keywords.
352 			 */
353 			if (AT_STMT_START(plpgsql_yytoken) &&
354 				!(tok2 == '=' || tok2 == COLON_EQUALS || tok2 == '['))
355 			{
356 				/* try for unreserved keyword, then for variable name */
357 				if (core_yy.scanbuf[aux1.lloc] != '"' &&
358 					(kw = ScanKeywordLookup(aux1.lval.str,
359 											unreserved_keywords,
360 											num_unreserved_keywords)))
361 				{
362 					aux1.lval.keyword = kw->name;
363 					tok1 = kw->value;
364 				}
365 				else if (plpgsql_parse_word(aux1.lval.str,
366 											core_yy.scanbuf + aux1.lloc,
367 											&aux1.lval.wdatum,
368 											&aux1.lval.word))
369 					tok1 = T_DATUM;
370 				else
371 					tok1 = T_WORD;
372 			}
373 			else
374 			{
375 				/* try for variable name, then for unreserved keyword */
376 				if (plpgsql_parse_word(aux1.lval.str,
377 									   core_yy.scanbuf + aux1.lloc,
378 									   &aux1.lval.wdatum,
379 									   &aux1.lval.word))
380 					tok1 = T_DATUM;
381 				else if (!aux1.lval.word.quoted &&
382 						 (kw = ScanKeywordLookup(aux1.lval.word.ident,
383 												 unreserved_keywords,
384 												 num_unreserved_keywords)))
385 				{
386 					aux1.lval.keyword = kw->name;
387 					tok1 = kw->value;
388 				}
389 				else
390 					tok1 = T_WORD;
391 			}
392 		}
393 	}
394 	else
395 	{
396 		/*
397 		 * Not a potential plpgsql variable name, just return the data.
398 		 *
399 		 * Note that we also come through here if the grammar pushed back a
400 		 * T_DATUM, T_CWORD, T_WORD, or unreserved-keyword token returned by a
401 		 * previous lookup cycle; thus, pushbacks do not incur extra lookup
402 		 * work, since we'll never do the above code twice for the same token.
403 		 * This property also makes it safe to rely on the old value of
404 		 * plpgsql_yytoken in the is-this-start-of-statement test above.
405 		 */
406 	}
407 
408 	plpgsql_yylval = aux1.lval;
409 	plpgsql_yylloc = aux1.lloc;
410 	plpgsql_yyleng = aux1.leng;
411 	plpgsql_yytoken = tok1;
412 	return tok1;
413 }
414 
415 /*
416  * Internal yylex function.  This wraps the core lexer and adds one feature:
417  * a token pushback stack.  We also make a couple of trivial single-token
418  * translations from what the core lexer does to what we want, in particular
419  * interfacing from the core_YYSTYPE to YYSTYPE union.
420  */
421 static int
internal_yylex(TokenAuxData * auxdata)422 internal_yylex(TokenAuxData *auxdata)
423 {
424 	int			token;
425 	const char *yytext;
426 
427 	if (num_pushbacks > 0)
428 	{
429 		num_pushbacks--;
430 		token = pushback_token[num_pushbacks];
431 		*auxdata = pushback_auxdata[num_pushbacks];
432 	}
433 	else
434 	{
435 		token = core_yylex(&auxdata->lval.core_yystype,
436 						   &auxdata->lloc,
437 						   yyscanner);
438 
439 		/* remember the length of yytext before it gets changed */
440 		yytext = core_yy.scanbuf + auxdata->lloc;
441 		auxdata->leng = strlen(yytext);
442 
443 		/* Check for << >> and #, which the core considers operators */
444 		if (token == Op)
445 		{
446 			if (strcmp(auxdata->lval.str, "<<") == 0)
447 				token = LESS_LESS;
448 			else if (strcmp(auxdata->lval.str, ">>") == 0)
449 				token = GREATER_GREATER;
450 			else if (strcmp(auxdata->lval.str, "#") == 0)
451 				token = '#';
452 		}
453 
454 		/* The core returns PARAM as ival, but we treat it like IDENT */
455 		else if (token == PARAM)
456 		{
457 			auxdata->lval.str = pstrdup(yytext);
458 		}
459 	}
460 
461 	return token;
462 }
463 
464 /*
465  * Push back a token to be re-read by next internal_yylex() call.
466  */
467 static void
push_back_token(int token,TokenAuxData * auxdata)468 push_back_token(int token, TokenAuxData *auxdata)
469 {
470 	if (num_pushbacks >= MAX_PUSHBACKS)
471 		elog(ERROR, "too many tokens pushed back");
472 	pushback_token[num_pushbacks] = token;
473 	pushback_auxdata[num_pushbacks] = *auxdata;
474 	num_pushbacks++;
475 }
476 
477 /*
478  * Push back a single token to be re-read by next plpgsql_yylex() call.
479  *
480  * NOTE: this does not cause yylval or yylloc to "back up".  Also, it
481  * is not a good idea to push back a token code other than what you read.
482  */
483 void
plpgsql_push_back_token(int token)484 plpgsql_push_back_token(int token)
485 {
486 	TokenAuxData auxdata;
487 
488 	auxdata.lval = plpgsql_yylval;
489 	auxdata.lloc = plpgsql_yylloc;
490 	auxdata.leng = plpgsql_yyleng;
491 	push_back_token(token, &auxdata);
492 }
493 
494 /*
495  * Tell whether a token is an unreserved keyword.
496  *
497  * (If it is, its lowercased form was returned as the token value, so we
498  * do not need to offer that data here.)
499  */
500 bool
plpgsql_token_is_unreserved_keyword(int token)501 plpgsql_token_is_unreserved_keyword(int token)
502 {
503 	int			i;
504 
505 	for (i = 0; i < num_unreserved_keywords; i++)
506 	{
507 		if (unreserved_keywords[i].value == token)
508 			return true;
509 	}
510 	return false;
511 }
512 
513 /*
514  * Append the function text starting at startlocation and extending to
515  * (not including) endlocation onto the existing contents of "buf".
516  */
517 void
plpgsql_append_source_text(StringInfo buf,int startlocation,int endlocation)518 plpgsql_append_source_text(StringInfo buf,
519 						   int startlocation, int endlocation)
520 {
521 	Assert(startlocation <= endlocation);
522 	appendBinaryStringInfo(buf, scanorig + startlocation,
523 						   endlocation - startlocation);
524 }
525 
526 /*
527  * Peek one token ahead in the input stream.  Only the token code is
528  * made available, not any of the auxiliary info such as location.
529  *
530  * NB: no variable or unreserved keyword lookup is performed here, they will
531  * be returned as IDENT. Reserved keywords are resolved as usual.
532  */
533 int
plpgsql_peek(void)534 plpgsql_peek(void)
535 {
536 	int			tok1;
537 	TokenAuxData aux1;
538 
539 	tok1 = internal_yylex(&aux1);
540 	push_back_token(tok1, &aux1);
541 	return tok1;
542 }
543 
544 /*
545  * Peek two tokens ahead in the input stream. The first token and its
546  * location in the query are returned in *tok1_p and *tok1_loc, second token
547  * and its location in *tok2_p and *tok2_loc.
548  *
549  * NB: no variable or unreserved keyword lookup is performed here, they will
550  * be returned as IDENT. Reserved keywords are resolved as usual.
551  */
552 void
plpgsql_peek2(int * tok1_p,int * tok2_p,int * tok1_loc,int * tok2_loc)553 plpgsql_peek2(int *tok1_p, int *tok2_p, int *tok1_loc, int *tok2_loc)
554 {
555 	int			tok1,
556 				tok2;
557 	TokenAuxData aux1,
558 				aux2;
559 
560 	tok1 = internal_yylex(&aux1);
561 	tok2 = internal_yylex(&aux2);
562 
563 	*tok1_p = tok1;
564 	if (tok1_loc)
565 		*tok1_loc = aux1.lloc;
566 	*tok2_p = tok2;
567 	if (tok2_loc)
568 		*tok2_loc = aux2.lloc;
569 
570 	push_back_token(tok2, &aux2);
571 	push_back_token(tok1, &aux1);
572 }
573 
574 /*
575  * plpgsql_scanner_errposition
576  *		Report an error cursor position, if possible.
577  *
578  * This is expected to be used within an ereport() call.  The return value
579  * is a dummy (always 0, in fact).
580  *
581  * Note that this can only be used for messages emitted during initial
582  * parsing of a plpgsql function, since it requires the scanorig string
583  * to still be available.
584  */
585 int
plpgsql_scanner_errposition(int location)586 plpgsql_scanner_errposition(int location)
587 {
588 	int			pos;
589 
590 	if (location < 0 || scanorig == NULL)
591 		return 0;				/* no-op if location is unknown */
592 
593 	/* Convert byte offset to character number */
594 	pos = pg_mbstrlen_with_len(scanorig, location) + 1;
595 	/* And pass it to the ereport mechanism */
596 	(void) internalerrposition(pos);
597 	/* Also pass the function body string */
598 	return internalerrquery(scanorig);
599 }
600 
601 /*
602  * plpgsql_yyerror
603  *		Report a lexer or grammar error.
604  *
605  * The message's cursor position refers to the current token (the one
606  * last returned by plpgsql_yylex()).
607  * This is OK for syntax error messages from the Bison parser, because Bison
608  * parsers report error as soon as the first unparsable token is reached.
609  * Beware of using yyerror for other purposes, as the cursor position might
610  * be misleading!
611  */
612 void
plpgsql_yyerror(const char * message)613 plpgsql_yyerror(const char *message)
614 {
615 	char	   *yytext = core_yy.scanbuf + plpgsql_yylloc;
616 
617 	if (*yytext == '\0')
618 	{
619 		ereport(ERROR,
620 				(errcode(ERRCODE_SYNTAX_ERROR),
621 		/* translator: %s is typically the translation of "syntax error" */
622 				 errmsg("%s at end of input", _(message)),
623 				 plpgsql_scanner_errposition(plpgsql_yylloc)));
624 	}
625 	else
626 	{
627 		/*
628 		 * If we have done any lookahead then flex will have restored the
629 		 * character after the end-of-token.  Zap it again so that we report
630 		 * only the single token here.  This modifies scanbuf but we no longer
631 		 * care about that.
632 		 */
633 		yytext[plpgsql_yyleng] = '\0';
634 
635 		ereport(ERROR,
636 				(errcode(ERRCODE_SYNTAX_ERROR),
637 		/* translator: first %s is typically the translation of "syntax error" */
638 				 errmsg("%s at or near \"%s\"", _(message), yytext),
639 				 plpgsql_scanner_errposition(plpgsql_yylloc)));
640 	}
641 }
642 
643 /*
644  * Given a location (a byte offset in the function source text),
645  * return a line number.
646  *
647  * We expect that this is typically called for a sequence of increasing
648  * location values, so optimize accordingly by tracking the endpoints
649  * of the "current" line.
650  */
651 int
plpgsql_location_to_lineno(int location)652 plpgsql_location_to_lineno(int location)
653 {
654 	const char *loc;
655 
656 	if (location < 0 || scanorig == NULL)
657 		return 0;				/* garbage in, garbage out */
658 	loc = scanorig + location;
659 
660 	/* be correct, but not fast, if input location goes backwards */
661 	if (loc < cur_line_start)
662 		location_lineno_init();
663 
664 	while (cur_line_end != NULL && loc > cur_line_end)
665 	{
666 		cur_line_start = cur_line_end + 1;
667 		cur_line_num++;
668 		cur_line_end = strchr(cur_line_start, '\n');
669 	}
670 
671 	return cur_line_num;
672 }
673 
674 /* initialize or reset the state for plpgsql_location_to_lineno */
675 static void
location_lineno_init(void)676 location_lineno_init(void)
677 {
678 	cur_line_start = scanorig;
679 	cur_line_num = 1;
680 
681 	cur_line_end = strchr(cur_line_start, '\n');
682 }
683 
/*
 * Return the most recently computed lineno, i.e. the current value of the
 * line counter maintained by plpgsql_location_to_lineno() and reset to 1
 * by location_lineno_init().
 */
int
plpgsql_latest_lineno(void)
{
	return cur_line_num;
}
690 
691 
692 /*
693  * Called before any actual parsing is done
694  *
695  * Note: the passed "str" must remain valid until plpgsql_scanner_finish().
696  * Although it is not fed directly to flex, we need the original string
697  * to cite in error messages.
698  */
699 void
plpgsql_scanner_init(const char * str)700 plpgsql_scanner_init(const char *str)
701 {
702 	/* Start up the core scanner */
703 	yyscanner = scanner_init(str, &core_yy,
704 							 reserved_keywords, num_reserved_keywords);
705 
706 	/*
707 	 * scanorig points to the original string, which unlike the scanner's
708 	 * scanbuf won't be modified on-the-fly by flex.  Notice that although
709 	 * yytext points into scanbuf, we rely on being able to apply locations
710 	 * (offsets from string start) to scanorig as well.
711 	 */
712 	scanorig = str;
713 
714 	/* Other setup */
715 	plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
716 	plpgsql_yytoken = 0;
717 
718 	num_pushbacks = 0;
719 
720 	location_lineno_init();
721 }
722 
723 /*
724  * Called after parsing is done to clean up after plpgsql_scanner_init()
725  */
726 void
plpgsql_scanner_finish(void)727 plpgsql_scanner_finish(void)
728 {
729 	/* release storage */
730 	scanner_finish(yyscanner);
731 	/* avoid leaving any dangling pointers */
732 	yyscanner = NULL;
733 	scanorig = NULL;
734 }
735