1 /*--------------------------------------------------------------------
2  * Symbols referenced in this file:
3  * - plpgsql_scanner_init
4  * - plpgsql_IdentifierLookup
5  * - yyscanner
6  * - core_yy
7  * - reserved_keywords
8  * - num_reserved_keywords
9  * - scanorig
10  * - plpgsql_yytoken
11  * - num_pushbacks
12  * - location_lineno_init
13  * - cur_line_start
14  * - cur_line_num
15  * - cur_line_end
16  * - plpgsql_yylex
17  * - internal_yylex
18  * - pushback_token
19  * - pushback_auxdata
20  * - push_back_token
21  * - unreserved_keywords
22  * - num_unreserved_keywords
23  * - plpgsql_yyleng
24  * - plpgsql_location_to_lineno
25  * - plpgsql_scanner_errposition
26  * - plpgsql_yyerror
27  * - plpgsql_push_back_token
28  * - plpgsql_token_is_unreserved_keyword
29  * - plpgsql_peek
30  * - plpgsql_append_source_text
31  * - plpgsql_peek2
32  * - plpgsql_scanner_finish
33  * - plpgsql_latest_lineno
34  *--------------------------------------------------------------------
35  */
36 
37 /*-------------------------------------------------------------------------
38  *
39  * pl_scanner.c
40  *	  lexical scanning for PL/pgSQL
41  *
42  *
43  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
44  * Portions Copyright (c) 1994, Regents of the University of California
45  *
46  *
47  * IDENTIFICATION
48  *	  src/pl/plpgsql/src/pl_scanner.c
49  *
50  *-------------------------------------------------------------------------
51  */
52 #include "postgres.h"
53 
54 #include "mb/pg_wchar.h"
55 #include "parser/scanner.h"
56 
57 #include "plpgsql.h"
58 #include "pl_gram.h"			/* must be after parser/scanner.h */
59 
60 
61 #define PG_KEYWORD(a,b,c) {a,b,c},
62 
63 
64 /* Klugy flag to tell scanner how to look up identifiers */
65 __thread IdentifierLookup plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
66 
67 
68 /*
69  * A word about keywords:
70  *
71  * We keep reserved and unreserved keywords in separate arrays.  The
72  * reserved keywords are passed to the core scanner, so they will be
73  * recognized before (and instead of) any variable name.  Unreserved words
74  * are checked for separately, usually after determining that the identifier
75  * isn't a known variable name.  If plpgsql_IdentifierLookup is DECLARE then
76  * no variable names will be recognized, so the unreserved words always work.
77  * (Note in particular that this helps us avoid reserving keywords that are
78  * only needed in DECLARE sections.)
79  *
80  * In certain contexts it is desirable to prefer recognizing an unreserved
81  * keyword over recognizing a variable name.  In particular, at the start
82  * of a statement we should prefer unreserved keywords unless the statement
83  * looks like an assignment (i.e., first token is followed by ':=' or '[').
84  * This rule allows most statement-introducing keywords to be kept unreserved.
85  * (We still have to reserve initial keywords that might follow a block
86  * label, unfortunately, since the method used to determine if we are at
87  * start of statement doesn't recognize such cases.  We'd also have to
88  * reserve any keyword that could legitimately be followed by ':=' or '['.)
89  * Some additional cases are handled in pl_gram.y using tok_is_keyword().
90  *
91  * We try to avoid reserving more keywords than we have to; but there's
92  * little point in not reserving a word if it's reserved in the core grammar.
93  * Currently, the following words are reserved here but not in the core:
94  * BEGIN BY DECLARE EXECUTE FOREACH IF LOOP STRICT WHILE
95  */
96 
97 /*
98  * Lists of keyword (name, token-value, category) entries.
99  *
100  * !!WARNING!!: These lists must be sorted by ASCII name, because binary
101  *		 search is used to locate entries.
102  *
103  * Be careful not to put the same word in both lists.  Also be sure that
104  * pl_gram.y's unreserved_keyword production agrees with the second list.
105  */
106 
107 static const ScanKeyword reserved_keywords[] = {
108 	PG_KEYWORD("all", K_ALL, RESERVED_KEYWORD)
109 	PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD)
110 	PG_KEYWORD("by", K_BY, RESERVED_KEYWORD)
111 	PG_KEYWORD("case", K_CASE, RESERVED_KEYWORD)
112 	PG_KEYWORD("declare", K_DECLARE, RESERVED_KEYWORD)
113 	PG_KEYWORD("else", K_ELSE, RESERVED_KEYWORD)
114 	PG_KEYWORD("end", K_END, RESERVED_KEYWORD)
115 	PG_KEYWORD("execute", K_EXECUTE, RESERVED_KEYWORD)
116 	PG_KEYWORD("for", K_FOR, RESERVED_KEYWORD)
117 	PG_KEYWORD("foreach", K_FOREACH, RESERVED_KEYWORD)
118 	PG_KEYWORD("from", K_FROM, RESERVED_KEYWORD)
119 	PG_KEYWORD("if", K_IF, RESERVED_KEYWORD)
120 	PG_KEYWORD("in", K_IN, RESERVED_KEYWORD)
121 	PG_KEYWORD("into", K_INTO, RESERVED_KEYWORD)
122 	PG_KEYWORD("loop", K_LOOP, RESERVED_KEYWORD)
123 	PG_KEYWORD("not", K_NOT, RESERVED_KEYWORD)
124 	PG_KEYWORD("null", K_NULL, RESERVED_KEYWORD)
125 	PG_KEYWORD("or", K_OR, RESERVED_KEYWORD)
126 	PG_KEYWORD("strict", K_STRICT, RESERVED_KEYWORD)
127 	PG_KEYWORD("then", K_THEN, RESERVED_KEYWORD)
128 	PG_KEYWORD("to", K_TO, RESERVED_KEYWORD)
129 	PG_KEYWORD("using", K_USING, RESERVED_KEYWORD)
130 	PG_KEYWORD("when", K_WHEN, RESERVED_KEYWORD)
131 	PG_KEYWORD("while", K_WHILE, RESERVED_KEYWORD)
132 };
133 
134 static const int num_reserved_keywords = lengthof(reserved_keywords);
135 
136 static const ScanKeyword unreserved_keywords[] = {
137 	PG_KEYWORD("absolute", K_ABSOLUTE, UNRESERVED_KEYWORD)
138 	PG_KEYWORD("alias", K_ALIAS, UNRESERVED_KEYWORD)
139 	PG_KEYWORD("array", K_ARRAY, UNRESERVED_KEYWORD)
140 	PG_KEYWORD("assert", K_ASSERT, UNRESERVED_KEYWORD)
141 	PG_KEYWORD("backward", K_BACKWARD, UNRESERVED_KEYWORD)
142 	PG_KEYWORD("close", K_CLOSE, UNRESERVED_KEYWORD)
143 	PG_KEYWORD("collate", K_COLLATE, UNRESERVED_KEYWORD)
144 	PG_KEYWORD("column", K_COLUMN, UNRESERVED_KEYWORD)
145 	PG_KEYWORD("column_name", K_COLUMN_NAME, UNRESERVED_KEYWORD)
146 	PG_KEYWORD("constant", K_CONSTANT, UNRESERVED_KEYWORD)
147 	PG_KEYWORD("constraint", K_CONSTRAINT, UNRESERVED_KEYWORD)
148 	PG_KEYWORD("constraint_name", K_CONSTRAINT_NAME, UNRESERVED_KEYWORD)
149 	PG_KEYWORD("continue", K_CONTINUE, UNRESERVED_KEYWORD)
150 	PG_KEYWORD("current", K_CURRENT, UNRESERVED_KEYWORD)
151 	PG_KEYWORD("cursor", K_CURSOR, UNRESERVED_KEYWORD)
152 	PG_KEYWORD("datatype", K_DATATYPE, UNRESERVED_KEYWORD)
153 	PG_KEYWORD("debug", K_DEBUG, UNRESERVED_KEYWORD)
154 	PG_KEYWORD("default", K_DEFAULT, UNRESERVED_KEYWORD)
155 	PG_KEYWORD("detail", K_DETAIL, UNRESERVED_KEYWORD)
156 	PG_KEYWORD("diagnostics", K_DIAGNOSTICS, UNRESERVED_KEYWORD)
157 	PG_KEYWORD("dump", K_DUMP, UNRESERVED_KEYWORD)
158 	PG_KEYWORD("elseif", K_ELSIF, UNRESERVED_KEYWORD)
159 	PG_KEYWORD("elsif", K_ELSIF, UNRESERVED_KEYWORD)
160 	PG_KEYWORD("errcode", K_ERRCODE, UNRESERVED_KEYWORD)
161 	PG_KEYWORD("error", K_ERROR, UNRESERVED_KEYWORD)
162 	PG_KEYWORD("exception", K_EXCEPTION, UNRESERVED_KEYWORD)
163 	PG_KEYWORD("exit", K_EXIT, UNRESERVED_KEYWORD)
164 	PG_KEYWORD("fetch", K_FETCH, UNRESERVED_KEYWORD)
165 	PG_KEYWORD("first", K_FIRST, UNRESERVED_KEYWORD)
166 	PG_KEYWORD("forward", K_FORWARD, UNRESERVED_KEYWORD)
167 	PG_KEYWORD("get", K_GET, UNRESERVED_KEYWORD)
168 	PG_KEYWORD("hint", K_HINT, UNRESERVED_KEYWORD)
169 	PG_KEYWORD("import", K_IMPORT, UNRESERVED_KEYWORD)
170 	PG_KEYWORD("info", K_INFO, UNRESERVED_KEYWORD)
171 	PG_KEYWORD("insert", K_INSERT, UNRESERVED_KEYWORD)
172 	PG_KEYWORD("is", K_IS, UNRESERVED_KEYWORD)
173 	PG_KEYWORD("last", K_LAST, UNRESERVED_KEYWORD)
174 	PG_KEYWORD("log", K_LOG, UNRESERVED_KEYWORD)
175 	PG_KEYWORD("message", K_MESSAGE, UNRESERVED_KEYWORD)
176 	PG_KEYWORD("message_text", K_MESSAGE_TEXT, UNRESERVED_KEYWORD)
177 	PG_KEYWORD("move", K_MOVE, UNRESERVED_KEYWORD)
178 	PG_KEYWORD("next", K_NEXT, UNRESERVED_KEYWORD)
179 	PG_KEYWORD("no", K_NO, UNRESERVED_KEYWORD)
180 	PG_KEYWORD("notice", K_NOTICE, UNRESERVED_KEYWORD)
181 	PG_KEYWORD("open", K_OPEN, UNRESERVED_KEYWORD)
182 	PG_KEYWORD("option", K_OPTION, UNRESERVED_KEYWORD)
183 	PG_KEYWORD("perform", K_PERFORM, UNRESERVED_KEYWORD)
184 	PG_KEYWORD("pg_context", K_PG_CONTEXT, UNRESERVED_KEYWORD)
185 	PG_KEYWORD("pg_datatype_name", K_PG_DATATYPE_NAME, UNRESERVED_KEYWORD)
186 	PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT, UNRESERVED_KEYWORD)
187 	PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL, UNRESERVED_KEYWORD)
188 	PG_KEYWORD("pg_exception_hint", K_PG_EXCEPTION_HINT, UNRESERVED_KEYWORD)
189 	PG_KEYWORD("print_strict_params", K_PRINT_STRICT_PARAMS, UNRESERVED_KEYWORD)
190 	PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD)
191 	PG_KEYWORD("query", K_QUERY, UNRESERVED_KEYWORD)
192 	PG_KEYWORD("raise", K_RAISE, UNRESERVED_KEYWORD)
193 	PG_KEYWORD("relative", K_RELATIVE, UNRESERVED_KEYWORD)
194 	PG_KEYWORD("result_oid", K_RESULT_OID, UNRESERVED_KEYWORD)
195 	PG_KEYWORD("return", K_RETURN, UNRESERVED_KEYWORD)
196 	PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE, UNRESERVED_KEYWORD)
197 	PG_KEYWORD("reverse", K_REVERSE, UNRESERVED_KEYWORD)
198 	PG_KEYWORD("row_count", K_ROW_COUNT, UNRESERVED_KEYWORD)
199 	PG_KEYWORD("rowtype", K_ROWTYPE, UNRESERVED_KEYWORD)
200 	PG_KEYWORD("schema", K_SCHEMA, UNRESERVED_KEYWORD)
201 	PG_KEYWORD("schema_name", K_SCHEMA_NAME, UNRESERVED_KEYWORD)
202 	PG_KEYWORD("scroll", K_SCROLL, UNRESERVED_KEYWORD)
203 	PG_KEYWORD("slice", K_SLICE, UNRESERVED_KEYWORD)
204 	PG_KEYWORD("sqlstate", K_SQLSTATE, UNRESERVED_KEYWORD)
205 	PG_KEYWORD("stacked", K_STACKED, UNRESERVED_KEYWORD)
206 	PG_KEYWORD("table", K_TABLE, UNRESERVED_KEYWORD)
207 	PG_KEYWORD("table_name", K_TABLE_NAME, UNRESERVED_KEYWORD)
208 	PG_KEYWORD("type", K_TYPE, UNRESERVED_KEYWORD)
209 	PG_KEYWORD("use_column", K_USE_COLUMN, UNRESERVED_KEYWORD)
210 	PG_KEYWORD("use_variable", K_USE_VARIABLE, UNRESERVED_KEYWORD)
211 	PG_KEYWORD("variable_conflict", K_VARIABLE_CONFLICT, UNRESERVED_KEYWORD)
212 	PG_KEYWORD("warning", K_WARNING, UNRESERVED_KEYWORD)
213 };
214 
215 static const int num_unreserved_keywords = lengthof(unreserved_keywords);
216 
/*
 * True when "tok" is a token that can come directly before a PL/pgSQL
 * executable statement (proc_sect or proc_stmt in the grammar).  The set
 * is small, so it is simply enumerated here.  Note that the argument is
 * evaluated more than once.
 */
#define AT_STMT_START(tok) \
	((tok) == ';' || (tok) == K_BEGIN || (tok) == K_THEN || \
	 (tok) == K_ELSE || (tok) == K_LOOP)
229 
230 
231 /* Auxiliary data about a token (other than the token type) */
232 typedef struct
233 {
234 	YYSTYPE		lval;			/* semantic information */
235 	YYLTYPE		lloc;			/* offset in scanbuf */
236 	int			leng;			/* length in bytes */
237 } TokenAuxData;
238 
239 /*
240  * Scanner working state.  At some point we might wish to fold all this
241  * into a YY_EXTRA struct.  For the moment, there is no need for plpgsql's
242  * lexer to be re-entrant, and the notational burden of passing a yyscanner
243  * pointer around is great enough to not want to do it without need.
244  */
245 
246 /* The stuff the core lexer needs */
247 static __thread core_yyscan_t yyscanner = NULL;
248 
249 static __thread core_yy_extra_type core_yy;
250 
251 
252 /* The original input string */
253 static __thread const char *scanorig;
254 
255 
256 /* Current token's length (corresponds to plpgsql_yylval and plpgsql_yylloc) */
257 static __thread int	plpgsql_yyleng;
258 
259 
260 /* Current token's code (corresponds to plpgsql_yylval and plpgsql_yylloc) */
261 static __thread int	plpgsql_yytoken;
262 
263 
264 /* Token pushback stack */
265 #define MAX_PUSHBACKS 4
266 
267 static __thread int	num_pushbacks;
268 
269 static __thread int	pushback_token[MAX_PUSHBACKS];
270 
271 static __thread TokenAuxData pushback_auxdata[MAX_PUSHBACKS];
272 
273 
274 /* State for plpgsql_location_to_lineno() */
275 static __thread const char *cur_line_start;
276 
277 static __thread const char *cur_line_end;
278 
279 static __thread int	cur_line_num;
280 
281 
282 /* Internal functions */
283 static int	internal_yylex(TokenAuxData *auxdata);
284 static void push_back_token(int token, TokenAuxData *auxdata);
285 static void location_lineno_init(void);
286 
287 
288 /*
289  * This is the yylex routine called from the PL/pgSQL grammar.
290  * It is a wrapper around the core lexer, with the ability to recognize
291  * PL/pgSQL variables and return them as special T_DATUM tokens.  If a
292  * word or compound word does not match any variable name, or if matching
293  * is turned off by plpgsql_IdentifierLookup, it is returned as
294  * T_WORD or T_CWORD respectively, or as an unreserved keyword if it
295  * matches one of those.
296  */
297 int
298 plpgsql_yylex(void)
299 {
300 	int			tok1;
301 	TokenAuxData aux1;
302 	const ScanKeyword *kw;
303 
304 	tok1 = internal_yylex(&aux1);
305 	if (tok1 == IDENT || tok1 == PARAM)
306 	{
307 		int			tok2;
308 		TokenAuxData aux2;
309 
310 		tok2 = internal_yylex(&aux2);
311 		if (tok2 == '.')
312 		{
313 			int			tok3;
314 			TokenAuxData aux3;
315 
316 			tok3 = internal_yylex(&aux3);
317 			if (tok3 == IDENT)
318 			{
319 				int			tok4;
320 				TokenAuxData aux4;
321 
322 				tok4 = internal_yylex(&aux4);
323 				if (tok4 == '.')
324 				{
325 					int			tok5;
326 					TokenAuxData aux5;
327 
328 					tok5 = internal_yylex(&aux5);
329 					if (tok5 == IDENT)
330 					{
331 						if (plpgsql_parse_tripword(aux1.lval.str,
332 												   aux3.lval.str,
333 												   aux5.lval.str,
334 												   &aux1.lval.wdatum,
335 												   &aux1.lval.cword))
336 							tok1 = T_DATUM;
337 						else
338 							tok1 = T_CWORD;
339 					}
340 					else
341 					{
342 						/* not A.B.C, so just process A.B */
343 						push_back_token(tok5, &aux5);
344 						push_back_token(tok4, &aux4);
345 						if (plpgsql_parse_dblword(aux1.lval.str,
346 												  aux3.lval.str,
347 												  &aux1.lval.wdatum,
348 												  &aux1.lval.cword))
349 							tok1 = T_DATUM;
350 						else
351 							tok1 = T_CWORD;
352 					}
353 				}
354 				else
355 				{
356 					/* not A.B.C, so just process A.B */
357 					push_back_token(tok4, &aux4);
358 					if (plpgsql_parse_dblword(aux1.lval.str,
359 											  aux3.lval.str,
360 											  &aux1.lval.wdatum,
361 											  &aux1.lval.cword))
362 						tok1 = T_DATUM;
363 					else
364 						tok1 = T_CWORD;
365 				}
366 			}
367 			else
368 			{
369 				/* not A.B, so just process A */
370 				push_back_token(tok3, &aux3);
371 				push_back_token(tok2, &aux2);
372 				if (plpgsql_parse_word(aux1.lval.str,
373 									   core_yy.scanbuf + aux1.lloc,
374 									   &aux1.lval.wdatum,
375 									   &aux1.lval.word))
376 					tok1 = T_DATUM;
377 				else if (!aux1.lval.word.quoted &&
378 						 (kw = ScanKeywordLookup(aux1.lval.word.ident,
379 												 unreserved_keywords,
380 												 num_unreserved_keywords)))
381 				{
382 					aux1.lval.keyword = kw->name;
383 					tok1 = kw->value;
384 				}
385 				else
386 					tok1 = T_WORD;
387 			}
388 		}
389 		else
390 		{
391 			/* not A.B, so just process A */
392 			push_back_token(tok2, &aux2);
393 
394 			/*
395 			 * If we are at start of statement, prefer unreserved keywords
396 			 * over variable names, unless the next token is assignment or
397 			 * '[', in which case prefer variable names.  (Note we need not
398 			 * consider '.' as the next token; that case was handled above,
399 			 * and we always prefer variable names in that case.)  If we are
400 			 * not at start of statement, always prefer variable names over
401 			 * unreserved keywords.
402 			 */
403 			if (AT_STMT_START(plpgsql_yytoken) &&
404 				!(tok2 == '=' || tok2 == COLON_EQUALS || tok2 == '['))
405 			{
406 				/* try for unreserved keyword, then for variable name */
407 				if (core_yy.scanbuf[aux1.lloc] != '"' &&
408 					(kw = ScanKeywordLookup(aux1.lval.str,
409 											unreserved_keywords,
410 											num_unreserved_keywords)))
411 				{
412 					aux1.lval.keyword = kw->name;
413 					tok1 = kw->value;
414 				}
415 				else if (plpgsql_parse_word(aux1.lval.str,
416 											core_yy.scanbuf + aux1.lloc,
417 											&aux1.lval.wdatum,
418 											&aux1.lval.word))
419 					tok1 = T_DATUM;
420 				else
421 					tok1 = T_WORD;
422 			}
423 			else
424 			{
425 				/* try for variable name, then for unreserved keyword */
426 				if (plpgsql_parse_word(aux1.lval.str,
427 									   core_yy.scanbuf + aux1.lloc,
428 									   &aux1.lval.wdatum,
429 									   &aux1.lval.word))
430 					tok1 = T_DATUM;
431 				else if (!aux1.lval.word.quoted &&
432 						 (kw = ScanKeywordLookup(aux1.lval.word.ident,
433 												 unreserved_keywords,
434 												 num_unreserved_keywords)))
435 				{
436 					aux1.lval.keyword = kw->name;
437 					tok1 = kw->value;
438 				}
439 				else
440 					tok1 = T_WORD;
441 			}
442 		}
443 	}
444 	else
445 	{
446 		/*
447 		 * Not a potential plpgsql variable name, just return the data.
448 		 *
449 		 * Note that we also come through here if the grammar pushed back a
450 		 * T_DATUM, T_CWORD, T_WORD, or unreserved-keyword token returned by a
451 		 * previous lookup cycle; thus, pushbacks do not incur extra lookup
452 		 * work, since we'll never do the above code twice for the same token.
453 		 * This property also makes it safe to rely on the old value of
454 		 * plpgsql_yytoken in the is-this-start-of-statement test above.
455 		 */
456 	}
457 
458 	plpgsql_yylval = aux1.lval;
459 	plpgsql_yylloc = aux1.lloc;
460 	plpgsql_yyleng = aux1.leng;
461 	plpgsql_yytoken = tok1;
462 	return tok1;
463 }
464 
465 /*
466  * Internal yylex function.  This wraps the core lexer and adds one feature:
467  * a token pushback stack.  We also make a couple of trivial single-token
468  * translations from what the core lexer does to what we want, in particular
469  * interfacing from the core_YYSTYPE to YYSTYPE union.
470  */
471 static int
472 internal_yylex(TokenAuxData *auxdata)
473 {
474 	int			token;
475 	const char *yytext;
476 
477 	if (num_pushbacks > 0)
478 	{
479 		num_pushbacks--;
480 		token = pushback_token[num_pushbacks];
481 		*auxdata = pushback_auxdata[num_pushbacks];
482 	}
483 	else
484 	{
485 		token = core_yylex(&auxdata->lval.core_yystype,
486 						   &auxdata->lloc,
487 						   yyscanner);
488 
489 		/* remember the length of yytext before it gets changed */
490 		yytext = core_yy.scanbuf + auxdata->lloc;
491 		auxdata->leng = strlen(yytext);
492 
493 		/* Check for << >> and #, which the core considers operators */
494 		if (token == Op)
495 		{
496 			if (strcmp(auxdata->lval.str, "<<") == 0)
497 				token = LESS_LESS;
498 			else if (strcmp(auxdata->lval.str, ">>") == 0)
499 				token = GREATER_GREATER;
500 			else if (strcmp(auxdata->lval.str, "#") == 0)
501 				token = '#';
502 		}
503 
504 		/* The core returns PARAM as ival, but we treat it like IDENT */
505 		else if (token == PARAM)
506 		{
507 			auxdata->lval.str = pstrdup(yytext);
508 		}
509 	}
510 
511 	return token;
512 }
513 
514 /*
515  * Push back a token to be re-read by next internal_yylex() call.
516  */
517 static void
518 push_back_token(int token, TokenAuxData *auxdata)
519 {
520 	if (num_pushbacks >= MAX_PUSHBACKS)
521 		elog(ERROR, "too many tokens pushed back");
522 	pushback_token[num_pushbacks] = token;
523 	pushback_auxdata[num_pushbacks] = *auxdata;
524 	num_pushbacks++;
525 }
526 
527 /*
528  * Push back a single token to be re-read by next plpgsql_yylex() call.
529  *
530  * NOTE: this does not cause yylval or yylloc to "back up".  Also, it
531  * is not a good idea to push back a token code other than what you read.
532  */
533 void
534 plpgsql_push_back_token(int token)
535 {
536 	TokenAuxData auxdata;
537 
538 	auxdata.lval = plpgsql_yylval;
539 	auxdata.lloc = plpgsql_yylloc;
540 	auxdata.leng = plpgsql_yyleng;
541 	push_back_token(token, &auxdata);
542 }
543 
544 /*
545  * Tell whether a token is an unreserved keyword.
546  *
547  * (If it is, its lowercased form was returned as the token value, so we
548  * do not need to offer that data here.)
549  */
550 bool
551 plpgsql_token_is_unreserved_keyword(int token)
552 {
553 	int			i;
554 
555 	for (i = 0; i < num_unreserved_keywords; i++)
556 	{
557 		if (unreserved_keywords[i].value == token)
558 			return true;
559 	}
560 	return false;
561 }
562 
563 /*
564  * Append the function text starting at startlocation and extending to
565  * (not including) endlocation onto the existing contents of "buf".
566  */
567 void
568 plpgsql_append_source_text(StringInfo buf,
569 						   int startlocation, int endlocation)
570 {
571 	Assert(startlocation <= endlocation);
572 	appendBinaryStringInfo(buf, scanorig + startlocation,
573 						   endlocation - startlocation);
574 }
575 
576 /*
577  * Peek one token ahead in the input stream.  Only the token code is
578  * made available, not any of the auxiliary info such as location.
579  *
580  * NB: no variable or unreserved keyword lookup is performed here, they will
581  * be returned as IDENT. Reserved keywords are resolved as usual.
582  */
583 int
584 plpgsql_peek(void)
585 {
586 	int			tok1;
587 	TokenAuxData aux1;
588 
589 	tok1 = internal_yylex(&aux1);
590 	push_back_token(tok1, &aux1);
591 	return tok1;
592 }
593 
594 /*
595  * Peek two tokens ahead in the input stream. The first token and its
596  * location in the query are returned in *tok1_p and *tok1_loc, second token
597  * and its location in *tok2_p and *tok2_loc.
598  *
599  * NB: no variable or unreserved keyword lookup is performed here, they will
600  * be returned as IDENT. Reserved keywords are resolved as usual.
601  */
602 void
603 plpgsql_peek2(int *tok1_p, int *tok2_p, int *tok1_loc, int *tok2_loc)
604 {
605 	int			tok1,
606 				tok2;
607 	TokenAuxData aux1,
608 				aux2;
609 
610 	tok1 = internal_yylex(&aux1);
611 	tok2 = internal_yylex(&aux2);
612 
613 	*tok1_p = tok1;
614 	if (tok1_loc)
615 		*tok1_loc = aux1.lloc;
616 	*tok2_p = tok2;
617 	if (tok2_loc)
618 		*tok2_loc = aux2.lloc;
619 
620 	push_back_token(tok2, &aux2);
621 	push_back_token(tok1, &aux1);
622 }
623 
624 /*
625  * plpgsql_scanner_errposition
626  *		Report an error cursor position, if possible.
627  *
628  * This is expected to be used within an ereport() call.  The return value
629  * is a dummy (always 0, in fact).
630  *
631  * Note that this can only be used for messages emitted during initial
632  * parsing of a plpgsql function, since it requires the scanorig string
633  * to still be available.
634  */
635 int
636 plpgsql_scanner_errposition(int location)
637 {
638 	int			pos;
639 
640 	if (location < 0 || scanorig == NULL)
641 		return 0;				/* no-op if location is unknown */
642 
643 	/* Convert byte offset to character number */
644 	pos = pg_mbstrlen_with_len(scanorig, location) + 1;
645 	/* And pass it to the ereport mechanism */
646 	(void) internalerrposition(pos);
647 	/* Also pass the function body string */
648 	return internalerrquery(scanorig);
649 }
650 
651 /*
652  * plpgsql_yyerror
653  *		Report a lexer or grammar error.
654  *
655  * The message's cursor position refers to the current token (the one
656  * last returned by plpgsql_yylex()).
657  * This is OK for syntax error messages from the Bison parser, because Bison
658  * parsers report error as soon as the first unparsable token is reached.
659  * Beware of using yyerror for other purposes, as the cursor position might
660  * be misleading!
661  */
662 void
663 plpgsql_yyerror(const char *message)
664 {
665 	char	   *yytext = core_yy.scanbuf + plpgsql_yylloc;
666 
667 	if (*yytext == '\0')
668 	{
669 		ereport(ERROR,
670 				(errcode(ERRCODE_SYNTAX_ERROR),
671 		/* translator: %s is typically the translation of "syntax error" */
672 				 errmsg("%s at end of input", _(message)),
673 				 plpgsql_scanner_errposition(plpgsql_yylloc)));
674 	}
675 	else
676 	{
677 		/*
678 		 * If we have done any lookahead then flex will have restored the
679 		 * character after the end-of-token.  Zap it again so that we report
680 		 * only the single token here.  This modifies scanbuf but we no longer
681 		 * care about that.
682 		 */
683 		yytext[plpgsql_yyleng] = '\0';
684 
685 		ereport(ERROR,
686 				(errcode(ERRCODE_SYNTAX_ERROR),
687 		/* translator: first %s is typically the translation of "syntax error" */
688 				 errmsg("%s at or near \"%s\"", _(message), yytext),
689 				 plpgsql_scanner_errposition(plpgsql_yylloc)));
690 	}
691 }
692 
693 /*
694  * Given a location (a byte offset in the function source text),
695  * return a line number.
696  *
697  * We expect that this is typically called for a sequence of increasing
698  * location values, so optimize accordingly by tracking the endpoints
699  * of the "current" line.
700  */
701 int
702 plpgsql_location_to_lineno(int location)
703 {
704 	const char *loc;
705 
706 	if (location < 0 || scanorig == NULL)
707 		return 0;				/* garbage in, garbage out */
708 	loc = scanorig + location;
709 
710 	/* be correct, but not fast, if input location goes backwards */
711 	if (loc < cur_line_start)
712 		location_lineno_init();
713 
714 	while (cur_line_end != NULL && loc > cur_line_end)
715 	{
716 		cur_line_start = cur_line_end + 1;
717 		cur_line_num++;
718 		cur_line_end = strchr(cur_line_start, '\n');
719 	}
720 
721 	return cur_line_num;
722 }
723 
724 /* initialize or reset the state for plpgsql_location_to_lineno */
725 static void
726 location_lineno_init(void)
727 {
728 	cur_line_start = scanorig;
729 	cur_line_num = 1;
730 
731 	cur_line_end = strchr(cur_line_start, '\n');
732 }
733 
734 /* return the most recently computed lineno */
735 int
736 plpgsql_latest_lineno(void)
737 {
738 	return cur_line_num;
739 }
740 
741 
742 /*
743  * Called before any actual parsing is done
744  *
745  * Note: the passed "str" must remain valid until plpgsql_scanner_finish().
746  * Although it is not fed directly to flex, we need the original string
747  * to cite in error messages.
748  */
749 void
750 plpgsql_scanner_init(const char *str)
751 {
752 	/* Start up the core scanner */
753 	yyscanner = scanner_init(str, &core_yy,
754 							 reserved_keywords, num_reserved_keywords);
755 
756 	/*
757 	 * scanorig points to the original string, which unlike the scanner's
758 	 * scanbuf won't be modified on-the-fly by flex.  Notice that although
759 	 * yytext points into scanbuf, we rely on being able to apply locations
760 	 * (offsets from string start) to scanorig as well.
761 	 */
762 	scanorig = str;
763 
764 	/* Other setup */
765 	plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
766 	plpgsql_yytoken = 0;
767 
768 	num_pushbacks = 0;
769 
770 	location_lineno_init();
771 }
772 
773 /*
774  * Called after parsing is done to clean up after plpgsql_scanner_init()
775  */
776 void
777 plpgsql_scanner_finish(void)
778 {
779 	/* release storage */
780 	scanner_finish(yyscanner);
781 	/* avoid leaving any dangling pointers */
782 	yyscanner = NULL;
783 	scanorig = NULL;
784 }
785