1 /*-------------------------------------------------------------------------
2 *
3 * pl_scanner.c
4 * lexical scanning for PL/pgSQL
5 *
6 *
7 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 *
11 * IDENTIFICATION
12 * src/pl/plpgsql/src/pl_scanner.c
13 *
14 *-------------------------------------------------------------------------
15 */
16 #include "postgres.h"
17
18 #include "mb/pg_wchar.h"
19 #include "parser/scanner.h"
20
21 #include "plpgsql.h"
22 #include "pl_gram.h" /* must be after parser/scanner.h */
23
24
25 #define PG_KEYWORD(a,b,c) {a,b,c},
26
27
28 /* Klugy flag to tell scanner how to look up identifiers */
29 IdentifierLookup plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
30
31 /*
32 * A word about keywords:
33 *
34 * We keep reserved and unreserved keywords in separate arrays. The
35 * reserved keywords are passed to the core scanner, so they will be
36 * recognized before (and instead of) any variable name. Unreserved words
37 * are checked for separately, usually after determining that the identifier
38 * isn't a known variable name. If plpgsql_IdentifierLookup is DECLARE then
39 * no variable names will be recognized, so the unreserved words always work.
40 * (Note in particular that this helps us avoid reserving keywords that are
41 * only needed in DECLARE sections.)
42 *
43 * In certain contexts it is desirable to prefer recognizing an unreserved
44 * keyword over recognizing a variable name. In particular, at the start
45 * of a statement we should prefer unreserved keywords unless the statement
46 * looks like an assignment (i.e., first token is followed by ':=' or '[').
47 * This rule allows most statement-introducing keywords to be kept unreserved.
48 * (We still have to reserve initial keywords that might follow a block
49 * label, unfortunately, since the method used to determine if we are at
50 * start of statement doesn't recognize such cases. We'd also have to
51 * reserve any keyword that could legitimately be followed by ':=' or '['.)
52 * Some additional cases are handled in pl_gram.y using tok_is_keyword().
53 *
54 * We try to avoid reserving more keywords than we have to; but there's
55 * little point in not reserving a word if it's reserved in the core grammar.
56 * Currently, the following words are reserved here but not in the core:
57 * BEGIN BY DECLARE EXECUTE FOREACH IF LOOP STRICT WHILE
58 */
59
60 /*
61 * Lists of keyword (name, token-value, category) entries.
62 *
63 * !!WARNING!!: These lists must be sorted by ASCII name, because binary
64 * search is used to locate entries.
65 *
66 * Be careful not to put the same word in both lists. Also be sure that
67 * pl_gram.y's unreserved_keyword production agrees with the second list.
68 */
69
70 static const ScanKeyword reserved_keywords[] = {
71 PG_KEYWORD("all", K_ALL, RESERVED_KEYWORD)
72 PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD)
73 PG_KEYWORD("by", K_BY, RESERVED_KEYWORD)
74 PG_KEYWORD("case", K_CASE, RESERVED_KEYWORD)
75 PG_KEYWORD("declare", K_DECLARE, RESERVED_KEYWORD)
76 PG_KEYWORD("else", K_ELSE, RESERVED_KEYWORD)
77 PG_KEYWORD("end", K_END, RESERVED_KEYWORD)
78 PG_KEYWORD("execute", K_EXECUTE, RESERVED_KEYWORD)
79 PG_KEYWORD("for", K_FOR, RESERVED_KEYWORD)
80 PG_KEYWORD("foreach", K_FOREACH, RESERVED_KEYWORD)
81 PG_KEYWORD("from", K_FROM, RESERVED_KEYWORD)
82 PG_KEYWORD("if", K_IF, RESERVED_KEYWORD)
83 PG_KEYWORD("in", K_IN, RESERVED_KEYWORD)
84 PG_KEYWORD("into", K_INTO, RESERVED_KEYWORD)
85 PG_KEYWORD("loop", K_LOOP, RESERVED_KEYWORD)
86 PG_KEYWORD("not", K_NOT, RESERVED_KEYWORD)
87 PG_KEYWORD("null", K_NULL, RESERVED_KEYWORD)
88 PG_KEYWORD("or", K_OR, RESERVED_KEYWORD)
89 PG_KEYWORD("strict", K_STRICT, RESERVED_KEYWORD)
90 PG_KEYWORD("then", K_THEN, RESERVED_KEYWORD)
91 PG_KEYWORD("to", K_TO, RESERVED_KEYWORD)
92 PG_KEYWORD("using", K_USING, RESERVED_KEYWORD)
93 PG_KEYWORD("when", K_WHEN, RESERVED_KEYWORD)
94 PG_KEYWORD("while", K_WHILE, RESERVED_KEYWORD)
95 };
96
97 static const int num_reserved_keywords = lengthof(reserved_keywords);
98
99 static const ScanKeyword unreserved_keywords[] = {
100 PG_KEYWORD("absolute", K_ABSOLUTE, UNRESERVED_KEYWORD)
101 PG_KEYWORD("alias", K_ALIAS, UNRESERVED_KEYWORD)
102 PG_KEYWORD("array", K_ARRAY, UNRESERVED_KEYWORD)
103 PG_KEYWORD("assert", K_ASSERT, UNRESERVED_KEYWORD)
104 PG_KEYWORD("backward", K_BACKWARD, UNRESERVED_KEYWORD)
105 PG_KEYWORD("close", K_CLOSE, UNRESERVED_KEYWORD)
106 PG_KEYWORD("collate", K_COLLATE, UNRESERVED_KEYWORD)
107 PG_KEYWORD("column", K_COLUMN, UNRESERVED_KEYWORD)
108 PG_KEYWORD("column_name", K_COLUMN_NAME, UNRESERVED_KEYWORD)
109 PG_KEYWORD("constant", K_CONSTANT, UNRESERVED_KEYWORD)
110 PG_KEYWORD("constraint", K_CONSTRAINT, UNRESERVED_KEYWORD)
111 PG_KEYWORD("constraint_name", K_CONSTRAINT_NAME, UNRESERVED_KEYWORD)
112 PG_KEYWORD("continue", K_CONTINUE, UNRESERVED_KEYWORD)
113 PG_KEYWORD("current", K_CURRENT, UNRESERVED_KEYWORD)
114 PG_KEYWORD("cursor", K_CURSOR, UNRESERVED_KEYWORD)
115 PG_KEYWORD("datatype", K_DATATYPE, UNRESERVED_KEYWORD)
116 PG_KEYWORD("debug", K_DEBUG, UNRESERVED_KEYWORD)
117 PG_KEYWORD("default", K_DEFAULT, UNRESERVED_KEYWORD)
118 PG_KEYWORD("detail", K_DETAIL, UNRESERVED_KEYWORD)
119 PG_KEYWORD("diagnostics", K_DIAGNOSTICS, UNRESERVED_KEYWORD)
120 PG_KEYWORD("dump", K_DUMP, UNRESERVED_KEYWORD)
121 PG_KEYWORD("elseif", K_ELSIF, UNRESERVED_KEYWORD)
122 PG_KEYWORD("elsif", K_ELSIF, UNRESERVED_KEYWORD)
123 PG_KEYWORD("errcode", K_ERRCODE, UNRESERVED_KEYWORD)
124 PG_KEYWORD("error", K_ERROR, UNRESERVED_KEYWORD)
125 PG_KEYWORD("exception", K_EXCEPTION, UNRESERVED_KEYWORD)
126 PG_KEYWORD("exit", K_EXIT, UNRESERVED_KEYWORD)
127 PG_KEYWORD("fetch", K_FETCH, UNRESERVED_KEYWORD)
128 PG_KEYWORD("first", K_FIRST, UNRESERVED_KEYWORD)
129 PG_KEYWORD("forward", K_FORWARD, UNRESERVED_KEYWORD)
130 PG_KEYWORD("get", K_GET, UNRESERVED_KEYWORD)
131 PG_KEYWORD("hint", K_HINT, UNRESERVED_KEYWORD)
132 PG_KEYWORD("import", K_IMPORT, UNRESERVED_KEYWORD)
133 PG_KEYWORD("info", K_INFO, UNRESERVED_KEYWORD)
134 PG_KEYWORD("insert", K_INSERT, UNRESERVED_KEYWORD)
135 PG_KEYWORD("is", K_IS, UNRESERVED_KEYWORD)
136 PG_KEYWORD("last", K_LAST, UNRESERVED_KEYWORD)
137 PG_KEYWORD("log", K_LOG, UNRESERVED_KEYWORD)
138 PG_KEYWORD("message", K_MESSAGE, UNRESERVED_KEYWORD)
139 PG_KEYWORD("message_text", K_MESSAGE_TEXT, UNRESERVED_KEYWORD)
140 PG_KEYWORD("move", K_MOVE, UNRESERVED_KEYWORD)
141 PG_KEYWORD("next", K_NEXT, UNRESERVED_KEYWORD)
142 PG_KEYWORD("no", K_NO, UNRESERVED_KEYWORD)
143 PG_KEYWORD("notice", K_NOTICE, UNRESERVED_KEYWORD)
144 PG_KEYWORD("open", K_OPEN, UNRESERVED_KEYWORD)
145 PG_KEYWORD("option", K_OPTION, UNRESERVED_KEYWORD)
146 PG_KEYWORD("perform", K_PERFORM, UNRESERVED_KEYWORD)
147 PG_KEYWORD("pg_context", K_PG_CONTEXT, UNRESERVED_KEYWORD)
148 PG_KEYWORD("pg_datatype_name", K_PG_DATATYPE_NAME, UNRESERVED_KEYWORD)
149 PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT, UNRESERVED_KEYWORD)
150 PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL, UNRESERVED_KEYWORD)
151 PG_KEYWORD("pg_exception_hint", K_PG_EXCEPTION_HINT, UNRESERVED_KEYWORD)
152 PG_KEYWORD("print_strict_params", K_PRINT_STRICT_PARAMS, UNRESERVED_KEYWORD)
153 PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD)
154 PG_KEYWORD("query", K_QUERY, UNRESERVED_KEYWORD)
155 PG_KEYWORD("raise", K_RAISE, UNRESERVED_KEYWORD)
156 PG_KEYWORD("relative", K_RELATIVE, UNRESERVED_KEYWORD)
157 PG_KEYWORD("result_oid", K_RESULT_OID, UNRESERVED_KEYWORD)
158 PG_KEYWORD("return", K_RETURN, UNRESERVED_KEYWORD)
159 PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE, UNRESERVED_KEYWORD)
160 PG_KEYWORD("reverse", K_REVERSE, UNRESERVED_KEYWORD)
161 PG_KEYWORD("row_count", K_ROW_COUNT, UNRESERVED_KEYWORD)
162 PG_KEYWORD("rowtype", K_ROWTYPE, UNRESERVED_KEYWORD)
163 PG_KEYWORD("schema", K_SCHEMA, UNRESERVED_KEYWORD)
164 PG_KEYWORD("schema_name", K_SCHEMA_NAME, UNRESERVED_KEYWORD)
165 PG_KEYWORD("scroll", K_SCROLL, UNRESERVED_KEYWORD)
166 PG_KEYWORD("slice", K_SLICE, UNRESERVED_KEYWORD)
167 PG_KEYWORD("sqlstate", K_SQLSTATE, UNRESERVED_KEYWORD)
168 PG_KEYWORD("stacked", K_STACKED, UNRESERVED_KEYWORD)
169 PG_KEYWORD("table", K_TABLE, UNRESERVED_KEYWORD)
170 PG_KEYWORD("table_name", K_TABLE_NAME, UNRESERVED_KEYWORD)
171 PG_KEYWORD("type", K_TYPE, UNRESERVED_KEYWORD)
172 PG_KEYWORD("use_column", K_USE_COLUMN, UNRESERVED_KEYWORD)
173 PG_KEYWORD("use_variable", K_USE_VARIABLE, UNRESERVED_KEYWORD)
174 PG_KEYWORD("variable_conflict", K_VARIABLE_CONFLICT, UNRESERVED_KEYWORD)
175 PG_KEYWORD("warning", K_WARNING, UNRESERVED_KEYWORD)
176 };
177
178 static const int num_unreserved_keywords = lengthof(unreserved_keywords);
179
/*
 * AT_STMT_START(tok)
 *		True if "tok" is a token that can come directly before a PL/pgSQL
 *		executable statement (proc_sect or proc_stmt in the grammar).
 *
 * The set of such tokens is small, so it is simply enumerated here; it must
 * be kept complete if the grammar changes.  The argument may be evaluated
 * more than once, so it must not have side effects.
 */
#define AT_STMT_START(tok) \
	((tok) == ';' || \
	 (tok) == K_BEGIN || \
	 (tok) == K_THEN || \
	 (tok) == K_ELSE || \
	 (tok) == K_LOOP)
192
193
194 /* Auxiliary data about a token (other than the token type) */
195 typedef struct
196 {
197 YYSTYPE lval; /* semantic information */
198 YYLTYPE lloc; /* offset in scanbuf */
199 int leng; /* length in bytes */
200 } TokenAuxData;
201
202 /*
203 * Scanner working state. At some point we might wish to fold all this
204 * into a YY_EXTRA struct. For the moment, there is no need for plpgsql's
205 * lexer to be re-entrant, and the notational burden of passing a yyscanner
206 * pointer around is great enough to not want to do it without need.
207 */
208
209 /* The stuff the core lexer needs */
210 static core_yyscan_t yyscanner = NULL;
211 static core_yy_extra_type core_yy;
212
213 /* The original input string */
214 static const char *scanorig;
215
216 /* Current token's length (corresponds to plpgsql_yylval and plpgsql_yylloc) */
217 static int plpgsql_yyleng;
218
219 /* Current token's code (corresponds to plpgsql_yylval and plpgsql_yylloc) */
220 static int plpgsql_yytoken;
221
222 /* Token pushback stack */
223 #define MAX_PUSHBACKS 4
224
225 static int num_pushbacks;
226 static int pushback_token[MAX_PUSHBACKS];
227 static TokenAuxData pushback_auxdata[MAX_PUSHBACKS];
228
229 /* State for plpgsql_location_to_lineno() */
230 static const char *cur_line_start;
231 static const char *cur_line_end;
232 static int cur_line_num;
233
234 /* Internal functions */
235 static int internal_yylex(TokenAuxData *auxdata);
236 static void push_back_token(int token, TokenAuxData *auxdata);
237 static void location_lineno_init(void);
238
239
240 /*
241 * This is the yylex routine called from the PL/pgSQL grammar.
242 * It is a wrapper around the core lexer, with the ability to recognize
243 * PL/pgSQL variables and return them as special T_DATUM tokens. If a
244 * word or compound word does not match any variable name, or if matching
245 * is turned off by plpgsql_IdentifierLookup, it is returned as
246 * T_WORD or T_CWORD respectively, or as an unreserved keyword if it
247 * matches one of those.
248 */
249 int
plpgsql_yylex(void)250 plpgsql_yylex(void)
251 {
252 int tok1;
253 TokenAuxData aux1;
254 const ScanKeyword *kw;
255
256 tok1 = internal_yylex(&aux1);
257 if (tok1 == IDENT || tok1 == PARAM)
258 {
259 int tok2;
260 TokenAuxData aux2;
261
262 tok2 = internal_yylex(&aux2);
263 if (tok2 == '.')
264 {
265 int tok3;
266 TokenAuxData aux3;
267
268 tok3 = internal_yylex(&aux3);
269 if (tok3 == IDENT)
270 {
271 int tok4;
272 TokenAuxData aux4;
273
274 tok4 = internal_yylex(&aux4);
275 if (tok4 == '.')
276 {
277 int tok5;
278 TokenAuxData aux5;
279
280 tok5 = internal_yylex(&aux5);
281 if (tok5 == IDENT)
282 {
283 if (plpgsql_parse_tripword(aux1.lval.str,
284 aux3.lval.str,
285 aux5.lval.str,
286 &aux1.lval.wdatum,
287 &aux1.lval.cword))
288 tok1 = T_DATUM;
289 else
290 tok1 = T_CWORD;
291 }
292 else
293 {
294 /* not A.B.C, so just process A.B */
295 push_back_token(tok5, &aux5);
296 push_back_token(tok4, &aux4);
297 if (plpgsql_parse_dblword(aux1.lval.str,
298 aux3.lval.str,
299 &aux1.lval.wdatum,
300 &aux1.lval.cword))
301 tok1 = T_DATUM;
302 else
303 tok1 = T_CWORD;
304 }
305 }
306 else
307 {
308 /* not A.B.C, so just process A.B */
309 push_back_token(tok4, &aux4);
310 if (plpgsql_parse_dblword(aux1.lval.str,
311 aux3.lval.str,
312 &aux1.lval.wdatum,
313 &aux1.lval.cword))
314 tok1 = T_DATUM;
315 else
316 tok1 = T_CWORD;
317 }
318 }
319 else
320 {
321 /* not A.B, so just process A */
322 push_back_token(tok3, &aux3);
323 push_back_token(tok2, &aux2);
324 if (plpgsql_parse_word(aux1.lval.str,
325 core_yy.scanbuf + aux1.lloc,
326 &aux1.lval.wdatum,
327 &aux1.lval.word))
328 tok1 = T_DATUM;
329 else if (!aux1.lval.word.quoted &&
330 (kw = ScanKeywordLookup(aux1.lval.word.ident,
331 unreserved_keywords,
332 num_unreserved_keywords)))
333 {
334 aux1.lval.keyword = kw->name;
335 tok1 = kw->value;
336 }
337 else
338 tok1 = T_WORD;
339 }
340 }
341 else
342 {
343 /* not A.B, so just process A */
344 push_back_token(tok2, &aux2);
345
346 /*
347 * If we are at start of statement, prefer unreserved keywords
348 * over variable names, unless the next token is assignment or
349 * '[', in which case prefer variable names. (Note we need not
350 * consider '.' as the next token; that case was handled above,
351 * and we always prefer variable names in that case.) If we are
352 * not at start of statement, always prefer variable names over
353 * unreserved keywords.
354 */
355 if (AT_STMT_START(plpgsql_yytoken) &&
356 !(tok2 == '=' || tok2 == COLON_EQUALS || tok2 == '['))
357 {
358 /* try for unreserved keyword, then for variable name */
359 if (core_yy.scanbuf[aux1.lloc] != '"' &&
360 (kw = ScanKeywordLookup(aux1.lval.str,
361 unreserved_keywords,
362 num_unreserved_keywords)))
363 {
364 aux1.lval.keyword = kw->name;
365 tok1 = kw->value;
366 }
367 else if (plpgsql_parse_word(aux1.lval.str,
368 core_yy.scanbuf + aux1.lloc,
369 &aux1.lval.wdatum,
370 &aux1.lval.word))
371 tok1 = T_DATUM;
372 else
373 tok1 = T_WORD;
374 }
375 else
376 {
377 /* try for variable name, then for unreserved keyword */
378 if (plpgsql_parse_word(aux1.lval.str,
379 core_yy.scanbuf + aux1.lloc,
380 &aux1.lval.wdatum,
381 &aux1.lval.word))
382 tok1 = T_DATUM;
383 else if (!aux1.lval.word.quoted &&
384 (kw = ScanKeywordLookup(aux1.lval.word.ident,
385 unreserved_keywords,
386 num_unreserved_keywords)))
387 {
388 aux1.lval.keyword = kw->name;
389 tok1 = kw->value;
390 }
391 else
392 tok1 = T_WORD;
393 }
394 }
395 }
396 else
397 {
398 /*
399 * Not a potential plpgsql variable name, just return the data.
400 *
401 * Note that we also come through here if the grammar pushed back a
402 * T_DATUM, T_CWORD, T_WORD, or unreserved-keyword token returned by a
403 * previous lookup cycle; thus, pushbacks do not incur extra lookup
404 * work, since we'll never do the above code twice for the same token.
405 * This property also makes it safe to rely on the old value of
406 * plpgsql_yytoken in the is-this-start-of-statement test above.
407 */
408 }
409
410 plpgsql_yylval = aux1.lval;
411 plpgsql_yylloc = aux1.lloc;
412 plpgsql_yyleng = aux1.leng;
413 plpgsql_yytoken = tok1;
414 return tok1;
415 }
416
417 /*
418 * Internal yylex function. This wraps the core lexer and adds one feature:
419 * a token pushback stack. We also make a couple of trivial single-token
420 * translations from what the core lexer does to what we want, in particular
421 * interfacing from the core_YYSTYPE to YYSTYPE union.
422 */
423 static int
internal_yylex(TokenAuxData * auxdata)424 internal_yylex(TokenAuxData *auxdata)
425 {
426 int token;
427 const char *yytext;
428
429 if (num_pushbacks > 0)
430 {
431 num_pushbacks--;
432 token = pushback_token[num_pushbacks];
433 *auxdata = pushback_auxdata[num_pushbacks];
434 }
435 else
436 {
437 token = core_yylex(&auxdata->lval.core_yystype,
438 &auxdata->lloc,
439 yyscanner);
440
441 /* remember the length of yytext before it gets changed */
442 yytext = core_yy.scanbuf + auxdata->lloc;
443 auxdata->leng = strlen(yytext);
444
445 /* Check for << >> and #, which the core considers operators */
446 if (token == Op)
447 {
448 if (strcmp(auxdata->lval.str, "<<") == 0)
449 token = LESS_LESS;
450 else if (strcmp(auxdata->lval.str, ">>") == 0)
451 token = GREATER_GREATER;
452 else if (strcmp(auxdata->lval.str, "#") == 0)
453 token = '#';
454 }
455
456 /* The core returns PARAM as ival, but we treat it like IDENT */
457 else if (token == PARAM)
458 {
459 auxdata->lval.str = pstrdup(yytext);
460 }
461 }
462
463 return token;
464 }
465
466 /*
467 * Push back a token to be re-read by next internal_yylex() call.
468 */
469 static void
push_back_token(int token,TokenAuxData * auxdata)470 push_back_token(int token, TokenAuxData *auxdata)
471 {
472 if (num_pushbacks >= MAX_PUSHBACKS)
473 elog(ERROR, "too many tokens pushed back");
474 pushback_token[num_pushbacks] = token;
475 pushback_auxdata[num_pushbacks] = *auxdata;
476 num_pushbacks++;
477 }
478
479 /*
480 * Push back a single token to be re-read by next plpgsql_yylex() call.
481 *
482 * NOTE: this does not cause yylval or yylloc to "back up". Also, it
483 * is not a good idea to push back a token code other than what you read.
484 */
485 void
plpgsql_push_back_token(int token)486 plpgsql_push_back_token(int token)
487 {
488 TokenAuxData auxdata;
489
490 auxdata.lval = plpgsql_yylval;
491 auxdata.lloc = plpgsql_yylloc;
492 auxdata.leng = plpgsql_yyleng;
493 push_back_token(token, &auxdata);
494 }
495
496 /*
497 * Tell whether a token is an unreserved keyword.
498 *
499 * (If it is, its lowercased form was returned as the token value, so we
500 * do not need to offer that data here.)
501 */
502 bool
plpgsql_token_is_unreserved_keyword(int token)503 plpgsql_token_is_unreserved_keyword(int token)
504 {
505 int i;
506
507 for (i = 0; i < num_unreserved_keywords; i++)
508 {
509 if (unreserved_keywords[i].value == token)
510 return true;
511 }
512 return false;
513 }
514
515 /*
516 * Append the function text starting at startlocation and extending to
517 * (not including) endlocation onto the existing contents of "buf".
518 */
519 void
plpgsql_append_source_text(StringInfo buf,int startlocation,int endlocation)520 plpgsql_append_source_text(StringInfo buf,
521 int startlocation, int endlocation)
522 {
523 Assert(startlocation <= endlocation);
524 appendBinaryStringInfo(buf, scanorig + startlocation,
525 endlocation - startlocation);
526 }
527
528 /*
529 * Peek one token ahead in the input stream. Only the token code is
530 * made available, not any of the auxiliary info such as location.
531 *
532 * NB: no variable or unreserved keyword lookup is performed here, they will
533 * be returned as IDENT. Reserved keywords are resolved as usual.
534 */
535 int
plpgsql_peek(void)536 plpgsql_peek(void)
537 {
538 int tok1;
539 TokenAuxData aux1;
540
541 tok1 = internal_yylex(&aux1);
542 push_back_token(tok1, &aux1);
543 return tok1;
544 }
545
546 /*
547 * Peek two tokens ahead in the input stream. The first token and its
548 * location in the query are returned in *tok1_p and *tok1_loc, second token
549 * and its location in *tok2_p and *tok2_loc.
550 *
551 * NB: no variable or unreserved keyword lookup is performed here, they will
552 * be returned as IDENT. Reserved keywords are resolved as usual.
553 */
554 void
plpgsql_peek2(int * tok1_p,int * tok2_p,int * tok1_loc,int * tok2_loc)555 plpgsql_peek2(int *tok1_p, int *tok2_p, int *tok1_loc, int *tok2_loc)
556 {
557 int tok1,
558 tok2;
559 TokenAuxData aux1,
560 aux2;
561
562 tok1 = internal_yylex(&aux1);
563 tok2 = internal_yylex(&aux2);
564
565 *tok1_p = tok1;
566 if (tok1_loc)
567 *tok1_loc = aux1.lloc;
568 *tok2_p = tok2;
569 if (tok2_loc)
570 *tok2_loc = aux2.lloc;
571
572 push_back_token(tok2, &aux2);
573 push_back_token(tok1, &aux1);
574 }
575
576 /*
577 * plpgsql_scanner_errposition
578 * Report an error cursor position, if possible.
579 *
580 * This is expected to be used within an ereport() call. The return value
581 * is a dummy (always 0, in fact).
582 *
583 * Note that this can only be used for messages emitted during initial
584 * parsing of a plpgsql function, since it requires the scanorig string
585 * to still be available.
586 */
587 int
plpgsql_scanner_errposition(int location)588 plpgsql_scanner_errposition(int location)
589 {
590 int pos;
591
592 if (location < 0 || scanorig == NULL)
593 return 0; /* no-op if location is unknown */
594
595 /* Convert byte offset to character number */
596 pos = pg_mbstrlen_with_len(scanorig, location) + 1;
597 /* And pass it to the ereport mechanism */
598 (void) internalerrposition(pos);
599 /* Also pass the function body string */
600 return internalerrquery(scanorig);
601 }
602
603 /*
604 * plpgsql_yyerror
605 * Report a lexer or grammar error.
606 *
607 * The message's cursor position refers to the current token (the one
608 * last returned by plpgsql_yylex()).
609 * This is OK for syntax error messages from the Bison parser, because Bison
610 * parsers report error as soon as the first unparsable token is reached.
611 * Beware of using yyerror for other purposes, as the cursor position might
612 * be misleading!
613 */
614 void
plpgsql_yyerror(const char * message)615 plpgsql_yyerror(const char *message)
616 {
617 char *yytext = core_yy.scanbuf + plpgsql_yylloc;
618
619 if (*yytext == '\0')
620 {
621 ereport(ERROR,
622 (errcode(ERRCODE_SYNTAX_ERROR),
623 /* translator: %s is typically the translation of "syntax error" */
624 errmsg("%s at end of input", _(message)),
625 plpgsql_scanner_errposition(plpgsql_yylloc)));
626 }
627 else
628 {
629 /*
630 * If we have done any lookahead then flex will have restored the
631 * character after the end-of-token. Zap it again so that we report
632 * only the single token here. This modifies scanbuf but we no longer
633 * care about that.
634 */
635 yytext[plpgsql_yyleng] = '\0';
636
637 ereport(ERROR,
638 (errcode(ERRCODE_SYNTAX_ERROR),
639 /* translator: first %s is typically the translation of "syntax error" */
640 errmsg("%s at or near \"%s\"", _(message), yytext),
641 plpgsql_scanner_errposition(plpgsql_yylloc)));
642 }
643 }
644
645 /*
646 * Given a location (a byte offset in the function source text),
647 * return a line number.
648 *
649 * We expect that this is typically called for a sequence of increasing
650 * location values, so optimize accordingly by tracking the endpoints
651 * of the "current" line.
652 */
653 int
plpgsql_location_to_lineno(int location)654 plpgsql_location_to_lineno(int location)
655 {
656 const char *loc;
657
658 if (location < 0 || scanorig == NULL)
659 return 0; /* garbage in, garbage out */
660 loc = scanorig + location;
661
662 /* be correct, but not fast, if input location goes backwards */
663 if (loc < cur_line_start)
664 location_lineno_init();
665
666 while (cur_line_end != NULL && loc > cur_line_end)
667 {
668 cur_line_start = cur_line_end + 1;
669 cur_line_num++;
670 cur_line_end = strchr(cur_line_start, '\n');
671 }
672
673 return cur_line_num;
674 }
675
676 /* initialize or reset the state for plpgsql_location_to_lineno */
677 static void
location_lineno_init(void)678 location_lineno_init(void)
679 {
680 cur_line_start = scanorig;
681 cur_line_num = 1;
682
683 cur_line_end = strchr(cur_line_start, '\n');
684 }
685
686 /* return the most recently computed lineno */
687 int
plpgsql_latest_lineno(void)688 plpgsql_latest_lineno(void)
689 {
690 return cur_line_num;
691 }
692
693
694 /*
695 * Called before any actual parsing is done
696 *
697 * Note: the passed "str" must remain valid until plpgsql_scanner_finish().
698 * Although it is not fed directly to flex, we need the original string
699 * to cite in error messages.
700 */
701 void
plpgsql_scanner_init(const char * str)702 plpgsql_scanner_init(const char *str)
703 {
704 /* Start up the core scanner */
705 yyscanner = scanner_init(str, &core_yy,
706 reserved_keywords, num_reserved_keywords);
707
708 /*
709 * scanorig points to the original string, which unlike the scanner's
710 * scanbuf won't be modified on-the-fly by flex. Notice that although
711 * yytext points into scanbuf, we rely on being able to apply locations
712 * (offsets from string start) to scanorig as well.
713 */
714 scanorig = str;
715
716 /* Other setup */
717 plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
718 plpgsql_yytoken = 0;
719
720 num_pushbacks = 0;
721
722 location_lineno_init();
723 }
724
725 /*
726 * Called after parsing is done to clean up after plpgsql_scanner_init()
727 */
728 void
plpgsql_scanner_finish(void)729 plpgsql_scanner_finish(void)
730 {
731 /* release storage */
732 scanner_finish(yyscanner);
733 /* avoid leaving any dangling pointers */
734 yyscanner = NULL;
735 scanorig = NULL;
736 }
737