/*--------------------------------------------------------------------
 * Symbols referenced in this file:
 * - plpgsql_scanner_init
 * - plpgsql_IdentifierLookup
 * - yyscanner
 * - core_yy
 * - reserved_keywords
 * - num_reserved_keywords
 * - scanorig
 * - plpgsql_yytoken
 * - num_pushbacks
 * - location_lineno_init
 * - cur_line_start
 * - cur_line_num
 * - cur_line_end
 * - plpgsql_yylex
 * - internal_yylex
 * - pushback_token
 * - pushback_auxdata
 * - push_back_token
 * - unreserved_keywords
 * - num_unreserved_keywords
 * - plpgsql_yyleng
 * - plpgsql_location_to_lineno
 * - plpgsql_scanner_errposition
 * - plpgsql_yyerror
 * - plpgsql_push_back_token
 * - plpgsql_token_is_unreserved_keyword
 * - plpgsql_peek
 * - plpgsql_append_source_text
 * - plpgsql_peek2
 * - plpgsql_scanner_finish
 * - plpgsql_latest_lineno
 *--------------------------------------------------------------------
 */

/*-------------------------------------------------------------------------
 *
 * pl_scanner.c
 *    lexical scanning for PL/pgSQL
 *
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/pl/plpgsql/src/pl_scanner.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "mb/pg_wchar.h"
#include "parser/scanner.h"

#include "plpgsql.h"
#include "pl_gram.h"            /* must be after parser/scanner.h */


/*
 * Expands one keyword-table entry into a {name, token-value, category}
 * aggregate initializer.  The trailing comma is part of the expansion, so
 * table entries are written without separators between them.
 */
#define PG_KEYWORD(a,b,c) {a,b,c},


/*
 * Klugy flag to tell scanner how to look up identifiers.
 *
 * This is per-thread state; plpgsql_scanner_init() resets it to
 * IDENTIFIER_LOOKUP_NORMAL at the start of every scan.
 */
__thread IdentifierLookup plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;


/*
 * A word about keywords:
 *
 * We keep reserved and unreserved keywords in separate arrays.  The
 * reserved keywords are passed to the core scanner, so they will be
 * recognized before (and instead of) any variable name.  Unreserved words
 * are checked for separately, usually after determining that the identifier
 * isn't a known variable name.  If plpgsql_IdentifierLookup is DECLARE then
 * no variable names will be recognized, so the unreserved words always work.
 * (Note in particular that this helps us avoid reserving keywords that are
 * only needed in DECLARE sections.)
 *
 * In certain contexts it is desirable to prefer recognizing an unreserved
 * keyword over recognizing a variable name.  In particular, at the start
 * of a statement we should prefer unreserved keywords unless the statement
 * looks like an assignment (i.e., first token is followed by ':=' or '[').
 * This rule allows most statement-introducing keywords to be kept unreserved.
 * (We still have to reserve initial keywords that might follow a block
 * label, unfortunately, since the method used to determine if we are at
 * start of statement doesn't recognize such cases.  We'd also have to
 * reserve any keyword that could legitimately be followed by ':=' or '['.)
 * Some additional cases are handled in pl_gram.y using tok_is_keyword().
 *
 * We try to avoid reserving more keywords than we have to; but there's
 * little point in not reserving a word if it's reserved in the core grammar.
 * Currently, the following words are reserved here but not in the core:
 * BEGIN BY DECLARE EXECUTE FOREACH IF LOOP STRICT WHILE
 */

/*
 * Lists of keyword (name, token-value, category) entries.
 *
 * !!WARNING!!: These lists must be sorted by ASCII name, because binary
 * search is used to locate entries.
 *
 * Be careful not to put the same word in both lists.  Also be sure that
 * pl_gram.y's unreserved_keyword production agrees with the second list.
 */

/*
 * Reserved keywords.  plpgsql_scanner_init() hands this table to the core
 * scanner through scanner_init(), so these words come back from the lexer
 * already resolved to their K_* token codes.
 *
 * Entries must stay in ASCII order of the keyword name.
 */
static const ScanKeyword reserved_keywords[] = {
    PG_KEYWORD("all", K_ALL, RESERVED_KEYWORD)
    PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD)
    PG_KEYWORD("by", K_BY, RESERVED_KEYWORD)
    PG_KEYWORD("case", K_CASE, RESERVED_KEYWORD)
    PG_KEYWORD("declare", K_DECLARE, RESERVED_KEYWORD)
    PG_KEYWORD("else", K_ELSE, RESERVED_KEYWORD)
    PG_KEYWORD("end", K_END, RESERVED_KEYWORD)
    PG_KEYWORD("execute", K_EXECUTE, RESERVED_KEYWORD)
    PG_KEYWORD("for", K_FOR, RESERVED_KEYWORD)
    PG_KEYWORD("foreach", K_FOREACH, RESERVED_KEYWORD)
    PG_KEYWORD("from", K_FROM, RESERVED_KEYWORD)
    PG_KEYWORD("if", K_IF, RESERVED_KEYWORD)
    PG_KEYWORD("in", K_IN, RESERVED_KEYWORD)
    PG_KEYWORD("into", K_INTO, RESERVED_KEYWORD)
    PG_KEYWORD("loop", K_LOOP, RESERVED_KEYWORD)
    PG_KEYWORD("not", K_NOT, RESERVED_KEYWORD)
    PG_KEYWORD("null", K_NULL, RESERVED_KEYWORD)
    PG_KEYWORD("or", K_OR, RESERVED_KEYWORD)
    PG_KEYWORD("strict", K_STRICT, RESERVED_KEYWORD)
    PG_KEYWORD("then", K_THEN, RESERVED_KEYWORD)
    PG_KEYWORD("to", K_TO, RESERVED_KEYWORD)
    PG_KEYWORD("using", K_USING, RESERVED_KEYWORD)
    PG_KEYWORD("when", K_WHEN, RESERVED_KEYWORD)
    PG_KEYWORD("while", K_WHILE, RESERVED_KEYWORD)
};

/* Number of entries in reserved_keywords[] */
static const int num_reserved_keywords = lengthof(reserved_keywords);

/*
 * Unreserved keywords.  These are not given to the core scanner; instead
 * plpgsql_yylex() looks identifiers up here with ScanKeywordLookup(), and
 * plpgsql_token_is_unreserved_keyword() scans the table by token value.
 *
 * Entries must stay in ASCII order of the keyword name.  Note that "elseif"
 * and "elsif" both map to K_ELSIF.
 */
static const ScanKeyword unreserved_keywords[] = {
    PG_KEYWORD("absolute", K_ABSOLUTE, UNRESERVED_KEYWORD)
    PG_KEYWORD("alias", K_ALIAS, UNRESERVED_KEYWORD)
    PG_KEYWORD("array", K_ARRAY, UNRESERVED_KEYWORD)
    PG_KEYWORD("assert", K_ASSERT, UNRESERVED_KEYWORD)
    PG_KEYWORD("backward", K_BACKWARD, UNRESERVED_KEYWORD)
    PG_KEYWORD("close", K_CLOSE, UNRESERVED_KEYWORD)
    PG_KEYWORD("collate", K_COLLATE, UNRESERVED_KEYWORD)
    PG_KEYWORD("column", K_COLUMN, UNRESERVED_KEYWORD)
    PG_KEYWORD("column_name", K_COLUMN_NAME, UNRESERVED_KEYWORD)
    PG_KEYWORD("constant", K_CONSTANT, UNRESERVED_KEYWORD)
    PG_KEYWORD("constraint", K_CONSTRAINT, UNRESERVED_KEYWORD)
    PG_KEYWORD("constraint_name", K_CONSTRAINT_NAME, UNRESERVED_KEYWORD)
    PG_KEYWORD("continue", K_CONTINUE, UNRESERVED_KEYWORD)
    PG_KEYWORD("current", K_CURRENT, UNRESERVED_KEYWORD)
    PG_KEYWORD("cursor", K_CURSOR, UNRESERVED_KEYWORD)
    PG_KEYWORD("datatype", K_DATATYPE, UNRESERVED_KEYWORD)
    PG_KEYWORD("debug", K_DEBUG, UNRESERVED_KEYWORD)
    PG_KEYWORD("default", K_DEFAULT, UNRESERVED_KEYWORD)
    PG_KEYWORD("detail", K_DETAIL, UNRESERVED_KEYWORD)
    PG_KEYWORD("diagnostics", K_DIAGNOSTICS, UNRESERVED_KEYWORD)
    PG_KEYWORD("dump", K_DUMP, UNRESERVED_KEYWORD)
    PG_KEYWORD("elseif", K_ELSIF, UNRESERVED_KEYWORD)
    PG_KEYWORD("elsif", K_ELSIF, UNRESERVED_KEYWORD)
    PG_KEYWORD("errcode", K_ERRCODE, UNRESERVED_KEYWORD)
    PG_KEYWORD("error", K_ERROR, UNRESERVED_KEYWORD)
    PG_KEYWORD("exception", K_EXCEPTION, UNRESERVED_KEYWORD)
    PG_KEYWORD("exit", K_EXIT, UNRESERVED_KEYWORD)
    PG_KEYWORD("fetch", K_FETCH, UNRESERVED_KEYWORD)
    PG_KEYWORD("first", K_FIRST, UNRESERVED_KEYWORD)
    PG_KEYWORD("forward", K_FORWARD, UNRESERVED_KEYWORD)
    PG_KEYWORD("get", K_GET, UNRESERVED_KEYWORD)
    PG_KEYWORD("hint", K_HINT, UNRESERVED_KEYWORD)
    PG_KEYWORD("import", K_IMPORT, UNRESERVED_KEYWORD)
    PG_KEYWORD("info", K_INFO, UNRESERVED_KEYWORD)
    PG_KEYWORD("insert", K_INSERT, UNRESERVED_KEYWORD)
    PG_KEYWORD("is", K_IS, UNRESERVED_KEYWORD)
    PG_KEYWORD("last", K_LAST, UNRESERVED_KEYWORD)
    PG_KEYWORD("log", K_LOG, UNRESERVED_KEYWORD)
    PG_KEYWORD("message", K_MESSAGE, UNRESERVED_KEYWORD)
    PG_KEYWORD("message_text", K_MESSAGE_TEXT, UNRESERVED_KEYWORD)
    PG_KEYWORD("move", K_MOVE, UNRESERVED_KEYWORD)
    PG_KEYWORD("next", K_NEXT, UNRESERVED_KEYWORD)
    PG_KEYWORD("no", K_NO, UNRESERVED_KEYWORD)
    PG_KEYWORD("notice", K_NOTICE, UNRESERVED_KEYWORD)
    PG_KEYWORD("open", K_OPEN, UNRESERVED_KEYWORD)
    PG_KEYWORD("option", K_OPTION, UNRESERVED_KEYWORD)
    PG_KEYWORD("perform", K_PERFORM, UNRESERVED_KEYWORD)
    PG_KEYWORD("pg_context", K_PG_CONTEXT, UNRESERVED_KEYWORD)
    PG_KEYWORD("pg_datatype_name", K_PG_DATATYPE_NAME, UNRESERVED_KEYWORD)
    PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT, UNRESERVED_KEYWORD)
    PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL, UNRESERVED_KEYWORD)
    PG_KEYWORD("pg_exception_hint", K_PG_EXCEPTION_HINT, UNRESERVED_KEYWORD)
    PG_KEYWORD("print_strict_params", K_PRINT_STRICT_PARAMS, UNRESERVED_KEYWORD)
    PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD)
    PG_KEYWORD("query", K_QUERY, UNRESERVED_KEYWORD)
    PG_KEYWORD("raise", K_RAISE, UNRESERVED_KEYWORD)
    PG_KEYWORD("relative", K_RELATIVE, UNRESERVED_KEYWORD)
    PG_KEYWORD("result_oid", K_RESULT_OID, UNRESERVED_KEYWORD)
    PG_KEYWORD("return", K_RETURN, UNRESERVED_KEYWORD)
    PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE, UNRESERVED_KEYWORD)
    PG_KEYWORD("reverse", K_REVERSE, UNRESERVED_KEYWORD)
    PG_KEYWORD("row_count", K_ROW_COUNT, UNRESERVED_KEYWORD)
    PG_KEYWORD("rowtype", K_ROWTYPE, UNRESERVED_KEYWORD)
    PG_KEYWORD("schema", K_SCHEMA, UNRESERVED_KEYWORD)
    PG_KEYWORD("schema_name", K_SCHEMA_NAME, UNRESERVED_KEYWORD)
    PG_KEYWORD("scroll", K_SCROLL, UNRESERVED_KEYWORD)
    PG_KEYWORD("slice", K_SLICE, UNRESERVED_KEYWORD)
    PG_KEYWORD("sqlstate", K_SQLSTATE, UNRESERVED_KEYWORD)
    PG_KEYWORD("stacked", K_STACKED, UNRESERVED_KEYWORD)
    PG_KEYWORD("table", K_TABLE, UNRESERVED_KEYWORD)
    PG_KEYWORD("table_name", K_TABLE_NAME, UNRESERVED_KEYWORD)
    PG_KEYWORD("type", K_TYPE, UNRESERVED_KEYWORD)
    PG_KEYWORD("use_column", K_USE_COLUMN, UNRESERVED_KEYWORD)
    PG_KEYWORD("use_variable", K_USE_VARIABLE, UNRESERVED_KEYWORD)
    PG_KEYWORD("variable_conflict", K_VARIABLE_CONFLICT, UNRESERVED_KEYWORD)
    PG_KEYWORD("warning", K_WARNING, UNRESERVED_KEYWORD)
};

/* Number of entries in unreserved_keywords[] */
static const int num_unreserved_keywords = lengthof(unreserved_keywords);

/*
 * This macro must recognize all tokens that can immediately precede a
 * PL/pgSQL executable statement (that is, proc_sect or proc_stmt in the
 * grammar).  Fortunately, there are not very many, so hard-coding in this
 * fashion seems sufficient.
 *
 * plpgsql_yylex() applies it to plpgsql_yytoken, i.e. the token code it
 * returned on its previous call.
 */
#define AT_STMT_START(prev_token) \
    ((prev_token) == ';' || \
     (prev_token) == K_BEGIN || \
     (prev_token) == K_THEN || \
     (prev_token) == K_ELSE || \
     (prev_token) == K_LOOP)


/* Auxiliary data about a token (other than the token type) */
typedef struct
{
    YYSTYPE     lval;           /* semantic information */
    YYLTYPE     lloc;           /* offset in scanbuf */
    int         leng;           /* length in bytes */
} TokenAuxData;

/*
 * Scanner working state.  At some point we might wish to fold all this
 * into a YY_EXTRA struct.  For the moment, there is no need for plpgsql's
 * lexer to be re-entrant, and the notational burden of passing a yyscanner
 * pointer around is great enough to not want to do it without need.
 *
 * All of these variables are declared __thread, so each thread has its
 * own copy of the scanner state.
 */

/* The stuff the core lexer needs */
static __thread core_yyscan_t yyscanner = NULL;

static __thread core_yy_extra_type core_yy;


/* The original input string */
static __thread const char *scanorig;


/* Current token's length (corresponds to plpgsql_yylval and plpgsql_yylloc) */
static __thread int plpgsql_yyleng;


/* Current token's code (corresponds to plpgsql_yylval and plpgsql_yylloc) */
static __thread int plpgsql_yytoken;


/*
 * Token pushback stack.  push_back_token() raises an error once
 * MAX_PUSHBACKS entries are already stored.
 */
#define MAX_PUSHBACKS 4

static __thread int num_pushbacks;

static __thread int pushback_token[MAX_PUSHBACKS];

static __thread TokenAuxData pushback_auxdata[MAX_PUSHBACKS];


/* State for plpgsql_location_to_lineno() */
static __thread const char *cur_line_start;

static __thread const char *cur_line_end;

static __thread int cur_line_num;


/* Internal functions */
static int  internal_yylex(TokenAuxData *auxdata);
static void push_back_token(int token, TokenAuxData *auxdata);
static void location_lineno_init(void);


/*
 * This is the yylex routine called from the PL/pgSQL grammar.
 * It is a wrapper around the core lexer, with the ability to recognize
 * PL/pgSQL variables and return them as special T_DATUM tokens.  If a
 * word or compound word does not match any variable name, or if matching
 * is turned off by plpgsql_IdentifierLookup, it is returned as
 * T_WORD or T_CWORD respectively, or as an unreserved keyword if it
 * matches one of those.
 *
 * Returns the token code.  As side effects, the token's semantic value,
 * location and length are stored in plpgsql_yylval, plpgsql_yylloc and
 * plpgsql_yyleng, and the code itself is remembered in plpgsql_yytoken.
 *
 * Up to four tokens of lookahead are read to detect the A, A.B and A.B.C
 * forms; lookahead tokens that turn out not to belong to the name are
 * pushed back.  Because the pushback store is a stack, they are pushed in
 * reverse order so that they are re-read in source order.
 */
int
plpgsql_yylex(void)
{
    int         tok1;
    TokenAuxData aux1;
    const ScanKeyword *kw;

    tok1 = internal_yylex(&aux1);
    if (tok1 == IDENT || tok1 == PARAM)
    {
        int         tok2;
        TokenAuxData aux2;

        tok2 = internal_yylex(&aux2);
        if (tok2 == '.')
        {
            int         tok3;
            TokenAuxData aux3;

            tok3 = internal_yylex(&aux3);
            if (tok3 == IDENT)
            {
                int         tok4;
                TokenAuxData aux4;

                tok4 = internal_yylex(&aux4);
                if (tok4 == '.')
                {
                    int         tok5;
                    TokenAuxData aux5;

                    tok5 = internal_yylex(&aux5);
                    if (tok5 == IDENT)
                    {
                        /* A.B.C: all five tokens are consumed */
                        if (plpgsql_parse_tripword(aux1.lval.str,
                                                   aux3.lval.str,
                                                   aux5.lval.str,
                                                   &aux1.lval.wdatum,
                                                   &aux1.lval.cword))
                            tok1 = T_DATUM;
                        else
                            tok1 = T_CWORD;
                    }
                    else
                    {
                        /* not A.B.C, so just process A.B */
                        push_back_token(tok5, &aux5);
                        push_back_token(tok4, &aux4);
                        if (plpgsql_parse_dblword(aux1.lval.str,
                                                  aux3.lval.str,
                                                  &aux1.lval.wdatum,
                                                  &aux1.lval.cword))
                            tok1 = T_DATUM;
                        else
                            tok1 = T_CWORD;
                    }
                }
                else
                {
                    /* not A.B.C, so just process A.B */
                    push_back_token(tok4, &aux4);
                    if (plpgsql_parse_dblword(aux1.lval.str,
                                              aux3.lval.str,
                                              &aux1.lval.wdatum,
                                              &aux1.lval.cword))
                        tok1 = T_DATUM;
                    else
                        tok1 = T_CWORD;
                }
            }
            else
            {
                /* not A.B, so just process A */
                push_back_token(tok3, &aux3);
                push_back_token(tok2, &aux2);
                if (plpgsql_parse_word(aux1.lval.str,
                                       core_yy.scanbuf + aux1.lloc,
                                       &aux1.lval.wdatum,
                                       &aux1.lval.word))
                    tok1 = T_DATUM;
                else if (!aux1.lval.word.quoted &&
                         (kw = ScanKeywordLookup(aux1.lval.word.ident,
                                                 unreserved_keywords,
                                                 num_unreserved_keywords)))
                {
                    aux1.lval.keyword = kw->name;
                    tok1 = kw->value;
                }
                else
                    tok1 = T_WORD;
            }
        }
        else
        {
            /* not A.B, so just process A */
            push_back_token(tok2, &aux2);

            /*
             * If we are at start of statement, prefer unreserved keywords
             * over variable names, unless the next token is assignment or
             * '[', in which case prefer variable names.  (Note we need not
             * consider '.' as the next token; that case was handled above,
             * and we always prefer variable names in that case.)  If we are
             * not at start of statement, always prefer variable names over
             * unreserved keywords.
             */
            if (AT_STMT_START(plpgsql_yytoken) &&
                !(tok2 == '=' || tok2 == COLON_EQUALS || tok2 == '['))
            {
                /* try for unreserved keyword, then for variable name */

                /*
                 * The keyword lookup is skipped when the token's source
                 * text begins with a double quote.
                 */
                if (core_yy.scanbuf[aux1.lloc] != '"' &&
                    (kw = ScanKeywordLookup(aux1.lval.str,
                                            unreserved_keywords,
                                            num_unreserved_keywords)))
                {
                    aux1.lval.keyword = kw->name;
                    tok1 = kw->value;
                }
                else if (plpgsql_parse_word(aux1.lval.str,
                                            core_yy.scanbuf + aux1.lloc,
                                            &aux1.lval.wdatum,
                                            &aux1.lval.word))
                    tok1 = T_DATUM;
                else
                    tok1 = T_WORD;
            }
            else
            {
                /* try for variable name, then for unreserved keyword */
                if (plpgsql_parse_word(aux1.lval.str,
                                       core_yy.scanbuf + aux1.lloc,
                                       &aux1.lval.wdatum,
                                       &aux1.lval.word))
                    tok1 = T_DATUM;
                else if (!aux1.lval.word.quoted &&
                         (kw = ScanKeywordLookup(aux1.lval.word.ident,
                                                 unreserved_keywords,
                                                 num_unreserved_keywords)))
                {
                    aux1.lval.keyword = kw->name;
                    tok1 = kw->value;
                }
                else
                    tok1 = T_WORD;
            }
        }
    }
    else
    {
        /*
         * Not a potential plpgsql variable name, just return the data.
         *
         * Note that we also come through here if the grammar pushed back a
         * T_DATUM, T_CWORD, T_WORD, or unreserved-keyword token returned by a
         * previous lookup cycle; thus, pushbacks do not incur extra lookup
         * work, since we'll never do the above code twice for the same token.
         * This property also makes it safe to rely on the old value of
         * plpgsql_yytoken in the is-this-start-of-statement test above.
         */
    }

    /* Publish the token's auxiliary data and remember its code */
    plpgsql_yylval = aux1.lval;
    plpgsql_yylloc = aux1.lloc;
    plpgsql_yyleng = aux1.leng;
    plpgsql_yytoken = tok1;
    return tok1;
}

/*
 * Internal yylex function.  This wraps the core lexer and adds one feature:
 * a token pushback stack.  We also make a couple of trivial single-token
 * translations from what the core lexer does to what we want, in particular
 * interfacing from the core_YYSTYPE to YYSTYPE union.
 *
 * auxdata receives the token's semantic value, location and length.
 * Returns the token code.  Pushed-back tokens are returned (most recently
 * pushed first) before any new input is read from the core lexer.
 */
static int
internal_yylex(TokenAuxData *auxdata)
{
    int         token;
    const char *yytext;

    if (num_pushbacks > 0)
    {
        /* pop the most recently pushed token, with its saved aux data */
        num_pushbacks--;
        token = pushback_token[num_pushbacks];
        *auxdata = pushback_auxdata[num_pushbacks];
    }
    else
    {
        token = core_yylex(&auxdata->lval.core_yystype,
                           &auxdata->lloc,
                           yyscanner);

        /* remember the length of yytext before it gets changed */
        yytext = core_yy.scanbuf + auxdata->lloc;
        auxdata->leng = strlen(yytext);

        /* Check for << >> and #, which the core considers operators */
        if (token == Op)
        {
            if (strcmp(auxdata->lval.str, "<<") == 0)
                token = LESS_LESS;
            else if (strcmp(auxdata->lval.str, ">>") == 0)
                token = GREATER_GREATER;
            else if (strcmp(auxdata->lval.str, "#") == 0)
                token = '#';
        }

        /* The core returns PARAM as ival, but we treat it like IDENT */
        else if (token == PARAM)
        {
            /* replace the value with a copy of the token's source text */
            auxdata->lval.str = pstrdup(yytext);
        }
    }

    return token;
}

/*
 * Push back a token to be re-read by next internal_yylex() call.
 *
 * Both the token code and a copy of *auxdata are saved.  Raises an error
 * if MAX_PUSHBACKS tokens are already pushed back.
 */
static void
push_back_token(int token, TokenAuxData *auxdata)
{
    if (num_pushbacks >= MAX_PUSHBACKS)
        elog(ERROR, "too many tokens pushed back");
    pushback_token[num_pushbacks] = token;
    pushback_auxdata[num_pushbacks] = *auxdata;
    num_pushbacks++;
}

/*
 * Push back a single token to be re-read by next plpgsql_yylex() call.
 *
 * The auxiliary data saved with the token is taken from the current
 * plpgsql_yylval, plpgsql_yylloc and plpgsql_yyleng.
 *
 * NOTE: this does not cause yylval or yylloc to "back up".  Also, it
 * is not a good idea to push back a token code other than what you read.
 */
void
plpgsql_push_back_token(int token)
{
    TokenAuxData auxdata;

    auxdata.lval = plpgsql_yylval;
    auxdata.lloc = plpgsql_yylloc;
    auxdata.leng = plpgsql_yyleng;
    push_back_token(token, &auxdata);
}

/*
 * Tell whether a token is an unreserved keyword.
 *
 * (If it is, its lowercased form was returned as the token value, so we
 * do not need to offer that data here.)
 *
 * This is a linear scan of unreserved_keywords[] comparing token values.
 */
bool
plpgsql_token_is_unreserved_keyword(int token)
{
    int         i;

    for (i = 0; i < num_unreserved_keywords; i++)
    {
        if (unreserved_keywords[i].value == token)
            return true;
    }
    return false;
}

/*
 * Append the function text starting at startlocation and extending to
 * (not including) endlocation onto the existing contents of "buf".
 *
 * The text is copied from scanorig, the original input string, with both
 * locations used as byte offsets into it.
 */
void
plpgsql_append_source_text(StringInfo buf,
                           int startlocation, int endlocation)
{
    Assert(startlocation <= endlocation);
    appendBinaryStringInfo(buf, scanorig + startlocation,
                           endlocation - startlocation);
}

/*
 * Peek one token ahead in the input stream.  Only the token code is
 * made available, not any of the auxiliary info such as location.
 *
 * NB: no variable or unreserved keyword lookup is performed here, they will
 * be returned as IDENT. Reserved keywords are resolved as usual.
582 */ 583 int 584 plpgsql_peek(void) 585 { 586 int tok1; 587 TokenAuxData aux1; 588 589 tok1 = internal_yylex(&aux1); 590 push_back_token(tok1, &aux1); 591 return tok1; 592 } 593 594 /* 595 * Peek two tokens ahead in the input stream. The first token and its 596 * location in the query are returned in *tok1_p and *tok1_loc, second token 597 * and its location in *tok2_p and *tok2_loc. 598 * 599 * NB: no variable or unreserved keyword lookup is performed here, they will 600 * be returned as IDENT. Reserved keywords are resolved as usual. 601 */ 602 void 603 plpgsql_peek2(int *tok1_p, int *tok2_p, int *tok1_loc, int *tok2_loc) 604 { 605 int tok1, 606 tok2; 607 TokenAuxData aux1, 608 aux2; 609 610 tok1 = internal_yylex(&aux1); 611 tok2 = internal_yylex(&aux2); 612 613 *tok1_p = tok1; 614 if (tok1_loc) 615 *tok1_loc = aux1.lloc; 616 *tok2_p = tok2; 617 if (tok2_loc) 618 *tok2_loc = aux2.lloc; 619 620 push_back_token(tok2, &aux2); 621 push_back_token(tok1, &aux1); 622 } 623 624 /* 625 * plpgsql_scanner_errposition 626 * Report an error cursor position, if possible. 627 * 628 * This is expected to be used within an ereport() call. The return value 629 * is a dummy (always 0, in fact). 630 * 631 * Note that this can only be used for messages emitted during initial 632 * parsing of a plpgsql function, since it requires the scanorig string 633 * to still be available. 634 */ 635 int 636 plpgsql_scanner_errposition(int location) 637 { 638 int pos; 639 640 if (location < 0 || scanorig == NULL) 641 return 0; /* no-op if location is unknown */ 642 643 /* Convert byte offset to character number */ 644 pos = pg_mbstrlen_with_len(scanorig, location) + 1; 645 /* And pass it to the ereport mechanism */ 646 (void) internalerrposition(pos); 647 /* Also pass the function body string */ 648 return internalerrquery(scanorig); 649 } 650 651 /* 652 * plpgsql_yyerror 653 * Report a lexer or grammar error. 
654 * 655 * The message's cursor position refers to the current token (the one 656 * last returned by plpgsql_yylex()). 657 * This is OK for syntax error messages from the Bison parser, because Bison 658 * parsers report error as soon as the first unparsable token is reached. 659 * Beware of using yyerror for other purposes, as the cursor position might 660 * be misleading! 661 */ 662 void 663 plpgsql_yyerror(const char *message) 664 { 665 char *yytext = core_yy.scanbuf + plpgsql_yylloc; 666 667 if (*yytext == '\0') 668 { 669 ereport(ERROR, 670 (errcode(ERRCODE_SYNTAX_ERROR), 671 /* translator: %s is typically the translation of "syntax error" */ 672 errmsg("%s at end of input", _(message)), 673 plpgsql_scanner_errposition(plpgsql_yylloc))); 674 } 675 else 676 { 677 /* 678 * If we have done any lookahead then flex will have restored the 679 * character after the end-of-token. Zap it again so that we report 680 * only the single token here. This modifies scanbuf but we no longer 681 * care about that. 682 */ 683 yytext[plpgsql_yyleng] = '\0'; 684 685 ereport(ERROR, 686 (errcode(ERRCODE_SYNTAX_ERROR), 687 /* translator: first %s is typically the translation of "syntax error" */ 688 errmsg("%s at or near \"%s\"", _(message), yytext), 689 plpgsql_scanner_errposition(plpgsql_yylloc))); 690 } 691 } 692 693 /* 694 * Given a location (a byte offset in the function source text), 695 * return a line number. 696 * 697 * We expect that this is typically called for a sequence of increasing 698 * location values, so optimize accordingly by tracking the endpoints 699 * of the "current" line. 
700 */ 701 int 702 plpgsql_location_to_lineno(int location) 703 { 704 const char *loc; 705 706 if (location < 0 || scanorig == NULL) 707 return 0; /* garbage in, garbage out */ 708 loc = scanorig + location; 709 710 /* be correct, but not fast, if input location goes backwards */ 711 if (loc < cur_line_start) 712 location_lineno_init(); 713 714 while (cur_line_end != NULL && loc > cur_line_end) 715 { 716 cur_line_start = cur_line_end + 1; 717 cur_line_num++; 718 cur_line_end = strchr(cur_line_start, '\n'); 719 } 720 721 return cur_line_num; 722 } 723 724 /* initialize or reset the state for plpgsql_location_to_lineno */ 725 static void 726 location_lineno_init(void) 727 { 728 cur_line_start = scanorig; 729 cur_line_num = 1; 730 731 cur_line_end = strchr(cur_line_start, '\n'); 732 } 733 734 /* return the most recently computed lineno */ 735 int 736 plpgsql_latest_lineno(void) 737 { 738 return cur_line_num; 739 } 740 741 742 /* 743 * Called before any actual parsing is done 744 * 745 * Note: the passed "str" must remain valid until plpgsql_scanner_finish(). 746 * Although it is not fed directly to flex, we need the original string 747 * to cite in error messages. 748 */ 749 void 750 plpgsql_scanner_init(const char *str) 751 { 752 /* Start up the core scanner */ 753 yyscanner = scanner_init(str, &core_yy, 754 reserved_keywords, num_reserved_keywords); 755 756 /* 757 * scanorig points to the original string, which unlike the scanner's 758 * scanbuf won't be modified on-the-fly by flex. Notice that although 759 * yytext points into scanbuf, we rely on being able to apply locations 760 * (offsets from string start) to scanorig as well. 
761 */ 762 scanorig = str; 763 764 /* Other setup */ 765 plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL; 766 plpgsql_yytoken = 0; 767 768 num_pushbacks = 0; 769 770 location_lineno_init(); 771 } 772 773 /* 774 * Called after parsing is done to clean up after plpgsql_scanner_init() 775 */ 776 void 777 plpgsql_scanner_finish(void) 778 { 779 /* release storage */ 780 scanner_finish(yyscanner); 781 /* avoid leaving any dangling pointers */ 782 yyscanner = NULL; 783 scanorig = NULL; 784 } 785