/*-------------------------------------------------------------------------
 *
 * parser.c
 *		Main entry point/driver for PostgreSQL grammar
 *
 * This should match src/backend/parser/parser.c, except that we do not
 * need to bother with re-entrant interfaces.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/interfaces/ecpg/preproc/parser.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres_fe.h"

#include "preproc_extern.h"
#include "preproc.h"


static bool have_lookahead;		/* is lookahead info valid? */
static int	lookahead_token;	/* one-token lookahead */
static YYSTYPE lookahead_yylval;	/* yylval for lookahead token */
static YYLTYPE lookahead_yylloc;	/* yylloc for lookahead token */
static char *lookahead_yytext;	/* start of current token */
static char *lookahead_end;		/* end of current token */
static char lookahead_hold_char;	/* to be put back at *lookahead_end */


/*
 * Intermediate filter between parser and base lexer (base_yylex in scan.l).
 *
 * This filter is needed because in some cases the standard SQL grammar
 * requires more than one token lookahead.  We reduce these cases to one-token
 * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
 *
 * Using a filter is simpler than trying to recognize multiword tokens
 * directly in scan.l, because we'd have to allow for comments between the
 * words.  Furthermore it's not clear how to do that without re-introducing
 * scanner backtracking, which would cost more performance than this filter
 * layer does.
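 *
 * For example, when NOT is immediately followed by BETWEEN, IN, LIKE, ILIKE,
 * or SIMILAR, the grammar sees NOT_LA instead of NOT; likewise NULLS before
 * FIRST or LAST becomes NULLS_LA, and WITH before TIME or ORDINALITY becomes
 * WITH_LA.  This lets the grammar resolve such constructs with a single
 * token of lookahead.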
 */
int
filtered_base_yylex(void)
{
	int			cur_token;
	int			next_token;
	int			cur_token_length;
	YYSTYPE		cur_yylval;
	YYLTYPE		cur_yylloc;
	char	   *cur_yytext;

	/* Get next token --- we might already have it */
	if (have_lookahead)
	{
		cur_token = lookahead_token;
		base_yylval = lookahead_yylval;
		base_yylloc = lookahead_yylloc;
		base_yytext = lookahead_yytext;
		*lookahead_end = lookahead_hold_char;
		have_lookahead = false;
	}
	else
		cur_token = base_yylex();

	/*
	 * If this token isn't one that requires lookahead, just return it.  If it
	 * does, determine the token length.  (We could get that via strlen(), but
	 * since we have such a small set of possibilities, hardwiring seems
	 * feasible and more efficient.)
	 */
	switch (cur_token)
	{
		case NOT:
			cur_token_length = 3;
			break;
		case NULLS_P:
			cur_token_length = 5;
			break;
		case WITH:
			cur_token_length = 4;
			break;
		default:
			return cur_token;
	}

	/*
	 * Identify end+1 of current token.  base_yylex() has temporarily stored a
	 * '\0' here, and will undo that when we call it again.  We need to redo
	 * it to fully revert the lookahead call for error reporting purposes.
	 */
	lookahead_end = base_yytext + cur_token_length;
	Assert(*lookahead_end == '\0');

	/* Save and restore lexer output variables around the call */
	cur_yylval = base_yylval;
	cur_yylloc = base_yylloc;
	cur_yytext = base_yytext;

	/* Get next token, saving outputs into lookahead variables */
	next_token = base_yylex();

	lookahead_token = next_token;
	lookahead_yylval = base_yylval;
	lookahead_yylloc = base_yylloc;
	lookahead_yytext = base_yytext;

	base_yylval = cur_yylval;
	base_yylloc = cur_yylloc;
	base_yytext = cur_yytext;

	/* Now revert the un-truncation of the current token */
	lookahead_hold_char = *lookahead_end;
	*lookahead_end = '\0';

	have_lookahead = true;

	/* Replace cur_token if needed, based on lookahead */
	switch (cur_token)
	{
		case NOT:
			/* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
			switch (next_token)
			{
				case BETWEEN:
				case IN_P:
				case LIKE:
				case ILIKE:
				case SIMILAR:
					cur_token = NOT_LA;
					break;
			}
			break;
		case NULLS_P:
			/* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
			switch (next_token)
			{
				case FIRST_P:
				case LAST_P:
					cur_token = NULLS_LA;
					break;
			}
			break;
		case WITH:
			/* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
			switch (next_token)
			{
				case TIME:
				case ORDINALITY:
					cur_token = WITH_LA;
					break;
			}
			break;
	}

	return cur_token;
}