/*-------------------------------------------------------------------------
 *
 * parser.c
 *      Main entry point/driver for PostgreSQL grammar
 *
 * This should match src/backend/parser/parser.c, except that we do not
 * need to bother with re-entrant interfaces.
 *
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *      src/interfaces/ecpg/preproc/parser.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres_fe.h"

#include "extern.h"
#include "preproc.h"


static bool have_lookahead;             /* is lookahead info valid? */
static int  lookahead_token;            /* one-token lookahead */
static YYSTYPE lookahead_yylval;        /* yylval for lookahead token */
static YYLTYPE lookahead_yylloc;        /* yylloc for lookahead token */
static char *lookahead_yytext;          /* start of current token */
static char *lookahead_end;             /* end of current token */
static char lookahead_hold_char;        /* to be put back at *lookahead_end */


/*
 * Intermediate filter between parser and base lexer (base_yylex in scan.l).
 *
 * This filter is needed because in some cases the standard SQL grammar
 * requires more than one token lookahead.  We reduce these cases to one-token
 * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
 *
 * Using a filter is simpler than trying to recognize multiword tokens
 * directly in scan.l, because we'd have to allow for comments between the
 * words.  Furthermore it's not clear how to do that without re-introducing
 * scanner backtrack, which would cost more performance than this filter
 * layer does.
 */
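
/*
 * For example, in "a NOT BETWEEN b AND c" the base lexer returns NOT and
 * BETWEEN as two separate tokens (possibly with whitespace or comments in
 * between).  The filter below peeks at the token after NOT and returns
 * NOT_LA instead, so the grammar can pick the right production with a single
 * token of lookahead.  NOT_LA, NULLS_LA and WITH_LA exist only for the
 * grammar's benefit; the base lexer never emits them itself.
 */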
int
filtered_base_yylex(void)
{
    int         cur_token;
    int         next_token;
    int         cur_token_length;
    YYSTYPE     cur_yylval;
    YYLTYPE     cur_yylloc;
    char       *cur_yytext;

    /* Get next token --- we might already have it */
    if (have_lookahead)
    {
        cur_token = lookahead_token;
        base_yylval = lookahead_yylval;
        base_yylloc = lookahead_yylloc;
        base_yytext = lookahead_yytext;
        *lookahead_end = lookahead_hold_char;
        have_lookahead = false;
    }
    else
        cur_token = base_yylex();

    /*
     * If this token isn't one that requires lookahead, just return it.  If it
     * does, determine the token length.  (We could get that via strlen(), but
     * since we have such a small set of possibilities, hardwiring seems
     * feasible and more efficient.)
     */
    switch (cur_token)
    {
        case NOT:
            cur_token_length = 3;
            break;
        case NULLS_P:
            cur_token_length = 5;
            break;
        case WITH:
            cur_token_length = 4;
            break;
        default:
            return cur_token;
    }

    /*
     * Identify end+1 of current token.  base_yylex() has temporarily stored a
     * '\0' here, and will undo that when we call it again.  We need to redo
     * it to fully revert the lookahead call for error reporting purposes.
     */
    lookahead_end = base_yytext + cur_token_length;
    Assert(*lookahead_end == '\0');
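    /*
     * (If this assertion fires, the hardwired token lengths above are most
     * likely out of sync with the keyword text recognized by the base lexer.)
     */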

    /* Save and restore lexer output variables around the call */
    cur_yylval = base_yylval;
    cur_yylloc = base_yylloc;
    cur_yytext = base_yytext;

    /* Get next token, saving outputs into lookahead variables */
    next_token = base_yylex();

    lookahead_token = next_token;
    lookahead_yylval = base_yylval;
    lookahead_yylloc = base_yylloc;
    lookahead_yytext = base_yytext;

    base_yylval = cur_yylval;
    base_yylloc = cur_yylloc;
    base_yytext = cur_yytext;

    /* Now revert the un-truncation of the current token */
    lookahead_hold_char = *lookahead_end;
    *lookahead_end = '\0';
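    /*
     * The saved character is put back at *lookahead_end when the lookahead
     * token is consumed at the top of this function.  Until then, base_yytext
     * for the current token stays NUL-terminated, so error reports for it do
     * not show the lookahead token's text as well.
     */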

    have_lookahead = true;

    /* Replace cur_token if needed, based on lookahead */
    switch (cur_token)
    {
        case NOT:
            /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
            switch (next_token)
            {
                case BETWEEN:
                case IN_P:
                case LIKE:
                case ILIKE:
                case SIMILAR:
                    cur_token = NOT_LA;
                    break;
            }
            break;

        case NULLS_P:
            /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
            switch (next_token)
            {
                case FIRST_P:
                case LAST_P:
                    cur_token = NULLS_LA;
                    break;
            }
            break;

        case WITH:
            /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
            switch (next_token)
            {
                case TIME:
                case ORDINALITY:
                    cur_token = WITH_LA;
                    break;
            }
            break;
    }

    return cur_token;
}