/*-------------------------------------------------------------------------
 *
 * parser.c
 *		Main entry point/driver for PostgreSQL grammar
 *
 * This should match src/backend/parser/parser.c, except that we do not
 * need to bother with re-entrant interfaces.
 *
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/interfaces/ecpg/preproc/parser.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres_fe.h"

#include "extern.h"
#include "preproc.h"


static bool have_lookahead;		/* is lookahead info valid? */
static int	lookahead_token;	/* one-token lookahead */
static YYSTYPE lookahead_yylval;	/* yylval for lookahead token */
static YYLTYPE lookahead_yylloc;	/* yylloc for lookahead token */
static char *lookahead_yytext;	/* start of current token */
static char *lookahead_end;		/* end of current token */
static char lookahead_hold_char;	/* to be put back at *lookahead_end */


/*
 * Intermediate filter between parser and base lexer (base_yylex in scan.l).
 *
 * This filter is needed because in some cases the standard SQL grammar
 * requires more than one token lookahead.  We reduce these cases to one-token
 * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
 *
 * Using a filter is simpler than trying to recognize multiword tokens
 * directly in scan.l, because we'd have to allow for comments between the
 * words.  Furthermore it's not clear how to do that without re-introducing
 * scanner backtrack, which would cost more performance than this filter
 * layer does.
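 *
 * For example, when the base lexer returns NULLS_P and the very next token
 * is FIRST_P or LAST_P, this filter hands the grammar NULLS_LA instead, so
 * a NULLS FIRST/LAST clause can be recognized with only one token of
 * lookahead.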
 */
int
filtered_base_yylex(void)
{
	int			cur_token;
	int			next_token;
	int			cur_token_length;
	YYSTYPE		cur_yylval;
	YYLTYPE		cur_yylloc;
	char	   *cur_yytext;

	/* Get next token --- we might already have it */
	if (have_lookahead)
	{
		cur_token = lookahead_token;
		base_yylval = lookahead_yylval;
		base_yylloc = lookahead_yylloc;
		base_yytext = lookahead_yytext;
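		/*
		 * Put back the character we overwrote with '\0' below when this
		 * token was read ahead, undoing the temporary truncation of the
		 * scanner's buffer.
		 */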
		*lookahead_end = lookahead_hold_char;
		have_lookahead = false;
	}
	else
		cur_token = base_yylex();

	/*
	 * If this token isn't one that requires lookahead, just return it.  If it
	 * does, determine the token length.  (We could get that via strlen(), but
	 * since we have such a small set of possibilities, hardwiring seems
	 * feasible and more efficient.)
	 */
	switch (cur_token)
	{
		case NOT:
			cur_token_length = 3;
			break;
		case NULLS_P:
			cur_token_length = 5;
			break;
		case WITH:
			cur_token_length = 4;
			break;
		default:
			return cur_token;
	}

	/*
	 * Identify end+1 of current token.  base_yylex() has temporarily stored a
	 * '\0' here, and will undo that when we call it again.  We need to redo
	 * it to fully revert the lookahead call for error reporting purposes.
	 */
	lookahead_end = base_yytext + cur_token_length;
	Assert(*lookahead_end == '\0');

	/* Save and restore lexer output variables around the call */
	cur_yylval = base_yylval;
	cur_yylloc = base_yylloc;
	cur_yytext = base_yytext;

	/* Get next token, saving outputs into lookahead variables */
	next_token = base_yylex();

	lookahead_token = next_token;
	lookahead_yylval = base_yylval;
	lookahead_yylloc = base_yylloc;
	lookahead_yytext = base_yytext;

	base_yylval = cur_yylval;
	base_yylloc = cur_yylloc;
	base_yytext = cur_yytext;

	/* Now revert the un-truncation of the current token */
	lookahead_hold_char = *lookahead_end;
	*lookahead_end = '\0';
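	/*
	 * base_yytext for the current token is now NUL-terminated again, just as
	 * it was before the lookahead call, which is what error reporting
	 * expects; the held character is put back when the lookahead token is
	 * actually consumed (see the have_lookahead branch above).
	 */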

	have_lookahead = true;

	/* Replace cur_token if needed, based on lookahead */
	switch (cur_token)
	{
		case NOT:
			/* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
			switch (next_token)
			{
				case BETWEEN:
				case IN_P:
				case LIKE:
				case ILIKE:
				case SIMILAR:
					cur_token = NOT_LA;
					break;
			}
			break;

		case NULLS_P:
			/* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
			switch (next_token)
			{
				case FIRST_P:
				case LAST_P:
					cur_token = NULLS_LA;
					break;
			}
			break;

		case WITH:
			/* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
			switch (next_token)
			{
				case TIME:
				case ORDINALITY:
					cur_token = WITH_LA;
					break;
			}
			break;
	}

	return cur_token;
}