/*-------------------------------------------------------------------------
 *
 * parser.c
 *		Main entry point/driver for PostgreSQL grammar
 *
 * Note that the grammar is not allowed to perform any table access
 * (since we need to be able to do basic parsing even while inside an
 * aborted transaction).  Therefore, the data structures returned by
 * the grammar are "raw" parsetrees that still need to be analyzed by
 * analyze.c and related files.
 *
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/parser/parser.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "parser/gramparse.h"
#include "parser/parser.h"


/*
 * raw_parser
 *		Given a query in string form, do lexical and grammatical analysis.
 *
 * Returns a list of raw (un-analyzed) parse trees.  The immediate elements
 * of the list are always RawStmt nodes.
 */
List *
raw_parser(const char *str)
{
	core_yyscan_t yyscanner;
	base_yy_extra_type yyextra;
	int			yyresult;

	/* initialize the flex scanner */
	yyscanner = scanner_init(str, &yyextra.core_yy_extra,
							 ScanKeywords, NumScanKeywords);

	/* base_yylex() only needs this much initialization */
	yyextra.have_lookahead = false;

	/* initialize the bison parser */
	parser_init(&yyextra);

	/* Parse! */
	yyresult = base_yyparse(yyscanner);

	/* Clean up (release memory) */
	scanner_finish(yyscanner);

	if (yyresult)				/* error */
		return NIL;

	return yyextra.parsetree;
}
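
/*
 * Usage sketch (illustrative only): roughly how a caller drives raw_parser().
 * In the backend the actual caller is pg_parse_query() in tcop/postgres.c;
 * the function and macro names below are hypothetical, and the block is
 * compiled out by default.
 */
#ifdef RAW_PARSER_USAGE_EXAMPLE
static List *
parse_query_example(const char *query_string)
{
	List	   *raw_parsetree_list;

	/* Lexical and grammatical analysis only; no catalog access happens here */
	raw_parsetree_list = raw_parser(query_string);

	/*
	 * Each element of the returned list is a RawStmt node wrapping one
	 * statement's raw parse tree; NIL indicates the grammar reported failure.
	 */
	return raw_parsetree_list;
}
#endif							/* RAW_PARSER_USAGE_EXAMPLE */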


/*
 * Intermediate filter between parser and core lexer (core_yylex in scan.l).
 *
 * This filter is needed because in some cases the standard SQL grammar
 * requires more than one token lookahead.  We reduce these cases to one-token
 * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
 *
 * Using a filter is simpler than trying to recognize multiword tokens
 * directly in scan.l, because we'd have to allow for comments between the
 * words.  Furthermore it's not clear how to do that without re-introducing
 * scanner backtrack, which would cost more performance than this filter
 * layer does.
 *
 * The filter also provides a convenient place to translate between
 * the core_YYSTYPE and YYSTYPE representations (which are really the
 * same thing anyway, but notationally they're different).
 */
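/*
 * For example, the two-word constructs handled below are NOT followed by
 * BETWEEN, IN, LIKE, ILIKE, or SIMILAR (as in "x NOT LIKE y"), NULLS followed
 * by FIRST or LAST (as in "ORDER BY c NULLS FIRST"), and WITH followed by
 * TIME or ORDINALITY (as in "TIMESTAMP WITH TIME ZONE" or "WITH ORDINALITY").
 */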
int
base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
{
	base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
	int			cur_token;
	int			next_token;
	int			cur_token_length;
	YYLTYPE		cur_yylloc;

	/* Get next token --- we might already have it */
	if (yyextra->have_lookahead)
	{
		cur_token = yyextra->lookahead_token;
		lvalp->core_yystype = yyextra->lookahead_yylval;
		*llocp = yyextra->lookahead_yylloc;
		*(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
		yyextra->have_lookahead = false;
	}
	else
		cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);

	/*
	 * If this token isn't one that requires lookahead, just return it.  If it
	 * does, determine the token length.  (We could get that via strlen(), but
	 * since we have such a small set of possibilities, hardwiring seems
	 * feasible and more efficient.)
	 */
	switch (cur_token)
	{
		case NOT:
			cur_token_length = 3;
			break;
		case NULLS_P:
			cur_token_length = 5;
			break;
		case WITH:
			cur_token_length = 4;
			break;
		default:
			return cur_token;
	}

	/*
	 * Identify end+1 of current token.  core_yylex() has temporarily stored a
	 * '\0' here, and will undo that when we call it again.  We need to redo
	 * it to fully revert the lookahead call for error reporting purposes.
	 */
	yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf +
		*llocp + cur_token_length;
	Assert(*(yyextra->lookahead_end) == '\0');

	/*
	 * Save and restore *llocp around the call.  It might look like we could
	 * avoid this by just passing &lookahead_yylloc to core_yylex(), but that
	 * does not work because flex actually holds onto the last-passed pointer
	 * internally, and will use that for error reporting.  We need any error
	 * reports to point to the current token, not the next one.
	 */
	cur_yylloc = *llocp;

	/* Get next token, saving outputs into lookahead variables */
	next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner);
	yyextra->lookahead_token = next_token;
	yyextra->lookahead_yylloc = *llocp;

	*llocp = cur_yylloc;

	/* Now revert the un-truncation of the current token */
	yyextra->lookahead_hold_char = *(yyextra->lookahead_end);
	*(yyextra->lookahead_end) = '\0';

	yyextra->have_lookahead = true;

	/* Replace cur_token if needed, based on lookahead */
	switch (cur_token)
	{
		case NOT:
			/* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
			switch (next_token)
			{
				case BETWEEN:
				case IN_P:
				case LIKE:
				case ILIKE:
				case SIMILAR:
					cur_token = NOT_LA;
					break;
			}
			break;

		case NULLS_P:
			/* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
			switch (next_token)
			{
				case FIRST_P:
				case LAST_P:
					cur_token = NULLS_LA;
					break;
			}
			break;

		case WITH:
			/* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
			switch (next_token)
			{
				case TIME:
				case ORDINALITY:
					cur_token = WITH_LA;
					break;
			}
			break;
	}

	return cur_token;
}