1 /*
2 Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software Foundation,
15 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
16
17 #include "mariadb.h"
18 #include <string.h>
19
20 /* We only need the tokens here */
21 #define YYSTYPE_IS_DECLARED
22 #include <sql_yacc.hh>
23 #include <lex.h>
24
25 #include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */
26
/*
  Capacity of the token table built by this generator.

  This program only runs during the build, so the value is an upper
  bound rather than an exact count. It has to cover:
  - the 256 single character terminal tokens,
  - the YYNTOKENS named terminal tokens produced by bison.
  See also YYMAXUTOK.
*/
#define MY_MAX_TOKEN 1100
/**
  One entry of the generated token table.
  The same four fields are emitted by main() as struct lex_token_string.
*/
struct gen_lex_token_string
{
  /** Text recorded for the token. */
  const char *m_token_string;
  /** Length of m_token_string, in bytes. */
  int m_token_length;
  /**
    Whitespace property of the token. It is true by default and cleared
    for '@', so that digest text is printed as "@@variable".
  */
  bool m_append_space;
  /**
    True when the token is followed by an expression. This is used to
    tell unary '+' and '-' apart from the binary operators.
  */
  bool m_start_expr;
};
45
46 gen_lex_token_string compiled_token_array[MY_MAX_TOKEN];
47 int max_token_seen= 0;
48
49 char char_tokens[256];
50
51 int tok_generic_value= 0;
52 int tok_generic_value_list= 0;
53 int tok_row_single_value= 0;
54 int tok_row_single_value_list= 0;
55 int tok_row_multiple_value= 0;
56 int tok_row_multiple_value_list= 0;
57 int tok_ident= 0;
58 int tok_unused= 0;
59
set_token(int tok,const char * str)60 void set_token(int tok, const char *str)
61 {
62 if (tok <= 0)
63 {
64 fprintf(stderr, "Bad token found\n");
65 exit(1);
66 }
67
68 if (tok > max_token_seen)
69 {
70 max_token_seen= tok;
71 }
72
73 if (max_token_seen >= MY_MAX_TOKEN)
74 {
75 fprintf(stderr, "Added that many new keywords ? Increase MY_MAX_TOKEN\n");
76 exit(1);
77 }
78
79 compiled_token_array[tok].m_token_string= str;
80 compiled_token_array[tok].m_token_length= (int)strlen(str);
81 compiled_token_array[tok].m_append_space= true;
82 compiled_token_array[tok].m_start_expr= false;
83 }
84
set_start_expr_token(int tok)85 void set_start_expr_token(int tok)
86 {
87 compiled_token_array[tok].m_start_expr= true;
88 }
89
compute_tokens()90 void compute_tokens()
91 {
92 int tok;
93 unsigned int i;
94 char *str;
95
96 /*
97 Default value.
98 */
99 for (tok= 0; tok < MY_MAX_TOKEN; tok++)
100 {
101 compiled_token_array[tok].m_token_string= "(unknown)";
102 compiled_token_array[tok].m_token_length= 9;
103 compiled_token_array[tok].m_append_space= true;
104 compiled_token_array[tok].m_start_expr= false;
105 }
106
107 /*
108 Tokens made of just one terminal character
109 */
110 for (tok=0; tok < 256; tok++)
111 {
112 str= & char_tokens[tok];
113 str[0]= (char) tok;
114 compiled_token_array[tok].m_token_string= str;
115 compiled_token_array[tok].m_token_length= 1;
116 compiled_token_array[tok].m_append_space= true;
117 }
118
119 max_token_seen= 255;
120
121 /*
122 String terminal tokens, used in sql_yacc.yy
123 */
124 set_token(NEG, "~");
125 set_token(TABLE_REF_PRIORITY, "TABLE_REF_PRIORITY");
126
127 /*
128 Tokens hard coded in sql_lex.cc
129 */
130
131 set_token(WITH_CUBE_SYM, "WITH CUBE");
132 set_token(WITH_ROLLUP_SYM, "WITH ROLLUP");
133 set_token(WITH_SYSTEM_SYM, "WITH SYSTEM");
134 set_token(FOR_SYSTEM_TIME_SYM, "FOR SYSTEM_TIME");
135 set_token(VALUES_IN_SYM, "VALUES IN");
136 set_token(VALUES_LESS_SYM, "VALUES LESS");
137 set_token(NOT2_SYM, "!");
138 set_token(OR2_SYM, "|");
139 set_token(PARAM_MARKER, "?");
140 set_token(SET_VAR, ":=");
141 set_token(UNDERSCORE_CHARSET, "(_charset)");
142 set_token(END_OF_INPUT, "");
143
144 /*
145 Values.
146 These tokens are all normalized later,
147 so this strings will never be displayed.
148 */
149 set_token(BIN_NUM, "(bin)");
150 set_token(DECIMAL_NUM, "(decimal)");
151 set_token(FLOAT_NUM, "(float)");
152 set_token(HEX_NUM, "(hex)");
153 set_token(LEX_HOSTNAME, "(hostname)");
154 set_token(LONG_NUM, "(long)");
155 set_token(NUM, "(num)");
156 set_token(TEXT_STRING, "(text)");
157 set_token(NCHAR_STRING, "(nchar)");
158 set_token(ULONGLONG_NUM, "(ulonglong)");
159
160 /*
161 Identifiers.
162 */
163 set_token(IDENT, "(id)");
164 set_token(IDENT_QUOTED, "(id_quoted)");
165
166 /*
167 Unused tokens
168 */
169 set_token(LOCATOR_SYM, "LOCATOR");
170 set_token(SERVER_OPTIONS, "SERVER_OPTIONS");
171 set_token(UDF_RETURNS_SYM, "UDF_RETURNS");
172
173 /*
174 See symbols[] in sql/lex.h
175 */
176 for (i= 0; i< sizeof(symbols)/sizeof(symbols[0]); i++)
177 {
178 set_token(symbols[i].tok, symbols[i].name);
179 }
180
181 /*
182 See sql_functions[] in sql/lex.h
183 */
184 for (i= 0; i< sizeof(sql_functions)/sizeof(sql_functions[0]); i++)
185 {
186 set_token(sql_functions[i].tok, sql_functions[i].name);
187 }
188
189 /*
190 Additional FAKE tokens,
191 used internally to normalize a digest text.
192 */
193
194 max_token_seen++;
195 tok_generic_value= max_token_seen;
196 set_token(tok_generic_value, "?");
197
198 max_token_seen++;
199 tok_generic_value_list= max_token_seen;
200 set_token(tok_generic_value_list, "?, ...");
201
202 max_token_seen++;
203 tok_row_single_value= max_token_seen;
204 set_token(tok_row_single_value, "(?)");
205
206 max_token_seen++;
207 tok_row_single_value_list= max_token_seen;
208 set_token(tok_row_single_value_list, "(?) /* , ... */");
209
210 max_token_seen++;
211 tok_row_multiple_value= max_token_seen;
212 set_token(tok_row_multiple_value, "(...)");
213
214 max_token_seen++;
215 tok_row_multiple_value_list= max_token_seen;
216 set_token(tok_row_multiple_value_list, "(...) /* , ... */");
217
218 max_token_seen++;
219 tok_ident= max_token_seen;
220 set_token(tok_ident, "(tok_id)");
221
222 max_token_seen++;
223 tok_unused= max_token_seen;
224 set_token(tok_unused, "UNUSED");
225
226 /*
227 Fix whitespace for some special tokens.
228 */
229
230 /*
231 The lexer parses "@@variable" as '@', '@', 'variable',
232 returning a token for '@' alone.
233
234 This is incorrect, '@' is not really a token,
235 because the syntax "@ @ variable" (with spaces) is not accepted:
236 The lexer keeps some internal state after the '@' fake token.
237
238 To work around this, digest text are printed as "@@variable".
239 */
240 compiled_token_array[(int) '@'].m_append_space= false;
241
242 /*
243 Define additional properties for tokens.
244
245 List all the token that are followed by an expression.
246 This is needed to differentiate unary from binary
247 '+' and '-' operators, because we want to:
248 - reduce <unary +> <NUM> to <?>,
249 - preserve <...> <binary +> <NUM> as is.
250 */
251 set_start_expr_token('(');
252 set_start_expr_token(',');
253 set_start_expr_token(EVERY_SYM);
254 set_start_expr_token(AT_SYM);
255 set_start_expr_token(STARTS_SYM);
256 set_start_expr_token(ENDS_SYM);
257 set_start_expr_token(DEFAULT);
258 set_start_expr_token(RETURN_MARIADB_SYM);
259 set_start_expr_token(RETURN_ORACLE_SYM);
260 set_start_expr_token(IF_SYM);
261 set_start_expr_token(ELSEIF_MARIADB_SYM);
262 set_start_expr_token(ELSEIF_ORACLE_SYM);
263 set_start_expr_token(CASE_SYM);
264 set_start_expr_token(WHEN_SYM);
265 set_start_expr_token(WHILE_SYM);
266 set_start_expr_token(UNTIL_SYM);
267 set_start_expr_token(SELECT_SYM);
268
269 set_start_expr_token(OR_SYM);
270 set_start_expr_token(OR2_SYM);
271 set_start_expr_token(XOR);
272 set_start_expr_token(AND_SYM);
273 set_start_expr_token(AND_AND_SYM);
274 set_start_expr_token(NOT_SYM);
275 set_start_expr_token(BETWEEN_SYM);
276 set_start_expr_token(LIKE);
277 set_start_expr_token(REGEXP);
278
279 set_start_expr_token('|');
280 set_start_expr_token('&');
281 set_start_expr_token(SHIFT_LEFT);
282 set_start_expr_token(SHIFT_RIGHT);
283 set_start_expr_token('+');
284 set_start_expr_token('-');
285 set_start_expr_token(INTERVAL_SYM);
286 set_start_expr_token('*');
287 set_start_expr_token('/');
288 set_start_expr_token('%');
289 set_start_expr_token(DIV_SYM);
290 set_start_expr_token(MOD_SYM);
291 set_start_expr_token('^');
292 }
293
print_tokens()294 void print_tokens()
295 {
296 int tok;
297
298 printf("#ifdef LEX_TOKEN_WITH_DEFINITION\n");
299 printf("lex_token_string lex_token_array[]=\n");
300 printf("{\n");
301 printf("/* PART 1: character tokens. */\n");
302
303 for (tok= 0; tok<256; tok++)
304 {
305 printf("/* %03d */ { \"\\x%02x\", 1, %s, %s},\n",
306 tok,
307 tok,
308 compiled_token_array[tok].m_append_space ? "true" : "false",
309 compiled_token_array[tok].m_start_expr ? "true" : "false");
310 }
311
312 printf("/* PART 2: named tokens. */\n");
313
314 for (tok= 256; tok<= max_token_seen; tok++)
315 {
316 printf("/* %03d */ { \"%s\", %d, %s, %s},\n",
317 tok,
318 compiled_token_array[tok].m_token_string,
319 compiled_token_array[tok].m_token_length,
320 compiled_token_array[tok].m_append_space ? "true" : "false",
321 compiled_token_array[tok].m_start_expr ? "true" : "false");
322 }
323
324 printf("/* DUMMY */ { \"\", 0, false, false}\n");
325 printf("};\n");
326 printf("#endif /* LEX_TOKEN_WITH_DEFINITION */\n");
327
328 printf("/* DIGEST specific tokens. */\n");
329 printf("#define TOK_GENERIC_VALUE %d\n", tok_generic_value);
330 printf("#define TOK_GENERIC_VALUE_LIST %d\n", tok_generic_value_list);
331 printf("#define TOK_ROW_SINGLE_VALUE %d\n", tok_row_single_value);
332 printf("#define TOK_ROW_SINGLE_VALUE_LIST %d\n", tok_row_single_value_list);
333 printf("#define TOK_ROW_MULTIPLE_VALUE %d\n", tok_row_multiple_value);
334 printf("#define TOK_ROW_MULTIPLE_VALUE_LIST %d\n", tok_row_multiple_value_list);
335 printf("#define TOK_IDENT %d\n", tok_ident);
336 printf("#define TOK_UNUSED %d\n", tok_unused);
337 }
338
main(int argc,char ** argv)339 int main(int argc,char **argv)
340 {
341 puts("/*");
342 puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2011"));
343 puts("*/");
344
345 printf("/*\n");
346 printf(" This file is generated, do not edit.\n");
347 printf(" See file sql/gen_lex_token.cc.\n");
348 printf("*/\n");
349 printf("struct lex_token_string\n");
350 printf("{\n");
351 printf(" const char *m_token_string;\n");
352 printf(" int m_token_length;\n");
353 printf(" bool m_append_space;\n");
354 printf(" bool m_start_expr;\n");
355 printf("};\n");
356 printf("typedef struct lex_token_string lex_token_string;\n");
357
358 compute_tokens();
359 print_tokens();
360
361 return 0;
362 }
363
364