1 /*
2 Copyright (c) 2011, 2021, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software Foundation,
22 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
23
24 #include <my_global.h>
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28
29 /* We only need the tokens here */
30 #define YYSTYPE_IS_DECLARED
31 #include <lex.h>
32
33 #include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */
34
35 /*
36 This is a tool used during build only,
37 so MY_MAX_TOKEN does not need to be exact,
38 only big enough to hold:
39 - 256 character terminal tokens
40 - YYNTOKENS named terminal tokens
41 from bison.
42 See also YYMAXUTOK.
43 */
44 #define MY_MAX_TOKEN 1000
/**
  One entry of the token table built by this generator.

  It has the same members, in the same order, as the lex_token_string
  structure that main() writes into the generated file.
*/
struct gen_lex_token_string
{
  /** Text of the token. The pointer is stored as given, not copied. */
  const char *m_token_string;

  /** Length of m_token_string, in bytes. */
  int m_token_length;

  /**
    Whitespace property of the token. It is true for every token except
    '@', so that "@@variable" is printed without embedded spaces.
  */
  bool m_append_space;

  /** True when the token is followed by an expression. */
  bool m_start_expr;
};
53
54 gen_lex_token_string compiled_token_array[MY_MAX_TOKEN];
55 int max_token_seen= 0;
56
57 char char_tokens[256];
58
59 int tok_generic_value= 0;
60 int tok_generic_value_list= 0;
61 int tok_row_single_value= 0;
62 int tok_row_single_value_list= 0;
63 int tok_row_multiple_value= 0;
64 int tok_row_multiple_value_list= 0;
65 int tok_ident= 0;
66 int tok_ident_at= 0; ///< Fake token for the left part of table@query_block.
67 int tok_hint_comment_open= 0; ///< Fake token value for "/*+" of hint comments.
68 int tok_hint_comment_close= 0; ///< Fake token value for "*/" of hint comments.
69 int tok_unused= 0;
70
71 /**
72 Adjustment value to translate hint parser's internal token values to generally
73 visible token values. This adjustment is necessary, since keyword token values
74 of separate parsers may interfere.
75 */
76 int tok_hint_adjust= 0;
77
set_token(int tok,const char * str)78 void set_token(int tok, const char *str)
79 {
80 if (tok <= 0)
81 {
82 fprintf(stderr, "Bad token found\n");
83 exit(1);
84 }
85
86 if (tok > max_token_seen)
87 {
88 max_token_seen= tok;
89 }
90
91 if (max_token_seen >= MY_MAX_TOKEN)
92 {
93 fprintf(stderr, "Added that many new keywords ? Increase MY_MAX_TOKEN\n");
94 exit(1);
95 }
96
97 compiled_token_array[tok].m_token_string= str;
98 compiled_token_array[tok].m_token_length= strlen(str);
99 compiled_token_array[tok].m_append_space= true;
100 compiled_token_array[tok].m_start_expr= false;
101 }
102
set_start_expr_token(int tok)103 void set_start_expr_token(int tok)
104 {
105 compiled_token_array[tok].m_start_expr= true;
106 }
107
compute_tokens()108 void compute_tokens()
109 {
110 int tok;
111 unsigned int i;
112 char *str;
113
114 /*
115 Default value.
116 */
117 for (tok= 0; tok < MY_MAX_TOKEN; tok++)
118 {
119 compiled_token_array[tok].m_token_string= "(unknown)";
120 compiled_token_array[tok].m_token_length= 9;
121 compiled_token_array[tok].m_append_space= true;
122 compiled_token_array[tok].m_start_expr= false;
123 }
124
125 /*
126 Tokens made of just one terminal character
127 */
128 for (tok=0; tok < 256; tok++)
129 {
130 str= & char_tokens[tok];
131 str[0]= (char) tok;
132 compiled_token_array[tok].m_token_string= str;
133 compiled_token_array[tok].m_token_length= 1;
134 compiled_token_array[tok].m_append_space= true;
135 }
136
137 max_token_seen= 255;
138
139 /*
140 String terminal tokens, used in sql_yacc.yy
141 */
142 set_token(NEG, "~");
143 set_token(TABLE_REF_PRIORITY, "TABLE_REF_PRIORITY");
144
145 /*
146 Tokens hard coded in sql_lex.cc
147 */
148
149 set_token(WITH_CUBE_SYM, "WITH CUBE");
150 set_token(WITH_ROLLUP_SYM, "WITH ROLLUP");
151 set_token(NOT2_SYM, "!");
152 set_token(OR2_SYM, "|");
153 set_token(PARAM_MARKER, "?");
154 set_token(SET_VAR, ":=");
155 set_token(UNDERSCORE_CHARSET, "(_charset)");
156 set_token(END_OF_INPUT, "");
157 set_token(JSON_SEPARATOR_SYM, "->");
158 set_token(JSON_UNQUOTED_SEPARATOR_SYM, "->>");
159
160 /*
161 Values.
162 These tokens are all normalized later,
163 so this strings will never be displayed.
164 */
165 set_token(BIN_NUM, "(bin)");
166 set_token(DECIMAL_NUM, "(decimal)");
167 set_token(FLOAT_NUM, "(float)");
168 set_token(HEX_NUM, "(hex)");
169 set_token(LEX_HOSTNAME, "(hostname)");
170 set_token(LONG_NUM, "(long)");
171 set_token(NUM, "(num)");
172 set_token(TEXT_STRING, "(text)");
173 set_token(NCHAR_STRING, "(nchar)");
174 set_token(ULONGLONG_NUM, "(ulonglong)");
175
176 /*
177 Identifiers.
178 */
179 set_token(IDENT, "(id)");
180 set_token(IDENT_QUOTED, "(id_quoted)");
181
182 /*
183 Unused tokens
184 */
185 set_token(LOCATOR_SYM, "LOCATOR");
186 set_token(SERVER_OPTIONS, "SERVER_OPTIONS");
187 set_token(UDF_RETURNS_SYM, "UDF_RETURNS");
188
189 /*
190 See symbols[] in sql/lex.h
191 */
192 for (i= 0; i< sizeof(symbols)/sizeof(symbols[0]); i++)
193 {
194 if (!(symbols[i].group & SG_MAIN_PARSER))
195 continue;
196 set_token(symbols[i].tok, symbols[i].name);
197 }
198
199 /*
200 FAKE tokens to output "optimizer hint" keywords.
201
202 Hint keyword token values may interfere with token values of the main SQL
203 parser, so the tok_hint_adjust adjustment is needed to add them into
204 compiled_token_array and lex_token_array.
205
206 Also see the TOK_HINT_ADJUST() adjustment macro definition.
207 */
208 int tok_hint_min= INT_MAX;
209 for (unsigned int i= 0; i < sizeof(symbols)/sizeof(symbols[0]); i++)
210 {
211 if ((symbols[i].group & SG_HINTS) &&
212 static_cast<int>(symbols[i].tok) < tok_hint_min)
213 tok_hint_min= symbols[i].tok; // Calculate the minimal hint token value.
214 }
215 tok_hint_adjust= max_token_seen + 1 - tok_hint_min;
216 for (unsigned int i= 0; i < sizeof(symbols)/sizeof(symbols[0]); i++)
217 {
218 if (!(symbols[i].group & SG_HINTS))
219 continue;
220 set_token(symbols[i].tok + tok_hint_adjust, symbols[i].name);
221 }
222
223 /*
224 Additional FAKE tokens,
225 used internally to normalize a digest text.
226 */
227
228 max_token_seen++;
229 tok_generic_value= max_token_seen;
230 set_token(tok_generic_value, "?");
231
232 max_token_seen++;
233 tok_generic_value_list= max_token_seen;
234 set_token(tok_generic_value_list, "?, ...");
235
236 max_token_seen++;
237 tok_row_single_value= max_token_seen;
238 set_token(tok_row_single_value, "(?)");
239
240 max_token_seen++;
241 tok_row_single_value_list= max_token_seen;
242 set_token(tok_row_single_value_list, "(?) /* , ... */");
243
244 max_token_seen++;
245 tok_row_multiple_value= max_token_seen;
246 set_token(tok_row_multiple_value, "(...)");
247
248 max_token_seen++;
249 tok_row_multiple_value_list= max_token_seen;
250 set_token(tok_row_multiple_value_list, "(...) /* , ... */");
251
252 max_token_seen++;
253 tok_ident= max_token_seen;
254 set_token(tok_ident, "(tok_id)");
255
256 max_token_seen++;
257 tok_ident_at= max_token_seen;
258 set_token(tok_ident_at, "(tok_id_at)");
259
260 max_token_seen++;
261 tok_hint_comment_open= max_token_seen;
262 set_token(tok_hint_comment_open, HINT_COMMENT_STARTER);
263
264 max_token_seen++;
265 tok_hint_comment_close= max_token_seen;
266 set_token(tok_hint_comment_close, HINT_COMMENT_TERMINATOR);
267
268 max_token_seen++;
269 tok_unused= max_token_seen;
270 set_token(tok_unused, "UNUSED");
271
272 /*
273 Fix whitespace for some special tokens.
274 */
275
276 /*
277 The lexer parses "@@variable" as '@', '@', 'variable',
278 returning a token for '@' alone.
279
280 This is incorrect, '@' is not really a token,
281 because the syntax "@ @ variable" (with spaces) is not accepted:
282 The lexer keeps some internal state after the '@' fake token.
283
284 To work around this, digest text are printed as "@@variable".
285 */
286 compiled_token_array[(int) '@'].m_append_space= false;
287
288 /*
289 Define additional properties for tokens.
290
291 List all the token that are followed by an expression.
292 This is needed to differentiate unary from binary
293 '+' and '-' operators, because we want to:
294 - reduce <unary +> <NUM> to <?>,
295 - preserve <...> <binary +> <NUM> as is.
296 */
297 set_start_expr_token('(');
298 set_start_expr_token(',');
299 set_start_expr_token(EVERY_SYM);
300 set_start_expr_token(AT_SYM);
301 set_start_expr_token(STARTS_SYM);
302 set_start_expr_token(ENDS_SYM);
303 set_start_expr_token(DEFAULT);
304 set_start_expr_token(RETURN_SYM);
305 set_start_expr_token(IF);
306 set_start_expr_token(ELSEIF_SYM);
307 set_start_expr_token(CASE_SYM);
308 set_start_expr_token(WHEN_SYM);
309 set_start_expr_token(WHILE_SYM);
310 set_start_expr_token(UNTIL_SYM);
311 set_start_expr_token(SELECT_SYM);
312
313 set_start_expr_token(OR_SYM);
314 set_start_expr_token(OR2_SYM);
315 set_start_expr_token(XOR);
316 set_start_expr_token(AND_SYM);
317 set_start_expr_token(AND_AND_SYM);
318 set_start_expr_token(NOT_SYM);
319 set_start_expr_token(BETWEEN_SYM);
320 set_start_expr_token(LIKE);
321 set_start_expr_token(REGEXP);
322
323 set_start_expr_token('|');
324 set_start_expr_token('&');
325 set_start_expr_token(SHIFT_LEFT);
326 set_start_expr_token(SHIFT_RIGHT);
327 set_start_expr_token('+');
328 set_start_expr_token('-');
329 set_start_expr_token(INTERVAL_SYM);
330 set_start_expr_token('*');
331 set_start_expr_token('/');
332 set_start_expr_token('%');
333 set_start_expr_token(DIV_SYM);
334 set_start_expr_token(MOD_SYM);
335 set_start_expr_token('^');
336 }
337
print_tokens()338 void print_tokens()
339 {
340 int tok;
341
342 printf("#ifdef LEX_TOKEN_WITH_DEFINITION\n");
343 printf("lex_token_string lex_token_array[]=\n");
344 printf("{\n");
345 printf("/* PART 1: character tokens. */\n");
346
347 for (tok= 0; tok<256; tok++)
348 {
349 printf("/* %03d */ { \"\\x%02x\", 1, %s, %s},\n",
350 tok,
351 tok,
352 compiled_token_array[tok].m_append_space ? "true" : "false",
353 compiled_token_array[tok].m_start_expr ? "true" : "false");
354 }
355
356 printf("/* PART 2: named tokens. */\n");
357
358 for (tok= 256; tok<= max_token_seen; tok++)
359 {
360 printf("/* %03d */ { \"%s\", %d, %s, %s},\n",
361 tok,
362 compiled_token_array[tok].m_token_string,
363 compiled_token_array[tok].m_token_length,
364 compiled_token_array[tok].m_append_space ? "true" : "false",
365 compiled_token_array[tok].m_start_expr ? "true" : "false");
366 }
367
368 printf("/* DUMMY */ { \"\", 0, false, false}\n");
369 printf("};\n");
370 printf("#endif /* LEX_TOKEN_WITH_DEFINITION */\n");
371
372 printf("/* DIGEST specific tokens. */\n");
373 printf("#define TOK_GENERIC_VALUE %d\n", tok_generic_value);
374 printf("#define TOK_GENERIC_VALUE_LIST %d\n", tok_generic_value_list);
375 printf("#define TOK_ROW_SINGLE_VALUE %d\n", tok_row_single_value);
376 printf("#define TOK_ROW_SINGLE_VALUE_LIST %d\n", tok_row_single_value_list);
377 printf("#define TOK_ROW_MULTIPLE_VALUE %d\n", tok_row_multiple_value);
378 printf("#define TOK_ROW_MULTIPLE_VALUE_LIST %d\n", tok_row_multiple_value_list);
379 printf("#define TOK_IDENT %d\n", tok_ident);
380 printf("#define TOK_IDENT_AT %d\n", tok_ident_at);
381 printf("#define TOK_HINT_COMMENT_OPEN %d\n", tok_hint_comment_open);
382 printf("#define TOK_HINT_COMMENT_CLOSE %d\n", tok_hint_comment_close);
383 printf("#define TOK_HINT_ADJUST(x) ((x) + %d)\n", tok_hint_adjust);
384 printf("#define TOK_UNUSED %d\n", tok_unused);
385 }
386
main(int argc,char ** argv)387 int main(int argc,char **argv)
388 {
389 puts("/*");
390 puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2011"));
391 puts("*/");
392
393 printf("/*\n");
394 printf(" This file is generated, do not edit.\n");
395 printf(" See file sql/gen_lex_token.cc.\n");
396 printf("*/\n");
397 printf("struct lex_token_string\n");
398 printf("{\n");
399 printf(" const char *m_token_string;\n");
400 printf(" int m_token_length;\n");
401 printf(" bool m_append_space;\n");
402 printf(" bool m_start_expr;\n");
403 printf("};\n");
404 printf("typedef struct lex_token_string lex_token_string;\n");
405
406 compute_tokens();
407 print_tokens();
408
409 return 0;
410 }
411
412