1 /*
2    Copyright (c) 2011, 2021, Oracle and/or its affiliates.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software Foundation,
22    51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
23 
24 #include <my_global.h>
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28 
29 /* We only need the tokens here */
30 #define YYSTYPE_IS_DECLARED
31 #include <lex.h>
32 
33 #include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */
34 
/*
  Capacity of compiled_token_array.

  This program only runs during the build, so the value is a generous
  upper bound rather than an exact count. It has to stay above the largest
  token value registered by compute_tokens(), which covers:
  - the 256 single character terminal tokens,
  - the YYNTOKENS named terminal tokens from bison (see also YYMAXUTOK),
  - the re-numbered optimizer hint keywords and the fake digest tokens
    appended after them.
  set_token() stops the program when this limit is reached.
*/
#define MY_MAX_TOKEN 1000
/**
  Description of one token, as collected by this tool before it is
  printed into the generated lex_token_array.
*/
struct gen_lex_token_string
{
  /** Text of the token. */
  const char *m_token_string;
  /** Length of m_token_string, in bytes. */
  int m_token_length;
  /**
    Whether a space is appended after the token.
    Cleared for '@', so that "@@variable" is printed without spaces.
  */
  bool m_append_space;
  /** True for tokens that are followed by an expression. */
  bool m_start_expr;
};
53 
54 gen_lex_token_string compiled_token_array[MY_MAX_TOKEN];
55 int max_token_seen= 0;
56 
57 char char_tokens[256];
58 
59 int tok_generic_value= 0;
60 int tok_generic_value_list= 0;
61 int tok_row_single_value= 0;
62 int tok_row_single_value_list= 0;
63 int tok_row_multiple_value= 0;
64 int tok_row_multiple_value_list= 0;
65 int tok_ident= 0;
66 int tok_ident_at= 0; ///< Fake token for the left part of table@query_block.
67 int tok_hint_comment_open= 0; ///< Fake token value for "/*+" of hint comments.
68 int tok_hint_comment_close= 0; ///< Fake token value for "*/" of hint comments.
69 int tok_unused= 0;
70 
71 /**
72   Adjustment value to translate hint parser's internal token values to generally
73   visible token values. This adjustment is necessary, since keyword token values
74   of separate parsers may interfere.
75 */
76 int tok_hint_adjust= 0;
77 
set_token(int tok,const char * str)78 void set_token(int tok, const char *str)
79 {
80   if (tok <= 0)
81   {
82     fprintf(stderr, "Bad token found\n");
83     exit(1);
84   }
85 
86   if (tok > max_token_seen)
87   {
88     max_token_seen= tok;
89   }
90 
91   if (max_token_seen >= MY_MAX_TOKEN)
92   {
93     fprintf(stderr, "Added that many new keywords ? Increase MY_MAX_TOKEN\n");
94     exit(1);
95   }
96 
97   compiled_token_array[tok].m_token_string= str;
98   compiled_token_array[tok].m_token_length= strlen(str);
99   compiled_token_array[tok].m_append_space= true;
100   compiled_token_array[tok].m_start_expr= false;
101 }
102 
set_start_expr_token(int tok)103 void set_start_expr_token(int tok)
104 {
105   compiled_token_array[tok].m_start_expr= true;
106 }
107 
compute_tokens()108 void compute_tokens()
109 {
110   int tok;
111   unsigned int i;
112   char *str;
113 
114   /*
115     Default value.
116   */
117   for (tok= 0; tok < MY_MAX_TOKEN; tok++)
118   {
119     compiled_token_array[tok].m_token_string= "(unknown)";
120     compiled_token_array[tok].m_token_length= 9;
121     compiled_token_array[tok].m_append_space= true;
122     compiled_token_array[tok].m_start_expr= false;
123   }
124 
125   /*
126     Tokens made of just one terminal character
127   */
128   for (tok=0; tok < 256; tok++)
129   {
130     str= & char_tokens[tok];
131     str[0]= (char) tok;
132     compiled_token_array[tok].m_token_string= str;
133     compiled_token_array[tok].m_token_length= 1;
134     compiled_token_array[tok].m_append_space= true;
135   }
136 
137   max_token_seen= 255;
138 
139   /*
140     String terminal tokens, used in sql_yacc.yy
141   */
142   set_token(NEG, "~");
143   set_token(TABLE_REF_PRIORITY, "TABLE_REF_PRIORITY");
144 
145   /*
146     Tokens hard coded in sql_lex.cc
147   */
148 
149   set_token(WITH_CUBE_SYM, "WITH CUBE");
150   set_token(WITH_ROLLUP_SYM, "WITH ROLLUP");
151   set_token(NOT2_SYM, "!");
152   set_token(OR2_SYM, "|");
153   set_token(PARAM_MARKER, "?");
154   set_token(SET_VAR, ":=");
155   set_token(UNDERSCORE_CHARSET, "(_charset)");
156   set_token(END_OF_INPUT, "");
157   set_token(JSON_SEPARATOR_SYM, "->");
158   set_token(JSON_UNQUOTED_SEPARATOR_SYM, "->>");
159 
160   /*
161     Values.
162     These tokens are all normalized later,
163     so this strings will never be displayed.
164   */
165   set_token(BIN_NUM, "(bin)");
166   set_token(DECIMAL_NUM, "(decimal)");
167   set_token(FLOAT_NUM, "(float)");
168   set_token(HEX_NUM, "(hex)");
169   set_token(LEX_HOSTNAME, "(hostname)");
170   set_token(LONG_NUM, "(long)");
171   set_token(NUM, "(num)");
172   set_token(TEXT_STRING, "(text)");
173   set_token(NCHAR_STRING, "(nchar)");
174   set_token(ULONGLONG_NUM, "(ulonglong)");
175 
176   /*
177     Identifiers.
178   */
179   set_token(IDENT, "(id)");
180   set_token(IDENT_QUOTED, "(id_quoted)");
181 
182   /*
183     Unused tokens
184   */
185   set_token(LOCATOR_SYM, "LOCATOR");
186   set_token(SERVER_OPTIONS, "SERVER_OPTIONS");
187   set_token(UDF_RETURNS_SYM, "UDF_RETURNS");
188 
189   /*
190     See symbols[] in sql/lex.h
191   */
192   for (i= 0; i< sizeof(symbols)/sizeof(symbols[0]); i++)
193   {
194     if (!(symbols[i].group & SG_MAIN_PARSER))
195       continue;
196     set_token(symbols[i].tok, symbols[i].name);
197   }
198 
199   /*
200     FAKE tokens to output "optimizer hint" keywords.
201 
202     Hint keyword token values may interfere with token values of the main SQL
203     parser, so the tok_hint_adjust adjustment is needed to add them into
204     compiled_token_array and lex_token_array.
205 
206     Also see the TOK_HINT_ADJUST() adjustment macro definition.
207   */
208   int tok_hint_min= INT_MAX;
209   for (unsigned int i= 0; i < sizeof(symbols)/sizeof(symbols[0]); i++)
210   {
211     if ((symbols[i].group & SG_HINTS) &&
212         static_cast<int>(symbols[i].tok) < tok_hint_min)
213       tok_hint_min= symbols[i].tok; // Calculate the minimal hint token value.
214   }
215   tok_hint_adjust= max_token_seen + 1 - tok_hint_min;
216   for (unsigned int i= 0; i < sizeof(symbols)/sizeof(symbols[0]); i++)
217   {
218     if (!(symbols[i].group & SG_HINTS))
219       continue;
220     set_token(symbols[i].tok + tok_hint_adjust, symbols[i].name);
221   }
222 
223   /*
224     Additional FAKE tokens,
225     used internally to normalize a digest text.
226   */
227 
228   max_token_seen++;
229   tok_generic_value= max_token_seen;
230   set_token(tok_generic_value, "?");
231 
232   max_token_seen++;
233   tok_generic_value_list= max_token_seen;
234   set_token(tok_generic_value_list, "?, ...");
235 
236   max_token_seen++;
237   tok_row_single_value= max_token_seen;
238   set_token(tok_row_single_value, "(?)");
239 
240   max_token_seen++;
241   tok_row_single_value_list= max_token_seen;
242   set_token(tok_row_single_value_list, "(?) /* , ... */");
243 
244   max_token_seen++;
245   tok_row_multiple_value= max_token_seen;
246   set_token(tok_row_multiple_value, "(...)");
247 
248   max_token_seen++;
249   tok_row_multiple_value_list= max_token_seen;
250   set_token(tok_row_multiple_value_list, "(...) /* , ... */");
251 
252   max_token_seen++;
253   tok_ident= max_token_seen;
254   set_token(tok_ident, "(tok_id)");
255 
256   max_token_seen++;
257   tok_ident_at= max_token_seen;
258   set_token(tok_ident_at, "(tok_id_at)");
259 
260   max_token_seen++;
261   tok_hint_comment_open= max_token_seen;
262   set_token(tok_hint_comment_open, HINT_COMMENT_STARTER);
263 
264   max_token_seen++;
265   tok_hint_comment_close= max_token_seen;
266   set_token(tok_hint_comment_close, HINT_COMMENT_TERMINATOR);
267 
268   max_token_seen++;
269   tok_unused= max_token_seen;
270   set_token(tok_unused, "UNUSED");
271 
272   /*
273     Fix whitespace for some special tokens.
274   */
275 
276   /*
277     The lexer parses "@@variable" as '@', '@', 'variable',
278     returning a token for '@' alone.
279 
280     This is incorrect, '@' is not really a token,
281     because the syntax "@ @ variable" (with spaces) is not accepted:
282     The lexer keeps some internal state after the '@' fake token.
283 
284     To work around this, digest text are printed as "@@variable".
285   */
286   compiled_token_array[(int) '@'].m_append_space= false;
287 
288   /*
289     Define additional properties for tokens.
290 
291     List all the token that are followed by an expression.
292     This is needed to differentiate unary from binary
293     '+' and '-' operators, because we want to:
294     - reduce <unary +> <NUM> to <?>,
295     - preserve <...> <binary +> <NUM> as is.
296   */
297   set_start_expr_token('(');
298   set_start_expr_token(',');
299   set_start_expr_token(EVERY_SYM);
300   set_start_expr_token(AT_SYM);
301   set_start_expr_token(STARTS_SYM);
302   set_start_expr_token(ENDS_SYM);
303   set_start_expr_token(DEFAULT);
304   set_start_expr_token(RETURN_SYM);
305   set_start_expr_token(IF);
306   set_start_expr_token(ELSEIF_SYM);
307   set_start_expr_token(CASE_SYM);
308   set_start_expr_token(WHEN_SYM);
309   set_start_expr_token(WHILE_SYM);
310   set_start_expr_token(UNTIL_SYM);
311   set_start_expr_token(SELECT_SYM);
312 
313   set_start_expr_token(OR_SYM);
314   set_start_expr_token(OR2_SYM);
315   set_start_expr_token(XOR);
316   set_start_expr_token(AND_SYM);
317   set_start_expr_token(AND_AND_SYM);
318   set_start_expr_token(NOT_SYM);
319   set_start_expr_token(BETWEEN_SYM);
320   set_start_expr_token(LIKE);
321   set_start_expr_token(REGEXP);
322 
323   set_start_expr_token('|');
324   set_start_expr_token('&');
325   set_start_expr_token(SHIFT_LEFT);
326   set_start_expr_token(SHIFT_RIGHT);
327   set_start_expr_token('+');
328   set_start_expr_token('-');
329   set_start_expr_token(INTERVAL_SYM);
330   set_start_expr_token('*');
331   set_start_expr_token('/');
332   set_start_expr_token('%');
333   set_start_expr_token(DIV_SYM);
334   set_start_expr_token(MOD_SYM);
335   set_start_expr_token('^');
336 }
337 
print_tokens()338 void print_tokens()
339 {
340   int tok;
341 
342   printf("#ifdef LEX_TOKEN_WITH_DEFINITION\n");
343   printf("lex_token_string lex_token_array[]=\n");
344   printf("{\n");
345   printf("/* PART 1: character tokens. */\n");
346 
347   for (tok= 0; tok<256; tok++)
348   {
349     printf("/* %03d */  { \"\\x%02x\", 1, %s, %s},\n",
350            tok,
351            tok,
352            compiled_token_array[tok].m_append_space ? "true" : "false",
353            compiled_token_array[tok].m_start_expr ? "true" : "false");
354   }
355 
356   printf("/* PART 2: named tokens. */\n");
357 
358   for (tok= 256; tok<= max_token_seen; tok++)
359   {
360     printf("/* %03d */  { \"%s\", %d, %s, %s},\n",
361            tok,
362            compiled_token_array[tok].m_token_string,
363            compiled_token_array[tok].m_token_length,
364            compiled_token_array[tok].m_append_space ? "true" : "false",
365            compiled_token_array[tok].m_start_expr ? "true" : "false");
366   }
367 
368   printf("/* DUMMY */ { \"\", 0, false, false}\n");
369   printf("};\n");
370   printf("#endif /* LEX_TOKEN_WITH_DEFINITION */\n");
371 
372   printf("/* DIGEST specific tokens. */\n");
373   printf("#define TOK_GENERIC_VALUE %d\n", tok_generic_value);
374   printf("#define TOK_GENERIC_VALUE_LIST %d\n", tok_generic_value_list);
375   printf("#define TOK_ROW_SINGLE_VALUE %d\n", tok_row_single_value);
376   printf("#define TOK_ROW_SINGLE_VALUE_LIST %d\n", tok_row_single_value_list);
377   printf("#define TOK_ROW_MULTIPLE_VALUE %d\n", tok_row_multiple_value);
378   printf("#define TOK_ROW_MULTIPLE_VALUE_LIST %d\n", tok_row_multiple_value_list);
379   printf("#define TOK_IDENT %d\n", tok_ident);
380   printf("#define TOK_IDENT_AT %d\n", tok_ident_at);
381   printf("#define TOK_HINT_COMMENT_OPEN %d\n", tok_hint_comment_open);
382   printf("#define TOK_HINT_COMMENT_CLOSE %d\n", tok_hint_comment_close);
383   printf("#define TOK_HINT_ADJUST(x) ((x) + %d)\n", tok_hint_adjust);
384   printf("#define TOK_UNUSED %d\n", tok_unused);
385 }
386 
main(int argc,char ** argv)387 int main(int argc,char **argv)
388 {
389   puts("/*");
390   puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2011"));
391   puts("*/");
392 
393   printf("/*\n");
394   printf("  This file is generated, do not edit.\n");
395   printf("  See file sql/gen_lex_token.cc.\n");
396   printf("*/\n");
397   printf("struct lex_token_string\n");
398   printf("{\n");
399   printf("  const char *m_token_string;\n");
400   printf("  int m_token_length;\n");
401   printf("  bool m_append_space;\n");
402   printf("  bool m_start_expr;\n");
403   printf("};\n");
404   printf("typedef struct lex_token_string lex_token_string;\n");
405 
406   compute_tokens();
407   print_tokens();
408 
409   return 0;
410 }
411 
412