1 /*
2    Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software Foundation,
22    51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
23 
24 #include <my_global.h>
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28 
29 /* We only need the tokens here */
30 #define YYSTYPE_IS_DECLARED
31 #include <sql_yacc.h>
32 #include <lex.h>
33 
34 #include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */
35 
/*
  Number of entries in the token table.

  This tool only runs during the build, so MY_MAX_TOKEN does not have
  to be exact. It only has to be big enough to hold, from bison:
  - the 256 single character terminal tokens,
  - the YYNTOKENS named terminal tokens.
  See also YYMAXUTOK.
*/
#define MY_MAX_TOKEN 1000
/**
  Generated token.
  One entry of the token table built by this tool, holding the text
  of a token and the properties attached to it.
*/
struct gen_lex_token_string
{
  /** Text of the token. */
  const char *m_token_string;
  /** Length of the token text. */
  int m_token_length;
  /**
    Whitespace property of the token.
    True by default; cleared for '@' so that digest text
    is printed as "@@variable".
  */
  bool m_append_space;
  /**
    True for tokens that are followed by an expression.
    Used to differentiate unary from binary '+' and '-' operators.
  */
  bool m_start_expr;
};
54 
55 gen_lex_token_string compiled_token_array[MY_MAX_TOKEN];
56 int max_token_seen= 0;
57 
58 char char_tokens[256];
59 
60 int tok_generic_value= 0;
61 int tok_generic_value_list= 0;
62 int tok_row_single_value= 0;
63 int tok_row_single_value_list= 0;
64 int tok_row_multiple_value= 0;
65 int tok_row_multiple_value_list= 0;
66 int tok_ident= 0;
67 int tok_unused= 0;
68 
set_token(int tok,const char * str)69 void set_token(int tok, const char *str)
70 {
71   if (tok <= 0)
72   {
73     fprintf(stderr, "Bad token found\n");
74     exit(1);
75   }
76 
77   if (tok > max_token_seen)
78   {
79     max_token_seen= tok;
80   }
81 
82   if (max_token_seen >= MY_MAX_TOKEN)
83   {
84     fprintf(stderr, "Added that many new keywords ? Increase MY_MAX_TOKEN\n");
85     exit(1);
86   }
87 
88   compiled_token_array[tok].m_token_string= str;
89   compiled_token_array[tok].m_token_length= strlen(str);
90   compiled_token_array[tok].m_append_space= true;
91   compiled_token_array[tok].m_start_expr= false;
92 }
93 
set_start_expr_token(int tok)94 void set_start_expr_token(int tok)
95 {
96   compiled_token_array[tok].m_start_expr= true;
97 }
98 
compute_tokens()99 void compute_tokens()
100 {
101   int tok;
102   unsigned int i;
103   char *str;
104 
105   /*
106     Default value.
107   */
108   for (tok= 0; tok < MY_MAX_TOKEN; tok++)
109   {
110     compiled_token_array[tok].m_token_string= "(unknown)";
111     compiled_token_array[tok].m_token_length= 9;
112     compiled_token_array[tok].m_append_space= true;
113     compiled_token_array[tok].m_start_expr= false;
114   }
115 
116   /*
117     Tokens made of just one terminal character
118   */
119   for (tok=0; tok < 256; tok++)
120   {
121     str= & char_tokens[tok];
122     str[0]= (char) tok;
123     compiled_token_array[tok].m_token_string= str;
124     compiled_token_array[tok].m_token_length= 1;
125     compiled_token_array[tok].m_append_space= true;
126   }
127 
128   max_token_seen= 255;
129 
130   /*
131     String terminal tokens, used in sql_yacc.yy
132   */
133   set_token(NEG, "~");
134   set_token(TABLE_REF_PRIORITY, "TABLE_REF_PRIORITY");
135 
136   /*
137     Tokens hard coded in sql_lex.cc
138   */
139 
140   set_token(WITH_CUBE_SYM, "WITH CUBE");
141   set_token(WITH_ROLLUP_SYM, "WITH ROLLUP");
142   set_token(NOT2_SYM, "!");
143   set_token(OR2_SYM, "|");
144   set_token(PARAM_MARKER, "?");
145   set_token(SET_VAR, ":=");
146   set_token(UNDERSCORE_CHARSET, "(_charset)");
147   set_token(END_OF_INPUT, "");
148 
149   /*
150     Values.
151     These tokens are all normalized later,
152     so this strings will never be displayed.
153   */
154   set_token(BIN_NUM, "(bin)");
155   set_token(DECIMAL_NUM, "(decimal)");
156   set_token(FLOAT_NUM, "(float)");
157   set_token(HEX_NUM, "(hex)");
158   set_token(LEX_HOSTNAME, "(hostname)");
159   set_token(LONG_NUM, "(long)");
160   set_token(NUM, "(num)");
161   set_token(TEXT_STRING, "(text)");
162   set_token(NCHAR_STRING, "(nchar)");
163   set_token(ULONGLONG_NUM, "(ulonglong)");
164 
165   /*
166     Identifiers.
167   */
168   set_token(IDENT, "(id)");
169   set_token(IDENT_QUOTED, "(id_quoted)");
170 
171   /*
172     Unused tokens
173   */
174   set_token(LOCATOR_SYM, "LOCATOR");
175   set_token(SERVER_OPTIONS, "SERVER_OPTIONS");
176   set_token(UDF_RETURNS_SYM, "UDF_RETURNS");
177 
178   /*
179     See symbols[] in sql/lex.h
180   */
181   for (i= 0; i< sizeof(symbols)/sizeof(symbols[0]); i++)
182   {
183     set_token(symbols[i].tok, symbols[i].name);
184   }
185 
186   /*
187     See sql_functions[] in sql/lex.h
188   */
189   for (i= 0; i< sizeof(sql_functions)/sizeof(sql_functions[0]); i++)
190   {
191     set_token(sql_functions[i].tok, sql_functions[i].name);
192   }
193 
194   /*
195     Additional FAKE tokens,
196     used internally to normalize a digest text.
197   */
198 
199   max_token_seen++;
200   tok_generic_value= max_token_seen;
201   set_token(tok_generic_value, "?");
202 
203   max_token_seen++;
204   tok_generic_value_list= max_token_seen;
205   set_token(tok_generic_value_list, "?, ...");
206 
207   max_token_seen++;
208   tok_row_single_value= max_token_seen;
209   set_token(tok_row_single_value, "(?)");
210 
211   max_token_seen++;
212   tok_row_single_value_list= max_token_seen;
213   set_token(tok_row_single_value_list, "(?) /* , ... */");
214 
215   max_token_seen++;
216   tok_row_multiple_value= max_token_seen;
217   set_token(tok_row_multiple_value, "(...)");
218 
219   max_token_seen++;
220   tok_row_multiple_value_list= max_token_seen;
221   set_token(tok_row_multiple_value_list, "(...) /* , ... */");
222 
223   max_token_seen++;
224   tok_ident= max_token_seen;
225   set_token(tok_ident, "(tok_id)");
226 
227   max_token_seen++;
228   tok_unused= max_token_seen;
229   set_token(tok_unused, "UNUSED");
230 
231   /*
232     Fix whitespace for some special tokens.
233   */
234 
235   /*
236     The lexer parses "@@variable" as '@', '@', 'variable',
237     returning a token for '@' alone.
238 
239     This is incorrect, '@' is not really a token,
240     because the syntax "@ @ variable" (with spaces) is not accepted:
241     The lexer keeps some internal state after the '@' fake token.
242 
243     To work around this, digest text are printed as "@@variable".
244   */
245   compiled_token_array[(int) '@'].m_append_space= false;
246 
247   /*
248     Define additional properties for tokens.
249 
250     List all the token that are followed by an expression.
251     This is needed to differentiate unary from binary
252     '+' and '-' operators, because we want to:
253     - reduce <unary +> <NUM> to <?>,
254     - preserve <...> <binary +> <NUM> as is.
255   */
256   set_start_expr_token('(');
257   set_start_expr_token(',');
258   set_start_expr_token(EVERY_SYM);
259   set_start_expr_token(AT_SYM);
260   set_start_expr_token(STARTS_SYM);
261   set_start_expr_token(ENDS_SYM);
262   set_start_expr_token(DEFAULT);
263   set_start_expr_token(RETURN_SYM);
264   set_start_expr_token(IF);
265   set_start_expr_token(ELSEIF_SYM);
266   set_start_expr_token(CASE_SYM);
267   set_start_expr_token(WHEN_SYM);
268   set_start_expr_token(WHILE_SYM);
269   set_start_expr_token(UNTIL_SYM);
270   set_start_expr_token(SELECT_SYM);
271 
272   set_start_expr_token(OR_SYM);
273   set_start_expr_token(OR2_SYM);
274   set_start_expr_token(XOR);
275   set_start_expr_token(AND_SYM);
276   set_start_expr_token(AND_AND_SYM);
277   set_start_expr_token(NOT_SYM);
278   set_start_expr_token(BETWEEN_SYM);
279   set_start_expr_token(LIKE);
280   set_start_expr_token(REGEXP);
281 
282   set_start_expr_token('|');
283   set_start_expr_token('&');
284   set_start_expr_token(SHIFT_LEFT);
285   set_start_expr_token(SHIFT_RIGHT);
286   set_start_expr_token('+');
287   set_start_expr_token('-');
288   set_start_expr_token(INTERVAL_SYM);
289   set_start_expr_token('*');
290   set_start_expr_token('/');
291   set_start_expr_token('%');
292   set_start_expr_token(DIV_SYM);
293   set_start_expr_token(MOD_SYM);
294   set_start_expr_token('^');
295 }
296 
print_tokens()297 void print_tokens()
298 {
299   int tok;
300 
301   printf("#ifdef LEX_TOKEN_WITH_DEFINITION\n");
302   printf("lex_token_string lex_token_array[]=\n");
303   printf("{\n");
304   printf("/* PART 1: character tokens. */\n");
305 
306   for (tok= 0; tok<256; tok++)
307   {
308     printf("/* %03d */  { \"\\x%02x\", 1, %s, %s},\n",
309            tok,
310            tok,
311            compiled_token_array[tok].m_append_space ? "true" : "false",
312            compiled_token_array[tok].m_start_expr ? "true" : "false");
313   }
314 
315   printf("/* PART 2: named tokens. */\n");
316 
317   for (tok= 256; tok<= max_token_seen; tok++)
318   {
319     printf("/* %03d */  { \"%s\", %d, %s, %s},\n",
320            tok,
321            compiled_token_array[tok].m_token_string,
322            compiled_token_array[tok].m_token_length,
323            compiled_token_array[tok].m_append_space ? "true" : "false",
324            compiled_token_array[tok].m_start_expr ? "true" : "false");
325   }
326 
327   printf("/* DUMMY */ { \"\", 0, false, false}\n");
328   printf("};\n");
329   printf("#endif /* LEX_TOKEN_WITH_DEFINITION */\n");
330 
331   printf("/* DIGEST specific tokens. */\n");
332   printf("#define TOK_GENERIC_VALUE %d\n", tok_generic_value);
333   printf("#define TOK_GENERIC_VALUE_LIST %d\n", tok_generic_value_list);
334   printf("#define TOK_ROW_SINGLE_VALUE %d\n", tok_row_single_value);
335   printf("#define TOK_ROW_SINGLE_VALUE_LIST %d\n", tok_row_single_value_list);
336   printf("#define TOK_ROW_MULTIPLE_VALUE %d\n", tok_row_multiple_value);
337   printf("#define TOK_ROW_MULTIPLE_VALUE_LIST %d\n", tok_row_multiple_value_list);
338   printf("#define TOK_IDENT %d\n", tok_ident);
339   printf("#define TOK_UNUSED %d\n", tok_unused);
340 }
341 
main(int argc,char ** argv)342 int main(int argc,char **argv)
343 {
344   puts("/*");
345   puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2011"));
346   puts("*/");
347 
348   printf("/*\n");
349   printf("  This file is generated, do not edit.\n");
350   printf("  See file sql/gen_lex_token.cc.\n");
351   printf("*/\n");
352   printf("struct lex_token_string\n");
353   printf("{\n");
354   printf("  const char *m_token_string;\n");
355   printf("  int m_token_length;\n");
356   printf("  bool m_append_space;\n");
357   printf("  bool m_start_expr;\n");
358   printf("};\n");
359   printf("typedef struct lex_token_string lex_token_string;\n");
360 
361   compute_tokens();
362   print_tokens();
363 
364   return 0;
365 }
366 
367