1 /*
2    Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software Foundation,
15    51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
16 
17 #include "mariadb.h"
18 #include <string.h>
19 
20 /* We only need the tokens here */
21 #define YYSTYPE_IS_DECLARED
22 #include <sql_yacc.hh>
23 #include <lex.h>
24 
25 #include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */
26 
27 /*
28   This is a tool used during build only,
29   so MY_MAX_TOKEN does not need to be exact,
30   only big enough to hold:
31   - 256 character terminal tokens
32   - YYNTOKENS named terminal tokens
33   from bison.
34   See also YYMAXUTOK.
35 */
36 #define MY_MAX_TOKEN 1100
/**
  One entry of the generated token table.

  Entries are stored in compiled_token_array[], indexed by token number,
  and are written out by print_tokens().
*/
struct gen_lex_token_string
{
  /** Text of the token. The pointer is stored as given, not copied. */
  const char *m_token_string;
  /** Length of m_token_string, in bytes. */
  int m_token_length;
  /** Printed as the third field of the generated entry ("true"/"false"). */
  bool m_append_space;
  /** Set by set_start_expr_token(); printed as the fourth field. */
  bool m_start_expr;
};
45 
46 gen_lex_token_string compiled_token_array[MY_MAX_TOKEN];
47 int max_token_seen= 0;
48 
49 char char_tokens[256];
50 
51 int tok_generic_value= 0;
52 int tok_generic_value_list= 0;
53 int tok_row_single_value= 0;
54 int tok_row_single_value_list= 0;
55 int tok_row_multiple_value= 0;
56 int tok_row_multiple_value_list= 0;
57 int tok_ident= 0;
58 int tok_unused= 0;
59 
set_token(int tok,const char * str)60 void set_token(int tok, const char *str)
61 {
62   if (tok <= 0)
63   {
64     fprintf(stderr, "Bad token found\n");
65     exit(1);
66   }
67 
68   if (tok > max_token_seen)
69   {
70     max_token_seen= tok;
71   }
72 
73   if (max_token_seen >= MY_MAX_TOKEN)
74   {
75     fprintf(stderr, "Added that many new keywords ? Increase MY_MAX_TOKEN\n");
76     exit(1);
77   }
78 
79   compiled_token_array[tok].m_token_string= str;
80   compiled_token_array[tok].m_token_length= (int)strlen(str);
81   compiled_token_array[tok].m_append_space= true;
82   compiled_token_array[tok].m_start_expr= false;
83 }
84 
set_start_expr_token(int tok)85 void set_start_expr_token(int tok)
86 {
87   compiled_token_array[tok].m_start_expr= true;
88 }
89 
compute_tokens()90 void compute_tokens()
91 {
92   int tok;
93   unsigned int i;
94   char *str;
95 
96   /*
97     Default value.
98   */
99   for (tok= 0; tok < MY_MAX_TOKEN; tok++)
100   {
101     compiled_token_array[tok].m_token_string= "(unknown)";
102     compiled_token_array[tok].m_token_length= 9;
103     compiled_token_array[tok].m_append_space= true;
104     compiled_token_array[tok].m_start_expr= false;
105   }
106 
107   /*
108     Tokens made of just one terminal character
109   */
110   for (tok=0; tok < 256; tok++)
111   {
112     str= & char_tokens[tok];
113     str[0]= (char) tok;
114     compiled_token_array[tok].m_token_string= str;
115     compiled_token_array[tok].m_token_length= 1;
116     compiled_token_array[tok].m_append_space= true;
117   }
118 
119   max_token_seen= 255;
120 
121   /*
122     String terminal tokens, used in sql_yacc.yy
123   */
124   set_token(NEG, "~");
125   set_token(TABLE_REF_PRIORITY, "TABLE_REF_PRIORITY");
126 
127   /*
128     Tokens hard coded in sql_lex.cc
129   */
130 
131   set_token(WITH_CUBE_SYM, "WITH CUBE");
132   set_token(WITH_ROLLUP_SYM, "WITH ROLLUP");
133   set_token(WITH_SYSTEM_SYM, "WITH SYSTEM");
134   set_token(FOR_SYSTEM_TIME_SYM, "FOR SYSTEM_TIME");
135   set_token(VALUES_IN_SYM, "VALUES IN");
136   set_token(VALUES_LESS_SYM, "VALUES LESS");
137   set_token(NOT2_SYM, "!");
138   set_token(OR2_SYM, "|");
139   set_token(PARAM_MARKER, "?");
140   set_token(SET_VAR, ":=");
141   set_token(UNDERSCORE_CHARSET, "(_charset)");
142   set_token(END_OF_INPUT, "");
143 
144   /*
145     Values.
146     These tokens are all normalized later,
147     so this strings will never be displayed.
148   */
149   set_token(BIN_NUM, "(bin)");
150   set_token(DECIMAL_NUM, "(decimal)");
151   set_token(FLOAT_NUM, "(float)");
152   set_token(HEX_NUM, "(hex)");
153   set_token(LEX_HOSTNAME, "(hostname)");
154   set_token(LONG_NUM, "(long)");
155   set_token(NUM, "(num)");
156   set_token(TEXT_STRING, "(text)");
157   set_token(NCHAR_STRING, "(nchar)");
158   set_token(ULONGLONG_NUM, "(ulonglong)");
159 
160   /*
161     Identifiers.
162   */
163   set_token(IDENT, "(id)");
164   set_token(IDENT_QUOTED, "(id_quoted)");
165 
166   /*
167     Unused tokens
168   */
169   set_token(LOCATOR_SYM, "LOCATOR");
170   set_token(SERVER_OPTIONS, "SERVER_OPTIONS");
171   set_token(UDF_RETURNS_SYM, "UDF_RETURNS");
172 
173   /*
174     See symbols[] in sql/lex.h
175   */
176   for (i= 0; i< sizeof(symbols)/sizeof(symbols[0]); i++)
177   {
178     set_token(symbols[i].tok, symbols[i].name);
179   }
180 
181   /*
182     See sql_functions[] in sql/lex.h
183   */
184   for (i= 0; i< sizeof(sql_functions)/sizeof(sql_functions[0]); i++)
185   {
186     set_token(sql_functions[i].tok, sql_functions[i].name);
187   }
188 
189   /*
190     Additional FAKE tokens,
191     used internally to normalize a digest text.
192   */
193 
194   max_token_seen++;
195   tok_generic_value= max_token_seen;
196   set_token(tok_generic_value, "?");
197 
198   max_token_seen++;
199   tok_generic_value_list= max_token_seen;
200   set_token(tok_generic_value_list, "?, ...");
201 
202   max_token_seen++;
203   tok_row_single_value= max_token_seen;
204   set_token(tok_row_single_value, "(?)");
205 
206   max_token_seen++;
207   tok_row_single_value_list= max_token_seen;
208   set_token(tok_row_single_value_list, "(?) /* , ... */");
209 
210   max_token_seen++;
211   tok_row_multiple_value= max_token_seen;
212   set_token(tok_row_multiple_value, "(...)");
213 
214   max_token_seen++;
215   tok_row_multiple_value_list= max_token_seen;
216   set_token(tok_row_multiple_value_list, "(...) /* , ... */");
217 
218   max_token_seen++;
219   tok_ident= max_token_seen;
220   set_token(tok_ident, "(tok_id)");
221 
222   max_token_seen++;
223   tok_unused= max_token_seen;
224   set_token(tok_unused, "UNUSED");
225 
226   /*
227     Fix whitespace for some special tokens.
228   */
229 
230   /*
231     The lexer parses "@@variable" as '@', '@', 'variable',
232     returning a token for '@' alone.
233 
234     This is incorrect, '@' is not really a token,
235     because the syntax "@ @ variable" (with spaces) is not accepted:
236     The lexer keeps some internal state after the '@' fake token.
237 
238     To work around this, digest text are printed as "@@variable".
239   */
240   compiled_token_array[(int) '@'].m_append_space= false;
241 
242   /*
243     Define additional properties for tokens.
244 
245     List all the token that are followed by an expression.
246     This is needed to differentiate unary from binary
247     '+' and '-' operators, because we want to:
248     - reduce <unary +> <NUM> to <?>,
249     - preserve <...> <binary +> <NUM> as is.
250   */
251   set_start_expr_token('(');
252   set_start_expr_token(',');
253   set_start_expr_token(EVERY_SYM);
254   set_start_expr_token(AT_SYM);
255   set_start_expr_token(STARTS_SYM);
256   set_start_expr_token(ENDS_SYM);
257   set_start_expr_token(DEFAULT);
258   set_start_expr_token(RETURN_MARIADB_SYM);
259   set_start_expr_token(RETURN_ORACLE_SYM);
260   set_start_expr_token(IF_SYM);
261   set_start_expr_token(ELSEIF_MARIADB_SYM);
262   set_start_expr_token(ELSEIF_ORACLE_SYM);
263   set_start_expr_token(CASE_SYM);
264   set_start_expr_token(WHEN_SYM);
265   set_start_expr_token(WHILE_SYM);
266   set_start_expr_token(UNTIL_SYM);
267   set_start_expr_token(SELECT_SYM);
268 
269   set_start_expr_token(OR_SYM);
270   set_start_expr_token(OR2_SYM);
271   set_start_expr_token(XOR);
272   set_start_expr_token(AND_SYM);
273   set_start_expr_token(AND_AND_SYM);
274   set_start_expr_token(NOT_SYM);
275   set_start_expr_token(BETWEEN_SYM);
276   set_start_expr_token(LIKE);
277   set_start_expr_token(REGEXP);
278 
279   set_start_expr_token('|');
280   set_start_expr_token('&');
281   set_start_expr_token(SHIFT_LEFT);
282   set_start_expr_token(SHIFT_RIGHT);
283   set_start_expr_token('+');
284   set_start_expr_token('-');
285   set_start_expr_token(INTERVAL_SYM);
286   set_start_expr_token('*');
287   set_start_expr_token('/');
288   set_start_expr_token('%');
289   set_start_expr_token(DIV_SYM);
290   set_start_expr_token(MOD_SYM);
291   set_start_expr_token('^');
292 }
293 
print_tokens()294 void print_tokens()
295 {
296   int tok;
297 
298   printf("#ifdef LEX_TOKEN_WITH_DEFINITION\n");
299   printf("lex_token_string lex_token_array[]=\n");
300   printf("{\n");
301   printf("/* PART 1: character tokens. */\n");
302 
303   for (tok= 0; tok<256; tok++)
304   {
305     printf("/* %03d */  { \"\\x%02x\", 1, %s, %s},\n",
306            tok,
307            tok,
308            compiled_token_array[tok].m_append_space ? "true" : "false",
309            compiled_token_array[tok].m_start_expr ? "true" : "false");
310   }
311 
312   printf("/* PART 2: named tokens. */\n");
313 
314   for (tok= 256; tok<= max_token_seen; tok++)
315   {
316     printf("/* %03d */  { \"%s\", %d, %s, %s},\n",
317            tok,
318            compiled_token_array[tok].m_token_string,
319            compiled_token_array[tok].m_token_length,
320            compiled_token_array[tok].m_append_space ? "true" : "false",
321            compiled_token_array[tok].m_start_expr ? "true" : "false");
322   }
323 
324   printf("/* DUMMY */ { \"\", 0, false, false}\n");
325   printf("};\n");
326   printf("#endif /* LEX_TOKEN_WITH_DEFINITION */\n");
327 
328   printf("/* DIGEST specific tokens. */\n");
329   printf("#define TOK_GENERIC_VALUE %d\n", tok_generic_value);
330   printf("#define TOK_GENERIC_VALUE_LIST %d\n", tok_generic_value_list);
331   printf("#define TOK_ROW_SINGLE_VALUE %d\n", tok_row_single_value);
332   printf("#define TOK_ROW_SINGLE_VALUE_LIST %d\n", tok_row_single_value_list);
333   printf("#define TOK_ROW_MULTIPLE_VALUE %d\n", tok_row_multiple_value);
334   printf("#define TOK_ROW_MULTIPLE_VALUE_LIST %d\n", tok_row_multiple_value_list);
335   printf("#define TOK_IDENT %d\n", tok_ident);
336   printf("#define TOK_UNUSED %d\n", tok_unused);
337 }
338 
main(int argc,char ** argv)339 int main(int argc,char **argv)
340 {
341   puts("/*");
342   puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2011"));
343   puts("*/");
344 
345   printf("/*\n");
346   printf("  This file is generated, do not edit.\n");
347   printf("  See file sql/gen_lex_token.cc.\n");
348   printf("*/\n");
349   printf("struct lex_token_string\n");
350   printf("{\n");
351   printf("  const char *m_token_string;\n");
352   printf("  int m_token_length;\n");
353   printf("  bool m_append_space;\n");
354   printf("  bool m_start_expr;\n");
355   printf("};\n");
356   printf("typedef struct lex_token_string lex_token_string;\n");
357 
358   compute_tokens();
359   print_tokens();
360 
361   return 0;
362 }
363 
364