1 /*
2 Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software Foundation,
22 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
23
24 #include <my_global.h>
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28
29 /* We only need the tokens here */
30 #define YYSTYPE_IS_DECLARED
31 #include <sql_yacc.h>
32 #include <lex.h>
33
34 #include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */
35
36 /*
37 This is a tool used during build only,
38 so MY_MAX_TOKEN does not need to be exact,
39 only big enough to hold:
40 - 256 character terminal tokens
41 - YYNTOKENS named terminal tokens
42 from bison.
43 See also YYMAXUTOK.
44 */
45 #define MY_MAX_TOKEN 1000
/**
  Description of one token, as computed by this tool.

  The generated output declares a struct named lex_token_string with the
  same four members, in the same order.
*/
struct gen_lex_token_string
{
  /** Text of the token. The pointer is stored as is, never copied. */
  const char *m_token_string;
  /** Number of characters in m_token_string. */
  int m_token_length;
  /** Emitted as the third field of the token's generated table row. */
  bool m_append_space;
  /** Emitted as the fourth field; set by set_start_expr_token(). */
  bool m_start_expr;
};
54
55 gen_lex_token_string compiled_token_array[MY_MAX_TOKEN];
56 int max_token_seen= 0;
57
58 char char_tokens[256];
59
60 int tok_generic_value= 0;
61 int tok_generic_value_list= 0;
62 int tok_row_single_value= 0;
63 int tok_row_single_value_list= 0;
64 int tok_row_multiple_value= 0;
65 int tok_row_multiple_value_list= 0;
66 int tok_ident= 0;
67 int tok_unused= 0;
68
set_token(int tok,const char * str)69 void set_token(int tok, const char *str)
70 {
71 if (tok <= 0)
72 {
73 fprintf(stderr, "Bad token found\n");
74 exit(1);
75 }
76
77 if (tok > max_token_seen)
78 {
79 max_token_seen= tok;
80 }
81
82 if (max_token_seen >= MY_MAX_TOKEN)
83 {
84 fprintf(stderr, "Added that many new keywords ? Increase MY_MAX_TOKEN\n");
85 exit(1);
86 }
87
88 compiled_token_array[tok].m_token_string= str;
89 compiled_token_array[tok].m_token_length= strlen(str);
90 compiled_token_array[tok].m_append_space= true;
91 compiled_token_array[tok].m_start_expr= false;
92 }
93
set_start_expr_token(int tok)94 void set_start_expr_token(int tok)
95 {
96 compiled_token_array[tok].m_start_expr= true;
97 }
98
compute_tokens()99 void compute_tokens()
100 {
101 int tok;
102 unsigned int i;
103 char *str;
104
105 /*
106 Default value.
107 */
108 for (tok= 0; tok < MY_MAX_TOKEN; tok++)
109 {
110 compiled_token_array[tok].m_token_string= "(unknown)";
111 compiled_token_array[tok].m_token_length= 9;
112 compiled_token_array[tok].m_append_space= true;
113 compiled_token_array[tok].m_start_expr= false;
114 }
115
116 /*
117 Tokens made of just one terminal character
118 */
119 for (tok=0; tok < 256; tok++)
120 {
121 str= & char_tokens[tok];
122 str[0]= (char) tok;
123 compiled_token_array[tok].m_token_string= str;
124 compiled_token_array[tok].m_token_length= 1;
125 compiled_token_array[tok].m_append_space= true;
126 }
127
128 max_token_seen= 255;
129
130 /*
131 String terminal tokens, used in sql_yacc.yy
132 */
133 set_token(NEG, "~");
134 set_token(TABLE_REF_PRIORITY, "TABLE_REF_PRIORITY");
135
136 /*
137 Tokens hard coded in sql_lex.cc
138 */
139
140 set_token(WITH_CUBE_SYM, "WITH CUBE");
141 set_token(WITH_ROLLUP_SYM, "WITH ROLLUP");
142 set_token(NOT2_SYM, "!");
143 set_token(OR2_SYM, "|");
144 set_token(PARAM_MARKER, "?");
145 set_token(SET_VAR, ":=");
146 set_token(UNDERSCORE_CHARSET, "(_charset)");
147 set_token(END_OF_INPUT, "");
148
149 /*
150 Values.
151 These tokens are all normalized later,
152 so this strings will never be displayed.
153 */
154 set_token(BIN_NUM, "(bin)");
155 set_token(DECIMAL_NUM, "(decimal)");
156 set_token(FLOAT_NUM, "(float)");
157 set_token(HEX_NUM, "(hex)");
158 set_token(LEX_HOSTNAME, "(hostname)");
159 set_token(LONG_NUM, "(long)");
160 set_token(NUM, "(num)");
161 set_token(TEXT_STRING, "(text)");
162 set_token(NCHAR_STRING, "(nchar)");
163 set_token(ULONGLONG_NUM, "(ulonglong)");
164
165 /*
166 Identifiers.
167 */
168 set_token(IDENT, "(id)");
169 set_token(IDENT_QUOTED, "(id_quoted)");
170
171 /*
172 Unused tokens
173 */
174 set_token(LOCATOR_SYM, "LOCATOR");
175 set_token(SERVER_OPTIONS, "SERVER_OPTIONS");
176 set_token(UDF_RETURNS_SYM, "UDF_RETURNS");
177
178 /*
179 See symbols[] in sql/lex.h
180 */
181 for (i= 0; i< sizeof(symbols)/sizeof(symbols[0]); i++)
182 {
183 set_token(symbols[i].tok, symbols[i].name);
184 }
185
186 /*
187 See sql_functions[] in sql/lex.h
188 */
189 for (i= 0; i< sizeof(sql_functions)/sizeof(sql_functions[0]); i++)
190 {
191 set_token(sql_functions[i].tok, sql_functions[i].name);
192 }
193
194 /*
195 Additional FAKE tokens,
196 used internally to normalize a digest text.
197 */
198
199 max_token_seen++;
200 tok_generic_value= max_token_seen;
201 set_token(tok_generic_value, "?");
202
203 max_token_seen++;
204 tok_generic_value_list= max_token_seen;
205 set_token(tok_generic_value_list, "?, ...");
206
207 max_token_seen++;
208 tok_row_single_value= max_token_seen;
209 set_token(tok_row_single_value, "(?)");
210
211 max_token_seen++;
212 tok_row_single_value_list= max_token_seen;
213 set_token(tok_row_single_value_list, "(?) /* , ... */");
214
215 max_token_seen++;
216 tok_row_multiple_value= max_token_seen;
217 set_token(tok_row_multiple_value, "(...)");
218
219 max_token_seen++;
220 tok_row_multiple_value_list= max_token_seen;
221 set_token(tok_row_multiple_value_list, "(...) /* , ... */");
222
223 max_token_seen++;
224 tok_ident= max_token_seen;
225 set_token(tok_ident, "(tok_id)");
226
227 max_token_seen++;
228 tok_unused= max_token_seen;
229 set_token(tok_unused, "UNUSED");
230
231 /*
232 Fix whitespace for some special tokens.
233 */
234
235 /*
236 The lexer parses "@@variable" as '@', '@', 'variable',
237 returning a token for '@' alone.
238
239 This is incorrect, '@' is not really a token,
240 because the syntax "@ @ variable" (with spaces) is not accepted:
241 The lexer keeps some internal state after the '@' fake token.
242
243 To work around this, digest text are printed as "@@variable".
244 */
245 compiled_token_array[(int) '@'].m_append_space= false;
246
247 /*
248 Define additional properties for tokens.
249
250 List all the token that are followed by an expression.
251 This is needed to differentiate unary from binary
252 '+' and '-' operators, because we want to:
253 - reduce <unary +> <NUM> to <?>,
254 - preserve <...> <binary +> <NUM> as is.
255 */
256 set_start_expr_token('(');
257 set_start_expr_token(',');
258 set_start_expr_token(EVERY_SYM);
259 set_start_expr_token(AT_SYM);
260 set_start_expr_token(STARTS_SYM);
261 set_start_expr_token(ENDS_SYM);
262 set_start_expr_token(DEFAULT);
263 set_start_expr_token(RETURN_SYM);
264 set_start_expr_token(IF);
265 set_start_expr_token(ELSEIF_SYM);
266 set_start_expr_token(CASE_SYM);
267 set_start_expr_token(WHEN_SYM);
268 set_start_expr_token(WHILE_SYM);
269 set_start_expr_token(UNTIL_SYM);
270 set_start_expr_token(SELECT_SYM);
271
272 set_start_expr_token(OR_SYM);
273 set_start_expr_token(OR2_SYM);
274 set_start_expr_token(XOR);
275 set_start_expr_token(AND_SYM);
276 set_start_expr_token(AND_AND_SYM);
277 set_start_expr_token(NOT_SYM);
278 set_start_expr_token(BETWEEN_SYM);
279 set_start_expr_token(LIKE);
280 set_start_expr_token(REGEXP);
281
282 set_start_expr_token('|');
283 set_start_expr_token('&');
284 set_start_expr_token(SHIFT_LEFT);
285 set_start_expr_token(SHIFT_RIGHT);
286 set_start_expr_token('+');
287 set_start_expr_token('-');
288 set_start_expr_token(INTERVAL_SYM);
289 set_start_expr_token('*');
290 set_start_expr_token('/');
291 set_start_expr_token('%');
292 set_start_expr_token(DIV_SYM);
293 set_start_expr_token(MOD_SYM);
294 set_start_expr_token('^');
295 }
296
print_tokens()297 void print_tokens()
298 {
299 int tok;
300
301 printf("#ifdef LEX_TOKEN_WITH_DEFINITION\n");
302 printf("lex_token_string lex_token_array[]=\n");
303 printf("{\n");
304 printf("/* PART 1: character tokens. */\n");
305
306 for (tok= 0; tok<256; tok++)
307 {
308 printf("/* %03d */ { \"\\x%02x\", 1, %s, %s},\n",
309 tok,
310 tok,
311 compiled_token_array[tok].m_append_space ? "true" : "false",
312 compiled_token_array[tok].m_start_expr ? "true" : "false");
313 }
314
315 printf("/* PART 2: named tokens. */\n");
316
317 for (tok= 256; tok<= max_token_seen; tok++)
318 {
319 printf("/* %03d */ { \"%s\", %d, %s, %s},\n",
320 tok,
321 compiled_token_array[tok].m_token_string,
322 compiled_token_array[tok].m_token_length,
323 compiled_token_array[tok].m_append_space ? "true" : "false",
324 compiled_token_array[tok].m_start_expr ? "true" : "false");
325 }
326
327 printf("/* DUMMY */ { \"\", 0, false, false}\n");
328 printf("};\n");
329 printf("#endif /* LEX_TOKEN_WITH_DEFINITION */\n");
330
331 printf("/* DIGEST specific tokens. */\n");
332 printf("#define TOK_GENERIC_VALUE %d\n", tok_generic_value);
333 printf("#define TOK_GENERIC_VALUE_LIST %d\n", tok_generic_value_list);
334 printf("#define TOK_ROW_SINGLE_VALUE %d\n", tok_row_single_value);
335 printf("#define TOK_ROW_SINGLE_VALUE_LIST %d\n", tok_row_single_value_list);
336 printf("#define TOK_ROW_MULTIPLE_VALUE %d\n", tok_row_multiple_value);
337 printf("#define TOK_ROW_MULTIPLE_VALUE_LIST %d\n", tok_row_multiple_value_list);
338 printf("#define TOK_IDENT %d\n", tok_ident);
339 printf("#define TOK_UNUSED %d\n", tok_unused);
340 }
341
main(int argc,char ** argv)342 int main(int argc,char **argv)
343 {
344 puts("/*");
345 puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2011"));
346 puts("*/");
347
348 printf("/*\n");
349 printf(" This file is generated, do not edit.\n");
350 printf(" See file sql/gen_lex_token.cc.\n");
351 printf("*/\n");
352 printf("struct lex_token_string\n");
353 printf("{\n");
354 printf(" const char *m_token_string;\n");
355 printf(" int m_token_length;\n");
356 printf(" bool m_append_space;\n");
357 printf(" bool m_start_expr;\n");
358 printf("};\n");
359 printf("typedef struct lex_token_string lex_token_string;\n");
360
361 compute_tokens();
362 print_tokens();
363
364 return 0;
365 }
366
367