1 /*
2 Copyright (c) 2000, 2021, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software Foundation,
22 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
23
24 /**
25 @file
26
27 @details
28 @verbatim
29 The idea of presented algorithm see in
30 "The Art of Computer Programming" by Donald E. Knuth
31 Volume 3 "Sorting and searching"
32 (chapter 6.3 "Digital searching" - name and number of chapter
33 is back translation from Russian edition :))
34
35 as illustration of data structures, imagine next table:
36
37 static SYMBOL symbols[] = {
38 { "ADD", SYM(ADD),0,0},
39 { "AND", SYM(AND),0,0},
40 { "DAY", SYM(DAY_SYM),0,0},
41 };
42
43 for this structure, presented program generate next searching-structure:
44
45 +-----------+-+-+-+
46 | len |1|2|3|
47 +-----------+-+-+-+
48 |first_char |0|0|a|
49 |last_char |0|0|d|
50 |link |0|0|+|
51 |
52 V
53 +----------+-+-+-+--+
54 | 1 char|a|b|c|d |
55 +----------+-+-+-+--+
56 |first_char|d|0|0|0 |
57 |last_char |n|0|0|-1|
58 |link |+|0|0|+ |
59 | |
60 | V
61 | symbols[2] ( "DAY" )
62 V
63 +----------+--+-+-+-+-+-+-+-+-+-+--+
64 | 2 char|d |e|f|j|h|i|j|k|l|m|n |
65 +----------+--+-+-+-+-+-+-+-+-+-+--+
66 |first_char|0 |0|0|0|0|0|0|0|0|0|0 |
67 |last_char |-1|0|0|0|0|0|0|0|0|0|-1|
68 |link |+ |0|0|0|0|0|0|0|0|0|+ |
69 | |
70 V V
71 symbols[0] ( "ADD" ) symbols[1] ( "AND" )
72
73 for optimization, link is the 16-bit index in 'symbols'
74 or search-array..
75
76 So, we can read full search-structure as 32-bit word
77 @endverbatim
78
79 @todo
80 use instead to_upper_lex, special array
81 (substitute chars) without skip codes..
82 @todo
83 try use reverse order of comparing..
84
85 */
86
87 #define NO_YACC_SYMBOLS
88 #include <my_global.h>
89 #include "mysql_version.h"
90 #include "lex.h"
91 #include <stdlib.h>
92 #include <stdio.h>
93 #include <string.h>
94 #include <set>
95
96 #include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */
97
98
99 static bool check_duplicates(uint group_mask);
100
101
102 struct hash_lex_struct
103 {
104 int first_char;
105 char last_char;
106 // union value is undefined if first_char == 0
107 union{
108 hash_lex_struct *char_tails; // if first_char > 0
109 int iresult; // if first_char == -1
110 };
111 int ithis;
112
destroyhash_lex_struct113 void destroy()
114 {
115 if (first_char <= 0)
116 return;
117 for (int i= 0, size= static_cast<uchar>(last_char) - first_char + 1;
118 i < size; i++)
119 char_tails[i].destroy();
120 free(char_tails);
121 }
122 };
123
124
125 class hash_map_info
126 {
127 public:
128 hash_lex_struct *root_by_len;
129 int max_len;
130
131 char *hash_map;
132 int size_hash_map;
133
hash_map_info()134 hash_map_info()
135 : root_by_len(NULL), max_len(0), hash_map(NULL), size_hash_map(0)
136 {}
137
~hash_map_info()138 ~hash_map_info()
139 {
140 for (int i= 0; i < max_len; i++)
141 root_by_len[i].destroy();
142 free(root_by_len);
143 free(hash_map);
144 }
145
146 hash_lex_struct *get_hash_struct_by_len(int len);
147 void insert_symbols(int group_mask);
148 void add_structs_to_map(hash_lex_struct *st, int len);
149 void add_struct_to_map(hash_lex_struct *st);
150 void set_links(hash_lex_struct *st, int len);
151 bool print_hash_map(const char *name, uint group_mask);
152 };
153
154
get_hash_struct_by_len(int len)155 hash_lex_struct *hash_map_info::get_hash_struct_by_len(int len)
156 {
157 if (max_len < len)
158 {
159 root_by_len= (hash_lex_struct *) realloc((char*) root_by_len,
160 sizeof(hash_lex_struct) * len);
161 hash_lex_struct *cur, *end= root_by_len + len;
162 for (cur= root_by_len + max_len; cur < end; cur++)
163 cur->first_char= 0;
164 max_len= len;
165 }
166 return root_by_len + len - 1;
167 }
168
insert_into_hash(hash_lex_struct * root,const char * name,int len_from_begin,int index)169 void insert_into_hash(hash_lex_struct *root, const char *name,
170 int len_from_begin, int index)
171 {
172 hash_lex_struct *end, *cur, *tails;
173
174 if (!root->first_char)
175 {
176 root->first_char= -1;
177 root->iresult= index;
178 return;
179 }
180
181 if (root->first_char == -1)
182 {
183 int index2= root->iresult;
184 const char *name2= symbols[index2].name + len_from_begin;
185 root->first_char= (int) (uchar) name2[0];
186 root->last_char= (char) root->first_char;
187 tails= (hash_lex_struct*)malloc(sizeof(hash_lex_struct));
188 root->char_tails= tails;
189 tails->first_char= -1;
190 tails->iresult= index2;
191 }
192
193 size_t real_size= (root->last_char-root->first_char+1);
194
195 if (root->first_char>(*name))
196 {
197 size_t new_size= root->last_char-(*name)+1;
198 if (new_size<real_size) printf("error!!!!\n");
199 tails= root->char_tails;
200 tails= (hash_lex_struct*)realloc((char*)tails,
201 sizeof(hash_lex_struct)*new_size);
202 root->char_tails= tails;
203 memmove(tails+(new_size-real_size),tails,real_size*sizeof(hash_lex_struct));
204 end= tails + new_size - real_size;
205 for (cur= tails; cur<end; cur++)
206 cur->first_char= 0;
207 root->first_char= (int) (uchar) *name;
208 }
209
210 if (root->last_char<(*name))
211 {
212 size_t new_size= (*name)-root->first_char+1;
213 if (new_size<real_size) printf("error!!!!\n");
214 tails= root->char_tails;
215 tails= (hash_lex_struct*)realloc((char*)tails,
216 sizeof(hash_lex_struct)*new_size);
217 root->char_tails= tails;
218 end= tails + new_size;
219 for (cur= tails+real_size; cur<end; cur++)
220 cur->first_char= 0;
221 root->last_char= (*name);
222 }
223
224 insert_into_hash(root->char_tails+(*name)-root->first_char,
225 name+1,len_from_begin+1,index);
226 }
227
228
insert_symbols(int group_mask)229 void hash_map_info::insert_symbols(int group_mask)
230 {
231 size_t i= 0;
232 const SYMBOL *cur;
233 for (cur= symbols; i < array_elements(symbols); cur++, i++)
234 {
235 if (!(cur->group & group_mask))
236 continue;
237 hash_lex_struct *root= get_hash_struct_by_len(cur->length);
238 insert_into_hash(root, cur->name, 0, i);
239 }
240 }
241
242
add_struct_to_map(hash_lex_struct * st)243 void hash_map_info::add_struct_to_map(hash_lex_struct *st)
244 {
245 st->ithis= size_hash_map/4;
246 size_hash_map+= 4;
247 hash_map= (char*)realloc(hash_map,size_hash_map);
248 hash_map[size_hash_map-4]= (char) (st->first_char == -1 ? 0 :
249 st->first_char);
250 hash_map[size_hash_map-3]= (char) (st->first_char == -1 ||
251 st->first_char == 0 ? 0 : st->last_char);
252 if (st->first_char == -1)
253 {
254 hash_map[size_hash_map-2]= ((unsigned int)(int16)st->iresult)&255;
255 hash_map[size_hash_map-1]= ((unsigned int)(int16)st->iresult)>>8;
256 }
257 else if (st->first_char == 0)
258 {
259 hash_map[size_hash_map-2]= ((unsigned int)(int16)array_elements(symbols))&255;
260 hash_map[size_hash_map-1]= ((unsigned int)(int16)array_elements(symbols))>>8;
261 }
262 }
263
264
add_structs_to_map(hash_lex_struct * st,int len)265 void hash_map_info::add_structs_to_map(hash_lex_struct *st, int len)
266 {
267 hash_lex_struct *cur, *end= st+len;
268 for (cur= st; cur<end; cur++)
269 add_struct_to_map(cur);
270 for (cur= st; cur<end; cur++)
271 {
272 if (cur->first_char && cur->first_char != -1)
273 add_structs_to_map(cur->char_tails,cur->last_char-cur->first_char+1);
274 }
275 }
276
277
set_links(hash_lex_struct * st,int len)278 void hash_map_info::set_links(hash_lex_struct *st, int len)
279 {
280 hash_lex_struct *cur, *end= st+len;
281 for (cur= st; cur<end; cur++)
282 {
283 if (cur->first_char != 0 && cur->first_char != -1)
284 {
285 int ilink= cur->char_tails->ithis;
286 hash_map[cur->ithis*4+2]= ilink%256;
287 hash_map[cur->ithis*4+3]= ilink/256;
288 set_links(cur->char_tails,cur->last_char-cur->first_char+1);
289 }
290 }
291 }
292
293
print_hash_map(const char * name,uint group_mask)294 bool hash_map_info::print_hash_map(const char *name, uint group_mask)
295 {
296 if (check_duplicates(group_mask))
297 return true;
298 insert_symbols(group_mask);
299 add_structs_to_map(root_by_len, max_len);
300 set_links(root_by_len, max_len);
301
302 printf("static const unsigned char %s_map[%d]= {\n", name, size_hash_map);
303
304 char *cur;
305 int i;
306 for (i=0, cur= hash_map; i < size_hash_map; i++, cur++)
307 {
308 switch(i%4){
309 case 0: case 1:
310 if (!*cur)
311 printf("0, ");
312 else
313 printf("\'%c\', ",*cur);
314 break;
315 case 2: printf("%u, ",(uint)(uchar)*cur); break;
316 case 3: printf("%u,\n",(uint)(uchar)*cur); break;
317 }
318 }
319 printf("};\n\n");
320 printf("const unsigned int %s_max_len=%d;\n", name, max_len);
321 return false;
322 }
323
324
check_duplicates(uint group_mask)325 bool check_duplicates(uint group_mask)
326 {
327 std::set<const char *> names;
328
329 size_t i= 0;
330 const SYMBOL *cur;
331 for (cur= symbols; i < array_elements(symbols); cur++, i++)
332 {
333 if (!(cur->group & group_mask))
334 continue;
335 if (!names.insert(cur->name).second)
336 {
337 const char *err_tmpl= "\ngen_lex_hash fatal error : "
338 "Unfortunately gen_lex_hash can not generate a hash,\n since "
339 "your lex.h has duplicate definition for a symbol \"%s\"\n\n";
340 printf (err_tmpl, cur->name);
341 fprintf (stderr, err_tmpl, cur->name);
342 return true;
343 }
344 }
345 return false;
346 }
347
348
main(int argc,char ** argv)349 int main(int argc,char **argv)
350 {
351
352
353 /* Broken up to indicate that it's not advice to you, gentle reader. */
354 printf("/*\n\n Do " "not " "edit " "this " "file " "directly!\n\n*/\n");
355
356 puts("/*");
357 puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2000"));
358 puts("*/");
359
360 /* Broken up to indicate that it's not advice to you, gentle reader. */
361 printf("/* Do " "not " "edit " "this " "file! This is generated by "
362 "gen_lex_hash.cc\nthat seeks for a perfect hash function */\n\n");
363
364 /* Print sql_keywords_and_funcs_map[] and sql_keyword_and_funcs_max_len: */
365 if (hash_map_info().print_hash_map("sql_keywords_and_funcs", SG_MAIN_PARSER))
366 exit(1);
367
368 printf("\n");
369
370 /* Print sql_keywords_map[] and sql_keywords_max_len values: */
371 if (hash_map_info().print_hash_map("sql_keywords",
372 SG_KEYWORDS | SG_HINTABLE_KEYWORDS))
373 exit(1);
374
375 printf("\n");
376
377 /* Print hint_keywords_map[] and hint_keywords_max_len values: */
378 if (hash_map_info().print_hash_map("hint_keywords", SG_HINTS))
379 exit(1);
380
381 exit(0);
382 }
383
384