1 /*
2 Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software Foundation,
22 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
23
24 /**
25 @file
26
27 @details
28 @verbatim
29 The idea of presented algorithm see in
30 "The Art of Computer Programming" by Donald E. Knuth
31 Volume 3 "Sorting and searching"
32 (chapter 6.3 "Digital searching" - name and number of chapter
33 is back translation from Russian edition :))
34
35 as illustration of data structures, imagine next table:
36
37 static SYMBOL symbols[] = {
38 { "ADD", SYM(ADD),0,0},
39 { "AND", SYM(AND),0,0},
40 { "DAY", SYM(DAY_SYM),0,0},
41 };
42
43 for this structure, presented program generate next searching-structure:
44
45 +-----------+-+-+-+
46 | len |1|2|3|
47 +-----------+-+-+-+
48 |first_char |0|0|a|
49 |last_char |0|0|d|
50 |link |0|0|+|
51 |
52 V
53 +----------+-+-+-+--+
54 | 1 char|a|b|c|d |
55 +----------+-+-+-+--+
56 |first_char|d|0|0|0 |
57 |last_char |n|0|0|-1|
58 |link |+|0|0|+ |
59 | |
60 | V
61 | symbols[2] ( "DAY" )
62 V
63 +----------+--+-+-+-+-+-+-+-+-+-+--+
64 | 2 char|d |e|f|j|h|i|j|k|l|m|n |
65 +----------+--+-+-+-+-+-+-+-+-+-+--+
66 |first_char|0 |0|0|0|0|0|0|0|0|0|0 |
67 |last_char |-1|0|0|0|0|0|0|0|0|0|-1|
68 |link |+ |0|0|0|0|0|0|0|0|0|+ |
69 | |
70 V V
71 symbols[0] ( "ADD" ) symbols[1] ( "AND" )
72
73 for optimization, link is the 16-bit index in 'symbols' or 'sql_functions'
74 or search-array..
75
76 So, we can read full search-structure as 32-bit word
77 @endverbatim
78
79 @todo
80 use instead to_upper_lex, special array
81 (substitute chars) without skip codes..
82 @todo
83 try use reverse order of comparing..
84
85 */
86
87 #define NO_YACC_SYMBOLS
88 #include <my_global.h>
89 #include "mysql_version.h"
90 #include "lex.h"
91 #include <stdlib.h>
92 #include <stdio.h>
93 #include <string.h>
94
95 #include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */
96
97 struct hash_lex_struct
98 {
99 int first_char;
100 char last_char;
101 union{
102 hash_lex_struct *char_tails;
103 int iresult;
104 };
105 int ithis;
106 };
107
get_hash_struct_by_len(hash_lex_struct ** root_by_len,int len,int * max_len)108 hash_lex_struct *get_hash_struct_by_len(hash_lex_struct **root_by_len,
109 int len, int *max_len)
110 {
111 if (*max_len<len){
112 *root_by_len= (hash_lex_struct *)realloc((char*)*root_by_len,
113 sizeof(hash_lex_struct)*len);
114 hash_lex_struct *cur, *end= *root_by_len + len;
115 for (cur= *root_by_len + *max_len; cur<end; cur++)
116 cur->first_char= 0;
117 *max_len= len;
118 }
119 return (*root_by_len)+(len-1);
120 }
121
insert_into_hash(hash_lex_struct * root,const char * name,int len_from_begin,int index,int function)122 void insert_into_hash(hash_lex_struct *root, const char *name,
123 int len_from_begin, int index, int function)
124 {
125 hash_lex_struct *end, *cur, *tails;
126
127 if (!root->first_char)
128 {
129 root->first_char= -1;
130 root->iresult= index;
131 return;
132 }
133
134 if (root->first_char == -1)
135 {
136 int index2= root->iresult;
137 const char *name2= (index2 < 0 ? sql_functions[-index2-1] :
138 symbols[index2]).name + len_from_begin;
139 root->first_char= (int) (uchar) name2[0];
140 root->last_char= (char) root->first_char;
141 tails= (hash_lex_struct*)malloc(sizeof(hash_lex_struct));
142 root->char_tails= tails;
143 tails->first_char= -1;
144 tails->iresult= index2;
145 }
146
147 size_t real_size= (root->last_char-root->first_char+1);
148
149 if (root->first_char>(*name))
150 {
151 size_t new_size= root->last_char-(*name)+1;
152 if (new_size<real_size) printf("error!!!!\n");
153 tails= root->char_tails;
154 tails= (hash_lex_struct*)realloc((char*)tails,
155 sizeof(hash_lex_struct)*new_size);
156 root->char_tails= tails;
157 memmove(tails+(new_size-real_size),tails,real_size*sizeof(hash_lex_struct));
158 end= tails + new_size - real_size;
159 for (cur= tails; cur<end; cur++)
160 cur->first_char= 0;
161 root->first_char= (int) (uchar) *name;
162 }
163
164 if (root->last_char<(*name))
165 {
166 size_t new_size= (*name)-root->first_char+1;
167 if (new_size<real_size) printf("error!!!!\n");
168 tails= root->char_tails;
169 tails= (hash_lex_struct*)realloc((char*)tails,
170 sizeof(hash_lex_struct)*new_size);
171 root->char_tails= tails;
172 end= tails + new_size;
173 for (cur= tails+real_size; cur<end; cur++)
174 cur->first_char= 0;
175 root->last_char= (*name);
176 }
177
178 insert_into_hash(root->char_tails+(*name)-root->first_char,
179 name+1,len_from_begin+1,index,function);
180 }
181
182
183 hash_lex_struct *root_by_len= 0;
184 int max_len=0;
185
186 hash_lex_struct *root_by_len2= 0;
187 int max_len2=0;
188
insert_symbols()189 void insert_symbols()
190 {
191 size_t i= 0;
192 SYMBOL *cur;
193 for (cur= symbols; i<array_elements(symbols); cur++, i++){
194 hash_lex_struct *root=
195 get_hash_struct_by_len(&root_by_len,cur->length,&max_len);
196 insert_into_hash(root,cur->name,0,(uint) i,0);
197 }
198 }
199
insert_sql_functions()200 void insert_sql_functions()
201 {
202 int i= 0;
203 SYMBOL *cur;
204 for (cur= sql_functions; i < (int) array_elements(sql_functions); cur++, i++)
205 {
206 hash_lex_struct *root=
207 get_hash_struct_by_len(&root_by_len,cur->length,&max_len);
208 insert_into_hash(root,cur->name,0,-i-1,1);
209 }
210 }
211
calc_length()212 void calc_length()
213 {
214 SYMBOL *cur, *end= symbols + array_elements(symbols);
215 for (cur= symbols; cur < end; cur++)
216 cur->length=(uchar) strlen(cur->name);
217 end= sql_functions + array_elements(sql_functions);
218 for (cur= sql_functions; cur<end; cur++)
219 cur->length=(uchar) strlen(cur->name);
220 }
221
generate_find_structs()222 void generate_find_structs()
223 {
224 root_by_len= 0;
225 max_len=0;
226
227 insert_symbols();
228
229 root_by_len2= root_by_len;
230 max_len2= max_len;
231
232 root_by_len= 0;
233 max_len= 0;
234
235 insert_symbols();
236 insert_sql_functions();
237 }
238
239 struct hash_map_cleanup
240 {
hash_map_cleanuphash_map_cleanup241 hash_map_cleanup(char **buffer, int *sz)
242 : m_buffer(buffer), m_sz(sz)
243 {}
~hash_map_cleanuphash_map_cleanup244 ~hash_map_cleanup()
245 {
246 free(*m_buffer);
247 *m_buffer= NULL;
248 *m_sz= 0;
249 }
250 char **m_buffer;
251 int *m_sz;
252 };
253
254 char *hash_map= 0;
255 int size_hash_map= 0;
256
add_struct_to_map(hash_lex_struct * st)257 void add_struct_to_map(hash_lex_struct *st)
258 {
259 st->ithis= size_hash_map/4;
260 size_hash_map+= 4;
261 hash_map= (char*)realloc((char*)hash_map,size_hash_map);
262 hash_map[size_hash_map-4]= (char) (st->first_char == -1 ? 0 :
263 st->first_char);
264 hash_map[size_hash_map-3]= (char) (st->first_char == -1 ||
265 st->first_char == 0 ? 0 : st->last_char);
266 if (st->first_char == -1)
267 {
268 hash_map[size_hash_map-2]= ((unsigned int)(int16)st->iresult)&255;
269 hash_map[size_hash_map-1]= ((unsigned int)(int16)st->iresult)>>8;
270 }
271 else if (st->first_char == 0)
272 {
273 hash_map[size_hash_map-2]= ((unsigned int)(int16)array_elements(symbols))&255;
274 hash_map[size_hash_map-1]= ((unsigned int)(int16)array_elements(symbols))>>8;
275 }
276 }
277
278
add_structs_to_map(hash_lex_struct * st,int len)279 void add_structs_to_map(hash_lex_struct *st, int len)
280 {
281 hash_lex_struct *cur, *end= st+len;
282 for (cur= st; cur<end; cur++)
283 add_struct_to_map(cur);
284 for (cur= st; cur<end; cur++)
285 {
286 if (cur->first_char && cur->first_char != -1)
287 add_structs_to_map(cur->char_tails,cur->last_char-cur->first_char+1);
288 }
289 }
290
set_links(hash_lex_struct * st,int len)291 void set_links(hash_lex_struct *st, int len)
292 {
293 hash_lex_struct *cur, *end= st+len;
294 for (cur= st; cur<end; cur++)
295 {
296 if (cur->first_char != 0 && cur->first_char != -1)
297 {
298 int ilink= cur->char_tails->ithis;
299 hash_map[cur->ithis*4+2]= ilink%256;
300 hash_map[cur->ithis*4+3]= ilink/256;
301 set_links(cur->char_tails,cur->last_char-cur->first_char+1);
302 }
303 }
304 }
305
306
print_hash_map(const char * name)307 void print_hash_map(const char *name)
308 {
309 char *cur;
310 int i;
311
312 printf("static uchar %s[%d]= {\n",name,size_hash_map);
313 for (i=0, cur= hash_map; i<size_hash_map; i++, cur++)
314 {
315 switch(i%4){
316 case 0: case 1:
317 if (!*cur)
318 printf("0, ");
319 else
320 printf("\'%c\', ",*cur);
321 break;
322 case 2: printf("%u, ",(uint)(uchar)*cur); break;
323 case 3: printf("%u,\n",(uint)(uchar)*cur); break;
324 }
325 }
326 printf("};\n");
327 }
328
329
print_find_structs()330 void print_find_structs()
331 {
332 {
333 hash_map_cleanup cleanup(&hash_map, &size_hash_map);
334 add_structs_to_map(root_by_len,max_len);
335 set_links(root_by_len,max_len);
336 print_hash_map("sql_functions_map");
337 }
338
339 printf("\n");
340
341 {
342 hash_map_cleanup cleanup(&hash_map, &size_hash_map);
343 add_structs_to_map(root_by_len2,max_len2);
344 set_links(root_by_len2,max_len2);
345 print_hash_map("symbols_map");
346 }
347 }
348
check_dup_symbols(SYMBOL * s1,SYMBOL * s2)349 int check_dup_symbols(SYMBOL *s1, SYMBOL *s2)
350 {
351 if (s1->length!=s2->length || strncmp(s1->name,s2->name,s1->length))
352 return 0;
353
354 const char *err_tmpl= "\ngen_lex_hash fatal error : \
355 Unfortunately gen_lex_hash can not generate a hash,\n since \
356 your lex.h has duplicate definition for a symbol \"%s\"\n\n";
357 printf (err_tmpl,s1->name);
358 fprintf (stderr,err_tmpl,s1->name);
359
360 return 1;
361 }
362
363
check_duplicates()364 int check_duplicates()
365 {
366 SYMBOL *cur1, *cur2, *s_end, *f_end;
367
368 s_end= symbols + array_elements(symbols);
369 f_end= sql_functions + array_elements(sql_functions);
370
371 for (cur1= symbols; cur1<s_end; cur1++)
372 {
373 for (cur2= cur1+1; cur2<s_end; cur2++)
374 {
375 if (check_dup_symbols(cur1,cur2))
376 return 1;
377 }
378 for (cur2= sql_functions; cur2<f_end; cur2++)
379 {
380 if (check_dup_symbols(cur1,cur2))
381 return 1;
382 }
383 }
384
385 for (cur1= sql_functions; cur1<f_end; cur1++)
386 {
387 for (cur2= cur1+1; cur2< f_end; cur2++)
388 {
389 if (check_dup_symbols(cur1,cur2))
390 return 1;
391 }
392 }
393 return 0;
394 }
395
396
main(int argc,char ** argv)397 int main(int argc,char **argv)
398 {
399
400
401 /* Broken up to indicate that it's not advice to you, gentle reader. */
402 printf("/*\n\n Do " "not " "edit " "this " "file " "directly!\n\n*/\n");
403
404 puts("/*");
405 puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2000"));
406 puts("*/");
407
408 /* Broken up to indicate that it's not advice to you, gentle reader. */
409 printf("/* Do " "not " "edit " "this " "file! This is generated by "
410 "gen_lex_hash.cc\nthat seeks for a perfect hash function */\n\n");
411 printf("#include \"lex.h\"\n\n");
412
413 calc_length();
414
415 if (check_duplicates())
416 exit(1);
417
418 generate_find_structs();
419 print_find_structs();
420
421 printf("\nstatic unsigned int sql_functions_max_len=%d;\n", max_len);
422 printf("\nstatic unsigned int symbols_max_len=%d;\n\n", max_len2);
423
424 printf("\
425 static SYMBOL *get_hash_symbol(const char *s,\n\
426 unsigned int len,bool function)\n\
427 {\n\
428 register uchar *hash_map;\n\
429 register const char *cur_str= s;\n\
430 \n\
431 if (len == 0) {\n\
432 DBUG_PRINT(\"warning\", (\"get_hash_symbol() received a request for a zero-length symbol, which is probably a mistake.\"));\
433 return(NULL);\n\
434 }\n"
435 );
436
437 printf("\
438 if (function){\n\
439 if (len>sql_functions_max_len) return 0;\n\
440 hash_map= sql_functions_map;\n\
441 register uint32 cur_struct= uint4korr(hash_map+((len-1)*4));\n\
442 \n\
443 for (;;){\n\
444 register uchar first_char= (uchar)cur_struct;\n\
445 \n\
446 if (first_char == 0)\n\
447 {\n\
448 register int16 ires= (int16)(cur_struct>>16);\n\
449 if (ires==array_elements(symbols)) return 0;\n\
450 register SYMBOL *res;\n\
451 if (ires>=0) \n\
452 res= symbols+ires;\n\
453 else\n\
454 res= sql_functions-ires-1;\n\
455 register uint count= (uint) (cur_str - s);\n\
456 return lex_casecmp(cur_str,res->name+count,len-count) ? 0 : res;\n\
457 }\n\
458 \n\
459 register uchar cur_char= (uchar)to_upper_lex[(uchar)*cur_str];\n\
460 if (cur_char<first_char) return 0;\n\
461 cur_struct>>=8;\n\
462 if (cur_char>(uchar)cur_struct) return 0;\n\
463 \n\
464 cur_struct>>=8;\n\
465 cur_struct= uint4korr(hash_map+\n\
466 (((uint16)cur_struct + cur_char - first_char)*4));\n\
467 cur_str++;\n\
468 }\n"
469 );
470
471 printf("\
472 }else{\n\
473 if (len>symbols_max_len) return 0;\n\
474 hash_map= symbols_map;\n\
475 register uint32 cur_struct= uint4korr(hash_map+((len-1)*4));\n\
476 \n\
477 for (;;){\n\
478 register uchar first_char= (uchar)cur_struct;\n\
479 \n\
480 if (first_char==0){\n\
481 register int16 ires= (int16)(cur_struct>>16);\n\
482 if (ires==array_elements(symbols)) return 0;\n\
483 register SYMBOL *res= symbols+ires;\n\
484 register uint count= (uint) (cur_str - s);\n\
485 return lex_casecmp(cur_str,res->name+count,len-count)!=0 ? 0 : res;\n\
486 }\n\
487 \n\
488 register uchar cur_char= (uchar)to_upper_lex[(uchar)*cur_str];\n\
489 if (cur_char<first_char) return 0;\n\
490 cur_struct>>=8;\n\
491 if (cur_char>(uchar)cur_struct) return 0;\n\
492 \n\
493 cur_struct>>=8;\n\
494 cur_struct= uint4korr(hash_map+\n\
495 (((uint16)cur_struct + cur_char - first_char)*4));\n\
496 cur_str++;\n\
497 }\n\
498 }\n\
499 }\n"
500 );
501 exit(0);
502 }
503
504