1 /*
2    Copyright (c) 2000, 2012, Oracle and/or its affiliates.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
16 
17 /**
18   @file
19 
20   @details
21 @verbatim
22 The idea of presented algorithm see in
23 "The Art of Computer Programming" by Donald E. Knuth
24 Volume 3 "Sorting and searching"
25 (chapter 6.3 "Digital searching" - name and number of chapter
26    is back translation from Russian edition :))
27 
28 as illustration of data structures, imagine next table:
29 
30 static SYMBOL symbols[] = {
31   { "ADD",              SYM(ADD),0,0},
32   { "AND",              SYM(AND),0,0},
33   { "DAY",              SYM(DAY_SYM),0,0},
34 };
35 
36 for this structure, presented program generate next searching-structure:
37 
38 +-----------+-+-+-+
39 |       len |1|2|3|
40 +-----------+-+-+-+
41 |first_char |0|0|a|
42 |last_char  |0|0|d|
43 |link       |0|0|+|
44                  |
45                  V
46        +----------+-+-+-+--+
47        |    1 char|a|b|c|d |
48        +----------+-+-+-+--+
49        |first_char|b|0|0|0 |
50        |last_char |n|0|0|-1|
51        |link      |+|0|0|+ |
52                    |     |
53                    |     V
54                    |  symbols[2] ( "DAY" )
55                    V
56 +----------+--+-+-+-+-+-+-+-+-+-+--+
57 |    2 char|d |e|f|j|h|i|j|k|l|m|n |
58 +----------+--+-+-+-+-+-+-+-+-+-+--+
59 |first_char|0 |0|0|0|0|0|0|0|0|0|0 |
60 |last_char |-1|0|0|0|0|0|0|0|0|0|-1|
61 |link      |+ |0|0|0|0|0|0|0|0|0|+ |
62             |                    |
63             V                    V
64          symbols[0] ( "ADD" )  symbols[1] ( "AND" )
65 
66 for optimization, link is the 16-bit index in 'symbols' or 'sql_functions'
67 or search-array..
68 
69 So, we can read full search-structure as 32-bit word
70 @endverbatim
71 
72 @todo
73     use instead to_upper_lex, special array
74     (substitute chars) without skip codes..
75 @todo
76     try use reverse order of comparing..
77 
78 */
79 
80 #define NO_YACC_SYMBOLS
81 #undef CHECK_UNLIKELY
82 #include "mariadb.h"
83 #include "mysql_version.h"
84 #include "lex.h"
85 #include <string.h>
86 
87 #include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */
88 
89 struct hash_lex_struct
90 {
91   int first_char;
92   char last_char;
93   union{
94     hash_lex_struct *char_tails;
95     int iresult;
96   };
97   int ithis;
98 };
99 
get_hash_struct_by_len(hash_lex_struct ** root_by_len,int len,int * max_len)100 hash_lex_struct *get_hash_struct_by_len(hash_lex_struct **root_by_len,
101 					    int len, int *max_len)
102 {
103   if (*max_len<len){
104     *root_by_len= (hash_lex_struct *)realloc((char*)*root_by_len,
105                                              sizeof(hash_lex_struct)*len);
106     hash_lex_struct *cur, *end= *root_by_len + len;
107     for (cur= *root_by_len + *max_len; cur<end; cur++)
108       cur->first_char= 0;
109     *max_len= len;
110   }
111   return (*root_by_len)+(len-1);
112 }
113 
insert_into_hash(hash_lex_struct * root,const char * name,int len_from_begin,int index,int function)114 void insert_into_hash(hash_lex_struct *root, const char *name,
115 		      int len_from_begin, int index, int function)
116 {
117   hash_lex_struct *end, *cur, *tails;
118 
119   if (!root->first_char)
120   {
121     root->first_char= -1;
122     root->iresult= index;
123     return;
124   }
125 
126   if (root->first_char == -1)
127   {
128     int index2= root->iresult;
129     const char *name2= (index2 < 0 ? sql_functions[-index2-1] :
130 			symbols[index2]).name + len_from_begin;
131     root->first_char= (int) (uchar) name2[0];
132     root->last_char= (char) root->first_char;
133     tails= (hash_lex_struct*)malloc(sizeof(hash_lex_struct));
134     root->char_tails= tails;
135     tails->first_char= -1;
136     tails->iresult= index2;
137   }
138 
139   size_t real_size= (root->last_char-root->first_char+1);
140 
141   if (root->first_char>(*name))
142   {
143     size_t new_size= root->last_char-(*name)+1;
144     if (unlikely(new_size<real_size))
145       printf("error!!!!\n");
146     tails= root->char_tails;
147     tails= (hash_lex_struct*)realloc((char*)tails,
148 				       sizeof(hash_lex_struct)*new_size);
149     root->char_tails= tails;
150     memmove(tails+(new_size-real_size),tails,real_size*sizeof(hash_lex_struct));
151     end= tails + new_size - real_size;
152     for (cur= tails; cur<end; cur++)
153       cur->first_char= 0;
154     root->first_char= (int) (uchar) *name;
155   }
156 
157   if (root->last_char<(*name))
158   {
159     size_t new_size= (*name)-root->first_char+1;
160     if (unlikely(new_size<real_size))
161       printf("error!!!!\n");
162     tails= root->char_tails;
163     tails= (hash_lex_struct*)realloc((char*)tails,
164 				    sizeof(hash_lex_struct)*new_size);
165     root->char_tails= tails;
166     end= tails + new_size;
167     for (cur= tails+real_size; cur<end; cur++)
168       cur->first_char= 0;
169     root->last_char= (*name);
170   }
171 
172   insert_into_hash(root->char_tails+(*name)-root->first_char,
173 		   name+1,len_from_begin+1,index,function);
174 }
175 
176 
177 hash_lex_struct *root_by_len= 0;
178 int max_len=0;
179 
180 hash_lex_struct *root_by_len2= 0;
181 int max_len2=0;
182 
insert_symbols()183 void insert_symbols()
184 {
185   size_t i= 0;
186   SYMBOL *cur;
187   for (cur= symbols; i<array_elements(symbols); cur++, i++){
188     hash_lex_struct *root=
189       get_hash_struct_by_len(&root_by_len,cur->length,&max_len);
190     insert_into_hash(root,cur->name,0,(uint) i,0);
191   }
192 }
193 
insert_sql_functions()194 void insert_sql_functions()
195 {
196   int i= 0;
197   SYMBOL *cur;
198   for (cur= sql_functions; i < (int) array_elements(sql_functions); cur++, i++)
199   {
200     hash_lex_struct *root=
201       get_hash_struct_by_len(&root_by_len,cur->length,&max_len);
202     insert_into_hash(root,cur->name,0,-i-1,1);
203   }
204 }
205 
calc_length()206 void calc_length()
207 {
208   SYMBOL *cur, *end= symbols + array_elements(symbols);
209   for (cur= symbols; cur < end; cur++)
210     cur->length=(uchar) strlen(cur->name);
211   end= sql_functions + array_elements(sql_functions);
212   for (cur= sql_functions; cur<end; cur++)
213     cur->length=(uchar) strlen(cur->name);
214 }
215 
generate_find_structs()216 void generate_find_structs()
217 {
218   root_by_len= 0;
219   max_len=0;
220 
221   insert_symbols();
222 
223   root_by_len2= root_by_len;
224   max_len2= max_len;
225 
226   root_by_len= 0;
227   max_len= 0;
228 
229   insert_symbols();
230   insert_sql_functions();
231 }
232 
233 char *hash_map= 0;
234 int size_hash_map= 0;
235 
add_struct_to_map(hash_lex_struct * st)236 void add_struct_to_map(hash_lex_struct *st)
237 {
238   st->ithis= size_hash_map/4;
239   size_hash_map+= 4;
240   hash_map= (char*)realloc((char*)hash_map,size_hash_map);
241   hash_map[size_hash_map-4]= (char) (st->first_char == -1 ? 0 :
242 				     st->first_char);
243   hash_map[size_hash_map-3]= (char) (st->first_char == -1 ||
244 				     st->first_char == 0 ? 0 : st->last_char);
245   if (st->first_char == -1)
246   {
247     hash_map[size_hash_map-2]= ((unsigned int)(int16)st->iresult)&255;
248     hash_map[size_hash_map-1]= ((unsigned int)(int16)st->iresult)>>8;
249   }
250   else if (st->first_char == 0)
251   {
252     hash_map[size_hash_map-2]= ((unsigned int)(int16)array_elements(symbols))&255;
253     hash_map[size_hash_map-1]= ((unsigned int)(int16)array_elements(symbols))>>8;
254   }
255 }
256 
257 
add_structs_to_map(hash_lex_struct * st,int len)258 void add_structs_to_map(hash_lex_struct *st, int len)
259 {
260   hash_lex_struct *cur, *end= st+len;
261   for (cur= st; cur<end; cur++)
262     add_struct_to_map(cur);
263   for (cur= st; cur<end; cur++)
264   {
265     if (cur->first_char && cur->first_char != -1)
266       add_structs_to_map(cur->char_tails,cur->last_char-cur->first_char+1);
267   }
268 }
269 
set_links(hash_lex_struct * st,int len)270 void set_links(hash_lex_struct *st, int len)
271 {
272   hash_lex_struct *cur, *end= st+len;
273   for (cur= st; cur<end; cur++)
274   {
275     if (cur->first_char != 0 && cur->first_char != -1)
276     {
277       int ilink= cur->char_tails->ithis;
278       hash_map[cur->ithis*4+2]= ilink%256;
279       hash_map[cur->ithis*4+3]= ilink/256;
280       set_links(cur->char_tails,cur->last_char-cur->first_char+1);
281     }
282   }
283 }
284 
285 
print_hash_map(const char * name)286 void print_hash_map(const char *name)
287 {
288   char *cur;
289   int i;
290 
291   printf("static uchar %s[%d]= {\n",name,size_hash_map);
292   for (i=0, cur= hash_map; i<size_hash_map; i++, cur++)
293   {
294     switch(i%4){
295     case 0: case 1:
296       if (!*cur)
297 	printf("0,   ");
298       else
299 	printf("\'%c\', ",*cur);
300       break;
301     case 2: printf("%u, ",(uint)(uchar)*cur); break;
302     case 3: printf("%u,\n",(uint)(uchar)*cur); break;
303     }
304   }
305   printf("};\n");
306 }
307 
308 
print_find_structs()309 void print_find_structs()
310 {
311   add_structs_to_map(root_by_len,max_len);
312   set_links(root_by_len,max_len);
313   print_hash_map("sql_functions_map");
314   free(hash_map);
315 
316   hash_map= 0;
317   size_hash_map= 0;
318 
319   printf("\n");
320 
321   add_structs_to_map(root_by_len2,max_len2);
322   set_links(root_by_len2,max_len2);
323   print_hash_map("symbols_map");
324   free(hash_map);
325 }
326 
327 
check_dup_symbols(SYMBOL * s1,SYMBOL * s2)328 int check_dup_symbols(SYMBOL *s1, SYMBOL *s2)
329 {
330   if (s1->length!=s2->length || strncmp(s1->name,s2->name,s1->length))
331     return 0;
332 
333   const char *err_tmpl= "\ngen_lex_hash fatal error : \
334 Unfortunately gen_lex_hash can not generate a hash,\n since \
335 your lex.h has duplicate definition for a symbol \"%s\"\n\n";
336   printf (err_tmpl,s1->name);
337   fprintf (stderr,err_tmpl,s1->name);
338 
339   return 1;
340 }
341 
342 
check_duplicates()343 int check_duplicates()
344 {
345   SYMBOL *cur1, *cur2, *s_end, *f_end;
346 
347   s_end= symbols + array_elements(symbols);
348   f_end= sql_functions + array_elements(sql_functions);
349 
350   for (cur1= symbols; cur1<s_end; cur1++)
351   {
352     for (cur2= cur1+1; cur2<s_end; cur2++)
353     {
354       if (check_dup_symbols(cur1,cur2))
355 	return 1;
356     }
357     for (cur2= sql_functions; cur2<f_end; cur2++)
358     {
359       if (check_dup_symbols(cur1,cur2))
360 	return 1;
361     }
362   }
363 
364   for (cur1= sql_functions; cur1<f_end; cur1++)
365   {
366     for (cur2= cur1+1; cur2< f_end; cur2++)
367     {
368       if (check_dup_symbols(cur1,cur2))
369 	return 1;
370     }
371   }
372   return 0;
373 }
374 
375 
main(int argc,char ** argv)376 int main(int argc,char **argv)
377 {
378 
379 
380   /* Broken up to indicate that it's not advice to you, gentle reader. */
381   printf("/*\n\n  Do " "not " "edit " "this " "file " "directly!\n\n*/\n");
382 
383   puts("/*");
384   puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2000"));
385   puts("*/");
386 
387   /* Broken up to indicate that it's not advice to you, gentle reader. */
388   printf("/* Do " "not " "edit " "this " "file!  This is generated by "
389          "gen_lex_hash.cc\nthat seeks for a perfect hash function */\n\n");
390   printf("#include \"lex.h\"\n\n");
391 
392   calc_length();
393 
394   if (check_duplicates())
395     exit(1);
396 
397   generate_find_structs();
398   print_find_structs();
399 
400   printf("\nstatic unsigned int sql_functions_max_len=%d;\n", max_len);
401   printf("\nstatic unsigned int symbols_max_len=%d;\n\n", max_len2);
402 
403   printf("\
404 static SYMBOL *get_hash_symbol(const char *s,\n\
405                                unsigned int len,bool function)\n\
406 {\n\
407   uchar *hash_map;\n\
408   const char *cur_str= s;\n\
409 \n\
410   if (len == 0) {\n\
411     DBUG_PRINT(\"warning\", (\"get_hash_symbol() received a request for a zero-length symbol, which is probably a mistake.\"));\
412     return(NULL);\n\
413   }\n"
414 );
415 
416   printf("\
417   if (function){\n\
418     if (len>sql_functions_max_len) return 0;\n\
419     hash_map= sql_functions_map;\n\
420     uint32 cur_struct= uint4korr(hash_map+((len-1)*4));\n\
421 \n\
422     for (;;){\n\
423       uchar first_char= (uchar)cur_struct;\n\
424 \n\
425       if (first_char == 0)\n\
426       {\n\
427         int16 ires= (int16)(cur_struct>>16);\n\
428         if (ires==array_elements(symbols)) return 0;\n\
429         SYMBOL *res;\n\
430         if (ires>=0) \n\
431           res= symbols+ires;\n\
432         else\n\
433           res= sql_functions-ires-1;\n\
434         uint count= (uint) (cur_str - s);\n\
435         return lex_casecmp(cur_str,res->name+count,len-count) ? 0 : res;\n\
436       }\n\
437 \n\
438       uchar cur_char= (uchar)to_upper_lex[(uchar)*cur_str];\n\
439       if (cur_char<first_char) return 0;\n\
440       cur_struct>>=8;\n\
441       if (cur_char>(uchar)cur_struct) return 0;\n\
442 \n\
443       cur_struct>>=8;\n\
444       cur_struct= uint4korr(hash_map+\n\
445                         (((uint16)cur_struct + cur_char - first_char)*4));\n\
446       cur_str++;\n\
447     }\n"
448 );
449 
450   printf("\
451   }else{\n\
452     if (len>symbols_max_len) return 0;\n\
453     hash_map= symbols_map;\n\
454     uint32 cur_struct= uint4korr(hash_map+((len-1)*4));\n\
455 \n\
456     for (;;){\n\
457       uchar first_char= (uchar)cur_struct;\n\
458 \n\
459       if (first_char==0) {\n\
460         int16 ires= (int16)(cur_struct>>16);\n\
461         if (ires==array_elements(symbols)) return 0;\n\
462         SYMBOL *res= symbols+ires;\n\
463         uint count= (uint) (cur_str - s);\n\
464         return lex_casecmp(cur_str,res->name+count,len-count)!=0 ? 0 : res;\n\
465       }\n\
466 \n\
467       uchar cur_char= (uchar)to_upper_lex[(uchar)*cur_str];\n\
468       if (cur_char<first_char) return 0;\n\
469       cur_struct>>=8;\n\
470       if (cur_char>(uchar)cur_struct) return 0;\n\
471 \n\
472       cur_struct>>=8;\n\
473       cur_struct= uint4korr(hash_map+\n\
474                         (((uint16)cur_struct + cur_char - first_char)*4));\n\
475       cur_str++;\n\
476     }\n\
477   }\n\
478 }\n"
479 );
480   exit(0);
481 }
482 
483