1 /*
2    Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software Foundation,
22    51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
23 
24 /**
25   @file
26 
27   @details
28 @verbatim
29 The idea of presented algorithm see in
30 "The Art of Computer Programming" by Donald E. Knuth
31 Volume 3 "Sorting and searching"
32 (chapter 6.3 "Digital searching" - name and number of chapter
33    is back translation from Russian edition :))
34 
35 as illustration of data structures, imagine next table:
36 
37 static SYMBOL symbols[] = {
38   { "ADD",              SYM(ADD),0,0},
39   { "AND",              SYM(AND),0,0},
40   { "DAY",              SYM(DAY_SYM),0,0},
41 };
42 
43 for this structure, presented program generate next searching-structure:
44 
45 +-----------+-+-+-+
46 |       len |1|2|3|
47 +-----------+-+-+-+
48 |first_char |0|0|a|
49 |last_char  |0|0|d|
50 |link       |0|0|+|
51                  |
52                  V
53        +----------+-+-+-+--+
54        |    1 char|a|b|c|d |
55        +----------+-+-+-+--+
56        |first_char|d|0|0|0 |
57        |last_char |n|0|0|-1|
58        |link      |+|0|0|+ |
59                    |     |
60                    |     V
61                    |  symbols[2] ( "DAY" )
62                    V
63 +----------+--+-+-+-+-+-+-+-+-+-+--+
64 |    2 char|d |e|f|j|h|i|j|k|l|m|n |
65 +----------+--+-+-+-+-+-+-+-+-+-+--+
66 |first_char|0 |0|0|0|0|0|0|0|0|0|0 |
67 |last_char |-1|0|0|0|0|0|0|0|0|0|-1|
68 |link      |+ |0|0|0|0|0|0|0|0|0|+ |
69             |                    |
70             V                    V
71          symbols[0] ( "ADD" )  symbols[1] ( "AND" )
72 
73 for optimization, link is the 16-bit index in 'symbols' or 'sql_functions'
74 or search-array..
75 
76 So, we can read full search-structure as 32-bit word
77 @endverbatim
78 
79 @todo
80     use instead to_upper_lex, special array
81     (substitute chars) without skip codes..
82 @todo
83     try use reverse order of comparing..
84 
85 */
86 
87 #define NO_YACC_SYMBOLS
88 #include <my_global.h>
89 #include "mysql_version.h"
90 #include "lex.h"
91 #include <stdlib.h>
92 #include <stdio.h>
93 #include <string.h>
94 
95 #include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */
96 
97 struct hash_lex_struct
98 {
99   int first_char;
100   char last_char;
101   union{
102     hash_lex_struct *char_tails;
103     int iresult;
104   };
105   int ithis;
106 };
107 
get_hash_struct_by_len(hash_lex_struct ** root_by_len,int len,int * max_len)108 hash_lex_struct *get_hash_struct_by_len(hash_lex_struct **root_by_len,
109 					    int len, int *max_len)
110 {
111   if (*max_len<len){
112     *root_by_len= (hash_lex_struct *)realloc((char*)*root_by_len,
113                                              sizeof(hash_lex_struct)*len);
114     hash_lex_struct *cur, *end= *root_by_len + len;
115     for (cur= *root_by_len + *max_len; cur<end; cur++)
116       cur->first_char= 0;
117     *max_len= len;
118   }
119   return (*root_by_len)+(len-1);
120 }
121 
insert_into_hash(hash_lex_struct * root,const char * name,int len_from_begin,int index,int function)122 void insert_into_hash(hash_lex_struct *root, const char *name,
123 		      int len_from_begin, int index, int function)
124 {
125   hash_lex_struct *end, *cur, *tails;
126 
127   if (!root->first_char)
128   {
129     root->first_char= -1;
130     root->iresult= index;
131     return;
132   }
133 
134   if (root->first_char == -1)
135   {
136     int index2= root->iresult;
137     const char *name2= (index2 < 0 ? sql_functions[-index2-1] :
138 			symbols[index2]).name + len_from_begin;
139     root->first_char= (int) (uchar) name2[0];
140     root->last_char= (char) root->first_char;
141     tails= (hash_lex_struct*)malloc(sizeof(hash_lex_struct));
142     root->char_tails= tails;
143     tails->first_char= -1;
144     tails->iresult= index2;
145   }
146 
147   size_t real_size= (root->last_char-root->first_char+1);
148 
149   if (root->first_char>(*name))
150   {
151     size_t new_size= root->last_char-(*name)+1;
152     if (new_size<real_size) printf("error!!!!\n");
153     tails= root->char_tails;
154     tails= (hash_lex_struct*)realloc((char*)tails,
155 				       sizeof(hash_lex_struct)*new_size);
156     root->char_tails= tails;
157     memmove(tails+(new_size-real_size),tails,real_size*sizeof(hash_lex_struct));
158     end= tails + new_size - real_size;
159     for (cur= tails; cur<end; cur++)
160       cur->first_char= 0;
161     root->first_char= (int) (uchar) *name;
162   }
163 
164   if (root->last_char<(*name))
165   {
166     size_t new_size= (*name)-root->first_char+1;
167     if (new_size<real_size) printf("error!!!!\n");
168     tails= root->char_tails;
169     tails= (hash_lex_struct*)realloc((char*)tails,
170 				    sizeof(hash_lex_struct)*new_size);
171     root->char_tails= tails;
172     end= tails + new_size;
173     for (cur= tails+real_size; cur<end; cur++)
174       cur->first_char= 0;
175     root->last_char= (*name);
176   }
177 
178   insert_into_hash(root->char_tails+(*name)-root->first_char,
179 		   name+1,len_from_begin+1,index,function);
180 }
181 
182 
183 hash_lex_struct *root_by_len= 0;
184 int max_len=0;
185 
186 hash_lex_struct *root_by_len2= 0;
187 int max_len2=0;
188 
insert_symbols()189 void insert_symbols()
190 {
191   size_t i= 0;
192   SYMBOL *cur;
193   for (cur= symbols; i<array_elements(symbols); cur++, i++){
194     hash_lex_struct *root=
195       get_hash_struct_by_len(&root_by_len,cur->length,&max_len);
196     insert_into_hash(root,cur->name,0,(uint) i,0);
197   }
198 }
199 
insert_sql_functions()200 void insert_sql_functions()
201 {
202   int i= 0;
203   SYMBOL *cur;
204   for (cur= sql_functions; i < (int) array_elements(sql_functions); cur++, i++)
205   {
206     hash_lex_struct *root=
207       get_hash_struct_by_len(&root_by_len,cur->length,&max_len);
208     insert_into_hash(root,cur->name,0,-i-1,1);
209   }
210 }
211 
calc_length()212 void calc_length()
213 {
214   SYMBOL *cur, *end= symbols + array_elements(symbols);
215   for (cur= symbols; cur < end; cur++)
216     cur->length=(uchar) strlen(cur->name);
217   end= sql_functions + array_elements(sql_functions);
218   for (cur= sql_functions; cur<end; cur++)
219     cur->length=(uchar) strlen(cur->name);
220 }
221 
generate_find_structs()222 void generate_find_structs()
223 {
224   root_by_len= 0;
225   max_len=0;
226 
227   insert_symbols();
228 
229   root_by_len2= root_by_len;
230   max_len2= max_len;
231 
232   root_by_len= 0;
233   max_len= 0;
234 
235   insert_symbols();
236   insert_sql_functions();
237 }
238 
239 struct hash_map_cleanup
240 {
hash_map_cleanuphash_map_cleanup241   hash_map_cleanup(char **buffer, int *sz)
242     : m_buffer(buffer), m_sz(sz)
243   {}
~hash_map_cleanuphash_map_cleanup244   ~hash_map_cleanup()
245   {
246     free(*m_buffer);
247     *m_buffer= NULL;
248     *m_sz= 0;
249   }
250   char **m_buffer;
251   int   *m_sz;
252 };
253 
254 char *hash_map= 0;
255 int size_hash_map= 0;
256 
add_struct_to_map(hash_lex_struct * st)257 void add_struct_to_map(hash_lex_struct *st)
258 {
259   st->ithis= size_hash_map/4;
260   size_hash_map+= 4;
261   hash_map= (char*)realloc((char*)hash_map,size_hash_map);
262   hash_map[size_hash_map-4]= (char) (st->first_char == -1 ? 0 :
263 				     st->first_char);
264   hash_map[size_hash_map-3]= (char) (st->first_char == -1 ||
265 				     st->first_char == 0 ? 0 : st->last_char);
266   if (st->first_char == -1)
267   {
268     hash_map[size_hash_map-2]= ((unsigned int)(int16)st->iresult)&255;
269     hash_map[size_hash_map-1]= ((unsigned int)(int16)st->iresult)>>8;
270   }
271   else if (st->first_char == 0)
272   {
273     hash_map[size_hash_map-2]= ((unsigned int)(int16)array_elements(symbols))&255;
274     hash_map[size_hash_map-1]= ((unsigned int)(int16)array_elements(symbols))>>8;
275   }
276 }
277 
278 
add_structs_to_map(hash_lex_struct * st,int len)279 void add_structs_to_map(hash_lex_struct *st, int len)
280 {
281   hash_lex_struct *cur, *end= st+len;
282   for (cur= st; cur<end; cur++)
283     add_struct_to_map(cur);
284   for (cur= st; cur<end; cur++)
285   {
286     if (cur->first_char && cur->first_char != -1)
287       add_structs_to_map(cur->char_tails,cur->last_char-cur->first_char+1);
288   }
289 }
290 
set_links(hash_lex_struct * st,int len)291 void set_links(hash_lex_struct *st, int len)
292 {
293   hash_lex_struct *cur, *end= st+len;
294   for (cur= st; cur<end; cur++)
295   {
296     if (cur->first_char != 0 && cur->first_char != -1)
297     {
298       int ilink= cur->char_tails->ithis;
299       hash_map[cur->ithis*4+2]= ilink%256;
300       hash_map[cur->ithis*4+3]= ilink/256;
301       set_links(cur->char_tails,cur->last_char-cur->first_char+1);
302     }
303   }
304 }
305 
306 
print_hash_map(const char * name)307 void print_hash_map(const char *name)
308 {
309   char *cur;
310   int i;
311 
312   printf("static uchar %s[%d]= {\n",name,size_hash_map);
313   for (i=0, cur= hash_map; i<size_hash_map; i++, cur++)
314   {
315     switch(i%4){
316     case 0: case 1:
317       if (!*cur)
318 	printf("0,   ");
319       else
320 	printf("\'%c\', ",*cur);
321       break;
322     case 2: printf("%u, ",(uint)(uchar)*cur); break;
323     case 3: printf("%u,\n",(uint)(uchar)*cur); break;
324     }
325   }
326   printf("};\n");
327 }
328 
329 
print_find_structs()330 void print_find_structs()
331 {
332   {
333     hash_map_cleanup cleanup(&hash_map, &size_hash_map);
334     add_structs_to_map(root_by_len,max_len);
335     set_links(root_by_len,max_len);
336     print_hash_map("sql_functions_map");
337   }
338 
339   printf("\n");
340 
341   {
342     hash_map_cleanup cleanup(&hash_map, &size_hash_map);
343     add_structs_to_map(root_by_len2,max_len2);
344     set_links(root_by_len2,max_len2);
345     print_hash_map("symbols_map");
346   }
347 }
348 
check_dup_symbols(SYMBOL * s1,SYMBOL * s2)349 int check_dup_symbols(SYMBOL *s1, SYMBOL *s2)
350 {
351   if (s1->length!=s2->length || strncmp(s1->name,s2->name,s1->length))
352     return 0;
353 
354   const char *err_tmpl= "\ngen_lex_hash fatal error : \
355 Unfortunately gen_lex_hash can not generate a hash,\n since \
356 your lex.h has duplicate definition for a symbol \"%s\"\n\n";
357   printf (err_tmpl,s1->name);
358   fprintf (stderr,err_tmpl,s1->name);
359 
360   return 1;
361 }
362 
363 
check_duplicates()364 int check_duplicates()
365 {
366   SYMBOL *cur1, *cur2, *s_end, *f_end;
367 
368   s_end= symbols + array_elements(symbols);
369   f_end= sql_functions + array_elements(sql_functions);
370 
371   for (cur1= symbols; cur1<s_end; cur1++)
372   {
373     for (cur2= cur1+1; cur2<s_end; cur2++)
374     {
375       if (check_dup_symbols(cur1,cur2))
376 	return 1;
377     }
378     for (cur2= sql_functions; cur2<f_end; cur2++)
379     {
380       if (check_dup_symbols(cur1,cur2))
381 	return 1;
382     }
383   }
384 
385   for (cur1= sql_functions; cur1<f_end; cur1++)
386   {
387     for (cur2= cur1+1; cur2< f_end; cur2++)
388     {
389       if (check_dup_symbols(cur1,cur2))
390 	return 1;
391     }
392   }
393   return 0;
394 }
395 
396 
main(int argc,char ** argv)397 int main(int argc,char **argv)
398 {
399 
400 
401   /* Broken up to indicate that it's not advice to you, gentle reader. */
402   printf("/*\n\n  Do " "not " "edit " "this " "file " "directly!\n\n*/\n");
403 
404   puts("/*");
405   puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2000"));
406   puts("*/");
407 
408   /* Broken up to indicate that it's not advice to you, gentle reader. */
409   printf("/* Do " "not " "edit " "this " "file!  This is generated by "
410          "gen_lex_hash.cc\nthat seeks for a perfect hash function */\n\n");
411   printf("#include \"lex.h\"\n\n");
412 
413   calc_length();
414 
415   if (check_duplicates())
416     exit(1);
417 
418   generate_find_structs();
419   print_find_structs();
420 
421   printf("\nstatic unsigned int sql_functions_max_len=%d;\n", max_len);
422   printf("\nstatic unsigned int symbols_max_len=%d;\n\n", max_len2);
423 
424   printf("\
425 static SYMBOL *get_hash_symbol(const char *s,\n\
426                                unsigned int len,bool function)\n\
427 {\n\
428   uchar *hash_map;\n\
429   const char *cur_str= s;\n\
430 \n\
431   if (len == 0) {\n\
432     DBUG_PRINT(\"warning\", (\"get_hash_symbol() received a request for a zero-length symbol, which is probably a mistake.\"));\
433     return(NULL);\n\
434   }\n"
435 );
436 
437   printf("\
438   if (function){\n\
439     if (len>sql_functions_max_len) return 0;\n\
440     hash_map= sql_functions_map;\n\
441     uint32 cur_struct= uint4korr(hash_map+((len-1)*4));\n\
442 \n\
443     for (;;){\n\
444       uchar first_char= (uchar)cur_struct;\n\
445 \n\
446       if (first_char == 0)\n\
447       {\n\
448         int16 ires= (int16)(cur_struct>>16);\n\
449         if (ires==array_elements(symbols)) return 0;\n\
450         SYMBOL *res;\n\
451         if (ires>=0) \n\
452           res= symbols+ires;\n\
453         else\n\
454           res= sql_functions-ires-1;\n\
455 		  uint count= (uint) (cur_str - s);\n\
456         return lex_casecmp(cur_str,res->name+count,len-count) ? 0 : res;\n\
457       }\n\
458 \n\
459       uchar cur_char= (uchar)to_upper_lex[(uchar)*cur_str];\n\
460       if (cur_char<first_char) return 0;\n\
461       cur_struct>>=8;\n\
462       if (cur_char>(uchar)cur_struct) return 0;\n\
463 \n\
464       cur_struct>>=8;\n\
465       cur_struct= uint4korr(hash_map+\n\
466                         (((uint16)cur_struct + cur_char - first_char)*4));\n\
467       cur_str++;\n\
468     }\n"
469 );
470 
471   printf("\
472   }else{\n\
473     if (len>symbols_max_len) return 0;\n\
474     hash_map= symbols_map;\n\
475     uint32 cur_struct= uint4korr(hash_map+((len-1)*4));\n\
476 \n\
477     for (;;){\n\
478       uchar first_char= (uchar)cur_struct;\n\
479 \n\
480       if (first_char==0){\n\
481         int16 ires= (int16)(cur_struct>>16);\n\
482         if (ires==array_elements(symbols)) return 0;\n\
483         SYMBOL *res= symbols+ires;\n\
484 		uint count= (uint) (cur_str - s);\n\
485         return lex_casecmp(cur_str,res->name+count,len-count)!=0 ? 0 : res;\n\
486       }\n\
487 \n\
488       uchar cur_char= (uchar)to_upper_lex[(uchar)*cur_str];\n\
489       if (cur_char<first_char) return 0;\n\
490       cur_struct>>=8;\n\
491       if (cur_char>(uchar)cur_struct) return 0;\n\
492 \n\
493       cur_struct>>=8;\n\
494       cur_struct= uint4korr(hash_map+\n\
495                         (((uint16)cur_struct + cur_char - first_char)*4));\n\
496       cur_str++;\n\
497     }\n\
498   }\n\
499 }\n"
500 );
501   exit(0);
502 }
503 
504