1 /************************************************************************************
2     Copyright (C) 2000, 2012 MySQL AB & MySQL Finland AB & TCX DataKonsult AB,
3                  Monty Program AB, 2016 MariaDB Corporation AB
4 
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Library General Public
7    License as published by the Free Software Foundation; either
8    version 2 of the License, or (at your option) any later version.
9 
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Library General Public License for more details.
14 
15    You should have received a copy of the GNU Library General Public
16    License along with this library; if not see <http://www.gnu.org/licenses>
17    or write to the Free Software Foundation, Inc.,
18    51 Franklin St., Fifth Floor, Boston, MA 02110, USA
19 
20    Part of this code includes code from the PHP project which
21    is freely available from http://www.php.net
22 *************************************************************************************/
23 
24 /* The hash functions used for saving keys */
25 /* One of key_length or key_length_offset must be given */
26 /* Key length of 0 isn't allowed */
27 
28 #include <ma_global.h>
29 #include <ma_sys.h>
30 #include <ma_string.h>
31 #include <mariadb_ctype.h>
32 #include "ma_hashtbl.h"
33 
34 #define NO_RECORD	((uint) -1)
35 #define LOWFIND 1
36 #define LOWUSED 2
37 #define HIGHFIND 4
38 #define HIGHUSED 8
39 
40 static uint hash_mask(uint hashnr,uint buffmax,uint maxlength);
41 static void movelink(MA_HASHTBL_LINK *array,uint pos,uint next_link,uint newlink);
42 static uint calc_hashnr(const uchar *key,uint length);
43 static uint calc_hashnr_caseup(const uchar *key,uint length);
44 static int hashcmp(MA_HASHTBL *hash,MA_HASHTBL_LINK *pos,const uchar *key,uint length);
45 
46 
47 my_bool _ma_hashtbl_init(MA_HASHTBL *hash,uint size,uint key_offset,uint key_length,
48 		  hash_get_key get_key,
49 		  void (*free_element)(void*),uint flags CALLER_INFO_PROTO)
50 {
51   hash->records=0;
52   if (ma_init_dynamic_array_ci(&hash->array,sizeof(MA_HASHTBL_LINK),size,0))
53   {
54     hash->free=0;				/* Allow call to hash_free */
55     return(TRUE);
56   }
57   hash->key_offset=key_offset;
58   hash->key_length=key_length;
59   hash->blength=1;
60   hash->current_record= NO_RECORD;		/* For the future */
61   hash->get_key=get_key;
62   hash->free=free_element;
63   hash->flags=flags;
64   if (flags & MA_HASHTBL_CASE_INSENSITIVE)
65     hash->calc_hashnr=calc_hashnr_caseup;
66   else
67     hash->calc_hashnr=calc_hashnr;
68   return(0);
69 }
70 
71 
72 void ma_hashtbl_free(MA_HASHTBL *hash)
73 {
74   if (hash->free)
75   {
76     uint i,records;
77     MA_HASHTBL_LINK *data=dynamic_element(&hash->array,0,MA_HASHTBL_LINK*);
78     for (i=0,records=hash->records ; i < records ; i++)
79       (*hash->free)(data[i].data);
80     hash->free=0;
81   }
82   ma_delete_dynamic(&hash->array);
83   hash->records=0;
84   return;
85 }
86 
87 	/* some helper functions */
88 
89 /*
90   This function is char* instead of uchar* as HPUX11 compiler can't
91   handle inline functions that are not defined as native types
92 */
93 
94 static inline char*
95 hash_key(MA_HASHTBL *hash,const uchar *record,uint *length,my_bool first)
96 {
97   if (hash->get_key)
98     return (char *)(*hash->get_key)(record,(uint *)length,first);
99   *length=hash->key_length;
100   return (char*) record+hash->key_offset;
101 }
102 
103 	/* Calculate pos according to keys */
104 
105 static uint hash_mask(uint hashnr,uint buffmax,uint maxlength)
106 {
107   if ((hashnr & (buffmax-1)) < maxlength) return (hashnr & (buffmax-1));
108   return (hashnr & ((buffmax >> 1) -1));
109 }
110 
111 static uint hash_rec_mask(MA_HASHTBL *hash,MA_HASHTBL_LINK *pos,uint buffmax,
112 			  uint maxlength)
113 {
114   uint length;
115   uchar *key= (uchar*) hash_key(hash,pos->data,&length,0);
116   return hash_mask((*hash->calc_hashnr)(key,length),buffmax,maxlength);
117 }
118 
119 #ifndef NEW_MA_HASHTBL_FUNCTION
120 
121 	/* Calc hashvalue for a key */
122 
123 static uint calc_hashnr(const uchar *key,uint length)
124 {
125   register uint nr=1, nr2=4;
126   while (length--)
127   {
128     nr^= (((nr & 63)+nr2)*((uint) (uchar) *key++))+ (nr << 8);
129     nr2+=3;
130   }
131   return((uint) nr);
132 }
133 
134 	/* Calc hashvalue for a key, case independently */
135 
136 static uint calc_hashnr_caseup(const uchar *key,uint length)
137 {
138   register uint nr=1, nr2=4;
139   while (length--)
140   {
141     nr^= (((nr & 63)+nr2)*((uint) (uchar) toupper(*key++)))+ (nr << 8);
142     nr2+=3;
143   }
144   return((uint) nr);
145 }
146 
147 #else
148 
149 /*
150  * Fowler/Noll/Vo hash
151  *
152  * The basis of the hash algorithm was taken from an idea sent by email to the
153  * IEEE Posix P1003.2 mailing list from Phong Vo (kpv@research.att.com) and
154  * Glenn Fowler (gsf@research.att.com).  Landon Curt Noll (chongo@toad.com)
155  * later improved on their algorithm.
156  *
157  * The magic is in the interesting relationship between the special prime
158  * 16777619 (2^24 + 403) and 2^32 and 2^8.
159  *
160  * This hash produces the fewest collisions of any function that we've seen so
161  * far, and works well on both numbers and strings.
162  */
163 
164 uint calc_hashnr(const uchar *key, uint len)
165 {
166   const uchar *end=key+len;
167   uint hash;
168   for (hash = 0; key < end; key++)
169   {
170     hash *= 16777619;
171     hash ^= (uint) *(uchar*) key;
172   }
173   return (hash);
174 }
175 
176 uint calc_hashnr_caseup(const uchar *key, uint len)
177 {
178   const uchar *end=key+len;
179   uint hash;
180   for (hash = 0; key < end; key++)
181   {
182     hash *= 16777619;
183     hash ^= (uint) (uchar) toupper(*key);
184   }
185   return (hash);
186 }
187 
188 #endif
189 
190 
191 #ifndef __SUNPRO_C				/* SUNPRO can't handle this */
192 static inline
193 #endif
194 unsigned int rec_hashnr(MA_HASHTBL *hash,const uchar *record)
195 {
196   uint length;
197   uchar *key= (uchar*) hash_key(hash,record,&length,0);
198   return (*hash->calc_hashnr)(key,length);
199 }
200 
201 
202 	/* Search after a record based on a key */
203 	/* Sets info->current_ptr to found record */
204 
205 void* ma_hashtbl_search(MA_HASHTBL *hash,const uchar *key,uint length)
206 {
207   MA_HASHTBL_LINK *pos;
208   uint flag,idx;
209 
210   flag=1;
211   if (hash->records)
212   {
213     idx=hash_mask((*hash->calc_hashnr)(key,length ? length :
214 					 hash->key_length),
215 		    hash->blength,hash->records);
216     do
217     {
218       pos= dynamic_element(&hash->array,idx,MA_HASHTBL_LINK*);
219       if (!hashcmp(hash,pos,key,length))
220       {
221 	hash->current_record= idx;
222 	return (pos->data);
223       }
224       if (flag)
225       {
226 	flag=0;					/* Reset flag */
227 	if (hash_rec_mask(hash,pos,hash->blength,hash->records) != idx)
228 	  break;				/* Wrong link */
229       }
230     }
231     while ((idx=pos->next) != NO_RECORD);
232   }
233   hash->current_record= NO_RECORD;
234   return(0);
235 }
236 
237 	/* Get next record with identical key */
238 	/* Can only be called if previous calls was hash_search */
239 
240 void *ma_hashtbl_next(MA_HASHTBL *hash,const uchar *key,uint length)
241 {
242   MA_HASHTBL_LINK *pos;
243   uint idx;
244 
245   if (hash->current_record != NO_RECORD)
246   {
247     MA_HASHTBL_LINK *data=dynamic_element(&hash->array,0,MA_HASHTBL_LINK*);
248     for (idx=data[hash->current_record].next; idx != NO_RECORD ; idx=pos->next)
249     {
250       pos=data+idx;
251       if (!hashcmp(hash,pos,key,length))
252       {
253 	hash->current_record= idx;
254 	return pos->data;
255       }
256     }
257     hash->current_record=NO_RECORD;
258   }
259   return 0;
260 }
261 
262 
263 	/* Change link from pos to new_link */
264 
265 static void movelink(MA_HASHTBL_LINK *array,uint find,uint next_link,uint newlink)
266 {
267   MA_HASHTBL_LINK *old_link;
268   do
269   {
270     old_link=array+next_link;
271   }
272   while ((next_link=old_link->next) != find);
273   old_link->next= newlink;
274   return;
275 }
276 
277 	/* Compare a key in a record to a whole key. Return 0 if identical */
278 
279 static int hashcmp(MA_HASHTBL *hash,MA_HASHTBL_LINK *pos,const uchar *key,uint length)
280 {
281   uint rec_keylength;
282   uchar *rec_key= (uchar*) hash_key(hash,pos->data,&rec_keylength,1);
283   return (length && length != rec_keylength) ||
284      memcmp(rec_key,key,rec_keylength);
285 }
286 
287 
288 	/* Write a hash-key to the hash-index */
289 
290 my_bool ma_hashtbl_insert(MA_HASHTBL *info,const uchar *record)
291 {
292   int flag;
293   uint halfbuff,hash_nr,first_index,idx;
294   uchar *ptr_to_rec= NULL,*ptr_to_rec2= NULL;
295   MA_HASHTBL_LINK *data,*empty,*gpos= NULL,*gpos2 = NULL,*pos;
296 
297   LINT_INIT(gpos); LINT_INIT(gpos2);
298   LINT_INIT(ptr_to_rec); LINT_INIT(ptr_to_rec2);
299 
300   flag=0;
301   if (!(empty=(MA_HASHTBL_LINK*) ma_alloc_dynamic(&info->array)))
302     return(TRUE);				/* No more memory */
303 
304   info->current_record= NO_RECORD;
305   data=dynamic_element(&info->array,0,MA_HASHTBL_LINK*);
306   halfbuff= info->blength >> 1;
307 
308   idx=first_index=info->records-halfbuff;
309   if (idx != info->records)				/* If some records */
310   {
311     do
312     {
313       pos=data+idx;
314       hash_nr=rec_hashnr(info,pos->data);
315       if (flag == 0)				/* First loop; Check if ok */
316 	if (hash_mask(hash_nr,info->blength,info->records) != first_index)
317 	  break;
318       if (!(hash_nr & halfbuff))
319       {						/* Key will not move */
320 	if (!(flag & LOWFIND))
321 	{
322 	  if (flag & HIGHFIND)
323 	  {
324 	    flag=LOWFIND | HIGHFIND;
325 	    /* key shall be moved to the current empty position */
326 	    gpos=empty;
327 	    ptr_to_rec=pos->data;
328 	    empty=pos;				/* This place is now free */
329 	  }
330 	  else
331 	  {
332 	    flag=LOWFIND | LOWUSED;		/* key isn't changed */
333 	    gpos=pos;
334 	    ptr_to_rec=pos->data;
335 	  }
336 	}
337 	else
338 	{
339 	  if (!(flag & LOWUSED))
340 	  {
341 	    /* Change link of previous LOW-key */
342 	    gpos->data=ptr_to_rec;
343 	    gpos->next=(uint) (pos-data);
344 	    flag= (flag & HIGHFIND) | (LOWFIND | LOWUSED);
345 	  }
346 	  gpos=pos;
347 	  ptr_to_rec=pos->data;
348 	}
349       }
350       else
351       {						/* key will be moved */
352 	if (!(flag & HIGHFIND))
353 	{
354 	  flag= (flag & LOWFIND) | HIGHFIND;
355 	  /* key shall be moved to the last (empty) position */
356 	  gpos2 = empty; empty=pos;
357 	  ptr_to_rec2=pos->data;
358 	}
359 	else
360 	{
361 	  if (!(flag & HIGHUSED))
362 	  {
363 	    /* Change link of previous hash-key and save */
364 	    gpos2->data=ptr_to_rec2;
365 	    gpos2->next=(uint) (pos-data);
366 	    flag= (flag & LOWFIND) | (HIGHFIND | HIGHUSED);
367 	  }
368 	  gpos2=pos;
369 	  ptr_to_rec2=pos->data;
370 	}
371       }
372     }
373     while ((idx=pos->next) != NO_RECORD);
374 
375     if ((flag & (LOWFIND | LOWUSED)) == LOWFIND)
376     {
377       gpos->data=ptr_to_rec;
378       gpos->next=NO_RECORD;
379     }
380     if ((flag & (HIGHFIND | HIGHUSED)) == HIGHFIND)
381     {
382       gpos2->data=ptr_to_rec2;
383       gpos2->next=NO_RECORD;
384     }
385   }
386   /* Check if we are at the empty position */
387 
388   idx=hash_mask(rec_hashnr(info,record),info->blength,info->records+1);
389   pos=data+idx;
390   if (pos == empty)
391   {
392     pos->data=(uchar*) record;
393     pos->next=NO_RECORD;
394   }
395   else
396   {
397     /* Check if more records in same hash-nr family */
398     empty[0]=pos[0];
399     gpos=data+hash_rec_mask(info,pos,info->blength,info->records+1);
400     if (pos == gpos)
401     {
402       pos->data=(uchar*) record;
403       pos->next=(uint) (empty - data);
404     }
405     else
406     {
407       pos->data=(uchar*) record;
408       pos->next=NO_RECORD;
409       movelink(data,(uint) (pos-data),(uint) (gpos-data),(uint) (empty-data));
410     }
411   }
412   if (++info->records == info->blength)
413     info->blength+= info->blength;
414   return(0);
415 }
416 
417 
418 /******************************************************************************
419 ** Remove one record from hash-table. The record with the same record
420 ** ptr is removed.
421 ** if there is a free-function it's called for record if found
422 ******************************************************************************/
423 
424 my_bool ma_hashtbl_delete(MA_HASHTBL *hash,uchar *record)
425 {
426   uint blength,pos2,pos_hashnr,lastpos_hashnr,idx,empty_index;
427   MA_HASHTBL_LINK *data,*lastpos,*gpos,*pos,*pos3,*empty;
428   if (!hash->records)
429     return(1);
430 
431   blength=hash->blength;
432   data=dynamic_element(&hash->array,0,MA_HASHTBL_LINK*);
433   /* Search after record with key */
434   pos=data+ hash_mask(rec_hashnr(hash,record),blength,hash->records);
435   gpos = 0;
436 
437   while (pos->data != record)
438   {
439     gpos=pos;
440     if (pos->next == NO_RECORD)
441       return(1);			/* Key not found */
442     pos=data+pos->next;
443   }
444 
445   if ( --(hash->records) < hash->blength >> 1) hash->blength>>=1;
446   hash->current_record= NO_RECORD;
447   lastpos=data+hash->records;
448 
449   /* Remove link to record */
450   empty=pos; empty_index=(uint) (empty-data);
451   if (gpos)
452     gpos->next=pos->next;		/* unlink current ptr */
453   else if (pos->next != NO_RECORD)
454   {
455     empty=data+(empty_index=pos->next);
456     pos->data=empty->data;
457     pos->next=empty->next;
458   }
459 
460   if (empty == lastpos)			/* last key at wrong pos or no next link */
461     goto exit;
462 
463   /* Move the last key (lastpos) */
464   lastpos_hashnr=rec_hashnr(hash,lastpos->data);
465   /* pos is where lastpos should be */
466   pos=data+hash_mask(lastpos_hashnr,hash->blength,hash->records);
467   if (pos == empty)			/* Move to empty position. */
468   {
469     empty[0]=lastpos[0];
470     goto exit;
471   }
472   pos_hashnr=rec_hashnr(hash,pos->data);
473   /* pos3 is where the pos should be */
474   pos3= data+hash_mask(pos_hashnr,hash->blength,hash->records);
475   if (pos != pos3)
476   {					/* pos is on wrong posit */
477     empty[0]=pos[0];			/* Save it here */
478     pos[0]=lastpos[0];			/* This should be here */
479     movelink(data,(uint) (pos-data),(uint) (pos3-data),empty_index);
480     goto exit;
481   }
482   pos2= hash_mask(lastpos_hashnr,blength,hash->records+1);
483   if (pos2 == hash_mask(pos_hashnr,blength,hash->records+1))
484   {					/* Identical key-positions */
485     if (pos2 != hash->records)
486     {
487       empty[0]=lastpos[0];
488       movelink(data,(uint) (lastpos-data),(uint) (pos-data),empty_index);
489       goto exit;
490     }
491     idx= (uint) (pos-data);		/* Link pos->next after lastpos */
492   }
493   else idx= NO_RECORD;		/* Different positions merge */
494 
495   empty[0]=lastpos[0];
496   movelink(data,idx,empty_index,pos->next);
497   pos->next=empty_index;
498 
499 exit:
500   ma_pop_dynamic(&hash->array);
501   if (hash->free)
502     (*hash->free)((uchar*) record);
503   return(0);
504 }
505 
506 	/*
507 	  Update keys when record has changed.
508 	  This is much more efficient than using a delete & insert.
509 	  */
510 
511 my_bool ma_hashtbl_update(MA_HASHTBL *hash,uchar *record,uchar *old_key,uint old_key_length)
512 {
513   uint idx,new_index,new_pos_index,blength,records,empty;
514   MA_HASHTBL_LINK org_link,*data,*previous,*pos;
515 
516   data=dynamic_element(&hash->array,0,MA_HASHTBL_LINK*);
517   blength=hash->blength; records=hash->records;
518 
519   /* Search after record with key */
520 
521   idx=hash_mask((*hash->calc_hashnr)(old_key,(old_key_length ?
522 						old_key_length :
523 						hash->key_length)),
524 		  blength,records);
525   new_index=hash_mask(rec_hashnr(hash,record),blength,records);
526   if (idx == new_index)
527     return(0);			/* Nothing to do (No record check) */
528   previous=0;
529   for (;;)
530   {
531 
532     if ((pos= data+idx)->data == record)
533       break;
534     previous=pos;
535     if ((idx=pos->next) == NO_RECORD)
536       return(1);			/* Not found in links */
537   }
538   hash->current_record= NO_RECORD;
539   org_link= *pos;
540   empty=idx;
541 
542   /* Relink record from current chain */
543 
544   if (!previous)
545   {
546     if (pos->next != NO_RECORD)
547     {
548       empty=pos->next;
549       *pos= data[pos->next];
550     }
551   }
552   else
553     previous->next=pos->next;		/* unlink pos */
554 
555   /* Move data to correct position */
556   pos=data+new_index;
557   new_pos_index=hash_rec_mask(hash,pos,blength,records);
558   if (new_index != new_pos_index)
559   {					/* Other record in wrong position */
560     data[empty] = *pos;
561     movelink(data,new_index,new_pos_index,empty);
562     org_link.next=NO_RECORD;
563     data[new_index]= org_link;
564   }
565   else
566   {					/* Link in chain at right position */
567     org_link.next=data[new_index].next;
568     data[empty]=org_link;
569     data[new_index].next=empty;
570   }
571   return(0);
572 }
573 
574 
575 uchar *ma_hashtbl_element(MA_HASHTBL *hash,uint idx)
576 {
577   if (idx < hash->records)
578     return dynamic_element(&hash->array,idx,MA_HASHTBL_LINK*)->data;
579   return 0;
580 }
581