1 /* Copyright (c) 2006, 2018, Oracle and/or its affiliates.
2    Copyright (c) 2009, 2018, MariaDB
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
16 
17 /*
18   extensible hash
19 
20   TODO
21      try to get rid of dummy nodes ?
22      for non-unique hash, count only _distinct_ values
23      (but how to do it in lf_hash_delete ?)
24 */
25 #include <my_global.h>
26 #include <m_string.h>
27 #include <my_sys.h>
28 #include <mysys_err.h>
29 #include <my_bit.h>
30 #include <lf.h>
31 #include "my_cpu.h"
32 
33 /* An element of the list */
34 typedef struct {
35   intptr link;   /* a pointer to the next element in a list and a flag */
36   const uchar *key;
37   size_t keylen;
38   uint32 hashnr; /* reversed hash number, for sorting */
39   /*
40     data is stored here, directly after the keylen.
41     thus the pointer to data is (void*)(slist_element_ptr+1)
42   */
43 } LF_SLIST;
44 
45 const int LF_HASH_OVERHEAD= sizeof(LF_SLIST);
46 
47 /*
48   a structure to pass the context (pointers two the three successive elements
49   in a list) from l_find to l_insert/l_delete
50 */
51 typedef struct {
52   intptr *prev;
53   LF_SLIST *curr, *next;
54 } CURSOR;
55 
56 /*
57   the last bit in LF_SLIST::link is a "deleted" flag.
58   the helper macros below convert it to a pure pointer or a pure flag
59 */
60 #define PTR(V)      (LF_SLIST *)((V) & (~(intptr)1))
61 #define DELETED(V)  ((V) & 1)
62 
63 /** walk the list, searching for an element or invoking a callback
64 
65     Search for hashnr/key/keylen in the list starting from 'head' and
66     position the cursor. The list is ORDER BY hashnr, key
67 
68     @param head         start walking the list from this node
69     @param cs           charset for comparing keys, NULL if callback is used
70     @param hashnr       hash number to search for
71     @param key          key to search for OR data for the callback
72     @param keylen       length of the key to compare, 0 if callback is used
73     @param cursor       for returning the found element
74     @param pins         see lf_alloc-pin.c
75     @param callback     callback action, invoked for every element
76 
77   @note
78     cursor is positioned in either case
79     pins[0..2] are used, they are NOT removed on return
80     callback might see some elements twice (because of retries)
81 
82   @return
83     if find: 0 - not found
84              1 - found
85     if callback:
86              0 - ok
87              1 - error (callbck returned 1)
88 */
l_find(LF_SLIST ** head,CHARSET_INFO * cs,uint32 hashnr,const uchar * key,size_t keylen,CURSOR * cursor,LF_PINS * pins,my_hash_walk_action callback)89 static int l_find(LF_SLIST **head, CHARSET_INFO *cs, uint32 hashnr,
90                  const uchar *key, size_t keylen, CURSOR *cursor, LF_PINS *pins,
91                  my_hash_walk_action callback)
92 {
93   uint32       cur_hashnr;
94   const uchar  *cur_key;
95   size_t       cur_keylen;
96   intptr       link;
97 
98   DBUG_ASSERT(!cs || !callback);        /* should not be set both */
99   DBUG_ASSERT(!keylen || !callback);    /* should not be set both */
100 
101 retry:
102   cursor->prev= (intptr *)head;
103   do { /* PTR() isn't necessary below, head is a dummy node */
104     cursor->curr= (LF_SLIST *)(*cursor->prev);
105     lf_pin(pins, 1, cursor->curr);
106   } while (my_atomic_loadptr((void**)cursor->prev) != cursor->curr &&
107            LF_BACKOFF());
108   for (;;)
109   {
110     if (unlikely(!cursor->curr))
111       return 0; /* end of the list */
112 
113     cur_hashnr= cursor->curr->hashnr;
114     cur_keylen= cursor->curr->keylen;
115     cur_key= my_atomic_loadptr_explicit((void **) &cursor->curr->key,
116                                         MY_MEMORY_ORDER_ACQUIRE);
117 
118     do {
119       link= (intptr) my_atomic_loadptr_explicit((void **) &cursor->curr->link,
120                                                 MY_MEMORY_ORDER_RELAXED);
121       cursor->next= PTR(link);
122       lf_pin(pins, 0, cursor->next);
123     } while (link != (intptr) my_atomic_loadptr((void **) &cursor->curr->link)
124              && LF_BACKOFF());
125 
126     if (!DELETED(link))
127     {
128       if (unlikely(callback))
129       {
130         if (cur_hashnr & 1 && callback(cursor->curr + 1, (void*)key))
131           return 1;
132       }
133       else if (cur_hashnr >= hashnr)
134       {
135         int r= 1;
136         if (cur_hashnr > hashnr ||
137             (r= my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0)
138           return !r;
139       }
140       cursor->prev= &(cursor->curr->link);
141       if (!(cur_hashnr & 1)) /* dummy node */
142         head= (LF_SLIST **)cursor->prev;
143       lf_pin(pins, 2, cursor->curr);
144     }
145     else
146     {
147       /*
148         we found a deleted node - be nice, help the other thread
149         and remove this deleted node
150       */
151       if (my_atomic_casptr((void **) cursor->prev,
152                            (void **) &cursor->curr, cursor->next) && LF_BACKOFF())
153         lf_alloc_free(pins, cursor->curr);
154       else
155         goto retry;
156     }
157     cursor->curr= cursor->next;
158     lf_pin(pins, 1, cursor->curr);
159   }
160 }
161 
162 /*
163   DESCRIPTION
164     insert a 'node' in the list that starts from 'head' in the correct
165     position (as found by l_find)
166 
167   RETURN
168     0     - inserted
169     not 0 - a pointer to a duplicate (not pinned and thus unusable)
170 
171   NOTE
172     it uses pins[0..2], on return all pins are removed.
173     if there're nodes with the same key value, a new node is added before them.
174 */
l_insert(LF_SLIST ** head,CHARSET_INFO * cs,LF_SLIST * node,LF_PINS * pins,uint flags)175 static LF_SLIST *l_insert(LF_SLIST **head, CHARSET_INFO *cs,
176                          LF_SLIST *node, LF_PINS *pins, uint flags)
177 {
178   CURSOR         cursor;
179   int            res;
180 
181   for (;;)
182   {
183     if (l_find(head, cs, node->hashnr, node->key, node->keylen,
184               &cursor, pins, 0) &&
185         (flags & LF_HASH_UNIQUE))
186     {
187       res= 0; /* duplicate found */
188       break;
189     }
190     else
191     {
192       node->link= (intptr)cursor.curr;
193       DBUG_ASSERT(node->link != (intptr)node); /* no circular references */
194       DBUG_ASSERT(cursor.prev != &node->link); /* no circular references */
195       if (my_atomic_casptr((void **) cursor.prev,
196                            (void **)(char*) &cursor.curr, node))
197       {
198         res= 1; /* inserted ok */
199         break;
200       }
201     }
202   }
203   lf_unpin(pins, 0);
204   lf_unpin(pins, 1);
205   lf_unpin(pins, 2);
206   /*
207     Note that cursor.curr is not pinned here and the pointer is unreliable,
208     the object may disappear anytime. But if it points to a dummy node, the
209     pointer is safe, because dummy nodes are never freed - initialize_bucket()
210     uses this fact.
211   */
212   return res ? 0 : cursor.curr;
213 }
214 
215 /*
216   DESCRIPTION
217     deletes a node as identified by hashnr/keey/keylen from the list
218     that starts from 'head'
219 
220   RETURN
221     0 - ok
222     1 - not found
223 
224   NOTE
225     it uses pins[0..2], on return all pins are removed.
226 */
l_delete(LF_SLIST ** head,CHARSET_INFO * cs,uint32 hashnr,const uchar * key,uint keylen,LF_PINS * pins)227 static int l_delete(LF_SLIST **head, CHARSET_INFO *cs, uint32 hashnr,
228                    const uchar *key, uint keylen, LF_PINS *pins)
229 {
230   CURSOR cursor;
231   int res;
232 
233   for (;;)
234   {
235     if (!l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0))
236     {
237       res= 1; /* not found */
238       break;
239     }
240     else
241     {
242       /* mark the node deleted */
243       if (my_atomic_casptr((void **) (char*) &(cursor.curr->link),
244                            (void **) (char*) &cursor.next,
245                            (void *)(((intptr)cursor.next) | 1)))
246       {
247         /* and remove it from the list */
248         if (my_atomic_casptr((void **)cursor.prev,
249                              (void **)(char*)&cursor.curr, cursor.next))
250           lf_alloc_free(pins, cursor.curr);
251         else
252         {
253           /*
254             somebody already "helped" us and removed the node ?
255             Let's check if we need to help that someone too!
256             (to ensure the number of "set DELETED flag" actions
257             is equal to the number of "remove from the list" actions)
258           */
259           l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0);
260         }
261         res= 0;
262         break;
263       }
264     }
265   }
266   lf_unpin(pins, 0);
267   lf_unpin(pins, 1);
268   lf_unpin(pins, 2);
269   return res;
270 }
271 
272 /*
273   DESCRIPTION
274     searches for a node as identified by hashnr/keey/keylen in the list
275     that starts from 'head'
276 
277   RETURN
278     0 - not found
279     node - found
280 
281   NOTE
282     it uses pins[0..2], on return the pin[2] keeps the node found
283     all other pins are removed.
284 */
l_search(LF_SLIST ** head,CHARSET_INFO * cs,uint32 hashnr,const uchar * key,uint keylen,LF_PINS * pins)285 static LF_SLIST *l_search(LF_SLIST **head, CHARSET_INFO *cs,
286                          uint32 hashnr, const uchar *key, uint keylen,
287                          LF_PINS *pins)
288 {
289   CURSOR cursor;
290   int res= l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0);
291   if (res)
292     lf_pin(pins, 2, cursor.curr);
293   else
294     lf_unpin(pins, 2);
295   lf_unpin(pins, 1);
296   lf_unpin(pins, 0);
297   return res ? cursor.curr : 0;
298 }
299 
hash_key(const LF_HASH * hash,const uchar * record,size_t * length)300 static inline const uchar* hash_key(const LF_HASH *hash,
301                                     const uchar *record, size_t *length)
302 {
303   if (hash->get_key)
304     return (*hash->get_key)(record, length, 0);
305   *length= hash->key_length;
306   return record + hash->key_offset;
307 }
308 
309 /*
310   Compute the hash key value from the raw key.
311 
312   @note, that the hash value is limited to 2^31, because we need one
313   bit to distinguish between normal and dummy nodes.
314 */
calc_hash(CHARSET_INFO * cs,const uchar * key,size_t keylen)315 static inline my_hash_value_type calc_hash(CHARSET_INFO *cs,
316                                            const uchar *key,
317                                            size_t keylen)
318 {
319   ulong nr1= 1, nr2= 4;
320   cs->coll->hash_sort(cs, (uchar*) key, keylen, &nr1, &nr2);
321   return nr1;
322 }
323 
324 #define MAX_LOAD 1.0    /* average number of elements in a bucket */
325 
326 static int initialize_bucket(LF_HASH *, LF_SLIST **, uint, LF_PINS *);
327 
default_initializer(LF_HASH * hash,void * dst,const void * src)328 static void default_initializer(LF_HASH *hash, void *dst, const void *src)
329 {
330   memcpy(dst, src, hash->element_size);
331 }
332 
333 /*
334   Initializes lf_hash, the arguments are compatible with hash_init
335 
336   @note element_size sets both the size of allocated memory block for
337   lf_alloc and a size of memcpy'ed block size in lf_hash_insert. Typically
338   they are the same, indeed. But LF_HASH::element_size can be decreased
339   after lf_hash_init, and then lf_alloc will allocate larger block that
340   lf_hash_insert will copy over. It is desirable if part of the element
341   is expensive to initialize - for example if there is a mutex or
342   DYNAMIC_ARRAY. In this case they should be initialize in the
343   LF_ALLOCATOR::constructor, and lf_hash_insert should not overwrite them.
344 
345   The above works well with PODS. For more complex cases (e.g. C++ classes
346   with private members) use initializer function.
347 */
lf_hash_init(LF_HASH * hash,uint element_size,uint flags,uint key_offset,uint key_length,my_hash_get_key get_key,CHARSET_INFO * charset)348 void lf_hash_init(LF_HASH *hash, uint element_size, uint flags,
349                   uint key_offset, uint key_length, my_hash_get_key get_key,
350                   CHARSET_INFO *charset)
351 {
352   lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size,
353                 offsetof(LF_SLIST, key));
354   lf_dynarray_init(&hash->array, sizeof(LF_SLIST *));
355   hash->size= 1;
356   hash->count= 0;
357   hash->element_size= element_size;
358   hash->flags= flags;
359   hash->charset= charset ? charset : &my_charset_bin;
360   hash->key_offset= key_offset;
361   hash->key_length= key_length;
362   hash->get_key= get_key;
363   hash->initializer= default_initializer;
364   hash->hash_function= calc_hash;
365   DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length);
366 }
367 
lf_hash_destroy(LF_HASH * hash)368 void lf_hash_destroy(LF_HASH *hash)
369 {
370   LF_SLIST *el, **head= (LF_SLIST **)lf_dynarray_value(&hash->array, 0);
371 
372   if (head)
373   {
374     el= *head;
375     while (el)
376     {
377       intptr next= el->link;
378       if (el->hashnr & 1)
379         lf_alloc_direct_free(&hash->alloc, el); /* normal node */
380       else
381         my_free(el); /* dummy node */
382       el= (LF_SLIST *)next;
383     }
384   }
385   lf_alloc_destroy(&hash->alloc);
386   lf_dynarray_destroy(&hash->array);
387 }
388 
389 /*
390   DESCRIPTION
391     inserts a new element to a hash. it will have a _copy_ of
392     data, not a pointer to it.
393 
394   RETURN
395     0 - inserted
396     1 - didn't (unique key conflict)
397    -1 - out of memory
398 
399   NOTE
400     see l_insert() for pin usage notes
401 */
lf_hash_insert(LF_HASH * hash,LF_PINS * pins,const void * data)402 int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data)
403 {
404   int csize, bucket, hashnr;
405   LF_SLIST *node, **el;
406 
407   node= (LF_SLIST *)lf_alloc_new(pins);
408   if (unlikely(!node))
409     return -1;
410   hash->initializer(hash, node + 1, data);
411   node->key= hash_key(hash, (uchar *)(node+1), &node->keylen);
412   hashnr= hash->hash_function(hash->charset, node->key, node->keylen) & INT_MAX32;
413   bucket= hashnr % hash->size;
414   el= lf_dynarray_lvalue(&hash->array, bucket);
415   if (unlikely(!el))
416     return -1;
417   if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
418     return -1;
419   node->hashnr= my_reverse_bits(hashnr) | 1; /* normal node */
420   if (l_insert(el, hash->charset, node, pins, hash->flags))
421   {
422     lf_alloc_free(pins, node);
423     return 1;
424   }
425   csize= hash->size;
426   if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD)
427     my_atomic_cas32(&hash->size, &csize, csize*2);
428   return 0;
429 }
430 
431 /*
432   DESCRIPTION
433     deletes an element with the given key from the hash (if a hash is
434     not unique and there're many elements with this key - the "first"
435     matching element is deleted)
436   RETURN
437     0 - deleted
438     1 - didn't (not found)
439   NOTE
440     see l_delete() for pin usage notes
441 */
lf_hash_delete(LF_HASH * hash,LF_PINS * pins,const void * key,uint keylen)442 int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
443 {
444   LF_SLIST **el;
445   uint bucket, hashnr;
446 
447   hashnr= hash->hash_function(hash->charset, (uchar *)key, keylen) & INT_MAX32;
448 
449   /* hide OOM errors - if we cannot initialize a bucket, try the previous one */
450   for (bucket= hashnr % hash->size; ;bucket= my_clear_highest_bit(bucket))
451   {
452     el= lf_dynarray_lvalue(&hash->array, bucket);
453     if (el && (*el || initialize_bucket(hash, el, bucket, pins) == 0))
454       break;
455     if (unlikely(bucket == 0))
456       return 1; /* if there's no bucket==0, the hash is empty */
457   }
458   if (l_delete(el, hash->charset, my_reverse_bits(hashnr) | 1,
459               (uchar *)key, keylen, pins))
460   {
461     return 1;
462   }
463   my_atomic_add32(&hash->count, -1);
464   return 0;
465 }
466 
467 /*
468   RETURN
469     a pointer to an element with the given key (if a hash is not unique and
470     there're many elements with this key - the "first" matching element)
471     NULL         if nothing is found
472 
473   NOTE
474     see l_search() for pin usage notes
475 */
lf_hash_search_using_hash_value(LF_HASH * hash,LF_PINS * pins,my_hash_value_type hashnr,const void * key,uint keylen)476 void *lf_hash_search_using_hash_value(LF_HASH *hash, LF_PINS *pins,
477                                       my_hash_value_type hashnr,
478                                       const void *key, uint keylen)
479 {
480   LF_SLIST **el, *found;
481   uint bucket;
482 
483   /* hide OOM errors - if we cannot initialize a bucket, try the previous one */
484   for (bucket= hashnr % hash->size; ;bucket= my_clear_highest_bit(bucket))
485   {
486     el= lf_dynarray_lvalue(&hash->array, bucket);
487     if (el && (*el || initialize_bucket(hash, el, bucket, pins) == 0))
488       break;
489     if (unlikely(bucket == 0))
490       return 0; /* if there's no bucket==0, the hash is empty */
491   }
492   found= l_search(el, hash->charset, my_reverse_bits(hashnr) | 1,
493                  (uchar *)key, keylen, pins);
494   return found ? found+1 : 0;
495 }
496 
497 
498 /**
499    Iterate over all elements in hash and call function with the element
500 
501    @note
502    If one of 'action' invocations returns 1 the iteration aborts.
503    'action' might see some elements twice!
504 
505    @retval 0    ok
506    @retval 1    error (action returned 1)
507 */
lf_hash_iterate(LF_HASH * hash,LF_PINS * pins,my_hash_walk_action action,void * argument)508 int lf_hash_iterate(LF_HASH *hash, LF_PINS *pins,
509                     my_hash_walk_action action, void *argument)
510 {
511   CURSOR cursor;
512   uint bucket= 0;
513   int res;
514   LF_SLIST **el;
515 
516   el= lf_dynarray_lvalue(&hash->array, bucket);
517   if (unlikely(!el))
518     return 0; /* if there's no bucket==0, the hash is empty */
519   if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
520     return 0; /* if there's no bucket==0, the hash is empty */
521 
522   res= l_find(el, 0, 0, (uchar*)argument, 0, &cursor, pins, action);
523 
524   lf_unpin(pins, 2);
525   lf_unpin(pins, 1);
526   lf_unpin(pins, 0);
527   return res;
528 }
529 
lf_hash_search(LF_HASH * hash,LF_PINS * pins,const void * key,uint keylen)530 void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
531 {
532   return lf_hash_search_using_hash_value(hash, pins,
533                                          hash->hash_function(hash->charset,
534                                                              (uchar*) key,
535                                                              keylen) & INT_MAX32,
536                                          key, keylen);
537 }
538 
539 static const uchar *dummy_key= (uchar*)"";
540 
541 /*
542   RETURN
543     0 - ok
544    -1 - out of memory
545 */
initialize_bucket(LF_HASH * hash,LF_SLIST ** node,uint bucket,LF_PINS * pins)546 static int initialize_bucket(LF_HASH *hash, LF_SLIST **node,
547                               uint bucket, LF_PINS *pins)
548 {
549   uint parent= my_clear_highest_bit(bucket);
550   LF_SLIST *dummy= (LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME));
551   LF_SLIST **tmp= 0, *cur;
552   LF_SLIST **el= lf_dynarray_lvalue(&hash->array, parent);
553   if (unlikely(!el || !dummy))
554     return -1;
555   if (*el == NULL && bucket &&
556       unlikely(initialize_bucket(hash, el, parent, pins)))
557   {
558     my_free(dummy);
559     return -1;
560   }
561   dummy->hashnr= my_reverse_bits(bucket) | 0; /* dummy node */
562   dummy->key= dummy_key;
563   dummy->keylen= 0;
564   if ((cur= l_insert(el, hash->charset, dummy, pins, LF_HASH_UNIQUE)))
565   {
566     my_free(dummy);
567     dummy= cur;
568   }
569   my_atomic_casptr((void **)node, (void **)(char*) &tmp, dummy);
570   /*
571     note that if the CAS above failed (after l_insert() succeeded),
572     it would mean that some other thread has executed l_insert() for
573     the same dummy node, its l_insert() failed, it picked up our
574     dummy node (in "dummy= cur") and executed the same CAS as above.
575     Which means that even if CAS above failed we don't need to retry,
576     and we should not free(dummy) - there's no memory leak here
577   */
578   return 0;
579 }
580