/* Copyright (c) 2006, 2018, Oracle and/or its affiliates.
   Copyright (c) 2009, 2018, MariaDB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */

/*
  extensible hash

  TODO
     try to get rid of dummy nodes ?
     for non-unique hash, count only _distinct_ values
     (but how to do it in lf_hash_delete ?)
*/
#include <my_global.h>
#include <m_string.h>
#include <my_sys.h>
#include <mysys_err.h>
#include <my_bit.h>
#include <lf.h>
#include "my_cpu.h"

33 /* An element of the list */
34 typedef struct {
35 intptr link; /* a pointer to the next element in a list and a flag */
36 const uchar *key;
37 size_t keylen;
38 uint32 hashnr; /* reversed hash number, for sorting */
39 /*
40 data is stored here, directly after the keylen.
41 thus the pointer to data is (void*)(slist_element_ptr+1)
42 */
43 } LF_SLIST;
44
45 const int LF_HASH_OVERHEAD= sizeof(LF_SLIST);
46
47 /*
48 a structure to pass the context (pointers two the three successive elements
49 in a list) from l_find to l_insert/l_delete
50 */
51 typedef struct {
52 intptr *prev;
53 LF_SLIST *curr, *next;
54 } CURSOR;
55
56 /*
57 the last bit in LF_SLIST::link is a "deleted" flag.
58 the helper macros below convert it to a pure pointer or a pure flag
59 */
60 #define PTR(V) (LF_SLIST *)((V) & (~(intptr)1))
61 #define DELETED(V) ((V) & 1)
62
63 /** walk the list, searching for an element or invoking a callback
64
65 Search for hashnr/key/keylen in the list starting from 'head' and
66 position the cursor. The list is ORDER BY hashnr, key
67
68 @param head start walking the list from this node
69 @param cs charset for comparing keys, NULL if callback is used
70 @param hashnr hash number to search for
71 @param key key to search for OR data for the callback
72 @param keylen length of the key to compare, 0 if callback is used
73 @param cursor for returning the found element
74 @param pins see lf_alloc-pin.c
75 @param callback callback action, invoked for every element
76
77 @note
78 cursor is positioned in either case
79 pins[0..2] are used, they are NOT removed on return
80 callback might see some elements twice (because of retries)
81
82 @return
83 if find: 0 - not found
84 1 - found
85 if callback:
86 0 - ok
87 1 - error (callbck returned 1)
88 */
l_find(LF_SLIST ** head,CHARSET_INFO * cs,uint32 hashnr,const uchar * key,size_t keylen,CURSOR * cursor,LF_PINS * pins,my_hash_walk_action callback)89 static int l_find(LF_SLIST **head, CHARSET_INFO *cs, uint32 hashnr,
90 const uchar *key, size_t keylen, CURSOR *cursor, LF_PINS *pins,
91 my_hash_walk_action callback)
92 {
93 uint32 cur_hashnr;
94 const uchar *cur_key;
95 size_t cur_keylen;
96 intptr link;
97
98 DBUG_ASSERT(!cs || !callback); /* should not be set both */
99 DBUG_ASSERT(!keylen || !callback); /* should not be set both */
100
101 retry:
102 cursor->prev= (intptr *)head;
103 do { /* PTR() isn't necessary below, head is a dummy node */
104 cursor->curr= (LF_SLIST *)(*cursor->prev);
105 lf_pin(pins, 1, cursor->curr);
106 } while (my_atomic_loadptr((void**)cursor->prev) != cursor->curr &&
107 LF_BACKOFF());
108 for (;;)
109 {
110 if (unlikely(!cursor->curr))
111 return 0; /* end of the list */
112
113 cur_hashnr= cursor->curr->hashnr;
114 cur_keylen= cursor->curr->keylen;
115 cur_key= my_atomic_loadptr_explicit((void **) &cursor->curr->key,
116 MY_MEMORY_ORDER_ACQUIRE);
117
118 do {
119 link= (intptr) my_atomic_loadptr_explicit((void **) &cursor->curr->link,
120 MY_MEMORY_ORDER_RELAXED);
121 cursor->next= PTR(link);
122 lf_pin(pins, 0, cursor->next);
123 } while (link != (intptr) my_atomic_loadptr((void **) &cursor->curr->link)
124 && LF_BACKOFF());
125
126 if (!DELETED(link))
127 {
128 if (unlikely(callback))
129 {
130 if (cur_hashnr & 1 && callback(cursor->curr + 1, (void*)key))
131 return 1;
132 }
133 else if (cur_hashnr >= hashnr)
134 {
135 int r= 1;
136 if (cur_hashnr > hashnr ||
137 (r= my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0)
138 return !r;
139 }
140 cursor->prev= &(cursor->curr->link);
141 if (!(cur_hashnr & 1)) /* dummy node */
142 head= (LF_SLIST **)cursor->prev;
143 lf_pin(pins, 2, cursor->curr);
144 }
145 else
146 {
147 /*
148 we found a deleted node - be nice, help the other thread
149 and remove this deleted node
150 */
151 if (my_atomic_casptr((void **) cursor->prev,
152 (void **) &cursor->curr, cursor->next) && LF_BACKOFF())
153 lf_alloc_free(pins, cursor->curr);
154 else
155 goto retry;
156 }
157 cursor->curr= cursor->next;
158 lf_pin(pins, 1, cursor->curr);
159 }
160 }
161
162 /*
163 DESCRIPTION
164 insert a 'node' in the list that starts from 'head' in the correct
165 position (as found by l_find)
166
167 RETURN
168 0 - inserted
169 not 0 - a pointer to a duplicate (not pinned and thus unusable)
170
171 NOTE
172 it uses pins[0..2], on return all pins are removed.
173 if there're nodes with the same key value, a new node is added before them.
174 */
l_insert(LF_SLIST ** head,CHARSET_INFO * cs,LF_SLIST * node,LF_PINS * pins,uint flags)175 static LF_SLIST *l_insert(LF_SLIST **head, CHARSET_INFO *cs,
176 LF_SLIST *node, LF_PINS *pins, uint flags)
177 {
178 CURSOR cursor;
179 int res;
180
181 for (;;)
182 {
183 if (l_find(head, cs, node->hashnr, node->key, node->keylen,
184 &cursor, pins, 0) &&
185 (flags & LF_HASH_UNIQUE))
186 {
187 res= 0; /* duplicate found */
188 break;
189 }
190 else
191 {
192 node->link= (intptr)cursor.curr;
193 DBUG_ASSERT(node->link != (intptr)node); /* no circular references */
194 DBUG_ASSERT(cursor.prev != &node->link); /* no circular references */
195 if (my_atomic_casptr((void **) cursor.prev,
196 (void **)(char*) &cursor.curr, node))
197 {
198 res= 1; /* inserted ok */
199 break;
200 }
201 }
202 }
203 lf_unpin(pins, 0);
204 lf_unpin(pins, 1);
205 lf_unpin(pins, 2);
206 /*
207 Note that cursor.curr is not pinned here and the pointer is unreliable,
208 the object may disappear anytime. But if it points to a dummy node, the
209 pointer is safe, because dummy nodes are never freed - initialize_bucket()
210 uses this fact.
211 */
212 return res ? 0 : cursor.curr;
213 }
214
215 /*
216 DESCRIPTION
217 deletes a node as identified by hashnr/keey/keylen from the list
218 that starts from 'head'
219
220 RETURN
221 0 - ok
222 1 - not found
223
224 NOTE
225 it uses pins[0..2], on return all pins are removed.
226 */
l_delete(LF_SLIST ** head,CHARSET_INFO * cs,uint32 hashnr,const uchar * key,uint keylen,LF_PINS * pins)227 static int l_delete(LF_SLIST **head, CHARSET_INFO *cs, uint32 hashnr,
228 const uchar *key, uint keylen, LF_PINS *pins)
229 {
230 CURSOR cursor;
231 int res;
232
233 for (;;)
234 {
235 if (!l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0))
236 {
237 res= 1; /* not found */
238 break;
239 }
240 else
241 {
242 /* mark the node deleted */
243 if (my_atomic_casptr((void **) (char*) &(cursor.curr->link),
244 (void **) (char*) &cursor.next,
245 (void *)(((intptr)cursor.next) | 1)))
246 {
247 /* and remove it from the list */
248 if (my_atomic_casptr((void **)cursor.prev,
249 (void **)(char*)&cursor.curr, cursor.next))
250 lf_alloc_free(pins, cursor.curr);
251 else
252 {
253 /*
254 somebody already "helped" us and removed the node ?
255 Let's check if we need to help that someone too!
256 (to ensure the number of "set DELETED flag" actions
257 is equal to the number of "remove from the list" actions)
258 */
259 l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0);
260 }
261 res= 0;
262 break;
263 }
264 }
265 }
266 lf_unpin(pins, 0);
267 lf_unpin(pins, 1);
268 lf_unpin(pins, 2);
269 return res;
270 }
271
272 /*
273 DESCRIPTION
274 searches for a node as identified by hashnr/keey/keylen in the list
275 that starts from 'head'
276
277 RETURN
278 0 - not found
279 node - found
280
281 NOTE
282 it uses pins[0..2], on return the pin[2] keeps the node found
283 all other pins are removed.
284 */
l_search(LF_SLIST ** head,CHARSET_INFO * cs,uint32 hashnr,const uchar * key,uint keylen,LF_PINS * pins)285 static LF_SLIST *l_search(LF_SLIST **head, CHARSET_INFO *cs,
286 uint32 hashnr, const uchar *key, uint keylen,
287 LF_PINS *pins)
288 {
289 CURSOR cursor;
290 int res= l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0);
291 if (res)
292 lf_pin(pins, 2, cursor.curr);
293 else
294 lf_unpin(pins, 2);
295 lf_unpin(pins, 1);
296 lf_unpin(pins, 0);
297 return res ? cursor.curr : 0;
298 }
299
/*
  Returns a pointer to the key of 'record' and stores its length in *length.
  Uses the user-supplied get_key callback if one was given to lf_hash_init,
  otherwise the fixed key_offset/key_length layout.
*/
static inline const uchar* hash_key(const LF_HASH *hash,
                                    const uchar *record, size_t *length)
{
  if (hash->get_key)
    return (*hash->get_key)(record, length, 0);
  *length= hash->key_length;
  return record + hash->key_offset;
}
309 /*
310 Compute the hash key value from the raw key.
311
312 @note, that the hash value is limited to 2^31, because we need one
313 bit to distinguish between normal and dummy nodes.
314 */
calc_hash(CHARSET_INFO * cs,const uchar * key,size_t keylen)315 static inline my_hash_value_type calc_hash(CHARSET_INFO *cs,
316 const uchar *key,
317 size_t keylen)
318 {
319 ulong nr1= 1, nr2= 4;
320 cs->coll->hash_sort(cs, (uchar*) key, keylen, &nr1, &nr2);
321 return nr1;
322 }
323
324 #define MAX_LOAD 1.0 /* average number of elements in a bucket */
325
326 static int initialize_bucket(LF_HASH *, LF_SLIST **, uint, LF_PINS *);
327
/* default element initializer: plain byte-copy of the user's data */
static void default_initializer(LF_HASH *hash, void *dst, const void *src)
{
  memcpy(dst, src, hash->element_size);
}
333 /*
334 Initializes lf_hash, the arguments are compatible with hash_init
335
336 @note element_size sets both the size of allocated memory block for
337 lf_alloc and a size of memcpy'ed block size in lf_hash_insert. Typically
338 they are the same, indeed. But LF_HASH::element_size can be decreased
339 after lf_hash_init, and then lf_alloc will allocate larger block that
340 lf_hash_insert will copy over. It is desirable if part of the element
341 is expensive to initialize - for example if there is a mutex or
342 DYNAMIC_ARRAY. In this case they should be initialize in the
343 LF_ALLOCATOR::constructor, and lf_hash_insert should not overwrite them.
344
345 The above works well with PODS. For more complex cases (e.g. C++ classes
346 with private members) use initializer function.
347 */
lf_hash_init(LF_HASH * hash,uint element_size,uint flags,uint key_offset,uint key_length,my_hash_get_key get_key,CHARSET_INFO * charset)348 void lf_hash_init(LF_HASH *hash, uint element_size, uint flags,
349 uint key_offset, uint key_length, my_hash_get_key get_key,
350 CHARSET_INFO *charset)
351 {
352 lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size,
353 offsetof(LF_SLIST, key));
354 lf_dynarray_init(&hash->array, sizeof(LF_SLIST *));
355 hash->size= 1;
356 hash->count= 0;
357 hash->element_size= element_size;
358 hash->flags= flags;
359 hash->charset= charset ? charset : &my_charset_bin;
360 hash->key_offset= key_offset;
361 hash->key_length= key_length;
362 hash->get_key= get_key;
363 hash->initializer= default_initializer;
364 hash->hash_function= calc_hash;
365 DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length);
366 }
367
/*
  Frees all elements of the hash. Not lock-free: must only be called
  when no other thread can access the hash anymore.
*/
void lf_hash_destroy(LF_HASH *hash)
{
  LF_SLIST *el, **head= (LF_SLIST **)lf_dynarray_value(&hash->array, 0);

  if (head)
  {
    el= *head;
    while (el)
    {
      intptr next= el->link;
      if (el->hashnr & 1)
        lf_alloc_direct_free(&hash->alloc, el); /* normal node */
      else
        my_free(el); /* dummy node */
      el= (LF_SLIST *)next;
    }
  }
  lf_alloc_destroy(&hash->alloc);
  lf_dynarray_destroy(&hash->array);
}
389 /*
390 DESCRIPTION
391 inserts a new element to a hash. it will have a _copy_ of
392 data, not a pointer to it.
393
394 RETURN
395 0 - inserted
396 1 - didn't (unique key conflict)
397 -1 - out of memory
398
399 NOTE
400 see l_insert() for pin usage notes
401 */
lf_hash_insert(LF_HASH * hash,LF_PINS * pins,const void * data)402 int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data)
403 {
404 int csize, bucket, hashnr;
405 LF_SLIST *node, **el;
406
407 node= (LF_SLIST *)lf_alloc_new(pins);
408 if (unlikely(!node))
409 return -1;
410 hash->initializer(hash, node + 1, data);
411 node->key= hash_key(hash, (uchar *)(node+1), &node->keylen);
412 hashnr= hash->hash_function(hash->charset, node->key, node->keylen) & INT_MAX32;
413 bucket= hashnr % hash->size;
414 el= lf_dynarray_lvalue(&hash->array, bucket);
415 if (unlikely(!el))
416 return -1;
417 if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
418 return -1;
419 node->hashnr= my_reverse_bits(hashnr) | 1; /* normal node */
420 if (l_insert(el, hash->charset, node, pins, hash->flags))
421 {
422 lf_alloc_free(pins, node);
423 return 1;
424 }
425 csize= hash->size;
426 if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD)
427 my_atomic_cas32(&hash->size, &csize, csize*2);
428 return 0;
429 }
430
431 /*
432 DESCRIPTION
433 deletes an element with the given key from the hash (if a hash is
434 not unique and there're many elements with this key - the "first"
435 matching element is deleted)
436 RETURN
437 0 - deleted
438 1 - didn't (not found)
439 NOTE
440 see l_delete() for pin usage notes
441 */
lf_hash_delete(LF_HASH * hash,LF_PINS * pins,const void * key,uint keylen)442 int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
443 {
444 LF_SLIST **el;
445 uint bucket, hashnr;
446
447 hashnr= hash->hash_function(hash->charset, (uchar *)key, keylen) & INT_MAX32;
448
449 /* hide OOM errors - if we cannot initialize a bucket, try the previous one */
450 for (bucket= hashnr % hash->size; ;bucket= my_clear_highest_bit(bucket))
451 {
452 el= lf_dynarray_lvalue(&hash->array, bucket);
453 if (el && (*el || initialize_bucket(hash, el, bucket, pins) == 0))
454 break;
455 if (unlikely(bucket == 0))
456 return 1; /* if there's no bucket==0, the hash is empty */
457 }
458 if (l_delete(el, hash->charset, my_reverse_bits(hashnr) | 1,
459 (uchar *)key, keylen, pins))
460 {
461 return 1;
462 }
463 my_atomic_add32(&hash->count, -1);
464 return 0;
465 }
466
467 /*
468 RETURN
469 a pointer to an element with the given key (if a hash is not unique and
470 there're many elements with this key - the "first" matching element)
471 NULL if nothing is found
472
473 NOTE
474 see l_search() for pin usage notes
475 */
lf_hash_search_using_hash_value(LF_HASH * hash,LF_PINS * pins,my_hash_value_type hashnr,const void * key,uint keylen)476 void *lf_hash_search_using_hash_value(LF_HASH *hash, LF_PINS *pins,
477 my_hash_value_type hashnr,
478 const void *key, uint keylen)
479 {
480 LF_SLIST **el, *found;
481 uint bucket;
482
483 /* hide OOM errors - if we cannot initialize a bucket, try the previous one */
484 for (bucket= hashnr % hash->size; ;bucket= my_clear_highest_bit(bucket))
485 {
486 el= lf_dynarray_lvalue(&hash->array, bucket);
487 if (el && (*el || initialize_bucket(hash, el, bucket, pins) == 0))
488 break;
489 if (unlikely(bucket == 0))
490 return 0; /* if there's no bucket==0, the hash is empty */
491 }
492 found= l_search(el, hash->charset, my_reverse_bits(hashnr) | 1,
493 (uchar *)key, keylen, pins);
494 return found ? found+1 : 0;
495 }
496
497
498 /**
499 Iterate over all elements in hash and call function with the element
500
501 @note
502 If one of 'action' invocations returns 1 the iteration aborts.
503 'action' might see some elements twice!
504
505 @retval 0 ok
506 @retval 1 error (action returned 1)
507 */
lf_hash_iterate(LF_HASH * hash,LF_PINS * pins,my_hash_walk_action action,void * argument)508 int lf_hash_iterate(LF_HASH *hash, LF_PINS *pins,
509 my_hash_walk_action action, void *argument)
510 {
511 CURSOR cursor;
512 uint bucket= 0;
513 int res;
514 LF_SLIST **el;
515
516 el= lf_dynarray_lvalue(&hash->array, bucket);
517 if (unlikely(!el))
518 return 0; /* if there's no bucket==0, the hash is empty */
519 if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
520 return 0; /* if there's no bucket==0, the hash is empty */
521
522 res= l_find(el, 0, 0, (uchar*)argument, 0, &cursor, pins, action);
523
524 lf_unpin(pins, 2);
525 lf_unpin(pins, 1);
526 lf_unpin(pins, 0);
527 return res;
528 }
529
/*
  Convenience wrapper: computes the hash value of 'key' and delegates to
  lf_hash_search_using_hash_value(). See l_search() for pin usage notes.
*/
void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
{
  return lf_hash_search_using_hash_value(hash, pins,
                                         hash->hash_function(hash->charset,
                                                             (uchar*) key,
                                                             keylen) & INT_MAX32,
                                         key, keylen);
}
539 static const uchar *dummy_key= (uchar*)"";
540
541 /*
542 RETURN
543 0 - ok
544 -1 - out of memory
545 */
initialize_bucket(LF_HASH * hash,LF_SLIST ** node,uint bucket,LF_PINS * pins)546 static int initialize_bucket(LF_HASH *hash, LF_SLIST **node,
547 uint bucket, LF_PINS *pins)
548 {
549 uint parent= my_clear_highest_bit(bucket);
550 LF_SLIST *dummy= (LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME));
551 LF_SLIST **tmp= 0, *cur;
552 LF_SLIST **el= lf_dynarray_lvalue(&hash->array, parent);
553 if (unlikely(!el || !dummy))
554 return -1;
555 if (*el == NULL && bucket &&
556 unlikely(initialize_bucket(hash, el, parent, pins)))
557 {
558 my_free(dummy);
559 return -1;
560 }
561 dummy->hashnr= my_reverse_bits(bucket) | 0; /* dummy node */
562 dummy->key= dummy_key;
563 dummy->keylen= 0;
564 if ((cur= l_insert(el, hash->charset, dummy, pins, LF_HASH_UNIQUE)))
565 {
566 my_free(dummy);
567 dummy= cur;
568 }
569 my_atomic_casptr((void **)node, (void **)(char*) &tmp, dummy);
570 /*
571 note that if the CAS above failed (after l_insert() succeeded),
572 it would mean that some other thread has executed l_insert() for
573 the same dummy node, its l_insert() failed, it picked up our
574 dummy node (in "dummy= cur") and executed the same CAS as above.
575 Which means that even if CAS above failed we don't need to retry,
576 and we should not free(dummy) - there's no memory leak here
577 */
578 return 0;
579 }
580