/* Copyright (c) 2006, 2018, Oracle and/or its affiliates.
   Copyright (c) 2009, 2020, MariaDB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
16
/*
  extensible hash

  TODO
     try to get rid of dummy nodes ?
     for non-unique hash, count only _distinct_ values
     (but how to do it in lf_hash_delete ?)
*/
25 #include "mysys_priv.h"
26 #include <m_string.h>
27 #include <mysys_err.h>
28 #include <my_bit.h>
29 #include <lf.h>
30 #include "my_cpu.h"
31 #include "assume_aligned.h"
32
/* An element of the list */
typedef struct {
  /*
    A pointer to the next element in a list, with the lowest bit used as
    the "deleted" flag (see the PTR()/DELETED() macros below).
  */
  intptr link;
  const uchar *key;     /* pointer to the key, stored in the trailing data */
  size_t keylen;        /* length of the key in bytes */
  /*
    Reversed-bits hash number, for split-order sorting. The lowest bit
    distinguishes normal nodes (1) from dummy bucket nodes (0).
  */
  uint32 hashnr;
  /*
    data is stored here, directly after the keylen.
    thus the pointer to data is (void*)(slist_element_ptr+1)
  */
} LF_SLIST;

/* Per-element memory overhead of the hash, exposed to users of lf.h */
const int LF_HASH_OVERHEAD= sizeof(LF_SLIST);
46
/*
  a structure to pass the context (pointers to the three successive elements
  in a list) from l_find to l_insert/l_delete
*/
typedef struct {
  intptr *prev;           /* address of the link that points to 'curr' */
  LF_SLIST *curr, *next;  /* current element and its successor */
} CURSOR;
55
/*
  the last bit in LF_SLIST::link is a "deleted" flag.
  the helper macros below convert it to a pure pointer or a pure flag
*/
#define PTR(V)      (LF_SLIST *)((V) & (~(intptr)1))  /* strip the flag bit */
#define DELETED(V)  ((V) & 1)                         /* test the flag bit  */
62
63 /** walk the list, searching for an element or invoking a callback
64
65 Search for hashnr/key/keylen in the list starting from 'head' and
66 position the cursor. The list is ORDER BY hashnr, key
67
68 @param head start walking the list from this node
69 @param cs charset for comparing keys, NULL if callback is used
70 @param hashnr hash number to search for
71 @param key key to search for OR data for the callback
72 @param keylen length of the key to compare, 0 if callback is used
73 @param cursor for returning the found element
74 @param pins see lf_alloc-pin.c
75 @param callback callback action, invoked for every element
76
77 @note
78 cursor is positioned in either case
79 pins[0..2] are used, they are NOT removed on return
80 callback might see some elements twice (because of retries)
81
82 @return
83 if find: 0 - not found
84 1 - found
85 if callback:
86 0 - ok
87 1 - error (callbck returned 1)
88 */
l_find(LF_SLIST ** head,CHARSET_INFO * cs,uint32 hashnr,const uchar * key,size_t keylen,CURSOR * cursor,LF_PINS * pins,my_hash_walk_action callback)89 static int l_find(LF_SLIST **head, CHARSET_INFO *cs, uint32 hashnr,
90 const uchar *key, size_t keylen, CURSOR *cursor, LF_PINS *pins,
91 my_hash_walk_action callback)
92 {
93 uint32 cur_hashnr;
94 const uchar *cur_key;
95 size_t cur_keylen;
96 intptr link;
97
98 DBUG_ASSERT(!cs || !callback); /* should not be set both */
99 DBUG_ASSERT(!keylen || !callback); /* should not be set both */
100
101 retry:
102 cursor->prev= (intptr *) my_assume_aligned<sizeof(intptr)>(head);
103 do { /* PTR() isn't necessary below, head is a dummy node */
104 cursor->curr= my_assume_aligned<sizeof(LF_SLIST *)>((LF_SLIST *)(*cursor->prev));
105 lf_pin(pins, 1, cursor->curr);
106 } while (my_atomic_loadptr(
107 (void **)my_assume_aligned<sizeof(LF_SLIST *)>(cursor->prev))
108 != cursor->curr && LF_BACKOFF());
109 for (;;)
110 {
111 if (unlikely(!cursor->curr))
112 return 0; /* end of the list */
113
114 cur_hashnr= cursor->curr->hashnr;
115 cur_keylen= cursor->curr->keylen;
116 /* The key element needs to be aligned, not necessary what it points to */
117 my_assume_aligned<sizeof(const uchar *)>(&cursor->curr->key);
118 cur_key= (const uchar *) my_atomic_loadptr_explicit((void **) &cursor->curr->key,
119 MY_MEMORY_ORDER_ACQUIRE);
120
121 do {
122 /* attempting to my_assume_aligned onlink below broke the implementation */
123 link= (intptr) my_atomic_loadptr_explicit((void **) &cursor->curr->link,
124 MY_MEMORY_ORDER_RELAXED);
125 cursor->next= my_assume_aligned<sizeof(LF_SLIST *)>(PTR(link));
126 lf_pin(pins, 0, cursor->next);
127 } while (link != (intptr) my_atomic_loadptr((void *volatile *) &cursor->curr->link)
128 && LF_BACKOFF());
129
130 if (!DELETED(link))
131 {
132 if (unlikely(callback))
133 {
134 if (cur_hashnr & 1 && callback(cursor->curr + 1, (void*)key))
135 return 1;
136 }
137 else if (cur_hashnr >= hashnr)
138 {
139 int r= 1;
140 if (cur_hashnr > hashnr ||
141 (r= my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0)
142 return !r;
143 }
144 cursor->prev= &(cursor->curr->link);
145 if (!(cur_hashnr & 1)) /* dummy node */
146 head= (LF_SLIST **)cursor->prev;
147 lf_pin(pins, 2, cursor->curr);
148 }
149 else
150 {
151 /*
152 we found a deleted node - be nice, help the other thread
153 and remove this deleted node
154 */
155 if (my_atomic_casptr((void **) cursor->prev,
156 (void **) &cursor->curr, cursor->next) && LF_BACKOFF())
157 lf_alloc_free(pins, cursor->curr);
158 else
159 goto retry;
160 }
161 cursor->curr= cursor->next;
162 lf_pin(pins, 1, cursor->curr);
163 }
164 }
165
166
/* static l_find is the only user of my_assume_aligned; keep the rest C scoped */
168 C_MODE_START
169
170 /*
171 DESCRIPTION
172 insert a 'node' in the list that starts from 'head' in the correct
173 position (as found by l_find)
174
175 RETURN
176 0 - inserted
177 not 0 - a pointer to a duplicate (not pinned and thus unusable)
178
179 NOTE
180 it uses pins[0..2], on return all pins are removed.
181 if there're nodes with the same key value, a new node is added before them.
182 */
l_insert(LF_SLIST ** head,CHARSET_INFO * cs,LF_SLIST * node,LF_PINS * pins,uint flags)183 static LF_SLIST *l_insert(LF_SLIST **head, CHARSET_INFO *cs,
184 LF_SLIST *node, LF_PINS *pins, uint flags)
185 {
186 CURSOR cursor;
187 int res;
188
189 for (;;)
190 {
191 if (l_find(head, cs, node->hashnr, node->key, node->keylen,
192 &cursor, pins, 0) &&
193 (flags & LF_HASH_UNIQUE))
194 {
195 res= 0; /* duplicate found */
196 break;
197 }
198 else
199 {
200 node->link= (intptr)cursor.curr;
201 DBUG_ASSERT(node->link != (intptr)node); /* no circular references */
202 DBUG_ASSERT(cursor.prev != &node->link); /* no circular references */
203 if (my_atomic_casptr((void **) cursor.prev,
204 (void **)(char*) &cursor.curr, node))
205 {
206 res= 1; /* inserted ok */
207 break;
208 }
209 }
210 }
211 lf_unpin(pins, 0);
212 lf_unpin(pins, 1);
213 lf_unpin(pins, 2);
214 /*
215 Note that cursor.curr is not pinned here and the pointer is unreliable,
216 the object may disappear anytime. But if it points to a dummy node, the
217 pointer is safe, because dummy nodes are never freed - initialize_bucket()
218 uses this fact.
219 */
220 return res ? 0 : cursor.curr;
221 }
222
223 /*
224 DESCRIPTION
225 deletes a node as identified by hashnr/keey/keylen from the list
226 that starts from 'head'
227
228 RETURN
229 0 - ok
230 1 - not found
231
232 NOTE
233 it uses pins[0..2], on return all pins are removed.
234 */
l_delete(LF_SLIST ** head,CHARSET_INFO * cs,uint32 hashnr,const uchar * key,uint keylen,LF_PINS * pins)235 static int l_delete(LF_SLIST **head, CHARSET_INFO *cs, uint32 hashnr,
236 const uchar *key, uint keylen, LF_PINS *pins)
237 {
238 CURSOR cursor;
239 int res;
240
241 for (;;)
242 {
243 if (!l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0))
244 {
245 res= 1; /* not found */
246 break;
247 }
248 else
249 {
250 /* mark the node deleted */
251 if (my_atomic_casptr((void **) (char*) &(cursor.curr->link),
252 (void **) (char*) &cursor.next,
253 (void *)(((intptr)cursor.next) | 1)))
254 {
255 /* and remove it from the list */
256 if (my_atomic_casptr((void **)cursor.prev,
257 (void **)(char*)&cursor.curr, cursor.next))
258 lf_alloc_free(pins, cursor.curr);
259 else
260 {
261 /*
262 somebody already "helped" us and removed the node ?
263 Let's check if we need to help that someone too!
264 (to ensure the number of "set DELETED flag" actions
265 is equal to the number of "remove from the list" actions)
266 */
267 l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0);
268 }
269 res= 0;
270 break;
271 }
272 }
273 }
274 lf_unpin(pins, 0);
275 lf_unpin(pins, 1);
276 lf_unpin(pins, 2);
277 return res;
278 }
279
280 /*
281 DESCRIPTION
282 searches for a node as identified by hashnr/keey/keylen in the list
283 that starts from 'head'
284
285 RETURN
286 0 - not found
287 node - found
288
289 NOTE
290 it uses pins[0..2], on return the pin[2] keeps the node found
291 all other pins are removed.
292 */
l_search(LF_SLIST ** head,CHARSET_INFO * cs,uint32 hashnr,const uchar * key,uint keylen,LF_PINS * pins)293 static LF_SLIST *l_search(LF_SLIST **head, CHARSET_INFO *cs,
294 uint32 hashnr, const uchar *key, uint keylen,
295 LF_PINS *pins)
296 {
297 CURSOR cursor;
298 int res= l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0);
299 if (res)
300 lf_pin(pins, 2, cursor.curr);
301 else
302 lf_unpin(pins, 2);
303 lf_unpin(pins, 1);
304 lf_unpin(pins, 0);
305 return res ? cursor.curr : 0;
306 }
307
/*
  Return a pointer to the key of 'record' and store its length in *length.
  Uses the user-supplied get_key callback if one was given to lf_hash_init,
  otherwise the fixed key_offset/key_length of the hash.
*/
static inline const uchar* hash_key(const LF_HASH *hash,
                                    const uchar *record, size_t *length)
{
  if (hash->get_key)
    return (*hash->get_key)(record, length, 0);
  *length= hash->key_length;
  return record + hash->key_offset;
}
316
317 /*
318 Compute the hash key value from the raw key.
319
320 @note, that the hash value is limited to 2^31, because we need one
321 bit to distinguish between normal and dummy nodes.
322 */
calc_hash(CHARSET_INFO * cs,const uchar * key,size_t keylen)323 static inline my_hash_value_type calc_hash(CHARSET_INFO *cs,
324 const uchar *key,
325 size_t keylen)
326 {
327 ulong nr1= 1, nr2= 4;
328 my_ci_hash_sort(cs, (uchar*) key, keylen, &nr1, &nr2);
329 return nr1;
330 }
331
332 #define MAX_LOAD 1.0 /* average number of elements in a bucket */
333
334 static int initialize_bucket(LF_HASH *, LF_SLIST **, uint, LF_PINS *);
335
default_initializer(LF_HASH * hash,void * dst,const void * src)336 static void default_initializer(LF_HASH *hash, void *dst, const void *src)
337 {
338 memcpy(dst, src, hash->element_size);
339 }
340
341
342 /*
343 Initializes lf_hash, the arguments are compatible with hash_init
344
345 @note element_size sets both the size of allocated memory block for
346 lf_alloc and a size of memcpy'ed block size in lf_hash_insert. Typically
347 they are the same, indeed. But LF_HASH::element_size can be decreased
348 after lf_hash_init, and then lf_alloc will allocate larger block that
349 lf_hash_insert will copy over. It is desirable if part of the element
350 is expensive to initialize - for example if there is a mutex or
351 DYNAMIC_ARRAY. In this case they should be initialize in the
352 LF_ALLOCATOR::constructor, and lf_hash_insert should not overwrite them.
353
354 The above works well with PODS. For more complex cases (e.g. C++ classes
355 with private members) use initializer function.
356 */
lf_hash_init(LF_HASH * hash,uint element_size,uint flags,uint key_offset,uint key_length,my_hash_get_key get_key,CHARSET_INFO * charset)357 void lf_hash_init(LF_HASH *hash, uint element_size, uint flags,
358 uint key_offset, uint key_length, my_hash_get_key get_key,
359 CHARSET_INFO *charset)
360 {
361 lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size,
362 offsetof(LF_SLIST, key));
363 lf_dynarray_init(&hash->array, sizeof(LF_SLIST *));
364 hash->size= 1;
365 hash->count= 0;
366 hash->element_size= element_size;
367 hash->flags= flags;
368 hash->charset= charset ? charset : &my_charset_bin;
369 hash->key_offset= key_offset;
370 hash->key_length= key_length;
371 hash->get_key= get_key;
372 hash->initializer= default_initializer;
373 hash->hash_function= calc_hash;
374 DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length);
375 }
376
lf_hash_destroy(LF_HASH * hash)377 void lf_hash_destroy(LF_HASH *hash)
378 {
379 LF_SLIST *el, **head= (LF_SLIST **)lf_dynarray_value(&hash->array, 0);
380
381 if (head)
382 {
383 el= *head;
384 while (el)
385 {
386 intptr next= el->link;
387 if (el->hashnr & 1)
388 lf_alloc_direct_free(&hash->alloc, el); /* normal node */
389 else
390 my_free(el); /* dummy node */
391 el= (LF_SLIST *)next;
392 }
393 }
394 lf_alloc_destroy(&hash->alloc);
395 lf_dynarray_destroy(&hash->array);
396 }
397
398 /*
399 DESCRIPTION
400 inserts a new element to a hash. it will have a _copy_ of
401 data, not a pointer to it.
402
403 RETURN
404 0 - inserted
405 1 - didn't (unique key conflict)
406 -1 - out of memory
407
408 NOTE
409 see l_insert() for pin usage notes
410 */
lf_hash_insert(LF_HASH * hash,LF_PINS * pins,const void * data)411 int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data)
412 {
413 int csize, bucket, hashnr;
414 LF_SLIST *node, **el;
415
416 node= (LF_SLIST *)lf_alloc_new(pins);
417 if (unlikely(!node))
418 return -1;
419 hash->initializer(hash, node + 1, data);
420 node->key= hash_key(hash, (uchar *)(node+1), &node->keylen);
421 hashnr= hash->hash_function(hash->charset, node->key, node->keylen) & INT_MAX32;
422 bucket= hashnr % hash->size;
423 el= (LF_SLIST **)lf_dynarray_lvalue(&hash->array, bucket);
424 if (unlikely(!el))
425 return -1;
426 if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
427 return -1;
428 node->hashnr= my_reverse_bits(hashnr) | 1; /* normal node */
429 if (l_insert(el, hash->charset, node, pins, hash->flags))
430 {
431 lf_alloc_free(pins, node);
432 return 1;
433 }
434 csize= hash->size;
435 if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD)
436 my_atomic_cas32(&hash->size, &csize, csize*2);
437 return 0;
438 }
439
440 /*
441 DESCRIPTION
442 deletes an element with the given key from the hash (if a hash is
443 not unique and there're many elements with this key - the "first"
444 matching element is deleted)
445 RETURN
446 0 - deleted
447 1 - didn't (not found)
448 NOTE
449 see l_delete() for pin usage notes
450 */
lf_hash_delete(LF_HASH * hash,LF_PINS * pins,const void * key,uint keylen)451 int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
452 {
453 LF_SLIST **el;
454 uint bucket, hashnr;
455
456 hashnr= hash->hash_function(hash->charset, (uchar *)key, keylen) & INT_MAX32;
457
458 /* hide OOM errors - if we cannot initialize a bucket, try the previous one */
459 for (bucket= hashnr % hash->size; ;bucket= my_clear_highest_bit(bucket))
460 {
461 el= (LF_SLIST **)lf_dynarray_lvalue(&hash->array, bucket);
462 if (el && (*el || initialize_bucket(hash, el, bucket, pins) == 0))
463 break;
464 if (unlikely(bucket == 0))
465 return 1; /* if there's no bucket==0, the hash is empty */
466 }
467 if (l_delete(el, hash->charset, my_reverse_bits(hashnr) | 1,
468 (uchar *)key, keylen, pins))
469 {
470 return 1;
471 }
472 my_atomic_add32(&hash->count, -1);
473 return 0;
474 }
475
476 /*
477 RETURN
478 a pointer to an element with the given key (if a hash is not unique and
479 there're many elements with this key - the "first" matching element)
480 NULL if nothing is found
481
482 NOTE
483 see l_search() for pin usage notes
484 */
lf_hash_search_using_hash_value(LF_HASH * hash,LF_PINS * pins,my_hash_value_type hashnr,const void * key,uint keylen)485 void *lf_hash_search_using_hash_value(LF_HASH *hash, LF_PINS *pins,
486 my_hash_value_type hashnr,
487 const void *key, uint keylen)
488 {
489 LF_SLIST **el, *found;
490 uint bucket;
491
492 /* hide OOM errors - if we cannot initialize a bucket, try the previous one */
493 for (bucket= hashnr % hash->size; ;bucket= my_clear_highest_bit(bucket))
494 {
495 el= (LF_SLIST **)lf_dynarray_lvalue(&hash->array, bucket);
496 if (el && (*el || initialize_bucket(hash, el, bucket, pins) == 0))
497 break;
498 if (unlikely(bucket == 0))
499 return 0; /* if there's no bucket==0, the hash is empty */
500 }
501 found= l_search(el, hash->charset, my_reverse_bits(hashnr) | 1,
502 (uchar *)key, keylen, pins);
503 return found ? found+1 : 0;
504 }
505
506
507 /**
508 Iterate over all elements in hash and call function with the element
509
510 @note
511 If one of 'action' invocations returns 1 the iteration aborts.
512 'action' might see some elements twice!
513
514 @retval 0 ok
515 @retval 1 error (action returned 1)
516 */
lf_hash_iterate(LF_HASH * hash,LF_PINS * pins,my_hash_walk_action action,void * argument)517 int lf_hash_iterate(LF_HASH *hash, LF_PINS *pins,
518 my_hash_walk_action action, void *argument)
519 {
520 CURSOR cursor;
521 uint bucket= 0;
522 int res;
523 LF_SLIST **el;
524
525 el= (LF_SLIST **)lf_dynarray_lvalue(&hash->array, bucket);
526 if (unlikely(!el))
527 return 0; /* if there's no bucket==0, the hash is empty */
528 if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
529 return 0; /* if there's no bucket==0, the hash is empty */
530
531 res= l_find(el, 0, 0, (uchar*)argument, 0, &cursor, pins, action);
532
533 lf_unpin(pins, 2);
534 lf_unpin(pins, 1);
535 lf_unpin(pins, 0);
536 return res;
537 }
538
lf_hash_search(LF_HASH * hash,LF_PINS * pins,const void * key,uint keylen)539 void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
540 {
541 return lf_hash_search_using_hash_value(hash, pins,
542 hash->hash_function(hash->charset,
543 (uchar*) key,
544 keylen) & INT_MAX32,
545 key, keylen);
546 }
547
548 static const uchar *dummy_key= (uchar*)"";
549
550 /*
551 RETURN
552 0 - ok
553 -1 - out of memory
554 */
initialize_bucket(LF_HASH * hash,LF_SLIST ** node,uint bucket,LF_PINS * pins)555 static int initialize_bucket(LF_HASH *hash, LF_SLIST **node,
556 uint bucket, LF_PINS *pins)
557 {
558 uint parent= my_clear_highest_bit(bucket);
559 LF_SLIST *dummy= (LF_SLIST *)my_malloc(key_memory_lf_slist,
560 sizeof(LF_SLIST), MYF(MY_WME));
561 LF_SLIST **tmp= 0, *cur;
562 LF_SLIST **el= (LF_SLIST **)lf_dynarray_lvalue(&hash->array, parent);
563 if (unlikely(!el || !dummy))
564 return -1;
565 if (*el == NULL && bucket &&
566 unlikely(initialize_bucket(hash, el, parent, pins)))
567 {
568 my_free(dummy);
569 return -1;
570 }
571 dummy->hashnr= my_reverse_bits(bucket) | 0; /* dummy node */
572 dummy->key= dummy_key;
573 dummy->keylen= 0;
574 if ((cur= l_insert(el, hash->charset, dummy, pins, LF_HASH_UNIQUE)))
575 {
576 my_free(dummy);
577 dummy= cur;
578 }
579 my_atomic_casptr((void **)node, (void **)(char*) &tmp, dummy);
580 /*
581 note that if the CAS above failed (after l_insert() succeeded),
582 it would mean that some other thread has executed l_insert() for
583 the same dummy node, its l_insert() failed, it picked up our
584 dummy node (in "dummy= cur") and executed the same CAS as above.
585 Which means that even if CAS above failed we don't need to retry,
586 and we should not free(dummy) - there's no memory leak here
587 */
588 return 0;
589 }
590
591 C_MODE_END
592