1 /* Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15
16 /*
17 extensible hash
18
19 TODO
20 try to get rid of dummy nodes ?
21 for non-unique hash, count only _distinct_ values
22 (but how to do it in lf_hash_delete ?)
23 */
24 #include <my_global.h>
25 #include <m_string.h>
26 #include <my_sys.h>
27 #include <my_bit.h>
28 #include <lf.h>
29
30 LF_REQUIRE_PINS(3)
31
32 /* An element of the list */
33 typedef struct {
34 intptr volatile link; /* a pointer to the next element in a listand a flag */
35 uint32 hashnr; /* reversed hash number, for sorting */
36 const uchar *key;
37 size_t keylen;
38 /*
39 data is stored here, directly after the keylen.
40 thus the pointer to data is (void*)(slist_element_ptr+1)
41 */
42 } LF_SLIST;
43
44 /*
45 a structure to pass the context (pointers two the three successive elements
46 in a list) from my_lfind to linsert/ldelete
47 */
48 typedef struct {
49 intptr volatile *prev;
50 LF_SLIST *curr, *next;
51 } CURSOR;
52
53 /*
54 the last bit in LF_SLIST::link is a "deleted" flag.
55 the helper macros below convert it to a pure pointer or a pure flag
56 */
57 #define PTR(V) (LF_SLIST *)((V) & (~(intptr)1))
58 #define DELETED(V) ((V) & 1)
59
60 /*
61 DESCRIPTION
62 Search for hashnr/key/keylen in the list starting from 'head' and
63 position the cursor. The list is ORDER BY hashnr, key
64
65 RETURN
66 0 - not found
67 1 - found
68
69 NOTE
70 cursor is positioned in either case
71 pins[0..2] are used, they are NOT removed on return
72 */
my_lfind(LF_SLIST * volatile * head,CHARSET_INFO * cs,uint32 hashnr,const uchar * key,uint keylen,CURSOR * cursor,LF_PINS * pins)73 static int my_lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
74 const uchar *key, uint keylen, CURSOR *cursor, LF_PINS *pins)
75 {
76 uint32 cur_hashnr;
77 const uchar *cur_key;
78 uint cur_keylen;
79 intptr link;
80
81 retry:
82 cursor->prev= (intptr *)head;
83 do { /* PTR() isn't necessary below, head is a dummy node */
84 cursor->curr= (LF_SLIST *)(*cursor->prev);
85 _lf_pin(pins, 1, cursor->curr);
86 } while (my_atomic_loadptr((void**)cursor->prev) != cursor->curr &&
87 LF_BACKOFF);
88 for (;;)
89 {
90 if (unlikely(!cursor->curr))
91 return 0; /* end of the list */
92 do {
93 /* QQ: XXX or goto retry ? */
94 link= cursor->curr->link;
95 cursor->next= PTR(link);
96 _lf_pin(pins, 0, cursor->next);
97 } while (link != cursor->curr->link && LF_BACKOFF);
98 cur_hashnr= cursor->curr->hashnr;
99 cur_key= cursor->curr->key;
100 cur_keylen= cursor->curr->keylen;
101 if (my_atomic_loadptr((void**)cursor->prev) != cursor->curr)
102 {
103 (void)LF_BACKOFF;
104 goto retry;
105 }
106 if (!DELETED(link))
107 {
108 if (cur_hashnr >= hashnr)
109 {
110 int r= 1;
111 if (cur_hashnr > hashnr ||
112 (r= my_strnncoll(cs, (uchar*) cur_key, cur_keylen, (uchar*) key,
113 keylen)) >= 0)
114 return !r;
115 }
116 cursor->prev= &(cursor->curr->link);
117 _lf_pin(pins, 2, cursor->curr);
118 }
119 else
120 {
121 /*
122 we found a deleted node - be nice, help the other thread
123 and remove this deleted node
124 */
125 if (my_atomic_casptr((void **)cursor->prev,
126 (void **)&cursor->curr, cursor->next))
127 _lf_alloc_free(pins, cursor->curr);
128 else
129 {
130 (void)LF_BACKOFF;
131 goto retry;
132 }
133 }
134 cursor->curr= cursor->next;
135 _lf_pin(pins, 1, cursor->curr);
136 }
137 }
138
139 /*
140 DESCRIPTION
141 insert a 'node' in the list that starts from 'head' in the correct
142 position (as found by my_lfind)
143
144 RETURN
145 0 - inserted
146 not 0 - a pointer to a duplicate (not pinned and thus unusable)
147
148 NOTE
149 it uses pins[0..2], on return all pins are removed.
150 if there're nodes with the same key value, a new node is added before them.
151 */
linsert(LF_SLIST * volatile * head,CHARSET_INFO * cs,LF_SLIST * node,LF_PINS * pins,uint flags)152 static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs,
153 LF_SLIST *node, LF_PINS *pins, uint flags)
154 {
155 CURSOR cursor;
156 int res;
157
158 for (;;)
159 {
160 if (my_lfind(head, cs, node->hashnr, node->key, node->keylen,
161 &cursor, pins) &&
162 (flags & LF_HASH_UNIQUE))
163 {
164 res= 0; /* duplicate found */
165 break;
166 }
167 else
168 {
169 node->link= (intptr)cursor.curr;
170 DBUG_ASSERT(node->link != (intptr)node); /* no circular references */
171 DBUG_ASSERT(cursor.prev != &node->link); /* no circular references */
172 if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node))
173 {
174 res= 1; /* inserted ok */
175 break;
176 }
177 }
178 }
179 _lf_unpin(pins, 0);
180 _lf_unpin(pins, 1);
181 _lf_unpin(pins, 2);
182 /*
183 Note that cursor.curr is not pinned here and the pointer is unreliable,
184 the object may dissapear anytime. But if it points to a dummy node, the
185 pointer is safe, because dummy nodes are never freed - initialize_bucket()
186 uses this fact.
187 */
188 return res ? 0 : cursor.curr;
189 }
190
191 /*
192 DESCRIPTION
193 deletes a node as identified by hashnr/keey/keylen from the list
194 that starts from 'head'
195
196 RETURN
197 0 - ok
198 1 - not found
199
200 NOTE
201 it uses pins[0..2], on return all pins are removed.
202 */
ldelete(LF_SLIST * volatile * head,CHARSET_INFO * cs,uint32 hashnr,const uchar * key,uint keylen,LF_PINS * pins)203 static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
204 const uchar *key, uint keylen, LF_PINS *pins)
205 {
206 CURSOR cursor;
207 int res;
208
209 for (;;)
210 {
211 if (!my_lfind(head, cs, hashnr, key, keylen, &cursor, pins))
212 {
213 res= 1; /* not found */
214 break;
215 }
216 else
217 {
218 /* mark the node deleted */
219 if (my_atomic_casptr((void **)&(cursor.curr->link),
220 (void **)&cursor.next,
221 (void *)(((intptr)cursor.next) | 1)))
222 {
223 /* and remove it from the list */
224 if (my_atomic_casptr((void **)cursor.prev,
225 (void **)&cursor.curr, cursor.next))
226 _lf_alloc_free(pins, cursor.curr);
227 else
228 {
229 /*
230 somebody already "helped" us and removed the node ?
231 Let's check if we need to help that someone too!
232 (to ensure the number of "set DELETED flag" actions
233 is equal to the number of "remove from the list" actions)
234 */
235 my_lfind(head, cs, hashnr, key, keylen, &cursor, pins);
236 }
237 res= 0;
238 break;
239 }
240 }
241 }
242 _lf_unpin(pins, 0);
243 _lf_unpin(pins, 1);
244 _lf_unpin(pins, 2);
245 return res;
246 }
247
248 /*
249 DESCRIPTION
250 searches for a node as identified by hashnr/keey/keylen in the list
251 that starts from 'head'
252
253 RETURN
254 0 - not found
255 node - found
256
257 NOTE
258 it uses pins[0..2], on return the pin[2] keeps the node found
259 all other pins are removed.
260 */
my_lsearch(LF_SLIST * volatile * head,CHARSET_INFO * cs,uint32 hashnr,const uchar * key,uint keylen,LF_PINS * pins)261 static LF_SLIST *my_lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs,
262 uint32 hashnr, const uchar *key, uint keylen,
263 LF_PINS *pins)
264 {
265 CURSOR cursor;
266 int res= my_lfind(head, cs, hashnr, key, keylen, &cursor, pins);
267 if (res)
268 _lf_pin(pins, 2, cursor.curr);
269 _lf_unpin(pins, 0);
270 _lf_unpin(pins, 1);
271 return res ? cursor.curr : 0;
272 }
273
hash_key(const LF_HASH * hash,const uchar * record,size_t * length)274 static inline const uchar* hash_key(const LF_HASH *hash,
275 const uchar *record, size_t *length)
276 {
277 if (hash->get_key)
278 return (*hash->get_key)(record, length, 0);
279 *length= hash->key_length;
280 return record + hash->key_offset;
281 }
282
283 /*
284 Compute the hash key value from the raw key.
285
286 @note, that the hash value is limited to 2^31, because we need one
287 bit to distinguish between normal and dummy nodes.
288 */
calc_hash(LF_HASH * hash,const uchar * key,uint keylen)289 static inline uint calc_hash(LF_HASH *hash, const uchar *key, uint keylen)
290 {
291 ulong nr1= 1, nr2= 4;
292 hash->charset->coll->hash_sort(hash->charset, (uchar*) key, keylen,
293 &nr1, &nr2);
294 return nr1 & INT_MAX32;
295 }
296
297 #define MAX_LOAD 1.0 /* average number of elements in a bucket */
298
299 static int initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *);
300
301 /*
302 Initializes lf_hash, the arguments are compatible with hash_init
303
304 @note element_size sets both the size of allocated memory block for
305 lf_alloc and a size of memcpy'ed block size in lf_hash_insert. Typically
306 they are the same, indeed. But LF_HASH::element_size can be decreased
307 after lf_hash_init, and then lf_alloc will allocate larger block that
308 lf_hash_insert will copy over. It is desireable if part of the element
309 is expensive to initialize - for example if there is a mutex or
310 DYNAMIC_ARRAY. In this case they should be initialize in the
311 LF_ALLOCATOR::constructor, and lf_hash_insert should not overwrite them.
312 See wt_init() for example.
313 */
lf_hash_init(LF_HASH * hash,uint element_size,uint flags,uint key_offset,uint key_length,my_hash_get_key get_key,CHARSET_INFO * charset)314 void lf_hash_init(LF_HASH *hash, uint element_size, uint flags,
315 uint key_offset, uint key_length, my_hash_get_key get_key,
316 CHARSET_INFO *charset)
317 {
318 lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size,
319 offsetof(LF_SLIST, key));
320 lf_dynarray_init(&hash->array, sizeof(LF_SLIST *));
321 hash->size= 1;
322 hash->count= 0;
323 hash->element_size= element_size;
324 hash->flags= flags;
325 hash->charset= charset ? charset : &my_charset_bin;
326 hash->key_offset= key_offset;
327 hash->key_length= key_length;
328 hash->get_key= get_key;
329 DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length);
330 }
331
lf_hash_destroy(LF_HASH * hash)332 void lf_hash_destroy(LF_HASH *hash)
333 {
334 LF_SLIST *el, **head= (LF_SLIST **)_lf_dynarray_value(&hash->array, 0);
335
336 if (unlikely(!head))
337 return;
338 el= *head;
339
340 while (el)
341 {
342 intptr next= el->link;
343 if (el->hashnr & 1)
344 lf_alloc_direct_free(&hash->alloc, el); /* normal node */
345 else
346 my_free(el); /* dummy node */
347 el= (LF_SLIST *)next;
348 }
349 lf_alloc_destroy(&hash->alloc);
350 lf_dynarray_destroy(&hash->array);
351 }
352
353 /*
354 DESCRIPTION
355 inserts a new element to a hash. it will have a _copy_ of
356 data, not a pointer to it.
357
358 RETURN
359 0 - inserted
360 1 - didn't (unique key conflict)
361 -1 - out of memory
362
363 NOTE
364 see linsert() for pin usage notes
365 */
lf_hash_insert(LF_HASH * hash,LF_PINS * pins,const void * data)366 int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data)
367 {
368 int csize, bucket, hashnr;
369 LF_SLIST *node, * volatile *el;
370
371 lf_rwlock_by_pins(pins);
372 node= (LF_SLIST *)_lf_alloc_new(pins);
373 if (unlikely(!node))
374 return -1;
375 memcpy(node+1, data, hash->element_size);
376 node->key= hash_key(hash, (uchar *)(node+1), &node->keylen);
377 hashnr= calc_hash(hash, node->key, node->keylen);
378 bucket= hashnr % hash->size;
379 el= _lf_dynarray_lvalue(&hash->array, bucket);
380 if (unlikely(!el))
381 return -1;
382 if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
383 return -1;
384 node->hashnr= my_reverse_bits(hashnr) | 1; /* normal node */
385 if (linsert(el, hash->charset, node, pins, hash->flags))
386 {
387 _lf_alloc_free(pins, node);
388 lf_rwunlock_by_pins(pins);
389 return 1;
390 }
391 csize= hash->size;
392 if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD)
393 my_atomic_cas32(&hash->size, &csize, csize*2);
394 lf_rwunlock_by_pins(pins);
395 return 0;
396 }
397
398 /*
399 DESCRIPTION
400 deletes an element with the given key from the hash (if a hash is
401 not unique and there're many elements with this key - the "first"
402 matching element is deleted)
403 RETURN
404 0 - deleted
405 1 - didn't (not found)
406 -1 - out of memory
407 NOTE
408 see ldelete() for pin usage notes
409 */
lf_hash_delete(LF_HASH * hash,LF_PINS * pins,const void * key,uint keylen)410 int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
411 {
412 LF_SLIST * volatile *el;
413 uint bucket, hashnr= calc_hash(hash, (uchar *)key, keylen);
414
415 bucket= hashnr % hash->size;
416 lf_rwlock_by_pins(pins);
417 el= _lf_dynarray_lvalue(&hash->array, bucket);
418 if (unlikely(!el))
419 return -1;
420 /*
421 note that we still need to initialize_bucket here,
422 we cannot return "node not found", because an old bucket of that
423 node may've been split and the node was assigned to a new bucket
424 that was never accessed before and thus is not initialized.
425 */
426 if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
427 return -1;
428 if (ldelete(el, hash->charset, my_reverse_bits(hashnr) | 1,
429 (uchar *)key, keylen, pins))
430 {
431 lf_rwunlock_by_pins(pins);
432 return 1;
433 }
434 my_atomic_add32(&hash->count, -1);
435 lf_rwunlock_by_pins(pins);
436 return 0;
437 }
438
439 /*
440 RETURN
441 a pointer to an element with the given key (if a hash is not unique and
442 there're many elements with this key - the "first" matching element)
443 NULL if nothing is found
444 MY_ERRPTR if OOM
445
446 NOTE
447 see my_lsearch() for pin usage notes
448 */
lf_hash_search(LF_HASH * hash,LF_PINS * pins,const void * key,uint keylen)449 void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
450 {
451 LF_SLIST * volatile *el, *found;
452 uint bucket, hashnr= calc_hash(hash, (uchar *)key, keylen);
453
454 bucket= hashnr % hash->size;
455 lf_rwlock_by_pins(pins);
456 el= _lf_dynarray_lvalue(&hash->array, bucket);
457 if (unlikely(!el))
458 return MY_ERRPTR;
459 if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
460 return MY_ERRPTR;
461 found= my_lsearch(el, hash->charset, my_reverse_bits(hashnr) | 1,
462 (uchar *)key, keylen, pins);
463 lf_rwunlock_by_pins(pins);
464 return found ? found+1 : 0;
465 }
466
467 static const uchar *dummy_key= (uchar*)"";
468
469 /*
470 RETURN
471 0 - ok
472 -1 - out of memory
473 */
initialize_bucket(LF_HASH * hash,LF_SLIST * volatile * node,uint bucket,LF_PINS * pins)474 static int initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node,
475 uint bucket, LF_PINS *pins)
476 {
477 uint parent= my_clear_highest_bit(bucket);
478 LF_SLIST *dummy= (LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME));
479 LF_SLIST **tmp= 0, *cur;
480 LF_SLIST * volatile *el= _lf_dynarray_lvalue(&hash->array, parent);
481 if (unlikely(!el || !dummy))
482 return -1;
483 if (*el == NULL && bucket &&
484 unlikely(initialize_bucket(hash, el, parent, pins)))
485 return -1;
486 dummy->hashnr= my_reverse_bits(bucket) | 0; /* dummy node */
487 dummy->key= dummy_key;
488 dummy->keylen= 0;
489 if ((cur= linsert(el, hash->charset, dummy, pins, LF_HASH_UNIQUE)))
490 {
491 my_free(dummy);
492 dummy= cur;
493 }
494 my_atomic_casptr((void **)node, (void **)&tmp, dummy);
495 /*
496 note that if the CAS above failed (after linsert() succeeded),
497 it would mean that some other thread has executed linsert() for
498 the same dummy node, its linsert() failed, it picked up our
499 dummy node (in "dummy= cur") and executed the same CAS as above.
500 Which means that even if CAS above failed we don't need to retry,
501 and we should not free(dummy) - there's no memory leak here
502 */
503 return 0;
504 }
505