1 /* The implementation of the hash table (_Py_hashtable_t) is based on the
2    cfuhash project:
3    http://sourceforge.net/projects/libcfu/
4 
5    Copyright of cfuhash:
6    ----------------------------------
7    Creation date: 2005-06-24 21:22:40
8    Authors: Don
9    Change log:
10 
11    Copyright (c) 2005 Don Owens
12    All rights reserved.
13 
14    This code is released under the BSD license:
15 
16    Redistribution and use in source and binary forms, with or without
17    modification, are permitted provided that the following conditions
18    are met:
19 
20      * Redistributions of source code must retain the above copyright
21        notice, this list of conditions and the following disclaimer.
22 
23      * Redistributions in binary form must reproduce the above
24        copyright notice, this list of conditions and the following
25        disclaimer in the documentation and/or other materials provided
26        with the distribution.
27 
28      * Neither the name of the author nor the names of its
29        contributors may be used to endorse or promote products derived
30        from this software without specific prior written permission.
31 
32    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43    OF THE POSSIBILITY OF SUCH DAMAGE.
44    ----------------------------------
45 */
46 
47 #include "Python.h"
48 #include "hashtable.h"
49 
50 #define HASHTABLE_MIN_SIZE 16
51 #define HASHTABLE_HIGH 0.50
52 #define HASHTABLE_LOW 0.10
53 #define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
54 
55 #define BUCKETS_HEAD(SLIST) \
56         ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
57 #define TABLE_HEAD(HT, BUCKET) \
58         ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
59 #define ENTRY_NEXT(ENTRY) \
60         ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
61 #define HASHTABLE_ITEM_SIZE(HT) \
62         (sizeof(_Py_hashtable_entry_t) + (HT)->key_size + (HT)->data_size)
63 
64 #define ENTRY_READ_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \
65     do { \
66         assert((DATA_SIZE) == (TABLE)->data_size); \
67         memcpy((PDATA), _Py_HASHTABLE_ENTRY_PDATA(TABLE, (ENTRY)), \
68                   (DATA_SIZE)); \
69     } while (0)
70 
71 #define ENTRY_WRITE_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \
72     do { \
73         assert((DATA_SIZE) == (TABLE)->data_size); \
74         memcpy((void *)_Py_HASHTABLE_ENTRY_PDATA((TABLE), (ENTRY)), \
75                   (PDATA), (DATA_SIZE)); \
76     } while (0)
77 
78 /* Forward declaration */
79 static void hashtable_rehash(_Py_hashtable_t *ht);
80 
81 static void
_Py_slist_init(_Py_slist_t * list)82 _Py_slist_init(_Py_slist_t *list)
83 {
84     list->head = NULL;
85 }
86 
87 
88 static void
_Py_slist_prepend(_Py_slist_t * list,_Py_slist_item_t * item)89 _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
90 {
91     item->next = list->head;
92     list->head = item;
93 }
94 
95 
96 static void
_Py_slist_remove(_Py_slist_t * list,_Py_slist_item_t * previous,_Py_slist_item_t * item)97 _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
98                  _Py_slist_item_t *item)
99 {
100     if (previous != NULL)
101         previous->next = item->next;
102     else
103         list->head = item->next;
104 }
105 
106 
107 Py_uhash_t
_Py_hashtable_hash_ptr(struct _Py_hashtable_t * ht,const void * pkey)108 _Py_hashtable_hash_ptr(struct _Py_hashtable_t *ht, const void *pkey)
109 {
110     void *key;
111 
112     _Py_HASHTABLE_READ_KEY(ht, pkey, key);
113     return (Py_uhash_t)_Py_HashPointer(key);
114 }
115 
116 
117 int
_Py_hashtable_compare_direct(_Py_hashtable_t * ht,const void * pkey,const _Py_hashtable_entry_t * entry)118 _Py_hashtable_compare_direct(_Py_hashtable_t *ht, const void *pkey,
119                              const _Py_hashtable_entry_t *entry)
120 {
121     const void *pkey2 = _Py_HASHTABLE_ENTRY_PKEY(entry);
122     return (memcmp(pkey, pkey2, ht->key_size) == 0);
123 }
124 
125 
126 /* makes sure the real size of the buckets array is a power of 2 */
127 static size_t
round_size(size_t s)128 round_size(size_t s)
129 {
130     size_t i;
131     if (s < HASHTABLE_MIN_SIZE)
132         return HASHTABLE_MIN_SIZE;
133     i = 1;
134     while (i < s)
135         i <<= 1;
136     return i;
137 }
138 
139 
140 _Py_hashtable_t *
_Py_hashtable_new_full(size_t key_size,size_t data_size,size_t init_size,_Py_hashtable_hash_func hash_func,_Py_hashtable_compare_func compare_func,_Py_hashtable_allocator_t * allocator)141 _Py_hashtable_new_full(size_t key_size, size_t data_size,
142                        size_t init_size,
143                        _Py_hashtable_hash_func hash_func,
144                        _Py_hashtable_compare_func compare_func,
145                        _Py_hashtable_allocator_t *allocator)
146 {
147     _Py_hashtable_t *ht;
148     size_t buckets_size;
149     _Py_hashtable_allocator_t alloc;
150 
151     if (allocator == NULL) {
152         alloc.malloc = PyMem_RawMalloc;
153         alloc.free = PyMem_RawFree;
154     }
155     else
156         alloc = *allocator;
157 
158     ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
159     if (ht == NULL)
160         return ht;
161 
162     ht->num_buckets = round_size(init_size);
163     ht->entries = 0;
164     ht->key_size = key_size;
165     ht->data_size = data_size;
166 
167     buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
168     ht->buckets = alloc.malloc(buckets_size);
169     if (ht->buckets == NULL) {
170         alloc.free(ht);
171         return NULL;
172     }
173     memset(ht->buckets, 0, buckets_size);
174 
175     ht->hash_func = hash_func;
176     ht->compare_func = compare_func;
177     ht->alloc = alloc;
178     return ht;
179 }
180 
181 
182 _Py_hashtable_t *
_Py_hashtable_new(size_t key_size,size_t data_size,_Py_hashtable_hash_func hash_func,_Py_hashtable_compare_func compare_func)183 _Py_hashtable_new(size_t key_size, size_t data_size,
184                   _Py_hashtable_hash_func hash_func,
185                   _Py_hashtable_compare_func compare_func)
186 {
187     return _Py_hashtable_new_full(key_size, data_size,
188                                   HASHTABLE_MIN_SIZE,
189                                   hash_func, compare_func,
190                                   NULL);
191 }
192 
193 
194 size_t
_Py_hashtable_size(_Py_hashtable_t * ht)195 _Py_hashtable_size(_Py_hashtable_t *ht)
196 {
197     size_t size;
198 
199     size = sizeof(_Py_hashtable_t);
200 
201     /* buckets */
202     size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *);
203 
204     /* entries */
205     size += ht->entries * HASHTABLE_ITEM_SIZE(ht);
206 
207     return size;
208 }
209 
210 
211 #ifdef Py_DEBUG
212 void
_Py_hashtable_print_stats(_Py_hashtable_t * ht)213 _Py_hashtable_print_stats(_Py_hashtable_t *ht)
214 {
215     size_t size;
216     size_t chain_len, max_chain_len, total_chain_len, nchains;
217     _Py_hashtable_entry_t *entry;
218     size_t hv;
219     double load;
220 
221     size = _Py_hashtable_size(ht);
222 
223     load = (double)ht->entries / ht->num_buckets;
224 
225     max_chain_len = 0;
226     total_chain_len = 0;
227     nchains = 0;
228     for (hv = 0; hv < ht->num_buckets; hv++) {
229         entry = TABLE_HEAD(ht, hv);
230         if (entry != NULL) {
231             chain_len = 0;
232             for (; entry; entry = ENTRY_NEXT(entry)) {
233                 chain_len++;
234             }
235             if (chain_len > max_chain_len)
236                 max_chain_len = chain_len;
237             total_chain_len += chain_len;
238             nchains++;
239         }
240     }
241     printf("hash table %p: entries=%"
242            PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ",
243            (void *)ht, ht->entries, ht->num_buckets, load * 100.0);
244     if (nchains)
245         printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
246     printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u KiB\n",
247            max_chain_len, size / 1024);
248 }
249 #endif
250 
251 
252 _Py_hashtable_entry_t *
_Py_hashtable_get_entry(_Py_hashtable_t * ht,size_t key_size,const void * pkey)253 _Py_hashtable_get_entry(_Py_hashtable_t *ht,
254                         size_t key_size, const void *pkey)
255 {
256     Py_uhash_t key_hash;
257     size_t index;
258     _Py_hashtable_entry_t *entry;
259 
260     assert(key_size == ht->key_size);
261 
262     key_hash = ht->hash_func(ht, pkey);
263     index = key_hash & (ht->num_buckets - 1);
264 
265     for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
266         if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
267             break;
268     }
269 
270     return entry;
271 }
272 
273 
274 static int
_Py_hashtable_pop_entry(_Py_hashtable_t * ht,size_t key_size,const void * pkey,void * data,size_t data_size)275 _Py_hashtable_pop_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
276                         void *data, size_t data_size)
277 {
278     Py_uhash_t key_hash;
279     size_t index;
280     _Py_hashtable_entry_t *entry, *previous;
281 
282     assert(key_size == ht->key_size);
283 
284     key_hash = ht->hash_func(ht, pkey);
285     index = key_hash & (ht->num_buckets - 1);
286 
287     previous = NULL;
288     for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
289         if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
290             break;
291         previous = entry;
292     }
293 
294     if (entry == NULL)
295         return 0;
296 
297     _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
298                      (_Py_slist_item_t *)entry);
299     ht->entries--;
300 
301     if (data != NULL)
302         ENTRY_READ_PDATA(ht, entry, data_size, data);
303     ht->alloc.free(entry);
304 
305     if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
306         hashtable_rehash(ht);
307     return 1;
308 }
309 
310 
311 int
_Py_hashtable_set(_Py_hashtable_t * ht,size_t key_size,const void * pkey,size_t data_size,const void * data)312 _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
313                   size_t data_size, const void *data)
314 {
315     Py_uhash_t key_hash;
316     size_t index;
317     _Py_hashtable_entry_t *entry;
318 
319     assert(key_size == ht->key_size);
320 
321     assert(data != NULL || data_size == 0);
322 #ifndef NDEBUG
323     /* Don't write the assertion on a single line because it is interesting
324        to know the duplicated entry if the assertion failed. The entry can
325        be read using a debugger. */
326     entry = _Py_hashtable_get_entry(ht, key_size, pkey);
327     assert(entry == NULL);
328 #endif
329 
330     key_hash = ht->hash_func(ht, pkey);
331     index = key_hash & (ht->num_buckets - 1);
332 
333     entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
334     if (entry == NULL) {
335         /* memory allocation failed */
336         return -1;
337     }
338 
339     entry->key_hash = key_hash;
340     memcpy((void *)_Py_HASHTABLE_ENTRY_PKEY(entry), pkey, ht->key_size);
341     if (data)
342         ENTRY_WRITE_PDATA(ht, entry, data_size, data);
343 
344     _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
345     ht->entries++;
346 
347     if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
348         hashtable_rehash(ht);
349     return 0;
350 }
351 
352 
353 int
_Py_hashtable_get(_Py_hashtable_t * ht,size_t key_size,const void * pkey,size_t data_size,void * data)354 _Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey,
355                   size_t data_size, void *data)
356 {
357     _Py_hashtable_entry_t *entry;
358 
359     assert(data != NULL);
360 
361     entry = _Py_hashtable_get_entry(ht, key_size, pkey);
362     if (entry == NULL)
363         return 0;
364     ENTRY_READ_PDATA(ht, entry, data_size, data);
365     return 1;
366 }
367 
368 
369 int
_Py_hashtable_pop(_Py_hashtable_t * ht,size_t key_size,const void * pkey,size_t data_size,void * data)370 _Py_hashtable_pop(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
371                   size_t data_size, void *data)
372 {
373     assert(data != NULL);
374     return _Py_hashtable_pop_entry(ht, key_size, pkey, data, data_size);
375 }
376 
377 
378 /* Code commented since the function is not needed in Python */
379 #if 0
380 void
381 _Py_hashtable_delete(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
382 {
383 #ifndef NDEBUG
384     int found = _Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
385     assert(found);
386 #else
387     (void)_Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
388 #endif
389 }
390 #endif
391 
392 
393 int
_Py_hashtable_foreach(_Py_hashtable_t * ht,_Py_hashtable_foreach_func func,void * arg)394 _Py_hashtable_foreach(_Py_hashtable_t *ht,
395                       _Py_hashtable_foreach_func func,
396                       void *arg)
397 {
398     _Py_hashtable_entry_t *entry;
399     size_t hv;
400 
401     for (hv = 0; hv < ht->num_buckets; hv++) {
402         for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
403             int res = func(ht, entry, arg);
404             if (res)
405                 return res;
406         }
407     }
408     return 0;
409 }
410 
411 
412 static void
hashtable_rehash(_Py_hashtable_t * ht)413 hashtable_rehash(_Py_hashtable_t *ht)
414 {
415     size_t buckets_size, new_size, bucket;
416     _Py_slist_t *old_buckets = NULL;
417     size_t old_num_buckets;
418 
419     new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
420     if (new_size == ht->num_buckets)
421         return;
422 
423     old_num_buckets = ht->num_buckets;
424 
425     buckets_size = new_size * sizeof(ht->buckets[0]);
426     old_buckets = ht->buckets;
427     ht->buckets = ht->alloc.malloc(buckets_size);
428     if (ht->buckets == NULL) {
429         /* cancel rehash on memory allocation failure */
430         ht->buckets = old_buckets ;
431         /* memory allocation failed */
432         return;
433     }
434     memset(ht->buckets, 0, buckets_size);
435 
436     ht->num_buckets = new_size;
437 
438     for (bucket = 0; bucket < old_num_buckets; bucket++) {
439         _Py_hashtable_entry_t *entry, *next;
440         for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
441             size_t entry_index;
442 
443 
444             assert(ht->hash_func(ht, _Py_HASHTABLE_ENTRY_PKEY(entry)) == entry->key_hash);
445             next = ENTRY_NEXT(entry);
446             entry_index = entry->key_hash & (new_size - 1);
447 
448             _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
449         }
450     }
451 
452     ht->alloc.free(old_buckets);
453 }
454 
455 
456 void
_Py_hashtable_clear(_Py_hashtable_t * ht)457 _Py_hashtable_clear(_Py_hashtable_t *ht)
458 {
459     _Py_hashtable_entry_t *entry, *next;
460     size_t i;
461 
462     for (i=0; i < ht->num_buckets; i++) {
463         for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
464             next = ENTRY_NEXT(entry);
465             ht->alloc.free(entry);
466         }
467         _Py_slist_init(&ht->buckets[i]);
468     }
469     ht->entries = 0;
470     hashtable_rehash(ht);
471 }
472 
473 
474 void
_Py_hashtable_destroy(_Py_hashtable_t * ht)475 _Py_hashtable_destroy(_Py_hashtable_t *ht)
476 {
477     size_t i;
478 
479     for (i = 0; i < ht->num_buckets; i++) {
480         _Py_slist_item_t *entry = ht->buckets[i].head;
481         while (entry) {
482             _Py_slist_item_t *entry_next = entry->next;
483             ht->alloc.free(entry);
484             entry = entry_next;
485         }
486     }
487 
488     ht->alloc.free(ht->buckets);
489     ht->alloc.free(ht);
490 }
491 
492 
493 _Py_hashtable_t *
_Py_hashtable_copy(_Py_hashtable_t * src)494 _Py_hashtable_copy(_Py_hashtable_t *src)
495 {
496     const size_t key_size = src->key_size;
497     const size_t data_size = src->data_size;
498     _Py_hashtable_t *dst;
499     _Py_hashtable_entry_t *entry;
500     size_t bucket;
501     int err;
502 
503     dst = _Py_hashtable_new_full(key_size, data_size,
504                                  src->num_buckets,
505                                  src->hash_func,
506                                  src->compare_func,
507                                  &src->alloc);
508     if (dst == NULL)
509         return NULL;
510 
511     for (bucket=0; bucket < src->num_buckets; bucket++) {
512         entry = TABLE_HEAD(src, bucket);
513         for (; entry; entry = ENTRY_NEXT(entry)) {
514             const void *pkey = _Py_HASHTABLE_ENTRY_PKEY(entry);
515             const void *pdata = _Py_HASHTABLE_ENTRY_PDATA(src, entry);
516             err = _Py_hashtable_set(dst, key_size, pkey, data_size, pdata);
517             if (err) {
518                 _Py_hashtable_destroy(dst);
519                 return NULL;
520             }
521         }
522     }
523     return dst;
524 }
525