1 /* An implementation of in-memory hash tables:
2 * Copyright (c) 2000-2004 Salvatore Sanfilippo <antirez@invece.org>
3 *
4 * -- VERSION 2004.05.22 --
5 *
6 * COPYRIGHT AND PERMISSION NOTICE
7 * -------------------------------
8 *
9 * Copyright (c) 2000 Salvatore Sanfilippo <antirez@invece.org>
10 * Copyright (c) 2001 Salvatore Sanfilippo <antirez@invece.org>
11 * Copyright (c) 2002 Salvatore Sanfilippo <antirez@invece.org>
12 * Copyright (c) 2003 Salvatore Sanfilippo <antirez@invece.org>
13 * Copyright (c) 2004 Salvatore Sanfilippo <antirez@invece.org>
14 *
15 * All rights reserved.
16 *
17 * Permission is hereby granted, free of charge, to any person obtaining a
18 * copy of this software and associated documentation files (the
19 * "Software"), to deal in the Software without restriction, including
20 * without limitation the rights to use, copy, modify, merge, publish,
21 * distribute, and/or sell copies of the Software, and to permit persons
22 * to whom the Software is furnished to do so, provided that the above
23 * copyright notice(s) and this permission notice appear in all copies of
24 * the Software and that both the above copyright notice(s) and this
25 * permission notice appear in supporting documentation.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
28 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
30 * OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
31 * HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
32 * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
33 * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
34 * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
35 * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
36 *
37 * Except as contained in this notice, the name of a copyright holder
38 * shall not be used in advertising or otherwise to promote the sale, use
39 * or other dealings in this Software without prior written authorization
40 * of the copyright holder.
41 *
42 * CHANGELOG
43 * ---------
44 *
45 * 22May2004 - Fixed a but in ht_destroy(). Now after this call the
46 * hashtable is really ready to be reused. Fixed also a memory leak
47 * in the same function. Luckly this function is only called at exit
48 * in many programs.
49 *
50 * OVERVIEW
51 * --------
52 *
53 * AHT is an implementation of a dictionary with support for
54 * INSERT, DELETE and SEARCH operations. It uses the hash table
55 * as base data structure to provide almost constant times for
56 * the three operations. AHT also automatically care about the
57 * size of the current key-values set increasing the hash table
58 * as needed.
59 *
60 * DESIGN PRINCIPLE
61 * ----------------
62 *
63 * - AHT try to resist to attacker-induced worst-case behaviour
64 * trought the randomization of the hash-function. This is
65 * optional.
66 *
67 * - AHT take care of the hash table expansion when needed.
68 * The hash table load ranges from 0 to 0.5, the hash table
69 * size is a power of two.
70 *
71 * - A simple implementation. The collisions resolution used
72 * is a simple linear probing, that takes advantage of
73 * the modern CPU caches, the low hash table max load and
74 * the use of a strong hash function provided with this library
75 * (ht_strong_hash), should mitigate the primary clustering
76 * enough. Experimental results shown that double hashing
77 * was a performance lost with common key types in modern
78 * CPUs.
79 *
80 * - Moderatly method oriented, it is possible to define the hash
81 * function, key/value destructors, key compare function, for a
82 * given hash table, but not with a per-element base.
83 *
84 * === WARNING ===
85 * = Before to use this library, think about the -fact- that the
86 * = worst case is O(N). Like for the quick sort algorithm, it may
87 * = be a bad idea to use this library in medical software, or other
88 * = software for wich the worst case should be taken in account
89 * = even if not likely to happen.
90 * = Good alternatives are red-black trees, and other trees with
91 * = a good worst-case behavior.
92 * ===============
93 *
94 * TODO
95 * ----
96 *
97 * - Write the documentation
98 * - ht_copy() to copy an element between hash tables
99 * - ht_dup() to duplicate an entire hash table
100 * - ht_merge() to add the content of one hash table to another
101 * - disk operations, the ability to save an hashtable from the
102 * memory to the disk and the reverse operation.
103 *
104 * Most of this features needs additional methods, like one
105 * to copy an object, and should return an error if such methods
106 * are not defined.
107 *
108 */
109
110 #include <sys/types.h>
111 #include <stdio.h>
112 #include <stdlib.h>
113 #include <string.h>
114 #include <assert.h>
115 #include "aht.h"
116
117 /* -------------------------- private prototypes ---------------------------- */
118 static int ht_expand_if_needed(struct hashtable *t);
119 static unsigned int next_power(unsigned int size);
120 static int ht_insert(struct hashtable *t, void *key, unsigned int *avail_index);
121
122 /* The special ht_free_element pointer is used to mark
123 * a freed element in the hash table (note that the elements
124 * neven used are just NULL pointers) */
125 static struct ht_ele *ht_free_element = (void*) -1;
126
127 /* -------------------------- hash functions -------------------------------- */
128 /* The djb hash function, that's under public domain */
djb_hash(unsigned char * buf,size_t len)129 u_int32_t djb_hash(unsigned char *buf, size_t len)
130 {
131 u_int32_t h = 5381;
132 while(len--)
133 h = (h + (h << 5)) ^ *buf++;
134 return h;
135 }
136
djb_hashR(unsigned char * buf,size_t len)137 u_int32_t djb_hashR(unsigned char *buf, size_t len)
138 {
139 u_int32_t h = 5381;
140 buf += len-1;
141 while(len--)
142 h = (h + (h << 5)) ^ *buf--;
143 return h;
144 }
145
146 /* Another trivial hash function */
147 #define ROT32R(x,n) (((x)>>n)|(x<<(32-n)))
trivial_hash(unsigned char * buf,size_t len)148 u_int32_t trivial_hash(unsigned char *buf, size_t len)
149 {
150 u_int32_t h = 0;
151 while(len--) {
152 h = h + *buf++;
153 h = ROT32R(h, 3);
154 }
155 return h;
156 }
157
trivial_hashR(unsigned char * buf,size_t len)158 u_int32_t trivial_hashR(unsigned char *buf, size_t len)
159 {
160 u_int32_t h = 0;
161 buf += len-1;
162 while(len--) {
163 h = h + *buf--;
164 h = ROT32R(h, 3);
165 }
166 return h;
167 }
168
169 /* A strong hash function that should be the default with this
170 * hashtable implementation. Our hash tables does not support
171 * double hashing for design: the idea is to avoid double
172 * hashing and use a bit slower but very strong hash function like
173 * this. This should provide quite good performances with
174 * all the kinds of keys if you take the default max load of 50%.
175 *
176 * For more information see: http://burtleburtle.net/bob/hash/evahash.html */
177
178 /* The mixing step */
179 #define mix(a,b,c) \
180 { \
181 a=a-b; a=a-c; a=a^(c>>13); \
182 b=b-c; b=b-a; b=b^(a<<8); \
183 c=c-a; c=c-b; c=c^(b>>13); \
184 a=a-b; a=a-c; a=a^(c>>12); \
185 b=b-c; b=b-a; b=b^(a<<16); \
186 c=c-a; c=c-b; c=c^(b>>5); \
187 a=a-b; a=a-c; a=a^(c>>3); \
188 b=b-c; b=b-a; b=b^(a<<10); \
189 c=c-a; c=c-b; c=c^(b>>15); \
190 }
191
192 /* The whole new hash function */
__ht_strong_hash(u_int8_t * k,u_int32_t length,u_int32_t initval)193 u_int32_t __ht_strong_hash(u_int8_t *k, u_int32_t length, u_int32_t initval)
194 {
195 u_int32_t a,b,c; /* the internal state */
196 u_int32_t len; /* how many key bytes still need mixing */
197
198 /* Set up the internal state */
199 len = length;
200 a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
201 c = initval; /* variable initialization of internal state */
202
203 /*---------------------------------------- handle most of the key */
204 while (len >= 12)
205 {
206 a=a+(k[0]+((u_int32_t)k[1]<<8)+((u_int32_t)k[2]<<16)+
207 ((u_int32_t)k[3]<<24));
208 b=b+(k[4]+((u_int32_t)k[5]<<8)+((u_int32_t)k[6]<<16)+
209 ((u_int32_t)k[7]<<24));
210 c=c+(k[8]+((u_int32_t)k[9]<<8)+((u_int32_t)k[10]<<16)+
211 ((u_int32_t)k[11]<<24));
212 mix(a,b,c);
213 k = k+12; len = len-12;
214 }
215
216 /*------------------------------------- handle the last 11 bytes */
217 c = c+length;
218 switch(len) /* all the case statements fall through */
219 {
220 case 11: c=c+((u_int32_t)k[10]<<24);
221 case 10: c=c+((u_int32_t)k[9]<<16);
222 case 9 : c=c+((u_int32_t)k[8]<<8);
223 /* the first byte of c is reserved for the length */
224 case 8 : b=b+((u_int32_t)k[7]<<24);
225 case 7 : b=b+((u_int32_t)k[6]<<16);
226 case 6 : b=b+((u_int32_t)k[5]<<8);
227 case 5 : b=b+k[4];
228 case 4 : a=a+((u_int32_t)k[3]<<24);
229 case 3 : a=a+((u_int32_t)k[2]<<16);
230 case 2 : a=a+((u_int32_t)k[1]<<8);
231 case 1 : a=a+k[0];
232 /* case 0: nothing left to add */
233 }
234 mix(a,b,c);
235 /*-------------------------------------------- report the result */
236 return c;
237 }
238
239 /* ----------------------------- API implementation ------------------------- */
240 /* reset an hashtable already initialized with ht_init().
241 * NOTE: This function should only called by ht_destroy(). */
ht_reset(struct hashtable * t)242 static void ht_reset(struct hashtable *t)
243 {
244 t->table = NULL;
245 t->size = 0;
246 t->sizemask = 0;
247 t->used = 0;
248 t->collisions = 0;
249 }
250
251 /* Initialize the hash table */
ht_init(struct hashtable * t)252 int ht_init(struct hashtable *t)
253 {
254 ht_reset(t);
255 t->hashf = ht_hash_pointer;
256 t->key_destructor = ht_no_destructor;
257 t->val_destructor = ht_no_destructor;
258 t->key_compare = ht_compare_ptr;
259 return HT_OK;
260 }
261
262 /* Resize the table to the minimal size that contains all the elements */
ht_resize(struct hashtable * t)263 int ht_resize(struct hashtable *t)
264 {
265 int minimal = (t->used * 2)+1;
266
267 if (minimal < HT_INITIAL_SIZE)
268 minimal = HT_INITIAL_SIZE;
269 return ht_expand(t, minimal);
270 }
271
272 /* Move an element accross hash tables */
ht_move(struct hashtable * orig,struct hashtable * dest,unsigned int index)273 int ht_move(struct hashtable *orig, struct hashtable *dest, unsigned int index)
274 {
275 int ret;
276 unsigned int new_index;
277
278 /* If the element isn't in the table ht_search will store
279 * the index of the free ht_ele in the integer pointer by *index */
280 ret = ht_insert(dest, orig->table[index]->key, &new_index);
281 if (ret != HT_OK)
282 return ret;
283
284 /* Move the element */
285 dest->table[new_index] = orig->table[index];
286 orig->table[index] = ht_free_element;
287 orig->used--;
288 dest->used++;
289 return HT_OK;
290 }
291
292 /* Expand or create the hashtable */
ht_expand(struct hashtable * t,size_t size)293 int ht_expand(struct hashtable *t, size_t size)
294 {
295 struct hashtable n; /* the new hashtable */
296 unsigned int realsize = next_power(size), i;
297
298 /* the size is invalid if it is smaller than the number of
299 * elements already inside the hashtable */
300 if (t->used >= size)
301 return HT_INVALID;
302
303 ht_init(&n);
304 n.size = realsize;
305 n.sizemask = realsize-1;
306 n.table = malloc(realsize*sizeof(struct ht_ele*));
307 if (n.table == NULL)
308 return HT_NOMEM;
309 /* Copy methods */
310 n.hashf = t->hashf;
311 n.key_destructor = t->key_destructor;
312 n.val_destructor = t->val_destructor;
313 n.key_compare= t->key_compare;
314
315 /* Initialize all the pointers to NULL */
316 memset(n.table, 0, realsize*sizeof(struct ht_ele*));
317
318 /* Copy all the elements from the old to the new table:
319 * note that if the old hash table is empty t->size is zero,
320 * so ht_expand() acts like an ht_create() */
321 n.used = t->used;
322 for (i = 0; i < t->size && t->used > 0; i++) {
323 if (t->table[i] != NULL && t->table[i] != ht_free_element) {
324 u_int32_t h;
325
326 /* Get the new element index: note that we
327 * know that there aren't freed elements in 'n' */
328 h = n.hashf(t->table[i]->key) & n.sizemask;
329 if (n.table[h]) {
330 n.collisions++;
331 while(1) {
332 h = (h+1) & n.sizemask;
333 if (!n.table[h])
334 break;
335 n.collisions++;
336 }
337 }
338 /* Move the element */
339 n.table[h] = t->table[i];
340 t->used--;
341 }
342 }
343 assert(t->used == 0);
344 free(t->table);
345
346 /* Remap the new hashtable in the old */
347 *t = n;
348 return HT_OK;
349 }
350
351 /* Add an element, discarding the old if the key already exists */
ht_replace(struct hashtable * t,void * key,void * data)352 int ht_replace(struct hashtable *t, void *key, void *data)
353 {
354 int ret;
355 unsigned int index;
356
357 /* Try to add the element */
358 ret = ht_add(t, key, data);
359 if (ret == HT_OK || ret != HT_BUSY)
360 return ret;
361 /* It already exists, get the index */
362 ret = ht_search(t, key, &index);
363 assert(ret == HT_FOUND);
364 /* Remove the old */
365 ret = ht_free(t, index);
366 assert(ret == HT_OK);
367 /* And add the new */
368 return ht_add(t, key, data);
369 }
370
371 /* Add an element to the target hash table */
ht_add(struct hashtable * t,void * key,void * data)372 int ht_add(struct hashtable *t, void *key, void *data)
373 {
374 int ret;
375 unsigned int index;
376
377 /* If the element isn't in the table ht_insert() will store
378 * the index of the free ht_ele in the integer pointer by *index */
379 ret = ht_insert(t, key, &index);
380 if (ret != HT_OK)
381 return ret;
382
383 /* Allocates the memory and stores key */
384 if ((t->table[index] = malloc(sizeof(struct ht_ele))) == NULL)
385 return HT_NOMEM;
386 /* Store the pointers */
387 t->table[index]->key = key;
388 t->table[index]->data = data;
389 t->used++;
390 return HT_OK;
391 }
392
393 /* search and remove an element */
ht_rm(struct hashtable * t,void * key)394 int ht_rm(struct hashtable *t, void *key)
395 {
396 int ret;
397 unsigned int index;
398
399 if ((ret = ht_search(t, key, &index)) != HT_FOUND)
400 return ret;
401 return ht_free(t, index);
402 }
403
404 /* Destroy an entire hash table */
ht_destroy(struct hashtable * t)405 int ht_destroy(struct hashtable *t)
406 {
407 unsigned int i;
408
409 /* Free all the elements */
410 for (i = 0; i < t->size && t->used > 0; i++) {
411 if (t->table[i] != NULL && t->table[i] != ht_free_element) {
412 if (t->key_destructor)
413 t->key_destructor(t->table[i]->key);
414 if (t->val_destructor)
415 t->val_destructor(t->table[i]->data);
416 free(t->table[i]);
417 t->used--;
418 }
419 }
420 /* Free the table and the allocated cache structure */
421 free(t->table);
422 /* Re-initialize the table */
423 ht_reset(t);
424 return HT_OK; /* Actually ht_destroy never fails */
425 }
426
427 /* Free an element in the hash table */
ht_free(struct hashtable * t,unsigned int index)428 int ht_free(struct hashtable *t, unsigned int index)
429 {
430 if (index >= t->size)
431 return HT_IOVERFLOW; /* Index overflow */
432 /* ht_free() calls against non-existent elements are ignored */
433 if (t->table[index] != NULL && t->table[index] != ht_free_element) {
434 /* release the key */
435 if (t->key_destructor)
436 t->key_destructor(t->table[index]->key);
437 /* release the value */
438 if (t->val_destructor)
439 t->val_destructor(t->table[index]->data);
440 /* free the element structure */
441 free(t->table[index]);
442 /* mark the element as freed */
443 t->table[index] = ht_free_element;
444 t->used--;
445 }
446 return HT_OK;
447 }
448
449 /* Search the element with the given key */
ht_search(struct hashtable * t,void * key,unsigned int * found_index)450 int ht_search(struct hashtable *t, void *key, unsigned int *found_index)
451 {
452 int ret;
453 u_int32_t h;
454
455 /* Expand the hashtable if needed */
456 if (t->size == 0) {
457 if ((ret = ht_expand_if_needed(t)) != HT_OK)
458 return ret;
459 }
460
461 /* Try using the first hash functions */
462 h = t->hashf(key) & t->sizemask;
463 /* this handles the removed elements */
464 if (!t->table[h])
465 return HT_NOTFOUND;
466 if (t->table[h] != ht_free_element &&
467 t->key_compare(key, t->table[h]->key))
468 {
469 *found_index = h;
470 return HT_FOUND;
471 }
472
473 while(1) {
474 h = (h+1) & t->sizemask;
475 /* this handles the removed elements */
476 if (t->table[h] == ht_free_element)
477 continue;
478 if (!t->table[h])
479 return HT_NOTFOUND;
480 if (t->key_compare(key, t->table[h]->key)) {
481 *found_index = h;
482 return HT_FOUND;
483 }
484 }
485 }
486
487 /* This function is used to run the entire hash table,
488 * it returns:
489 * 1 if the element with the given index is valid
490 * 0 if the element with the given index is empty or marked free
491 * -1 if the element if out of the range */
ht_get_byindex(struct hashtable * t,unsigned int index)492 int ht_get_byindex(struct hashtable *t, unsigned int index)
493 {
494 if (index >= t->size)
495 return -1;
496 if (t->table[index] == NULL || t->table[index] == ht_free_element)
497 return 0;
498 return 1;
499 }
500
501 /* Returns the hash table as an array of paris of key/value void* pointers.
502 * The array is allocated with malloc() and should be freed when no
503 * longer useful. The key and value pointers should not be freed or
504 * altered in any way, they will be handled by the hash table structure.
505 *
506 * This function is mainly useful to sort the hashtable's content
507 * without to alter the hashtable itself.
508 *
509 * Returns NULL on out of memory. */
ht_get_array(struct hashtable * t)510 void **ht_get_array(struct hashtable *t)
511 {
512 int used = ht_used(t);
513 void **table, **tptr;
514 long idx;
515
516 if ((table = (void**) malloc(sizeof(void*)*(used*2))) == NULL)
517 return NULL;
518 tptr = table;
519 for (idx = 0; ;idx++) {
520 int type = ht_get_byindex(t, idx);
521 if (type == -1) break;
522 if (type == 0) continue;
523 *tptr++ = ht_key(t, idx);
524 *tptr++ = ht_value(t, idx);
525 }
526 return table;
527 }
528 /* ------------------------- private functions ------------------------------ */
529
530 /* Expand the hash table if needed */
ht_expand_if_needed(struct hashtable * t)531 static int ht_expand_if_needed(struct hashtable *t)
532 {
533 /* If the hash table is empty expand it to the intial size,
534 * if the table is half-full redobule its size. */
535 if (t->size == 0)
536 return ht_expand(t, HT_INITIAL_SIZE);
537 if (t->size <= t->used*2)
538 return ht_expand(t, t->size * 2);
539 return HT_OK;
540 }
541
542 /* Our hash table capability is a power of two */
next_power(unsigned int size)543 static unsigned int next_power(unsigned int size)
544 {
545 unsigned int i = 256;
546
547 if (size >= 2147483648U)
548 return 2147483648U;
549 while(1) {
550 if (i >= size)
551 return i;
552 i *= 2;
553 }
554 }
555
556 /* the insert function to add elements out of ht expansion */
ht_insert(struct hashtable * t,void * key,unsigned int * avail_index)557 static int ht_insert(struct hashtable *t, void *key, unsigned int *avail_index)
558 {
559 int ret;
560 u_int32_t h;
561
562 /* Expand the hashtable if needed */
563 if ((ret = ht_expand_if_needed(t)) != HT_OK)
564 return ret;
565
566 /* Try using the first hash functions */
567 h = t->hashf(key) & t->sizemask;
568 /* this handles the removed elements */
569 if (!t->table[h] || t->table[h] == ht_free_element) {
570 *avail_index = h;
571 return HT_OK;
572 }
573 t->collisions++;
574 if (t->key_compare(key, t->table[h]->key))
575 return HT_BUSY;
576
577 while(1) {
578 h = (h+1) & t->sizemask;
579 /* this handles the removed elements */
580 if (!t->table[h] || t->table[h] == ht_free_element) {
581 *avail_index = h;
582 return HT_OK;
583 }
584 t->collisions++;
585 if (t->key_compare(key, t->table[h]->key))
586 return HT_BUSY;
587 }
588 }
589
590 /* ------------------------- provided destructors --------------------------- */
591
592 /* destructor for heap allocated keys/values */
ht_destructor_free(void * obj)593 void ht_destructor_free(void *obj)
594 {
595 free(obj);
596 }
597
598 /* ------------------------- provided comparators --------------------------- */
599
600 /* default key_compare method */
ht_compare_ptr(void * key1,void * key2)601 int ht_compare_ptr(void *key1, void *key2)
602 {
603 return (key1 == key2);
604 }
605
606 /* key compare for nul-terminated strings */
ht_compare_string(void * key1,void * key2)607 int ht_compare_string(void *key1, void *key2)
608 {
609 return (strcmp(key1, key2) == 0) ? 1 : 0;
610 }
611
612 /* -------------------- hash functions for common data types --------------- */
613
614 /* We make this global to allow hash function randomization,
615 * as security measure against attacker-induced worst case behaviuor.
616 *
617 * Note that being H_i the strong hash function with init value of i
618 * and H_i' the same hash function with init value of i' than:
619 *
620 * if H_i(StringOne) is equal to H_i(CollidingStringTwo)
621 *
622 * it is NOT true that
623 *
624 * H_i'(StringOne) is equal to H_i''(CollidingStringTwo)
625 */
626 static u_int32_t strong_hash_init_val = 0xF937A21;
627
628 /* Set the secret initialization value. It should be set from
629 * a secure PRNG like /dev/urandom at program initialization time */
ht_set_strong_hash_init_val(u_int32_t secret)630 void ht_set_strong_hash_init_val(u_int32_t secret)
631 {
632 strong_hash_init_val = secret;
633 }
634
635 /* __ht_strong_hash wrapper that mix a user-provided initval
636 * with the global strong_hash_init_val. __ht_strong_hash is
637 * even exported directly. */
ht_strong_hash(u_int8_t * k,u_int32_t length,u_int32_t initval)638 u_int32_t ht_strong_hash(u_int8_t *k, u_int32_t length, u_int32_t initval)
639 {
640 return __ht_strong_hash(k, length, initval^strong_hash_init_val);
641 }
642
643 /* Hash function suitable for C strings and other data types using
644 * a 0-byte as terminator */
ht_hash_string(void * key)645 u_int32_t ht_hash_string(void *key)
646 {
647 return __ht_strong_hash(key, strlen(key), strong_hash_init_val);
648 }
649
650 /* This one is to hash the value of the pointer itself. */
ht_hash_pointer(void * key)651 u_int32_t ht_hash_pointer(void *key)
652 {
653 return __ht_strong_hash((void*)&key, sizeof(void*), strong_hash_init_val);
654 }
655