1 /*
2 * Copyright (c) 2012 Tim Ruehsen
3 * Copyright (c) 2015-2021 Free Software Foundation, Inc.
4 *
5 * This file is part of libwget.
6 *
7 * Libwget is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * Libwget is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with libwget. If not, see <https://www.gnu.org/licenses/>.
19 *
20 *
21 * hashmap routines
22 *
23 * Changelog
24 * 06.11.2012 Tim Ruehsen created
25 *
26 */
27
28 #include <config.h>
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <stdarg.h>
34
35 #include <wget.h>
36 #include "private.h"
37
38 typedef struct entry_st entry_t;
39
40 struct entry_st {
41 void
42 *key,
43 *value;
44 entry_t
45 *next;
46 unsigned int
47 hash;
48 };
49
50 struct wget_hashmap_st {
51 wget_hashmap_hash_fn
52 *hash; // hash function
53 wget_hashmap_compare_fn
54 *cmp; // compare function
55 wget_hashmap_key_destructor
56 *key_destructor; // key destructor function
57 wget_hashmap_value_destructor
58 *value_destructor; // value destructor function
59 entry_t
60 **entry; // pointer to array of pointers to entries
61 int
62 max, // allocated entries
63 cur, // number of entries in use
64 threshold; // resize when max reaches threshold
65 float
66 resize_factor, // resize strategy: >0: resize = off * max, <0: resize = max + (-off)
67 load_factor;
68 };
69
70 struct wget_hashmap_iterator_st {
71 struct wget_hashmap_st
72 *h;
73 entry_t
74 *entry;
75 int
76 pos;
77 };
78
79 /**
80 * \file
81 * \brief Hashmap functions
82 * \defgroup libwget-hashmap Hashmap functions
83 * @{
84 *
85 * Hashmaps are key/value stores that perform at O(1) for insertion, searching and removing.
86 */
87
88 /**
89 * \param[in] h Hashmap
90 * \return New iterator instance for \p h
91 *
92 * Creates a hashmap iterator for \p h.
93 */
wget_hashmap_iterator_alloc(wget_hashmap * h)94 wget_hashmap_iterator *wget_hashmap_iterator_alloc(wget_hashmap *h)
95 {
96 struct wget_hashmap_iterator_st *iter = wget_calloc(1, sizeof(struct wget_hashmap_iterator_st));
97
98 if (iter)
99 iter->h = h;
100
101 return (wget_hashmap_iterator *) iter;
102 }
103
104 /**
105 * \param[in] iter Hashmap iterator
106 *
107 * Free the given iterator \p iter.
108 */
wget_hashmap_iterator_free(wget_hashmap_iterator ** iter)109 void wget_hashmap_iterator_free(wget_hashmap_iterator **iter)
110 {
111 if (iter)
112 xfree(*iter);
113 }
114
115 /**
116 * \param[in] iter Hashmap iterator
117 * \param[out] value Pointer to the value belonging to the returned key
118 * \return Pointer to the key or NULL if no more elements left
119 *
120 * Returns the next key / value in the hashmap. If all key/value pairs have been
121 * iterated over the function returns NULL and \p value is untouched.
122 *
123 * When iterating over a hashmap, the order of returned key/value pairs is not defined.
124 */
wget_hashmap_iterator_next(wget_hashmap_iterator * iter,void ** value)125 void *wget_hashmap_iterator_next(wget_hashmap_iterator *iter, void **value)
126 {
127 struct wget_hashmap_iterator_st *_iter = (struct wget_hashmap_iterator_st *) iter;
128 struct wget_hashmap_st *h = _iter->h;
129
130 if (_iter->entry) {
131 if ((_iter->entry = _iter->entry->next)) {
132 found:
133 if (value)
134 *value = _iter->entry->value;
135 return _iter->entry->key;
136 }
137
138 _iter->pos++;
139 }
140
141 if (!_iter->entry && h) {
142 for (; _iter->pos < h->max; _iter->pos++) {
143 if (h->entry[_iter->pos]) {
144 _iter->entry = h->entry[_iter->pos];
145 goto found;
146 }
147 }
148 }
149
150 return NULL;
151 }
152
153 /**
154 * \param[in] max Initial number of pre-allocated entries
155 * \param[in] hash Hash function to build hashes from elements
156 * \param[in] cmp Comparison function used to find elements
157 * \return New hashmap instance
158 *
159 * Create a new hashmap instance with initial size \p max.
160 * It should be free'd after use with wget_hashmap_free().
161 *
162 * Before the first insertion of an element, \p hash and \p cmp must be set.
163 * So if you use %NULL values here, you have to call wget_hashmap_setcmpfunc() and/or
164 * wget_hashmap_hashcmpfunc() with appropriate function pointers. No doing so will result
165 * in undefined behavior (likely you'll see a segmentation fault).
166 */
wget_hashmap_create(int max,wget_hashmap_hash_fn * hash,wget_hashmap_compare_fn * cmp)167 wget_hashmap *wget_hashmap_create(int max, wget_hashmap_hash_fn *hash, wget_hashmap_compare_fn *cmp)
168 {
169 wget_hashmap *h = wget_malloc(sizeof(wget_hashmap));
170
171 if (!h)
172 return NULL;
173
174 h->entry = wget_calloc(max, sizeof(entry_t *));
175
176 if (!h->entry) {
177 xfree(h);
178 return NULL;
179 }
180
181 h->max = max;
182 h->cur = 0;
183 h->resize_factor = 2;
184 h->hash = hash;
185 h->cmp = cmp;
186 h->key_destructor = free;
187 h->value_destructor = free;
188 h->load_factor = 0.75;
189 h->threshold = (int)(max * h->load_factor);
190
191 return h;
192 }
193
194 WGET_GCC_NONNULL_ALL
hashmap_find_entry(const wget_hashmap * h,const char * key,unsigned int hash)195 static entry_t * hashmap_find_entry(const wget_hashmap *h, const char *key, unsigned int hash)
196 {
197 for (entry_t * e = h->entry[hash % h->max]; e; e = e->next) {
198 if (hash == e->hash && (key == e->key || !h->cmp(key, e->key))) {
199 return e;
200 }
201 }
202
203 return NULL;
204 }
205
206 WGET_GCC_NONNULL_ALL
hashmap_rehash(wget_hashmap * h,entry_t ** new_entry,int newmax,int recalc_hash)207 static void hashmap_rehash(wget_hashmap *h, entry_t **new_entry, int newmax, int recalc_hash)
208 {
209 entry_t *entry, *next;
210 int cur = h->cur;
211
212 for (int it = 0; it < h->max && cur; it++) {
213 for (entry = h->entry[it]; entry; entry = next) {
214 next = entry->next;
215
216 // now move entry from 'h' to 'new_hashmap'
217 if (recalc_hash)
218 entry->hash = h->hash(entry->key);
219 int pos = entry->hash % newmax;
220 entry->next = new_entry[pos];
221 new_entry[pos] = entry;
222
223 cur--;
224 }
225 }
226
227 xfree(h->entry);
228 h->entry = new_entry;
229 h->max = newmax;
230 h->threshold = (int)(newmax * h->load_factor);
231 }
232
233 WGET_GCC_NONNULL((1,3))
hashmap_new_entry(wget_hashmap * h,unsigned int hash,const char * key,const char * value)234 static int hashmap_new_entry(wget_hashmap *h, unsigned int hash, const char *key, const char *value)
235 {
236 entry_t *entry;
237
238 if (!(entry = wget_malloc(sizeof(entry_t))))
239 return WGET_E_MEMORY;
240
241 int pos = hash % h->max;
242
243 entry->key = (void *)key;
244 entry->value = (void *)value;
245 entry->hash = hash;
246 entry->next = h->entry[pos];
247 h->entry[pos] = entry;
248
249 if (++h->cur >= h->threshold) {
250 int newsize = (int) (h->max * h->resize_factor);
251
252 if (newsize > 0) {
253 entry_t **new_entry;
254
255 if (!(new_entry = wget_calloc(newsize, sizeof(entry_t *)))) {
256 h->cur--;
257 xfree(h->entry[pos]);
258 return WGET_E_MEMORY;
259 }
260
261 // h->cur is always > 0 here, so we don't need a check
262 hashmap_rehash(h, new_entry, newsize, 0);
263 }
264 }
265
266 return WGET_E_SUCCESS;
267 }
268
269 /**
270 * \param[in] h Hashmap to put data into
271 * \param[in] key Key to insert into \p h
272 * \param[in] value Value to insert into \p h
273 * \return 0 if inserted a new entry, 1 if entry existed, WGET_E_MEMORY if internal allocation failed
274 *
275 * Insert a key/value pair into hashmap \p h.
276 *
277 * \p key and \p value are *not* cloned, the hashmap takes 'ownership' of both.
278 *
279 * If \p key already exists and the pointer values the old and the new key differ,
280 * the old key will be destroyed by calling the key destructor function (default is free()).
281 *
282 * To realize a hashset (just keys without values), \p value may be %NULL.
283 *
284 * Neither \p h nor \p key must be %NULL, else the return value will always be 0.
285 */
wget_hashmap_put(wget_hashmap * h,const void * key,const void * value)286 int wget_hashmap_put(wget_hashmap *h, const void *key, const void *value)
287 {
288 if (h && key) {
289 entry_t *entry;
290 unsigned int hash = h->hash(key);
291 int rc;
292
293 if ((entry = hashmap_find_entry(h, key, hash))) {
294 if (entry->key != key && entry->key != value) {
295 if (h->key_destructor)
296 h->key_destructor(entry->key);
297 if (entry->key == entry->value)
298 entry->value = NULL;
299 }
300 if (entry->value != value && entry->value != key) {
301 if (h->value_destructor)
302 h->value_destructor(entry->value);
303 }
304
305 entry->key = (void *) key;
306 entry->value = (void *) value;
307
308 return 1;
309 }
310
311 // a new entry
312 if ((rc = hashmap_new_entry(h, hash, key, value)) < 0)
313 return rc;
314 }
315
316 return 0;
317 }
318
319 /**
320 * \param[in] h Hashmap
321 * \param[in] key Key to search for
322 * \return 1 if \p key has been found, 0 if not found
323 *
324 * Check if \p key exists in \p h.
325 */
wget_hashmap_contains(const wget_hashmap * h,const void * key)326 int wget_hashmap_contains(const wget_hashmap *h, const void *key)
327 {
328 return wget_hashmap_get(h, key, NULL);
329 }
330
331 /**
332 * \param[in] h Hashmap
333 * \param[in] key Key to search for
334 * \param[out] value Value to be returned
335 * \return 1 if \p key has been found, 0 if not found
336 *
337 * Get the value for a given key.
338 *
339 * Neither \p h nor \p key must be %NULL.
340 */
341 #undef wget_hashmap_get
wget_hashmap_get(const wget_hashmap * h,const void * key,void ** value)342 int wget_hashmap_get(const wget_hashmap *h, const void *key, void **value)
343 {
344 if (h && key) {
345 entry_t *entry;
346
347 if ((entry = hashmap_find_entry(h, key, h->hash(key)))) {
348 if (value)
349 *value = entry->value;
350 return 1;
351 }
352 }
353
354 return 0;
355 }
356
357 WGET_GCC_NONNULL_ALL
hashmap_remove_entry(wget_hashmap * h,const char * key,int free_kv)358 static int hashmap_remove_entry(wget_hashmap *h, const char *key, int free_kv)
359 {
360 entry_t *entry, *next, *prev = NULL;
361 unsigned int hash = h->hash(key);
362 int pos = hash % h->max;
363
364 for (entry = h->entry[pos]; entry; prev = entry, entry = next) {
365 next = entry->next;
366
367 if (hash == entry->hash && (key == entry->key || !h->cmp(key, entry->key))) {
368 if (prev)
369 prev->next = next;
370 else
371 h->entry[pos] = next;
372
373 if (free_kv) {
374 if (h->key_destructor)
375 h->key_destructor(entry->key);
376 if (entry->value != entry->key) {
377 if (h->value_destructor)
378 h->value_destructor(entry->value);
379 }
380 entry->key = NULL;
381 entry->value = NULL;
382 }
383 xfree(entry);
384
385 h->cur--;
386 return 1;
387 }
388 }
389
390 return 0;
391 }
392
393 /**
394 * \param[in] h Hashmap
395 * \param[in] key Key to be removed
396 * \return 1 if \p key has been removed, 0 if not found
397 *
398 * Remove \p key from hashmap \p h.
399 *
400 * If \p key is found, the key and value destructor functions are called
401 * when removing the entry from the hashmap.
402 */
wget_hashmap_remove(wget_hashmap * h,const void * key)403 int wget_hashmap_remove(wget_hashmap *h, const void *key)
404 {
405 if (h && key)
406 return hashmap_remove_entry(h, key, 1);
407 else
408 return 0;
409 }
410
411 /**
412 * \param[in] h Hashmap
413 * \param[in] key Key to be removed
414 * \return 1 if \p key has been removed, 0 if not found
415 *
416 * Remove \p key from hashmap \p h.
417 *
418 * Key and value destructor functions are *not* called when removing the entry from the hashmap.
419 */
wget_hashmap_remove_nofree(wget_hashmap * h,const void * key)420 int wget_hashmap_remove_nofree(wget_hashmap *h, const void *key)
421 {
422 if (h && key)
423 return hashmap_remove_entry(h, key, 0);
424 else
425 return 0;
426 }
427
428 /**
429 * \param[in] h Hashmap to be free'd
430 *
431 * Remove all entries from hashmap \p h and free the hashmap instance.
432 *
433 * Key and value destructor functions are called for each entry in the hashmap.
434 */
wget_hashmap_free(wget_hashmap ** h)435 void wget_hashmap_free(wget_hashmap **h)
436 {
437 if (h && *h) {
438 wget_hashmap_clear(*h);
439 xfree((*h)->entry);
440 xfree(*h);
441 }
442 }
443
444 /**
445 * \param[in] h Hashmap to be cleared
446 *
447 * Remove all entries from hashmap \p h.
448 *
449 * Key and value destructor functions are called for each entry in the hashmap.
450 */
wget_hashmap_clear(wget_hashmap * h)451 void wget_hashmap_clear(wget_hashmap *h)
452 {
453 if (h) {
454 entry_t *entry, *next;
455 int it, cur = h->cur;
456
457 for (it = 0; it < h->max && cur; it++) {
458 for (entry = h->entry[it]; entry; entry = next) {
459 next = entry->next;
460
461 if (h->key_destructor)
462 h->key_destructor(entry->key);
463
464 // free value if different from key
465 if (entry->value != entry->key && h->value_destructor)
466 h->value_destructor(entry->value);
467
468 entry->key = NULL;
469 entry->value = NULL;
470
471 xfree(entry);
472 cur--;
473 }
474 h->entry[it] = NULL;
475 }
476 h->cur = 0;
477 }
478 }
479
480 /**
481 * \param[in] h Hashmap
482 * \return Number of entries in hashmap \p h
483 *
484 * Return the number of entries in the hashmap \p h.
485 */
wget_hashmap_size(const wget_hashmap * h)486 int wget_hashmap_size(const wget_hashmap *h)
487 {
488 return h ? h->cur : 0;
489 }
490
491 /**
492 * \param[in] h Hashmap
493 * \param[in] browse Function to be called for each element of \p h
494 * \param[in] ctx Context variable use as param to \p browse
495 * \return Return value of the last call to \p browse
496 *
497 * Call function \p browse for each element of hashmap \p h or until \p browse
498 * returns a value not equal to zero.
499 *
500 * \p browse is called with \p ctx and the pointer to the current element.
501 *
502 * The return value of the last call to \p browse is returned or 0 if either \p h or \p browse is %NULL.
503 */
wget_hashmap_browse(const wget_hashmap * h,wget_hashmap_browse_fn * browse,void * ctx)504 int wget_hashmap_browse(const wget_hashmap *h, wget_hashmap_browse_fn *browse, void *ctx)
505 {
506 if (h && browse) {
507 entry_t *entry;
508 int it, ret, cur = h->cur;
509
510 for (it = 0; it < h->max && cur; it++) {
511 for (entry = h->entry[it]; entry; entry = entry->next) {
512 if ((ret = browse(ctx, entry->key, entry->value)) != 0)
513 return ret;
514 cur--;
515 }
516 }
517 }
518
519 return 0;
520 }
521
522 /**
523 * \param[in] h Hashmap
524 * \param[in] cmp Comparison function used to find keys
525 *
526 * Set the comparison function.
527 */
wget_hashmap_setcmpfunc(wget_hashmap * h,wget_hashmap_compare_fn * cmp)528 void wget_hashmap_setcmpfunc(wget_hashmap *h, wget_hashmap_compare_fn *cmp)
529 {
530 if (h)
531 h->cmp = cmp;
532 }
533
534 /**
535 * \param[in] h Hashmap
536 * \param[in] hash Hash function used to hash keys
537 * \return WGET_E_SUCCESS if set successfully, else WGET_E_MEMORY or WGET_E_INVALID
538 *
539 * Set the key hash function.
540 *
541 * The keys of all entries in the hashmap will be hashed again. This includes a memory allocation, so
542 * there is a possibility of failure.
543 */
wget_hashmap_sethashfunc(wget_hashmap * h,wget_hashmap_hash_fn * hash)544 int wget_hashmap_sethashfunc(wget_hashmap *h, wget_hashmap_hash_fn *hash)
545 {
546 if (!h)
547 return WGET_E_INVALID;
548
549 if (!h->cur)
550 return WGET_E_SUCCESS; // no re-hashing needed
551
552 entry_t **new_entry = wget_calloc(h->max, sizeof(entry_t *));
553
554 if (!new_entry)
555 return WGET_E_MEMORY;
556
557 h->hash = hash;
558 hashmap_rehash(h, new_entry, h->max, 1);
559
560 return WGET_E_SUCCESS;
561 }
562
563 /**
564 * \param[in] h Hashmap
565 * \param[in] destructor Destructor function for keys
566 *
567 * Set the key destructor function.
568 *
569 * Default is free().
570 */
wget_hashmap_set_key_destructor(wget_hashmap * h,wget_hashmap_key_destructor * destructor)571 void wget_hashmap_set_key_destructor(wget_hashmap *h, wget_hashmap_key_destructor *destructor)
572 {
573 if (h)
574 h->key_destructor = destructor;
575 }
576
577 /**
578 * \param[in] h Hashmap
579 * \param[in] destructor Destructor function for values
580 *
581 * Set the value destructor function.
582 *
583 * Default is free().
584 */
wget_hashmap_set_value_destructor(wget_hashmap * h,wget_hashmap_value_destructor * destructor)585 void wget_hashmap_set_value_destructor(wget_hashmap *h, wget_hashmap_value_destructor *destructor)
586 {
587 if (h)
588 h->value_destructor = destructor;
589 }
590
591 /**
592 * \param[in] h Hashmap
593 * \param[in] factor The load factor
594 *
595 * Set the load factor function.
596 *
597 * The load factor is determines when to resize the internal memory.
598 * 0.75 means "resize if 75% or more of all slots are used".
599 *
600 * The resize strategy is set by wget_hashmap_set_growth_policy().
601 *
602 * The resize (and rehashing) occurs earliest on the next insertion of a new key.
603 *
604 * Default is 0.75.
605 */
wget_hashmap_set_load_factor(wget_hashmap * h,float factor)606 void wget_hashmap_set_load_factor(wget_hashmap *h, float factor)
607 {
608 if (h) {
609 h->load_factor = factor;
610 h->threshold = (int)(h->max * h->load_factor);
611 // rehashing occurs earliest on next put()
612 }
613 }
614
615 /**
616 * \param[in] h Hashmap
617 * \param[in] factor Hashmap growth factor
618 *
619 * Set the factor for resizing the hashmap when it's load factor is reached.
620 *
621 * The new size is 'factor * oldsize'. If the new size is less or equal 0,
622 * the involved put function will do nothing and the internal state of
623 * the hashmap will not change.
624 *
625 * Default is 2.
626 */
wget_hashmap_set_resize_factor(wget_hashmap * h,float factor)627 void wget_hashmap_set_resize_factor(wget_hashmap *h, float factor)
628 {
629 if (h)
630 h->resize_factor = factor;
631 }
632
633 /**@}*/
634