1 /*
2  * Copyright (c) 2012 Tim Ruehsen
3  * Copyright (c) 2015-2021 Free Software Foundation, Inc.
4  *
5  * This file is part of libwget.
6  *
7  * Libwget is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Lesser General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * Libwget is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public License
18  * along with libwget.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  *
21  * hashmap routines
22  *
23  * Changelog
24  * 06.11.2012  Tim Ruehsen  created
25  *
26  */
27 
28 #include <config.h>
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <stdarg.h>
34 
35 #include <wget.h>
36 #include "private.h"
37 
38 typedef struct entry_st entry_t;
39 
40 struct entry_st {
41 	void
42 		*key,
43 		*value;
44 	entry_t
45 		*next;
46 	unsigned int
47 		hash;
48 };
49 
50 struct wget_hashmap_st {
51 	wget_hashmap_hash_fn
52 		*hash; // hash function
53 	wget_hashmap_compare_fn
54 		*cmp; // compare function
55 	wget_hashmap_key_destructor
56 		*key_destructor; // key destructor function
57 	wget_hashmap_value_destructor
58 		*value_destructor; // value destructor function
59 	entry_t
60 		**entry;   // pointer to array of pointers to entries
61 	int
62 		max,       // allocated entries
63 		cur,       // number of entries in use
64 		threshold; // resize when max reaches threshold
65 	float
66 		resize_factor, // resize strategy: >0: resize = off * max, <0: resize = max + (-off)
67 		load_factor;
68 };
69 
70 struct wget_hashmap_iterator_st {
71 	struct wget_hashmap_st
72 		*h;
73 	entry_t
74 		*entry;
75 	int
76 		pos;
77 };
78 
79 /**
80  * \file
81  * \brief Hashmap functions
82  * \defgroup libwget-hashmap Hashmap functions
83  * @{
84  *
85  * Hashmaps are key/value stores that perform at O(1) for insertion, searching and removing.
86  */
87 
88 /**
89  * \param[in] h Hashmap
90  * \return New iterator instance for \p h
91  *
92  * Creates a hashmap iterator for \p h.
93  */
wget_hashmap_iterator_alloc(wget_hashmap * h)94 wget_hashmap_iterator *wget_hashmap_iterator_alloc(wget_hashmap *h)
95 {
96 	struct wget_hashmap_iterator_st *iter = wget_calloc(1, sizeof(struct wget_hashmap_iterator_st));
97 
98 	if (iter)
99 		iter->h = h;
100 
101 	return (wget_hashmap_iterator *) iter;
102 }
103 
104 /**
105  * \param[in] iter Hashmap iterator
106  *
107  * Free the given iterator \p iter.
108  */
wget_hashmap_iterator_free(wget_hashmap_iterator ** iter)109 void wget_hashmap_iterator_free(wget_hashmap_iterator **iter)
110 {
111 	if (iter)
112 		xfree(*iter);
113 }
114 
115 /**
116  * \param[in] iter Hashmap iterator
117  * \param[out] value Pointer to the value belonging to the returned key
118  * \return Pointer to the key or NULL if no more elements left
119  *
120  * Returns the next key / value in the hashmap. If all key/value pairs have been
121  * iterated over the function returns NULL and \p value is untouched.
122  *
123  * When iterating over a hashmap, the order of returned key/value pairs is not defined.
124  */
wget_hashmap_iterator_next(wget_hashmap_iterator * iter,void ** value)125 void *wget_hashmap_iterator_next(wget_hashmap_iterator *iter, void **value)
126 {
127 	struct wget_hashmap_iterator_st *_iter = (struct wget_hashmap_iterator_st *) iter;
128 	struct wget_hashmap_st *h = _iter->h;
129 
130 	if (_iter->entry) {
131 		if ((_iter->entry = _iter->entry->next)) {
132 found:
133 			if (value)
134 				*value = _iter->entry->value;
135 			return _iter->entry->key;
136 		}
137 
138 		_iter->pos++;
139 	}
140 
141 	if (!_iter->entry && h) {
142 		for (; _iter->pos < h->max; _iter->pos++) {
143 			if (h->entry[_iter->pos]) {
144 				_iter->entry = h->entry[_iter->pos];
145 				goto found;
146 			}
147 		}
148 	}
149 
150 	return NULL;
151 }
152 
153 /**
154  * \param[in] max Initial number of pre-allocated entries
155  * \param[in] hash Hash function to build hashes from elements
156  * \param[in] cmp Comparison function used to find elements
157  * \return New hashmap instance
158  *
159  * Create a new hashmap instance with initial size \p max.
160  * It should be free'd after use with wget_hashmap_free().
161  *
162  * Before the first insertion of an element, \p hash and \p cmp must be set.
163  * So if you use %NULL values here, you have to call wget_hashmap_setcmpfunc() and/or
164  * wget_hashmap_hashcmpfunc() with appropriate function pointers. No doing so will result
165  * in undefined behavior (likely you'll see a segmentation fault).
166  */
wget_hashmap_create(int max,wget_hashmap_hash_fn * hash,wget_hashmap_compare_fn * cmp)167 wget_hashmap *wget_hashmap_create(int max, wget_hashmap_hash_fn *hash, wget_hashmap_compare_fn *cmp)
168 {
169 	wget_hashmap *h = wget_malloc(sizeof(wget_hashmap));
170 
171 	if (!h)
172 		return NULL;
173 
174 	h->entry = wget_calloc(max, sizeof(entry_t *));
175 
176 	if (!h->entry) {
177 		xfree(h);
178 		return NULL;
179 	}
180 
181 	h->max = max;
182 	h->cur = 0;
183 	h->resize_factor = 2;
184 	h->hash = hash;
185 	h->cmp = cmp;
186 	h->key_destructor = free;
187 	h->value_destructor = free;
188 	h->load_factor = 0.75;
189 	h->threshold = (int)(max * h->load_factor);
190 
191 	return h;
192 }
193 
194 WGET_GCC_NONNULL_ALL
hashmap_find_entry(const wget_hashmap * h,const char * key,unsigned int hash)195 static entry_t * hashmap_find_entry(const wget_hashmap *h, const char *key, unsigned int hash)
196 {
197 	for (entry_t * e = h->entry[hash % h->max]; e; e = e->next) {
198 		if (hash == e->hash && (key == e->key || !h->cmp(key, e->key))) {
199 			return e;
200 		}
201 	}
202 
203 	return NULL;
204 }
205 
206 WGET_GCC_NONNULL_ALL
hashmap_rehash(wget_hashmap * h,entry_t ** new_entry,int newmax,int recalc_hash)207 static void hashmap_rehash(wget_hashmap *h, entry_t **new_entry, int newmax, int recalc_hash)
208 {
209 	entry_t *entry, *next;
210 	int cur = h->cur;
211 
212 	for (int it = 0; it < h->max && cur; it++) {
213 		for (entry = h->entry[it]; entry; entry = next) {
214 			next = entry->next;
215 
216 			// now move entry from 'h' to 'new_hashmap'
217 			if (recalc_hash)
218 				entry->hash = h->hash(entry->key);
219 			int pos = entry->hash % newmax;
220 			entry->next = new_entry[pos];
221 			new_entry[pos] = entry;
222 
223 			cur--;
224 		}
225 	}
226 
227 	xfree(h->entry);
228 	h->entry = new_entry;
229 	h->max = newmax;
230 	h->threshold = (int)(newmax * h->load_factor);
231 }
232 
233 WGET_GCC_NONNULL((1,3))
hashmap_new_entry(wget_hashmap * h,unsigned int hash,const char * key,const char * value)234 static int hashmap_new_entry(wget_hashmap *h, unsigned int hash, const char *key, const char *value)
235 {
236 	entry_t *entry;
237 
238 	if (!(entry = wget_malloc(sizeof(entry_t))))
239 		return WGET_E_MEMORY;
240 
241 	int pos = hash % h->max;
242 
243 	entry->key = (void *)key;
244 	entry->value = (void *)value;
245 	entry->hash = hash;
246 	entry->next = h->entry[pos];
247 	h->entry[pos] = entry;
248 
249 	if (++h->cur >= h->threshold) {
250 		int newsize = (int) (h->max * h->resize_factor);
251 
252 		if (newsize > 0) {
253 			entry_t **new_entry;
254 
255 			if (!(new_entry = wget_calloc(newsize, sizeof(entry_t *)))) {
256 				h->cur--;
257 				xfree(h->entry[pos]);
258 				return WGET_E_MEMORY;
259 			}
260 
261 			// h->cur is always > 0 here, so we don't need a check
262 			hashmap_rehash(h, new_entry, newsize, 0);
263 		}
264 	}
265 
266 	return WGET_E_SUCCESS;
267 }
268 
269 /**
270  * \param[in] h Hashmap to put data into
271  * \param[in] key Key to insert into \p h
272  * \param[in] value Value to insert into \p h
273  * \return 0 if inserted a new entry, 1 if entry existed, WGET_E_MEMORY if internal allocation failed
274  *
275  * Insert a key/value pair into hashmap \p h.
276  *
277  * \p key and \p value are *not* cloned, the hashmap takes 'ownership' of both.
278  *
279  * If \p key already exists and the pointer values the old and the new key differ,
280  * the old key will be destroyed by calling the key destructor function (default is free()).
281  *
282  * To realize a hashset (just keys without values), \p value may be %NULL.
283  *
284  * Neither \p h nor \p key must be %NULL, else the return value will always be 0.
285  */
wget_hashmap_put(wget_hashmap * h,const void * key,const void * value)286 int wget_hashmap_put(wget_hashmap *h, const void *key, const void *value)
287 {
288 	if (h && key) {
289 		entry_t *entry;
290 		unsigned int hash = h->hash(key);
291 		int rc;
292 
293 		if ((entry = hashmap_find_entry(h, key, hash))) {
294 			if (entry->key != key && entry->key != value) {
295 				if (h->key_destructor)
296 					h->key_destructor(entry->key);
297 				if (entry->key == entry->value)
298 					entry->value = NULL;
299 			}
300 			if (entry->value != value && entry->value != key) {
301 				if (h->value_destructor)
302 					h->value_destructor(entry->value);
303 			}
304 
305 			entry->key = (void *) key;
306 			entry->value = (void *) value;
307 
308 			return 1;
309 		}
310 
311 		// a new entry
312 		if ((rc = hashmap_new_entry(h, hash, key, value)) < 0)
313 			return rc;
314 	}
315 
316 	return 0;
317 }
318 
319 /**
320  * \param[in] h Hashmap
321  * \param[in] key Key to search for
322  * \return 1 if \p key has been found, 0 if not found
323  *
324  * Check if \p key exists in \p h.
325  */
wget_hashmap_contains(const wget_hashmap * h,const void * key)326 int wget_hashmap_contains(const wget_hashmap *h, const void *key)
327 {
328 	return wget_hashmap_get(h, key, NULL);
329 }
330 
331 /**
332  * \param[in] h Hashmap
333  * \param[in] key Key to search for
334  * \param[out] value Value to be returned
335  * \return 1 if \p key has been found, 0 if not found
336  *
337  * Get the value for a given key.
338  *
339  * Neither \p h nor \p key must be %NULL.
340  */
341 #undef wget_hashmap_get
wget_hashmap_get(const wget_hashmap * h,const void * key,void ** value)342 int wget_hashmap_get(const wget_hashmap *h, const void *key, void **value)
343 {
344 	if (h && key) {
345 		entry_t *entry;
346 
347 		if ((entry = hashmap_find_entry(h, key, h->hash(key)))) {
348 			if (value)
349 				*value = entry->value;
350 			return 1;
351 		}
352 	}
353 
354 	return 0;
355 }
356 
357 WGET_GCC_NONNULL_ALL
hashmap_remove_entry(wget_hashmap * h,const char * key,int free_kv)358 static int hashmap_remove_entry(wget_hashmap *h, const char *key, int free_kv)
359 {
360 	entry_t *entry, *next, *prev = NULL;
361 	unsigned int hash = h->hash(key);
362 	int pos = hash % h->max;
363 
364 	for (entry = h->entry[pos]; entry; prev = entry, entry = next) {
365 		next = entry->next;
366 
367 		if (hash == entry->hash && (key == entry->key || !h->cmp(key, entry->key))) {
368 			if (prev)
369 				prev->next = next;
370 			else
371 				h->entry[pos] = next;
372 
373 			if (free_kv) {
374 				if (h->key_destructor)
375 					h->key_destructor(entry->key);
376 				if (entry->value != entry->key) {
377 					if (h->value_destructor)
378 						h->value_destructor(entry->value);
379 				}
380 				entry->key = NULL;
381 				entry->value = NULL;
382 			}
383 			xfree(entry);
384 
385 			h->cur--;
386 			return 1;
387 		}
388 	}
389 
390 	return 0;
391 }
392 
393 /**
394  * \param[in] h Hashmap
395  * \param[in] key Key to be removed
396  * \return 1 if \p key has been removed, 0 if not found
397  *
398  * Remove \p key from hashmap \p h.
399  *
400  * If \p key is found, the key and value destructor functions are called
401  * when removing the entry from the hashmap.
402  */
wget_hashmap_remove(wget_hashmap * h,const void * key)403 int wget_hashmap_remove(wget_hashmap *h, const void *key)
404 {
405 	if (h && key)
406 		return hashmap_remove_entry(h, key, 1);
407 	else
408 		return 0;
409 }
410 
411 /**
412  * \param[in] h Hashmap
413  * \param[in] key Key to be removed
414  * \return 1 if \p key has been removed, 0 if not found
415  *
416  * Remove \p key from hashmap \p h.
417  *
418  * Key and value destructor functions are *not* called when removing the entry from the hashmap.
419  */
wget_hashmap_remove_nofree(wget_hashmap * h,const void * key)420 int wget_hashmap_remove_nofree(wget_hashmap *h, const void *key)
421 {
422 	if (h && key)
423 		return hashmap_remove_entry(h, key, 0);
424 	else
425 		return 0;
426 }
427 
428 /**
429  * \param[in] h Hashmap to be free'd
430  *
431  * Remove all entries from hashmap \p h and free the hashmap instance.
432  *
433  * Key and value destructor functions are called for each entry in the hashmap.
434  */
wget_hashmap_free(wget_hashmap ** h)435 void wget_hashmap_free(wget_hashmap **h)
436 {
437 	if (h && *h) {
438 		wget_hashmap_clear(*h);
439 		xfree((*h)->entry);
440 		xfree(*h);
441 	}
442 }
443 
444 /**
445  * \param[in] h Hashmap to be cleared
446  *
447  * Remove all entries from hashmap \p h.
448  *
449  * Key and value destructor functions are called for each entry in the hashmap.
450  */
wget_hashmap_clear(wget_hashmap * h)451 void wget_hashmap_clear(wget_hashmap *h)
452 {
453 	if (h) {
454 		entry_t *entry, *next;
455 		int it, cur = h->cur;
456 
457 		for (it = 0; it < h->max && cur; it++) {
458 			for (entry = h->entry[it]; entry; entry = next) {
459 				next = entry->next;
460 
461 				if (h->key_destructor)
462 					h->key_destructor(entry->key);
463 
464 				// free value if different from key
465 				if (entry->value != entry->key && h->value_destructor)
466 					h->value_destructor(entry->value);
467 
468 				entry->key = NULL;
469 				entry->value = NULL;
470 
471 				xfree(entry);
472 				cur--;
473 			}
474 			h->entry[it] = NULL;
475 		}
476 		h->cur = 0;
477 	}
478 }
479 
480 /**
481  * \param[in] h Hashmap
482  * \return Number of entries in hashmap \p h
483  *
484  * Return the number of entries in the hashmap \p h.
485  */
wget_hashmap_size(const wget_hashmap * h)486 int wget_hashmap_size(const wget_hashmap *h)
487 {
488 	return h ? h->cur : 0;
489 }
490 
491 /**
492  * \param[in] h Hashmap
493  * \param[in] browse Function to be called for each element of \p h
494  * \param[in] ctx Context variable use as param to \p browse
495  * \return Return value of the last call to \p browse
496  *
497  * Call function \p browse for each element of hashmap \p h or until \p browse
498  * returns a value not equal to zero.
499  *
500  * \p browse is called with \p ctx and the pointer to the current element.
501  *
502  * The return value of the last call to \p browse is returned or 0 if either \p h or \p browse is %NULL.
503  */
wget_hashmap_browse(const wget_hashmap * h,wget_hashmap_browse_fn * browse,void * ctx)504 int wget_hashmap_browse(const wget_hashmap *h, wget_hashmap_browse_fn *browse, void *ctx)
505 {
506 	if (h && browse) {
507 		entry_t *entry;
508 		int it, ret, cur = h->cur;
509 
510 		for (it = 0; it < h->max && cur; it++) {
511 			for (entry = h->entry[it]; entry; entry = entry->next) {
512 				if ((ret = browse(ctx, entry->key, entry->value)) != 0)
513 					return ret;
514 				cur--;
515 			}
516 		}
517 	}
518 
519 	return 0;
520 }
521 
522 /**
523  * \param[in] h Hashmap
524  * \param[in] cmp Comparison function used to find keys
525  *
526  * Set the comparison function.
527  */
wget_hashmap_setcmpfunc(wget_hashmap * h,wget_hashmap_compare_fn * cmp)528 void wget_hashmap_setcmpfunc(wget_hashmap *h, wget_hashmap_compare_fn *cmp)
529 {
530 	if (h)
531 		h->cmp = cmp;
532 }
533 
534 /**
535  * \param[in] h Hashmap
536  * \param[in] hash Hash function used to hash keys
537  * \return WGET_E_SUCCESS if set successfully, else WGET_E_MEMORY or WGET_E_INVALID
538  *
539  * Set the key hash function.
540  *
541  * The keys of all entries in the hashmap will be hashed again. This includes a memory allocation, so
542  * there is a possibility of failure.
543  */
wget_hashmap_sethashfunc(wget_hashmap * h,wget_hashmap_hash_fn * hash)544 int wget_hashmap_sethashfunc(wget_hashmap *h, wget_hashmap_hash_fn *hash)
545 {
546 	if (!h)
547 		return WGET_E_INVALID;
548 
549 	if (!h->cur)
550 		return WGET_E_SUCCESS; // no re-hashing needed
551 
552 	entry_t **new_entry = wget_calloc(h->max, sizeof(entry_t *));
553 
554 	if (!new_entry)
555 		return WGET_E_MEMORY;
556 
557 	h->hash = hash;
558 	hashmap_rehash(h, new_entry, h->max, 1);
559 
560 	return WGET_E_SUCCESS;
561 }
562 
563 /**
564  * \param[in] h Hashmap
565  * \param[in] destructor Destructor function for keys
566  *
567  * Set the key destructor function.
568  *
569  * Default is free().
570  */
wget_hashmap_set_key_destructor(wget_hashmap * h,wget_hashmap_key_destructor * destructor)571 void wget_hashmap_set_key_destructor(wget_hashmap *h, wget_hashmap_key_destructor *destructor)
572 {
573 	if (h)
574 		h->key_destructor = destructor;
575 }
576 
577 /**
578  * \param[in] h Hashmap
579  * \param[in] destructor Destructor function for values
580  *
581  * Set the value destructor function.
582  *
583  * Default is free().
584  */
wget_hashmap_set_value_destructor(wget_hashmap * h,wget_hashmap_value_destructor * destructor)585 void wget_hashmap_set_value_destructor(wget_hashmap *h, wget_hashmap_value_destructor *destructor)
586 {
587 	if (h)
588 		h->value_destructor = destructor;
589 }
590 
591 /**
592  * \param[in] h Hashmap
593  * \param[in] factor The load factor
594  *
595  * Set the load factor function.
596  *
597  * The load factor is determines when to resize the internal memory.
598  * 0.75 means "resize if 75% or more of all slots are used".
599  *
600  * The resize strategy is set by wget_hashmap_set_growth_policy().
601  *
602  * The resize (and rehashing) occurs earliest on the next insertion of a new key.
603  *
604  * Default is 0.75.
605  */
wget_hashmap_set_load_factor(wget_hashmap * h,float factor)606 void wget_hashmap_set_load_factor(wget_hashmap *h, float factor)
607 {
608 	if (h) {
609 		h->load_factor = factor;
610 		h->threshold = (int)(h->max * h->load_factor);
611 		// rehashing occurs earliest on next put()
612 	}
613 }
614 
615 /**
616  * \param[in] h Hashmap
617  * \param[in] factor Hashmap growth factor
618  *
619  * Set the factor for resizing the hashmap when it's load factor is reached.
620  *
621  * The new size is 'factor * oldsize'. If the new size is less or equal 0,
622  * the involved put function will do nothing and the internal state of
623  * the hashmap will not change.
624  *
625  * Default is 2.
626  */
wget_hashmap_set_resize_factor(wget_hashmap * h,float factor)627 void wget_hashmap_set_resize_factor(wget_hashmap *h, float factor)
628 {
629 	if (h)
630 		h->resize_factor = factor;
631 }
632 
633 /**@}*/
634