1 /* hashmap - a rehashing hash map
2  * Copyright (c) 2003 Michael B. Allen <mba2000 ioplex.com>
3  *
4  * The MIT License
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 #include <stdlib.h>
26 #include <string.h>
27 #include <errno.h>
28 #include <stdio.h>
29 #include <wchar.h>
30 
31 #include "mba/msgno.h"
32 #include "mba/iterator.h"
33 #include "mba/allocator.h"
34 #include "mba/suba.h"
35 #include "mba/hashmap.h"
36 
37 
38 #define HAL(h) ((struct allocator *)((h)->al ? (char *)(h) - (ptrdiff_t)(h)->al : NULL))
39 
40 #define TABLE_SIZES_SIZE 20
41 #define DELETED 1
42 
43 const int table_sizes[] = {
44 	0,
45 	17,      31,      61,      131,
46 	257,     509,     1021,    2053,
47 	4099,    8191,    16381,   32771,
48 	65537,   131071,  262147,  524287,
49 	1048573, 2097143, 4194301, 8388617
50 };
51 
52 struct entry {
53 	unsigned long hash;
54 	ref_t key;
55 	ref_t data;
56 };
57 
58 unsigned long
hash_str(const void * str,void * context)59 hash_str(const void *str, void *context)
60 {
61 	unsigned long h = 5381;
62 	const unsigned char *s = (const unsigned char *)str;
63 	int c;
64 
65 	if (context) { /* then str is ref and context is suba */
66 		s = (const unsigned char *)context + (size_t)str;
67 	}
68 
69 	while ((c = *s++)) {
70 		h = ((h << 5) + h) + c;
71 	}
72 
73 	return h;
74 }
75 unsigned long
hash_wcs(const void * wcs,void * context)76 hash_wcs(const void *wcs, void *context)
77 {
78 	unsigned long h = 5381;
79 	const wchar_t *s = (const wchar_t *)wcs;
80 	wint_t c;
81 
82 	if (context) { /* then wcs is ref and context is suba */
83 		s = (const wchar_t *)context + (size_t)wcs;
84 	}
85 
86 	while ((c = *s++)) {
87 		h = ((h << 5) + h) + c;
88 	}
89 
90 	return h;
91 }
92 static unsigned long
hash_ptr(const void * ptr,void * context)93 hash_ptr(const void *ptr, void *context)
94 {
95 	unsigned long h = (unsigned long)ptr;
96 
97 	if (context) {
98 		h = (unsigned long)((char *)context + (size_t)ptr);
99 	}
100 
101 	return h;
102 }
103 int
cmp_str(const void * object1,const void * object2,void * context)104 cmp_str(const void *object1, const void *object2, void *context)
105 {
106 	const unsigned char *s1 = object1;
107 	const unsigned char *s2 = object2;
108 	const unsigned char *slim;
109 
110 	if (context) {
111 		s1 = (const unsigned char *)context + (size_t)object1;
112 		s2 = (const unsigned char *)context + (size_t)object2;
113 		slim = (const unsigned char *)context + ((struct allocator *)context)->size;
114 	} else {
115 		slim = (const unsigned char *)-1;
116 	}
117 
118 	while (s1 < slim && s2 < slim) {
119 		if (*s1 != *s2) {
120 			return *s1 < *s2 ? -1 : 1;
121 		} else if (*s1 == '\0') {
122 			return 0;
123 		}
124 		s1++;
125 		s2++;
126 	}
127 
128 	return s2 < slim ? -1 : 1;
129 }
130 int
cmp_wcs(const void * object1,const void * object2,void * context)131 cmp_wcs(const void *object1, const void *object2, void *context)
132 {
133 	const wchar_t *s1 = object1;
134 	const wchar_t *s2 = object2;
135 	const wchar_t *slim;
136 
137 	if (context) {
138 		s1 = (const wchar_t *)((char *)context + (size_t)object1);
139 		s2 = (const wchar_t *)((char *)context + (size_t)object2);
140 		slim = (const wchar_t *)((char *)context + ((struct allocator *)context)->size);
141 	} else {
142 		slim = (const wchar_t *)-1;
143 	}
144 
145 	while (s1 < slim && s2 < slim) {
146 		if (*s1 != *s2) {
147 			return *s1 < *s2 ? -1 : 1;
148 		} else if (*s1 == '\0') {
149 			return 0;
150 		}
151 		s1++;
152 		s2++;
153 	}
154 
155 	return s2 < slim ? -1 : 1;
156 }
157 
158 int
hashmap_init(struct hashmap * h,unsigned int load_factor,hash_fn hash,cmp_fn cmp,void * context,struct allocator * al)159 hashmap_init(struct hashmap *h,
160 		unsigned int load_factor,
161 		hash_fn hash,
162 		cmp_fn cmp,
163 		void *context,
164 		struct allocator *al)
165 {
166 	memset(h, 0, sizeof *h);
167 	h->table_size_index = 0;
168 	h->hash = ALREF(al, hash);
169 	h->cmp = ALREF(al, cmp);
170 	h->context = ALREF(al, context);
171 	h->size = 0;
172 	if (load_factor == 0 || load_factor > 100) {
173 		h->load_factor_high = 75;
174 		h->load_factor_low = 18;
175 	} else {
176 		h->load_factor_high = load_factor;
177 		h->load_factor_low = load_factor / 4;
178 	}
179 	if (al && al->tail) { /* al is a suba allocator */
180 		h->al = (char *)h - (char *)al;
181 	}
182 	h->table = 0;
183 
184 	return 0;
185 }
186 int
hashmap_deinit(struct hashmap * h,del_fn key_del,del_fn data_del,void * context)187 hashmap_deinit(struct hashmap *h, del_fn key_del, del_fn data_del, void *context)
188 {
189 	int ret = 0;
190 
191 	if (h) {
192 		struct allocator *al = HAL(h);
193 		ret += hashmap_clear(h, key_del, data_del, context);
194 		ret += allocator_free(al, ALADR(al, h->table));
195 	}
196 
197 	if (ret) {
198 		AMSG("");
199 		return -1;
200 	}
201 
202 	return 0;
203 }
204 struct hashmap *
hashmap_new(hash_fn hash,cmp_fn cmp,void * context,struct allocator * al)205 hashmap_new(hash_fn hash, cmp_fn cmp, void *context, struct allocator *al)
206 {
207 	struct hashmap *h;
208 
209 	if ((h = allocator_alloc(al, sizeof *h, 0)) == NULL ||
210 				hashmap_init(h, 75, hash, cmp, context, al) == -1) {
211 		AMSG("");
212 		allocator_free(al, h);
213 		return NULL;
214 	}
215 
216 	return h;
217 }
218 int
hashmap_del(struct hashmap * h,del_fn key_del,del_fn data_del,void * context)219 hashmap_del(struct hashmap *h, del_fn key_del, del_fn data_del, void *context)
220 {
221 	int ret = 0;
222 
223 	if (h) {
224 		ret += hashmap_deinit(h, key_del, data_del, context);
225 		ret += allocator_free(HAL(h), h);
226 	}
227 
228 	if (ret) {
229 		AMSG("");
230 		return -1;
231 	}
232 
233 	return 0;
234 }
235 int
hashmap_clear(struct hashmap * h,del_fn key_del,del_fn data_del,void * context)236 hashmap_clear(struct hashmap *h, del_fn key_del, del_fn data_del, void *context)
237 {
238 	int ret = 0;
239 
240 	if (h->table) {
241 		int idx, table_size;
242 		struct allocator *al = HAL(h);
243 		struct entry *table = ALADR(al, h->table);
244 
245 		table_size = table_sizes[h->table_size_index];
246 
247 		for (idx = 0; idx < table_size; idx++) {
248 			struct entry *e = table + idx;
249 
250 			if (e->key > DELETED) {
251 				void *k = ALADR(al, e->key);
252 
253 				if (key_del)
254 					ret += key_del(context, k);
255 				if (data_del)
256 					ret += data_del(context, ALADR(al, e->data));
257 			}
258 		}
259 
260 		ret += allocator_free(al, table);
261 
262 		h->table_size_index = 0;
263 		h->size = 0;
264 		h->table = 0;
265 	}
266 
267 	if (ret) {
268 		AMSG("");
269 		return -1;
270 	}
271 
272 	return 0;
273 }
274 int
hashmap_clean(struct hashmap * h)275 hashmap_clean(struct hashmap *h)
276 {
277 	(void)h; /* TODO */
278 	return 0;
279 }
280 static int
hashmap_resize(struct hashmap * h,int new_table_size_index)281 hashmap_resize(struct hashmap *h, int new_table_size_index)
282 {
283 	struct entry *old_table, *oe;
284 	int old_table_size, table_size, i;
285 	struct allocator *al = HAL(h);
286 
287 	if ((oe = allocator_alloc(al, table_sizes[new_table_size_index] * sizeof *oe, 1)) == NULL) {
288 		AMSG("");
289 		return -1;
290 	}
291 
292 	old_table_size = table_sizes[h->table_size_index];
293 	old_table = ALADR(al, h->table);
294 	h->table_size_index = new_table_size_index;
295 	h->table = ALREF(al, oe);
296 
297 	if (old_table == NULL) {
298 		return 0;
299 	}
300 
301 	table_size = table_sizes[h->table_size_index];
302 	for (i = 0; i < old_table_size; i++) {
303 		oe = old_table + i;
304 
305 		if (oe->key > DELETED) {
306 			int idx, rehash_adv;
307 			struct entry *e;
308 
309 			idx = oe->hash % table_size;
310 			rehash_adv = 1 + oe->hash % (table_size - 2);
311 
312 			for ( ;; ) {
313 				e = (struct entry *)ALADR(al, h->table) + idx;
314 
315 				if (e->key == 0) {
316 					*e = *oe;
317 					break;
318 				}
319 
320 				idx = (idx + rehash_adv) % table_size;
321 			}
322 		}
323 	}
324 
325 	if (allocator_free(al, old_table) == -1) {
326 		AMSG("");
327 		return -1;
328 	}
329 
330 	return 0;
331 }
332 int
hashmap_put(struct hashmap * h,void * key,void * data)333 hashmap_put(struct hashmap *h, void *key, void *data)
334 {
335 	unsigned long hash;
336 	int idx, rehash_adv, count, table_size;
337 	struct entry *e;
338 	struct allocator *al = HAL(h);
339 
340 	if (h->table_size_index == 0 ||
341 				((h->size * 100 / table_sizes[h->table_size_index]) >= h->load_factor_high &&
342 				h->table_size_index < TABLE_SIZES_SIZE)) {
343 		if (hashmap_resize(h, h->table_size_index + 1) == -1) {
344 			AMSG("");
345 			return -1;
346 		}
347 	}
348 
349 	hash = h->hash ? ((hash_fn)ALADR(al, h->hash))(key, ALADR(al, h->context)) : hash_ptr(key, ALADR(al, h->context));
350 	table_size = table_sizes[h->table_size_index];
351 	idx = hash % table_size;
352 	rehash_adv = 1 + hash % (table_size - 2);
353 
354 	count = table_size;
355 	do {
356 		e = (struct entry *)ALADR(al, h->table) + idx;
357 		if (e->key <= DELETED) {
358 			break;
359 		} else {
360 			void *k = ALADR(al, e->key);
361 
362 			if (hash == e->hash && (h->cmp ? ((cmp_fn)ALADR(al, h->cmp))(key, k, ALADR(al, h->context)) == 0 : key == k)) {
363 				errno = EEXIST;
364 				PMNO(errno);
365 				return -1;
366 			}
367 		}
368 		idx = (idx + rehash_adv) % table_size;
369 	} while(--count);
370 
371 	if (!count) {
372 		errno = ENOSPC;
373 		PMNO(errno);
374 		return -1;
375 	}
376 
377 	e->hash = hash;
378 	e->key = ALREF(al, key);
379 	e->data = ALREF(al, data);
380 	h->size++;
381 
382 	return 0;
383 }
384 void *
hashmap_get(const struct hashmap * h,const void * key)385 hashmap_get(const struct hashmap *h, const void *key)
386 {
387 	if (h->table) {
388 		unsigned long hash;
389 		int idx, rehash_adv, count, table_size;
390 		struct entry *e;
391 		struct allocator *al = HAL(h);
392 
393 		hash = h->hash ? ((hash_fn)ALADR(al, h->hash))(key, ALADR(al, h->context)) : hash_ptr(key, ALADR(al, h->context));
394 		table_size = table_sizes[h->table_size_index];
395 		idx = hash % table_size;
396 		rehash_adv = 1 + hash % (table_size - 2);
397 
398 		count = table_size;
399 		do {
400 			e = (struct entry *)ALADR(al, h->table) + idx;
401 			if (e->key == 0) {
402 				break;
403 			} else if (e->key != DELETED) {
404 				void *k = ALADR(al, e->key);
405 
406 				if (hash == e->hash && (h->cmp ? ((cmp_fn)ALADR(al, h->cmp))(key, k, ALADR(al, h->context)) == 0 : key == k)) {
407 					return ALADR(al, e->data);
408 				}
409 			}
410 			idx = (idx + rehash_adv) % table_size;
411 		} while(count--);
412 	}
413 
414 	return NULL;
415 }
416 int
hashmap_remove(struct hashmap * h,void ** key,void ** data)417 hashmap_remove(struct hashmap *h, void **key, void **data)
418 {
419 	if (h->table) {
420 		unsigned long hash;
421 		int idx, rehash_adv, count, table_size;
422 		struct entry *e;
423 		struct allocator *al = HAL(h);
424 
425 		if (h->table_size_index > 1 &&
426 					(h->size * 100 / table_sizes[h->table_size_index]) < h->load_factor_low) {
427 			if (hashmap_resize(h, h->table_size_index - 1) == -1) {
428 				AMSG("");
429 				return -1;
430 			}
431 		}
432 
433 		hash = h->hash ? ((hash_fn)ALADR(al, h->hash))(*key, ALADR(al, h->context)) : hash_ptr(*key, ALADR(al, h->context));
434 		table_size = table_sizes[h->table_size_index];
435 		idx = hash % table_size;
436 		rehash_adv = 1 + hash % (table_size - 2);
437 
438 		count = table_size;
439 		do {
440 			e = (struct entry *)ALADR(al, h->table) + idx;
441 			if (e->key == 0) {
442 				break;
443 			} else if (e->key != DELETED) {
444 				void *k = ALADR(al, e->key);
445 
446 				if (hash == e->hash && (h->cmp ? ((cmp_fn)ALADR(al, h->cmp))(*key, k, ALADR(al, h->context)) == 0 : *key == k)) {
447 					*key = k;
448 					*data = ALADR(al, e->data);
449 					e->key = DELETED;
450 					h->size--;
451 					return 0;
452 				}
453 			}
454 			idx = (idx + rehash_adv) % table_size;
455 		} while(count--);
456 	}
457 
458 	*data = NULL;
459 
460 	errno = ENOENT;
461 	PMNO(errno);
462 
463 	return -1;
464 }
465 int
hashmap_is_empty(struct hashmap * h)466 hashmap_is_empty(struct hashmap *h)
467 {
468 	return h == NULL || h->size == 0;
469 }
470 unsigned int
hashmap_size(struct hashmap * h)471 hashmap_size(struct hashmap *h)
472 {
473 	return h ? h->size : 0;
474 }
475 void
hashmap_iterate(void * h,iter_t * iter)476 hashmap_iterate(void *h, iter_t *iter)
477 {
478 	memset(iter, 0, sizeof *iter);
479 	(void)h;
480 }
481 void *
hashmap_next(void * h0,iter_t * iter)482 hashmap_next(void *h0, iter_t *iter)
483 {
484 	struct hashmap *h = h0;
485 
486 	if (h->table) {
487 		int idx, table_size;
488 		struct entry *e;
489 		struct allocator *al = HAL(h);
490 
491 		table_size = table_sizes[h->table_size_index];
492 
493 		idx = iter->i2;
494 		while (idx < table_size) {
495 			e = (struct entry *)ALADR(al, h->table) + idx++;
496 			if (e->key > DELETED) {
497 				iter->i2 = idx;
498 				return ALADR(al, e->key);
499 			}
500 		}
501 	}
502 
503 	return NULL;
504 }
505 
506