1 /* hash.c -- Hash table routines for Khepera
2  * Created: Thu Nov  3 20:07:29 1994 by faith@dict.org
3  * Copyright 1994-1997, 1999, 2002 Rickard E. Faith (faith@dict.org)
4  * Copyright 2002-2008 Aleksey Cheusov (vle@gmx.net)
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sublicense, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  *
25  * \section{Hash Table Routines}
26  *
27  * \intro Generic hash table support is provided for storing generic data
28  * associated with keys.  The hash table has prime length, with
29  * self-organizing linked lists \cite[pp.~398--9]{faith:Knuth73c} used for
30  * collision resolution. The hash table automatically grows as necessary to
31  * preserve efficient access.
32  *
33  */
34 
35 #include "maaP.h"
36 
37 typedef struct bucket {
38 	const void    *key;
39 	unsigned long hash;
40 	const void    *datum;
41 	struct bucket *next;
42 } *bucketType;
43 
44 typedef struct table {
45 #if MAA_MAGIC
46 	int           magic;
47 #endif
48 	unsigned long prime;
49 	unsigned long entries;
50 	bucketType    *buckets;
51 	unsigned long resizings;
52 	unsigned long retrievals;
53 	unsigned long hits;
54 	unsigned long misses;
55 	unsigned long (*hash)(const void *);
56 	int           (*compare)(const void *, const void *);
57 	int           readonly;
58 } *tableType;
59 
_hsh_check(tableType t,const char * function)60 static void _hsh_check(tableType t, const char *function)
61 {
62 	if (!t) err_internal(function, "table is null");
63 #if MAA_MAGIC
64 	if (t->magic != HSH_MAGIC)
65 		err_internal(function,
66 					 "Magic match failed: 0x%08x (should be 0x%08x)",
67 					 t->magic,
68 					 HSH_MAGIC);
69 #endif
70 	if (!t->buckets)
71 		err_internal(function, "no buckets");
72 }
73 
_hsh_create(unsigned long seed,unsigned long (* hash)(const void *),int (* compare)(const void *,const void *))74 static hsh_HashTable _hsh_create(
75 	unsigned long seed,
76 	unsigned long (*hash)(const void *),
77 	int (*compare)(const void *,
78 				   const void *))
79 {
80 	tableType     t;
81 	unsigned long i;
82 	unsigned long prime = prm_next_prime(seed);
83 
84 	t             = xmalloc(sizeof(struct table));
85 #if MAA_MAGIC
86 	t->magic      = HSH_MAGIC;
87 #endif
88 	t->prime      = prime;
89 	t->entries    = 0;
90 	t->buckets    = xmalloc(prime * sizeof(struct bucket));
91 	t->resizings  = 0;
92 	t->retrievals = 0;
93 	t->hits       = 0;
94 	t->misses     = 0;
95 	t->hash       = hash ? hash : hsh_string_hash;
96 	t->compare    = compare ? compare : hsh_string_compare;
97 	t->readonly   = 0;
98 
99 	for (i = 0; i < prime; i++) t->buckets[i] = NULL;
100 
101 	return t;
102 }
103 
104 /* \doc The |hsh_create| function initilizes a generic hash table.  Keys
105    and data are pointers to "void".
106 
107    The internal representation of the hash table will grow automatically
108    when an insertion is performed and the table is more than half full.
109 
110    The |hash| function should take a pointer to a |key| and return an
111    "unsigned long".  If |hash| is "NULL", then the |key| is assumed to be a
112    pointer to a null-terminated string, and the function shown in
113    \grind{hsh_string_hash} will be used for |hash| (the algorithm for this
114    function is from \cite[p.~435]{faith:Aho88}).
115 
116    The |compare| function should take a pair of pointers to keys and return
117    zero if the keys are equal and non-zero if the keys are not equal.  If
118    |compare| is "NULL", then the keys are assumed to point to
119    null-terminated strings, and the |strcmp| function will be used for
120    |compare|.
121 
122    Additionally, the |hsh_pointer_hash| and |hsh_pointer_compare| functions
123    are available and can be used to treat the \emph{value} of the "void"
124    pointer as the key.  These functions are often useful for maintaining
125    sets of objects. */
126 
hsh_create(unsigned long (* hash)(const void *),int (* compare)(const void *,const void *))127 hsh_HashTable hsh_create(
128 	unsigned long (*hash)(const void *),
129 	int (*compare)(const void *,
130 				   const void *))
131 {
132 	return _hsh_create(0, hash, compare);
133 }
134 
_hsh_destroy_buckets(hsh_HashTable table)135 static void _hsh_destroy_buckets(hsh_HashTable table)
136 {
137 	unsigned long i;
138 	tableType     t    = (tableType)table;
139 
140 	_hsh_check(t, __func__);
141 	for (i = 0; i < t->prime; i++) {
142 		bucketType b = t->buckets[i];
143 
144 		while (b) {
145 			bucketType next = b->next;
146 
147 			xfree(b);		/* terminal */
148 			b = next;
149 		}
150 	}
151 
152 	xfree(t->buckets);		/* terminal */
153 	t->buckets = NULL;
154 }
155 
_hsh_destroy_table(hsh_HashTable table)156 static void _hsh_destroy_table(hsh_HashTable table)
157 {
158 	tableType t = (tableType)table;
159 
160 #if MAA_MAGIC
161 	t->magic = HSH_MAGIC_FREED;
162 #endif
163 	xfree(t);			/* terminal */
164 }
165 
166 /* \doc |hsh_destroy| frees all of the memory associated with the hash
167    table.
168 
169    The memory used by keys and data is \emph{not} freed---this memory is
170    the responsibility of the user.  However, a call to |hsh_iterate|
171    can be used to free this memory \emph{immediately} before a call to
172    |hsh_destroy|. */
173 
hsh_destroy(hsh_HashTable table)174 void hsh_destroy(hsh_HashTable table)
175 {
176 	_hsh_check(table, __func__);
177 	if (((tableType)table)->readonly)
178 		err_internal(__func__, "Attempt to destroy readonly table");
179 	_hsh_destroy_buckets(table);
180 	_hsh_destroy_table(table);
181 }
182 
_hsh_insert(hsh_HashTable table,unsigned long hash,const void * key,const void * datum)183 static void _hsh_insert(
184 	hsh_HashTable table,
185 	unsigned long hash,
186 	const void *key,
187 	const void *datum)
188 {
189 	tableType     t = (tableType)table;
190 	unsigned long h = hash % t->prime;
191 	bucketType    b;
192 
193 	_hsh_check(t, __func__);
194 
195 	b        = xmalloc(sizeof(struct bucket));
196 	b->key   = key;
197 	b->hash  = hash;
198 	b->datum = datum;
199 	b->next  = NULL;
200 
201 	if (t->buckets[h]) b->next = t->buckets[h];
202 	t->buckets[h] = b;
203 	++t->entries;
204 }
205 
206 /* \doc |hsh_insert| inserts a new |key| into the |table|.  If the
207    insertion is successful, zero is returned.  If the |key| already exists,
208    1 is returned.  Hence, the way to change the |datum| associated with a
209    |key| is first to call |hsh_delete|.
210 
211    If the internal representation of the hash table becomes more than half
212    full, its size is increased automatically.  At present, this requires
213    that all of the key pointers are copied into a new table.  Rehashing is
214    not required, however, since the hash values are stored for each key. */
215 
hsh_insert(hsh_HashTable table,const void * key,const void * datum)216 int hsh_insert(
217 	hsh_HashTable table,
218 	const void *key,
219 	const void *datum)
220 {
221 	tableType     t         = (tableType)table;
222 	unsigned long hashValue = t->hash(key);
223 	unsigned long h;
224 
225 	_hsh_check(t, __func__);
226 	if (t->readonly)
227 		err_internal(__func__, "Attempt to insert into readonly table");
228 
229 	/* Keep table less than half full */
230 	if (t->entries * 2 > t->prime) {
231 		tableType     new = _hsh_create(t->prime * 3, t->hash, t->compare);
232 		unsigned long i;
233 
234 		for (i = 0; i < t->prime; i++) {
235 			if (t->buckets[i]) {
236 				bucketType pt;
237 
238 				for (pt = t->buckets[i]; pt; pt = pt->next)
239 					_hsh_insert(new, pt->hash, pt->key, pt->datum);
240 			}
241 		}
242 
243 		/* fixup values */
244 		_hsh_destroy_buckets(t);
245 		t->prime   = new->prime;
246 		t->buckets = new->buckets;
247 		_hsh_destroy_table(new);
248 		++t->resizings;
249 	}
250 
251 	h = hashValue % t->prime;
252 
253 	if (t->buckets[h]) {		/* Assert uniqueness */
254 		bucketType pt;
255 
256 		for (pt = t->buckets[h]; pt; pt = pt->next)
257 			if (!t->compare(pt->key, key)) return 1;
258 	}
259 
260 	_hsh_insert(t, hashValue, key, datum);
261 	return 0;
262 }
263 
264 /* \doc |hsh_delete| removes a |key| and the associated datum from the
265    |table|.  Zero is returned if the |key| was present.  Otherwise, 1 is
266    returned. */
267 
hsh_delete(hsh_HashTable table,const void * key)268 int hsh_delete(hsh_HashTable table, const void *key)
269 {
270 	tableType     t = (tableType)table;
271 	unsigned long h = t->hash(key) % t->prime;
272 
273 	_hsh_check(t, __func__);
274 	if (t->readonly)
275 		err_internal(__func__, "Attempt to delete from readonly table");
276 
277 	if (t->buckets[h]) {
278 		bucketType pt;
279 		bucketType prev;
280 
281 		for (prev = NULL, pt = t->buckets[h]; pt; prev = pt, pt = pt->next)
282 			if (!t->compare(pt->key, key)) {
283 				--t->entries;
284 
285 				if (!prev) t->buckets[h] = pt->next;
286 				else       prev->next = pt->next;
287 
288 				xfree(pt);
289 				return 0;
290 			}
291 	}
292 
293 	return 1;
294 }
295 
296 
297 /* \doc |hsh_retrieve| retrieves the datum associated with a |key|.  If the
298    |key| is not present in the |table|, then "NULL" is returned. */
299 
hsh_retrieve(hsh_HashTable table,const void * key)300 const void *hsh_retrieve(hsh_HashTable table,
301 						 const void *key)
302 {
303 	tableType     t = (tableType)table;
304 	unsigned long h = t->hash(key) % t->prime;
305 
306 	_hsh_check(t, __func__);
307 
308 	++t->retrievals;
309 	if (t->buckets[h]) {
310 		bucketType pt;
311 		bucketType prev;
312 
313 		for (prev = NULL, pt = t->buckets[h]; pt; prev = pt, pt = pt->next)
314 			if (!t->compare(pt->key, key)) {
315 				if (!prev) {
316 					++t->hits;
317 				} else if (!t->readonly) {
318 					/* Self organize */
319 					prev->next    = pt->next;
320 					pt->next      = t->buckets[h];
321 					t->buckets[h] = pt;
322 				}
323 				return pt->datum;
324 			}
325 	}
326 
327 	++t->misses;
328 	return NULL;
329 }
330 
331 /* \doc |hsh_iterate| is used to iterate a function over every value in the
332    |table|.  The function, |iterator|, is passed the |key| and |datum| pair
333    for each entry in the table.  If |iterator| returns a non-zero value,
334    the iterations stop, and |hsh_iterate| returns non-zero.  Note that the
335    keys are in some arbitrary order, and that this order may change between
336    two successive calls to |hsh_iterate|. */
337 
hsh_iterate(hsh_HashTable table,int (* iterator)(const void * key,const void * datum))338 int hsh_iterate(hsh_HashTable table,
339 				int (*iterator)(const void *key,
340 								const void *datum))
341 {
342 	tableType     t = (tableType)table;
343 	unsigned long i;
344 	bucketType    pt;
345 	bucketType    next;		/* Save, because pt might vanish. */
346 
347 	_hsh_check(t, __func__);
348 
349 	for (i = 0; i < t->prime; i++) {
350 		if (t->buckets[i]) {
351 			for (pt = t->buckets[i]; pt; pt = next) {
352 				next = pt->next;
353 				if (iterator(pt->key, pt->datum))
354 					return 1;
355 			}
356 		}
357 	}
358 	return 0;
359 }
360 
361 /* \doc |hsh_iterate_arg| is used to iterate a function over every value in
362    the |table|.  The function, |iterator|, is passed the |key| and |datum|
363    pair for each entry in the table.  If |iterator| returns a non-zero
364    value, the iterations stop, and |hsh_iterate| returns non-zero.  Note
365    that the keys are in some arbitrary order, and that this order may
366    change between two successive calls to |hsh_iterate|. */
367 
hsh_iterate_arg(hsh_HashTable table,int (* iterator)(const void * key,const void * datum,void * arg),void * arg)368 int hsh_iterate_arg(hsh_HashTable table,
369 					int (*iterator)(const void *key,
370 									const void *datum,
371 									void *arg),
372 					void *arg)
373 {
374 	tableType     t = (tableType)table;
375 	unsigned long i;
376 	bucketType    pt;
377 	bucketType    next;		/* Save, because pt might vanish. */
378 
379 	_hsh_check(t, __func__);
380 
381 	for (i = 0; i < t->prime; i++) {
382 		if (t->buckets[i]) {
383 			for (pt = t->buckets[i]; pt; pt = next) {
384 				next = pt->next;
385 				if (iterator(pt->key, pt->datum, arg))
386 					return 1;
387 			}
388 		}
389 	}
390 	return 0;
391 }
392 
393 /* a function callable from hsh_iterate() to print key values */
_hsh_key_strings(const void * k,const void * d)394 static int _hsh_key_strings(const void *k, const void *d) {
395 	const char *s;
396 	static int i = 0;
397 
398 	if (k == NULL) { i=0; return 0; }
399 
400 	s = k;
401 	printf("%s  ",s);
402 	if ((i += strlen(s)+2) >= 60) { i=0; printf("\n"); }
403 	return 0;
404 }
405 
406 /* print all keys in table t as strings */
hsh_key_strings(hsh_HashTable t)407 void hsh_key_strings(hsh_HashTable t) {
408 	_hsh_key_strings(NULL,NULL);
409 	hsh_iterate(t,_hsh_key_strings);
410 	printf("\n");
411 }
412 
413 /* \doc |hsh_get_stats| returns statistics about the |table|.  The
414    |hsh_Stats| structure is shown in \grind{hsh_Stats}. */
415 
hsh_get_stats(hsh_HashTable table)416 hsh_Stats hsh_get_stats(hsh_HashTable table)
417 {
418 	tableType     t = (tableType)table;
419 	hsh_Stats     s = xmalloc(sizeof(struct hsh_Stats));
420 	unsigned long i;
421 	unsigned      count;
422 
423 	_hsh_check(t, __func__);
424 
425 	s->size           = t->prime;
426 	s->resizings      = t->resizings;
427 	s->entries        = 0;
428 	s->buckets_used   = 0;
429 	s->singletons     = 0;
430 	s->maximum_length = 0;
431 	s->retrievals     = t->retrievals;
432 	s->hits           = t->hits;
433 	s->misses         = t->misses;
434 
435 	for (i = 0; i < t->prime; i++) {
436 		if (t->buckets[i]) {
437 			bucketType pt;
438 
439 			++s->buckets_used;
440 			for (count = 0, pt = t->buckets[i]; pt; ++count, pt = pt->next);
441 			if (count == 1) ++s->singletons;
442 			s->maximum_length = max(s->maximum_length, count);
443 			s->entries += count;
444 		}
445 	}
446 	if (t->entries != s->entries)
447 		err_internal(__func__,
448 					 "Incorrect count for entries: %lu vs. %lu",
449 					 t->entries,
450 					 s->entries);
451 
452 	return s;
453 }
454 
455 /* \doc |hsh_print_stats| prints the statistics for |table| on the
456    specified |stream|.  If |stream| is "NULL", then "stdout" will be
457    used. */
458 
hsh_print_stats(hsh_HashTable table,FILE * stream)459 void hsh_print_stats(hsh_HashTable table, FILE *stream)
460 {
461 	FILE      *str = stream ? stream : stdout;
462 	hsh_Stats s    = hsh_get_stats(table);
463 
464 	_hsh_check(table, __func__);
465 
466 	fprintf(str, "Statistics for hash table at %p:\n", table);
467 	fprintf(str, "   %lu resizings to %lu total\n", s->resizings, s->size);
468 	fprintf(str, "   %lu entries (%lu buckets used, %lu without overflow)\n",
469 			s->entries, s->buckets_used, s->singletons);
470 	fprintf(str, "   maximum list length is %lu", s->maximum_length);
471 	if (s->buckets_used)
472 		fprintf(str, " (optimal is %.1f)\n",
473 				(double)s->entries / (double)s->buckets_used);
474 	else
475 		fprintf(str, "\n");
476 	fprintf(str, "   %lu retrievals (%lu from top, %lu failed)\n",
477 			s->retrievals, s->hits, s->misses);
478 
479 	xfree(s);			/* rare */
480 }
481 
hsh_string_hash(const void * key)482 unsigned long hsh_string_hash(const void *key)
483 {
484 	const char           *pt = (const char *)key;
485 	unsigned long        h  = 0;
486 
487 	if (!pt)
488 		err_internal(__func__, "String-valued keys may not be NULL");
489 
490 	while (*pt) {
491 		h += *pt++;
492 #if 0
493 		h *= 65599L;		/* prime near %$2^{16}$% */
494 #else
495 		h *= 2654435789U;		/* prime near %$\frac{\sqrt{5}-1}{2}2^{32}$% */
496 #endif
497 	}
498 
499 	return h;
500 }
501 
hsh_pointer_hash(const void * key)502 unsigned long hsh_pointer_hash(const void *key)
503 {
504 	const char           *pt;
505 	unsigned long        h   = 0;
506 	int                  i;
507 
508 #ifdef WORDS_BIGENDIAN
509 	pt = ((const char *)&key) + SIZEOF_VOID_P - 1;
510 #else
511 	pt = (const char *)&key;
512 #endif
513 
514 	for (i = 0; i < SIZEOF_VOID_P; i++) {
515 #ifdef WORDS_BIGENDIAN
516 		h += *pt--;
517 #else
518 		h += *pt++;
519 #endif
520 #if 0
521 		h *= 65599L;		/* prime near %$2^{16}$% */
522 #else
523 		h *= 2654435789U;		/* prime near %$\frac{\sqrt{5}-1}{2}2^{32}$% */
524 #endif
525 	}
526 
527 	return h & 0xffffffff;
528 }
529 
hsh_string_compare(const void * key1,const void * key2)530 int hsh_string_compare(const void *key1, const void *key2)
531 {
532 	if (!key1 || !key2)
533 		err_internal(__func__,
534 					 "String-valued keys may not be NULL: key1=%p, key2=%p",
535 					 key1, key2);
536 	return strcmp((const char *)key1, (const char *)key2);
537 }
538 
hsh_pointer_compare(const void * key1,const void * key2)539 int hsh_pointer_compare(const void *key1, const void *key2)
540 {
541 	intptr_t v1 = (const char *)key1 - (const char *)0;
542 	intptr_t v2 = (const char *)key2 - (const char *)0;
543 
544 	if (v1 < v2)
545 		return -1;
546 	else if (v1 > v2)
547 		return 1;
548 	else
549 		return 0;
550 }
551 
552 /* \doc |hsh_init_position| returns a position marker for some arbitary
553    first element in the table.  This marker can be used with
554    |hsh_next_position| and |hsh_get_position|. */
555 
hsh_init_position(hsh_HashTable table)556 hsh_Position hsh_init_position(hsh_HashTable table)
557 {
558 	tableType     t = (tableType)table;
559 	unsigned long i;
560 
561 	_hsh_check(t, __func__);
562 	for (i = 0; i < t->prime; i++) if (t->buckets[i]) {
563 			t->readonly = 1;
564 			return t->buckets[i];
565 		}
566 	return NULL;
567 }
568 
569 /* \doc |hsh_next_position| returns a position marker for the next element
570    in the table.  Elements are in arbitrary order based on their positions
571    in the hash table. */
572 
hsh_next_position(hsh_HashTable table,hsh_Position position)573 hsh_Position hsh_next_position(hsh_HashTable table, hsh_Position position)
574 {
575 	tableType     t = (tableType)table;
576 	bucketType    b = (bucketType)position;
577 	unsigned long i;
578 	unsigned long h;
579 
580 	_hsh_check(t, __func__);
581 
582 	if (!b){
583 		t->readonly = 0;
584 		return NULL;
585 	}
586 
587 	if (b->next) return b->next;
588 
589 	for (h = b->hash % t->prime, i = h + 1; i < t->prime; i++)
590 		if (t->buckets[i]) return t->buckets[i];
591 
592 	t->readonly = 0;
593 	return NULL;
594 }
595 
596 /* \doc |hsh_get_position| returns the datum associated with the |position|
597    marker, or "NULL" if there is no such datum.  |key| is set to the key
598    associated with this datum, or "NULL" is there is no such datum. */
599 
hsh_get_position(hsh_Position position,void ** key)600 void *hsh_get_position(hsh_Position position, void **key)
601 {
602 	bucketType b = (bucketType)position;
603 
604 	*key = NULL;
605 	if (!b) return NULL;
606 
607 	*key = __UNCONST(b->key);	/* Discard const */
608 	return __UNCONST(b->datum);	/* Discard const */
609 }
610 
611 /* \doc |hsh_readonly| sets the |readonly| flag for the |table| to |flag|.
612    |flag| should be 0 or 1.  The value of the previous flag is returned.
613    When a hash table is marked as readonly, self-organization of the
614    bucket-overflow lists will not take place, and any attempt to modify the
615    list (e.g., insertion or deletion) will result in an error. */
616 
hsh_readonly(hsh_HashTable table,int flag)617 int hsh_readonly(hsh_HashTable table, int flag)
618 {
619 	tableType t = (tableType)table;
620 	int       current;
621 
622 	_hsh_check(t, __func__);
623 
624 	current     = t->readonly;
625 	t->readonly = flag;
626 	return current;
627 }
628