1 /* hash.c -- Hash table routines for Khepera
2 * Created: Thu Nov 3 20:07:29 1994 by faith@dict.org
3 * Copyright 1994-1997, 1999, 2002 Rickard E. Faith (faith@dict.org)
4 * Copyright 2002-2008 Aleksey Cheusov (vle@gmx.net)
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be
15 * included in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 * \section{Hash Table Routines}
26 *
27 * \intro Generic hash table support is provided for storing generic data
28 * associated with keys. The hash table has prime length, with
29 * self-organizing linked lists \cite[pp.~398--9]{faith:Knuth73c} used for
30 * collision resolution. The hash table automatically grows as necessary to
31 * preserve efficient access.
32 *
33 */
34
35 #include "maaP.h"
36
37 typedef struct bucket {
38 const void *key;
39 unsigned long hash;
40 const void *datum;
41 struct bucket *next;
42 } *bucketType;
43
44 typedef struct table {
45 #if MAA_MAGIC
46 int magic;
47 #endif
48 unsigned long prime;
49 unsigned long entries;
50 bucketType *buckets;
51 unsigned long resizings;
52 unsigned long retrievals;
53 unsigned long hits;
54 unsigned long misses;
55 unsigned long (*hash)(const void *);
56 int (*compare)(const void *, const void *);
57 int readonly;
58 } *tableType;
59
_hsh_check(tableType t,const char * function)60 static void _hsh_check(tableType t, const char *function)
61 {
62 if (!t) err_internal(function, "table is null");
63 #if MAA_MAGIC
64 if (t->magic != HSH_MAGIC)
65 err_internal(function,
66 "Magic match failed: 0x%08x (should be 0x%08x)",
67 t->magic,
68 HSH_MAGIC);
69 #endif
70 if (!t->buckets)
71 err_internal(function, "no buckets");
72 }
73
_hsh_create(unsigned long seed,unsigned long (* hash)(const void *),int (* compare)(const void *,const void *))74 static hsh_HashTable _hsh_create(
75 unsigned long seed,
76 unsigned long (*hash)(const void *),
77 int (*compare)(const void *,
78 const void *))
79 {
80 tableType t;
81 unsigned long i;
82 unsigned long prime = prm_next_prime(seed);
83
84 t = xmalloc(sizeof(struct table));
85 #if MAA_MAGIC
86 t->magic = HSH_MAGIC;
87 #endif
88 t->prime = prime;
89 t->entries = 0;
90 t->buckets = xmalloc(prime * sizeof(struct bucket));
91 t->resizings = 0;
92 t->retrievals = 0;
93 t->hits = 0;
94 t->misses = 0;
95 t->hash = hash ? hash : hsh_string_hash;
96 t->compare = compare ? compare : hsh_string_compare;
97 t->readonly = 0;
98
99 for (i = 0; i < prime; i++) t->buckets[i] = NULL;
100
101 return t;
102 }
103
104 /* \doc The |hsh_create| function initilizes a generic hash table. Keys
105 and data are pointers to "void".
106
107 The internal representation of the hash table will grow automatically
108 when an insertion is performed and the table is more than half full.
109
110 The |hash| function should take a pointer to a |key| and return an
111 "unsigned long". If |hash| is "NULL", then the |key| is assumed to be a
112 pointer to a null-terminated string, and the function shown in
113 \grind{hsh_string_hash} will be used for |hash| (the algorithm for this
114 function is from \cite[p.~435]{faith:Aho88}).
115
116 The |compare| function should take a pair of pointers to keys and return
117 zero if the keys are equal and non-zero if the keys are not equal. If
118 |compare| is "NULL", then the keys are assumed to point to
119 null-terminated strings, and the |strcmp| function will be used for
120 |compare|.
121
122 Additionally, the |hsh_pointer_hash| and |hsh_pointer_compare| functions
123 are available and can be used to treat the \emph{value} of the "void"
124 pointer as the key. These functions are often useful for maintaining
125 sets of objects. */
126
hsh_create(unsigned long (* hash)(const void *),int (* compare)(const void *,const void *))127 hsh_HashTable hsh_create(
128 unsigned long (*hash)(const void *),
129 int (*compare)(const void *,
130 const void *))
131 {
132 return _hsh_create(0, hash, compare);
133 }
134
_hsh_destroy_buckets(hsh_HashTable table)135 static void _hsh_destroy_buckets(hsh_HashTable table)
136 {
137 unsigned long i;
138 tableType t = (tableType)table;
139
140 _hsh_check(t, __func__);
141 for (i = 0; i < t->prime; i++) {
142 bucketType b = t->buckets[i];
143
144 while (b) {
145 bucketType next = b->next;
146
147 xfree(b); /* terminal */
148 b = next;
149 }
150 }
151
152 xfree(t->buckets); /* terminal */
153 t->buckets = NULL;
154 }
155
_hsh_destroy_table(hsh_HashTable table)156 static void _hsh_destroy_table(hsh_HashTable table)
157 {
158 tableType t = (tableType)table;
159
160 #if MAA_MAGIC
161 t->magic = HSH_MAGIC_FREED;
162 #endif
163 xfree(t); /* terminal */
164 }
165
166 /* \doc |hsh_destroy| frees all of the memory associated with the hash
167 table.
168
169 The memory used by keys and data is \emph{not} freed---this memory is
170 the responsibility of the user. However, a call to |hsh_iterate|
171 can be used to free this memory \emph{immediately} before a call to
172 |hsh_destroy|. */
173
hsh_destroy(hsh_HashTable table)174 void hsh_destroy(hsh_HashTable table)
175 {
176 _hsh_check(table, __func__);
177 if (((tableType)table)->readonly)
178 err_internal(__func__, "Attempt to destroy readonly table");
179 _hsh_destroy_buckets(table);
180 _hsh_destroy_table(table);
181 }
182
_hsh_insert(hsh_HashTable table,unsigned long hash,const void * key,const void * datum)183 static void _hsh_insert(
184 hsh_HashTable table,
185 unsigned long hash,
186 const void *key,
187 const void *datum)
188 {
189 tableType t = (tableType)table;
190 unsigned long h = hash % t->prime;
191 bucketType b;
192
193 _hsh_check(t, __func__);
194
195 b = xmalloc(sizeof(struct bucket));
196 b->key = key;
197 b->hash = hash;
198 b->datum = datum;
199 b->next = NULL;
200
201 if (t->buckets[h]) b->next = t->buckets[h];
202 t->buckets[h] = b;
203 ++t->entries;
204 }
205
206 /* \doc |hsh_insert| inserts a new |key| into the |table|. If the
207 insertion is successful, zero is returned. If the |key| already exists,
208 1 is returned. Hence, the way to change the |datum| associated with a
209 |key| is first to call |hsh_delete|.
210
211 If the internal representation of the hash table becomes more than half
212 full, its size is increased automatically. At present, this requires
213 that all of the key pointers are copied into a new table. Rehashing is
214 not required, however, since the hash values are stored for each key. */
215
hsh_insert(hsh_HashTable table,const void * key,const void * datum)216 int hsh_insert(
217 hsh_HashTable table,
218 const void *key,
219 const void *datum)
220 {
221 tableType t = (tableType)table;
222 unsigned long hashValue = t->hash(key);
223 unsigned long h;
224
225 _hsh_check(t, __func__);
226 if (t->readonly)
227 err_internal(__func__, "Attempt to insert into readonly table");
228
229 /* Keep table less than half full */
230 if (t->entries * 2 > t->prime) {
231 tableType new = _hsh_create(t->prime * 3, t->hash, t->compare);
232 unsigned long i;
233
234 for (i = 0; i < t->prime; i++) {
235 if (t->buckets[i]) {
236 bucketType pt;
237
238 for (pt = t->buckets[i]; pt; pt = pt->next)
239 _hsh_insert(new, pt->hash, pt->key, pt->datum);
240 }
241 }
242
243 /* fixup values */
244 _hsh_destroy_buckets(t);
245 t->prime = new->prime;
246 t->buckets = new->buckets;
247 _hsh_destroy_table(new);
248 ++t->resizings;
249 }
250
251 h = hashValue % t->prime;
252
253 if (t->buckets[h]) { /* Assert uniqueness */
254 bucketType pt;
255
256 for (pt = t->buckets[h]; pt; pt = pt->next)
257 if (!t->compare(pt->key, key)) return 1;
258 }
259
260 _hsh_insert(t, hashValue, key, datum);
261 return 0;
262 }
263
264 /* \doc |hsh_delete| removes a |key| and the associated datum from the
265 |table|. Zero is returned if the |key| was present. Otherwise, 1 is
266 returned. */
267
hsh_delete(hsh_HashTable table,const void * key)268 int hsh_delete(hsh_HashTable table, const void *key)
269 {
270 tableType t = (tableType)table;
271 unsigned long h = t->hash(key) % t->prime;
272
273 _hsh_check(t, __func__);
274 if (t->readonly)
275 err_internal(__func__, "Attempt to delete from readonly table");
276
277 if (t->buckets[h]) {
278 bucketType pt;
279 bucketType prev;
280
281 for (prev = NULL, pt = t->buckets[h]; pt; prev = pt, pt = pt->next)
282 if (!t->compare(pt->key, key)) {
283 --t->entries;
284
285 if (!prev) t->buckets[h] = pt->next;
286 else prev->next = pt->next;
287
288 xfree(pt);
289 return 0;
290 }
291 }
292
293 return 1;
294 }
295
296
297 /* \doc |hsh_retrieve| retrieves the datum associated with a |key|. If the
298 |key| is not present in the |table|, then "NULL" is returned. */
299
hsh_retrieve(hsh_HashTable table,const void * key)300 const void *hsh_retrieve(hsh_HashTable table,
301 const void *key)
302 {
303 tableType t = (tableType)table;
304 unsigned long h = t->hash(key) % t->prime;
305
306 _hsh_check(t, __func__);
307
308 ++t->retrievals;
309 if (t->buckets[h]) {
310 bucketType pt;
311 bucketType prev;
312
313 for (prev = NULL, pt = t->buckets[h]; pt; prev = pt, pt = pt->next)
314 if (!t->compare(pt->key, key)) {
315 if (!prev) {
316 ++t->hits;
317 } else if (!t->readonly) {
318 /* Self organize */
319 prev->next = pt->next;
320 pt->next = t->buckets[h];
321 t->buckets[h] = pt;
322 }
323 return pt->datum;
324 }
325 }
326
327 ++t->misses;
328 return NULL;
329 }
330
331 /* \doc |hsh_iterate| is used to iterate a function over every value in the
332 |table|. The function, |iterator|, is passed the |key| and |datum| pair
333 for each entry in the table. If |iterator| returns a non-zero value,
334 the iterations stop, and |hsh_iterate| returns non-zero. Note that the
335 keys are in some arbitrary order, and that this order may change between
336 two successive calls to |hsh_iterate|. */
337
hsh_iterate(hsh_HashTable table,int (* iterator)(const void * key,const void * datum))338 int hsh_iterate(hsh_HashTable table,
339 int (*iterator)(const void *key,
340 const void *datum))
341 {
342 tableType t = (tableType)table;
343 unsigned long i;
344 bucketType pt;
345 bucketType next; /* Save, because pt might vanish. */
346
347 _hsh_check(t, __func__);
348
349 for (i = 0; i < t->prime; i++) {
350 if (t->buckets[i]) {
351 for (pt = t->buckets[i]; pt; pt = next) {
352 next = pt->next;
353 if (iterator(pt->key, pt->datum))
354 return 1;
355 }
356 }
357 }
358 return 0;
359 }
360
361 /* \doc |hsh_iterate_arg| is used to iterate a function over every value in
362 the |table|. The function, |iterator|, is passed the |key| and |datum|
363 pair for each entry in the table. If |iterator| returns a non-zero
364 value, the iterations stop, and |hsh_iterate| returns non-zero. Note
365 that the keys are in some arbitrary order, and that this order may
366 change between two successive calls to |hsh_iterate|. */
367
hsh_iterate_arg(hsh_HashTable table,int (* iterator)(const void * key,const void * datum,void * arg),void * arg)368 int hsh_iterate_arg(hsh_HashTable table,
369 int (*iterator)(const void *key,
370 const void *datum,
371 void *arg),
372 void *arg)
373 {
374 tableType t = (tableType)table;
375 unsigned long i;
376 bucketType pt;
377 bucketType next; /* Save, because pt might vanish. */
378
379 _hsh_check(t, __func__);
380
381 for (i = 0; i < t->prime; i++) {
382 if (t->buckets[i]) {
383 for (pt = t->buckets[i]; pt; pt = next) {
384 next = pt->next;
385 if (iterator(pt->key, pt->datum, arg))
386 return 1;
387 }
388 }
389 }
390 return 0;
391 }
392
393 /* a function callable from hsh_iterate() to print key values */
_hsh_key_strings(const void * k,const void * d)394 static int _hsh_key_strings(const void *k, const void *d) {
395 const char *s;
396 static int i = 0;
397
398 if (k == NULL) { i=0; return 0; }
399
400 s = k;
401 printf("%s ",s);
402 if ((i += strlen(s)+2) >= 60) { i=0; printf("\n"); }
403 return 0;
404 }
405
406 /* print all keys in table t as strings */
hsh_key_strings(hsh_HashTable t)407 void hsh_key_strings(hsh_HashTable t) {
408 _hsh_key_strings(NULL,NULL);
409 hsh_iterate(t,_hsh_key_strings);
410 printf("\n");
411 }
412
413 /* \doc |hsh_get_stats| returns statistics about the |table|. The
414 |hsh_Stats| structure is shown in \grind{hsh_Stats}. */
415
hsh_get_stats(hsh_HashTable table)416 hsh_Stats hsh_get_stats(hsh_HashTable table)
417 {
418 tableType t = (tableType)table;
419 hsh_Stats s = xmalloc(sizeof(struct hsh_Stats));
420 unsigned long i;
421 unsigned count;
422
423 _hsh_check(t, __func__);
424
425 s->size = t->prime;
426 s->resizings = t->resizings;
427 s->entries = 0;
428 s->buckets_used = 0;
429 s->singletons = 0;
430 s->maximum_length = 0;
431 s->retrievals = t->retrievals;
432 s->hits = t->hits;
433 s->misses = t->misses;
434
435 for (i = 0; i < t->prime; i++) {
436 if (t->buckets[i]) {
437 bucketType pt;
438
439 ++s->buckets_used;
440 for (count = 0, pt = t->buckets[i]; pt; ++count, pt = pt->next);
441 if (count == 1) ++s->singletons;
442 s->maximum_length = max(s->maximum_length, count);
443 s->entries += count;
444 }
445 }
446 if (t->entries != s->entries)
447 err_internal(__func__,
448 "Incorrect count for entries: %lu vs. %lu",
449 t->entries,
450 s->entries);
451
452 return s;
453 }
454
455 /* \doc |hsh_print_stats| prints the statistics for |table| on the
456 specified |stream|. If |stream| is "NULL", then "stdout" will be
457 used. */
458
hsh_print_stats(hsh_HashTable table,FILE * stream)459 void hsh_print_stats(hsh_HashTable table, FILE *stream)
460 {
461 FILE *str = stream ? stream : stdout;
462 hsh_Stats s = hsh_get_stats(table);
463
464 _hsh_check(table, __func__);
465
466 fprintf(str, "Statistics for hash table at %p:\n", table);
467 fprintf(str, " %lu resizings to %lu total\n", s->resizings, s->size);
468 fprintf(str, " %lu entries (%lu buckets used, %lu without overflow)\n",
469 s->entries, s->buckets_used, s->singletons);
470 fprintf(str, " maximum list length is %lu", s->maximum_length);
471 if (s->buckets_used)
472 fprintf(str, " (optimal is %.1f)\n",
473 (double)s->entries / (double)s->buckets_used);
474 else
475 fprintf(str, "\n");
476 fprintf(str, " %lu retrievals (%lu from top, %lu failed)\n",
477 s->retrievals, s->hits, s->misses);
478
479 xfree(s); /* rare */
480 }
481
hsh_string_hash(const void * key)482 unsigned long hsh_string_hash(const void *key)
483 {
484 const char *pt = (const char *)key;
485 unsigned long h = 0;
486
487 if (!pt)
488 err_internal(__func__, "String-valued keys may not be NULL");
489
490 while (*pt) {
491 h += *pt++;
492 #if 0
493 h *= 65599L; /* prime near %$2^{16}$% */
494 #else
495 h *= 2654435789U; /* prime near %$\frac{\sqrt{5}-1}{2}2^{32}$% */
496 #endif
497 }
498
499 return h;
500 }
501
hsh_pointer_hash(const void * key)502 unsigned long hsh_pointer_hash(const void *key)
503 {
504 const char *pt;
505 unsigned long h = 0;
506 int i;
507
508 #ifdef WORDS_BIGENDIAN
509 pt = ((const char *)&key) + SIZEOF_VOID_P - 1;
510 #else
511 pt = (const char *)&key;
512 #endif
513
514 for (i = 0; i < SIZEOF_VOID_P; i++) {
515 #ifdef WORDS_BIGENDIAN
516 h += *pt--;
517 #else
518 h += *pt++;
519 #endif
520 #if 0
521 h *= 65599L; /* prime near %$2^{16}$% */
522 #else
523 h *= 2654435789U; /* prime near %$\frac{\sqrt{5}-1}{2}2^{32}$% */
524 #endif
525 }
526
527 return h & 0xffffffff;
528 }
529
hsh_string_compare(const void * key1,const void * key2)530 int hsh_string_compare(const void *key1, const void *key2)
531 {
532 if (!key1 || !key2)
533 err_internal(__func__,
534 "String-valued keys may not be NULL: key1=%p, key2=%p",
535 key1, key2);
536 return strcmp((const char *)key1, (const char *)key2);
537 }
538
hsh_pointer_compare(const void * key1,const void * key2)539 int hsh_pointer_compare(const void *key1, const void *key2)
540 {
541 intptr_t v1 = (const char *)key1 - (const char *)0;
542 intptr_t v2 = (const char *)key2 - (const char *)0;
543
544 if (v1 < v2)
545 return -1;
546 else if (v1 > v2)
547 return 1;
548 else
549 return 0;
550 }
551
552 /* \doc |hsh_init_position| returns a position marker for some arbitary
553 first element in the table. This marker can be used with
554 |hsh_next_position| and |hsh_get_position|. */
555
hsh_init_position(hsh_HashTable table)556 hsh_Position hsh_init_position(hsh_HashTable table)
557 {
558 tableType t = (tableType)table;
559 unsigned long i;
560
561 _hsh_check(t, __func__);
562 for (i = 0; i < t->prime; i++) if (t->buckets[i]) {
563 t->readonly = 1;
564 return t->buckets[i];
565 }
566 return NULL;
567 }
568
569 /* \doc |hsh_next_position| returns a position marker for the next element
570 in the table. Elements are in arbitrary order based on their positions
571 in the hash table. */
572
hsh_next_position(hsh_HashTable table,hsh_Position position)573 hsh_Position hsh_next_position(hsh_HashTable table, hsh_Position position)
574 {
575 tableType t = (tableType)table;
576 bucketType b = (bucketType)position;
577 unsigned long i;
578 unsigned long h;
579
580 _hsh_check(t, __func__);
581
582 if (!b){
583 t->readonly = 0;
584 return NULL;
585 }
586
587 if (b->next) return b->next;
588
589 for (h = b->hash % t->prime, i = h + 1; i < t->prime; i++)
590 if (t->buckets[i]) return t->buckets[i];
591
592 t->readonly = 0;
593 return NULL;
594 }
595
596 /* \doc |hsh_get_position| returns the datum associated with the |position|
597 marker, or "NULL" if there is no such datum. |key| is set to the key
598 associated with this datum, or "NULL" is there is no such datum. */
599
hsh_get_position(hsh_Position position,void ** key)600 void *hsh_get_position(hsh_Position position, void **key)
601 {
602 bucketType b = (bucketType)position;
603
604 *key = NULL;
605 if (!b) return NULL;
606
607 *key = __UNCONST(b->key); /* Discard const */
608 return __UNCONST(b->datum); /* Discard const */
609 }
610
611 /* \doc |hsh_readonly| sets the |readonly| flag for the |table| to |flag|.
612 |flag| should be 0 or 1. The value of the previous flag is returned.
613 When a hash table is marked as readonly, self-organization of the
614 bucket-overflow lists will not take place, and any attempt to modify the
615 list (e.g., insertion or deletion) will result in an error. */
616
hsh_readonly(hsh_HashTable table,int flag)617 int hsh_readonly(hsh_HashTable table, int flag)
618 {
619 tableType t = (tableType)table;
620 int current;
621
622 _hsh_check(t, __func__);
623
624 current = t->readonly;
625 t->readonly = flag;
626 return current;
627 }
628