1 /* (C) Copyright 2001, 2002, 2003, 2004, 2005 Stijn van Dongen
2 * (C) Copyright 2006, 2007, 2008, 2009, 2010 Stijn van Dongen
3 *
4 * This file is part of tingea. You can redistribute and/or modify tingea
5 * under the terms of the GNU General Public License; either version 3 of the
6 * License or (at your option) any later version. You should have received a
7 * copy of the GPL along with tingea, in the file COPYING.
8 */
9
10
11 /* TODO (?)
12 *
13 * integer overflow handling.
14 *
15 * better bucket fill statistics.
16 *
17 * nested hashes.
18 * could lump
19 * options cmp hash src_link
20 * into a structure and make hashes share them.
21 */
22
23 #include <math.h>
24 #include <stdio.h>
25 #include <string.h>
26 #include <stdlib.h>
27 #include <unistd.h>
28
29 #include "hash.h"
30 #include "minmax.h"
31 #include "types.h"
32 #include "inttypes.h"
33 #include "ting.h"
34 #include "err.h"
35 #include "alloc.h"
36 #include "gralloc.h"
37 #include "compile.h"
38 #include "list.h"
39
40
41 /* the distribution of bit counts over all 32-bit keys */
42 int promilles[32] =
43 { 0, 0, 0, 0, 0, 0, 0, 0
44 , 2, 7, 15, 30, 53, 81,110,131
45 ,140,131,110, 81, 53, 30, 15, 7
46 , 2, 0, 0, 0, 0, 0, 0, 0
47 } ;
48
49
50 #ifndef TINGEA_HASH_CACHE
51 # define TINGEA_HASH_CACHE 0
52 #endif
53
54
55 typedef struct hash_link
56 { struct hash_link* next
57 ; mcxKV kv
58 #if TINGEA_HASH_CACHE
59 ; u32 hv
60 #endif
61 ;
62 } hash_link ;
63
64
65 typedef struct bucket
66 { hash_link* base
67 ;
68 } mcx_bucket ;
69
70
71 /* For further optimization work, options
72 * options
73 * load
74 * cmp
75 * hash
76 * src_link
77 * can be shared between different hashes (e.g.
78 * with multidimensional hashes). Consider
79 * making src_link file static variable.
80 */
81
82 struct mcxHash
83 { dim n_buckets /* 2^n_bits */
84 ; mcx_bucket *buckets
85 ; dim n_entries
86
87 ; mcxbits options
88
89 ; int (*cmp) (const void *a, const void *b)
90 ; u32 (*hash) (const void *a)
91
92 ; mcxGrim* src_link
93
94 ; float load
95 ;
96 } ;
97
98
99 struct mcxHashWalk
100 { mcxHash* hash
101 ; dim i_bucket
102 ; hash_link* link
103 ;
104 } ;
105
106
mcx_bucket_init(void * buck)107 void* mcx_bucket_init
108 ( void* buck
109 )
110 { ((mcx_bucket*) buck)->base = NULL
111 ; return NULL
112 ; }
113
114 void bitprint
115 ( u32 key
116 , FILE* fp
117 ) ;
118
119 int bitcount
120 ( u32 key
121 ) ;
122
123
mcxHashNew(dim n_buckets,u32 (* hash)(const void * a),int (* cmp)(const void * a,const void * b))124 mcxHash* mcxHashNew
125 ( dim n_buckets
126 , u32 (*hash)(const void *a)
127 , int (*cmp) (const void *a, const void *b)
128 )
129 { mcxHash *h
130 ; mcxbool ok = FALSE
131
132 ; u8 n_bits = 0
133
134 ; if (!n_buckets)
135 { mcxErr("mcxHashNew strange", "void alloc request")
136 ; n_buckets = 2
137 ; }
138
139 if (!(h = mcxAlloc(sizeof(mcxHash), RETURN_ON_FAIL)))
140 return NULL
141
142 ; while(n_buckets)
143 { n_buckets >>= 1
144 ; n_bits++
145 ; }
146
147 h->load = 0.5
148 ; h->n_entries = 0
149 ; h->n_buckets = n_buckets = (1 << n_bits)
150 ; h->cmp = cmp
151 ; h->hash = hash
152 ; h->options = MCX_HASH_OPT_DEFAULTS
153
154 ; h->src_link = NULL
155
156 ; while (1) /* fixme 2nd arg below, have better choice? */
157 { h->src_link = mcxGrimNew(sizeof(hash_link), h->n_buckets, MCX_GRIM_ARITHMETIC)
158 ; if (!h->src_link)
159 break
160
161 ; if
162 (! ( h->buckets
163 = mcxNAlloc
164 ( h->n_buckets
165 , sizeof(mcx_bucket)
166 , mcx_bucket_init
167 , RETURN_ON_FAIL
168 ) ) )
169 break
170
171 ; ok = TRUE
172 ; break
173 ; }
174
175 if (!ok)
176 { mcxGrimFree(&(h->src_link))
177 ; mcxFree(h)
178 ; return NULL
179 ; }
180
181 return h
182 ; }
183
184
mcxHashMemSize(mcxHash * hash)185 dim mcxHashMemSize
186 ( mcxHash* hash
187 )
188 { return
189 mcxGrimMemSize(hash->src_link)
190 + sizeof(mcx_bucket) * hash->n_buckets
191 ; }
192
193
mcxHashGetSettings(mcxHash * hash,mcxHashSettings * settings)194 void mcxHashGetSettings
195 ( mcxHash* hash
196 , mcxHashSettings* settings
197 )
198 { settings->n_buckets = hash->n_buckets
199 ; settings->load = hash->load
200 ; settings->n_entries = hash->n_entries
201 ; settings->options = hash->options
202 ; }
203
204
hash_link_size(hash_link * link)205 static dim hash_link_size
206 ( hash_link* link
207 )
208 { dim s = 0
209 ; while(link)
210 link = link->next
211 , s++
212 ; return(s)
213 ; }
214
215
mcxHashStats(FILE * fp,mcxHash * h)216 void mcxHashStats
217 ( FILE* fp
218 , mcxHash* h
219 )
220 { dim buckets = h->n_buckets
221 ; dim buckets_used = 0
222 ; float ctr = 0.0
223 ; float cb = 0.0
224 ; dim max = 0
225 ; dim entries = 0
226 ; const char* me = "mcxHashStats"
227
228 ; int j, k, distr[32]
229 ; mcx_bucket *buck
230
231 ; for (j=0;j<32;j++)
232 distr[j] = 0
233
234 ; for (buck=h->buckets; buck<h->buckets + h->n_buckets; buck++)
235 { dim d = hash_link_size(buck->base)
236 ; hash_link* this= buck->base
237
238 ; if (d)
239 { buckets_used++
240 ; entries += d
241 ; ctr += (float) d * d
242 ; cb += (float) d * d * d
243 ; max = MCX_MAX(max, d)
244 ; }
245
246 while(this)
247 { u32 u = (h->hash)(this->kv.key)
248 ; int ct = bitcount(u)
249 ; this = this->next
250 ; distr[ct]++
251 ;if (0) fprintf(stderr, "bucket [%d] key [%s]\n", (int)d, ((mcxTing*) this->kv.key)->str)
252 ; }
253 }
254
255 ctr = ctr / MCX_MAX(1, entries)
256 ; cb = sqrt(cb / MCX_MAX(1, entries))
257
258 ; if (buckets && buckets_used)
259 mcxTellf
260 ( fp
261 , me
262 , "%4.2f bucket usage (%ld available, %ld used, %ld entries)"
263 , (double) ((double) buckets_used) / buckets
264 , (long) buckets
265 , (long) buckets_used
266 , (long) entries
267 )
268 , mcxTellf
269 ( fp
270 , me
271 , "bucket average: %.2f, center: %.2f, cube: %.2f, max: %ld"
272 , (double) entries / ((double) buckets_used)
273 , (double) ctr
274 , (double) cb
275 , (long) max
276 )
277
278 ; mcxTellf(fp, me, "bit distribution (promilles):")
279 ; fprintf
280 ( fp
281 , " %-37s %s\n"
282 , "Current bit distribution"
283 , "Ideally random distribution"
284 )
285 ; for (k=0;k<4;k++)
286 { for (j=k*8;j<(k+1)*8;j++)
287 fprintf(fp, "%3.0f ", entries ? (1000 * (float)distr[j]) / entries : 0.0)
288 ; fprintf(fp, " ");
289 ; for (j=k*8;j<(k+1)*8;j++)
290 fprintf(fp, "%3d ", promilles[j])
291 ; fprintf(fp, "\n")
292 ; }
293 mcxTellf(fp, me, "link count: %ld", (long) (mcxGrimCount(h->src_link)))
294 ; mcxTellf(fp, me, "link mem count: %ld", (long) (mcxGrimMemSize(h->src_link)))
295
296 ; mcxTellf(fp, me, "done")
297 ; }
298
299
mcxHashSetOpts(mcxHash * h,double load,int options)300 void mcxHashSetOpts
301 ( mcxHash* h
302 , double load
303 , int options
304 )
305 { if (options >= 0)
306 h->options |= options
307 /* fixme; are there states in which either of these can be corrupting ? */
308 ; h->load = load
309 ; }
310
311
mcxHashFreeScalar(void * scalar cpl__unused)312 void mcxHashFreeScalar
313 ( void* scalar cpl__unused
314 )
315 { /* this triggers freeing of kv.key or kv.val */
316 ; }
317
318
mcxHashFree(mcxHash ** hpp,void freekey (void * key),void freeval (void * key))319 void mcxHashFree
320 ( mcxHash** hpp
321 , void freekey(void* key)
322 , void freeval(void* key)
323 )
324 { mcxHash* h = *hpp
325 ; mcx_bucket* buck = h ? h->buckets : NULL
326 ; dim d = h ? h->n_buckets : 0
327
328 ; if (!h)
329 return
330
331 ; if (freekey || freeval)
332 { while (d-- > 0) /* careful with unsignedness */
333 { hash_link* link = (buck++)->base
334
335 ; while(link)
336 { void* key = link->kv.key
337 ; void* val = link->kv.val
338 ; if (freekey && key)
339 freekey(key)
340 , mcxFree(key)
341 ; if (freeval && val)
342 freeval(val)
343 , mcxFree(val)
344 ; link = link->next
345 ; }
346 }
347 }
348
349 mcxGrimFree(&h->src_link)
350 ; mcxFree(h->buckets)
351 ; mcxFree(h)
352 ; *hpp = NULL
353 ; }
354
355
356 #define MCX_HASH_DOUBLING MCX_HASH_OPT_UNUSED
357
358
359 #if TINGEA_HASH_CACHE
360
mcx_bucket_search(mcxHash * h,void * ob,mcxmode ACTION,u32 * hashval)361 static hash_link* mcx_bucket_search
362 ( mcxHash* h
363 , void* ob
364 , mcxmode ACTION
365 , u32* hashval
366 )
367 { u32 thishash = hashval ? *hashval : (h->hash)(ob)
368 ; mcx_bucket *buck = h->buckets + (thishash & (h->n_buckets-1))
369 ; hash_link* link = buck->base, *prev = NULL, *new
370 ; int delta = 0
371
372 ; while
373 ( link
374 && ( link->hv != thishash
375 || h->cmp(ob, link->kv.key)
376 )
377 )
378 prev = link
379 , link = link->next
380
381 ; if (link && ACTION == MCX_DATUM_DELETE)
382 { if (buck->base == link)
383 buck->base = link->next
384 ; else
385 prev->next = link->next
386 ; delta = -1
387 ; mcxGrimLet(h->src_link, link)
388 /* we return link below though */
389 ; }
390 else if (!link)
391 { if (ACTION == MCX_DATUM_FIND || ACTION == MCX_DATUM_DELETE)
392 link = NULL
393
394 ; else if (ACTION == MCX_DATUM_INSERT)
395 { new = mcxGrimGet(h->src_link) /* fixme could be NULL */
396 ; new->next = NULL
397 ; new->kv.val = NULL
398 ; new->kv.key = ob
399 ; new->hv = thishash
400
401 ; if (!buck->base)
402 buck->base = new
403 /* in TINGEA_HASH_CACHE case we always append */
404 ; else
405 new->next = prev->next
406 , prev->next = new
407 ; delta = 1
408 ; link = new
409 ; }
410 }
411
412 h->n_entries += delta
413 ; return link
414 ; }
415
416 #else
417
mcx_bucket_search(mcxHash * h,void * ob,mcxmode ACTION,u32 * hashval)418 static hash_link* mcx_bucket_search
419 ( mcxHash* h
420 , void* ob
421 , mcxmode ACTION
422 , u32* hashval
423 )
424 { u32 thishash = hashval ? *hashval : (h->hash)(ob)
425 ; mcx_bucket *buck = h->buckets + (thishash & (h->n_buckets-1))
426 ; hash_link* link = buck->base, *prev = NULL, *new
427 ; int c = 1
428 ; int delta = 0
429
430 ; while
431 ( link
432 && (c = h->cmp(ob, link->kv.key)) > 0
433 )
434 prev = link
435 , link = link->next
436
437 ; if (!c && ACTION == MCX_DATUM_DELETE)
438 { if (buck->base == link)
439 buck->base = link->next
440 ; else
441 prev->next = link->next
442 ; delta = -1
443 ; mcxGrimLet(h->src_link, link)
444 /* we return link below though */
445 ; }
446 else if (!link || c < 0)
447 { if (ACTION == MCX_DATUM_FIND || ACTION == MCX_DATUM_DELETE)
448 link = NULL
449
450 ; else if (ACTION == MCX_DATUM_INSERT)
451 { new = mcxGrimGet(h->src_link) /* fixme could be NULL */
452 ; new->next = NULL
453 ; new->kv.val = NULL
454 ; new->kv.key = ob
455
456 ; if (!buck->base)
457 buck->base = new
458 ; else if (link == buck->base)
459 new->next = buck->base
460 , buck->base = new
461 ; else
462 new->next = prev->next
463 , prev->next = new
464 ; delta = 1
465 ; link = new
466 ; }
467 }
468
469 h->n_entries += delta
470 ; return link
471 ; }
472
473 #endif
474
475
476 static mcxstatus mcx_hash_double
477 ( mcxHash* h
478 ) ;
479
480
mcxHashSearchx(void * key,mcxHash * h,mcxmode ACTION,int * delta)481 mcxKV* mcxHashSearchx
482 ( void* key
483 , mcxHash* h
484 , mcxmode ACTION
485 , int* delta
486 )
487 { hash_link *link
488 ; dim n_entries = h->n_entries
489
490 ; if
491 ( h->load * h->n_buckets < h->n_entries
492 && !(h->options & (MCX_HASH_OPT_CONSTANT | MCX_HASH_DOUBLING))
493 && mcx_hash_double(h)
494 )
495 mcxErr("mcxHashSearch", "cannot double hash")
496
497 ; link = mcx_bucket_search(h, key, ACTION, NULL)
498
499 ; if (delta)
500 *delta = h->n_entries < n_entries ? -1 : (int) (h->n_entries - n_entries)
501
502 ; return link ? &link->kv : NULL
503 ; }
504
505
506 enum
507 { ARRAY_OF_KEY
508 , ARRAY_OF_KV
509 } ;
510
511
mcxHashApply(mcxHash * hash,void (* cb)(const void * key,void * val,void * data),void * data)512 void mcxHashApply
513 ( mcxHash* hash
514 , void (*cb)(const void* key, void* val, void* data)
515 , void* data
516 )
517 { mcxHashWalk* walk = mcxHashWalkInit(hash)
518 ; mcxKV* kv
519 ; dim i_bucket
520 ; while ((kv = mcxHashWalkStep(walk, &i_bucket)))
521 cb(kv->key, kv->val, data)
522 ; mcxHashWalkFree(&walk)
523 ; }
524
525
hash_array(mcxHash * hash,dim * n_entries,int (* cmp)(const void *,const void *),mcxbits opts cpl__unused,mcxenum mode)526 static void** hash_array
527 ( mcxHash* hash
528 , dim* n_entries
529 , int (*cmp)(const void*, const void*)
530 , mcxbits opts cpl__unused
531 , mcxenum mode
532 )
533 { void** obs = mcxAlloc(sizeof(void*) * hash->n_entries, RETURN_ON_FAIL)
534 ; dim d = 0
535 ; mcxKV* kv
536 ; const char* me = mode == ARRAY_OF_KEY ? "mcxHashKeys" : "mcxHashKVs"
537
538 ; mcxHashWalk* walk = mcxHashWalkInit(hash)
539
540 ; if (!walk || !obs)
541 return NULL
542
543 ; while ((kv = mcxHashWalkStep(walk, NULL))) /* fixme extract */
544 { if (d >= hash->n_entries)
545 { mcxErr
546 ( me
547 , "PANIC inconsistent state (n_entries %ld)"
548 , (long) hash->n_entries
549 )
550 ; break
551 ; }
552 obs[d] = mode == ARRAY_OF_KEY ? kv->key : kv
553 ; d++
554 ; }
555 if (d != hash->n_entries)
556 mcxErr(me, "PANIC inconsistent state (n_entries %lu)", (ulong) hash->n_entries)
557
558 ; if (cmp)
559 qsort(obs, d, sizeof(void*), cmp)
560 ; mcxHashWalkFree(&walk)
561
562 ; *n_entries = d
563 ; return obs
564 ; }
565
566
567
mcxHashKeys(mcxHash * hash,dim * n_entries,int (* cmp)(const void *,const void *),mcxbits opts)568 void** mcxHashKeys
569 ( mcxHash* hash
570 , dim* n_entries
571 , int (*cmp)(const void*, const void*)
572 , mcxbits opts /* unused yet */
573 )
574 { return hash_array(hash, n_entries, cmp, opts, ARRAY_OF_KEY)
575 ; }
576
577
578
mcxHashKVs(mcxHash * hash,dim * n_entries,int (* cmp)(const void *,const void *),mcxbits opts)579 void** mcxHashKVs
580 ( mcxHash* hash
581 , dim* n_entries
582 , int (*cmp)(const void*, const void*)
583 , mcxbits opts /* unused yet */
584 )
585 { return hash_array(hash, n_entries, cmp, opts, ARRAY_OF_KV)
586 ; }
587
588
589
mcxHashWalkStep(mcxHashWalk * walk,dim * i_bucket)590 mcxKV* mcxHashWalkStep
591 ( mcxHashWalk *walk
592 , dim *i_bucket
593 )
594 { hash_link* step = walk->link
595
596 ; while (!step && ++walk->i_bucket < walk->hash->n_buckets)
597 step = (walk->hash->buckets+walk->i_bucket)->base
598
599 ; if (step)
600 { walk->link = step->next
601 ; if (i_bucket)
602 *i_bucket = walk->i_bucket
603 ; return &step->kv
604 ; }
605 return NULL
606 ; }
607
608
mcxHashWalkInit(mcxHash * h)609 mcxHashWalk* mcxHashWalkInit
610 ( mcxHash *h
611 )
612 { mcxHashWalk* walk = mcxAlloc(sizeof *walk, RETURN_ON_FAIL)
613 ; if (!walk)
614 return NULL
615
616 ; walk->hash = h
617
618 ; if (!h || !h->buckets)
619 { mcxFree(walk)
620 ; return NULL
621 ; }
622
623 walk->i_bucket = 0
624 ; walk->link = (h->buckets+0)->base
625 ; return walk
626 ; }
627
628
mcxHashWalkFree(mcxHashWalk ** walkpp)629 void mcxHashWalkFree
630 ( mcxHashWalk **walkpp
631 )
632 { mcxFree(*walkpp)
633 ; *walkpp = NULL
634 ; }
635
636
mcxHashMerge(mcxHash * h1,mcxHash * h2,mcxHash * hd,void * merge (void * val1,void * val2))637 mcxHash* mcxHashMerge
638 ( mcxHash* h1
639 , mcxHash* h2
640 , mcxHash* hd /* hash destination */
641 , void* merge(void* val1, void* val2)
642 )
643 { mcxHash* ha[2] /* hash array */
644 ; mcxHash* h
645 ; int i
646
647 ; if (!h1 || !h2)
648 mcxDie(1, "mcxHashMerge FATAL", "clone functionality not yet supported")
649
650 /*
651 * fixme/note I am comparing fie pointers here, is that ok?
652 */
653 ; if (h1->hash != h2->hash || h1->cmp != h2->cmp)
654 mcxErr("mcxHashMerge WARNING", "non matching hash or cmp fie")
655
656 ; if (merge)
657 mcxErr("mcxHashMerge WARNING", "merge functionality not yet supported")
658
659 ; hd = hd
660 ? hd
661 : mcxHashNew
662 ( h1->n_entries + h2->n_entries
663 , h1->hash
664 , h1->cmp
665 )
666
667 ; if (!hd)
668 return NULL
669
670 ; ha[0] = h1
671 ; ha[1] = h2
672
673 ; for (i=0;i<2;i++)
674 { h = ha[i]
675 ; if (h != hd)
676 { mcx_bucket* buck
677
678 ; for (buck = h->buckets; buck<h->buckets + h->n_buckets; buck++)
679 { hash_link* this = buck->base
680
681 ; while(this)
682 { mcxKV* kv = mcxHashSearch(this->kv.key, hd, MCX_DATUM_INSERT)
683 ; if (!kv)
684 return NULL
685 /* note/fixme: cannot free hd, don't have key/val free functions */
686 ; if (!kv->val)
687 kv->val = this->kv.val
688 ; this = this->next
689 ; }
690 }
691 }
692 }
693
694 return hd
695 ; }
696
697
mcx_hash_double(mcxHash * h)698 static mcxstatus mcx_hash_double
699 ( mcxHash* h
700 )
701 { mcx_bucket* ole_bucket = h->buckets
702 ; mcx_bucket* ole_buckets = h->buckets
703 ; dim d = h->n_buckets
704 ; dim n_fail = 0
705
706 ; if (h->options & MCX_HASH_DOUBLING) /* called before */
707 { mcxErr("mcx_hash_double PANIC", "double trouble")
708 ; return STATUS_FAIL
709 ; }
710
711 h->options |= MCX_HASH_DOUBLING
712
713 ; if
714 (! ( h->buckets
715 = mcxNAlloc
716 ( 2 * h->n_buckets
717 , sizeof(mcx_bucket)
718 , mcx_bucket_init
719 , RETURN_ON_FAIL
720 ) ) )
721 { h->options ^= MCX_HASH_DOUBLING
722 ; h->buckets = ole_buckets
723 ; return STATUS_FAIL
724 ; }
725
726 h->n_buckets *= 2
727 ; h->n_entries = 0
728
729 ; while(d-- > 0) /* careful with unsignedness */
730 { hash_link* this = ole_bucket->base
731
732 ; while(this)
733 { hash_link* next = this->next, *clone
734 ; void* val = this->kv.val
735 ; void* key = this->kv.key
736
737 ; mcxGrimLet(h->src_link, this) /* will be used immediately */
738 #if TINGEA_HASH_CACHE
739 ; clone = mcx_bucket_search(h, key, MCX_DATUM_INSERT, &this->hv)
740 #else
741 ; clone = mcx_bucket_search(h, key, MCX_DATUM_INSERT, NULL)
742 #endif
743
744 ; if (clone)
745 clone->kv.val = val
746 ; else
747 n_fail++
748
749 ; this = next
750 ; }
751 ole_bucket++
752 ; }
753
754 if (n_fail)
755 mcxErr
756 ( "mcx_hash_double PANIC"
757 , "<%ld> reinsertion failures in hash with <%ld> entries"
758 , (long) n_fail
759 , (long) h->n_entries
760 )
761
762 ; mcxFree(ole_buckets)
763 ; h->options ^= MCX_HASH_DOUBLING
764 ; return STATUS_OK
765 ; }
766
767
768 #define BJmix(a,b,c) \
769 { \
770 a -= b; a -= c; a ^= (c>>13); \
771 b -= c; b -= a; b ^= (a<< 8); \
772 c -= a; c -= b; c ^= (b>>13); \
773 a -= b; a -= c; a ^= (c>>12); \
774 b -= c; b -= a; b ^= (a<<16); \
775 c -= a; c -= b; c ^= (b>> 5); \
776 a -= b; a -= c; a ^= (c>> 3); \
777 b -= c; b -= a; b ^= (a<<10); \
778 c -= a; c -= b; c ^= (b>>15); \
779 }
780
781
782 /*
783 * Thomas Wang says Robert Jenkins says this is a good integer hash function:
784 *unsigned int inthash(unsigned int key)
785 *{
786 * key += (key << 12);
787 * key ^= (key >> 22);
788 * key += (key << 4);
789 * key ^= (key >> 9);
790 * key += (key << 10);
791 * key ^= (key >> 2);
792 * key += (key << 7);
793 * key ^= (key >> 12);
794 * return key;
795 *}
796 */
797
798 /* created by Bob Jenkins */
mcxBJhash(register const void * key,register u32 len)799 u32 mcxBJhash
800 ( register const void* key
801 , register u32 len
802 )
803 { register u32 a, b, c, l
804 ; const char* k = key
805
806 ; l = len
807 ; a = b = 0x9e3779b9u
808 ; c = 0xabcdef01u
809
810 ; while (l >= 12)
811 {
812 a += k[0] + (k[1]<<8) + (k[2]<<16) + (k[3]<<24)
813 ; b += k[4] + (k[5]<<8) + (k[6]<<16) + (k[7]<<24)
814 ; c += k[8] + (k[9]<<8) + (k[10]<<16)+ (k[11]<<24)
815 ; BJmix(a,b,c)
816 ; k += 12
817 ; l -= 12
818 ; }
819
820 c += len
821 ; switch(l) /* all the case statements fall through */
822 {
823 case 11: c+= k[10]<<24
824 ; case 10: c+= k[9]<<16
825 ; case 9 : c+= k[8]<<8
826 /* the first byte of c is reserved for the length */
827 ; case 8 : b+= k[7]<<24
828 ; case 7 : b+= k[6]<<16
829 ; case 6 : b+= k[5]<<8
830 ; case 5 : b+= k[4]
831 ; case 4 : a+= k[3]<<24
832 ; case 3 : a+= k[2]<<16
833 ; case 2 : a+= k[1]<<8
834 ; case 1 : a+= k[0]
835 /* case 0: nothing left to add */
836 ; }
837
838 BJmix(a,b,c)
839 ; return c
840 ; }
841
842
843 /* created by Chris Torek */
mcxCThash(const void * key,u32 len)844 u32 mcxCThash
845 ( const void *key
846 , u32 len
847 )
848 #define ctHASH4a h = (h << 5) - h + *k++;
849 #define ctHASH4b h = (h << 5) + h + *k++;
850 #define ctHASH4 ctHASH4b
851
852 { u32 h = 0
853 ; const unsigned char *k = key
854
855 ; if (len > 0)
856 { unsigned loop = (len + 8 - 1) >> 3 /* loop >= 1 */
857 ; switch (len & (8 - 1))
858 {
859 case 0:
860 do
861 { /* All fall through */
862 ctHASH4
863 case 7: ctHASH4
864 case 6: ctHASH4
865 case 5: ctHASH4
866 case 4: ctHASH4
867 case 3: ctHASH4
868 case 2: ctHASH4
869 case 1: ctHASH4
870 }
871 while (--loop) /* unsignedcmpok */
872 ;
873 }
874 }
875 ; return h
876 ; }
877
878
879 /* All 3 hash fies below play on a similar theme. Interesting: as long as only
880 * << >> and ^ are used, a hash function does a partial homogeneous fill of all
881 * 2^k different strings of length k built out of two distinct characters --
882 * not all buckets need be used. E.g. for k=15, such a hash function might fill
883 * 2^13 buckets with 4 entries each, or it might fill 2^10 buckets with 32
884 * entries each. This was observed, not proven.
885 */
886
mcxSvDhash(const void * key,u32 len)887 u32 mcxSvDhash
888 ( const void *key
889 , u32 len
890 )
891 { u32 h = 0x7cabd53e /* 0x7cabd53e */
892 ; const char* k = key
893
894 ; h = 0x0180244a
895
896 ; while (len--)
897 { u32 g = *k
898 ; u32 gc = 0xff ^ g
899 ; u32 hc = 0xffffffffu
900
901 ; hc ^= h
902 ; h = ( (h << 2) + h + (h >> 3))
903 ^ ( (g << 25) + (gc << 18) + (g << 11) + (g << 5) + g )
904 ; k++
905 ; }
906
907 ; return h
908 ; }
909
910
911 /* created by me */
mcxSvD2hash(const void * key,u32 len)912 u32 mcxSvD2hash
913 ( const void *key
914 , u32 len
915 )
916 { u32 h = 0x7cabd53e /* 0x7cabd53e */
917 ; const char* k = key
918
919 ; while (len--)
920 { u32 g = *k
921 ; u32 gc = 0xff ^ g
922
923 ; h = ( (h << 3) ^ h ^ (h >> 5) )
924 ^ ( (g << 25) ^ (gc << 18) ^ (g << 11) ^ (gc << 5) ^ g )
925 ; k++
926 ; }
927
928 ; return h
929 ; }
930
931
932 /* created by me */
mcxSvD1hash(const void * key,u32 len)933 u32 mcxSvD1hash
934 ( const void *key
935 , u32 len
936 )
937 { u32 h = 0xeca96537u
938 ; const char* k = key
939
940 ; while (len--)
941 { u32 g = *k
942 ; h = ( (h << 3) ^ h ^ (h >> 5) )
943 ^ ( (g << 21) ^ (g << 12) ^ (g << 5) ^ g )
944 ; k++
945 ; }
946
947 ; return h
948 ; }
949
950
951 /* created by Daniel Phillips */
mcxDPhash(const void * key,u32 len)952 u32 mcxDPhash
953 ( const void *key
954 , u32 len
955 )
956 { u32 h0 = 0x12a3fe2du
957 , h1 = 0x37abe8f9u
958 ; const char* k = key
959
960 ; while (len--)
961 {
962 u32 h = h1 + (h0 ^ (*k++ * 71523))
963 ; h1 = h0
964 ; h0 = h
965 ; }
966 return h0
967 ; }
968
969
970 /* "GNU Emacs" hash (from m4) */
mcxGEhash(const void * key,u32 len)971 u32 mcxGEhash
972 ( const void* key
973 , u32 len
974 )
975 { const char* k = key
976 ; u32 hash = 0
977 ; int t
978 ; while (len--)
979 { if ((t = *k++) >= 0140)
980 t -= 40
981 ; hash = ((hash << 3) + (hash >> 28) + t)
982 ; }
983 return hash
984 ; }
985
986
987 /* Fowler Noll Vo hash */
mcxFNVhash(const void * buf,u32 len)988 u32 mcxFNVhash
989 ( const void *buf
990 , u32 len
991 )
992 { u32 hval = 0x811c9dc5
993 ; const char *bp = buf
994
995 ; while (len--)
996 {
997 #if 0 /* other branch supposedly optimizes gcc */
998 hval *= 0x01000193
999 #else
1000 hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24)
1001 #endif
1002 ; hval ^= *bp++;
1003 ; }
1004 return hval
1005 ; }
1006
1007
1008
1009 /* Berkely Database hash */
mcxBDBhash(const void * key,u32 len)1010 u32 mcxBDBhash
1011 ( const void *key
1012 , u32 len
1013 )
1014 { const char* k = key
1015 ; u32 hash = 0
1016
1017 ; while (len--)
1018 { hash = *k++ + (hash << 6) + (hash << 16) - hash
1019 ; }
1020 return hash
1021 ; }
1022
1023
1024 /* One at a time hash, Bob Jenkins/Colin Plumb */
mcxOAThash(const void * key,u32 len)1025 u32 mcxOAThash
1026 ( const void *key
1027 , u32 len
1028 )
1029 { const char* k = key
1030 ; u32 hash = 0
1031
1032 ; while (len--)
1033 { hash += *k++
1034 ; hash += (hash << 10)
1035 ; hash ^= (hash >> 6)
1036 ; }
1037
1038 hash += (hash << 3);
1039 ; hash ^= (hash >> 11);
1040 ; hash += (hash << 15);
1041 ; return hash
1042 ; }
1043
1044
1045 /* by Dan Bernstein */
mcxDJBhash(const void * key,u32 len)1046 u32 mcxDJBhash
1047 ( const void *key
1048 , u32 len
1049 )
1050 { const char* k = key
1051 ; u32 hash = 5381
1052 ; while (len--)
1053 { hash = *k++ + (hash << 5) + hash
1054 ; }
1055 return hash
1056 ; }
1057
1058
1059 /* UNIX ELF hash */
mcxELFhash(const void * key,u32 len)1060 u32 mcxELFhash
1061 ( const void *key
1062 , u32 len
1063 )
1064 { const char* k = key
1065 ; u32 hash = 0
1066 ; u32 g
1067
1068 ; while (len--)
1069 { hash = *k++ + (hash << 4)
1070 ; if ((g = (hash & 0xF0000000u)))
1071 hash ^= g >> 24
1072
1073 ; hash &= ~g
1074 ; }
1075 return hash
1076 ; }
1077
1078
1079
mcxStrHash(const void * s)1080 u32 mcxStrHash
1081 ( const void* s
1082 )
1083 { dim l = strlen(s)
1084 ; return(mcxDPhash(s, l))
1085 ; }
1086
1087
mcxStrCmp(const void * a,const void * b)1088 int mcxStrCmp
1089 ( const void* a
1090 , const void* b
1091 )
1092 { return strcmp(a, b)
1093 ; }
1094
1095
1096
bitprint(u32 key,FILE * fp)1097 void bitprint
1098 ( u32 key
1099 , FILE* fp
1100 )
1101 { do
1102 { fputc(key & 1 ? '1' : '0', fp)
1103 ; } while
1104 ((key = key >> 1))
1105 ; }
1106
1107
bitcount(u32 key)1108 int bitcount
1109 ( u32 key
1110 )
1111 { int ct = 0
1112 ; do
1113 { if (key & 1)
1114 ct++
1115 ; }
1116 while ((key = key >> 1))
1117 ; return ct
1118 ; }
1119
1120
1121
1122 #if 0
1123 /* The old legacy hash */
1124 static __u32 dx_hack_hash (const char *name, int len)
1125 {
1126 __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
1127 while (len--) {
1128 __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
1129
1130 if (hash & 0x80000000) hash -= 0x7fffffff;
1131 hash1 = hash0;
1132 hash0 = hash;
1133 }
1134 return (hash0 << 1);
1135 }
1136 #endif
1137
1138