1 /*   (C) Copyright 2001, 2002, 2003, 2004, 2005 Stijn van Dongen
2  *   (C) Copyright 2006, 2007, 2008, 2009, 2010 Stijn van Dongen
3  *
4  * This file is part of tingea.  You can redistribute and/or modify tingea
5  * under the terms of the GNU General Public License; either version 3 of the
6  * License or (at your option) any later version.  You should have received a
7  * copy of the GPL along with tingea, in the file COPYING.
8 */
9 
10 
11 /* TODO (?)
12  *
13  * integer overflow handling.
14  *
15  * better bucket fill statistics.
16  *
17  * nested hashes.
18  *    could lump
19  *       options  cmp  hash  src_link
20  *    into a structure and make hashes share them.
21 */
22 
23 #include <math.h>
24 #include <stdio.h>
25 #include <string.h>
26 #include <stdlib.h>
27 #include <unistd.h>
28 
29 #include "hash.h"
30 #include "minmax.h"
31 #include "types.h"
32 #include "inttypes.h"
33 #include "ting.h"
34 #include "err.h"
35 #include "alloc.h"
36 #include "gralloc.h"
37 #include "compile.h"
38 #include "list.h"
39 
40 
41             /* the distribution of bit counts over all 32-bit keys */
42 int promilles[32] =
43 {  0,  0,  0,  0,  0,  0,  0,  0
44 ,  2,  7, 15, 30, 53, 81,110,131
45 ,140,131,110, 81, 53, 30, 15,  7
46 ,  2,  0,  0,  0,  0,  0,  0,  0
47 }  ;
48 
49 
50 #ifndef TINGEA_HASH_CACHE
51 #  define   TINGEA_HASH_CACHE 0
52 #endif
53 
54 
55 typedef struct hash_link
56 {  struct hash_link* next
57 ;  mcxKV             kv
58 #if TINGEA_HASH_CACHE
59 ;  u32               hv
60 #endif
61 ;
62 }  hash_link         ;
63 
64 
65 typedef struct bucket
66 {  hash_link*        base
67 ;
68 }  mcx_bucket        ;
69 
70 
71                /* For further optimization work, options
72                 *    options
73                 *    load
74                 *    cmp
75                 *    hash
76                 *    src_link
77                 * can be shared between different hashes (e.g.
78                 * with multidimensional hashes). Consider
79                 * making src_link file static variable.
80                */
81 
82 struct mcxHash
83 {  dim         n_buckets      /* 2^n_bits          */
84 ;  mcx_bucket *buckets
85 ;  dim         n_entries
86 
87 ;  mcxbits     options
88 
89 ;  int         (*cmp) (const void *a, const void *b)
90 ;  u32         (*hash) (const void *a)
91 
92 ;  mcxGrim*    src_link
93 
94 ;  float       load
95 ;
96 }  ;
97 
98 
99 struct mcxHashWalk
100 {  mcxHash*    hash
101 ;  dim         i_bucket
102 ;  hash_link*  link
103 ;
104 }  ;
105 
106 
mcx_bucket_init(void * buck)107 void* mcx_bucket_init
108 (  void*  buck
109 )
110    {  ((mcx_bucket*) buck)->base = NULL
111    ;  return NULL
112 ;  }
113 
114 void bitprint
115 (  u32   key
116 ,  FILE* fp
117 )  ;
118 
119 int bitcount
120 (  u32   key
121 )  ;
122 
123 
mcxHashNew(dim n_buckets,u32 (* hash)(const void * a),int (* cmp)(const void * a,const void * b))124 mcxHash* mcxHashNew
125 (  dim         n_buckets
126 ,  u32         (*hash)(const void *a)
127 ,  int         (*cmp) (const void *a, const void *b)
128 )
129    {  mcxHash  *h
130    ;  mcxbool ok  = FALSE
131 
132    ;  u8 n_bits   =  0
133 
134    ;  if (!n_buckets)
135       {  mcxErr("mcxHashNew strange", "void alloc request")
136       ;  n_buckets = 2
137    ;  }
138 
139       if (!(h = mcxAlloc(sizeof(mcxHash), RETURN_ON_FAIL)))
140       return NULL
141 
142    ;  while(n_buckets)
143       {  n_buckets >>=  1
144       ;  n_bits++
145    ;  }
146 
147       h->load           =  0.5
148    ;  h->n_entries      =  0
149    ;  h->n_buckets      =  n_buckets = (1 << n_bits)
150    ;  h->cmp            =  cmp
151    ;  h->hash           =  hash
152    ;  h->options        =  MCX_HASH_OPT_DEFAULTS
153 
154    ;  h->src_link       =  NULL
155 
156    ;  while (1)                        /* fixme 2nd arg below, have better choice? */
157       {  h->src_link = mcxGrimNew(sizeof(hash_link), h->n_buckets, MCX_GRIM_ARITHMETIC)
158       ;  if (!h->src_link)
159          break
160 
161       ;  if
162          (! (  h->buckets
163             =  mcxNAlloc
164                (  h->n_buckets
165                ,  sizeof(mcx_bucket)
166                ,  mcx_bucket_init
167                ,  RETURN_ON_FAIL
168          )  )  )
169          break
170 
171       ;  ok = TRUE
172       ;  break
173    ;  }
174 
175       if (!ok)
176       {  mcxGrimFree(&(h->src_link))
177       ;  mcxFree(h)
178       ;  return NULL
179    ;  }
180 
181       return h
182 ;  }
183 
184 
mcxHashMemSize(mcxHash * hash)185 dim mcxHashMemSize
186 (  mcxHash*    hash
187 )
188    {  return
189          mcxGrimMemSize(hash->src_link)
190       +  sizeof(mcx_bucket) * hash->n_buckets
191 ;  }
192 
193 
mcxHashGetSettings(mcxHash * hash,mcxHashSettings * settings)194 void mcxHashGetSettings
195 (  mcxHash*          hash
196 ,  mcxHashSettings*  settings
197 )
198    {  settings->n_buckets  =  hash->n_buckets
199    ;  settings->load       =  hash->load
200    ;  settings->n_entries  =  hash->n_entries
201    ;  settings->options    =  hash->options
202 ;  }
203 
204 
hash_link_size(hash_link * link)205 static dim hash_link_size
206 (  hash_link* link
207 )
208    {  dim s =  0
209    ;  while(link)
210          link = link->next
211       ,  s++
212    ;  return(s)
213 ;  }
214 
215 
mcxHashStats(FILE * fp,mcxHash * h)216 void mcxHashStats
217 (  FILE*       fp
218 ,  mcxHash*    h
219 )
220    {  dim      buckets  =  h->n_buckets
221    ;  dim      buckets_used   =  0
222    ;  float    ctr      =  0.0
223    ;  float    cb       =  0.0
224    ;  dim      max      =  0
225    ;  dim      entries  =  0
226    ;  const    char* me =  "mcxHashStats"
227 
228    ;  int      j, k, distr[32]
229    ;  mcx_bucket  *buck
230 
231    ;  for (j=0;j<32;j++)
232       distr[j] = 0
233 
234    ;  for (buck=h->buckets; buck<h->buckets + h->n_buckets; buck++)
235       {  dim   d        =  hash_link_size(buck->base)
236       ;  hash_link* this=  buck->base
237 
238       ;  if (d)
239          {  buckets_used++
240          ;  entries    +=  d
241          ;  ctr        +=  (float) d * d
242          ;  cb         +=  (float) d * d * d
243          ;  max         =  MCX_MAX(max, d)
244       ;  }
245 
246          while(this)
247          {  u32   u     =  (h->hash)(this->kv.key)
248          ;  int   ct    =  bitcount(u)
249          ;  this        =  this->next
250          ;  distr[ct]++
251 ;if (0) fprintf(stderr, "bucket [%d] key [%s]\n", (int)d,  ((mcxTing*) this->kv.key)->str)
252       ;  }
253       }
254 
255       ctr = ctr / MCX_MAX(1, entries)
256    ;  cb  = sqrt(cb  / MCX_MAX(1, entries))
257 
258    ;  if (buckets && buckets_used)
259          mcxTellf
260          (  fp
261          ,  me
262          ,  "%4.2f bucket usage (%ld available, %ld used, %ld entries)"
263          ,  (double) ((double) buckets_used) / buckets
264          ,  (long) buckets
265          ,  (long) buckets_used
266          ,  (long) entries
267          )
268       ,  mcxTellf
269          (  fp
270          ,  me
271          ,  "bucket average: %.2f, center: %.2f, cube: %.2f, max: %ld"
272          ,  (double) entries / ((double) buckets_used)
273          ,  (double) ctr
274          ,  (double) cb
275          ,  (long) max
276          )
277 
278    ;  mcxTellf(fp, me, "bit distribution (promilles):")
279    ;  fprintf
280       (  fp
281       ,  "  %-37s   %s\n"
282       ,  "Current bit distribution"
283       ,  "Ideally random distribution"
284       )
285    ;  for (k=0;k<4;k++)
286       {  for (j=k*8;j<(k+1)*8;j++)
287          fprintf(fp, "%3.0f ",  entries ? (1000 * (float)distr[j]) / entries : 0.0)
288       ;  fprintf(fp, "        ");
289       ;  for (j=k*8;j<(k+1)*8;j++)
290          fprintf(fp, "%3d ",  promilles[j])
291       ;  fprintf(fp, "\n")
292    ;  }
293       mcxTellf(fp, me, "link count: %ld", (long) (mcxGrimCount(h->src_link)))
294    ;  mcxTellf(fp, me, "link mem count: %ld", (long) (mcxGrimMemSize(h->src_link)))
295 
296    ;  mcxTellf(fp, me, "done")
297 ;  }
298 
299 
mcxHashSetOpts(mcxHash * h,double load,int options)300 void mcxHashSetOpts
301 (  mcxHash*    h
302 ,  double      load
303 ,  int         options
304 )
305    {  if (options >= 0)
306       h->options |= options
307   /* fixme; are there states in which either of these can be corrupting ? */
308    ;  h->load  =  load
309 ;  }
310 
311 
mcxHashFreeScalar(void * scalar cpl__unused)312 void mcxHashFreeScalar
313 (  void* scalar cpl__unused
314 )
315    {  /* this triggers freeing of kv.key or kv.val */
316 ;  }
317 
318 
mcxHashFree(mcxHash ** hpp,void freekey (void * key),void freeval (void * key))319 void mcxHashFree
320 (  mcxHash**   hpp
321 ,  void        freekey(void* key)
322 ,  void        freeval(void* key)
323 )
324    {  mcxHash* h        =  *hpp
325    ;  mcx_bucket* buck  =  h ? h->buckets   : NULL
326    ;  dim d             =  h ? h->n_buckets : 0
327 
328    ;  if (!h)
329       return
330 
331    ;  if (freekey || freeval)
332       {  while (d-- > 0)      /* careful with unsignedness */
333          {  hash_link* link = (buck++)->base
334 
335          ;  while(link)
336             {  void* key = link->kv.key
337             ;  void* val = link->kv.val
338             ;  if (freekey && key)
339                   freekey(key)
340                ,  mcxFree(key)
341             ;  if (freeval && val)
342                   freeval(val)
343                ,  mcxFree(val)
344             ;  link = link->next
345          ;  }
346          }
347       }
348 
349       mcxGrimFree(&h->src_link)
350    ;  mcxFree(h->buckets)
351    ;  mcxFree(h)
352    ;  *hpp = NULL
353 ;  }
354 
355 
356 #define MCX_HASH_DOUBLING MCX_HASH_OPT_UNUSED
357 
358 
359 #if TINGEA_HASH_CACHE
360 
mcx_bucket_search(mcxHash * h,void * ob,mcxmode ACTION,u32 * hashval)361 static hash_link* mcx_bucket_search
362 (  mcxHash*          h
363 ,  void*             ob
364 ,  mcxmode           ACTION
365 ,  u32*              hashval
366 )
367    {  u32   thishash    =  hashval ? *hashval : (h->hash)(ob)
368    ;  mcx_bucket *buck  =  h->buckets + (thishash & (h->n_buckets-1))
369    ;  hash_link* link   =  buck->base, *prev = NULL, *new
370    ;  int delta         =  0
371 
372    ;  while
373       (   link
374       &&  (  link->hv != thishash
375           || h->cmp(ob, link->kv.key)
376           )
377       )
378          prev = link
379       ,  link = link->next
380 
381    ;  if (link && ACTION == MCX_DATUM_DELETE)
382       {  if (buck->base == link)
383          buck->base = link->next
384       ;  else
385          prev->next = link->next
386       ;  delta = -1
387       ;  mcxGrimLet(h->src_link, link)
388                /* we return link below though */
389    ;  }
390       else if (!link)
391       {  if (ACTION == MCX_DATUM_FIND || ACTION == MCX_DATUM_DELETE)
392          link = NULL
393 
394       ;  else if (ACTION == MCX_DATUM_INSERT)
395          {  new = mcxGrimGet(h->src_link)            /* fixme could be NULL */
396          ;  new->next   = NULL
397          ;  new->kv.val = NULL
398          ;  new->kv.key = ob
399          ;  new->hv     =  thishash
400 
401          ;  if (!buck->base)
402             buck->base = new
403                               /* in TINGEA_HASH_CACHE case we always append */
404          ;  else
405                new->next  = prev->next
406             ,  prev->next = new
407          ;  delta = 1
408          ;  link = new
409       ;  }
410       }
411 
412       h->n_entries += delta
413    ;  return link
414 ;  }
415 
416 #else
417 
mcx_bucket_search(mcxHash * h,void * ob,mcxmode ACTION,u32 * hashval)418 static hash_link* mcx_bucket_search
419 (  mcxHash*          h
420 ,  void*             ob
421 ,  mcxmode           ACTION
422 ,  u32*              hashval
423 )
424    {  u32   thishash    =  hashval ? *hashval : (h->hash)(ob)
425    ;  mcx_bucket *buck  =  h->buckets + (thishash & (h->n_buckets-1))
426    ;  hash_link* link   =  buck->base, *prev = NULL, *new
427    ;  int c             =  1
428    ;  int delta         =  0
429 
430    ;  while
431       (   link
432       &&  (c = h->cmp(ob, link->kv.key)) > 0
433       )
434          prev = link
435       ,  link = link->next
436 
437    ;  if (!c && ACTION == MCX_DATUM_DELETE)
438       {  if (buck->base == link)
439          buck->base = link->next
440       ;  else
441          prev->next = link->next
442       ;  delta = -1
443       ;  mcxGrimLet(h->src_link, link)
444                /* we return link below though */
445    ;  }
446       else if (!link || c < 0)
447       {  if (ACTION == MCX_DATUM_FIND || ACTION == MCX_DATUM_DELETE)
448          link = NULL
449 
450       ;  else if (ACTION == MCX_DATUM_INSERT)
451          {  new = mcxGrimGet(h->src_link)          /* fixme could be NULL */
452          ;  new->next   = NULL
453          ;  new->kv.val = NULL
454          ;  new->kv.key = ob
455 
456          ;  if (!buck->base)
457             buck->base = new
458          ;  else if (link == buck->base)
459                new->next = buck->base
460             ,  buck->base = new
461          ;  else
462                new->next  = prev->next
463             ,  prev->next = new
464          ;  delta = 1
465          ;  link = new
466       ;  }
467       }
468 
469       h->n_entries += delta
470    ;  return link
471 ;  }
472 
473 #endif
474 
475 
476 static mcxstatus mcx_hash_double
477 (  mcxHash* h
478 )  ;
479 
480 
mcxHashSearchx(void * key,mcxHash * h,mcxmode ACTION,int * delta)481 mcxKV* mcxHashSearchx
482 (  void*       key
483 ,  mcxHash*    h
484 ,  mcxmode     ACTION
485 ,  int*        delta
486 )
487    {  hash_link *link
488    ;  dim n_entries = h->n_entries
489 
490    ;  if
491       (  h->load * h->n_buckets < h->n_entries
492       && !(h->options & (MCX_HASH_OPT_CONSTANT | MCX_HASH_DOUBLING))
493       && mcx_hash_double(h)
494       )
495       mcxErr("mcxHashSearch", "cannot double hash")
496 
497    ;  link = mcx_bucket_search(h, key, ACTION, NULL)
498 
499    ;  if (delta)
500       *delta = h->n_entries < n_entries ? -1 : (int) (h->n_entries - n_entries)
501 
502    ;  return link ? &link->kv : NULL
503 ;  }
504 
505 
506 enum
507 {  ARRAY_OF_KEY
508 ,  ARRAY_OF_KV
509 }  ;
510 
511 
mcxHashApply(mcxHash * hash,void (* cb)(const void * key,void * val,void * data),void * data)512 void mcxHashApply
513 (  mcxHash* hash
514 ,  void    (*cb)(const void* key, void* val, void* data)
515 ,  void*    data
516 )
517    {  mcxHashWalk* walk = mcxHashWalkInit(hash)
518    ;  mcxKV* kv
519    ;  dim i_bucket
520    ;  while ((kv = mcxHashWalkStep(walk, &i_bucket)))
521       cb(kv->key, kv->val, data)
522    ;  mcxHashWalkFree(&walk)
523 ;  }
524 
525 
hash_array(mcxHash * hash,dim * n_entries,int (* cmp)(const void *,const void *),mcxbits opts cpl__unused,mcxenum mode)526 static void** hash_array
527 (  mcxHash*    hash
528 ,  dim*        n_entries
529 ,  int       (*cmp)(const void*, const void*)
530 ,  mcxbits     opts     cpl__unused
531 ,  mcxenum     mode
532 )
533    {  void** obs   =  mcxAlloc(sizeof(void*) * hash->n_entries, RETURN_ON_FAIL)
534    ;  dim d = 0
535    ;  mcxKV* kv
536    ;  const char* me = mode == ARRAY_OF_KEY ? "mcxHashKeys" : "mcxHashKVs"
537 
538    ;  mcxHashWalk* walk = mcxHashWalkInit(hash)
539 
540    ;  if (!walk || !obs)
541       return NULL
542 
543    ;  while ((kv = mcxHashWalkStep(walk, NULL)))   /* fixme extract */
544       {  if (d >= hash->n_entries)
545          {  mcxErr
546             (  me
547             ,  "PANIC inconsistent state (n_entries %ld)"
548             ,  (long) hash->n_entries
549             )
550          ;  break
551       ;  }
552          obs[d] = mode == ARRAY_OF_KEY ? kv->key : kv
553       ;  d++
554    ;  }
555       if (d != hash->n_entries)
556       mcxErr(me, "PANIC inconsistent state (n_entries %lu)", (ulong) hash->n_entries)
557 
558    ;  if (cmp)
559       qsort(obs, d, sizeof(void*), cmp)
560    ;  mcxHashWalkFree(&walk)
561 
562    ;  *n_entries = d
563    ;  return obs
564 ;  }
565 
566 
567 
mcxHashKeys(mcxHash * hash,dim * n_entries,int (* cmp)(const void *,const void *),mcxbits opts)568 void** mcxHashKeys
569 (  mcxHash*    hash
570 ,  dim*        n_entries
571 ,  int       (*cmp)(const void*, const void*)
572 ,  mcxbits     opts                    /* unused yet */
573 )
574    {  return hash_array(hash, n_entries, cmp, opts, ARRAY_OF_KEY)
575 ;  }
576 
577 
578 
mcxHashKVs(mcxHash * hash,dim * n_entries,int (* cmp)(const void *,const void *),mcxbits opts)579 void** mcxHashKVs
580 (  mcxHash*    hash
581 ,  dim*        n_entries
582 ,  int       (*cmp)(const void*, const void*)
583 ,  mcxbits     opts                    /* unused yet */
584 )
585    {  return hash_array(hash, n_entries, cmp, opts, ARRAY_OF_KV)
586 ;  }
587 
588 
589 
mcxHashWalkStep(mcxHashWalk * walk,dim * i_bucket)590 mcxKV* mcxHashWalkStep
591 (  mcxHashWalk  *walk
592 ,  dim          *i_bucket
593 )
594    {  hash_link* step  =  walk->link
595 
596    ;  while (!step && ++walk->i_bucket < walk->hash->n_buckets)
597       step = (walk->hash->buckets+walk->i_bucket)->base
598 
599    ;  if (step)
600       {  walk->link = step->next
601       ;  if (i_bucket)
602          *i_bucket = walk->i_bucket
603       ;  return &step->kv
604    ;  }
605       return NULL
606 ;  }
607 
608 
mcxHashWalkInit(mcxHash * h)609 mcxHashWalk* mcxHashWalkInit
610 (  mcxHash  *h
611 )
612    {  mcxHashWalk* walk = mcxAlloc(sizeof *walk, RETURN_ON_FAIL)
613    ;  if (!walk)
614       return NULL
615 
616    ;  walk->hash =  h
617 
618    ;  if (!h || !h->buckets)
619       {  mcxFree(walk)
620       ;  return NULL
621    ;  }
622 
623       walk->i_bucket =  0
624    ;  walk->link     =  (h->buckets+0)->base
625    ;  return walk
626 ;  }
627 
628 
mcxHashWalkFree(mcxHashWalk ** walkpp)629 void mcxHashWalkFree
630 (  mcxHashWalk  **walkpp
631 )
632    {  mcxFree(*walkpp)
633    ;  *walkpp =  NULL
634 ;  }
635 
636 
mcxHashMerge(mcxHash * h1,mcxHash * h2,mcxHash * hd,void * merge (void * val1,void * val2))637 mcxHash* mcxHashMerge
638 (  mcxHash*    h1
639 ,  mcxHash*    h2
640 ,  mcxHash*    hd    /* hash destination */
641 ,  void*       merge(void* val1, void* val2)
642 )
643    {  mcxHash* ha[2] /* hash array */
644    ;  mcxHash* h
645    ;  int i
646 
647    ;  if (!h1 || !h2)
648       mcxDie(1, "mcxHashMerge FATAL", "clone functionality not yet supported")
649 
650       /*
651        * fixme/note I am comparing fie pointers here, is that ok?
652       */
653    ;  if (h1->hash != h2->hash || h1->cmp != h2->cmp)
654       mcxErr("mcxHashMerge WARNING", "non matching hash or cmp fie")
655 
656    ;  if (merge)
657       mcxErr("mcxHashMerge WARNING", "merge functionality not yet supported")
658 
659    ;  hd =  hd
660             ?  hd
661             :  mcxHashNew
662                (  h1->n_entries + h2->n_entries
663                ,  h1->hash
664                ,  h1->cmp
665                )
666 
667    ;  if (!hd)
668       return NULL
669 
670    ;  ha[0] = h1
671    ;  ha[1] = h2
672 
673    ;  for (i=0;i<2;i++)
674       {  h = ha[i]
675       ;  if (h != hd)
676          {  mcx_bucket* buck
677 
678          ;  for (buck = h->buckets; buck<h->buckets + h->n_buckets; buck++)
679             {  hash_link* this =  buck->base
680 
681             ;  while(this)
682                {  mcxKV* kv = mcxHashSearch(this->kv.key, hd, MCX_DATUM_INSERT)
683                ;  if (!kv)
684                   return NULL
685 /*  note/fixme: cannot free hd, don't have key/val free functions */
686                ;  if (!kv->val)
687                   kv->val = this->kv.val
688                ;  this = this->next
689             ;  }
690             }
691          }
692       }
693 
694       return hd
695 ;  }
696 
697 
mcx_hash_double(mcxHash * h)698 static mcxstatus mcx_hash_double
699 (  mcxHash* h
700 )
701    {  mcx_bucket* ole_bucket  =  h->buckets
702    ;  mcx_bucket* ole_buckets =  h->buckets
703    ;  dim d                   =  h->n_buckets
704    ;  dim n_fail              =  0
705 
706    ;  if (h->options & MCX_HASH_DOUBLING)     /* called before */
707       {  mcxErr("mcx_hash_double PANIC", "double trouble")
708       ;  return STATUS_FAIL
709    ;  }
710 
711       h->options |= MCX_HASH_DOUBLING
712 
713    ;  if
714       (! (  h->buckets
715          =  mcxNAlloc
716             (  2 * h->n_buckets
717             ,  sizeof(mcx_bucket)
718             ,  mcx_bucket_init
719             ,  RETURN_ON_FAIL
720       )  )  )
721       {  h->options ^= MCX_HASH_DOUBLING
722       ;  h->buckets = ole_buckets
723       ;  return STATUS_FAIL
724    ;  }
725 
726       h->n_buckets  *=  2
727    ;  h->n_entries   =  0
728 
729    ;  while(d-- > 0)    /* careful with unsignedness */
730       {  hash_link* this   =  ole_bucket->base
731 
732       ;  while(this)
733          {  hash_link* next = this->next, *clone
734          ;  void* val   =  this->kv.val
735          ;  void* key   =  this->kv.key
736 
737          ;  mcxGrimLet(h->src_link, this)  /* will be used immediately */
738 #if TINGEA_HASH_CACHE
739          ;  clone = mcx_bucket_search(h, key, MCX_DATUM_INSERT, &this->hv)
740 #else
741          ;  clone = mcx_bucket_search(h, key, MCX_DATUM_INSERT, NULL)
742 #endif
743 
744          ;  if (clone)
745             clone->kv.val = val
746          ;  else
747             n_fail++
748 
749          ;  this = next
750       ;  }
751          ole_bucket++
752    ;  }
753 
754       if (n_fail)
755       mcxErr
756       (  "mcx_hash_double PANIC"
757       ,  "<%ld> reinsertion failures in hash with <%ld> entries"
758       ,  (long) n_fail
759       ,  (long) h->n_entries
760       )
761 
762    ;  mcxFree(ole_buckets)
763    ;  h->options ^= MCX_HASH_DOUBLING
764    ;  return STATUS_OK
765 ;  }
766 
767 
768 #define BJmix(a,b,c)             \
769 {                                \
770   a -= b; a -= c; a ^= (c>>13);  \
771   b -= c; b -= a; b ^= (a<< 8);  \
772   c -= a; c -= b; c ^= (b>>13);  \
773   a -= b; a -= c; a ^= (c>>12);  \
774   b -= c; b -= a; b ^= (a<<16);  \
775   c -= a; c -= b; c ^= (b>> 5);  \
776   a -= b; a -= c; a ^= (c>> 3);  \
777   b -= c; b -= a; b ^= (a<<10);  \
778   c -= a; c -= b; c ^= (b>>15);  \
779 }
780 
781 
782 /*
783  * Thomas Wang says Robert Jenkins says this is a good integer hash function:
784  *unsigned int inthash(unsigned int key)
785  *{
786  *   key += (key << 12);
787  *   key ^= (key >> 22);
788  *   key += (key << 4);
789  *   key ^= (key >> 9);
790  *   key += (key << 10);
791  *   key ^= (key >> 2);
792  *   key += (key << 7);
793  *   key ^= (key >> 12);
794  *   return key;
795  *}
796 */
797 
798                         /* created by Bob Jenkins */
mcxBJhash(register const void * key,register u32 len)799 u32 mcxBJhash
800 (  register const void*    key
801 ,  register u32            len
802 )
803    {  register u32      a, b, c, l
804    ;  const char* k =  key
805 
806    ;  l           =  len
807    ;  a = b       =  0x9e3779b9u
808    ;  c           =  0xabcdef01u
809 
810    ;  while (l >= 12)
811       {
812          a += k[0] + (k[1]<<8) + (k[2]<<16) + (k[3]<<24)
813       ;  b += k[4] + (k[5]<<8) + (k[6]<<16) + (k[7]<<24)
814       ;  c += k[8] + (k[9]<<8) + (k[10]<<16)+ (k[11]<<24)
815       ;  BJmix(a,b,c)
816       ;  k += 12
817       ;  l -= 12
818    ;  }
819 
820       c += len
821    ;  switch(l)         /* all the case statements fall through */
822       {
823          case 11: c+= k[10]<<24
824       ;  case 10: c+= k[9]<<16
825       ;  case 9 : c+= k[8]<<8
826                         /* the first byte of c is reserved for the length */
827       ;  case 8 : b+= k[7]<<24
828       ;  case 7 : b+= k[6]<<16
829       ;  case 6 : b+= k[5]<<8
830       ;  case 5 : b+= k[4]
831       ;  case 4 : a+= k[3]<<24
832       ;  case 3 : a+= k[2]<<16
833       ;  case 2 : a+= k[1]<<8
834       ;  case 1 : a+= k[0]
835                         /* case 0: nothing left to add */
836    ;  }
837 
838       BJmix(a,b,c)
839    ;  return c
840 ;  }
841 
842 
843                         /* created by Chris Torek */
mcxCThash(const void * key,u32 len)844 u32   mcxCThash
845 (  const void *key
846 ,  u32        len
847 )
848 #define ctHASH4a   h = (h << 5) - h + *k++;
849 #define ctHASH4b   h = (h << 5) + h + *k++;
850 #define ctHASH4 ctHASH4b
851 
852    {   u32 h               =  0
853    ;   const unsigned char *k    =  key
854 
855    ;   if (len > 0)
856        {   unsigned loop = (len + 8 - 1) >> 3    /* loop >= 1 */
857        ;   switch (len & (8 - 1))
858            {
859                case 0:
860                   do
861                   {        /* All fall through */
862                              ctHASH4
863                      case 7: ctHASH4
864                      case 6: ctHASH4
865                      case 5: ctHASH4
866                      case 4: ctHASH4
867                      case 3: ctHASH4
868                      case 2: ctHASH4
869                      case 1: ctHASH4
870                   }
871                   while (--loop)                  /* unsignedcmpok */
872                ;
873            }
874        }
875    ;  return h
876 ;  }
877 
878 
879 /* All 3 hash fies below play on a similar theme.  Interesting: as long as only
880  * << >> and ^ are used, a hash function does a partial homogeneous fill of all
881  * 2^k different strings of length k built out of two distinct characters --
882  * not all buckets need be used. E.g. for k=15, such a hash function might fill
883  * 2^13 buckets with 4 entries each, or it might fill 2^10 buckets with 32
884  * entries each.  This was observed, not proven.
885 */
886 
mcxSvDhash(const void * key,u32 len)887 u32   mcxSvDhash
888 (  const void        *key
889 ,  u32               len
890 )
891    {  u32   h     =  0x7cabd53e /* 0x7cabd53e */
892    ;  const char* k =  key
893 
894    ;  h           =  0x0180244a
895 
896    ;  while (len--)
897       {  u32  g   =  *k
898       ;  u32  gc  =  0xff ^ g
899       ;  u32  hc  =  0xffffffffu
900 
901       ;  hc      ^=  h
902       ;  h        =  (  (h << 2) +  h +  (h >> 3))
903                   ^  ( (g << 25) + (gc << 18) + (g << 11) + (g << 5) + g )
904       ;  k++
905    ;  }
906 
907    ;  return h
908 ;  }
909 
910 
911                         /* created by me */
mcxSvD2hash(const void * key,u32 len)912 u32   mcxSvD2hash
913 (  const void        *key
914 ,  u32               len
915 )
916    {  u32   h     =  0x7cabd53e /* 0x7cabd53e */
917    ;  const char* k     =  key
918 
919    ;  while (len--)
920       {  u32  g   =  *k
921       ;  u32  gc  =  0xff ^ g
922 
923       ;  h        =  (  (h << 3) ^ h ^ (h >> 5) )
924                   ^  ( (g << 25) ^ (gc << 18) ^ (g << 11) ^ (gc << 5) ^ g )
925       ;  k++
926    ;  }
927 
928    ;  return h
929 ;  }
930 
931 
932                         /* created by me */
mcxSvD1hash(const void * key,u32 len)933 u32   mcxSvD1hash
934 (  const void        *key
935 ,  u32               len
936 )
937    {  u32   h     =  0xeca96537u
938    ;  const char* k     =  key
939 
940    ;  while (len--)
941       {  u32  g   =  *k
942       ;  h        =  (  (h << 3)  ^ h ^ (h >> 5) )
943                   ^  ( (g << 21) ^  (g << 12)   ^ (g << 5) ^ g )
944       ;  k++
945    ;  }
946 
947    ;  return h
948 ;  }
949 
950 
951                         /* created by Daniel Phillips */
mcxDPhash(const void * key,u32 len)952 u32   mcxDPhash
953 (  const void        *key
954 ,  u32               len
955 )
956    {  u32   h0    =  0x12a3fe2du
957    ,        h1    =  0x37abe8f9u
958    ;  const char* k     =  key
959 
960    ;  while (len--)
961       {
962          u32 h    =  h1 + (h0 ^ (*k++ * 71523))
963       ;  h1       =  h0
964       ;  h0       =  h
965    ;  }
966       return h0
967 ;  }
968 
969 
970                            /* "GNU Emacs" hash (from m4) */
mcxGEhash(const void * key,u32 len)971 u32 mcxGEhash
972 (  const void* key
973 ,  u32         len
974 )
975    {  const char* k  =  key
976    ;  u32 hash =  0
977    ;  int t
978    ;  while (len--)
979       {  if ((t = *k++) >= 0140)
980          t -= 40
981       ;  hash = ((hash << 3) + (hash >> 28) + t)
982    ;  }
983       return hash
984 ;  }
985 
986 
987                            /* Fowler Noll Vo hash */
mcxFNVhash(const void * buf,u32 len)988 u32   mcxFNVhash
989 (  const void *buf
990 ,  u32 len
991 )
992    {  u32 hval = 0x811c9dc5
993    ;  const char *bp = buf
994 
995    ;  while (len--)
996       {
997 #if 0                               /* other branch supposedly optimizes gcc */
998          hval *= 0x01000193
999 #else
1000          hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24)
1001 #endif
1002       ;  hval ^= *bp++;
1003    ;  }
1004        return hval
1005 ;  }
1006 
1007 
1008 
1009                            /* Berkely Database hash */
mcxBDBhash(const void * key,u32 len)1010 u32 mcxBDBhash
1011 (  const void *key
1012 ,  u32        len
1013 )
1014    {  const char* k  =  key
1015    ;  u32   hash     =  0
1016 
1017    ;  while (len--)
1018       {  hash = *k++ + (hash << 6) + (hash << 16) - hash
1019    ;  }
1020       return hash
1021 ;  }
1022 
1023 
1024                            /* One at a time hash, Bob Jenkins/Colin Plumb */
mcxOAThash(const void * key,u32 len)1025 u32 mcxOAThash
1026 (  const void *key
1027 ,  u32        len
1028 )
1029    {  const char* k     =  key
1030    ;  u32   hash  =  0
1031 
1032    ;  while (len--)
1033       {  hash += *k++
1034       ;  hash += (hash << 10)
1035       ;  hash ^= (hash >> 6)
1036    ;  }
1037 
1038       hash += (hash << 3);
1039    ;  hash ^= (hash >> 11);
1040    ;  hash += (hash << 15);
1041    ;  return hash
1042 ;  }
1043 
1044 
1045                            /* by Dan Bernstein  */
mcxDJBhash(const void * key,u32 len)1046 u32 mcxDJBhash
1047 (  const void *key
1048 ,  u32        len
1049 )
1050    {  const char* k     =  key
1051    ;  u32   hash  =  5381
1052    ;  while (len--)
1053       {  hash = *k++ + (hash << 5) + hash
1054    ;  }
1055       return hash
1056 ;  }
1057 
1058 
1059                            /* UNIX ELF hash */
mcxELFhash(const void * key,u32 len)1060 u32 mcxELFhash
1061 (  const void *key
1062 ,  u32        len
1063 )
1064    {  const char* k     =  key
1065    ;  u32   hash  =  0
1066    ;  u32   g
1067 
1068    ;  while (len--)
1069       {  hash = *k++ + (hash << 4)
1070       ;  if ((g = (hash & 0xF0000000u)))
1071          hash ^= g >> 24
1072 
1073       ;  hash &= ~g
1074    ;  }
1075       return hash
1076 ;  }
1077 
1078 
1079 
mcxStrHash(const void * s)1080 u32 mcxStrHash
1081 (  const void* s
1082 )
1083    {  dim l =  strlen(s)
1084    ;  return(mcxDPhash(s, l))
1085 ;  }
1086 
1087 
mcxStrCmp(const void * a,const void * b)1088 int mcxStrCmp
1089 (  const void* a
1090 ,  const void* b
1091 )
1092    {  return strcmp(a, b)
1093 ;  }
1094 
1095 
1096 
bitprint(u32 key,FILE * fp)1097 void bitprint
1098 (  u32   key
1099 ,  FILE* fp
1100 )
1101    {  do
1102       {  fputc(key & 1 ? '1' : '0',  fp)
1103    ;  }  while
1104          ((key = key >> 1))
1105 ;  }
1106 
1107 
bitcount(u32 key)1108 int bitcount
1109 (  u32   key
1110 )
1111    {  int ct = 0
1112    ;  do
1113       {  if (key & 1)
1114          ct++
1115    ;  }
1116       while ((key = key >> 1))
1117    ;  return ct
1118 ;  }
1119 
1120 
1121 
1122 #if 0
1123 /* The old legacy hash */
1124 static __u32 dx_hack_hash (const char *name, int len)
1125 {
1126         __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
1127         while (len--) {
1128                 __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
1129 
1130                 if (hash & 0x80000000) hash -= 0x7fffffff;
1131                 hash1 = hash0;
1132                 hash0 = hash;
1133         }
1134         return (hash0 << 1);
1135 }
1136 #endif
1137 
1138