1 /*
2  * hash.c - hash table implementation
3  *
4  *   Copyright (c) 2000-2020  Shiro Kawai  <shiro@acm.org>
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *   1. Redistributions of source code must retain the above copyright
11  *      notice, this list of conditions and the following disclaimer.
12  *
13  *   2. Redistributions in binary form must reproduce the above copyright
14  *      notice, this list of conditions and the following disclaimer in the
15  *      documentation and/or other materials provided with the distribution.
16  *
17  *   3. Neither the name of the authors nor the names of its contributors
18  *      may be used to endorse or promote products derived from this
19  *      software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27  *   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  *   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  *   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  *   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  *   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #define LIBGAUCHE_BODY
35 #include <math.h>
36 #include "gauche.h"
37 #include "gauche/class.h"
38 #include "gauche/priv/atomicP.h"
39 
40 /*============================================================
41  * Internal structures
42  */
43 
44 /* The beginning of this structure must match ScmDictEntry. */
45 typedef struct EntryRec {
46     intptr_t key;
47     intptr_t value;
48     struct EntryRec *next;
49     u_long   hashval;
50 } Entry;
51 
52 #define BUCKETS(hc)   ((Entry**)hc->buckets)
53 
54 #define DEFAULT_NUM_BUCKETS    4
55 #define MAX_AVG_CHAIN_LIMITS   3
56 #define EXTEND_BITS            2
57 
58 /* We limit portable hash value to 32bits */
59 #define PORTABLE_HASHMASK  0xffffffffUL
60 
61 /* For other hash values, we limit it in the fixnum range. */
62 #define HASHMASK SCM_SMALL_INT_MAX
63 
64 typedef Entry *SearchProc(ScmHashCore *core, intptr_t key, ScmDictOp op);
65 
66 static u_int round2up(unsigned int val);
67 
68 /*============================================================
69  * Hash salt
70  */
71 
72 /* The salt value is nonnegative fixnum.  For the time being, we initialize
73    the default salt value for each run of the process; we might do
74    per-hashtable salt in future.
75 
76    Internally we use parameter slot to keep the hash salt value, but
77    we provide dedicated C API to access it to avoid overhead of parameter
78    mechanism.
79 */
80 
81 static ScmPrimitiveParameter *hash_salt; /* initialized by Scm__InitHash() */
82 
Scm_HashSaltRef()83 ScmSmallInt Scm_HashSaltRef()
84 {
85     return SCM_INT_VALUE(Scm_PrimitiveParameterRef(Scm_VM(), hash_salt));
86 }
87 
Scm_HashSaltSet(ScmSmallInt newval)88 ScmSmallInt Scm_HashSaltSet(ScmSmallInt newval) /* returns old value */
89 {
90     return SCM_INT_VALUE(Scm_PrimitiveParameterSet(Scm_VM(), hash_salt,
91                                                    SCM_MAKE_INT(newval)));
92 }
93 
94 /*============================================================
95  * Hash functions
96  */
97 
98 /* Hash function calculates 32bit hash value from the given object.
99    HASH2INDEX macro maps the hash value to the bucket number.
100    (On 64 bit architecture, it's OK to calculate 64bit, but the
101    upper bits are discarded by HASH2INDEX to maintain compatibility. */
102 
103 /* Integer and address. */
104 /* Integer and address hash is a variation of "multiplicative hashing"
105    scheme described in Knuth, TAOCP, section 6.4.  The final shifting
106    is done by HASH2INDEX macro  */
107 
108 #define SMALL_INT_HASH(result, val) \
109     (result) = ((val)*2654435761UL)
110 
111 #define ADDRESS_HASH(result, val) \
112     (result) = (u_long)((SCM_WORD(val) >> 3)*2654435761UL)
113 
114 /* HASH2INDEX
115    Map a hash value to bucket number.
116    We fix the word length to 32bits, since the multiplication
117    constant above is fixed. */
118 #define HASH2INDEX(tabsiz, bits, hashval) \
119     (((hashval)+((hashval)>>(32-(bits)))) & ((tabsiz) - 1))
120 
121 /* Combining two hash values. */
122 #define COMBINE(hv1, hv2)   ((hv1)*5+(hv2))
123 
124 /* For strings, we employ siphash.
125    We use public domain implementation by Sam Trenholme
126    http://samiam.org/blog/20131006.html.  It has a version suitable
127    for 32bit architecture, too.
128    See dws_adapter.h for the details.
129  */
130 #define SCM_DWSIPHASH_INTERFACE
131 #include "gauche/priv/dws_adapter.h"
132 
Scm_EqHash(ScmObj obj)133 u_long Scm_EqHash(ScmObj obj)
134 {
135     u_long hashval;
136     ADDRESS_HASH(hashval, obj);
137     return hashval&HASHMASK;
138 }
139 
140 static u_long number_hash(ScmObj obj, u_long salt, int portable);
141 
flonum_hash(double d,u_long salt,int portable)142 static u_long flonum_hash(double d, u_long salt, int portable)
143 {
144     int exp, sign;
145     ScmObj mantissa = Scm_DecodeFlonum(d, &exp, &sign);
146     u_long xh;
147     SMALL_INT_HASH(xh, exp*sign);
148     if (SCM_NUMBERP(mantissa)) {
149         return COMBINE(number_hash(mantissa, salt, portable), xh);
150     } else {
151         /* d is not finite.  we just map +inf.0, -inf.0 and nan.0 to 0. */
152         return 0;
153     }
154 }
155 
number_hash(ScmObj obj,u_long salt,int portable)156 static u_long number_hash(ScmObj obj, u_long salt, int portable)
157 {
158     u_long hashval;
159     if (SCM_INTP(obj)) {
160         /* On 64bit platform, if we have fixnum that is beyond the range
161            of 32bit fixnum, we have to calculate the hash value the same
162            way as 32bit bignum would do. */
163         long u = SCM_INT_VALUE(obj);
164         if (portable) {
165             if (u < 0) u = -u;  /* safe, for u is in fixnum range */
166 #if SIZEOF_LONG == 8
167             u = ((u & ((1UL<<32) - 1)) + (u >> 32)) & ((1UL<<32)-1);
168 #endif
169         }
170         SMALL_INT_HASH(hashval, u);
171     } else if (SCM_BIGNUMP(obj)) {
172         if (portable) {
173             u_int i;
174             u_long u = 0;
175             for (i=0; i<SCM_BIGNUM_SIZE(obj); i++) {
176 #if SIZEOF_LONG == 4
177                 u += SCM_BIGNUM(obj)->values[i];
178 #elif SIZEOF_LONG == 8
179                 u += (SCM_BIGNUM(obj)->values[i] & ((1UL<<32) - 1))
180                     + (SCM_BIGNUM(obj)->values[i] >> 32);
181 #else
182 #error "sizeof(long) > 8 platform unsupported"
183 #endif
184             }
185             SMALL_INT_HASH(hashval, u);
186         } else {
187             u_int i;
188             u_long u = 0;
189             for (i=0; i<SCM_BIGNUM_SIZE(obj); i++) {
190                 u += SCM_BIGNUM(obj)->values[i];
191             }
192             SMALL_INT_HASH(hashval, u);
193         }
194     } else if (SCM_FLONUMP(obj)) {
195         hashval = flonum_hash(SCM_FLONUM_VALUE(obj), salt, portable);
196     } else if (SCM_RATNUMP(obj)) {
197         /* Ratnum must already be normalized, so we can simply combine
198            hashvals of numerator and denominator. */
199         u_long h1 = number_hash(SCM_RATNUM_NUMER(obj), salt, portable);
200         u_long h2 = number_hash(SCM_RATNUM_DENOM(obj), salt, portable);
201         hashval = COMBINE(h1, h2);
202     } else {
203         SCM_ASSERT(SCM_COMPNUMP(obj));
204         hashval = COMBINE(flonum_hash(SCM_COMPNUM_REAL(obj), salt, portable),
205                           flonum_hash(SCM_COMPNUM_IMAG(obj), salt, portable));
206     }
207     return hashval & (portable ? PORTABLE_HASHMASK : HASHMASK);
208 }
209 
Scm_EqvHash(ScmObj obj)210 u_long Scm_EqvHash(ScmObj obj)
211 {
212     u_long hashval;
213     if (SCM_NUMBERP(obj)) {
214         hashval = number_hash(obj, 0, FALSE);
215     } else {
216         ADDRESS_HASH(hashval, obj);
217     }
218     return hashval&HASHMASK;
219 }
220 
internal_string_hash(ScmString * str,u_long salt,int portable)221 static u_long internal_string_hash(ScmString *str, u_long salt, int portable)
222 {
223     const ScmStringBody *b = SCM_STRING_BODY(str);
224     if (portable) {
225         return (u_long)Scm__DwSipPortableHash((uint8_t*)b->start, b->size,
226                                               salt, salt);
227     } else {
228         return Scm__DwSipDefaultHash((uint8_t*)b->start, b->size,
229                                      salt, salt);
230     }
231 }
232 
internal_uvector_hash(ScmUVector * u,u_long salt,int portable)233 static u_long internal_uvector_hash(ScmUVector *u, u_long salt, int portable)
234 {
235     if (portable) {
236         ScmUVectorType uvtype = Scm_UVectorType(Scm_ClassOf(SCM_OBJ(u)));
237         u_long r, seed;
238         size_t s = SCM_UVECTOR_SIZE(u);
239 
240         switch (uvtype) {
241             /* We can use siphash if u is u8vector or s8vector. */
242         case SCM_UVECTOR_S8:
243         case SCM_UVECTOR_U8:
244             return Scm__DwSipPortableHash((uint8_t*)SCM_UVECTOR_ELEMENTS(u),
245                                           (uint32_t)Scm_UVectorSizeInBytes(u),
246                                           salt, salt^uvtype);
247             /* We need to avoid depending on endianness of multibyte numbers.
248                Using siphash after canonicalizing byte order can be expensive,
249                for we need to allocate a buffer.
250                (Unless we directly access siphash setup/round function
251                and feed one word at a time--which is tedious.)
252                The current code may not be ideal, but just as good as our
253                other primitive portable hash functions. */
254 
255             /* Initial hash value.  The seed value for each uvector type
256                is just a random value I generated. */
257 #define INIT_R(r, seed) SMALL_INT_HASH(r, seed^salt)
258 
259         case SCM_UVECTOR_S16:
260             seed = 3499211612ul;
261             goto do16;
262         case SCM_UVECTOR_U16:
263             seed = 3890346734ul;
264             goto do16;
265         case SCM_UVECTOR_F16:
266             seed = 545404204ul;
267             do16:
268             {
269                 INIT_R(r, seed);
270                 for (size_t i=0; i<s; i++) {
271                     u_long e = SCM_U16VECTOR_ELEMENT(u, i);
272                     SMALL_INT_HASH(e, e);
273                     r = COMBINE(r, e);
274                 }
275                 return r;
276             }
277         case SCM_UVECTOR_S32:
278             seed = 4161255391ul;
279             goto do32;
280         case SCM_UVECTOR_U32:
281             seed = 3922919429ul;
282             goto do32;
283         case SCM_UVECTOR_F32:
284             seed = 949333985ul;
285             do32:
286             {
287                 INIT_R(r, seed);
288                 for (size_t i=0; i<s; i++) {
289                     u_long e = SCM_U32VECTOR_ELEMENT(u, i);
290                     SMALL_INT_HASH(e, e);
291                     r = COMBINE(r, e);
292                 }
293                 return r;
294             }
295         case SCM_UVECTOR_S64:
296             seed = 2715962298ul;
297             goto do64;
298         case SCM_UVECTOR_U64:
299             seed = 1323567403ul;
300             do64:
301             {
302                 INIT_R(r, seed);
303                 for (size_t i=0; i<s; i++) {
304                     uint64_t e = SCM_U64VECTOR_ELEMENT(u, i);
305                     u_long z = (e >> 32) ^ e;
306                     SMALL_INT_HASH(z, z);
307                     r = COMBINE(r, z);
308                 }
309                 return r;
310             }
311         case SCM_UVECTOR_F64: {
312             /* We can't reinterpret f64 as u64, since ARM mixed-endian would
313                yield nonportable result. */
314             seed = 2350294565ul;
315             INIT_R(r, seed);
316             for (size_t i=0; i<s; i++) {
317                 double e = SCM_F64VECTOR_ELEMENT(u, i);
318                 r = COMBINE(r, flonum_hash(e, salt, TRUE));
319             }
320             return r;
321         }
322 #undef INIT_R
323         case SCM_UVECTOR_RESERVED1:
324         case SCM_UVECTOR_C32:
325         case SCM_UVECTOR_C64:
326         case SCM_UVECTOR_C128:
327         case SCM_UVECTOR_RESERVED2:
328         case SCM_UVECTOR_INVALID:
329             Scm_Panic("invalid uvector class.");
330         }
331         return 0;           /* dummy */
332     } else {
333         return Scm__DwSipDefaultHash((uint8_t*)SCM_UVECTOR_ELEMENTS(u),
334                                      (uint32_t)Scm_UVectorSizeInBytes(u),
335                                      salt, salt);
336     }
337 }
338 
339 /* equal-hash, which satisfies
340      forall x, y: equal(x,y) => hash(x) = hash(y)
341 
342    Both default-hash and portable-hash have this property but their
343    requirements are slightly different, so here's the common part.
344 */
equal_hash_common(ScmObj obj,u_long salt,int portable)345 static u_long equal_hash_common(ScmObj obj, u_long salt, int portable)
346 {
347     if (SCM_NUMBERP(obj)) {
348         return number_hash(obj, salt, portable);
349     } else if (!SCM_PTRP(obj)) {
350         u_long hashval;
351         SMALL_INT_HASH(hashval, (u_long)SCM_WORD(obj));
352         return hashval&PORTABLE_HASHMASK;
353     } else if (SCM_STRINGP(obj)) {
354         return internal_string_hash(SCM_STRING(obj), salt, portable);
355     } else if (SCM_PAIRP(obj)) {
356         u_long h = 0, h2;
357         ScmObj cp;
358         SCM_FOR_EACH(cp, obj) {
359             h2 = equal_hash_common(SCM_CAR(cp), salt, portable);
360             h = COMBINE(h, h2);
361         }
362         h2 = equal_hash_common(cp, salt, portable);
363         return COMBINE(h, h2);
364     } else if (SCM_VECTORP(obj)) {
365         int siz = SCM_VECTOR_SIZE(obj);
366         u_long h = 0, h2;
367         for (int i=0; i<siz; i++) {
368             h2 = equal_hash_common(SCM_VECTOR_ELEMENT(obj, i), salt, portable);
369             h = COMBINE(h, h2);
370         }
371         return h;
372     /* uvector hash support */
373     } else if (SCM_UVECTORP(obj)) {
374         return internal_uvector_hash(SCM_UVECTOR(obj), salt, portable);
375 #if GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION
376     } else if (SCM_KEYWORDP(obj)) {
377         if (portable) {
378             if (SCM_SYMBOLP(obj)) {
379                 /* GAUCHE_KEYWORD_IS_SYMBOL mode */
380                 return internal_string_hash(SCM_KEYWORD_NAME(obj), salt, TRUE);
381             } else {
382                 /* GAUCHE_KEYWORD_IS_DISJOINT mode.  SCM_KEYWORD_NAME does
383                    not include prefix ':'.  We should append it so that
384                    the hash value stays the same.  Appending string incurs
385                    allocation, but we expect this branch isn't taken often
386                    and eventually fade away. */
387                 static ScmString *prefix = NULL;
388                 if (prefix == NULL) {
389                     /* idempotent.  no MT hazard. */
390                     prefix = SCM_STRING(Scm_MakeString(":", 1, 1, 0));
391                 }
392                 ScmObj name = Scm_StringAppend2(prefix, SCM_KEYWORD_NAME(obj));
393                 return internal_string_hash(SCM_STRING(name), salt, TRUE);
394             }
395         } else {
396             u_long hashval;
397             ADDRESS_HASH(hashval, obj);
398             return hashval;
399         }
400 #endif /*GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
401     } else if (SCM_SYMBOLP(obj)) {
402         if (portable) {
403             return internal_string_hash(SCM_SYMBOL_NAME(obj), salt, TRUE);
404         } else {
405             u_long hashval;
406             ADDRESS_HASH(hashval, obj);
407             return hashval;
408         }
409     }
410 
411     ScmClass *k = SCM_CLASS_OF(obj);
412     if (k->hash) {
413         return (u_long)k->hash(obj, salt,
414                                (portable? SCM_HASH_PORTABLE:0));
415     }
416 
417     /* Call specialized object-hash method
418        We need some trick; See libomega.scm for the details. */
419     static ScmObj call_object_hash_proc = SCM_UNDEFINED;
420     static ScmObj portable_hash_proc = SCM_UNDEFINED;
421     static ScmObj default_hash_proc = SCM_UNDEFINED;
422     SCM_BIND_PROC(call_object_hash_proc, "%call-object-hash",
423                   Scm_GaucheInternalModule());
424     SCM_BIND_PROC(portable_hash_proc, "portable-hash", Scm_GaucheModule());
425     SCM_BIND_PROC(default_hash_proc, "default-hash", Scm_GaucheModule());
426     ScmObj r = Scm_ApplyRec3(call_object_hash_proc, obj,
427                              (portable
428                               ? portable_hash_proc
429                               : default_hash_proc),
430                              (portable
431                               ? Scm_MakeIntegerU(salt)
432                               : SCM_FALSE));
433     if (SCM_INTP(r)) {
434         return (u_long)SCM_INT_VALUE(r);
435     }
436     if (SCM_BIGNUMP(r)) {
437         /* NB: Scm_GetUInteger clamps the result to [0, ULONG_MAX],
438            so taking the LSW would give better distribution. */
439         return SCM_BIGNUM(r)->values[0];
440     }
441     Scm_Error("object-hash returned non-integer: %S", r);
442     return 0;               /* dummy */
443 }
444 
445 /* For recursive call to the current hash function - see call-object-hash
446    and object-hash definitions in libomega.scm. */
447 static ScmPrimitiveParameter *current_recursive_hash;
448 
Scm_CurrentRecursiveHash(ScmObj newval)449 ScmObj Scm_CurrentRecursiveHash(ScmObj newval)
450 {
451     if (newval == SCM_UNBOUND) {
452         return Scm_PrimitiveParameterRef(Scm_VM(), current_recursive_hash);
453     } else {
454         return Scm_PrimitiveParameterSet(Scm_VM(), current_recursive_hash, newval);
455     }
456 }
457 
458 /* 'Portable' general hash function.
459 
460    It is guaranteed that the hash value won't change for the same objects
461    (roughly, indistinguishable in their external representation)
462    across the runs of the program, and among different platforms.
463    That is, the value can be used in persistent stores.
464  */
Scm_PortableHash(ScmObj obj,u_long salt)465 u_long Scm_PortableHash(ScmObj obj, u_long salt)
466 {
467     return equal_hash_common(obj, salt, TRUE) & PORTABLE_HASHMASK;
468 }
469 
470 /* 'Default' general hash function. */
Scm_DefaultHash(ScmObj obj)471 ScmSmallInt Scm_DefaultHash(ScmObj obj)
472 {
473     return equal_hash_common(obj, Scm_HashSaltRef(), FALSE) & HASHMASK;
474 }
475 
476 /* This is to be called from ScmClass->hash if it needs to compute
477    hash value of field values. */
Scm_RecursiveHash(ScmObj obj,ScmSmallInt salt,u_long flags)478 ScmSmallInt Scm_RecursiveHash(ScmObj obj, ScmSmallInt salt, u_long flags)
479 {
480     if (flags & SCM_HASH_PORTABLE) {
481         return equal_hash_common(obj, salt, TRUE) & PORTABLE_HASHMASK;
482     } else {
483         return equal_hash_common(obj, salt, FALSE) & HASHMASK;
484     }
485 }
486 
Scm_SmallIntHash(ScmSmallInt val,ScmSmallInt salt SCM_UNUSED,u_long flags SCM_UNUSED)487 ScmSmallInt Scm_SmallIntHash(ScmSmallInt val,
488                              ScmSmallInt salt SCM_UNUSED,
489                              u_long flags SCM_UNUSED)
490 {
491     ScmSmallInt v;
492     SMALL_INT_HASH(v, val);
493     return v;
494 }
495 
Scm_Int64Hash(int64_t val,ScmSmallInt salt SCM_UNUSED,u_long flags SCM_UNUSED)496 ScmSmallInt Scm_Int64Hash(int64_t val,
497                           ScmSmallInt salt SCM_UNUSED,
498                           u_long flags SCM_UNUSED)
499 {
500     ScmSmallInt v;
501     SMALL_INT_HASH(v, val);
502     return v;
503 }
504 
505 
506 /* This is to expose string hash function.  Modulo is for the compatibility
507    of srfi-13; just give 0 as modulo if you don't need it.  */
Scm_HashString(ScmString * str,u_long modulo)508 u_long Scm_HashString(ScmString *str, u_long modulo)
509 {
510     u_long hashval = internal_string_hash(str, Scm_HashSaltRef(), FALSE);
511     if (modulo == 0) return hashval&HASHMASK;
512     else return (hashval % modulo);
513 }
514 
515 /* Expose COMBINE. */
Scm_CombineHashValue(u_long a,u_long b)516 u_long Scm_CombineHashValue(u_long a, u_long b)
517 {
518     u_long c = COMBINE(a, b);
519 #if SIZEOF_LONG == 8
520     /* we limit portable hash value to 32bit. */
521     c &= 0xffffffff;
522 #endif /**/
523     return c;
524 }
525 
526 /*------------------------------------------------------------
527  * Parameterization
528  *
529  * Conceptually hash tables are parameterized by hash function and
530  * compare function.  However, if they are trivial functions, calling
531  * them via function pointers incur overhead.  So we layered the
532  * parameterization.
533  *
534  * For the pre-defined simple hash tables, the calls to the hash and
535  * compare functions are inlined in a single "access" function.
536  * (In this case hashfn and cmpfn are never used.)
537  * For the generic hash tables, the general_access function uses
538  * the info in hashfn and cmpfn fields.
539  *
540  * The accessor function takes three arguments.
541  *
542  *     ScmHashCore *core   : hash table core
543  *     intptr_t key        : key
544  *     ScmDictOp op        : operation
545  */
546 
547 /* NOTE: eq?, eqv?, and string=? hash tables are guaranteed not to
548  * throw an error during hash table access (except the case that string=?
549  * hash table gets non-string key).  So the caller doesn't need to
550  * set unwind handler in case it needs cleanup (like unlocking mutex).
551  * However, equal? hash may call back to Scheme method, so it can
552  * throw Scheme error.  Be aware of that.
553  */
554 
555 /*
556  * Common function called when the accessor function needs to add an entry.
557  */
insert_entry(ScmHashCore * table,intptr_t key,u_long hashval,int index)558 static Entry *insert_entry(ScmHashCore *table,
559                            intptr_t key,
560                            u_long   hashval,
561                            int index)
562 {
563     Entry *e = SCM_NEW(Entry);
564     Entry **buckets = BUCKETS(table);
565     e->key = key;
566     e->value = 0;
567     e->next = buckets[index];
568     e->hashval = hashval;
569     buckets[index] = e;
570     table->numEntries++;
571 
572     if (table->numEntries > table->numBuckets*MAX_AVG_CHAIN_LIMITS) {
573         /* Extend the table */
574         int newsize = (table->numBuckets << EXTEND_BITS);
575         int newbits = table->numBucketsLog2 + EXTEND_BITS;
576 
577         Entry **newb = SCM_NEW_ARRAY(Entry*, newsize);
578         for (int i=0; i<newsize; i++) newb[i] = NULL;
579 
580         ScmHashIter iter;
581         Entry *f;
582         Scm_HashIterInit(&iter, table);
583         while ((f = (Entry*)Scm_HashIterNext(&iter)) != NULL) {
584             index = HASH2INDEX(newsize, newbits, f->hashval);
585             f->next = newb[index];
586             newb[index] = f;
587         }
588         /* gc friendliness */
589         for (int i=0; i<table->numBuckets; i++) table->buckets[i] = NULL;
590 
591         table->numBuckets = newsize;
592         table->numBucketsLog2 = newbits;
593         table->buckets = (void**)newb;
594     }
595     return e;
596 }
597 
598 /* NB: Deleting entry E doesn't modify E's key and value, but cut
599    the "next" link for the sake of weak-gc robustness.  The hash core
600    iterator prefetches a pointer to the next entry, so deleting the
601    "current" entry of iteration is safe as far as other iterators
602    are running on the same hash table. */
delete_entry(ScmHashCore * table,Entry * entry,Entry * prev,int index)603 static Entry *delete_entry(ScmHashCore *table,
604                            Entry *entry, Entry *prev,
605                            int index)
606 {
607     if (prev) prev->next = entry->next;
608     else table->buckets[index] = (void*)entry->next;
609     table->numEntries--;
610     SCM_ASSERT(table->numEntries >= 0);
611     entry->next = NULL;         /* GC friendliness */
612     return entry;
613 }
614 
615 #define FOUND(table, op, e, p, index)                   \
616     do {                                                \
617         switch (op) {                                   \
618         case SCM_DICT_GET:;                             \
619         case SCM_DICT_CREATE:;                          \
620             return e;                                   \
621         case SCM_DICT_DELETE:;                          \
622             return delete_entry(table, e, p, index);    \
623         }                                               \
624     } while (0)
625 
626 #define NOTFOUND(table, op, key, hashval, index)                \
627     do {                                                        \
628         if (op == SCM_DICT_CREATE) {                            \
629            return insert_entry(table, key, hashval, index);     \
630         } else {                                                \
631            return NULL;                                         \
632         }                                                       \
633     } while (0)
634 
635 /*
636  * Accessor function for address.   Used for EQ-type hash.
637  */
address_access(ScmHashCore * table,intptr_t key,ScmDictOp op)638 static Entry *address_access(ScmHashCore *table,
639                              intptr_t key,
640                              ScmDictOp op)
641 {
642     u_long hashval, index;
643     Entry **buckets = (Entry**)table->buckets;
644 
645     ADDRESS_HASH(hashval, key);
646     index = HASH2INDEX(table->numBuckets, table->numBucketsLog2, hashval);
647 
648     for (Entry *e = buckets[index], *p = NULL; e; p = e, e = e->next) {
649         if (e->key == key) FOUND(table, op, e, p, index);
650     }
651     NOTFOUND(table, op, key, hashval, index);
652 }
653 
address_hash(const ScmHashCore * ht SCM_UNUSED,intptr_t obj)654 static u_long address_hash(const ScmHashCore *ht SCM_UNUSED, intptr_t obj)
655 {
656     u_long hashval;
657     ADDRESS_HASH(hashval, obj);
658     return hashval;
659 }
660 
address_cmp(const ScmHashCore * ht SCM_UNUSED,intptr_t key,intptr_t k2)661 static int address_cmp(const ScmHashCore *ht SCM_UNUSED,
662                        intptr_t key, intptr_t k2)
663 {
664     return (key == k2);
665 }
666 
667 /*
668  * Accessor function for equal and eqv-hash.
669  * We assume KEY is ScmObj.
670  */
eqv_hash(const ScmHashCore * ht SCM_UNUSED,intptr_t key)671 static u_long eqv_hash(const ScmHashCore *ht SCM_UNUSED, intptr_t key)
672 {
673     return Scm_EqvHash(SCM_OBJ(key));
674 }
675 
eqv_cmp(const ScmHashCore * ht SCM_UNUSED,intptr_t key,intptr_t k2)676 static int eqv_cmp(const ScmHashCore *ht SCM_UNUSED, intptr_t key, intptr_t k2)
677 {
678     return Scm_EqvP(SCM_OBJ(key), SCM_OBJ(k2));
679 }
680 
equal_hash(const ScmHashCore * ht SCM_UNUSED,intptr_t key)681 static u_long equal_hash(const ScmHashCore *ht SCM_UNUSED, intptr_t key)
682 {
683     return Scm_DefaultHash(SCM_OBJ(key));
684 }
685 
equal_cmp(const ScmHashCore * ht SCM_UNUSED,intptr_t key,intptr_t k2)686 static int equal_cmp(const ScmHashCore *ht SCM_UNUSED, intptr_t key, intptr_t k2)
687 {
688     return Scm_EqualP(SCM_OBJ(key), SCM_OBJ(k2));
689 }
690 
691 
692 /*
693  * Accessor function for string type.
694  */
string_access(ScmHashCore * table,intptr_t k,ScmDictOp op)695 static Entry *string_access(ScmHashCore *table, intptr_t k, ScmDictOp op)
696 {
697     ScmObj key = SCM_OBJ(k);
698 
699     if (!SCM_STRINGP(key)) {
700         Scm_Error("Got non-string key %S to the string hashtable.", key);
701     }
702     u_long hashval = Scm_HashString(SCM_STRING(key), 0);
703     u_long index = HASH2INDEX(table->numBuckets, table->numBucketsLog2, hashval);
704     Entry **buckets = (Entry**)table->buckets;
705 
706     const ScmStringBody *keyb = SCM_STRING_BODY(key);
707     long size = SCM_STRING_BODY_SIZE(keyb);
708     for (Entry *e = buckets[index], *p = NULL; e; p = e, e = e->next) {
709         ScmObj ee = SCM_OBJ(e->key);
710         const ScmStringBody *eeb = SCM_STRING_BODY(ee);
711         int eesize = SCM_STRING_BODY_SIZE(eeb);
712         if (size == eesize
713             && memcmp(SCM_STRING_BODY_START(keyb),
714                       SCM_STRING_BODY_START(eeb), eesize) == 0){
715             FOUND(table, op, e, p, index);
716         }
717     }
718     NOTFOUND(table, op, k, hashval, index);
719 }
720 
string_hash(const ScmHashCore * ht SCM_UNUSED,intptr_t key)721 static u_long string_hash(const ScmHashCore *ht SCM_UNUSED, intptr_t key)
722 {
723     return Scm_HashString(SCM_STRING(key), 0);
724 }
725 
string_cmp(const ScmHashCore * ht SCM_UNUSED,intptr_t k1,intptr_t k2)726 static int string_cmp(const ScmHashCore *ht SCM_UNUSED, intptr_t k1, intptr_t k2)
727 {
728     const ScmStringBody *b1 = SCM_STRING_BODY(k1);
729     const ScmStringBody *b2 = SCM_STRING_BODY(k2);
730     return ((SCM_STRING_BODY_SIZE(b1) == SCM_STRING_BODY_SIZE(b2))
731             && (memcmp(SCM_STRING_BODY_START(b1),
732                        SCM_STRING_BODY_START(b2),
733                        SCM_STRING_BODY_SIZE(b1)) == 0));
734 }
735 
736 /*
737  * Accessor function for multiword raw hashtable.
738  * Key points to an array of N words.
739  */
740 #if 0                           /* not used yet */
741 static u_long multiword_hash(const ScmHashCore *table, intptr_t key)
742 {
743     ScmWord keysize = (ScmWord)table->data;
744     ScmWord *keyarray = (ScmWord*)key;
745     u_long h = 0, h1;
746     for (int i=0; i<keysize; i++) {
747         ADDRESS_HASH(h1, keyarray[i]);
748         h = COMBINE(h, h1);
749     }
750     return h;
751 }
752 #endif
753 
754 #if 0
755 static Entry *multiword_access(ScmHashCore *table, intptr_t k, ScmDictOp op)
756 {
757     u_long hashval, index;
758     ScmWord keysize = (ScmWord)table->data;
759 
760     hashval = multiword_hash(table, k);
761     index = HASH2INDEX(table->numBuckets, table->numBucketsLog2, hashval);
762     Entry **buckets = (Entry**)table->buckets;
763 
764     for (Entry *e = buckets[index], *p = NULL; e; p = e, e = e->next) {
765         if (memcmp((void*)k, (void*)e->key, keysize*sizeof(ScmWord)) == 0)
766             FOUND(table, op, e, p, index);
767     }
768     NOTFOUND(table, op, k, hashval, index);
769 }
770 #endif
771 
772 
773 /*
774  * Accessor function for general case
775  *    (hashfn and cmpfn are given by user)
776  */
general_access(ScmHashCore * table,intptr_t key,ScmDictOp op)777 static Entry *general_access(ScmHashCore *table, intptr_t key, ScmDictOp op)
778 {
779     u_long hashval, index;
780 
781     hashval = table->hashfn(table, key);
782     index = HASH2INDEX(table->numBuckets, table->numBucketsLog2, hashval);
783     Entry **buckets = (Entry**)table->buckets;
784 
785     for (Entry *e = buckets[index], *p = NULL; e; p = e, e = e->next) {
786         if (table->cmpfn(table, key, e->key)) FOUND(table, op, e, p, index);
787     }
788     NOTFOUND(table, op, key, hashval, index);
789 }
790 
791 /*============================================================
792  * Hash Core functions
793  */
794 
hash_core_init(ScmHashCore * table,SearchProc * accessfn,ScmHashProc * hashfn,ScmHashCompareProc * cmpfn,unsigned int initSize,void * data)795 static void hash_core_init(ScmHashCore *table,
796                            SearchProc  *accessfn,
797                            ScmHashProc *hashfn,
798                            ScmHashCompareProc *cmpfn,
799                            unsigned int initSize,
800                            void *data)
801 {
802     if (initSize != 0) initSize = round2up(initSize);
803     else initSize = DEFAULT_NUM_BUCKETS;
804 
805     Entry **b = SCM_NEW_ARRAY(Entry*, initSize);
806     table->buckets = (void**)b;
807     table->numBuckets = initSize;
808     table->numEntries = 0;
809     table->accessfn = (void*)accessfn;
810     table->hashfn = hashfn;
811     table->cmpfn = cmpfn;
812     table->data = data;
813     table->numBucketsLog2 = 0;
814     for (u_int i=initSize; i > 1; i /= 2) {
815         table->numBucketsLog2++;
816     }
817     for (u_int i=0; i<initSize; i++) table->buckets[i] = NULL;
818 }
819 
820 /* choose appropriate procedures for predefined hash types. */
hash_core_predef_procs(ScmHashType type,SearchProc ** accessfn,ScmHashProc ** hashfn,ScmHashCompareProc ** cmpfn)821 int  hash_core_predef_procs(ScmHashType type,
822                             SearchProc  **accessfn,
823                             ScmHashProc **hashfn,
824                             ScmHashCompareProc **cmpfn)
825 {
826     switch (type) {
827     case SCM_HASH_EQ:
828     case SCM_HASH_WORD:
829         *accessfn = address_access;
830         *hashfn = address_hash;
831         *cmpfn  = address_cmp;
832         return TRUE;
833     case SCM_HASH_EQV:
834         *accessfn = general_access;
835         *hashfn = eqv_hash;
836         *cmpfn  = eqv_cmp;
837         return TRUE;
838     case SCM_HASH_EQUAL:
839         *accessfn = general_access;
840         *hashfn = equal_hash;
841         *cmpfn  = equal_cmp;
842         return TRUE;
843     case SCM_HASH_STRING:
844         *accessfn = string_access;
845         *hashfn = string_hash;
846         *cmpfn  = string_cmp;
847         return TRUE;
848     default:
849         return FALSE;
850     }
851 }
852 
Scm_HashCoreInitSimple(ScmHashCore * core,ScmHashType type,unsigned int initSize,void * data)853 void Scm_HashCoreInitSimple(ScmHashCore *core,
854                             ScmHashType type,
855                             unsigned int initSize,
856                             void *data)
857 {
858     SearchProc  *accessfn = NULL;
859     ScmHashProc *hashfn = NULL;
860     ScmHashCompareProc *cmpfn = NULL;
861 
862     if (hash_core_predef_procs(type, &accessfn, &hashfn, &cmpfn) == FALSE) {
863         Scm_Error("[internal error]: wrong TYPE argument passed to Scm_HashCoreInitSimple: %d", type);
864     }
865     hash_core_init(core, accessfn, hashfn, cmpfn, initSize, data);
866 }
867 
Scm_HashCoreInitGeneral(ScmHashCore * core,ScmHashProc * hashfn,ScmHashCompareProc * cmpfn,unsigned int initSize,void * data)868 void Scm_HashCoreInitGeneral(ScmHashCore *core,
869                              ScmHashProc *hashfn,
870                              ScmHashCompareProc *cmpfn,
871                              unsigned int initSize,
872                              void *data)
873 {
874     hash_core_init(core, general_access, hashfn,
875                    cmpfn, initSize, data);
876 }
877 
Scm_HashCoreTypeToProcs(ScmHashType type,ScmHashProc ** hashfn,ScmHashCompareProc ** cmpfn)878 int Scm_HashCoreTypeToProcs(ScmHashType type,
879                             ScmHashProc **hashfn,
880                             ScmHashCompareProc **cmpfn)
881 {
882     SearchProc *accessfn;       /* dummy */
883     return hash_core_predef_procs(type, &accessfn, hashfn, cmpfn);
884 }
885 
Scm_HashCoreCopy(ScmHashCore * dst,const ScmHashCore * src)886 void Scm_HashCoreCopy(ScmHashCore *dst, const ScmHashCore *src)
887 {
888     Entry **b = SCM_NEW_ARRAY(Entry*, src->numBuckets);
889 
890     for (int i=0; i<src->numBuckets; i++) {
891         Entry *p = NULL;
892         Entry *s = (Entry*)src->buckets[i];
893         b[i] = NULL;
894         while (s) {
895             Entry *e = SCM_NEW(Entry);
896             e->key = s->key;
897             e->value = s->value;
898             e->next = NULL;
899             e->hashval = s->hashval;
900             if (p) p->next = e;
901             else   b[i] = e;
902             p = e;
903             s = s->next;
904         }
905     }
906 
907     /* A little trick to avoid hazard in careless race condition */
908     dst->numBuckets = dst->numEntries = 0;
909 
910     dst->buckets = (void**)b;
911     dst->hashfn   = src->hashfn;
912     dst->cmpfn    = src->cmpfn;
913     dst->accessfn = src->accessfn;
914     dst->data     = src->data;
915     dst->numEntries = src->numEntries;
916     dst->numBucketsLog2 = src->numBucketsLog2;
917     dst->numBuckets = src->numBuckets;
918 }
919 
Scm_HashCoreClear(ScmHashCore * table)920 void Scm_HashCoreClear(ScmHashCore *table)
921 {
922     for (int i=0; i<table->numBuckets; i++) {
923         table->buckets[i] = NULL;
924     }
925     table->numEntries = 0;
926 }
927 
Scm_HashCoreSearch(ScmHashCore * table,intptr_t key,ScmDictOp op)928 ScmDictEntry *Scm_HashCoreSearch(ScmHashCore *table, intptr_t key,
929                                  ScmDictOp op)
930 {
931     SearchProc *p = (SearchProc*)table->accessfn;
932     return (ScmDictEntry*)p(table, key, op);
933 }
934 
Scm_HashCoreNumEntries(ScmHashCore * table)935 int Scm_HashCoreNumEntries(ScmHashCore *table)
936 {
937     return table->numEntries;
938 }
939 
940 /*
941  * NB: It is important to keep the pointer to the "next" entry,
942  * not the "current", since the current entry may be deleted,
943  * erasing its next pointer.
944  */
Scm_HashIterInit(ScmHashIter * iter,ScmHashCore * table)945 void Scm_HashIterInit(ScmHashIter *iter, ScmHashCore *table)
946 {
947     iter->core = table;
948     for (int i=0; i<table->numBuckets; i++) {
949         if (table->buckets[i]) {
950             iter->bucket = i;
951             iter->next = table->buckets[i];
952             return;
953         }
954     }
955     iter->next = NULL;
956 }
957 
Scm_HashIterNext(ScmHashIter * iter)958 ScmDictEntry *Scm_HashIterNext(ScmHashIter *iter)
959 {
960     Entry *e = (Entry*)iter->next;
961     if (e != NULL) {
962         if (e->next) iter->next = e->next;
963         else {
964             int i = iter->bucket + 1;
965             for (; i < iter->core->numBuckets; i++) {
966                 if (iter->core->buckets[i]) {
967                     iter->bucket = i;
968                     iter->next = iter->core->buckets[i];
969                     return (ScmDictEntry*)e;
970                 }
971             }
972             iter->next = NULL;
973         }
974     }
975     return (ScmDictEntry*)e;
976 }
977 
978 /*============================================================
979  * Scheme <hash-table> object
980  */
981 
982 static void hash_print(ScmObj obj, ScmPort *port, ScmWriteContext *ctx);
983 
984 SCM_DEFINE_BUILTIN_CLASS(Scm_HashTableClass, hash_print, Scm_ObjectCompare,
985                          NULL, NULL,
986                          SCM_CLASS_DICTIONARY_CPL);
987 
Scm_MakeHashTableSimple(ScmHashType type,unsigned int initSize)988 ScmObj Scm_MakeHashTableSimple(ScmHashType type, unsigned int initSize)
989 {
990     /* We only allow ScmObj in <hash-table> */
991     if (type > SCM_HASH_GENERAL) {
992         Scm_Error("Scm_MakeHashTableSimple: wrong type arg: %d", type);
993     }
994     ScmHashTable *z = SCM_NEW(ScmHashTable);
995     SCM_SET_CLASS(z, SCM_CLASS_HASH_TABLE);
996     Scm_HashCoreInitSimple(&z->core, type, initSize, NULL);
997     z->type = type;
998     return SCM_OBJ(z);
999 }
1000 
Scm_MakeHashTableFull(ScmHashProc hashfn,ScmHashCompareProc cmpfn,unsigned int initSize,void * data)1001 ScmObj Scm_MakeHashTableFull(ScmHashProc hashfn,
1002                              ScmHashCompareProc cmpfn,
1003                              unsigned int initSize, void *data)
1004 {
1005     ScmHashTable *z = SCM_NEW(ScmHashTable);
1006     SCM_SET_CLASS(z, SCM_CLASS_HASH_TABLE);
1007     z->type = SCM_HASH_GENERAL;
1008     Scm_HashCoreInitGeneral(&z->core, hashfn, cmpfn, initSize, data);
1009     return SCM_OBJ(z);
1010 }
1011 
Scm_HashTableCopy(ScmHashTable * src)1012 ScmObj Scm_HashTableCopy(ScmHashTable *src)
1013 {
1014     ScmHashTable *dst = SCM_NEW(ScmHashTable);
1015     SCM_SET_CLASS(dst, SCM_CLASS_HASH_TABLE);
1016     Scm_HashCoreCopy(SCM_HASH_TABLE_CORE(dst), SCM_HASH_TABLE_CORE(src));
1017     dst->type = src->type;
1018     return SCM_OBJ(dst);
1019 }
1020 
Scm_HashTableType(ScmHashTable * ht)1021 ScmHashType Scm_HashTableType(ScmHashTable *ht)
1022 {
1023     return ht->type;
1024 }
1025 
Scm_HashTableRef(ScmHashTable * ht,ScmObj key,ScmObj fallback)1026 ScmObj Scm_HashTableRef(ScmHashTable *ht, ScmObj key, ScmObj fallback)
1027 {
1028     ScmDictEntry *e = Scm_HashCoreSearch(SCM_HASH_TABLE_CORE(ht),
1029                                          (intptr_t)key, SCM_DICT_GET);
1030     if (!e) return fallback;
1031     else    return SCM_DICT_VALUE(e);
1032 }
1033 
1034 /* Returns previous value; can return SCM_UNBOUND when the association hasn't
1035    been there.  Be careful not to let SCM_UNBOUND leak out to Scheme! */
Scm_HashTableSet(ScmHashTable * ht,ScmObj key,ScmObj value,int flags)1036 ScmObj Scm_HashTableSet(ScmHashTable *ht, ScmObj key, ScmObj value, int flags)
1037 {
1038     ScmDictEntry *e;
1039 
1040     e = Scm_HashCoreSearch(SCM_HASH_TABLE_CORE(ht),
1041                            (intptr_t)key,
1042                            (flags&SCM_DICT_NO_CREATE)?SCM_DICT_GET: SCM_DICT_CREATE);
1043     if (!e) return SCM_UNBOUND;
1044     ScmObj oldval = e->value? SCM_DICT_VALUE(e) : SCM_UNBOUND;
1045     if (!(flags&SCM_DICT_NO_OVERWRITE) || SCM_UNBOUNDP(oldval)) {
1046         return SCM_DICT_SET_VALUE(e, value);
1047     }
1048     return oldval;
1049 }
1050 
Scm_HashTableDelete(ScmHashTable * ht,ScmObj key)1051 ScmObj Scm_HashTableDelete(ScmHashTable *ht, ScmObj key)
1052 {
1053     ScmDictEntry *e = Scm_HashCoreSearch(SCM_HASH_TABLE_CORE(ht),
1054                                          (intptr_t)key, SCM_DICT_DELETE);
1055     if (e && e->value) return SCM_DICT_VALUE(e);
1056     else               return SCM_UNBOUND;
1057 }
1058 
Scm_HashTableKeys(ScmHashTable * table)1059 ScmObj Scm_HashTableKeys(ScmHashTable *table)
1060 {
1061     ScmHashIter iter;
1062     ScmDictEntry *e;
1063     ScmObj h = SCM_NIL, t = SCM_NIL;
1064     Scm_HashIterInit(&iter, SCM_HASH_TABLE_CORE(table));
1065     while ((e = Scm_HashIterNext(&iter)) != NULL) {
1066         SCM_APPEND1(h, t, SCM_DICT_KEY(e));
1067     }
1068     return h;
1069 }
1070 
Scm_HashTableValues(ScmHashTable * table)1071 ScmObj Scm_HashTableValues(ScmHashTable *table)
1072 {
1073     ScmHashIter iter;
1074     ScmDictEntry *e;
1075     ScmObj h = SCM_NIL, t = SCM_NIL;
1076     Scm_HashIterInit(&iter, SCM_HASH_TABLE_CORE(table));
1077     while ((e = Scm_HashIterNext(&iter)) != NULL) {
1078         SCM_APPEND1(h, t, SCM_DICT_VALUE(e));
1079     }
1080     return h;
1081 }
1082 
Scm_HashTableStat(ScmHashTable * table)1083 ScmObj Scm_HashTableStat(ScmHashTable *table)
1084 {
1085     ScmObj h = SCM_NIL, t = SCM_NIL;
1086     ScmHashCore *c = SCM_HASH_TABLE_CORE(table);
1087     SCM_APPEND1(h, t, SCM_MAKE_KEYWORD("num-entries"));
1088     SCM_APPEND1(h, t, Scm_MakeInteger(c->numEntries));
1089     SCM_APPEND1(h, t, SCM_MAKE_KEYWORD("num-buckets"));
1090     SCM_APPEND1(h, t, Scm_MakeInteger(c->numBuckets));
1091     SCM_APPEND1(h, t, SCM_MAKE_KEYWORD("num-buckets-log2"));
1092     SCM_APPEND1(h, t, Scm_MakeInteger(c->numBucketsLog2));
1093 
1094     Entry** b = BUCKETS(c);
1095     ScmVector *v = SCM_VECTOR(Scm_MakeVector(c->numBuckets, SCM_NIL));
1096     ScmObj *vp = SCM_VECTOR_ELEMENTS(v);
1097     for (int i = 0; i<c->numBuckets; i++, vp++) {
1098         Entry *e = b[i];
1099         for (; e; e = e->next) {
1100             *vp = Scm_Acons(SCM_DICT_KEY(e), SCM_DICT_VALUE(e), *vp);
1101         }
1102     }
1103     SCM_APPEND1(h, t, SCM_MAKE_KEYWORD("contents"));
1104     SCM_APPEND1(h, t, SCM_OBJ(v));
1105     return h;
1106 }
1107 
1108 
1109 /*
1110  * Utilities
1111  */
1112 
1113 /*
1114  * Printer
1115  */
1116 
hash_print(ScmObj obj,ScmPort * port,ScmWriteContext * ctx SCM_UNUSED)1117 static void hash_print(ScmObj obj, ScmPort *port,
1118                        ScmWriteContext *ctx SCM_UNUSED)
1119 {
1120     ScmHashTable *ht = (ScmHashTable*)obj;
1121     char *str = "";
1122 
1123     switch (ht->type) {
1124     case SCM_HASH_EQ:      str = "eq?"; break;
1125     case SCM_HASH_EQV:     str = "eqv?"; break;
1126     case SCM_HASH_EQUAL:   str = "equal?"; break;
1127     case SCM_HASH_STRING:  str = "string=?"; break;
1128     case SCM_HASH_GENERAL: str = "general"; break;
1129     default: Scm_Panic("something wrong with a hash table");
1130     }
1131 
1132 #if 0
1133     /* Use read-time constructor so that table can be read back
1134        --- is it necessary?  I'm not sure yet. */
1135     Scm_Printf(port, "#,(<hash-table> %s", str);
1136     if (ht->numEntries > 0) {
1137         Scm_HashIterInit(&iter, ht);
1138         while ((e = Scm_HashIterNext(&iter)) != NULL) {
1139             Scm_Printf(port, " %S %S", e->key, e->value);
1140         }
1141     }
1142     SCM_PUTZ(")", -1, port);
1143 #else
1144     Scm_Printf(port, "#<hash-table %s %p>", str, ht);
1145 #endif
1146 }
1147 
round2up(unsigned int val)1148 static unsigned int round2up(unsigned int val)
1149 {
1150     unsigned int n = 1;
1151     while (n < val) {
1152         n <<= 1;
1153         SCM_ASSERT(n > 1);      /* check overflow */
1154     }
1155     return n;
1156 }
1157 
1158 /*====================================================================
1159  * Initialization
1160  */
1161 
Scm__InitHash()1162 void Scm__InitHash()
1163 {
1164     struct timeval t;
1165     if (gettimeofday(&t, NULL) < 0) {
1166         Scm_Panic("gettimeofday failed");
1167     }
1168     u_long salt = ((u_long)getpid() * ((u_long)t.tv_sec^(u_long)t.tv_usec));
1169     ADDRESS_HASH(salt, salt);
1170     salt &= SCM_SMALL_INT_MAX;
1171     /*
1172      * We can't use Scm_BindPrimitiveParameter here, since symbol table
1173      * is not initialized yet (symbol table uses hashtable!)
1174      */
1175     hash_salt =
1176         Scm_MakePrimitiveParameter(SCM_CLASS_PRIMITIVE_PARAMETER, SCM_FALSE,
1177                                    Scm_MakeIntegerU(salt), 0);
1178     current_recursive_hash =
1179         Scm_MakePrimitiveParameter(SCM_CLASS_PRIMITIVE_PARAMETER, SCM_FALSE,
1180                                    SCM_FALSE, 0);
1181 }
1182 
1183 /*====================================================================
1184  * For backward compatibility
1185  */
1186 
1187 #if GAUCHE_API_VERSION < 1000
1188 /* Backward compatibility.
1189    NB: Casting ScmDictEntry* to ScmHashEntry* would be invalid if
1190    sizeof(intptr_t) and sizeof(void*) differ.  I know only one
1191    such platform (PlayStation2).  If it is a problem, moving to
1192    the new API is recommended. */
Scm_HashTableGet(ScmHashTable * ht,ScmObj key)1193 ScmHashEntry *Scm_HashTableGet(ScmHashTable *ht, ScmObj key)
1194 {
1195     if (sizeof(intptr_t) != sizeof(void*)) {
1196         Scm_Error("[internal] Scm_HashTableGet is obsoleted on this platform.  You should use the new hashtable API.");
1197     }
1198     return (ScmHashEntry*)Scm_HashCoreSearch(SCM_HASH_TABLE_CORE(ht),
1199                                              (intptr_t)key,
1200                                              SCM_DICT_GET);
1201 }
1202 
Scm_HashTableAdd(ScmHashTable * ht,ScmObj key,ScmObj value)1203 ScmHashEntry *Scm_HashTableAdd(ScmHashTable *ht, ScmObj key, ScmObj value)
1204 {
1205     ScmDictEntry *e = Scm_HashCoreSearch(SCM_HASH_TABLE_CORE(ht),
1206                                          (intptr_t)key, SCM_DICT_CREATE);
1207     if (sizeof(intptr_t) != sizeof(void*)) {
1208         Scm_Error("[internal] Scm_HashTableAdd is obsoleted on this platform.  You should use the new hashtable API.");
1209     }
1210     if (!e->value) (void)SCM_DICT_SET_VALUE(e, value);
1211     return (ScmHashEntry*)e;
1212 }
1213 
Scm_HashTablePut(ScmHashTable * ht,ScmObj key,ScmObj value)1214 ScmHashEntry *Scm_HashTablePut(ScmHashTable *ht, ScmObj key, ScmObj value)
1215 {
1216     ScmDictEntry *e = Scm_HashCoreSearch(SCM_HASH_TABLE_CORE(ht),
1217                                          (intptr_t)key, SCM_DICT_CREATE);
1218     if (sizeof(intptr_t) != sizeof(void*)) {
1219         Scm_Error("[internal] Scm_HashTablePut is obsoleted on this platform.  You should use the new hashtable API.");
1220     }
1221     (void)SCM_DICT_SET_VALUE(e, value);
1222     return (ScmHashEntry*)e;
1223 }
1224 
1225 /* TRANSIENT: Pre-0.9 Compatibility routine.  Kept for the binary compatibility.
1226    Will be removed on 1.0 */
Scm__HashIterInitCompat(ScmHashTable * table,ScmHashIter * iter)1227 void Scm__HashIterInitCompat(ScmHashTable *table, ScmHashIter *iter)
1228 {
1229     Scm_HashIterInit(iter, SCM_HASH_TABLE_CORE(table));
1230 }
1231 
1232 /* TRANSIENT: Pre-0.9 Compatibility routine.  Kept for the binary compatibility.
1233    Will be removed on 1.0 */
Scm__HashIterNextCompat(ScmHashIter * iter)1234 ScmHashEntry *Scm__HashIterNextCompat(ScmHashIter *iter)
1235 {
1236     ScmDictEntry *e = Scm_HashIterNext(iter);
1237     return (ScmHashEntry*)e;
1238 }
1239 
1240 #if 0
1241 ScmObj Scm_MakeHashTableMultiWord(int keysize, int initsize)
1242 {
1243     return make_hash_table(SCM_CLASS_HASH_TABLE, SCM_HASH_MULTIWORD,
1244                            multiword_access, multiword_hash,
1245                            NULL, initsize, (void*)SCM_WORD(keysize));
1246 }
1247 #endif
1248 
1249 /* Legacy constructor.  DEPRECATED.  Will go away soon. */
Scm_MakeHashTable(ScmHashProc * hashfn,ScmHashCompareProc * cmpfn SCM_UNUSED,unsigned int initSize)1250 ScmObj Scm_MakeHashTable(ScmHashProc *hashfn,
1251                          ScmHashCompareProc *cmpfn SCM_UNUSED,
1252                          unsigned int initSize)
1253 {
1254     if (hashfn == (ScmHashProc*)SCM_HASH_EQ) {
1255         return Scm_MakeHashTableSimple(SCM_HASH_EQ, initSize);
1256     } else if (hashfn == (ScmHashProc*)SCM_HASH_EQV) {
1257         return Scm_MakeHashTableSimple(SCM_HASH_EQV, initSize);
1258     } else if (hashfn == (ScmHashProc*)SCM_HASH_EQUAL) {
1259         return Scm_MakeHashTableSimple(SCM_HASH_EQUAL, initSize);
1260     } else if (hashfn == (ScmHashProc*)SCM_HASH_STRING) {
1261         return Scm_MakeHashTableSimple(SCM_HASH_STRING, initSize);
1262     }
1263 #if 0
1264     else {
1265         return Scm_MakeHashTableFull(SCM_CLASS_HASH_TABLE, SCM_HASH_GENERAL,
1266                                      hashfn, cmpfn, initSize, NULL);
1267     }
1268 #else
1269     return SCM_UNDEFINED;
1270 #endif
1271 }
1272 #endif /*GAUCHE_API_VERSION < 1000*/
1273 
1274 /* Legacy hash function.
1275  *
1276  * This used to be used for equal?-hashtable hash.  It also guaranteed
1277  * that the hash result won't change between runs and among different
1278  * platforms, so it can be used for persistent data.
1279  *
1280  * There are several drawbacks, though.  The guaranteed hash value means
1281  * we can't change hash function.   The quality of the original hash
1282  * function wasn't good (it behaves terrible on flonums and compnums);
1283  * it's vulnerable to collision attacks; and it had a few bugs in the
1284  * number hash that broke the 'portable' guarantee between platforms.
1285  *
1286  * Since there have already been stored data relying on the original hash
1287  * values, we keep the old function (with bugs fixed) here.
1288  * Scm_Hash() and Scheme's 'hash' function uses this for the backward
1289  * comaptibility, but it is not recommended for the new code.
1290  */
1291 
1292 static u_long legacy_number_hash(ScmObj obj);
1293 static u_long legacy_string_hash(ScmString *str);
1294 
Scm_Hash(ScmObj obj)1295 u_long Scm_Hash(ScmObj obj)
1296 {
1297     if (!SCM_PTRP(obj)) {
1298         u_long hashval;
1299         SMALL_INT_HASH(hashval, (u_long)SCM_WORD(obj));
1300         return hashval&PORTABLE_HASHMASK;
1301     } else if (SCM_NUMBERP(obj)) {
1302         return legacy_number_hash(obj);
1303     } else if (SCM_STRINGP(obj)) {
1304         goto string_hash;
1305     } else if (SCM_PAIRP(obj)) {
1306         u_long h = 0, h2;
1307         ScmObj cp;
1308         SCM_FOR_EACH(cp, obj) {
1309             h2 = Scm_Hash(SCM_CAR(cp));
1310             h = COMBINE(h, h2);
1311         }
1312         h2 = Scm_Hash(cp);
1313         h = COMBINE(h, h2);
1314         return h&PORTABLE_HASHMASK;
1315     } else if (SCM_VECTORP(obj)) {
1316         int siz = SCM_VECTOR_SIZE(obj);
1317         u_long h = 0, h2;
1318         for (int i=0; i<siz; i++) {
1319             h2 = Scm_Hash(SCM_VECTOR_ELEMENT(obj, i));
1320             h = COMBINE(h, h2);
1321         }
1322         return h&PORTABLE_HASHMASK;
1323     } else if (SCM_KEYWORDP(obj)) {
1324         /* If we have keyword and symbol unified, KEYWORD_NAME includes
1325            ':'.  Legacy hash didn't consider it. */
1326 #if GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION
1327         if (SCM_SYMBOLP(obj)) {
1328             obj = Scm_Substring(SCM_KEYWORD_NAME(obj), 1, -1, FALSE);
1329         } else {
1330             obj = SCM_OBJ(SCM_KEYWORD_NAME(obj));
1331         }
1332 #else  /*!GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
1333         obj = Scm_Substring(SCM_KEYWORD_NAME(obj), 1, -1, FALSE);
1334 #endif /*!GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
1335         goto string_hash;
1336     } else if (SCM_SYMBOLP(obj)) {
1337         obj = SCM_OBJ(SCM_SYMBOL_NAME(obj));
1338         goto string_hash;
1339     } else {
1340         /* Call specialized object-hash method */
1341         ScmObj r = Scm_ApplyRec(SCM_OBJ(&Scm_GenericObjectHash),
1342                                 SCM_LIST1(obj));
1343         if (SCM_INTP(r)) {
1344             return ((u_long)SCM_INT_VALUE(r))&PORTABLE_HASHMASK;
1345         }
1346         if (SCM_BIGNUMP(r)) {
1347             /* NB: Scm_GetUInteger clamps the result to [0, ULONG_MAX],
1348                but taking the LSW would give better distribution. */
1349             return (SCM_BIGNUM(r)->values[0])&PORTABLE_HASHMASK;
1350         }
1351         Scm_Error("object-hash returned non-integer: %S", r);
1352         return 0;               /* dummy */
1353     }
1354   string_hash:
1355     return legacy_string_hash(SCM_STRING(obj));
1356 }
1357 
legacy_flonum_hash(double f)1358 static u_long legacy_flonum_hash(double f)
1359 {
1360     /* Originally the code was just (u_long)(f * 2654435761UL), but that's
1361        UB when the multiplication yields out of range of u_long.  I don't
1362        even remember why I adopted that for the hash function in the
1363        first place, but we have to stick to existing hash values recorded
1364        elsewhere.
1365 
1366        On x86 with 8087-compatible FPU, (u_long)(d) behaves as follows.
1367        If -2^63 < d < 2^63, the modulo of 2^32 is taken.  Otherwise
1368        it yields 0.
1369 
1370        On x86 with SSE, the out-of-range value yields #x8000_0000.
1371 
1372        On x86_64, if -2^63 < d < 2^64, the modulo of 2^32 is taken.
1373        Otherwise it yields #x8000_0000_0000_0000.
1374 
1375        To achieve maximum compatibility with historical data, we
1376        take the range of 8087, and the calculation is adjusted for
1377        x86_64 behavior.  There can be a slight discrepancy from the
1378        result of 8087 because of its internal 80bit calculation---for
1379        example, 3.767278962604362e-10 * 2654435761 is just tiny little
1380        bit less than 1.0 but with 64bit calculation it is rounded up
1381        to 1.0.  With 80 bit and integer truncation the result is 0 but
1382        with 64bit we get 1.
1383      */
1384     volatile double d = f * 2654435761UL;
1385     static double two_pow_63 = 0.0;
1386     static double minus_two_pow_63 = 0.0;
1387     static double two_pow_32 = 0.0;
1388     static int initialized = FALSE;
1389 
1390     if (!initialized) {
1391         /* This is idempotent - no need to lock */
1392         two_pow_63 = ldexp(1.0, 63);
1393         minus_two_pow_63 = -ldexp(1.0, 63);
1394         two_pow_32 = ldexp(1.0, 32);
1395         AO_nop_full();
1396         initialized = TRUE;
1397     }
1398     /* This condition eliminates NaN as well. */
1399     if (!(minus_two_pow_63 < d && d < two_pow_63)) return 0;
1400     if (-0.5 < d && d < 0.5) return 0;
1401 
1402     double dm = trunc(fmod(d, two_pow_32));
1403     if (dm < 0) dm += two_pow_32;
1404     return (u_long)trunc(dm);
1405 }
1406 
1407 /* Old hash function for numeric objects.  This is terrible for flonums,
1408    and we only keep it in order to maintain portable hash value generated
1409    by legacy hash function. */
legacy_number_hash(ScmObj obj)1410 static u_long legacy_number_hash(ScmObj obj)
1411 {
1412     u_long hashval;
1413     SCM_ASSERT(SCM_NUMBERP(obj));
1414     if (SCM_INTP(obj)) {
1415         SMALL_INT_HASH(hashval, SCM_INT_VALUE(obj));
1416     } else if (SCM_BIGNUMP(obj)) {
1417         u_int i;
1418         u_long u = 0;
1419         for (i=0; i<SCM_BIGNUM_SIZE(obj); i++) {
1420 #if SIZEOF_LONG == 4
1421             u += SCM_BIGNUM(obj)->values[i];
1422 #elif SIZEOF_LONG == 8
1423             u += (SCM_BIGNUM(obj)->values[i] & ((1UL<<32) - 1))
1424                 + (SCM_BIGNUM(obj)->values[i] >> 32);
1425 #else
1426 #error "sizeof(long) > 8 platform unsupported"
1427 #endif
1428         }
1429         SMALL_INT_HASH(hashval, u);
1430     } else if (SCM_FLONUMP(obj)) {
1431         hashval = legacy_flonum_hash(SCM_FLONUM_VALUE(obj));
1432     } else if (SCM_RATNUMP(obj)) {
1433         /* Ratnum must already be normalized, so we can simply combine
1434            hashvals of numerator and denominator. */
1435         u_long h1 = legacy_number_hash(SCM_RATNUM_NUMER(obj));
1436         u_long h2 = legacy_number_hash(SCM_RATNUM_DENOM(obj));
1437         hashval = COMBINE(h1, h2);
1438     } else {
1439         hashval =
1440             legacy_flonum_hash(SCM_COMPNUM_REAL(obj)+SCM_COMPNUM_IMAG(obj));
1441     }
1442     return hashval&PORTABLE_HASHMASK;
1443 }
1444 
1445 /* Legacy hash function for strings.  This isn't very good hash function
1446    either, and it's difficult to adopt salting. */
legacy_string_hash(ScmString * str)1447 static u_long legacy_string_hash(ScmString *str)
1448 {
1449     const ScmStringBody *b = SCM_STRING_BODY(str);
1450     const char *p = SCM_STRING_BODY_START(b);
1451     long k = SCM_STRING_BODY_SIZE(b);
1452     u_long hv = 0;
1453     while (k-- > 0) {
1454         hv = (hv<<5) - (hv) + ((unsigned char)*p++);
1455     }
1456     return hv&PORTABLE_HASHMASK;
1457 }
1458