1 /*
2 * hash.c - hash table implementation
3 *
4 * Copyright (c) 2000-2020 Shiro Kawai <shiro@acm.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the authors nor the names of its contributors
18 * may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #define LIBGAUCHE_BODY
35 #include <math.h>
36 #include "gauche.h"
37 #include "gauche/class.h"
38 #include "gauche/priv/atomicP.h"
39
40 /*============================================================
41 * Internal structures
42 */
43
44 /* The beginning of this structure must match ScmDictEntry. */
45 typedef struct EntryRec {
46 intptr_t key;
47 intptr_t value;
48 struct EntryRec *next;
49 u_long hashval;
50 } Entry;
51
52 #define BUCKETS(hc) ((Entry**)hc->buckets)
53
54 #define DEFAULT_NUM_BUCKETS 4
55 #define MAX_AVG_CHAIN_LIMITS 3
56 #define EXTEND_BITS 2
57
58 /* We limit portable hash value to 32bits */
59 #define PORTABLE_HASHMASK 0xffffffffUL
60
61 /* For other hash values, we limit it in the fixnum range. */
62 #define HASHMASK SCM_SMALL_INT_MAX
63
64 typedef Entry *SearchProc(ScmHashCore *core, intptr_t key, ScmDictOp op);
65
66 static u_int round2up(unsigned int val);
67
68 /*============================================================
69 * Hash salt
70 */
71
72 /* The salt value is nonnegative fixnum. For the time being, we initialize
73 the default salt value for each run of the process; we might do
74 per-hashtable salt in future.
75
76 Internally we use parameter slot to keep the hash salt value, but
77 we provide dedicated C API to access it to avoid overhead of parameter
78 mechanism.
79 */
80
81 static ScmPrimitiveParameter *hash_salt; /* initialized by Scm__InitHash() */
82
Scm_HashSaltRef()83 ScmSmallInt Scm_HashSaltRef()
84 {
85 return SCM_INT_VALUE(Scm_PrimitiveParameterRef(Scm_VM(), hash_salt));
86 }
87
Scm_HashSaltSet(ScmSmallInt newval)88 ScmSmallInt Scm_HashSaltSet(ScmSmallInt newval) /* returns old value */
89 {
90 return SCM_INT_VALUE(Scm_PrimitiveParameterSet(Scm_VM(), hash_salt,
91 SCM_MAKE_INT(newval)));
92 }
93
94 /*============================================================
95 * Hash functions
96 */
97
98 /* Hash function calculates 32bit hash value from the given object.
99 HASH2INDEX macro maps the hash value to the bucket number.
100 (On 64 bit architecture, it's OK to calculate 64bit, but the
101 upper bits are discarded by HASH2INDEX to maintain compatibility. */
102
103 /* Integer and address. */
104 /* Integer and address hash is a variation of "multiplicative hashing"
105 scheme described in Knuth, TAOCP, section 6.4. The final shifting
106 is done by HASH2INDEX macro */
107
108 #define SMALL_INT_HASH(result, val) \
109 (result) = ((val)*2654435761UL)
110
111 #define ADDRESS_HASH(result, val) \
112 (result) = (u_long)((SCM_WORD(val) >> 3)*2654435761UL)
113
114 /* HASH2INDEX
115 Map a hash value to bucket number.
116 We fix the word length to 32bits, since the multiplication
117 constant above is fixed. */
118 #define HASH2INDEX(tabsiz, bits, hashval) \
119 (((hashval)+((hashval)>>(32-(bits)))) & ((tabsiz) - 1))
120
121 /* Combining two hash values. */
122 #define COMBINE(hv1, hv2) ((hv1)*5+(hv2))
123
124 /* For strings, we employ siphash.
125 We use public domain implementation by Sam Trenholme
126 http://samiam.org/blog/20131006.html. It has a version suitable
127 for 32bit architecture, too.
128 See dws_adapter.h for the details.
129 */
130 #define SCM_DWSIPHASH_INTERFACE
131 #include "gauche/priv/dws_adapter.h"
132
Scm_EqHash(ScmObj obj)133 u_long Scm_EqHash(ScmObj obj)
134 {
135 u_long hashval;
136 ADDRESS_HASH(hashval, obj);
137 return hashval&HASHMASK;
138 }
139
140 static u_long number_hash(ScmObj obj, u_long salt, int portable);
141
flonum_hash(double d,u_long salt,int portable)142 static u_long flonum_hash(double d, u_long salt, int portable)
143 {
144 int exp, sign;
145 ScmObj mantissa = Scm_DecodeFlonum(d, &exp, &sign);
146 u_long xh;
147 SMALL_INT_HASH(xh, exp*sign);
148 if (SCM_NUMBERP(mantissa)) {
149 return COMBINE(number_hash(mantissa, salt, portable), xh);
150 } else {
151 /* d is not finite. we just map +inf.0, -inf.0 and nan.0 to 0. */
152 return 0;
153 }
154 }
155
number_hash(ScmObj obj,u_long salt,int portable)156 static u_long number_hash(ScmObj obj, u_long salt, int portable)
157 {
158 u_long hashval;
159 if (SCM_INTP(obj)) {
160 /* On 64bit platform, if we have fixnum that is beyond the range
161 of 32bit fixnum, we have to calculate the hash value the same
162 way as 32bit bignum would do. */
163 long u = SCM_INT_VALUE(obj);
164 if (portable) {
165 if (u < 0) u = -u; /* safe, for u is in fixnum range */
166 #if SIZEOF_LONG == 8
167 u = ((u & ((1UL<<32) - 1)) + (u >> 32)) & ((1UL<<32)-1);
168 #endif
169 }
170 SMALL_INT_HASH(hashval, u);
171 } else if (SCM_BIGNUMP(obj)) {
172 if (portable) {
173 u_int i;
174 u_long u = 0;
175 for (i=0; i<SCM_BIGNUM_SIZE(obj); i++) {
176 #if SIZEOF_LONG == 4
177 u += SCM_BIGNUM(obj)->values[i];
178 #elif SIZEOF_LONG == 8
179 u += (SCM_BIGNUM(obj)->values[i] & ((1UL<<32) - 1))
180 + (SCM_BIGNUM(obj)->values[i] >> 32);
181 #else
182 #error "sizeof(long) > 8 platform unsupported"
183 #endif
184 }
185 SMALL_INT_HASH(hashval, u);
186 } else {
187 u_int i;
188 u_long u = 0;
189 for (i=0; i<SCM_BIGNUM_SIZE(obj); i++) {
190 u += SCM_BIGNUM(obj)->values[i];
191 }
192 SMALL_INT_HASH(hashval, u);
193 }
194 } else if (SCM_FLONUMP(obj)) {
195 hashval = flonum_hash(SCM_FLONUM_VALUE(obj), salt, portable);
196 } else if (SCM_RATNUMP(obj)) {
197 /* Ratnum must already be normalized, so we can simply combine
198 hashvals of numerator and denominator. */
199 u_long h1 = number_hash(SCM_RATNUM_NUMER(obj), salt, portable);
200 u_long h2 = number_hash(SCM_RATNUM_DENOM(obj), salt, portable);
201 hashval = COMBINE(h1, h2);
202 } else {
203 SCM_ASSERT(SCM_COMPNUMP(obj));
204 hashval = COMBINE(flonum_hash(SCM_COMPNUM_REAL(obj), salt, portable),
205 flonum_hash(SCM_COMPNUM_IMAG(obj), salt, portable));
206 }
207 return hashval & (portable ? PORTABLE_HASHMASK : HASHMASK);
208 }
209
Scm_EqvHash(ScmObj obj)210 u_long Scm_EqvHash(ScmObj obj)
211 {
212 u_long hashval;
213 if (SCM_NUMBERP(obj)) {
214 hashval = number_hash(obj, 0, FALSE);
215 } else {
216 ADDRESS_HASH(hashval, obj);
217 }
218 return hashval&HASHMASK;
219 }
220
internal_string_hash(ScmString * str,u_long salt,int portable)221 static u_long internal_string_hash(ScmString *str, u_long salt, int portable)
222 {
223 const ScmStringBody *b = SCM_STRING_BODY(str);
224 if (portable) {
225 return (u_long)Scm__DwSipPortableHash((uint8_t*)b->start, b->size,
226 salt, salt);
227 } else {
228 return Scm__DwSipDefaultHash((uint8_t*)b->start, b->size,
229 salt, salt);
230 }
231 }
232
internal_uvector_hash(ScmUVector * u,u_long salt,int portable)233 static u_long internal_uvector_hash(ScmUVector *u, u_long salt, int portable)
234 {
235 if (portable) {
236 ScmUVectorType uvtype = Scm_UVectorType(Scm_ClassOf(SCM_OBJ(u)));
237 u_long r, seed;
238 size_t s = SCM_UVECTOR_SIZE(u);
239
240 switch (uvtype) {
241 /* We can use siphash if u is u8vector or s8vector. */
242 case SCM_UVECTOR_S8:
243 case SCM_UVECTOR_U8:
244 return Scm__DwSipPortableHash((uint8_t*)SCM_UVECTOR_ELEMENTS(u),
245 (uint32_t)Scm_UVectorSizeInBytes(u),
246 salt, salt^uvtype);
247 /* We need to avoid depending on endianness of multibyte numbers.
248 Using siphash after canonicalizing byte order can be expensive,
249 for we need to allocate a buffer.
250 (Unless we directly access siphash setup/round function
251 and feed one word at a time--which is tedious.)
252 The current code may not be ideal, but just as good as our
253 other primitive portable hash functions. */
254
255 /* Initial hash value. The seed value for each uvector type
256 is just a random value I generated. */
257 #define INIT_R(r, seed) SMALL_INT_HASH(r, seed^salt)
258
259 case SCM_UVECTOR_S16:
260 seed = 3499211612ul;
261 goto do16;
262 case SCM_UVECTOR_U16:
263 seed = 3890346734ul;
264 goto do16;
265 case SCM_UVECTOR_F16:
266 seed = 545404204ul;
267 do16:
268 {
269 INIT_R(r, seed);
270 for (size_t i=0; i<s; i++) {
271 u_long e = SCM_U16VECTOR_ELEMENT(u, i);
272 SMALL_INT_HASH(e, e);
273 r = COMBINE(r, e);
274 }
275 return r;
276 }
277 case SCM_UVECTOR_S32:
278 seed = 4161255391ul;
279 goto do32;
280 case SCM_UVECTOR_U32:
281 seed = 3922919429ul;
282 goto do32;
283 case SCM_UVECTOR_F32:
284 seed = 949333985ul;
285 do32:
286 {
287 INIT_R(r, seed);
288 for (size_t i=0; i<s; i++) {
289 u_long e = SCM_U32VECTOR_ELEMENT(u, i);
290 SMALL_INT_HASH(e, e);
291 r = COMBINE(r, e);
292 }
293 return r;
294 }
295 case SCM_UVECTOR_S64:
296 seed = 2715962298ul;
297 goto do64;
298 case SCM_UVECTOR_U64:
299 seed = 1323567403ul;
300 do64:
301 {
302 INIT_R(r, seed);
303 for (size_t i=0; i<s; i++) {
304 uint64_t e = SCM_U64VECTOR_ELEMENT(u, i);
305 u_long z = (e >> 32) ^ e;
306 SMALL_INT_HASH(z, z);
307 r = COMBINE(r, z);
308 }
309 return r;
310 }
311 case SCM_UVECTOR_F64: {
312 /* We can't reinterpret f64 as u64, since ARM mixed-endian would
313 yield nonportable result. */
314 seed = 2350294565ul;
315 INIT_R(r, seed);
316 for (size_t i=0; i<s; i++) {
317 double e = SCM_F64VECTOR_ELEMENT(u, i);
318 r = COMBINE(r, flonum_hash(e, salt, TRUE));
319 }
320 return r;
321 }
322 #undef INIT_R
323 case SCM_UVECTOR_RESERVED1:
324 case SCM_UVECTOR_C32:
325 case SCM_UVECTOR_C64:
326 case SCM_UVECTOR_C128:
327 case SCM_UVECTOR_RESERVED2:
328 case SCM_UVECTOR_INVALID:
329 Scm_Panic("invalid uvector class.");
330 }
331 return 0; /* dummy */
332 } else {
333 return Scm__DwSipDefaultHash((uint8_t*)SCM_UVECTOR_ELEMENTS(u),
334 (uint32_t)Scm_UVectorSizeInBytes(u),
335 salt, salt);
336 }
337 }
338
339 /* equal-hash, which satisfies
340 forall x, y: equal(x,y) => hash(x) = hash(y)
341
342 Both default-hash and portable-hash have this property but their
343 requirements are slightly different, so here's the common part.
344 */
equal_hash_common(ScmObj obj,u_long salt,int portable)345 static u_long equal_hash_common(ScmObj obj, u_long salt, int portable)
346 {
347 if (SCM_NUMBERP(obj)) {
348 return number_hash(obj, salt, portable);
349 } else if (!SCM_PTRP(obj)) {
350 u_long hashval;
351 SMALL_INT_HASH(hashval, (u_long)SCM_WORD(obj));
352 return hashval&PORTABLE_HASHMASK;
353 } else if (SCM_STRINGP(obj)) {
354 return internal_string_hash(SCM_STRING(obj), salt, portable);
355 } else if (SCM_PAIRP(obj)) {
356 u_long h = 0, h2;
357 ScmObj cp;
358 SCM_FOR_EACH(cp, obj) {
359 h2 = equal_hash_common(SCM_CAR(cp), salt, portable);
360 h = COMBINE(h, h2);
361 }
362 h2 = equal_hash_common(cp, salt, portable);
363 return COMBINE(h, h2);
364 } else if (SCM_VECTORP(obj)) {
365 int siz = SCM_VECTOR_SIZE(obj);
366 u_long h = 0, h2;
367 for (int i=0; i<siz; i++) {
368 h2 = equal_hash_common(SCM_VECTOR_ELEMENT(obj, i), salt, portable);
369 h = COMBINE(h, h2);
370 }
371 return h;
372 /* uvector hash support */
373 } else if (SCM_UVECTORP(obj)) {
374 return internal_uvector_hash(SCM_UVECTOR(obj), salt, portable);
375 #if GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION
376 } else if (SCM_KEYWORDP(obj)) {
377 if (portable) {
378 if (SCM_SYMBOLP(obj)) {
379 /* GAUCHE_KEYWORD_IS_SYMBOL mode */
380 return internal_string_hash(SCM_KEYWORD_NAME(obj), salt, TRUE);
381 } else {
382 /* GAUCHE_KEYWORD_IS_DISJOINT mode. SCM_KEYWORD_NAME does
383 not include prefix ':'. We should append it so that
384 the hash value stays the same. Appending string incurs
385 allocation, but we expect this branch isn't taken often
386 and eventually fade away. */
387 static ScmString *prefix = NULL;
388 if (prefix == NULL) {
389 /* idempotent. no MT hazard. */
390 prefix = SCM_STRING(Scm_MakeString(":", 1, 1, 0));
391 }
392 ScmObj name = Scm_StringAppend2(prefix, SCM_KEYWORD_NAME(obj));
393 return internal_string_hash(SCM_STRING(name), salt, TRUE);
394 }
395 } else {
396 u_long hashval;
397 ADDRESS_HASH(hashval, obj);
398 return hashval;
399 }
400 #endif /*GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
401 } else if (SCM_SYMBOLP(obj)) {
402 if (portable) {
403 return internal_string_hash(SCM_SYMBOL_NAME(obj), salt, TRUE);
404 } else {
405 u_long hashval;
406 ADDRESS_HASH(hashval, obj);
407 return hashval;
408 }
409 }
410
411 ScmClass *k = SCM_CLASS_OF(obj);
412 if (k->hash) {
413 return (u_long)k->hash(obj, salt,
414 (portable? SCM_HASH_PORTABLE:0));
415 }
416
417 /* Call specialized object-hash method
418 We need some trick; See libomega.scm for the details. */
419 static ScmObj call_object_hash_proc = SCM_UNDEFINED;
420 static ScmObj portable_hash_proc = SCM_UNDEFINED;
421 static ScmObj default_hash_proc = SCM_UNDEFINED;
422 SCM_BIND_PROC(call_object_hash_proc, "%call-object-hash",
423 Scm_GaucheInternalModule());
424 SCM_BIND_PROC(portable_hash_proc, "portable-hash", Scm_GaucheModule());
425 SCM_BIND_PROC(default_hash_proc, "default-hash", Scm_GaucheModule());
426 ScmObj r = Scm_ApplyRec3(call_object_hash_proc, obj,
427 (portable
428 ? portable_hash_proc
429 : default_hash_proc),
430 (portable
431 ? Scm_MakeIntegerU(salt)
432 : SCM_FALSE));
433 if (SCM_INTP(r)) {
434 return (u_long)SCM_INT_VALUE(r);
435 }
436 if (SCM_BIGNUMP(r)) {
437 /* NB: Scm_GetUInteger clamps the result to [0, ULONG_MAX],
438 so taking the LSW would give better distribution. */
439 return SCM_BIGNUM(r)->values[0];
440 }
441 Scm_Error("object-hash returned non-integer: %S", r);
442 return 0; /* dummy */
443 }
444
445 /* For recursive call to the current hash function - see call-object-hash
446 and object-hash definitions in libomega.scm. */
447 static ScmPrimitiveParameter *current_recursive_hash;
448
Scm_CurrentRecursiveHash(ScmObj newval)449 ScmObj Scm_CurrentRecursiveHash(ScmObj newval)
450 {
451 if (newval == SCM_UNBOUND) {
452 return Scm_PrimitiveParameterRef(Scm_VM(), current_recursive_hash);
453 } else {
454 return Scm_PrimitiveParameterSet(Scm_VM(), current_recursive_hash, newval);
455 }
456 }
457
458 /* 'Portable' general hash function.
459
460 It is guaranteed that the hash value won't change for the same objects
461 (roughly, indistinguishable in their external representation)
462 across the runs of the program, and among different platforms.
463 That is, the value can be used in persistent stores.
464 */
Scm_PortableHash(ScmObj obj,u_long salt)465 u_long Scm_PortableHash(ScmObj obj, u_long salt)
466 {
467 return equal_hash_common(obj, salt, TRUE) & PORTABLE_HASHMASK;
468 }
469
470 /* 'Default' general hash function. */
Scm_DefaultHash(ScmObj obj)471 ScmSmallInt Scm_DefaultHash(ScmObj obj)
472 {
473 return equal_hash_common(obj, Scm_HashSaltRef(), FALSE) & HASHMASK;
474 }
475
476 /* This is to be called from ScmClass->hash if it needs to compute
477 hash value of field values. */
Scm_RecursiveHash(ScmObj obj,ScmSmallInt salt,u_long flags)478 ScmSmallInt Scm_RecursiveHash(ScmObj obj, ScmSmallInt salt, u_long flags)
479 {
480 if (flags & SCM_HASH_PORTABLE) {
481 return equal_hash_common(obj, salt, TRUE) & PORTABLE_HASHMASK;
482 } else {
483 return equal_hash_common(obj, salt, FALSE) & HASHMASK;
484 }
485 }
486
Scm_SmallIntHash(ScmSmallInt val,ScmSmallInt salt SCM_UNUSED,u_long flags SCM_UNUSED)487 ScmSmallInt Scm_SmallIntHash(ScmSmallInt val,
488 ScmSmallInt salt SCM_UNUSED,
489 u_long flags SCM_UNUSED)
490 {
491 ScmSmallInt v;
492 SMALL_INT_HASH(v, val);
493 return v;
494 }
495
Scm_Int64Hash(int64_t val,ScmSmallInt salt SCM_UNUSED,u_long flags SCM_UNUSED)496 ScmSmallInt Scm_Int64Hash(int64_t val,
497 ScmSmallInt salt SCM_UNUSED,
498 u_long flags SCM_UNUSED)
499 {
500 ScmSmallInt v;
501 SMALL_INT_HASH(v, val);
502 return v;
503 }
504
505
506 /* This is to expose string hash function. Modulo is for the compatibility
507 of srfi-13; just give 0 as modulo if you don't need it. */
Scm_HashString(ScmString * str,u_long modulo)508 u_long Scm_HashString(ScmString *str, u_long modulo)
509 {
510 u_long hashval = internal_string_hash(str, Scm_HashSaltRef(), FALSE);
511 if (modulo == 0) return hashval&HASHMASK;
512 else return (hashval % modulo);
513 }
514
515 /* Expose COMBINE. */
Scm_CombineHashValue(u_long a,u_long b)516 u_long Scm_CombineHashValue(u_long a, u_long b)
517 {
518 u_long c = COMBINE(a, b);
519 #if SIZEOF_LONG == 8
520 /* we limit portable hash value to 32bit. */
521 c &= 0xffffffff;
522 #endif /**/
523 return c;
524 }
525
526 /*------------------------------------------------------------
527 * Parameterization
528 *
529 * Conceptually hash tables are parameterized by hash function and
530 * compare function. However, if they are trivial functions, calling
531 * them via function pointers incur overhead. So we layered the
532 * parameterization.
533 *
534 * For the pre-defined simple hash tables, the calls to the hash and
535 * compare functions are inlined in a single "access" function.
536 * (In this case hashfn and cmpfn are never used.)
537 * For the generic hash tables, the general_access function uses
538 * the info in hashfn and cmpfn fields.
539 *
540 * The accessor function takes three arguments.
541 *
542 * ScmHashCore *core : hash table core
543 * intptr_t key : key
544 * ScmDictOp op : operation
545 */
546
547 /* NOTE: eq?, eqv?, and string=? hash tables are guaranteed not to
548 * throw an error during hash table access (except the case that string=?
549 * hash table gets non-string key). So the caller doesn't need to
550 * set unwind handler in case it needs cleanup (like unlocking mutex).
551 * However, equal? hash may call back to Scheme method, so it can
552 * throw Scheme error. Be aware of that.
553 */
554
555 /*
556 * Common function called when the accessor function needs to add an entry.
557 */
insert_entry(ScmHashCore * table,intptr_t key,u_long hashval,int index)558 static Entry *insert_entry(ScmHashCore *table,
559 intptr_t key,
560 u_long hashval,
561 int index)
562 {
563 Entry *e = SCM_NEW(Entry);
564 Entry **buckets = BUCKETS(table);
565 e->key = key;
566 e->value = 0;
567 e->next = buckets[index];
568 e->hashval = hashval;
569 buckets[index] = e;
570 table->numEntries++;
571
572 if (table->numEntries > table->numBuckets*MAX_AVG_CHAIN_LIMITS) {
573 /* Extend the table */
574 int newsize = (table->numBuckets << EXTEND_BITS);
575 int newbits = table->numBucketsLog2 + EXTEND_BITS;
576
577 Entry **newb = SCM_NEW_ARRAY(Entry*, newsize);
578 for (int i=0; i<newsize; i++) newb[i] = NULL;
579
580 ScmHashIter iter;
581 Entry *f;
582 Scm_HashIterInit(&iter, table);
583 while ((f = (Entry*)Scm_HashIterNext(&iter)) != NULL) {
584 index = HASH2INDEX(newsize, newbits, f->hashval);
585 f->next = newb[index];
586 newb[index] = f;
587 }
588 /* gc friendliness */
589 for (int i=0; i<table->numBuckets; i++) table->buckets[i] = NULL;
590
591 table->numBuckets = newsize;
592 table->numBucketsLog2 = newbits;
593 table->buckets = (void**)newb;
594 }
595 return e;
596 }
597
598 /* NB: Deleting entry E doesn't modify E's key and value, but cut
599 the "next" link for the sake of weak-gc robustness. The hash core
600 iterator prefetches a pointer to the next entry, so deleting the
601 "current" entry of iteration is safe as far as other iterators
602 are running on the same hash table. */
delete_entry(ScmHashCore * table,Entry * entry,Entry * prev,int index)603 static Entry *delete_entry(ScmHashCore *table,
604 Entry *entry, Entry *prev,
605 int index)
606 {
607 if (prev) prev->next = entry->next;
608 else table->buckets[index] = (void*)entry->next;
609 table->numEntries--;
610 SCM_ASSERT(table->numEntries >= 0);
611 entry->next = NULL; /* GC friendliness */
612 return entry;
613 }
614
615 #define FOUND(table, op, e, p, index) \
616 do { \
617 switch (op) { \
618 case SCM_DICT_GET:; \
619 case SCM_DICT_CREATE:; \
620 return e; \
621 case SCM_DICT_DELETE:; \
622 return delete_entry(table, e, p, index); \
623 } \
624 } while (0)
625
626 #define NOTFOUND(table, op, key, hashval, index) \
627 do { \
628 if (op == SCM_DICT_CREATE) { \
629 return insert_entry(table, key, hashval, index); \
630 } else { \
631 return NULL; \
632 } \
633 } while (0)
634
635 /*
636 * Accessor function for address. Used for EQ-type hash.
637 */
address_access(ScmHashCore * table,intptr_t key,ScmDictOp op)638 static Entry *address_access(ScmHashCore *table,
639 intptr_t key,
640 ScmDictOp op)
641 {
642 u_long hashval, index;
643 Entry **buckets = (Entry**)table->buckets;
644
645 ADDRESS_HASH(hashval, key);
646 index = HASH2INDEX(table->numBuckets, table->numBucketsLog2, hashval);
647
648 for (Entry *e = buckets[index], *p = NULL; e; p = e, e = e->next) {
649 if (e->key == key) FOUND(table, op, e, p, index);
650 }
651 NOTFOUND(table, op, key, hashval, index);
652 }
653
address_hash(const ScmHashCore * ht SCM_UNUSED,intptr_t obj)654 static u_long address_hash(const ScmHashCore *ht SCM_UNUSED, intptr_t obj)
655 {
656 u_long hashval;
657 ADDRESS_HASH(hashval, obj);
658 return hashval;
659 }
660
address_cmp(const ScmHashCore * ht SCM_UNUSED,intptr_t key,intptr_t k2)661 static int address_cmp(const ScmHashCore *ht SCM_UNUSED,
662 intptr_t key, intptr_t k2)
663 {
664 return (key == k2);
665 }
666
667 /*
668 * Accessor function for equal and eqv-hash.
669 * We assume KEY is ScmObj.
670 */
eqv_hash(const ScmHashCore * ht SCM_UNUSED,intptr_t key)671 static u_long eqv_hash(const ScmHashCore *ht SCM_UNUSED, intptr_t key)
672 {
673 return Scm_EqvHash(SCM_OBJ(key));
674 }
675
eqv_cmp(const ScmHashCore * ht SCM_UNUSED,intptr_t key,intptr_t k2)676 static int eqv_cmp(const ScmHashCore *ht SCM_UNUSED, intptr_t key, intptr_t k2)
677 {
678 return Scm_EqvP(SCM_OBJ(key), SCM_OBJ(k2));
679 }
680
equal_hash(const ScmHashCore * ht SCM_UNUSED,intptr_t key)681 static u_long equal_hash(const ScmHashCore *ht SCM_UNUSED, intptr_t key)
682 {
683 return Scm_DefaultHash(SCM_OBJ(key));
684 }
685
equal_cmp(const ScmHashCore * ht SCM_UNUSED,intptr_t key,intptr_t k2)686 static int equal_cmp(const ScmHashCore *ht SCM_UNUSED, intptr_t key, intptr_t k2)
687 {
688 return Scm_EqualP(SCM_OBJ(key), SCM_OBJ(k2));
689 }
690
691
692 /*
693 * Accessor function for string type.
694 */
string_access(ScmHashCore * table,intptr_t k,ScmDictOp op)695 static Entry *string_access(ScmHashCore *table, intptr_t k, ScmDictOp op)
696 {
697 ScmObj key = SCM_OBJ(k);
698
699 if (!SCM_STRINGP(key)) {
700 Scm_Error("Got non-string key %S to the string hashtable.", key);
701 }
702 u_long hashval = Scm_HashString(SCM_STRING(key), 0);
703 u_long index = HASH2INDEX(table->numBuckets, table->numBucketsLog2, hashval);
704 Entry **buckets = (Entry**)table->buckets;
705
706 const ScmStringBody *keyb = SCM_STRING_BODY(key);
707 long size = SCM_STRING_BODY_SIZE(keyb);
708 for (Entry *e = buckets[index], *p = NULL; e; p = e, e = e->next) {
709 ScmObj ee = SCM_OBJ(e->key);
710 const ScmStringBody *eeb = SCM_STRING_BODY(ee);
711 int eesize = SCM_STRING_BODY_SIZE(eeb);
712 if (size == eesize
713 && memcmp(SCM_STRING_BODY_START(keyb),
714 SCM_STRING_BODY_START(eeb), eesize) == 0){
715 FOUND(table, op, e, p, index);
716 }
717 }
718 NOTFOUND(table, op, k, hashval, index);
719 }
720
string_hash(const ScmHashCore * ht SCM_UNUSED,intptr_t key)721 static u_long string_hash(const ScmHashCore *ht SCM_UNUSED, intptr_t key)
722 {
723 return Scm_HashString(SCM_STRING(key), 0);
724 }
725
string_cmp(const ScmHashCore * ht SCM_UNUSED,intptr_t k1,intptr_t k2)726 static int string_cmp(const ScmHashCore *ht SCM_UNUSED, intptr_t k1, intptr_t k2)
727 {
728 const ScmStringBody *b1 = SCM_STRING_BODY(k1);
729 const ScmStringBody *b2 = SCM_STRING_BODY(k2);
730 return ((SCM_STRING_BODY_SIZE(b1) == SCM_STRING_BODY_SIZE(b2))
731 && (memcmp(SCM_STRING_BODY_START(b1),
732 SCM_STRING_BODY_START(b2),
733 SCM_STRING_BODY_SIZE(b1)) == 0));
734 }
735
736 /*
737 * Accessor function for multiword raw hashtable.
738 * Key points to an array of N words.
739 */
740 #if 0 /* not used yet */
741 static u_long multiword_hash(const ScmHashCore *table, intptr_t key)
742 {
743 ScmWord keysize = (ScmWord)table->data;
744 ScmWord *keyarray = (ScmWord*)key;
745 u_long h = 0, h1;
746 for (int i=0; i<keysize; i++) {
747 ADDRESS_HASH(h1, keyarray[i]);
748 h = COMBINE(h, h1);
749 }
750 return h;
751 }
752 #endif
753
754 #if 0
755 static Entry *multiword_access(ScmHashCore *table, intptr_t k, ScmDictOp op)
756 {
757 u_long hashval, index;
758 ScmWord keysize = (ScmWord)table->data;
759
760 hashval = multiword_hash(table, k);
761 index = HASH2INDEX(table->numBuckets, table->numBucketsLog2, hashval);
762 Entry **buckets = (Entry**)table->buckets;
763
764 for (Entry *e = buckets[index], *p = NULL; e; p = e, e = e->next) {
765 if (memcmp((void*)k, (void*)e->key, keysize*sizeof(ScmWord)) == 0)
766 FOUND(table, op, e, p, index);
767 }
768 NOTFOUND(table, op, k, hashval, index);
769 }
770 #endif
771
772
773 /*
774 * Accessor function for general case
775 * (hashfn and cmpfn are given by user)
776 */
general_access(ScmHashCore * table,intptr_t key,ScmDictOp op)777 static Entry *general_access(ScmHashCore *table, intptr_t key, ScmDictOp op)
778 {
779 u_long hashval, index;
780
781 hashval = table->hashfn(table, key);
782 index = HASH2INDEX(table->numBuckets, table->numBucketsLog2, hashval);
783 Entry **buckets = (Entry**)table->buckets;
784
785 for (Entry *e = buckets[index], *p = NULL; e; p = e, e = e->next) {
786 if (table->cmpfn(table, key, e->key)) FOUND(table, op, e, p, index);
787 }
788 NOTFOUND(table, op, key, hashval, index);
789 }
790
791 /*============================================================
792 * Hash Core functions
793 */
794
hash_core_init(ScmHashCore * table,SearchProc * accessfn,ScmHashProc * hashfn,ScmHashCompareProc * cmpfn,unsigned int initSize,void * data)795 static void hash_core_init(ScmHashCore *table,
796 SearchProc *accessfn,
797 ScmHashProc *hashfn,
798 ScmHashCompareProc *cmpfn,
799 unsigned int initSize,
800 void *data)
801 {
802 if (initSize != 0) initSize = round2up(initSize);
803 else initSize = DEFAULT_NUM_BUCKETS;
804
805 Entry **b = SCM_NEW_ARRAY(Entry*, initSize);
806 table->buckets = (void**)b;
807 table->numBuckets = initSize;
808 table->numEntries = 0;
809 table->accessfn = (void*)accessfn;
810 table->hashfn = hashfn;
811 table->cmpfn = cmpfn;
812 table->data = data;
813 table->numBucketsLog2 = 0;
814 for (u_int i=initSize; i > 1; i /= 2) {
815 table->numBucketsLog2++;
816 }
817 for (u_int i=0; i<initSize; i++) table->buckets[i] = NULL;
818 }
819
820 /* choose appropriate procedures for predefined hash types. */
hash_core_predef_procs(ScmHashType type,SearchProc ** accessfn,ScmHashProc ** hashfn,ScmHashCompareProc ** cmpfn)821 int hash_core_predef_procs(ScmHashType type,
822 SearchProc **accessfn,
823 ScmHashProc **hashfn,
824 ScmHashCompareProc **cmpfn)
825 {
826 switch (type) {
827 case SCM_HASH_EQ:
828 case SCM_HASH_WORD:
829 *accessfn = address_access;
830 *hashfn = address_hash;
831 *cmpfn = address_cmp;
832 return TRUE;
833 case SCM_HASH_EQV:
834 *accessfn = general_access;
835 *hashfn = eqv_hash;
836 *cmpfn = eqv_cmp;
837 return TRUE;
838 case SCM_HASH_EQUAL:
839 *accessfn = general_access;
840 *hashfn = equal_hash;
841 *cmpfn = equal_cmp;
842 return TRUE;
843 case SCM_HASH_STRING:
844 *accessfn = string_access;
845 *hashfn = string_hash;
846 *cmpfn = string_cmp;
847 return TRUE;
848 default:
849 return FALSE;
850 }
851 }
852
Scm_HashCoreInitSimple(ScmHashCore * core,ScmHashType type,unsigned int initSize,void * data)853 void Scm_HashCoreInitSimple(ScmHashCore *core,
854 ScmHashType type,
855 unsigned int initSize,
856 void *data)
857 {
858 SearchProc *accessfn = NULL;
859 ScmHashProc *hashfn = NULL;
860 ScmHashCompareProc *cmpfn = NULL;
861
862 if (hash_core_predef_procs(type, &accessfn, &hashfn, &cmpfn) == FALSE) {
863 Scm_Error("[internal error]: wrong TYPE argument passed to Scm_HashCoreInitSimple: %d", type);
864 }
865 hash_core_init(core, accessfn, hashfn, cmpfn, initSize, data);
866 }
867
Scm_HashCoreInitGeneral(ScmHashCore * core,ScmHashProc * hashfn,ScmHashCompareProc * cmpfn,unsigned int initSize,void * data)868 void Scm_HashCoreInitGeneral(ScmHashCore *core,
869 ScmHashProc *hashfn,
870 ScmHashCompareProc *cmpfn,
871 unsigned int initSize,
872 void *data)
873 {
874 hash_core_init(core, general_access, hashfn,
875 cmpfn, initSize, data);
876 }
877
Scm_HashCoreTypeToProcs(ScmHashType type,ScmHashProc ** hashfn,ScmHashCompareProc ** cmpfn)878 int Scm_HashCoreTypeToProcs(ScmHashType type,
879 ScmHashProc **hashfn,
880 ScmHashCompareProc **cmpfn)
881 {
882 SearchProc *accessfn; /* dummy */
883 return hash_core_predef_procs(type, &accessfn, hashfn, cmpfn);
884 }
885
Scm_HashCoreCopy(ScmHashCore * dst,const ScmHashCore * src)886 void Scm_HashCoreCopy(ScmHashCore *dst, const ScmHashCore *src)
887 {
888 Entry **b = SCM_NEW_ARRAY(Entry*, src->numBuckets);
889
890 for (int i=0; i<src->numBuckets; i++) {
891 Entry *p = NULL;
892 Entry *s = (Entry*)src->buckets[i];
893 b[i] = NULL;
894 while (s) {
895 Entry *e = SCM_NEW(Entry);
896 e->key = s->key;
897 e->value = s->value;
898 e->next = NULL;
899 e->hashval = s->hashval;
900 if (p) p->next = e;
901 else b[i] = e;
902 p = e;
903 s = s->next;
904 }
905 }
906
907 /* A little trick to avoid hazard in careless race condition */
908 dst->numBuckets = dst->numEntries = 0;
909
910 dst->buckets = (void**)b;
911 dst->hashfn = src->hashfn;
912 dst->cmpfn = src->cmpfn;
913 dst->accessfn = src->accessfn;
914 dst->data = src->data;
915 dst->numEntries = src->numEntries;
916 dst->numBucketsLog2 = src->numBucketsLog2;
917 dst->numBuckets = src->numBuckets;
918 }
919
Scm_HashCoreClear(ScmHashCore * table)920 void Scm_HashCoreClear(ScmHashCore *table)
921 {
922 for (int i=0; i<table->numBuckets; i++) {
923 table->buckets[i] = NULL;
924 }
925 table->numEntries = 0;
926 }
927
Scm_HashCoreSearch(ScmHashCore * table,intptr_t key,ScmDictOp op)928 ScmDictEntry *Scm_HashCoreSearch(ScmHashCore *table, intptr_t key,
929 ScmDictOp op)
930 {
931 SearchProc *p = (SearchProc*)table->accessfn;
932 return (ScmDictEntry*)p(table, key, op);
933 }
934
Scm_HashCoreNumEntries(ScmHashCore * table)935 int Scm_HashCoreNumEntries(ScmHashCore *table)
936 {
937 return table->numEntries;
938 }
939
940 /*
941 * NB: It is important to keep the pointer to the "next" entry,
942 * not the "current", since the current entry may be deleted,
943 * erasing its next pointer.
944 */
Scm_HashIterInit(ScmHashIter * iter,ScmHashCore * table)945 void Scm_HashIterInit(ScmHashIter *iter, ScmHashCore *table)
946 {
947 iter->core = table;
948 for (int i=0; i<table->numBuckets; i++) {
949 if (table->buckets[i]) {
950 iter->bucket = i;
951 iter->next = table->buckets[i];
952 return;
953 }
954 }
955 iter->next = NULL;
956 }
957
Scm_HashIterNext(ScmHashIter * iter)958 ScmDictEntry *Scm_HashIterNext(ScmHashIter *iter)
959 {
960 Entry *e = (Entry*)iter->next;
961 if (e != NULL) {
962 if (e->next) iter->next = e->next;
963 else {
964 int i = iter->bucket + 1;
965 for (; i < iter->core->numBuckets; i++) {
966 if (iter->core->buckets[i]) {
967 iter->bucket = i;
968 iter->next = iter->core->buckets[i];
969 return (ScmDictEntry*)e;
970 }
971 }
972 iter->next = NULL;
973 }
974 }
975 return (ScmDictEntry*)e;
976 }
977
978 /*============================================================
979 * Scheme <hash-table> object
980 */
981
982 static void hash_print(ScmObj obj, ScmPort *port, ScmWriteContext *ctx);
983
984 SCM_DEFINE_BUILTIN_CLASS(Scm_HashTableClass, hash_print, Scm_ObjectCompare,
985 NULL, NULL,
986 SCM_CLASS_DICTIONARY_CPL);
987
Scm_MakeHashTableSimple(ScmHashType type,unsigned int initSize)988 ScmObj Scm_MakeHashTableSimple(ScmHashType type, unsigned int initSize)
989 {
990 /* We only allow ScmObj in <hash-table> */
991 if (type > SCM_HASH_GENERAL) {
992 Scm_Error("Scm_MakeHashTableSimple: wrong type arg: %d", type);
993 }
994 ScmHashTable *z = SCM_NEW(ScmHashTable);
995 SCM_SET_CLASS(z, SCM_CLASS_HASH_TABLE);
996 Scm_HashCoreInitSimple(&z->core, type, initSize, NULL);
997 z->type = type;
998 return SCM_OBJ(z);
999 }
1000
Scm_MakeHashTableFull(ScmHashProc hashfn,ScmHashCompareProc cmpfn,unsigned int initSize,void * data)1001 ScmObj Scm_MakeHashTableFull(ScmHashProc hashfn,
1002 ScmHashCompareProc cmpfn,
1003 unsigned int initSize, void *data)
1004 {
1005 ScmHashTable *z = SCM_NEW(ScmHashTable);
1006 SCM_SET_CLASS(z, SCM_CLASS_HASH_TABLE);
1007 z->type = SCM_HASH_GENERAL;
1008 Scm_HashCoreInitGeneral(&z->core, hashfn, cmpfn, initSize, data);
1009 return SCM_OBJ(z);
1010 }
1011
Scm_HashTableCopy(ScmHashTable * src)1012 ScmObj Scm_HashTableCopy(ScmHashTable *src)
1013 {
1014 ScmHashTable *dst = SCM_NEW(ScmHashTable);
1015 SCM_SET_CLASS(dst, SCM_CLASS_HASH_TABLE);
1016 Scm_HashCoreCopy(SCM_HASH_TABLE_CORE(dst), SCM_HASH_TABLE_CORE(src));
1017 dst->type = src->type;
1018 return SCM_OBJ(dst);
1019 }
1020
Scm_HashTableType(ScmHashTable * ht)1021 ScmHashType Scm_HashTableType(ScmHashTable *ht)
1022 {
1023 return ht->type;
1024 }
1025
Scm_HashTableRef(ScmHashTable * ht,ScmObj key,ScmObj fallback)1026 ScmObj Scm_HashTableRef(ScmHashTable *ht, ScmObj key, ScmObj fallback)
1027 {
1028 ScmDictEntry *e = Scm_HashCoreSearch(SCM_HASH_TABLE_CORE(ht),
1029 (intptr_t)key, SCM_DICT_GET);
1030 if (!e) return fallback;
1031 else return SCM_DICT_VALUE(e);
1032 }
1033
1034 /* Returns previous value; can return SCM_UNBOUND when the association hasn't
1035 been there. Be careful not to let SCM_UNBOUND leak out to Scheme! */
Scm_HashTableSet(ScmHashTable * ht,ScmObj key,ScmObj value,int flags)1036 ScmObj Scm_HashTableSet(ScmHashTable *ht, ScmObj key, ScmObj value, int flags)
1037 {
1038 ScmDictEntry *e;
1039
1040 e = Scm_HashCoreSearch(SCM_HASH_TABLE_CORE(ht),
1041 (intptr_t)key,
1042 (flags&SCM_DICT_NO_CREATE)?SCM_DICT_GET: SCM_DICT_CREATE);
1043 if (!e) return SCM_UNBOUND;
1044 ScmObj oldval = e->value? SCM_DICT_VALUE(e) : SCM_UNBOUND;
1045 if (!(flags&SCM_DICT_NO_OVERWRITE) || SCM_UNBOUNDP(oldval)) {
1046 return SCM_DICT_SET_VALUE(e, value);
1047 }
1048 return oldval;
1049 }
1050
Scm_HashTableDelete(ScmHashTable * ht,ScmObj key)1051 ScmObj Scm_HashTableDelete(ScmHashTable *ht, ScmObj key)
1052 {
1053 ScmDictEntry *e = Scm_HashCoreSearch(SCM_HASH_TABLE_CORE(ht),
1054 (intptr_t)key, SCM_DICT_DELETE);
1055 if (e && e->value) return SCM_DICT_VALUE(e);
1056 else return SCM_UNBOUND;
1057 }
1058
Scm_HashTableKeys(ScmHashTable * table)1059 ScmObj Scm_HashTableKeys(ScmHashTable *table)
1060 {
1061 ScmHashIter iter;
1062 ScmDictEntry *e;
1063 ScmObj h = SCM_NIL, t = SCM_NIL;
1064 Scm_HashIterInit(&iter, SCM_HASH_TABLE_CORE(table));
1065 while ((e = Scm_HashIterNext(&iter)) != NULL) {
1066 SCM_APPEND1(h, t, SCM_DICT_KEY(e));
1067 }
1068 return h;
1069 }
1070
Scm_HashTableValues(ScmHashTable * table)1071 ScmObj Scm_HashTableValues(ScmHashTable *table)
1072 {
1073 ScmHashIter iter;
1074 ScmDictEntry *e;
1075 ScmObj h = SCM_NIL, t = SCM_NIL;
1076 Scm_HashIterInit(&iter, SCM_HASH_TABLE_CORE(table));
1077 while ((e = Scm_HashIterNext(&iter)) != NULL) {
1078 SCM_APPEND1(h, t, SCM_DICT_VALUE(e));
1079 }
1080 return h;
1081 }
1082
Scm_HashTableStat(ScmHashTable * table)1083 ScmObj Scm_HashTableStat(ScmHashTable *table)
1084 {
1085 ScmObj h = SCM_NIL, t = SCM_NIL;
1086 ScmHashCore *c = SCM_HASH_TABLE_CORE(table);
1087 SCM_APPEND1(h, t, SCM_MAKE_KEYWORD("num-entries"));
1088 SCM_APPEND1(h, t, Scm_MakeInteger(c->numEntries));
1089 SCM_APPEND1(h, t, SCM_MAKE_KEYWORD("num-buckets"));
1090 SCM_APPEND1(h, t, Scm_MakeInteger(c->numBuckets));
1091 SCM_APPEND1(h, t, SCM_MAKE_KEYWORD("num-buckets-log2"));
1092 SCM_APPEND1(h, t, Scm_MakeInteger(c->numBucketsLog2));
1093
1094 Entry** b = BUCKETS(c);
1095 ScmVector *v = SCM_VECTOR(Scm_MakeVector(c->numBuckets, SCM_NIL));
1096 ScmObj *vp = SCM_VECTOR_ELEMENTS(v);
1097 for (int i = 0; i<c->numBuckets; i++, vp++) {
1098 Entry *e = b[i];
1099 for (; e; e = e->next) {
1100 *vp = Scm_Acons(SCM_DICT_KEY(e), SCM_DICT_VALUE(e), *vp);
1101 }
1102 }
1103 SCM_APPEND1(h, t, SCM_MAKE_KEYWORD("contents"));
1104 SCM_APPEND1(h, t, SCM_OBJ(v));
1105 return h;
1106 }
1107
1108
1109 /*
1110 * Utilities
1111 */
1112
1113 /*
1114 * Printer
1115 */
1116
hash_print(ScmObj obj,ScmPort * port,ScmWriteContext * ctx SCM_UNUSED)1117 static void hash_print(ScmObj obj, ScmPort *port,
1118 ScmWriteContext *ctx SCM_UNUSED)
1119 {
1120 ScmHashTable *ht = (ScmHashTable*)obj;
1121 char *str = "";
1122
1123 switch (ht->type) {
1124 case SCM_HASH_EQ: str = "eq?"; break;
1125 case SCM_HASH_EQV: str = "eqv?"; break;
1126 case SCM_HASH_EQUAL: str = "equal?"; break;
1127 case SCM_HASH_STRING: str = "string=?"; break;
1128 case SCM_HASH_GENERAL: str = "general"; break;
1129 default: Scm_Panic("something wrong with a hash table");
1130 }
1131
1132 #if 0
1133 /* Use read-time constructor so that table can be read back
1134 --- is it necessary? I'm not sure yet. */
1135 Scm_Printf(port, "#,(<hash-table> %s", str);
1136 if (ht->numEntries > 0) {
1137 Scm_HashIterInit(&iter, ht);
1138 while ((e = Scm_HashIterNext(&iter)) != NULL) {
1139 Scm_Printf(port, " %S %S", e->key, e->value);
1140 }
1141 }
1142 SCM_PUTZ(")", -1, port);
1143 #else
1144 Scm_Printf(port, "#<hash-table %s %p>", str, ht);
1145 #endif
1146 }
1147
round2up(unsigned int val)1148 static unsigned int round2up(unsigned int val)
1149 {
1150 unsigned int n = 1;
1151 while (n < val) {
1152 n <<= 1;
1153 SCM_ASSERT(n > 1); /* check overflow */
1154 }
1155 return n;
1156 }
1157
1158 /*====================================================================
1159 * Initialization
1160 */
1161
Scm__InitHash()1162 void Scm__InitHash()
1163 {
1164 struct timeval t;
1165 if (gettimeofday(&t, NULL) < 0) {
1166 Scm_Panic("gettimeofday failed");
1167 }
1168 u_long salt = ((u_long)getpid() * ((u_long)t.tv_sec^(u_long)t.tv_usec));
1169 ADDRESS_HASH(salt, salt);
1170 salt &= SCM_SMALL_INT_MAX;
1171 /*
1172 * We can't use Scm_BindPrimitiveParameter here, since symbol table
1173 * is not initialized yet (symbol table uses hashtable!)
1174 */
1175 hash_salt =
1176 Scm_MakePrimitiveParameter(SCM_CLASS_PRIMITIVE_PARAMETER, SCM_FALSE,
1177 Scm_MakeIntegerU(salt), 0);
1178 current_recursive_hash =
1179 Scm_MakePrimitiveParameter(SCM_CLASS_PRIMITIVE_PARAMETER, SCM_FALSE,
1180 SCM_FALSE, 0);
1181 }
1182
1183 /*====================================================================
1184 * For backward compatibility
1185 */
1186
1187 #if GAUCHE_API_VERSION < 1000
1188 /* Backward compatibility.
1189 NB: Casting ScmDictEntry* to ScmHashEntry* would be invalid if
1190 sizeof(intptr_t) and sizeof(void*) differ. I know only one
1191 such platform (PlayStation2). If it is a problem, moving to
1192 the new API is recommended. */
Scm_HashTableGet(ScmHashTable * ht,ScmObj key)1193 ScmHashEntry *Scm_HashTableGet(ScmHashTable *ht, ScmObj key)
1194 {
1195 if (sizeof(intptr_t) != sizeof(void*)) {
1196 Scm_Error("[internal] Scm_HashTableGet is obsoleted on this platform. You should use the new hashtable API.");
1197 }
1198 return (ScmHashEntry*)Scm_HashCoreSearch(SCM_HASH_TABLE_CORE(ht),
1199 (intptr_t)key,
1200 SCM_DICT_GET);
1201 }
1202
Scm_HashTableAdd(ScmHashTable * ht,ScmObj key,ScmObj value)1203 ScmHashEntry *Scm_HashTableAdd(ScmHashTable *ht, ScmObj key, ScmObj value)
1204 {
1205 ScmDictEntry *e = Scm_HashCoreSearch(SCM_HASH_TABLE_CORE(ht),
1206 (intptr_t)key, SCM_DICT_CREATE);
1207 if (sizeof(intptr_t) != sizeof(void*)) {
1208 Scm_Error("[internal] Scm_HashTableAdd is obsoleted on this platform. You should use the new hashtable API.");
1209 }
1210 if (!e->value) (void)SCM_DICT_SET_VALUE(e, value);
1211 return (ScmHashEntry*)e;
1212 }
1213
Scm_HashTablePut(ScmHashTable * ht,ScmObj key,ScmObj value)1214 ScmHashEntry *Scm_HashTablePut(ScmHashTable *ht, ScmObj key, ScmObj value)
1215 {
1216 ScmDictEntry *e = Scm_HashCoreSearch(SCM_HASH_TABLE_CORE(ht),
1217 (intptr_t)key, SCM_DICT_CREATE);
1218 if (sizeof(intptr_t) != sizeof(void*)) {
1219 Scm_Error("[internal] Scm_HashTablePut is obsoleted on this platform. You should use the new hashtable API.");
1220 }
1221 (void)SCM_DICT_SET_VALUE(e, value);
1222 return (ScmHashEntry*)e;
1223 }
1224
1225 /* TRANSIENT: Pre-0.9 Compatibility routine. Kept for the binary compatibility.
1226 Will be removed on 1.0 */
Scm__HashIterInitCompat(ScmHashTable * table,ScmHashIter * iter)1227 void Scm__HashIterInitCompat(ScmHashTable *table, ScmHashIter *iter)
1228 {
1229 Scm_HashIterInit(iter, SCM_HASH_TABLE_CORE(table));
1230 }
1231
1232 /* TRANSIENT: Pre-0.9 Compatibility routine. Kept for the binary compatibility.
1233 Will be removed on 1.0 */
Scm__HashIterNextCompat(ScmHashIter * iter)1234 ScmHashEntry *Scm__HashIterNextCompat(ScmHashIter *iter)
1235 {
1236 ScmDictEntry *e = Scm_HashIterNext(iter);
1237 return (ScmHashEntry*)e;
1238 }
1239
1240 #if 0
1241 ScmObj Scm_MakeHashTableMultiWord(int keysize, int initsize)
1242 {
1243 return make_hash_table(SCM_CLASS_HASH_TABLE, SCM_HASH_MULTIWORD,
1244 multiword_access, multiword_hash,
1245 NULL, initsize, (void*)SCM_WORD(keysize));
1246 }
1247 #endif
1248
1249 /* Legacy constructor. DEPRECATED. Will go away soon. */
Scm_MakeHashTable(ScmHashProc * hashfn,ScmHashCompareProc * cmpfn SCM_UNUSED,unsigned int initSize)1250 ScmObj Scm_MakeHashTable(ScmHashProc *hashfn,
1251 ScmHashCompareProc *cmpfn SCM_UNUSED,
1252 unsigned int initSize)
1253 {
1254 if (hashfn == (ScmHashProc*)SCM_HASH_EQ) {
1255 return Scm_MakeHashTableSimple(SCM_HASH_EQ, initSize);
1256 } else if (hashfn == (ScmHashProc*)SCM_HASH_EQV) {
1257 return Scm_MakeHashTableSimple(SCM_HASH_EQV, initSize);
1258 } else if (hashfn == (ScmHashProc*)SCM_HASH_EQUAL) {
1259 return Scm_MakeHashTableSimple(SCM_HASH_EQUAL, initSize);
1260 } else if (hashfn == (ScmHashProc*)SCM_HASH_STRING) {
1261 return Scm_MakeHashTableSimple(SCM_HASH_STRING, initSize);
1262 }
1263 #if 0
1264 else {
1265 return Scm_MakeHashTableFull(SCM_CLASS_HASH_TABLE, SCM_HASH_GENERAL,
1266 hashfn, cmpfn, initSize, NULL);
1267 }
1268 #else
1269 return SCM_UNDEFINED;
1270 #endif
1271 }
1272 #endif /*GAUCHE_API_VERSION < 1000*/
1273
1274 /* Legacy hash function.
1275 *
1276 * This used to be used for equal?-hashtable hash. It also guaranteed
1277 * that the hash result won't change between runs and among different
1278 * platforms, so it can be used for persistent data.
1279 *
1280 * There are several drawbacks, though. The guaranteed hash value means
1281 * we can't change hash function. The quality of the original hash
1282 * function wasn't good (it behaves terrible on flonums and compnums);
1283 * it's vulnerable to collision attacks; and it had a few bugs in the
1284 * number hash that broke the 'portable' guarantee between platforms.
1285 *
1286 * Since there have already been stored data relying on the original hash
1287 * values, we keep the old function (with bugs fixed) here.
1288 * Scm_Hash() and Scheme's 'hash' function uses this for the backward
1289 * comaptibility, but it is not recommended for the new code.
1290 */
1291
1292 static u_long legacy_number_hash(ScmObj obj);
1293 static u_long legacy_string_hash(ScmString *str);
1294
Scm_Hash(ScmObj obj)1295 u_long Scm_Hash(ScmObj obj)
1296 {
1297 if (!SCM_PTRP(obj)) {
1298 u_long hashval;
1299 SMALL_INT_HASH(hashval, (u_long)SCM_WORD(obj));
1300 return hashval&PORTABLE_HASHMASK;
1301 } else if (SCM_NUMBERP(obj)) {
1302 return legacy_number_hash(obj);
1303 } else if (SCM_STRINGP(obj)) {
1304 goto string_hash;
1305 } else if (SCM_PAIRP(obj)) {
1306 u_long h = 0, h2;
1307 ScmObj cp;
1308 SCM_FOR_EACH(cp, obj) {
1309 h2 = Scm_Hash(SCM_CAR(cp));
1310 h = COMBINE(h, h2);
1311 }
1312 h2 = Scm_Hash(cp);
1313 h = COMBINE(h, h2);
1314 return h&PORTABLE_HASHMASK;
1315 } else if (SCM_VECTORP(obj)) {
1316 int siz = SCM_VECTOR_SIZE(obj);
1317 u_long h = 0, h2;
1318 for (int i=0; i<siz; i++) {
1319 h2 = Scm_Hash(SCM_VECTOR_ELEMENT(obj, i));
1320 h = COMBINE(h, h2);
1321 }
1322 return h&PORTABLE_HASHMASK;
1323 } else if (SCM_KEYWORDP(obj)) {
1324 /* If we have keyword and symbol unified, KEYWORD_NAME includes
1325 ':'. Legacy hash didn't consider it. */
1326 #if GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION
1327 if (SCM_SYMBOLP(obj)) {
1328 obj = Scm_Substring(SCM_KEYWORD_NAME(obj), 1, -1, FALSE);
1329 } else {
1330 obj = SCM_OBJ(SCM_KEYWORD_NAME(obj));
1331 }
1332 #else /*!GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
1333 obj = Scm_Substring(SCM_KEYWORD_NAME(obj), 1, -1, FALSE);
1334 #endif /*!GAUCHE_KEEP_DISJOINT_KEYWORD_OPTION*/
1335 goto string_hash;
1336 } else if (SCM_SYMBOLP(obj)) {
1337 obj = SCM_OBJ(SCM_SYMBOL_NAME(obj));
1338 goto string_hash;
1339 } else {
1340 /* Call specialized object-hash method */
1341 ScmObj r = Scm_ApplyRec(SCM_OBJ(&Scm_GenericObjectHash),
1342 SCM_LIST1(obj));
1343 if (SCM_INTP(r)) {
1344 return ((u_long)SCM_INT_VALUE(r))&PORTABLE_HASHMASK;
1345 }
1346 if (SCM_BIGNUMP(r)) {
1347 /* NB: Scm_GetUInteger clamps the result to [0, ULONG_MAX],
1348 but taking the LSW would give better distribution. */
1349 return (SCM_BIGNUM(r)->values[0])&PORTABLE_HASHMASK;
1350 }
1351 Scm_Error("object-hash returned non-integer: %S", r);
1352 return 0; /* dummy */
1353 }
1354 string_hash:
1355 return legacy_string_hash(SCM_STRING(obj));
1356 }
1357
legacy_flonum_hash(double f)1358 static u_long legacy_flonum_hash(double f)
1359 {
1360 /* Originally the code was just (u_long)(f * 2654435761UL), but that's
1361 UB when the multiplication yields out of range of u_long. I don't
1362 even remember why I adopted that for the hash function in the
1363 first place, but we have to stick to existing hash values recorded
1364 elsewhere.
1365
1366 On x86 with 8087-compatible FPU, (u_long)(d) behaves as follows.
1367 If -2^63 < d < 2^63, the modulo of 2^32 is taken. Otherwise
1368 it yields 0.
1369
1370 On x86 with SSE, the out-of-range value yields #x8000_0000.
1371
1372 On x86_64, if -2^63 < d < 2^64, the modulo of 2^32 is taken.
1373 Otherwise it yields #x8000_0000_0000_0000.
1374
1375 To achieve maximum compatibility with historical data, we
1376 take the range of 8087, and the calculation is adjusted for
1377 x86_64 behavior. There can be a slight discrepancy from the
1378 result of 8087 because of its internal 80bit calculation---for
1379 example, 3.767278962604362e-10 * 2654435761 is just tiny little
1380 bit less than 1.0 but with 64bit calculation it is rounded up
1381 to 1.0. With 80 bit and integer truncation the result is 0 but
1382 with 64bit we get 1.
1383 */
1384 volatile double d = f * 2654435761UL;
1385 static double two_pow_63 = 0.0;
1386 static double minus_two_pow_63 = 0.0;
1387 static double two_pow_32 = 0.0;
1388 static int initialized = FALSE;
1389
1390 if (!initialized) {
1391 /* This is idempotent - no need to lock */
1392 two_pow_63 = ldexp(1.0, 63);
1393 minus_two_pow_63 = -ldexp(1.0, 63);
1394 two_pow_32 = ldexp(1.0, 32);
1395 AO_nop_full();
1396 initialized = TRUE;
1397 }
1398 /* This condition eliminates NaN as well. */
1399 if (!(minus_two_pow_63 < d && d < two_pow_63)) return 0;
1400 if (-0.5 < d && d < 0.5) return 0;
1401
1402 double dm = trunc(fmod(d, two_pow_32));
1403 if (dm < 0) dm += two_pow_32;
1404 return (u_long)trunc(dm);
1405 }
1406
1407 /* Old hash function for numeric objects. This is terrible for flonums,
1408 and we only keep it in order to maintain portable hash value generated
1409 by legacy hash function. */
legacy_number_hash(ScmObj obj)1410 static u_long legacy_number_hash(ScmObj obj)
1411 {
1412 u_long hashval;
1413 SCM_ASSERT(SCM_NUMBERP(obj));
1414 if (SCM_INTP(obj)) {
1415 SMALL_INT_HASH(hashval, SCM_INT_VALUE(obj));
1416 } else if (SCM_BIGNUMP(obj)) {
1417 u_int i;
1418 u_long u = 0;
1419 for (i=0; i<SCM_BIGNUM_SIZE(obj); i++) {
1420 #if SIZEOF_LONG == 4
1421 u += SCM_BIGNUM(obj)->values[i];
1422 #elif SIZEOF_LONG == 8
1423 u += (SCM_BIGNUM(obj)->values[i] & ((1UL<<32) - 1))
1424 + (SCM_BIGNUM(obj)->values[i] >> 32);
1425 #else
1426 #error "sizeof(long) > 8 platform unsupported"
1427 #endif
1428 }
1429 SMALL_INT_HASH(hashval, u);
1430 } else if (SCM_FLONUMP(obj)) {
1431 hashval = legacy_flonum_hash(SCM_FLONUM_VALUE(obj));
1432 } else if (SCM_RATNUMP(obj)) {
1433 /* Ratnum must already be normalized, so we can simply combine
1434 hashvals of numerator and denominator. */
1435 u_long h1 = legacy_number_hash(SCM_RATNUM_NUMER(obj));
1436 u_long h2 = legacy_number_hash(SCM_RATNUM_DENOM(obj));
1437 hashval = COMBINE(h1, h2);
1438 } else {
1439 hashval =
1440 legacy_flonum_hash(SCM_COMPNUM_REAL(obj)+SCM_COMPNUM_IMAG(obj));
1441 }
1442 return hashval&PORTABLE_HASHMASK;
1443 }
1444
1445 /* Legacy hash function for strings. This isn't very good hash function
1446 either, and it's difficult to adopt salting. */
legacy_string_hash(ScmString * str)1447 static u_long legacy_string_hash(ScmString *str)
1448 {
1449 const ScmStringBody *b = SCM_STRING_BODY(str);
1450 const char *p = SCM_STRING_BODY_START(b);
1451 long k = SCM_STRING_BODY_SIZE(b);
1452 u_long hv = 0;
1453 while (k-- > 0) {
1454 hv = (hv<<5) - (hv) + ((unsigned char)*p++);
1455 }
1456 return hv&PORTABLE_HASHMASK;
1457 }
1458