1 /* hashtable.c                                     -*- mode:c; coding:utf-8; -*-
2  *
3  *   Copyright (c) 2010-2021  Takashi Kato <ktakashi@ymail.com>
4  *
5  *   Redistribution and use in source and binary forms, with or without
6  *   modification, are permitted provided that the following conditions
7  *   are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright
13  *      notice, this list of conditions and the following disclaimer in the
14  *      documentation and/or other materials provided with the distribution.
15  *
16  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22  *   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23  *   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24  *   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25  *   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  *   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  *  $Id: $
29  */
30 #define LIBSAGITTARIUS_BODY
31 #include "sagittarius/private/hashtable.h"
32 #include "sagittarius/private/bytevector.h"
33 #include "sagittarius/private/collection.h"
34 #include "sagittarius/private/compare.h"
35 #include "sagittarius/private/error.h"
36 #include "sagittarius/private/pair.h"
37 #include "sagittarius/private/port.h"
38 #include "sagittarius/private/string.h"
39 #include "sagittarius/private/number.h"
40 #include "sagittarius/private/symbol.h"
41 #include "sagittarius/private/keyword.h"
42 #include "sagittarius/private/vector.h"
43 #include "sagittarius/private/vm.h"
44 #include "sagittarius/private/writer.h"
45 #include "sagittarius/private/cache.h"
46 #include "sagittarius/private/generic.h"
47 
48 
49 typedef struct EntryRec
50 {
51   intptr_t key;
52   intptr_t value;
53   struct EntryRec *next;
54   SgHashVal hashValue;
55 } Entry;
56 
57 #define BUCKETS(hc) ((Entry**)hc->buckets)
58 
59 #define DEFAULT_BUCKET_COUNT 4
60 #define MAX_AVG_CHAIN_LIMIS  3
61 #define EXTEND_BITS          2
62 /* hash value must be 32 bit */
63 #define HASHMASK 0xffffffffUL
64 
65 typedef Entry* SearchProc(SgHashCore *core, intptr_t key,
66 			  SgDictOp op, int flags);
67 
68 static unsigned long round2up(unsigned long val);
69 void hash_iter_init(SgHashCore *core, SgHashIter *itr);
70 
71 /* hash functions */
72 #define STRING_HASH(hv, chars, size)				\
73   do {								\
74     long i_ = (size);						\
75     (hv) = 0;							\
76     while (i_-- > 0) {						\
77       (hv) = ((hv) << 5) - (hv) + ((unsigned char)*chars++);	\
78     }								\
79   } while (0)
80 
81 #define SMALL_INT_HASH(result, value)		\
82   (result) = ((value)*2654435761UL)
83 #define ADDRESS_HASH(result, val)				\
84   (result) = (SgHashVal)((SG_WORD(val) >> 3)*2654435761UL)
85 
86 #define HASH2INDEX(tabsize, bits, hashval)			\
87   (((hashval)+((hashval)>>(32-(bits)))) & ((tabsize) - 1))
88 
89 #define COMBINE(hv1, hv2)  ((hv1)*5+(hv2))
90 
Sg_EqHash(SgObject obj,SgHashVal bound)91 SgHashVal Sg_EqHash(SgObject obj, SgHashVal bound)
92 {
93   SgHashVal hashval;
94   ADDRESS_HASH(hashval, obj);
95   if (bound) return (hashval & HASHMASK) % bound;
96   return hashval & HASHMASK;
97 }
98 
Sg_EqvHash(SgObject obj,SgHashVal bound)99 SgHashVal Sg_EqvHash(SgObject obj, SgHashVal bound)
100 {
101   SgHashVal hashval;
102   if (SG_NUMBERP(obj)) {
103     if (SG_INTP(obj)) {
104       SMALL_INT_HASH(hashval, SG_INT_VALUE(obj));
105     } else if (SG_BIGNUMP(obj)) {
106       long i;
107       unsigned long u = 0;
108       unsigned long size = SG_BIGNUM_GET_COUNT(obj);
109       for (i = 0; i < size; i++) {
110 	u += SG_BIGNUM(obj)->elements[i];
111       }
112       SMALL_INT_HASH(hashval, u);
113     } else if (SG_FLONUMP(obj)) {
114       hashval = (SgHashVal)(SG_FLONUM_VALUE(obj) * 2654435761UL);
115     } else if (SG_RATIONALP(obj)) {
116       SgHashVal h1 = Sg_EqvHash(SG_RATIONAL(obj)->numerator, bound);
117       SgHashVal h2 = Sg_EqvHash(SG_RATIONAL(obj)->denominator, bound);
118       hashval = COMBINE(h1, h2);
119     } else {
120       SgHashVal h1 = Sg_EqvHash(SG_COMPLEX(obj)->real, bound);
121       SgHashVal h2 = Sg_EqvHash(SG_COMPLEX(obj)->imag, bound);
122       hashval = COMBINE(h1, h2);
123     }
124   } else {
125     ADDRESS_HASH(hashval, obj);
126   }
127   if (bound) return (hashval & HASHMASK) % bound;
128   return hashval & HASHMASK;
129 }
130 
131 static SgHashVal pair_hash(SgObject o, int level);
132 static SgHashVal vector_hash(SgObject o, int level);
133 static SgHashVal equal_hash_rec(SgObject obj, int level);
134 
level_hash(SgObject o,int level)135 static SgHashVal level_hash(SgObject o, int level)
136 {
137   if (SG_PAIRP(o)) return pair_hash(o, level);
138   else if (SG_VECTORP(o)) return vector_hash(o, level);
139   else return equal_hash_rec(o, level);
140 }
141 
pair_hash(SgObject o,int level)142 static SgHashVal pair_hash(SgObject o, int level)
143 {
144   if (level == 0) return 0x08d;
145   else return COMBINE(level_hash(SG_CAR(o), level-1),
146 		      level_hash(SG_CDR(o), level-1));
147 }
148 
compact_vector(SgObject vec,long len,long short_len)149 static SgObject compact_vector(SgObject vec, long len, long short_len)
150 {
151   long selections = short_len - 5;
152   long interval = (len-5)/(short_len-5);
153   long fsp = 3 + interval/2, i, index;
154   SgObject r = Sg_MakeVector(short_len, SG_FALSE);
155   /* set 5 elements first */
156   SG_VECTOR_ELEMENT(r,	0) = SG_VECTOR_ELEMENT(vec, 0);
157   SG_VECTOR_ELEMENT(r,	1) = SG_VECTOR_ELEMENT(vec, 1);
158   SG_VECTOR_ELEMENT(r,	2) = SG_VECTOR_ELEMENT(vec, 2);
159   for (i = 3, index = fsp; i < selections+3; i++, index+=interval) {
160     SG_VECTOR_ELEMENT(r, i) = SG_VECTOR_ELEMENT(vec, index);
161   }
162   SG_VECTOR_ELEMENT(r, i++) = SG_VECTOR_ELEMENT(vec, (len-2));
163   SG_VECTOR_ELEMENT(r, i)   = SG_VECTOR_ELEMENT(vec, (len-1));
164   return r;
165 }
166 
smoosh_vector(SgObject vec,long len,int level)167 static SgHashVal smoosh_vector(SgObject vec, long len, int level)
168 {
169   long remain;
170   SgHashVal result = 0xd80f;
171   for (remain = len; remain > 0; remain--) {
172     result = COMBINE(result, level_hash(SG_VECTOR_ELEMENT(vec, remain-1),
173 					level-1));
174   }
175   return result;
176 }
vector_hash(SgObject obj,int level)177 static SgHashVal vector_hash(SgObject obj, int level)
178 {
179   if (level == 0) return 0xd80e;
180   else {
181     long breakn = 13, len = SG_VECTOR_SIZE(obj);
182     SgHashVal hashval, h;
183     SMALL_INT_HASH(hashval, len);
184     if (len <= breakn) {
185       h = smoosh_vector(obj, len, level);
186     } else {
187       obj = compact_vector(obj, len, breakn);
188       h = smoosh_vector(obj, breakn, level);
189     }
190     return COMBINE(hashval, h);
191   }
192 }
193 
object_hash(SgObject obj,int level)194 static SgHashVal object_hash(SgObject obj, int level)
195 {
196   SgObject l = SG_MAKE_INT(level);
197   SgObject r = Sg_Apply2(SG_OBJ(&Sg_GenericObjectHash), obj, l);
198   SgHashVal hashval;
199   if (SG_EXACT_INTP(r)) {
200     int oor;
201     hashval = (SgHashVal)Sg_GetUIntegerClamp(r, SG_CLAMP_NONE, &oor);
202     if (!oor) {
203       return hashval;
204     }
205   }
206   /* don't want to raise an error with hashing unless user raises it
207      in Scheme world. */
208   ADDRESS_HASH(hashval, obj);
209   return hashval;
210 }
211 
212 #define MAX_NESTING_LEVEL 4
equal_hash_rec(SgObject obj,int level)213 static SgHashVal equal_hash_rec(SgObject obj, int level)
214 {
215   SgHashVal hashval;
216   if (!SG_PTRP(obj)) {
217     SMALL_INT_HASH(hashval, (SgHashVal)SG_WORD(obj));
218     return hashval;
219   } else if (SG_NUMBERP(obj)) {
220     return Sg_EqvHash(obj, 0);
221   } else if (SG_STRINGP(obj)) {
222     return Sg_StringHash(SG_STRING(obj), 0);
223   } else if (SG_PAIRP(obj)) {
224     return pair_hash(obj, MAX_NESTING_LEVEL);
225   } else if (SG_VECTORP(obj)) {
226     return vector_hash(obj, MAX_NESTING_LEVEL);
227   } else if (SG_SYMBOLP(obj)) {
228     return Sg_StringHash(SG_SYMBOL(obj)->name, 0);
229   } else if (SG_KEYWORDP(obj)) {
230     return Sg_StringHash(SG_KEYWORD_NAME(obj), 0);
231   } else if (SG_BVECTORP(obj)) {
232     /* TODO is this ok? */
233     SgHashVal h = 0, h2;
234     long i, size = SG_BVECTOR_SIZE(obj);
235     for (i = 0; i < size; i++) {
236       SMALL_INT_HASH(h2, SG_BVECTOR_ELEMENT(obj, i));
237       h = COMBINE(h, h2);
238     }
239     return h;
240   } else {
241     return object_hash(obj, level);
242   }
243 }
244 
Sg_EqualHash(SgObject obj,SgHashVal bound)245 SgHashVal Sg_EqualHash(SgObject obj, SgHashVal bound)
246 {
247   SgHashVal hash = equal_hash_rec(obj, MAX_NESTING_LEVEL);
248   if (bound) return hash % bound;
249   return hash;
250 }
251 
Sg_StringHash(SgString * str,SgHashVal bound)252 SgHashVal Sg_StringHash(SgString *str, SgHashVal bound)
253 {
254   SgHashVal hashval;
255   const SgChar *p = str->value;
256   STRING_HASH(hashval, p, str->size);
257   if (bound == 0) return hashval;
258   else return (hashval % bound);
259 }
260 
261 /* accessor and so */
insert_entry(SgHashCore * table,intptr_t key,SgHashVal hashval,long index)262 static Entry *insert_entry(SgHashCore *table,
263 			   intptr_t key,
264 			   SgHashVal hashval,
265 			   long index)
266 {
267   Entry *e = SG_NEW(Entry);
268   Entry **buckets = BUCKETS(table);
269   e->key = key;
270   if (table->create_entry) {
271     table->create_entry(table, (SgHashEntry *)e);
272   }
273   e->value = 0;
274   e->next = buckets[index];
275   e->hashValue = hashval;
276   buckets[index] = e;
277   table->entryCount++;
278 
279   if (table->entryCount > table->bucketCount * MAX_AVG_CHAIN_LIMIS) {
280     /* too many chains */
281     /* extend the table */
282     Entry **newb, *f;
283     SgHashIter itr;
284     long i, newsize = (table->bucketCount << EXTEND_BITS);
285     long newbits = table->bucketsLog2Count + EXTEND_BITS;
286 
287     newb = SG_NEW_ARRAY(Entry*, newsize);
288     for (i = 0; i < newsize; i++) newb[i] = NULL; /* initialize new buckets */
289 
290     hash_iter_init(table, &itr);
291     while ((f = (Entry*)Sg_HashIterNext(&itr, NULL, NULL)) != NULL) {
292       index = HASH2INDEX(newsize, newbits, f->hashValue);
293       f->next = newb[index];
294       newb[index] = f;
295     }
296     /* gc friendliness */
297     for (i = 0; i < table->bucketCount; i++) table->buckets[i] = NULL;
298 
299     table->bucketCount = newsize;
300     table->bucketsLog2Count = newbits;
301     table->buckets = (void**)newb;
302   }
303   return e;
304 }
305 
delete_entry(SgHashCore * table,Entry * entry,Entry * prev,long index)306 static Entry *delete_entry(SgHashCore *table,
307 			   Entry *entry, Entry *prev,
308 			   long index)
309 {
310   if (prev) prev->next = entry->next;
311   else table->buckets[index] = (void*)entry->next;
312   table->entryCount--;
313   ASSERT(table->entryCount >= 0);
314   entry->next = NULL; /* GC friendliness */
315   return entry;
316 }
317 
318 #define FOUND(table, op, e, p, index)			\
319   do {							\
320     switch (op) {					\
321     case SG_DICT_GET:;					\
322     case SG_DICT_CREATE:;				\
323       return e;						\
324     case SG_DICT_DELETE:;				\
325       return delete_entry(table, e, p, index);		\
326     }							\
327   } while(0)
328 
329 #define NOTFOUND(table, op, key, hashval, index)	\
330   do {							\
331     if (op == SG_DICT_CREATE) {				\
332       return insert_entry(table, key, hashval, index);	\
333     } else {						\
334       return NULL;					\
335     }							\
336   } while (0)
337 
338 
339 
340 /* core initialize */
hash_core_init(SgHashCore * table,SearchProc * access,SgHashProc * hasher,SgHashCompareProc * compare,unsigned long initSize,void * data)341 static void hash_core_init(SgHashCore *table,
342 			   SearchProc *access,
343 			   SgHashProc *hasher,
344 			   SgHashCompareProc *compare,
345 			   unsigned long initSize,
346 			   void* data)
347 {
348   Entry **b;
349   unsigned long i;
350 
351   if (initSize != 0) initSize = round2up(initSize);
352   else initSize = DEFAULT_BUCKET_COUNT;
353 
354   b = SG_NEW_ARRAY(Entry*, initSize);
355   table->buckets = (void**)b;
356   table->bucketCount = initSize;
357   table->entryCount = 0;
358   table->access = access;
359   table->hasher = hasher;
360   table->compare = compare;
361   table->data = data;
362   table->generalHasher = SG_UNDEF;
363   table->generalCompare = SG_UNDEF;
364   for (i = initSize, table->bucketsLog2Count = 0; i > 1; i /= 2) {
365     table->bucketsLog2Count++;
366   }
367   for (i = 0; i < initSize; i++) table->buckets[i] = NULL;
368   table->create_entry = NULL;	/* default */
369 }
370 
371 /** accessor function */
372 /* eq? */
address_access(SgHashCore * table,intptr_t key,SgDictOp op,int flags)373 static Entry *address_access(SgHashCore *table,
374 			     intptr_t key,
375 			     SgDictOp op,
376 			     int flags)
377 {
378   SgHashVal hashval;
379   unsigned long index;
380   Entry *e, *p, **buckets = BUCKETS(table);
381 
382   ADDRESS_HASH(hashval, key);
383   index = HASH2INDEX(table->bucketCount, table->bucketsLog2Count, hashval);
384 
385   for (e = buckets[index], p = NULL; e; p = e, e = e->next) {
386     if (e->key == key) FOUND(table, op, e, p, index);
387   }
388   NOTFOUND(table, op, key, hashval, index);
389 }
390 
address_hash(const SgHashCore * ht,intptr_t obj)391 static SgHashVal address_hash(const SgHashCore *ht, intptr_t obj)
392 {
393   SgHashVal hashval;
394   ADDRESS_HASH(hashval, obj);
395   return hashval;
396 }
397 
address_compare(const SgHashCore * ht,intptr_t key,intptr_t k2)398 static int address_compare(const SgHashCore *ht, intptr_t key, intptr_t k2)
399 {
400   return (key == k2);
401 }
402 
403 /* eqv? and equal? */
eqv_hash(const SgHashCore * table,intptr_t key)404 static SgHashVal eqv_hash(const SgHashCore *table, intptr_t key)
405 {
406   return Sg_EqvHash(SG_OBJ(key), 0);
407 }
408 
eqv_compare(const SgHashCore * table,intptr_t key,intptr_t k2)409 static int eqv_compare(const SgHashCore *table, intptr_t key, intptr_t k2)
410 {
411   return Sg_EqvP(SG_OBJ(key), SG_OBJ(k2));
412 }
413 
equal_hash(const SgHashCore * table,intptr_t key)414 static SgHashVal equal_hash(const SgHashCore *table, intptr_t key)
415 {
416   return Sg_EqualHash(SG_OBJ(key), 0);
417 }
418 
equal_compare(const SgHashCore * table,intptr_t key,intptr_t k2)419 static int equal_compare(const SgHashCore *table, intptr_t key, intptr_t k2)
420 {
421   return Sg_EqualP(SG_OBJ(key), SG_OBJ(k2));
422 }
423 
424 /* string */
string_access(SgHashCore * table,intptr_t k,SgDictOp op,int flags)425 static Entry *string_access(SgHashCore *table,
426 			    intptr_t k,
427 			    SgDictOp op,
428 			    int flags)
429 {
430   SgHashVal hashval;
431   unsigned long index;
432   long size;
433   const SgChar *s;
434   SgObject key = SG_OBJ(k);
435   Entry *e, *p, **buckets;
436 
437   if (!SG_STRINGP(key)) {
438     if (flags & SG_HASH_NO_ERROR) return NULL;
439     Sg_Error(UC("Got non-string key %S to the string hashtable."), key);
440   }
441 
442   s = SG_STRING(key)->value;
443   size = SG_STRING(key)->size;
444   STRING_HASH(hashval, s, size);
445   index = HASH2INDEX(table->bucketCount, table->bucketsLog2Count, hashval);
446   buckets = BUCKETS(table);
447 
448   for (e = buckets[index], p = NULL; e; p = e, e = e->next) {
449     SgObject ee = SG_OBJ(e->key);
450     if (Sg_StringEqual(SG_STRING(key), SG_STRING(ee))) {
451       FOUND(table, op, e, p, index);
452     }
453   }
454   NOTFOUND(table, op, k, hashval, index);
455 }
456 
string_hash(const SgHashCore * table,intptr_t key)457 static SgHashVal string_hash(const SgHashCore *table, intptr_t key)
458 {
459   return Sg_StringHash(SG_STRING(key), 0);
460 }
461 
string_compare(const SgHashCore * table,intptr_t key,intptr_t k2)462 static int string_compare(const SgHashCore *table, intptr_t key, intptr_t k2)
463 {
464   return Sg_StringEqual(SG_STRING(key), SG_STRING(k2));
465 }
466 /* general access */
general_access(SgHashCore * table,intptr_t key,SgDictOp op,int flags)467 static Entry* general_access(SgHashCore *table,
468 			     intptr_t key,
469 			     SgDictOp op,
470 			     int flags)
471 {
472   SgHashVal hashval;
473   unsigned long index;
474   Entry *e, *p, **buckets;
475 
476   hashval = table->hasher(table, key);
477   index = HASH2INDEX(table->bucketCount, table->bucketsLog2Count, hashval);
478   buckets = BUCKETS(table);
479 
480   for (e = buckets[index], p = NULL; e; p = e, e = e->next) {
481     if (table->compare(table, key, e->key)) FOUND(table, op, e, p, index);
482   }
483   NOTFOUND(table, op, key, hashval, index);
484 }
485 
general_hash(const SgHashCore * table,intptr_t key)486 static SgHashVal general_hash(const SgHashCore *table, intptr_t key)
487 {
488   SgObject hash, hasher = table->generalHasher;
489 
490   if (SG_SUBRP(hasher)) {
491     SG_CALL_SUBR1(hash, hasher, SG_OBJ(key));
492   } else {
493     hash = Sg_Apply1(table->generalHasher, SG_OBJ(key));
494   }
495   if (!SG_EXACT_INTP(hash)) {
496     Sg_Error(UC("%S is not an exact integer"), hash);
497   }
498   /* well the value must be either fixnum or bignum.
499      To avoid overflow we use eqv-hash.
500    */
501   return Sg_EqvHash(hash, 0);
502 }
503 
general_compare(const SgHashCore * table,intptr_t key,intptr_t k2)504 static int general_compare(const SgHashCore *table, intptr_t key, intptr_t k2)
505 {
506   SgObject ret, compare = table->generalCompare;
507   if (SG_SUBRP(compare)) {
508     SG_CALL_SUBR2(ret, compare, SG_OBJ(key), SG_OBJ(k2));
509   } else {
510     ret = Sg_Apply2(table->generalCompare, SG_OBJ(key), SG_OBJ(k2));
511   }
512   return !SG_FALSEP(ret);
513 }
514 
515 
hash_core_predef_procs(SgHashType type,SearchProc ** access,SgHashProc ** hasher,SgHashCompareProc ** compare)516 static int hash_core_predef_procs(SgHashType type,
517 				  SearchProc **access,
518 				  SgHashProc **hasher,
519 				  SgHashCompareProc **compare)
520 {
521   switch (type) {
522   case SG_HASH_EQ:
523     *access = address_access;
524     *hasher = address_hash;
525     *compare = address_compare;
526     return TRUE;
527   case SG_HASH_EQV:
528     *access = general_access;
529     *hasher = eqv_hash;
530     *compare = eqv_compare;
531     return TRUE;
532   case SG_HASH_EQUAL:
533     *access = general_access;
534     *hasher = equal_hash;
535     *compare = equal_compare;
536     return TRUE;
537   case SG_HASH_STRING:
538     *access = string_access;
539     *hasher = string_hash;
540     *compare = string_compare;
541     return TRUE;
542   case SG_HASH_GENERAL:
543     *access = general_access;
544     *hasher = general_hash;
545     *compare = general_compare;
546     return TRUE;
547   default:
548     return FALSE;
549   }
550 }
551 
Sg_HashCoreInitSimple(SgHashCore * core,SgHashType type,long initSize,void * data)552 void Sg_HashCoreInitSimple(SgHashCore *core,
553 			   SgHashType type,
554 			   long initSize,
555 			   void *data)
556 {
557   SearchProc *access = NULL;
558   SgHashProc *hasher = NULL;
559   SgHashCompareProc *compare = NULL;
560   if (hash_core_predef_procs(type, &access, &hasher, &compare) == FALSE) {
561     Sg_Error(UC("wrong TYPE argument passed to Sg_HashCoreInitSimple: %d"),
562 	     type);
563   }
564   hash_core_init(core, access, hasher, compare, initSize, data);
565 }
566 
Sg_HashCoreInitGeneral(SgHashCore * core,SgHashProc * hasher,SgHashCompareProc * compare,long initSize,void * data)567 void Sg_HashCoreInitGeneral(SgHashCore *core,
568 			    SgHashProc *hasher,
569 			    SgHashCompareProc *compare,
570 			    long initSize,
571 			    void *data)
572 {
573   hash_core_init(core, general_access, hasher, compare, initSize, data);
574 }
575 
Sg_HashCoreTypeToProcs(SgHashType type,SgHashProc ** hasher,SgHashCompareProc ** compare)576 int Sg_HashCoreTypeToProcs(SgHashType type, SgHashProc **hasher,
577 			   SgHashCompareProc **compare)
578 {
579   SearchProc *access;
580   return hash_core_predef_procs(type, &access, hasher, compare);
581 }
582 
Sg_HashCoreSearch(SgHashCore * table,intptr_t key,SgDictOp op,int flags)583 SgHashEntry* Sg_HashCoreSearch(SgHashCore *table, intptr_t key,
584 			       SgDictOp op, int flags)
585 {
586   SearchProc *p = (SearchProc*)table->access;
587   return (SgHashEntry*)p(table, key, op, flags);
588 }
589 
Sg_HashCoreCopy(SgHashTable * dstT,const SgHashTable * srcT)590 void Sg_HashCoreCopy(SgHashTable *dstT, const SgHashTable *srcT)
591 {
592   SgHashCore *dst = SG_HASHTABLE_CORE(dstT);
593   const SgHashCore *src = SG_HASHTABLE_CORE(srcT);
594 
595   Entry **b = SG_NEW_ARRAY(Entry*, src->bucketCount);
596   int i;
597   Entry *e, *p, *s;
598 
599   for (i = 0; i < src->bucketCount; i++) {
600     p = NULL;
601     s = (Entry*)src->buckets[i];
602     b[i] = NULL;
603     while (s) {
604       e = SG_NEW(Entry);
605       e->key = s->key;
606       SG_HASHTABLE_ENTRY_SET(dstT, (SgHashEntry *)e, SG_OBJ(s->value),
607 			     SG_DICT_ON_COPY);
608       /* e->value = s->value; */
609       e->next = NULL;
610       if (p) p->next = e;
611       else b[i] = e;
612       p = e;
613       s = s->next;
614     }
615   }
616   dst->bucketCount = dst->entryCount = 0;
617 
618   dst->buckets = (void**)b;
619   dst->hasher = src->hasher;
620   dst->compare = src->compare;
621   dst->access = src->access;
622   dst->generalHasher  =	src->generalHasher;
623   dst->generalCompare =	src->generalCompare;
624   dst->data = src->data;
625   dst->bucketCount = src->bucketCount;
626   dst->bucketsLog2Count = src->bucketsLog2Count;
627   dst->entryCount = src->entryCount;
628 }
629 
Sg_HashCoreClear(SgHashCore * ht,long k)630 void Sg_HashCoreClear(SgHashCore *ht, long k)
631 {
632   long i;
633   for (i = 0; i < ht->bucketCount; i++) {
634     ht->buckets[i] = NULL;
635   }
636   ht->entryCount = 0;
637   /* TODO: do we need this? */
638   if (k > 0) {
639     Entry **b;
640     ht->buckets = NULL;	/* gc friendliness */
641     if (k == 0) k = DEFAULT_BUCKET_COUNT;
642     else k = round2up(k);
643     b = SG_NEW_ARRAY(Entry*, k);
644     ht->buckets = (void**)b;
645     ht->bucketCount = k;
646     for (i = k, ht->bucketsLog2Count = 0; i > 1; i /= 2) {
647       ht->bucketsLog2Count++;
648     }
649     for (i = 0; i < k; i++) ht->buckets[i] = NULL;
650   }
651 }
652 
653 SgHashEntry * hash_iter_next(SgHashIter *itr, SgObject *key, SgObject *value);
hash_iter_init(SgHashCore * core,SgHashIter * itr)654 void hash_iter_init(SgHashCore *core, SgHashIter *itr)
655 {
656   int i;
657   itr->core = core;
658   itr->iter_next = hash_iter_next;
659   for (i = 0; i < core->bucketCount; i++) {
660     if (core->buckets[i]) {
661       itr->bucket = i;
662       itr->next = core->buckets[i];
663       return;
664     }
665   }
666   itr->next = NULL;
667 }
668 
Sg_HashIterInit(SgObject table,SgHashIter * itr)669 void Sg_HashIterInit(SgObject table, SgHashIter *itr)
670 {
671   SG_HASHTABLE_OPTABLE(table)->init_iter(table, itr);
672 }
673 
Sg_HashIterNext(SgHashIter * itr,SgObject * key,SgObject * value)674 SgHashEntry* Sg_HashIterNext(SgHashIter *itr, SgObject *key, SgObject *value)
675 {
676   return itr->iter_next(itr, key, value);
677 }
678 
hash_print(SgObject obj,SgPort * port,SgWriteContext * ctx)679 static void hash_print(SgObject obj, SgPort *port, SgWriteContext *ctx)
680 {
681   SgHashTable *ht = SG_HASHTABLE(obj);
682   SG_PORT_LOCK_WRITE(port);
683   Sg_PutuzUnsafe(port, UC("#<hashtable "));
684   if (SG_IMMUTABLE_HASHTABLE_P(ht)) {
685     Sg_PutuzUnsafe(port, UC("immutable "));
686   }
687   switch (ht->type) {
688   case SG_HASH_EQ:
689     Sg_PutuzUnsafe(port, UC("eq?"));
690     break;
691   case SG_HASH_EQV:
692     Sg_PutuzUnsafe(port, UC("eqv?"));
693     break;
694   case SG_HASH_EQUAL:
695     Sg_PutuzUnsafe(port, UC("equal?"));
696     break;
697   case SG_HASH_STRING:
698     Sg_PutuzUnsafe(port, UC("string=?"));
699     break;
700   case SG_HASH_GENERAL:
701     Sg_Write(SG_HASHTABLE_CORE(ht)->generalHasher, port, ctx->mode);
702     Sg_PutcUnsafe(port, ' ');
703     Sg_Write(SG_HASHTABLE_CORE(ht)->generalCompare, port, ctx->mode);
704     break;
705   }
706   /* Sg_PutcUnsafe(port, '>'); */
707   Sg_Printf(port, UC(" %p>"), obj);
708   SG_PORT_UNLOCK_WRITE(port);
709 }
710 
711 /*
712   caching hashtable is best effort. so if we can't it'll give up.
713   the case we can't cache it:
714      + General hashtable with subr
715      + Entry contains something we can't cache
716 
717   cache structure
718     + type (byte)
719     + immutable? (byte)
720     + size (int) enough?
721       + if type == general
722         + hasher
723 	+ compare
724     + key value, so on
725 */
hash_cache_reader(SgPort * port,SgReadCacheCtx * ctx)726 static SgObject hash_cache_reader(SgPort *port, SgReadCacheCtx *ctx)
727 {
728   SgHashTable *ht;
729   int type, immutablep;
730   long entryCount, i;
731   SgObject count;
732   type = Sg_GetbUnsafe(port);
733   immutablep = Sg_GetbUnsafe(port);
734   count = Sg_ReadCacheObject(port, ctx);
735   ASSERT(SG_INTP(count));
736   entryCount = SG_INT_VALUE(count);
737   switch (type) {
738   case SG_HASH_GENERAL: {
739     SgObject hasher = Sg_ReadCacheObject(port, ctx);
740     SgObject compare = Sg_ReadCacheObject(port, ctx);
741     ht = SG_HASHTABLE(Sg_MakeHashTable(hasher, compare, entryCount));
742     break;
743   }
744   default:
745     ht = SG_HASHTABLE(Sg_MakeHashTableSimple(type, entryCount));
746     break;
747   }
748   for (i = 0; i < entryCount; i++) {
749     SgObject key = Sg_ReadCacheObject(port, ctx);
750     SgObject value = Sg_ReadCacheObject(port, ctx);
751     Sg_HashTableSet(ht, key, value, 0);
752   }
753   ht->immutablep = immutablep;
754   return SG_OBJ(ht);
755 }
756 
hash_cache_scanner(SgObject obj,SgObject cbs,SgWriteCacheCtx * ctx)757 static SgObject hash_cache_scanner(SgObject obj, SgObject cbs,
758 				   SgWriteCacheCtx *ctx)
759 {
760   SgHashTable *ht = SG_HASHTABLE(obj);
761   SgHashIter iter;
762   SgHashEntry *e;
763   switch (ht->type) {
764   default: break;
765     /* we are only interested in general type */
766   case SG_HASH_GENERAL:
767     /* if one of these 2 is subr, then the cache will be failed. */
768     cbs = Sg_WriteCacheScanRec(SG_HASHTABLE_CORE(ht)->generalHasher, cbs, ctx);
769     cbs = Sg_WriteCacheScanRec(SG_HASHTABLE_CORE(ht)->generalCompare, cbs, ctx);
770     break;
771   }
772   Sg_HashIterInit(ht, &iter);
773   while ((e = Sg_HashIterNext(&iter, NULL, NULL)) != NULL) {
774     cbs = Sg_WriteCacheScanRec(SG_HASH_ENTRY_KEY(e), cbs, ctx);
775     cbs = Sg_WriteCacheScanRec(SG_HASH_ENTRY_VALUE(e), cbs, ctx);
776   }
777   return cbs;
778 }
779 
hash_cache_writer(SgObject obj,SgPort * port,SgWriteCacheCtx * ctx)780 static void hash_cache_writer(SgObject obj, SgPort *port,
781 			      SgWriteCacheCtx *ctx)
782 {
783   SgHashTable *ht = SG_HASHTABLE(obj);
784   SgHashIter iter;
785   SgHashEntry *e;
786   SgObject count = SG_MAKE_INT(SG_HASHTABLE_CORE(ht)->entryCount);
787   Sg_PutbUnsafe(port, (int)ht->type);
788   Sg_PutbUnsafe(port, (int)ht->immutablep);
789   /* should we write 4 byte instead of object? */
790   Sg_WriteObjectCache(count, port, ctx);
791   switch (ht->type) {
792   default: break;
793     /* we are only interested in general type */
794   case SG_HASH_GENERAL:
795     /* if one of these 2 is subr, then the cache will be failed. */
796     Sg_WriteObjectCache(SG_HASHTABLE_CORE(ht)->generalHasher, port, ctx);
797     Sg_WriteObjectCache(SG_HASHTABLE_CORE(ht)->generalCompare, port, ctx);
798     break;
799   }
800   Sg_HashIterInit(ht, &iter);
801   while ((e = Sg_HashIterNext(&iter, NULL, NULL)) != NULL) {
802     Sg_WriteObjectCache(SG_HASH_ENTRY_KEY(e), port, ctx);
803     Sg_WriteObjectCache(SG_HASH_ENTRY_VALUE(e), port, ctx);
804   }
805 }
806 
807 #define DEFINE_CLASS_WITH_CACHE SG_DEFINE_BUILTIN_CLASS_WITH_CACHE
808 
809 DEFINE_CLASS_WITH_CACHE(Sg_HashTableClass,
810 			hash_cache_reader, hash_cache_scanner,
811 			hash_cache_writer,
812 			hash_print, NULL, NULL, NULL,
813 			SG_CLASS_DICTIONARY_CPL);
814 
815 static SgHashTable * make_hashtable();
816 
hashtable_ref(SgObject table,SgHashEntry * e,int flags)817 static SgObject hashtable_ref(SgObject table, SgHashEntry *e,  int flags)
818 {
819   return SG_HASH_ENTRY_VALUE(e);
820 }
821 
hashtable_set(SgObject table,SgHashEntry * e,SgObject value,int flags)822 static SgObject hashtable_set(SgObject table, SgHashEntry *e, SgObject value,
823 			      int flags)
824 {
825   if (e->value) {
826     if (flags & SG_HASH_NO_OVERWRITE) return SG_HASH_ENTRY_VALUE(e);
827     else {
828       return SG_HASH_ENTRY_SET_VALUE(e, value);
829     }
830   } else {
831     return SG_HASH_ENTRY_SET_VALUE(e, value);
832   }
833 }
834 
hashtable_delete(SgObject table,SgObject key)835 static SgObject hashtable_delete(SgObject table, SgObject key)
836 {
837   SgHashEntry *e;
838 
839   e = Sg_HashCoreSearch(SG_HASHTABLE_CORE(table),
840 			(intptr_t)key,
841 			SG_DICT_DELETE,
842 			0);
843   if (e && e->value) return SG_HASH_ENTRY_VALUE(e);
844   else return SG_UNBOUND;
845 }
846 
hashtable_copy(SgObject src,int mutableP)847 static SgObject hashtable_copy(SgObject src, int mutableP)
848 {
849   SgHashTable *dst = make_hashtable();
850   Sg_HashCoreCopy(dst, src);
851   dst->type = SG_HASHTABLE_TYPE(src);
852   if (!mutableP) {
853     dst->immutablep = TRUE;
854   }
855   return SG_OBJ(dst);
856 }
857 
hashtable_init_iter(SgObject table,SgHashIter * iter)858 static void hashtable_init_iter(SgObject table, SgHashIter *iter)
859 {
860   hash_iter_init(SG_HASHTABLE_CORE(table), iter);
861   iter->table = table;
862 }
863 
hash_iter_next(SgHashIter * itr,SgObject * key,SgObject * value)864 SgHashEntry * hash_iter_next(SgHashIter *itr, SgObject *key, SgObject *value)
865 {
866   Entry *e = (Entry*)itr->next;
867   if (e != NULL) {
868     if (e->next) itr->next = e->next;
869     else {
870       long i = itr->bucket + 1;
871       for (; i < itr->core->bucketCount; i++) {
872 	if (itr->core->buckets[i]) {
873 	  itr->bucket = i;
874 	  itr->next = itr->core->buckets[i];
875 	  if (key) *key = SG_HASH_ENTRY_KEY(e);
876 	  if (value) *value = SG_HASH_ENTRY_VALUE(e);
877 	  return (SgHashEntry*)e;
878 	}
879       }
880       itr->next = NULL;
881     }
882     if (key) *key = SG_HASH_ENTRY_KEY(e);
883     if (value) *value = SG_HASH_ENTRY_VALUE(e);
884   }
885   return (SgHashEntry*)e; /*NULL*/
886 }
887 
888 static SgHashOpTable hashtable_operations = {
889   hashtable_ref,
890   hashtable_set,
891   hashtable_delete,
892   hashtable_copy,
893   hashtable_init_iter,
894 };
895 
make_hashtable()896 static SgHashTable * make_hashtable()
897 {
898   SgHashTable *z = SG_NEW(SgHashTable);
899   SG_SET_CLASS(z, SG_CLASS_HASHTABLE);
900   SG_HASHTABLE_OPTABLE(z) = &hashtable_operations;
901   return z;
902 }
903 
Sg_MakeHashTableSimple(SgHashType type,long initSize)904 SgObject Sg_MakeHashTableSimple(SgHashType type, long initSize)
905 {
906   SgHashTable *z = make_hashtable();
907   return Sg_InitHashTableSimple(z, type, initSize);
908 }
909 
Sg_InitHashTableSimple(SgHashTable * table,SgHashType type,long initSize)910 SgObject Sg_InitHashTableSimple(SgHashTable *table,
911 				SgHashType type, long initSize)
912 {
913   if (type > SG_HASH_GENERAL) {
914     Sg_Error(UC("Sg_MakeHashTableSimple: wrong type arg: %d"), type);
915   }
916   SG_SET_CLASS(table, SG_CLASS_HASHTABLE);
917   SG_HASHTABLE_OPTABLE(table) = &hashtable_operations;
918   Sg_HashCoreInitSimple(&table->core, type, initSize, NULL);
919   table->type = type;
920   table->immutablep = FALSE;
921   return SG_OBJ(table);
922 }
923 
Sg_MakeHashTable(SgObject hasher,SgObject compare,long initSize)924 SgObject Sg_MakeHashTable(SgObject hasher, SgObject compare, long initSize)
925 {
926   SgHashTable *z = SG_HASHTABLE(Sg_MakeHashTableSimple(SG_HASH_GENERAL,
927 						       initSize));
928   z->core.generalHasher = hasher;
929   z->core.generalCompare = compare;
930   return SG_OBJ(z);
931 }
932 
Sg_MakeHashTableWithComparator(SgObject comparator,long initSize)933 SgObject Sg_MakeHashTableWithComparator(SgObject comparator, long initSize)
934 {
935   /* do some optimisation */
936   if (comparator == Sg_EqComparator()) {
937     return Sg_MakeHashTableSimple(SG_HASH_EQ, initSize);
938   } else if (comparator == Sg_EqvComparator()) {
939     return Sg_MakeHashTableSimple(SG_HASH_EQV, initSize);
940   } else if (comparator == Sg_EqualComparator()) {
941     return Sg_MakeHashTableSimple(SG_HASH_EQUAL, initSize);
942   } else if (comparator == Sg_StringComparator()) {
943     return Sg_MakeHashTableSimple(SG_HASH_STRING, initSize);
944   } else {
945     if (!SG_PROCEDUREP(SG_COMPARATOR(comparator)->hashFn) ||
946 	!SG_PROCEDUREP(SG_COMPARATOR(comparator)->eqFn)) {
947       Sg_Error(UC("make-hashtable/comparator: comparator doesn't "
948 		  "have hash and/or equality procedure(s). %S"), comparator);
949     }
950     return Sg_MakeHashTable(SG_COMPARATOR(comparator)->hashFn,
951 			    SG_COMPARATOR(comparator)->eqFn,
952 			    initSize);
953   }
954 }
955 
956 
Sg_HashTableCopy(SgHashTable * src,int mutableP)957 SgObject Sg_HashTableCopy(SgHashTable *src, int mutableP)
958 {
959   return SG_HASHTABLE_OPTABLE(src)->copy(src, mutableP);
960 }
961 
Sg_HashTableRef(SgHashTable * table,SgObject key,SgObject fallback)962 SgObject Sg_HashTableRef(SgHashTable *table, SgObject key, SgObject fallback)
963 {
964   SgHashEntry *e = Sg_HashCoreSearch(SG_HASHTABLE_CORE(table),
965 				     (intptr_t)key, SG_DICT_GET,
966 				     0);
967   if (!e) return fallback;
968   return SG_HASHTABLE_OPTABLE(table)->ref(table, e, 0);
969 }
970 
Sg_HashTableSet(SgHashTable * table,SgObject key,SgObject value,int flags)971 SgObject Sg_HashTableSet(SgHashTable *table, SgObject key, SgObject value,
972 			 int flags)
973 {
974   SgHashEntry *e;
975   if (SG_IMMUTABLE_HASHTABLE_P(table)) {
976     Sg_Error(UC("attemp to modify immutable hashtable"));
977     return SG_UNDEF;
978   }
979 
980   e = Sg_HashCoreSearch(SG_HASHTABLE_CORE(table), (intptr_t)key,
981 			(flags & SG_HASH_NO_CREATE)
982 			? SG_DICT_GET
983 			: SG_DICT_CREATE,
984 			0);
985   if (!e) return SG_UNBOUND;
986   return SG_HASHTABLE_OPTABLE(table)->set(table, e, value, flags);
987 }
988 
Sg_HashTableDelete(SgHashTable * table,SgObject key)989 SgObject Sg_HashTableDelete(SgHashTable *table, SgObject key)
990 {
991   if (SG_IMMUTABLE_HASHTABLE_P(table)) {
992     Sg_Error(UC("attemp to modify immutable hashtable"));
993     return SG_UNDEF;
994   }
995   return SG_HASHTABLE_OPTABLE(table)->remove(table, key);
996 }
997 
Sg_HashTableAddAll(SgHashTable * dst,SgHashTable * src)998 SgObject Sg_HashTableAddAll(SgHashTable *dst, SgHashTable *src)
999 {
1000   SgObject keys;
1001   SgObject cp, key;
1002 
1003   if (SG_IMMUTABLE_HASHTABLE_P(dst)) {
1004     Sg_Error(UC("attemp to modify immutable hashtable"));
1005     return SG_UNDEF;
1006   }
1007 
1008   keys = Sg_HashTableKeys(src);
1009   SG_FOR_EACH(cp, keys) {
1010     key = SG_CAR(cp);
1011     Sg_HashTableSet(dst, key, Sg_HashTableRef(src, key, SG_UNBOUND), 0);
1012   }
1013   return keys;
1014 }
1015 
Sg_HashTableKeys(SgHashTable * table)1016 SgObject Sg_HashTableKeys(SgHashTable *table)
1017 {
1018   SgHashIter itr;
1019   SgObject h = SG_NIL, t = SG_NIL, k;
1020   Sg_HashIterInit(table, &itr);
1021   while (Sg_HashIterNext(&itr, &k, NULL) != NULL) {
1022     SG_APPEND1(h, t, k);
1023   }
1024   return h;
1025 }
1026 
Sg_HashTableValues(SgHashTable * table)1027 SgObject Sg_HashTableValues(SgHashTable *table)
1028 {
1029   SgHashIter itr;
1030   SgObject h = SG_NIL, t = SG_NIL, v;
1031   Sg_HashIterInit(table, &itr);
1032   while (Sg_HashIterNext(&itr, NULL, &v) != NULL) {
1033     SG_APPEND1(h, t, v);
1034   }
1035   return h;
1036 }
1037 
Sg_HashTableStat(SgHashTable * table)1038 SgObject Sg_HashTableStat(SgHashTable *table)
1039 {
1040   /* TODO */
1041   return SG_FALSE;
1042 }
1043 
Sg_HashTableSize(SgHashTable * table)1044 long Sg_HashTableSize(SgHashTable *table)
1045 {
1046   return SG_HASHTABLE_CORE(table)->entryCount;
1047 }
1048 
round2up(unsigned long val)1049 unsigned long round2up(unsigned long val)
1050 {
1051   unsigned long n = 1;
1052   while (n < val) {
1053     n <<= 1;
1054     ASSERT(n > 1);      /* check overflow */
1055   }
1056   return n;
1057 }
1058 /*
1059   end of file
1060   Local Variables:
1061   coding: utf-8-unix
1062   End:
1063 */
1064