1 /*
2  * sptab.c - Sparse hash table
3  *
4  *   Copyright (c) 2009-2020  Shiro Kawai  <shiro@acm.org>
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *   1. Redistributions of source code must retain the above copyright
11  *      notice, this list of conditions and the following disclaimer.
12  *
13  *   2. Redistributions in binary form must reproduce the above copyright
14  *      notice, this list of conditions and the following disclaimer in the
15  *      documentation and/or other materials provided with the distribution.
16  *
17  *   3. Neither the name of the authors nor the names of its contributors
18  *      may be used to endorse or promote products derived from this
19  *      software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27  *   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  *   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  *   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  *   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  *   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "sptab.h"
35 
36 /*===================================================================
37  * Leaf node manipulation
38  */
39 
40 typedef struct TLeafRec {
41     Leaf hdr;                   /* data bit 0 indicates if key is chained */
42     union {
43         struct {
44             ScmObj key;
45             ScmObj value;
46         } entry;
47         struct {
48             ScmObj next;        /* alist of ((key . value) ...) */
49             ScmObj pair;        /* first (key . value) */
50         } chain;
51     };
52 } TLeaf;
53 
leaf_is_chained(TLeaf * leaf)54 static inline int leaf_is_chained(TLeaf *leaf)
55 {
56     return leaf_data(LEAF(leaf))&1;
57 }
58 
leaf_mark_chained(TLeaf * leaf)59 static inline void leaf_mark_chained(TLeaf *leaf)
60 {
61     leaf_data_bit_set(LEAF(leaf), 0);
62 }
63 
leaf_mark_unchained(TLeaf * leaf)64 static inline void leaf_mark_unchained(TLeaf *leaf)
65 {
66     leaf_data_bit_reset(LEAF(leaf), 0);
67 }
68 
leaf_allocate(void * data SCM_UNUSED)69 static Leaf *leaf_allocate(void *data SCM_UNUSED)
70 {
71     TLeaf *z = SCM_NEW(TLeaf);
72     z->entry.key = z->entry.value = SCM_UNBOUND;
73     return (Leaf*)z;
74 }
75 
76 /*===================================================================
77  * Constructor
78  */
79 
string_hash(ScmObj key)80 static u_long string_hash(ScmObj key)
81 {
82     if (!SCM_STRINGP(key)) {
83         Scm_Error("sparse string hashtable got non-string key: %S", key);
84     }
85     return Scm_HashString(SCM_STRING(key), 0);
86 }
87 
string_cmp(ScmObj a,ScmObj b)88 static int string_cmp(ScmObj a, ScmObj b)
89 {
90     if (!SCM_STRINGP(a)) {
91         Scm_Error("sparse string hashtable got non-string key: %S", a);
92     }
93     if (!SCM_STRINGP(b)) {
94         Scm_Error("sparse string hashtable got non-string key: %S", b);
95     }
96     return Scm_StringEqual(SCM_STRING(a), SCM_STRING(b));
97 }
98 
MakeSparseTable(ScmHashType type,ScmComparator * comparator,u_long flags SCM_UNUSED)99 ScmObj MakeSparseTable(ScmHashType type, ScmComparator *comparator,
100                        u_long flags SCM_UNUSED)
101 {
102     SparseTable *v = SCM_NEW(SparseTable);
103     SCM_SET_CLASS(v, SCM_CLASS_SPARSE_TABLE);
104     CompactTrieInit(&v->trie);
105     v->numEntries = 0;
106     v->comparator = comparator;
107 
108     switch (type) {
109     case SCM_HASH_EQ:
110         v->hashfn = Scm_EqHash;
111         v->cmpfn = Scm_EqP;
112         break;
113     case SCM_HASH_EQV:
114         v->hashfn = Scm_EqvHash;
115         v->cmpfn = Scm_EqvP;
116         break;
117     case SCM_HASH_EQUAL:
118         v->hashfn = Scm_Hash;
119         v->cmpfn = Scm_EqualP;
120         break;
121     case SCM_HASH_STRING:
122         v->hashfn = string_hash;
123         v->cmpfn = string_cmp;
124         break;
125     case SCM_HASH_GENERAL:
126         SCM_ASSERT(comparator != NULL);
127         v->hashfn = NULL;
128         v->cmpfn = NULL;
129         break;
130     default:
131         Scm_Error("invalid hash type (%d) for a sparse hash table", type);
132     }
133     return SCM_OBJ(v);
134 }
135 
136 SCM_DEFINE_BUILTIN_CLASS(Scm_SparseTableClass,
137                          NULL, NULL, NULL, NULL,
138                          SCM_CLASS_DICTIONARY_CPL);
139 
sparse_table_hash(SparseTable * st,ScmObj key)140 static u_long sparse_table_hash(SparseTable *st, ScmObj key)
141 {
142     if (st->hashfn) return st->hashfn(key);
143     ScmObj h = st->comparator->hashFn;
144     ScmObj r = Scm_ApplyRec1(h, key);
145     if (!SCM_INTEGERP(r)) {
146         Scm_Error("hash function %S returns non-integer: %S", h, r);
147     }
148     return Scm_GetIntegerU(r);
149 }
150 
sparse_table_eq(SparseTable * st,ScmObj a,ScmObj b)151 static int sparse_table_eq(SparseTable *st, ScmObj a, ScmObj b)
152 {
153     if (st->cmpfn) return st->cmpfn(a, b);
154     ScmObj e = st->comparator->eqFn;
155     ScmObj r = Scm_ApplyRec2(e, a, b);
156     return !SCM_FALSEP(r);
157 }
158 
159 /*===================================================================
160  * Lookup
161  */
162 
SparseTableRef(SparseTable * st,ScmObj key,ScmObj fallback)163 ScmObj SparseTableRef(SparseTable *st, ScmObj key, ScmObj fallback)
164 {
165     u_long hv = sparse_table_hash(st, key);
166     TLeaf *z = (TLeaf*)CompactTrieGet(&st->trie, hv);
167 
168     if (z != NULL) {
169         if (!leaf_is_chained(z)) {
170             if (sparse_table_eq(st, key, z->entry.key)) return z->entry.value;
171             else return fallback;
172         } else if (sparse_table_eq(st, key, SCM_CAR(z->chain.pair))) {
173             return SCM_CDR(z->chain.pair);
174         } else {
175             ScmObj cp;
176             SCM_FOR_EACH(cp, z->chain.next) {
177                 ScmObj p = SCM_CAR(cp);
178                 if (sparse_table_eq(st, key, SCM_CAR(p))) return SCM_CDR(p);
179             }
180         }
181     }
182     return fallback;
183 }
184 
185 /*===================================================================
186  * Insertion
187  */
188 
SparseTableSet(SparseTable * st,ScmObj key,ScmObj value,int flags)189 ScmObj SparseTableSet(SparseTable *st, ScmObj key, ScmObj value, int flags)
190 {
191     int createp = !(flags&SCM_DICT_NO_CREATE);
192     u_long hv = sparse_table_hash(st, key);
193     TLeaf *z;
194 
195     if (!createp) {
196         z = (TLeaf*)CompactTrieGet(&st->trie, hv);
197         if (z == NULL) return SCM_UNBOUND;
198     } else {
199         z = (TLeaf*)CompactTrieAdd(&st->trie, hv, leaf_allocate, NULL);
200     }
201 
202     if (!leaf_is_chained(z)) {
203         if (SCM_UNBOUNDP(z->entry.key)) {
204             /* new entry */
205             z->entry.key = key;
206             z->entry.value = value;
207             st->numEntries++;
208             return value;
209         } else if (sparse_table_eq(st, z->entry.key, key)) {
210             z->entry.value = value;
211             return value;
212         } else {
213             ScmObj p = Scm_Cons(z->entry.key, z->entry.value);
214             leaf_mark_chained(z);
215             z->chain.next = SCM_NIL;
216             z->chain.pair = p;
217             /*FALLTHROUGH*/
218         }
219     }
220     /* we got a chained entry. */
221     if (sparse_table_eq(st, SCM_CAR(z->chain.pair), key)) {
222         SCM_SET_CDR(z->chain.pair, value);
223         return value;
224     }
225     ScmObj cp;
226     SCM_FOR_EACH(cp, z->chain.next) {
227         ScmObj p = SCM_CAR(cp);
228         SCM_ASSERT(SCM_PAIRP(p));
229         if (sparse_table_eq(st, SCM_CAR(p), key)) {
230             SCM_SET_CDR(p, value);
231             return value;
232         }
233     }
234     z->chain.next = Scm_Cons(z->chain.pair, z->chain.next);
235     z->chain.pair = Scm_Cons(key, value);
236     st->numEntries++;
237     return value;
238 }
239 
240 /*===================================================================
241  * Deletion
242  */
243 
244 /* returns value of the deleted entry, or SCM_UNBOUND if there's no entry */
SparseTableDelete(SparseTable * st,ScmObj key)245 ScmObj SparseTableDelete(SparseTable *st, ScmObj key)
246 {
247     u_long hv = sparse_table_hash(st, key);
248     TLeaf *z = (TLeaf*)CompactTrieGet(&st->trie, hv);
249     ScmObj retval = SCM_UNBOUND;
250 
251     if (z != NULL) {
252         if (!leaf_is_chained(z)) {
253             if (sparse_table_eq(st, key, z->entry.key)) {
254                 retval = z->entry.value;
255                 CompactTrieDelete(&st->trie, hv);
256                 st->numEntries--;
257             }
258         } else {
259             if (sparse_table_eq(st, key, SCM_CAR(z->chain.pair))) {
260                 ScmObj p = z->chain.next;
261                 SCM_ASSERT(SCM_PAIRP(p));
262                 retval = SCM_CDR(z->chain.pair);
263                 z->chain.pair = SCM_CAR(p);
264                 z->chain.next = SCM_CDR(p);
265                 st->numEntries--;
266             } else {
267                 ScmObj cp, prev = SCM_FALSE;
268                 SCM_FOR_EACH(cp, z->chain.next) {
269                     ScmObj pp = SCM_CAR(cp);
270                     if (sparse_table_eq(st, key, SCM_CAR(pp))) {
271                         retval = SCM_CDR(pp);
272                         if (SCM_FALSEP(prev)) z->chain.next = SCM_CDR(cp);
273                         else SCM_SET_CDR(prev, SCM_CDR(cp));
274                         st->numEntries--;
275                         break;
276                     }
277                     prev = cp;
278                 }
279             }
280             /* make sure we have more than one entry in a chained leaf */
281             if (SCM_NULLP(z->chain.next)) {
282                 ScmObj p = z->chain.pair;
283                 leaf_mark_unchained(z);
284                 z->entry.key = SCM_CAR(p);
285                 z->entry.value = SCM_CDR(p);
286             }
287         }
288     }
289     return retval;
290 }
291 
clear_leaf(Leaf * f,void * data SCM_UNUSED)292 static void clear_leaf(Leaf *f, void *data SCM_UNUSED)
293 {
294     TLeaf *z = (TLeaf*)f;
295     z->entry.key = z->entry.value = NULL;
296 }
297 
SparseTableClear(SparseTable * st)298 void SparseTableClear(SparseTable *st)
299 {
300     st->numEntries = 0;
301     CompactTrieClear(&st->trie, clear_leaf, NULL);
302 }
303 
304 /*===================================================================
305  * Copy
306  */
307 
copy_leaf(Leaf * leaf,void * data SCM_UNUSED)308 static Leaf *copy_leaf(Leaf *leaf, void *data SCM_UNUSED)
309 {
310     TLeaf *s = (TLeaf*)leaf;
311     TLeaf *d = SCM_NEW(TLeaf);
312     d->hdr = s->hdr;
313     if (leaf_is_chained(s)) {
314         ScmObj h = SCM_NIL, t = SCM_NIL, cp;
315         d->chain.pair = Scm_Cons(SCM_CAR(s->chain.pair),
316                                  SCM_CDR(s->chain.pair));
317         SCM_FOR_EACH(cp, s->chain.next) {
318             SCM_APPEND1(h, t, Scm_Cons(SCM_CAAR(cp), SCM_CDAR(cp)));
319         }
320         d->chain.next = h;
321     } else {
322         d->entry.key   = s->entry.key;
323         d->entry.value = s->entry.value;
324     }
325     return (Leaf*)d;
326 }
327 
SparseTableCopy(const SparseTable * s)328 ScmObj SparseTableCopy(const SparseTable *s)
329 {
330     SparseTable *d = SCM_NEW(SparseTable);
331     memcpy(d, s, sizeof(SparseTable));
332     CompactTrieCopy(&d->trie, &s->trie, copy_leaf, NULL);
333     return SCM_OBJ(d);
334 }
335 
336 /*===================================================================
337  * Iterators
338  */
339 
SparseTableIterInit(SparseTableIter * it,SparseTable * st)340 void SparseTableIterInit(SparseTableIter *it, SparseTable *st)
341 {
342     it->st = st;
343     CompactTrieIterInit(&it->ctit, &st->trie);
344     it->chain = SCM_NIL;
345     it->end = FALSE;
346 }
347 
348 /* returns (key . value) or #f */
SparseTableIterNext(SparseTableIter * it)349 ScmObj SparseTableIterNext(SparseTableIter *it)
350 {
351     if (it->end) return SCM_FALSE;
352     if (SCM_PAIRP(it->chain)) {
353         ScmObj p = SCM_CAR(it->chain);
354         it->chain = SCM_CDR(it->chain);
355         return p;
356     } else {
357         TLeaf *z = (TLeaf*)CompactTrieIterNext(&it->ctit);
358         if (z == NULL) { it->end = TRUE; return SCM_FALSE; }
359         if (!leaf_is_chained(z)) {
360             return Scm_Cons(z->entry.key, z->entry.value);
361         }
362         it->chain = z->chain.next;
363         return z->chain.pair;
364     }
365 }
366 
367 
368 /*===================================================================
369  * Miscellaneous
370  */
371 
leaf_dump(ScmPort * out,Leaf * leaf,int indent,void * data SCM_UNUSED)372 static void leaf_dump(ScmPort *out, Leaf *leaf, int indent, void *data SCM_UNUSED)
373 {
374     TLeaf *z = (TLeaf*)leaf;
375 
376     if (leaf_is_chained(z)) {
377         Scm_Printf(out, "(chained)");
378         Scm_Printf(out, "\n  %*s%S => %25.1S", indent, "",
379                    SCM_CAR(z->chain.pair), SCM_CDR(z->chain.pair));
380         ScmObj cp;
381         SCM_FOR_EACH(cp, z->chain.next) {
382             ScmObj p = SCM_CAR(cp);
383             SCM_ASSERT(SCM_PAIRP(p));
384             Scm_Printf(out, "\n  %*s%S => %25.1S", indent, "",
385                        SCM_CAR(p), SCM_CDR(p));
386         }
387     } else {
388         Scm_Printf(out, "\n  %*s%S => %25.1S", indent, "",
389                    z->entry.key, z->entry.value);
390     }
391 }
392 
SparseTableDump(SparseTable * st)393 void SparseTableDump(SparseTable *st)
394 {
395     CompactTrieDump(SCM_CUROUT, &st->trie, leaf_dump, NULL);
396 }
397 
SparseTableCheck(SparseTable * st)398 void SparseTableCheck(SparseTable *st)
399 {
400     CompactTrieCheck(&st->trie, SCM_OBJ(st), NULL);
401 }
402 
403 /*===================================================================
404  * Initialization
405  */
406 
Scm_Init_sptab(ScmModule * mod)407 void Scm_Init_sptab(ScmModule *mod)
408 {
409     Scm_InitStaticClass(&Scm_SparseTableClass, "<sparse-table>",
410                         mod, NULL, 0);
411 }
412