1 /*
2 * sptab.c - Sparse hash table
3 *
4 * Copyright (c) 2009-2020 Shiro Kawai <shiro@acm.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the authors nor the names of its contributors
18 * may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include "sptab.h"
35
36 /*===================================================================
37 * Leaf node manipulation
38 */
39
40 typedef struct TLeafRec {
41 Leaf hdr; /* data bit 0 indicates if key is chained */
42 union {
43 struct {
44 ScmObj key;
45 ScmObj value;
46 } entry;
47 struct {
48 ScmObj next; /* alist of ((key . value) ...) */
49 ScmObj pair; /* first (key . value) */
50 } chain;
51 };
52 } TLeaf;
53
leaf_is_chained(TLeaf * leaf)54 static inline int leaf_is_chained(TLeaf *leaf)
55 {
56 return leaf_data(LEAF(leaf))&1;
57 }
58
leaf_mark_chained(TLeaf * leaf)59 static inline void leaf_mark_chained(TLeaf *leaf)
60 {
61 leaf_data_bit_set(LEAF(leaf), 0);
62 }
63
leaf_mark_unchained(TLeaf * leaf)64 static inline void leaf_mark_unchained(TLeaf *leaf)
65 {
66 leaf_data_bit_reset(LEAF(leaf), 0);
67 }
68
leaf_allocate(void * data SCM_UNUSED)69 static Leaf *leaf_allocate(void *data SCM_UNUSED)
70 {
71 TLeaf *z = SCM_NEW(TLeaf);
72 z->entry.key = z->entry.value = SCM_UNBOUND;
73 return (Leaf*)z;
74 }
75
76 /*===================================================================
77 * Constructor
78 */
79
string_hash(ScmObj key)80 static u_long string_hash(ScmObj key)
81 {
82 if (!SCM_STRINGP(key)) {
83 Scm_Error("sparse string hashtable got non-string key: %S", key);
84 }
85 return Scm_HashString(SCM_STRING(key), 0);
86 }
87
string_cmp(ScmObj a,ScmObj b)88 static int string_cmp(ScmObj a, ScmObj b)
89 {
90 if (!SCM_STRINGP(a)) {
91 Scm_Error("sparse string hashtable got non-string key: %S", a);
92 }
93 if (!SCM_STRINGP(b)) {
94 Scm_Error("sparse string hashtable got non-string key: %S", b);
95 }
96 return Scm_StringEqual(SCM_STRING(a), SCM_STRING(b));
97 }
98
MakeSparseTable(ScmHashType type,ScmComparator * comparator,u_long flags SCM_UNUSED)99 ScmObj MakeSparseTable(ScmHashType type, ScmComparator *comparator,
100 u_long flags SCM_UNUSED)
101 {
102 SparseTable *v = SCM_NEW(SparseTable);
103 SCM_SET_CLASS(v, SCM_CLASS_SPARSE_TABLE);
104 CompactTrieInit(&v->trie);
105 v->numEntries = 0;
106 v->comparator = comparator;
107
108 switch (type) {
109 case SCM_HASH_EQ:
110 v->hashfn = Scm_EqHash;
111 v->cmpfn = Scm_EqP;
112 break;
113 case SCM_HASH_EQV:
114 v->hashfn = Scm_EqvHash;
115 v->cmpfn = Scm_EqvP;
116 break;
117 case SCM_HASH_EQUAL:
118 v->hashfn = Scm_Hash;
119 v->cmpfn = Scm_EqualP;
120 break;
121 case SCM_HASH_STRING:
122 v->hashfn = string_hash;
123 v->cmpfn = string_cmp;
124 break;
125 case SCM_HASH_GENERAL:
126 SCM_ASSERT(comparator != NULL);
127 v->hashfn = NULL;
128 v->cmpfn = NULL;
129 break;
130 default:
131 Scm_Error("invalid hash type (%d) for a sparse hash table", type);
132 }
133 return SCM_OBJ(v);
134 }
135
136 SCM_DEFINE_BUILTIN_CLASS(Scm_SparseTableClass,
137 NULL, NULL, NULL, NULL,
138 SCM_CLASS_DICTIONARY_CPL);
139
sparse_table_hash(SparseTable * st,ScmObj key)140 static u_long sparse_table_hash(SparseTable *st, ScmObj key)
141 {
142 if (st->hashfn) return st->hashfn(key);
143 ScmObj h = st->comparator->hashFn;
144 ScmObj r = Scm_ApplyRec1(h, key);
145 if (!SCM_INTEGERP(r)) {
146 Scm_Error("hash function %S returns non-integer: %S", h, r);
147 }
148 return Scm_GetIntegerU(r);
149 }
150
sparse_table_eq(SparseTable * st,ScmObj a,ScmObj b)151 static int sparse_table_eq(SparseTable *st, ScmObj a, ScmObj b)
152 {
153 if (st->cmpfn) return st->cmpfn(a, b);
154 ScmObj e = st->comparator->eqFn;
155 ScmObj r = Scm_ApplyRec2(e, a, b);
156 return !SCM_FALSEP(r);
157 }
158
159 /*===================================================================
160 * Lookup
161 */
162
SparseTableRef(SparseTable * st,ScmObj key,ScmObj fallback)163 ScmObj SparseTableRef(SparseTable *st, ScmObj key, ScmObj fallback)
164 {
165 u_long hv = sparse_table_hash(st, key);
166 TLeaf *z = (TLeaf*)CompactTrieGet(&st->trie, hv);
167
168 if (z != NULL) {
169 if (!leaf_is_chained(z)) {
170 if (sparse_table_eq(st, key, z->entry.key)) return z->entry.value;
171 else return fallback;
172 } else if (sparse_table_eq(st, key, SCM_CAR(z->chain.pair))) {
173 return SCM_CDR(z->chain.pair);
174 } else {
175 ScmObj cp;
176 SCM_FOR_EACH(cp, z->chain.next) {
177 ScmObj p = SCM_CAR(cp);
178 if (sparse_table_eq(st, key, SCM_CAR(p))) return SCM_CDR(p);
179 }
180 }
181 }
182 return fallback;
183 }
184
185 /*===================================================================
186 * Insertion
187 */
188
SparseTableSet(SparseTable * st,ScmObj key,ScmObj value,int flags)189 ScmObj SparseTableSet(SparseTable *st, ScmObj key, ScmObj value, int flags)
190 {
191 int createp = !(flags&SCM_DICT_NO_CREATE);
192 u_long hv = sparse_table_hash(st, key);
193 TLeaf *z;
194
195 if (!createp) {
196 z = (TLeaf*)CompactTrieGet(&st->trie, hv);
197 if (z == NULL) return SCM_UNBOUND;
198 } else {
199 z = (TLeaf*)CompactTrieAdd(&st->trie, hv, leaf_allocate, NULL);
200 }
201
202 if (!leaf_is_chained(z)) {
203 if (SCM_UNBOUNDP(z->entry.key)) {
204 /* new entry */
205 z->entry.key = key;
206 z->entry.value = value;
207 st->numEntries++;
208 return value;
209 } else if (sparse_table_eq(st, z->entry.key, key)) {
210 z->entry.value = value;
211 return value;
212 } else {
213 ScmObj p = Scm_Cons(z->entry.key, z->entry.value);
214 leaf_mark_chained(z);
215 z->chain.next = SCM_NIL;
216 z->chain.pair = p;
217 /*FALLTHROUGH*/
218 }
219 }
220 /* we got a chained entry. */
221 if (sparse_table_eq(st, SCM_CAR(z->chain.pair), key)) {
222 SCM_SET_CDR(z->chain.pair, value);
223 return value;
224 }
225 ScmObj cp;
226 SCM_FOR_EACH(cp, z->chain.next) {
227 ScmObj p = SCM_CAR(cp);
228 SCM_ASSERT(SCM_PAIRP(p));
229 if (sparse_table_eq(st, SCM_CAR(p), key)) {
230 SCM_SET_CDR(p, value);
231 return value;
232 }
233 }
234 z->chain.next = Scm_Cons(z->chain.pair, z->chain.next);
235 z->chain.pair = Scm_Cons(key, value);
236 st->numEntries++;
237 return value;
238 }
239
240 /*===================================================================
241 * Deletion
242 */
243
244 /* returns value of the deleted entry, or SCM_UNBOUND if there's no entry */
SparseTableDelete(SparseTable * st,ScmObj key)245 ScmObj SparseTableDelete(SparseTable *st, ScmObj key)
246 {
247 u_long hv = sparse_table_hash(st, key);
248 TLeaf *z = (TLeaf*)CompactTrieGet(&st->trie, hv);
249 ScmObj retval = SCM_UNBOUND;
250
251 if (z != NULL) {
252 if (!leaf_is_chained(z)) {
253 if (sparse_table_eq(st, key, z->entry.key)) {
254 retval = z->entry.value;
255 CompactTrieDelete(&st->trie, hv);
256 st->numEntries--;
257 }
258 } else {
259 if (sparse_table_eq(st, key, SCM_CAR(z->chain.pair))) {
260 ScmObj p = z->chain.next;
261 SCM_ASSERT(SCM_PAIRP(p));
262 retval = SCM_CDR(z->chain.pair);
263 z->chain.pair = SCM_CAR(p);
264 z->chain.next = SCM_CDR(p);
265 st->numEntries--;
266 } else {
267 ScmObj cp, prev = SCM_FALSE;
268 SCM_FOR_EACH(cp, z->chain.next) {
269 ScmObj pp = SCM_CAR(cp);
270 if (sparse_table_eq(st, key, SCM_CAR(pp))) {
271 retval = SCM_CDR(pp);
272 if (SCM_FALSEP(prev)) z->chain.next = SCM_CDR(cp);
273 else SCM_SET_CDR(prev, SCM_CDR(cp));
274 st->numEntries--;
275 break;
276 }
277 prev = cp;
278 }
279 }
280 /* make sure we have more than one entry in a chained leaf */
281 if (SCM_NULLP(z->chain.next)) {
282 ScmObj p = z->chain.pair;
283 leaf_mark_unchained(z);
284 z->entry.key = SCM_CAR(p);
285 z->entry.value = SCM_CDR(p);
286 }
287 }
288 }
289 return retval;
290 }
291
clear_leaf(Leaf * f,void * data SCM_UNUSED)292 static void clear_leaf(Leaf *f, void *data SCM_UNUSED)
293 {
294 TLeaf *z = (TLeaf*)f;
295 z->entry.key = z->entry.value = NULL;
296 }
297
SparseTableClear(SparseTable * st)298 void SparseTableClear(SparseTable *st)
299 {
300 st->numEntries = 0;
301 CompactTrieClear(&st->trie, clear_leaf, NULL);
302 }
303
304 /*===================================================================
305 * Copy
306 */
307
copy_leaf(Leaf * leaf,void * data SCM_UNUSED)308 static Leaf *copy_leaf(Leaf *leaf, void *data SCM_UNUSED)
309 {
310 TLeaf *s = (TLeaf*)leaf;
311 TLeaf *d = SCM_NEW(TLeaf);
312 d->hdr = s->hdr;
313 if (leaf_is_chained(s)) {
314 ScmObj h = SCM_NIL, t = SCM_NIL, cp;
315 d->chain.pair = Scm_Cons(SCM_CAR(s->chain.pair),
316 SCM_CDR(s->chain.pair));
317 SCM_FOR_EACH(cp, s->chain.next) {
318 SCM_APPEND1(h, t, Scm_Cons(SCM_CAAR(cp), SCM_CDAR(cp)));
319 }
320 d->chain.next = h;
321 } else {
322 d->entry.key = s->entry.key;
323 d->entry.value = s->entry.value;
324 }
325 return (Leaf*)d;
326 }
327
SparseTableCopy(const SparseTable * s)328 ScmObj SparseTableCopy(const SparseTable *s)
329 {
330 SparseTable *d = SCM_NEW(SparseTable);
331 memcpy(d, s, sizeof(SparseTable));
332 CompactTrieCopy(&d->trie, &s->trie, copy_leaf, NULL);
333 return SCM_OBJ(d);
334 }
335
336 /*===================================================================
337 * Iterators
338 */
339
SparseTableIterInit(SparseTableIter * it,SparseTable * st)340 void SparseTableIterInit(SparseTableIter *it, SparseTable *st)
341 {
342 it->st = st;
343 CompactTrieIterInit(&it->ctit, &st->trie);
344 it->chain = SCM_NIL;
345 it->end = FALSE;
346 }
347
348 /* returns (key . value) or #f */
SparseTableIterNext(SparseTableIter * it)349 ScmObj SparseTableIterNext(SparseTableIter *it)
350 {
351 if (it->end) return SCM_FALSE;
352 if (SCM_PAIRP(it->chain)) {
353 ScmObj p = SCM_CAR(it->chain);
354 it->chain = SCM_CDR(it->chain);
355 return p;
356 } else {
357 TLeaf *z = (TLeaf*)CompactTrieIterNext(&it->ctit);
358 if (z == NULL) { it->end = TRUE; return SCM_FALSE; }
359 if (!leaf_is_chained(z)) {
360 return Scm_Cons(z->entry.key, z->entry.value);
361 }
362 it->chain = z->chain.next;
363 return z->chain.pair;
364 }
365 }
366
367
368 /*===================================================================
369 * Miscellaneous
370 */
371
leaf_dump(ScmPort * out,Leaf * leaf,int indent,void * data SCM_UNUSED)372 static void leaf_dump(ScmPort *out, Leaf *leaf, int indent, void *data SCM_UNUSED)
373 {
374 TLeaf *z = (TLeaf*)leaf;
375
376 if (leaf_is_chained(z)) {
377 Scm_Printf(out, "(chained)");
378 Scm_Printf(out, "\n %*s%S => %25.1S", indent, "",
379 SCM_CAR(z->chain.pair), SCM_CDR(z->chain.pair));
380 ScmObj cp;
381 SCM_FOR_EACH(cp, z->chain.next) {
382 ScmObj p = SCM_CAR(cp);
383 SCM_ASSERT(SCM_PAIRP(p));
384 Scm_Printf(out, "\n %*s%S => %25.1S", indent, "",
385 SCM_CAR(p), SCM_CDR(p));
386 }
387 } else {
388 Scm_Printf(out, "\n %*s%S => %25.1S", indent, "",
389 z->entry.key, z->entry.value);
390 }
391 }
392
SparseTableDump(SparseTable * st)393 void SparseTableDump(SparseTable *st)
394 {
395 CompactTrieDump(SCM_CUROUT, &st->trie, leaf_dump, NULL);
396 }
397
SparseTableCheck(SparseTable * st)398 void SparseTableCheck(SparseTable *st)
399 {
400 CompactTrieCheck(&st->trie, SCM_OBJ(st), NULL);
401 }
402
403 /*===================================================================
404 * Initialization
405 */
406
Scm_Init_sptab(ScmModule * mod)407 void Scm_Init_sptab(ScmModule *mod)
408 {
409 Scm_InitStaticClass(&Scm_SparseTableClass, "<sparse-table>",
410 mod, NULL, 0);
411 }
412