1 /*  hash.c -- type-general hashing                            */
2 /*  Copyright (c) 2009-2011 Alex Shinn.  All rights reserved. */
3 /*  BSD-style license: http://synthcode.com/license.txt       */
4 
5 #include <chibi/eval.h>
6 
7 #define HASH_DEPTH 5
8 #define HASH_BOUND sexp_make_fixnum(SEXP_MAX_FIXNUM)
9 
10 #define FNV_PRIME 16777619
11 #define FNV_OFFSET_BASIS 2166136261uL
12 
13 #define sexp_hash_table_buckets(x)  sexp_slot_ref(x, 0)
14 #define sexp_hash_table_size(x)     sexp_slot_ref(x, 1)
15 #define sexp_hash_table_hash_fn(x)  sexp_slot_ref(x, 2)
16 #define sexp_hash_table_eq_fn(x)    sexp_slot_ref(x, 3)
17 
18 #define sexp_hash_resize_check(n, len) (((n)*3) > ((len)>>2))
19 
string_hash(char * str,sexp_uint_t bound)20 static sexp_uint_t string_hash (char *str, sexp_uint_t bound) {
21   sexp_uint_t acc = FNV_OFFSET_BASIS;
22   while (*str) {acc *= FNV_PRIME; acc ^= *str++;}
23   return acc % bound;
24 }
25 
sexp_string_hash(sexp ctx,sexp self,sexp_sint_t n,sexp str,sexp bound)26 sexp sexp_string_hash (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp bound) {
27   if (! sexp_stringp(str))
28     return sexp_type_exception(ctx, self, SEXP_STRING, str);
29   else if (! sexp_fixnump(bound))
30     return sexp_type_exception(ctx, self, SEXP_FIXNUM, bound);
31   return sexp_make_fixnum(string_hash(sexp_string_data(str),
32                                       sexp_unbox_fixnum(bound)));
33 }
34 
string_ci_hash(char * str,sexp_uint_t bound)35 static sexp_uint_t string_ci_hash (char *str, sexp_uint_t bound) {
36   sexp_uint_t acc = FNV_OFFSET_BASIS;
37   while (*str) {acc *= FNV_PRIME; acc ^= sexp_tolower((unsigned char)*str++);}
38   return acc % bound;
39 }
40 
sexp_string_ci_hash(sexp ctx,sexp self,sexp_sint_t n,sexp str,sexp bound)41 sexp sexp_string_ci_hash (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp bound) {
42   if (! sexp_stringp(str))
43     return sexp_type_exception(ctx, self, SEXP_STRING, str);
44   else if (! sexp_fixnump(bound))
45     return sexp_type_exception(ctx, self, SEXP_FIXNUM, bound);
46   return sexp_make_fixnum(string_ci_hash(sexp_string_data(str),
47                                          sexp_unbox_fixnum(bound)));
48 }
49 
hash_one(sexp ctx,sexp obj,sexp_uint_t bound,sexp_sint_t depth)50 static sexp_uint_t hash_one (sexp ctx, sexp obj, sexp_uint_t bound, sexp_sint_t depth) {
51   sexp_uint_t acc = FNV_OFFSET_BASIS, right_size;
52   sexp_sint_t i, len;
53   sexp t, *p;
54   char *p0, *p_right;
55  loop:
56   if (obj) {
57 #if SEXP_USE_FLONUMS
58     if (sexp_flonump(obj))
59       acc ^= (sexp_sint_t) sexp_flonum_value(obj);
60     else
61 #endif
62     if (sexp_pointerp(obj)) {
63       if (depth > 0) {
64         t = sexp_object_type(ctx, obj);
65         p = (sexp*) (((char*)obj) + sexp_type_field_base(t));
66         p0 = ((char*)obj) + offsetof(struct sexp_struct, value);
67         /* if the field_base is 0, skip to the value */
68         if ((sexp)p == obj) p=(sexp*)p0;
69         /* hash uvector data (otherwise strings all hash to the same value) */
70         if (sexp_bytesp(obj) || sexp_uvectorp(obj) || sexp_bignump(obj)) {
71           p_right = ((char*)p + sexp_type_num_slots_of_object(t, obj)*sizeof(sexp));
72           right_size = ((char*)obj + sexp_type_size_of_object(t, obj)) - p_right;
73           for (i=0; i<right_size; i++) {acc *= FNV_PRIME; acc ^= p_right[i];}
74         }
75         /* hash eq-object slots */
76         len = sexp_type_num_eq_slots_of_object(t, obj);
77         if (len > 0) {
78           depth--;
79           for (i=0; i<len-1; i++) {
80             acc *= FNV_PRIME;
81             acc ^= hash_one(ctx, p[i], 0, depth);
82           }
83           /* tail-recurse on the last value */
84           obj = p[len-1]; goto loop;
85         }
86       } else {
87         acc ^= sexp_pointer_tag(obj);
88       }
89     } else {
90       acc ^= (sexp_uint_t)obj;
91     }
92   }
93   return (bound ? acc % bound : acc);
94 }
95 
sexp_hash(sexp ctx,sexp self,sexp_sint_t n,sexp obj,sexp bound)96 sexp sexp_hash (sexp ctx, sexp self, sexp_sint_t n, sexp obj, sexp bound) {
97   if (! sexp_exact_integerp(bound))
98     return sexp_type_exception(ctx, self, SEXP_FIXNUM, bound);
99   return sexp_make_fixnum(hash_one(ctx, obj, sexp_unbox_fixnum(bound), HASH_DEPTH));
100 }
101 
sexp_hash_by_identity(sexp ctx,sexp self,sexp_sint_t n,sexp obj,sexp bound)102 sexp sexp_hash_by_identity (sexp ctx, sexp self, sexp_sint_t n, sexp obj, sexp bound) {
103   if (! sexp_exact_integerp(bound))
104     return sexp_type_exception(ctx, self, SEXP_FIXNUM, bound);
105   return sexp_make_fixnum((sexp_uint_t)obj % sexp_unbox_fixnum(bound));
106 }
107 
sexp_get_bucket(sexp ctx,sexp buckets,sexp hash_fn,sexp obj)108 static sexp sexp_get_bucket (sexp ctx, sexp buckets, sexp hash_fn, sexp obj) {
109   sexp_gc_var1(args);
110   sexp res;
111   sexp_uint_t len = sexp_vector_length(buckets);
112   if (hash_fn == SEXP_ONE)
113     res = sexp_hash_by_identity(ctx, NULL, 2, obj, sexp_make_fixnum(len));
114   else if (hash_fn == SEXP_TWO)
115     res = sexp_hash(ctx, NULL, 2, obj, sexp_make_fixnum(len));
116   else {
117     sexp_gc_preserve1(ctx, args);
118     args = sexp_list2(ctx, obj, sexp_make_fixnum(len));
119     res = sexp_apply(ctx, hash_fn, args);
120     if (sexp_exceptionp(res)) {
121       args = sexp_eval_string(ctx, "(current-error-port)", -1, sexp_context_env(ctx));
122       sexp_print_exception(ctx, res, args);
123       res = SEXP_ZERO;
124     } else if ((sexp_uint_t)sexp_unbox_fixnum(res) >= len) {
125       res = SEXP_ZERO;
126     }
127     sexp_gc_release1(ctx);
128   }
129   return res;
130 }
131 
sexp_scan_bucket(sexp ctx,sexp ls,sexp obj,sexp eq_fn)132 static sexp sexp_scan_bucket (sexp ctx, sexp ls, sexp obj, sexp eq_fn) {
133   sexp_gc_var1(res);
134   sexp p;
135   res = SEXP_FALSE;
136   if ((eq_fn == SEXP_ONE)
137       || ((eq_fn == SEXP_TWO)
138           && (sexp_pointerp(obj) ?
139               (sexp_pointer_tag(obj) == SEXP_SYMBOL) : ! sexp_fixnump(obj)))) {
140     for (p=ls; sexp_pairp(p); p=sexp_cdr(p)) {
141       if (sexp_caar(p) == obj) {
142         res = p;
143         break;
144       }
145     }
146   } else if (eq_fn == SEXP_TWO) {
147     for (p=ls; sexp_pairp(p); p=sexp_cdr(p)) {
148       if (sexp_truep(sexp_equalp(ctx, sexp_caar(p), obj))) {
149         res = p;
150         break;
151       }
152     }
153   } else {
154     sexp_gc_preserve1(ctx, res);
155     for (p=ls; sexp_pairp(p); p=sexp_cdr(p)) {
156       res = sexp_list2(ctx, sexp_caar(p), obj);
157       if (sexp_truep(sexp_apply(ctx, eq_fn, res))) {
158         res = p;
159         break;
160       } else {
161         res = SEXP_FALSE;
162       }
163     }
164     sexp_gc_release1(ctx);
165   }
166   return res;
167 }
168 
sexp_regrow_hash_table(sexp ctx,sexp ht,sexp oldbuckets,sexp hash_fn)169 static void sexp_regrow_hash_table (sexp ctx, sexp ht, sexp oldbuckets, sexp hash_fn) {
170   sexp ls, *oldvec, *newvec;
171   int i, j, oldsize=sexp_vector_length(oldbuckets), newsize=oldsize*2;
172   sexp_gc_var1(newbuckets);
173   sexp_gc_preserve1(ctx, newbuckets);
174   newbuckets = sexp_make_vector(ctx, sexp_make_fixnum(newsize), SEXP_NULL);
175   if (newbuckets && !sexp_exceptionp(newbuckets)) {
176     oldvec = sexp_vector_data(oldbuckets);
177     newvec = sexp_vector_data(newbuckets);
178     for (i=0; i<oldsize; i++) {
179       for (ls=oldvec[i]; sexp_pairp(ls); ls=sexp_cdr(ls)) {
180         j = sexp_unbox_fixnum(sexp_get_bucket(ctx, newbuckets, hash_fn, sexp_caar(ls)));
181         sexp_push(ctx, newvec[j], sexp_car(ls));
182       }
183     }
184     sexp_hash_table_buckets(ht) = newbuckets;
185   }
186   sexp_gc_release1(ctx);
187 }
188 
sexp_hash_table_cell(sexp ctx,sexp self,sexp_sint_t n,sexp ht,sexp obj,sexp createp)189 sexp sexp_hash_table_cell (sexp ctx, sexp self, sexp_sint_t n, sexp ht, sexp obj, sexp createp) {
190   sexp buckets, eq_fn, hash_fn, i;
191   sexp_uint_t size;
192   sexp_gc_var1(res);
193   /* extra check - exact type should be checked by the calling procedure */
194   if (! sexp_pointerp(ht))
195     return sexp_xtype_exception(ctx, self, "not a Hash-Table", ht);
196   buckets = sexp_hash_table_buckets(ht);
197   eq_fn = sexp_hash_table_eq_fn(ht);
198   hash_fn = sexp_hash_table_hash_fn(ht);
199   i = sexp_get_bucket(ctx, buckets, hash_fn, obj);
200   res = sexp_scan_bucket(ctx, sexp_vector_ref(buckets, i), obj, eq_fn);
201   if (sexp_truep(res)) {
202     res = sexp_car(res);
203   } else if (sexp_truep(createp)) {
204     sexp_gc_preserve1(ctx, res);
205     size = sexp_unbox_fixnum(sexp_hash_table_size(ht));
206     if (sexp_hash_resize_check(size, sexp_vector_length(buckets))) {
207       sexp_regrow_hash_table(ctx, ht, buckets, hash_fn);
208       buckets = sexp_hash_table_buckets(ht);
209       i = sexp_get_bucket(ctx, buckets, hash_fn, obj);
210     }
211     res = sexp_cons(ctx, obj, createp);
212     sexp_vector_set(buckets, i, sexp_cons(ctx, res, sexp_vector_ref(buckets, i)));
213     sexp_hash_table_size(ht) = sexp_make_fixnum(size+1);
214     sexp_gc_release1(ctx);
215   }
216   return res;
217 }
218 
sexp_hash_table_delete(sexp ctx,sexp self,sexp_sint_t n,sexp ht,sexp obj)219 sexp sexp_hash_table_delete (sexp ctx, sexp self, sexp_sint_t n, sexp ht, sexp obj) {
220   sexp buckets, eq_fn, hash_fn, i, p, res;
221   if (!(sexp_pointerp(ht) && strcmp(sexp_string_data(sexp_object_type_name(ctx, ht)), "Hash-Table") == 0))
222     return sexp_xtype_exception(ctx, self, "not a Hash-Table", ht);
223   buckets = sexp_hash_table_buckets(ht);
224   eq_fn = sexp_hash_table_eq_fn(ht);
225   hash_fn = sexp_hash_table_hash_fn(ht);
226   i = sexp_get_bucket(ctx, buckets, hash_fn, obj);
227   res = sexp_scan_bucket(ctx, sexp_vector_ref(buckets, i), obj, eq_fn);
228   if (sexp_pairp(res)) {
229     sexp_hash_table_size(ht) = sexp_fx_sub(sexp_hash_table_size(ht), SEXP_ONE);
230     if (res == sexp_vector_ref(buckets, i)) {
231       sexp_vector_set(buckets, i, sexp_cdr(res));
232     } else {
233       for (p=sexp_vector_ref(buckets, i); sexp_cdr(p)!=res; p=sexp_cdr(p))
234         ;
235       sexp_cdr(p) = sexp_cdr(res);
236     }
237   }
238   return SEXP_VOID;
239 }
240 
sexp_init_library(sexp ctx,sexp self,sexp_sint_t n,sexp env,const char * version,const sexp_abi_identifier_t abi)241 sexp sexp_init_library (sexp ctx, sexp self, sexp_sint_t n, sexp env, const char* version, const sexp_abi_identifier_t abi) {
242   if (!(sexp_version_compatible(ctx, version, sexp_version)
243         && sexp_abi_compatible(ctx, abi, SEXP_ABI_IDENTIFIER)))
244     return SEXP_ABI_ERROR;
245 
246   sexp_define_foreign_opt(ctx, env, "string-hash", 2, sexp_string_hash, HASH_BOUND);
247   sexp_define_foreign_opt(ctx, env, "string-ci-hash", 2, sexp_string_ci_hash, HASH_BOUND);
248   sexp_define_foreign_opt(ctx, env, "hash", 2, sexp_hash, HASH_BOUND);
249   sexp_define_foreign_opt(ctx, env, "hash-by-identity", 2, sexp_hash_by_identity, HASH_BOUND);
250   sexp_define_foreign(ctx, env, "hash-table-cell", 3, sexp_hash_table_cell);
251   sexp_define_foreign(ctx, env, "hash-table-delete!", 2, sexp_hash_table_delete);
252 
253   return SEXP_VOID;
254 }
255