1 #include <string.h>
2 #include "nasal.h"
3 #include "data.h"
4
5 /* A HashRec lives in a single allocated block. The layout is the
6 * header struct, then a table of 2^lgsz hash entries (key/value
7 * pairs), then an index table of 2*2^lgsz integers storing index
8 * values into the entry table. There are two tokens needed for
9 * "unused" and "used but empty". */
10
11 #define ENT_EMPTY -1
12 #define ENT_DELETED -2
13
14 typedef struct { naRef key, val; } HashEnt;
15
16 typedef struct HashRec {
17 int size; /* number of active entries */
18 int lgsz; /* base-2 logarithm of the allocated (!) size */
19 int next; /* next entry to use */
20 } HashRec;
21
22 #define REC(h) (PTR(h).hash->rec)
23 #define POW2(n) (1<<(n))
24 #define NCELLS(hr) (2*POW2((hr)->lgsz))
25 #define ROUNDUPOFF(n,m) ((((n)+(m-1))/m)*m)-(n)
26 #define ALIGN(p,sz) (((char*)p)+ROUNDUPOFF(((size_t)p)%sz,sz))
27 #define ENTS(h) ((HashEnt*)ALIGN(&((HashRec*)h)[1],sizeof(naRef)))
28 #define TAB(h) ((int*)&(ENTS(h)[1<<(h)->lgsz]))
29 #define HBITS(hr,code) ((hr)->lgsz ? ((code)>>(32-(hr)->lgsz)) : 0)
30
31 #define LROT(h,n) (((h)<<n)|((h)>>((8*sizeof(h))-n)))
mix32(unsigned int h)32 static unsigned int mix32(unsigned int h)
33 {
34 h ^= 0x2e63823a; h += LROT(h, 15); h -= LROT(h, 9);
35 h += LROT(h, 4); h -= LROT(h, 1); h ^= LROT(h, 2);
36 return h;
37 }
hash32(const unsigned char * in,int len)38 static unsigned int hash32(const unsigned char* in, int len)
39 {
40 unsigned int h = len, val = 0;
41 int i, count = 0;
42 for(i=0; i<len; i++) {
43 val = (val<<8) ^ in[i];
44 if(++count == 4) {
45 h = mix32(h ^ val);
46 val = count = 0;
47 }
48 }
49 return mix32(h ^ val);
50 }
51
refhash(naRef key)52 static unsigned int refhash(naRef key)
53 {
54 if(IS_STR(key)) {
55 struct naStr* s = PTR(key).str;
56 if(s->hashcode) return s->hashcode;
57 return s->hashcode = hash32((void*)naStr_data(key), naStr_len(key));
58 } else { /* must be a number */
59 union { double d; unsigned int u[2]; } n;
60 n.d = key.num == -0.0 ? 0.0 : key.num; /* remember negative zero! */
61 return mix32(mix32(n.u[0]) ^ n.u[1]);
62 }
63 }
64
equal(naRef a,naRef b)65 static int equal(naRef a, naRef b)
66 {
67 if(IS_NUM(a)) return a.num == b.num;
68 if(PTR(a).obj == PTR(b).obj) return 1;
69 if(naStr_len(a) != naStr_len(b)) return 0;
70 return memcmp(naStr_data(a), naStr_data(b), naStr_len(a)) == 0;
71 }
72
73 /* Returns the index of a cell that either contains a matching key, or
74 * is the empty slot to receive a new insertion. */
findcell(struct HashRec * hr,naRef key,unsigned int hash)75 static int findcell(struct HashRec *hr, naRef key, unsigned int hash)
76 {
77 int i, mask = POW2(hr->lgsz+1)-1, step = (2*hash+1) & mask;
78 for(i=HBITS(hr,hash); TAB(hr)[i] != ENT_EMPTY; i=(i+step)&mask)
79 if(TAB(hr)[i] != ENT_DELETED && equal(key, ENTS(hr)[TAB(hr)[i]].key))
80 break;
81 return i;
82 }
83
hashset(HashRec * hr,naRef key,naRef val)84 static void hashset(HashRec* hr, naRef key, naRef val)
85 {
86 int ent, cell = findcell(hr, key, refhash(key));
87 if((ent = TAB(hr)[cell]) == ENT_EMPTY) {
88 ent = hr->next++;
89 if(ent >= NCELLS(hr)) return; /* race protection, don't overrun */
90 TAB(hr)[cell] = ent;
91 hr->size++;
92 ENTS(hr)[ent].key = key;
93 }
94 ENTS(hr)[ent].val = val;
95 }
96
recsize(int lgsz)97 static int recsize(int lgsz)
98 {
99 HashRec hr;
100 hr.lgsz = lgsz;
101 return (int)((char*)&TAB(&hr)[POW2(lgsz+1)] - (char*)&hr) + sizeof(naRef);
102 }
103
resize(struct naHash * hash)104 static HashRec* resize(struct naHash* hash)
105 {
106 HashRec *hr = hash->rec, *hr2;
107 int i, lgsz = 0;
108 if(hr) {
109 int oldsz = hr->size;
110 while(oldsz) { oldsz >>= 1; lgsz++; }
111 }
112 hr2 = naAlloc(recsize(lgsz));
113 hr2->size = hr2->next = 0;
114 hr2->lgsz = lgsz;
115 for(i=0; i<(2*(1<<lgsz)); i++)
116 TAB(hr2)[i] = ENT_EMPTY;
117 for(i=0; hr && i < POW2(hr->lgsz+1); i++)
118 if(TAB(hr)[i] >= 0)
119 hashset(hr2, ENTS(hr)[TAB(hr)[i]].key, ENTS(hr)[TAB(hr)[i]].val);
120 naGC_swapfree((void*)&hash->rec, hr2);
121 return hr2;
122 }
123
naHash_size(naRef h)124 int naHash_size(naRef h) { return REC(h) ? REC(h)->size : 0; }
125
naHash_get(naRef hash,naRef key,naRef * out)126 int naHash_get(naRef hash, naRef key, naRef* out)
127 {
128 HashRec* hr = REC(hash);
129 if(hr) {
130 int ent, cell = findcell(hr, key, refhash(key));
131 if((ent = TAB(hr)[cell]) < 0) return 0;
132 *out = ENTS(hr)[ent].val;
133 return 1;
134 }
135 return 0;
136 }
137
naHash_set(naRef hash,naRef key,naRef val)138 void naHash_set(naRef hash, naRef key, naRef val)
139 {
140 HashRec* hr = REC(hash);
141 if(!hr || hr->next >= POW2(hr->lgsz))
142 hr = resize(PTR(hash).hash);
143 hashset(hr, key, val);
144 }
145
naHash_delete(naRef hash,naRef key)146 void naHash_delete(naRef hash, naRef key)
147 {
148 HashRec* hr = REC(hash);
149 if(hr) {
150 int cell = findcell(hr, key, refhash(key));
151 if(TAB(hr)[cell] >= 0) {
152 TAB(hr)[cell] = ENT_DELETED;
153 if(--hr->size < POW2(hr->lgsz-1))
154 resize(PTR(hash).hash);
155 }
156 }
157 }
158
naHash_keys(naRef dst,naRef hash)159 void naHash_keys(naRef dst, naRef hash)
160 {
161 int i;
162 HashRec* hr = REC(hash);
163 for(i=0; hr && i < NCELLS(hr); i++)
164 if(TAB(hr)[i] >= 0)
165 naVec_append(dst, ENTS(hr)[TAB(hr)[i]].key);
166 }
167
naiGCMarkHash(naRef hash)168 void naiGCMarkHash(naRef hash)
169 {
170 int i;
171 HashRec* hr = REC(hash);
172 for(i=0; hr && i < NCELLS(hr); i++)
173 if(TAB(hr)[i] >= 0) {
174 naiGCMark(ENTS(hr)[TAB(hr)[i]].key);
175 naiGCMark(ENTS(hr)[TAB(hr)[i]].val);
176 }
177 }
178
tmpStr(naRef * out,struct naStr * str,const char * key)179 static void tmpStr(naRef* out, struct naStr* str, const char* key)
180 {
181 str->type = T_STR;
182 str->hashcode = 0;
183 str->emblen = -1;
184 str->data.ref.ptr = (unsigned char*)key;
185 str->data.ref.len = strlen(key);
186 SETPTR(*out, str);
187 }
188
naMember_cget(naContext c,naRef obj,const char * field,naRef * out)189 int naMember_cget(naContext c, naRef obj, const char* field, naRef* out)
190 {
191 naRef key; struct naStr str;
192 tmpStr(&key, &str, field);
193 return naMember_get(c, obj, key, out);
194 }
195
naHash_cget(naRef hash,char * key)196 naRef naHash_cget(naRef hash, char* key)
197 {
198 struct naStr str;
199 naRef result, key2;
200 tmpStr(&key2, &str, key);
201 return naHash_get(hash, key2, &result) ? result : naNil();
202 }
203
naHash_cset(naRef hash,char * key,naRef val)204 void naHash_cset(naRef hash, char* key, naRef val)
205 {
206 naRef key2; struct naStr str;
207 tmpStr(&key2, &str, key);
208 naiHash_tryset(hash, key2, val);
209 }
210
naiHash_tryset(naRef hash,naRef key,naRef val)211 int naiHash_tryset(naRef hash, naRef key, naRef val)
212 {
213 HashRec* hr = REC(hash);
214 if(hr) {
215 int ent, cell = findcell(hr, key, refhash(key));
216 if((ent = TAB(hr)[cell]) >= 0) { ENTS(hr)[ent].val = val; return 1; }
217 }
218 return 0;
219 }
220
naiGCHashClean(struct naHash * h)221 void naiGCHashClean(struct naHash* h)
222 {
223 naFree(h->rec);
224 h->rec = 0;
225 }
226
227 /* Optimized naHash_get for looking up local variables (OP_LOCAL is by
228 * far the most common opcode and deserves some special case
229 * optimization). Assumes that the key is an interned symbol
230 * (i.e. the hash code is precomputed, and we only need to test for
231 * pointer identity). */
naiHash_sym(struct naHash * hash,struct naStr * sym,naRef * out)232 int naiHash_sym(struct naHash* hash, struct naStr* sym, naRef* out)
233 {
234 HashRec* hr = hash->rec;
235 if(hr) {
236 int* tab = TAB(hr);
237 HashEnt* ents = ENTS(hr);
238 unsigned int hc = sym->hashcode;
239 int cell, mask = POW2(hr->lgsz+1) - 1, step = (2*hc+1) & mask;
240 for(cell=HBITS(hr,hc); tab[cell] != ENT_EMPTY; cell=(cell+step)&mask)
241 if(tab[cell]!=ENT_DELETED && sym==PTR(ents[tab[cell]].key).str) {
242 *out = ents[tab[cell]].val;
243 return 1;
244 }
245 }
246 return 0;
247 }
248
249
250 /* As above, a special naHash_set for setting local variables.
251 * Assumes that the key is interned, and also that it isn't already
252 * present in the hash. */
naiHash_newsym(struct naHash * hash,naRef * sym,naRef * val)253 void naiHash_newsym(struct naHash* hash, naRef* sym, naRef* val)
254 {
255 HashRec* hr = hash->rec;
256 int mask, step, cell, ent;
257 struct naStr *s = PTR(*sym).str;
258 if(!hr || hr->next >= POW2(hr->lgsz))
259 hr = resize(hash);
260 mask = POW2(hr->lgsz+1) - 1;
261 step = (2*s->hashcode+1) & mask;
262 cell = HBITS(hr, s->hashcode);
263 while(TAB(hr)[cell] != ENT_EMPTY)
264 cell = (cell + step) & mask;
265 ent = hr->next++;
266 if(ent >= NCELLS(hr)) return; /* race protection, don't overrun */
267 TAB(hr)[cell] = ent;
268 hr->size++;
269 ENTS(hr)[TAB(hr)[cell]].key = *sym;
270 ENTS(hr)[TAB(hr)[cell]].val = *val;
271 }
272
273