1 #include <string.h>
2 #include "nasal.h"
3 #include "data.h"
4 
5 /* A HashRec lives in a single allocated block.  The layout is the
6  * header struct, then a table of 2^lgsz hash entries (key/value
7  * pairs), then an index table of 2*2^lgsz integers storing index
8  * values into the entry table.  There are two tokens needed for
9  * "unused" and "used but empty". */
10 
11 #define ENT_EMPTY -1
12 #define ENT_DELETED -2
13 
14 typedef struct { naRef key, val; } HashEnt;
15 
16 typedef struct HashRec {
17     int size; /* number of active entries */
18     int lgsz; /* base-2 logarithm of the allocated (!) size */
19     int next; /* next entry to use */
20 } HashRec;
21 
22 #define REC(h) (PTR(h).hash->rec)
23 #define POW2(n) (1<<(n))
24 #define NCELLS(hr) (2*POW2((hr)->lgsz))
25 #define ROUNDUPOFF(n,m) ((((n)+(m-1))/m)*m)-(n)
26 #define ALIGN(p,sz) (((char*)p)+ROUNDUPOFF(((size_t)p)%sz,sz))
27 #define ENTS(h) ((HashEnt*)ALIGN(&((HashRec*)h)[1],sizeof(naRef)))
28 #define TAB(h) ((int*)&(ENTS(h)[1<<(h)->lgsz]))
29 #define HBITS(hr,code) ((hr)->lgsz ? ((code)>>(32-(hr)->lgsz)) : 0)
30 
31 #define LROT(h,n) (((h)<<n)|((h)>>((8*sizeof(h))-n)))
mix32(unsigned int h)32 static unsigned int mix32(unsigned int h)
33 {
34     h ^= 0x2e63823a;  h += LROT(h, 15); h -= LROT(h, 9);
35     h += LROT(h, 4);  h -= LROT(h, 1);  h ^= LROT(h, 2);
36     return h;
37 }
hash32(const unsigned char * in,int len)38 static unsigned int hash32(const unsigned char* in, int len)
39 {
40     unsigned int h = len, val = 0;
41     int i, count = 0;
42     for(i=0; i<len; i++) {
43         val = (val<<8) ^ in[i];
44         if(++count == 4) {
45             h = mix32(h ^ val);
46             val = count = 0;
47         }
48     }
49     return mix32(h ^ val);
50 }
51 
refhash(naRef key)52 static unsigned int refhash(naRef key)
53 {
54     if(IS_STR(key)) {
55         struct naStr* s = PTR(key).str;
56         if(s->hashcode) return s->hashcode;
57         return s->hashcode = hash32((void*)naStr_data(key), naStr_len(key));
58     } else { /* must be a number */
59         union { double d; unsigned int u[2]; } n;
60         n.d = key.num == -0.0 ? 0.0 : key.num; /* remember negative zero! */
61         return mix32(mix32(n.u[0]) ^ n.u[1]);
62     }
63 }
64 
equal(naRef a,naRef b)65 static int equal(naRef a, naRef b)
66 {
67     if(IS_NUM(a)) return a.num == b.num;
68     if(PTR(a).obj == PTR(b).obj) return 1;
69     if(naStr_len(a) != naStr_len(b)) return 0;
70     return memcmp(naStr_data(a), naStr_data(b), naStr_len(a)) == 0;
71 }
72 
73 /* Returns the index of a cell that either contains a matching key, or
74  * is the empty slot to receive a new insertion. */
findcell(struct HashRec * hr,naRef key,unsigned int hash)75 static int findcell(struct HashRec *hr, naRef key, unsigned int hash)
76 {
77     int i, mask = POW2(hr->lgsz+1)-1, step = (2*hash+1) & mask;
78     for(i=HBITS(hr,hash); TAB(hr)[i] != ENT_EMPTY; i=(i+step)&mask)
79         if(TAB(hr)[i] != ENT_DELETED && equal(key, ENTS(hr)[TAB(hr)[i]].key))
80             break;
81     return i;
82 }
83 
hashset(HashRec * hr,naRef key,naRef val)84 static void hashset(HashRec* hr, naRef key, naRef val)
85 {
86     int ent, cell = findcell(hr, key, refhash(key));
87     if((ent = TAB(hr)[cell]) == ENT_EMPTY) {
88         ent = hr->next++;
89         if(ent >= NCELLS(hr)) return; /* race protection, don't overrun */
90         TAB(hr)[cell] = ent;
91         hr->size++;
92         ENTS(hr)[ent].key = key;
93     }
94     ENTS(hr)[ent].val = val;
95 }
96 
recsize(int lgsz)97 static int recsize(int lgsz)
98 {
99     HashRec hr;
100     hr.lgsz = lgsz;
101     return (int)((char*)&TAB(&hr)[POW2(lgsz+1)] - (char*)&hr) + sizeof(naRef);
102 }
103 
resize(struct naHash * hash)104 static HashRec* resize(struct naHash* hash)
105 {
106     HashRec *hr = hash->rec, *hr2;
107     int i, lgsz = 0;
108     if(hr) {
109         int oldsz = hr->size;
110         while(oldsz) { oldsz >>= 1; lgsz++; }
111     }
112     hr2 = naAlloc(recsize(lgsz));
113     hr2->size = hr2->next = 0;
114     hr2->lgsz = lgsz;
115     for(i=0; i<(2*(1<<lgsz)); i++)
116         TAB(hr2)[i] = ENT_EMPTY;
117     for(i=0; hr && i < POW2(hr->lgsz+1); i++)
118         if(TAB(hr)[i] >= 0)
119             hashset(hr2, ENTS(hr)[TAB(hr)[i]].key, ENTS(hr)[TAB(hr)[i]].val);
120     naGC_swapfree((void*)&hash->rec, hr2);
121     return hr2;
122 }
123 
naHash_size(naRef h)124 int naHash_size(naRef h) { return REC(h) ? REC(h)->size : 0; }
125 
naHash_get(naRef hash,naRef key,naRef * out)126 int naHash_get(naRef hash, naRef key, naRef* out)
127 {
128     HashRec* hr = REC(hash);
129     if(hr) {
130         int ent, cell = findcell(hr, key, refhash(key));
131         if((ent = TAB(hr)[cell]) < 0) return 0;
132         *out = ENTS(hr)[ent].val;
133         return 1;
134     }
135     return 0;
136 }
137 
naHash_set(naRef hash,naRef key,naRef val)138 void naHash_set(naRef hash, naRef key, naRef val)
139 {
140     HashRec* hr = REC(hash);
141     if(!hr || hr->next >= POW2(hr->lgsz))
142         hr = resize(PTR(hash).hash);
143     hashset(hr, key, val);
144 }
145 
naHash_delete(naRef hash,naRef key)146 void naHash_delete(naRef hash, naRef key)
147 {
148     HashRec* hr = REC(hash);
149     if(hr) {
150         int cell = findcell(hr, key, refhash(key));
151         if(TAB(hr)[cell] >= 0) {
152             TAB(hr)[cell] = ENT_DELETED;
153             if(--hr->size < POW2(hr->lgsz-1))
154                 resize(PTR(hash).hash);
155         }
156     }
157 }
158 
naHash_keys(naRef dst,naRef hash)159 void naHash_keys(naRef dst, naRef hash)
160 {
161     int i;
162     HashRec* hr = REC(hash);
163     for(i=0; hr && i < NCELLS(hr); i++)
164         if(TAB(hr)[i] >= 0)
165             naVec_append(dst, ENTS(hr)[TAB(hr)[i]].key);
166 }
167 
naiGCMarkHash(naRef hash)168 void naiGCMarkHash(naRef hash)
169 {
170     int i;
171     HashRec* hr = REC(hash);
172     for(i=0; hr && i < NCELLS(hr); i++)
173         if(TAB(hr)[i] >= 0) {
174             naiGCMark(ENTS(hr)[TAB(hr)[i]].key);
175             naiGCMark(ENTS(hr)[TAB(hr)[i]].val);
176         }
177 }
178 
tmpStr(naRef * out,struct naStr * str,const char * key)179 static void tmpStr(naRef* out, struct naStr* str, const char* key)
180 {
181     str->type = T_STR;
182     str->hashcode = 0;
183     str->emblen = -1;
184     str->data.ref.ptr = (unsigned char*)key;
185     str->data.ref.len = strlen(key);
186     SETPTR(*out, str);
187 }
188 
naMember_cget(naContext c,naRef obj,const char * field,naRef * out)189 int naMember_cget(naContext c, naRef obj, const char* field, naRef* out)
190 {
191     naRef key; struct naStr str;
192     tmpStr(&key, &str, field);
193     return naMember_get(c, obj, key, out);
194 }
195 
naHash_cget(naRef hash,char * key)196 naRef naHash_cget(naRef hash, char* key)
197 {
198     struct naStr str;
199     naRef result, key2;
200     tmpStr(&key2, &str, key);
201     return naHash_get(hash, key2, &result) ? result : naNil();
202 }
203 
naHash_cset(naRef hash,char * key,naRef val)204 void naHash_cset(naRef hash, char* key, naRef val)
205 {
206     naRef key2; struct naStr str;
207     tmpStr(&key2, &str, key);
208     naiHash_tryset(hash, key2, val);
209 }
210 
naiHash_tryset(naRef hash,naRef key,naRef val)211 int naiHash_tryset(naRef hash, naRef key, naRef val)
212 {
213     HashRec* hr = REC(hash);
214     if(hr) {
215         int ent, cell = findcell(hr, key, refhash(key));
216         if((ent = TAB(hr)[cell]) >= 0) { ENTS(hr)[ent].val = val; return 1; }
217     }
218     return 0;
219 }
220 
naiGCHashClean(struct naHash * h)221 void naiGCHashClean(struct naHash* h)
222 {
223     naFree(h->rec);
224     h->rec = 0;
225 }
226 
227 /* Optimized naHash_get for looking up local variables (OP_LOCAL is by
228  * far the most common opcode and deserves some special case
229  * optimization).  Assumes that the key is an interned symbol
230  * (i.e. the hash code is precomputed, and we only need to test for
231  * pointer identity). */
naiHash_sym(struct naHash * hash,struct naStr * sym,naRef * out)232 int naiHash_sym(struct naHash* hash, struct naStr* sym, naRef* out)
233 {
234     HashRec* hr = hash->rec;
235     if(hr) {
236         int* tab = TAB(hr);
237         HashEnt* ents = ENTS(hr);
238         unsigned int hc = sym->hashcode;
239         int cell, mask = POW2(hr->lgsz+1) - 1, step = (2*hc+1) & mask;
240         for(cell=HBITS(hr,hc); tab[cell] != ENT_EMPTY; cell=(cell+step)&mask)
241             if(tab[cell]!=ENT_DELETED && sym==PTR(ents[tab[cell]].key).str) {
242                 *out = ents[tab[cell]].val;
243                 return 1;
244             }
245     }
246     return 0;
247 }
248 
249 
250 /* As above, a special naHash_set for setting local variables.
251  * Assumes that the key is interned, and also that it isn't already
252  * present in the hash. */
naiHash_newsym(struct naHash * hash,naRef * sym,naRef * val)253 void naiHash_newsym(struct naHash* hash, naRef* sym, naRef* val)
254 {
255     HashRec* hr = hash->rec;
256     int mask, step, cell, ent;
257     struct naStr *s = PTR(*sym).str;
258     if(!hr || hr->next >= POW2(hr->lgsz))
259         hr = resize(hash);
260     mask = POW2(hr->lgsz+1) - 1;
261     step = (2*s->hashcode+1) & mask;
262     cell = HBITS(hr, s->hashcode);
263     while(TAB(hr)[cell] != ENT_EMPTY)
264         cell = (cell + step) & mask;
265     ent = hr->next++;
266     if(ent >= NCELLS(hr)) return; /* race protection, don't overrun */
267     TAB(hr)[cell] = ent;
268     hr->size++;
269     ENTS(hr)[TAB(hr)[cell]].key = *sym;
270     ENTS(hr)[TAB(hr)[cell]].val = *val;
271 }
272 
273