1 /*
2 * mapping.cpp -- associative array implementations
3 * by pts@fazekas.hu at Fri Mar 15 21:13:47 CET 2002
4 */
5
6 #ifdef __GNUC__
7 #ifndef __clang__
8 #pragma implementation
9 #endif
10 #endif
11
12 #include "mapping.hpp"
13 #include "error.hpp"
14 #include <string.h>
15 #include <stdlib.h> /* abort() */
16
17 /* --- */
18
update(char const * key,slen_t keylen,char const * data)19 bool Mapping::Gen::update(char const*key, slen_t keylen, char const*data) {
20 char *found_data=get(key, keylen);
21 if (found_data==NULLP) return true;
22 memcpy(found_data, data, datalen);
23 return false;
24 }
25
26 /* --- */
27
obj_assert()28 bool Mapping::DoubleHash::obj_assert() {
29 assert(len <= used);
30 assert(minlen <= len);
31 assert(used <= maxused);
32 assert(maxused < alloced);
33 assert(alloced < (slen_t)-2);
34 /* Imp: re-count used and len */
35 return true;
36 }
37
set(char const * key,slen_t keylen,char const * data)38 bool Mapping::DoubleHash::set (char const* key, slen_t keylen, char const*data) {
39 assert(obj_assert());
40 slen_t h1=vi_h1(key,keylen);
41 assert(h1<alloced);
42 Ary *p=ary+h1;
43 if (p->keylen==NEVER_USED) { added:
44 /* Dat: we shouldn't stop on p->keylen==DELETED */
45 /* This will be deleted by `delete [] (pp->keydata-datalen);' called from
46 * Mapping::DoubleHash::clear() called from the destructor of
47 * Mapping::DoubleHash15, also known as Mapping::H.
48 */
49 memcpy(p->keydata=new char[keylen+datalen], data, datalen);
50 memcpy(p->keydata+=datalen, key, p->keylen=keylen); len++;
51 //if (p->keylen==NEVER_USED && used++==maxused) { p->keylen=keylen; rehash(); }
52 // else { p->keylen=keylen; }
53 if (used++==maxused) rehash();
54 assert(obj_assert());
55 return true; /* new value added */
56 }
57 if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) { updated:
58 memcpy(p->keydata-datalen, data, datalen);
59 return false; /* value updated */
60 }
61 /* Not found for the 1st try. We have to walk. */
62 slen_t h2=vi_h2(key,keylen);
63 assert(1<=h2 && h1<alloced);
64 while (1) { /* Dat: maxlen < alloced, so this can safely be an infinite loop. */
65 if (h1>=h2) { h1-=h2; p-=h2; }
66 else { h1+=alloced-h2; p+=alloced-h2; }
67 if (p->keylen==NEVER_USED) goto added;
68 if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) goto updated;
69 }
70 }
get(char const * key,slen_t keylen)71 char*Mapping::DoubleHash::get (char const* key, slen_t keylen) {
72 assert(obj_assert());
73 slen_t h1=vi_h1(key,keylen);
74 assert(h1<alloced);
75 Ary *p=ary+h1;
76 if (p->keylen==NEVER_USED) return (char*)NULLP;
77 if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) return p->keydata-datalen;
78 /* Not found for the 1st try. We have to walk. */
79 slen_t h2=vi_h2(key,keylen);
80 assert(1<=h2 && h1<alloced);
81 /* Dat: assert(lnko(...)) better in vi_h2 */
82 while (1) { /* Dat: maxlen < alloced, so this can safely be an infinite loop. */
83 if (h1>=h2) { h1-=h2; p-=h2; }
84 else { h1+=alloced-h2; p+=alloced-h2; }
85 if (p->keylen==NEVER_USED) return (char*)NULLP;
86 if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) return p->keydata-datalen;
87 }
88 }
deletee(char const * key,slen_t keylen)89 bool Mapping::DoubleHash::deletee (char const* key, slen_t keylen) {
90 /* We must not set `p->keylen=NEVER_USED' in this method because this would
91 * ruin searches jumping on `p', but not coming from `p+h2'. So we just set
92 * `p->keylen=DELETED'.
93 */
94 assert(obj_assert());
95 slen_t h1=vi_h1(key,keylen);
96 assert(h1<alloced);
97 Ary *p=ary+h1;
98 if (p->keylen==NEVER_USED) return true; /* key to be deleted does not exist */
99 if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) { found:
100 vi_dtor(p->keydata-datalen);
101 delete [] (p->keydata-datalen);
102 p->keylen=(slen_t)DELETED;
103 // p->keydata=(char*)NULLP;
104 if (len--==minlen) rehash();
105 assert(obj_assert());
106 return false; /* key-value pair deleted */
107 }
108 /* Not found for the 1st try. We have to walk. */
109 slen_t h2=vi_h2(key,keylen);
110 assert(1<=h2 && h1<alloced);
111 while (1) { /* Dat: maxlen < alloced, so this can safely be an infinite loop. */
112 if (h1>=h2) { h1-=h2; p-=h2; }
113 else { h1+=alloced-h2; p+=alloced-h2; }
114 if (p->keylen==(slen_t)NEVER_USED) return true; /* key to be deleted does not exist */
115 if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) goto found;
116 }
117 }
getFirst(char const * const * & key,slen_t & keylen,char * & data)118 void Mapping::DoubleHash::getFirst(char const*const*& key, slen_t &keylen, char *& data) {
119 assert(obj_assert());
120 Ary *p=ary, *pend=p+alloced;
121 while (p!=pend && p->keylen>=(slen_t)-2) p++;
122 if (p==pend) { key=(char const*const*)NULLP; }
123 else { key=&p->keydata; data=p->keydata-datalen; keylen=p->keylen; }
124 }
getNext(char const * const * & key,slen_t & keylen,char * & data)125 void Mapping::DoubleHash::getNext (char const*const*& key, slen_t &keylen, char *& data) {
126 assert(obj_assert());
127 /* vvv Dat: this operation is safe despite of the fact that it increases
128 * signedness off pointer target type ((char*) -> (Ary*)).
129 */
130 Ary *p=PTS_align_cast(Ary*,
131 ( (char*)const_cast<char**>(key)
132 - ((char*)&((Ary*)0)->keydata-(char*)0) )), *pend=ary+alloced;
133 p++;
134 while (p!=pend && p->keylen>=(slen_t)-2) p++;
135 if (p==pend) { key=(char const*const*)NULLP; }
136 else { key=&p->keydata; data=p->keydata-datalen; keylen=p->keylen; }
137 }
rehash()138 void Mapping::DoubleHash::rehash() {
139 unsigned old_scale=scale;
140 slen_t old_alloced=alloced;
141 #ifndef NDEBUG
142 slen_t old_len=len;
143 slen_t old_used=used;
144 #endif
145 Ary *old_ary=ary;
146 // printf("rehash minlen=%u len=%u used=%u maxused=%u alloced=%u\n", minlen, len, used, maxused, alloced);
147 vi_scale();
148 // printf("scaled minlen=%u len=%u used=%u maxused=%u alloced=%u\n", minlen, len, used, maxused, alloced);
149 // assert( minlen <= len);
150 if (scale==old_scale && used<=maxused) { assert(obj_assert()); return; }
151 Ary *pp=old_ary, *ppend=pp+old_alloced;
152 slen_t calclen=0, calcused=0;
153 ary=new Ary[alloced];
154 // printf("alloced=%u\n", alloced);
155 memset(ary, '\377', sizeof(Ary)*alloced); /* fill with NEVER_USED */
156 /* insert the old values to the new hash */
157 for (; pp!=ppend; pp++) {
158 if (pp->keylen!=NEVER_USED) calcused++;
159 if (pp->keylen<(slen_t)-2) {
160 // printf("%u %u\n", (unsigned char)pp->keydata[0], (unsigned char)pp->keydata[1]);
161 calclen++;
162 slen_t h1=vi_h1(pp->keydata,pp->keylen);
163 assert(h1<alloced);
164 Ary *p=ary+h1;
165 assert(p->keylen!=DELETED);
166 if (p->keylen==NEVER_USED) { found:
167 p->keylen=pp->keylen;
168 p->keydata=pp->keydata;
169 continue;
170 }
171 assert(!(p->keylen==pp->keylen && 0==memcmp(p->keydata, pp->keydata, pp->keylen)) && "dupicate key");
172 /* Not found for the 1st try. We have to walk. */
173 slen_t h2=vi_h2(pp->keydata,pp->keylen);
174 assert(1<=h2 && h1<alloced);
175 while (1) { /* Dat: maxlen < alloced, so this can safely be an infinite loop. */
176 if (h1>=h2) { h1-=h2; p-=h2; }
177 else { h1+=alloced-h2; p+=alloced-h2; }
178 assert(p->keylen!=DELETED);
179 if (p->keylen==NEVER_USED) goto found;
180 assert(!(p->keylen==pp->keylen && 0==memcmp(p->keydata, pp->keydata, pp->keylen)) && "dupicate key");
181 } /* WHILE */
182 }
183 } /* NEXT */
184 used=len=calclen;
185 assert(calclen==old_len);
186 assert(calcused==old_used);
187 assert(old_len==len);
188 assert(old_len==used);
189 assert(obj_assert());
190 delete [] old_ary;
191 }
clear()192 void Mapping::DoubleHash::clear() {
193 assert(obj_assert());
194 Ary *pp=ary, *ppend=pp+alloced;
195 for (; pp!=ppend; pp++) if (pp->keylen<(slen_t)-2) {
196 vi_dtor(pp->keydata-datalen); /* this is quite fast */
197 delete [] (pp->keydata-datalen); /* this is really slow for millions of values */
198 pp->keylen=(slen_t)DELETED;
199 len--;
200 }
201 assert(len==0);
202 if (minlen!=0) rehash();
203 }
204
205 /* --- */
206
207 static slen_t const d15_primes[]= {
208 0, 10, 13, 23, 37, 59, 89, 137, 211, 317, 479, 719, 1087, 1637, 2459, 3691,
209 5557, 8353, 12539, 18839, 28277, 42433, 63659,
210 95507UL, 143261UL, 214913UL, 322397UL, 483611UL, 725423UL, 1088159UL, 1632259UL, 2448389UL,
211 3672593UL, 5508913UL, 8263373UL, 12395069UL, 18592631UL, 27888947UL, 41833427UL,
212 62750147UL, 94125247UL, 141187901UL, 211781873UL, 317672813UL, 476509223UL,
213 714763843UL, 1072145771UL, 1608218669UL, 2412328031UL, 3618492049UL,
214 #if SIZEOF_SLEN_T>=8
215 5427738097UL, 8141607167UL, 12212410753UL, 18318616157UL, 27477924239UL,
216 41216886467UL, 61825329719UL, 92737994593UL, 139106991917UL, 208660487887UL,
217 312990731839UL, 469486097807UL, 704229146717UL, 1056343720093UL,
218 1584515580187UL, 2376773370313UL, 3565160055487UL, 5347740083243UL,
219 8021610124867UL, 12032415187319UL, 18048622780987UL, 27072934171487UL,
220 40609401257291UL, 60914101885951UL, 91371152828947UL, 137056729243439UL,
221 205585093865189UL, 308377640797829UL, 462566461196803UL,
222 693849691795229UL, 1040774537692907UL, 1561161806539393UL,
223 2341742709809177UL, 3512614064713777UL, 5268921097070759UL,
224 7903381645606193UL, 11855072468409349UL, 17782608702614033UL,
225 26673913053921061UL, 40010869580881603UL, 60016304371322423UL,
226 90024456556983643UL, 135036684835475519UL, 202555027253213279UL,
227 303832540879819943UL, 455748811319729951UL, 683623216979594929UL,
228 1025434825469392417UL, 1538152238204088631UL, 2307228357306132983UL,
229 3460842535959199481UL, 5191263803938799269UL,
230 #endif
231 0
232 };
233
vi_scale()234 void Mapping::DoubleHash15::vi_scale() {
235 #if 0
236 // <lloced=63659 used=59671 maxused=59670
237 // alloced=1637 used=59671 maxused=1534
238
239 // <lloced=1637 used=1535 maxused=1534
240 // |lloced=2459 used=1535 maxused=1534
241 // |lloced=1637 used=1535 maxused=1534
242 // alloced=1637 used=1535 maxused=1534
243
244
245 printf("<lloced=%u used=%u maxused=%u\n", alloced, used, maxused);
246 if (len<minlen) { again:
247 /* assert(used==minlen-1); */
248 assert(scale>=1);
249 alloced=(d15_primes+2)[--scale];
250 if (scale>1) minlen=(d15_primes+2)[scale-2];
251 else if (scale==1) minlen=10;
252 else minlen=0;
253 if (len<minlen) goto again;
254 } else if (used>maxused) {
255 assert(used==maxused+1);
256 if ((alloced=(d15_primes+2)[++scale])==0) abort(); /* Imp: better overflow reporting */
257 if (scale>1) minlen=(d15_primes+2)[scale-2];
258 else if (scale==1) minlen=10;
259 else minlen=0;
260 // if (len<minlen) minlen=len;
261 } else assert(0);
262 printf("|lloced=%u used=%u maxused=%u\n", alloced, used, maxused);
263 if (alloced<=4000U) maxused=(alloced*15)>>4;
264 else maxused=(alloced>>4)*15;
265 if (used>maxused) printf("alloced=%u used=%u maxused=%u\n", alloced, used, maxused);
266 #endif
267
268 /* Dat: we respect only `len', not used. */
269 /* Imp: make calculation of scale relative to previous one */
270
271 scale=0; while (1) {
272 // printf("xscale=%u\n", scale);
273 if (0==(alloced=(d15_primes+2)[scale])) { assert(0); abort(); } /* Imp: better overflow reporting */
274 if (alloced<=4000U) maxused=(alloced*15)>>4;
275 else maxused=(alloced>>4)*15; /* avoid overflows */
276 if (len<=maxused) break; /* _not_ used<=maxused, because of rehash() */
277 scale++;
278 }
279 minlen=(d15_primes)[scale];
280 // printf("ret alloced=%u used=%u maxused=%u\n", alloced, used, maxused);
281 }
vi_h1(register char const * key,register slen_t keylen)282 slen_t Mapping::DoubleHash15::vi_h1(register char const*key, register slen_t keylen) {
283 register slen_t hval=0, m=alloced;
284 while (keylen--!=0) hval=((hval<<8)+*key++)%m;
285 /* Imp: make this accurate and faster (the `%' operation is rather slow) */
286 return hval;
287 }
vi_h2(char const * key,slen_t keylen)288 slen_t Mapping::DoubleHash15::vi_h2(char const*key, slen_t keylen) {
289 register slen_t hval=0, m=alloced-1;
290 while (keylen--!=0) hval=((hval<<8)+*key++)%m;
291 /* Imp: make this accurate and faster (the `%' operation is rather slow) */
292 return hval+1;
293 }
vi_dtor(char *)294 void Mapping::DoubleHash15::vi_dtor(char *) {}
295
DoubleHash15(slen_t datalen_)296 Mapping::DoubleHash15::DoubleHash15(slen_t datalen_) {
297 ary=new Ary[alloced=(d15_primes+2)[scale=0]];
298 memset(ary, '\377', sizeof(Ary)*alloced); /* fill with NEVER_USED */
299 datalen=datalen_;
300 len=used=minlen=0;
301 maxused=(alloced>>4)*15;
302 }
~DoubleHash15()303 Mapping::DoubleHash15::~DoubleHash15() {
304 minlen=0;
305 clear();
306 delete [] ary;
307 }
308
309 /* __END__ */
310