1 /*
2  * mapping.cpp -- associative array implementations
3  * by pts@fazekas.hu at Fri Mar 15 21:13:47 CET 2002
4  */
5 
6 #ifdef __GNUC__
7 #ifndef __clang__
8 #pragma implementation
9 #endif
10 #endif
11 
12 #include "mapping.hpp"
13 #include "error.hpp"
14 #include <string.h>
15 #include <stdlib.h> /* abort() */
16 
17 /* --- */
18 
update(char const * key,slen_t keylen,char const * data)19 bool Mapping::Gen::update(char const*key, slen_t keylen, char const*data) {
20   char *found_data=get(key, keylen);
21   if (found_data==NULLP) return true;
22   memcpy(found_data, data, datalen);
23   return false;
24 }
25 
26 /* --- */
27 
obj_assert()28 bool Mapping::DoubleHash::obj_assert() {
29   assert(len <= used);
30   assert(minlen <= len);
31   assert(used <= maxused);
32   assert(maxused < alloced);
33   assert(alloced < (slen_t)-2);
34   /* Imp: re-count used and len */
35   return true;
36 }
37 
set(char const * key,slen_t keylen,char const * data)38 bool Mapping::DoubleHash::set     (char const*  key, slen_t  keylen, char const*data) {
39   assert(obj_assert());
40   slen_t h1=vi_h1(key,keylen);
41   assert(h1<alloced);
42   Ary *p=ary+h1;
43   if (p->keylen==NEVER_USED) { added:
44     /* Dat: we shouldn't stop on p->keylen==DELETED */
45     /* This will be deleted by `delete [] (pp->keydata-datalen);' called from
46      * Mapping::DoubleHash::clear() called from the destructor of
47      * Mapping::DoubleHash15, also known as Mapping::H.
48      */
49     memcpy(p->keydata=new char[keylen+datalen], data, datalen);
50     memcpy(p->keydata+=datalen, key, p->keylen=keylen); len++;
51     //if (p->keylen==NEVER_USED && used++==maxused) { p->keylen=keylen; rehash(); }
52     //                                         else { p->keylen=keylen; }
53     if (used++==maxused) rehash();
54     assert(obj_assert());
55     return true; /* new value added */
56   }
57   if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) { updated:
58     memcpy(p->keydata-datalen, data, datalen);
59     return false; /* value updated */
60   }
61   /* Not found for the 1st try. We have to walk. */
62   slen_t h2=vi_h2(key,keylen);
63   assert(1<=h2 && h1<alloced);
64   while (1) { /* Dat: maxlen < alloced, so this can safely be an infinite loop. */
65     if (h1>=h2) { h1-=h2; p-=h2; }
66            else { h1+=alloced-h2; p+=alloced-h2; }
67     if (p->keylen==NEVER_USED) goto added;
68     if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) goto updated;
69   }
70 }
get(char const * key,slen_t keylen)71 char*Mapping::DoubleHash::get     (char const*  key, slen_t  keylen) {
72   assert(obj_assert());
73   slen_t h1=vi_h1(key,keylen);
74   assert(h1<alloced);
75   Ary *p=ary+h1;
76   if (p->keylen==NEVER_USED) return (char*)NULLP;
77   if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) return p->keydata-datalen;
78   /* Not found for the 1st try. We have to walk. */
79   slen_t h2=vi_h2(key,keylen);
80   assert(1<=h2 && h1<alloced);
81   /* Dat: assert(lnko(...)) better in vi_h2 */
82   while (1) { /* Dat: maxlen < alloced, so this can safely be an infinite loop. */
83     if (h1>=h2) { h1-=h2; p-=h2; }
84            else { h1+=alloced-h2; p+=alloced-h2; }
85     if (p->keylen==NEVER_USED) return (char*)NULLP;
86     if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) return p->keydata-datalen;
87   }
88 }
deletee(char const * key,slen_t keylen)89 bool Mapping::DoubleHash::deletee (char const*  key, slen_t  keylen) {
90   /* We must not set `p->keylen=NEVER_USED' in this method because this would
91    * ruin searches jumping on `p', but not coming from `p+h2'. So we just set
92    * `p->keylen=DELETED'.
93    */
94   assert(obj_assert());
95   slen_t h1=vi_h1(key,keylen);
96   assert(h1<alloced);
97   Ary *p=ary+h1;
98   if (p->keylen==NEVER_USED) return true; /* key to be deleted does not exist */
99   if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) { found:
100     vi_dtor(p->keydata-datalen);
101     delete [] (p->keydata-datalen);
102     p->keylen=(slen_t)DELETED;
103     // p->keydata=(char*)NULLP;
104     if (len--==minlen) rehash();
105     assert(obj_assert());
106     return false; /* key-value pair deleted */
107   }
108   /* Not found for the 1st try. We have to walk. */
109   slen_t h2=vi_h2(key,keylen);
110   assert(1<=h2 && h1<alloced);
111   while (1) { /* Dat: maxlen < alloced, so this can safely be an infinite loop. */
112     if (h1>=h2) { h1-=h2; p-=h2; }
113            else { h1+=alloced-h2; p+=alloced-h2; }
114     if (p->keylen==(slen_t)NEVER_USED) return true; /* key to be deleted does not exist */
115     if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) goto found;
116   }
117 }
getFirst(char const * const * & key,slen_t & keylen,char * & data)118 void Mapping::DoubleHash::getFirst(char const*const*& key, slen_t &keylen, char *& data) {
119   assert(obj_assert());
120   Ary *p=ary, *pend=p+alloced;
121   while (p!=pend && p->keylen>=(slen_t)-2) p++;
122   if (p==pend) { key=(char const*const*)NULLP; }
123           else { key=&p->keydata; data=p->keydata-datalen; keylen=p->keylen; }
124 }
getNext(char const * const * & key,slen_t & keylen,char * & data)125 void Mapping::DoubleHash::getNext (char const*const*& key, slen_t &keylen, char *& data) {
126   assert(obj_assert());
127   /* vvv Dat: this operation is safe despite of the fact that it increases
128    *          signedness off pointer target type ((char*) -> (Ary*)).
129    */
130   Ary *p=PTS_align_cast(Ary*,
131    ( (char*)const_cast<char**>(key)
132      - ((char*)&((Ary*)0)->keydata-(char*)0) )), *pend=ary+alloced;
133   p++;
134   while (p!=pend && p->keylen>=(slen_t)-2) p++;
135   if (p==pend) { key=(char const*const*)NULLP; }
136           else { key=&p->keydata; data=p->keydata-datalen; keylen=p->keylen; }
137 }
rehash()138 void Mapping::DoubleHash::rehash() {
139   unsigned old_scale=scale;
140   slen_t old_alloced=alloced;
141 #ifndef NDEBUG
142   slen_t old_len=len;
143   slen_t old_used=used;
144 #endif
145   Ary *old_ary=ary;
146   // printf("rehash minlen=%u len=%u used=%u maxused=%u alloced=%u\n", minlen, len, used, maxused, alloced);
147   vi_scale();
148   // printf("scaled minlen=%u len=%u used=%u maxused=%u alloced=%u\n", minlen, len, used, maxused, alloced);
149   // assert( minlen <= len);
150   if (scale==old_scale && used<=maxused) { assert(obj_assert()); return; }
151   Ary *pp=old_ary, *ppend=pp+old_alloced;
152   slen_t calclen=0, calcused=0;
153   ary=new Ary[alloced];
154   // printf("alloced=%u\n", alloced);
155   memset(ary, '\377', sizeof(Ary)*alloced); /* fill with NEVER_USED */
156   /* insert the old values to the new hash */
157   for (; pp!=ppend; pp++) {
158     if (pp->keylen!=NEVER_USED) calcused++;
159     if (pp->keylen<(slen_t)-2) {
160       // printf("%u %u\n", (unsigned char)pp->keydata[0], (unsigned char)pp->keydata[1]);
161       calclen++;
162       slen_t h1=vi_h1(pp->keydata,pp->keylen);
163       assert(h1<alloced);
164       Ary *p=ary+h1;
165       assert(p->keylen!=DELETED);
166       if (p->keylen==NEVER_USED) { found:
167         p->keylen=pp->keylen;
168         p->keydata=pp->keydata;
169         continue;
170       }
171       assert(!(p->keylen==pp->keylen && 0==memcmp(p->keydata, pp->keydata, pp->keylen)) && "dupicate key");
172       /* Not found for the 1st try. We have to walk. */
173       slen_t h2=vi_h2(pp->keydata,pp->keylen);
174       assert(1<=h2 && h1<alloced);
175       while (1) { /* Dat: maxlen < alloced, so this can safely be an infinite loop. */
176         if (h1>=h2) { h1-=h2; p-=h2; }
177                else { h1+=alloced-h2; p+=alloced-h2; }
178         assert(p->keylen!=DELETED);
179         if (p->keylen==NEVER_USED) goto found;
180         assert(!(p->keylen==pp->keylen && 0==memcmp(p->keydata, pp->keydata, pp->keylen)) && "dupicate key");
181       } /* WHILE */
182     }
183   } /* NEXT */
184   used=len=calclen;
185   assert(calclen==old_len);
186   assert(calcused==old_used);
187   assert(old_len==len);
188   assert(old_len==used);
189   assert(obj_assert());
190   delete [] old_ary;
191 }
clear()192 void Mapping::DoubleHash::clear() {
193   assert(obj_assert());
194   Ary *pp=ary, *ppend=pp+alloced;
195   for (; pp!=ppend; pp++) if (pp->keylen<(slen_t)-2) {
196     vi_dtor(pp->keydata-datalen); /* this is quite fast */
197     delete [] (pp->keydata-datalen); /* this is really slow for millions of values */
198     pp->keylen=(slen_t)DELETED;
199     len--;
200   }
201   assert(len==0);
202   if (minlen!=0) rehash();
203 }
204 
205 /* --- */
206 
207 static slen_t const d15_primes[]= {
208   0, 10, 13, 23, 37, 59, 89, 137, 211, 317, 479, 719, 1087, 1637, 2459, 3691,
209   5557, 8353, 12539, 18839, 28277, 42433, 63659,
210   95507UL, 143261UL, 214913UL, 322397UL, 483611UL, 725423UL, 1088159UL, 1632259UL, 2448389UL,
211   3672593UL, 5508913UL, 8263373UL, 12395069UL, 18592631UL, 27888947UL, 41833427UL,
212   62750147UL, 94125247UL, 141187901UL, 211781873UL, 317672813UL, 476509223UL,
213   714763843UL, 1072145771UL, 1608218669UL, 2412328031UL, 3618492049UL,
214 #if SIZEOF_SLEN_T>=8
215   5427738097UL, 8141607167UL, 12212410753UL, 18318616157UL, 27477924239UL,
216   41216886467UL, 61825329719UL, 92737994593UL, 139106991917UL, 208660487887UL,
217   312990731839UL, 469486097807UL, 704229146717UL, 1056343720093UL,
218   1584515580187UL, 2376773370313UL, 3565160055487UL, 5347740083243UL,
219   8021610124867UL, 12032415187319UL, 18048622780987UL, 27072934171487UL,
220   40609401257291UL, 60914101885951UL, 91371152828947UL, 137056729243439UL,
221   205585093865189UL, 308377640797829UL, 462566461196803UL,
222   693849691795229UL, 1040774537692907UL, 1561161806539393UL,
223   2341742709809177UL, 3512614064713777UL, 5268921097070759UL,
224   7903381645606193UL, 11855072468409349UL, 17782608702614033UL,
225   26673913053921061UL, 40010869580881603UL, 60016304371322423UL,
226   90024456556983643UL, 135036684835475519UL, 202555027253213279UL,
227   303832540879819943UL, 455748811319729951UL, 683623216979594929UL,
228   1025434825469392417UL, 1538152238204088631UL, 2307228357306132983UL,
229   3460842535959199481UL, 5191263803938799269UL,
230 #endif
231   0
232 };
233 
vi_scale()234 void Mapping::DoubleHash15::vi_scale() {
235 #if 0
236   // <lloced=63659 used=59671 maxused=59670
237   // alloced=1637 used=59671 maxused=1534
238 
239   // <lloced=1637 used=1535 maxused=1534
240   // |lloced=2459 used=1535 maxused=1534
241   // |lloced=1637 used=1535 maxused=1534
242   // alloced=1637 used=1535 maxused=1534
243 
244 
245   printf("<lloced=%u used=%u maxused=%u\n", alloced, used, maxused);
246   if (len<minlen) { again:
247     /* assert(used==minlen-1); */
248     assert(scale>=1);
249     alloced=(d15_primes+2)[--scale];
250     if (scale>1) minlen=(d15_primes+2)[scale-2];
251     else if (scale==1) minlen=10;
252     else minlen=0;
253     if (len<minlen) goto again;
254   } else if (used>maxused) {
255     assert(used==maxused+1);
256     if ((alloced=(d15_primes+2)[++scale])==0) abort(); /* Imp: better overflow reporting */
257     if (scale>1) minlen=(d15_primes+2)[scale-2];
258     else if (scale==1) minlen=10;
259     else minlen=0;
260     // if (len<minlen) minlen=len;
261   } else assert(0);
262   printf("|lloced=%u used=%u maxused=%u\n", alloced, used, maxused);
263   if (alloced<=4000U) maxused=(alloced*15)>>4;
264                  else maxused=(alloced>>4)*15;
265   if (used>maxused) printf("alloced=%u used=%u maxused=%u\n", alloced, used, maxused);
266 #endif
267 
268   /* Dat: we respect only `len', not used. */
269   /* Imp: make calculation of scale relative to previous one */
270 
271   scale=0; while (1) {
272     // printf("xscale=%u\n", scale);
273     if (0==(alloced=(d15_primes+2)[scale])) { assert(0); abort(); } /* Imp: better overflow reporting */
274     if (alloced<=4000U) maxused=(alloced*15)>>4;
275                    else maxused=(alloced>>4)*15; /* avoid overflows */
276     if (len<=maxused) break; /* _not_ used<=maxused, because of rehash() */
277     scale++;
278   }
279   minlen=(d15_primes)[scale];
280   // printf("ret alloced=%u used=%u maxused=%u\n", alloced, used, maxused);
281 }
vi_h1(register char const * key,register slen_t keylen)282 slen_t Mapping::DoubleHash15::vi_h1(register char const*key, register slen_t keylen) {
283   register slen_t hval=0, m=alloced;
284   while (keylen--!=0) hval=((hval<<8)+*key++)%m;
285   /* Imp: make this accurate and faster (the `%' operation is rather slow) */
286   return hval;
287 }
vi_h2(char const * key,slen_t keylen)288 slen_t Mapping::DoubleHash15::vi_h2(char const*key, slen_t keylen) {
289   register slen_t hval=0, m=alloced-1;
290   while (keylen--!=0) hval=((hval<<8)+*key++)%m;
291   /* Imp: make this accurate and faster (the `%' operation is rather slow) */
292   return hval+1;
293 }
vi_dtor(char *)294 void Mapping::DoubleHash15::vi_dtor(char *) {}
295 
DoubleHash15(slen_t datalen_)296 Mapping::DoubleHash15::DoubleHash15(slen_t datalen_) {
297   ary=new Ary[alloced=(d15_primes+2)[scale=0]];
298   memset(ary, '\377', sizeof(Ary)*alloced); /* fill with NEVER_USED */
299   datalen=datalen_;
300   len=used=minlen=0;
301   maxused=(alloced>>4)*15;
302 }
~DoubleHash15()303 Mapping::DoubleHash15::~DoubleHash15() {
304   minlen=0;
305   clear();
306   delete [] ary;
307 }
308 
309 /* __END__ */
310