1 /********************************************************************************
2 * Hash table class template (char* based) *
3 *********************************************************************************/
4
5 #ifndef GHash_HH
6 #define GHash_HH
7 #include "GBase.h"
8
9 /**
10 * This class maintains a fast-access hash table of entities
11 * indexed by a character string (essentially, maps strings to pointers)
12 */
13
14
15 template <class OBJ> class GHash {
16 protected:
17 struct GHashEntry {
18 char* key; // Key string
19 bool keyalloc; //shared key flag (to not free the key chars)
20 int hash; // Hash value of key
21 pointer data; // Data
22 bool mark; // Entry is marked
23 };
24 GHashEntry* hash; // Hash
25 int fCapacity; // table size
26 int fCount; // number of valid entries
27 int fCurrentEntry;
28 char* lastkeyptr; //pointer to last key string added
29 //---------- Raw data retrieval (including empty entries
30 // Return key at position pos.
Key(uint pos) const31 const char* Key(uint pos) const { return hash[pos].key; }
32 // return data OBJ* at given position
Data(uint pos) const33 OBJ* Data(uint pos) const { return (OBJ*) hash[pos].data; }
34 // Return mark flag of entry at position pos.
Mark(uint pos) const35 bool Mark(uint pos) const { return hash[pos].mark; }
36 // Return position of first filled slot, or >= fCapacity
37 int First() const;
38 // Return position of last filled slot or -1
39 int Last() const;
40 // Return position of next filled slot in hash table
41 // or a value greater than or equal to fCapacity if no filled
42 // slot was found
43 int Next(int pos) const;
44 //Return position of previous filled slot in hash table
45 //or a -1 if no filled slot was found
46 int Prev(int pos) const;
47
48 private:
49 GHash(const GHash&);
50 GHash &operator=(const GHash&);
51 GFreeProc* fFreeProc; //procedure to free item data
52 protected:
53 public:
DefaultFreeProc(pointer item)54 static void DefaultFreeProc(pointer item) {
55 delete (OBJ*)item;
56 }
57 public:
58 GHash(GFreeProc* freeProc); // constructs of an empty hash
59 GHash(bool doFree=true); // constructs of an empty hash (free the item objects)
setFreeItem(GFreeProc * freeProc)60 void setFreeItem(GFreeProc *freeProc) { fFreeProc=freeProc; }
setFreeItem(bool doFree)61 void setFreeItem(bool doFree) { fFreeProc=(doFree)? &DefaultFreeProc : NULL; }
Capacity() const62 int Capacity() const { return fCapacity; } // table's size, including the empty slots.
63 void Resize(int m); // Resize the table to the given size.
Count() const64 int Count() const { return fCount; }// the total number of entries in the table.
65 // Insert a new entry into the table given key and mark.
66 // If there is already an entry with that key, leave it unchanged,
67 const OBJ* Add(const char* ky, const OBJ* ptr=NULL, bool mrk=false);
68 //same as Add, but the key pointer is stored directly, no string duplicate
69 //is made (shared-key-Add)
70 const OBJ* shkAdd(const char* ky, const OBJ* ptr, bool mrk=false);
71
72 // Replace data at key, if the entry's mark is less than
73 // or equal to the given mark. If there was no existing entry,
74 // a new entry is inserted with the given mark.
75 OBJ* Replace(const char* ky, const OBJ* ptr, bool mrk=false);
76 // Remove a given key and its data
77 OBJ* Remove(const char* ky);
78 // Find data OBJ* given key.
79 OBJ* Find(const char* ky, char** keyptr=NULL);
80 bool hasKey(const char* ky);
getLastKey()81 char* getLastKey() { return lastkeyptr; }
operator [](const char * ky)82 OBJ* operator[](const char* ky) { return Find(ky); }
83 void startIterate(); //iterator-like initialization
84 char* NextKey(); //returns next valid key in the table (NULL if no more)
85 OBJ* NextData(); //returns next valid hash[].data
86 OBJ* NextData(char*& nextkey); //returns next valid hash[].data
87 //or NULL if no more
88 //nextkey is SET to the corresponding key
NextEntry()89 GHashEntry* NextEntry() { //returns a pointer to a GHashEntry
90 register int pos=fCurrentEntry;
91 while (pos<fCapacity && (hash[pos].hash)<0) pos++;
92 if (pos==fCapacity) {
93 fCurrentEntry=fCapacity;
94 return NULL;
95 }
96 else {
97 fCurrentEntry=pos+1;
98 return &hash[pos];
99 }
100 }
101 /// Clear all entries
102 void Clear();
103
104 /// Destructor
105 virtual ~GHash();
106 };
107 //
108 //======================== method definitions ========================
109 //
110 /*
111 Notes:
112 - The hash algorithm should yield a fCount in the range [0...GHash::EMPTY)
113 GHash::EMPTY and GHash::UNUSED are needed for flag purposes.
114 - Since the algorithm doubles the table size when exceeding MAX_LOAD,
115 it would be prudent to keep MIN_LOAD less than 1/2 MAX_LOAD;
116 otherwise, the algorithm might hip-hop between halving and doubling,
117 which would be quite expensive!!
118 - Not many people seem to know that hash tables don't have to be prime
119 numbers; in fact, a table size of 2**n and odd probe distance are very
120 easy to arrange, and this works just as well!
121 - We store the hash key, so that 99.999% of the time we can compare hash numbers;
122 only when hash numbers match do we need to compare keys.
123 Thus, with a good hash function, the fCount of calls to strcmp() should be
124 roughly the same as the fCount of successful lookups.
125 - The hash table should NEVER get full, or stuff will loop forever!!
126 */
127
128 // Initial table size (MUST be power of 2)
129 #define DEF_HASH_SIZE 32
130 // Maximum hash table load factor (%)
131 #define MAX_LOAD 80
132 // Minimum hash table load factor (%)
133 #define MIN_LOAD 10
134 // Probe Position [0..n-1]
135 #define HASH1(x,n) (((unsigned int)(x)*13)%(n))
136 // Probe Distance [1..n-1]
137 #define HASH2(x,n) (1|(((unsigned int)(x)*17)%((n)-1)))
138
139 #define FREEDATA (fFreeProc!=NULL)
140
141 /*******************************************************************************/
142 // Construct empty hash
GHash(GFreeProc * freeProc)143 template <class OBJ> GHash<OBJ>::GHash(GFreeProc* freeProc) {
144 GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
145 fCurrentEntry=-1;
146 fFreeProc=freeProc;
147 lastkeyptr=NULL;
148 for (uint i=0; i<DEF_HASH_SIZE; i++)
149 hash[i].hash=-1; //this will be an indicator for 'empty' entries
150 fCapacity=DEF_HASH_SIZE;
151 fCount=0;
152 }
153
GHash(bool doFree)154 template <class OBJ> GHash<OBJ>::GHash(bool doFree) {
155 GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
156 fCurrentEntry=-1;
157 lastkeyptr=NULL;
158 fFreeProc = (doFree)?&DefaultFreeProc : NULL;
159 for (uint i=0; i<DEF_HASH_SIZE; i++)
160 hash[i].hash=-1; //this will be an indicator for 'empty' entries
161 fCapacity=DEF_HASH_SIZE;
162 fCount=0;
163 }
164
165
166 // Resize table
Resize(int m)167 template <class OBJ> void GHash<OBJ>::Resize(int m){
168 register int i,n,p,x,h;
169 GHashEntry *k;
170 GASSERT(fCount<=fCapacity);
171 if(m<DEF_HASH_SIZE) m=DEF_HASH_SIZE;
172 n=fCapacity;
173 while((n>>2)>m) n>>=1; // Shrink until n/4 <= m
174 while((n>>1)<m) n<<=1; // Grow until m <= n/2
175 GASSERT(m<=(n>>1));
176 GASSERT(DEF_HASH_SIZE<=n);
177 if(n!=fCapacity){
178 GASSERT(m<=n);
179 GMALLOC(k, sizeof(GHashEntry)*n);
180 for(i=0; i<n; i++) k[i].hash=-1;
181 for(i=0; i<fCapacity; i++){
182 h=hash[i].hash;
183 if(0<=h){
184 p=HASH1(h,n);
185 GASSERT(0<=p && p<n);
186 x=HASH2(h,n);
187 GASSERT(1<=x && x<n);
188 while(k[p].hash!=-1) p=(p+x)%n;
189 GASSERT((k[p].hash)<0);
190 k[p]=hash[i];
191 }
192 }
193 GFREE(hash);
194 hash=k;
195 fCapacity=n;
196 }
197 }
198
199 // add a new entry, or update it if it already exists
Add(const char * ky,const OBJ * pdata,bool mrk)200 template <class OBJ> const OBJ* GHash<OBJ>::Add(const char* ky,
201 const OBJ* pdata,bool mrk){
202 register int p,i,x,h,n;
203 if(!ky) GError("GHash::insert: NULL key argument.\n");
204 GASSERT(fCount<fCapacity);
205 h=strhash(ky);
206 GASSERT(0<=h);
207 p=HASH1(h,fCapacity);
208 GASSERT(0<=p && p<fCapacity);
209 x=HASH2(h,fCapacity);
210 GASSERT(1<=x && x<fCapacity);
211 i=-1;
212 n=fCapacity;
213 while(n && hash[p].hash!=-1){
214 if ((i==-1)&&(hash[p].hash==-2)) i=p;
215 if (hash[p].hash==h && strcmp(hash[p].key,ky)==0) {
216 //replace hash data for this key!
217 lastkeyptr=hash[p].key;
218 hash[p].data = (void*) pdata;
219 return (OBJ*)hash[p].data;
220 }
221 p=(p+x)%fCapacity;
222 n--;
223 }
224 if(i==-1) i=p;
225 GTRACE(("GHash::insert: key=\"%s\"\n",ky));
226 //GMessage("GHash::insert: key=\"%s\"\n",ky);
227 GASSERT(0<=i && i<fCapacity);
228 GASSERT((hash[i].hash)<0);
229 hash[i].hash=h;
230 hash[i].mark=mrk;
231 hash[i].key=Gstrdup(ky);
232 hash[i].keyalloc=true;
233 lastkeyptr=hash[i].key;
234 hash[i].data= (void*) pdata;
235 fCount++;
236 if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
237 GASSERT(fCount<fCapacity);
238 return pdata;
239 }
240
shkAdd(const char * ky,const OBJ * pdata,bool mrk)241 template <class OBJ> const OBJ* GHash<OBJ>::shkAdd(const char* ky,
242 const OBJ* pdata,bool mrk){
243 register int p,i,x,h,n;
244 if(!ky) GError("GHash::insert: NULL key argument.\n");
245 GASSERT(fCount<fCapacity);
246 h=strhash(ky);
247 GASSERT(0<=h);
248 p=HASH1(h,fCapacity);
249 GASSERT(0<=p && p<fCapacity);
250 x=HASH2(h,fCapacity);
251 GASSERT(1<=x && x<fCapacity);
252 i=-1;
253 n=fCapacity;
254 while(n && hash[p].hash!=-1){
255 if((i==-1)&&(hash[p].hash==-2)) i=p;
256 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
257 //replace hash data for this key!
258 lastkeyptr=hash[p].key;
259 hash[p].data = (void*) pdata;
260 return (OBJ*)hash[p].data;
261 }
262 p=(p+x)%fCapacity;
263 n--;
264 }
265 if(i==-1) i=p;
266 GTRACE(("GHash::insert: key=\"%s\"\n",ky));
267 //GMessage("GHash::insert: key=\"%s\"\n",ky);
268 GASSERT(0<=i && i<fCapacity);
269 GASSERT((hash[i].hash)<0);
270 hash[i].hash=h;
271 hash[i].mark=mrk;
272 hash[i].key=(char *)ky;
273 lastkeyptr=hash[i].key;
274 hash[i].keyalloc=false;
275 hash[i].data= (void*) pdata;
276 fCount++;
277 if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
278 GASSERT(fCount<fCapacity);
279 return pdata;
280 }
281
282
283 // Add or replace entry
Replace(const char * ky,const OBJ * pdata,bool mrk)284 template <class OBJ> OBJ* GHash<OBJ>::Replace(const char* ky,const OBJ* pdata, bool mrk){
285 register int p,i,x,h,n;
286 if(!ky){ GError("GHash::replace: NULL key argument.\n"); }
287 GASSERT(fCount<fCapacity);
288 h=strhash(ky);
289 GASSERT(0<=h);
290 p=HASH1(h,fCapacity);
291 GASSERT(0<=p && p<fCapacity);
292 x=HASH2(h,fCapacity);
293 GASSERT(1<=x && x<fCapacity);
294 i=-1;
295 n=fCapacity;
296 while(n && hash[p].hash!=-1){
297 if((i==-1)&&(hash[p].hash==-2)) i=p;
298 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
299 if(hash[p].mark<=mrk){
300 GTRACE(("GHash::replace: %08x: replacing: \"%s\"\n",this,ky));
301 if (FREEDATA) (*fFreeProc)(hash[p].data);
302 hash[p].mark=mrk;
303 hash[p].data=pdata;
304 }
305 return hash[p].data;
306 }
307 p=(p+x)%fCapacity;
308 n--;
309 }
310 if(i==-1) i=p;
311 GTRACE(("GHash::replace: %08x: inserting: \"%s\"\n",this,ky));
312 GASSERT(0<=i && i<fCapacity);
313 GASSERT((hash[i].hash)<0);
314 hash[i].hash=h;
315 hash[i].mark=mrk;
316 hash[i].key=Gstrdup(ky);
317 hash[i].data=pdata;
318 fCount++;
319 if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
320 GASSERT(fCount<fCapacity);
321 return pdata;
322 }
323
324
325 // Remove entry
Remove(const char * ky)326 template <class OBJ> OBJ* GHash<OBJ>::Remove(const char* ky){
327 register int p,x,h,n;
328 if(!ky){ GError("GHash::remove: NULL key argument.\n"); }
329 OBJ* removed=NULL;
330 if(0<fCount){
331 h=strhash(ky);
332 GASSERT(0<=h);
333 p=HASH1(h,fCapacity);
334 GASSERT(0<=p && p<fCapacity);
335 x=HASH2(h,fCapacity);
336 GASSERT(1<=x && x<fCapacity);
337 GASSERT(fCount<fCapacity);
338 n=fCapacity;
339 while(n && hash[p].hash!=-1){
340 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
341 GTRACE(("GHash::remove: %08x removing: \"%s\"\n",this,ky));
342 hash[p].hash=-2;
343 hash[p].mark=false;
344 if (hash[p].keyalloc) GFREE((hash[p].key));
345 if (FREEDATA) (*fFreeProc)(hash[p].data);
346 else removed=(OBJ*)hash[p].data;
347 hash[p].key=NULL;
348 hash[p].data=NULL;
349 fCount--;
350 if((100*fCount)<=(MIN_LOAD*fCapacity)) Resize(fCount);
351 GASSERT(fCount<fCapacity);
352 return removed;
353 }
354 p=(p+x)%fCapacity;
355 n--;
356 }
357 }
358 return removed;
359 }
360
361
362 // Find entry
hasKey(const char * ky)363 template <class OBJ> bool GHash<OBJ>::hasKey(const char* ky) {
364 register int p,x,h,n;
365 if(!ky){ GError("GHash::find: NULL key argument.\n"); }
366 if(0<fCount){
367 h=strhash(ky);
368 GASSERT(0<=h);
369 p=HASH1(h,fCapacity);
370 GASSERT(0<=p && p<fCapacity);
371 x=HASH2(h,fCapacity);
372 GASSERT(1<=x && x<fCapacity);
373 GASSERT(fCount<fCapacity);
374 n=fCapacity;
375 while(n && hash[p].hash!=-1){
376 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
377 return true;
378 }
379 p=(p+x)%fCapacity;
380 n--;
381 }
382 }
383 return false;
384 }
385
Find(const char * ky,char ** keyptr)386 template <class OBJ> OBJ* GHash<OBJ>::Find(const char* ky, char** keyptr){
387 register int p,x,h,n;
388 if(!ky){ GError("GHash::find: NULL key argument.\n"); }
389 if(0<fCount){
390 h=strhash(ky);
391 GASSERT(0<=h);
392 p=HASH1(h,fCapacity);
393 GASSERT(0<=p && p<fCapacity);
394 x=HASH2(h,fCapacity);
395 GASSERT(1<=x && x<fCapacity);
396 GASSERT(fCount<fCapacity);
397 n=fCapacity;
398 while(n && hash[p].hash!=-1){
399 if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
400 if (keyptr!=NULL) *keyptr = hash[p].key;
401 return (OBJ*)hash[p].data;
402 }
403 p=(p+x)%fCapacity;
404 n--;
405 }
406 }
407 return NULL;
408 }
409
410
startIterate()411 template <class OBJ> void GHash<OBJ>::startIterate() {// initialize a key iterator; call
412 fCurrentEntry=0;
413 }
414
NextKey()415 template <class OBJ> char* GHash<OBJ>::NextKey() {
416 register int pos=fCurrentEntry;
417 while (pos<fCapacity && (hash[pos].hash)<0) pos++;
418 if (pos==fCapacity) {
419 fCurrentEntry=fCapacity;
420 return NULL;
421 }
422 else {
423 fCurrentEntry=pos+1;
424 return hash[pos].key;
425 }
426 }
427
NextData()428 template <class OBJ> OBJ* GHash<OBJ>::NextData() {
429 register int pos=fCurrentEntry;
430 while (pos<fCapacity && (hash[pos].hash)<0) pos++;
431 if (pos==fCapacity) {
432 fCurrentEntry=fCapacity;
433 return NULL;
434 }
435 else {
436 fCurrentEntry=pos+1;
437 return (OBJ*)hash[pos].data;
438 }
439
440 }
441
NextData(char * & nextkey)442 template <class OBJ> OBJ* GHash<OBJ>::NextData(char* &nextkey) {
443 register int pos=fCurrentEntry;
444 while (pos<fCapacity && (hash[pos].hash)<0) pos++;
445 if (pos==fCapacity) {
446 fCurrentEntry=fCapacity;
447 nextkey=NULL;
448 return NULL;
449 }
450 else {
451 fCurrentEntry=pos+1;
452 nextkey=hash[pos].key;
453 return (OBJ*)hash[pos].data;
454 }
455
456 }
457
458
459 // Get first non-empty entry
First() const460 template <class OBJ> int GHash<OBJ>::First() const {
461 register int pos=0;
462 while(pos<fCapacity){ if(0<=hash[pos].hash) break; pos++; }
463 GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
464 return pos;
465 }
466
467 // Get last non-empty entry
Last() const468 template <class OBJ> int GHash<OBJ>::Last() const {
469 register int pos=fCapacity-1;
470 while(0<=pos){ if(0<=hash[pos].hash) break; pos--; }
471 GASSERT(pos<0 || 0<=hash[pos].hash);
472 return pos;
473 }
474
475
476 // Find next valid entry
Next(int pos) const477 template <class OBJ> int GHash<OBJ>::Next(int pos) const {
478 GASSERT(0<=pos && pos<fCapacity);
479 while(++pos <= fCapacity-1){ if(0<=hash[pos].hash) break; }
480 GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
481 return pos;
482 }
483
484
485 // Find previous valid entry
Prev(int pos) const486 template <class OBJ> int GHash<OBJ>::Prev(int pos) const {
487 GASSERT(0<=pos && pos<fCapacity);
488 while(--pos >= 0){ if(0<=hash[pos].hash) break; }
489 GASSERT(pos<0 || 0<=hash[pos].hash);
490 return pos;
491 }
492
493
494 // Remove all
Clear()495 template <class OBJ> void GHash<OBJ>::Clear(){
496 register int i;
497 for(i=0; i<fCapacity; i++){
498 if(hash[i].hash>=0){
499 if (hash[i].keyalloc) GFREE((hash[i].key));
500 if (FREEDATA)
501 (*fFreeProc)(hash[i].data);
502 }
503 }
504 GFREE(hash);
505 GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
506 //reinitialize it
507 for (i=0; i<DEF_HASH_SIZE; i++)
508 hash[i].hash=-1; //this will be an indicator for 'empty' entries
509 fCapacity=DEF_HASH_SIZE;
510 fCount=0;
511 }
512
513
514 // Save data
515 /*
516 void GHash::Save(Stream& store) const {
517 Object::save(store);
518 store << fCapacity;
519 store << fCount;
520 for(int i=0; i<fCapacity; i++){
521 store << hash[i].hash;
522 if(hash[i].hash>=0){
523 uint len=strlen(hash[i].key);
524 store << len;
525 store << hash[i].mark;
526 store.save(hash[i].key,len);
527 }
528 }
529 }
530
531
532 // Load data
533 void GHash::Load(Stream& store){
534 Object::load(store);
535 store >> fCapacity;
536 store >> fCount;
537 for(int i=0; i<fCapacity; i++){
538 store >> hash[i].hash;
539 if(hash[i].hash>=0){
540 uint len;
541 store >> len;
542 store >> hash[i].mark;
543 GMALLOC(hash[i].key,len+1);
544 store.load(hash[i].key,len);
545 hash[i].key[len]='\0';
546 }
547 }
548 }
549 */
550
551 // Destroy table
~GHash()552 template <class OBJ> GHash<OBJ>::~GHash(){
553 register int i;
554 for(i=0; i<fCapacity; i++){
555 if(hash[i].hash>=0){
556 if (hash[i].keyalloc) GFREE((hash[i].key));
557 if (FREEDATA) (*fFreeProc)(hash[i].data);
558 }
559 }
560 GFREE(hash);
561 }
562
563 #endif
564