1 /********************************************************************************
2 *                  Hash table class template (char* based)                               *
3 *********************************************************************************/
4 
5 #ifndef GHash_HH
6 #define GHash_HH
7 #include "GBase.h"
8 
9 /**
10 * This class maintains a fast-access hash table of entities
11 * indexed by a character string (essentially, maps strings to pointers)
12 */
13 
14 
15 template <class OBJ> class GHash {
16  protected:
17 	struct GHashEntry {
18 	     char*   key;              // Key string
19 	     bool    keyalloc;         //shared key flag (to not free the key chars)
20 	     int     hash;             // Hash value of key
21 	     pointer data;              // Data
22 	     bool    mark;             // Entry is marked
23 	     };
24   GHashEntry* hash;         // Hash
25   int         fCapacity;     // table size
26   int         fCount;        // number of valid entries
27   int  fCurrentEntry;
28   char* lastkeyptr; //pointer to last key string added
29     //---------- Raw data retrieval (including empty entries
30   // Return key at position pos.
Key(uint pos) const31   const char* Key(uint pos) const { return hash[pos].key; }
32   // return data OBJ* at given position
Data(uint pos) const33   OBJ* Data(uint pos) const { return (OBJ*) hash[pos].data; }
34   // Return mark flag of entry at position pos.
Mark(uint pos) const35   bool Mark(uint pos) const { return hash[pos].mark; }
36   // Return position of first filled slot, or >= fCapacity
37   int First() const;
38   // Return position of last filled slot or -1
39   int Last() const;
40   // Return position of next filled slot in hash table
41   // or a value greater than or equal to fCapacity if no filled
42   // slot was found
43   int Next(int pos) const;
44   //Return position of previous filled slot in hash table
45   //or a -1 if no filled slot was found
46   int Prev(int pos) const;
47 
48 private:
49   GHash(const GHash&);
50   GHash &operator=(const GHash&);
51   GFreeProc* fFreeProc; //procedure to free item data
52 protected:
53 public:
DefaultFreeProc(pointer item)54   static void DefaultFreeProc(pointer item) {
55       delete (OBJ*)item;
56       }
57 public:
58   GHash(GFreeProc* freeProc); // constructs of an empty hash
59   GHash(bool doFree=true); // constructs of an empty hash (free the item objects)
setFreeItem(GFreeProc * freeProc)60   void setFreeItem(GFreeProc *freeProc) { fFreeProc=freeProc; }
setFreeItem(bool doFree)61   void setFreeItem(bool doFree) { fFreeProc=(doFree)? &DefaultFreeProc : NULL; }
Capacity() const62   int Capacity() const { return fCapacity; } // table's size, including the empty slots.
63   void Resize(int m);  // Resize the table to the given size.
Count() const64   int Count() const { return fCount; }// the total number of entries in the table.
65   // Insert a new entry into the table given key and mark.
66   // If there is already an entry with that key, leave it unchanged,
67   const OBJ* Add(const char* ky, const OBJ* ptr=NULL, bool mrk=false);
68   //same as Add, but the key pointer is stored directly, no string duplicate
69   //is made (shared-key-Add)
70   const OBJ* shkAdd(const char* ky, const OBJ* ptr, bool mrk=false);
71 
72   // Replace data at key, if the entry's mark is less than
73   // or equal to the given mark.  If there was no existing entry,
74   // a new entry is inserted with the given mark.
75   OBJ* Replace(const char* ky, const OBJ* ptr, bool mrk=false);
76   // Remove a given key and its data
77   OBJ* Remove(const char* ky);
78   // Find data OBJ* given key.
79   OBJ* Find(const char* ky, char** keyptr=NULL);
80   bool hasKey(const char* ky);
getLastKey()81   char* getLastKey() { return lastkeyptr; }
operator [](const char * ky)82   OBJ* operator[](const char* ky) { return Find(ky); }
83   void startIterate(); //iterator-like initialization
84   char* NextKey(); //returns next valid key in the table (NULL if no more)
85   OBJ* NextData(); //returns next valid hash[].data
86   OBJ* NextData(char*& nextkey); //returns next valid hash[].data
87                                 //or NULL if no more
88                                 //nextkey is SET to the corresponding key
NextEntry()89   GHashEntry* NextEntry() { //returns a pointer to a GHashEntry
90   	 register int pos=fCurrentEntry;
91   	 while (pos<fCapacity && (hash[pos].hash)<0) pos++;
92   	 if (pos==fCapacity) {
93   	                 fCurrentEntry=fCapacity;
94   	                 return NULL;
95   	                 }
96   	              else {
97   	                 fCurrentEntry=pos+1;
98   	                 return &hash[pos];
99   	                 }
100   }
101   /// Clear all entries
102   void Clear();
103 
104   /// Destructor
105   virtual ~GHash();
106   };
107 //
108 //======================== method definitions ========================
109 //
110 /*
111   Notes:
112   - The hash algorithm should yield a fCount in the range [0...GHash::EMPTY)
113      GHash::EMPTY and GHash::UNUSED are needed for flag purposes.
114   - Since the algorithm doubles the table size when exceeding MAX_LOAD,
115     it would be prudent to keep MIN_LOAD less than 1/2 MAX_LOAD;
116     otherwise, the algorithm might hip-hop between halving and doubling,
117     which would be quite expensive!!
118   - Not many people seem to know that hash tables don't have to be prime
119     numbers; in fact, a table size of 2**n and odd probe distance are very
120     easy to arrange, and this works just as well!
121   - We store the hash key, so that 99.999% of the time we can compare hash numbers;
122     only when hash numbers match do we need to compare keys.
123     Thus, with a good hash function, the fCount of calls to strcmp() should be
124     roughly the same as the fCount of successful lookups.
125   - The hash table should NEVER get full, or stuff will loop forever!!
126 */
127 
128 // Initial table size (MUST be power of 2)
129 #define DEF_HASH_SIZE      32
130 // Maximum hash table load factor (%)
131 #define MAX_LOAD           80
132 // Minimum hash table load factor (%)
133 #define MIN_LOAD           10
134 // Probe Position [0..n-1]
135 #define HASH1(x,n) (((unsigned int)(x)*13)%(n))
136 // Probe Distance [1..n-1]
137 #define HASH2(x,n) (1|(((unsigned int)(x)*17)%((n)-1)))
138 
139 #define FREEDATA (fFreeProc!=NULL)
140 
141 /*******************************************************************************/
142 // Construct empty hash
GHash(GFreeProc * freeProc)143 template <class OBJ> GHash<OBJ>::GHash(GFreeProc* freeProc) {
144   GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
145   fCurrentEntry=-1;
146   fFreeProc=freeProc;
147   lastkeyptr=NULL;
148   for (uint i=0; i<DEF_HASH_SIZE; i++)
149          hash[i].hash=-1; //this will be an indicator for 'empty' entries
150   fCapacity=DEF_HASH_SIZE;
151   fCount=0;
152   }
153 
GHash(bool doFree)154 template <class OBJ> GHash<OBJ>::GHash(bool doFree) {
155   GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
156   fCurrentEntry=-1;
157   lastkeyptr=NULL;
158   fFreeProc = (doFree)?&DefaultFreeProc : NULL;
159   for (uint i=0; i<DEF_HASH_SIZE; i++)
160          hash[i].hash=-1; //this will be an indicator for 'empty' entries
161   fCapacity=DEF_HASH_SIZE;
162   fCount=0;
163   }
164 
165 
166 // Resize table
Resize(int m)167 template <class OBJ> void GHash<OBJ>::Resize(int m){
168   register int i,n,p,x,h;
169   GHashEntry *k;
170   GASSERT(fCount<=fCapacity);
171   if(m<DEF_HASH_SIZE) m=DEF_HASH_SIZE;
172   n=fCapacity;
173   while((n>>2)>m) n>>=1;            // Shrink until n/4 <= m
174   while((n>>1)<m) n<<=1;            // Grow until m <= n/2
175   GASSERT(m<=(n>>1));
176   GASSERT(DEF_HASH_SIZE<=n);
177   if(n!=fCapacity){
178     GASSERT(m<=n);
179     GMALLOC(k, sizeof(GHashEntry)*n);
180     for(i=0; i<n; i++) k[i].hash=-1;
181     for(i=0; i<fCapacity; i++){
182       h=hash[i].hash;
183       if(0<=h){
184         p=HASH1(h,n);
185         GASSERT(0<=p && p<n);
186         x=HASH2(h,n);
187         GASSERT(1<=x && x<n);
188         while(k[p].hash!=-1) p=(p+x)%n;
189         GASSERT((k[p].hash)<0);
190         k[p]=hash[i];
191         }
192       }
193     GFREE(hash);
194     hash=k;
195     fCapacity=n;
196     }
197   }
198 
199 // add a new entry, or update it if it already exists
Add(const char * ky,const OBJ * pdata,bool mrk)200 template <class OBJ> const OBJ* GHash<OBJ>::Add(const char* ky,
201                       const OBJ* pdata,bool mrk){
202   register int p,i,x,h,n;
203   if(!ky) GError("GHash::insert: NULL key argument.\n");
204   GASSERT(fCount<fCapacity);
205   h=strhash(ky);
206   GASSERT(0<=h);
207   p=HASH1(h,fCapacity);
208   GASSERT(0<=p && p<fCapacity);
209   x=HASH2(h,fCapacity);
210   GASSERT(1<=x && x<fCapacity);
211   i=-1;
212   n=fCapacity;
213   while(n && hash[p].hash!=-1){
214     if ((i==-1)&&(hash[p].hash==-2)) i=p;
215     if (hash[p].hash==h && strcmp(hash[p].key,ky)==0) {
216       //replace hash data for this key!
217       lastkeyptr=hash[p].key;
218       hash[p].data = (void*) pdata;
219       return (OBJ*)hash[p].data;
220       }
221     p=(p+x)%fCapacity;
222     n--;
223     }
224   if(i==-1) i=p;
225   GTRACE(("GHash::insert: key=\"%s\"\n",ky));
226   //GMessage("GHash::insert: key=\"%s\"\n",ky);
227   GASSERT(0<=i && i<fCapacity);
228   GASSERT((hash[i].hash)<0);
229   hash[i].hash=h;
230   hash[i].mark=mrk;
231   hash[i].key=Gstrdup(ky);
232   hash[i].keyalloc=true;
233   lastkeyptr=hash[i].key;
234   hash[i].data= (void*) pdata;
235   fCount++;
236   if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
237   GASSERT(fCount<fCapacity);
238   return pdata;
239   }
240 
shkAdd(const char * ky,const OBJ * pdata,bool mrk)241 template <class OBJ> const OBJ* GHash<OBJ>::shkAdd(const char* ky,
242                       const OBJ* pdata,bool mrk){
243   register int p,i,x,h,n;
244   if(!ky) GError("GHash::insert: NULL key argument.\n");
245   GASSERT(fCount<fCapacity);
246   h=strhash(ky);
247   GASSERT(0<=h);
248   p=HASH1(h,fCapacity);
249   GASSERT(0<=p && p<fCapacity);
250   x=HASH2(h,fCapacity);
251   GASSERT(1<=x && x<fCapacity);
252   i=-1;
253   n=fCapacity;
254   while(n && hash[p].hash!=-1){
255     if((i==-1)&&(hash[p].hash==-2)) i=p;
256     if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
257       //replace hash data for this key!
258       lastkeyptr=hash[p].key;
259       hash[p].data = (void*) pdata;
260       return (OBJ*)hash[p].data;
261       }
262     p=(p+x)%fCapacity;
263     n--;
264     }
265   if(i==-1) i=p;
266   GTRACE(("GHash::insert: key=\"%s\"\n",ky));
267   //GMessage("GHash::insert: key=\"%s\"\n",ky);
268   GASSERT(0<=i && i<fCapacity);
269   GASSERT((hash[i].hash)<0);
270   hash[i].hash=h;
271   hash[i].mark=mrk;
272   hash[i].key=(char *)ky;
273   lastkeyptr=hash[i].key;
274   hash[i].keyalloc=false;
275   hash[i].data= (void*) pdata;
276   fCount++;
277   if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
278   GASSERT(fCount<fCapacity);
279   return pdata;
280   }
281 
282 
283 // Add or replace entry
Replace(const char * ky,const OBJ * pdata,bool mrk)284 template <class OBJ>  OBJ* GHash<OBJ>::Replace(const char* ky,const OBJ* pdata, bool mrk){
285   register int p,i,x,h,n;
286   if(!ky){ GError("GHash::replace: NULL key argument.\n"); }
287   GASSERT(fCount<fCapacity);
288   h=strhash(ky);
289   GASSERT(0<=h);
290   p=HASH1(h,fCapacity);
291   GASSERT(0<=p && p<fCapacity);
292   x=HASH2(h,fCapacity);
293   GASSERT(1<=x && x<fCapacity);
294   i=-1;
295   n=fCapacity;
296   while(n && hash[p].hash!=-1){
297     if((i==-1)&&(hash[p].hash==-2)) i=p;
298     if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
299       if(hash[p].mark<=mrk){
300         GTRACE(("GHash::replace: %08x: replacing: \"%s\"\n",this,ky));
301         if (FREEDATA) (*fFreeProc)(hash[p].data);
302         hash[p].mark=mrk;
303         hash[p].data=pdata;
304         }
305       return hash[p].data;
306       }
307     p=(p+x)%fCapacity;
308     n--;
309     }
310   if(i==-1) i=p;
311   GTRACE(("GHash::replace: %08x: inserting: \"%s\"\n",this,ky));
312   GASSERT(0<=i && i<fCapacity);
313   GASSERT((hash[i].hash)<0);
314   hash[i].hash=h;
315   hash[i].mark=mrk;
316   hash[i].key=Gstrdup(ky);
317   hash[i].data=pdata;
318   fCount++;
319   if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
320   GASSERT(fCount<fCapacity);
321   return pdata;
322   }
323 
324 
325 // Remove entry
Remove(const char * ky)326 template <class OBJ> OBJ* GHash<OBJ>::Remove(const char* ky){
327   register int p,x,h,n;
328   if(!ky){ GError("GHash::remove: NULL key argument.\n"); }
329   OBJ* removed=NULL;
330   if(0<fCount){
331     h=strhash(ky);
332     GASSERT(0<=h);
333     p=HASH1(h,fCapacity);
334     GASSERT(0<=p && p<fCapacity);
335     x=HASH2(h,fCapacity);
336     GASSERT(1<=x && x<fCapacity);
337     GASSERT(fCount<fCapacity);
338     n=fCapacity;
339     while(n && hash[p].hash!=-1){
340       if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
341         GTRACE(("GHash::remove: %08x removing: \"%s\"\n",this,ky));
342         hash[p].hash=-2;
343         hash[p].mark=false;
344         if (hash[p].keyalloc) GFREE((hash[p].key));
345         if (FREEDATA) (*fFreeProc)(hash[p].data);
346             else removed=(OBJ*)hash[p].data;
347         hash[p].key=NULL;
348         hash[p].data=NULL;
349         fCount--;
350         if((100*fCount)<=(MIN_LOAD*fCapacity)) Resize(fCount);
351         GASSERT(fCount<fCapacity);
352         return removed;
353         }
354       p=(p+x)%fCapacity;
355       n--;
356       }
357     }
358   return removed;
359   }
360 
361 
362 // Find entry
hasKey(const char * ky)363 template <class OBJ> bool GHash<OBJ>::hasKey(const char* ky) {
364   register int p,x,h,n;
365   if(!ky){ GError("GHash::find: NULL key argument.\n"); }
366   if(0<fCount){
367     h=strhash(ky);
368     GASSERT(0<=h);
369     p=HASH1(h,fCapacity);
370     GASSERT(0<=p && p<fCapacity);
371     x=HASH2(h,fCapacity);
372     GASSERT(1<=x && x<fCapacity);
373     GASSERT(fCount<fCapacity);
374     n=fCapacity;
375     while(n && hash[p].hash!=-1){
376       if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
377         return true;
378         }
379       p=(p+x)%fCapacity;
380       n--;
381       }
382     }
383   return false;
384 }
385 
Find(const char * ky,char ** keyptr)386 template <class OBJ> OBJ* GHash<OBJ>::Find(const char* ky, char** keyptr){
387   register int p,x,h,n;
388   if(!ky){ GError("GHash::find: NULL key argument.\n"); }
389   if(0<fCount){
390     h=strhash(ky);
391     GASSERT(0<=h);
392     p=HASH1(h,fCapacity);
393     GASSERT(0<=p && p<fCapacity);
394     x=HASH2(h,fCapacity);
395     GASSERT(1<=x && x<fCapacity);
396     GASSERT(fCount<fCapacity);
397     n=fCapacity;
398     while(n && hash[p].hash!=-1){
399       if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
400         if (keyptr!=NULL) *keyptr = hash[p].key;
401         return (OBJ*)hash[p].data;
402         }
403       p=(p+x)%fCapacity;
404       n--;
405       }
406     }
407   return NULL;
408   }
409 
410 
startIterate()411 template <class OBJ> void GHash<OBJ>::startIterate() {// initialize a key iterator; call
412  fCurrentEntry=0;
413 }
414 
NextKey()415 template <class OBJ> char* GHash<OBJ>::NextKey() {
416  register int pos=fCurrentEntry;
417  while (pos<fCapacity && (hash[pos].hash)<0) pos++;
418  if (pos==fCapacity) {
419                  fCurrentEntry=fCapacity;
420                  return NULL;
421                  }
422               else {
423                  fCurrentEntry=pos+1;
424                  return hash[pos].key;
425                  }
426 }
427 
NextData()428 template <class OBJ> OBJ* GHash<OBJ>::NextData() {
429  register int pos=fCurrentEntry;
430  while (pos<fCapacity && (hash[pos].hash)<0) pos++;
431  if (pos==fCapacity) {
432                  fCurrentEntry=fCapacity;
433                  return NULL;
434                  }
435               else {
436                  fCurrentEntry=pos+1;
437                  return (OBJ*)hash[pos].data;
438                  }
439 
440 }
441 
NextData(char * & nextkey)442 template <class OBJ> OBJ* GHash<OBJ>::NextData(char* &nextkey) {
443  register int pos=fCurrentEntry;
444  while (pos<fCapacity && (hash[pos].hash)<0) pos++;
445  if (pos==fCapacity) {
446                  fCurrentEntry=fCapacity;
447                  nextkey=NULL;
448                  return NULL;
449                  }
450               else {
451                  fCurrentEntry=pos+1;
452                  nextkey=hash[pos].key;
453                  return (OBJ*)hash[pos].data;
454                  }
455 
456 }
457 
458 
459 // Get first non-empty entry
First() const460 template <class OBJ> int GHash<OBJ>::First() const {
461   register int pos=0;
462   while(pos<fCapacity){ if(0<=hash[pos].hash) break; pos++; }
463   GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
464   return pos;
465   }
466 
467 // Get last non-empty entry
Last() const468 template <class OBJ> int GHash<OBJ>::Last() const {
469   register int pos=fCapacity-1;
470   while(0<=pos){ if(0<=hash[pos].hash) break; pos--; }
471   GASSERT(pos<0 || 0<=hash[pos].hash);
472   return pos;
473   }
474 
475 
476 // Find next valid entry
Next(int pos) const477 template <class OBJ> int GHash<OBJ>::Next(int pos) const {
478   GASSERT(0<=pos && pos<fCapacity);
479   while(++pos <= fCapacity-1){ if(0<=hash[pos].hash) break; }
480   GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
481   return pos;
482   }
483 
484 
485 // Find previous valid entry
Prev(int pos) const486 template <class OBJ> int GHash<OBJ>::Prev(int pos) const {
487   GASSERT(0<=pos && pos<fCapacity);
488   while(--pos >= 0){ if(0<=hash[pos].hash) break; }
489   GASSERT(pos<0 || 0<=hash[pos].hash);
490   return pos;
491   }
492 
493 
494 // Remove all
Clear()495 template <class OBJ> void GHash<OBJ>::Clear(){
496   register int i;
497   for(i=0; i<fCapacity; i++){
498     if(hash[i].hash>=0){
499       if (hash[i].keyalloc) GFREE((hash[i].key));
500       if (FREEDATA)
501             (*fFreeProc)(hash[i].data);
502       }
503     }
504   GFREE(hash);
505   GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
506   //reinitialize it
507   for (i=0; i<DEF_HASH_SIZE; i++)
508          hash[i].hash=-1; //this will be an indicator for 'empty' entries
509   fCapacity=DEF_HASH_SIZE;
510   fCount=0;
511   }
512 
513 
514 // Save data
515 /*
516 void GHash::Save(Stream& store) const {
517   Object::save(store);
518   store << fCapacity;
519   store << fCount;
520   for(int i=0; i<fCapacity; i++){
521     store << hash[i].hash;
522     if(hash[i].hash>=0){
523       uint len=strlen(hash[i].key);
524       store << len;
525       store << hash[i].mark;
526       store.save(hash[i].key,len);
527       }
528     }
529   }
530 
531 
532 // Load data
533 void GHash::Load(Stream& store){
534   Object::load(store);
535   store >> fCapacity;
536   store >> fCount;
537   for(int i=0; i<fCapacity; i++){
538     store >> hash[i].hash;
539     if(hash[i].hash>=0){
540       uint len;
541       store >> len;
542       store >> hash[i].mark;
543       GMALLOC(hash[i].key,len+1);
544       store.load(hash[i].key,len);
545       hash[i].key[len]='\0';
546       }
547     }
548   }
549 */
550 
551 // Destroy table
~GHash()552 template <class OBJ> GHash<OBJ>::~GHash(){
553   register int i;
554   for(i=0; i<fCapacity; i++){
555     if(hash[i].hash>=0){
556       if (hash[i].keyalloc) GFREE((hash[i].key));
557       if (FREEDATA) (*fFreeProc)(hash[i].data);
558       }
559     }
560   GFREE(hash);
561   }
562 
563 #endif
564