1 /********************************************************************************
2 *                  Hash table class template (char* based)                               *
3 *********************************************************************************/
4 
5 #ifndef GHash_HH
6 #define GHash_HH
7 #include "GBase.h"
8 /**
9 * This class maintains a fast-access hash table of entities
10 * indexed by a character string.
11 * It is typically used to map strings to pointers; however, overloading
12 * the createData() and deleteData() members allows any type of data to
13 * be indexed by strings.
14 */
15 typedef struct {
16      char*   key;              // Key string
17      bool    keyalloc;         //shared key flag (to not free the key chars)
18      int     hash;             // Hash value of key
19      pointer data;              // Data
20      bool    mark;             // Entry is marked
21      } GHashEntry;
22 
23 template <class OBJ> class  GHash {
24  protected:
25   GHashEntry* hash;         // Hash
26   int         fCapacity;     // table size
27   int         fCount;        // number of valid entries
28   int  fCurrentEntry;
29   char* lastkeyptr; //pointer to last key string added
30     //---------- Raw data retrieval (including empty entries
31   // Return key at position pos.
Key(uint pos) const32   const char* Key(uint pos) const { return hash[pos].key; }
33   // return data OBJ* at given position
Data(uint pos) const34   OBJ* Data(uint pos) const { return (OBJ*) hash[pos].data; }
35   // Return mark flag of entry at position pos.
Mark(uint pos) const36   bool Mark(uint pos) const { return hash[pos].mark; }
37   // Return position of first filled slot, or >= fCapacity
38   int First() const;
39   // Return position of last filled slot or -1
40   int Last() const;
41   // Return position of next filled slot in hash table
42   // or a value greater than or equal to fCapacity if no filled
43   // slot was found
44   int Next(int pos) const;
45   //Return position of previous filled slot in hash table
46   //or a -1 if no filled slot was found
47   int Prev(int pos) const;
48 
49 private:
50   GHash(const GHash&);
51   GHash &operator=(const GHash&);
52   GFreeProc* fFreeProc; //procedure to free item data
53 protected:
54 public:
DefaultFreeProc(pointer item)55   static void DefaultFreeProc(pointer item) {
56       delete (OBJ*)item;
57       item=NULL;
58       }
59 public:
60   GHash(GFreeProc* freeProc); // constructs of an empty hash
61   GHash(bool doFree=true); // constructs of an empty hash (free the item objects)
setFreeItem(GFreeProc * freeProc)62   void setFreeItem(GFreeProc *freeProc) { fFreeProc=freeProc; }
setFreeItem(bool doFree)63   void setFreeItem(bool doFree) { fFreeProc=(doFree)? &DefaultFreeProc : NULL; }
Capacity() const64   int Capacity() const { return fCapacity; } // table's size, including the empty slots.
65   void Resize(int m);  // Resize the table to the given size.
Count() const66   int Count() const { return fCount; }// the total number of entries in the table.
67   // Insert a new entry into the table given key and mark.
68   // If there is already an entry with that key, leave it unchanged,
69   const OBJ* Add(const char* ky, const OBJ* ptr, bool mrk=false);
70   //same as Add, but the key pointer is stored directly, no string duplicate
71   //is made (shared-key-Add)
72   const OBJ* shkAdd(const char* ky, const OBJ* ptr, bool mrk=false);
73 
74   // Replace data at key, if the entry's mark is less than
75   // or equal to the given mark.  If there was no existing entry,
76   // a new entry is inserted with the given mark.
77   OBJ* Replace(const char* ky, const OBJ* ptr, bool mrk=false);
78   // Remove a given key and its data
79   OBJ* Remove(const char* ky);
80   // Find data OBJ* given key.
81   OBJ* Find(const char* ky);
82   bool hasKey(const char* ky);
getLastKey()83   const char* getLastKey() { return lastkeyptr; }
operator [](const char * ky)84   OBJ* operator[](const char* ky) { return Find(ky); }
85   void startIterate(); //iterator-like initialization
86   char* NextKey(); //returns next valid key in the table (NULL if no more)
87   OBJ* NextData(); //returns next valid hash[].data
88   OBJ* NextData(char*& nextkey); //returns next valid hash[].data
89                                 //or NULL if no more
90                                 //nextkey is SET to the corresponding key
91   GHashEntry* NextEntry(); //returns a pointer to a GHashEntry
92 
93   /// Clear all entries
94   void Clear();
95 
96   /// Destructor
97   virtual ~GHash();
98   };
99 //
100 //======================== method definitions ========================
101 //
102 /*
103   Notes:
104   - The hash algorithm should yield a fCount in the range [0...GHash::EMPTY)
105      GHash::EMPTY and GHash::UNUSED are needed for flag purposes.
106   - Since the algorithm doubles the table size when exceeding MAX_LOAD,
107     it would be prudent to keep MIN_LOAD less than 1/2 MAX_LOAD;
108     otherwise, the algorithm might hip-hop between halving and doubling,
109     which would be quite expensive!!
110   - Not many people seem to know that hash tables don't have to be prime
111     numbers; in fact, a table size of 2**n and odd probe distance are very
112     easy to arrange, and this works just as well!
113   - We store the hash key, so that 99.999% of the time we can compare hash numbers;
114     only when hash numbers match do we need to compare keys.
115     Thus, with a good hash function, the fCount of calls to strcmp() should be
116     roughly the same as the fCount of successful lookups.
117   - The hash table should NEVER get full, or stuff will loop forever!!
118 */
119 
120 // Initial table size (MUST be power of 2)
121 #define DEF_HASH_SIZE      32
122 // Maximum hash table load factor (%)
123 #define MAX_LOAD           80
124 // Minimum hash table load factor (%)
125 #define MIN_LOAD           10
126 // Probe Position [0..n-1]
127 #define HASH1(x,n) (((unsigned int)(x)*13)%(n))
128 // Probe Distance [1..n-1]
129 #define HASH2(x,n) (1|(((unsigned int)(x)*17)%((n)-1)))
130 
131 #define FREEDATA (fFreeProc!=NULL)
132 
133 /*******************************************************************************/
134 // Construct empty hash
GHash(GFreeProc * freeProc)135 template <class OBJ> GHash<OBJ>::GHash(GFreeProc* freeProc) {
136   GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
137   fFreeProc=freeProc;
138   for (uint i=0; i<DEF_HASH_SIZE; i++)
139          hash[i].hash=-1; //this will be an indicator for 'empty' entries
140   fCapacity=DEF_HASH_SIZE;
141   fCount=0;
142   }
143 
GHash(bool doFree)144 template <class OBJ> GHash<OBJ>::GHash(bool doFree) {
145   GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
146   fFreeProc = (doFree)?&DefaultFreeProc : NULL;
147   for (uint i=0; i<DEF_HASH_SIZE; i++)
148          hash[i].hash=-1; //this will be an indicator for 'empty' entries
149   fCapacity=DEF_HASH_SIZE;
150   fCount=0;
151   }
152 
153 
154 // Resize table
Resize(int m)155 template <class OBJ> void GHash<OBJ>::Resize(int m){
156   register int i,n,p,x,h;
157   GHashEntry *k;
158   GASSERT(fCount<=fCapacity);
159   if(m<DEF_HASH_SIZE) m=DEF_HASH_SIZE;
160   n=fCapacity;
161   while((n>>2)>m) n>>=1;            // Shrink until n/4 <= m
162   while((n>>1)<m) n<<=1;            // Grow until m <= n/2
163   GASSERT(m<=(n>>1));
164   GASSERT(DEF_HASH_SIZE<=n);
165   if(n!=fCapacity){
166     GASSERT(m<=n);
167     GMALLOC(k, sizeof(GHashEntry)*n);
168     for(i=0; i<n; i++) k[i].hash=-1;
169     for(i=0; i<fCapacity; i++){
170       h=hash[i].hash;
171       if(0<=h){
172         p=HASH1(h,n);
173         GASSERT(0<=p && p<n);
174         x=HASH2(h,n);
175         GASSERT(1<=x && x<n);
176         while(k[p].hash!=-1) p=(p+x)%n;
177         GASSERT(k[p].hash<0);
178         k[p]=hash[i];
179         }
180       }
181     GFREE(hash);
182     hash=k;
183     fCapacity=n;
184     }
185   }
186 
187 // add a new entry, leave it alone if already existing
Add(const char * ky,const OBJ * pdata,bool mrk)188 template <class OBJ> const OBJ* GHash<OBJ>::Add(const char* ky,
189                       const OBJ* pdata,bool mrk){
190   register int p,i,x,h,n;
191   if(!ky) GError("GHash::insert: NULL key argument.\n");
192   GASSERT(fCount<fCapacity);
193   h=strhash(ky);
194   GASSERT(0<=h);
195   p=HASH1(h,fCapacity);
196   GASSERT(0<=p && p<fCapacity);
197   x=HASH2(h,fCapacity);
198   GASSERT(1<=x && x<fCapacity);
199   i=-1;
200   n=fCapacity;
201   while(n && hash[p].hash!=-1){
202     if ((i==-1)&&(hash[p].hash==-2)) i=p;
203     if (hash[p].hash==h && strcmp(hash[p].key,ky)==0) {
204       //replace hash data for this key!
205       lastkeyptr=hash[p].key;
206       hash[p].data = (void*) pdata;
207       return (OBJ*)hash[p].data;
208       }
209     p=(p+x)%fCapacity;
210     n--;
211     }
212   if(i==-1) i=p;
213   GTRACE(("GHash::insert: key=\"%s\"\n",ky));
214   //GMessage("GHash::insert: key=\"%s\"\n",ky);
215   GASSERT(0<=i && i<fCapacity);
216   GASSERT(hash[i].hash<0);
217   hash[i].hash=h;
218   hash[i].mark=mrk;
219   hash[i].key=Gstrdup(ky);
220   hash[i].keyalloc=true;
221   lastkeyptr=hash[i].key;
222   hash[i].data= (void*) pdata;
223   fCount++;
224   if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
225   GASSERT(fCount<fCapacity);
226   return pdata;
227   }
228 
shkAdd(const char * ky,const OBJ * pdata,bool mrk)229 template <class OBJ> const OBJ* GHash<OBJ>::shkAdd(const char* ky,
230                       const OBJ* pdata,bool mrk){
231   register int p,i,x,h,n;
232   if(!ky) GError("GHash::insert: NULL key argument.\n");
233   GASSERT(fCount<fCapacity);
234   h=strhash(ky);
235   GASSERT(0<=h);
236   p=HASH1(h,fCapacity);
237   GASSERT(0<=p && p<fCapacity);
238   x=HASH2(h,fCapacity);
239   GASSERT(1<=x && x<fCapacity);
240   i=-1;
241   n=fCapacity;
242   while(n && hash[p].hash!=-1){
243     if((i==-1)&&(hash[p].hash==-2)) i=p;
244     if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
245       //replace hash data for this key!
246       lastkeyptr=hash[p].key;
247       hash[p].data = (void*) pdata;
248       return (OBJ*)hash[p].data;
249       }
250     p=(p+x)%fCapacity;
251     n--;
252     }
253   if(i==-1) i=p;
254   GTRACE(("GHash::insert: key=\"%s\"\n",ky));
255   //GMessage("GHash::insert: key=\"%s\"\n",ky);
256   GASSERT(0<=i && i<fCapacity);
257   GASSERT(hash[i].hash<0);
258   hash[i].hash=h;
259   hash[i].mark=mrk;
260   hash[i].key=(char *)ky;
261   lastkeyptr=hash[i].key;
262   hash[i].keyalloc=false;
263   hash[i].data= (void*) pdata;
264   fCount++;
265   if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
266   GASSERT(fCount<fCapacity);
267   return pdata;
268   }
269 
270 
271 // Add or replace entry
Replace(const char * ky,const OBJ * pdata,bool mrk)272 template <class OBJ>  OBJ* GHash<OBJ>::Replace(const char* ky,const OBJ* pdata, bool mrk){
273   register int p,i,x,h,n;
274   if(!ky){ GError("GHash::replace: NULL key argument.\n"); }
275   GASSERT(fCount<fCapacity);
276   h=strhash(ky);
277   GASSERT(0<=h);
278   p=HASH1(h,fCapacity);
279   GASSERT(0<=p && p<fCapacity);
280   x=HASH2(h,fCapacity);
281   GASSERT(1<=x && x<fCapacity);
282   i=-1;
283   n=fCapacity;
284   while(n && hash[p].hash!=-1){
285     if((i==-1)&&(hash[p].hash==-2)) i=p;
286     if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
287       if(hash[p].mark<=mrk){
288         GTRACE(("GHash::replace: %08x: replacing: \"%s\"\n",this,ky));
289         if (FREEDATA) (*fFreeProc)(hash[p].data);
290         hash[p].mark=mrk;
291         hash[p].data=pdata;
292         }
293       return hash[p].data;
294       }
295     p=(p+x)%fCapacity;
296     n--;
297     }
298   if(i==-1) i=p;
299   GTRACE(("GHash::replace: %08x: inserting: \"%s\"\n",this,ky));
300   GASSERT(0<=i && i<fCapacity);
301   GASSERT(hash[i].hash<0);
302   hash[i].hash=h;
303   hash[i].mark=mrk;
304   hash[i].key=Gstrdup(ky);
305   hash[i].data=pdata;
306   fCount++;
307   if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
308   GASSERT(fCount<fCapacity);
309   return pdata;
310   }
311 
312 
313 // Remove entry
Remove(const char * ky)314 template <class OBJ> OBJ* GHash<OBJ>::Remove(const char* ky){
315   register int p,x,h,n;
316   if(!ky){ GError("GHash::remove: NULL key argument.\n"); }
317   if(0<fCount){
318     h=strhash(ky);
319     GASSERT(0<=h);
320     p=HASH1(h,fCapacity);
321     GASSERT(0<=p && p<fCapacity);
322     x=HASH2(h,fCapacity);
323     GASSERT(1<=x && x<fCapacity);
324     GASSERT(fCount<fCapacity);
325     n=fCapacity;
326     while(n && hash[p].hash!=-1){
327       if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
328         GTRACE(("GHash::remove: %08x removing: \"%s\"\n",this,ky));
329         hash[p].hash=-2;
330         hash[p].mark=false;
331         if (hash[p].keyalloc) GFREE((hash[p].key));
332         if (FREEDATA) (*fFreeProc)(hash[p].data);
333         hash[p].key=NULL;
334         hash[p].data=NULL;
335         fCount--;
336         if((100*fCount)<=(MIN_LOAD*fCapacity)) Resize(fCount);
337         GASSERT(fCount<fCapacity);
338         return NULL;
339         }
340       p=(p+x)%fCapacity;
341       n--;
342       }
343     }
344   return NULL;
345   }
346 
347 
348 // Find entry
hasKey(const char * ky)349 template <class OBJ> bool GHash<OBJ>::hasKey(const char* ky) {
350   register int p,x,h,n;
351   if(!ky){ GError("GHash::find: NULL key argument.\n"); }
352   if(0<fCount){
353     h=strhash(ky);
354     GASSERT(0<=h);
355     p=HASH1(h,fCapacity);
356     GASSERT(0<=p && p<fCapacity);
357     x=HASH2(h,fCapacity);
358     GASSERT(1<=x && x<fCapacity);
359     GASSERT(fCount<fCapacity);
360     n=fCapacity;
361     while(n && hash[p].hash!=-1){
362       if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
363         return true;
364         }
365       p=(p+x)%fCapacity;
366       n--;
367       }
368     }
369   return false;
370 }
371 
Find(const char * ky)372 template <class OBJ> OBJ* GHash<OBJ>::Find(const char* ky){
373   register int p,x,h,n;
374   if(!ky){ GError("GHash::find: NULL key argument.\n"); }
375   if(0<fCount){
376     h=strhash(ky);
377     GASSERT(0<=h);
378     p=HASH1(h,fCapacity);
379     GASSERT(0<=p && p<fCapacity);
380     x=HASH2(h,fCapacity);
381     GASSERT(1<=x && x<fCapacity);
382     GASSERT(fCount<fCapacity);
383     n=fCapacity;
384     while(n && hash[p].hash!=-1){
385       if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
386         return (OBJ*)hash[p].data;
387         }
388       p=(p+x)%fCapacity;
389       n--;
390       }
391     }
392   return NULL;
393   }
394 
395 
startIterate()396 template <class OBJ> void GHash<OBJ>::startIterate() {// initialize a key iterator; call
397  fCurrentEntry=0;
398 }
399 
NextKey()400 template <class OBJ> char* GHash<OBJ>::NextKey() {
401  register int pos=fCurrentEntry;
402  while (pos<fCapacity && hash[pos].hash<0) pos++;
403  if (pos==fCapacity) {
404                  fCurrentEntry=fCapacity;
405                  return NULL;
406                  }
407               else {
408                  fCurrentEntry=pos+1;
409                  return hash[pos].key;
410                  }
411 }
412 
NextData()413 template <class OBJ> OBJ* GHash<OBJ>::NextData() {
414  register int pos=fCurrentEntry;
415  while (pos<fCapacity && hash[pos].hash<0) pos++;
416  if (pos==fCapacity) {
417                  fCurrentEntry=fCapacity;
418                  return NULL;
419                  }
420               else {
421                  fCurrentEntry=pos+1;
422                  return (OBJ*)hash[pos].data;
423                  }
424 
425 }
426 
NextData(char * & nextkey)427 template <class OBJ> OBJ* GHash<OBJ>::NextData(char* &nextkey) {
428  register int pos=fCurrentEntry;
429  while (pos<fCapacity && hash[pos].hash<0) pos++;
430  if (pos==fCapacity) {
431                  fCurrentEntry=fCapacity;
432                  nextkey=NULL;
433                  return NULL;
434                  }
435               else {
436                  fCurrentEntry=pos+1;
437                  nextkey=hash[pos].key;
438                  return (OBJ*)hash[pos].data;
439                  }
440 
441 }
442 
NextEntry()443 template <class OBJ> GHashEntry* GHash<OBJ>::NextEntry() {
444  register int pos=fCurrentEntry;
445  while (pos<fCapacity && hash[pos].hash<0) pos++;
446  if (pos==fCapacity) {
447                  fCurrentEntry=fCapacity;
448                  return NULL;
449                  }
450               else {
451                  fCurrentEntry=pos+1;
452                  return &hash[pos];
453                  }
454 }
455 
456 
457 // Get first non-empty entry
First() const458 template <class OBJ> int GHash<OBJ>::First() const {
459   register int pos=0;
460   while(pos<fCapacity){ if(0<=hash[pos].hash) break; pos++; }
461   GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
462   return pos;
463   }
464 
465 // Get last non-empty entry
Last() const466 template <class OBJ> int GHash<OBJ>::Last() const {
467   register int pos=fCapacity-1;
468   while(0<=pos){ if(0<=hash[pos].hash) break; pos--; }
469   GASSERT(pos<0 || 0<=hash[pos].hash);
470   return pos;
471   }
472 
473 
474 // Find next valid entry
Next(int pos) const475 template <class OBJ> int GHash<OBJ>::Next(int pos) const {
476   GASSERT(0<=pos && pos<fCapacity);
477   while(++pos <= fCapacity-1){ if(0<=hash[pos].hash) break; }
478   GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
479   return pos;
480   }
481 
482 
483 // Find previous valid entry
Prev(int pos) const484 template <class OBJ> int GHash<OBJ>::Prev(int pos) const {
485   GASSERT(0<=pos && pos<fCapacity);
486   while(--pos >= 0){ if(0<=hash[pos].hash) break; }
487   GASSERT(pos<0 || 0<=hash[pos].hash);
488   return pos;
489   }
490 
491 
492 // Remove all
Clear()493 template <class OBJ> void GHash<OBJ>::Clear(){
494   register int i;
495   for(i=0; i<fCapacity; i++){
496     if(hash[i].hash>=0){
497       if (hash[i].keyalloc) GFREE((hash[i].key));
498       if (FREEDATA)
499             (*fFreeProc)(hash[i].data);
500       }
501     }
502   GFREE(hash);
503   GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
504   //reinitialize it
505   for (i=0; i<DEF_HASH_SIZE; i++)
506          hash[i].hash=-1; //this will be an indicator for 'empty' entries
507   fCapacity=DEF_HASH_SIZE;
508   fCount=0;
509   }
510 
511 
512 // Save data
513 /*
514 void GHash::Save(Stream& store) const {
515   Object::save(store);
516   store << fCapacity;
517   store << fCount;
518   for(int i=0; i<fCapacity; i++){
519     store << hash[i].hash;
520     if(hash[i].hash>=0){
521       uint len=strlen(hash[i].key);
522       store << len;
523       store << hash[i].mark;
524       store.save(hash[i].key,len);
525       }
526     }
527   }
528 
529 
530 // Load data
531 void GHash::Load(Stream& store){
532   Object::load(store);
533   store >> fCapacity;
534   store >> fCount;
535   for(int i=0; i<fCapacity; i++){
536     store >> hash[i].hash;
537     if(hash[i].hash>=0){
538       uint len;
539       store >> len;
540       store >> hash[i].mark;
541       GMALLOC(hash[i].key,len+1);
542       store.load(hash[i].key,len);
543       hash[i].key[len]='\0';
544       }
545     }
546   }
547 */
548 
549 // Destroy table
~GHash()550 template <class OBJ> GHash<OBJ>::~GHash(){
551   register int i;
552   for(i=0; i<fCapacity; i++){
553     if(hash[i].hash>=0){
554       if (hash[i].keyalloc) GFREE((hash[i].key));
555       if (FREEDATA) (*fFreeProc)(hash[i].data);
556       }
557     }
558   GFREE(hash);
559   }
560 
561 #endif
562