1 /*
2  *  Copyright (C) 2010-2012  Regents of the University of Michigan
3  *
4  *   This program is free software: you can redistribute it and/or modify
5  *   it under the terms of the GNU General Public License as published by
6  *   the Free Software Foundation, either version 3 of the License, or
7  *   (at your option) any later version.
8  *
9  *   This program is distributed in the hope that it will be useful,
10  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *   GNU General Public License for more details.
13  *
14  *   You should have received a copy of the GNU General Public License
15  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "StringHash.h"
19 #include "InputFile.h"
20 #include "Error.h"
21 
StringHash(int startsize)22 StringHash::StringHash(int startsize)
23     : StringHashBase()
24 {
25     count = 0;
26     size  = startsize;
27     mask  = startsize - 1;
28 
29     // In this implementation, the size of hash tables must be a power of two
30     if (startsize & mask)
31         error("StringHash: Hash table size must be a power of two.\n");
32 
33     strings = new String * [size];
34     objects = new void * [size];
35     keys    = new unsigned int [size];
36 
37     for (unsigned int i = 0; i < size; i++)
38     {
39         strings[i] = NULL;
40         objects[i] = NULL;
41     }
42 };
43 
~StringHash()44 StringHash::~StringHash()
45 {
46     for (unsigned int i = 0; i < size; i++)
47         if (strings[i] != NULL)
48             delete strings[i];
49 
50     if(strings) delete [] strings;
51     if(objects) delete [] objects;
52     if(keys) delete [] keys;
53 }
54 
Clear()55 void StringHash::Clear()
56 {
57     for (unsigned int i = 0; i < size; i++)
58         if (strings[i] != NULL)
59         {
60             delete strings[i];
61             strings[i] = NULL;
62         }
63 
64     count = 0;
65 
66     if (size > 256)
67         SetSize(256);
68 }
69 
SetSize(int newsize)70 void StringHash::SetSize(int newsize)
71 {
72     int newmask = newsize - 1;
73 
74     String   ** newstrings = new String * [newsize];
75     void     ** newobjects = new void * [newsize];
76     unsigned int * newkeys = new unsigned int [newsize];
77 
78     for (int i = 0; i < newsize; i++)
79     {
80         newstrings[i] = NULL;
81         newobjects[i] = NULL;
82     }
83 
84     if (count)
85         for (unsigned int i = 0; i < size; i++)
86             if (strings[i] != NULL)
87             {
88                 unsigned int key = keys[i];
89                 unsigned int h   = key & newmask;
90 
91                 while (newstrings[h] != NULL &&
92                         (newkeys[h] != key ||
93                          (!stringsEqual(*(newstrings[h]), *(strings[i])))))
94                     h = (h + 1) & newmask;
95 
96                 newkeys[h] = key;
97                 newstrings[h] = strings[i];
98                 newobjects[h] = objects[i];
99             }
100 
101     if(strings) delete [] strings;
102     if(objects) delete [] objects;
103     if(keys) delete [] keys;
104 
105     strings = newstrings;
106     objects = newobjects;
107     keys = newkeys;
108     size = newsize;
109     mask = newmask;
110 }
111 
Add(const String & string,void * object)112 int StringHash::Add(const String & string, void * object)
113 {
114     unsigned int key = getKey(string);
115     unsigned int h   = Iterate(key, string);
116 
117     if (strings[h] == NULL)
118         Insert(h, key, string);
119 
120     objects[h] = object;
121 
122     if (count * 2 > size)
123     {
124         Grow();
125         return Iterate(key, string);
126     }
127 
128     return h;
129 }
130 
Find(const String & string,void * (* create_object)())131 int StringHash::Find(const String & string,  void *(*create_object)())
132 {
133     unsigned int key = getKey(string);
134     unsigned int h   = Iterate(key, string);
135 
136     if (strings[h] == NULL && create_object == NULL)
137         return -1;
138 
139     if (strings[h] == NULL && create_object != NULL)
140     {
141         Insert(h, key, string);
142         objects[h] = create_object();
143 
144         if (count * 2 > size)
145         {
146             Grow();
147             return Iterate(key, string);
148         }
149     }
150 
151     return h;
152 }
153 
Find(const String & string) const154 int StringHash::Find(const String & string) const
155 {
156     unsigned int key = getKey(string);
157     unsigned int h   = Iterate(key, string);
158 
159     if (strings[h] == NULL)
160         return -1;
161 
162     return h;
163 }
CreateHash()164 void * StringHash::CreateHash()
165 {
166     return (void *) new StringHash();
167 }
168 
Delete(unsigned int index)169 void StringHash::Delete(unsigned int index)
170 {
171     if (index >= size || strings[index] == NULL)
172         return;
173 
174     delete strings[index];
175     strings[index] = NULL;
176     count--;
177 
178     if (count * 8 < size && size > 32)
179         Shrink();
180     else
181     {
182         // rehash the next strings until we find empty slot
183         index = (index + 1) & mask;
184 
185         while (strings[index] != NULL)
186         {
187             if ((keys[index] & mask) != index)
188             {
189                 unsigned int h = Iterate(keys[index], *strings[index]);
190 
191                 if (h != (unsigned int) index)
192                 {
193                     keys[h] = keys[index];
194                     strings[h] = strings[index];
195                     objects[h] = objects[index];
196 
197                     strings[index] = NULL;
198                     objects[index] = NULL;
199                 }
200             }
201 
202             index = (index + 1) & mask;
203         }
204     }
205 }
206 
ReadLinesFromFile(const char * filename)207 void StringHash::ReadLinesFromFile(const char * filename)
208 {
209     IFILE f = ifopen(filename, "rb");
210     if (f == NULL) return;
211     ReadLinesFromFile(f);
212     ifclose(f);
213 }
214 
ReadLinesFromFile(FILE * f)215 void StringHash::ReadLinesFromFile(FILE * f)
216 {
217     String buffer;
218 
219     while (!feof(f))
220     {
221         buffer.ReadLine(f);
222         Add(buffer.Trim());
223     }
224 }
225 
ReadLinesFromFile(IFILE & f)226 void StringHash::ReadLinesFromFile(IFILE & f)
227 {
228     String buffer;
229 
230     while (!ifeof(f))
231     {
232         buffer.ReadLine(f);
233         Add(buffer.Trim());
234     }
235 }
236 
237 // StringIntHash implementation
238 
StringIntHash(int startsize)239 StringIntHash::StringIntHash(int startsize)
240     : StringHashBase()
241 {
242     count = 0;
243     size  = startsize;
244     mask  = startsize - 1;
245 
246     // In this implementation, the size of hash tables must be a power of two
247     if (startsize & mask)
248         error("StringIntHash: Hash table size must be a power of two.\n");
249 
250     strings  = new String * [size];
251     integers = new int [size];
252     keys     = new unsigned int [size];
253 
254     for (unsigned int i = 0; i < size; i++)
255         strings[i] = NULL;
256 };
257 
~StringIntHash()258 StringIntHash::~StringIntHash()
259 {
260     for (unsigned int i = 0; i < size; i++)
261         if (strings[i] != NULL)
262             delete strings[i];
263 
264     if(strings) delete [] strings;
265     if(integers) delete [] integers;
266     if(keys) delete [] keys;
267 }
268 
SetSize(int newsize)269 void StringIntHash::SetSize(int newsize)
270 {
271     int newmask = newsize - 1;
272 
273     String   ** newstrings = new String * [newsize];
274     int      * newintegers = new int [newsize];
275     unsigned int * newkeys = new unsigned int [newsize];
276 
277     for (int i = 0; i < newsize; i++)
278         newstrings[i] = NULL;
279 
280     for (unsigned int i = 0; i < size; i++)
281         if (strings[i] != NULL)
282         {
283             unsigned int key = keys[i];
284             unsigned int h   = key & newmask;
285 
286             while (newstrings[h] != NULL &&
287                    (newkeys[h] != key || (!stringsEqual(*(newstrings[h]), *(strings[i])))))
288                 h = (h + 1) & newmask;
289 
290             newkeys[h] = key;
291             newstrings[h] = strings[i];
292             newintegers[h] = integers[i];
293         }
294 
295     if(strings) delete [] strings;
296     if(integers) delete [] integers;
297     if(keys) delete [] keys;
298 
299     strings = newstrings;
300     integers = newintegers;
301     keys = newkeys;
302     size = newsize;
303     mask = newmask;
304 }
305 
Clear()306 void StringIntHash::Clear()
307 {
308     for (unsigned int i = 0; i < size; i++)
309         if (strings[i] != NULL)
310         {
311             delete strings[i];
312             strings[i] = NULL;
313         }
314 
315     count = 0;
316 
317     if (size > 256)
318         SetSize(256);
319 }
320 
Add(const String & string,int value)321 int StringIntHash::Add(const String & string, int value)
322 {
323     unsigned int key = getKey(string);
324     unsigned int h   = Iterate(key, string);
325 
326     if (strings[h] == NULL)
327         Insert(h, key, string);
328 
329     integers[h] = value;
330 
331     if (count * 2 > size)
332     {
333         Grow();
334         return Iterate(key, string);
335     }
336 
337     return h;
338 }
339 
Find(const String & string,int defaultValue)340 int StringIntHash::Find(const String & string,  int defaultValue)
341 {
342     unsigned int key = getKey(string);
343     unsigned int h   = Iterate(key, string);
344 
345     if (strings[h] == NULL)
346     {
347         Insert(h, key, string);
348         integers[h] = defaultValue;
349 
350         if (count * 2 > size)
351         {
352             Grow();
353             return Iterate(key, string);
354         }
355     }
356 
357     return h;
358 }
359 
Find(const String & string) const360 int StringIntHash::Find(const String & string) const
361 {
362     unsigned int key = getKey(string);
363     unsigned int h   = Iterate(key, string);
364 
365     if (strings[h] == NULL)
366         return -1;
367 
368     return h;
369 }
370 
Delete(unsigned int index)371 void StringIntHash::Delete(unsigned int index)
372 {
373     if (index >= size || strings[index] == NULL)
374         return;
375 
376     delete strings[index];
377     strings[index] = NULL;
378     count--;
379 
380     if (count * 8 < size && size > 32)
381         Shrink();
382     else
383     {
384         // rehash the next strings until we find empty slot
385         index = (index + 1) & mask;
386 
387         while (strings[index] != NULL)
388         {
389             if ((keys[index] & mask) != index)
390             {
391                 unsigned int h = Iterate(keys[index], *strings[index]);
392 
393                 if (h != (unsigned int) index)
394                 {
395                     keys[h] = keys[index];
396                     strings[h] = strings[index];
397                     integers[h] = integers[index];
398 
399                     strings[index] = NULL;
400                 }
401             }
402 
403             index = (index + 1) & mask;
404         }
405     }
406 }
407 
408 // StringDoubleHash implementation
409 
StringDoubleHash(int startsize)410 StringDoubleHash::StringDoubleHash(int startsize)
411     : StringHashBase()
412 {
413     count = 0;
414     size  = startsize;
415     mask  = startsize - 1;
416 
417     // In this implementation, the size of hash tables must be a power of two
418     if (startsize & mask)
419         error("StringDoubleHash: Hash table size must be a power of two.\n");
420 
421     strings  = new String * [size];
422     doubles  = new double [size];
423     keys     = new unsigned int [size];
424 
425     for (unsigned int i = 0; i < size; i++)
426         strings[i] = NULL;
427 };
428 
~StringDoubleHash()429 StringDoubleHash::~StringDoubleHash()
430 {
431     for (unsigned int i = 0; i < size; i++)
432         if (strings[i] != NULL)
433             delete strings[i];
434 
435     if(strings) delete [] strings;
436     if(doubles) delete [] doubles;
437     if(keys) delete [] keys;
438 }
439 
SetSize(int newsize)440 void StringDoubleHash::SetSize(int newsize)
441 {
442     int newmask = newsize - 1;
443 
444     String   ** newstrings = new String * [newsize];
445     double    * newdoubles = new double [newsize];
446     unsigned int * newkeys = new unsigned int [newsize];
447 
448     for (int i = 0; i < newsize; i++)
449         newstrings[i] = NULL;
450 
451     for (unsigned int i = 0; i < size; i++)
452         if (strings[i] != NULL)
453         {
454             unsigned int key = keys[i];
455             unsigned int h   = key & newmask;
456 
457             while (newstrings[h] != NULL &&
458                     (newkeys[h] != key || (!stringsEqual(*(newstrings[h]), *(strings[i])))))
459                 h = (h + 1) & newmask;
460 
461             newkeys[h] = key;
462             newstrings[h] = strings[i];
463             newdoubles[h] = doubles[i];
464         }
465 
466     if(strings) delete [] strings;
467     if(doubles) delete [] doubles;
468     if(keys) delete [] keys;
469 
470     strings = newstrings;
471     doubles = newdoubles;
472     keys = newkeys;
473     size = newsize;
474     mask = newmask;
475 }
476 
Add(const String & string,double value)477 int StringDoubleHash::Add(const String & string, double value)
478 {
479     unsigned int key = getKey(string);
480     unsigned int h   = Iterate(key, string);
481 
482     if (strings[h] == NULL)
483         Insert(h, key, string);
484 
485     doubles[h] = value;
486 
487     if (count * 2 > size)
488     {
489         Grow();
490         return Iterate(key, string);
491     }
492 
493     return h;
494 }
495 
Find(const String & string,double defaultValue)496 int StringDoubleHash::Find(const String & string, double defaultValue)
497 {
498     unsigned int key = getKey(string);
499     unsigned int h   = Iterate(key, string);
500 
501     if (strings[h] == NULL)
502     {
503         Insert(h, key, string);
504         doubles[h] = defaultValue;
505 
506         if (count * 2 > size)
507         {
508             Grow();
509             return Iterate(key, string);
510         }
511     }
512 
513     return h;
514 }
515 
Find(const String & string) const516 int StringDoubleHash::Find(const String & string) const
517 {
518     unsigned int key = getKey(string);
519     unsigned int h   = Iterate(key, string);
520 
521     if (strings[h] == NULL)
522         return -1;
523 
524     return h;
525 }
526 
Delete(unsigned int index)527 void StringDoubleHash::Delete(unsigned int index)
528 {
529     if (index >= size || strings[index] == NULL)
530         return;
531 
532     delete strings[index];
533     strings[index] = NULL;
534     count--;
535 
536     if (count * 8 < size && size > 32)
537         Shrink();
538     else
539     {
540         // rehash the next strings until we find empty slot
541         index = (index + 1) & mask;
542 
543         while (strings[index] != NULL)
544         {
545             if ((keys[index] & mask) != index)
546             {
547                 unsigned int h = Iterate(keys[index], *strings[index]);
548 
549                 if (h != (unsigned int) index)
550                 {
551                     keys[h] = keys[index];
552                     strings[h] = strings[index];
553                     doubles[h] = doubles[index];
554 
555                     strings[index] = NULL;
556                 }
557             }
558 
559             index = (index + 1) & mask;
560         }
561     }
562 }
563 
Print()564 void StringHash::Print()
565 {
566     Print(stdout);
567 }
568 
Print(const char * filename)569 void StringHash::Print(const char * filename)
570 {
571     FILE * output = fopen(filename, "wt");
572     if (output == NULL)
573         return;
574     Print(output);
575     fclose(output);
576 }
577 
Print(FILE * output)578 void StringHash::Print(FILE * output)
579 {
580     for (unsigned int i = 0; i < size; i++)
581         if (SlotInUse(i))
582             strings[i]->WriteLine(output);
583 }
584 
StringList(char separator)585 String StringHash::StringList(char separator)
586 {
587     String list;
588 
589     for (unsigned int i = 0; i < size; i++)
590         if (SlotInUse(i))
591             list += *strings[i] + separator;
592 
593     list.SetLength(list.Length() - 1);
594 
595     return list;
596 }
597 
GetCount(const String & key) const598 int StringIntHash::GetCount(const String & key) const
599 {
600     int index = Find(key);
601     return index == -1 ?  0 : integers[index];
602 }
603 
IncrementCount(const String & key)604 int StringIntHash::IncrementCount(const String & key)
605 {
606     int index = Find(key);
607 
608     if (index != -1)
609         return ++(integers[index]);
610 
611     SetInteger(key, 1);
612     return 1;
613 }
614 
IncrementCount(const String & key,int amount)615 int StringIntHash::IncrementCount(const String & key, int amount)
616 {
617     int index = Find(key);
618 
619     if (index != -1)
620         return (integers[index] += amount);
621 
622     SetInteger(key, amount);
623     return amount;
624 }
625 
DecrementCount(const String & key)626 int StringIntHash::DecrementCount(const String & key)
627 {
628     int index = Find(key);
629 
630     if (index != -1)
631         return --(integers[index]);
632 
633     SetInteger(key, -1);
634     return -1;
635 }
636 
Clear()637 void StringDoubleHash::Clear()
638 {
639     for (unsigned int i = 0; i < size; i++)
640         if (strings[i] != NULL)
641         {
642             delete strings[i];
643             strings[i] = NULL;
644         }
645 
646     count = 0;
647 
648     if (size > 256)
649         SetSize(256);
650 }
651 
operator =(const StringHash & rhs)652 StringHash & StringHash::operator = (const StringHash & rhs)
653 {
654     Clear();
655 
656     for (int i = 0; i < rhs.Capacity(); i++)
657         if (rhs.SlotInUse(i))
658             Add(*(rhs.strings[i]), rhs.objects[i]);
659 
660     return *this;
661 }
662 
operator =(const StringIntHash & rhs)663 StringIntHash & StringIntHash::operator = (const StringIntHash & rhs)
664 {
665     Clear();
666 
667     for (int i = 0; i < rhs.Capacity(); i++)
668         if (rhs.SlotInUse(i))
669             Add(*(rhs.strings[i]), rhs.integers[i]);
670 
671     return *this;
672 }
673 
operator ==(const StringIntHash & rhs) const674 bool StringIntHash::operator == (const StringIntHash & rhs) const
675 {
676     if (Capacity() != rhs.Capacity()) return false;
677     if (Entries() != rhs.Entries()) return false;
678     for (int i = 0; i < rhs.Capacity(); i++)
679     {
680         if(rhs.SlotInUse(i) != SlotInUse(i))
681         {
682             return(false);
683         }
684         if (rhs.SlotInUse(i))
685         {
686             if(*(strings[i]) != *(rhs.strings[i]))
687             {
688                 return(false);
689             }
690             if(rhs.integers[i] != integers[i])
691             {
692                 return(false);
693             }
694         }
695     }
696     return(true);
697 }
698 
operator =(const StringDoubleHash & rhs)699 StringDoubleHash & StringDoubleHash::operator = (const StringDoubleHash & rhs)
700 {
701     Clear();
702 
703     for (int i = 0; i < rhs.Capacity(); i++)
704         if (rhs.SlotInUse(i))
705             Add(*(rhs.strings[i]), rhs.doubles[i]);
706 
707     return *this;
708 }
709 
Swap(StringHash & s)710 void StringHash::Swap(StringHash & s)
711 {
712     String ** tstrings = s.strings;
713     s.strings = strings;
714     strings = tstrings;
715 
716     void ** tobjects = s.objects;
717     s.objects = objects;
718     objects = tobjects;
719 
720     unsigned int * tkeys = s.keys;
721     s.keys = keys;
722     keys = tkeys;
723 
724     unsigned int temp = s.count;
725     s.count = count;
726     count = temp;
727 
728     temp = s.size;
729     s.size = size;
730     size = temp;
731 
732     temp = s.mask;
733     s.mask = mask;
734     mask = temp;
735 }
736 
737