1 // ================================================================
2 // Array-only (open addressing) string-valued hash set with linear probing for
3 // collisions.
4 //
5 // John Kerl 2012-08-13
6 //
7 // Notes:
8 // * null key is not supported.
9 // * null value is supported.
10 //
11 // See also:
12 // * http://en.wikipedia.org/wiki/Hash_table
13 // * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
14 // ================================================================
15
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19
20 #include "lib/mlr_globals.h"
21 #include "lib/mlrutil.h"
22 #include "containers/hss.h"
23
24 // ----------------------------------------------------------------
25 #define INITIAL_ARRAY_LENGTH 128
26 #define LOAD_FACTOR 0.7
27
28 #define OCCUPIED 444
29 #define DELETED 555
30 #define EMPTY 666
31
32 // ================================================================
hsse_clear(hsse_t * pentry)33 static void hsse_clear(hsse_t *pentry) {
34 pentry->key = NULL;
35 pentry->state = EMPTY;
36 pentry->ideal_index = -1;
37 }
38
39 // ----------------------------------------------------------------
hss_make_alloc_array(int length)40 static hsse_t* hss_make_alloc_array(int length) {
41 hsse_t* array = (hsse_t*)mlr_malloc_or_die(sizeof(hsse_t) * length);
42 for (int i = 0; i < length; i++)
43 hsse_clear(&array[i]);
44 return array;
45 }
46
hss_init(hss_t * pset,int length)47 static void hss_init(hss_t *pset, int length) {
48 pset->num_occupied = 0;
49 pset->num_freed = 0;
50 pset->array_length = length;
51 pset->array = hss_make_alloc_array(length);
52 }
53
hss_alloc()54 hss_t* hss_alloc() {
55 hss_t* pset = mlr_malloc_or_die(sizeof(hss_t));
56 hss_init(pset, INITIAL_ARRAY_LENGTH);
57 return pset;
58 }
59
hss_free(hss_t * pset)60 void hss_free(hss_t* pset) {
61 if (pset == NULL)
62 return;
63 free(pset->array);
64 pset->array = NULL;
65 pset->num_occupied = 0;
66 pset->num_freed = 0;
67 pset->array_length = 0;
68 free(pset);
69 }
70
71 // ----------------------------------------------------------------
72 // Used by get() and remove().
73 // Returns >=0 for where the key is *or* should go (end of chain).
hss_find_index_for_key(hss_t * pset,char * key,int * pideal_index)74 static int hss_find_index_for_key(hss_t* pset, char* key, int* pideal_index) {
75 int hash = mlr_string_hash_func(key);
76 int index = mlr_canonical_mod(hash, pset->array_length);
77 *pideal_index = index;
78 int num_tries = 0;
79
80 while (TRUE) {
81 hsse_t* pe = &pset->array[index];
82 if (pe->state == OCCUPIED) {
83 char* ekey = pe->key;
84 // Existing key found in chain.
85 if (streq(key, ekey))
86 return index;
87 }
88 else if (pe->state == EMPTY) {
89 return index;
90 }
91
92 // If the current entry has been freed, i.e. previously occupied,
93 // the sought index may be further down the chain. So we must
94 // continue looking.
95 if (++num_tries >= pset->array_length) {
96 fprintf(stderr,
97 "%s: internal coding error: table full even after enlargement.\n", MLR_GLOBALS.bargv0);
98 exit(1);
99 }
100
101 // Linear probing.
102 if (++index >= pset->array_length)
103 index = 0;
104 }
105 MLR_INTERNAL_CODING_ERROR();
106 }
107
108 // ----------------------------------------------------------------
109 static void hss_enlarge(hss_t* pset);
110
hss_add(hss_t * pset,char * key)111 void hss_add(hss_t* pset, char* key) {
112 if ((pset->num_occupied + pset->num_freed) >= (pset->array_length*LOAD_FACTOR))
113 hss_enlarge(pset);
114
115 int ideal_index = 0;
116 int index = hss_find_index_for_key(pset, key, &ideal_index);
117 hsse_t* pe = &pset->array[index];
118
119 if (pe->state == OCCUPIED) {
120 // Existing key found in chain. Chaining already handled by hss_find_index_for_key.
121 return;
122 }
123 else if (pe->state == EMPTY) {
124 // End of chain.
125 pe->key = key;
126 pe->state = OCCUPIED;
127 pe->ideal_index = ideal_index;
128 pset->num_occupied++;
129 }
130 else {
131 fprintf(stderr, "hss_find_index_for_key did not find end of chain.\n");
132 exit(1);
133 }
134 }
135
136 // ----------------------------------------------------------------
hss_enlarge(hss_t * pset)137 static void hss_enlarge(hss_t* pset) {
138 int old_array_length = pset->array_length;
139 hsse_t* old_array = pset->array;
140
141 hss_init(pset, pset->array_length*2);
142
143 for (int index = 0; index < old_array_length; index++) {
144 hsse_t e = old_array[index];
145 if (e.state == OCCUPIED)
146 hss_add(pset, e.key);
147 }
148
149 free(old_array);
150 }
151
152 // ----------------------------------------------------------------
hss_has(hss_t * pset,char * key)153 int hss_has(hss_t* pset, char* key) {
154 int ideal_index = 0;
155 int index = hss_find_index_for_key(pset, key, &ideal_index);
156 hsse_t* pe = &pset->array[index];
157
158 if (pe->state == OCCUPIED)
159 return TRUE;
160 else if (pe->state == EMPTY)
161 return FALSE;
162 else {
163 fprintf(stderr, "hss_find_index_for_key did not find end of chain.\n");
164 exit(1);
165 }
166 }
167
168 // ----------------------------------------------------------------
hss_remove(hss_t * pset,char * key)169 void hss_remove(hss_t* pset, char* key) {
170 int ideal_index = 0;
171 int index = hss_find_index_for_key(pset, key, &ideal_index);
172 hsse_t* pe = &pset->array[index];
173 if (pe->state == OCCUPIED) {
174 pe->key = NULL;
175 pe->state = DELETED;
176 pe->ideal_index = -1;
177 pset->num_freed++;
178 pset->num_occupied--;
179 }
180 else if (pe->state == EMPTY) {
181 }
182 else {
183 fprintf(stderr, "hss_find_index_for_key did not find end of chain.\n");
184 exit(1);
185 }
186 }
187
188 // ----------------------------------------------------------------
hss_clear(hss_t * pset)189 void hss_clear(hss_t* pset) {
190 for (int i = 0; i < pset->array_length; i++) {
191 hsse_clear(&pset->array[i]);
192 }
193 pset->num_occupied = 0;
194 pset->num_freed = 0;
195 }
196
hss_size(hss_t * pset)197 int hss_size(hss_t* pset) {
198 return pset->num_occupied;
199 }
200
201 // ----------------------------------------------------------------
hss_check_counts(hss_t * pset)202 int hss_check_counts(hss_t* pset) {
203 int nocc = 0;
204 int ndel = 0;
205 for (int index = 0; index < pset->array_length; index++) {
206 hsse_t* pe = &pset->array[index];
207 if (pe->state == OCCUPIED)
208 nocc++;
209 else if (pe->state == DELETED)
210 ndel++;
211 }
212 if (nocc != pset->num_occupied) {
213 fprintf(stderr,
214 "occupancy-count mismatch: actual %d != cached %d.\n",
215 nocc, pset->num_occupied);
216 return FALSE;
217 }
218 if (ndel != pset->num_freed) {
219 fprintf(stderr,
220 "freed-count mismatch: actual %d != cached %d.\n",
221 ndel, pset->num_freed);
222 return FALSE;
223 }
224 return TRUE;
225 }
226
227 // ----------------------------------------------------------------
get_state_name(int state)228 static char* get_state_name(int state) {
229 switch(state) {
230 case OCCUPIED: return "occupied"; break;
231 case DELETED: return "freed"; break;
232 case EMPTY: return "empty"; break;
233 default: return "?????"; break;
234 }
235 }
236
hss_print(hss_t * pset)237 void hss_print(hss_t* pset) {
238 for (int index = 0; index < pset->array_length; index++) {
239 hsse_t* pe = &pset->array[index];
240
241 const char* key_string = (pe == NULL) ? "none" :
242 pe->key == NULL ? "null" :
243 pe->key;
244
245 printf(
246 "| stt: %-8s | idx: %6d | nidx: %6d | key: %12s |\n",
247 get_state_name(pe->state), index, pe->ideal_index, key_string);
248 }
249 }
250