1 // ================================================================
2 // Array-only (open addressing) string-to-string linked hash map with linear
3 // probing for collisions.
4 //
5 // Keys are not strduped.
6 //
7 // John Kerl 2012-08-13
8 //
9 // Notes:
10 // * null key is not supported.
11 // * null value is supported.
12 //
13 // See also:
14 // * http://en.wikipedia.org/wiki/Hash_table
15 // * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
16 // ================================================================
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21
22 #include "lib/mlr_globals.h"
23 #include "lib/mlrutil.h"
24 #include "containers/lhmsll.h"
25 #include "lib/free_flags.h"
26
27 // ----------------------------------------------------------------
28 // Allow compile-time override, e.g using gcc -D.
29 #ifndef INITIAL_ARRAY_LENGTH
30 #define INITIAL_ARRAY_LENGTH 16
31 #endif
32
33 #ifndef LOAD_FACTOR
34 #define LOAD_FACTOR 0.7
35 #endif
36
37 #ifndef ENLARGEMENT_FACTOR
38 #define ENLARGEMENT_FACTOR 2
39 #endif
40
41 // ----------------------------------------------------------------
42 #define OCCUPIED 0xa4
43 #define DELETED 0xb8
44 #define EMPTY 0xce
45
46 // ----------------------------------------------------------------
47 static void lhmsll_put_no_enlarge(lhmsll_t* pmap, char* key, int value, char free_flags);
48 static void lhmsll_enlarge(lhmsll_t* pmap);
49
50 // ================================================================
lhmsll_init(lhmsll_t * pmap,int length)51 static void lhmsll_init(lhmsll_t *pmap, int length) {
52 pmap->num_occupied = 0;
53 pmap->num_freed = 0;
54 pmap->array_length = length;
55
56 pmap->entries = (lhmslle_t*)mlr_malloc_or_die(sizeof(lhmslle_t) * length);
57 // Don't do lhmslle_clear() of all entries at init time, since this has a
58 // drastic effect on the time needed to construct an empty map (and miller
59 // constructs an awful lot of those). The attributes there are don't-cares
60 // if the corresponding entry state is EMPTY. They are set on put, and
61 // mutated on remove.
62
63 pmap->states = (lhmslle_state_t*)mlr_malloc_or_die(sizeof(lhmslle_state_t) * length);
64 memset(pmap->states, EMPTY, length);
65
66 pmap->phead = NULL;
67 pmap->ptail = NULL;
68 }
69
lhmsll_alloc()70 lhmsll_t* lhmsll_alloc() {
71 lhmsll_t* pmap = mlr_malloc_or_die(sizeof(lhmsll_t));
72 lhmsll_init(pmap, INITIAL_ARRAY_LENGTH);
73 return pmap;
74 }
75
lhmsll_copy(lhmsll_t * pmap)76 lhmsll_t* lhmsll_copy(lhmsll_t* pmap) {
77 lhmsll_t* pnew = lhmsll_alloc();
78 for (lhmslle_t* pe = pmap->phead; pe != NULL; pe = pe->pnext)
79 lhmsll_put(pnew, mlr_strdup_or_die(pe->key), pe->value, FREE_ENTRY_KEY);
80 return pnew;
81 }
82
lhmsll_free(lhmsll_t * pmap)83 void lhmsll_free(lhmsll_t* pmap) {
84 if (pmap == NULL)
85 return;
86 for (lhmslle_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
87 if (pe->free_flags & FREE_ENTRY_KEY)
88 free(pe->key);
89 }
90 free(pmap->entries);
91 free(pmap->states);
92 pmap->entries = NULL;
93 pmap->num_occupied = 0;
94 pmap->num_freed = 0;
95 pmap->array_length = 0;
96 free(pmap);
97 }
98
99 // ----------------------------------------------------------------
100 // Used by get() and remove().
101 // Returns >=0 for where the key is *or* should go (end of chain).
lhmsll_find_index_for_key(lhmsll_t * pmap,char * key,int * pideal_index)102 static int lhmsll_find_index_for_key(lhmsll_t* pmap, char* key, int* pideal_index) {
103 int hash = mlr_string_hash_func(key);
104 int index = mlr_canonical_mod(hash, pmap->array_length);
105 *pideal_index = index;
106 int num_tries = 0;
107
108 while (TRUE) {
109 lhmslle_t* pe = &pmap->entries[index];
110 if (pmap->states[index] == OCCUPIED) {
111 char* ekey = pe->key;
112 // Existing key found in chain.
113 if (streq(key, ekey))
114 return index;
115 }
116 else if (pmap->states[index] == EMPTY) {
117 return index;
118 }
119
120 // If the current entry has been deleted, i.e. previously occupied,
121 // the sought index may be further down the chain. So we must
122 // continue looking.
123 if (++num_tries >= pmap->array_length) {
124 fprintf(stderr,
125 "%s: internal coding error: table full even after enlargement.\n", MLR_GLOBALS.bargv0);
126 exit(1);
127 }
128
129 // Linear probing.
130 if (++index >= pmap->array_length)
131 index = 0;
132 }
133 MLR_INTERNAL_CODING_ERROR();
134 return -1; // not reached
135 }
136
137 // ----------------------------------------------------------------
lhmsll_put(lhmsll_t * pmap,char * key,int value,char free_flags)138 void lhmsll_put(lhmsll_t* pmap, char* key, int value, char free_flags) {
139 if ((pmap->num_occupied + pmap->num_freed) >= (pmap->array_length*LOAD_FACTOR))
140 lhmsll_enlarge(pmap);
141 lhmsll_put_no_enlarge(pmap, key, value, free_flags);
142 }
143
lhmsll_put_no_enlarge(lhmsll_t * pmap,char * key,int value,char free_flags)144 static void lhmsll_put_no_enlarge(lhmsll_t* pmap, char* key, int value, char free_flags) {
145 int ideal_index = 0;
146 int index = lhmsll_find_index_for_key(pmap, key, &ideal_index);
147 lhmslle_t* pe = &pmap->entries[index];
148
149 if (pmap->states[index] == OCCUPIED) {
150 // Existing key found in chain; put value.
151 pe->value = value;
152
153 } else if (pmap->states[index] == EMPTY) {
154 // End of chain.
155 pe->ideal_index = ideal_index;
156 pe->key = key;
157 pe->value = value;
158 pe->free_flags = free_flags;
159 pmap->states[index] = OCCUPIED;
160
161 if (pmap->phead == NULL) {
162 pe->pprev = NULL;
163 pe->pnext = NULL;
164 pmap->phead = pe;
165 pmap->ptail = pe;
166 } else {
167 pe->pprev = pmap->ptail;
168 pe->pnext = NULL;
169 pmap->ptail->pnext = pe;
170 pmap->ptail = pe;
171 }
172 pmap->num_occupied++;
173
174 } else {
175 fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
176 exit(1);
177 }
178 }
179
180 // ----------------------------------------------------------------
lhmsll_get(lhmsll_t * pmap,char * key)181 long long lhmsll_get(lhmsll_t* pmap, char* key) {
182 int ideal_index = 0;
183 int index = lhmsll_find_index_for_key(pmap, key, &ideal_index);
184 lhmslle_t* pe = &pmap->entries[index];
185
186 if (pmap->states[index] == OCCUPIED)
187 return pe->value;
188 else if (pmap->states[index] == EMPTY)
189 return -999; // caller must do lhmsll_has_key to check validity
190 else {
191 fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
192 exit(1);
193 }
194 }
195
196 // ----------------------------------------------------------------
lhmsll_test_and_get(lhmsll_t * pmap,char * key,long long * pval)197 int lhmsll_test_and_get(lhmsll_t* pmap, char* key, long long* pval) {
198 int ideal_index = 0;
199 int index = lhmsll_find_index_for_key(pmap, key, &ideal_index);
200 lhmslle_t* pe = &pmap->entries[index];
201
202 if (pmap->states[index] == OCCUPIED) {
203 *pval = pe->value;
204 return TRUE;
205 } else if (pmap->states[index] == EMPTY) {
206 return FALSE;
207 } else {
208 fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
209 exit(1);
210 }
211 }
212
lhmsll_test_and_increment(lhmsll_t * pmap,char * key)213 int lhmsll_test_and_increment(lhmsll_t* pmap, char* key) {
214 int ideal_index = 0;
215 int index = lhmsll_find_index_for_key(pmap, key, &ideal_index);
216 lhmslle_t* pe = &pmap->entries[index];
217
218 if (pmap->states[index] == OCCUPIED) {
219 pe->value++;
220 return TRUE;
221 } else if (pmap->states[index] == EMPTY) {
222 return FALSE;
223 } else {
224 fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
225 exit(1);
226 }
227 }
228
lhmsll_get_entry(lhmsll_t * pmap,char * key)229 lhmslle_t* lhmsll_get_entry(lhmsll_t* pmap, char* key) {
230 int ideal_index = 0;
231 int index = lhmsll_find_index_for_key(pmap, key, &ideal_index);
232 lhmslle_t* pe = &pmap->entries[index];
233
234 if (pmap->states[index] == OCCUPIED)
235 return pe;
236 else if (pmap->states[index] == EMPTY)
237 return NULL;
238 else {
239 fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
240 exit(1);
241 }
242 }
243
244 // ----------------------------------------------------------------
lhmsll_has_key(lhmsll_t * pmap,char * key)245 int lhmsll_has_key(lhmsll_t* pmap, char* key) {
246 int ideal_index = 0;
247 int index = lhmsll_find_index_for_key(pmap, key, &ideal_index);
248
249 if (pmap->states[index] == OCCUPIED)
250 return TRUE;
251 else if (pmap->states[index] == EMPTY)
252 return FALSE;
253 else {
254 fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
255 exit(1);
256 }
257 }
258
259 // ----------------------------------------------------------------
lhmsll_rename(lhmsll_t * pmap,char * old_key,char * new_key)260 void lhmsll_rename(lhmsll_t* pmap, char* old_key, char* new_key) {
261 fprintf(stderr, "rename is not supported in the hashed-record impl.\n");
262 exit(1);
263 }
264
265 // ----------------------------------------------------------------
lhmsll_enlarge(lhmsll_t * pmap)266 static void lhmsll_enlarge(lhmsll_t* pmap) {
267 lhmslle_t* old_entries = pmap->entries;
268 lhmslle_state_t* old_states = pmap->states;
269 lhmslle_t* old_head = pmap->phead;
270
271 lhmsll_init(pmap, pmap->array_length*ENLARGEMENT_FACTOR);
272
273 for (lhmslle_t* pe = old_head; pe != NULL; pe = pe->pnext) {
274 lhmsll_put_no_enlarge(pmap, pe->key, pe->value, pe->free_flags);
275 }
276 free(old_entries);
277 free(old_states);
278 }
279
280 // ----------------------------------------------------------------
lhmsll_check_counts(lhmsll_t * pmap)281 int lhmsll_check_counts(lhmsll_t* pmap) {
282 int nocc = 0;
283 int ndel = 0;
284 for (int index = 0; index < pmap->array_length; index++) {
285 if (pmap->states[index] == OCCUPIED)
286 nocc++;
287 else if (pmap->states[index] == DELETED)
288 ndel++;
289 }
290 if (nocc != pmap->num_occupied) {
291 fprintf(stderr,
292 "occupancy-count mismatch: actual %d != cached %d.\n",
293 nocc, pmap->num_occupied);
294 return FALSE;
295 }
296 if (ndel != pmap->num_freed) {
297 fprintf(stderr,
298 "deleted-count mismatch: actual %d != cached %d.\n",
299 ndel, pmap->num_freed);
300 return FALSE;
301 }
302 return TRUE;
303 }
304
305 // ----------------------------------------------------------------
get_state_name(int state)306 static char* get_state_name(int state) {
307 switch(state) {
308 case OCCUPIED: return "occupied"; break;
309 case DELETED: return "deleted"; break;
310 case EMPTY: return "empty"; break;
311 default: return "?????"; break;
312 }
313 }
314
lhmsll_print(lhmsll_t * pmap)315 void lhmsll_print(lhmsll_t* pmap) {
316 for (int index = 0; index < pmap->array_length; index++) {
317 lhmslle_t* pe = &pmap->entries[index];
318
319 const char* key_string = (pe == NULL) ? "none" :
320 pe->key == NULL ? "null" :
321 pe->key;
322
323 printf(
324 "| stt: %-8s | idx: %6d | nidx: %6d | key: %12s | value: %8lld |\n",
325 get_state_name(pmap->states[index]), index, pe->ideal_index, key_string, pe->value);
326 }
327 printf("+\n");
328 printf("| phead: %p | ptail %p\n", pmap->phead, pmap->ptail);
329 printf("+\n");
330 for (lhmslle_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
331 const char* key_string = (pe == NULL) ? "none" :
332 pe->key == NULL ? "null" :
333 pe->key;
334 printf(
335 "| prev: %p curr: %p next: %p | nidx: %6d | key: %12s | value: %8lld |\n",
336 pe->pprev, pe, pe->pnext,
337 pe->ideal_index, key_string, pe->value);
338 }
339 }
340