1 // ================================================================
2 // Array-only (open addressing) string-to-string linked hash map with linear
3 // probing for collisions.
4 //
5 // Keys are not strduped.
6 //
7 // John Kerl 2012-08-13
8 //
9 // Notes:
10 // * null key is not supported.
11 // * null value is supported.
12 //
13 // See also:
14 // * http://en.wikipedia.org/wiki/Hash_table
15 // * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
16 // ================================================================
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 
22 #include "lib/mlr_globals.h"
23 #include "lib/mlrutil.h"
24 #include "containers/lhmsll.h"
25 #include "lib/free_flags.h"
26 
27 // ----------------------------------------------------------------
28 // Allow compile-time override, e.g using gcc -D.
29 #ifndef INITIAL_ARRAY_LENGTH
30 #define INITIAL_ARRAY_LENGTH 16
31 #endif
32 
33 #ifndef LOAD_FACTOR
34 #define LOAD_FACTOR          0.7
35 #endif
36 
37 #ifndef ENLARGEMENT_FACTOR
38 #define ENLARGEMENT_FACTOR   2
39 #endif
40 
41 // ----------------------------------------------------------------
42 #define OCCUPIED 0xa4
43 #define DELETED  0xb8
44 #define EMPTY    0xce
45 
46 // ----------------------------------------------------------------
47 static void lhmsll_put_no_enlarge(lhmsll_t* pmap, char* key, int value, char free_flags);
48 static void lhmsll_enlarge(lhmsll_t* pmap);
49 
50 // ================================================================
lhmsll_init(lhmsll_t * pmap,int length)51 static void lhmsll_init(lhmsll_t *pmap, int length) {
52 	pmap->num_occupied = 0;
53 	pmap->num_freed    = 0;
54 	pmap->array_length = length;
55 
56 	pmap->entries      = (lhmslle_t*)mlr_malloc_or_die(sizeof(lhmslle_t) * length);
57 	// Don't do lhmslle_clear() of all entries at init time, since this has a
58 	// drastic effect on the time needed to construct an empty map (and miller
59 	// constructs an awful lot of those). The attributes there are don't-cares
60 	// if the corresponding entry state is EMPTY. They are set on put, and
61 	// mutated on remove.
62 
63 	pmap->states       = (lhmslle_state_t*)mlr_malloc_or_die(sizeof(lhmslle_state_t) * length);
64 	memset(pmap->states, EMPTY, length);
65 
66 	pmap->phead        = NULL;
67 	pmap->ptail        = NULL;
68 }
69 
lhmsll_alloc()70 lhmsll_t* lhmsll_alloc() {
71 	lhmsll_t* pmap = mlr_malloc_or_die(sizeof(lhmsll_t));
72 	lhmsll_init(pmap, INITIAL_ARRAY_LENGTH);
73 	return pmap;
74 }
75 
lhmsll_copy(lhmsll_t * pmap)76 lhmsll_t* lhmsll_copy(lhmsll_t* pmap) {
77 	lhmsll_t* pnew = lhmsll_alloc();
78 	for (lhmslle_t* pe = pmap->phead; pe != NULL; pe = pe->pnext)
79 		lhmsll_put(pnew, mlr_strdup_or_die(pe->key), pe->value, FREE_ENTRY_KEY);
80 	return pnew;
81 }
82 
lhmsll_free(lhmsll_t * pmap)83 void lhmsll_free(lhmsll_t* pmap) {
84 	if (pmap == NULL)
85 		return;
86 	for (lhmslle_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
87 		if (pe->free_flags & FREE_ENTRY_KEY)
88 			free(pe->key);
89 	}
90 	free(pmap->entries);
91 	free(pmap->states);
92 	pmap->entries      = NULL;
93 	pmap->num_occupied = 0;
94 	pmap->num_freed    = 0;
95 	pmap->array_length = 0;
96 	free(pmap);
97 }
98 
99 // ----------------------------------------------------------------
100 // Used by get() and remove().
101 // Returns >=0 for where the key is *or* should go (end of chain).
lhmsll_find_index_for_key(lhmsll_t * pmap,char * key,int * pideal_index)102 static int lhmsll_find_index_for_key(lhmsll_t* pmap, char* key, int* pideal_index) {
103 	int hash = mlr_string_hash_func(key);
104 	int index = mlr_canonical_mod(hash, pmap->array_length);
105 	*pideal_index = index;
106 	int num_tries = 0;
107 
108 	while (TRUE) {
109 		lhmslle_t* pe = &pmap->entries[index];
110 		if (pmap->states[index] == OCCUPIED) {
111 			char* ekey = pe->key;
112 			// Existing key found in chain.
113 			if (streq(key, ekey))
114 				return index;
115 		}
116 		else if (pmap->states[index] == EMPTY) {
117 			return index;
118 		}
119 
120 		// If the current entry has been deleted, i.e. previously occupied,
121 		// the sought index may be further down the chain.  So we must
122 		// continue looking.
123 		if (++num_tries >= pmap->array_length) {
124 			fprintf(stderr,
125 				"%s: internal coding error: table full even after enlargement.\n", MLR_GLOBALS.bargv0);
126 			exit(1);
127 		}
128 
129 		// Linear probing.
130 		if (++index >= pmap->array_length)
131 			index = 0;
132 	}
133 	MLR_INTERNAL_CODING_ERROR();
134 	return -1; // not reached
135 }
136 
137 // ----------------------------------------------------------------
lhmsll_put(lhmsll_t * pmap,char * key,int value,char free_flags)138 void lhmsll_put(lhmsll_t* pmap, char* key, int value, char free_flags) {
139 	if ((pmap->num_occupied + pmap->num_freed) >= (pmap->array_length*LOAD_FACTOR))
140 		lhmsll_enlarge(pmap);
141 	lhmsll_put_no_enlarge(pmap, key, value, free_flags);
142 }
143 
lhmsll_put_no_enlarge(lhmsll_t * pmap,char * key,int value,char free_flags)144 static void lhmsll_put_no_enlarge(lhmsll_t* pmap, char* key, int value, char free_flags) {
145 	int ideal_index = 0;
146 	int index = lhmsll_find_index_for_key(pmap, key, &ideal_index);
147 	lhmslle_t* pe = &pmap->entries[index];
148 
149 	if (pmap->states[index] == OCCUPIED) {
150 		// Existing key found in chain; put value.
151 		pe->value = value;
152 
153 	} else if (pmap->states[index] == EMPTY) {
154 		// End of chain.
155 		pe->ideal_index = ideal_index;
156 		pe->key = key;
157 		pe->value = value;
158 		pe->free_flags = free_flags;
159 		pmap->states[index] = OCCUPIED;
160 
161 		if (pmap->phead == NULL) {
162 			pe->pprev   = NULL;
163 			pe->pnext   = NULL;
164 			pmap->phead = pe;
165 			pmap->ptail = pe;
166 		} else {
167 			pe->pprev   = pmap->ptail;
168 			pe->pnext   = NULL;
169 			pmap->ptail->pnext = pe;
170 			pmap->ptail = pe;
171 		}
172 		pmap->num_occupied++;
173 
174 	} else {
175 		fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
176 		exit(1);
177 	}
178 }
179 
180 // ----------------------------------------------------------------
lhmsll_get(lhmsll_t * pmap,char * key)181 long long lhmsll_get(lhmsll_t* pmap, char* key) {
182 	int ideal_index = 0;
183 	int index = lhmsll_find_index_for_key(pmap, key, &ideal_index);
184 	lhmslle_t* pe = &pmap->entries[index];
185 
186 	if (pmap->states[index] == OCCUPIED)
187 		return pe->value;
188 	else if (pmap->states[index] == EMPTY)
189 		return -999; // caller must do lhmsll_has_key to check validity
190 	else {
191 		fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
192 		exit(1);
193 	}
194 }
195 
196 // ----------------------------------------------------------------
lhmsll_test_and_get(lhmsll_t * pmap,char * key,long long * pval)197 int lhmsll_test_and_get(lhmsll_t* pmap, char* key, long long* pval) {
198 	int ideal_index = 0;
199 	int index = lhmsll_find_index_for_key(pmap, key, &ideal_index);
200 	lhmslle_t* pe = &pmap->entries[index];
201 
202 	if (pmap->states[index] == OCCUPIED) {
203 		*pval = pe->value;
204 		return TRUE;
205 	} else if (pmap->states[index] == EMPTY) {
206 		return FALSE;
207 	} else {
208 		fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
209 		exit(1);
210 	}
211 }
212 
lhmsll_test_and_increment(lhmsll_t * pmap,char * key)213 int lhmsll_test_and_increment(lhmsll_t* pmap, char* key) {
214 	int ideal_index = 0;
215 	int index = lhmsll_find_index_for_key(pmap, key, &ideal_index);
216 	lhmslle_t* pe = &pmap->entries[index];
217 
218 	if (pmap->states[index] == OCCUPIED) {
219 		pe->value++;
220 		return TRUE;
221 	} else if (pmap->states[index] == EMPTY) {
222 		return FALSE;
223 	} else {
224 		fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
225 		exit(1);
226 	}
227 }
228 
lhmsll_get_entry(lhmsll_t * pmap,char * key)229 lhmslle_t* lhmsll_get_entry(lhmsll_t* pmap, char* key) {
230 	int ideal_index = 0;
231 	int index = lhmsll_find_index_for_key(pmap, key, &ideal_index);
232 	lhmslle_t* pe = &pmap->entries[index];
233 
234 	if (pmap->states[index] == OCCUPIED)
235 		return pe;
236 	else if (pmap->states[index] == EMPTY)
237 		return NULL;
238 	else {
239 		fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
240 		exit(1);
241 	}
242 }
243 
244 // ----------------------------------------------------------------
lhmsll_has_key(lhmsll_t * pmap,char * key)245 int lhmsll_has_key(lhmsll_t* pmap, char* key) {
246 	int ideal_index = 0;
247 	int index = lhmsll_find_index_for_key(pmap, key, &ideal_index);
248 
249 	if (pmap->states[index] == OCCUPIED)
250 		return TRUE;
251 	else if (pmap->states[index] == EMPTY)
252 		return FALSE;
253 	else {
254 		fprintf(stderr, "%s: lhmsll_find_index_for_key did not find end of chain.\n", MLR_GLOBALS.bargv0);
255 		exit(1);
256 	}
257 }
258 
259 // ----------------------------------------------------------------
lhmsll_rename(lhmsll_t * pmap,char * old_key,char * new_key)260 void  lhmsll_rename(lhmsll_t* pmap, char* old_key, char* new_key) {
261 	fprintf(stderr, "rename is not supported in the hashed-record impl.\n");
262 	exit(1);
263 }
264 
265 // ----------------------------------------------------------------
lhmsll_enlarge(lhmsll_t * pmap)266 static void lhmsll_enlarge(lhmsll_t* pmap) {
267 	lhmslle_t*       old_entries = pmap->entries;
268 	lhmslle_state_t* old_states  = pmap->states;
269 	lhmslle_t*       old_head    = pmap->phead;
270 
271 	lhmsll_init(pmap, pmap->array_length*ENLARGEMENT_FACTOR);
272 
273 	for (lhmslle_t* pe = old_head; pe != NULL; pe = pe->pnext) {
274 		lhmsll_put_no_enlarge(pmap, pe->key, pe->value, pe->free_flags);
275 	}
276 	free(old_entries);
277 	free(old_states);
278 }
279 
280 // ----------------------------------------------------------------
lhmsll_check_counts(lhmsll_t * pmap)281 int lhmsll_check_counts(lhmsll_t* pmap) {
282 	int nocc = 0;
283 	int ndel = 0;
284 	for (int index = 0; index < pmap->array_length; index++) {
285 		if (pmap->states[index] == OCCUPIED)
286 			nocc++;
287 		else if (pmap->states[index] == DELETED)
288 			ndel++;
289 	}
290 	if (nocc != pmap->num_occupied) {
291 		fprintf(stderr,
292 			"occupancy-count mismatch:  actual %d != cached  %d.\n",
293 				nocc, pmap->num_occupied);
294 		return FALSE;
295 	}
296 	if (ndel != pmap->num_freed) {
297 		fprintf(stderr,
298 			"deleted-count mismatch:  actual %d != cached  %d.\n",
299 				ndel, pmap->num_freed);
300 		return FALSE;
301 	}
302 	return TRUE;
303 }
304 
305 // ----------------------------------------------------------------
get_state_name(int state)306 static char* get_state_name(int state) {
307 	switch(state) {
308 	case OCCUPIED: return "occupied"; break;
309 	case DELETED:  return "deleted";  break;
310 	case EMPTY:    return "empty";    break;
311 	default:       return "?????";    break;
312 	}
313 }
314 
lhmsll_print(lhmsll_t * pmap)315 void lhmsll_print(lhmsll_t* pmap) {
316 	for (int index = 0; index < pmap->array_length; index++) {
317 		lhmslle_t* pe = &pmap->entries[index];
318 
319 		const char* key_string = (pe == NULL) ? "none" :
320 			pe->key == NULL ? "null" :
321 			pe->key;
322 
323 		printf(
324 		"| stt: %-8s  | idx: %6d | nidx: %6d | key: %12s | value: %8lld |\n",
325 			get_state_name(pmap->states[index]), index, pe->ideal_index, key_string, pe->value);
326 	}
327 	printf("+\n");
328 	printf("| phead: %p | ptail %p\n", pmap->phead, pmap->ptail);
329 	printf("+\n");
330 	for (lhmslle_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
331 		const char* key_string = (pe == NULL) ? "none" :
332 			pe->key == NULL ? "null" :
333 			pe->key;
334 		printf(
335 		"| prev: %p curr: %p next: %p | nidx: %6d | key: %12s | value: %8lld |\n",
336 			pe->pprev, pe, pe->pnext,
337 			pe->ideal_index, key_string, pe->value);
338 	}
339 }
340