1 // ================================================================
2 // Array-only (open addressing) string-list-to-void-star linked hash map with
3 // linear probing for collisions.
4 //
5 // John Kerl 2014-12-22
6 //
7 // Notes:
8 // * null key is not supported.
9 // * null value is not supported.
10 //
11 // See also:
12 // * http://en.wikipedia.org/wiki/Hash_table
13 // * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
14 // ================================================================
15 
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 
20 #include "lib/mlr_globals.h"
21 #include "lib/mlrutil.h"
22 #include "containers/lhms2v.h"
23 
24 // ----------------------------------------------------------------
25 // Allow compile-time override, e.g using gcc -D.
26 #ifndef INITIAL_ARRAY_LENGTH
27 #define INITIAL_ARRAY_LENGTH 16
28 #endif
29 
30 #ifndef LOAD_FACTOR
31 #define LOAD_FACTOR          0.7
32 #endif
33 
34 #ifndef ENLARGEMENT_FACTOR
35 #define ENLARGEMENT_FACTOR   2
36 #endif
37 
38 // ----------------------------------------------------------------
39 #define OCCUPIED 0xa4
40 #define DELETED  0xb8
41 #define EMPTY    0xce
42 
43 // ----------------------------------------------------------------
44 static void* lhms2v_put_no_enlarge(lhms2v_t* pmap, char* key1, char* key2, void* pvvalue, char free_flags);
45 static void lhms2v_enlarge(lhms2v_t* pmap);
46 
47 // ================================================================
lhms2v_init(lhms2v_t * pmap,int length)48 static void lhms2v_init(lhms2v_t *pmap, int length) {
49 	pmap->num_occupied = 0;
50 	pmap->num_freed    = 0;
51 	pmap->array_length = length;
52 
53 	pmap->entries = (lhms2ve_t*)mlr_malloc_or_die(sizeof(lhms2ve_t) * length);
54 	// Don't do lhms2ve_clear() of all entries at init time, since this has a
55 	// drastic effect on the time needed to construct an empty map (and miller
56 	// constructs an awful lot of those). The attributes there are don't-cares
57 	// if the corresponding entry state is EMPTY. They are set on put, and
58 	// mutated on remove.
59 
60 	pmap->states       = (lhms2ve_state_t*)mlr_malloc_or_die(sizeof(lhms2ve_state_t) * length);
61 	memset(pmap->states, EMPTY, length);
62 
63 	pmap->phead        = NULL;
64 	pmap->ptail        = NULL;
65 }
66 
lhms2v_alloc()67 lhms2v_t* lhms2v_alloc() {
68 	lhms2v_t* pmap = mlr_malloc_or_die(sizeof(lhms2v_t));
69 	lhms2v_init(pmap, INITIAL_ARRAY_LENGTH);
70 	return pmap;
71 }
72 
73 // void-star payloads should first be freed by the caller.
lhms2v_free(lhms2v_t * pmap)74 void lhms2v_free(lhms2v_t* pmap) {
75 	if (pmap == NULL)
76 		return;
77 	for (lhms2ve_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
78 		if (pe->free_flags & FREE_ENTRY_KEY) {
79 			free(pe->key1);
80 			free(pe->key2);
81 		}
82 	}
83 	free(pmap->entries);
84 	free(pmap->states);
85 	pmap->entries      = NULL;
86 	pmap->num_occupied = 0;
87 	pmap->num_freed    = 0;
88 	pmap->array_length = 0;
89 	free(pmap);
90 }
91 
92 // ----------------------------------------------------------------
93 // Used by get() and remove().
94 // Returns >=0 for where the key is *or* should go (end of chain).
lhms2v_find_index_for_key(lhms2v_t * pmap,char * key1,char * key2,int * pideal_index)95 static int lhms2v_find_index_for_key(lhms2v_t* pmap, char* key1, char* key2, int* pideal_index) {
96 	int hash = mlr_string_pair_hash_func(key1, key2);
97 	int index = mlr_canonical_mod(hash, pmap->array_length);
98 	*pideal_index = index;
99 	int num_tries = 0;
100 	int done = 0;
101 
102 	while (!done) {
103 		lhms2ve_t* pe = &pmap->entries[index];
104 		if (pmap->states[index] == OCCUPIED) {
105 			char* ekey1 = pe->key1;
106 			char* ekey2 = pe->key2;
107 			// Existing key found in chain.
108 			if (streq(key1, ekey1) && streq(key2, ekey2))
109 				return index;
110 		}
111 		else if (pmap->states[index] == EMPTY) {
112 			return index;
113 		}
114 
115 		// If the current entry has been deleted, i.e. previously occupied,
116 		// the sought index may be further down the chain.  So we must
117 		// continue looking.
118 		if (++num_tries >= pmap->array_length) {
119 			fprintf(stderr,
120 				"%s: internal coding error: table full even after enlargement.\n", MLR_GLOBALS.bargv0);
121 			exit(1);
122 		}
123 
124 		// Linear probing.
125 		if (++index >= pmap->array_length)
126 			index = 0;
127 	}
128 	MLR_INTERNAL_CODING_ERROR();
129 	return -1; // not reached
130 }
131 
132 // ----------------------------------------------------------------
lhms2v_put(lhms2v_t * pmap,char * key1,char * key2,void * pvvalue,char free_flags)133 void* lhms2v_put(lhms2v_t* pmap, char* key1, char* key2, void* pvvalue, char free_flags) {
134 	if ((pmap->num_occupied + pmap->num_freed) >= (pmap->array_length*LOAD_FACTOR))
135 		lhms2v_enlarge(pmap);
136 	return lhms2v_put_no_enlarge(pmap, key1, key2, pvvalue, free_flags);
137 }
138 
lhms2v_put_no_enlarge(lhms2v_t * pmap,char * key1,char * key2,void * pvvalue,char free_flags)139 static void* lhms2v_put_no_enlarge(lhms2v_t* pmap, char* key1, char* key2, void* pvvalue, char free_flags) {
140 	int ideal_index = 0;
141 	int index = lhms2v_find_index_for_key(pmap, key1, key2, &ideal_index);
142 	lhms2ve_t* pe = &pmap->entries[index];
143 
144 	if (pmap->states[index] == OCCUPIED) {
145 		// Existing key found in chain; put value.
146 		pe->pvvalue = pvvalue;
147 	}
148 	else if (pmap->states[index] == EMPTY) {
149 		// End of chain.
150 		pe->ideal_index = ideal_index;
151 		pe->key1 = key1;
152 		pe->key2 = key2;
153 		pe->pvvalue = pvvalue;
154 		pe->free_flags = free_flags;
155 		pmap->states[index] = OCCUPIED;
156 
157 		if (pmap->phead == NULL) {
158 			pe->pprev   = NULL;
159 			pe->pnext   = NULL;
160 			pmap->phead = pe;
161 			pmap->ptail = pe;
162 		} else {
163 			pe->pprev   = pmap->ptail;
164 			pe->pnext   = NULL;
165 			pmap->ptail->pnext = pe;
166 			pmap->ptail = pe;
167 		}
168 		pmap->num_occupied++;
169 	}
170 	else {
171 		fprintf(stderr, "%s: lhms2v_find_index_for_key did not find end of chain\n", MLR_GLOBALS.bargv0);
172 		exit(1);
173 	}
174 	return pvvalue;
175 }
176 
177 // ----------------------------------------------------------------
lhms2v_get(lhms2v_t * pmap,char * key1,char * key2)178 void* lhms2v_get(lhms2v_t* pmap, char* key1, char* key2) {
179 	int ideal_index = 0;
180 	int index = lhms2v_find_index_for_key(pmap, key1, key2, &ideal_index);
181 	lhms2ve_t* pe = &pmap->entries[index];
182 
183 	if (pmap->states[index] == OCCUPIED)
184 		return pe->pvvalue;
185 	else if (pmap->states[index] == EMPTY)
186 		return NULL;
187 	else {
188 		fprintf(stderr, "%s: lhms2v_find_index_for_key did not find end of chain\n", MLR_GLOBALS.bargv0);
189 		exit(1);
190 	}
191 }
192 
193 // ----------------------------------------------------------------
lhms2v_has_key(lhms2v_t * pmap,char * key1,char * key2)194 int lhms2v_has_key(lhms2v_t* pmap, char* key1, char* key2) {
195 	int ideal_index = 0;
196 	int index = lhms2v_find_index_for_key(pmap, key1, key2, &ideal_index);
197 
198 	if (pmap->states[index] == OCCUPIED)
199 		return TRUE;
200 	else if (pmap->states[index] == EMPTY)
201 		return FALSE;
202 	else {
203 		fprintf(stderr, "%s: lhms2v_find_index_for_key did not find end of chain\n", MLR_GLOBALS.bargv0);
204 		exit(1);
205 	}
206 }
207 
208 // ----------------------------------------------------------------
lhms2v_size(lhms2v_t * pmap)209 int lhms2v_size(lhms2v_t* pmap) {
210 	return pmap->num_occupied;
211 }
212 
213 // ----------------------------------------------------------------
lhms2v_enlarge(lhms2v_t * pmap)214 static void lhms2v_enlarge(lhms2v_t* pmap) {
215 	lhms2ve_t*       old_entries = pmap->entries;
216 	lhms2ve_state_t* old_states  = pmap->states;
217 	lhms2ve_t*       old_head    = pmap->phead;
218 
219 	lhms2v_init(pmap, pmap->array_length*ENLARGEMENT_FACTOR);
220 
221 	for (lhms2ve_t* pe = old_head; pe != NULL; pe = pe->pnext) {
222 		lhms2v_put_no_enlarge(pmap, pe->key1, pe->key2, pe->pvvalue, pe->free_flags);
223 	}
224 	free(old_entries);
225 	free(old_states);
226 }
227 
228 // ----------------------------------------------------------------
lhms2v_check_counts(lhms2v_t * pmap)229 int lhms2v_check_counts(lhms2v_t* pmap) {
230 	int nocc = 0;
231 	int ndel = 0;
232 	for (int index = 0; index < pmap->array_length; index++) {
233 		if (pmap->states[index] == OCCUPIED)
234 			nocc++;
235 		else if (pmap->states[index] == DELETED)
236 			ndel++;
237 	}
238 	if (nocc != pmap->num_occupied) {
239 		fprintf(stderr,
240 			"occupancy-count mismatch:  actual %d != cached  %d\n",
241 				nocc, pmap->num_occupied);
242 		return FALSE;
243 	}
244 	if (ndel != pmap->num_freed) {
245 		fprintf(stderr,
246 			"deleted-count mismatch:  actual %d != cached  %d\n",
247 				ndel, pmap->num_freed);
248 		return FALSE;
249 	}
250 	return TRUE;
251 }
252 
253 // ----------------------------------------------------------------
get_state_name(int state)254 static char* get_state_name(int state) {
255 	switch(state) {
256 	case OCCUPIED: return "occupied"; break;
257 	case DELETED:  return "deleted";  break;
258 	case EMPTY:    return "empty";    break;
259 	default:       return "?????";    break;
260 	}
261 }
262 
lhms2v_print(lhms2v_t * pmap)263 void lhms2v_print(lhms2v_t* pmap) {
264 	for (int index = 0; index < pmap->array_length; index++) {
265 		lhms2ve_t* pe = &pmap->entries[index];
266 
267 		const char* key1_string = (pe == NULL) ? "none" :
268 			pe->key1 == NULL ? "null" :
269 			pe->key1;
270 		const char* key2_string = (pe == NULL) ? "none" :
271 			pe->key2 == NULL ? "null" :
272 			pe->key2;
273 		const char* value_string = (pe == NULL) ? "none" :
274 			pe->pvvalue == NULL ? "null" :
275 			pe->pvvalue;
276 
277 		printf(
278 		"| stt: %-8s  | idx: %6d | nidx: %6d | key1: %12s | key2: %12s | pvvalue: %12s |\n",
279 			get_state_name(pmap->states[index]), index, pe->ideal_index,
280 			key1_string, key2_string, value_string);
281 	}
282 	printf("+\n");
283 	printf("| phead: %p | ptail %p\n", pmap->phead, pmap->ptail);
284 	printf("+\n");
285 	for (lhms2ve_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
286 		const char* key1_string = (pe == NULL) ? "none" :
287 			pe->key1 == NULL ? "null" :
288 			pe->key1;
289 		const char* key2_string = (pe == NULL) ? "none" :
290 			pe->key2 == NULL ? "null" :
291 			pe->key2;
292 		const char* value_string = (pe == NULL) ? "none" :
293 			pe->pvvalue == NULL ? "null" :
294 			pe->pvvalue;
295 		printf(
296 		"| prev: %p curr: %p next: %p | nidx: %6d | key1: %12s | key2: %12s | pvvalue: %12s |\n",
297 			pe->pprev, pe, pe->pnext,
298 			pe->ideal_index, key1_string, key2_string, value_string);
299 	}
300 }
301