1 // ================================================================
2 // Array-only (open addressing) string-list-to-void-star linked hash map with
3 // linear probing for collisions.
4 //
5 // John Kerl 2014-12-22
6 //
7 // Notes:
8 // * null key is not supported.
9 // * null value is not supported.
10 //
11 // See also:
12 // * http://en.wikipedia.org/wiki/Hash_table
13 // * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
14 // ================================================================
15 
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 
20 #include "lib/mlr_globals.h"
21 #include "lib/mlrutil.h"
22 #include "containers/lhmslv.h"
23 
24 // ----------------------------------------------------------------
25 // Allow compile-time override, e.g using gcc -D.
26 #ifndef INITIAL_ARRAY_LENGTH
27 #define INITIAL_ARRAY_LENGTH 16
28 #endif
29 
30 #ifndef LOAD_FACTOR
31 #define LOAD_FACTOR          0.7
32 #endif
33 
34 #ifndef ENLARGEMENT_FACTOR
35 #define ENLARGEMENT_FACTOR   2
36 #endif
37 
38 // ----------------------------------------------------------------
39 #define OCCUPIED 0xa4
40 #define DELETED  0xb8
41 #define EMPTY    0xce
42 
43 // ----------------------------------------------------------------
44 static void* lhmslv_put_no_enlarge(lhmslv_t* pmap, slls_t* key, void* pvvalue, char free_flags);
45 static void lhmslv_enlarge(lhmslv_t* pmap);
46 
47 // ================================================================
lhmslv_init(lhmslv_t * pmap,int length)48 static void lhmslv_init(lhmslv_t *pmap, int length) {
49 	pmap->num_occupied = 0;
50 	pmap->num_freed    = 0;
51 	pmap->array_length = length;
52 
53 	pmap->entries      = (lhmslve_t*)mlr_malloc_or_die(sizeof(lhmslve_t) * length);
54 	// Don't do lhmslve_clear() of all entries at init time, since this has a
55 	// drastic effect on the time needed to construct an empty map (and miller
56 	// constructs an awful lot of those). The attributes there are don't-cares
57 	// if the corresponding entry state is EMPTY. They are set on put, and
58 	// mutated on remove.
59 
60 	pmap->states       = (lhmslve_state_t*)mlr_malloc_or_die(sizeof(lhmslve_state_t) * length);
61 	memset(pmap->states, EMPTY, length);
62 
63 	pmap->phead        = NULL;
64 	pmap->ptail        = NULL;
65 }
66 
lhmslv_alloc()67 lhmslv_t* lhmslv_alloc() {
68 	lhmslv_t* pmap = mlr_malloc_or_die(sizeof(lhmslv_t));
69 	lhmslv_init(pmap, INITIAL_ARRAY_LENGTH);
70 	return pmap;
71 }
72 
73 // void-star payloads should first be freed by the caller.
lhmslv_free(lhmslv_t * pmap)74 void lhmslv_free(lhmslv_t* pmap) {
75 	if (pmap == NULL)
76 		return;
77 	for (lhmslve_t* pe = pmap->phead; pe != NULL; pe = pe->pnext)
78 		if (pe->free_flags & FREE_ENTRY_KEY)
79 			slls_free(pe->key);
80 	free(pmap->entries);
81 	free(pmap->states);
82 	pmap->entries      = NULL;
83 	pmap->num_occupied = 0;
84 	pmap->num_freed    = 0;
85 	pmap->array_length = 0;
86 	free(pmap);
87 }
88 
89 // ----------------------------------------------------------------
90 // Used by get() and remove().
91 // Returns >=0 for where the key is *or* should go (end of chain).
lhmslv_find_index_for_key(lhmslv_t * pmap,slls_t * key,int * pideal_index)92 static int lhmslv_find_index_for_key(lhmslv_t* pmap, slls_t* key, int* pideal_index) {
93 	int hash = slls_hash_func(key);
94 	int index = mlr_canonical_mod(hash, pmap->array_length);
95 	*pideal_index = index;
96 	int num_tries = 0;
97 
98 	while (TRUE) {
99 		lhmslve_t* pe = &pmap->entries[index];
100 		if (pmap->states[index] == OCCUPIED) {
101 			slls_t* ekey = pe->key;
102 			// Existing key found in chain.
103 			if (slls_equals(key, ekey))
104 				return index;
105 		}
106 		else if (pmap->states[index] == EMPTY) {
107 			return index;
108 		}
109 
110 		// If the current entry has been freed, i.e. previously occupied,
111 		// the sought index may be further down the chain.  So we must
112 		// continue looking.
113 		if (++num_tries >= pmap->array_length) {
114 			fprintf(stderr,
115 				"%s: internal coding error: table full even after enlargement.\n", MLR_GLOBALS.bargv0);
116 			exit(1);
117 		}
118 
119 		// Linear probing.
120 		if (++index >= pmap->array_length)
121 			index = 0;
122 	}
123 	MLR_INTERNAL_CODING_ERROR();
124 	return -1; // not reached
125 }
126 
127 // ----------------------------------------------------------------
lhmslv_put(lhmslv_t * pmap,slls_t * key,void * pvvalue,char free_flags)128 void* lhmslv_put(lhmslv_t* pmap, slls_t* key, void* pvvalue, char free_flags) {
129 	if ((pmap->num_occupied + pmap->num_freed) >= (pmap->array_length*LOAD_FACTOR))
130 		lhmslv_enlarge(pmap);
131 	return lhmslv_put_no_enlarge(pmap, key, pvvalue, free_flags);
132 }
133 
lhmslv_put_no_enlarge(lhmslv_t * pmap,slls_t * key,void * pvvalue,char free_flags)134 static void* lhmslv_put_no_enlarge(lhmslv_t* pmap, slls_t* key, void* pvvalue, char free_flags) {
135 	int ideal_index = 0;
136 	int index = lhmslv_find_index_for_key(pmap, key, &ideal_index);
137 	lhmslve_t* pe = &pmap->entries[index];
138 
139 	if (pmap->states[index] == OCCUPIED) {
140 		// Existing key found in chain; put value.
141 		pe->pvvalue = pvvalue;
142 
143 	} else if (pmap->states[index] == EMPTY) {
144 		// End of chain.
145 		pe->ideal_index = ideal_index;
146 		pe->key = key;
147 		pe->free_flags = free_flags;
148 		pe->pvvalue = pvvalue;
149 		pmap->states[index] = OCCUPIED;
150 
151 		if (pmap->phead == NULL) {
152 			pe->pprev   = NULL;
153 			pe->pnext   = NULL;
154 			pmap->phead = pe;
155 			pmap->ptail = pe;
156 		} else {
157 			pe->pprev   = pmap->ptail;
158 			pe->pnext   = NULL;
159 			pmap->ptail->pnext = pe;
160 			pmap->ptail = pe;
161 		}
162 		pmap->num_occupied++;
163 
164 	} else {
165 		fprintf(stderr, "%s: lhmslv_find_index_for_key did not find end of chain\n", MLR_GLOBALS.bargv0);
166 		exit(1);
167 	}
168 
169 	return pvvalue;
170 }
171 
172 // ----------------------------------------------------------------
lhmslv_get(lhmslv_t * pmap,slls_t * key)173 void* lhmslv_get(lhmslv_t* pmap, slls_t* key) {
174 	int ideal_index = 0;
175 	int index = lhmslv_find_index_for_key(pmap, key, &ideal_index);
176 	lhmslve_t* pe = &pmap->entries[index];
177 
178 	if (pmap->states[index] == OCCUPIED)
179 		return pe->pvvalue;
180 	else if (pmap->states[index] == EMPTY)
181 		return NULL;
182 	else {
183 		fprintf(stderr, "%s: lhmslv_find_index_for_key did not find end of chain\n", MLR_GLOBALS.bargv0);
184 		exit(1);
185 	}
186 }
187 
188 // ----------------------------------------------------------------
lhmslv_has_key(lhmslv_t * pmap,slls_t * key)189 int lhmslv_has_key(lhmslv_t* pmap, slls_t* key) {
190 	int ideal_index = 0;
191 	int index = lhmslv_find_index_for_key(pmap, key, &ideal_index);
192 
193 	if (pmap->states[index] == OCCUPIED)
194 		return TRUE;
195 	else if (pmap->states[index] == EMPTY)
196 		return FALSE;
197 	else {
198 		fprintf(stderr, "%s: lhmslv_find_index_for_key did not find end of chain\n", MLR_GLOBALS.bargv0);
199 		exit(1);
200 	}
201 }
202 
203 // ----------------------------------------------------------------
lhmslv_size(lhmslv_t * pmap)204 int lhmslv_size(lhmslv_t* pmap) {
205 	return pmap->num_occupied;
206 }
207 
208 // ----------------------------------------------------------------
lhmslv_enlarge(lhmslv_t * pmap)209 static void lhmslv_enlarge(lhmslv_t* pmap) {
210 	lhmslve_t*       old_entries = pmap->entries;
211 	lhmslve_state_t* old_states  = pmap->states;
212 	lhmslve_t*       old_head    = pmap->phead;
213 
214 	lhmslv_init(pmap, pmap->array_length*ENLARGEMENT_FACTOR);
215 
216 	for (lhmslve_t* pe = old_head; pe != NULL; pe = pe->pnext) {
217 		lhmslv_put_no_enlarge(pmap, pe->key, pe->pvvalue, pe->free_flags);
218 	}
219 	free(old_entries);
220 	free(old_states);
221 }
222 
223 // ----------------------------------------------------------------
lhmslv_check_counts(lhmslv_t * pmap)224 int lhmslv_check_counts(lhmslv_t* pmap) {
225 	int nocc = 0;
226 	int ndel = 0;
227 	for (int index = 0; index < pmap->array_length; index++) {
228 		if (pmap->states[index] == OCCUPIED)
229 			nocc++;
230 		else if (pmap->states[index] == DELETED)
231 			ndel++;
232 	}
233 	if (nocc != pmap->num_occupied) {
234 		fprintf(stderr,
235 			"occupancy-count mismatch:  actual %d != cached  %d\n",
236 				nocc, pmap->num_occupied);
237 		return FALSE;
238 	}
239 	if (ndel != pmap->num_freed) {
240 		fprintf(stderr,
241 			"freed-count mismatch:  actual %d != cached  %d\n",
242 				ndel, pmap->num_freed);
243 		return FALSE;
244 	}
245 	return TRUE;
246 }
247 
248 // ----------------------------------------------------------------
get_state_name(int state)249 static char* get_state_name(int state) {
250 	switch(state) {
251 	case OCCUPIED: return "occupied"; break;
252 	case DELETED:  return "freed";  break;
253 	case EMPTY:    return "empty";    break;
254 	default:       return "?????";    break;
255 	}
256 }
257 
lhmslv_print(lhmslv_t * pmap)258 void lhmslv_print(lhmslv_t* pmap) {
259 	for (int index = 0; index < pmap->array_length; index++) {
260 		lhmslve_t* pe = &pmap->entries[index];
261 
262 		const char* key_string = (pe == NULL) ? "none" :
263 			pe->key == NULL ? "null" :
264 			slls_join(pe->key, ",");
265 		const char* value_string = (pe == NULL) ? "none" :
266 			pe->pvvalue == NULL ? "null" :
267 			pe->pvvalue;
268 
269 		printf(
270 		"| stt: %-8s  | idx: %6d | nidx: %6d | key: %12s | pvvalue: %12s |\n",
271 			get_state_name(pmap->states[index]), index, pe->ideal_index, key_string, value_string);
272 	}
273 	printf("+\n");
274 	printf("| phead: %p | ptail %p\n", pmap->phead, pmap->ptail);
275 	printf("+\n");
276 	for (lhmslve_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
277 		const char* key_string = (pe == NULL) ? "none" :
278 			pe->key == NULL ? "null" :
279 			slls_join(pe->key, ",");
280 		const char* value_string = (pe == NULL) ? "none" :
281 			pe->pvvalue == NULL ? "null" :
282 			pe->pvvalue;
283 		printf(
284 		"| prev: %p curr: %p next: %p | nidx: %6d | key: %12s | pvvalue: %12s |\n",
285 			pe->pprev, pe, pe->pnext,
286 			pe->ideal_index, key_string, value_string);
287 	}
288 }
289