1 // ================================================================
2 // Array-only (open addressing) string-valued hash set with linear probing for
3 // collisions.
4 //
5 // John Kerl 2012-08-13
6 //
7 // Notes:
8 // * null key is not supported.
9 // * null value is supported.
10 //
11 // See also:
12 // * http://en.wikipedia.org/wiki/Hash_table
13 // * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
14 // ================================================================
15 
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 
20 #include "lib/mlr_globals.h"
21 #include "lib/mlrutil.h"
22 #include "containers/hss.h"
23 
24 // ----------------------------------------------------------------
25 #define INITIAL_ARRAY_LENGTH 128
26 #define LOAD_FACTOR          0.7
27 
28 #define OCCUPIED 444
29 #define DELETED  555
30 #define EMPTY    666
31 
32 // ================================================================
hsse_clear(hsse_t * pentry)33 static void hsse_clear(hsse_t *pentry) {
34 	pentry->key         = NULL;
35 	pentry->state       = EMPTY;
36 	pentry->ideal_index = -1;
37 }
38 
39 // ----------------------------------------------------------------
hss_make_alloc_array(int length)40 static hsse_t* hss_make_alloc_array(int length) {
41 	hsse_t* array = (hsse_t*)mlr_malloc_or_die(sizeof(hsse_t) * length);
42 	for (int i = 0; i < length; i++)
43 		hsse_clear(&array[i]);
44 	return array;
45 }
46 
hss_init(hss_t * pset,int length)47 static void hss_init(hss_t *pset, int length) {
48 	pset->num_occupied = 0;
49 	pset->num_freed    = 0;
50 	pset->array_length = length;
51 	pset->array        = hss_make_alloc_array(length);
52 }
53 
hss_alloc()54 hss_t* hss_alloc() {
55 	hss_t* pset = mlr_malloc_or_die(sizeof(hss_t));
56 	hss_init(pset, INITIAL_ARRAY_LENGTH);
57 	return pset;
58 }
59 
hss_free(hss_t * pset)60 void hss_free(hss_t* pset) {
61 	if (pset == NULL)
62 		return;
63 	free(pset->array);
64 	pset->array = NULL;
65 	pset->num_occupied = 0;
66 	pset->num_freed    = 0;
67 	pset->array_length = 0;
68 	free(pset);
69 }
70 
71 // ----------------------------------------------------------------
72 // Used by get() and remove().
73 // Returns >=0 for where the key is *or* should go (end of chain).
hss_find_index_for_key(hss_t * pset,char * key,int * pideal_index)74 static int hss_find_index_for_key(hss_t* pset, char* key, int* pideal_index) {
75 	int hash = mlr_string_hash_func(key);
76 	int index = mlr_canonical_mod(hash, pset->array_length);
77 	*pideal_index = index;
78 	int num_tries = 0;
79 
80 	while (TRUE) {
81 		hsse_t* pe = &pset->array[index];
82 		if (pe->state == OCCUPIED) {
83 			char* ekey = pe->key;
84 			// Existing key found in chain.
85 			if (streq(key, ekey))
86 				return index;
87 		}
88 		else if (pe->state == EMPTY) {
89 			return index;
90 		}
91 
92 		// If the current entry has been freed, i.e. previously occupied,
93 		// the sought index may be further down the chain.  So we must
94 		// continue looking.
95 		if (++num_tries >= pset->array_length) {
96 			fprintf(stderr,
97 				"%s: internal coding error: table full even after enlargement.\n", MLR_GLOBALS.bargv0);
98 			exit(1);
99 		}
100 
101 		// Linear probing.
102 		if (++index >= pset->array_length)
103 			index = 0;
104 	}
105 	MLR_INTERNAL_CODING_ERROR();
106 }
107 
108 // ----------------------------------------------------------------
109 static void hss_enlarge(hss_t* pset);
110 
hss_add(hss_t * pset,char * key)111 void hss_add(hss_t* pset, char* key) {
112 	if ((pset->num_occupied + pset->num_freed) >= (pset->array_length*LOAD_FACTOR))
113 		hss_enlarge(pset);
114 
115 	int ideal_index = 0;
116 	int index = hss_find_index_for_key(pset, key, &ideal_index);
117 	hsse_t* pe = &pset->array[index];
118 
119 	if (pe->state == OCCUPIED) {
120 		// Existing key found in chain. Chaining already handled by hss_find_index_for_key.
121 		return;
122 	}
123 	else if (pe->state == EMPTY) {
124 		// End of chain.
125 		pe->key = key;
126 		pe->state = OCCUPIED;
127 		pe->ideal_index = ideal_index;
128 		pset->num_occupied++;
129 	}
130 	else {
131 		fprintf(stderr, "hss_find_index_for_key did not find end of chain.\n");
132 		exit(1);
133 	}
134 }
135 
136 // ----------------------------------------------------------------
hss_enlarge(hss_t * pset)137 static void hss_enlarge(hss_t* pset) {
138 	int old_array_length = pset->array_length;
139 	hsse_t* old_array = pset->array;
140 
141 	hss_init(pset, pset->array_length*2);
142 
143 	for (int index = 0; index < old_array_length; index++) {
144 		hsse_t e = old_array[index];
145 		if (e.state == OCCUPIED)
146 			hss_add(pset, e.key);
147 	}
148 
149 	free(old_array);
150 }
151 
152 // ----------------------------------------------------------------
hss_has(hss_t * pset,char * key)153 int hss_has(hss_t* pset, char* key) {
154 	int ideal_index = 0;
155 	int index = hss_find_index_for_key(pset, key, &ideal_index);
156 	hsse_t* pe = &pset->array[index];
157 
158 	if (pe->state == OCCUPIED)
159 		return TRUE;
160 	else if (pe->state == EMPTY)
161 		return FALSE;
162 	else {
163 		fprintf(stderr, "hss_find_index_for_key did not find end of chain.\n");
164 		exit(1);
165 	}
166 }
167 
168 // ----------------------------------------------------------------
hss_remove(hss_t * pset,char * key)169 void hss_remove(hss_t* pset, char* key) {
170 	int ideal_index = 0;
171 	int index = hss_find_index_for_key(pset, key, &ideal_index);
172 	hsse_t* pe = &pset->array[index];
173 	if (pe->state == OCCUPIED) {
174 		pe->key          = NULL;
175 		pe->state        = DELETED;
176 		pe->ideal_index  = -1;
177 		pset->num_freed++;
178 		pset->num_occupied--;
179 	}
180 	else if (pe->state == EMPTY) {
181 	}
182 	else {
183 		fprintf(stderr, "hss_find_index_for_key did not find end of chain.\n");
184 		exit(1);
185 	}
186 }
187 
188 // ----------------------------------------------------------------
hss_clear(hss_t * pset)189 void hss_clear(hss_t* pset) {
190 	for (int i = 0; i < pset->array_length; i++) {
191 		hsse_clear(&pset->array[i]);
192 	}
193 	pset->num_occupied = 0;
194 	pset->num_freed = 0;
195 }
196 
hss_size(hss_t * pset)197 int hss_size(hss_t* pset) {
198 	return pset->num_occupied;
199 }
200 
201 // ----------------------------------------------------------------
hss_check_counts(hss_t * pset)202 int hss_check_counts(hss_t* pset) {
203 	int nocc = 0;
204 	int ndel = 0;
205 	for (int index = 0; index < pset->array_length; index++) {
206 		hsse_t* pe = &pset->array[index];
207 		if (pe->state == OCCUPIED)
208 			nocc++;
209 		else if (pe->state == DELETED)
210 			ndel++;
211 	}
212 	if (nocc != pset->num_occupied) {
213 		fprintf(stderr,
214 			"occupancy-count mismatch:  actual %d != cached  %d.\n",
215 				nocc, pset->num_occupied);
216 		return FALSE;
217 	}
218 	if (ndel != pset->num_freed) {
219 		fprintf(stderr,
220 			"freed-count mismatch:  actual %d != cached  %d.\n",
221 				ndel, pset->num_freed);
222 		return FALSE;
223 	}
224 	return TRUE;
225 }
226 
227 // ----------------------------------------------------------------
get_state_name(int state)228 static char* get_state_name(int state) {
229 	switch(state) {
230 	case OCCUPIED: return "occupied"; break;
231 	case DELETED:  return "freed";  break;
232 	case EMPTY:    return "empty";    break;
233 	default:       return "?????";    break;
234 	}
235 }
236 
hss_print(hss_t * pset)237 void hss_print(hss_t* pset) {
238 	for (int index = 0; index < pset->array_length; index++) {
239 		hsse_t* pe = &pset->array[index];
240 
241 		const char* key_string = (pe == NULL) ? "none" :
242 			pe->key == NULL ? "null" :
243 			pe->key;
244 
245 		printf(
246 		"| stt: %-8s  | idx: %6d | nidx: %6d | key: %12s |\n",
247 			get_state_name(pe->state), index, pe->ideal_index, key_string);
248 	}
249 }
250