1 /* dict -- hash table of strings indexed by strings
2 *
3 * dict_create(initial_size) -- create a new, empty hash table
4 * dict_find(dict, key) -- return value for key, or NULL if not found
5 * dict_add(dict, key, value) -- add value for key, overwriting if it exists
6 * dict_destroy(dict, key) -- remove value for key
7 * dict_delete(dict) -- remove table, free memory
8 *
9 * The dictionary will automatically expand beyond its initial size if
10 * it gets full. But enlarging a large dictionary can be slow. The
11 * algorithm will try to keep the dictionary less than 50% filled (as
12 * long as there is memory available), so an initial size of more than
13 * twice the total number of keys results in the best performance.
14 *
15 * Part of HTML-XML-utils, see:
16 * http://www.w3.org/Tools/HTML-XML-utils/
17 *
18 * Copyright © 2008 World Wide Web Consortium
19 * See http://www.w3.org/Consortium/Legal/copyright-software
20 *
21 * Author: Bert Bos <bert@w3.org>
22 * Created: 4 Aug 2008
23 */
24
25 #include <assert.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include "export.h"
29
30 #define eq(s, t) (*(s) == *(t) && strcmp(s, t) == 0)
31
32
33 EXPORT typedef struct _Dictionary * Dictionary;
34
35 struct _Dictionary {
36 unsigned long size, entries;
37 unsigned long *seqno2index, *index2seqno;
38 char **keys, **values;
39 };
40
41
42 /* hash -- compute a hash sum modulo n over a string */
hash(const unsigned long n,const char * s)43 static unsigned long hash(const unsigned long n, const char *s)
44 {
45 unsigned long h = 5381;
46
47 for (; *s; s++) h = ((h << 5) + h) ^ (unsigned)*s;
48 h = h % n;
49 if (h == 0) h = 1; /* We don't use index 0 */
50 return h;
51 }
52
53
54 /* find -- return index of key in dictionary, 0 if not found */
find(Dictionary d,const char * key)55 static unsigned long find(Dictionary d, const char *key)
56 {
57 unsigned long index0, index, seqno;
58
59 assert(d);
60 index0 = hash(d->size, key);
61 index = index0;
62 while (1) {
63 seqno = d->index2seqno[index];
64 if (seqno >= d->entries || d->seqno2index[seqno] != index) return 0;
65 if (eq(d->keys[index], key)) return index;
66 index = (index + 1) % d->size;
67 if (index == 0) index = 1;
68 if (index == index0) return 0; /* We've tried all entries */
69 }
70 }
71
72
73 /* dict_create -- create a new, empty dictionary; return NULL if no memory */
dict_create(int initial_size)74 EXPORT Dictionary dict_create(int initial_size)
75 {
76 Dictionary d;
77
78 if (initial_size < 2) initial_size = 2;
79 if (!(d = malloc(sizeof(*d)))) return NULL;
80 d->keys = d->values = NULL;
81 d->index2seqno = d->seqno2index = NULL;
82 if (!(d->keys = malloc(initial_size * sizeof(*(d->keys)))) ||
83 !(d->values = malloc(initial_size * sizeof(*(d->values)))) ||
84 !(d->seqno2index = malloc(initial_size * sizeof(*(d->seqno2index)))) ||
85 !(d->index2seqno = malloc(initial_size * sizeof(*(d->index2seqno))))) {
86 free(d->index2seqno);
87 free(d->seqno2index);
88 free(d->values);
89 free(d->keys);
90 free(d);
91 return NULL;
92 }
93 d->size = initial_size;
94 d->entries = 0;
95 return d;
96 }
97
98
99 /* dict_delete -- delete a dictionary, free all allocated memory */
dict_delete(Dictionary d)100 EXPORT void dict_delete(Dictionary d)
101 {
102 unsigned long i;
103
104 assert(d);
105 for (i = 0; i < d->entries; i++) {
106 free(d->keys[d->seqno2index[i]]);
107 free(d->values[d->seqno2index[i]]);
108 }
109 free(d->keys);
110 free(d->values);
111 free(d->seqno2index);
112 free(d->index2seqno);
113 free(d);
114 }
115
116
117 /* dict_destroy_all -- remove all keys and values from dictionary */
dict_destroy_all(Dictionary d)118 EXPORT void dict_destroy_all(Dictionary d)
119 {
120 unsigned long i;
121
122 assert(d);
123 for (i = 0; i < d->entries; i++) {
124 free(d->keys[d->seqno2index[i]]);
125 free(d->values[d->seqno2index[i]]);
126 }
127 d->entries = 0;
128 }
129
130
131 /* dict_destroy -- delete a value from the dictionary */
dict_destroy(Dictionary d,const char * key)132 EXPORT void dict_destroy(Dictionary d, const char *key)
133 {
134 unsigned long index, seqno;
135
136 assert(d);
137 if ((index = find(d, key)) > 0) {
138 free(d->keys[index]);
139 free(d->values[index]);
140 seqno = d->index2seqno[index];
141 assert(seqno < d->entries);
142 assert(d->entries > 0);
143 d->entries--;
144 d->seqno2index[seqno] = d->seqno2index[d->entries];
145 d->index2seqno[d->seqno2index[seqno]] = seqno;
146 }
147 }
148
149
150 /* Forward declaration */
151 static int expand(Dictionary d);
152
153
154 /* dict_add -- add a key-value pair to dictionary, return 0 if out of memory */
dict_add(Dictionary d,const char * key,const char * value)155 EXPORT int dict_add(Dictionary d, const char *key, const char *value)
156 {
157 unsigned long index0, index, seqno, found;
158
159 assert(d);
160 index0 = hash(d->size, key);
161 index = index0;
162 while (1) {
163 seqno = d->index2seqno[index];
164 if (seqno >= d->entries) { found = 0; break; }
165 if (d->seqno2index[seqno] != index) { found = 0; break; }
166 if (eq(d->keys[index], key)) { found = 1; break; }
167 index = (index + 1) % d->size;
168 if (index == 0) index = 1;
169 if (index == index0) {if (!expand(d)) return 0; } /* Dictionary full */
170 }
171 if (found) {
172 free(d->values[index]);
173 d->values[index] = strdup(value);
174 if (!d->values[index]) return 0; /* Out of memory */
175 } else {
176 assert(d->entries < d->size);
177 d->keys[index] = strdup(key);
178 if (!d->keys[index]) return 0; /* Out of memory */
179 d->values[index] = strdup(value);
180 if (!d->values[index]) { free(d->keys[index]); return 0; } /* Out of mem. */
181 d->seqno2index[d->entries] = index;
182 d->index2seqno[index] = d->entries;
183 d->entries++;
184 if (d->entries > d->size/2) (void) expand(d); /* Try expand, fail is OK */
185 }
186 return 1;
187 }
188
189
190 /* expand -- make the tables in the dictionary larger, return 0 if out of mem */
expand(Dictionary d)191 static int expand(Dictionary d)
192 {
193 unsigned long i;
194 Dictionary h;
195
196 assert(d);
197 h = dict_create(2 * d->size);
198 if (!h) return 0;
199
200 /* Add all old entries to the new dictionary */
201 for (i = 0; i < d->entries; i++) {
202 if (!dict_add(h, d->keys[d->seqno2index[i]],d->values[d->seqno2index[i]])) {
203 dict_delete(h);
204 return 0; /* Failed */
205 }
206 }
207
208 /* Succeeded in making a copy, now delete the old entries */
209 for (i = 0; i < d->entries; i++) {
210 free(d->keys[d->seqno2index[i]]);
211 free(d->values[d->seqno2index[i]]);
212 }
213 free(d->keys);
214 free(d->values);
215 free(d->seqno2index);
216 free(d->index2seqno);
217
218 /* Put the larger arrays from the new dictionary in the old dictionary */
219 d->size = h->size;
220 assert(d->entries == h->entries);
221 d->keys = h->keys;
222 d->values = h->values;
223 d->index2seqno = h->index2seqno;
224 d->seqno2index = h->seqno2index;
225 free(h);
226
227 return 1;
228 }
229
230
231 /* dict_find -- return value associated with key in dictionary, or NULL */
dict_find(Dictionary d,const char * key)232 EXPORT const char* dict_find(Dictionary d, const char* key)
233 {
234 unsigned long i;
235
236 assert(d);
237 if ((i = find(d, key)) > 0) return d->values[i]; else return NULL;
238 }
239
240
241 /* dict_next -- return next key, or 1st key if NULL, or NULL if no more */
dict_next(Dictionary d,const char * prev_key)242 EXPORT const char *dict_next(Dictionary d, const char *prev_key)
243 {
244 unsigned long index, seqno;
245
246 assert(d);
247 if (d->entries == 0) return NULL; /* Empty dictionary */
248 if (!prev_key) return d->keys[d->seqno2index[0]]; /* Return first key */
249 index = find(d, prev_key);
250 if (index == 0) return NULL; /* Error, unknown key */
251 seqno = d->index2seqno[index] + 1;
252 if (seqno == d->entries) return NULL; /* No more keys */
253 return d->keys[d->seqno2index[seqno]];
254 }
255