1 /* dict -- hash table of strings indexed by strings
2  *
3  * dict_create(initial_size) -- create a new, empty hash table
4  * dict_find(dict, key) -- return value for key, or NULL if not found
5  * dict_add(dict, key, value) -- add value for key, overwriting if it exists
6  * dict_destroy(dict, key) -- remove value for key
7  * dict_delete(dict) -- remove table, free memory
8  *
9  * The dictionary will automatically expand beyond its initial size if
10  * it gets full. But enlarging a large dictionary can be slow. The
11  * algorithm will try to keep the dictionary less than 50% filled (as
12  * long as there is memory available), so an initial size of more than
13  * twice the total number of keys results in the best performance.
14  *
15  * Part of HTML-XML-utils, see:
16  * http://www.w3.org/Tools/HTML-XML-utils/
17  *
18  * Copyright © 2008 World Wide Web Consortium
19  * See http://www.w3.org/Consortium/Legal/copyright-software
20  *
21  * Author: Bert Bos <bert@w3.org>
22  * Created: 4 Aug 2008
23  */
24 
25 #include <assert.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include "export.h"
29 
30 #define eq(s, t) (*(s) == *(t) && strcmp(s, t) == 0)
31 
32 
33 EXPORT typedef struct _Dictionary * Dictionary;
34 
35 struct _Dictionary {
36   unsigned long size, entries;
37   unsigned long *seqno2index, *index2seqno;
38   char **keys, **values;
39 };
40 
41 
42 /* hash -- compute a hash sum modulo n over a string */
hash(const unsigned long n,const char * s)43 static unsigned long hash(const unsigned long n, const char *s)
44 {
45   unsigned long h = 5381;
46 
47   for (; *s; s++) h = ((h << 5) + h) ^ (unsigned)*s;
48   h = h % n;
49   if (h == 0) h = 1;		/* We don't use index 0 */
50   return h;
51 }
52 
53 
54 /* find -- return index of key in dictionary, 0 if not found */
find(Dictionary d,const char * key)55 static unsigned long find(Dictionary d, const char *key)
56 {
57   unsigned long index0, index, seqno;
58 
59   assert(d);
60   index0 = hash(d->size, key);
61   index = index0;
62   while (1) {
63     seqno = d->index2seqno[index];
64     if (seqno >= d->entries || d->seqno2index[seqno] != index) return 0;
65     if (eq(d->keys[index], key)) return index;
66     index = (index + 1) % d->size;
67     if (index == 0) index = 1;
68     if (index == index0) return 0; /* We've tried all entries */
69   }
70 }
71 
72 
73 /* dict_create -- create a new, empty dictionary; return NULL if no memory */
dict_create(int initial_size)74 EXPORT Dictionary dict_create(int initial_size)
75 {
76   Dictionary d;
77 
78   if (initial_size < 2) initial_size = 2;
79   if (!(d = malloc(sizeof(*d)))) return NULL;
80   d->keys = d->values = NULL;
81   d->index2seqno = d->seqno2index = NULL;
82   if (!(d->keys = malloc(initial_size * sizeof(*(d->keys)))) ||
83       !(d->values = malloc(initial_size * sizeof(*(d->values)))) ||
84       !(d->seqno2index = malloc(initial_size * sizeof(*(d->seqno2index)))) ||
85       !(d->index2seqno = malloc(initial_size * sizeof(*(d->index2seqno))))) {
86     free(d->index2seqno);
87     free(d->seqno2index);
88     free(d->values);
89     free(d->keys);
90     free(d);
91     return NULL;
92   }
93   d->size = initial_size;
94   d->entries = 0;
95   return d;
96 }
97 
98 
99 /* dict_delete -- delete a dictionary, free all allocated memory */
dict_delete(Dictionary d)100 EXPORT void dict_delete(Dictionary d)
101 {
102   unsigned long i;
103 
104   assert(d);
105   for (i = 0; i < d->entries; i++) {
106     free(d->keys[d->seqno2index[i]]);
107     free(d->values[d->seqno2index[i]]);
108   }
109   free(d->keys);
110   free(d->values);
111   free(d->seqno2index);
112   free(d->index2seqno);
113   free(d);
114 }
115 
116 
117 /* dict_destroy_all -- remove all keys and values from dictionary */
dict_destroy_all(Dictionary d)118 EXPORT void dict_destroy_all(Dictionary d)
119 {
120   unsigned long i;
121 
122   assert(d);
123   for (i = 0; i < d->entries; i++) {
124     free(d->keys[d->seqno2index[i]]);
125     free(d->values[d->seqno2index[i]]);
126   }
127   d->entries = 0;
128 }
129 
130 
131 /* dict_destroy -- delete a value from the dictionary */
dict_destroy(Dictionary d,const char * key)132 EXPORT void dict_destroy(Dictionary d, const char *key)
133 {
134   unsigned long index, seqno;
135 
136   assert(d);
137   if ((index = find(d, key)) > 0) {
138     free(d->keys[index]);
139     free(d->values[index]);
140     seqno = d->index2seqno[index];
141     assert(seqno < d->entries);
142     assert(d->entries > 0);
143     d->entries--;
144     d->seqno2index[seqno] = d->seqno2index[d->entries];
145     d->index2seqno[d->seqno2index[seqno]] = seqno;
146   }
147 }
148 
149 
150 /* Forward declaration */
151 static int expand(Dictionary d);
152 
153 
154 /* dict_add -- add a key-value pair to dictionary, return 0 if out of memory */
dict_add(Dictionary d,const char * key,const char * value)155 EXPORT int dict_add(Dictionary d, const char *key, const char *value)
156 {
157   unsigned long index0, index, seqno, found;
158 
159   assert(d);
160   index0 = hash(d->size, key);
161   index = index0;
162   while (1) {
163     seqno = d->index2seqno[index];
164     if (seqno >= d->entries) { found = 0; break; }
165     if (d->seqno2index[seqno] != index) { found = 0; break; }
166     if (eq(d->keys[index], key)) { found = 1; break; }
167     index = (index + 1) % d->size;
168     if (index == 0) index = 1;
169     if (index == index0) {if (!expand(d)) return 0; } /* Dictionary full */
170   }
171   if (found) {
172     free(d->values[index]);
173     d->values[index] = strdup(value);
174     if (!d->values[index]) return 0; /* Out of memory */
175   } else {
176     assert(d->entries < d->size);
177     d->keys[index] = strdup(key);
178     if (!d->keys[index]) return 0; /* Out of memory */
179     d->values[index] = strdup(value);
180     if (!d->values[index]) { free(d->keys[index]); return 0; } /* Out of mem. */
181     d->seqno2index[d->entries] = index;
182     d->index2seqno[index] = d->entries;
183     d->entries++;
184     if (d->entries > d->size/2) (void) expand(d); /* Try expand, fail is OK */
185   }
186   return 1;
187 }
188 
189 
190 /* expand -- make the tables in the dictionary larger, return 0 if out of mem */
expand(Dictionary d)191 static int expand(Dictionary d)
192 {
193   unsigned long i;
194   Dictionary h;
195 
196   assert(d);
197   h = dict_create(2 * d->size);
198   if (!h) return 0;
199 
200   /* Add all old entries to the new dictionary */
201   for (i = 0; i < d->entries; i++) {
202     if (!dict_add(h, d->keys[d->seqno2index[i]],d->values[d->seqno2index[i]])) {
203       dict_delete(h);
204       return 0;			/* Failed */
205     }
206   }
207 
208   /* Succeeded in making a copy, now delete the old entries */
209   for (i = 0; i < d->entries; i++) {
210     free(d->keys[d->seqno2index[i]]);
211     free(d->values[d->seqno2index[i]]);
212   }
213   free(d->keys);
214   free(d->values);
215   free(d->seqno2index);
216   free(d->index2seqno);
217 
218   /* Put the larger arrays from the new dictionary in the old dictionary */
219   d->size = h->size;
220   assert(d->entries == h->entries);
221   d->keys = h->keys;
222   d->values = h->values;
223   d->index2seqno = h->index2seqno;
224   d->seqno2index = h->seqno2index;
225   free(h);
226 
227   return 1;
228 }
229 
230 
231 /* dict_find -- return value associated with key in dictionary, or NULL */
dict_find(Dictionary d,const char * key)232 EXPORT const char* dict_find(Dictionary d, const char* key)
233 {
234   unsigned long i;
235 
236   assert(d);
237   if ((i = find(d, key)) > 0) return d->values[i]; else return NULL;
238 }
239 
240 
241 /* dict_next -- return next key, or 1st key if NULL, or NULL if no more */
dict_next(Dictionary d,const char * prev_key)242 EXPORT const char *dict_next(Dictionary d, const char *prev_key)
243 {
244   unsigned long index, seqno;
245 
246   assert(d);
247   if (d->entries == 0) return NULL; /* Empty dictionary */
248   if (!prev_key) return d->keys[d->seqno2index[0]]; /* Return first key */
249   index = find(d, prev_key);
250   if (index == 0) return NULL;	/* Error, unknown key */
251   seqno = d->index2seqno[index] + 1;
252   if (seqno == d->entries) return NULL; /* No more keys */
253   return d->keys[d->seqno2index[seqno]];
254 }
255