1 /* libwapcaplet.c
2 *
3 * String internment and management tools.
4 *
5 * Copyright 2009 The NetSurf Browser Project.
6 * Daniel Silverstone <dsilvers@netsurf-browser.org>
7 */
8
9 #include <stdlib.h>
10 #include <string.h>
11 #include <assert.h>
12
13 #include "libwapcaplet/libwapcaplet.h"
14
15 #ifndef UNUSED
16 #define UNUSED(x) ((x) = (x))
17 #endif
18
19 static inline lwc_hash
lwc__calculate_hash(const char * str,size_t len)20 lwc__calculate_hash(const char *str, size_t len)
21 {
22 lwc_hash z = 0x811c9dc5;
23
24 while (len > 0) {
25 z *= 0x01000193;
26 z ^= *str++;
27 len--;
28 }
29
30 return z;
31 }
32
33 #define STR_OF(str) ((char *)(str + 1))
34 #define CSTR_OF(str) ((const char *)(str + 1))
35
36 #define NR_BUCKETS_DEFAULT (4091)
37
38 typedef struct lwc_context_s {
39 lwc_string ** buckets;
40 lwc_hash bucketcount;
41 } lwc_context;
42
43 static lwc_context *ctx = NULL;
44
45 #define LWC_ALLOC(s) malloc(s)
46 #define LWC_FREE(p) free(p)
47
48 typedef lwc_hash (*lwc_hasher)(const char *, size_t);
49 typedef int (*lwc_strncmp)(const char *, const char *, size_t);
50 typedef void * (*lwc_memcpy)(void * restrict, const void * restrict, size_t);
51
52 static lwc_error
lwc__initialise(void)53 lwc__initialise(void)
54 {
55 if (ctx != NULL)
56 return lwc_error_ok;
57
58 ctx = LWC_ALLOC(sizeof(lwc_context));
59
60 if (ctx == NULL)
61 return lwc_error_oom;
62
63 memset(ctx, 0, sizeof(lwc_context));
64
65 ctx->bucketcount = NR_BUCKETS_DEFAULT;
66 ctx->buckets = LWC_ALLOC(sizeof(lwc_string *) * ctx->bucketcount);
67
68 if (ctx->buckets == NULL) {
69 LWC_FREE(ctx);
70 ctx = NULL;
71 return lwc_error_oom;
72 }
73
74 memset(ctx->buckets, 0, sizeof(lwc_string *) * ctx->bucketcount);
75
76 return lwc_error_ok;
77 }
78
79 static lwc_error
lwc__intern(const char * s,size_t slen,lwc_string ** ret,lwc_hasher hasher,lwc_strncmp compare,lwc_memcpy copy)80 lwc__intern(const char *s, size_t slen,
81 lwc_string **ret,
82 lwc_hasher hasher,
83 lwc_strncmp compare,
84 lwc_memcpy copy)
85 {
86 lwc_hash h;
87 lwc_hash bucket;
88 lwc_string *str;
89 lwc_error eret;
90
91 assert((s != NULL) || (slen == 0));
92 assert(ret);
93
94 if (ctx == NULL) {
95 eret = lwc__initialise();
96 if (eret != lwc_error_ok)
97 return eret;
98 if (ctx == NULL)
99 return lwc_error_oom;
100 }
101
102 h = hasher(s, slen);
103 bucket = h % ctx->bucketcount;
104 str = ctx->buckets[bucket];
105
106 while (str != NULL) {
107 if ((str->hash == h) && (str->len == slen)) {
108 if (compare(CSTR_OF(str), s, slen) == 0) {
109 str->refcnt++;
110 *ret = str;
111 return lwc_error_ok;
112 }
113 }
114 str = str->next;
115 }
116
117 /* Add one for the additional NUL. */
118 *ret = str = LWC_ALLOC(sizeof(lwc_string) + slen + 1);
119
120 if (str == NULL)
121 return lwc_error_oom;
122
123 str->prevptr = &(ctx->buckets[bucket]);
124 str->next = ctx->buckets[bucket];
125 if (str->next != NULL)
126 str->next->prevptr = &(str->next);
127 ctx->buckets[bucket] = str;
128
129 str->len = slen;
130 str->hash = h;
131 str->refcnt = 1;
132 str->insensitive = NULL;
133
134 copy(STR_OF(str), s, slen);
135
136 /* Guarantee NUL termination */
137 STR_OF(str)[slen] = '\0';
138
139 return lwc_error_ok;
140 }
141
142 lwc_error
lwc_intern_string(const char * s,size_t slen,lwc_string ** ret)143 lwc_intern_string(const char *s, size_t slen,
144 lwc_string **ret)
145 {
146 return lwc__intern(s, slen, ret,
147 lwc__calculate_hash,
148 strncmp, (lwc_memcpy)memcpy);
149 }
150
151 lwc_error
lwc_intern_substring(lwc_string * str,size_t ssoffset,size_t sslen,lwc_string ** ret)152 lwc_intern_substring(lwc_string *str,
153 size_t ssoffset, size_t sslen,
154 lwc_string **ret)
155 {
156 assert(str);
157 assert(ret);
158
159 if (ssoffset >= str->len)
160 return lwc_error_range;
161 if ((ssoffset + sslen) > str->len)
162 return lwc_error_range;
163
164 return lwc_intern_string(CSTR_OF(str) + ssoffset, sslen, ret);
165 }
166
167 lwc_error
lwc_string_tolower(lwc_string * str,lwc_string ** ret)168 lwc_string_tolower(lwc_string *str, lwc_string **ret)
169 {
170 assert(str);
171 assert(ret);
172
173 /* Internally make use of knowledge that insensitive strings
174 * are lower case. */
175 if (str->insensitive == NULL) {
176 lwc_error error = lwc__intern_caseless_string(str);
177 if (error != lwc_error_ok) {
178 return error;
179 }
180 }
181
182 *ret = lwc_string_ref(str->insensitive);
183 return lwc_error_ok;
184 }
185
186 void
lwc_string_destroy(lwc_string * str)187 lwc_string_destroy(lwc_string *str)
188 {
189 assert(str);
190
191 *(str->prevptr) = str->next;
192
193 if (str->next != NULL)
194 str->next->prevptr = str->prevptr;
195
196 if (str->insensitive != NULL && str->refcnt == 0)
197 lwc_string_unref(str->insensitive);
198
199 #ifndef NDEBUG
200 memset(str, 0xA5, sizeof(*str) + str->len);
201 #endif
202
203 LWC_FREE(str);
204 }
205
206 /**** Shonky caseless bits ****/
207
208 static inline char
lwc__dolower(const char c)209 lwc__dolower(const char c)
210 {
211 if (c >= 'A' && c <= 'Z')
212 return c + 'a' - 'A';
213 return c;
214 }
215
216 static inline lwc_hash
lwc__calculate_lcase_hash(const char * str,size_t len)217 lwc__calculate_lcase_hash(const char *str, size_t len)
218 {
219 lwc_hash z = 0x811c9dc5;
220
221 while (len > 0) {
222 z *= 0x01000193;
223 z ^= lwc__dolower(*str++);
224 len--;
225 }
226
227 return z;
228 }
229
230 static int
lwc__lcase_strncmp(const char * s1,const char * s2,size_t n)231 lwc__lcase_strncmp(const char *s1, const char *s2, size_t n)
232 {
233 while (n--) {
234 if (*s1++ != lwc__dolower(*s2++))
235 /** @todo Test this somehow? */
236 return 1;
237 }
238 return 0;
239 }
240
241 static void *
lwc__lcase_memcpy(void * restrict _target,const void * restrict _source,size_t n)242 lwc__lcase_memcpy(void *restrict _target, const void *restrict _source, size_t n)
243 {
244 char *restrict target = _target;
245 const char *restrict source = _source;
246
247 while (n--) {
248 *target++ = lwc__dolower(*source++);
249 }
250
251 return _target;
252 }
253
254 lwc_error
lwc__intern_caseless_string(lwc_string * str)255 lwc__intern_caseless_string(lwc_string *str)
256 {
257 assert(str);
258 assert(str->insensitive == NULL);
259
260 return lwc__intern(CSTR_OF(str),
261 str->len, &(str->insensitive),
262 lwc__calculate_lcase_hash,
263 lwc__lcase_strncmp,
264 lwc__lcase_memcpy);
265 }
266
267 /**** Iteration ****/
268
269 void
lwc_iterate_strings(lwc_iteration_callback_fn cb,void * pw)270 lwc_iterate_strings(lwc_iteration_callback_fn cb, void *pw)
271 {
272 lwc_hash n;
273 lwc_string *str;
274 bool found = false;
275
276 if (ctx == NULL)
277 return;
278
279 for (n = 0; n < ctx->bucketcount; ++n) {
280 for (str = ctx->buckets[n]; str != NULL; str = str->next) {
281 found = true;
282 cb(str, pw);
283 }
284 }
285
286 if (found == false) {
287 /* We found no strings, so remove the global context. */
288 free(ctx->buckets);
289 free(ctx);
290 ctx = NULL;
291 }
292 }
293