1 /* libwapcaplet.h
2 *
3 * String internment and management tools.
4 *
5 * Copyright 2009 The NetSurf Browser Project.
6 * Daniel Silverstone <dsilvers@netsurf-browser.org>
7 */
8
9 #ifndef libwapcaplet_h_
10 #define libwapcaplet_h_
11
12 #ifdef __cplusplus
13 extern "C"
14 {
15 #endif
16
17 #include <sys/types.h>
18 #include <stdbool.h>
19 #include <stddef.h>
20 #include <stdint.h>
21 #include <assert.h>
22
/**
 * Reference-count type for interned strings.
 *
 * A 32-bit unsigned counter. It is the type of the \c refcnt member of
 * ::lwc_string, which ::lwc_string_ref increments and
 * ::lwc_string_unref decrements.
 */
typedef uint32_t lwc_refcounter;
27
/**
 * Hash value type for interned strings.
 *
 * A 32-bit unsigned value. It is the type of the \c hash member of
 * ::lwc_string and of the result of ::lwc_string_hash_value.
 */
typedef uint32_t lwc_hash;
32
33 /**
34 * An interned string.
35 *
36 * NOTE: The contents of this struct are considered *PRIVATE* and may
37 * change in future revisions. Do not rely on them whatsoever.
38 * They're only here at all so that the ref, unref and matches etc can
39 * use them.
40 */
41 typedef struct lwc_string_s {
42 struct lwc_string_s ** prevptr;
43 struct lwc_string_s * next;
44 size_t len;
45 lwc_hash hash;
46 lwc_refcounter refcnt;
47 struct lwc_string_s * insensitive;
48 } lwc_string;
49
50 /**
51 * String iteration function
52 *
53 * @param str A string which has been interned.
54 * @param pw The private pointer for the allocator.
55 */
56 typedef void (*lwc_iteration_callback_fn)(lwc_string *str, void *pw);
57
/**
 * Result codes returned by libwapcaplet operations.
 */
typedef enum lwc_error_e {
    /** No error. */
    lwc_error_ok = 0,
    /** Out of memory. */
    lwc_error_oom = 1,
    /** Substring internment out of range. */
    lwc_error_range = 2
} lwc_error;
66
67 /**
68 * Intern a string.
69 *
70 * Take a copy of the string data referred to by \a s and \a slen and
71 * intern it. The resulting ::lwc_string can be used for simple and
72 * caseless comparisons by ::lwc_string_isequal and
73 * ::lwc_string_caseless_isequal respectively.
74 *
75 * @param s Pointer to the start of the string to intern.
76 * @param slen Length of the string in characters. (Not including any
77 * terminators)
78 * @param ret Pointer to ::lwc_string pointer to fill out.
79 * @return Result of operation, if not OK then the value pointed
80 * to by \a ret will not be valid.
81 *
82 * @note The memory pointed to by \a s is not referenced by the result.
83 * @note If the string was already present, its reference count is
84 * incremented rather than allocating more memory.
85 *
86 * @note The returned string is currently NULL-terminated but this
87 * will not necessarily be the case in future. Try not to rely
88 * on it.
89 */
90 extern lwc_error lwc_intern_string(const char *s, size_t slen,
91 lwc_string **ret);
92
93 /**
94 * Intern a substring.
95 *
96 * Intern a subsequence of the provided ::lwc_string.
97 *
98 * @param str String to acquire substring from.
99 * @param ssoffset Substring offset into \a str.
100 * @param sslen Substring length.
101 * @param ret Pointer to pointer to ::lwc_string to fill out.
102 * @return Result of operation, if not OK then the value
103 * pointed to by \a ret will not be valid.
104 */
105 extern lwc_error lwc_intern_substring(lwc_string *str,
106 size_t ssoffset, size_t sslen,
107 lwc_string **ret);
108
109 /**
110 * Optain a lowercased lwc_string from given lwc_string.
111 *
112 * @param str String to create lowercase string from.
113 * @param ret Pointer to ::lwc_string pointer to fill out.
114 * @return Result of operation, if not OK then the value pointed
115 * to by \a ret will not be valid.
116 */
117 extern lwc_error lwc_string_tolower(lwc_string *str, lwc_string **ret);
118
119 /**
120 * Increment the reference count on an lwc_string.
121 *
122 * This increases the reference count on the given string. You should
123 * use this when copying a string pointer into a persistent data
124 * structure.
125 *
126 * @verbatim
127 * myobject->str = lwc_string_ref(myparent->str);
128 * @endverbatim
129 *
130 * @param str The string to create another reference to.
131 * @return The string pointer to use in your new data structure.
132 *
133 * @note Use this if copying the string and intending both sides to retain
134 * ownership.
135 */
136 #if defined(STMTEXPR)
137 #define lwc_string_ref(str) ({lwc_string *__lwc_s = (str); assert(__lwc_s != NULL); __lwc_s->refcnt++; __lwc_s;})
138 #else
139 static inline lwc_string *
lwc_string_ref(lwc_string * str)140 lwc_string_ref(lwc_string *str)
141 {
142 assert(str != NULL);
143 str->refcnt++;
144 return str;
145 }
146 #endif
147
/**
 * Release a reference on an lwc_string.
 *
 * Decrements the reference count on the given ::lwc_string and calls
 * lwc_string_destroy() on it when no external references remain.
 *
 * @param str  The string to unref. Must not be NULL (checked with
 *             assert()). The argument is evaluated exactly once.
 *
 * @note If the reference count reaches zero then the string will be
 *       freed. (A count of 1 where the string is its own insensitive
 *       match will also result in the string being freed.)
 * @note The expansion is a single statement wrapped in
 *       do { ... } while (0): terminate the call with a semicolon.
 *       This keeps it safe inside unbraced if/else bodies.
 */
#define lwc_string_unref(str) do { \
        lwc_string *__lwc_s = (str); \
        assert(__lwc_s != NULL); \
        __lwc_s->refcnt--; \
        if ((__lwc_s->refcnt == 0) || \
            ((__lwc_s->refcnt == 1) && \
             (__lwc_s->insensitive == __lwc_s))) { \
            lwc_string_destroy(__lwc_s); \
        } \
    } while (0)
167
168 /**
169 * Destroy an unreffed lwc_string.
170 *
171 * This destroys an lwc_string whose reference count indicates that it should be.
172 *
173 * @param str The string to unref.
174 */
175 extern void lwc_string_destroy(lwc_string *str);
176
/**
 * Check if two interned strings are equal.
 *
 * The comparison is a pointer-identity test on the two arguments. The
 * result is stored through \a ret and the expression itself always
 * evaluates to ::lwc_error_ok.
 *
 * @param str1  The first string in the comparison.
 * @param str2  The second string in the comparison.
 * @param ret   A pointer to a boolean to be filled out with the result.
 * @return      Result of the operation. If not ok then the value
 *              pointed to by \a ret will not be valid.
 */
#define lwc_string_isequal(str1, str2, ret) \
    ((*(ret) = ((str1) == (str2))), lwc_error_ok)
188
189 /**
190 * Intern a caseless copy of the passed string.
191 *
192 * @param str The string to intern the caseless copy of.
193 *
194 * @return lwc_error_ok if successful, otherwise the
195 * error code describing the issue.,
196 *
197 * @note This is for "internal" use by the caseless comparison
198 * macro and not for users.
199 */
200 extern lwc_error
201 lwc__intern_caseless_string(lwc_string *str);
202
203 #if defined(STMTEXPR)
204 /**
205 * Check if two interned strings are case-insensitively equal.
206 *
207 * @param _str1 The first string in the comparison.
208 * @param _str2 The second string in the comparison.
209 * @param _ret A pointer to a boolean to be filled out with the result.
210 * @return Result of operation, if not ok then value pointed to by \a ret will
211 * not be valid.
212 */
213 #define lwc_string_caseless_isequal(_str1,_str2,_ret) ({ \
214 lwc_error __lwc_err = lwc_error_ok; \
215 lwc_string *__lwc_str1 = (_str1); \
216 lwc_string *__lwc_str2 = (_str2); \
217 bool *__lwc_ret = (_ret); \
218 \
219 if (__lwc_str1->insensitive == NULL) { \
220 __lwc_err = lwc__intern_caseless_string(__lwc_str1); \
221 } \
222 if (__lwc_err == lwc_error_ok && __lwc_str2->insensitive == NULL) { \
223 __lwc_err = lwc__intern_caseless_string(__lwc_str2); \
224 } \
225 if (__lwc_err == lwc_error_ok) \
226 *__lwc_ret = (__lwc_str1->insensitive == __lwc_str2->insensitive); \
227 __lwc_err; \
228 })
229
230 #else
231 /**
232 * Check if two interned strings are case-insensitively equal.
233 *
234 * @param str1 The first string in the comparison.
235 * @param str2 The second string in the comparison.
236 * @param ret A pointer to a boolean to be filled out with the result.
237 * @return Result of operation, if not ok then value pointed to by \a ret will
238 * not be valid.
239 */
240 static inline lwc_error
lwc_string_caseless_isequal(lwc_string * str1,lwc_string * str2,bool * ret)241 lwc_string_caseless_isequal(lwc_string *str1, lwc_string *str2, bool *ret)
242 {
243 lwc_error err = lwc_error_ok;
244 if (str1->insensitive == NULL) {
245 err = lwc__intern_caseless_string(str1);
246 }
247 if (err == lwc_error_ok && str2->insensitive == NULL) {
248 err = lwc__intern_caseless_string(str2);
249 }
250 if (err == lwc_error_ok)
251 *ret = (str1->insensitive == str2->insensitive);
252 return err;
253 }
254 #endif
255
/*
 * Internal helper for the accessor macros: evaluate \a expr, and when
 * statement expressions are enabled (STMTEXPR) first assert that
 * \a str is not NULL.
 *
 * Both arguments are parenthesised in the expansion so that operator
 * expressions (for example a conditional passed as \a str) keep their
 * meaning. Note that the STMTEXPR variant evaluates \a str here in
 * addition to any use of it inside \a expr.
 */
#if defined(STMTEXPR)
#define lwc__assert_and_expr(str, expr) ({ assert((str) != NULL); (expr); })
#else
#define lwc__assert_and_expr(str, expr) (expr)
#endif
261
/**
 * Retrieve the data pointer for an interned string.
 *
 * The data is read from the memory immediately following the
 * ::lwc_string header.
 *
 * @param str  The string to retrieve the data pointer for.
 * @return     The C string data pointer for \a str.
 *
 * @note The data belongs to the string and dies with it. Keep a
 *       reference for as long as the pointer is needed.
 * @note NUL termination of the data may not be relied upon in future.
 *       Code that currently relies on it should be changed to use
 *       ::lwc_string_length where possible.
 */
#define lwc_string_data(str) \
    lwc__assert_and_expr(str, (const char *)((str) + 1))
275
/**
 * Retrieve the data length for an interned string.
 *
 * @param str  The string to retrieve the length of.
 * @return     The length of \a str, as stored in the string.
 */
#define lwc_string_length(str) \
    lwc__assert_and_expr(str, (str)->len)
283
/**
 * Retrieve the hash value stored for the content of the string.
 *
 * @param str  The string to get the hash for.
 * @return     The 32 bit hash of \a str.
 *
 * @note Treat this only as a convenient way to obtain a hash for the
 *       string. The value must not be assumed unique within one run
 *       of the program, nor stable between runs. Never use it as a
 *       way to directly identify the value of the string.
 */
#define lwc_string_hash_value(str) \
    lwc__assert_and_expr(str, (str)->hash)
297
298 /**
299 * Retrieve a hash value for the caseless content of the string.
300 *
301 * @param str The string to get caseless hash value for.
302 * @param hash A pointer to a hash value to be filled out with the result.
303 * @return Result of operation, if not ok then value pointed to by \a ret will
304 * not be valid.
305 */
lwc_string_caseless_hash_value(lwc_string * str,lwc_hash * hash)306 static inline lwc_error lwc_string_caseless_hash_value(
307 lwc_string *str, lwc_hash *hash)
308 {
309 if (str->insensitive == NULL) {
310 lwc_error err = lwc__intern_caseless_string(str);
311 if (err != lwc_error_ok) {
312 return err;
313 }
314 }
315
316 *hash = str->insensitive->hash;
317 return lwc_error_ok;
318 }
319
320
321 /**
322 * Iterate the context and return every string in it.
323 *
324 * If there are no strings found in the context, then this has the
325 * side effect of removing the global context which will reduce the
326 * chances of false-positives on leak checkers.
327 *
328 * @param cb The callback to give the string to.
329 * @param pw The private word for the callback.
330 */
331 extern void lwc_iterate_strings(lwc_iteration_callback_fn cb, void *pw);
332
333 #ifdef __cplusplus
334 }
335 #endif
336
337 #endif /* libwapcaplet_h_ */
338