1 /* libwapcaplet.h
2  *
3  * String internment and management tools.
4  *
5  * Copyright 2009 The NetSurf Browser Project.
6  *		  Daniel Silverstone <dsilvers@netsurf-browser.org>
7  */
8 
9 #ifndef libwapcaplet_h_
10 #define libwapcaplet_h_
11 
12 #ifdef __cplusplus
13 extern "C"
14 {
15 #endif
16 
17 #include <sys/types.h>
18 #include <stdbool.h>
19 #include <stddef.h>
20 #include <stdint.h>
21 #include <assert.h>
22 
/**
 * The type of a reference counter used in libwapcaplet.
 *
 * This is the type of the \c refcnt member of ::lwc_string.
 */
typedef uint32_t lwc_refcounter;
27 
/**
 * The type of a hash value used in libwapcaplet.
 *
 * This is the type of the \c hash member of ::lwc_string and of the values
 * yielded by ::lwc_string_hash_value and ::lwc_string_caseless_hash_value.
 */
typedef uint32_t lwc_hash;
32 
/**
 * An interned string.
 *
 * NOTE: The contents of this struct are considered *PRIVATE* and may
 * change in future revisions.  Do not rely on them whatsoever.
 * They are only exposed so that the inline functions and macros in this
 * header (ref, unref, comparisons, accessors) can use them.
 *
 * The character data is not a member of the struct: ::lwc_string_data
 * yields the address immediately following it.
 */
typedef struct lwc_string_s {
        /* NOTE(review): prevptr/next look like intrusive list links used by
         * the implementation; nothing in this header touches them -- confirm
         * against the .c file. */
        struct lwc_string_s **	prevptr;
        struct lwc_string_s *	next;
        /* Length of the string data, as returned by lwc_string_length. */
        size_t		len;
        /* Hash of the string, as returned by lwc_string_hash_value. */
        lwc_hash	hash;
        /* Reference count, adjusted by lwc_string_ref / lwc_string_unref. */
        lwc_refcounter	refcnt;
        /* Caseless counterpart of this string.  The code in this header calls
         * lwc__intern_caseless_string when it finds this NULL; it may point
         * at the string itself (see lwc_string_unref). */
        struct lwc_string_s *	insensitive;
} lwc_string;
49 
/**
 * String iteration function, as passed to ::lwc_iterate_strings.
 *
 * @param str A string which has been interned.
 * @param pw  The private word for the callback.
 */
typedef void (*lwc_iteration_callback_fn)(lwc_string *str, void *pw);
57 
/**
 * Result codes which libwapcaplet might return.
 *
 * ::lwc_error_ok is zero; every other value denotes a failure.
 */
typedef enum lwc_error_e {
	lwc_error_ok		= 0,	/**< No error. */
	lwc_error_oom		= 1,	/**< Out of memory. */
	lwc_error_range		= 2	/**< Substring internment out of range. */
} lwc_error;
66 
/**
 * Intern a string.
 *
 * Take a copy of the string data referred to by \a s and \a slen and
 * intern it.  The resulting ::lwc_string can be used for simple and
 * caseless comparisons by ::lwc_string_isequal and
 * ::lwc_string_caseless_isequal respectively.
 *
 * @param s    Pointer to the start of the string to intern.
 * @param slen Length of the string in characters. (Not including any
 *	       terminators)
 * @param ret  Pointer to ::lwc_string pointer to fill out.
 * @return     Result of operation, if not OK then the value pointed
 *	       to by \a ret will not be valid.
 *
 * @note The memory pointed to by \a s is not referenced by the result.
 * @note If the string was already present, its reference count is
 *	 incremented rather than allocating more memory.
 *
 * @note The returned string is currently NUL-terminated but this
 *	 will not necessarily be the case in future.  Try not to rely
 *	 on it.
 */
extern lwc_error lwc_intern_string(const char *s, size_t slen,
                                   lwc_string **ret);
92 
/**
 * Intern a substring.
 *
 * Intern a subsequence of the provided ::lwc_string.
 *
 * @param str	   String to acquire substring from.
 * @param ssoffset Substring offset into \a str.
 * @param sslen	   Substring length.
 * @param ret	   Pointer to pointer to ::lwc_string to fill out.
 * @return	   Result of operation, if not OK then the value
 *		   pointed to by \a ret will not be valid.
 *
 * @note NOTE(review): presumably ::lwc_error_range is the result when
 *	 \a ssoffset / \a sslen fall outside \a str -- confirm against
 *	 the implementation.
 */
extern lwc_error lwc_intern_substring(lwc_string *str,
                                      size_t ssoffset, size_t sslen,
                                      lwc_string **ret);
108 
/**
 * Obtain a lowercased lwc_string from a given lwc_string.
 *
 * @param str  String to create lowercase string from.
 * @param ret  Pointer to ::lwc_string pointer to fill out.
 * @return     Result of operation, if not OK then the value pointed
 *             to by \a ret will not be valid.
 */
extern lwc_error lwc_string_tolower(lwc_string *str, lwc_string **ret);
118 
119 /**
120  * Increment the reference count on an lwc_string.
121  *
122  * This increases the reference count on the given string. You should
123  * use this when copying a string pointer into a persistent data
124  * structure.
125  *
126  * @verbatim
127  *   myobject->str = lwc_string_ref(myparent->str);
128  * @endverbatim
129  *
130  * @param str The string to create another reference to.
131  * @return    The string pointer to use in your new data structure.
132  *
133  * @note Use this if copying the string and intending both sides to retain
134  * ownership.
135  */
136 #if defined(STMTEXPR)
137 #define lwc_string_ref(str) ({lwc_string *__lwc_s = (str); assert(__lwc_s != NULL); __lwc_s->refcnt++; __lwc_s;})
138 #else
139 static inline lwc_string *
lwc_string_ref(lwc_string * str)140 lwc_string_ref(lwc_string *str)
141 {
142 	assert(str != NULL);
143 	str->refcnt++;
144 	return str;
145 }
146 #endif
147 
/**
 * Release a reference on an lwc_string.
 *
 * This decreases the reference count on the given ::lwc_string and calls
 * ::lwc_string_destroy once the count shows the string is no longer held.
 *
 * @param str The string to unref.  Must not be NULL (checked with
 *            assert()).  The argument is evaluated exactly once.
 *
 * @note If the reference count reaches zero then the string will be
 *       freed. (Ref count of 1 where string is its own insensitive match
 *       will also result in the string being freed.)
 * @note The expansion is a single do { } while (0) statement so that it
 *       is safe as the body of an unbraced if/else; terminate the
 *       invocation with a semicolon, as for a function call.
 */
#define lwc_string_unref(str) do {					\
		lwc_string *lwc__s = (str);				\
		assert(lwc__s != NULL);					\
		lwc__s->refcnt--;					\
		if ((lwc__s->refcnt == 0) ||				\
		    ((lwc__s->refcnt == 1) &&				\
		     (lwc__s->insensitive == lwc__s))) {		\
			lwc_string_destroy(lwc__s);			\
		}							\
	} while (0)
167 
/**
 * Destroy an unreffed lwc_string.
 *
 * This destroys an lwc_string whose reference count indicates that it
 * should be.  It is what ::lwc_string_unref calls once the count has
 * dropped far enough.
 *
 * @param str The string to destroy.
 */
extern void lwc_string_destroy(lwc_string *str);
176 
/**
 * Check if two interned strings are equal.
 *
 * The comparison is a pointer comparison of \a str1 and \a str2; the
 * string contents are not inspected.
 *
 * @param str1 The first string in the comparison.
 * @param str2 The second string in the comparison.
 * @param ret  A pointer to a boolean to be filled out with the result.
 * @return     Result of operation; this macro always yields
 *	       ::lwc_error_ok.
 */
#define lwc_string_isequal(str1, str2, ret)		\
	(*(ret) = ((str1) == (str2)),			\
	 lwc_error_ok)
188 
/**
 * Intern a caseless copy of the passed string.
 *
 * @param str The string to intern the caseless copy of.
 *
 * @return    lwc_error_ok if successful, otherwise the
 *            error code describing the issue.
 *
 * @note This is for "internal" use by the caseless comparison
 *       macro and not for users.
 * @note The callers in this header read \a str->insensitive immediately
 *       after a successful return.
 */
extern lwc_error
lwc__intern_caseless_string(lwc_string *str);
202 
203 #if defined(STMTEXPR)
204 /**
205  * Check if two interned strings are case-insensitively equal.
206  *
207  * @param _str1 The first string in the comparison.
208  * @param _str2 The second string in the comparison.
209  * @param _ret  A pointer to a boolean to be filled out with the result.
210  * @return Result of operation, if not ok then value pointed to by \a ret will
211  *	    not be valid.
212  */
213 #define lwc_string_caseless_isequal(_str1,_str2,_ret) ({                \
214             lwc_error __lwc_err = lwc_error_ok;                         \
215             lwc_string *__lwc_str1 = (_str1);                           \
216             lwc_string *__lwc_str2 = (_str2);                           \
217             bool *__lwc_ret = (_ret);                                   \
218                                                                         \
219             if (__lwc_str1->insensitive == NULL) {                      \
220                 __lwc_err = lwc__intern_caseless_string(__lwc_str1);    \
221             }                                                           \
222             if (__lwc_err == lwc_error_ok && __lwc_str2->insensitive == NULL) { \
223                 __lwc_err = lwc__intern_caseless_string(__lwc_str2);    \
224             }                                                           \
225             if (__lwc_err == lwc_error_ok)                              \
226                 *__lwc_ret = (__lwc_str1->insensitive == __lwc_str2->insensitive); \
227             __lwc_err;                                                  \
228         })
229 
230 #else
231 /**
232  * Check if two interned strings are case-insensitively equal.
233  *
234  * @param str1 The first string in the comparison.
235  * @param str2 The second string in the comparison.
236  * @param ret  A pointer to a boolean to be filled out with the result.
237  * @return Result of operation, if not ok then value pointed to by \a ret will
238  *         not be valid.
239  */
240 static inline lwc_error
lwc_string_caseless_isequal(lwc_string * str1,lwc_string * str2,bool * ret)241 lwc_string_caseless_isequal(lwc_string *str1, lwc_string *str2, bool *ret)
242 {
243        lwc_error err = lwc_error_ok;
244        if (str1->insensitive == NULL) {
245            err = lwc__intern_caseless_string(str1);
246        }
247        if (err == lwc_error_ok && str2->insensitive == NULL) {
248            err = lwc__intern_caseless_string(str2);
249        }
250        if (err == lwc_error_ok)
251            *ret = (str1->insensitive == str2->insensitive);
252        return err;
253 }
254 #endif
255 
/**
 * Yield \a expr, first asserting that \a str is non-NULL where possible.
 *
 * Internal helper for the accessor macros in this header; not for users.
 * When STMTEXPR is defined a statement expression is used so that the
 * assert() runs before \a expr is evaluated; otherwise the macro is just
 * \a expr and no check is made.
 *
 * @param str  Pointer expression compared against NULL (STMTEXPR only).
 *             Parenthesised in the expansion so that arguments such as
 *             `a ? b : c` are compared as a whole.
 * @param expr Expression supplying the value of the macro.
 */
#if defined(STMTEXPR)
#define lwc__assert_and_expr(str, expr) ({assert((str) != NULL); (expr);})
#else
#define lwc__assert_and_expr(str, expr) (expr)
#endif
261 
/**
 * Retrieve the data pointer for an interned string.
 *
 * The data is located directly after the ::lwc_string structure, so the
 * result is the address one whole struct past \a str.
 *
 * @param str The string to retrieve the data pointer for.
 * @return    The C string data pointer for \a str.
 *
 * @note The data we point at belongs to the string and will
 *	 die with the string. Keep a ref if you need it.
 * @note You may not rely on the NUL termination of the strings
 *	 in future.  Any code relying on it currently should be
 *	 modified to use ::lwc_string_length if possible.
 */
#define lwc_string_data(str) lwc__assert_and_expr(str, (const char *)((str)+1))
275 
/**
 * Retrieve the data length for an interned string.
 *
 * @param str The string to retrieve the length of.
 * @return    The length of \a str (its stored \c len member, a size_t).
 */
#define lwc_string_length(str) lwc__assert_and_expr(str, (str)->len)
283 
/**
 * Retrieve the stored hash value for the content of the string.
 *
 * @param str The string to get the hash for.
 * @return    The 32 bit hash of \a str (its stored \c hash member).
 *
 * @note This API should only be used as a convenient way to retrieve a hash
 *	 value for the string. This hash value should not be relied on to be
 *	 unique within an invocation of the program, nor should it be relied upon
 *	 to be stable between invocations of the program. Never use the hash
 *	 value as a way to directly identify the value of the string.
 */
#define lwc_string_hash_value(str) lwc__assert_and_expr(str, (str)->hash)
297 
298 /**
299  * Retrieve a hash value for the caseless content of the string.
300  *
301  * @param str   The string to get caseless hash value for.
302  * @param hash  A pointer to a hash value to be filled out with the result.
303  * @return Result of operation, if not ok then value pointed to by \a ret will
304  *      not be valid.
305  */
lwc_string_caseless_hash_value(lwc_string * str,lwc_hash * hash)306 static inline lwc_error lwc_string_caseless_hash_value(
307 	lwc_string *str, lwc_hash *hash)
308 {
309 	if (str->insensitive == NULL) {
310 		lwc_error err = lwc__intern_caseless_string(str);
311 		if (err != lwc_error_ok) {
312 			return err;
313 		}
314 	}
315 
316 	*hash = str->insensitive->hash;
317 	return lwc_error_ok;
318 }
319 
320 
/**
 * Iterate the context and return every string in it.
 *
 * Each string found is handed to \a cb together with \a pw.
 *
 * If there are no strings found in the context, then this has the
 * side effect of removing the global context which will reduce the
 * chances of false-positives on leak checkers.
 *
 * @param cb The callback to give the string to.
 * @param pw The private word for the callback.
 */
extern void lwc_iterate_strings(lwc_iteration_callback_fn cb, void *pw);
332 
333 #ifdef __cplusplus
334 }
335 #endif
336 
337 #endif /* libwapcaplet_h_ */
338