1 /*
2  * critbit89 - A crit-bit tree implementation for strings in C89
3  * Written by Jonas Gehring <jonas@jgehring.net>
4  * Implemented key-value storing by Marek Vavrusa <marek.vavrusa@nic.cz>
5  * SPDX-License-Identifier: GPL-3.0-or-later
6  *
7  * The code makes the assumption that malloc returns pointers aligned at at
8  * least a two-byte boundary. Since the C standard requires that malloc return
9  * pointers that can store any type, there are no commonly-used toolchains for
10  * which this assumption is false.
11  *
12  * See https://github.com/agl/critbit/blob/master/critbit.pdf for reference.
13  */
14 
15 #include <errno.h>
16 #include <string.h>
17 #include <stdlib.h>
18 
19 #include "map.h"
20 #include "lib/utils.h"
21 
22  /* Exports */
23 #if defined _WIN32 || defined __CYGWIN__
24   #define EXPORT __attribute__ ((dllexport))
25 #else
26   #define EXPORT __attribute__ ((visibility ("default")))
27 #endif
28 
29 #ifdef _MSC_VER /* MSVC */
30  typedef unsigned __int8 uint8_t;
31  typedef unsigned __int32 uint32_t;
32  #ifdef _WIN64
33   typedef signed __int64 intptr_t;
34  #else
35   typedef _W64 signed int intptr_t;
36  #endif
37 #else /* Not MSVC */
38  #include <stdint.h>
39 #endif
40 
41 typedef struct {
42 	void* value;
43 	uint8_t key[];
44 } cb_data_t;
45 
46 typedef struct {
47 	void *child[2];
48 	uint32_t byte;
49 	uint8_t otherbits;
50 } cb_node_t;
51 
52 /* Return true if ptr is internal node. */
ref_is_internal(const uint8_t * p)53 static inline int ref_is_internal(const uint8_t *p)
54 {
55 	return 1 & (intptr_t)p;
56 }
57 
58 /* Get internal node. */
ref_get_internal(uint8_t * p)59 static inline cb_node_t *ref_get_internal(uint8_t *p)
60 {
61 	return (cb_node_t *)(p - 1);
62 }
63 
64 /* Static helper functions */
cbt_traverse_delete(map_t * map,void * top)65 static void cbt_traverse_delete(map_t *map, void *top)
66 {
67 	uint8_t *p = top;
68 	if (ref_is_internal(p)) {
69 		cb_node_t *q = ref_get_internal(p);
70 		cbt_traverse_delete(map, q->child[0]);
71 		cbt_traverse_delete(map, q->child[1]);
72 		mm_free(map->pool, q);
73 	} else {
74 		mm_free(map->pool, p);
75 	}
76 }
77 
cbt_traverse_prefixed(void * top,int (* callback)(const char *,void *,void *),void * baton)78 static int cbt_traverse_prefixed(void *top,
79 	int (*callback)(const char *, void *, void *), void *baton)
80 {
81 	uint8_t *p = top;
82 	cb_data_t *x = (cb_data_t *)top;
83 
84 	if (ref_is_internal(p)) {
85 		cb_node_t *q = ref_get_internal(p);
86 		int ret = 0;
87 
88 		ret = cbt_traverse_prefixed(q->child[0], callback, baton);
89 		if (ret != 0) {
90 			return ret;
91 		}
92 		ret = cbt_traverse_prefixed(q->child[1], callback, baton);
93 		if (ret != 0) {
94 			return ret;
95 		}
96 		return 0;
97 	}
98 
99 	return (callback)((const char *)x->key, x->value, baton);
100 }
101 
cbt_make_data(map_t * map,const uint8_t * str,size_t len,void * value)102 static cb_data_t *cbt_make_data(map_t *map, const uint8_t *str, size_t len, void *value)
103 {
104 	cb_data_t *x = mm_alloc(map->pool, sizeof(cb_data_t) + len);
105 	if (x != NULL) {
106 		x->value = value;
107 		memcpy(x->key, str, len);
108 	}
109 	return x;
110 }
111 
112 /*! Like map_contains, but also set the value, if passed and found. */
cbt_get(map_t * map,const char * str,void ** value)113 static int cbt_get(map_t *map, const char *str, void **value)
114 {
115 	const uint8_t *ubytes = (void *)str;
116 	const size_t ulen = strlen(str);
117 	uint8_t *p = map->root;
118 	cb_data_t *x = NULL;
119 
120 	if (p == NULL) {
121 		return 0;
122 	}
123 
124 	while (ref_is_internal(p)) {
125 		cb_node_t *q = ref_get_internal(p);
126 		uint8_t c = 0;
127 		int direction;
128 
129 		if (q->byte < ulen) {
130 			c = ubytes[q->byte];
131 		}
132 		direction = (1 + (q->otherbits | c)) >> 8;
133 
134 		p = q->child[direction];
135 	}
136 
137 	x = (cb_data_t *)p;
138 	if (strcmp(str, (const char *)x->key) == 0) {
139 		if (value != NULL) {
140 			*value = x->value;
141 		}
142 		return 1;
143 	}
144 
145 	return 0;
146 }
147 
148 /*! Returns non-zero if map contains str */
map_contains(map_t * map,const char * str)149 EXPORT int map_contains(map_t *map, const char *str)
150 {
151 	return cbt_get(map, str, NULL);
152 }
153 
map_get(map_t * map,const char * str)154 EXPORT void *map_get(map_t *map, const char *str)
155 {
156 	void *v = NULL;
157 	cbt_get(map, str, &v);
158 	return v;
159 }
160 
map_set(map_t * map,const char * str,void * val)161 EXPORT int map_set(map_t *map, const char *str, void *val)
162 {
163 	const uint8_t *const ubytes = (void *)str;
164 	const size_t ulen = strlen(str);
165 	uint8_t *p = map->root;
166 	uint8_t c = 0, *x = NULL;
167 	uint32_t newbyte = 0;
168 	uint32_t newotherbits = 0;
169 	int direction = 0, newdirection = 0;
170 	cb_node_t *newnode = NULL;
171 	cb_data_t *data = NULL;
172 	void **wherep = NULL;
173 
174 	if (p == NULL) {
175 		map->root = cbt_make_data(map, (const uint8_t *)str, ulen + 1, val);
176 		if (map->root == NULL) {
177 			return ENOMEM;
178 		}
179 		return 0;
180 	}
181 
182 	while (ref_is_internal(p)) {
183 		cb_node_t *q = ref_get_internal(p);
184 		c = 0;
185 		if (q->byte < ulen) {
186 			c = ubytes[q->byte];
187 		}
188 		direction = (1 + (q->otherbits | c)) >> 8;
189 
190 		p = q->child[direction];
191 	}
192 
193 	data = (cb_data_t *)p;
194 	for (newbyte = 0; newbyte < ulen; ++newbyte) {
195 		if (data->key[newbyte] != ubytes[newbyte]) {
196 			newotherbits = data->key[newbyte] ^ ubytes[newbyte];
197 			goto different_byte_found;
198 		}
199 	}
200 
201 	if (data->key[newbyte] != 0) {
202 		newotherbits = data->key[newbyte];
203 		goto different_byte_found;
204 	}
205 	data->value = val;
206 	return 1;
207 
208 different_byte_found:
209 	newotherbits |= newotherbits >> 1;
210 	newotherbits |= newotherbits >> 2;
211 	newotherbits |= newotherbits >> 4;
212 	newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255;
213 	c = data->key[newbyte];
214 	newdirection = (1 + (newotherbits | c)) >> 8;
215 
216 	newnode = mm_alloc(map->pool, sizeof(cb_node_t));
217 	if (newnode == NULL) {
218 		return ENOMEM;
219 	}
220 
221 	x = (uint8_t *)cbt_make_data(map, ubytes, ulen + 1, val);
222 	if (x == NULL) {
223 		mm_free(map->pool, newnode);
224 		return ENOMEM;
225 	}
226 
227 	newnode->byte = newbyte;
228 	newnode->otherbits = newotherbits;
229 	newnode->child[1 - newdirection] = x;
230 
231 	/* Insert into map */
232 	wherep = &map->root;
233 	for (;;) {
234 		cb_node_t *q;
235 		p = *wherep;
236 		if (!ref_is_internal(p)) {
237 			break;
238 		}
239 
240 		q = ref_get_internal(p);
241 		if (q->byte > newbyte) {
242 			break;
243 		}
244 		if (q->byte == newbyte && q->otherbits > newotherbits) {
245 			break;
246 		}
247 
248 		c = 0;
249 		if (q->byte < ulen) {
250 			c = ubytes[q->byte];
251 		}
252 		direction = (1 + (q->otherbits | c)) >> 8;
253 		wherep = q->child + direction;
254 	}
255 
256 	newnode->child[newdirection] = *wherep;
257 	*wherep = (void *)(1 + (char *)newnode);
258 	return 0;
259 }
260 
261 /*! Deletes str from the map, returns 0 on success */
map_del(map_t * map,const char * str)262 EXPORT int map_del(map_t *map, const char *str)
263 {
264 	const uint8_t *ubytes = (void *)str;
265 	const size_t ulen = strlen(str);
266 	uint8_t *p = map->root;
267 	void **wherep = NULL, **whereq = NULL;
268 	cb_node_t *q = NULL;
269 	cb_data_t *data = NULL;
270 	int direction = 0;
271 
272 	if (map->root == NULL) {
273 		return 1;
274 	}
275 	wherep = &map->root;
276 
277 	while (ref_is_internal(p)) {
278 		uint8_t c = 0;
279 		whereq = wherep;
280 		q = ref_get_internal(p);
281 
282 		if (q->byte < ulen) {
283 			c = ubytes[q->byte];
284 		}
285 		direction = (1 + (q->otherbits | c)) >> 8;
286 		wherep = q->child + direction;
287 		p = *wherep;
288 	}
289 
290 	data = (cb_data_t *)p;
291 	if (strcmp(str, (const char *)data->key) != 0) {
292 		return 1;
293 	}
294 	mm_free(map->pool, p);
295 
296 	if (!whereq) {
297 		map->root = NULL;
298 		return 0;
299 	}
300 
301 	*whereq = q->child[1 - direction];
302 	mm_free(map->pool, q);
303 	return 0;
304 }
305 
306 /*! Clears the given map */
map_clear(map_t * map)307 EXPORT void map_clear(map_t *map)
308 {
309 	if (map->root) {
310 		cbt_traverse_delete(map, map->root);
311 	}
312 	map->root = NULL;
313 }
314 
315 /*! Calls callback for all strings in map with the given prefix */
map_walk_prefixed(map_t * map,const char * prefix,int (* callback)(const char *,void *,void *),void * baton)316 EXPORT int map_walk_prefixed(map_t *map, const char *prefix,
317 	int (*callback)(const char *, void *, void *), void *baton)
318 {
319 	if (!map) {
320 		return 0;
321 	}
322 
323 	const uint8_t *ubytes = (void *)prefix;
324 	const size_t ulen = strlen(prefix);
325 	uint8_t *p = map->root;
326 	uint8_t *top = p;
327 	cb_data_t *data = NULL;
328 
329 	if (p == NULL) {
330 		return 0;
331 	}
332 
333 	while (ref_is_internal(p)) {
334 		cb_node_t *q = ref_get_internal(p);
335 		uint8_t c = 0;
336 		int direction;
337 
338 		if (q->byte < ulen) {
339 			c = ubytes[q->byte];
340 		}
341 		direction = (1 + (q->otherbits | c)) >> 8;
342 
343 		p = q->child[direction];
344 		if (q->byte < ulen) {
345 			top = p;
346 		}
347 	}
348 
349 	data = (cb_data_t *)p;
350 	if (strlen((const char *)data->key) < ulen || memcmp(data->key, prefix, ulen) != 0) {
351 		return 0; /* No strings match */
352 	}
353 
354 	return cbt_traverse_prefixed(top, callback, baton);
355 }
356