1 /*
2   Copyright 2011-2016 David Robillard <http://drobilla.net>
3 
4   Permission to use, copy, modify, and/or distribute this software for any
5   purpose with or without fee is hereby granted, provided that the above
6   copyright notice and this permission notice appear in all copies.
7 
8   THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16 
17 #define _XOPEN_SOURCE 500
18 
19 #include <assert.h>
20 #include <stdint.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 
25 // #define ZIX_TRIE_LINEAR_SEARCH 1
26 
27 #include "zix/common.h"
28 #include "zix/trie.h"
29 #include "zix/trie_util.h"
30 
31 //typedef uint_fast8_t n_edges_t;
32 typedef uintptr_t n_edges_t;
33 
34 typedef struct {
35 	const uint8_t* str;           /* String stored at this node */
36 	n_edges_t      num_children;  /* Number of outgoing edges */
37 
38 	/** Array of first child characters, followed by parallel array of pointers
39 	    to ZixTrieNode pointers. */
40 	uint8_t children[];
41 } ZixTrieNode;
42 
43 struct _ZixTrie {
44 	ZixTrieNode* root;  /* Root of the tree */
45 };
46 
47 /** Round `size` up to the next multiple of the word size. */
48 ZIX_PRIVATE uint32_t
zix_trie_pad(uint32_t size)49 zix_trie_pad(uint32_t size)
50 {
51 	return (size + sizeof(void*) - 1) & (~(sizeof(void*) - 1));
52 }
53 
54 ZIX_PRIVATE ZixTrieNode**
zix_trie_get_children(ZixTrieNode * node)55 zix_trie_get_children(ZixTrieNode* node)
56 {
57 	return (ZixTrieNode**)(node->children +
58 	                       zix_trie_pad(node->num_children));
59 }
60 
61 ZIX_PRIVATE ZixTrieNode**
zix_trie_get_child_ptr(ZixTrieNode * node,n_edges_t index)62 zix_trie_get_child_ptr(ZixTrieNode* node, n_edges_t index)
63 {
64 	return zix_trie_get_children(node) + index;
65 }
66 
67 ZIX_PRIVATE void
zix_trie_print_rec(ZixTrieNode * node,FILE * fd)68 zix_trie_print_rec(ZixTrieNode* node, FILE* fd)
69 {
70 	if (node->str) {
71 		fprintf(fd, "\t\"%p\" [label=\"%s\"] ;\n", (void*)node, node->str);
72 	} else {
73 		fprintf(fd, "\t\"%p\" [label=\"\"] ;\n", (void*)node);
74 	}
75 
76 	for (n_edges_t i = 0; i < node->num_children; ++i) {
77 		ZixTrieNode* const child = *zix_trie_get_child_ptr(node, i);
78 		zix_trie_print_rec(child, fd);
79 		fprintf(fd, "\t\"%p\" -> \"%p\" [ label = \"%c\" ];\n",
80 		        (void*)node, (void*)child, node->children[i]);
81 	}
82 }
83 
84 ZIX_API
85 void
zix_trie_print_dot(const ZixTrie * t,FILE * fd)86 zix_trie_print_dot(const ZixTrie* t, FILE* fd)
87 {
88 	fprintf(fd, "digraph Trie { \n");
89 	zix_trie_print_rec(t->root, fd);
90 	fprintf(fd, "}\n");
91 }
92 
93 ZIX_PRIVATE size_t
zix_trie_node_size(n_edges_t num_children)94 zix_trie_node_size(n_edges_t num_children)
95 {
96 	return (sizeof(ZixTrieNode) +
97 	        zix_trie_pad(num_children) +
98 	        (num_children * sizeof(ZixTrieNode*)));
99 }
100 
101 ZIX_PRIVATE ZixTrieNode*
realloc_node(ZixTrieNode * n,int num_children)102 realloc_node(ZixTrieNode* n, int num_children)
103 {
104 	return (ZixTrieNode*)realloc(n, zix_trie_node_size(num_children));
105 }
106 
107 ZIX_API ZixTrie*
zix_trie_new(void)108 zix_trie_new(void)
109 {
110 	ZixTrie* t = (ZixTrie*)calloc(1, sizeof(ZixTrie));
111 
112 	t->root = calloc(1, sizeof(ZixTrieNode));
113 
114 	return t;
115 }
116 
117 ZIX_PRIVATE void
zix_trie_free_rec(ZixTrieNode * n)118 zix_trie_free_rec(ZixTrieNode* n)
119 {
120 	if (n) {
121 		for (n_edges_t i = 0; i < n->num_children; ++i) {
122 			zix_trie_free_rec(*zix_trie_get_child_ptr(n, i));
123 		}
124 		free(n);
125 	}
126 }
127 
128 ZIX_API void
zix_trie_free(ZixTrie * t)129 zix_trie_free(ZixTrie* t)
130 {
131 	zix_trie_free_rec(t->root);
132 	free(t);
133 }
134 
135 ZIX_PRIVATE inline bool
trie_is_leaf(const ZixTrieNode * n)136 trie_is_leaf(const ZixTrieNode* n)
137 {
138 	return n->num_children == 0;
139 }
140 
141 ZIX_PRIVATE bool
trie_find_edge(const ZixTrieNode * n,const uint8_t c,n_edges_t * const index)142 trie_find_edge(const ZixTrieNode* n, const uint8_t c, n_edges_t* const index)
143 {
144 	*index = zix_trie_find_key(n->children, n->num_children, c);
145 	return *index < n->num_children && n->children[*index] == c;
146 }
147 
148 #ifndef NDEBUG
149 ZIX_PRIVATE bool
zix_trie_node_check(ZixTrieNode * const n)150 zix_trie_node_check(ZixTrieNode* const n)
151 {
152 	for (n_edges_t i = 0; i < n->num_children; ++i) {
153 		assert(n->children[i] != '\0');
154 		assert(*zix_trie_get_child_ptr(n, i) != NULL);
155 	}
156 	return true;
157 }
158 #endif
159 
160 ZIX_PRIVATE size_t
zix_trie_add_child(ZixTrieNode ** n_ptr,ZixTrieNode * child,const uint8_t first)161 zix_trie_add_child(ZixTrieNode** n_ptr,
162                    ZixTrieNode*  child,
163                    const uint8_t first)
164 {
165 	ZixTrieNode* n = *n_ptr;
166 
167 	n = realloc_node(n, n->num_children + 1);
168 #ifdef ZIX_TRIE_LINEAR_SEARCH
169 	const n_edges_t l = n->num_children;
170 	memmove(n->children + zix_trie_pad(n->num_children + 1),
171 	        n->children + zix_trie_pad(n->num_children),
172 	        n->num_children * sizeof(ZixTrieNode*));
173 	++n->num_children;
174 	n->children[n->num_children - 1]                = first;
175 	*zix_trie_get_child_ptr(n, n->num_children - 1) = child;
176 #else
177 	const n_edges_t l = zix_trie_find_key(n->children, n->num_children, first);
178 	ZixTrieNode*    m = malloc(zix_trie_node_size(n->num_children + 1));
179 	m->str          = n->str;
180 	m->num_children = n->num_children + 1;
181 
182 	memcpy(m->children, n->children, l);
183 	m->children[l] = first;
184 	memcpy(m->children + l + 1, n->children + l, n->num_children - l);
185 
186 	memcpy(zix_trie_get_child_ptr(m, 0),
187 	       zix_trie_get_child_ptr(n, 0),
188 	       l * sizeof(ZixTrieNode*));
189 	*zix_trie_get_child_ptr(m, l) = child;
190 	memcpy(zix_trie_get_child_ptr(m, l + 1),
191 	       zix_trie_get_child_ptr(n, l),
192 	       (n->num_children - l) * sizeof(ZixTrieNode*));
193 	free(n);
194 	n = m;
195 #endif
196 
197 	*n_ptr = n;
198 
199 	return l;
200 
201 	assert(zix_trie_node_check(n));
202 	//assert(zix_trie_node_check(child));
203 }
204 
205 ZIX_PRIVATE ZixTrieNode**
trie_insert_tail(ZixTrieNode ** n_ptr,const uint8_t * str,const uint8_t * first,const size_t len)206 trie_insert_tail(ZixTrieNode**  n_ptr,
207                  const uint8_t* str,
208                  const uint8_t* first,
209                  const size_t   len)
210 {
211 	assert(first[0]);
212 	ZixTrieNode* c = NULL;
213 	while (first[0]) {
214 		assert(zix_trie_node_check(*n_ptr));
215 
216 		c               = realloc_node(NULL, 1);
217 		c->str          = NULL;
218 		c->num_children = 0;
219 
220 		const size_t l = zix_trie_add_child(n_ptr, c, first[0]);
221 		n_ptr = zix_trie_get_child_ptr(*n_ptr, l);
222 		assert(*n_ptr == c);
223 
224 		++first;
225 	}
226 	c->num_children = 0;
227 	c->str = str;
228 	assert(zix_trie_node_check(c));
229 	assert(*n_ptr == c);
230 	return n_ptr;
231 }
232 
233 ZIX_PRIVATE ZixStatus
trie_insert_internal(ZixTrieNode ** n_ptr,const uint8_t * str,const uint8_t * first,const size_t len)234 trie_insert_internal(ZixTrieNode**  n_ptr,
235                      const uint8_t* str,
236                      const uint8_t* first,
237                      const size_t   len)
238 {
239 	assert(zix_trie_node_check(*n_ptr));
240 	ZixTrieNode* n = *n_ptr;
241 	n_edges_t    child_i;
242 	while (trie_find_edge(n, first[0], &child_i)) {
243 		assert(child_i <= n->num_children);
244 		ZixTrieNode** const child_ptr = zix_trie_get_child_ptr(n, child_i);
245 		ZixTrieNode*  const child     = *child_ptr;
246 		assert(child_ptr);
247 
248 		assert(zix_trie_node_check(child));
249 
250 		if (!first[0]) {
251 			/* Reached end of search string */
252 			if (child->str) {
253 				/* Found exact string in trie. */
254 				return ZIX_STATUS_EXISTS;
255 			} else {
256 				/* Set node value to inserted string */
257 				child->str = str;
258 				return ZIX_STATUS_SUCCESS;
259 			}
260 		} else {
261 			/* Didn't reach end of search string */
262 			if (trie_is_leaf(n)) {
263 				/* Hit leaf early, insert underneath */
264 				ZixTrieNode** c = trie_insert_tail(n_ptr, str, first, len);
265 				assert(zix_trie_node_check(*n_ptr));
266 				assert(zix_trie_node_check(*c));
267 				return ZIX_STATUS_SUCCESS;
268 			} else {
269 				/* Match at this node, continue search downwards (or match) */
270 				n_ptr = child_ptr;
271 				assert(*n_ptr);
272 				n = *n_ptr;
273 				assert(n == child);
274 				++first;
275 			}
276 		}
277 	}
278 
279 	if (!first[0]) {
280 		if ((*n_ptr)->str) {
281 			return ZIX_STATUS_EXISTS;
282 		} else {
283 			(*n_ptr)->str = str;
284 			return ZIX_STATUS_SUCCESS;
285 		}
286 	}
287 
288 	/* Hit leaf early, insert underneath */
289 	assert(zix_trie_node_check(*n_ptr));
290 	ZixTrieNode** c = trie_insert_tail(n_ptr, str, first, len);
291 	assert(zix_trie_node_check(*n_ptr));
292 	assert(zix_trie_node_check(*c));
293 	return ZIX_STATUS_SUCCESS;
294 }
295 
296 ZIX_API ZixStatus
zix_trie_insert(ZixTrie * t,const char * str,size_t len)297 zix_trie_insert(ZixTrie* t, const char* str, size_t len)
298 {
299 	assert(strlen(str) == len);
300 
301 	const uint8_t* ustr = (const uint8_t*)str;
302 	return trie_insert_internal(&t->root, ustr, ustr, len);
303 }
304 
305 ZIX_API ZixStatus
zix_trie_find(const ZixTrie * t,const char * const str,const char ** match)306 zix_trie_find(const ZixTrie* t, const char* const str, const char** match)
307 {
308 	ZixTrieNode* n = t->root;
309 	const char*  p = str;
310 	n_edges_t    child_i;
311 
312 	while (trie_find_edge(n, p[0], &child_i)) {
313 		assert(child_i <= n->num_children);
314 		ZixTrieNode* const child = *zix_trie_get_child_ptr(n, child_i);
315 
316 		++p;
317 
318 		if (!*p) {
319 			/* Reached end of search string */
320 			if (child->str) {
321 				/* Found exact string in trie. */
322 				*match = (const char*)child->str;
323 				return ZIX_STATUS_SUCCESS;
324 			} else {
325 				/* Prefix match, but exact string not present. */
326 				return ZIX_STATUS_NOT_FOUND;
327 			}
328 		} else {
329 			/* Didn't reach end of search string */
330 			if (trie_is_leaf(n)) {
331 				/* Hit leaf early, no match */
332 				return ZIX_STATUS_NOT_FOUND;
333 			} else {
334 				/* Match at this node, continue search downwards (or match) */
335 				n = child;
336 			}
337 		}
338 	}
339 
340 	return ZIX_STATUS_NOT_FOUND;
341 }
342