1 /*
2 Copyright 2011-2016 David Robillard <http://drobilla.net>
3
4 Permission to use, copy, modify, and/or distribute this software for any
5 purpose with or without fee is hereby granted, provided that the above
6 copyright notice and this permission notice appear in all copies.
7
8 THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 #define _XOPEN_SOURCE 500
18
19 #include <assert.h>
20 #include <stdint.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24
25 // #define ZIX_TRIE_LINEAR_SEARCH 1
26
27 #include "zix/common.h"
28 #include "zix/trie.h"
29 #include "zix/trie_util.h"
30
31 //typedef uint_fast8_t n_edges_t;
32 typedef uintptr_t n_edges_t;
33
34 typedef struct {
35 const uint8_t* str; /* String stored at this node */
36 n_edges_t num_children; /* Number of outgoing edges */
37
38 /** Array of first child characters, followed by parallel array of pointers
39 to ZixTrieNode pointers. */
40 uint8_t children[];
41 } ZixTrieNode;
42
43 struct _ZixTrie {
44 ZixTrieNode* root; /* Root of the tree */
45 };
46
47 /** Round `size` up to the next multiple of the word size. */
48 ZIX_PRIVATE uint32_t
zix_trie_pad(uint32_t size)49 zix_trie_pad(uint32_t size)
50 {
51 return (size + sizeof(void*) - 1) & (~(sizeof(void*) - 1));
52 }
53
54 ZIX_PRIVATE ZixTrieNode**
zix_trie_get_children(ZixTrieNode * node)55 zix_trie_get_children(ZixTrieNode* node)
56 {
57 return (ZixTrieNode**)(node->children +
58 zix_trie_pad(node->num_children));
59 }
60
61 ZIX_PRIVATE ZixTrieNode**
zix_trie_get_child_ptr(ZixTrieNode * node,n_edges_t index)62 zix_trie_get_child_ptr(ZixTrieNode* node, n_edges_t index)
63 {
64 return zix_trie_get_children(node) + index;
65 }
66
67 ZIX_PRIVATE void
zix_trie_print_rec(ZixTrieNode * node,FILE * fd)68 zix_trie_print_rec(ZixTrieNode* node, FILE* fd)
69 {
70 if (node->str) {
71 fprintf(fd, "\t\"%p\" [label=\"%s\"] ;\n", (void*)node, node->str);
72 } else {
73 fprintf(fd, "\t\"%p\" [label=\"\"] ;\n", (void*)node);
74 }
75
76 for (n_edges_t i = 0; i < node->num_children; ++i) {
77 ZixTrieNode* const child = *zix_trie_get_child_ptr(node, i);
78 zix_trie_print_rec(child, fd);
79 fprintf(fd, "\t\"%p\" -> \"%p\" [ label = \"%c\" ];\n",
80 (void*)node, (void*)child, node->children[i]);
81 }
82 }
83
84 ZIX_API
85 void
zix_trie_print_dot(const ZixTrie * t,FILE * fd)86 zix_trie_print_dot(const ZixTrie* t, FILE* fd)
87 {
88 fprintf(fd, "digraph Trie { \n");
89 zix_trie_print_rec(t->root, fd);
90 fprintf(fd, "}\n");
91 }
92
93 ZIX_PRIVATE size_t
zix_trie_node_size(n_edges_t num_children)94 zix_trie_node_size(n_edges_t num_children)
95 {
96 return (sizeof(ZixTrieNode) +
97 zix_trie_pad(num_children) +
98 (num_children * sizeof(ZixTrieNode*)));
99 }
100
101 ZIX_PRIVATE ZixTrieNode*
realloc_node(ZixTrieNode * n,int num_children)102 realloc_node(ZixTrieNode* n, int num_children)
103 {
104 return (ZixTrieNode*)realloc(n, zix_trie_node_size(num_children));
105 }
106
107 ZIX_API ZixTrie*
zix_trie_new(void)108 zix_trie_new(void)
109 {
110 ZixTrie* t = (ZixTrie*)calloc(1, sizeof(ZixTrie));
111
112 t->root = calloc(1, sizeof(ZixTrieNode));
113
114 return t;
115 }
116
117 ZIX_PRIVATE void
zix_trie_free_rec(ZixTrieNode * n)118 zix_trie_free_rec(ZixTrieNode* n)
119 {
120 if (n) {
121 for (n_edges_t i = 0; i < n->num_children; ++i) {
122 zix_trie_free_rec(*zix_trie_get_child_ptr(n, i));
123 }
124 free(n);
125 }
126 }
127
128 ZIX_API void
zix_trie_free(ZixTrie * t)129 zix_trie_free(ZixTrie* t)
130 {
131 zix_trie_free_rec(t->root);
132 free(t);
133 }
134
135 ZIX_PRIVATE inline bool
trie_is_leaf(const ZixTrieNode * n)136 trie_is_leaf(const ZixTrieNode* n)
137 {
138 return n->num_children == 0;
139 }
140
141 ZIX_PRIVATE bool
trie_find_edge(const ZixTrieNode * n,const uint8_t c,n_edges_t * const index)142 trie_find_edge(const ZixTrieNode* n, const uint8_t c, n_edges_t* const index)
143 {
144 *index = zix_trie_find_key(n->children, n->num_children, c);
145 return *index < n->num_children && n->children[*index] == c;
146 }
147
148 #ifndef NDEBUG
149 ZIX_PRIVATE bool
zix_trie_node_check(ZixTrieNode * const n)150 zix_trie_node_check(ZixTrieNode* const n)
151 {
152 for (n_edges_t i = 0; i < n->num_children; ++i) {
153 assert(n->children[i] != '\0');
154 assert(*zix_trie_get_child_ptr(n, i) != NULL);
155 }
156 return true;
157 }
158 #endif
159
160 ZIX_PRIVATE size_t
zix_trie_add_child(ZixTrieNode ** n_ptr,ZixTrieNode * child,const uint8_t first)161 zix_trie_add_child(ZixTrieNode** n_ptr,
162 ZixTrieNode* child,
163 const uint8_t first)
164 {
165 ZixTrieNode* n = *n_ptr;
166
167 n = realloc_node(n, n->num_children + 1);
168 #ifdef ZIX_TRIE_LINEAR_SEARCH
169 const n_edges_t l = n->num_children;
170 memmove(n->children + zix_trie_pad(n->num_children + 1),
171 n->children + zix_trie_pad(n->num_children),
172 n->num_children * sizeof(ZixTrieNode*));
173 ++n->num_children;
174 n->children[n->num_children - 1] = first;
175 *zix_trie_get_child_ptr(n, n->num_children - 1) = child;
176 #else
177 const n_edges_t l = zix_trie_find_key(n->children, n->num_children, first);
178 ZixTrieNode* m = malloc(zix_trie_node_size(n->num_children + 1));
179 m->str = n->str;
180 m->num_children = n->num_children + 1;
181
182 memcpy(m->children, n->children, l);
183 m->children[l] = first;
184 memcpy(m->children + l + 1, n->children + l, n->num_children - l);
185
186 memcpy(zix_trie_get_child_ptr(m, 0),
187 zix_trie_get_child_ptr(n, 0),
188 l * sizeof(ZixTrieNode*));
189 *zix_trie_get_child_ptr(m, l) = child;
190 memcpy(zix_trie_get_child_ptr(m, l + 1),
191 zix_trie_get_child_ptr(n, l),
192 (n->num_children - l) * sizeof(ZixTrieNode*));
193 free(n);
194 n = m;
195 #endif
196
197 *n_ptr = n;
198
199 return l;
200
201 assert(zix_trie_node_check(n));
202 //assert(zix_trie_node_check(child));
203 }
204
205 ZIX_PRIVATE ZixTrieNode**
trie_insert_tail(ZixTrieNode ** n_ptr,const uint8_t * str,const uint8_t * first,const size_t len)206 trie_insert_tail(ZixTrieNode** n_ptr,
207 const uint8_t* str,
208 const uint8_t* first,
209 const size_t len)
210 {
211 assert(first[0]);
212 ZixTrieNode* c = NULL;
213 while (first[0]) {
214 assert(zix_trie_node_check(*n_ptr));
215
216 c = realloc_node(NULL, 1);
217 c->str = NULL;
218 c->num_children = 0;
219
220 const size_t l = zix_trie_add_child(n_ptr, c, first[0]);
221 n_ptr = zix_trie_get_child_ptr(*n_ptr, l);
222 assert(*n_ptr == c);
223
224 ++first;
225 }
226 c->num_children = 0;
227 c->str = str;
228 assert(zix_trie_node_check(c));
229 assert(*n_ptr == c);
230 return n_ptr;
231 }
232
233 ZIX_PRIVATE ZixStatus
trie_insert_internal(ZixTrieNode ** n_ptr,const uint8_t * str,const uint8_t * first,const size_t len)234 trie_insert_internal(ZixTrieNode** n_ptr,
235 const uint8_t* str,
236 const uint8_t* first,
237 const size_t len)
238 {
239 assert(zix_trie_node_check(*n_ptr));
240 ZixTrieNode* n = *n_ptr;
241 n_edges_t child_i;
242 while (trie_find_edge(n, first[0], &child_i)) {
243 assert(child_i <= n->num_children);
244 ZixTrieNode** const child_ptr = zix_trie_get_child_ptr(n, child_i);
245 ZixTrieNode* const child = *child_ptr;
246 assert(child_ptr);
247
248 assert(zix_trie_node_check(child));
249
250 if (!first[0]) {
251 /* Reached end of search string */
252 if (child->str) {
253 /* Found exact string in trie. */
254 return ZIX_STATUS_EXISTS;
255 } else {
256 /* Set node value to inserted string */
257 child->str = str;
258 return ZIX_STATUS_SUCCESS;
259 }
260 } else {
261 /* Didn't reach end of search string */
262 if (trie_is_leaf(n)) {
263 /* Hit leaf early, insert underneath */
264 ZixTrieNode** c = trie_insert_tail(n_ptr, str, first, len);
265 assert(zix_trie_node_check(*n_ptr));
266 assert(zix_trie_node_check(*c));
267 return ZIX_STATUS_SUCCESS;
268 } else {
269 /* Match at this node, continue search downwards (or match) */
270 n_ptr = child_ptr;
271 assert(*n_ptr);
272 n = *n_ptr;
273 assert(n == child);
274 ++first;
275 }
276 }
277 }
278
279 if (!first[0]) {
280 if ((*n_ptr)->str) {
281 return ZIX_STATUS_EXISTS;
282 } else {
283 (*n_ptr)->str = str;
284 return ZIX_STATUS_SUCCESS;
285 }
286 }
287
288 /* Hit leaf early, insert underneath */
289 assert(zix_trie_node_check(*n_ptr));
290 ZixTrieNode** c = trie_insert_tail(n_ptr, str, first, len);
291 assert(zix_trie_node_check(*n_ptr));
292 assert(zix_trie_node_check(*c));
293 return ZIX_STATUS_SUCCESS;
294 }
295
296 ZIX_API ZixStatus
zix_trie_insert(ZixTrie * t,const char * str,size_t len)297 zix_trie_insert(ZixTrie* t, const char* str, size_t len)
298 {
299 assert(strlen(str) == len);
300
301 const uint8_t* ustr = (const uint8_t*)str;
302 return trie_insert_internal(&t->root, ustr, ustr, len);
303 }
304
305 ZIX_API ZixStatus
zix_trie_find(const ZixTrie * t,const char * const str,const char ** match)306 zix_trie_find(const ZixTrie* t, const char* const str, const char** match)
307 {
308 ZixTrieNode* n = t->root;
309 const char* p = str;
310 n_edges_t child_i;
311
312 while (trie_find_edge(n, p[0], &child_i)) {
313 assert(child_i <= n->num_children);
314 ZixTrieNode* const child = *zix_trie_get_child_ptr(n, child_i);
315
316 ++p;
317
318 if (!*p) {
319 /* Reached end of search string */
320 if (child->str) {
321 /* Found exact string in trie. */
322 *match = (const char*)child->str;
323 return ZIX_STATUS_SUCCESS;
324 } else {
325 /* Prefix match, but exact string not present. */
326 return ZIX_STATUS_NOT_FOUND;
327 }
328 } else {
329 /* Didn't reach end of search string */
330 if (trie_is_leaf(n)) {
331 /* Hit leaf early, no match */
332 return ZIX_STATUS_NOT_FOUND;
333 } else {
334 /* Match at this node, continue search downwards (or match) */
335 n = child;
336 }
337 }
338 }
339
340 return ZIX_STATUS_NOT_FOUND;
341 }
342