1 /****************************************************************************
2 * bfs *
3 * Copyright (C) 2019 Tavian Barnes <tavianator@tavianator.com> *
4 * *
5 * Permission to use, copy, modify, and/or distribute this software for any *
6 * purpose with or without fee is hereby granted. *
7 * *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES *
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF *
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR *
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES *
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN *
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF *
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *
15 ****************************************************************************/
16
17 /**
18 * This is an implementation of a "qp trie," as documented at
19 * https://dotat.at/prog/qp/README.html
20 *
21 * An uncompressed trie over the dataset {AAAA, AADD, ABCD, DDAA, DDDD} would
22 * look like
23 *
24 * A A A A
25 * *--->*--->*--->*--->$
26 * | | | D D
27 * | | +--->*--->$
28 * | | B C D
29 * | +--->*--->*--->$
30 * | D D A A
31 * +--->*--->*--->*--->$
32 * | D D
33 * +--->*--->$
34 *
35 * A compressed (PATRICIA) trie collapses internal nodes that have only a single
36 * child, like this:
37 *
38 * A A AA
39 * *--->*--->*---->$
40 * | | | DD
41 * | | +---->$
42 * | | BCD
43 * | +----->$
44 * | DD AA
45 * +---->*---->$
46 * | DD
47 * +---->$
48 *
49 * The nodes can be compressed further by dropping the actual compressed
50 * sequences from the nodes, storing it only in the leaves. This is the
51 * technique applied in QP tries, and the crit-bit trees that inspired them
52 * (https://cr.yp.to/critbit.html). Only the index to test, and the values to
53 * branch on, need to be stored in each node.
54 *
55 * A A A
56 * 0--->1--->2--->AAAA
57 * | | | D
58 * | | +--->AADD
59 * | | B
60 * | +--->ABCD
61 * | D A
62 * +--->2--->DDAA
63 * | D
64 * +--->DDDD
65 *
66 * Nodes are represented very compactly. Rather than a dense array of children,
67 * a sparse array of only the non-NULL children directly follows the node in
68 * memory. A bitmap is used to track which children exist; the index of a child
69 * i is found by counting the number of bits below bit i that are set. A tag
70 * bit is used to tell pointers to internal nodes apart from pointers to leaves.
71 *
72 * This implementation tests a whole nibble (half byte/hex digit) at every
73 * branch, so the bitmap takes up 16 bits. The remainder of a machine word is
74 * used to hold the offset, which severely constrains its range on 32-bit
75 * platforms. As a workaround, we store relative instead of absolute offsets,
76 * and insert intermediate singleton "jump" nodes when necessary.
77 */
78
79 #include "trie.h"
80 #include "util.h"
81 #include <assert.h>
82 #include <limits.h>
83 #include <stdbool.h>
84 #include <stdint.h>
85 #include <stdlib.h>
86 #include <string.h>
87
88 #if CHAR_BIT != 8
89 # error "This trie implementation assumes 8-bit bytes."
90 #endif
91
92 /** Number of bits for the sparse array bitmap, aka the range of a nibble. */
93 #define BITMAP_BITS 16
94 /** The number of remaining bits in a word, to hold the offset. */
95 #define OFFSET_BITS (sizeof(size_t)*CHAR_BIT - BITMAP_BITS)
96 /** The highest representable offset (only 64k on a 32-bit architecture). */
97 #define OFFSET_MAX (((size_t)1 << OFFSET_BITS) - 1)
98
99 /**
100 * An internal node of the trie.
101 */
102 struct trie_node {
103 /**
104 * A bitmap that hold which indices exist in the sparse children array.
105 * Bit i will be set if a child exists at logical index i, and its index
106 * into the array will be popcount(bitmap & ((1 << i) - 1)).
107 */
108 size_t bitmap : BITMAP_BITS;
109
110 /**
111 * The offset into the key in nibbles. This is relative to the parent
112 * node, to support offsets larger than OFFSET_MAX.
113 */
114 size_t offset : OFFSET_BITS;
115
116 /**
117 * Flexible array of children. Each pointer uses the lowest bit as a
118 * tag to distinguish internal nodes from leaves. This is safe as long
119 * as all dynamic allocations are aligned to more than a single byte.
120 */
121 uintptr_t children[];
122 };
123
124 /** Check if an encoded pointer is to a leaf. */
trie_is_leaf(uintptr_t ptr)125 static bool trie_is_leaf(uintptr_t ptr) {
126 return ptr & 1;
127 }
128
129 /** Decode a pointer to a leaf. */
trie_decode_leaf(uintptr_t ptr)130 static struct trie_leaf *trie_decode_leaf(uintptr_t ptr) {
131 assert(trie_is_leaf(ptr));
132 return (struct trie_leaf *)(ptr ^ 1);
133 }
134
135 /** Encode a pointer to a leaf. */
trie_encode_leaf(const struct trie_leaf * leaf)136 static uintptr_t trie_encode_leaf(const struct trie_leaf *leaf) {
137 uintptr_t ptr = (uintptr_t)leaf ^ 1;
138 assert(trie_is_leaf(ptr));
139 return ptr;
140 }
141
142 /** Decode a pointer to an internal node. */
trie_decode_node(uintptr_t ptr)143 static struct trie_node *trie_decode_node(uintptr_t ptr) {
144 assert(!trie_is_leaf(ptr));
145 return (struct trie_node *)ptr;
146 }
147
148 /** Encode a pointer to an internal node. */
trie_encode_node(const struct trie_node * node)149 static uintptr_t trie_encode_node(const struct trie_node *node) {
150 uintptr_t ptr = (uintptr_t)node;
151 assert(!trie_is_leaf(ptr));
152 return ptr;
153 }
154
trie_init(struct trie * trie)155 void trie_init(struct trie *trie) {
156 trie->root = 0;
157 }
158
159 /** Compute the popcount (Hamming weight) of a bitmap. */
trie_popcount(unsigned int n)160 static unsigned int trie_popcount(unsigned int n) {
161 #if __POPCNT__
162 // Use the x86 instruction if we have it. Otherwise, GCC generates a
163 // library call, so use the below implementation instead.
164 return __builtin_popcount(n);
165 #else
166 // See https://en.wikipedia.org/wiki/Hamming_weight#Efficient_implementation
167 n -= (n >> 1) & 0x5555;
168 n = (n & 0x3333) + ((n >> 2) & 0x3333);
169 n = (n + (n >> 4)) & 0x0F0F;
170 n = (n + (n >> 8)) & 0xFF;
171 return n;
172 #endif
173 }
174
175 /** Extract the nibble at a certain offset from a byte sequence. */
trie_key_nibble(const void * key,size_t offset)176 static unsigned char trie_key_nibble(const void *key, size_t offset) {
177 const unsigned char *bytes = key;
178 size_t byte = offset >> 1;
179
180 // A branchless version of
181 // if (offset & 1) {
182 // return bytes[byte] >> 4;
183 // } else {
184 // return bytes[byte] & 0xF;
185 // }
186 unsigned int shift = (offset & 1) << 2;
187 return (bytes[byte] >> shift) & 0xF;
188 }
189
190 /**
191 * Finds a leaf in the trie that matches the key at every branch. If the key
192 * exists in the trie, the representative will match the searched key. But
193 * since only branch points are tested, it can be different from the key. In
194 * that case, the first mismatch between the key and the representative will be
195 * the depth at which to make a new branch to insert the key.
196 */
trie_representative(const struct trie * trie,const void * key,size_t length)197 static struct trie_leaf *trie_representative(const struct trie *trie, const void *key, size_t length) {
198 uintptr_t ptr = trie->root;
199 if (!ptr) {
200 return NULL;
201 }
202
203 size_t offset = 0;
204 while (!trie_is_leaf(ptr)) {
205 struct trie_node *node = trie_decode_node(ptr);
206 offset += node->offset;
207
208 unsigned int index = 0;
209 if ((offset >> 1) < length) {
210 unsigned char nibble = trie_key_nibble(key, offset);
211 unsigned int bit = 1U << nibble;
212 if (node->bitmap & bit) {
213 index = trie_popcount(node->bitmap & (bit - 1));
214 }
215 }
216 ptr = node->children[index];
217 }
218
219 return trie_decode_leaf(ptr);
220 }
221
trie_first_leaf(const struct trie * trie)222 struct trie_leaf *trie_first_leaf(const struct trie *trie) {
223 return trie_representative(trie, NULL, 0);
224 }
225
trie_find_str(const struct trie * trie,const char * key)226 struct trie_leaf *trie_find_str(const struct trie *trie, const char *key) {
227 return trie_find_mem(trie, key, strlen(key) + 1);
228 }
229
trie_find_mem(const struct trie * trie,const void * key,size_t length)230 struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length) {
231 struct trie_leaf *rep = trie_representative(trie, key, length);
232 if (rep && rep->length == length && memcmp(rep->key, key, length) == 0) {
233 return rep;
234 } else {
235 return NULL;
236 }
237 }
238
trie_find_postfix(const struct trie * trie,const char * key)239 struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key) {
240 size_t length = strlen(key);
241 struct trie_leaf *rep = trie_representative(trie, key, length + 1);
242 if (rep && rep->length >= length && memcmp(rep->key, key, length) == 0) {
243 return rep;
244 } else {
245 return NULL;
246 }
247 }
248
249 /**
250 * Find a leaf that may end at the current node.
251 */
trie_terminal_leaf(const struct trie_node * node)252 static struct trie_leaf *trie_terminal_leaf(const struct trie_node *node) {
253 // Finding a terminating NUL byte may take two nibbles
254 for (int i = 0; i < 2; ++i) {
255 if (!(node->bitmap & 1)) {
256 break;
257 }
258
259 uintptr_t ptr = node->children[0];
260 if (trie_is_leaf(ptr)) {
261 return trie_decode_leaf(ptr);
262 } else {
263 node = trie_decode_node(ptr);
264 }
265 }
266
267 return NULL;
268 }
269
270 /** Check if a leaf is a prefix of a search key. */
trie_check_prefix(struct trie_leaf * leaf,size_t skip,const char * key,size_t length)271 static bool trie_check_prefix(struct trie_leaf *leaf, size_t skip, const char *key, size_t length) {
272 if (leaf && leaf->length <= length) {
273 return memcmp(key + skip, leaf->key + skip, leaf->length - skip - 1) == 0;
274 } else {
275 return false;
276 }
277 }
278
trie_find_prefix(const struct trie * trie,const char * key)279 struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key) {
280 uintptr_t ptr = trie->root;
281 if (!ptr) {
282 return NULL;
283 }
284
285 struct trie_leaf *best = NULL;
286 size_t skip = 0;
287 size_t length = strlen(key) + 1;
288
289 size_t offset = 0;
290 while (!trie_is_leaf(ptr)) {
291 struct trie_node *node = trie_decode_node(ptr);
292 offset += node->offset;
293 if ((offset >> 1) >= length) {
294 return best;
295 }
296
297 struct trie_leaf *leaf = trie_terminal_leaf(node);
298 if (trie_check_prefix(leaf, skip, key, length)) {
299 best = leaf;
300 skip = offset >> 1;
301 }
302
303 unsigned char nibble = trie_key_nibble(key, offset);
304 unsigned int bit = 1U << nibble;
305 if (node->bitmap & bit) {
306 unsigned int index = trie_popcount(node->bitmap & (bit - 1));
307 ptr = node->children[index];
308 } else {
309 return best;
310 }
311 }
312
313 struct trie_leaf *leaf = trie_decode_leaf(ptr);
314 if (trie_check_prefix(leaf, skip, key, length)) {
315 best = leaf;
316 }
317
318 return best;
319 }
320
321 /** Create a new leaf, holding a copy of the given key. */
new_trie_leaf(const void * key,size_t length)322 static struct trie_leaf *new_trie_leaf(const void *key, size_t length) {
323 struct trie_leaf *leaf = malloc(BFS_FLEX_SIZEOF(struct trie_leaf, key, length));
324 if (leaf) {
325 leaf->value = NULL;
326 leaf->length = length;
327 memcpy(leaf->key, key, length);
328 }
329 return leaf;
330 }
331
332 /** Compute the size of a trie node with a certain number of children. */
trie_node_size(unsigned int size)333 static size_t trie_node_size(unsigned int size) {
334 // Empty nodes aren't supported
335 assert(size > 0);
336 // Node size must be a power of two
337 assert((size & (size - 1)) == 0);
338
339 return BFS_FLEX_SIZEOF(struct trie_node, children, size);
340 }
341
342 /** Find the offset of the first nibble that differs between two keys. */
trie_key_mismatch(const void * key1,const void * key2,size_t length)343 static size_t trie_key_mismatch(const void *key1, const void *key2, size_t length) {
344 const unsigned char *bytes1 = key1;
345 const unsigned char *bytes2 = key2;
346 size_t i = 0;
347 size_t offset = 0;
348 const size_t chunk = sizeof(size_t);
349
350 for (; i + chunk <= length; i += chunk) {
351 if (memcmp(bytes1 + i, bytes2 + i, chunk) != 0) {
352 break;
353 }
354 }
355
356 for (; i < length; ++i) {
357 unsigned char b1 = bytes1[i], b2 = bytes2[i];
358 if (b1 != b2) {
359 offset = (b1 & 0xF) == (b2 & 0xF);
360 break;
361 }
362 }
363
364 offset |= i << 1;
365 return offset;
366 }
367
368 /**
369 * Insert a key into a node. The node must not have a child in that position
370 * already. Effectively takes a subtrie like this:
371 *
372 * ptr
373 * |
374 * v X
375 * *--->...
376 * | Z
377 * +--->...
378 *
379 * and transforms it to:
380 *
381 * ptr
382 * |
383 * v X
384 * *--->...
385 * | Y
386 * +--->key
387 * | Z
388 * +--->...
389 */
trie_node_insert(uintptr_t * ptr,const void * key,size_t length,size_t offset)390 static struct trie_leaf *trie_node_insert(uintptr_t *ptr, const void *key, size_t length, size_t offset) {
391 struct trie_node *node = trie_decode_node(*ptr);
392 unsigned int size = trie_popcount(node->bitmap);
393
394 // Double the capacity every power of two
395 if ((size & (size - 1)) == 0) {
396 node = realloc(node, trie_node_size(2*size));
397 if (!node) {
398 return NULL;
399 }
400 *ptr = trie_encode_node(node);
401 }
402
403 struct trie_leaf *leaf = new_trie_leaf(key, length);
404 if (!leaf) {
405 return NULL;
406 }
407
408 unsigned char nibble = trie_key_nibble(key, offset);
409 unsigned int bit = 1U << nibble;
410
411 // The child must not already be present
412 assert(!(node->bitmap & bit));
413 node->bitmap |= bit;
414
415 unsigned int index = trie_popcount(node->bitmap & (bit - 1));
416 uintptr_t *child = node->children + index;
417 if (index < size) {
418 memmove(child + 1, child, (size - index)*sizeof(*child));
419 }
420 *child = trie_encode_leaf(leaf);
421 return leaf;
422 }
423
424 /**
425 * When the current offset exceeds OFFSET_MAX, insert "jump" nodes that bridge
426 * the gap. This function takes a subtrie like this:
427 *
428 * ptr
429 * |
430 * v
431 * *--->rep
432 *
433 * and changes it to:
434 *
435 * ptr ret
436 * | |
437 * v v
438 * *--->*--->rep
439 *
440 * so that a new key can be inserted like:
441 *
442 * ptr ret
443 * | |
444 * v v X
445 * *--->*--->rep
446 * | Y
447 * +--->key
448 */
trie_jump(uintptr_t * ptr,const char * key,size_t * offset)449 static uintptr_t *trie_jump(uintptr_t *ptr, const char *key, size_t *offset) {
450 // We only ever need to jump to leaf nodes, since internal nodes are
451 // guaranteed to be within OFFSET_MAX anyway
452 assert(trie_is_leaf(*ptr));
453
454 struct trie_node *node = malloc(trie_node_size(1));
455 if (!node) {
456 return NULL;
457 }
458
459 *offset += OFFSET_MAX;
460 node->offset = OFFSET_MAX;
461
462 unsigned char nibble = trie_key_nibble(key, *offset);
463 node->bitmap = 1 << nibble;
464
465 node->children[0] = *ptr;
466 *ptr = trie_encode_node(node);
467 return node->children;
468 }
469
470 /**
471 * Split a node in the trie. Changes a subtrie like this:
472 *
473 * ptr
474 * |
475 * v
476 * *...>--->rep
477 *
478 * into this:
479 *
480 * ptr
481 * |
482 * v X
483 * *--->*...>--->rep
484 * | Y
485 * +--->key
486 */
trie_split(uintptr_t * ptr,const void * key,size_t length,struct trie_leaf * rep,size_t offset,size_t mismatch)487 static struct trie_leaf *trie_split(uintptr_t *ptr, const void *key, size_t length, struct trie_leaf *rep, size_t offset, size_t mismatch) {
488 unsigned char key_nibble = trie_key_nibble(key, mismatch);
489 unsigned char rep_nibble = trie_key_nibble(rep->key, mismatch);
490 assert(key_nibble != rep_nibble);
491
492 struct trie_node *node = malloc(trie_node_size(2));
493 if (!node) {
494 return NULL;
495 }
496
497 struct trie_leaf *leaf = new_trie_leaf(key, length);
498 if (!leaf) {
499 free(node);
500 return NULL;
501 }
502
503 node->bitmap = (1 << key_nibble) | (1 << rep_nibble);
504
505 size_t delta = mismatch - offset;
506 if (!trie_is_leaf(*ptr)) {
507 struct trie_node *child = trie_decode_node(*ptr);
508 child->offset -= delta;
509 }
510 node->offset = delta;
511
512 unsigned int key_index = key_nibble > rep_nibble;
513 node->children[key_index] = trie_encode_leaf(leaf);
514 node->children[key_index ^ 1] = *ptr;
515 *ptr = trie_encode_node(node);
516 return leaf;
517 }
518
trie_insert_str(struct trie * trie,const char * key)519 struct trie_leaf *trie_insert_str(struct trie *trie, const char *key) {
520 return trie_insert_mem(trie, key, strlen(key) + 1);
521 }
522
trie_insert_mem(struct trie * trie,const void * key,size_t length)523 struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t length) {
524 struct trie_leaf *rep = trie_representative(trie, key, length);
525 if (!rep) {
526 struct trie_leaf *leaf = new_trie_leaf(key, length);
527 if (leaf) {
528 trie->root = trie_encode_leaf(leaf);
529 }
530 return leaf;
531 }
532
533 size_t limit = length < rep->length ? length : rep->length;
534 size_t mismatch = trie_key_mismatch(key, rep->key, limit);
535 if ((mismatch >> 1) >= length) {
536 return rep;
537 }
538
539 size_t offset = 0;
540 uintptr_t *ptr = &trie->root;
541 while (!trie_is_leaf(*ptr)) {
542 struct trie_node *node = trie_decode_node(*ptr);
543 if (offset + node->offset > mismatch) {
544 break;
545 }
546 offset += node->offset;
547
548 unsigned char nibble = trie_key_nibble(key, offset);
549 unsigned int bit = 1U << nibble;
550 if (node->bitmap & bit) {
551 assert(offset < mismatch);
552 unsigned int index = trie_popcount(node->bitmap & (bit - 1));
553 ptr = node->children + index;
554 } else {
555 assert(offset == mismatch);
556 return trie_node_insert(ptr, key, length, offset);
557 }
558 }
559
560 while (mismatch - offset > OFFSET_MAX) {
561 ptr = trie_jump(ptr, key, &offset);
562 if (!ptr) {
563 return NULL;
564 }
565 }
566
567 return trie_split(ptr, key, length, rep, offset, mismatch);
568 }
569
570 /** Free a chain of singleton nodes. */
trie_free_singletons(uintptr_t ptr)571 static void trie_free_singletons(uintptr_t ptr) {
572 while (!trie_is_leaf(ptr)) {
573 struct trie_node *node = trie_decode_node(ptr);
574
575 // Make sure the bitmap is a power of two, i.e. it has just one child
576 assert((node->bitmap & (node->bitmap - 1)) == 0);
577
578 ptr = node->children[0];
579 free(node);
580 }
581
582 free(trie_decode_leaf(ptr));
583 }
584
585 /**
586 * Try to collapse a two-child node like:
587 *
588 * parent child
589 * | |
590 * v v
591 * *----->*----->*----->leaf
592 * |
593 * +----->other
594 *
595 * into
596 *
597 * parent
598 * |
599 * v
600 * other
601 */
trie_collapse_node(uintptr_t * parent,struct trie_node * parent_node,unsigned int child_index)602 static int trie_collapse_node(uintptr_t *parent, struct trie_node *parent_node, unsigned int child_index) {
603 uintptr_t other = parent_node->children[child_index ^ 1];
604 if (!trie_is_leaf(other)) {
605 struct trie_node *other_node = trie_decode_node(other);
606 if (other_node->offset + parent_node->offset <= OFFSET_MAX) {
607 other_node->offset += parent_node->offset;
608 } else {
609 return -1;
610 }
611 }
612
613 *parent = other;
614 free(parent_node);
615 return 0;
616 }
617
trie_remove(struct trie * trie,struct trie_leaf * leaf)618 void trie_remove(struct trie *trie, struct trie_leaf *leaf) {
619 uintptr_t *child = &trie->root;
620 uintptr_t *parent = NULL;
621 unsigned int child_bit = 0, child_index = 0;
622 size_t offset = 0;
623 while (!trie_is_leaf(*child)) {
624 struct trie_node *node = trie_decode_node(*child);
625 offset += node->offset;
626 assert((offset >> 1) < leaf->length);
627
628 unsigned char nibble = trie_key_nibble(leaf->key, offset);
629 unsigned int bit = 1U << nibble;
630 unsigned int bitmap = node->bitmap;
631 assert(bitmap & bit);
632 unsigned int index = trie_popcount(bitmap & (bit - 1));
633
634 // Advance the parent pointer, unless this node had only one child
635 if (bitmap & (bitmap - 1)) {
636 parent = child;
637 child_bit = bit;
638 child_index = index;
639 }
640
641 child = node->children + index;
642 }
643
644 assert(trie_decode_leaf(*child) == leaf);
645
646 if (!parent) {
647 trie_free_singletons(trie->root);
648 trie->root = 0;
649 return;
650 }
651
652 struct trie_node *node = trie_decode_node(*parent);
653 child = node->children + child_index;
654 trie_free_singletons(*child);
655
656 node->bitmap ^= child_bit;
657 unsigned int parent_size = trie_popcount(node->bitmap);
658 assert(parent_size > 0);
659 if (parent_size == 1 && trie_collapse_node(parent, node, child_index) == 0) {
660 return;
661 }
662
663 if (child_index < parent_size) {
664 memmove(child, child + 1, (parent_size - child_index)*sizeof(*child));
665 }
666
667 if ((parent_size & (parent_size - 1)) == 0) {
668 node = realloc(node, trie_node_size(parent_size));
669 if (node) {
670 *parent = trie_encode_node(node);
671 }
672 }
673 }
674
675 /** Free an encoded pointer to a node. */
free_trie_ptr(uintptr_t ptr)676 static void free_trie_ptr(uintptr_t ptr) {
677 if (trie_is_leaf(ptr)) {
678 free(trie_decode_leaf(ptr));
679 } else {
680 struct trie_node *node = trie_decode_node(ptr);
681 size_t size = trie_popcount(node->bitmap);
682 for (size_t i = 0; i < size; ++i) {
683 free_trie_ptr(node->children[i]);
684 }
685 free(node);
686 }
687 }
688
trie_destroy(struct trie * trie)689 void trie_destroy(struct trie *trie) {
690 if (trie->root) {
691 free_trie_ptr(trie->root);
692 }
693 }
694