1 /* $NetBSD: radixtree.c,v 1.29 2023/03/06 21:39:06 andvar Exp $ */
2
3 /*-
4 * Copyright (c)2011,2012,2013 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * radixtree.c
31 *
32 * Overview:
33 *
34 * This is an implementation of radix tree, whose keys are uint64_t and leafs
35 * are user provided pointers.
36 *
37 * Leaf nodes are just void * and this implementation doesn't care about
38 * what they actually point to. However, this implementation has an assumption
39 * about their alignment. Specifically, this implementation assumes that their
40 * 2 LSBs are always zero and uses them for internal accounting.
41 *
42 * Intermediate nodes and memory allocation:
43 *
44 * Intermediate nodes are automatically allocated and freed internally and
45 * basically users don't need to care about them. The allocation is done via
46 * pool_cache_get(9) for _KERNEL, malloc(3) for userland, and alloc() for
47 * _STANDALONE environment. Only radix_tree_insert_node function can allocate
48 * memory for intermediate nodes and thus can fail for ENOMEM.
49 *
50 * Memory Efficiency:
51 *
52 * It's designed to work efficiently with dense index distribution.
53 * The memory consumption (number of necessary intermediate nodes) heavily
54 * depends on the index distribution. Basically, more dense index distribution
55 * consumes less nodes per item. Approximately,
56 *
57 * - the best case: about RADIX_TREE_PTR_PER_NODE items per intermediate node.
58 * it would look like the following.
59 *
60 * root (t_height=1)
61 * |
62 * v
63 * [ | | | ] (intermediate node. RADIX_TREE_PTR_PER_NODE=4 in this fig)
64 * | | | |
65 * v v v v
66 * p p p p (items)
67 *
68 * - the worst case: RADIX_TREE_MAX_HEIGHT intermediate nodes per item.
69 * it would look like the following if RADIX_TREE_MAX_HEIGHT=3.
70 *
71 * root (t_height=3)
72 * |
73 * v
74 * [ | | | ]
75 * |
76 * v
77 * [ | | | ]
78 * |
79 * v
80 * [ | | | ]
81 * |
82 * v
83 * p
84 *
85 * The height of tree (t_height) is dynamic. It's smaller if only small
86 * index values are used. As an extreme case, if only index 0 is used,
87 * the corresponding value is directly stored in the root of the tree
88 * (struct radix_tree) without allocating any intermediate nodes. In that
89 * case, t_height=0.
90 *
91 * Gang lookup:
92 *
93 * This implementation provides a way to scan many nodes quickly via
94 * radix_tree_gang_lookup_node function and its varients.
95 *
96 * Tags:
97 *
98 * This implementation provides tagging functionality, which allows quick
99 * scanning of a subset of leaf nodes. Leaf nodes are untagged when inserted
100 * into the tree and can be tagged by radix_tree_set_tag function.
101 * radix_tree_gang_lookup_tagged_node function and its variants returns only
102 * leaf nodes with the given tag. To reduce amount of nodes to visit for
103 * these functions, this implementation keeps tagging information in internal
104 * intermediate nodes and quickly skips uninterested parts of a tree.
105 *
106 * A tree has RADIX_TREE_TAG_ID_MAX independent tag spaces, each of which are
107 * identified by a zero-origin numbers, tagid. For the current implementation,
108 * RADIX_TREE_TAG_ID_MAX is 2. A set of tags is described as a bitmask tagmask,
109 * which is a bitwise OR of (1 << tagid).
110 */
111
112 #include <sys/cdefs.h>
113
114 #if defined(_KERNEL) || defined(_STANDALONE)
115 __KERNEL_RCSID(0, "$NetBSD: radixtree.c,v 1.29 2023/03/06 21:39:06 andvar Exp $");
116 #include <sys/param.h>
117 #include <sys/errno.h>
118 #include <sys/pool.h>
119 #include <sys/radixtree.h>
120 #include <lib/libkern/libkern.h>
121 #if defined(_STANDALONE)
122 #include <lib/libsa/stand.h>
123 #endif /* defined(_STANDALONE) */
124 #else /* defined(_KERNEL) || defined(_STANDALONE) */
125 __RCSID("$NetBSD: radixtree.c,v 1.29 2023/03/06 21:39:06 andvar Exp $");
126 #include <assert.h>
127 #include <errno.h>
128 #include <stdbool.h>
129 #include <stdlib.h>
130 #include <string.h>
131 #if 1
132 #define KASSERT assert
133 #else
134 #define KASSERT(a) /* nothing */
135 #endif
136 #endif /* defined(_KERNEL) || defined(_STANDALONE) */
137
138 #include <sys/radixtree.h>
139
140 #define RADIX_TREE_BITS_PER_HEIGHT 4 /* XXX tune */
141 #define RADIX_TREE_PTR_PER_NODE (1 << RADIX_TREE_BITS_PER_HEIGHT)
142 #define RADIX_TREE_MAX_HEIGHT (64 / RADIX_TREE_BITS_PER_HEIGHT)
143 #define RADIX_TREE_INVALID_HEIGHT (RADIX_TREE_MAX_HEIGHT + 1)
144 __CTASSERT((64 % RADIX_TREE_BITS_PER_HEIGHT) == 0);
145
146 __CTASSERT(((1 << RADIX_TREE_TAG_ID_MAX) & (sizeof(int) - 1)) == 0);
147 #define RADIX_TREE_TAG_MASK ((1 << RADIX_TREE_TAG_ID_MAX) - 1)
148
149 static inline void *
entry_ptr(void * p)150 entry_ptr(void *p)
151 {
152
153 return (void *)((uintptr_t)p & ~RADIX_TREE_TAG_MASK);
154 }
155
156 static inline unsigned int
entry_tagmask(void * p)157 entry_tagmask(void *p)
158 {
159
160 return (uintptr_t)p & RADIX_TREE_TAG_MASK;
161 }
162
163 static inline void *
entry_compose(void * p,unsigned int tagmask)164 entry_compose(void *p, unsigned int tagmask)
165 {
166
167 return (void *)((uintptr_t)p | tagmask);
168 }
169
170 static inline bool
entry_match_p(void * p,unsigned int tagmask)171 entry_match_p(void *p, unsigned int tagmask)
172 {
173
174 KASSERT(entry_ptr(p) != NULL || entry_tagmask(p) == 0);
175 if (p == NULL) {
176 return false;
177 }
178 if (tagmask == 0) {
179 return true;
180 }
181 return (entry_tagmask(p) & tagmask) != 0;
182 }
183
184 /*
185 * radix_tree_node: an intermediate node
186 *
187 * we don't care the type of leaf nodes. they are just void *.
188 *
189 * we used to maintain a count of non-NULL nodes in this structure, but it
190 * prevented it from being aligned to a cache line boundary; the performance
191 * benefit from being cache friendly is greater than the benefit of having
192 * a dedicated count value, especially in multi-processor situations where
193 * we need to avoid intra-pool-page false sharing.
194 */
195
196 struct radix_tree_node {
197 void *n_ptrs[RADIX_TREE_PTR_PER_NODE];
198 };
199
200 /*
201 * p_refs[0].pptr == &t->t_root
202 * :
203 * p_refs[n].pptr == &(*p_refs[n-1])->n_ptrs[x]
204 * :
205 * :
206 * p_refs[t->t_height].pptr == &leaf_pointer
207 */
208
209 struct radix_tree_path {
210 struct radix_tree_node_ref {
211 void **pptr;
212 } p_refs[RADIX_TREE_MAX_HEIGHT + 1]; /* +1 for the root ptr */
213 /*
214 * p_lastidx is either the index of the last valid element of p_refs[]
215 * or RADIX_TREE_INVALID_HEIGHT.
216 * RADIX_TREE_INVALID_HEIGHT means that radix_tree_lookup_ptr found
217 * that the height of the tree is not enough to cover the given index.
218 */
219 unsigned int p_lastidx;
220 };
221
222 static inline void **
path_pptr(const struct radix_tree * t,const struct radix_tree_path * p,unsigned int height)223 path_pptr(const struct radix_tree *t, const struct radix_tree_path *p,
224 unsigned int height)
225 {
226
227 KASSERT(height <= t->t_height);
228 return p->p_refs[height].pptr;
229 }
230
231 static inline struct radix_tree_node *
path_node(const struct radix_tree * t,const struct radix_tree_path * p,unsigned int height)232 path_node(const struct radix_tree * t, const struct radix_tree_path *p,
233 unsigned int height)
234 {
235
236 KASSERT(height <= t->t_height);
237 return entry_ptr(*path_pptr(t, p, height));
238 }
239
240 /*
241 * radix_tree_init_tree:
242 *
243 * Initialize a tree.
244 */
245
246 void
radix_tree_init_tree(struct radix_tree * t)247 radix_tree_init_tree(struct radix_tree *t)
248 {
249
250 t->t_height = 0;
251 t->t_root = NULL;
252 }
253
254 /*
255 * radix_tree_fini_tree:
256 *
257 * Finish using a tree.
258 */
259
260 void
radix_tree_fini_tree(struct radix_tree * t)261 radix_tree_fini_tree(struct radix_tree *t)
262 {
263
264 KASSERT(t->t_root == NULL);
265 KASSERT(t->t_height == 0);
266 }
267
268 /*
269 * radix_tree_empty_tree_p:
270 *
271 * Return if the tree is empty.
272 */
273
274 bool
radix_tree_empty_tree_p(struct radix_tree * t)275 radix_tree_empty_tree_p(struct radix_tree *t)
276 {
277
278 return t->t_root == NULL;
279 }
280
281 /*
282 * radix_tree_empty_tree_p:
283 *
284 * Return true if the tree has any nodes with the given tag. Otherwise
285 * return false.
286 *
287 * It's illegal to call this function with tagmask 0.
288 */
289
290 bool
radix_tree_empty_tagged_tree_p(struct radix_tree * t,unsigned int tagmask)291 radix_tree_empty_tagged_tree_p(struct radix_tree *t, unsigned int tagmask)
292 {
293
294 KASSERT(tagmask != 0);
295 return (entry_tagmask(t->t_root) & tagmask) == 0;
296 }
297
298 static void
radix_tree_node_init(struct radix_tree_node * n)299 radix_tree_node_init(struct radix_tree_node *n)
300 {
301
302 memset(n, 0, sizeof(*n));
303 }
304
305 #if defined(_KERNEL)
306 pool_cache_t radix_tree_node_cache __read_mostly;
307
308 static int
radix_tree_node_ctor(void * dummy,void * item,int flags)309 radix_tree_node_ctor(void *dummy, void *item, int flags)
310 {
311 struct radix_tree_node *n = item;
312
313 KASSERT(dummy == NULL);
314 radix_tree_node_init(n);
315 return 0;
316 }
317
318 /*
319 * radix_tree_init:
320 *
321 * initialize the subsystem.
322 */
323
324 void
radix_tree_init(void)325 radix_tree_init(void)
326 {
327
328 radix_tree_node_cache = pool_cache_init(sizeof(struct radix_tree_node),
329 coherency_unit, 0, PR_LARGECACHE, "radixnode", NULL, IPL_NONE,
330 radix_tree_node_ctor, NULL, NULL);
331 KASSERT(radix_tree_node_cache != NULL);
332 }
333
334 /*
335 * radix_tree_await_memory:
336 *
337 * after an insert has failed with ENOMEM, wait for memory to become
338 * available, so the caller can retry. this needs to ensure that the
339 * maximum possible required number of nodes is available.
340 */
341
342 void
radix_tree_await_memory(void)343 radix_tree_await_memory(void)
344 {
345 struct radix_tree_node *nodes[RADIX_TREE_MAX_HEIGHT];
346 int i;
347
348 for (i = 0; i < __arraycount(nodes); i++) {
349 nodes[i] = pool_cache_get(radix_tree_node_cache, PR_WAITOK);
350 }
351 while (--i >= 0) {
352 pool_cache_put(radix_tree_node_cache, nodes[i]);
353 }
354 }
355
356 #endif /* defined(_KERNEL) */
357
358 /*
359 * radix_tree_sum_node:
360 *
361 * return the logical sum of all entries in the given node. used to quickly
362 * check for tag masks or empty nodes.
363 */
364
365 static uintptr_t
radix_tree_sum_node(const struct radix_tree_node * n)366 radix_tree_sum_node(const struct radix_tree_node *n)
367 {
368 #if RADIX_TREE_PTR_PER_NODE > 16
369 unsigned int i;
370 uintptr_t sum;
371
372 for (i = 0, sum = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
373 sum |= (uintptr_t)n->n_ptrs[i];
374 }
375 return sum;
376 #else /* RADIX_TREE_PTR_PER_NODE > 16 */
377 uintptr_t sum;
378
379 /*
380 * Unrolling the above is much better than a tight loop with two
381 * test+branch pairs. On x86 with gcc 5.5.0 this compiles into 19
382 * deterministic instructions including the "return" and prologue &
383 * epilogue.
384 */
385 sum = (uintptr_t)n->n_ptrs[0];
386 sum |= (uintptr_t)n->n_ptrs[1];
387 sum |= (uintptr_t)n->n_ptrs[2];
388 sum |= (uintptr_t)n->n_ptrs[3];
389 #if RADIX_TREE_PTR_PER_NODE > 4
390 sum |= (uintptr_t)n->n_ptrs[4];
391 sum |= (uintptr_t)n->n_ptrs[5];
392 sum |= (uintptr_t)n->n_ptrs[6];
393 sum |= (uintptr_t)n->n_ptrs[7];
394 #endif
395 #if RADIX_TREE_PTR_PER_NODE > 8
396 sum |= (uintptr_t)n->n_ptrs[8];
397 sum |= (uintptr_t)n->n_ptrs[9];
398 sum |= (uintptr_t)n->n_ptrs[10];
399 sum |= (uintptr_t)n->n_ptrs[11];
400 sum |= (uintptr_t)n->n_ptrs[12];
401 sum |= (uintptr_t)n->n_ptrs[13];
402 sum |= (uintptr_t)n->n_ptrs[14];
403 sum |= (uintptr_t)n->n_ptrs[15];
404 #endif
405 return sum;
406 #endif /* RADIX_TREE_PTR_PER_NODE > 16 */
407 }
408
409 static int __unused
radix_tree_node_count_ptrs(const struct radix_tree_node * n)410 radix_tree_node_count_ptrs(const struct radix_tree_node *n)
411 {
412 unsigned int i, c;
413
414 for (i = c = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
415 c += (n->n_ptrs[i] != NULL);
416 }
417 return c;
418 }
419
420 static struct radix_tree_node *
radix_tree_alloc_node(void)421 radix_tree_alloc_node(void)
422 {
423 struct radix_tree_node *n;
424
425 #if defined(_KERNEL)
426 /*
427 * note that pool_cache_get can block.
428 */
429 n = pool_cache_get(radix_tree_node_cache, PR_NOWAIT);
430 #else /* defined(_KERNEL) */
431 #if defined(_STANDALONE)
432 n = alloc(sizeof(*n));
433 #else /* defined(_STANDALONE) */
434 n = malloc(sizeof(*n));
435 #endif /* defined(_STANDALONE) */
436 if (n != NULL) {
437 radix_tree_node_init(n);
438 }
439 #endif /* defined(_KERNEL) */
440 KASSERT(n == NULL || radix_tree_sum_node(n) == 0);
441 return n;
442 }
443
444 static void
radix_tree_free_node(struct radix_tree_node * n)445 radix_tree_free_node(struct radix_tree_node *n)
446 {
447
448 KASSERT(radix_tree_sum_node(n) == 0);
449 #if defined(_KERNEL)
450 pool_cache_put(radix_tree_node_cache, n);
451 #elif defined(_STANDALONE)
452 dealloc(n, sizeof(*n));
453 #else
454 free(n);
455 #endif
456 }
457
458 /*
459 * radix_tree_grow:
460 *
461 * increase the height of the tree.
462 */
463
464 static __noinline int
radix_tree_grow(struct radix_tree * t,unsigned int newheight)465 radix_tree_grow(struct radix_tree *t, unsigned int newheight)
466 {
467 const unsigned int tagmask = entry_tagmask(t->t_root);
468 struct radix_tree_node *newnodes[RADIX_TREE_MAX_HEIGHT];
469 void *root;
470 int h;
471
472 KASSERT(newheight <= RADIX_TREE_MAX_HEIGHT);
473 if ((root = t->t_root) == NULL) {
474 t->t_height = newheight;
475 return 0;
476 }
477 for (h = t->t_height; h < newheight; h++) {
478 newnodes[h] = radix_tree_alloc_node();
479 if (__predict_false(newnodes[h] == NULL)) {
480 while (--h >= (int)t->t_height) {
481 newnodes[h]->n_ptrs[0] = NULL;
482 radix_tree_free_node(newnodes[h]);
483 }
484 return ENOMEM;
485 }
486 newnodes[h]->n_ptrs[0] = root;
487 root = entry_compose(newnodes[h], tagmask);
488 }
489 t->t_root = root;
490 t->t_height = h;
491 return 0;
492 }
493
494 /*
495 * radix_tree_lookup_ptr:
496 *
497 * an internal helper function used for various exported functions.
498 *
499 * return the pointer to store the node for the given index.
500 *
501 * if alloc is true, try to allocate the storage. (note for _KERNEL:
502 * in that case, this function can block.) if the allocation failed or
503 * alloc is false, return NULL.
504 *
505 * if path is not NULL, fill it for the caller's investigation.
506 *
507 * if tagmask is not zero, search only for nodes with the tag set.
508 * note that, however, this function doesn't check the tagmask for the leaf
509 * pointer. it's a caller's responsibility to investigate the value which
510 * is pointed by the returned pointer if necessary.
511 *
512 * while this function is a bit large, as it's called with some constant
513 * arguments, inlining might have benefits. anyway, a compiler will decide.
514 */
515
516 static inline void **
radix_tree_lookup_ptr(struct radix_tree * t,uint64_t idx,struct radix_tree_path * path,bool alloc,const unsigned int tagmask)517 radix_tree_lookup_ptr(struct radix_tree *t, uint64_t idx,
518 struct radix_tree_path *path, bool alloc, const unsigned int tagmask)
519 {
520 struct radix_tree_node *n;
521 int hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
522 int shift;
523 void **vpp;
524 const uint64_t mask = (UINT64_C(1) << RADIX_TREE_BITS_PER_HEIGHT) - 1;
525 struct radix_tree_node_ref *refs = NULL;
526
527 /*
528 * check unsupported combinations
529 */
530 KASSERT(tagmask == 0 || !alloc);
531 KASSERT(path == NULL || !alloc);
532 vpp = &t->t_root;
533 if (path != NULL) {
534 refs = path->p_refs;
535 refs->pptr = vpp;
536 }
537 n = NULL;
538 for (shift = 64 - RADIX_TREE_BITS_PER_HEIGHT; shift >= 0;) {
539 struct radix_tree_node *c;
540 void *entry;
541 const uint64_t i = (idx >> shift) & mask;
542
543 if (shift >= hshift) {
544 unsigned int newheight;
545
546 KASSERT(vpp == &t->t_root);
547 if (i == 0) {
548 shift -= RADIX_TREE_BITS_PER_HEIGHT;
549 continue;
550 }
551 if (!alloc) {
552 if (path != NULL) {
553 KASSERT((refs - path->p_refs) == 0);
554 path->p_lastidx =
555 RADIX_TREE_INVALID_HEIGHT;
556 }
557 return NULL;
558 }
559 newheight = shift / RADIX_TREE_BITS_PER_HEIGHT + 1;
560 if (radix_tree_grow(t, newheight)) {
561 return NULL;
562 }
563 hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
564 }
565 entry = *vpp;
566 c = entry_ptr(entry);
567 if (c == NULL ||
568 (tagmask != 0 &&
569 (entry_tagmask(entry) & tagmask) == 0)) {
570 if (!alloc) {
571 if (path != NULL) {
572 path->p_lastidx = refs - path->p_refs;
573 }
574 return NULL;
575 }
576 c = radix_tree_alloc_node();
577 if (c == NULL) {
578 return NULL;
579 }
580 *vpp = c;
581 }
582 n = c;
583 vpp = &n->n_ptrs[i];
584 if (path != NULL) {
585 refs++;
586 refs->pptr = vpp;
587 }
588 shift -= RADIX_TREE_BITS_PER_HEIGHT;
589 }
590 if (alloc) {
591 KASSERT(*vpp == NULL);
592 }
593 if (path != NULL) {
594 path->p_lastidx = refs - path->p_refs;
595 }
596 return vpp;
597 }
598
599 /*
600 * radix_tree_undo_insert_node:
601 *
602 * Undo the effects of a failed insert. The conditions that led to the
603 * insert may change and it may not be retried. If the insert is not
604 * retried, there will be no corresponding radix_tree_remove_node() for
605 * this index in the future. Therefore any adjustments made to the tree
606 * before memory was exhausted must be reverted.
607 */
608
609 static __noinline void
radix_tree_undo_insert_node(struct radix_tree * t,uint64_t idx)610 radix_tree_undo_insert_node(struct radix_tree *t, uint64_t idx)
611 {
612 struct radix_tree_path path;
613 int i;
614
615 (void)radix_tree_lookup_ptr(t, idx, &path, false, 0);
616 if (path.p_lastidx == RADIX_TREE_INVALID_HEIGHT) {
617 /*
618 * no nodes were inserted.
619 */
620 return;
621 }
622 for (i = path.p_lastidx - 1; i >= 0; i--) {
623 struct radix_tree_node ** const pptr =
624 (struct radix_tree_node **)path_pptr(t, &path, i);
625 struct radix_tree_node *n;
626
627 KASSERT(pptr != NULL);
628 n = entry_ptr(*pptr);
629 KASSERT(n != NULL);
630 if (radix_tree_sum_node(n) != 0) {
631 break;
632 }
633 radix_tree_free_node(n);
634 *pptr = NULL;
635 }
636 /*
637 * fix up height
638 */
639 if (i < 0) {
640 KASSERT(t->t_root == NULL);
641 t->t_height = 0;
642 }
643 }
644
645 /*
646 * radix_tree_insert_node:
647 *
648 * Insert the node at the given index.
649 *
650 * It's illegal to insert NULL. It's illegal to insert a non-aligned pointer.
651 *
652 * This function returns ENOMEM if necessary memory allocation failed.
653 * Otherwise, this function returns 0.
654 *
655 * Note that inserting a node can involves memory allocation for intermediate
656 * nodes. If _KERNEL, it's done with no-sleep IPL_NONE memory allocation.
657 *
658 * For the newly inserted node, all tags are cleared.
659 */
660
661 int
radix_tree_insert_node(struct radix_tree * t,uint64_t idx,void * p)662 radix_tree_insert_node(struct radix_tree *t, uint64_t idx, void *p)
663 {
664 void **vpp;
665
666 KASSERT(p != NULL);
667 KASSERT(entry_tagmask(entry_compose(p, 0)) == 0);
668 vpp = radix_tree_lookup_ptr(t, idx, NULL, true, 0);
669 if (__predict_false(vpp == NULL)) {
670 radix_tree_undo_insert_node(t, idx);
671 return ENOMEM;
672 }
673 KASSERT(*vpp == NULL);
674 *vpp = p;
675 return 0;
676 }
677
678 /*
679 * radix_tree_replace_node:
680 *
681 * Replace a node at the given index with the given node and return the
682 * replaced one.
683 *
684 * It's illegal to try to replace a node which has not been inserted.
685 *
686 * This function keeps tags intact.
687 */
688
689 void *
radix_tree_replace_node(struct radix_tree * t,uint64_t idx,void * p)690 radix_tree_replace_node(struct radix_tree *t, uint64_t idx, void *p)
691 {
692 void **vpp;
693 void *oldp;
694
695 KASSERT(p != NULL);
696 KASSERT(entry_tagmask(entry_compose(p, 0)) == 0);
697 vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
698 KASSERT(vpp != NULL);
699 oldp = *vpp;
700 KASSERT(oldp != NULL);
701 *vpp = entry_compose(p, entry_tagmask(*vpp));
702 return entry_ptr(oldp);
703 }
704
705 /*
706 * radix_tree_remove_node:
707 *
708 * Remove the node at the given index.
709 *
710 * It's illegal to try to remove a node which has not been inserted.
711 */
712
713 void *
radix_tree_remove_node(struct radix_tree * t,uint64_t idx)714 radix_tree_remove_node(struct radix_tree *t, uint64_t idx)
715 {
716 struct radix_tree_path path;
717 void **vpp;
718 void *oldp;
719 int i;
720
721 vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
722 KASSERT(vpp != NULL);
723 oldp = *vpp;
724 KASSERT(oldp != NULL);
725 KASSERT(path.p_lastidx == t->t_height);
726 KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
727 *vpp = NULL;
728 for (i = t->t_height - 1; i >= 0; i--) {
729 void *entry;
730 struct radix_tree_node ** const pptr =
731 (struct radix_tree_node **)path_pptr(t, &path, i);
732 struct radix_tree_node *n;
733
734 KASSERT(pptr != NULL);
735 entry = *pptr;
736 n = entry_ptr(entry);
737 KASSERT(n != NULL);
738 if (radix_tree_sum_node(n) != 0) {
739 break;
740 }
741 radix_tree_free_node(n);
742 *pptr = NULL;
743 }
744 /*
745 * fix up height
746 */
747 if (i < 0) {
748 KASSERT(t->t_root == NULL);
749 t->t_height = 0;
750 }
751 /*
752 * update tags
753 */
754 for (; i >= 0; i--) {
755 void *entry;
756 struct radix_tree_node ** const pptr =
757 (struct radix_tree_node **)path_pptr(t, &path, i);
758 struct radix_tree_node *n;
759 unsigned int newmask;
760
761 KASSERT(pptr != NULL);
762 entry = *pptr;
763 n = entry_ptr(entry);
764 KASSERT(n != NULL);
765 KASSERT(radix_tree_sum_node(n) != 0);
766 newmask = radix_tree_sum_node(n) & RADIX_TREE_TAG_MASK;
767 if (newmask == entry_tagmask(entry)) {
768 break;
769 }
770 *pptr = entry_compose(n, newmask);
771 }
772 /*
773 * XXX is it worth to try to reduce height?
774 * if we do that, make radix_tree_grow rollback its change as well.
775 */
776 return entry_ptr(oldp);
777 }
778
779 /*
780 * radix_tree_lookup_node:
781 *
782 * Returns the node at the given index.
783 * Returns NULL if nothing is found at the given index.
784 */
785
786 void *
radix_tree_lookup_node(struct radix_tree * t,uint64_t idx)787 radix_tree_lookup_node(struct radix_tree *t, uint64_t idx)
788 {
789 void **vpp;
790
791 vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
792 if (vpp == NULL) {
793 return NULL;
794 }
795 return entry_ptr(*vpp);
796 }
797
798 static inline void
gang_lookup_init(struct radix_tree * t,uint64_t idx,struct radix_tree_path * path,const unsigned int tagmask)799 gang_lookup_init(struct radix_tree *t, uint64_t idx,
800 struct radix_tree_path *path, const unsigned int tagmask)
801 {
802 void **vpp __unused;
803
804 vpp = radix_tree_lookup_ptr(t, idx, path, false, tagmask);
805 KASSERT(vpp == NULL ||
806 vpp == path_pptr(t, path, path->p_lastidx));
807 KASSERT(&t->t_root == path_pptr(t, path, 0));
808 KASSERT(path->p_lastidx == RADIX_TREE_INVALID_HEIGHT ||
809 path->p_lastidx == t->t_height ||
810 !entry_match_p(*path_pptr(t, path, path->p_lastidx), tagmask));
811 }
812
813 /*
814 * gang_lookup_scan:
815 *
816 * a helper routine for radix_tree_gang_lookup_node and its variants.
817 */
818
819 static inline unsigned int
820 __attribute__((__always_inline__))
gang_lookup_scan(struct radix_tree * t,struct radix_tree_path * path,void ** results,const unsigned int maxresults,const unsigned int tagmask,const bool reverse,const bool dense)821 gang_lookup_scan(struct radix_tree *t, struct radix_tree_path *path,
822 void **results, const unsigned int maxresults, const unsigned int tagmask,
823 const bool reverse, const bool dense)
824 {
825
826 /*
827 * we keep the path updated only for lastidx-1.
828 * vpp is what path_pptr(t, path, lastidx) would be.
829 */
830 void **vpp;
831 unsigned int nfound;
832 unsigned int lastidx;
833 /*
834 * set up scan direction dependant constants so that we can iterate
835 * n_ptrs as the following.
836 *
837 * for (i = first; i != guard; i += step)
838 * visit n->n_ptrs[i];
839 */
840 const int step = reverse ? -1 : 1;
841 const unsigned int first = reverse ? RADIX_TREE_PTR_PER_NODE - 1 : 0;
842 const unsigned int last = reverse ? 0 : RADIX_TREE_PTR_PER_NODE - 1;
843 const unsigned int guard = last + step;
844
845 KASSERT(maxresults > 0);
846 KASSERT(&t->t_root == path_pptr(t, path, 0));
847 lastidx = path->p_lastidx;
848 KASSERT(lastidx == RADIX_TREE_INVALID_HEIGHT ||
849 lastidx == t->t_height ||
850 !entry_match_p(*path_pptr(t, path, lastidx), tagmask));
851 nfound = 0;
852 if (lastidx == RADIX_TREE_INVALID_HEIGHT) {
853 /*
854 * requested idx is beyond the right-most node.
855 */
856 if (reverse && !dense) {
857 lastidx = 0;
858 vpp = path_pptr(t, path, lastidx);
859 goto descend;
860 }
861 return 0;
862 }
863 vpp = path_pptr(t, path, lastidx);
864 while (/*CONSTCOND*/true) {
865 struct radix_tree_node *n;
866 unsigned int i;
867
868 if (entry_match_p(*vpp, tagmask)) {
869 KASSERT(lastidx == t->t_height);
870 /*
871 * record the matching non-NULL leaf.
872 */
873 results[nfound] = entry_ptr(*vpp);
874 nfound++;
875 if (nfound == maxresults) {
876 return nfound;
877 }
878 } else if (dense) {
879 return nfound;
880 }
881 scan_siblings:
882 /*
883 * try to find the next matching non-NULL sibling.
884 */
885 if (lastidx == 0) {
886 /*
887 * the root has no siblings.
888 * we've done.
889 */
890 KASSERT(vpp == &t->t_root);
891 break;
892 }
893 n = path_node(t, path, lastidx - 1);
894 for (i = vpp - n->n_ptrs + step; i != guard; i += step) {
895 KASSERT(i < RADIX_TREE_PTR_PER_NODE);
896 if (entry_match_p(n->n_ptrs[i], tagmask)) {
897 vpp = &n->n_ptrs[i];
898 break;
899 } else if (dense) {
900 return nfound;
901 }
902 }
903 if (i == guard) {
904 /*
905 * not found. go to parent.
906 */
907 lastidx--;
908 vpp = path_pptr(t, path, lastidx);
909 goto scan_siblings;
910 }
911 descend:
912 /*
913 * following the left-most (or right-most in the case of
914 * reverse scan) child node, descend until reaching the leaf or
915 * a non-matching entry.
916 */
917 while (entry_match_p(*vpp, tagmask) && lastidx < t->t_height) {
918 /*
919 * save vpp in the path so that we can come back to this
920 * node after finishing visiting children.
921 */
922 path->p_refs[lastidx].pptr = vpp;
923 n = entry_ptr(*vpp);
924 vpp = &n->n_ptrs[first];
925 lastidx++;
926 }
927 }
928 return nfound;
929 }
930
931 /*
932 * radix_tree_gang_lookup_node:
933 *
934 * Scan the tree starting from the given index in the ascending order and
935 * return found nodes.
936 *
937 * results should be an array large enough to hold maxresults pointers.
938 * This function returns the number of nodes found, up to maxresults.
939 * Returning less than maxresults means there are no more nodes in the tree.
940 *
941 * If dense == true, this function stops scanning when it founds a hole of
942 * indexes. I.e. an index for which radix_tree_lookup_node would returns NULL.
943 * If dense == false, this function skips holes and continue scanning until
944 * maxresults nodes are found or it reaches the limit of the index range.
945 *
946 * The result of this function is semantically equivalent to what could be
947 * obtained by repeated calls of radix_tree_lookup_node with increasing index.
948 * but this function is expected to be computationally cheaper when looking up
949 * multiple nodes at once. Especially, it's expected to be much cheaper when
950 * node indexes are distributed sparsely.
951 *
952 * Note that this function doesn't return index values of found nodes.
953 * Thus, in the case of dense == false, if index values are important for
954 * a caller, it's the caller's responsibility to check them, typically
955 * by examining the returned nodes using some caller-specific knowledge
956 * about them.
957 * In the case of dense == true, a node returned via results[N] is always for
958 * the index (idx + N).
959 */
960
961 unsigned int
radix_tree_gang_lookup_node(struct radix_tree * t,uint64_t idx,void ** results,unsigned int maxresults,bool dense)962 radix_tree_gang_lookup_node(struct radix_tree *t, uint64_t idx,
963 void **results, unsigned int maxresults, bool dense)
964 {
965 struct radix_tree_path path;
966
967 gang_lookup_init(t, idx, &path, 0);
968 return gang_lookup_scan(t, &path, results, maxresults, 0, false, dense);
969 }
970
971 /*
972 * radix_tree_gang_lookup_node_reverse:
973 *
974 * Same as radix_tree_gang_lookup_node except that this one scans the
975 * tree in the reverse order. I.e. descending index values.
976 */
977
978 unsigned int
radix_tree_gang_lookup_node_reverse(struct radix_tree * t,uint64_t idx,void ** results,unsigned int maxresults,bool dense)979 radix_tree_gang_lookup_node_reverse(struct radix_tree *t, uint64_t idx,
980 void **results, unsigned int maxresults, bool dense)
981 {
982 struct radix_tree_path path;
983
984 gang_lookup_init(t, idx, &path, 0);
985 return gang_lookup_scan(t, &path, results, maxresults, 0, true, dense);
986 }
987
988 /*
989 * radix_tree_gang_lookup_tagged_node:
990 *
991 * Same as radix_tree_gang_lookup_node except that this one only returns
992 * nodes tagged with tagid.
993 *
994 * It's illegal to call this function with tagmask 0.
995 */
996
997 unsigned int
radix_tree_gang_lookup_tagged_node(struct radix_tree * t,uint64_t idx,void ** results,unsigned int maxresults,bool dense,unsigned int tagmask)998 radix_tree_gang_lookup_tagged_node(struct radix_tree *t, uint64_t idx,
999 void **results, unsigned int maxresults, bool dense, unsigned int tagmask)
1000 {
1001 struct radix_tree_path path;
1002
1003 KASSERT(tagmask != 0);
1004 gang_lookup_init(t, idx, &path, tagmask);
1005 return gang_lookup_scan(t, &path, results, maxresults, tagmask, false,
1006 dense);
1007 }
1008
1009 /*
1010 * radix_tree_gang_lookup_tagged_node_reverse:
1011 *
1012 * Same as radix_tree_gang_lookup_tagged_node except that this one scans the
1013 * tree in the reverse order. I.e. descending index values.
1014 */
1015
1016 unsigned int
radix_tree_gang_lookup_tagged_node_reverse(struct radix_tree * t,uint64_t idx,void ** results,unsigned int maxresults,bool dense,unsigned int tagmask)1017 radix_tree_gang_lookup_tagged_node_reverse(struct radix_tree *t, uint64_t idx,
1018 void **results, unsigned int maxresults, bool dense, unsigned int tagmask)
1019 {
1020 struct radix_tree_path path;
1021
1022 KASSERT(tagmask != 0);
1023 gang_lookup_init(t, idx, &path, tagmask);
1024 return gang_lookup_scan(t, &path, results, maxresults, tagmask, true,
1025 dense);
1026 }
1027
1028 /*
1029 * radix_tree_get_tag:
1030 *
1031 * Return the tagmask for the node at the given index.
1032 *
1033 * It's illegal to call this function for a node which has not been inserted.
1034 */
1035
1036 unsigned int
radix_tree_get_tag(struct radix_tree * t,uint64_t idx,unsigned int tagmask)1037 radix_tree_get_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
1038 {
1039 /*
1040 * the following two implementations should behave same.
1041 * the former one was chosen because it seems faster.
1042 */
1043 #if 1
1044 void **vpp;
1045
1046 vpp = radix_tree_lookup_ptr(t, idx, NULL, false, tagmask);
1047 if (vpp == NULL) {
1048 return false;
1049 }
1050 KASSERT(*vpp != NULL);
1051 return (entry_tagmask(*vpp) & tagmask);
1052 #else
1053 void **vpp;
1054
1055 vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
1056 KASSERT(vpp != NULL);
1057 return (entry_tagmask(*vpp) & tagmask);
1058 #endif
1059 }
1060
1061 /*
1062 * radix_tree_set_tag:
1063 *
1064 * Set the tag for the node at the given index.
1065 *
1066 * It's illegal to call this function for a node which has not been inserted.
1067 * It's illegal to call this function with tagmask 0.
1068 */
1069
1070 void
radix_tree_set_tag(struct radix_tree * t,uint64_t idx,unsigned int tagmask)1071 radix_tree_set_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
1072 {
1073 struct radix_tree_path path;
1074 void **vpp __unused;
1075 int i;
1076
1077 KASSERT(tagmask != 0);
1078 vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
1079 KASSERT(vpp != NULL);
1080 KASSERT(*vpp != NULL);
1081 KASSERT(path.p_lastidx == t->t_height);
1082 KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
1083 for (i = t->t_height; i >= 0; i--) {
1084 void ** const pptr = (void **)path_pptr(t, &path, i);
1085 void *entry;
1086
1087 KASSERT(pptr != NULL);
1088 entry = *pptr;
1089 if ((entry_tagmask(entry) & tagmask) != 0) {
1090 break;
1091 }
1092 *pptr = (void *)((uintptr_t)entry | tagmask);
1093 }
1094 }
1095
1096 /*
1097 * radix_tree_clear_tag:
1098 *
1099 * Clear the tag for the node at the given index.
1100 *
1101 * It's illegal to call this function for a node which has not been inserted.
1102 * It's illegal to call this function with tagmask 0.
1103 */
1104
1105 void
radix_tree_clear_tag(struct radix_tree * t,uint64_t idx,unsigned int tagmask)1106 radix_tree_clear_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
1107 {
1108 struct radix_tree_path path;
1109 void **vpp;
1110 int i;
1111
1112 KASSERT(tagmask != 0);
1113 vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
1114 KASSERT(vpp != NULL);
1115 KASSERT(*vpp != NULL);
1116 KASSERT(path.p_lastidx == t->t_height);
1117 KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
1118 /*
1119 * if already cleared, nothing to do
1120 */
1121 if ((entry_tagmask(*vpp) & tagmask) == 0) {
1122 return;
1123 }
1124 /*
1125 * clear the tag only if no children have the tag.
1126 */
1127 for (i = t->t_height; i >= 0; i--) {
1128 void ** const pptr = (void **)path_pptr(t, &path, i);
1129 void *entry;
1130
1131 KASSERT(pptr != NULL);
1132 entry = *pptr;
1133 KASSERT((entry_tagmask(entry) & tagmask) != 0);
1134 *pptr = entry_compose(entry_ptr(entry),
1135 entry_tagmask(entry) & ~tagmask);
1136 /*
1137 * check if we should proceed to process the next level.
1138 */
1139 if (0 < i) {
1140 struct radix_tree_node *n = path_node(t, &path, i - 1);
1141
1142 if ((radix_tree_sum_node(n) & tagmask) != 0) {
1143 break;
1144 }
1145 }
1146 }
1147 }
1148
1149 #if defined(UNITTEST)
1150
1151 #include <inttypes.h>
1152 #include <stdio.h>
1153
1154 static void
radix_tree_dump_node(const struct radix_tree * t,void * vp,uint64_t offset,unsigned int height)1155 radix_tree_dump_node(const struct radix_tree *t, void *vp,
1156 uint64_t offset, unsigned int height)
1157 {
1158 struct radix_tree_node *n;
1159 unsigned int i;
1160
1161 for (i = 0; i < t->t_height - height; i++) {
1162 printf(" ");
1163 }
1164 if (entry_tagmask(vp) == 0) {
1165 printf("[%" PRIu64 "] %p", offset, entry_ptr(vp));
1166 } else {
1167 printf("[%" PRIu64 "] %p (tagmask=0x%x)", offset, entry_ptr(vp),
1168 entry_tagmask(vp));
1169 }
1170 if (height == 0) {
1171 printf(" (leaf)\n");
1172 return;
1173 }
1174 n = entry_ptr(vp);
1175 assert((radix_tree_sum_node(n) & RADIX_TREE_TAG_MASK) ==
1176 entry_tagmask(vp));
1177 printf(" (%u children)\n", radix_tree_node_count_ptrs(n));
1178 for (i = 0; i < __arraycount(n->n_ptrs); i++) {
1179 void *c;
1180
1181 c = n->n_ptrs[i];
1182 if (c == NULL) {
1183 continue;
1184 }
1185 radix_tree_dump_node(t, c,
1186 offset + i * (UINT64_C(1) <<
1187 (RADIX_TREE_BITS_PER_HEIGHT * (height - 1))), height - 1);
1188 }
1189 }
1190
1191 void radix_tree_dump(const struct radix_tree *);
1192
1193 void
radix_tree_dump(const struct radix_tree * t)1194 radix_tree_dump(const struct radix_tree *t)
1195 {
1196
1197 printf("tree %p height=%u\n", t, t->t_height);
1198 radix_tree_dump_node(t, t->t_root, 0, t->t_height);
1199 }
1200
1201 static void
test1(void)1202 test1(void)
1203 {
1204 struct radix_tree s;
1205 struct radix_tree *t = &s;
1206 void *results[3];
1207
1208 radix_tree_init_tree(t);
1209 radix_tree_dump(t);
1210 assert(radix_tree_lookup_node(t, 0) == NULL);
1211 assert(radix_tree_lookup_node(t, 1000) == NULL);
1212 assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 0);
1213 assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 0);
1214 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 0);
1215 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 0);
1216 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false) ==
1217 0);
1218 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true) ==
1219 0);
1220 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
1221 == 0);
1222 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
1223 == 0);
1224 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
1225 == 0);
1226 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
1227 == 0);
1228 assert(radix_tree_gang_lookup_tagged_node(t, 1000, results, 3, false, 1)
1229 == 0);
1230 assert(radix_tree_gang_lookup_tagged_node(t, 1000, results, 3, true, 1)
1231 == 0);
1232 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1233 false, 1) == 0);
1234 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1235 true, 1) == 0);
1236 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 1000, results, 3,
1237 false, 1) == 0);
1238 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 1000, results, 3,
1239 true, 1) == 0);
1240 assert(radix_tree_empty_tree_p(t));
1241 assert(radix_tree_empty_tagged_tree_p(t, 1));
1242 assert(radix_tree_empty_tagged_tree_p(t, 2));
1243 assert(radix_tree_insert_node(t, 0, (void *)0xdeadbea0) == 0);
1244 assert(!radix_tree_empty_tree_p(t));
1245 assert(radix_tree_empty_tagged_tree_p(t, 1));
1246 assert(radix_tree_empty_tagged_tree_p(t, 2));
1247 assert(radix_tree_lookup_node(t, 0) == (void *)0xdeadbea0);
1248 assert(radix_tree_lookup_node(t, 1000) == NULL);
1249 memset(results, 0, sizeof(results));
1250 assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 1);
1251 assert(results[0] == (void *)0xdeadbea0);
1252 memset(results, 0, sizeof(results));
1253 assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 1);
1254 assert(results[0] == (void *)0xdeadbea0);
1255 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 0);
1256 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 0);
1257 memset(results, 0, sizeof(results));
1258 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false) ==
1259 1);
1260 assert(results[0] == (void *)0xdeadbea0);
1261 memset(results, 0, sizeof(results));
1262 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true) ==
1263 1);
1264 assert(results[0] == (void *)0xdeadbea0);
1265 memset(results, 0, sizeof(results));
1266 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
1267 == 1);
1268 assert(results[0] == (void *)0xdeadbea0);
1269 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
1270 == 0);
1271 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
1272 == 0);
1273 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
1274 == 0);
1275 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1276 false, 1) == 0);
1277 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1278 true, 1) == 0);
1279 assert(radix_tree_insert_node(t, 1000, (void *)0xdeadbea0) == 0);
1280 assert(radix_tree_remove_node(t, 0) == (void *)0xdeadbea0);
1281 assert(!radix_tree_empty_tree_p(t));
1282 radix_tree_dump(t);
1283 assert(radix_tree_lookup_node(t, 0) == NULL);
1284 assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1285 memset(results, 0, sizeof(results));
1286 assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 1);
1287 assert(results[0] == (void *)0xdeadbea0);
1288 assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 0);
1289 memset(results, 0, sizeof(results));
1290 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 1);
1291 assert(results[0] == (void *)0xdeadbea0);
1292 memset(results, 0, sizeof(results));
1293 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 1);
1294 assert(results[0] == (void *)0xdeadbea0);
1295 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false)
1296 == 0);
1297 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true)
1298 == 0);
1299 memset(results, 0, sizeof(results));
1300 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
1301 == 1);
1302 memset(results, 0, sizeof(results));
1303 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
1304 == 1);
1305 assert(results[0] == (void *)0xdeadbea0);
1306 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
1307 == 0);
1308 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
1309 == 0);
1310 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1311 false, 1) == 0);
1312 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1313 true, 1) == 0);
1314 assert(!radix_tree_get_tag(t, 1000, 1));
1315 assert(!radix_tree_get_tag(t, 1000, 2));
1316 assert(radix_tree_get_tag(t, 1000, 2 | 1) == 0);
1317 assert(radix_tree_empty_tagged_tree_p(t, 1));
1318 assert(radix_tree_empty_tagged_tree_p(t, 2));
1319 radix_tree_set_tag(t, 1000, 2);
1320 assert(!radix_tree_get_tag(t, 1000, 1));
1321 assert(radix_tree_get_tag(t, 1000, 2));
1322 assert(radix_tree_get_tag(t, 1000, 2 | 1) == 2);
1323 assert(radix_tree_empty_tagged_tree_p(t, 1));
1324 assert(!radix_tree_empty_tagged_tree_p(t, 2));
1325 radix_tree_dump(t);
1326 assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1327 assert(radix_tree_insert_node(t, 0, (void *)0xbea0) == 0);
1328 radix_tree_dump(t);
1329 assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
1330 assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1331 assert(radix_tree_insert_node(t, UINT64_C(10000000000), (void *)0xdea0)
1332 == 0);
1333 radix_tree_dump(t);
1334 assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
1335 assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1336 assert(radix_tree_lookup_node(t, UINT64_C(10000000000)) ==
1337 (void *)0xdea0);
1338 radix_tree_dump(t);
1339 assert(!radix_tree_get_tag(t, 0, 2));
1340 assert(radix_tree_get_tag(t, 1000, 2));
1341 assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 1));
1342 radix_tree_set_tag(t, 0, 2);
1343 radix_tree_set_tag(t, UINT64_C(10000000000), 2);
1344 radix_tree_dump(t);
1345 assert(radix_tree_get_tag(t, 0, 2));
1346 assert(radix_tree_get_tag(t, 1000, 2));
1347 assert(radix_tree_get_tag(t, UINT64_C(10000000000), 2));
1348 radix_tree_clear_tag(t, 0, 2);
1349 radix_tree_clear_tag(t, UINT64_C(10000000000), 2);
1350 radix_tree_dump(t);
1351 assert(!radix_tree_get_tag(t, 0, 2));
1352 assert(radix_tree_get_tag(t, 1000, 2));
1353 assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 2));
1354 radix_tree_dump(t);
1355 assert(radix_tree_replace_node(t, 1000, (void *)0x12345678) ==
1356 (void *)0xdeadbea0);
1357 assert(!radix_tree_get_tag(t, 1000, 1));
1358 assert(radix_tree_get_tag(t, 1000, 2));
1359 assert(radix_tree_get_tag(t, 1000, 2 | 1) == 2);
1360 memset(results, 0, sizeof(results));
1361 assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 3);
1362 assert(results[0] == (void *)0xbea0);
1363 assert(results[1] == (void *)0x12345678);
1364 assert(results[2] == (void *)0xdea0);
1365 memset(results, 0, sizeof(results));
1366 assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 1);
1367 assert(results[0] == (void *)0xbea0);
1368 memset(results, 0, sizeof(results));
1369 assert(radix_tree_gang_lookup_node(t, 1, results, 3, false) == 2);
1370 assert(results[0] == (void *)0x12345678);
1371 assert(results[1] == (void *)0xdea0);
1372 assert(radix_tree_gang_lookup_node(t, 1, results, 3, true) == 0);
1373 memset(results, 0, sizeof(results));
1374 assert(radix_tree_gang_lookup_node(t, 1001, results, 3, false) == 1);
1375 assert(results[0] == (void *)0xdea0);
1376 assert(radix_tree_gang_lookup_node(t, 1001, results, 3, true) == 0);
1377 assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000001), results, 3,
1378 false) == 0);
1379 assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000001), results, 3,
1380 true) == 0);
1381 assert(radix_tree_gang_lookup_node(t, UINT64_C(1000000000000), results,
1382 3, false) == 0);
1383 assert(radix_tree_gang_lookup_node(t, UINT64_C(1000000000000), results,
1384 3, true) == 0);
1385 memset(results, 0, sizeof(results));
1386 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 100, false, 2)
1387 == 1);
1388 assert(results[0] == (void *)0x12345678);
1389 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 100, true, 2)
1390 == 0);
1391 assert(entry_tagmask(t->t_root) != 0);
1392 assert(radix_tree_remove_node(t, 1000) == (void *)0x12345678);
1393 assert(entry_tagmask(t->t_root) == 0);
1394 radix_tree_dump(t);
1395 assert(radix_tree_insert_node(t, UINT64_C(10000000001), (void *)0xfff0)
1396 == 0);
1397 memset(results, 0, sizeof(results));
1398 assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000000), results, 3,
1399 false) == 2);
1400 assert(results[0] == (void *)0xdea0);
1401 assert(results[1] == (void *)0xfff0);
1402 memset(results, 0, sizeof(results));
1403 assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000000), results, 3,
1404 true) == 2);
1405 assert(results[0] == (void *)0xdea0);
1406 assert(results[1] == (void *)0xfff0);
1407 memset(results, 0, sizeof(results));
1408 assert(radix_tree_gang_lookup_node_reverse(t, UINT64_C(10000000001),
1409 results, 3, false) == 3);
1410 assert(results[0] == (void *)0xfff0);
1411 assert(results[1] == (void *)0xdea0);
1412 assert(results[2] == (void *)0xbea0);
1413 memset(results, 0, sizeof(results));
1414 assert(radix_tree_gang_lookup_node_reverse(t, UINT64_C(10000000001),
1415 results, 3, true) == 2);
1416 assert(results[0] == (void *)0xfff0);
1417 assert(results[1] == (void *)0xdea0);
1418 assert(radix_tree_remove_node(t, UINT64_C(10000000000)) ==
1419 (void *)0xdea0);
1420 assert(radix_tree_remove_node(t, UINT64_C(10000000001)) ==
1421 (void *)0xfff0);
1422 radix_tree_dump(t);
1423 assert(radix_tree_remove_node(t, 0) == (void *)0xbea0);
1424 radix_tree_dump(t);
1425 radix_tree_fini_tree(t);
1426 }
1427
1428 #include <sys/time.h>
1429
1430 struct testnode {
1431 uint64_t idx;
1432 bool tagged[RADIX_TREE_TAG_ID_MAX];
1433 };
1434
1435 static void
printops(const char * title,const char * name,int tag,unsigned int n,const struct timeval * stv,const struct timeval * etv)1436 printops(const char *title, const char *name, int tag, unsigned int n,
1437 const struct timeval *stv, const struct timeval *etv)
1438 {
1439 uint64_t s = stv->tv_sec * 1000000 + stv->tv_usec;
1440 uint64_t e = etv->tv_sec * 1000000 + etv->tv_usec;
1441
1442 printf("RESULT %s %s %d %lf op/s\n", title, name, tag,
1443 (double)n / (e - s) * 1000000);
1444 }
1445
1446 #define TEST2_GANG_LOOKUP_NODES 16
1447
1448 static bool
test2_should_tag(unsigned int i,unsigned int tagid)1449 test2_should_tag(unsigned int i, unsigned int tagid)
1450 {
1451
1452 if (tagid == 0) {
1453 return (i % 4) == 0; /* 25% */
1454 } else {
1455 return (i % 7) == 0; /* 14% */
1456 }
1457 return 1;
1458 }
1459
1460 static void
check_tag_count(const unsigned int * ntagged,unsigned int tagmask,unsigned int count)1461 check_tag_count(const unsigned int *ntagged, unsigned int tagmask,
1462 unsigned int count)
1463 {
1464 unsigned int tag;
1465
1466 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1467 if ((tagmask & (1 << tag)) == 0) {
1468 continue;
1469 }
1470 if (((tagmask - 1) & tagmask) == 0) {
1471 assert(count == ntagged[tag]);
1472 } else {
1473 assert(count >= ntagged[tag]);
1474 }
1475 }
1476 }
1477
1478 static void
test2(const char * title,bool dense)1479 test2(const char *title, bool dense)
1480 {
1481 struct radix_tree s;
1482 struct radix_tree *t = &s;
1483 struct testnode *n;
1484 unsigned int i;
1485 unsigned int nnodes = 100000;
1486 unsigned int removed;
1487 unsigned int tag;
1488 unsigned int tagmask;
1489 unsigned int ntagged[RADIX_TREE_TAG_ID_MAX];
1490 struct testnode *nodes;
1491 struct timeval stv;
1492 struct timeval etv;
1493
1494 nodes = malloc(nnodes * sizeof(*nodes));
1495 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1496 ntagged[tag] = 0;
1497 }
1498 radix_tree_init_tree(t);
1499 for (i = 0; i < nnodes; i++) {
1500 n = &nodes[i];
1501 n->idx = random();
1502 if (sizeof(long) == 4) {
1503 n->idx <<= 32;
1504 n->idx |= (uint32_t)random();
1505 }
1506 if (dense) {
1507 n->idx %= nnodes * 2;
1508 }
1509 while (radix_tree_lookup_node(t, n->idx) != NULL) {
1510 n->idx++;
1511 }
1512 radix_tree_insert_node(t, n->idx, n);
1513 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1514 tagmask = 1 << tag;
1515
1516 n->tagged[tag] = test2_should_tag(i, tag);
1517 if (n->tagged[tag]) {
1518 radix_tree_set_tag(t, n->idx, tagmask);
1519 ntagged[tag]++;
1520 }
1521 assert((n->tagged[tag] ? tagmask : 0) ==
1522 radix_tree_get_tag(t, n->idx, tagmask));
1523 }
1524 }
1525
1526 gettimeofday(&stv, NULL);
1527 for (i = 0; i < nnodes; i++) {
1528 n = &nodes[i];
1529 assert(radix_tree_lookup_node(t, n->idx) == n);
1530 }
1531 gettimeofday(&etv, NULL);
1532 printops(title, "lookup", 0, nnodes, &stv, &etv);
1533
1534 for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1535 unsigned int count = 0;
1536
1537 gettimeofday(&stv, NULL);
1538 for (i = 0; i < nnodes; i++) {
1539 unsigned int tagged;
1540
1541 n = &nodes[i];
1542 tagged = radix_tree_get_tag(t, n->idx, tagmask);
1543 assert((tagged & ~tagmask) == 0);
1544 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1545 assert((tagmask & (1 << tag)) == 0 ||
1546 n->tagged[tag] == !!(tagged & (1 << tag)));
1547 }
1548 if (tagged) {
1549 count++;
1550 }
1551 }
1552 gettimeofday(&etv, NULL);
1553 check_tag_count(ntagged, tagmask, count);
1554 printops(title, "get_tag", tagmask, nnodes, &stv, &etv);
1555 }
1556
1557 gettimeofday(&stv, NULL);
1558 for (i = 0; i < nnodes; i++) {
1559 n = &nodes[i];
1560 radix_tree_remove_node(t, n->idx);
1561 }
1562 gettimeofday(&etv, NULL);
1563 printops(title, "remove", 0, nnodes, &stv, &etv);
1564
1565 gettimeofday(&stv, NULL);
1566 for (i = 0; i < nnodes; i++) {
1567 n = &nodes[i];
1568 radix_tree_insert_node(t, n->idx, n);
1569 }
1570 gettimeofday(&etv, NULL);
1571 printops(title, "insert", 0, nnodes, &stv, &etv);
1572
1573 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1574 tagmask = 1 << tag;
1575
1576 ntagged[tag] = 0;
1577 gettimeofday(&stv, NULL);
1578 for (i = 0; i < nnodes; i++) {
1579 n = &nodes[i];
1580 if (n->tagged[tag]) {
1581 radix_tree_set_tag(t, n->idx, tagmask);
1582 ntagged[tag]++;
1583 }
1584 }
1585 gettimeofday(&etv, NULL);
1586 printops(title, "set_tag", tag, ntagged[tag], &stv, &etv);
1587 }
1588
1589 gettimeofday(&stv, NULL);
1590 {
1591 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1592 uint64_t nextidx;
1593 unsigned int nfound;
1594 unsigned int total;
1595
1596 nextidx = 0;
1597 total = 0;
1598 while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
1599 (void *)results, __arraycount(results), false)) > 0) {
1600 nextidx = results[nfound - 1]->idx + 1;
1601 total += nfound;
1602 if (nextidx == 0) {
1603 break;
1604 }
1605 }
1606 assert(total == nnodes);
1607 }
1608 gettimeofday(&etv, NULL);
1609 printops(title, "ganglookup", 0, nnodes, &stv, &etv);
1610
1611 gettimeofday(&stv, NULL);
1612 {
1613 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1614 uint64_t nextidx;
1615 unsigned int nfound;
1616 unsigned int total;
1617
1618 nextidx = UINT64_MAX;
1619 total = 0;
1620 while ((nfound = radix_tree_gang_lookup_node_reverse(t, nextidx,
1621 (void *)results, __arraycount(results), false)) > 0) {
1622 nextidx = results[nfound - 1]->idx - 1;
1623 total += nfound;
1624 if (nextidx == UINT64_MAX) {
1625 break;
1626 }
1627 }
1628 assert(total == nnodes);
1629 }
1630 gettimeofday(&etv, NULL);
1631 printops(title, "ganglookup_reverse", 0, nnodes, &stv, &etv);
1632
1633 for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1634 unsigned int total = 0;
1635
1636 gettimeofday(&stv, NULL);
1637 {
1638 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1639 uint64_t nextidx;
1640 unsigned int nfound;
1641
1642 nextidx = 0;
1643 while ((nfound = radix_tree_gang_lookup_tagged_node(t,
1644 nextidx, (void *)results, __arraycount(results),
1645 false, tagmask)) > 0) {
1646 nextidx = results[nfound - 1]->idx + 1;
1647 total += nfound;
1648 }
1649 }
1650 gettimeofday(&etv, NULL);
1651 check_tag_count(ntagged, tagmask, total);
1652 assert(tagmask != 0 || total == 0);
1653 printops(title, "ganglookup_tag", tagmask, total, &stv, &etv);
1654 }
1655
1656 for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1657 unsigned int total = 0;
1658
1659 gettimeofday(&stv, NULL);
1660 {
1661 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1662 uint64_t nextidx;
1663 unsigned int nfound;
1664
1665 nextidx = UINT64_MAX;
1666 while ((nfound =
1667 radix_tree_gang_lookup_tagged_node_reverse(t,
1668 nextidx, (void *)results, __arraycount(results),
1669 false, tagmask)) > 0) {
1670 nextidx = results[nfound - 1]->idx - 1;
1671 total += nfound;
1672 if (nextidx == UINT64_MAX) {
1673 break;
1674 }
1675 }
1676 }
1677 gettimeofday(&etv, NULL);
1678 check_tag_count(ntagged, tagmask, total);
1679 assert(tagmask != 0 || total == 0);
1680 printops(title, "ganglookup_tag_reverse", tagmask, total,
1681 &stv, &etv);
1682 }
1683
1684 removed = 0;
1685 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1686 unsigned int total;
1687
1688 total = 0;
1689 tagmask = 1 << tag;
1690 gettimeofday(&stv, NULL);
1691 {
1692 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1693 uint64_t nextidx;
1694 unsigned int nfound;
1695
1696 nextidx = 0;
1697 while ((nfound = radix_tree_gang_lookup_tagged_node(t,
1698 nextidx, (void *)results, __arraycount(results),
1699 false, tagmask)) > 0) {
1700 for (i = 0; i < nfound; i++) {
1701 radix_tree_remove_node(t,
1702 results[i]->idx);
1703 }
1704 nextidx = results[nfound - 1]->idx + 1;
1705 total += nfound;
1706 if (nextidx == 0) {
1707 break;
1708 }
1709 }
1710 }
1711 gettimeofday(&etv, NULL);
1712 if (tag == 0) {
1713 check_tag_count(ntagged, tagmask, total);
1714 } else {
1715 assert(total <= ntagged[tag]);
1716 }
1717 printops(title, "ganglookup_tag+remove", tagmask, total, &stv,
1718 &etv);
1719 removed += total;
1720 }
1721
1722 gettimeofday(&stv, NULL);
1723 {
1724 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1725 uint64_t nextidx;
1726 unsigned int nfound;
1727 unsigned int total;
1728
1729 nextidx = 0;
1730 total = 0;
1731 while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
1732 (void *)results, __arraycount(results), false)) > 0) {
1733 for (i = 0; i < nfound; i++) {
1734 assert(results[i] == radix_tree_remove_node(t,
1735 results[i]->idx));
1736 }
1737 nextidx = results[nfound - 1]->idx + 1;
1738 total += nfound;
1739 if (nextidx == 0) {
1740 break;
1741 }
1742 }
1743 assert(total == nnodes - removed);
1744 }
1745 gettimeofday(&etv, NULL);
1746 printops(title, "ganglookup+remove", 0, nnodes - removed, &stv, &etv);
1747
1748 assert(radix_tree_empty_tree_p(t));
1749 for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1750 assert(radix_tree_empty_tagged_tree_p(t, tagmask));
1751 }
1752 radix_tree_fini_tree(t);
1753 free(nodes);
1754 }
1755
1756 int
main(int argc,char * argv[])1757 main(int argc, char *argv[])
1758 {
1759
1760 test1();
1761 test2("dense", true);
1762 test2("sparse", false);
1763 return 0;
1764 }
1765
1766 #endif /* defined(UNITTEST) */
1767