/*	$OpenBSD: art.c,v 1.31 2023/11/11 12:17:50 bluhm Exp $ */

/*
 * Copyright (c) 2015 Martin Pieuchot
 * Copyright (c) 2001 Yoichi Hariguchi
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Allotment Routing Table (ART).
 *
 * Yoichi Hariguchi's paper can be found at:
 *	http://www.hariguchi.org/art/art.pdf
 */

#ifndef _KERNEL
#include "kern_compat.h"
#else
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/task.h>
#include <sys/socket.h>
#endif

#include <net/art.h>

int			 art_bindex(struct art_table *, const uint8_t *, int);
void			 art_allot(struct art_table *at, int, struct art_node *,
			     struct art_node *);
struct art_table	*art_table_get(struct art_root *, struct art_table *,
			     int);
struct art_table	*art_table_put(struct art_root *, struct art_table *);
struct art_node		*art_table_insert(struct art_root *, struct art_table *,
			     int, struct art_node *);
struct art_node		*art_table_delete(struct art_root *, struct art_table *,
			     int, struct art_node *);
struct art_table	*art_table_ref(struct art_root *, struct art_table *);
int			 art_table_free(struct art_root *, struct art_table *);
int			 art_table_walk(struct art_root *, struct art_table *,
			     int (*f)(struct art_node *, void *), void *);
int			 art_walk_apply(struct art_root *,
			     struct art_node *, struct art_node *,
			     int (*f)(struct art_node *, void *), void *);
void			 art_table_gc(void *);
void			 art_gc(void *);

struct pool		an_pool, at_pool, at_heap_4_pool, at_heap_8_pool;

struct art_table	*art_table_gc_list = NULL;
struct mutex		 art_table_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct task		 art_table_gc_task =
			     TASK_INITIALIZER(art_table_gc, NULL);

struct art_node		*art_node_gc_list = NULL;
struct mutex		 art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct task		 art_node_gc_task = TASK_INITIALIZER(art_gc, NULL);

void
art_init(void)
{
	pool_init(&an_pool, sizeof(struct art_node), 0, IPL_SOFTNET, 0,
	    "art_node", NULL);
	pool_init(&at_pool, sizeof(struct art_table), 0, IPL_SOFTNET, 0,
	    "art_table", NULL);
	pool_init(&at_heap_4_pool, AT_HEAPSIZE(4), 0, IPL_SOFTNET, 0,
	    "art_heap4", NULL);
	pool_init(&at_heap_8_pool, AT_HEAPSIZE(8), 0, IPL_SOFTNET, 0,
	    "art_heap8", &pool_allocator_single);
}

/*
 * Per routing table initialization API function.
 */
struct art_root *
art_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
{
	struct art_root		*ar;
	int			 i;

	ar = malloc(sizeof(*ar), M_RTABLE, M_NOWAIT|M_ZERO);
	if (ar == NULL)
		return (NULL);

	switch (alen) {
	case 32:
		ar->ar_alen = 32;
		ar->ar_nlvl = 7;
		ar->ar_bits[0] = 8;
		for (i = 1; i < ar->ar_nlvl; i++)
			ar->ar_bits[i] = 4;
		break;
	case 128:
		ar->ar_alen = 128;
		ar->ar_nlvl = 32;
		for (i = 0; i < ar->ar_nlvl; i++)
			ar->ar_bits[i] = 4;
		break;
	default:
		printf("%s: incorrect address length %u\n", __func__, alen);
		free(ar, M_RTABLE, sizeof(*ar));
		return (NULL);
	}

	ar->ar_off = off;
	rw_init(&ar->ar_lock, "art");

	return (ar);
}
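
/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * the address length selects one of the stride layouts above, and
 * ``off'' is the offset, in bytes, of the address data within the
 * sockaddr handed to the lookup functions.
 *
 *	struct art_root *ar4, *ar6;
 *
 *	art_init();
 *	ar4 = art_alloc(0, 32, offsetof(struct sockaddr_in, sin_addr));
 *	ar6 = art_alloc(0, 128, offsetof(struct sockaddr_in6, sin6_addr));
 */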

/*
 * Return 1 if ``old'' and ``new'' are identical, 0 otherwise.
 */
static inline int
art_check_duplicate(struct art_root *ar, struct art_node *old,
    struct art_node *new)
{
	if (old == NULL)
		return (0);

	if (old->an_plen == new->an_plen)
		return (1);

	return (0);
}

/*
 * Return the base index of the part of ``addr'' and ``plen''
 * corresponding to the range covered by the table ``at''.
 *
 * In other words, this function takes the multi-level (complete)
 * address ``addr'' and prefix length ``plen'' and returns the
 * single level base index for the table ``at''.
 *
 * For example, with a 32-bit address divided into four 8-bit-long
 * tables, there is a maximum of 4 base indexes if the prefix
 * length is > 24.
 */
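/*
 * Worked example (illustrative, assuming the IPv4 layout set up in
 * art_alloc() above): the root table has at_offset = 0 and
 * at_bits = 8, so for the prefix 10.0.0.0/7 we get k = 10 (the top
 * address byte) and plen = 7 relative to this table, hence
 *
 *	bindex = (10 >> (8 - 7)) + (1 << 7) = 5 + 128 = 133.
 */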
int
art_bindex(struct art_table *at, const uint8_t *addr, int plen)
{
	uint8_t			boff, bend;
	uint32_t		k;

	if (plen < at->at_offset || plen > (at->at_offset + at->at_bits))
		return (-1);

	/*
	 * We are only interested in the part of the prefix length
	 * corresponding to the range of this table.
	 */
	plen -= at->at_offset;

	/*
	 * Jump to the first byte of the address containing bits
	 * covered by this table.
	 */
	addr += (at->at_offset / 8);

	/* ``at'' covers the bit range between ``boff'' & ``bend''. */
	boff = (at->at_offset % 8);
	bend = (at->at_bits + boff);

	KASSERT(bend <= 32);

	if (bend > 24) {
		k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
		k |= addr[1] << (bend - 16);
		k |= addr[2] << (bend - 24);
		k |= addr[3] >> (32 - bend);
	} else if (bend > 16) {
		k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
		k |= addr[1] << (bend - 16);
		k |= addr[2] >> (24 - bend);
	} else if (bend > 8) {
		k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
		k |= addr[1] >> (16 - bend);
	} else {
		k = (addr[0] >> (8 - bend)) & ((1 << at->at_bits) - 1);
	}

	/*
	 * Single level base index formula: prefixes of length ``plen''
	 * (relative to this table) occupy the heap indices
	 * [2^plen, 2^(plen+1)), and the top ``plen'' bits of ``k''
	 * select the slot within that range.
	 */
	return ((k >> (at->at_bits - plen)) + (1 << plen));
}

/*
 * Single level lookup function.
 *
 * Return the fringe index of the part of ``addr''
 * corresponding to the range covered by the table ``at''.
 */
static inline int
art_findex(struct art_table *at, const uint8_t *addr)
{
	return art_bindex(at, addr, (at->at_offset + at->at_bits));
}

/*
 * (Non-perfect) lookup API function.
 *
 * Return the best existing match for a destination.
 */
struct art_node *
art_match(struct art_root *ar, const void *addr, struct srp_ref *nsr)
{
	struct srp_ref		dsr, ndsr;
	void			*entry;
	struct art_table	*at;
	struct art_node		*dflt, *ndflt;
	int			j;

	entry = srp_enter(nsr, &ar->ar_root);
	at = entry;

	if (at == NULL)
		goto done;

	/*
	 * Remember the default route of each table we visit in case
	 * we do not find a better matching route.
	 */
	dflt = srp_enter(&dsr, &at->at_default);

	/*
	 * Iterate until we find a leaf.
	 */
	while (1) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		entry = srp_follow(nsr, &at->at_heap[j].node);

		/* If this is a leaf (NULL is a leaf) we're done. */
		if (ISLEAF(entry))
			break;

		at = SUBTABLE(entry);

		ndflt = srp_enter(&ndsr, &at->at_default);
		if (ndflt != NULL) {
			srp_leave(&dsr);
			dsr = ndsr;
			dflt = ndflt;
		} else
			srp_leave(&ndsr);
	}

	if (entry == NULL) {
		srp_leave(nsr);
		*nsr = dsr;
		KASSERT(ISLEAF(dflt));
		return (dflt);
	}

	srp_leave(&dsr);
done:
	KASSERT(ISLEAF(entry));
	return (entry);
}
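
/*
 * Lookup sketch (hypothetical caller): ``addr'' points at the raw
 * address bytes, and the returned node stays valid until the SRP
 * reference is released.
 *
 *	struct srp_ref sr;
 *	struct art_node *an;
 *
 *	an = art_match(ar, addr, &sr);
 *	if (an != NULL)
 *		use(an);	(hypothetical consumer of the route)
 *	srp_leave(&sr);
 */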

/*
 * Perfect lookup API function.
 *
 * Return a perfect match for a destination/prefix-length pair or NULL if
 * it does not exist.
 */
struct art_node *
art_lookup(struct art_root *ar, const void *addr, int plen, struct srp_ref *nsr)
{
	void			*entry;
	struct art_table	*at;
	int			 i, j;

	KASSERT(plen >= 0 && plen <= ar->ar_alen);

	entry = srp_enter(nsr, &ar->ar_root);
	at = entry;

	if (at == NULL)
		goto done;

	/* Default route */
	if (plen == 0) {
		entry = srp_follow(nsr, &at->at_default);
		goto done;
	}

	/*
	 * If the prefix length is smaller than the sum of
	 * the stride lengths up to this level, the entry must
	 * be in the current table.
	 */
	while (plen > (at->at_offset + at->at_bits)) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		entry = srp_follow(nsr, &at->at_heap[j].node);

		/* A leaf here is a shorter (non-perfect) match, or NULL. */
		if (ISLEAF(entry))
			return (NULL);

		at = SUBTABLE(entry);
	}

	i = art_bindex(at, addr, plen);
	if (i == -1)
		return (NULL);

	entry = srp_follow(nsr, &at->at_heap[i].node);
	if (!ISLEAF(entry))
		entry = srp_follow(nsr, &SUBTABLE(entry)->at_default);

done:
	KASSERT(ISLEAF(entry));
	return (entry);
}


/*
 * Insertion API function.
 *
 * Insert the given node or return an existing one if a node with the
 * same destination/mask pair is already present.
 */
struct art_node *
art_insert(struct art_root *ar, struct art_node *an, const void *addr, int plen)
{
	struct art_table	*at, *child;
	struct art_node		*node;
	int			 i, j;

	rw_assert_wrlock(&ar->ar_lock);
	KASSERT(plen >= 0 && plen <= ar->ar_alen);

	at = srp_get_locked(&ar->ar_root);
	if (at == NULL) {
		at = art_table_get(ar, NULL, -1);
		if (at == NULL)
			return (NULL);

		srp_swap_locked(&ar->ar_root, at);
	}

	/* Default route */
	if (plen == 0) {
		node = srp_get_locked(&at->at_default);
		if (node != NULL)
			return (node);

		art_table_ref(ar, at);
		srp_swap_locked(&at->at_default, an);
		return (an);
	}

	/*
	 * If the prefix length is smaller than the sum of
	 * the stride lengths up to this level, the entry must
	 * be in the current table.
	 */
	while (plen > (at->at_offset + at->at_bits)) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		node = srp_get_locked(&at->at_heap[j].node);

		/*
		 * If the node corresponding to the fringe index is
		 * a leaf we need to allocate a subtable.  The route
		 * entry of this node will then become the default
		 * route of the subtable.
		 */
		if (ISLEAF(node)) {
			child = art_table_get(ar, at, j);
			if (child == NULL)
				return (NULL);

			art_table_ref(ar, at);
			srp_swap_locked(&at->at_heap[j].node, ASNODE(child));
			at = child;
		} else
			at = SUBTABLE(node);
	}

	i = art_bindex(at, addr, plen);
	if (i == -1)
		return (NULL);

	return (art_table_insert(ar, at, i, an));
}
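
/*
 * Insertion sketch (hypothetical caller): the table lock must be
 * write-held around art_insert().  A return value different from
 * ``an'' means either that the prefix already existed (the existing
 * node is returned) or, if NULL, that an allocation failed.
 *
 *	an = art_get(plen);
 *	if (an == NULL)
 *		return (ENOMEM);
 *	rw_enter_write(&ar->ar_lock);
 *	node = art_insert(ar, an, addr, plen);
 *	rw_exit_write(&ar->ar_lock);
 *	if (node != an)
 *		art_put(an);
 */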

/*
 * Single level insertion.
 */
struct art_node *
art_table_insert(struct art_root *ar, struct art_table *at, int i,
    struct art_node *an)
{
	struct art_node	*prev, *node;

	node = srp_get_locked(&at->at_heap[i].node);
	if (!ISLEAF(node))
		prev = srp_get_locked(&SUBTABLE(node)->at_default);
	else
		prev = node;

	if (art_check_duplicate(ar, prev, an))
		return (prev);

	art_table_ref(ar, at);

	/*
	 * If the index `i' of the route that we are inserting is not
	 * a fringe index, we need to allot this new route pointer to
	 * all the corresponding fringe indices.
	 */
	if (i < at->at_minfringe)
		art_allot(at, i, prev, an);
	else if (!ISLEAF(node))
		srp_swap_locked(&SUBTABLE(node)->at_default, an);
	else
		srp_swap_locked(&at->at_heap[i].node, an);

	return (an);
}


/*
 * Deletion API function.
 */
struct art_node *
art_delete(struct art_root *ar, struct art_node *an, const void *addr, int plen)
{
	struct art_table	*at;
	struct art_node		*node;
	int			 i, j;

	rw_assert_wrlock(&ar->ar_lock);
	KASSERT(plen >= 0 && plen <= ar->ar_alen);

	at = srp_get_locked(&ar->ar_root);
	if (at == NULL)
		return (NULL);

	/* Default route */
	if (plen == 0) {
		node = srp_get_locked(&at->at_default);
		srp_swap_locked(&at->at_default, NULL);
		art_table_free(ar, at);
		return (node);
	}

	/*
	 * If the prefix length is smaller than the sum of
	 * the stride lengths up to this level, the entry must
	 * be in the current table.
	 */
	while (plen > (at->at_offset + at->at_bits)) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		node = srp_get_locked(&at->at_heap[j].node);

		/* If this is a leaf, there is no route to delete. */
		if (ISLEAF(node))
			return (NULL);

		at = SUBTABLE(node);
	}

	i = art_bindex(at, addr, plen);
	if (i == -1)
		return (NULL);

	return (art_table_delete(ar, at, i, an));
}

/*
 * Single level deletion.
 */
struct art_node *
art_table_delete(struct art_root *ar, struct art_table *at, int i,
    struct art_node *an)
{
	struct art_node		*next, *node;

#ifdef DIAGNOSTIC
	struct art_node		*prev;
#endif

	node = srp_get_locked(&at->at_heap[i].node);
#ifdef DIAGNOSTIC
	if (!ISLEAF(node))
		prev = srp_get_locked(&SUBTABLE(node)->at_default);
	else
		prev = node;

	KASSERT(prev == an);
#endif

	/* Get the next most specific route for the index `i'. */
	if ((i >> 1) > 1)
		next = srp_get_locked(&at->at_heap[i >> 1].node);
	else
		next = NULL;

	/*
	 * If the index `i' of the route that we are removing is not
	 * a fringe index, we need to allot the next most specific
	 * route pointer to all the corresponding fringe indices.
	 */
	if (i < at->at_minfringe)
		art_allot(at, i, an, next);
	else if (!ISLEAF(node))
		srp_swap_locked(&SUBTABLE(node)->at_default, next);
	else
		srp_swap_locked(&at->at_heap[i].node, next);

	/* We have removed an entry from this table. */
	art_table_free(ar, at);

	return (an);
}

struct art_table *
art_table_ref(struct art_root *ar, struct art_table *at)
{
	at->at_refcnt++;
	return (at);
}

static inline int
art_table_rele(struct art_table *at)
{
	if (at == NULL)
		return (0);

	return (--at->at_refcnt == 0);
}

int
art_table_free(struct art_root *ar, struct art_table *at)
{
	if (art_table_rele(at)) {
		/*
		 * Garbage collect this table and all its parents
		 * that are empty.
		 */
		do {
			at = art_table_put(ar, at);
		} while (art_table_rele(at));

		return (1);
	}

	return (0);
}

/*
 * Iteration API function.
 */
int
art_walk(struct art_root *ar, int (*f)(struct art_node *, void *), void *arg)
{
	struct srp_ref		 sr;
	struct art_table	*at;
	struct art_node		*node;
	int			 error = 0;

	rw_enter_write(&ar->ar_lock);
	at = srp_get_locked(&ar->ar_root);
	if (at != NULL) {
		art_table_ref(ar, at);

		/*
		 * The default route should be processed here because the root
		 * table does not have a parent.
		 */
		node = srp_enter(&sr, &at->at_default);
		error = art_walk_apply(ar, node, NULL, f, arg);
		srp_leave(&sr);

		if (error == 0)
			error = art_table_walk(ar, at, f, arg);

		art_table_free(ar, at);
	}
	rw_exit_write(&ar->ar_lock);

	return (error);
}
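
/*
 * Walk sketch (hypothetical callback): art_walk_apply() below drops
 * the table lock around each callback invocation, so ``f'' may sleep;
 * a nonzero return value aborts the walk and is passed back to the
 * caller.
 *
 *	int
 *	count_node(struct art_node *an, void *arg)
 *	{
 *		(*(unsigned int *)arg)++;
 *		return (0);
 *	}
 *
 *	unsigned int count = 0;
 *	art_walk(ar, count_node, &count);
 */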

int
art_table_walk(struct art_root *ar, struct art_table *at,
    int (*f)(struct art_node *, void *), void *arg)
{
	struct srp_ref		 sr;
	struct art_node		*node, *next;
	struct art_table	*nat;
	int			 i, j, error = 0;
	uint32_t		 maxfringe = (at->at_minfringe << 1);

	/*
	 * Iterate non-fringe nodes in ``natural'' order.
	 */
	for (j = 1; j < at->at_minfringe; j += 2) {
		/*
		 * The default route (index 1) is processed by the
		 * parent table (where it belongs) otherwise it could
		 * be processed more than once.
		 */
		for (i = max(j, 2); i < at->at_minfringe; i <<= 1) {
			next = srp_get_locked(&at->at_heap[i >> 1].node);

			node = srp_enter(&sr, &at->at_heap[i].node);
			error = art_walk_apply(ar, node, next, f, arg);
			srp_leave(&sr);

			if (error != 0)
				return (error);
		}
	}

	/*
	 * Iterate fringe nodes.
	 */
	for (i = at->at_minfringe; i < maxfringe; i++) {
		next = srp_get_locked(&at->at_heap[i >> 1].node);

		node = srp_enter(&sr, &at->at_heap[i].node);
		if (!ISLEAF(node)) {
			nat = art_table_ref(ar, SUBTABLE(node));
			node = srp_follow(&sr, &nat->at_default);
		} else
			nat = NULL;

		error = art_walk_apply(ar, node, next, f, arg);
		srp_leave(&sr);

		if (error != 0) {
			art_table_free(ar, nat);
			return (error);
		}

		if (nat != NULL) {
			error = art_table_walk(ar, nat, f, arg);
			art_table_free(ar, nat);
			if (error != 0)
				return (error);
		}
	}

	return (0);
}

int
art_walk_apply(struct art_root *ar,
    struct art_node *an, struct art_node *next,
    int (*f)(struct art_node *, void *), void *arg)
{
	int error = 0;

	if ((an != NULL) && (an != next)) {
		rw_exit_write(&ar->ar_lock);
		error = (*f)(an, arg);
		rw_enter_write(&ar->ar_lock);
	}

	return (error);
}


/*
 * Create a table and use the given index to set its default route.
 *
 * Note:  This function does not modify the root or the parent.
 */
struct art_table *
art_table_get(struct art_root *ar, struct art_table *parent, int j)
{
	struct art_table	*at;
	struct art_node		*node;
	void			*at_heap;
	uint32_t		 lvl;

	KASSERT(j != 0 && j != 1);
	KASSERT(parent != NULL || j == -1);

	if (parent != NULL)
		lvl = parent->at_level + 1;
	else
		lvl = 0;

	KASSERT(lvl < ar->ar_nlvl);

	at = pool_get(&at_pool, PR_NOWAIT|PR_ZERO);
	if (at == NULL)
		return (NULL);

	switch (AT_HEAPSIZE(ar->ar_bits[lvl])) {
	case AT_HEAPSIZE(4):
		at_heap = pool_get(&at_heap_4_pool, PR_NOWAIT|PR_ZERO);
		break;
	case AT_HEAPSIZE(8):
		at_heap = pool_get(&at_heap_8_pool, PR_NOWAIT|PR_ZERO);
		break;
	default:
		panic("incorrect stride length %u", ar->ar_bits[lvl]);
	}

	if (at_heap == NULL) {
		pool_put(&at_pool, at);
		return (NULL);
	}

	at->at_parent = parent;
	at->at_index = j;
	at->at_minfringe = (1 << ar->ar_bits[lvl]);
	at->at_level = lvl;
	at->at_bits = ar->ar_bits[lvl];
	at->at_heap = at_heap;
	at->at_refcnt = 0;

	if (parent != NULL) {
		node = srp_get_locked(&parent->at_heap[j].node);
		/* node isn't being deleted, no srp_finalize needed */
		srp_swap_locked(&at->at_default, node);
		at->at_offset = (parent->at_offset + parent->at_bits);
	}

	return (at);
}


/*
 * Delete a table and use its index to restore its parent's default route.
 *
 * Note:  The parent is modified to unlink the table from it.
 */
struct art_table *
art_table_put(struct art_root *ar, struct art_table *at)
{
	struct art_table	*parent = at->at_parent;
	struct art_node		*node;
	uint32_t		 j = at->at_index;

	KASSERT(at->at_refcnt == 0);
	KASSERT(j != 0 && j != 1);

	if (parent != NULL) {
		KASSERT(j != -1);
		KASSERT(at->at_level == parent->at_level + 1);
		KASSERT(parent->at_refcnt >= 1);

		/* Give the route back to its parent. */
		node = srp_get_locked(&at->at_default);
		srp_swap_locked(&parent->at_heap[j].node, node);
	} else {
		KASSERT(j == -1);
		KASSERT(at->at_level == 0);
		srp_swap_locked(&ar->ar_root, NULL);
	}

	mtx_enter(&art_table_gc_mtx);
	at->at_parent = art_table_gc_list;
	art_table_gc_list = at;
	mtx_leave(&art_table_gc_mtx);

	task_add(systqmp, &art_table_gc_task);

	return (parent);
}

void
art_table_gc(void *null)
{
	struct art_table *at, *next;

	mtx_enter(&art_table_gc_mtx);
	at = art_table_gc_list;
	art_table_gc_list = NULL;
	mtx_leave(&art_table_gc_mtx);

	while (at != NULL) {
		next = at->at_parent;

		if (at->at_level == 0)
			srp_finalize(at, "arttfini");
		else
			srp_finalize(ASNODE(at), "arttfini");

		switch (AT_HEAPSIZE(at->at_bits)) {
		case AT_HEAPSIZE(4):
			pool_put(&at_heap_4_pool, at->at_heap);
			break;
		case AT_HEAPSIZE(8):
			pool_put(&at_heap_8_pool, at->at_heap);
			break;
		default:
			panic("incorrect stride length %u", at->at_bits);
		}

		pool_put(&at_pool, at);

		at = next;
	}
}

/*
 * Substitute a node by another in the subtree whose root index is given.
 *
 * This function iterates on the table ``at'' at index ``i'' until no
 * more ``old'' node can be replaced by ``new''.
 *
 * This function was originally written by Don Knuth in CWEB. The
 * complicated ``goto''s are the result of expansion of the two
 * following recursions:
 *
 *	art_allot(at, i, old, new)
 *	{
 *		int k = i;
 *		if (at->at_heap[k] == old)
 *			at->at_heap[k] = new;
 *		if (k >= at->at_minfringe)
 *			 return;
 *		k <<= 1;
 *		art_allot(at, k, old, new);
 *		k++;
 *		art_allot(at, k, old, new);
 *	}
 */
void
art_allot(struct art_table *at, int i, struct art_node *old,
    struct art_node *new)
{
	struct art_node		*node, *dflt;
	int			 k = i;

	KASSERT(i < at->at_minfringe);

again:
	k <<= 1;
	if (k < at->at_minfringe)
		goto nonfringe;

	/* Change fringe nodes. */
	while (1) {
		node = srp_get_locked(&at->at_heap[k].node);
		if (!ISLEAF(node)) {
			dflt = srp_get_locked(&SUBTABLE(node)->at_default);
			if (dflt == old) {
				srp_swap_locked(&SUBTABLE(node)->at_default,
				    new);
			}
		} else if (node == old) {
			srp_swap_locked(&at->at_heap[k].node, new);
		}
		if (k % 2)
			goto moveup;
		k++;
	}

nonfringe:
	node = srp_get_locked(&at->at_heap[k].node);
	if (node == old)
		goto again;
moveon:
	if (k % 2)
		goto moveup;
	k++;
	goto nonfringe;
moveup:
	k >>= 1;
	srp_swap_locked(&at->at_heap[k].node, new);

	/* Change non-fringe node. */
	if (k != i)
		goto moveon;
}

struct art_node *
art_get(uint8_t plen)
{
	struct art_node		*an;

	an = pool_get(&an_pool, PR_NOWAIT | PR_ZERO);
	if (an == NULL)
		return (NULL);

	an->an_plen = plen;
	SRPL_INIT(&an->an_rtlist);

	return (an);
}

void
art_put(struct art_node *an)
{
	KASSERT(SRPL_EMPTY_LOCKED(&an->an_rtlist));

	mtx_enter(&art_node_gc_mtx);
	an->an_gc = art_node_gc_list;
	art_node_gc_list = an;
	mtx_leave(&art_node_gc_mtx);

	task_add(systqmp, &art_node_gc_task);
}

void
art_gc(void *null)
{
	struct art_node		*an, *next;

	mtx_enter(&art_node_gc_mtx);
	an = art_node_gc_list;
	art_node_gc_list = NULL;
	mtx_leave(&art_node_gc_mtx);

	while (an != NULL) {
		next = an->an_gc;

		srp_finalize(an, "artnfini");

		pool_put(&an_pool, an);

		an = next;
	}
}