1 /* $OpenBSD: rtable.c,v 1.87 2024/04/09 12:53:08 claudio Exp $ */
2
3 /*
4 * Copyright (c) 2014-2016 Martin Pieuchot
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #ifndef _KERNEL
20 #include "kern_compat.h"
21 #else
22 #include <sys/param.h>
23 #include <sys/systm.h>
24 #include <sys/socket.h>
25 #include <sys/malloc.h>
26 #include <sys/queue.h>
27 #include <sys/domain.h>
28 #include <sys/srp.h>
29 #endif
30
31 #include <net/rtable.h>
32 #include <net/route.h>
33 #include <net/art.h>
34
35 /*
36 * Structures used by rtable_get() to retrieve the corresponding
37 * routing table for a given pair of ``af'' and ``rtableid''.
38 *
39 * Note that once allocated routing table heads are never freed.
40 * This way we do not need to reference count them.
41 *
42 * afmap rtmap/dommp
43 * ----------- --------- -----
44 * | 0 |--------> | 0 | 0 | ... | 0 | Array mapping rtableid (=index)
45 * ----------- --------- ----- to rdomain/loopback (=value).
46 * | AF_INET |.
47 * ----------- `. .---------. .---------.
48 * ... `----> | rtable0 | ... | rtableN | Array of pointers for
49 * ----------- '---------' '---------' IPv4 routing tables
50 * | AF_MPLS | indexed by ``rtableid''.
51 * -----------
52 */
53 struct srp *afmap;
54 uint8_t af2idx[AF_MAX+1]; /* To only allocate supported AF */
55 uint8_t af2idx_max;
56
57 /* Array of routing table pointers. */
58 struct rtmap {
59 unsigned int limit;
60 void **tbl;
61 };
62
63 /*
64 * Array of rtableid -> rdomain mapping.
65 *
66 * Only used for the first index as described above.
67 */
68 struct dommp {
69 unsigned int limit;
70 /*
71 * Array to get the routing domain and loopback interface related to
72 * a routing table. Format:
73 *
74 * 8 unused bits | 16 bits for loopback index | 8 bits for rdomain
75 */
76 unsigned int *value;
77 };
78
79 unsigned int rtmap_limit = 0;
80
81 void rtmap_init(void);
82 void rtmap_grow(unsigned int, sa_family_t);
83 void rtmap_dtor(void *, void *);
84
85 struct srp_gc rtmap_gc = SRP_GC_INITIALIZER(rtmap_dtor, NULL);
86
87 void rtable_init_backend(void);
88 void *rtable_alloc(unsigned int, unsigned int, unsigned int);
89 void *rtable_get(unsigned int, sa_family_t);
90
91 void
rtmap_init(void)92 rtmap_init(void)
93 {
94 const struct domain *dp;
95 int i;
96
97 /* Start with a single table for every domain that requires it. */
98 for (i = 0; (dp = domains[i]) != NULL; i++) {
99 if (dp->dom_rtoffset == 0)
100 continue;
101
102 rtmap_grow(1, dp->dom_family);
103 }
104
105 /* Initialize the rtableid->rdomain mapping table. */
106 rtmap_grow(1, 0);
107
108 rtmap_limit = 1;
109 }
110
111 /*
112 * Grow the size of the array of routing table for AF ``af'' to ``nlimit''.
113 */
114 void
rtmap_grow(unsigned int nlimit,sa_family_t af)115 rtmap_grow(unsigned int nlimit, sa_family_t af)
116 {
117 struct rtmap *map, *nmap;
118 int i;
119
120 KERNEL_ASSERT_LOCKED();
121
122 KASSERT(nlimit > rtmap_limit);
123
124 nmap = malloc(sizeof(*nmap), M_RTABLE, M_WAITOK);
125 nmap->limit = nlimit;
126 nmap->tbl = mallocarray(nlimit, sizeof(*nmap[0].tbl), M_RTABLE,
127 M_WAITOK|M_ZERO);
128
129 map = srp_get_locked(&afmap[af2idx[af]]);
130 if (map != NULL) {
131 KASSERT(map->limit == rtmap_limit);
132
133 for (i = 0; i < map->limit; i++)
134 nmap->tbl[i] = map->tbl[i];
135 }
136
137 srp_update_locked(&rtmap_gc, &afmap[af2idx[af]], nmap);
138 }
139
140 void
rtmap_dtor(void * null,void * xmap)141 rtmap_dtor(void *null, void *xmap)
142 {
143 struct rtmap *map = xmap;
144
145 /*
146 * doesn't need to be serialized since this is the last reference
147 * to this map. there's nothing to race against.
148 */
149 free(map->tbl, M_RTABLE, map->limit * sizeof(*map[0].tbl));
150 free(map, M_RTABLE, sizeof(*map));
151 }
152
153 void
rtable_init(void)154 rtable_init(void)
155 {
156 const struct domain *dp;
157 int i;
158
159 KASSERT(sizeof(struct rtmap) == sizeof(struct dommp));
160
161 /* We use index 0 for the rtable/rdomain map. */
162 af2idx_max = 1;
163 memset(af2idx, 0, sizeof(af2idx));
164
165 /*
166 * Compute the maximum supported key length in case the routing
167 * table backend needs it.
168 */
169 for (i = 0; (dp = domains[i]) != NULL; i++) {
170 if (dp->dom_rtoffset == 0)
171 continue;
172
173 af2idx[dp->dom_family] = af2idx_max++;
174 }
175 rtable_init_backend();
176
177 /*
178 * Allocate AF-to-id table now that we now how many AFs this
179 * kernel supports.
180 */
181 afmap = mallocarray(af2idx_max + 1, sizeof(*afmap), M_RTABLE,
182 M_WAITOK|M_ZERO);
183
184 rtmap_init();
185
186 if (rtable_add(0) != 0)
187 panic("unable to create default routing table");
188
189 rt_timer_init();
190 }
191
192 int
rtable_add(unsigned int id)193 rtable_add(unsigned int id)
194 {
195 const struct domain *dp;
196 void *tbl;
197 struct rtmap *map;
198 struct dommp *dmm;
199 sa_family_t af;
200 unsigned int off, alen;
201 int i, error = 0;
202
203 if (id > RT_TABLEID_MAX)
204 return (EINVAL);
205
206 KERNEL_LOCK();
207
208 if (rtable_exists(id))
209 goto out;
210
211 for (i = 0; (dp = domains[i]) != NULL; i++) {
212 if (dp->dom_rtoffset == 0)
213 continue;
214
215 af = dp->dom_family;
216 off = dp->dom_rtoffset;
217 alen = dp->dom_maxplen;
218
219 if (id >= rtmap_limit)
220 rtmap_grow(id + 1, af);
221
222 tbl = rtable_alloc(id, alen, off);
223 if (tbl == NULL) {
224 error = ENOMEM;
225 goto out;
226 }
227
228 map = srp_get_locked(&afmap[af2idx[af]]);
229 map->tbl[id] = tbl;
230 }
231
232 /* Reflect possible growth. */
233 if (id >= rtmap_limit) {
234 rtmap_grow(id + 1, 0);
235 rtmap_limit = id + 1;
236 }
237
238 /* Use main rtable/rdomain by default. */
239 dmm = srp_get_locked(&afmap[0]);
240 dmm->value[id] = 0;
241 out:
242 KERNEL_UNLOCK();
243
244 return (error);
245 }
246
247 void *
rtable_get(unsigned int rtableid,sa_family_t af)248 rtable_get(unsigned int rtableid, sa_family_t af)
249 {
250 struct rtmap *map;
251 void *tbl = NULL;
252 struct srp_ref sr;
253
254 if (af >= nitems(af2idx) || af2idx[af] == 0)
255 return (NULL);
256
257 map = srp_enter(&sr, &afmap[af2idx[af]]);
258 if (rtableid < map->limit)
259 tbl = map->tbl[rtableid];
260 srp_leave(&sr);
261
262 return (tbl);
263 }
264
265 int
rtable_exists(unsigned int rtableid)266 rtable_exists(unsigned int rtableid)
267 {
268 const struct domain *dp;
269 void *tbl;
270 int i;
271
272 for (i = 0; (dp = domains[i]) != NULL; i++) {
273 if (dp->dom_rtoffset == 0)
274 continue;
275
276 tbl = rtable_get(rtableid, dp->dom_family);
277 if (tbl != NULL)
278 return (1);
279 }
280
281 return (0);
282 }
283
284 int
rtable_empty(unsigned int rtableid)285 rtable_empty(unsigned int rtableid)
286 {
287 const struct domain *dp;
288 int i;
289 struct art_root *tbl;
290
291 for (i = 0; (dp = domains[i]) != NULL; i++) {
292 if (dp->dom_rtoffset == 0)
293 continue;
294
295 tbl = rtable_get(rtableid, dp->dom_family);
296 if (tbl == NULL)
297 continue;
298 if (tbl->ar_root.ref != NULL)
299 return (0);
300 }
301
302 return (1);
303 }
304
305 unsigned int
rtable_l2(unsigned int rtableid)306 rtable_l2(unsigned int rtableid)
307 {
308 struct dommp *dmm;
309 unsigned int rdomain = 0;
310 struct srp_ref sr;
311
312 dmm = srp_enter(&sr, &afmap[0]);
313 if (rtableid < dmm->limit)
314 rdomain = (dmm->value[rtableid] & RT_TABLEID_MASK);
315 srp_leave(&sr);
316
317 return (rdomain);
318 }
319
320 unsigned int
rtable_loindex(unsigned int rtableid)321 rtable_loindex(unsigned int rtableid)
322 {
323 struct dommp *dmm;
324 unsigned int loifidx = 0;
325 struct srp_ref sr;
326
327 dmm = srp_enter(&sr, &afmap[0]);
328 if (rtableid < dmm->limit)
329 loifidx = (dmm->value[rtableid] >> RT_TABLEID_BITS);
330 srp_leave(&sr);
331
332 return (loifidx);
333 }
334
335 void
rtable_l2set(unsigned int rtableid,unsigned int rdomain,unsigned int loifidx)336 rtable_l2set(unsigned int rtableid, unsigned int rdomain, unsigned int loifidx)
337 {
338 struct dommp *dmm;
339 unsigned int value;
340
341 KERNEL_ASSERT_LOCKED();
342
343 if (!rtable_exists(rtableid) || !rtable_exists(rdomain))
344 return;
345
346 value = (rdomain & RT_TABLEID_MASK) | (loifidx << RT_TABLEID_BITS);
347
348 dmm = srp_get_locked(&afmap[0]);
349 dmm->value[rtableid] = value;
350 }
351
352
353 static inline const uint8_t *satoaddr(struct art_root *,
354 const struct sockaddr *);
355
356 int an_match(struct art_node *, const struct sockaddr *, int);
357 void rtentry_ref(void *, void *);
358 void rtentry_unref(void *, void *);
359
360 void rtable_mpath_insert(struct art_node *, struct rtentry *);
361
362 struct srpl_rc rt_rc = SRPL_RC_INITIALIZER(rtentry_ref, rtentry_unref, NULL);
363
364 void
rtable_init_backend(void)365 rtable_init_backend(void)
366 {
367 art_init();
368 }
369
370 void *
rtable_alloc(unsigned int rtableid,unsigned int alen,unsigned int off)371 rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
372 {
373 return (art_alloc(rtableid, alen, off));
374 }
375
376 int
rtable_setsource(unsigned int rtableid,int af,struct sockaddr * src)377 rtable_setsource(unsigned int rtableid, int af, struct sockaddr *src)
378 {
379 struct art_root *ar;
380
381 NET_ASSERT_LOCKED_EXCLUSIVE();
382
383 if ((ar = rtable_get(rtableid, af)) == NULL)
384 return (EAFNOSUPPORT);
385
386 ar->ar_source = src;
387
388 return (0);
389 }
390
391 struct sockaddr *
rtable_getsource(unsigned int rtableid,int af)392 rtable_getsource(unsigned int rtableid, int af)
393 {
394 struct art_root *ar;
395
396 NET_ASSERT_LOCKED();
397
398 ar = rtable_get(rtableid, af);
399 if (ar == NULL)
400 return (NULL);
401
402 return (ar->ar_source);
403 }
404
405 void
rtable_clearsource(unsigned int rtableid,struct sockaddr * src)406 rtable_clearsource(unsigned int rtableid, struct sockaddr *src)
407 {
408 struct sockaddr *addr;
409
410 addr = rtable_getsource(rtableid, src->sa_family);
411 if (addr && (addr->sa_len == src->sa_len)) {
412 if (memcmp(src, addr, addr->sa_len) == 0) {
413 rtable_setsource(rtableid, src->sa_family, NULL);
414 }
415 }
416 }
417
418 struct rtentry *
rtable_lookup(unsigned int rtableid,const struct sockaddr * dst,const struct sockaddr * mask,const struct sockaddr * gateway,uint8_t prio)419 rtable_lookup(unsigned int rtableid, const struct sockaddr *dst,
420 const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio)
421 {
422 struct art_root *ar;
423 struct art_node *an;
424 struct rtentry *rt = NULL;
425 struct srp_ref sr, nsr;
426 const uint8_t *addr;
427 int plen;
428
429 ar = rtable_get(rtableid, dst->sa_family);
430 if (ar == NULL)
431 return (NULL);
432
433 addr = satoaddr(ar, dst);
434
435 /* No need for a perfect match. */
436 if (mask == NULL) {
437 an = art_match(ar, addr, &nsr);
438 if (an == NULL)
439 goto out;
440 } else {
441 plen = rtable_satoplen(dst->sa_family, mask);
442 if (plen == -1)
443 return (NULL);
444
445 an = art_lookup(ar, addr, plen, &nsr);
446
447 /* Make sure we've got a perfect match. */
448 if (!an_match(an, dst, plen))
449 goto out;
450 }
451
452 SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
453 if (prio != RTP_ANY &&
454 (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
455 continue;
456
457 if (gateway == NULL)
458 break;
459
460 if (rt->rt_gateway->sa_len == gateway->sa_len &&
461 memcmp(rt->rt_gateway, gateway, gateway->sa_len) == 0)
462 break;
463 }
464 if (rt != NULL)
465 rtref(rt);
466
467 SRPL_LEAVE(&sr);
468 out:
469 srp_leave(&nsr);
470
471 return (rt);
472 }
473
474 struct rtentry *
rtable_match(unsigned int rtableid,const struct sockaddr * dst,uint32_t * src)475 rtable_match(unsigned int rtableid, const struct sockaddr *dst, uint32_t *src)
476 {
477 struct art_root *ar;
478 struct art_node *an;
479 struct rtentry *rt = NULL;
480 struct srp_ref sr, nsr;
481 const uint8_t *addr;
482 int hash;
483
484 ar = rtable_get(rtableid, dst->sa_family);
485 if (ar == NULL)
486 return (NULL);
487
488 addr = satoaddr(ar, dst);
489
490 an = art_match(ar, addr, &nsr);
491 if (an == NULL)
492 goto out;
493
494 rt = SRPL_FIRST(&sr, &an->an_rtlist);
495 if (rt == NULL) {
496 SRPL_LEAVE(&sr);
497 goto out;
498 }
499 rtref(rt);
500 SRPL_LEAVE(&sr);
501
502 /* Gateway selection by Hash-Threshold (RFC 2992) */
503 if ((hash = rt_hash(rt, dst, src)) != -1) {
504 struct rtentry *mrt;
505 int threshold, npaths = 0;
506
507 KASSERT(hash <= 0xffff);
508
509 SRPL_FOREACH(mrt, &sr, &an->an_rtlist, rt_next) {
510 /* Only count nexthops with the same priority. */
511 if (mrt->rt_priority == rt->rt_priority)
512 npaths++;
513 }
514 SRPL_LEAVE(&sr);
515
516 threshold = (0xffff / npaths) + 1;
517
518 /*
519 * we have no protection against concurrent modification of the
520 * route list attached to the node, so we won't necessarily
521 * have the same number of routes. for most modifications,
522 * we'll pick a route that we wouldn't have if we only saw the
523 * list before or after the change. if we were going to use
524 * the last available route, but it got removed, we'll hit
525 * the end of the list and then pick the first route.
526 */
527
528 mrt = SRPL_FIRST(&sr, &an->an_rtlist);
529 while (hash > threshold && mrt != NULL) {
530 if (mrt->rt_priority == rt->rt_priority)
531 hash -= threshold;
532 mrt = SRPL_FOLLOW(&sr, mrt, rt_next);
533 }
534
535 if (mrt != NULL) {
536 rtref(mrt);
537 rtfree(rt);
538 rt = mrt;
539 }
540 SRPL_LEAVE(&sr);
541 }
542 out:
543 srp_leave(&nsr);
544 return (rt);
545 }
546
547 int
rtable_insert(unsigned int rtableid,struct sockaddr * dst,const struct sockaddr * mask,const struct sockaddr * gateway,uint8_t prio,struct rtentry * rt)548 rtable_insert(unsigned int rtableid, struct sockaddr *dst,
549 const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio,
550 struct rtentry *rt)
551 {
552 struct rtentry *mrt;
553 struct srp_ref sr;
554 struct art_root *ar;
555 struct art_node *an, *prev;
556 const uint8_t *addr;
557 int plen;
558 unsigned int rt_flags;
559 int error = 0;
560
561 ar = rtable_get(rtableid, dst->sa_family);
562 if (ar == NULL)
563 return (EAFNOSUPPORT);
564
565 addr = satoaddr(ar, dst);
566 plen = rtable_satoplen(dst->sa_family, mask);
567 if (plen == -1)
568 return (EINVAL);
569
570 rtref(rt); /* guarantee rtfree won't do anything during insert */
571 rw_enter_write(&ar->ar_lock);
572
573 /* Do not permit exactly the same dst/mask/gw pair. */
574 an = art_lookup(ar, addr, plen, &sr);
575 srp_leave(&sr); /* an can't go away while we have the lock */
576 if (an_match(an, dst, plen)) {
577 struct rtentry *mrt;
578 int mpathok = ISSET(rt->rt_flags, RTF_MPATH);
579
580 SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
581 if (prio != RTP_ANY &&
582 (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
583 continue;
584
585 if (!mpathok ||
586 (mrt->rt_gateway->sa_len == gateway->sa_len &&
587 memcmp(mrt->rt_gateway, gateway,
588 gateway->sa_len) == 0)) {
589 error = EEXIST;
590 goto leave;
591 }
592 }
593 }
594
595 an = art_get(plen);
596 if (an == NULL) {
597 error = ENOBUFS;
598 goto leave;
599 }
600
601 /* prepare for immediate operation if insert succeeds */
602 rt_flags = rt->rt_flags;
603 rt->rt_flags &= ~RTF_MPATH;
604 rt->rt_dest = dst;
605 rt->rt_plen = plen;
606 SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
607
608 prev = art_insert(ar, an, addr, plen);
609 if (prev != an) {
610 SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
611 rt_next);
612 rt->rt_flags = rt_flags;
613 art_put(an);
614
615 if (prev == NULL) {
616 error = ESRCH;
617 goto leave;
618 }
619
620 an = prev;
621
622 mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
623 KASSERT(mrt != NULL);
624 KASSERT((rt->rt_flags & RTF_MPATH) || mrt->rt_priority != prio);
625
626 /*
627 * An ART node with the same destination/netmask already
628 * exists, MPATH conflict must have been already checked.
629 */
630 if (rt->rt_flags & RTF_MPATH) {
631 /*
632 * Only keep the RTF_MPATH flag if two routes have
633 * the same gateway.
634 */
635 rt->rt_flags &= ~RTF_MPATH;
636 SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
637 if (mrt->rt_priority == prio) {
638 mrt->rt_flags |= RTF_MPATH;
639 rt->rt_flags |= RTF_MPATH;
640 }
641 }
642 }
643
644 /* Put newly inserted entry at the right place. */
645 rtable_mpath_insert(an, rt);
646 }
647 leave:
648 rw_exit_write(&ar->ar_lock);
649 rtfree(rt);
650 return (error);
651 }
652
653 int
rtable_delete(unsigned int rtableid,const struct sockaddr * dst,const struct sockaddr * mask,struct rtentry * rt)654 rtable_delete(unsigned int rtableid, const struct sockaddr *dst,
655 const struct sockaddr *mask, struct rtentry *rt)
656 {
657 struct art_root *ar;
658 struct art_node *an;
659 struct srp_ref sr;
660 const uint8_t *addr;
661 int plen;
662 struct rtentry *mrt;
663 int npaths = 0;
664 int error = 0;
665
666 ar = rtable_get(rtableid, dst->sa_family);
667 if (ar == NULL)
668 return (EAFNOSUPPORT);
669
670 addr = satoaddr(ar, dst);
671 plen = rtable_satoplen(dst->sa_family, mask);
672 if (plen == -1)
673 return (EINVAL);
674
675 rtref(rt); /* guarantee rtfree won't do anything under ar_lock */
676 rw_enter_write(&ar->ar_lock);
677 an = art_lookup(ar, addr, plen, &sr);
678 srp_leave(&sr); /* an can't go away while we have the lock */
679
680 /* Make sure we've got a perfect match. */
681 if (!an_match(an, dst, plen)) {
682 error = ESRCH;
683 goto leave;
684 }
685
686 /*
687 * If other multipath route entries are still attached to
688 * this ART node we only have to unlink it.
689 */
690 SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next)
691 npaths++;
692
693 if (npaths > 1) {
694 KASSERT(refcnt_read(&rt->rt_refcnt) >= 1);
695 SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
696 rt_next);
697
698 mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
699 if (npaths == 2)
700 mrt->rt_flags &= ~RTF_MPATH;
701
702 goto leave;
703 }
704
705 if (art_delete(ar, an, addr, plen) == NULL)
706 panic("art_delete failed to find node %p", an);
707
708 KASSERT(refcnt_read(&rt->rt_refcnt) >= 1);
709 SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
710 art_put(an);
711
712 leave:
713 rw_exit_write(&ar->ar_lock);
714 rtfree(rt);
715
716 return (error);
717 }
718
719 struct rtable_walk_cookie {
720 int (*rwc_func)(struct rtentry *, void *, unsigned int);
721 void *rwc_arg;
722 struct rtentry **rwc_prt;
723 unsigned int rwc_rid;
724 };
725
726 /*
727 * Helper for rtable_walk to keep the ART code free from any "struct rtentry".
728 */
729 int
rtable_walk_helper(struct art_node * an,void * xrwc)730 rtable_walk_helper(struct art_node *an, void *xrwc)
731 {
732 struct srp_ref sr;
733 struct rtable_walk_cookie *rwc = xrwc;
734 struct rtentry *rt;
735 int error = 0;
736
737 SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
738 error = (*rwc->rwc_func)(rt, rwc->rwc_arg, rwc->rwc_rid);
739 if (error != 0)
740 break;
741 }
742 if (rwc->rwc_prt != NULL && rt != NULL) {
743 rtref(rt);
744 *rwc->rwc_prt = rt;
745 }
746 SRPL_LEAVE(&sr);
747
748 return (error);
749 }
750
751 int
rtable_walk(unsigned int rtableid,sa_family_t af,struct rtentry ** prt,int (* func)(struct rtentry *,void *,unsigned int),void * arg)752 rtable_walk(unsigned int rtableid, sa_family_t af, struct rtentry **prt,
753 int (*func)(struct rtentry *, void *, unsigned int), void *arg)
754 {
755 struct art_root *ar;
756 struct rtable_walk_cookie rwc;
757 int error;
758
759 ar = rtable_get(rtableid, af);
760 if (ar == NULL)
761 return (EAFNOSUPPORT);
762
763 rwc.rwc_func = func;
764 rwc.rwc_arg = arg;
765 rwc.rwc_prt = prt;
766 rwc.rwc_rid = rtableid;
767
768 error = art_walk(ar, rtable_walk_helper, &rwc);
769
770 return (error);
771 }
772
773 struct rtentry *
rtable_iterate(struct rtentry * rt0)774 rtable_iterate(struct rtentry *rt0)
775 {
776 struct rtentry *rt = NULL;
777 struct srp_ref sr;
778
779 rt = SRPL_NEXT(&sr, rt0, rt_next);
780 if (rt != NULL)
781 rtref(rt);
782 SRPL_LEAVE(&sr);
783 rtfree(rt0);
784 return (rt);
785 }
786
787 int
rtable_mpath_capable(unsigned int rtableid,sa_family_t af)788 rtable_mpath_capable(unsigned int rtableid, sa_family_t af)
789 {
790 return (1);
791 }
792
793 int
rtable_mpath_reprio(unsigned int rtableid,struct sockaddr * dst,int plen,uint8_t prio,struct rtentry * rt)794 rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst,
795 int plen, uint8_t prio, struct rtentry *rt)
796 {
797 struct art_root *ar;
798 struct art_node *an;
799 struct srp_ref sr;
800 const uint8_t *addr;
801 int error = 0;
802
803 ar = rtable_get(rtableid, dst->sa_family);
804 if (ar == NULL)
805 return (EAFNOSUPPORT);
806
807 addr = satoaddr(ar, dst);
808
809 rw_enter_write(&ar->ar_lock);
810 an = art_lookup(ar, addr, plen, &sr);
811 srp_leave(&sr); /* an can't go away while we have the lock */
812
813 /* Make sure we've got a perfect match. */
814 if (!an_match(an, dst, plen)) {
815 error = ESRCH;
816 } else if (SRPL_FIRST_LOCKED(&an->an_rtlist) == rt &&
817 SRPL_NEXT_LOCKED(rt, rt_next) == NULL) {
818 /*
819 * If there's only one entry on the list do not go
820 * through an insert/remove cycle. This is done to
821 * guarantee that ``an->an_rtlist'' is never empty
822 * when a node is in the tree.
823 */
824 rt->rt_priority = prio;
825 } else {
826 rtref(rt); /* keep rt alive in between remove and insert */
827 SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist,
828 rt, rtentry, rt_next);
829 rt->rt_priority = prio;
830 rtable_mpath_insert(an, rt);
831 rtfree(rt);
832 error = EAGAIN;
833 }
834 rw_exit_write(&ar->ar_lock);
835
836 return (error);
837 }
838
839 void
rtable_mpath_insert(struct art_node * an,struct rtentry * rt)840 rtable_mpath_insert(struct art_node *an, struct rtentry *rt)
841 {
842 struct rtentry *mrt, *prt = NULL;
843 uint8_t prio = rt->rt_priority;
844
845 if ((mrt = SRPL_FIRST_LOCKED(&an->an_rtlist)) == NULL) {
846 SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
847 return;
848 }
849
850 /* Iterate until we find the route to be placed after ``rt''. */
851 while (mrt->rt_priority <= prio && SRPL_NEXT_LOCKED(mrt, rt_next)) {
852 prt = mrt;
853 mrt = SRPL_NEXT_LOCKED(mrt, rt_next);
854 }
855
856 if (mrt->rt_priority <= prio) {
857 SRPL_INSERT_AFTER_LOCKED(&rt_rc, mrt, rt, rt_next);
858 } else if (prt != NULL) {
859 SRPL_INSERT_AFTER_LOCKED(&rt_rc, prt, rt, rt_next);
860 } else {
861 SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
862 }
863 }
864
865 /*
866 * Returns 1 if ``an'' perfectly matches (``dst'', ``plen''), 0 otherwise.
867 */
868 int
an_match(struct art_node * an,const struct sockaddr * dst,int plen)869 an_match(struct art_node *an, const struct sockaddr *dst, int plen)
870 {
871 struct rtentry *rt;
872 struct srp_ref sr;
873 int match;
874
875 if (an == NULL || an->an_plen != plen)
876 return (0);
877
878 rt = SRPL_FIRST(&sr, &an->an_rtlist);
879 match = (rt != NULL && memcmp(rt->rt_dest, dst, dst->sa_len) == 0);
880 SRPL_LEAVE(&sr);
881
882 return (match);
883 }
884
885 void
rtentry_ref(void * null,void * xrt)886 rtentry_ref(void *null, void *xrt)
887 {
888 struct rtentry *rt = xrt;
889
890 rtref(rt);
891 }
892
893 void
rtentry_unref(void * null,void * xrt)894 rtentry_unref(void *null, void *xrt)
895 {
896 struct rtentry *rt = xrt;
897
898 rtfree(rt);
899 }
900
901 /*
902 * Return a pointer to the address (key). This is an heritage from the
903 * BSD radix tree needed to skip the non-address fields from the flavor
904 * of "struct sockaddr" used by this routing table.
905 */
906 static inline const uint8_t *
satoaddr(struct art_root * at,const struct sockaddr * sa)907 satoaddr(struct art_root *at, const struct sockaddr *sa)
908 {
909 return (((const uint8_t *)sa) + at->ar_off);
910 }
911
912 /*
913 * Return the prefix length of a mask.
914 */
915 int
rtable_satoplen(sa_family_t af,const struct sockaddr * mask)916 rtable_satoplen(sa_family_t af, const struct sockaddr *mask)
917 {
918 const struct domain *dp;
919 uint8_t *ap, *ep;
920 int mlen, plen = 0;
921 int i;
922
923 for (i = 0; (dp = domains[i]) != NULL; i++) {
924 if (dp->dom_rtoffset == 0)
925 continue;
926
927 if (af == dp->dom_family)
928 break;
929 }
930 if (dp == NULL)
931 return (-1);
932
933 /* Host route */
934 if (mask == NULL)
935 return (dp->dom_maxplen);
936
937 mlen = mask->sa_len;
938
939 /* Default route */
940 if (mlen == 0)
941 return (0);
942
943 ap = (uint8_t *)((uint8_t *)mask) + dp->dom_rtoffset;
944 ep = (uint8_t *)((uint8_t *)mask) + mlen;
945 if (ap > ep)
946 return (-1);
947
948 /* Trim trailing zeroes. */
949 while (ap < ep && ep[-1] == 0)
950 ep--;
951
952 if (ap == ep)
953 return (0);
954
955 /* "Beauty" adapted from sbin/route/show.c ... */
956 while (ap < ep) {
957 switch (*ap++) {
958 case 0xff:
959 plen += 8;
960 break;
961 case 0xfe:
962 plen += 7;
963 goto out;
964 case 0xfc:
965 plen += 6;
966 goto out;
967 case 0xf8:
968 plen += 5;
969 goto out;
970 case 0xf0:
971 plen += 4;
972 goto out;
973 case 0xe0:
974 plen += 3;
975 goto out;
976 case 0xc0:
977 plen += 2;
978 goto out;
979 case 0x80:
980 plen += 1;
981 goto out;
982 default:
983 /* Non contiguous mask. */
984 return (-1);
985 }
986 }
987
988 out:
989 if (plen > dp->dom_maxplen || ap != ep)
990 return -1;
991
992 return (plen);
993 }
994