1 /* $OpenBSD: rtable.c,v 1.85 2023/11/12 17:51:40 bluhm Exp $ */ 2 3 /* 4 * Copyright (c) 2014-2016 Martin Pieuchot 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #ifndef _KERNEL 20 #include "kern_compat.h" 21 #else 22 #include <sys/param.h> 23 #include <sys/systm.h> 24 #include <sys/socket.h> 25 #include <sys/malloc.h> 26 #include <sys/queue.h> 27 #include <sys/domain.h> 28 #include <sys/srp.h> 29 #endif 30 31 #include <net/rtable.h> 32 #include <net/route.h> 33 34 /* 35 * Structures used by rtable_get() to retrieve the corresponding 36 * routing table for a given pair of ``af'' and ``rtableid''. 37 * 38 * Note that once allocated routing table heads are never freed. 39 * This way we do not need to reference count them. 40 * 41 * afmap rtmap/dommp 42 * ----------- --------- ----- 43 * | 0 |--------> | 0 | 0 | ... | 0 | Array mapping rtableid (=index) 44 * ----------- --------- ----- to rdomain/loopback (=value). 45 * | AF_INET |. 46 * ----------- `. .---------. .---------. 47 * ... `----> | rtable0 | ... | rtableN | Array of pointers for 48 * ----------- '---------' '---------' IPv4 routing tables 49 * | AF_MPLS | indexed by ``rtableid''. 
 * -----------
 */
struct srp	  *afmap;		/* SRP-protected array described above */
uint8_t		   af2idx[AF_MAX+1];	/* To only allocate supported AF */
uint8_t		   af2idx_max;		/* one past the last index used in af2idx */

/* Array of routing table pointers. */
struct rtmap {
	unsigned int	   limit;	/* number of entries in ``tbl'' */
	void		 **tbl;		/* per-rtableid table pointers */
};

/*
 * Array of rtableid -> rdomain mapping.
 *
 * Only used for the first index as described above.
 */
struct dommp {
	unsigned int	   limit;	/* number of entries in ``value'' */
	/*
	 * Array to get the routing domain and loopback interface related to
	 * a routing table. Format:
	 *
	 * 8 unused bits | 16 bits for loopback index | 8 bits for rdomain
	 */
	unsigned int	  *value;
};

/* Current size of every per-AF rtmap; all maps grow in lockstep. */
unsigned int	   rtmap_limit = 0;

void	rtmap_init(void);
void	rtmap_grow(unsigned int, sa_family_t);
void	rtmap_dtor(void *, void *);

/* Garbage collector used to free replaced rtmaps once unreferenced. */
struct srp_gc	   rtmap_gc = SRP_GC_INITIALIZER(rtmap_dtor, NULL);

void	rtable_init_backend(void);
void	*rtable_alloc(unsigned int, unsigned int, unsigned int);
void	*rtable_get(unsigned int, sa_family_t);

/*
 * Create the initial, single-entry rtmap for every domain that provides
 * a routing table, plus the rtableid->rdomain map at afmap index 0.
 * Called once from rtable_init().
 */
void
rtmap_init(void)
{
	const struct domain	*dp;
	int			 i;

	/* Start with a single table for every domain that requires it. */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		rtmap_grow(1, dp->dom_family);
	}

	/* Initialize the rtableid->rdomain mapping table. */
	rtmap_grow(1, 0);

	rtmap_limit = 1;
}

/*
 * Grow the size of the array of routing table for AF ``af'' to ``nlimit''.
 */
void
rtmap_grow(unsigned int nlimit, sa_family_t af)
{
	struct rtmap	*map, *nmap;
	int		 i;

	KERNEL_ASSERT_LOCKED();

	KASSERT(nlimit > rtmap_limit);

	/* Build the larger map first, then publish it atomically via SRP. */
	nmap = malloc(sizeof(*nmap), M_RTABLE, M_WAITOK);
	nmap->limit = nlimit;
	nmap->tbl = mallocarray(nlimit, sizeof(*nmap[0].tbl), M_RTABLE,
	    M_WAITOK|M_ZERO);

	map = srp_get_locked(&afmap[af2idx[af]]);
	if (map != NULL) {
		KASSERT(map->limit == rtmap_limit);

		/* Carry existing table pointers over into the new map. */
		for (i = 0; i < map->limit; i++)
			nmap->tbl[i] = map->tbl[i];
	}

	/* Old map is freed by rtmap_dtor once its last SRP reference drops. */
	srp_update_locked(&rtmap_gc, &afmap[af2idx[af]], nmap);
}

/*
 * SRP garbage-collector callback: free a replaced rtmap.
 */
void
rtmap_dtor(void *null, void *xmap)
{
	struct rtmap	*map = xmap;

	/*
	 * doesn't need to be serialized since this is the last reference
	 * to this map. there's nothing to race against.
	 */
	free(map->tbl, M_RTABLE, map->limit * sizeof(*map[0].tbl));
	free(map, M_RTABLE, sizeof(*map));
}

/*
 * Boot-time initialization of the routing table layer: assign compact
 * indices to supported AFs, initialize the backend, allocate ``afmap''
 * and create the default routing table (id 0).
 */
void
rtable_init(void)
{
	const struct domain	*dp;
	int			 i;

	/* Both structs share afmap; index 0 is interpreted as a dommp. */
	KASSERT(sizeof(struct rtmap) == sizeof(struct dommp));

	/* We use index 0 for the rtable/rdomain map. */
	af2idx_max = 1;
	memset(af2idx, 0, sizeof(af2idx));

	/*
	 * Assign a compact afmap index to every domain that provides
	 * a routing table.
	 */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		af2idx[dp->dom_family] = af2idx_max++;
	}
	rtable_init_backend();

	/*
	 * Allocate AF-to-id table now that we know how many AFs this
	 * kernel supports.
	 */
	afmap = mallocarray(af2idx_max + 1, sizeof(*afmap), M_RTABLE,
	    M_WAITOK|M_ZERO);

	rtmap_init();

	if (rtable_add(0) != 0)
		panic("unable to create default routing table");

	rt_timer_init();
}

/*
 * Create routing table ``id'' for every supported address family and
 * map it to rdomain 0 by default.  Returns 0 on success (including when
 * the table already exists) or EINVAL/ENOMEM.
 */
int
rtable_add(unsigned int id)
{
	const struct domain	*dp;
	void			*tbl;
	struct rtmap		*map;
	struct dommp		*dmm;
	sa_family_t		 af;
	unsigned int		 off, alen;
	int			 i, error = 0;

	if (id > RT_TABLEID_MAX)
		return (EINVAL);

	KERNEL_LOCK();

	/* Re-adding an existing table is treated as success (error == 0). */
	if (rtable_exists(id))
		goto out;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		af = dp->dom_family;
		off = dp->dom_rtoffset;
		alen = dp->dom_maxplen;

		/* Grow this AF's map so slot ``id'' exists before filling it. */
		if (id >= rtmap_limit)
			rtmap_grow(id + 1, af);

		tbl = rtable_alloc(id, alen, off);
		if (tbl == NULL) {
			error = ENOMEM;
			goto out;
		}

		map = srp_get_locked(&afmap[af2idx[af]]);
		map->tbl[id] = tbl;
	}

	/* Reflect possible growth. */
	if (id >= rtmap_limit) {
		rtmap_grow(id + 1, 0);
		rtmap_limit = id + 1;
	}

	/* Use main rtable/rdomain by default.
	 */
	dmm = srp_get_locked(&afmap[0]);
	dmm->value[id] = 0;
out:
	KERNEL_UNLOCK();

	return (error);
}

/*
 * Return the routing table for (``rtableid'', ``af'') or NULL if the AF
 * is unsupported or the table does not exist.  Safe for lock-free
 * readers thanks to SRP.
 */
void *
rtable_get(unsigned int rtableid, sa_family_t af)
{
	struct rtmap	*map;
	void		*tbl = NULL;
	struct srp_ref	 sr;

	if (af >= nitems(af2idx) || af2idx[af] == 0)
		return (NULL);

	map = srp_enter(&sr, &afmap[af2idx[af]]);
	if (rtableid < map->limit)
		tbl = map->tbl[rtableid];
	srp_leave(&sr);

	return (tbl);
}

/*
 * Return 1 if a routing table with ``rtableid'' exists for any AF.
 */
int
rtable_exists(unsigned int rtableid)
{
	const struct domain	*dp;
	void			*tbl;
	int			 i;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		tbl = rtable_get(rtableid, dp->dom_family);
		if (tbl != NULL)
			return (1);
	}

	return (0);
}

/*
 * Return 1 if none of the per-AF tables of ``rtableid'' contains
 * a route, 0 otherwise.
 */
int
rtable_empty(unsigned int rtableid)
{
	const struct domain	*dp;
	int			 i;
	struct art_root		*tbl;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		tbl = rtable_get(rtableid, dp->dom_family);
		if (tbl == NULL)
			continue;
		/* A non-NULL root reference means the ART has entries. */
		if (tbl->ar_root.ref != NULL)
			return (0);
	}

	return (1);
}

/*
 * Return the routing domain of ``rtableid'' (low 8 bits of the dommp
 * value), or 0 if the id is out of range.
 */
unsigned int
rtable_l2(unsigned int rtableid)
{
	struct dommp	*dmm;
	unsigned int	 rdomain = 0;
	struct srp_ref	 sr;

	dmm = srp_enter(&sr, &afmap[0]);
	if (rtableid < dmm->limit)
		rdomain = (dmm->value[rtableid] & RT_TABLEID_MASK);
	srp_leave(&sr);

	return (rdomain);
}

/*
 * Return the loopback interface index associated with ``rtableid''
 * (bits above the rdomain), or 0 if the id is out of range.
 */
unsigned int
rtable_loindex(unsigned int rtableid)
{
	struct dommp	*dmm;
	unsigned int	 loifidx = 0;
	struct srp_ref	 sr;

	dmm = srp_enter(&sr, &afmap[0]);
	if (rtableid < dmm->limit)
		loifidx = (dmm->value[rtableid] >> RT_TABLEID_BITS);
	srp_leave(&sr);

	return (loifidx);
}

/*
 * Associate ``rtableid'' with routing domain ``rdomain'' and loopback
 * interface ``loifidx''.  Both tables must already exist.
 */
void
rtable_l2set(unsigned int rtableid, unsigned int rdomain, unsigned int loifidx)
{
	struct dommp	*dmm;
	unsigned int	 value;

	KERNEL_ASSERT_LOCKED();

	if (!rtable_exists(rtableid) || !rtable_exists(rdomain))
		return;

	/* Pack loopback ifindex and rdomain into a single word. */
	value = (rdomain & RT_TABLEID_MASK) | (loifidx << RT_TABLEID_BITS);

	dmm = srp_get_locked(&afmap[0]);
	dmm->value[rtableid] = value;
}


static inline const uint8_t	*satoaddr(struct art_root *,
				     const struct sockaddr *);

int	an_match(struct art_node *, const struct sockaddr *, int);
void	rtentry_ref(void *, void *);
void	rtentry_unref(void *, void *);

void	rtable_mpath_insert(struct art_node *, struct rtentry *);

/* Refcount callbacks keeping rtentries alive while on an SRP list. */
struct srpl_rc rt_rc = SRPL_RC_INITIALIZER(rtentry_ref, rtentry_unref, NULL);

/* Initialize the ART backend. */
void
rtable_init_backend(void)
{
	art_init();
}

/* Allocate an ART table for ``rtableid'' with the given key parameters. */
void *
rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
{
	return (art_alloc(rtableid, alen, off));
}

/*
 * Set the preferred source address for ``af'' in ``rtableid''.
 * Returns EAFNOSUPPORT if the table does not exist.
 */
int
rtable_setsource(unsigned int rtableid, int af, struct sockaddr *src)
{
	struct art_root		*ar;

	NET_ASSERT_LOCKED_EXCLUSIVE();

	if ((ar = rtable_get(rtableid, af)) == NULL)
		return (EAFNOSUPPORT);

	ar->ar_source = src;

	return (0);
}

/*
 * Return the preferred source address for ``af'' in ``rtableid'',
 * or NULL if none is set or the table does not exist.
 */
struct sockaddr *
rtable_getsource(unsigned int rtableid, int af)
{
	struct art_root		*ar;

	NET_ASSERT_LOCKED();

	ar = rtable_get(rtableid, af);
	if (ar == NULL)
		return (NULL);

	return (ar->ar_source);
}

/*
 * Clear the preferred source address of ``rtableid'' if it matches
 * ``src'' exactly (same length and bytes).
 */
void
rtable_clearsource(unsigned int rtableid, struct sockaddr *src)
{
	struct sockaddr	*addr;

	addr = rtable_getsource(rtableid, src->sa_family);
	if (addr && (addr->sa_len == src->sa_len)) {
		if (memcmp(src, addr, addr->sa_len) == 0) {
			rtable_setsource(rtableid, src->sa_family, NULL);
		}
	}
}

/*
 * Return, with a reference held, the route matching ``dst''/``mask''
 * exactly — and ``gateway''/``prio'' when given — or NULL.
 */
struct rtentry *
rtable_lookup(unsigned int rtableid, const struct sockaddr *dst,
    const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio)
{
	struct art_root
			*ar;
	struct art_node			*an;
	struct rtentry			*rt = NULL;
	struct srp_ref			 sr, nsr;	/* sr: rtlist, nsr: ART node */
	const uint8_t			*addr;
	int				 plen;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (NULL);

	addr = satoaddr(ar, dst);

	/* No need for a perfect match. */
	if (mask == NULL) {
		an = art_match(ar, addr, &nsr);
		if (an == NULL)
			goto out;
	} else {
		plen = rtable_satoplen(dst->sa_family, mask);
		if (plen == -1)
			return (NULL);

		an = art_lookup(ar, addr, plen, &nsr);

		/* Make sure we've got a perfect match. */
		if (!an_match(an, dst, plen))
			goto out;
	}

	/* Pick the first entry satisfying the priority/gateway filters. */
	SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
		if (prio != RTP_ANY &&
		    (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
			continue;

		if (gateway == NULL)
			break;

		if (rt->rt_gateway->sa_len == gateway->sa_len &&
		    memcmp(rt->rt_gateway, gateway, gateway->sa_len) == 0)
			break;
	}
	if (rt != NULL)
		rtref(rt);

	SRPL_LEAVE(&sr);
out:
	srp_leave(&nsr);

	return (rt);
}

/*
 * Longest-prefix-match lookup of ``dst'' in ``rtableid''.  Returns a
 * referenced route, applying multipath gateway selection when ``src''
 * provides hashing input.
 */
struct rtentry *
rtable_match(unsigned int rtableid, const struct sockaddr *dst, uint32_t *src)
{
	struct art_root			*ar;
	struct art_node			*an;
	struct rtentry			*rt = NULL;
	struct srp_ref			 sr, nsr;
	const uint8_t			*addr;
	int				 hash;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (NULL);

	addr = satoaddr(ar, dst);

	an = art_match(ar, addr, &nsr);
	if (an == NULL)
		goto out;

	rt = SRPL_FIRST(&sr, &an->an_rtlist);
	if (rt == NULL) {
		SRPL_LEAVE(&sr);
		goto out;
	}
	rtref(rt);
	SRPL_LEAVE(&sr);

	/* Gateway selection by Hash-Threshold (RFC 2992) */
	if ((hash = rt_hash(rt, dst, src)) != -1) {
		struct rtentry		*mrt;
		int			 threshold, npaths = 0;

		KASSERT(hash <= 0xffff);

		SRPL_FOREACH(mrt, &sr, &an->an_rtlist, rt_next) {
			/* Only count nexthops with the same priority.
			 */
			if (mrt->rt_priority == rt->rt_priority)
				npaths++;
		}
		SRPL_LEAVE(&sr);

		/*
		 * NOTE(review): ``rt'' was just taken from this list, so
		 * npaths should be >= 1; a concurrent removal between the
		 * SRPL sections could in principle leave it 0 — confirm
		 * before relying on this divisor.
		 */
		threshold = (0xffff / npaths) + 1;

		/*
		 * we have no protection against concurrent modification of the
		 * route list attached to the node, so we won't necessarily
		 * have the same number of routes.  for most modifications,
		 * we'll pick a route that we wouldn't have if we only saw the
		 * list before or after the change.  if we were going to use
		 * the last available route, but it got removed, we'll hit
		 * the end of the list and then pick the first route.
		 */

		mrt = SRPL_FIRST(&sr, &an->an_rtlist);
		while (hash > threshold && mrt != NULL) {
			if (mrt->rt_priority == rt->rt_priority)
				hash -= threshold;
			mrt = SRPL_FOLLOW(&sr, mrt, rt_next);
		}

		/* Swap the reference if the hash selected another nexthop. */
		if (mrt != NULL) {
			rtref(mrt);
			rtfree(rt);
			rt = mrt;
		}
		SRPL_LEAVE(&sr);
	}
out:
	srp_leave(&nsr);
	return (rt);
}

/*
 * Insert ``rt'' for ``dst''/``mask'' into ``rtableid''.  Returns EEXIST
 * for duplicate dst/mask/gateway, ENOBUFS on allocation failure, EINVAL
 * for a bad mask, ESRCH if the ART insert loses a race.
 */
int
rtable_insert(unsigned int rtableid, struct sockaddr *dst,
    const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio,
    struct rtentry *rt)
{
	struct rtentry			*mrt;
	struct srp_ref			 sr;
	struct art_root			*ar;
	struct art_node			*an, *prev;
	const uint8_t			*addr;
	int				 plen;
	unsigned int			 rt_flags;
	int				 error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);
	plen = rtable_satoplen(dst->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	rtref(rt); /* guarantee rtfree won't do anything during insert */
	rw_enter_write(&ar->ar_lock);

	/* Do not permit exactly the same dst/mask/gw pair.
	 */
	an = art_lookup(ar, addr, plen, &sr);
	srp_leave(&sr); /* an can't go away while we have the lock */
	if (an_match(an, dst, plen)) {
		struct rtentry	*mrt;
		int		 mpathok = ISSET(rt->rt_flags, RTF_MPATH);

		SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
			if (prio != RTP_ANY &&
			    (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
				continue;

			/* Same gateway — or MPATH not allowed — is a dup. */
			if (!mpathok ||
			    (mrt->rt_gateway->sa_len == gateway->sa_len &&
			    memcmp(mrt->rt_gateway, gateway,
			    gateway->sa_len) == 0)) {
				error = EEXIST;
				goto leave;
			}
		}
	}

	an = art_get(plen);
	if (an == NULL) {
		error = ENOBUFS;
		goto leave;
	}

	/* prepare for immediate operation if insert succeeds */
	rt_flags = rt->rt_flags;
	rt->rt_flags &= ~RTF_MPATH;
	rt->rt_dest = dst;
	rt->rt_plen = plen;
	SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);

	prev = art_insert(ar, an, addr, plen);
	if (prev != an) {
		/* Roll back: our pre-linked node was not used by the ART. */
		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
		    rt_next);
		rt->rt_flags = rt_flags;
		art_put(an);

		if (prev == NULL) {
			error = ESRCH;
			goto leave;
		}

		/* An equivalent node already exists; attach to it instead. */
		an = prev;

		mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
		KASSERT(mrt != NULL);
		KASSERT((rt->rt_flags & RTF_MPATH) || mrt->rt_priority != prio);

		/*
		 * An ART node with the same destination/netmask already
		 * exists, MPATH conflict must have been already checked.
		 */
		if (rt->rt_flags & RTF_MPATH) {
			/*
			 * Only keep the RTF_MPATH flag if two routes have
			 * the same gateway.
			 */
			rt->rt_flags &= ~RTF_MPATH;
			SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
				if (mrt->rt_priority == prio) {
					mrt->rt_flags |= RTF_MPATH;
					rt->rt_flags |= RTF_MPATH;
				}
			}
		}

		/* Put newly inserted entry at the right place.
	 */
		rtable_mpath_insert(an, rt);
	}
leave:
	rw_exit_write(&ar->ar_lock);
	rtfree(rt);
	return (error);
}

/*
 * Remove ``rt'' for ``dst''/``mask'' from ``rtableid''.  The ART node is
 * only freed when its route list becomes empty.  Returns ESRCH if no
 * perfectly matching node exists.
 */
int
rtable_delete(unsigned int rtableid, const struct sockaddr *dst,
    const struct sockaddr *mask, struct rtentry *rt)
{
	struct art_root			*ar;
	struct art_node			*an;
	struct srp_ref			 sr;
	const uint8_t			*addr;
	int				 plen;
	struct rtentry			*mrt;
	int				 npaths = 0;
	int				 error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);
	plen = rtable_satoplen(dst->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	rtref(rt); /* guarantee rtfree won't do anything under ar_lock */
	rw_enter_write(&ar->ar_lock);
	an = art_lookup(ar, addr, plen, &sr);
	srp_leave(&sr); /* an can't go away while we have the lock */

	/* Make sure we've got a perfect match. */
	if (!an_match(an, dst, plen)) {
		error = ESRCH;
		goto leave;
	}

	/*
	 * If other multipath route entries are still attached to
	 * this ART node we only have to unlink it.
	 */
	SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next)
		npaths++;

	if (npaths > 1) {
		KASSERT(refcnt_read(&rt->rt_refcnt) >= 1);
		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
		    rt_next);

		/* Last remaining sibling is no longer a multipath route. */
		mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
		if (npaths == 2)
			mrt->rt_flags &= ~RTF_MPATH;

		goto leave;
	}

	/* Last route on the node: remove the node from the ART as well. */
	if (art_delete(ar, an, addr, plen) == NULL)
		panic("art_delete failed to find node %p", an);

	KASSERT(refcnt_read(&rt->rt_refcnt) >= 1);
	SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
	art_put(an);

leave:
	rw_exit_write(&ar->ar_lock);
	rtfree(rt);

	return (error);
}

/* Per-walk state passed through the ART traversal. */
struct rtable_walk_cookie {
	int		(*rwc_func)(struct rtentry *, void *, unsigned int);
	void		 *rwc_arg;
	struct rtentry	**rwc_prt;	/* out: route that stopped the walk */
	unsigned int	  rwc_rid;
};

/*
 * Helper for rtable_walk to keep the ART code free from any "struct rtentry".
 */
int
rtable_walk_helper(struct art_node *an, void *xrwc)
{
	struct srp_ref			 sr;
	struct rtable_walk_cookie	*rwc = xrwc;
	struct rtentry			*rt;
	int				 error = 0;

	SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
		error = (*rwc->rwc_func)(rt, rwc->rwc_arg, rwc->rwc_rid);
		if (error != 0)
			break;
	}
	/* Hand a referenced pointer to the interrupting route back out. */
	if (rwc->rwc_prt != NULL && rt != NULL) {
		rtref(rt);
		*rwc->rwc_prt = rt;
	}
	SRPL_LEAVE(&sr);

	return (error);
}

/*
 * Call ``func'' for every route of ``af'' in ``rtableid''.  A non-zero
 * return from ``func'' stops the walk; the route it stopped on is
 * returned referenced via ``prt'' when non-NULL.
 */
int
rtable_walk(unsigned int rtableid, sa_family_t af, struct rtentry **prt,
    int (*func)(struct rtentry *, void *, unsigned int), void *arg)
{
	struct art_root			*ar;
	struct rtable_walk_cookie	 rwc;
	int				 error;

	ar = rtable_get(rtableid, af);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	rwc.rwc_func = func;
	rwc.rwc_arg = arg;
	rwc.rwc_prt = prt;
	rwc.rwc_rid = rtableid;

	error = art_walk(ar, rtable_walk_helper, &rwc);

	return (error);
}

/*
 * Exchange the reference on ``rt0'' for a reference on the next route
 * sharing its ART node, or NULL at the end of the list.
 */
struct rtentry *
rtable_iterate(struct rtentry *rt0)
{
	struct rtentry			*rt = NULL;
	struct srp_ref			 sr;

	rt = SRPL_NEXT(&sr, rt0, rt_next);
	if (rt != NULL)
		rtref(rt);
	SRPL_LEAVE(&sr);
	rtfree(rt0);	/* always consumes the caller's reference on rt0 */
	return (rt);
}

/* The ART backend supports multipath for every table and AF. */
int
rtable_mpath_capable(unsigned int rtableid, sa_family_t af)
{
	return (1);
}

/*
 * Change the priority of ``rt''.  Returns EAGAIN when the entry had to
 * be re-sorted inside its node's route list, ESRCH when no matching
 * node exists.
 */
int
rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst,
    int plen, uint8_t prio, struct rtentry *rt)
{
	struct art_root			*ar;
	struct art_node			*an;
	struct srp_ref			 sr;
	const uint8_t			*addr;
	int				 error = 0;

	ar = rtable_get(rtableid, dst->sa_family);
	if (ar == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(ar, dst);

	rw_enter_write(&ar->ar_lock);
	an = art_lookup(ar, addr, plen, &sr);
	srp_leave(&sr); /* an can't go away while we have the lock */

	/* Make sure we've got a perfect match. */
	if (!an_match(an, dst, plen)) {
		error = ESRCH;
	} else if (SRPL_FIRST_LOCKED(&an->an_rtlist) == rt &&
	    SRPL_NEXT_LOCKED(rt, rt_next) == NULL) {
		/*
		 * If there's only one entry on the list do not go
		 * through an insert/remove cycle.  This is done to
		 * guarantee that ``an->an_rtlist'' is never empty
		 * when a node is in the tree.
		 */
		rt->rt_priority = prio;
	} else {
		rtref(rt); /* keep rt alive in between remove and insert */
		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist,
		    rt, rtentry, rt_next);
		rt->rt_priority = prio;
		rtable_mpath_insert(an, rt);
		rtfree(rt);
		error = EAGAIN;
	}
	rw_exit_write(&ar->ar_lock);

	return (error);
}

/*
 * Insert ``rt'' into ``an''s route list, kept sorted by ascending
 * priority.  Caller holds the table's write lock.
 */
void
rtable_mpath_insert(struct art_node *an, struct rtentry *rt)
{
	struct rtentry			*mrt, *prt = NULL;
	uint8_t				 prio = rt->rt_priority;

	if ((mrt = SRPL_FIRST_LOCKED(&an->an_rtlist)) == NULL) {
		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
		return;
	}

	/* Iterate until we find the route to be placed after ``rt''. */
	while (mrt->rt_priority <= prio && SRPL_NEXT_LOCKED(mrt, rt_next)) {
		prt = mrt;
		mrt = SRPL_NEXT_LOCKED(mrt, rt_next);
	}

	if (mrt->rt_priority <= prio) {
		/* Reached the tail: everything sorts before ``rt''. */
		SRPL_INSERT_AFTER_LOCKED(&rt_rc, mrt, rt, rt_next);
	} else if (prt != NULL) {
		SRPL_INSERT_AFTER_LOCKED(&rt_rc, prt, rt, rt_next);
	} else {
		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
	}
}

/*
 * Returns 1 if ``an'' perfectly matches (``dst'', ``plen''), 0 otherwise.
 */
int
an_match(struct art_node *an, const struct sockaddr *dst, int plen)
{
	struct rtentry			*rt;
	struct srp_ref			 sr;
	int				 match;

	if (an == NULL || an->an_plen != plen)
		return (0);

	/* A node in the tree always has at least one route attached. */
	rt = SRPL_FIRST(&sr, &an->an_rtlist);
	match = (memcmp(rt->rt_dest, dst, dst->sa_len) == 0);
	SRPL_LEAVE(&sr);

	return (match);
}

/* SRP list callback: take a reference on an inserted rtentry. */
void
rtentry_ref(void *null, void *xrt)
{
	struct rtentry *rt = xrt;

	rtref(rt);
}

/* SRP list callback: drop the reference of a removed rtentry. */
void
rtentry_unref(void *null, void *xrt)
{
	struct rtentry *rt = xrt;

	rtfree(rt);
}

/*
 * Return a pointer to the address (key).
This is an heritage from the 902 * BSD radix tree needed to skip the non-address fields from the flavor 903 * of "struct sockaddr" used by this routing table. 904 */ 905 static inline const uint8_t * 906 satoaddr(struct art_root *at, const struct sockaddr *sa) 907 { 908 return (((const uint8_t *)sa) + at->ar_off); 909 } 910 911 /* 912 * Return the prefix length of a mask. 913 */ 914 int 915 rtable_satoplen(sa_family_t af, const struct sockaddr *mask) 916 { 917 const struct domain *dp; 918 uint8_t *ap, *ep; 919 int mlen, plen = 0; 920 int i; 921 922 for (i = 0; (dp = domains[i]) != NULL; i++) { 923 if (dp->dom_rtoffset == 0) 924 continue; 925 926 if (af == dp->dom_family) 927 break; 928 } 929 if (dp == NULL) 930 return (-1); 931 932 /* Host route */ 933 if (mask == NULL) 934 return (dp->dom_maxplen); 935 936 mlen = mask->sa_len; 937 938 /* Default route */ 939 if (mlen == 0) 940 return (0); 941 942 ap = (uint8_t *)((uint8_t *)mask) + dp->dom_rtoffset; 943 ep = (uint8_t *)((uint8_t *)mask) + mlen; 944 if (ap > ep) 945 return (-1); 946 947 /* Trim trailing zeroes. */ 948 while (ap < ep && ep[-1] == 0) 949 ep--; 950 951 if (ap == ep) 952 return (0); 953 954 /* "Beauty" adapted from sbin/route/show.c ... */ 955 while (ap < ep) { 956 switch (*ap++) { 957 case 0xff: 958 plen += 8; 959 break; 960 case 0xfe: 961 plen += 7; 962 goto out; 963 case 0xfc: 964 plen += 6; 965 goto out; 966 case 0xf8: 967 plen += 5; 968 goto out; 969 case 0xf0: 970 plen += 4; 971 goto out; 972 case 0xe0: 973 plen += 3; 974 goto out; 975 case 0xc0: 976 plen += 2; 977 goto out; 978 case 0x80: 979 plen += 1; 980 goto out; 981 default: 982 /* Non contiguous mask. */ 983 return (-1); 984 } 985 } 986 987 out: 988 if (plen > dp->dom_maxplen || ap != ep) 989 return -1; 990 991 return (plen); 992 } 993