1 /* $OpenBSD: rde_rib.c,v 1.143 2016/08/27 01:26:22 guenther Exp $ */ 2 3 /* 4 * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 #include <sys/queue.h> 21 22 #include <stdlib.h> 23 #include <string.h> 24 #include <siphash.h> 25 #include <time.h> 26 27 #include "bgpd.h" 28 #include "rde.h" 29 30 /* 31 * BGP RIB -- Routing Information Base 32 * 33 * The RIB is build with one aspect in mind. Speed -- actually update speed. 34 * Therefore one thing needs to be absolutely avoided, long table walks. 35 * This is achieved by heavily linking the different parts together. 36 */ 37 u_int16_t rib_size; 38 struct rib *ribs; 39 40 LIST_HEAD(, rib_context) rib_dump_h = LIST_HEAD_INITIALIZER(rib_dump_h); 41 42 struct rib_entry *rib_add(struct rib *, struct bgpd_addr *, int); 43 int rib_compare(const struct rib_entry *, const struct rib_entry *); 44 void rib_remove(struct rib_entry *); 45 int rib_empty(struct rib_entry *); 46 struct rib_entry *rib_restart(struct rib_context *); 47 48 RB_PROTOTYPE(rib_tree, rib_entry, rib_e, rib_compare); 49 RB_GENERATE(rib_tree, rib_entry, rib_e, rib_compare); 50 51 52 /* RIB specific functions */ 53 u_int16_t 54 rib_new(char *name, u_int rtableid, u_int16_t flags) 55 { 56 struct rib *xribs; 57 u_int16_t id; 58 59 for (id = 0; id < rib_size; id++) { 60 if (*ribs[id].name == '\0') 61 break; 62 } 63 64 if (id == RIB_FAILED) 65 fatalx("rib_new: trying to use reserved id"); 66 67 if (id >= rib_size) { 68 if ((xribs = reallocarray(ribs, id + 1, 69 sizeof(struct rib))) == NULL) { 70 /* XXX this is not clever */ 71 fatal("rib_add"); 72 } 73 ribs = xribs; 74 rib_size = id + 1; 75 } 76 77 bzero(&ribs[id], sizeof(struct rib)); 78 strlcpy(ribs[id].name, name, sizeof(ribs[id].name)); 79 RB_INIT(&ribs[id].rib); 80 ribs[id].state = RECONF_REINIT; 81 ribs[id].id = id; 82 ribs[id].flags = flags; 83 ribs[id].rtableid = rtableid; 84 85 ribs[id].in_rules = calloc(1, sizeof(struct filter_head)); 86 if (ribs[id].in_rules == NULL) 87 fatal(NULL); 88 TAILQ_INIT(ribs[id].in_rules); 89 90 return (id); 91 } 92 93 u_int16_t 94 rib_find(char *name) 95 { 96 u_int16_t id; 97 98 if (name == NULL || *name == '\0') 99 return (1); /* no name returns the Loc-RIB */ 100 101 for (id = 0; id < rib_size; id++) { 102 if (!strcmp(ribs[id].name, name)) 103 return (id); 104 } 105 106 return (RIB_FAILED); 107 } 108 109 void 110 rib_free(struct rib *rib) 111 { 112 struct rib_context *ctx, *next; 113 struct rib_entry *re, *xre; 114 struct prefix *p, *np; 115 116 /* abort pending rib_dumps */ 117 for (ctx = LIST_FIRST(&rib_dump_h); ctx != NULL; ctx = next) { 118 next = LIST_NEXT(ctx, entry); 119 if (ctx->ctx_rib == rib) { 120 re = ctx->ctx_re; 121 re->flags &= ~F_RIB_ENTRYLOCK; 122 LIST_REMOVE(ctx, entry); 123 if (ctx->ctx_done) 124 ctx->ctx_done(ctx->ctx_arg); 125 else 126 free(ctx); 127 } 128 } 129 130 for (re = RB_MIN(rib_tree, &rib->rib); re != NULL; re = xre) { 131 xre = RB_NEXT(rib_tree, &rib->rib, re); 132 133 /* 134 * Removing the prefixes is tricky because the last one 135 * will remove the rib_entry as well and because we do 136 * an empty check in prefix_destroy() it is not possible to 137 * use the default for loop. 138 */ 139 while ((p = LIST_FIRST(&re->prefix_h))) { 140 np = LIST_NEXT(p, rib_l); 141 if (p->aspath->pftableid) { 142 struct bgpd_addr addr; 143 144 pt_getaddr(p->prefix, &addr); 145 /* Commit is done in peer_down() */ 146 rde_send_pftable(p->aspath->pftableid, &addr, 147 p->prefix->prefixlen, 1); 148 } 149 prefix_destroy(p); 150 if (np == NULL) 151 break; 152 } 153 } 154 filterlist_free(rib->in_rules_tmp); 155 filterlist_free(rib->in_rules); 156 bzero(rib, sizeof(struct rib)); 157 } 158 159 int 160 rib_compare(const struct rib_entry *a, const struct rib_entry *b) 161 { 162 return (pt_prefix_cmp(a->prefix, b->prefix)); 163 } 164 165 struct rib_entry * 166 rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen) 167 { 168 struct rib_entry xre; 169 struct pt_entry *pte; 170 171 pte = pt_fill(prefix, prefixlen); 172 bzero(&xre, sizeof(xre)); 173 xre.prefix = pte; 174 175 return (RB_FIND(rib_tree, &rib->rib, &xre)); 176 } 177 178 struct rib_entry * 179 rib_lookup(struct rib *rib, struct bgpd_addr *addr) 180 { 181 struct rib_entry *re; 182 int i; 183 184 switch (addr->aid) { 185 case AID_INET: 186 case AID_VPN_IPv4: 187 for (i = 32; i >= 0; i--) { 188 re = rib_get(rib, addr, i); 189 if (re != NULL) 190 return (re); 191 } 192 break; 193 case AID_INET6: 194 for (i = 128; i >= 0; i--) { 195 re = rib_get(rib, addr, i); 196 if (re != NULL) 197 return (re); 198 } 199 break; 200 default: 201 fatalx("rib_lookup: unknown af"); 202 } 203 return (NULL); 204 } 205 206 207 struct rib_entry * 208 rib_add(struct rib *rib, struct bgpd_addr *prefix, int prefixlen) 209 { 210 struct pt_entry *pte; 211 struct rib_entry *re; 212 213 pte = pt_get(prefix, prefixlen); 214 if (pte == NULL) 215 pte = pt_add(prefix, prefixlen); 216 217 if ((re = calloc(1, sizeof(*re))) == NULL) 218 fatal("rib_add"); 219 220 LIST_INIT(&re->prefix_h); 221 re->prefix = pte; 222 re->flags = rib->flags; 223 re->ribid = rib->id; 224 225 if (RB_INSERT(rib_tree, &rib->rib, re) != NULL) { 226 log_warnx("rib_add: insert failed"); 227 free(re); 228 return (NULL); 229 } 230 231 pt_ref(pte); 232 233 rdemem.rib_cnt++; 234 235 return (re); 236 } 237 238 void 239 rib_remove(struct rib_entry *re) 240 { 241 if (!rib_empty(re)) 242 fatalx("rib_remove: entry not empty"); 243 244 if (re->flags & F_RIB_ENTRYLOCK) 245 /* entry is locked, don't free it. */ 246 return; 247 248 pt_unref(re->prefix); 249 if (pt_empty(re->prefix)) 250 pt_remove(re->prefix); 251 252 if (RB_REMOVE(rib_tree, &ribs[re->ribid].rib, re) == NULL) 253 log_warnx("rib_remove: remove failed."); 254 255 free(re); 256 rdemem.rib_cnt--; 257 } 258 259 int 260 rib_empty(struct rib_entry *re) 261 { 262 return LIST_EMPTY(&re->prefix_h); 263 } 264 265 void 266 rib_dump(struct rib *rib, void (*upcall)(struct rib_entry *, void *), 267 void *arg, u_int8_t aid) 268 { 269 struct rib_context *ctx; 270 271 if ((ctx = calloc(1, sizeof(*ctx))) == NULL) 272 fatal("rib_dump"); 273 ctx->ctx_rib = rib; 274 ctx->ctx_upcall = upcall; 275 ctx->ctx_arg = arg; 276 ctx->ctx_aid = aid; 277 rib_dump_r(ctx); 278 } 279 280 void 281 rib_dump_r(struct rib_context *ctx) 282 { 283 struct rib_entry *re; 284 unsigned int i; 285 286 if (ctx->ctx_re == NULL) { 287 re = RB_MIN(rib_tree, &ctx->ctx_rib->rib); 288 LIST_INSERT_HEAD(&rib_dump_h, ctx, entry); 289 } else 290 re = rib_restart(ctx); 291 292 for (i = 0; re != NULL; re = RB_NEXT(rib_tree, unused, re)) { 293 if (ctx->ctx_aid != AID_UNSPEC && 294 ctx->ctx_aid != re->prefix->aid) 295 continue; 296 if (ctx->ctx_count && i++ >= ctx->ctx_count && 297 (re->flags & F_RIB_ENTRYLOCK) == 0) { 298 /* store and lock last element */ 299 ctx->ctx_re = re; 300 re->flags |= F_RIB_ENTRYLOCK; 301 return; 302 } 303 ctx->ctx_upcall(re, ctx->ctx_arg); 304 } 305 306 LIST_REMOVE(ctx, entry); 307 if (ctx->ctx_done) 308 ctx->ctx_done(ctx->ctx_arg); 309 else 310 free(ctx); 311 } 312 313 struct rib_entry * 314 rib_restart(struct rib_context *ctx) 315 { 316 struct rib_entry *re; 317 318 re = ctx->ctx_re; 319 re->flags &= ~F_RIB_ENTRYLOCK; 320 321 /* find first non empty element */ 322 while (re && rib_empty(re)) 323 re = RB_NEXT(rib_tree, unused, re); 324 325 /* free the previously locked rib element if empty */ 326 if (rib_empty(ctx->ctx_re)) 327 rib_remove(ctx->ctx_re); 328 ctx->ctx_re = NULL; 329 return (re); 330 } 331 332 void 333 rib_dump_runner(void) 334 { 335 struct rib_context *ctx, *next; 336 337 for (ctx = LIST_FIRST(&rib_dump_h); ctx != NULL; ctx = next) { 338 next = LIST_NEXT(ctx, entry); 339 rib_dump_r(ctx); 340 } 341 } 342 343 int 344 rib_dump_pending(void) 345 { 346 return (!LIST_EMPTY(&rib_dump_h)); 347 } 348 349 /* used to bump correct prefix counters */ 350 #define PREFIX_COUNT(x, op) \ 351 do { \ 352 (x)->prefix_cnt += (op); \ 353 } while (0) 354 355 /* path specific functions */ 356 357 static void path_link(struct rde_aspath *, struct rde_peer *); 358 359 struct path_table pathtable; 360 361 SIPHASH_KEY pathtablekey; 362 363 /* XXX the hash should also include communities and the other attrs */ 364 #define PATH_HASH(x) \ 365 &pathtable.path_hashtbl[SipHash24(&pathtablekey, (x)->data, (x)->len) & \ 366 pathtable.path_hashmask] 367 368 void 369 path_init(u_int32_t hashsize) 370 { 371 u_int32_t hs, i; 372 373 for (hs = 1; hs < hashsize; hs <<= 1) 374 ; 375 pathtable.path_hashtbl = calloc(hs, sizeof(struct aspath_head)); 376 if (pathtable.path_hashtbl == NULL) 377 fatal("path_init"); 378 379 for (i = 0; i < hs; i++) 380 LIST_INIT(&pathtable.path_hashtbl[i]); 381 382 pathtable.path_hashmask = hs - 1; 383 arc4random_buf(&pathtablekey, sizeof(pathtablekey)); 384 } 385 386 void 387 path_shutdown(void) 388 { 389 u_int32_t i; 390 391 for (i = 0; i <= pathtable.path_hashmask; i++) 392 if (!LIST_EMPTY(&pathtable.path_hashtbl[i])) 393 log_warnx("path_free: free non-free table"); 394 395 free(pathtable.path_hashtbl); 396 } 397 398 int 399 path_update(struct rib *rib, struct rde_peer *peer, struct rde_aspath *nasp, 400 struct bgpd_addr *prefix, int prefixlen) 401 { 402 struct rde_aspath *asp; 403 struct prefix *p; 404 405 if (nasp->pftableid) { 406 rde_send_pftable(nasp->pftableid, prefix, prefixlen, 0); 407 rde_send_pftable_commit(); 408 } 409 410 /* 411 * First try to find a prefix in the specified RIB. 412 */ 413 if ((p = prefix_get(rib, peer, prefix, prefixlen, 0)) != NULL) { 414 if (path_compare(nasp, p->aspath) == 0) { 415 /* no change, update last change */ 416 p->lastchange = time(NULL); 417 return (0); 418 } 419 } 420 421 /* 422 * Either the prefix does not exist or the path changed. 423 * In both cases lookup the new aspath to make sure it is not 424 * already in the RIB. 425 */ 426 if ((asp = path_lookup(nasp, peer)) == NULL) { 427 /* Path not available, create and link a new one. */ 428 asp = path_copy(nasp); 429 path_link(asp, peer); 430 } 431 432 /* If the prefix was found move it else add it to the aspath. */ 433 if (p != NULL) 434 prefix_move(asp, p); 435 else 436 return (prefix_add(rib, asp, prefix, prefixlen)); 437 return (0); 438 } 439 440 int 441 path_compare(struct rde_aspath *a, struct rde_aspath *b) 442 { 443 int r; 444 445 if (a->origin > b->origin) 446 return (1); 447 if (a->origin < b->origin) 448 return (-1); 449 if ((a->flags & ~F_ATTR_LINKED) > (b->flags & ~F_ATTR_LINKED)) 450 return (1); 451 if ((a->flags & ~F_ATTR_LINKED) < (b->flags & ~F_ATTR_LINKED)) 452 return (-1); 453 if (a->med > b->med) 454 return (1); 455 if (a->med < b->med) 456 return (-1); 457 if (a->lpref > b->lpref) 458 return (1); 459 if (a->lpref < b->lpref) 460 return (-1); 461 if (a->weight > b->weight) 462 return (1); 463 if (a->weight < b->weight) 464 return (-1); 465 if (a->rtlabelid > b->rtlabelid) 466 return (1); 467 if (a->rtlabelid < b->rtlabelid) 468 return (-1); 469 if (a->pftableid > b->pftableid) 470 return (1); 471 if (a->pftableid < b->pftableid) 472 return (-1); 473 474 r = aspath_compare(a->aspath, b->aspath); 475 if (r == 0) 476 r = nexthop_compare(a->nexthop, b->nexthop); 477 if (r > 0) 478 return (1); 479 if (r < 0) 480 return (-1); 481 482 return (attr_compare(a, b)); 483 } 484 485 struct rde_aspath * 486 path_lookup(struct rde_aspath *aspath, struct rde_peer *peer) 487 { 488 struct aspath_head *head; 489 struct rde_aspath *asp; 490 491 head = PATH_HASH(aspath->aspath); 492 493 LIST_FOREACH(asp, head, path_l) { 494 if (peer == asp->peer && path_compare(aspath, asp) == 0) 495 return (asp); 496 } 497 return (NULL); 498 } 499 500 void 501 path_remove(struct rde_aspath *asp) 502 { 503 struct prefix *p, *np; 504 505 for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = np) { 506 np = LIST_NEXT(p, path_l); 507 if (asp->pftableid) { 508 struct bgpd_addr addr; 509 510 pt_getaddr(p->prefix, &addr); 511 /* Commit is done in peer_down() */ 512 rde_send_pftable(p->aspath->pftableid, &addr, 513 p->prefix->prefixlen, 1); 514 } 515 prefix_destroy(p); 516 } 517 } 518 519 /* remove all stale routes or if staletime is 0 remove all routes for 520 a specified AID. */ 521 u_int32_t 522 path_remove_stale(struct rde_aspath *asp, u_int8_t aid) 523 { 524 struct prefix *p, *np; 525 time_t staletime; 526 u_int32_t rprefixes; 527 528 rprefixes=0; 529 staletime = asp->peer->staletime[aid]; 530 for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = np) { 531 np = LIST_NEXT(p, path_l); 532 if (p->prefix->aid != aid) 533 continue; 534 535 if (staletime && p->lastchange > staletime) 536 continue; 537 538 if (asp->pftableid) { 539 struct bgpd_addr addr; 540 541 pt_getaddr(p->prefix, &addr); 542 /* Commit is done in peer_flush() */ 543 rde_send_pftable(p->aspath->pftableid, &addr, 544 p->prefix->prefixlen, 1); 545 } 546 547 /* only count Adj-RIB-In */ 548 if (p->rib->ribid == 0) 549 rprefixes++; 550 551 prefix_destroy(p); 552 } 553 return (rprefixes); 554 } 555 556 557 /* this function is only called by prefix_remove and path_remove */ 558 void 559 path_destroy(struct rde_aspath *asp) 560 { 561 /* path_destroy can only unlink and free empty rde_aspath */ 562 if (asp->prefix_cnt != 0 || asp->active_cnt != 0) 563 log_warnx("path_destroy: prefix count out of sync"); 564 565 nexthop_unlink(asp); 566 LIST_REMOVE(asp, path_l); 567 LIST_REMOVE(asp, peer_l); 568 asp->peer = NULL; 569 asp->nexthop = NULL; 570 asp->flags &= ~F_ATTR_LINKED; 571 572 path_put(asp); 573 } 574 575 int 576 path_empty(struct rde_aspath *asp) 577 { 578 return LIST_EMPTY(&asp->prefix_h); 579 } 580 581 /* 582 * the path object is linked into multiple lists for fast access. 583 * These are peer_l, path_l and nexthop_l. 584 * peer_l: list of all aspaths that belong to that peer 585 * path_l: hash list to find paths quickly 586 * nexthop_l: list of all aspaths with an equal exit nexthop 587 */ 588 static void 589 path_link(struct rde_aspath *asp, struct rde_peer *peer) 590 { 591 struct aspath_head *head; 592 593 head = PATH_HASH(asp->aspath); 594 595 LIST_INSERT_HEAD(head, asp, path_l); 596 LIST_INSERT_HEAD(&peer->path_h, asp, peer_l); 597 asp->peer = peer; 598 nexthop_link(asp); 599 asp->flags |= F_ATTR_LINKED; 600 } 601 602 /* 603 * copy asp to a new UNLINKED one mainly for filtering 604 */ 605 struct rde_aspath * 606 path_copy(struct rde_aspath *asp) 607 { 608 struct rde_aspath *nasp; 609 610 nasp = path_get(); 611 nasp->aspath = asp->aspath; 612 if (nasp->aspath != NULL) { 613 nasp->aspath->refcnt++; 614 rdemem.aspath_refs++; 615 } 616 nasp->nexthop = asp->nexthop; 617 nasp->med = asp->med; 618 nasp->lpref = asp->lpref; 619 nasp->weight = asp->weight; 620 nasp->origin = asp->origin; 621 nasp->rtlabelid = asp->rtlabelid; 622 rtlabel_ref(nasp->rtlabelid); 623 nasp->pftableid = asp->pftableid; 624 pftable_ref(nasp->pftableid); 625 626 nasp->flags = asp->flags & ~F_ATTR_LINKED; 627 attr_copy(nasp, asp); 628 629 return (nasp); 630 } 631 632 /* alloc and initialize new entry. May not fail. */ 633 struct rde_aspath * 634 path_get(void) 635 { 636 struct rde_aspath *asp; 637 638 asp = calloc(1, sizeof(*asp)); 639 if (asp == NULL) 640 fatal("path_alloc"); 641 rdemem.path_cnt++; 642 643 LIST_INIT(&asp->prefix_h); 644 asp->origin = ORIGIN_INCOMPLETE; 645 asp->lpref = DEFAULT_LPREF; 646 /* med = 0 */ 647 /* weight = 0 */ 648 /* rtlabel = 0 */ 649 650 return (asp); 651 } 652 653 /* free an unlinked element */ 654 void 655 path_put(struct rde_aspath *asp) 656 { 657 if (asp == NULL) 658 return; 659 660 if (asp->flags & F_ATTR_LINKED) 661 fatalx("path_put: linked object"); 662 663 rtlabel_unref(asp->rtlabelid); 664 pftable_unref(asp->pftableid); 665 aspath_put(asp->aspath); 666 attr_freeall(asp); 667 rdemem.path_cnt--; 668 free(asp); 669 } 670 671 /* prefix specific functions */ 672 673 static struct prefix *prefix_alloc(void); 674 static void prefix_free(struct prefix *); 675 static void prefix_link(struct prefix *, struct rib_entry *, 676 struct rde_aspath *); 677 static void prefix_unlink(struct prefix *); 678 679 /* 680 * search for specified prefix of a peer. Returns NULL if not found. 681 */ 682 struct prefix * 683 prefix_get(struct rib *rib, struct rde_peer *peer, struct bgpd_addr *prefix, 684 int prefixlen, u_int32_t flags) 685 { 686 struct rib_entry *re; 687 688 re = rib_get(rib, prefix, prefixlen); 689 if (re == NULL) 690 return (NULL); 691 return (prefix_bypeer(re, peer, flags)); 692 } 693 694 /* 695 * Adds or updates a prefix. 696 */ 697 int 698 prefix_add(struct rib *rib, struct rde_aspath *asp, struct bgpd_addr *prefix, 699 int prefixlen) 700 701 { 702 struct prefix *p; 703 struct rib_entry *re; 704 705 re = rib_get(rib, prefix, prefixlen); 706 if (re == NULL) 707 re = rib_add(rib, prefix, prefixlen); 708 709 p = prefix_bypeer(re, asp->peer, asp->flags); 710 if (p == NULL) { 711 p = prefix_alloc(); 712 prefix_link(p, re, asp); 713 return (1); 714 } else { 715 if (p->aspath != asp) { 716 /* prefix belongs to a different aspath so move */ 717 prefix_move(asp, p); 718 } else 719 p->lastchange = time(NULL); 720 return (0); 721 } 722 } 723 724 /* 725 * Move the prefix to the specified as path, removes the old asp if needed. 726 */ 727 void 728 prefix_move(struct rde_aspath *asp, struct prefix *p) 729 { 730 struct prefix *np; 731 struct rde_aspath *oasp; 732 733 if (asp->peer != p->aspath->peer) 734 fatalx("prefix_move: cross peer move"); 735 736 /* create new prefix node */ 737 np = prefix_alloc(); 738 np->aspath = asp; 739 /* peer and prefix pointers are still equal */ 740 np->prefix = p->prefix; 741 np->rib = p->rib; 742 np->lastchange = time(NULL); 743 744 /* add to new as path */ 745 LIST_INSERT_HEAD(&asp->prefix_h, np, path_l); 746 PREFIX_COUNT(asp, 1); 747 /* 748 * no need to update the peer prefix count because we are only moving 749 * the prefix without changing the peer. 750 */ 751 752 /* 753 * First kick the old prefix node out of the prefix list, 754 * afterwards run the route decision for new prefix node. 755 * Because of this only one update is generated if the prefix 756 * was active. 757 * This is save because we create a new prefix and so the change 758 * is noticed by prefix_evaluate(). 759 */ 760 LIST_REMOVE(p, rib_l); 761 prefix_evaluate(np, np->rib); 762 763 /* remove old prefix node */ 764 oasp = p->aspath; 765 LIST_REMOVE(p, path_l); 766 PREFIX_COUNT(oasp, -1); 767 /* as before peer count needs no update because of move */ 768 769 /* destroy all references to other objects and free the old prefix */ 770 p->aspath = NULL; 771 p->prefix = NULL; 772 p->rib = NULL; 773 prefix_free(p); 774 775 /* destroy old path if empty */ 776 if (path_empty(oasp)) 777 path_destroy(oasp); 778 } 779 780 /* 781 * Removes a prefix from all lists. If the parent objects -- path or 782 * pt_entry -- become empty remove them too. 783 */ 784 int 785 prefix_remove(struct rib *rib, struct rde_peer *peer, struct bgpd_addr *prefix, 786 int prefixlen, u_int32_t flags) 787 { 788 struct prefix *p; 789 struct rib_entry *re; 790 struct rde_aspath *asp; 791 792 re = rib_get(rib, prefix, prefixlen); 793 if (re == NULL) /* Got a dummy withdrawn request */ 794 return (0); 795 796 p = prefix_bypeer(re, peer, flags); 797 if (p == NULL) /* Got a dummy withdrawn request. */ 798 return (0); 799 800 asp = p->aspath; 801 802 if (asp->pftableid) { 803 /* only prefixes in the local RIB were pushed into pf */ 804 rde_send_pftable(asp->pftableid, prefix, prefixlen, 1); 805 rde_send_pftable_commit(); 806 } 807 808 prefix_destroy(p); 809 810 return (1); 811 } 812 813 /* dump a prefix into specified buffer */ 814 int 815 prefix_write(u_char *buf, int len, struct bgpd_addr *prefix, u_int8_t plen) 816 { 817 int totlen; 818 819 switch (prefix->aid) { 820 case AID_INET: 821 case AID_INET6: 822 totlen = PREFIX_SIZE(plen); 823 824 if (totlen > len) 825 return (-1); 826 *buf++ = plen; 827 memcpy(buf, &prefix->ba, totlen - 1); 828 return (totlen); 829 case AID_VPN_IPv4: 830 totlen = PREFIX_SIZE(plen) + sizeof(prefix->vpn4.rd) + 831 prefix->vpn4.labellen; 832 plen += (sizeof(prefix->vpn4.rd) + prefix->vpn4.labellen) * 8; 833 834 if (totlen > len) 835 return (-1); 836 *buf++ = plen; 837 memcpy(buf, &prefix->vpn4.labelstack, prefix->vpn4.labellen); 838 buf += prefix->vpn4.labellen; 839 memcpy(buf, &prefix->vpn4.rd, sizeof(prefix->vpn4.rd)); 840 buf += sizeof(prefix->vpn4.rd); 841 memcpy(buf, &prefix->vpn4.addr, PREFIX_SIZE(plen) - 1); 842 return (totlen); 843 default: 844 return (-1); 845 } 846 } 847 848 int 849 prefix_writebuf(struct ibuf *buf, struct bgpd_addr *prefix, u_int8_t plen) 850 { 851 int totlen; 852 void *bptr; 853 854 switch (prefix->aid) { 855 case AID_INET: 856 case AID_INET6: 857 totlen = PREFIX_SIZE(plen); 858 break; 859 case AID_VPN_IPv4: 860 totlen = PREFIX_SIZE(plen) + sizeof(prefix->vpn4.rd) + 861 prefix->vpn4.labellen; 862 break; 863 default: 864 return (-1); 865 } 866 867 if ((bptr = ibuf_reserve(buf, totlen)) == NULL) 868 return (-1); 869 if (prefix_write(bptr, totlen, prefix, plen) == -1) 870 return (-1); 871 return (0); 872 } 873 874 /* 875 * Searches in the prefix list of specified pt_entry for a prefix entry 876 * belonging to the peer peer. Returns NULL if no match found. 877 */ 878 struct prefix * 879 prefix_bypeer(struct rib_entry *re, struct rde_peer *peer, u_int32_t flags) 880 { 881 struct prefix *p; 882 883 LIST_FOREACH(p, &re->prefix_h, rib_l) { 884 if (p->aspath->peer != peer) 885 continue; 886 if (p->aspath->flags & flags && 887 (flags & F_ANN_DYNAMIC) != 888 (p->aspath->flags & F_ANN_DYNAMIC)) 889 continue; 890 return (p); 891 } 892 return (NULL); 893 } 894 895 void 896 prefix_updateall(struct rde_aspath *asp, enum nexthop_state state, 897 enum nexthop_state oldstate) 898 { 899 struct prefix *p; 900 901 LIST_FOREACH(p, &asp->prefix_h, path_l) { 902 /* 903 * skip non local-RIBs or RIBs that are flagged as noeval. 904 */ 905 if (p->rib->flags & F_RIB_NOEVALUATE) 906 continue; 907 908 if (oldstate == state && state == NEXTHOP_REACH) { 909 /* 910 * The state of the nexthop did not change. The only 911 * thing that may have changed is the true_nexthop 912 * or other internal infos. This will not change 913 * the routing decision so shortcut here. 914 */ 915 if ((p->rib->flags & F_RIB_NOFIB) == 0 && 916 p == p->rib->active) 917 rde_send_kroute(p, NULL, p->rib->ribid); 918 continue; 919 } 920 921 /* redo the route decision */ 922 LIST_REMOVE(p, rib_l); 923 /* 924 * If the prefix is the active one remove it first, 925 * this has to be done because we can not detect when 926 * the active prefix changes its state. In this case 927 * we know that this is a withdrawal and so the second 928 * prefix_evaluate() will generate no update because 929 * the nexthop is unreachable or ineligible. 930 */ 931 if (p == p->rib->active) 932 prefix_evaluate(NULL, p->rib); 933 prefix_evaluate(p, p->rib); 934 } 935 } 936 937 /* kill a prefix. */ 938 void 939 prefix_destroy(struct prefix *p) 940 { 941 struct rde_aspath *asp; 942 943 asp = p->aspath; 944 prefix_unlink(p); 945 prefix_free(p); 946 947 if (path_empty(asp)) 948 path_destroy(asp); 949 } 950 951 /* 952 * helper function to clean up the connected networks after a reload 953 */ 954 void 955 prefix_network_clean(struct rde_peer *peer, time_t reloadtime, u_int32_t flags) 956 { 957 struct rde_aspath *asp, *xasp; 958 struct prefix *p, *xp; 959 960 for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = xasp) { 961 xasp = LIST_NEXT(asp, peer_l); 962 if ((asp->flags & F_ANN_DYNAMIC) != flags) 963 continue; 964 for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = xp) { 965 xp = LIST_NEXT(p, path_l); 966 if (reloadtime > p->lastchange) { 967 prefix_unlink(p); 968 prefix_free(p); 969 } 970 } 971 if (path_empty(asp)) 972 path_destroy(asp); 973 } 974 } 975 976 /* 977 * Link a prefix into the different parent objects. 978 */ 979 static void 980 prefix_link(struct prefix *pref, struct rib_entry *re, struct rde_aspath *asp) 981 { 982 LIST_INSERT_HEAD(&asp->prefix_h, pref, path_l); 983 PREFIX_COUNT(asp, 1); 984 985 pref->aspath = asp; 986 pref->rib = re; 987 pref->prefix = re->prefix; 988 pt_ref(pref->prefix); 989 pref->lastchange = time(NULL); 990 991 /* make route decision */ 992 prefix_evaluate(pref, re); 993 } 994 995 /* 996 * Unlink a prefix from the different parent objects. 997 */ 998 static void 999 prefix_unlink(struct prefix *pref) 1000 { 1001 struct rib_entry *re = pref->rib; 1002 1003 /* make route decision */ 1004 LIST_REMOVE(pref, rib_l); 1005 prefix_evaluate(NULL, re); 1006 1007 LIST_REMOVE(pref, path_l); 1008 PREFIX_COUNT(pref->aspath, -1); 1009 1010 pt_unref(pref->prefix); 1011 if (pt_empty(pref->prefix)) 1012 pt_remove(pref->prefix); 1013 if (rib_empty(re)) 1014 rib_remove(re); 1015 1016 /* destroy all references to other objects */ 1017 pref->aspath = NULL; 1018 pref->prefix = NULL; 1019 pref->rib = NULL; 1020 1021 /* 1022 * It's the caller's duty to remove empty aspath structures. 1023 * Also freeing the unlinked prefix is the caller's duty. 1024 */ 1025 } 1026 1027 /* alloc and bzero new entry. May not fail. */ 1028 static struct prefix * 1029 prefix_alloc(void) 1030 { 1031 struct prefix *p; 1032 1033 p = calloc(1, sizeof(*p)); 1034 if (p == NULL) 1035 fatal("prefix_alloc"); 1036 rdemem.prefix_cnt++; 1037 return p; 1038 } 1039 1040 /* free a unlinked entry */ 1041 static void 1042 prefix_free(struct prefix *pref) 1043 { 1044 rdemem.prefix_cnt--; 1045 free(pref); 1046 } 1047 1048 /* 1049 * nexthop functions 1050 */ 1051 struct nexthop_head *nexthop_hash(struct bgpd_addr *); 1052 struct nexthop *nexthop_lookup(struct bgpd_addr *); 1053 1054 /* 1055 * In BGP there exist two nexthops: the exit nexthop which was announced via 1056 * BGP and the true nexthop which is used in the FIB -- forward information 1057 * base a.k.a kernel routing table. When sending updates it is even more 1058 * confusing. In IBGP we pass the unmodified exit nexthop to the neighbors 1059 * while in EBGP normally the address of the router is sent. The exit nexthop 1060 * may be passed to the external neighbor if the neighbor and the exit nexthop 1061 * reside in the same subnet -- directly connected. 1062 */ 1063 struct nexthop_table { 1064 LIST_HEAD(nexthop_head, nexthop) *nexthop_hashtbl; 1065 u_int32_t nexthop_hashmask; 1066 } nexthoptable; 1067 1068 SIPHASH_KEY nexthoptablekey; 1069 1070 void 1071 nexthop_init(u_int32_t hashsize) 1072 { 1073 u_int32_t hs, i; 1074 1075 for (hs = 1; hs < hashsize; hs <<= 1) 1076 ; 1077 nexthoptable.nexthop_hashtbl = calloc(hs, sizeof(struct nexthop_head)); 1078 if (nexthoptable.nexthop_hashtbl == NULL) 1079 fatal("nextop_init"); 1080 1081 for (i = 0; i < hs; i++) 1082 LIST_INIT(&nexthoptable.nexthop_hashtbl[i]); 1083 arc4random_buf(&nexthoptablekey, sizeof(nexthoptablekey)); 1084 1085 nexthoptable.nexthop_hashmask = hs - 1; 1086 } 1087 1088 void 1089 nexthop_shutdown(void) 1090 { 1091 u_int32_t i; 1092 struct nexthop *nh, *nnh; 1093 1094 for (i = 0; i <= nexthoptable.nexthop_hashmask; i++) { 1095 for (nh = LIST_FIRST(&nexthoptable.nexthop_hashtbl[i]); 1096 nh != NULL; nh = nnh) { 1097 nnh = LIST_NEXT(nh, nexthop_l); 1098 nh->state = NEXTHOP_UNREACH; 1099 (void)nexthop_delete(nh); 1100 } 1101 if (!LIST_EMPTY(&nexthoptable.nexthop_hashtbl[i])) 1102 log_warnx("nexthop_shutdown: non-free table"); 1103 } 1104 1105 free(nexthoptable.nexthop_hashtbl); 1106 } 1107 1108 void 1109 nexthop_update(struct kroute_nexthop *msg) 1110 { 1111 struct nexthop *nh; 1112 struct rde_aspath *asp; 1113 enum nexthop_state oldstate; 1114 1115 nh = nexthop_lookup(&msg->nexthop); 1116 if (nh == NULL) { 1117 log_warnx("nexthop_update: non-existent nexthop %s", 1118 log_addr(&msg->nexthop)); 1119 return; 1120 } 1121 1122 oldstate = nh->state; 1123 if (msg->valid) 1124 nh->state = NEXTHOP_REACH; 1125 else 1126 nh->state = NEXTHOP_UNREACH; 1127 1128 if (msg->connected) { 1129 nh->flags |= NEXTHOP_CONNECTED; 1130 memcpy(&nh->true_nexthop, &nh->exit_nexthop, 1131 sizeof(nh->true_nexthop)); 1132 } else 1133 memcpy(&nh->true_nexthop, &msg->gateway, 1134 sizeof(nh->true_nexthop)); 1135 1136 memcpy(&nh->nexthop_net, &msg->net, 1137 sizeof(nh->nexthop_net)); 1138 nh->nexthop_netlen = msg->netlen; 1139 1140 if (nexthop_delete(nh)) 1141 /* nexthop no longer used */ 1142 return; 1143 1144 if (rde_noevaluate()) 1145 /* 1146 * if the decision process is turned off there is no need 1147 * for the aspath list walk. 1148 */ 1149 return; 1150 1151 LIST_FOREACH(asp, &nh->path_h, nexthop_l) { 1152 prefix_updateall(asp, nh->state, oldstate); 1153 } 1154 } 1155 1156 void 1157 nexthop_modify(struct rde_aspath *asp, struct bgpd_addr *nexthop, 1158 enum action_types type, u_int8_t aid) 1159 { 1160 struct nexthop *nh; 1161 1162 if (type == ACTION_SET_NEXTHOP && aid != nexthop->aid) 1163 return; 1164 1165 asp->flags &= ~F_NEXTHOP_MASK; 1166 switch (type) { 1167 case ACTION_SET_NEXTHOP_REJECT: 1168 asp->flags |= F_NEXTHOP_REJECT; 1169 break; 1170 case ACTION_SET_NEXTHOP_BLACKHOLE: 1171 asp->flags |= F_NEXTHOP_BLACKHOLE; 1172 break; 1173 case ACTION_SET_NEXTHOP_NOMODIFY: 1174 asp->flags |= F_NEXTHOP_NOMODIFY; 1175 break; 1176 case ACTION_SET_NEXTHOP_SELF: 1177 asp->flags |= F_NEXTHOP_SELF; 1178 break; 1179 case ACTION_SET_NEXTHOP: 1180 nh = nexthop_get(nexthop); 1181 if (asp->flags & F_ATTR_LINKED) 1182 nexthop_unlink(asp); 1183 asp->nexthop = nh; 1184 if (asp->flags & F_ATTR_LINKED) 1185 nexthop_link(asp); 1186 break; 1187 default: 1188 break; 1189 } 1190 } 1191 1192 void 1193 nexthop_link(struct rde_aspath *asp) 1194 { 1195 if (asp->nexthop == NULL) 1196 return; 1197 1198 LIST_INSERT_HEAD(&asp->nexthop->path_h, asp, nexthop_l); 1199 } 1200 1201 void 1202 nexthop_unlink(struct rde_aspath *asp) 1203 { 1204 struct nexthop *nh; 1205 1206 if (asp->nexthop == NULL) 1207 return; 1208 1209 LIST_REMOVE(asp, nexthop_l); 1210 1211 /* see if list is empty */ 1212 nh = asp->nexthop; 1213 asp->nexthop = NULL; 1214 1215 (void)nexthop_delete(nh); 1216 } 1217 1218 int 1219 nexthop_delete(struct nexthop *nh) 1220 { 1221 /* nexthop still used by some other aspath */ 1222 if (!LIST_EMPTY(&nh->path_h)) 1223 return (0); 1224 1225 /* either pinned or in a state where it may not be deleted */ 1226 if (nh->refcnt > 0 || nh->state == NEXTHOP_LOOKUP) 1227 return (0); 1228 1229 LIST_REMOVE(nh, nexthop_l); 1230 rde_send_nexthop(&nh->exit_nexthop, 0); 1231 1232 rdemem.nexthop_cnt--; 1233 free(nh); 1234 return (1); 1235 } 1236 1237 struct nexthop * 1238 nexthop_get(struct bgpd_addr *nexthop) 1239 { 1240 struct nexthop *nh; 1241 1242 nh = nexthop_lookup(nexthop); 1243 if (nh == NULL) { 1244 nh = calloc(1, sizeof(*nh)); 1245 if (nh == NULL) 1246 fatal("nexthop_alloc"); 1247 rdemem.nexthop_cnt++; 1248 1249 LIST_INIT(&nh->path_h); 1250 nh->state = NEXTHOP_LOOKUP; 1251 nh->exit_nexthop = *nexthop; 1252 LIST_INSERT_HEAD(nexthop_hash(nexthop), nh, 1253 nexthop_l); 1254 1255 rde_send_nexthop(&nh->exit_nexthop, 1); 1256 } 1257 1258 return (nh); 1259 } 1260 1261 int 1262 nexthop_compare(struct nexthop *na, struct nexthop *nb) 1263 { 1264 struct bgpd_addr *a, *b; 1265 1266 if (na == nb) 1267 return (0); 1268 if (na == NULL) 1269 return (-1); 1270 if (nb == NULL) 1271 return (1); 1272 1273 a = &na->exit_nexthop; 1274 b = &nb->exit_nexthop; 1275 1276 if (a->aid != b->aid) 1277 return (a->aid - b->aid); 1278 1279 switch (a->aid) { 1280 case AID_INET: 1281 if (ntohl(a->v4.s_addr) > ntohl(b->v4.s_addr)) 1282 return (1); 1283 if (ntohl(a->v4.s_addr) < ntohl(b->v4.s_addr)) 1284 return (-1); 1285 return (0); 1286 case AID_INET6: 1287 return (memcmp(&a->v6, &b->v6, sizeof(struct in6_addr))); 1288 default: 1289 fatalx("nexthop_cmp: unknown af"); 1290 } 1291 return (-1); 1292 } 1293 1294 struct nexthop * 1295 nexthop_lookup(struct bgpd_addr *nexthop) 1296 { 1297 struct nexthop *nh; 1298 1299 LIST_FOREACH(nh, nexthop_hash(nexthop), nexthop_l) { 1300 if (memcmp(&nh->exit_nexthop, nexthop, 1301 sizeof(struct bgpd_addr)) == 0) 1302 return (nh); 1303 } 1304 return (NULL); 1305 } 1306 1307 struct nexthop_head * 1308 nexthop_hash(struct bgpd_addr *nexthop) 1309 { 1310 u_int32_t h = 0; 1311 1312 switch (nexthop->aid) { 1313 case AID_INET: 1314 h = SipHash24(&nexthoptablekey, &nexthop->v4.s_addr, 1315 sizeof(nexthop->v4.s_addr)); 1316 break; 1317 case AID_INET6: 1318 h = SipHash24(&nexthoptablekey, &nexthop->v6, 1319 sizeof(struct in6_addr)); 1320 break; 1321 default: 1322 fatalx("nexthop_hash: unsupported AF"); 1323 } 1324 return (&nexthoptable.nexthop_hashtbl[h & nexthoptable.nexthop_hashmask]); 1325 } 1326 1327