1 /* $OpenBSD: kroute.c,v 1.70 2019/06/28 13:32:48 deraadt Exp $ */ 2 3 /* 4 * Copyright (c) 2015, 2016 Renato Westphal <renato@openbsd.org> 5 * Copyright (c) 2009 Michele Marchetto <michele@openbsd.org> 6 * Copyright (c) 2004 Esben Norby <norby@openbsd.org> 7 * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> 8 * 9 * Permission to use, copy, modify, and distribute this software for any 10 * purpose with or without fee is hereby granted, provided that the above 11 * copyright notice and this permission notice appear in all copies. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 14 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 15 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 16 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 17 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 18 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 19 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 20 */ 21 22 #include <sys/types.h> 23 #include <sys/socket.h> 24 #include <sys/ioctl.h> 25 #include <sys/sysctl.h> 26 #include <arpa/inet.h> 27 #include <net/if_dl.h> 28 #include <net/if_types.h> 29 #include <net/route.h> 30 #include <netmpls/mpls.h> 31 #include <errno.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <unistd.h> 35 #include <limits.h> 36 37 #include "ldpd.h" 38 #include "log.h" 39 40 struct { 41 uint32_t rtseq; 42 pid_t pid; 43 int fib_sync; 44 int fd; 45 int ioctl_fd; 46 struct event ev; 47 unsigned int rdomain; 48 } kr_state; 49 50 struct kroute_node { 51 TAILQ_ENTRY(kroute_node) entry; 52 struct kroute_priority *kprio; /* back pointer */ 53 struct kroute r; 54 }; 55 56 struct kroute_priority { 57 TAILQ_ENTRY(kroute_priority) entry; 58 struct kroute_prefix *kp; /* back pointer */ 59 uint8_t priority; 60 TAILQ_HEAD(, kroute_node) nexthops; 61 }; 62 63 struct kroute_prefix { 64 RB_ENTRY(kroute_prefix) entry; 65 int af; 66 union ldpd_addr prefix; 67 uint8_t prefixlen; 68 TAILQ_HEAD(plist, kroute_priority) priorities; 69 }; 70 RB_HEAD(kroute_tree, kroute_prefix); 71 RB_PROTOTYPE(kroute_tree, kroute_prefix, entry, kroute_compare) 72 73 struct kif_addr { 74 TAILQ_ENTRY(kif_addr) entry; 75 struct kaddr a; 76 }; 77 78 struct kif_node { 79 RB_ENTRY(kif_node) entry; 80 TAILQ_HEAD(, kif_addr) addrs; 81 struct kif k; 82 struct kpw *kpw; 83 }; 84 RB_HEAD(kif_tree, kif_node); 85 RB_PROTOTYPE(kif_tree, kif_node, entry, kif_compare) 86 87 static void kr_dispatch_msg(int, short, void *); 88 static void kr_redist_remove(struct kroute *); 89 static int kr_redist_eval(struct kroute *); 90 static void kr_redistribute(struct kroute_prefix *); 91 static __inline int kroute_compare(struct kroute_prefix *, 92 struct kroute_prefix *); 93 static struct kroute_prefix *kroute_find_prefix(int, union ldpd_addr *, 94 uint8_t); 95 static struct kroute_priority *kroute_find_prio(struct kroute_prefix *, 96 uint8_t); 97 static struct kroute_node *kroute_find_gw(struct kroute_priority *, 98 union ldpd_addr *); 99 static int kroute_insert(struct kroute *); 100 static int kroute_uninstall(struct kroute_node *); 101 static int kroute_remove(struct kroute *); 102 static void kroute_clear(void); 103 static __inline int kif_compare(struct kif_node *, struct kif_node *); 104 static struct kif_node *kif_find(unsigned short); 105 static struct kif_node *kif_insert(unsigned short); 106 static int kif_remove(struct kif_node *); 107 static struct kif_node *kif_update(unsigned short, int, struct if_data *, 108 struct sockaddr_dl *, int *); 109 static struct kroute_priority *kroute_match(int, union ldpd_addr *); 110 static uint8_t prefixlen_classful(in_addr_t); 111 static void get_rtaddrs(int, struct sockaddr *, 112 struct sockaddr **); 113 static void if_change(unsigned short, int, struct if_data *, 114 struct sockaddr_dl *); 115 static void if_newaddr(unsigned short, struct sockaddr *, 116 struct sockaddr *, struct sockaddr *); 117 static void if_deladdr(unsigned short, struct sockaddr *, 118 struct sockaddr *, struct sockaddr *); 119 static void if_announce(void *); 120 static int send_rtmsg(int, int, struct kroute *, int); 121 static int send_rtmsg_v4(int fd, int, struct kroute *, int); 122 static int send_rtmsg_v6(int fd, int, struct kroute *, int); 123 static int fetchtable(void); 124 static int fetchifs(void); 125 static int dispatch_rtmsg(void); 126 static int rtmsg_process(char *, size_t); 127 static int rtmsg_process_route(struct rt_msghdr *, 128 struct sockaddr *[RTAX_MAX]); 129 static int kmpw_install(const char *, struct kpw *); 130 static int kmpw_uninstall(const char *); 131 132 RB_GENERATE(kroute_tree, kroute_prefix, entry, kroute_compare) 133 RB_GENERATE(kif_tree, kif_node, entry, kif_compare) 134 135 static struct kroute_tree krt = RB_INITIALIZER(&krt); 136 static struct kif_tree kit = RB_INITIALIZER(&kit); 137 138 int 139 kif_init(void) 140 { 141 if (fetchifs() == -1) 142 return (-1); 143 144 if ((kr_state.ioctl_fd = socket(AF_INET, 145 SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0)) == -1) { 146 log_warn("%s: ioctl socket", __func__); 147 return (-1); 148 } 149 150 return (0); 151 } 152 153 int 154 kr_init(int fs, unsigned int rdomain) 155 { 156 int opt = 0, rcvbuf, default_rcvbuf; 157 socklen_t optlen; 158 unsigned int rtfilter; 159 160 kr_state.fib_sync = fs; 161 kr_state.rdomain = rdomain; 162 163 if ((kr_state.fd = socket(AF_ROUTE, 164 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, 0)) == -1) { 165 log_warn("%s: socket", __func__); 166 return (-1); 167 } 168 169 /* not interested in my own messages */ 170 if (setsockopt(kr_state.fd, SOL_SOCKET, SO_USELOOPBACK, 171 &opt, sizeof(opt)) == -1) 172 log_warn("%s: setsockopt(SO_USELOOPBACK)", __func__); 173 174 /* filter out unwanted messages */ 175 rtfilter = ROUTE_FILTER(RTM_ADD) | ROUTE_FILTER(RTM_GET) | 176 ROUTE_FILTER(RTM_CHANGE) | ROUTE_FILTER(RTM_DELETE) | 177 ROUTE_FILTER(RTM_IFINFO) | ROUTE_FILTER(RTM_NEWADDR) | 178 ROUTE_FILTER(RTM_DELADDR) | ROUTE_FILTER(RTM_IFANNOUNCE); 179 180 if (setsockopt(kr_state.fd, AF_ROUTE, ROUTE_MSGFILTER, 181 &rtfilter, sizeof(rtfilter)) == -1) 182 log_warn("%s: setsockopt(ROUTE_MSGFILTER)", __func__); 183 184 /* grow receive buffer, don't wanna miss messages */ 185 optlen = sizeof(default_rcvbuf); 186 if (getsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF, 187 &default_rcvbuf, &optlen) == -1) 188 log_warn("%s: getsockopt SOL_SOCKET SO_RCVBUF", __func__); 189 else 190 for (rcvbuf = MAX_RTSOCK_BUF; 191 rcvbuf > default_rcvbuf && 192 setsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF, 193 &rcvbuf, sizeof(rcvbuf)) == -1 && errno == ENOBUFS; 194 rcvbuf /= 2) 195 ; /* nothing */ 196 197 kr_state.pid = getpid(); 198 kr_state.rtseq = 1; 199 200 if (fetchtable() == -1) 201 return (-1); 202 203 event_set(&kr_state.ev, kr_state.fd, EV_READ | EV_PERSIST, 204 kr_dispatch_msg, NULL); 205 event_add(&kr_state.ev, NULL); 206 207 return (0); 208 } 209 210 void 211 kif_redistribute(const char *ifname) 212 { 213 struct kif_node *kif; 214 struct kif_addr *ka; 215 216 RB_FOREACH(kif, kif_tree, &kit) { 217 if (kif->k.rdomain != kr_state.rdomain) 218 continue; 219 220 if (ifname && strcmp(kif->k.ifname, ifname) != 0) 221 continue; 222 223 TAILQ_FOREACH(ka, &kif->addrs, entry) 224 main_imsg_compose_ldpe(IMSG_NEWADDR, 0, &ka->a, 225 sizeof(ka->a)); 226 } 227 } 228 229 int 230 kr_change(struct kroute *kr) 231 { 232 struct kroute_prefix *kp; 233 struct kroute_priority *kprio; 234 struct kroute_node *kn; 235 int action = RTM_ADD; 236 237 kp = kroute_find_prefix(kr->af, &kr->prefix, kr->prefixlen); 238 if (kp == NULL) 239 goto miss; 240 241 kprio = kroute_find_prio(kp, kr->priority); 242 if (kprio == NULL) 243 goto miss; 244 245 kn = kroute_find_gw(kprio, &kr->nexthop); 246 if (kn == NULL) 247 goto miss; 248 249 if (kn->r.flags & F_LDPD_INSERTED) 250 action = RTM_CHANGE; 251 252 kn->r.local_label = kr->local_label; 253 kn->r.remote_label = kr->remote_label; 254 kn->r.flags = kn->r.flags | F_LDPD_INSERTED; 255 256 /* send update */ 257 if (send_rtmsg(kr_state.fd, action, &kn->r, AF_MPLS) == -1) 258 return (-1); 259 260 if (ldp_addrisset(kn->r.af, &kn->r.nexthop) && 261 kn->r.remote_label != NO_LABEL) { 262 if (send_rtmsg(kr_state.fd, RTM_CHANGE, &kn->r, kn->r.af) == -1) 263 return (-1); 264 } 265 266 return (0); 267 268 miss: 269 log_warnx("%s: lost FEC %s/%d nexthop %s", __func__, 270 log_addr(kr->af, &kr->prefix), kr->prefixlen, 271 log_addr(kr->af, &kr->nexthop)); 272 return (-1); 273 } 274 275 int 276 kr_delete(struct kroute *kr) 277 { 278 struct kroute_prefix *kp; 279 struct kroute_priority *kprio; 280 struct kroute_node *kn; 281 int update = 0; 282 283 kp = kroute_find_prefix(kr->af, &kr->prefix, kr->prefixlen); 284 if (kp == NULL) 285 return (0); 286 kprio = kroute_find_prio(kp, kr->priority); 287 if (kprio == NULL) 288 return (0); 289 kn = kroute_find_gw(kprio, &kr->nexthop); 290 if (kn == NULL) 291 return (0); 292 293 if (!(kn->r.flags & F_LDPD_INSERTED)) 294 return (0); 295 if (ldp_addrisset(kn->r.af, &kn->r.nexthop) && 296 kn->r.remote_label != NO_LABEL) 297 update = 1; 298 299 /* kill MPLS LSP */ 300 if (send_rtmsg(kr_state.fd, RTM_DELETE, &kn->r, AF_MPLS) == -1) 301 return (-1); 302 303 kn->r.flags &= ~F_LDPD_INSERTED; 304 kn->r.local_label = NO_LABEL; 305 kn->r.remote_label = NO_LABEL; 306 307 if (update && 308 send_rtmsg(kr_state.fd, RTM_CHANGE, &kn->r, kn->r.af) == -1) 309 return (-1); 310 311 return (0); 312 } 313 314 void 315 kr_shutdown(void) 316 { 317 kr_fib_decouple(); 318 kroute_clear(); 319 kif_clear(); 320 } 321 322 void 323 kr_fib_couple(void) 324 { 325 struct kroute_prefix *kp; 326 struct kroute_priority *kprio; 327 struct kroute_node *kn; 328 struct kif_node *kif; 329 330 if (kr_state.fib_sync == 1) /* already coupled */ 331 return; 332 333 kr_state.fib_sync = 1; 334 335 RB_FOREACH(kp, kroute_tree, &krt) { 336 kprio = TAILQ_FIRST(&kp->priorities); 337 if (kprio == NULL) 338 continue; 339 340 TAILQ_FOREACH(kn, &kprio->nexthops, entry) { 341 if (!(kn->r.flags & F_LDPD_INSERTED)) 342 continue; 343 344 send_rtmsg(kr_state.fd, RTM_ADD, &kn->r, AF_MPLS); 345 346 if (ldp_addrisset(kn->r.af, &kn->r.nexthop) && 347 kn->r.remote_label != NO_LABEL) { 348 send_rtmsg(kr_state.fd, RTM_CHANGE, 349 &kn->r, kn->r.af); 350 } 351 } 352 } 353 354 RB_FOREACH(kif, kif_tree, &kit) 355 if (kif->kpw) 356 kmpw_install(kif->k.ifname, kif->kpw); 357 358 log_info("kernel routing table coupled"); 359 } 360 361 void 362 kr_fib_decouple(void) 363 { 364 struct kroute_prefix *kp; 365 struct kroute_priority *kprio; 366 struct kroute_node *kn; 367 uint32_t rl; 368 struct kif_node *kif; 369 370 if (kr_state.fib_sync == 0) /* already decoupled */ 371 return; 372 373 RB_FOREACH(kp, kroute_tree, &krt) { 374 kprio = TAILQ_FIRST(&kp->priorities); 375 if (kprio == NULL) 376 continue; 377 378 TAILQ_FOREACH(kn, &kprio->nexthops, entry) { 379 if (!(kn->r.flags & F_LDPD_INSERTED)) 380 continue; 381 382 send_rtmsg(kr_state.fd, RTM_DELETE, 383 &kn->r, AF_MPLS); 384 385 if (ldp_addrisset(kn->r.af, &kn->r.nexthop) && 386 kn->r.remote_label != NO_LABEL) { 387 rl = kn->r.remote_label; 388 kn->r.remote_label = NO_LABEL; 389 send_rtmsg(kr_state.fd, RTM_CHANGE, 390 &kn->r, kn->r.af); 391 kn->r.remote_label = rl; 392 } 393 } 394 } 395 396 RB_FOREACH(kif, kif_tree, &kit) 397 if (kif->kpw) 398 kmpw_uninstall(kif->k.ifname); 399 400 kr_state.fib_sync = 0; 401 log_info("kernel routing table decoupled"); 402 } 403 404 void 405 kr_change_egress_label(int af, int was_implicit) 406 { 407 struct kroute_prefix *kp; 408 struct kroute_priority *kprio; 409 struct kroute_node *kn; 410 411 RB_FOREACH(kp, kroute_tree, &krt) { 412 if (kp->af != af) 413 continue; 414 415 TAILQ_FOREACH(kprio, &kp->priorities, entry) { 416 TAILQ_FOREACH(kn, &kprio->nexthops, entry) { 417 if (kn->r.local_label > MPLS_LABEL_RESERVED_MAX) 418 continue; 419 420 if (!was_implicit) { 421 kn->r.local_label = MPLS_LABEL_IMPLNULL; 422 continue; 423 } 424 425 switch (kn->r.af) { 426 case AF_INET: 427 kn->r.local_label = MPLS_LABEL_IPV4NULL; 428 break; 429 case AF_INET6: 430 kn->r.local_label = MPLS_LABEL_IPV6NULL; 431 break; 432 default: 433 break; 434 } 435 } 436 } 437 } 438 } 439 440 /* ARGSUSED */ 441 static void 442 kr_dispatch_msg(int fd, short event, void *bula) 443 { 444 if (dispatch_rtmsg() == -1) 445 event_loopexit(NULL); 446 } 447 448 void 449 kr_show_route(struct imsg *imsg) 450 { 451 struct kroute_prefix *kp; 452 struct kroute_priority *kprio; 453 struct kroute_node *kn; 454 int flags; 455 struct kroute kr; 456 457 switch (imsg->hdr.type) { 458 case IMSG_CTL_KROUTE: 459 if (imsg->hdr.len != IMSG_HEADER_SIZE + sizeof(flags)) { 460 log_warnx("%s: wrong imsg len", __func__); 461 return; 462 } 463 memcpy(&flags, imsg->data, sizeof(flags)); 464 465 RB_FOREACH(kp, kroute_tree, &krt) 466 TAILQ_FOREACH(kprio, &kp->priorities, entry) 467 TAILQ_FOREACH(kn, &kprio->nexthops, entry) { 468 if (flags && !(kn->r.flags & flags)) 469 continue; 470 471 main_imsg_compose_ldpe(IMSG_CTL_KROUTE, 472 imsg->hdr.pid, &kn->r, 473 sizeof(kn->r)); 474 } 475 break; 476 case IMSG_CTL_KROUTE_ADDR: 477 if (imsg->hdr.len != IMSG_HEADER_SIZE + sizeof(kr)) { 478 log_warnx("%s: wrong imsg len", __func__); 479 return; 480 } 481 memcpy(&kr, imsg->data, sizeof(kr)); 482 483 kprio = kroute_match(kr.af, &kr.prefix); 484 if (kprio == NULL) 485 break; 486 487 TAILQ_FOREACH(kn, &kprio->nexthops, entry) 488 main_imsg_compose_ldpe(IMSG_CTL_KROUTE, imsg->hdr.pid, 489 &kn->r, sizeof(kn->r)); 490 break; 491 default: 492 log_debug("%s: error handling imsg", __func__); 493 break; 494 } 495 main_imsg_compose_ldpe(IMSG_CTL_END, imsg->hdr.pid, NULL, 0); 496 } 497 498 void 499 kr_ifinfo(char *ifname, pid_t pid) 500 { 501 struct kif_node *kif; 502 503 RB_FOREACH(kif, kif_tree, &kit) 504 if (ifname == NULL || !strcmp(ifname, kif->k.ifname)) { 505 main_imsg_compose_ldpe(IMSG_CTL_IFINFO, 506 pid, &kif->k, sizeof(kif->k)); 507 } 508 509 main_imsg_compose_ldpe(IMSG_CTL_END, pid, NULL, 0); 510 } 511 512 static void 513 kr_redist_remove(struct kroute *kr) 514 { 515 /* was the route redistributed? */ 516 if ((kr->flags & F_REDISTRIBUTED) == 0) 517 return; 518 519 /* remove redistributed flag */ 520 kr->flags &= ~F_REDISTRIBUTED; 521 main_imsg_compose_lde(IMSG_NETWORK_DEL, 0, kr, sizeof(*kr)); 522 } 523 524 static int 525 kr_redist_eval(struct kroute *kr) 526 { 527 /* was the route redistributed? */ 528 if (kr->flags & F_REDISTRIBUTED) 529 goto dont_redistribute; 530 531 /* Dynamic routes are not redistributable. */ 532 if (kr->flags & F_DYNAMIC) 533 goto dont_redistribute; 534 535 /* filter-out non-redistributable addresses */ 536 if (bad_addr(kr->af, &kr->prefix) || 537 (kr->af == AF_INET6 && IN6_IS_SCOPE_EMBED(&kr->prefix.v6))) 538 goto dont_redistribute; 539 540 /* do not redistribute the default route */ 541 if (kr->prefixlen == 0) 542 goto dont_redistribute; 543 544 /* 545 * Consider networks with nexthop loopback as not redistributable 546 * unless it is a reject or blackhole route. 547 */ 548 switch (kr->af) { 549 case AF_INET: 550 if (kr->nexthop.v4.s_addr == htonl(INADDR_LOOPBACK) && 551 !(kr->flags & (F_BLACKHOLE|F_REJECT))) 552 goto dont_redistribute; 553 break; 554 case AF_INET6: 555 if (IN6_IS_ADDR_LOOPBACK(&kr->nexthop.v6) && 556 !(kr->flags & (F_BLACKHOLE|F_REJECT))) 557 goto dont_redistribute; 558 break; 559 default: 560 log_debug("%s: unexpected address-family", __func__); 561 break; 562 } 563 564 /* prefix should be redistributed */ 565 kr->flags |= F_REDISTRIBUTED; 566 main_imsg_compose_lde(IMSG_NETWORK_ADD, 0, kr, sizeof(*kr)); 567 return (1); 568 569 dont_redistribute: 570 return (0); 571 } 572 573 static void 574 kr_redistribute(struct kroute_prefix *kp) 575 { 576 struct kroute_priority *kprio; 577 struct kroute_node *kn; 578 579 TAILQ_FOREACH_REVERSE(kprio, &kp->priorities, plist, entry) { 580 if (kprio == TAILQ_FIRST(&kp->priorities)) { 581 TAILQ_FOREACH(kn, &kprio->nexthops, entry) 582 kr_redist_eval(&kn->r); 583 } else { 584 TAILQ_FOREACH(kn, &kprio->nexthops, entry) 585 kr_redist_remove(&kn->r); 586 } 587 } 588 } 589 590 /* rb-tree compare */ 591 static __inline int 592 kroute_compare(struct kroute_prefix *a, struct kroute_prefix *b) 593 { 594 int addrcmp; 595 596 if (a->af < b->af) 597 return (-1); 598 if (a->af > b->af) 599 return (1); 600 601 addrcmp = ldp_addrcmp(a->af, &a->prefix, &b->prefix); 602 if (addrcmp != 0) 603 return (addrcmp); 604 605 if (a->prefixlen < b->prefixlen) 606 return (-1); 607 if (a->prefixlen > b->prefixlen) 608 return (1); 609 610 return (0); 611 } 612 613 /* tree management */ 614 static struct kroute_prefix * 615 kroute_find_prefix(int af, union ldpd_addr *prefix, uint8_t prefixlen) 616 { 617 struct kroute_prefix s; 618 619 s.af = af; 620 s.prefix = *prefix; 621 s.prefixlen = prefixlen; 622 623 return (RB_FIND(kroute_tree, &krt, &s)); 624 } 625 626 static struct kroute_priority * 627 kroute_find_prio(struct kroute_prefix *kp, uint8_t prio) 628 { 629 struct kroute_priority *kprio; 630 631 /* RTP_ANY here picks the lowest priority node */ 632 if (prio == RTP_ANY) 633 return (TAILQ_FIRST(&kp->priorities)); 634 635 TAILQ_FOREACH(kprio, &kp->priorities, entry) 636 if (kprio->priority == prio) 637 return (kprio); 638 639 return (NULL); 640 } 641 642 static struct kroute_node * 643 kroute_find_gw(struct kroute_priority *kprio, union ldpd_addr *nh) 644 { 645 struct kroute_node *kn; 646 647 TAILQ_FOREACH(kn, &kprio->nexthops, entry) 648 if (ldp_addrcmp(kprio->kp->af, &kn->r.nexthop, nh) == 0) 649 return (kn); 650 651 return (NULL); 652 } 653 654 static int 655 kroute_insert(struct kroute *kr) 656 { 657 struct kroute_prefix *kp; 658 struct kroute_priority *kprio, *tmp; 659 struct kroute_node *kn; 660 661 kp = kroute_find_prefix(kr->af, &kr->prefix, kr->prefixlen); 662 if (kp == NULL) { 663 kp = calloc(1, sizeof((*kp))); 664 if (kp == NULL) 665 fatal(__func__); 666 kp->af = kr->af; 667 kp->prefix = kr->prefix; 668 kp->prefixlen = kr->prefixlen; 669 TAILQ_INIT(&kp->priorities); 670 RB_INSERT(kroute_tree, &krt, kp); 671 } 672 673 kprio = kroute_find_prio(kp, kr->priority); 674 if (kprio == NULL) { 675 kprio = calloc(1, sizeof(*kprio)); 676 if (kprio == NULL) 677 fatal(__func__); 678 kprio->kp = kp; 679 kprio->priority = kr->priority; 680 TAILQ_INIT(&kprio->nexthops); 681 682 /* lower priorities first */ 683 TAILQ_FOREACH(tmp, &kp->priorities, entry) 684 if (tmp->priority > kprio->priority) 685 break; 686 if (tmp) 687 TAILQ_INSERT_BEFORE(tmp, kprio, entry); 688 else 689 TAILQ_INSERT_TAIL(&kp->priorities, kprio, entry); 690 } 691 692 kn = kroute_find_gw(kprio, &kr->nexthop); 693 if (kn == NULL) { 694 kn = calloc(1, sizeof(*kn)); 695 if (kn == NULL) 696 fatal(__func__); 697 kn->kprio = kprio; 698 kn->r = *kr; 699 TAILQ_INSERT_TAIL(&kprio->nexthops, kn, entry); 700 } 701 702 kr_redistribute(kp); 703 return (0); 704 } 705 706 static int 707 kroute_uninstall(struct kroute_node *kn) 708 { 709 /* kill MPLS LSP if one was installed */ 710 if (kn->r.flags & F_LDPD_INSERTED) 711 if (send_rtmsg(kr_state.fd, RTM_DELETE, &kn->r, AF_MPLS) == -1) 712 return (-1); 713 714 return (0); 715 } 716 717 static int 718 kroute_remove(struct kroute *kr) 719 { 720 struct kroute_prefix *kp; 721 struct kroute_priority *kprio; 722 struct kroute_node *kn; 723 724 kp = kroute_find_prefix(kr->af, &kr->prefix, kr->prefixlen); 725 if (kp == NULL) 726 goto notfound; 727 kprio = kroute_find_prio(kp, kr->priority); 728 if (kprio == NULL) 729 goto notfound; 730 kn = kroute_find_gw(kprio, &kr->nexthop); 731 if (kn == NULL) 732 goto notfound; 733 734 kr_redist_remove(&kn->r); 735 kroute_uninstall(kn); 736 737 TAILQ_REMOVE(&kprio->nexthops, kn, entry); 738 free(kn); 739 740 if (TAILQ_EMPTY(&kprio->nexthops)) { 741 TAILQ_REMOVE(&kp->priorities, kprio, entry); 742 free(kprio); 743 } 744 745 if (TAILQ_EMPTY(&kp->priorities)) { 746 if (RB_REMOVE(kroute_tree, &krt, kp) == NULL) { 747 log_warnx("%s failed for %s/%u", __func__, 748 log_addr(kr->af, &kr->prefix), kp->prefixlen); 749 return (-1); 750 } 751 free(kp); 752 } else 753 kr_redistribute(kp); 754 755 return (0); 756 757 notfound: 758 log_warnx("%s failed to find %s/%u", __func__, 759 log_addr(kr->af, &kr->prefix), kr->prefixlen); 760 return (-1); 761 } 762 763 static void 764 kroute_clear(void) 765 { 766 struct kroute_prefix *kp; 767 struct kroute_priority *kprio; 768 struct kroute_node *kn; 769 770 while ((kp = RB_MIN(kroute_tree, &krt)) != NULL) { 771 while ((kprio = TAILQ_FIRST(&kp->priorities)) != NULL) { 772 while ((kn = TAILQ_FIRST(&kprio->nexthops)) != NULL) { 773 kr_redist_remove(&kn->r); 774 kroute_uninstall(kn); 775 TAILQ_REMOVE(&kprio->nexthops, kn, entry); 776 free(kn); 777 } 778 TAILQ_REMOVE(&kp->priorities, kprio, entry); 779 free(kprio); 780 } 781 RB_REMOVE(kroute_tree, &krt, kp); 782 free(kp); 783 } 784 } 785 786 static __inline int 787 kif_compare(struct kif_node *a, struct kif_node *b) 788 { 789 return (b->k.ifindex - a->k.ifindex); 790 } 791 792 /* tree management */ 793 static struct kif_node * 794 kif_find(unsigned short ifindex) 795 { 796 struct kif_node s; 797 798 memset(&s, 0, sizeof(s)); 799 s.k.ifindex = ifindex; 800 801 return (RB_FIND(kif_tree, &kit, &s)); 802 } 803 804 struct kif * 805 kif_findname(char *ifname) 806 { 807 struct kif_node *kif; 808 809 RB_FOREACH(kif, kif_tree, &kit) 810 if (!strcmp(ifname, kif->k.ifname)) 811 return (&kif->k); 812 813 return (NULL); 814 } 815 816 static struct kif_node * 817 kif_insert(unsigned short ifindex) 818 { 819 struct kif_node *kif; 820 821 if ((kif = calloc(1, sizeof(struct kif_node))) == NULL) 822 return (NULL); 823 824 kif->k.ifindex = ifindex; 825 TAILQ_INIT(&kif->addrs); 826 827 if (RB_INSERT(kif_tree, &kit, kif) != NULL) 828 fatalx("kif_insert: RB_INSERT"); 829 830 return (kif); 831 } 832 833 static int 834 kif_remove(struct kif_node *kif) 835 { 836 struct kif_addr *ka; 837 838 if (RB_REMOVE(kif_tree, &kit, kif) == NULL) { 839 log_warnx("RB_REMOVE(kif_tree, &kit, kif)"); 840 return (-1); 841 } 842 843 while ((ka = TAILQ_FIRST(&kif->addrs)) != NULL) { 844 main_imsg_compose_ldpe(IMSG_DELADDR, 0, &ka->a, sizeof(ka->a)); 845 TAILQ_REMOVE(&kif->addrs, ka, entry); 846 free(ka); 847 } 848 free(kif); 849 return (0); 850 } 851 852 void 853 kif_clear(void) 854 { 855 struct kif_node *kif; 856 857 while ((kif = RB_MIN(kif_tree, &kit)) != NULL) 858 kif_remove(kif); 859 } 860 861 static struct kif_node * 862 kif_update(unsigned short ifindex, int flags, struct if_data *ifd, 863 struct sockaddr_dl *sdl, int *link_old) 864 { 865 struct kif_node *kif; 866 867 if ((kif = kif_find(ifindex)) == NULL) { 868 if ((kif = kif_insert(ifindex)) == NULL) 869 return (NULL); 870 } else 871 *link_old = (kif->k.flags & IFF_UP) && 872 LINK_STATE_IS_UP(kif->k.link_state); 873 874 kif->k.flags = flags; 875 kif->k.link_state = ifd->ifi_link_state; 876 if (sdl) 877 memcpy(kif->k.mac, LLADDR(sdl), sizeof(kif->k.mac)); 878 kif->k.if_type = ifd->ifi_type; 879 kif->k.baudrate = ifd->ifi_baudrate; 880 kif->k.mtu = ifd->ifi_mtu; 881 kif->k.rdomain = ifd->ifi_rdomain; 882 883 if (sdl && sdl->sdl_family == AF_LINK) { 884 if (sdl->sdl_nlen >= sizeof(kif->k.ifname)) 885 memcpy(kif->k.ifname, sdl->sdl_data, 886 sizeof(kif->k.ifname) - 1); 887 else if (sdl->sdl_nlen > 0) 888 memcpy(kif->k.ifname, sdl->sdl_data, 889 sdl->sdl_nlen); 890 /* string already terminated via calloc() */ 891 } 892 893 return (kif); 894 } 895 896 static struct kroute_priority * 897 kroute_match(int af, union ldpd_addr *key) 898 { 899 int i, maxprefixlen; 900 struct kroute_prefix *kp; 901 struct kroute_priority *kprio; 902 union ldpd_addr addr; 903 904 switch (af) { 905 case AF_INET: 906 maxprefixlen = 32; 907 break; 908 case AF_INET6: 909 maxprefixlen = 128; 910 break; 911 default: 912 log_warnx("%s: unknown af", __func__); 913 return (NULL); 914 } 915 916 for (i = maxprefixlen; i >= 0; i--) { 917 ldp_applymask(af, &addr, key, i); 918 919 kp = kroute_find_prefix(af, &addr, i); 920 if (kp == NULL) 921 continue; 922 923 kprio = kroute_find_prio(kp, RTP_ANY); 924 if (kprio != NULL) 925 return (kprio); 926 } 927 928 return (NULL); 929 } 930 931 /* misc */ 932 static uint8_t 933 prefixlen_classful(in_addr_t ina) 934 { 935 /* it hurt to write this. */ 936 937 if (ina >= 0xf0000000U) /* class E */ 938 return (32); 939 else if (ina >= 0xe0000000U) /* class D */ 940 return (4); 941 else if (ina >= 0xc0000000U) /* class C */ 942 return (24); 943 else if (ina >= 0x80000000U) /* class B */ 944 return (16); 945 else /* class A */ 946 return (8); 947 } 948 949 #define ROUNDUP(a) \ 950 ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) 951 952 static void 953 get_rtaddrs(int addrs, struct sockaddr *sa, struct sockaddr **rti_info) 954 { 955 int i; 956 957 for (i = 0; i < RTAX_MAX; i++) { 958 if (addrs & (1 << i)) { 959 rti_info[i] = sa; 960 sa = (struct sockaddr *)((char *)(sa) + 961 ROUNDUP(sa->sa_len)); 962 } else 963 rti_info[i] = NULL; 964 } 965 } 966 967 static void 968 if_change(unsigned short ifindex, int flags, struct if_data *ifd, 969 struct sockaddr_dl *sdl) 970 { 971 struct kif_node *kif; 972 struct kif_addr *ka; 973 int link_old = 0, link_new; 974 975 kif = kif_update(ifindex, flags, ifd, sdl, &link_old); 976 if (!kif) { 977 log_warn("%s: kif_update(%u)", __func__, ifindex); 978 return; 979 } 980 link_new = (kif->k.flags & IFF_UP) && 981 LINK_STATE_IS_UP(kif->k.link_state); 982 983 if (link_new == link_old) 984 return; 985 986 main_imsg_compose_ldpe(IMSG_IFSTATUS, 0, &kif->k, sizeof(struct kif)); 987 if (link_new) { 988 TAILQ_FOREACH(ka, &kif->addrs, entry) 989 main_imsg_compose_ldpe(IMSG_NEWADDR, 0, &ka->a, 990 sizeof(ka->a)); 991 } else { 992 TAILQ_FOREACH(ka, &kif->addrs, entry) 993 main_imsg_compose_ldpe(IMSG_DELADDR, 0, &ka->a, 994 sizeof(ka->a)); 995 } 996 } 997 998 static void 999 if_newaddr(unsigned short ifindex, struct sockaddr *ifa, struct sockaddr *mask, 1000 struct sockaddr *brd) 1001 { 1002 struct kif_node *kif; 1003 struct sockaddr_in *ifa4, *mask4, *brd4; 1004 struct sockaddr_in6 *ifa6, *mask6, *brd6; 1005 struct kif_addr *ka; 1006 1007 if (ifa == NULL) 1008 return; 1009 if ((kif = kif_find(ifindex)) == NULL) { 1010 log_warnx("%s: corresponding if %d not found", __func__, 1011 ifindex); 1012 return; 1013 } 1014 1015 switch (ifa->sa_family) { 1016 case AF_INET: 1017 ifa4 = (struct sockaddr_in *) ifa; 1018 mask4 = (struct sockaddr_in *) mask; 1019 brd4 = (struct sockaddr_in *) brd; 1020 1021 /* filter out unwanted addresses */ 1022 if (bad_addr_v4(ifa4->sin_addr)) 1023 return; 1024 1025 if ((ka = calloc(1, sizeof(struct kif_addr))) == NULL) 1026 fatal("if_newaddr"); 1027 ka->a.addr.v4 = ifa4->sin_addr; 1028 if (mask4) 1029 ka->a.prefixlen = 1030 mask2prefixlen(mask4->sin_addr.s_addr); 1031 if (brd4) 1032 ka->a.dstbrd.v4 = brd4->sin_addr; 1033 break; 1034 case AF_INET6: 1035 ifa6 = (struct sockaddr_in6 *) ifa; 1036 mask6 = (struct sockaddr_in6 *) mask; 1037 brd6 = (struct sockaddr_in6 *) brd; 1038 1039 /* We only care about link-local and global-scope. */ 1040 if (bad_addr_v6(&ifa6->sin6_addr)) 1041 return; 1042 1043 clearscope(&ifa6->sin6_addr); 1044 1045 if ((ka = calloc(1, sizeof(struct kif_addr))) == NULL) 1046 fatal("if_newaddr"); 1047 ka->a.addr.v6 = ifa6->sin6_addr; 1048 if (mask6) 1049 ka->a.prefixlen = mask2prefixlen6(mask6); 1050 if (brd6) 1051 ka->a.dstbrd.v6 = brd6->sin6_addr; 1052 break; 1053 default: 1054 return; 1055 } 1056 1057 ka->a.ifindex = ifindex; 1058 ka->a.af = ifa->sa_family; 1059 TAILQ_INSERT_TAIL(&kif->addrs, ka, entry); 1060 1061 /* notify ldpe about new address */ 1062 main_imsg_compose_ldpe(IMSG_NEWADDR, 0, &ka->a, sizeof(ka->a)); 1063 } 1064 1065 static void 1066 if_deladdr(unsigned short ifindex, struct sockaddr *ifa, struct sockaddr *mask, 1067 struct sockaddr *brd) 1068 { 1069 struct kif_node *kif; 1070 struct sockaddr_in *ifa4, *mask4, *brd4; 1071 struct sockaddr_in6 *ifa6, *mask6, *brd6; 1072 struct kaddr k; 1073 struct kif_addr *ka, *nka; 1074 1075 if (ifa == NULL) 1076 return; 1077 if ((kif = kif_find(ifindex)) == NULL) { 1078 log_warnx("%s: corresponding if %d not found", __func__, 1079 ifindex); 1080 return; 1081 } 1082 1083 memset(&k, 0, sizeof(k)); 1084 k.af = ifa->sa_family; 1085 switch (ifa->sa_family) { 1086 case AF_INET: 1087 ifa4 = (struct sockaddr_in *) ifa; 1088 mask4 = (struct sockaddr_in *) mask; 1089 brd4 = (struct sockaddr_in *) brd; 1090 1091 /* filter out unwanted addresses */ 1092 if (bad_addr_v4(ifa4->sin_addr)) 1093 return; 1094 1095 k.addr.v4 = ifa4->sin_addr; 1096 if (mask4) 1097 k.prefixlen = mask2prefixlen(mask4->sin_addr.s_addr); 1098 if (brd4) 1099 k.dstbrd.v4 = brd4->sin_addr; 1100 break; 1101 case AF_INET6: 1102 ifa6 = (struct sockaddr_in6 *) ifa; 1103 mask6 = (struct sockaddr_in6 *) mask; 1104 brd6 = (struct sockaddr_in6 *) brd; 1105 1106 /* We only care about link-local and global-scope. */ 1107 if (bad_addr_v6(&ifa6->sin6_addr)) 1108 return; 1109 1110 clearscope(&ifa6->sin6_addr); 1111 1112 k.addr.v6 = ifa6->sin6_addr; 1113 if (mask6) 1114 k.prefixlen = mask2prefixlen6(mask6); 1115 if (brd6) 1116 k.dstbrd.v6 = brd6->sin6_addr; 1117 break; 1118 default: 1119 return; 1120 } 1121 1122 for (ka = TAILQ_FIRST(&kif->addrs); ka != NULL; ka = nka) { 1123 nka = TAILQ_NEXT(ka, entry); 1124 1125 if (ka->a.af != k.af || 1126 ka->a.prefixlen != k.prefixlen || 1127 ldp_addrcmp(ka->a.af, &ka->a.addr, &k.addr)) 1128 continue; 1129 1130 /* notify ldpe about removed address */ 1131 main_imsg_compose_ldpe(IMSG_DELADDR, 0, &ka->a, sizeof(ka->a)); 1132 TAILQ_REMOVE(&kif->addrs, ka, entry); 1133 free(ka); 1134 return; 1135 } 1136 } 1137 1138 static void 1139 if_announce(void *msg) 1140 { 1141 struct if_announcemsghdr *ifan; 1142 struct kif_node *kif; 1143 1144 ifan = msg; 1145 1146 switch (ifan->ifan_what) { 1147 case IFAN_ARRIVAL: 1148 kif = kif_insert(ifan->ifan_index); 1149 if (kif) 1150 strlcpy(kif->k.ifname, ifan->ifan_name, 1151 sizeof(kif->k.ifname)); 1152 break; 1153 case IFAN_DEPARTURE: 1154 kif = kif_find(ifan->ifan_index); 1155 if (kif) 1156 kif_remove(kif); 1157 break; 1158 } 1159 } 1160 1161 /* rtsock */ 1162 static int 1163 send_rtmsg(int fd, int action, struct kroute *kr, int family) 1164 { 1165 switch (kr->af) { 1166 case AF_INET: 1167 return (send_rtmsg_v4(fd, action, kr, family)); 1168 case AF_INET6: 1169 return (send_rtmsg_v6(fd, action, kr, family)); 1170 default: 1171 fatalx("send_rtmsg: unknown af"); 1172 } 1173 } 1174 1175 static int 1176 send_rtmsg_v4(int fd, int action, struct kroute *kr, int family) 1177 { 1178 struct iovec iov[5]; 1179 struct rt_msghdr hdr; 1180 struct sockaddr_mpls label_in, label_out; 1181 struct sockaddr_in dst, mask, nexthop; 1182 int iovcnt = 0; 1183 1184 if (kr_state.fib_sync == 0) 1185 return (0); 1186 1187 /* 1188 * Reserved labels (implicit and explicit NULL) should not be added 1189 * to the FIB. 1190 */ 1191 if (family == AF_MPLS && kr->local_label < MPLS_LABEL_RESERVED_MAX) 1192 return (0); 1193 1194 /* initialize header */ 1195 memset(&hdr, 0, sizeof(hdr)); 1196 hdr.rtm_version = RTM_VERSION; 1197 1198 hdr.rtm_type = action; 1199 hdr.rtm_flags = RTF_UP; 1200 hdr.rtm_fmask = RTF_MPLS; 1201 hdr.rtm_seq = kr_state.rtseq++; /* overflow doesn't matter */ 1202 hdr.rtm_msglen = sizeof(hdr); 1203 hdr.rtm_hdrlen = sizeof(struct rt_msghdr); 1204 hdr.rtm_priority = kr->priority; 1205 hdr.rtm_tableid = kr_state.rdomain; /* rtableid */ 1206 /* adjust iovec */ 1207 iov[iovcnt].iov_base = &hdr; 1208 iov[iovcnt++].iov_len = sizeof(hdr); 1209 1210 if (family == AF_MPLS) { 1211 memset(&label_in, 0, sizeof(label_in)); 1212 label_in.smpls_len = sizeof(label_in); 1213 label_in.smpls_family = AF_MPLS; 1214 label_in.smpls_label = 1215 htonl(kr->local_label << MPLS_LABEL_OFFSET); 1216 /* adjust header */ 1217 hdr.rtm_flags |= RTF_MPLS | RTF_MPATH; 1218 hdr.rtm_addrs |= RTA_DST; 1219 hdr.rtm_msglen += sizeof(label_in); 1220 /* adjust iovec */ 1221 iov[iovcnt].iov_base = &label_in; 1222 iov[iovcnt++].iov_len = sizeof(label_in); 1223 } else { 1224 memset(&dst, 0, sizeof(dst)); 1225 dst.sin_len = sizeof(dst); 1226 dst.sin_family = AF_INET; 1227 dst.sin_addr = kr->prefix.v4; 1228 /* adjust header */ 1229 hdr.rtm_addrs |= RTA_DST; 1230 hdr.rtm_msglen += sizeof(dst); 1231 /* adjust iovec */ 1232 iov[iovcnt].iov_base = &dst; 1233 iov[iovcnt++].iov_len = sizeof(dst); 1234 } 1235 1236 memset(&nexthop, 0, sizeof(nexthop)); 1237 nexthop.sin_len = sizeof(nexthop); 1238 nexthop.sin_family = AF_INET; 1239 nexthop.sin_addr = kr->nexthop.v4; 1240 /* adjust header */ 1241 hdr.rtm_flags |= RTF_GATEWAY; 1242 hdr.rtm_addrs |= RTA_GATEWAY; 1243 hdr.rtm_msglen += sizeof(nexthop); 1244 /* adjust iovec */ 1245 iov[iovcnt].iov_base = &nexthop; 1246 iov[iovcnt++].iov_len = sizeof(nexthop); 1247 1248 if (family == AF_INET) { 1249 memset(&mask, 0, sizeof(mask)); 1250 mask.sin_len = sizeof(mask); 1251 mask.sin_family = AF_INET; 1252 mask.sin_addr.s_addr = prefixlen2mask(kr->prefixlen); 1253 /* adjust header */ 1254 hdr.rtm_addrs |= RTA_NETMASK; 1255 hdr.rtm_msglen += sizeof(mask); 1256 /* adjust iovec */ 1257 iov[iovcnt].iov_base = &mask; 1258 iov[iovcnt++].iov_len = sizeof(mask); 1259 } 1260 1261 /* If action is RTM_DELETE we have to get rid of MPLS infos */ 1262 if (kr->remote_label != NO_LABEL && action != RTM_DELETE) { 1263 memset(&label_out, 0, sizeof(label_out)); 1264 label_out.smpls_len = sizeof(label_out); 1265 label_out.smpls_family = AF_MPLS; 1266 label_out.smpls_label = 1267 htonl(kr->remote_label << MPLS_LABEL_OFFSET); 1268 /* adjust header */ 1269 hdr.rtm_addrs |= RTA_SRC; 1270 hdr.rtm_flags |= RTF_MPLS; 1271 hdr.rtm_msglen += sizeof(label_out); 1272 /* adjust iovec */ 1273 iov[iovcnt].iov_base = &label_out; 1274 iov[iovcnt++].iov_len = sizeof(label_out); 1275 1276 if (kr->remote_label == MPLS_LABEL_IMPLNULL) { 1277 if (family == AF_MPLS) 1278 hdr.rtm_mpls = MPLS_OP_POP; 1279 else 1280 return (0); 1281 } else { 1282 if (family == AF_MPLS) 1283 hdr.rtm_mpls = MPLS_OP_SWAP; 1284 else 1285 hdr.rtm_mpls = MPLS_OP_PUSH; 1286 } 1287 } 1288 1289 retry: 1290 if (writev(fd, iov, iovcnt) == -1) { 1291 if (errno == ESRCH) { 1292 if (hdr.rtm_type == RTM_CHANGE && family == AF_MPLS) { 1293 hdr.rtm_type = RTM_ADD; 1294 goto retry; 1295 } else if (hdr.rtm_type == RTM_DELETE) { 1296 log_info("route %s/%u vanished before delete", 1297 inet_ntoa(kr->prefix.v4), kr->prefixlen); 1298 return (-1); 1299 } 1300 } 1301 log_warn("%s action %u, af %s, prefix %s/%u", __func__, 1302 hdr.rtm_type, af_name(family), inet_ntoa(kr->prefix.v4), 1303 kr->prefixlen); 1304 return (-1); 1305 } 1306 1307 return (0); 1308 } 1309 1310 static int 1311 send_rtmsg_v6(int fd, int action, struct kroute *kr, int family) 1312 { 1313 struct iovec iov[5]; 1314 struct rt_msghdr hdr; 1315 struct sockaddr_mpls label_in, label_out; 1316 struct sockaddr_in6 dst, mask, nexthop; 1317 int iovcnt = 0; 1318 1319 if (kr_state.fib_sync == 0) 1320 return (0); 1321 1322 /* 1323 * Reserved labels (implicit and explicit NULL) should not be added 1324 * to the FIB. 1325 */ 1326 if (family == AF_MPLS && kr->local_label < MPLS_LABEL_RESERVED_MAX) 1327 return (0); 1328 1329 /* initialize header */ 1330 memset(&hdr, 0, sizeof(hdr)); 1331 hdr.rtm_version = RTM_VERSION; 1332 1333 hdr.rtm_type = action; 1334 hdr.rtm_flags = RTF_UP; 1335 hdr.rtm_fmask = RTF_MPLS; 1336 hdr.rtm_seq = kr_state.rtseq++; /* overflow doesn't matter */ 1337 hdr.rtm_msglen = sizeof(hdr); 1338 hdr.rtm_hdrlen = sizeof(struct rt_msghdr); 1339 hdr.rtm_priority = kr->priority; 1340 hdr.rtm_tableid = kr_state.rdomain; /* rtableid */ 1341 /* adjust iovec */ 1342 iov[iovcnt].iov_base = &hdr; 1343 iov[iovcnt++].iov_len = sizeof(hdr); 1344 1345 if (family == AF_MPLS) { 1346 memset(&label_in, 0, sizeof(label_in)); 1347 label_in.smpls_len = sizeof(label_in); 1348 label_in.smpls_family = AF_MPLS; 1349 label_in.smpls_label = 1350 htonl(kr->local_label << MPLS_LABEL_OFFSET); 1351 /* adjust header */ 1352 hdr.rtm_flags |= RTF_MPLS | RTF_MPATH; 1353 hdr.rtm_addrs |= RTA_DST; 1354 hdr.rtm_msglen += sizeof(label_in); 1355 /* adjust iovec */ 1356 iov[iovcnt].iov_base = &label_in; 1357 iov[iovcnt++].iov_len = sizeof(label_in); 1358 } else { 1359 memset(&dst, 0, sizeof(dst)); 1360 dst.sin6_len = sizeof(dst); 1361 dst.sin6_family = AF_INET6; 1362 dst.sin6_addr = kr->prefix.v6; 1363 /* adjust header */ 1364 hdr.rtm_addrs |= RTA_DST; 1365 hdr.rtm_msglen += ROUNDUP(sizeof(dst)); 1366 /* adjust iovec */ 1367 iov[iovcnt].iov_base = &dst; 1368 iov[iovcnt++].iov_len = ROUNDUP(sizeof(dst)); 1369 } 1370 1371 memset(&nexthop, 0, sizeof(nexthop)); 1372 nexthop.sin6_len = sizeof(nexthop); 1373 nexthop.sin6_family = AF_INET6; 1374 nexthop.sin6_addr = kr->nexthop.v6; 1375 nexthop.sin6_scope_id = kr->ifindex; 1376 /* 1377 * XXX we should set the sin6_scope_id but the kernel 1378 * XXX does not expect it that way. It must be fiddled 1379 * XXX into the sin6_addr. Welcome to the typical 1380 * XXX IPv6 insanity and all without wine bottles. 1381 */ 1382 embedscope(&nexthop); 1383 1384 /* adjust header */ 1385 hdr.rtm_flags |= RTF_GATEWAY; 1386 hdr.rtm_addrs |= RTA_GATEWAY; 1387 hdr.rtm_msglen += ROUNDUP(sizeof(nexthop)); 1388 /* adjust iovec */ 1389 iov[iovcnt].iov_base = &nexthop; 1390 iov[iovcnt++].iov_len = ROUNDUP(sizeof(nexthop)); 1391 1392 if (family == AF_INET6) { 1393 memset(&mask, 0, sizeof(mask)); 1394 mask.sin6_len = sizeof(mask); 1395 mask.sin6_family = AF_INET6; 1396 mask.sin6_addr = *prefixlen2mask6(kr->prefixlen); 1397 /* adjust header */ 1398 if (kr->prefixlen == 128) 1399 hdr.rtm_flags |= RTF_HOST; 1400 hdr.rtm_addrs |= RTA_NETMASK; 1401 hdr.rtm_msglen += ROUNDUP(sizeof(mask)); 1402 /* adjust iovec */ 1403 iov[iovcnt].iov_base = &mask; 1404 iov[iovcnt++].iov_len = ROUNDUP(sizeof(mask)); 1405 } 1406 1407 /* If action is RTM_DELETE we have to get rid of MPLS infos */ 1408 if (kr->remote_label != NO_LABEL && action != RTM_DELETE) { 1409 memset(&label_out, 0, sizeof(label_out)); 1410 label_out.smpls_len = sizeof(label_out); 1411 label_out.smpls_family = AF_MPLS; 1412 label_out.smpls_label = 1413 htonl(kr->remote_label << MPLS_LABEL_OFFSET); 1414 /* adjust header */ 1415 hdr.rtm_addrs |= RTA_SRC; 1416 hdr.rtm_flags |= RTF_MPLS; 1417 hdr.rtm_msglen += sizeof(label_out); 1418 /* adjust iovec */ 1419 iov[iovcnt].iov_base = &label_out; 1420 iov[iovcnt++].iov_len = sizeof(label_out); 1421 1422 if (kr->remote_label == MPLS_LABEL_IMPLNULL) { 1423 if (family == AF_MPLS) 1424 hdr.rtm_mpls = MPLS_OP_POP; 1425 else 1426 return (0); 1427 } else { 1428 if (family == AF_MPLS) 1429 hdr.rtm_mpls = MPLS_OP_SWAP; 1430 else 1431 hdr.rtm_mpls = MPLS_OP_PUSH; 1432 } 1433 } 1434 1435 retry: 1436 if (writev(fd, iov, iovcnt) == -1) { 1437 if (errno == ESRCH) { 1438 if (hdr.rtm_type == RTM_CHANGE && family == AF_MPLS) { 1439 hdr.rtm_type = RTM_ADD; 1440 goto retry; 1441 } else if (hdr.rtm_type == RTM_DELETE) { 1442 log_info("route %s/%u vanished before delete", 1443 log_addr(kr->af, &kr->prefix), 1444 kr->prefixlen); 1445 return (-1); 1446 } 1447 } 1448 log_warn("%s action %u, af %s, prefix %s/%u", __func__, 1449 hdr.rtm_type, af_name(family), log_addr(kr->af, 1450 &kr->prefix), kr->prefixlen); 1451 return (-1); 1452 } 1453 return (0); 1454 } 1455 1456 static int 1457 fetchtable(void) 1458 { 1459 size_t len; 1460 int mib[7]; 1461 char *buf; 1462 int rv; 1463 1464 mib[0] = CTL_NET; 1465 mib[1] = PF_ROUTE; 1466 mib[2] = 0; 1467 mib[3] = 0; 1468 mib[4] = NET_RT_DUMP; 1469 mib[5] = 0; 1470 mib[6] = kr_state.rdomain; /* rtableid */ 1471 1472 if (sysctl(mib, 7, NULL, &len, NULL, 0) == -1) { 1473 log_warn("sysctl"); 1474 return (-1); 1475 } 1476 if ((buf = malloc(len)) == NULL) { 1477 log_warn(__func__); 1478 return (-1); 1479 } 1480 if (sysctl(mib, 7, buf, &len, NULL, 0) == -1) { 1481 log_warn("sysctl"); 1482 free(buf); 1483 return (-1); 1484 } 1485 1486 rv = rtmsg_process(buf, len); 1487 free(buf); 1488 1489 return (rv); 1490 } 1491 1492 static int 1493 fetchifs(void) 1494 { 1495 size_t len; 1496 int mib[6]; 1497 char *buf; 1498 int rv; 1499 1500 mib[0] = CTL_NET; 1501 mib[1] = PF_ROUTE; 1502 mib[2] = 0; 1503 mib[3] = 0; /* wildcard */ 1504 mib[4] = NET_RT_IFLIST; 1505 mib[5] = 0; 1506 1507 if (sysctl(mib, 6, NULL, &len, NULL, 0) == -1) { 1508 log_warn("sysctl"); 1509 return (-1); 1510 } 1511 if ((buf = malloc(len)) == NULL) { 1512 log_warn(__func__); 1513 return (-1); 1514 } 1515 if (sysctl(mib, 6, buf, &len, NULL, 0) == -1) { 1516 log_warn("sysctl"); 1517 free(buf); 1518 return (-1); 1519 } 1520 1521 rv = rtmsg_process(buf, len); 1522 free(buf); 1523 1524 return (rv); 1525 } 1526 1527 static int 1528 dispatch_rtmsg(void) 1529 { 1530 char buf[RT_BUF_SIZE]; 1531 ssize_t n; 1532 1533 if ((n = read(kr_state.fd, &buf, sizeof(buf))) == -1) { 1534 if (errno == EAGAIN || errno == EINTR) 1535 return (0); 1536 log_warn("%s: read error", __func__); 1537 return (-1); 1538 } 1539 1540 if (n == 0) { 1541 log_warnx("routing socket closed"); 1542 return (-1); 1543 } 1544 1545 return (rtmsg_process(buf, n)); 1546 } 1547 1548 static int 1549 rtmsg_process(char *buf, size_t len) 1550 { 1551 struct rt_msghdr *rtm; 1552 struct if_msghdr ifm; 1553 struct ifa_msghdr *ifam; 1554 struct sockaddr *sa, *rti_info[RTAX_MAX]; 1555 size_t offset; 1556 char *next; 1557 1558 for (offset = 0; offset < len; offset += rtm->rtm_msglen) { 1559 next = buf + offset; 1560 rtm = (struct rt_msghdr *)next; 1561 if (len < offset + sizeof(unsigned short) || 1562 len < offset + rtm->rtm_msglen) 1563 fatalx("rtmsg_process: partial rtm in buffer"); 1564 if (rtm->rtm_version != RTM_VERSION) 1565 continue; 1566 1567 sa = (struct sockaddr *)(next + rtm->rtm_hdrlen); 1568 get_rtaddrs(rtm->rtm_addrs, sa, rti_info); 1569 1570 switch (rtm->rtm_type) { 1571 case RTM_ADD: 1572 case RTM_GET: 1573 case RTM_CHANGE: 1574 case RTM_DELETE: 1575 if (rtm->rtm_errno) /* failed attempts... */ 1576 continue; 1577 1578 if (rtm->rtm_tableid != kr_state.rdomain) 1579 continue; 1580 1581 if (rtm->rtm_type == RTM_GET && 1582 rtm->rtm_pid != kr_state.pid) 1583 continue; 1584 1585 /* Skip ARP/ND cache and broadcast routes. */ 1586 if (rtm->rtm_flags & (RTF_LLINFO|RTF_BROADCAST)) 1587 continue; 1588 1589 /* LDP should follow the IGP and ignore BGP routes */ 1590 if (rtm->rtm_priority == RTP_BGP) 1591 continue; 1592 1593 if (rtmsg_process_route(rtm, rti_info) == -1) 1594 return (-1); 1595 } 1596 1597 switch (rtm->rtm_type) { 1598 case RTM_IFINFO: 1599 memcpy(&ifm, next, sizeof(ifm)); 1600 if_change(ifm.ifm_index, ifm.ifm_flags, &ifm.ifm_data, 1601 (struct sockaddr_dl *)rti_info[RTAX_IFP]); 1602 break; 1603 case RTM_NEWADDR: 1604 ifam = (struct ifa_msghdr *)rtm; 1605 if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA | 1606 RTA_BRD)) == 0) 1607 break; 1608 1609 if_newaddr(ifam->ifam_index, 1610 (struct sockaddr *)rti_info[RTAX_IFA], 1611 (struct sockaddr *)rti_info[RTAX_NETMASK], 1612 (struct sockaddr *)rti_info[RTAX_BRD]); 1613 break; 1614 case RTM_DELADDR: 1615 ifam = (struct ifa_msghdr *)rtm; 1616 if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA | 1617 RTA_BRD)) == 0) 1618 break; 1619 1620 if_deladdr(ifam->ifam_index, 1621 (struct sockaddr *)rti_info[RTAX_IFA], 1622 (struct sockaddr *)rti_info[RTAX_NETMASK], 1623 (struct sockaddr *)rti_info[RTAX_BRD]); 1624 break; 1625 case RTM_IFANNOUNCE: 1626 if_announce(next); 1627 break; 1628 default: 1629 /* ignore for now */ 1630 break; 1631 } 1632 } 1633 1634 return (offset); 1635 } 1636 1637 static int 1638 rtmsg_process_route(struct rt_msghdr *rtm, struct sockaddr *rti_info[RTAX_MAX]) 1639 { 1640 struct sockaddr *sa; 1641 struct sockaddr_in *sa_in; 1642 struct sockaddr_in6 *sa_in6; 1643 struct kroute kr; 1644 struct kroute_prefix *kp; 1645 struct kroute_priority *kprio; 1646 struct kroute_node *kn; 1647 1648 if ((sa = rti_info[RTAX_DST]) == NULL) 1649 return (-1); 1650 1651 memset(&kr, 0, sizeof(kr)); 1652 kr.af = sa->sa_family; 1653 switch (kr.af) { 1654 case AF_INET: 1655 kr.prefix.v4 = ((struct sockaddr_in *)sa)->sin_addr; 1656 sa_in = (struct sockaddr_in *) rti_info[RTAX_NETMASK]; 1657 if (sa_in != NULL && sa_in->sin_len != 0) 1658 kr.prefixlen = mask2prefixlen(sa_in->sin_addr.s_addr); 1659 else if (rtm->rtm_flags & RTF_HOST) 1660 kr.prefixlen = 32; 1661 else if (kr.prefix.v4.s_addr == INADDR_ANY) 1662 kr.prefixlen = 0; 1663 else 1664 kr.prefixlen = prefixlen_classful(kr.prefix.v4.s_addr); 1665 break; 1666 case AF_INET6: 1667 kr.prefix.v6 = ((struct sockaddr_in6 *)sa)->sin6_addr; 1668 sa_in6 = (struct sockaddr_in6 *)rti_info[RTAX_NETMASK]; 1669 if (sa_in6 != NULL && sa_in6->sin6_len != 0) 1670 kr.prefixlen = mask2prefixlen6(sa_in6); 1671 else if (rtm->rtm_flags & RTF_HOST) 1672 kr.prefixlen = 128; 1673 else if (IN6_IS_ADDR_UNSPECIFIED(&kr.prefix.v6)) 1674 kr.prefixlen = 0; 1675 else 1676 fatalx("in6 net addr without netmask"); 1677 break; 1678 default: 1679 return (0); 1680 } 1681 kr.ifindex = rtm->rtm_index; 1682 if ((sa = rti_info[RTAX_GATEWAY]) != NULL) { 1683 switch (sa->sa_family) { 1684 case AF_INET: 1685 kr.nexthop.v4 = ((struct sockaddr_in *)sa)->sin_addr; 1686 break; 1687 case AF_INET6: 1688 sa_in6 = (struct sockaddr_in6 *)sa; 1689 recoverscope(sa_in6); 1690 kr.nexthop.v6 = sa_in6->sin6_addr; 1691 if (sa_in6->sin6_scope_id) 1692 kr.ifindex = sa_in6->sin6_scope_id; 1693 break; 1694 case AF_LINK: 1695 kr.flags |= F_CONNECTED; 1696 break; 1697 } 1698 } 1699 1700 if (rtm->rtm_flags & RTF_STATIC) 1701 kr.flags |= F_STATIC; 1702 if (rtm->rtm_flags & RTF_BLACKHOLE) 1703 kr.flags |= F_BLACKHOLE; 1704 if (rtm->rtm_flags & RTF_REJECT) 1705 kr.flags |= F_REJECT; 1706 if (rtm->rtm_flags & RTF_DYNAMIC) 1707 kr.flags |= F_DYNAMIC; 1708 /* routes attached to connected or loopback interfaces */ 1709 if (rtm->rtm_flags & RTF_CONNECTED || 1710 ldp_addrcmp(kr.af, &kr.prefix, &kr.nexthop) == 0) 1711 kr.flags |= F_CONNECTED; 1712 kr.priority = rtm->rtm_priority; 1713 1714 if (rtm->rtm_type == RTM_CHANGE) { 1715 /* 1716 * The kernel doesn't allow RTM_CHANGE for multipath routes. 1717 * If we got this message we know that the route has only one 1718 * nexthop and we should remove it before installing the same 1719 * route with the new nexthop. 1720 */ 1721 kp = kroute_find_prefix(kr.af, &kr.prefix, kr.prefixlen); 1722 if (kp) { 1723 kprio = kroute_find_prio(kp, kr.priority); 1724 if (kprio) { 1725 kn = TAILQ_FIRST(&kprio->nexthops); 1726 if (kn) 1727 kroute_remove(&kn->r); 1728 } 1729 } 1730 } 1731 1732 kn = NULL; 1733 kp = kroute_find_prefix(kr.af, &kr.prefix, kr.prefixlen); 1734 if (kp) { 1735 kprio = kroute_find_prio(kp, kr.priority); 1736 if (kprio) 1737 kn = kroute_find_gw(kprio, &kr.nexthop); 1738 } 1739 1740 if (rtm->rtm_type == RTM_DELETE) { 1741 if (kn == NULL) 1742 return (0); 1743 return (kroute_remove(&kr)); 1744 } 1745 1746 if (!ldp_addrisset(kr.af, &kr.nexthop) && !(kr.flags & F_CONNECTED)) { 1747 log_warnx("%s: no nexthop for %s/%u", __func__, 1748 log_addr(kr.af, &kr.prefix), kr.prefixlen); 1749 return (-1); 1750 } 1751 1752 if (kn != NULL) { 1753 /* update route */ 1754 kn->r = kr; 1755 kr_redistribute(kp); 1756 } else { 1757 kr.local_label = NO_LABEL; 1758 kr.remote_label = NO_LABEL; 1759 kroute_insert(&kr); 1760 } 1761 1762 return (0); 1763 } 1764 1765 int 1766 kmpw_set(struct kpw *kpw) 1767 { 1768 struct kif_node *kif; 1769 1770 kif = kif_find(kpw->ifindex); 1771 if (kif == NULL) { 1772 log_warnx("%s: failed to find mpw by index (%u)", __func__, 1773 kpw->ifindex); 1774 return (-1); 1775 } 1776 1777 if (kif->kpw == NULL) 1778 kif->kpw = malloc(sizeof(*kif->kpw)); 1779 *kif->kpw = *kpw; 1780 1781 return (kmpw_install(kif->k.ifname, kpw)); 1782 } 1783 1784 int 1785 kmpw_unset(struct kpw *kpw) 1786 { 1787 struct kif_node *kif; 1788 1789 kif = kif_find(kpw->ifindex); 1790 if (kif == NULL) { 1791 log_warnx("%s: failed to find mpw by index (%u)", __func__, 1792 kpw->ifindex); 1793 return (-1); 1794 } 1795 1796 if (kif->kpw == NULL) { 1797 log_warnx("%s: %s is not set", __func__, kif->k.ifname); 1798 return (-1); 1799 } 1800 1801 free(kif->kpw); 1802 kif->kpw = NULL; 1803 return (kmpw_uninstall(kif->k.ifname)); 1804 } 1805 1806 static int 1807 kmpw_install(const char *ifname, struct kpw *kpw) 1808 { 1809 struct ifreq ifr; 1810 struct ifmpwreq imr; 1811 1812 memset(&imr, 0, sizeof(imr)); 1813 switch (kpw->pw_type) { 1814 case PW_TYPE_ETHERNET: 1815 imr.imr_type = IMR_TYPE_ETHERNET; 1816 break; 1817 case PW_TYPE_ETHERNET_TAGGED: 1818 imr.imr_type = IMR_TYPE_ETHERNET_TAGGED; 1819 break; 1820 default: 1821 log_warnx("%s: unhandled pseudowire type (%#X)", __func__, 1822 kpw->pw_type); 1823 return (-1); 1824 } 1825 1826 if (kpw->flags & F_PW_CWORD) 1827 imr.imr_flags |= IMR_FLAG_CONTROLWORD; 1828 1829 memcpy(&imr.imr_nexthop, addr2sa(kpw->af, &kpw->nexthop, 0), 1830 sizeof(imr.imr_nexthop)); 1831 1832 imr.imr_lshim.shim_label = kpw->local_label; 1833 imr.imr_rshim.shim_label = kpw->remote_label; 1834 1835 memset(&ifr, 0, sizeof(ifr)); 1836 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); 1837 ifr.ifr_data = (caddr_t) &imr; 1838 if (ioctl(kr_state.ioctl_fd, SIOCSETMPWCFG, &ifr) == -1) { 1839 log_warn("ioctl SIOCSETMPWCFG"); 1840 return (-1); 1841 } 1842 1843 return (0); 1844 } 1845 1846 static int 1847 kmpw_uninstall(const char *ifname) 1848 { 1849 struct ifreq ifr; 1850 struct ifmpwreq imr; 1851 1852 memset(&ifr, 0, sizeof(ifr)); 1853 memset(&imr, 0, sizeof(imr)); 1854 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); 1855 ifr.ifr_data = (caddr_t) &imr; 1856 if (ioctl(kr_state.ioctl_fd, SIOCSETMPWCFG, &ifr) == -1) { 1857 log_warn("ioctl SIOCSETMPWCFG"); 1858 return (-1); 1859 } 1860 1861 return (0); 1862 } 1863 1864 int 1865 kmpw_find(const char *ifname) 1866 { 1867 struct ifreq ifr; 1868 1869 memset(&ifr, 0, sizeof(ifr)); 1870 if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >= 1871 sizeof(ifr.ifr_name)) { 1872 errno = ENAMETOOLONG; 1873 return (-1); 1874 } 1875 1876 if (ioctl(kr_state.ioctl_fd, SIOCGPWE3, &ifr) == -1) 1877 return (-1); 1878 1879 if (ifr.ifr_pwe3 != IF_PWE3_ETHERNET) { 1880 errno = EPFNOSUPPORT; 1881 return (-1); 1882 } 1883 1884 return (0); 1885 } 1886