1 /* $OpenBSD: rde.c,v 1.321 2012/09/18 10:10:00 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 #include <sys/socket.h> 21 #include <sys/time.h> 22 #include <sys/resource.h> 23 24 #include <errno.h> 25 #include <ifaddrs.h> 26 #include <pwd.h> 27 #include <poll.h> 28 #include <signal.h> 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <unistd.h> 33 34 #include "bgpd.h" 35 #include "mrt.h" 36 #include "rde.h" 37 #include "session.h" 38 39 #define PFD_PIPE_MAIN 0 40 #define PFD_PIPE_SESSION 1 41 #define PFD_PIPE_SESSION_CTL 2 42 #define PFD_PIPE_COUNT 3 43 44 void rde_sighdlr(int); 45 void rde_dispatch_imsg_session(struct imsgbuf *); 46 void rde_dispatch_imsg_parent(struct imsgbuf *); 47 int rde_update_dispatch(struct imsg *); 48 void rde_update_update(struct rde_peer *, struct rde_aspath *, 49 struct bgpd_addr *, u_int8_t); 50 void rde_update_withdraw(struct rde_peer *, struct bgpd_addr *, 51 u_int8_t); 52 int rde_attr_parse(u_char *, u_int16_t, struct rde_peer *, 53 struct rde_aspath *, struct mpattr *); 54 int rde_attr_add(struct rde_aspath *, u_char *, u_int16_t); 55 u_int8_t rde_attr_missing(struct rde_aspath *, int, u_int16_t); 56 int rde_get_mp_nexthop(u_char *, u_int16_t, u_int8_t, 57 struct rde_aspath *); 58 int rde_update_extract_prefix(u_char *, u_int16_t, void *, 59 u_int8_t, u_int8_t); 60 int rde_update_get_prefix(u_char *, u_int16_t, struct bgpd_addr *, 61 u_int8_t *); 62 int rde_update_get_prefix6(u_char *, u_int16_t, struct bgpd_addr *, 63 u_int8_t *); 64 int rde_update_get_vpn4(u_char *, u_int16_t, struct bgpd_addr *, 65 u_int8_t *); 66 void rde_update_err(struct rde_peer *, u_int8_t , u_int8_t, 67 void *, u_int16_t); 68 void rde_update_log(const char *, u_int16_t, 69 const struct rde_peer *, const struct bgpd_addr *, 70 const struct bgpd_addr *, u_int8_t); 71 void rde_as4byte_fixup(struct rde_peer *, struct rde_aspath *); 72 void rde_reflector(struct rde_peer *, struct rde_aspath *); 73 74 void rde_dump_rib_as(struct prefix *, struct rde_aspath *,pid_t, 75 int); 76 void rde_dump_filter(struct prefix *, 77 struct ctl_show_rib_request *); 78 void rde_dump_filterout(struct rde_peer *, struct prefix *, 79 struct ctl_show_rib_request *); 80 void rde_dump_upcall(struct rib_entry *, void *); 81 void rde_dump_prefix_upcall(struct rib_entry *, void *); 82 void rde_dump_ctx_new(struct ctl_show_rib_request *, pid_t, 83 enum imsg_type); 84 void rde_dump_mrt_new(struct mrt *, pid_t, int); 85 void rde_dump_done(void *); 86 87 int rde_rdomain_import(struct rde_aspath *, struct rdomain *); 88 void rde_up_dump_upcall(struct rib_entry *, void *); 89 void rde_softreconfig_out(struct rib_entry *, void *); 90 void rde_softreconfig_in(struct rib_entry *, void *); 91 void rde_softreconfig_load(struct rib_entry *, void *); 92 void rde_softreconfig_load_peer(struct rib_entry *, void *); 93 void rde_softreconfig_unload_peer(struct rib_entry *, void *); 94 void rde_update_queue_runner(void); 95 void rde_update6_queue_runner(u_int8_t); 96 97 void peer_init(u_int32_t); 98 void peer_shutdown(void); 99 void peer_localaddrs(struct rde_peer *, struct bgpd_addr *); 100 struct rde_peer *peer_add(u_int32_t, struct peer_config *); 101 struct rde_peer *peer_get(u_int32_t); 102 void peer_up(u_int32_t, struct session_up *); 103 void peer_down(u_int32_t); 104 void peer_flush(struct rde_peer *, u_int8_t); 105 void peer_stale(u_int32_t, u_int8_t); 106 void peer_recv_eor(struct rde_peer *, u_int8_t); 107 void peer_dump(u_int32_t, u_int8_t); 108 void peer_send_eor(struct rde_peer *, u_int8_t); 109 110 void network_add(struct network_config *, int); 111 void network_delete(struct network_config *, int); 112 void network_dump_upcall(struct rib_entry *, void *); 113 114 void rde_shutdown(void); 115 int sa_cmp(struct bgpd_addr *, struct sockaddr *); 116 117 volatile sig_atomic_t rde_quit = 0; 118 struct bgpd_config *conf, *nconf; 119 time_t reloadtime; 120 struct rde_peer_head peerlist; 121 struct rde_peer *peerself; 122 struct filter_head *rules_l, *newrules; 123 struct rdomain_head *rdomains_l, *newdomains; 124 struct imsgbuf *ibuf_se; 125 struct imsgbuf *ibuf_se_ctl; 126 struct imsgbuf *ibuf_main; 127 struct rde_memstats rdemem; 128 129 struct rde_dump_ctx { 130 struct rib_context ribctx; 131 struct ctl_show_rib_request req; 132 sa_family_t af; 133 }; 134 135 struct rde_mrt_ctx { 136 struct mrt mrt; 137 struct rib_context ribctx; 138 LIST_ENTRY(rde_mrt_ctx) entry; 139 }; 140 141 LIST_HEAD(, rde_mrt_ctx) rde_mrts = LIST_HEAD_INITIALIZER(rde_mrts); 142 u_int rde_mrt_cnt; 143 144 void 145 rde_sighdlr(int sig) 146 { 147 switch (sig) { 148 case SIGINT: 149 case SIGTERM: 150 rde_quit = 1; 151 break; 152 } 153 } 154 155 u_int32_t peerhashsize = 64; 156 u_int32_t pathhashsize = 1024; 157 u_int32_t attrhashsize = 512; 158 u_int32_t nexthophashsize = 64; 159 160 pid_t 161 rde_main(int pipe_m2r[2], int pipe_s2r[2], int pipe_m2s[2], int pipe_s2rctl[2], 162 int debug) 163 { 164 pid_t pid; 165 struct passwd *pw; 166 struct pollfd *pfd = NULL; 167 struct rde_mrt_ctx *mctx, *xmctx; 168 void *newp; 169 u_int pfd_elms = 0, i, j; 170 int timeout; 171 u_int8_t aid; 172 173 switch (pid = fork()) { 174 case -1: 175 fatal("cannot fork"); 176 case 0: 177 break; 178 default: 179 return (pid); 180 } 181 182 if ((pw = getpwnam(BGPD_USER)) == NULL) 183 fatal("getpwnam"); 184 185 if (chroot(pw->pw_dir) == -1) 186 fatal("chroot"); 187 if (chdir("/") == -1) 188 fatal("chdir(\"/\")"); 189 190 setproctitle("route decision engine"); 191 bgpd_process = PROC_RDE; 192 193 if (setgroups(1, &pw->pw_gid) || 194 setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || 195 setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) 196 fatal("can't drop privileges"); 197 198 signal(SIGTERM, rde_sighdlr); 199 signal(SIGINT, rde_sighdlr); 200 signal(SIGPIPE, SIG_IGN); 201 signal(SIGHUP, SIG_IGN); 202 signal(SIGALRM, SIG_IGN); 203 signal(SIGUSR1, SIG_IGN); 204 205 close(pipe_s2r[0]); 206 close(pipe_s2rctl[0]); 207 close(pipe_m2r[0]); 208 close(pipe_m2s[0]); 209 close(pipe_m2s[1]); 210 211 /* initialize the RIB structures */ 212 if ((ibuf_se = malloc(sizeof(struct imsgbuf))) == NULL || 213 (ibuf_se_ctl = malloc(sizeof(struct imsgbuf))) == NULL || 214 (ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL) 215 fatal(NULL); 216 imsg_init(ibuf_se, pipe_s2r[1]); 217 imsg_init(ibuf_se_ctl, pipe_s2rctl[1]); 218 imsg_init(ibuf_main, pipe_m2r[1]); 219 220 pt_init(); 221 path_init(pathhashsize); 222 aspath_init(pathhashsize); 223 attr_init(attrhashsize); 224 nexthop_init(nexthophashsize); 225 peer_init(peerhashsize); 226 227 rules_l = calloc(1, sizeof(struct filter_head)); 228 if (rules_l == NULL) 229 fatal(NULL); 230 TAILQ_INIT(rules_l); 231 rdomains_l = calloc(1, sizeof(struct rdomain_head)); 232 if (rdomains_l == NULL) 233 fatal(NULL); 234 SIMPLEQ_INIT(rdomains_l); 235 if ((conf = calloc(1, sizeof(struct bgpd_config))) == NULL) 236 fatal(NULL); 237 log_info("route decision engine ready"); 238 239 while (rde_quit == 0) { 240 if (pfd_elms < PFD_PIPE_COUNT + rde_mrt_cnt) { 241 if ((newp = realloc(pfd, sizeof(struct pollfd) * 242 (PFD_PIPE_COUNT + rde_mrt_cnt))) == NULL) { 243 /* panic for now */ 244 log_warn("could not resize pfd from %u -> %u" 245 " entries", pfd_elms, PFD_PIPE_COUNT + 246 rde_mrt_cnt); 247 fatalx("exiting"); 248 } 249 pfd = newp; 250 pfd_elms = PFD_PIPE_COUNT + rde_mrt_cnt; 251 } 252 timeout = INFTIM; 253 bzero(pfd, sizeof(struct pollfd) * pfd_elms); 254 pfd[PFD_PIPE_MAIN].fd = ibuf_main->fd; 255 pfd[PFD_PIPE_MAIN].events = POLLIN; 256 if (ibuf_main->w.queued > 0) 257 pfd[PFD_PIPE_MAIN].events |= POLLOUT; 258 259 pfd[PFD_PIPE_SESSION].fd = ibuf_se->fd; 260 pfd[PFD_PIPE_SESSION].events = POLLIN; 261 if (ibuf_se->w.queued > 0) 262 pfd[PFD_PIPE_SESSION].events |= POLLOUT; 263 264 pfd[PFD_PIPE_SESSION_CTL].fd = ibuf_se_ctl->fd; 265 pfd[PFD_PIPE_SESSION_CTL].events = POLLIN; 266 if (ibuf_se_ctl->w.queued > 0) 267 pfd[PFD_PIPE_SESSION_CTL].events |= POLLOUT; 268 else if (rib_dump_pending()) 269 timeout = 0; 270 271 i = PFD_PIPE_COUNT; 272 for (mctx = LIST_FIRST(&rde_mrts); mctx != 0; mctx = xmctx) { 273 xmctx = LIST_NEXT(mctx, entry); 274 if (mctx->mrt.wbuf.queued) { 275 pfd[i].fd = mctx->mrt.wbuf.fd; 276 pfd[i].events = POLLOUT; 277 i++; 278 } else if (mctx->mrt.state == MRT_STATE_REMOVE) { 279 close(mctx->mrt.wbuf.fd); 280 LIST_REMOVE(&mctx->ribctx, entry); 281 LIST_REMOVE(mctx, entry); 282 free(mctx); 283 rde_mrt_cnt--; 284 } 285 } 286 287 if (poll(pfd, i, timeout) == -1) { 288 if (errno != EINTR) 289 fatal("poll error"); 290 continue; 291 } 292 293 if ((pfd[PFD_PIPE_MAIN].revents & POLLOUT) && 294 ibuf_main->w.queued) 295 if (msgbuf_write(&ibuf_main->w) < 0) 296 fatal("pipe write error"); 297 298 if (pfd[PFD_PIPE_MAIN].revents & POLLIN) 299 rde_dispatch_imsg_parent(ibuf_main); 300 301 if ((pfd[PFD_PIPE_SESSION].revents & POLLOUT) && 302 ibuf_se->w.queued) 303 if (msgbuf_write(&ibuf_se->w) < 0) 304 fatal("pipe write error"); 305 306 if (pfd[PFD_PIPE_SESSION].revents & POLLIN) 307 rde_dispatch_imsg_session(ibuf_se); 308 309 if ((pfd[PFD_PIPE_SESSION_CTL].revents & POLLOUT) && 310 ibuf_se_ctl->w.queued) 311 if (msgbuf_write(&ibuf_se_ctl->w) < 0) 312 fatal("pipe write error"); 313 314 if (pfd[PFD_PIPE_SESSION_CTL].revents & POLLIN) 315 rde_dispatch_imsg_session(ibuf_se_ctl); 316 317 for (j = PFD_PIPE_COUNT, mctx = LIST_FIRST(&rde_mrts); 318 j < i && mctx != 0; j++) { 319 if (pfd[j].fd == mctx->mrt.wbuf.fd && 320 pfd[j].revents & POLLOUT) 321 mrt_write(&mctx->mrt); 322 mctx = LIST_NEXT(mctx, entry); 323 } 324 325 rde_update_queue_runner(); 326 for (aid = AID_INET6; aid < AID_MAX; aid++) 327 rde_update6_queue_runner(aid); 328 if (ibuf_se_ctl->w.queued <= 0) 329 rib_dump_runner(); 330 } 331 332 /* do not clean up on shutdown on production, it takes ages. */ 333 if (debug) 334 rde_shutdown(); 335 336 while ((mctx = LIST_FIRST(&rde_mrts)) != NULL) { 337 msgbuf_clear(&mctx->mrt.wbuf); 338 close(mctx->mrt.wbuf.fd); 339 LIST_REMOVE(&mctx->ribctx, entry); 340 LIST_REMOVE(mctx, entry); 341 free(mctx); 342 } 343 344 msgbuf_clear(&ibuf_se->w); 345 free(ibuf_se); 346 msgbuf_clear(&ibuf_se_ctl->w); 347 free(ibuf_se_ctl); 348 msgbuf_clear(&ibuf_main->w); 349 free(ibuf_main); 350 351 log_info("route decision engine exiting"); 352 _exit(0); 353 } 354 355 struct network_config netconf_s, netconf_p; 356 struct filter_set_head *session_set, *parent_set; 357 358 void 359 rde_dispatch_imsg_session(struct imsgbuf *ibuf) 360 { 361 struct imsg imsg; 362 struct peer p; 363 struct peer_config pconf; 364 struct session_up sup; 365 struct ctl_show_rib csr; 366 struct ctl_show_rib_request req; 367 struct rde_peer *peer; 368 struct rde_aspath *asp; 369 struct filter_set *s; 370 struct nexthop *nh; 371 u_int8_t *asdata; 372 ssize_t n; 373 int verbose; 374 u_int16_t len; 375 u_int8_t aid; 376 377 if ((n = imsg_read(ibuf)) == -1) 378 fatal("rde_dispatch_imsg_session: imsg_read error"); 379 if (n == 0) /* connection closed */ 380 fatalx("rde_dispatch_imsg_session: pipe closed"); 381 382 for (;;) { 383 if ((n = imsg_get(ibuf, &imsg)) == -1) 384 fatal("rde_dispatch_imsg_session: imsg_read error"); 385 if (n == 0) 386 break; 387 388 switch (imsg.hdr.type) { 389 case IMSG_UPDATE: 390 rde_update_dispatch(&imsg); 391 break; 392 case IMSG_SESSION_ADD: 393 if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(pconf)) 394 fatalx("incorrect size of session request"); 395 memcpy(&pconf, imsg.data, sizeof(pconf)); 396 peer = peer_add(imsg.hdr.peerid, &pconf); 397 if (peer == NULL) { 398 log_warnx("session add: " 399 "peer id %d already exists", 400 imsg.hdr.peerid); 401 break; 402 } 403 break; 404 case IMSG_SESSION_UP: 405 if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(sup)) 406 fatalx("incorrect size of session request"); 407 memcpy(&sup, imsg.data, sizeof(sup)); 408 peer_up(imsg.hdr.peerid, &sup); 409 break; 410 case IMSG_SESSION_DOWN: 411 peer_down(imsg.hdr.peerid); 412 break; 413 case IMSG_SESSION_STALE: 414 if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { 415 log_warnx("rde_dispatch: wrong imsg len"); 416 break; 417 } 418 memcpy(&aid, imsg.data, sizeof(aid)); 419 if (aid >= AID_MAX) 420 fatalx("IMSG_SESSION_STALE: bad AID"); 421 peer_stale(imsg.hdr.peerid, aid); 422 break; 423 case IMSG_SESSION_FLUSH: 424 if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { 425 log_warnx("rde_dispatch: wrong imsg len"); 426 break; 427 } 428 memcpy(&aid, imsg.data, sizeof(aid)); 429 if (aid >= AID_MAX) 430 fatalx("IMSG_SESSION_FLUSH: bad AID"); 431 if ((peer = peer_get(imsg.hdr.peerid)) == NULL) { 432 log_warnx("rde_dispatch: unknown peer id %d", 433 imsg.hdr.peerid); 434 break; 435 } 436 peer_flush(peer, aid); 437 break; 438 case IMSG_SESSION_RESTARTED: 439 if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { 440 log_warnx("rde_dispatch: wrong imsg len"); 441 break; 442 } 443 memcpy(&aid, imsg.data, sizeof(aid)); 444 if (aid >= AID_MAX) 445 fatalx("IMSG_SESSION_RESTARTED: bad AID"); 446 if ((peer = peer_get(imsg.hdr.peerid)) == NULL) { 447 log_warnx("rde_dispatch: unknown peer id %d", 448 imsg.hdr.peerid); 449 break; 450 } 451 if (peer->staletime[aid]) 452 peer_flush(peer, aid); 453 break; 454 case IMSG_REFRESH: 455 if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { 456 log_warnx("rde_dispatch: wrong imsg len"); 457 break; 458 } 459 memcpy(&aid, imsg.data, sizeof(aid)); 460 if (aid >= AID_MAX) 461 fatalx("IMSG_REFRESH: bad AID"); 462 peer_dump(imsg.hdr.peerid, aid); 463 break; 464 case IMSG_NETWORK_ADD: 465 if (imsg.hdr.len - IMSG_HEADER_SIZE != 466 sizeof(struct network_config)) { 467 log_warnx("rde_dispatch: wrong imsg len"); 468 break; 469 } 470 memcpy(&netconf_s, imsg.data, sizeof(netconf_s)); 471 TAILQ_INIT(&netconf_s.attrset); 472 session_set = &netconf_s.attrset; 473 break; 474 case IMSG_NETWORK_ASPATH: 475 if (imsg.hdr.len - IMSG_HEADER_SIZE < 476 sizeof(struct ctl_show_rib)) { 477 log_warnx("rde_dispatch: wrong imsg len"); 478 bzero(&netconf_s, sizeof(netconf_s)); 479 break; 480 } 481 asdata = imsg.data; 482 asdata += sizeof(struct ctl_show_rib); 483 memcpy(&csr, imsg.data, sizeof(csr)); 484 if (csr.aspath_len + sizeof(csr) > imsg.hdr.len - 485 IMSG_HEADER_SIZE) { 486 log_warnx("rde_dispatch: wrong aspath len"); 487 bzero(&netconf_s, sizeof(netconf_s)); 488 break; 489 } 490 asp = path_get(); 491 asp->lpref = csr.local_pref; 492 asp->med = csr.med; 493 asp->weight = csr.weight; 494 asp->flags = csr.flags; 495 asp->origin = csr.origin; 496 asp->flags |= F_PREFIX_ANNOUNCED | F_ANN_DYNAMIC; 497 asp->aspath = aspath_get(asdata, csr.aspath_len); 498 netconf_s.asp = asp; 499 break; 500 case IMSG_NETWORK_ATTR: 501 if (imsg.hdr.len <= IMSG_HEADER_SIZE) { 502 log_warnx("rde_dispatch: wrong imsg len"); 503 break; 504 } 505 /* parse path attributes */ 506 len = imsg.hdr.len - IMSG_HEADER_SIZE; 507 asp = netconf_s.asp; 508 if (rde_attr_add(asp, imsg.data, len) == -1) { 509 log_warnx("rde_dispatch: bad network " 510 "attribute"); 511 path_put(asp); 512 bzero(&netconf_s, sizeof(netconf_s)); 513 break; 514 } 515 break; 516 case IMSG_NETWORK_DONE: 517 if (imsg.hdr.len != IMSG_HEADER_SIZE) { 518 log_warnx("rde_dispatch: wrong imsg len"); 519 break; 520 } 521 session_set = NULL; 522 switch (netconf_s.prefix.aid) { 523 case AID_INET: 524 if (netconf_s.prefixlen > 32) 525 goto badnet; 526 network_add(&netconf_s, 0); 527 break; 528 case AID_INET6: 529 if (netconf_s.prefixlen > 128) 530 goto badnet; 531 network_add(&netconf_s, 0); 532 break; 533 case 0: 534 /* something failed beforehands */ 535 break; 536 default: 537 badnet: 538 log_warnx("rde_dispatch: bad network"); 539 break; 540 } 541 break; 542 case IMSG_NETWORK_REMOVE: 543 if (imsg.hdr.len - IMSG_HEADER_SIZE != 544 sizeof(struct network_config)) { 545 log_warnx("rde_dispatch: wrong imsg len"); 546 break; 547 } 548 memcpy(&netconf_s, imsg.data, sizeof(netconf_s)); 549 TAILQ_INIT(&netconf_s.attrset); 550 network_delete(&netconf_s, 0); 551 break; 552 case IMSG_NETWORK_FLUSH: 553 if (imsg.hdr.len != IMSG_HEADER_SIZE) { 554 log_warnx("rde_dispatch: wrong imsg len"); 555 break; 556 } 557 prefix_network_clean(peerself, time(NULL), 558 F_ANN_DYNAMIC); 559 break; 560 case IMSG_FILTER_SET: 561 if (imsg.hdr.len - IMSG_HEADER_SIZE != 562 sizeof(struct filter_set)) { 563 log_warnx("rde_dispatch: wrong imsg len"); 564 break; 565 } 566 if (session_set == NULL) { 567 log_warnx("rde_dispatch: " 568 "IMSG_FILTER_SET unexpected"); 569 break; 570 } 571 if ((s = malloc(sizeof(struct filter_set))) == NULL) 572 fatal(NULL); 573 memcpy(s, imsg.data, sizeof(struct filter_set)); 574 TAILQ_INSERT_TAIL(session_set, s, entry); 575 576 if (s->type == ACTION_SET_NEXTHOP) { 577 nh = nexthop_get(&s->action.nexthop); 578 nh->refcnt++; 579 } 580 break; 581 case IMSG_CTL_SHOW_NETWORK: 582 case IMSG_CTL_SHOW_RIB: 583 case IMSG_CTL_SHOW_RIB_AS: 584 case IMSG_CTL_SHOW_RIB_COMMUNITY: 585 case IMSG_CTL_SHOW_RIB_PREFIX: 586 if (imsg.hdr.len != IMSG_HEADER_SIZE + sizeof(req)) { 587 log_warnx("rde_dispatch: wrong imsg len"); 588 break; 589 } 590 memcpy(&req, imsg.data, sizeof(req)); 591 rde_dump_ctx_new(&req, imsg.hdr.pid, imsg.hdr.type); 592 break; 593 case IMSG_CTL_SHOW_NEIGHBOR: 594 if (imsg.hdr.len - IMSG_HEADER_SIZE != 595 sizeof(struct peer)) { 596 log_warnx("rde_dispatch: wrong imsg len"); 597 break; 598 } 599 memcpy(&p, imsg.data, sizeof(struct peer)); 600 peer = peer_get(p.conf.id); 601 if (peer != NULL) { 602 p.stats.prefix_cnt = peer->prefix_cnt; 603 p.stats.prefix_rcvd_update = 604 peer->prefix_rcvd_update; 605 p.stats.prefix_rcvd_withdraw = 606 peer->prefix_rcvd_withdraw; 607 p.stats.prefix_rcvd_eor = 608 peer->prefix_rcvd_eor; 609 p.stats.prefix_sent_update = 610 peer->prefix_sent_update; 611 p.stats.prefix_sent_withdraw = 612 peer->prefix_sent_withdraw; 613 p.stats.prefix_sent_eor = 614 peer->prefix_sent_eor; 615 } 616 imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NEIGHBOR, 0, 617 imsg.hdr.pid, -1, &p, sizeof(struct peer)); 618 break; 619 case IMSG_CTL_END: 620 imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, imsg.hdr.pid, 621 -1, NULL, 0); 622 break; 623 case IMSG_CTL_SHOW_RIB_MEM: 624 imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_MEM, 0, 625 imsg.hdr.pid, -1, &rdemem, sizeof(rdemem)); 626 break; 627 case IMSG_CTL_LOG_VERBOSE: 628 /* already checked by SE */ 629 memcpy(&verbose, imsg.data, sizeof(verbose)); 630 log_verbose(verbose); 631 break; 632 default: 633 break; 634 } 635 imsg_free(&imsg); 636 } 637 } 638 639 void 640 rde_dispatch_imsg_parent(struct imsgbuf *ibuf) 641 { 642 static struct rdomain *rd; 643 struct imsg imsg; 644 struct mrt xmrt; 645 struct rde_rib rn; 646 struct rde_peer *peer; 647 struct peer_config *pconf; 648 struct filter_rule *r; 649 struct filter_set *s; 650 struct nexthop *nh; 651 int n, fd, reconf_in = 0, reconf_out = 0, 652 reconf_rib = 0; 653 u_int16_t rid; 654 655 if ((n = imsg_read(ibuf)) == -1) 656 fatal("rde_dispatch_imsg_parent: imsg_read error"); 657 if (n == 0) /* connection closed */ 658 fatalx("rde_dispatch_imsg_parent: pipe closed"); 659 660 for (;;) { 661 if ((n = imsg_get(ibuf, &imsg)) == -1) 662 fatal("rde_dispatch_imsg_parent: imsg_read error"); 663 if (n == 0) 664 break; 665 666 switch (imsg.hdr.type) { 667 case IMSG_NETWORK_ADD: 668 if (imsg.hdr.len - IMSG_HEADER_SIZE != 669 sizeof(struct network_config)) { 670 log_warnx("rde_dispatch: wrong imsg len"); 671 break; 672 } 673 memcpy(&netconf_p, imsg.data, sizeof(netconf_p)); 674 TAILQ_INIT(&netconf_p.attrset); 675 parent_set = &netconf_p.attrset; 676 break; 677 case IMSG_NETWORK_DONE: 678 parent_set = NULL; 679 network_add(&netconf_p, 1); 680 break; 681 case IMSG_NETWORK_REMOVE: 682 if (imsg.hdr.len - IMSG_HEADER_SIZE != 683 sizeof(struct network_config)) { 684 log_warnx("rde_dispatch: wrong imsg len"); 685 break; 686 } 687 memcpy(&netconf_p, imsg.data, sizeof(netconf_p)); 688 TAILQ_INIT(&netconf_p.attrset); 689 network_delete(&netconf_p, 1); 690 break; 691 case IMSG_RECONF_CONF: 692 if (imsg.hdr.len - IMSG_HEADER_SIZE != 693 sizeof(struct bgpd_config)) 694 fatalx("IMSG_RECONF_CONF bad len"); 695 reloadtime = time(NULL); 696 newrules = calloc(1, sizeof(struct filter_head)); 697 if (newrules == NULL) 698 fatal(NULL); 699 TAILQ_INIT(newrules); 700 newdomains = calloc(1, sizeof(struct rdomain_head)); 701 if (newdomains == NULL) 702 fatal(NULL); 703 SIMPLEQ_INIT(newdomains); 704 if ((nconf = malloc(sizeof(struct bgpd_config))) == 705 NULL) 706 fatal(NULL); 707 memcpy(nconf, imsg.data, sizeof(struct bgpd_config)); 708 for (rid = 0; rid < rib_size; rid++) 709 ribs[rid].state = RECONF_DELETE; 710 break; 711 case IMSG_RECONF_RIB: 712 if (imsg.hdr.len - IMSG_HEADER_SIZE != 713 sizeof(struct rde_rib)) 714 fatalx("IMSG_RECONF_RIB bad len"); 715 memcpy(&rn, imsg.data, sizeof(rn)); 716 rid = rib_find(rn.name); 717 if (rid == RIB_FAILED) 718 rib_new(rn.name, rn.rtableid, rn.flags); 719 else if (ribs[rid].rtableid != rn.rtableid || 720 (ribs[rid].flags & F_RIB_HASNOFIB) != 721 (rn.flags & F_RIB_HASNOFIB)) { 722 /* Big hammer in the F_RIB_NOFIB case but 723 * not often enough used to optimise it more. */ 724 rib_free(&ribs[rid]); 725 rib_new(rn.name, rn.rtableid, rn.flags); 726 } else 727 ribs[rid].state = RECONF_KEEP; 728 break; 729 case IMSG_RECONF_PEER: 730 if (imsg.hdr.len - IMSG_HEADER_SIZE != 731 sizeof(struct peer_config)) 732 fatalx("IMSG_RECONF_PEER bad len"); 733 if ((peer = peer_get(imsg.hdr.peerid)) == NULL) 734 break; 735 pconf = imsg.data; 736 strlcpy(peer->conf.rib, pconf->rib, 737 sizeof(peer->conf.rib)); 738 break; 739 case IMSG_RECONF_FILTER: 740 if (imsg.hdr.len - IMSG_HEADER_SIZE != 741 sizeof(struct filter_rule)) 742 fatalx("IMSG_RECONF_FILTER bad len"); 743 if ((r = malloc(sizeof(struct filter_rule))) == NULL) 744 fatal(NULL); 745 memcpy(r, imsg.data, sizeof(struct filter_rule)); 746 TAILQ_INIT(&r->set); 747 r->peer.ribid = rib_find(r->rib); 748 parent_set = &r->set; 749 TAILQ_INSERT_TAIL(newrules, r, entry); 750 break; 751 case IMSG_RECONF_RDOMAIN: 752 if (imsg.hdr.len - IMSG_HEADER_SIZE != 753 sizeof(struct rdomain)) 754 fatalx("IMSG_RECONF_RDOMAIN bad len"); 755 if ((rd = malloc(sizeof(struct rdomain))) == NULL) 756 fatal(NULL); 757 memcpy(rd, imsg.data, sizeof(struct rdomain)); 758 TAILQ_INIT(&rd->import); 759 TAILQ_INIT(&rd->export); 760 SIMPLEQ_INSERT_TAIL(newdomains, rd, entry); 761 break; 762 case IMSG_RECONF_RDOMAIN_EXPORT: 763 if (rd == NULL) { 764 log_warnx("rde_dispatch_imsg_parent: " 765 "IMSG_RECONF_RDOMAIN_EXPORT unexpected"); 766 break; 767 } 768 parent_set = &rd->export; 769 break; 770 case IMSG_RECONF_RDOMAIN_IMPORT: 771 if (rd == NULL) { 772 log_warnx("rde_dispatch_imsg_parent: " 773 "IMSG_RECONF_RDOMAIN_IMPORT unexpected"); 774 break; 775 } 776 parent_set = &rd->import; 777 break; 778 case IMSG_RECONF_RDOMAIN_DONE: 779 parent_set = NULL; 780 break; 781 case IMSG_RECONF_DONE: 782 if (nconf == NULL) 783 fatalx("got IMSG_RECONF_DONE but no config"); 784 if ((nconf->flags & BGPD_FLAG_NO_EVALUATE) 785 != (conf->flags & BGPD_FLAG_NO_EVALUATE)) { 786 log_warnx("change to/from route-collector " 787 "mode ignored"); 788 if (conf->flags & BGPD_FLAG_NO_EVALUATE) 789 nconf->flags |= BGPD_FLAG_NO_EVALUATE; 790 else 791 nconf->flags &= ~BGPD_FLAG_NO_EVALUATE; 792 } 793 memcpy(conf, nconf, sizeof(struct bgpd_config)); 794 conf->listen_addrs = NULL; 795 conf->csock = NULL; 796 conf->rcsock = NULL; 797 free(nconf); 798 nconf = NULL; 799 parent_set = NULL; 800 /* sync peerself with conf */ 801 peerself->remote_bgpid = ntohl(conf->bgpid); 802 peerself->conf.local_as = conf->as; 803 peerself->conf.remote_as = conf->as; 804 peerself->short_as = conf->short_as; 805 806 /* apply new set of rdomain, sync will be done later */ 807 while ((rd = SIMPLEQ_FIRST(rdomains_l)) != NULL) { 808 SIMPLEQ_REMOVE_HEAD(rdomains_l, entry); 809 filterset_free(&rd->import); 810 filterset_free(&rd->export); 811 free(rd); 812 } 813 free(rdomains_l); 814 rdomains_l = newdomains; 815 816 /* check if filter changed */ 817 LIST_FOREACH(peer, &peerlist, peer_l) { 818 if (peer->conf.id == 0) 819 continue; 820 peer->reconf_out = 0; 821 peer->reconf_in = 0; 822 peer->reconf_rib = 0; 823 if (peer->conf.softreconfig_in && 824 !rde_filter_equal(rules_l, newrules, peer, 825 DIR_IN)) { 826 peer->reconf_in = 1; 827 reconf_in = 1; 828 } 829 if (peer->ribid != rib_find(peer->conf.rib)) { 830 rib_dump(&ribs[peer->ribid], 831 rde_softreconfig_unload_peer, peer, 832 AID_UNSPEC); 833 peer->ribid = rib_find(peer->conf.rib); 834 peer->reconf_rib = 1; 835 reconf_rib = 1; 836 continue; 837 } 838 if (peer->conf.softreconfig_out && 839 !rde_filter_equal(rules_l, newrules, peer, 840 DIR_OUT)) { 841 peer->reconf_out = 1; 842 reconf_out = 1; 843 } 844 } 845 /* bring ribs in sync before softreconfig dance */ 846 for (rid = 0; rid < rib_size; rid++) { 847 if (ribs[rid].state == RECONF_DELETE) 848 rib_free(&ribs[rid]); 849 else if (ribs[rid].state == RECONF_REINIT) 850 rib_dump(&ribs[0], 851 rde_softreconfig_load, &ribs[rid], 852 AID_UNSPEC); 853 } 854 /* sync local-RIBs first */ 855 if (reconf_in) 856 rib_dump(&ribs[0], rde_softreconfig_in, NULL, 857 AID_UNSPEC); 858 /* then sync peers */ 859 if (reconf_out) { 860 int i; 861 for (i = 1; i < rib_size; i++) { 862 if (ribs[i].state == RECONF_REINIT) 863 /* already synced by _load */ 864 continue; 865 rib_dump(&ribs[i], rde_softreconfig_out, 866 NULL, AID_UNSPEC); 867 } 868 } 869 if (reconf_rib) { 870 LIST_FOREACH(peer, &peerlist, peer_l) { 871 rib_dump(&ribs[peer->ribid], 872 rde_softreconfig_load_peer, 873 peer, AID_UNSPEC); 874 } 875 } 876 877 while ((r = TAILQ_FIRST(rules_l)) != NULL) { 878 TAILQ_REMOVE(rules_l, r, entry); 879 filterset_free(&r->set); 880 free(r); 881 } 882 free(rules_l); 883 rules_l = newrules; 884 885 log_info("RDE reconfigured"); 886 imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0, 887 -1, NULL, 0); 888 break; 889 case IMSG_NEXTHOP_UPDATE: 890 nexthop_update(imsg.data); 891 break; 892 case IMSG_FILTER_SET: 893 if (imsg.hdr.len > IMSG_HEADER_SIZE + 894 sizeof(struct filter_set)) 895 fatalx("IMSG_RECONF_CONF bad len"); 896 if (parent_set == NULL) { 897 log_warnx("rde_dispatch_imsg_parent: " 898 "IMSG_FILTER_SET unexpected"); 899 break; 900 } 901 if ((s = malloc(sizeof(struct filter_set))) == NULL) 902 fatal(NULL); 903 memcpy(s, imsg.data, sizeof(struct filter_set)); 904 TAILQ_INSERT_TAIL(parent_set, s, entry); 905 906 if (s->type == ACTION_SET_NEXTHOP) { 907 nh = nexthop_get(&s->action.nexthop); 908 nh->refcnt++; 909 } 910 break; 911 case IMSG_MRT_OPEN: 912 case IMSG_MRT_REOPEN: 913 if (imsg.hdr.len > IMSG_HEADER_SIZE + 914 sizeof(struct mrt)) { 915 log_warnx("wrong imsg len"); 916 break; 917 } 918 memcpy(&xmrt, imsg.data, sizeof(xmrt)); 919 if ((fd = imsg.fd) == -1) 920 log_warnx("expected to receive fd for mrt dump " 921 "but didn't receive any"); 922 else if (xmrt.type == MRT_TABLE_DUMP || 923 xmrt.type == MRT_TABLE_DUMP_MP || 924 xmrt.type == MRT_TABLE_DUMP_V2) { 925 rde_dump_mrt_new(&xmrt, imsg.hdr.pid, fd); 926 } else 927 close(fd); 928 break; 929 case IMSG_MRT_CLOSE: 930 /* ignore end message because a dump is atomic */ 931 break; 932 default: 933 break; 934 } 935 imsg_free(&imsg); 936 } 937 } 938 939 /* handle routing updates from the session engine. */ 940 int 941 rde_update_dispatch(struct imsg *imsg) 942 { 943 struct bgpd_addr prefix; 944 struct mpattr mpa; 945 struct rde_peer *peer; 946 struct rde_aspath *asp = NULL; 947 u_char *p, *mpp = NULL; 948 int error = -1, pos = 0; 949 u_int16_t afi, len, mplen; 950 u_int16_t withdrawn_len; 951 u_int16_t attrpath_len; 952 u_int16_t nlri_len; 953 u_int8_t aid, prefixlen, safi, subtype; 954 u_int32_t fas; 955 956 peer = peer_get(imsg->hdr.peerid); 957 if (peer == NULL) /* unknown peer, cannot happen */ 958 return (-1); 959 if (peer->state != PEER_UP) 960 return (-1); /* peer is not yet up, cannot happen */ 961 962 p = imsg->data; 963 964 if (imsg->hdr.len < IMSG_HEADER_SIZE + 2) { 965 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0); 966 return (-1); 967 } 968 969 memcpy(&len, p, 2); 970 withdrawn_len = ntohs(len); 971 p += 2; 972 if (imsg->hdr.len < IMSG_HEADER_SIZE + 2 + withdrawn_len + 2) { 973 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0); 974 return (-1); 975 } 976 977 p += withdrawn_len; 978 memcpy(&len, p, 2); 979 attrpath_len = len = ntohs(len); 980 p += 2; 981 if (imsg->hdr.len < 982 IMSG_HEADER_SIZE + 2 + withdrawn_len + 2 + attrpath_len) { 983 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0); 984 return (-1); 985 } 986 987 nlri_len = 988 imsg->hdr.len - IMSG_HEADER_SIZE - 4 - withdrawn_len - attrpath_len; 989 bzero(&mpa, sizeof(mpa)); 990 991 if (attrpath_len != 0) { /* 0 = no NLRI information in this message */ 992 /* parse path attributes */ 993 asp = path_get(); 994 while (len > 0) { 995 if ((pos = rde_attr_parse(p, len, peer, asp, 996 &mpa)) < 0) 997 goto done; 998 p += pos; 999 len -= pos; 1000 } 1001 1002 /* check for missing but necessary attributes */ 1003 if ((subtype = rde_attr_missing(asp, peer->conf.ebgp, 1004 nlri_len))) { 1005 rde_update_err(peer, ERR_UPDATE, ERR_UPD_MISSNG_WK_ATTR, 1006 &subtype, sizeof(u_int8_t)); 1007 goto done; 1008 } 1009 1010 rde_as4byte_fixup(peer, asp); 1011 1012 /* enforce remote AS if requested */ 1013 if (asp->flags & F_ATTR_ASPATH && 1014 peer->conf.enforce_as == ENFORCE_AS_ON) { 1015 fas = aspath_neighbor(asp->aspath); 1016 if (peer->conf.remote_as != fas) { 1017 log_peer_warnx(&peer->conf, "bad path, " 1018 "starting with %s, " 1019 "enforce neighbor-as enabled", log_as(fas)); 1020 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH, 1021 NULL, 0); 1022 goto done; 1023 } 1024 } 1025 1026 rde_reflector(peer, asp); 1027 } 1028 1029 p = imsg->data; 1030 len = withdrawn_len; 1031 p += 2; 1032 /* withdraw prefix */ 1033 while (len > 0) { 1034 if ((pos = rde_update_get_prefix(p, len, &prefix, 1035 &prefixlen)) == -1) { 1036 /* 1037 * the RFC does not mention what we should do in 1038 * this case. Let's do the same as in the NLRI case. 1039 */ 1040 log_peer_warnx(&peer->conf, "bad withdraw prefix"); 1041 rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK, 1042 NULL, 0); 1043 goto done; 1044 } 1045 if (prefixlen > 32) { 1046 log_peer_warnx(&peer->conf, "bad withdraw prefix"); 1047 rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK, 1048 NULL, 0); 1049 goto done; 1050 } 1051 1052 p += pos; 1053 len -= pos; 1054 1055 if (peer->capa.mp[AID_INET] == 0) { 1056 log_peer_warnx(&peer->conf, 1057 "bad withdraw, %s disabled", aid2str(AID_INET)); 1058 rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, 1059 NULL, 0); 1060 goto done; 1061 } 1062 1063 rde_update_withdraw(peer, &prefix, prefixlen); 1064 } 1065 1066 if (attrpath_len == 0) { 1067 /* 0 = no NLRI information in this message */ 1068 if (nlri_len != 0) { 1069 /* crap at end of update which should not be there */ 1070 rde_update_err(peer, ERR_UPDATE, 1071 ERR_UPD_ATTRLIST, NULL, 0); 1072 return (-1); 1073 } 1074 if (withdrawn_len == 0) { 1075 /* EoR marker */ 1076 peer_recv_eor(peer, AID_INET); 1077 } 1078 return (0); 1079 } 1080 1081 /* withdraw MP_UNREACH_NLRI if available */ 1082 if (mpa.unreach_len != 0) { 1083 mpp = mpa.unreach; 1084 mplen = mpa.unreach_len; 1085 memcpy(&afi, mpp, 2); 1086 mpp += 2; 1087 mplen -= 2; 1088 afi = ntohs(afi); 1089 safi = *mpp++; 1090 mplen--; 1091 1092 if (afi2aid(afi, safi, &aid) == -1) { 1093 log_peer_warnx(&peer->conf, 1094 "bad AFI/SAFI pair in withdraw"); 1095 rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, 1096 NULL, 0); 1097 goto done; 1098 } 1099 1100 if (peer->capa.mp[aid] == 0) { 1101 log_peer_warnx(&peer->conf, 1102 "bad withdraw, %s disabled", aid2str(aid)); 1103 rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, 1104 NULL, 0); 1105 goto done; 1106 } 1107 1108 if ((asp->flags & ~F_ATTR_MP_UNREACH) == 0 && mplen == 0) { 1109 /* EoR marker */ 1110 peer_recv_eor(peer, aid); 1111 } 1112 1113 switch (aid) { 1114 case AID_INET6: 1115 while (mplen > 0) { 1116 if ((pos = rde_update_get_prefix6(mpp, mplen, 1117 &prefix, &prefixlen)) == -1) { 1118 log_peer_warnx(&peer->conf, 1119 "bad IPv6 withdraw prefix"); 1120 rde_update_err(peer, ERR_UPDATE, 1121 ERR_UPD_OPTATTR, 1122 mpa.unreach, mpa.unreach_len); 1123 goto done; 1124 } 1125 if (prefixlen > 128) { 1126 log_peer_warnx(&peer->conf, 1127 "bad IPv6 withdraw prefix"); 1128 rde_update_err(peer, ERR_UPDATE, 1129 ERR_UPD_OPTATTR, 1130 mpa.unreach, mpa.unreach_len); 1131 goto done; 1132 } 1133 1134 mpp += pos; 1135 mplen -= pos; 1136 1137 rde_update_withdraw(peer, &prefix, prefixlen); 1138 } 1139 break; 1140 case AID_VPN_IPv4: 1141 while (mplen > 0) { 1142 if ((pos = rde_update_get_vpn4(mpp, mplen, 1143 &prefix, &prefixlen)) == -1) { 1144 log_peer_warnx(&peer->conf, 1145 "bad VPNv4 withdraw prefix"); 1146 rde_update_err(peer, ERR_UPDATE, 1147 ERR_UPD_OPTATTR, 1148 mpa.unreach, mpa.unreach_len); 1149 goto done; 1150 } 1151 if (prefixlen > 32) { 1152 log_peer_warnx(&peer->conf, 1153 "bad VPNv4 withdraw prefix"); 1154 rde_update_err(peer, ERR_UPDATE, 1155 ERR_UPD_OPTATTR, 1156 mpa.unreach, mpa.unreach_len); 1157 goto done; 1158 } 1159 1160 mpp += pos; 1161 mplen -= pos; 1162 1163 rde_update_withdraw(peer, &prefix, prefixlen); 1164 } 1165 break; 1166 default: 1167 /* silently ignore unsupported multiprotocol AF */ 1168 break; 1169 } 1170 1171 if ((asp->flags & ~F_ATTR_MP_UNREACH) == 0) { 1172 error = 0; 1173 goto done; 1174 } 1175 } 1176 1177 /* shift to NLRI information */ 1178 p += 2 + attrpath_len; 1179 1180 /* aspath needs to be loop free nota bene this is not a hard error */ 1181 if (peer->conf.ebgp && !aspath_loopfree(asp->aspath, conf->as)) 1182 asp->flags |= F_ATTR_LOOP; 1183 1184 /* parse nlri prefix */ 1185 while (nlri_len > 0) { 1186 if ((pos = rde_update_get_prefix(p, nlri_len, &prefix, 1187 &prefixlen)) == -1) { 1188 log_peer_warnx(&peer->conf, "bad nlri prefix"); 1189 rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK, 1190 NULL, 0); 1191 goto done; 1192 } 1193 if (prefixlen > 32) { 1194 log_peer_warnx(&peer->conf, "bad nlri prefix"); 1195 rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK, 1196 NULL, 0); 1197 goto done; 1198 } 1199 1200 p += pos; 1201 nlri_len -= pos; 1202 1203 if (peer->capa.mp[AID_INET] == 0) { 1204 log_peer_warnx(&peer->conf, 1205 "bad update, %s disabled", aid2str(AID_INET)); 1206 rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, 1207 NULL, 0); 1208 goto done; 1209 } 1210 1211 rde_update_update(peer, asp, &prefix, prefixlen); 1212 1213 /* max prefix checker */ 1214 if (peer->conf.max_prefix && 1215 peer->prefix_cnt >= peer->conf.max_prefix) { 1216 log_peer_warnx(&peer->conf, "prefix limit reached"); 1217 rde_update_err(peer, ERR_CEASE, ERR_CEASE_MAX_PREFIX, 1218 NULL, 0); 1219 goto done; 1220 } 1221 1222 } 1223 1224 /* add MP_REACH_NLRI if available */ 1225 if (mpa.reach_len != 0) { 1226 mpp = mpa.reach; 1227 mplen = mpa.reach_len; 1228 memcpy(&afi, mpp, 2); 1229 mpp += 2; 1230 mplen -= 2; 1231 afi = ntohs(afi); 1232 safi = *mpp++; 1233 mplen--; 1234 1235 if (afi2aid(afi, safi, &aid) == -1) { 1236 log_peer_warnx(&peer->conf, 1237 "bad AFI/SAFI pair in update"); 1238 rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, 1239 NULL, 0); 1240 goto done; 1241 } 1242 1243 if (peer->capa.mp[aid] == 0) { 1244 log_peer_warnx(&peer->conf, 1245 "bad update, %s disabled", aid2str(aid)); 1246 rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, 1247 NULL, 0); 1248 goto done; 1249 } 1250 1251 /* 1252 * this works because asp is not linked. 1253 * But first unlock the previously locked nexthop. 1254 */ 1255 if (asp->nexthop) { 1256 asp->nexthop->refcnt--; 1257 (void)nexthop_delete(asp->nexthop); 1258 asp->nexthop = NULL; 1259 } 1260 if ((pos = rde_get_mp_nexthop(mpp, mplen, aid, asp)) == -1) { 1261 log_peer_warnx(&peer->conf, "bad nlri prefix"); 1262 rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, 1263 mpa.reach, mpa.reach_len); 1264 goto done; 1265 } 1266 mpp += pos; 1267 mplen -= pos; 1268 1269 switch (aid) { 1270 case AID_INET6: 1271 while (mplen > 0) { 1272 if ((pos = rde_update_get_prefix6(mpp, mplen, 1273 &prefix, &prefixlen)) == -1) { 1274 log_peer_warnx(&peer->conf, 1275 "bad IPv6 nlri prefix"); 1276 rde_update_err(peer, ERR_UPDATE, 1277 ERR_UPD_OPTATTR, 1278 mpa.reach, mpa.reach_len); 1279 goto done; 1280 } 1281 if (prefixlen > 128) { 1282 rde_update_err(peer, ERR_UPDATE, 1283 ERR_UPD_OPTATTR, 1284 mpa.reach, mpa.reach_len); 1285 goto done; 1286 } 1287 1288 mpp += pos; 1289 mplen -= pos; 1290 1291 rde_update_update(peer, asp, &prefix, 1292 prefixlen); 1293 1294 /* max prefix checker */ 1295 if (peer->conf.max_prefix && 1296 peer->prefix_cnt >= peer->conf.max_prefix) { 1297 log_peer_warnx(&peer->conf, 1298 "prefix limit reached"); 1299 rde_update_err(peer, ERR_CEASE, 1300 ERR_CEASE_MAX_PREFIX, NULL, 0); 1301 goto done; 1302 } 1303 1304 } 1305 break; 1306 case AID_VPN_IPv4: 1307 while (mplen > 0) { 1308 if ((pos = rde_update_get_vpn4(mpp, mplen, 1309 &prefix, &prefixlen)) == -1) { 1310 log_peer_warnx(&peer->conf, 1311 "bad VPNv4 nlri prefix"); 1312 rde_update_err(peer, ERR_UPDATE, 1313 ERR_UPD_OPTATTR, 1314 mpa.reach, mpa.reach_len); 1315 goto done; 1316 } 1317 if (prefixlen > 32) { 1318 rde_update_err(peer, ERR_UPDATE, 1319 ERR_UPD_OPTATTR, 1320 mpa.reach, mpa.reach_len); 1321 goto done; 1322 } 1323 1324 mpp += pos; 1325 mplen -= pos; 1326 1327 rde_update_update(peer, asp, &prefix, 1328 prefixlen); 1329 1330 /* max prefix checker */ 1331 if (peer->conf.max_prefix && 1332 peer->prefix_cnt >= peer->conf.max_prefix) { 1333 log_peer_warnx(&peer->conf, 1334 "prefix limit reached"); 1335 rde_update_err(peer, ERR_CEASE, 1336 ERR_CEASE_MAX_PREFIX, NULL, 0); 1337 goto done; 1338 } 1339 1340 } 1341 break; 1342 default: 1343 /* silently ignore unsupported multiprotocol AF */ 1344 break; 1345 } 1346 } 1347 1348 done: 1349 if (attrpath_len != 0) { 1350 /* unlock the previously locked entry */ 1351 if (asp->nexthop) { 1352 asp->nexthop->refcnt--; 1353 (void)nexthop_delete(asp->nexthop); 1354 } 1355 /* free allocated attribute memory that is no longer used */ 1356 path_put(asp); 1357 } 1358 1359 return (error); 1360 } 1361 1362 extern u_int16_t rib_size; 1363 1364 void 1365 rde_update_update(struct rde_peer *peer, struct rde_aspath *asp, 1366 struct bgpd_addr *prefix, u_int8_t prefixlen) 1367 { 1368 struct rde_aspath *fasp; 1369 enum filter_actions action; 1370 int r = 0, f = 0; 1371 u_int16_t i; 1372 1373 peer->prefix_rcvd_update++; 1374 /* add original path to the Adj-RIB-In */ 1375 if (peer->conf.softreconfig_in) 1376 r += path_update(&ribs[0], peer, asp, prefix, prefixlen); 1377 1378 for (i = 1; i < rib_size; i++) { 1379 /* input filter */ 1380 action = rde_filter(i, &fasp, rules_l, peer, asp, prefix, 1381 prefixlen, peer, DIR_IN); 1382 1383 if (fasp == NULL) 1384 fasp = asp; 1385 1386 if (action == ACTION_ALLOW) { 1387 rde_update_log("update", i, peer, 1388 &fasp->nexthop->exit_nexthop, prefix, prefixlen); 1389 r += path_update(&ribs[i], peer, fasp, prefix, 1390 prefixlen); 1391 } else if (prefix_remove(&ribs[i], peer, prefix, prefixlen, 1392 0)) { 1393 rde_update_log("filtered withdraw", i, peer, 1394 NULL, prefix, prefixlen); 1395 f++; 1396 } 1397 1398 /* free modified aspath */ 1399 if (fasp != asp) 1400 path_put(fasp); 1401 } 1402 1403 if (r) 1404 peer->prefix_cnt++; 1405 else if (f) 1406 peer->prefix_cnt--; 1407 } 1408 1409 void 1410 rde_update_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix, 1411 u_int8_t prefixlen) 1412 { 1413 int r = 0; 1414 u_int16_t i; 1415 1416 peer->prefix_rcvd_withdraw++; 1417 1418 for (i = rib_size - 1; ; i--) { 1419 if (prefix_remove(&ribs[i], peer, prefix, prefixlen, 0)) { 1420 rde_update_log("withdraw", i, peer, NULL, prefix, 1421 prefixlen); 1422 r++; 1423 } 1424 if (i == 0) 1425 break; 1426 } 1427 1428 if (r) 1429 peer->prefix_cnt--; 1430 } 1431 1432 /* 1433 * BGP UPDATE parser functions 1434 */ 1435 1436 /* attribute parser specific makros */ 1437 #define UPD_READ(t, p, plen, n) \ 1438 do { \ 1439 memcpy(t, p, n); \ 1440 p += n; \ 1441 plen += n; \ 1442 } while (0) 1443 1444 #define CHECK_FLAGS(s, t, m) \ 1445 (((s) & ~(ATTR_DEFMASK | (m))) == (t)) 1446 1447 int 1448 rde_attr_parse(u_char *p, u_int16_t len, struct rde_peer *peer, 1449 struct rde_aspath *a, struct mpattr *mpa) 1450 { 1451 struct bgpd_addr nexthop; 1452 u_char *op = p, *npath; 1453 u_int32_t tmp32; 1454 int err; 1455 u_int16_t attr_len, nlen; 1456 u_int16_t plen = 0; 1457 u_int8_t flags; 1458 u_int8_t type; 1459 u_int8_t tmp8; 1460 1461 if (len < 3) { 1462 bad_len: 1463 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLEN, op, len); 1464 return (-1); 1465 } 1466 1467 UPD_READ(&flags, p, plen, 1); 1468 UPD_READ(&type, p, plen, 1); 1469 1470 if (flags & ATTR_EXTLEN) { 1471 if (len - plen < 2) 1472 goto bad_len; 1473 UPD_READ(&attr_len, p, plen, 2); 1474 attr_len = ntohs(attr_len); 1475 } else { 1476 UPD_READ(&tmp8, p, plen, 1); 1477 attr_len = tmp8; 1478 } 1479 1480 if (len - plen < attr_len) 1481 goto bad_len; 1482 1483 /* adjust len to the actual attribute size including header */ 1484 len = plen + attr_len; 1485 1486 switch (type) { 1487 case ATTR_UNDEF: 1488 /* ignore and drop path attributes with a type code of 0 */ 1489 plen += attr_len; 1490 break; 1491 case ATTR_ORIGIN: 1492 if (attr_len != 1) 1493 goto bad_len; 1494 1495 if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0)) { 1496 bad_flags: 1497 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRFLAGS, 1498 op, len); 1499 return (-1); 1500 } 1501 1502 UPD_READ(&a->origin, p, plen, 1); 1503 if (a->origin > ORIGIN_INCOMPLETE) { 1504 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ORIGIN, 1505 op, len); 1506 return (-1); 1507 } 1508 if (a->flags & F_ATTR_ORIGIN) 1509 goto bad_list; 1510 a->flags |= F_ATTR_ORIGIN; 1511 break; 1512 case ATTR_ASPATH: 1513 if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0)) 1514 goto bad_flags; 1515 err = aspath_verify(p, attr_len, rde_as4byte(peer)); 1516 if (err == AS_ERR_SOFT) { 1517 /* 1518 * soft errors like unexpected segment types are 1519 * not considered fatal and the path is just 1520 * marked invalid. 1521 */ 1522 a->flags |= F_ATTR_PARSE_ERR; 1523 log_peer_warnx(&peer->conf, "bad ASPATH, " 1524 "path invalidated and prefix withdrawn"); 1525 } else if (err != 0) { 1526 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH, 1527 NULL, 0); 1528 return (-1); 1529 } 1530 if (a->flags & F_ATTR_ASPATH) 1531 goto bad_list; 1532 if (rde_as4byte(peer)) { 1533 npath = p; 1534 nlen = attr_len; 1535 } else 1536 npath = aspath_inflate(p, attr_len, &nlen); 1537 a->flags |= F_ATTR_ASPATH; 1538 a->aspath = aspath_get(npath, nlen); 1539 if (npath != p) 1540 free(npath); 1541 plen += attr_len; 1542 break; 1543 case ATTR_NEXTHOP: 1544 if (attr_len != 4) 1545 goto bad_len; 1546 if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0)) 1547 goto bad_flags; 1548 if (a->flags & F_ATTR_NEXTHOP) 1549 goto bad_list; 1550 a->flags |= F_ATTR_NEXTHOP; 1551 1552 bzero(&nexthop, sizeof(nexthop)); 1553 nexthop.aid = AID_INET; 1554 UPD_READ(&nexthop.v4.s_addr, p, plen, 4); 1555 /* 1556 * Check if the nexthop is a valid IP address. We consider 1557 * multicast and experimental addresses as invalid. 1558 */ 1559 tmp32 = ntohl(nexthop.v4.s_addr); 1560 if (IN_MULTICAST(tmp32) || IN_BADCLASS(tmp32)) { 1561 rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK, 1562 op, len); 1563 return (-1); 1564 } 1565 a->nexthop = nexthop_get(&nexthop); 1566 /* 1567 * lock the nexthop because it is not yet linked else 1568 * withdraws may remove this nexthop which in turn would 1569 * cause a use after free error. 1570 */ 1571 a->nexthop->refcnt++; 1572 break; 1573 case ATTR_MED: 1574 if (attr_len != 4) 1575 goto bad_len; 1576 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0)) 1577 goto bad_flags; 1578 if (a->flags & F_ATTR_MED) 1579 goto bad_list; 1580 a->flags |= F_ATTR_MED; 1581 1582 UPD_READ(&tmp32, p, plen, 4); 1583 a->med = ntohl(tmp32); 1584 break; 1585 case ATTR_LOCALPREF: 1586 if (attr_len != 4) 1587 goto bad_len; 1588 if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0)) 1589 goto bad_flags; 1590 if (peer->conf.ebgp) { 1591 /* ignore local-pref attr on non ibgp peers */ 1592 plen += 4; 1593 break; 1594 } 1595 if (a->flags & F_ATTR_LOCALPREF) 1596 goto bad_list; 1597 a->flags |= F_ATTR_LOCALPREF; 1598 1599 UPD_READ(&tmp32, p, plen, 4); 1600 a->lpref = ntohl(tmp32); 1601 break; 1602 case ATTR_ATOMIC_AGGREGATE: 1603 if (attr_len != 0) 1604 goto bad_len; 1605 if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0)) 1606 goto bad_flags; 1607 goto optattr; 1608 case ATTR_AGGREGATOR: 1609 if ((!rde_as4byte(peer) && attr_len != 6) || 1610 (rde_as4byte(peer) && attr_len != 8)) { 1611 /* 1612 * ignore attribute in case of error as per 1613 * draft-ietf-idr-optional-transitive-00.txt 1614 * but only if partial bit is set 1615 */ 1616 if ((flags & ATTR_PARTIAL) == 0) 1617 goto bad_len; 1618 log_peer_warnx(&peer->conf, "bad AGGREGATOR, " 1619 "partial attribute ignored"); 1620 plen += attr_len; 1621 break; 1622 } 1623 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE, 1624 ATTR_PARTIAL)) 1625 goto bad_flags; 1626 if (!rde_as4byte(peer)) { 1627 /* need to inflate aggregator AS to 4-byte */ 1628 u_char t[8]; 1629 t[0] = t[1] = 0; 1630 UPD_READ(&t[2], p, plen, 2); 1631 UPD_READ(&t[4], p, plen, 4); 1632 if (attr_optadd(a, flags, type, t, 1633 sizeof(t)) == -1) 1634 goto bad_list; 1635 break; 1636 } 1637 /* 4-byte ready server take the default route */ 1638 goto optattr; 1639 case ATTR_COMMUNITIES: 1640 if (attr_len % 4 != 0) { 1641 /* 1642 * mark update as bad and withdraw all routes as per 1643 * draft-ietf-idr-optional-transitive-00.txt 1644 * but only if partial bit is set 1645 */ 1646 if ((flags & ATTR_PARTIAL) == 0) 1647 goto bad_len; 1648 a->flags |= F_ATTR_PARSE_ERR; 1649 log_peer_warnx(&peer->conf, "bad COMMUNITIES, " 1650 "path invalidated and prefix withdrawn"); 1651 } 1652 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE, 1653 ATTR_PARTIAL)) 1654 goto bad_flags; 1655 goto optattr; 1656 case ATTR_EXT_COMMUNITIES: 1657 if (attr_len % 8 != 0) { 1658 /* 1659 * mark update as bad and withdraw all routes as per 1660 * draft-ietf-idr-optional-transitive-00.txt 1661 * but only if partial bit is set 1662 */ 1663 if ((flags & ATTR_PARTIAL) == 0) 1664 goto bad_len; 1665 a->flags |= F_ATTR_PARSE_ERR; 1666 log_peer_warnx(&peer->conf, "bad EXT_COMMUNITIES, " 1667 "path invalidated and prefix withdrawn"); 1668 } 1669 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE, 1670 ATTR_PARTIAL)) 1671 goto bad_flags; 1672 goto optattr; 1673 case ATTR_ORIGINATOR_ID: 1674 if (attr_len != 4) 1675 goto bad_len; 1676 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0)) 1677 goto bad_flags; 1678 goto optattr; 1679 case ATTR_CLUSTER_LIST: 1680 if (attr_len % 4 != 0) 1681 goto bad_len; 1682 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0)) 1683 goto bad_flags; 1684 goto optattr; 1685 case ATTR_MP_REACH_NLRI: 1686 if (attr_len < 4) 1687 goto bad_len; 1688 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0)) 1689 goto bad_flags; 1690 /* the validity is checked in rde_update_dispatch() */ 1691 if (a->flags & F_ATTR_MP_REACH) 1692 goto bad_list; 1693 a->flags |= F_ATTR_MP_REACH; 1694 1695 mpa->reach = p; 1696 mpa->reach_len = attr_len; 1697 plen += attr_len; 1698 break; 1699 case ATTR_MP_UNREACH_NLRI: 1700 if (attr_len < 3) 1701 goto bad_len; 1702 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0)) 1703 goto bad_flags; 1704 /* the validity is checked in rde_update_dispatch() */ 1705 if (a->flags & F_ATTR_MP_UNREACH) 1706 goto bad_list; 1707 a->flags |= F_ATTR_MP_UNREACH; 1708 1709 mpa->unreach = p; 1710 mpa->unreach_len = attr_len; 1711 plen += attr_len; 1712 break; 1713 case ATTR_AS4_AGGREGATOR: 1714 if (attr_len != 8) { 1715 /* see ATTR_AGGREGATOR ... */ 1716 if ((flags & ATTR_PARTIAL) == 0) 1717 goto bad_len; 1718 log_peer_warnx(&peer->conf, "bad AS4_AGGREGATOR, " 1719 "partial attribute ignored"); 1720 plen += attr_len; 1721 break; 1722 } 1723 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE, 1724 ATTR_PARTIAL)) 1725 goto bad_flags; 1726 a->flags |= F_ATTR_AS4BYTE_NEW; 1727 goto optattr; 1728 case ATTR_AS4_PATH: 1729 if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE, 1730 ATTR_PARTIAL)) 1731 goto bad_flags; 1732 if ((err = aspath_verify(p, attr_len, 1)) != 0) { 1733 /* 1734 * XXX RFC does not specify how to handle errors. 1735 * XXX Instead of dropping the session because of a 1736 * XXX bad path just mark the full update as having 1737 * XXX a parse error which makes the update no longer 1738 * XXX eligible and will not be considered for routing 1739 * XXX or redistribution. 1740 * XXX We follow draft-ietf-idr-optional-transitive 1741 * XXX by looking at the partial bit. 1742 * XXX Consider soft errors similar to a partial attr. 1743 */ 1744 if (flags & ATTR_PARTIAL || err == AS_ERR_SOFT) { 1745 a->flags |= F_ATTR_PARSE_ERR; 1746 log_peer_warnx(&peer->conf, "bad AS4_PATH, " 1747 "path invalidated and prefix withdrawn"); 1748 goto optattr; 1749 } else { 1750 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH, 1751 NULL, 0); 1752 return (-1); 1753 } 1754 } 1755 a->flags |= F_ATTR_AS4BYTE_NEW; 1756 goto optattr; 1757 default: 1758 if ((flags & ATTR_OPTIONAL) == 0) { 1759 rde_update_err(peer, ERR_UPDATE, ERR_UPD_UNKNWN_WK_ATTR, 1760 op, len); 1761 return (-1); 1762 } 1763 optattr: 1764 if (attr_optadd(a, flags, type, p, attr_len) == -1) { 1765 bad_list: 1766 rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, 1767 NULL, 0); 1768 return (-1); 1769 } 1770 1771 plen += attr_len; 1772 break; 1773 } 1774 1775 return (plen); 1776 } 1777 1778 int 1779 rde_attr_add(struct rde_aspath *a, u_char *p, u_int16_t len) 1780 { 1781 u_int16_t attr_len; 1782 u_int16_t plen = 0; 1783 u_int8_t flags; 1784 u_int8_t type; 1785 u_int8_t tmp8; 1786 1787 if (a == NULL) /* no aspath, nothing to do */ 1788 return (0); 1789 if (len < 3) 1790 return (-1); 1791 1792 UPD_READ(&flags, p, plen, 1); 1793 UPD_READ(&type, p, plen, 1); 1794 1795 if (flags & ATTR_EXTLEN) { 1796 if (len - plen < 2) 1797 return (-1); 1798 UPD_READ(&attr_len, p, plen, 2); 1799 attr_len = ntohs(attr_len); 1800 } else { 1801 UPD_READ(&tmp8, p, plen, 1); 1802 attr_len = tmp8; 1803 } 1804 1805 if (len - plen < attr_len) 1806 return (-1); 1807 1808 if (attr_optadd(a, flags, type, p, attr_len) == -1) 1809 return (-1); 1810 return (0); 1811 } 1812 1813 #undef UPD_READ 1814 #undef CHECK_FLAGS 1815 1816 u_int8_t 1817 rde_attr_missing(struct rde_aspath *a, int ebgp, u_int16_t nlrilen) 1818 { 1819 /* ATTR_MP_UNREACH_NLRI may be sent alone */ 1820 if (nlrilen == 0 && a->flags & F_ATTR_MP_UNREACH && 1821 (a->flags & F_ATTR_MP_REACH) == 0) 1822 return (0); 1823 1824 if ((a->flags & F_ATTR_ORIGIN) == 0) 1825 return (ATTR_ORIGIN); 1826 if ((a->flags & F_ATTR_ASPATH) == 0) 1827 return (ATTR_ASPATH); 1828 if ((a->flags & F_ATTR_MP_REACH) == 0 && 1829 (a->flags & F_ATTR_NEXTHOP) == 0) 1830 return (ATTR_NEXTHOP); 1831 if (!ebgp) 1832 if ((a->flags & F_ATTR_LOCALPREF) == 0) 1833 return (ATTR_LOCALPREF); 1834 return (0); 1835 } 1836 1837 int 1838 rde_get_mp_nexthop(u_char *data, u_int16_t len, u_int8_t aid, 1839 struct rde_aspath *asp) 1840 { 1841 struct bgpd_addr nexthop; 1842 u_int8_t totlen, nhlen; 1843 1844 if (len == 0) 1845 return (-1); 1846 1847 nhlen = *data++; 1848 totlen = 1; 1849 len--; 1850 1851 if (nhlen > len) 1852 return (-1); 1853 1854 bzero(&nexthop, sizeof(nexthop)); 1855 nexthop.aid = aid; 1856 switch (aid) { 1857 case AID_INET6: 1858 /* 1859 * RFC2545 describes that there may be a link-local 1860 * address carried in nexthop. Yikes! 1861 * This is not only silly, it is wrong and we just ignore 1862 * this link-local nexthop. The bgpd session doesn't run 1863 * over the link-local address so why should all other 1864 * traffic. 1865 */ 1866 if (nhlen != 16 && nhlen != 32) { 1867 log_warnx("bad multiprotocol nexthop, bad size"); 1868 return (-1); 1869 } 1870 memcpy(&nexthop.v6.s6_addr, data, 16); 1871 break; 1872 case AID_VPN_IPv4: 1873 /* 1874 * Neither RFC4364 nor RFC3107 specify the format of the 1875 * nexthop in an explicit way. The quality of RFC went down 1876 * the toilet the larger the number got. 1877 * RFC4364 is very confusing about VPN-IPv4 address and the 1878 * VPN-IPv4 prefix that carries also a MPLS label. 1879 * So the nexthop is a 12-byte address with a 64bit RD and 1880 * an IPv4 address following. In the nexthop case the RD can 1881 * be ignored. 1882 * Since the nexthop has to be in the main IPv4 table just 1883 * create an AID_INET nexthop. So we don't need to handle 1884 * AID_VPN_IPv4 in nexthop and kroute. 1885 */ 1886 if (nhlen != 12) { 1887 log_warnx("bad multiprotocol nexthop, bad size"); 1888 return (-1); 1889 } 1890 data += sizeof(u_int64_t); 1891 nexthop.aid = AID_INET; 1892 memcpy(&nexthop.v4, data, sizeof(nexthop.v4)); 1893 break; 1894 default: 1895 log_warnx("bad multiprotocol nexthop, bad AID"); 1896 return (-1); 1897 } 1898 1899 asp->nexthop = nexthop_get(&nexthop); 1900 /* 1901 * lock the nexthop because it is not yet linked else 1902 * withdraws may remove this nexthop which in turn would 1903 * cause a use after free error. 1904 */ 1905 asp->nexthop->refcnt++; 1906 1907 /* ignore reserved (old SNPA) field as per RFC4760 */ 1908 totlen += nhlen + 1; 1909 data += nhlen + 1; 1910 1911 return (totlen); 1912 } 1913 1914 int 1915 rde_update_extract_prefix(u_char *p, u_int16_t len, void *va, 1916 u_int8_t pfxlen, u_int8_t max) 1917 { 1918 static u_char addrmask[] = { 1919 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff }; 1920 u_char *a = va; 1921 int i; 1922 u_int16_t plen = 0; 1923 1924 for (i = 0; pfxlen && i < max; i++) { 1925 if (len <= plen) 1926 return (-1); 1927 if (pfxlen < 8) { 1928 a[i] = *p++ & addrmask[pfxlen]; 1929 plen++; 1930 break; 1931 } else { 1932 a[i] = *p++; 1933 plen++; 1934 pfxlen -= 8; 1935 } 1936 } 1937 return (plen); 1938 } 1939 1940 int 1941 rde_update_get_prefix(u_char *p, u_int16_t len, struct bgpd_addr *prefix, 1942 u_int8_t *prefixlen) 1943 { 1944 u_int8_t pfxlen; 1945 int plen; 1946 1947 if (len < 1) 1948 return (-1); 1949 1950 pfxlen = *p++; 1951 len--; 1952 1953 bzero(prefix, sizeof(struct bgpd_addr)); 1954 prefix->aid = AID_INET; 1955 *prefixlen = pfxlen; 1956 1957 if ((plen = rde_update_extract_prefix(p, len, &prefix->v4, pfxlen, 1958 sizeof(prefix->v4))) == -1) 1959 return (-1); 1960 1961 return (plen + 1); /* pfxlen needs to be added */ 1962 } 1963 1964 int 1965 rde_update_get_prefix6(u_char *p, u_int16_t len, struct bgpd_addr *prefix, 1966 u_int8_t *prefixlen) 1967 { 1968 int plen; 1969 u_int8_t pfxlen; 1970 1971 if (len < 1) 1972 return (-1); 1973 1974 pfxlen = *p++; 1975 len--; 1976 1977 bzero(prefix, sizeof(struct bgpd_addr)); 1978 prefix->aid = AID_INET6; 1979 *prefixlen = pfxlen; 1980 1981 if ((plen = rde_update_extract_prefix(p, len, &prefix->v6, pfxlen, 1982 sizeof(prefix->v6))) == -1) 1983 return (-1); 1984 1985 return (plen + 1); /* pfxlen needs to be added */ 1986 } 1987 1988 int 1989 rde_update_get_vpn4(u_char *p, u_int16_t len, struct bgpd_addr *prefix, 1990 u_int8_t *prefixlen) 1991 { 1992 int rv, done = 0; 1993 u_int8_t pfxlen; 1994 u_int16_t plen; 1995 1996 if (len < 1) 1997 return (-1); 1998 1999 memcpy(&pfxlen, p, 1); 2000 p += 1; 2001 plen = 1; 2002 2003 bzero(prefix, sizeof(struct bgpd_addr)); 2004 2005 /* label stack */ 2006 do { 2007 if (len - plen < 3 || pfxlen < 3 * 8) 2008 return (-1); 2009 if (prefix->vpn4.labellen + 3U > 2010 sizeof(prefix->vpn4.labelstack)) 2011 return (-1); 2012 prefix->vpn4.labelstack[prefix->vpn4.labellen++] = *p++; 2013 prefix->vpn4.labelstack[prefix->vpn4.labellen++] = *p++; 2014 prefix->vpn4.labelstack[prefix->vpn4.labellen] = *p++; 2015 if (prefix->vpn4.labelstack[prefix->vpn4.labellen] & 2016 BGP_MPLS_BOS) 2017 done = 1; 2018 prefix->vpn4.labellen++; 2019 plen += 3; 2020 pfxlen -= 3 * 8; 2021 } while (!done); 2022 2023 /* RD */ 2024 if (len - plen < (int)sizeof(u_int64_t) || 2025 pfxlen < sizeof(u_int64_t) * 8) 2026 return (-1); 2027 memcpy(&prefix->vpn4.rd, p, sizeof(u_int64_t)); 2028 pfxlen -= sizeof(u_int64_t) * 8; 2029 p += sizeof(u_int64_t); 2030 plen += sizeof(u_int64_t); 2031 2032 /* prefix */ 2033 prefix->aid = AID_VPN_IPv4; 2034 *prefixlen = pfxlen; 2035 2036 if ((rv = rde_update_extract_prefix(p, len, &prefix->vpn4.addr, 2037 pfxlen, sizeof(prefix->vpn4.addr))) == -1) 2038 return (-1); 2039 2040 return (plen + rv); 2041 } 2042 2043 void 2044 rde_update_err(struct rde_peer *peer, u_int8_t error, u_int8_t suberr, 2045 void *data, u_int16_t size) 2046 { 2047 struct ibuf *wbuf; 2048 2049 if ((wbuf = imsg_create(ibuf_se, IMSG_UPDATE_ERR, peer->conf.id, 0, 2050 size + sizeof(error) + sizeof(suberr))) == NULL) 2051 fatal("imsg_create error"); 2052 if (imsg_add(wbuf, &error, sizeof(error)) == -1 || 2053 imsg_add(wbuf, &suberr, sizeof(suberr)) == -1 || 2054 imsg_add(wbuf, data, size) == -1) 2055 fatal("imsg_add error"); 2056 imsg_close(ibuf_se, wbuf); 2057 peer->state = PEER_ERR; 2058 } 2059 2060 void 2061 rde_update_log(const char *message, u_int16_t rid, 2062 const struct rde_peer *peer, const struct bgpd_addr *next, 2063 const struct bgpd_addr *prefix, u_int8_t prefixlen) 2064 { 2065 char *l = NULL; 2066 char *n = NULL; 2067 char *p = NULL; 2068 2069 if (!(conf->log & BGPD_LOG_UPDATES)) 2070 return; 2071 2072 if (next != NULL) 2073 if (asprintf(&n, " via %s", log_addr(next)) == -1) 2074 n = NULL; 2075 if (asprintf(&p, "%s/%u", log_addr(prefix), prefixlen) == -1) 2076 p = NULL; 2077 l = log_fmt_peer(&peer->conf); 2078 log_info("Rib %s: %s AS%s: %s %s%s", ribs[rid].name, 2079 l, log_as(peer->conf.remote_as), message, 2080 p ? p : "out of memory", n ? n : ""); 2081 2082 free(l); 2083 free(n); 2084 free(p); 2085 } 2086 2087 /* 2088 * 4-Byte ASN helper function. 2089 * Two scenarios need to be considered: 2090 * - NEW session with NEW attributes present -> just remove the attributes 2091 * - OLD session with NEW attributes present -> try to merge them 2092 */ 2093 void 2094 rde_as4byte_fixup(struct rde_peer *peer, struct rde_aspath *a) 2095 { 2096 struct attr *nasp, *naggr, *oaggr; 2097 u_int32_t as; 2098 2099 /* 2100 * if either ATTR_AS4_AGGREGATOR or ATTR_AS4_PATH is present 2101 * try to fixup the attributes. 2102 * Do not fixup if F_ATTR_PARSE_ERR is set. 2103 */ 2104 if (!(a->flags & F_ATTR_AS4BYTE_NEW) || a->flags & F_ATTR_PARSE_ERR) 2105 return; 2106 2107 /* first get the attributes */ 2108 nasp = attr_optget(a, ATTR_AS4_PATH); 2109 naggr = attr_optget(a, ATTR_AS4_AGGREGATOR); 2110 2111 if (rde_as4byte(peer)) { 2112 /* NEW session using 4-byte ASNs */ 2113 if (nasp) { 2114 log_peer_warnx(&peer->conf, "uses 4-byte ASN " 2115 "but sent AS4_PATH attribute."); 2116 attr_free(a, nasp); 2117 } 2118 if (naggr) { 2119 log_peer_warnx(&peer->conf, "uses 4-byte ASN " 2120 "but sent AS4_AGGREGATOR attribute."); 2121 attr_free(a, naggr); 2122 } 2123 return; 2124 } 2125 /* OLD session using 2-byte ASNs */ 2126 /* try to merge the new attributes into the old ones */ 2127 if ((oaggr = attr_optget(a, ATTR_AGGREGATOR))) { 2128 memcpy(&as, oaggr->data, sizeof(as)); 2129 if (ntohl(as) != AS_TRANS) { 2130 /* per RFC ignore AS4_PATH and AS4_AGGREGATOR */ 2131 if (nasp) 2132 attr_free(a, nasp); 2133 if (naggr) 2134 attr_free(a, naggr); 2135 return; 2136 } 2137 if (naggr) { 2138 /* switch over to new AGGREGATOR */ 2139 attr_free(a, oaggr); 2140 if (attr_optadd(a, ATTR_OPTIONAL | ATTR_TRANSITIVE, 2141 ATTR_AGGREGATOR, naggr->data, naggr->len)) 2142 fatalx("attr_optadd failed but impossible"); 2143 } 2144 } 2145 /* there is no need for AS4_AGGREGATOR any more */ 2146 if (naggr) 2147 attr_free(a, naggr); 2148 2149 /* merge AS4_PATH with ASPATH */ 2150 if (nasp) 2151 aspath_merge(a, nasp); 2152 } 2153 2154 2155 /* 2156 * route reflector helper function 2157 */ 2158 void 2159 rde_reflector(struct rde_peer *peer, struct rde_aspath *asp) 2160 { 2161 struct attr *a; 2162 u_int8_t *p; 2163 u_int16_t len; 2164 u_int32_t id; 2165 2166 /* do not consider updates with parse errors */ 2167 if (asp->flags & F_ATTR_PARSE_ERR) 2168 return; 2169 2170 /* check for originator id if eq router_id drop */ 2171 if ((a = attr_optget(asp, ATTR_ORIGINATOR_ID)) != NULL) { 2172 if (memcmp(&conf->bgpid, a->data, sizeof(conf->bgpid)) == 0) { 2173 /* this is coming from myself */ 2174 asp->flags |= F_ATTR_LOOP; 2175 return; 2176 } 2177 } else if (conf->flags & BGPD_FLAG_REFLECTOR) { 2178 if (peer->conf.ebgp) 2179 id = conf->bgpid; 2180 else 2181 id = htonl(peer->remote_bgpid); 2182 if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_ORIGINATOR_ID, 2183 &id, sizeof(u_int32_t)) == -1) 2184 fatalx("attr_optadd failed but impossible"); 2185 } 2186 2187 /* check for own id in the cluster list */ 2188 if (conf->flags & BGPD_FLAG_REFLECTOR) { 2189 if ((a = attr_optget(asp, ATTR_CLUSTER_LIST)) != NULL) { 2190 for (len = 0; len < a->len; 2191 len += sizeof(conf->clusterid)) 2192 /* check if coming from my cluster */ 2193 if (memcmp(&conf->clusterid, a->data + len, 2194 sizeof(conf->clusterid)) == 0) { 2195 asp->flags |= F_ATTR_LOOP; 2196 return; 2197 } 2198 2199 /* prepend own clusterid by replacing attribute */ 2200 len = a->len + sizeof(conf->clusterid); 2201 if (len < a->len) 2202 fatalx("rde_reflector: cluster-list overflow"); 2203 if ((p = malloc(len)) == NULL) 2204 fatal("rde_reflector"); 2205 memcpy(p, &conf->clusterid, sizeof(conf->clusterid)); 2206 memcpy(p + sizeof(conf->clusterid), a->data, a->len); 2207 attr_free(asp, a); 2208 if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_CLUSTER_LIST, 2209 p, len) == -1) 2210 fatalx("attr_optadd failed but impossible"); 2211 free(p); 2212 } else if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_CLUSTER_LIST, 2213 &conf->clusterid, sizeof(conf->clusterid)) == -1) 2214 fatalx("attr_optadd failed but impossible"); 2215 } 2216 } 2217 2218 /* 2219 * control specific functions 2220 */ 2221 void 2222 rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags) 2223 { 2224 struct ctl_show_rib rib; 2225 struct ibuf *wbuf; 2226 struct attr *a; 2227 void *bp; 2228 time_t staletime; 2229 u_int8_t l; 2230 2231 bzero(&rib, sizeof(rib)); 2232 rib.lastchange = p->lastchange; 2233 rib.local_pref = asp->lpref; 2234 rib.med = asp->med; 2235 rib.weight = asp->weight; 2236 strlcpy(rib.descr, asp->peer->conf.descr, sizeof(rib.descr)); 2237 memcpy(&rib.remote_addr, &asp->peer->remote_addr, 2238 sizeof(rib.remote_addr)); 2239 rib.remote_id = asp->peer->remote_bgpid; 2240 if (asp->nexthop != NULL) { 2241 memcpy(&rib.true_nexthop, &asp->nexthop->true_nexthop, 2242 sizeof(rib.true_nexthop)); 2243 memcpy(&rib.exit_nexthop, &asp->nexthop->exit_nexthop, 2244 sizeof(rib.exit_nexthop)); 2245 } else { 2246 /* announced network may have a NULL nexthop */ 2247 bzero(&rib.true_nexthop, sizeof(rib.true_nexthop)); 2248 bzero(&rib.exit_nexthop, sizeof(rib.exit_nexthop)); 2249 rib.true_nexthop.aid = p->prefix->aid; 2250 rib.exit_nexthop.aid = p->prefix->aid; 2251 } 2252 pt_getaddr(p->prefix, &rib.prefix); 2253 rib.prefixlen = p->prefix->prefixlen; 2254 rib.origin = asp->origin; 2255 rib.flags = 0; 2256 if (p->rib->active == p) 2257 rib.flags |= F_PREF_ACTIVE; 2258 if (!asp->peer->conf.ebgp) 2259 rib.flags |= F_PREF_INTERNAL; 2260 if (asp->flags & F_PREFIX_ANNOUNCED) 2261 rib.flags |= F_PREF_ANNOUNCE; 2262 if (asp->nexthop == NULL || asp->nexthop->state == NEXTHOP_REACH) 2263 rib.flags |= F_PREF_ELIGIBLE; 2264 if (asp->flags & F_ATTR_LOOP) 2265 rib.flags &= ~F_PREF_ELIGIBLE; 2266 staletime = asp->peer->staletime[p->prefix->aid]; 2267 if (staletime && p->lastchange <= staletime) 2268 rib.flags |= F_PREF_STALE; 2269 rib.aspath_len = aspath_length(asp->aspath); 2270 2271 if ((wbuf = imsg_create(ibuf_se_ctl, IMSG_CTL_SHOW_RIB, 0, pid, 2272 sizeof(rib) + rib.aspath_len)) == NULL) 2273 return; 2274 if (imsg_add(wbuf, &rib, sizeof(rib)) == -1 || 2275 imsg_add(wbuf, aspath_dump(asp->aspath), 2276 rib.aspath_len) == -1) 2277 return; 2278 imsg_close(ibuf_se_ctl, wbuf); 2279 2280 if (flags & F_CTL_DETAIL) 2281 for (l = 0; l < asp->others_len; l++) { 2282 if ((a = asp->others[l]) == NULL) 2283 break; 2284 if ((wbuf = imsg_create(ibuf_se_ctl, 2285 IMSG_CTL_SHOW_RIB_ATTR, 0, pid, 2286 attr_optlen(a))) == NULL) 2287 return; 2288 if ((bp = ibuf_reserve(wbuf, attr_optlen(a))) == NULL) { 2289 ibuf_free(wbuf); 2290 return; 2291 } 2292 if (attr_write(bp, attr_optlen(a), a->flags, 2293 a->type, a->data, a->len) == -1) { 2294 ibuf_free(wbuf); 2295 return; 2296 } 2297 imsg_close(ibuf_se_ctl, wbuf); 2298 } 2299 } 2300 2301 void 2302 rde_dump_filterout(struct rde_peer *peer, struct prefix *p, 2303 struct ctl_show_rib_request *req) 2304 { 2305 struct bgpd_addr addr; 2306 struct rde_aspath *asp; 2307 enum filter_actions a; 2308 2309 if (up_test_update(peer, p) != 1) 2310 return; 2311 2312 pt_getaddr(p->prefix, &addr); 2313 a = rde_filter(1 /* XXX */, &asp, rules_l, peer, p->aspath, &addr, 2314 p->prefix->prefixlen, p->aspath->peer, DIR_OUT); 2315 if (asp) 2316 asp->peer = p->aspath->peer; 2317 else 2318 asp = p->aspath; 2319 2320 if (a == ACTION_ALLOW) 2321 rde_dump_rib_as(p, asp, req->pid, req->flags); 2322 2323 if (asp != p->aspath) 2324 path_put(asp); 2325 } 2326 2327 void 2328 rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req) 2329 { 2330 struct rde_peer *peer; 2331 2332 if (req->flags & F_CTL_ADJ_IN || 2333 !(req->flags & (F_CTL_ADJ_IN|F_CTL_ADJ_OUT))) { 2334 if (req->peerid && req->peerid != p->aspath->peer->conf.id) 2335 return; 2336 if (req->type == IMSG_CTL_SHOW_RIB_AS && 2337 !aspath_match(p->aspath->aspath->data, 2338 p->aspath->aspath->len, req->as.type, req->as.as)) 2339 return; 2340 if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY && 2341 !community_match(p->aspath, req->community.as, 2342 req->community.type)) 2343 return; 2344 if ((req->flags & F_CTL_ACTIVE) && p->rib->active != p) 2345 return; 2346 rde_dump_rib_as(p, p->aspath, req->pid, req->flags); 2347 } else if (req->flags & F_CTL_ADJ_OUT) { 2348 if (p->rib->active != p) 2349 /* only consider active prefix */ 2350 return; 2351 if (req->peerid) { 2352 if ((peer = peer_get(req->peerid)) != NULL) 2353 rde_dump_filterout(peer, p, req); 2354 return; 2355 } 2356 } 2357 } 2358 2359 void 2360 rde_dump_upcall(struct rib_entry *re, void *ptr) 2361 { 2362 struct prefix *p; 2363 struct rde_dump_ctx *ctx = ptr; 2364 2365 LIST_FOREACH(p, &re->prefix_h, rib_l) 2366 rde_dump_filter(p, &ctx->req); 2367 } 2368 2369 void 2370 rde_dump_prefix_upcall(struct rib_entry *re, void *ptr) 2371 { 2372 struct rde_dump_ctx *ctx = ptr; 2373 struct prefix *p; 2374 struct pt_entry *pt; 2375 struct bgpd_addr addr; 2376 2377 pt = re->prefix; 2378 pt_getaddr(pt, &addr); 2379 if (addr.aid != ctx->req.prefix.aid) 2380 return; 2381 if (ctx->req.prefixlen > pt->prefixlen) 2382 return; 2383 if (!prefix_compare(&ctx->req.prefix, &addr, ctx->req.prefixlen)) 2384 LIST_FOREACH(p, &re->prefix_h, rib_l) 2385 rde_dump_filter(p, &ctx->req); 2386 } 2387 2388 void 2389 rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid, 2390 enum imsg_type type) 2391 { 2392 struct rde_dump_ctx *ctx; 2393 struct rib_entry *re; 2394 u_int error; 2395 u_int16_t id; 2396 u_int8_t hostplen; 2397 2398 if ((ctx = calloc(1, sizeof(*ctx))) == NULL) { 2399 log_warn("rde_dump_ctx_new"); 2400 error = CTL_RES_NOMEM; 2401 imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error, 2402 sizeof(error)); 2403 return; 2404 } 2405 if ((id = rib_find(req->rib)) == RIB_FAILED) { 2406 log_warnx("rde_dump_ctx_new: no such rib %s", req->rib); 2407 error = CTL_RES_NOSUCHPEER; 2408 imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error, 2409 sizeof(error)); 2410 free(ctx); 2411 return; 2412 } 2413 2414 memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request)); 2415 ctx->req.pid = pid; 2416 ctx->req.type = type; 2417 ctx->ribctx.ctx_count = RDE_RUNNER_ROUNDS; 2418 ctx->ribctx.ctx_rib = &ribs[id]; 2419 switch (ctx->req.type) { 2420 case IMSG_CTL_SHOW_NETWORK: 2421 ctx->ribctx.ctx_upcall = network_dump_upcall; 2422 break; 2423 case IMSG_CTL_SHOW_RIB: 2424 case IMSG_CTL_SHOW_RIB_AS: 2425 case IMSG_CTL_SHOW_RIB_COMMUNITY: 2426 ctx->ribctx.ctx_upcall = rde_dump_upcall; 2427 break; 2428 case IMSG_CTL_SHOW_RIB_PREFIX: 2429 if (req->flags & F_LONGER) { 2430 ctx->ribctx.ctx_upcall = rde_dump_prefix_upcall; 2431 break; 2432 } 2433 switch (req->prefix.aid) { 2434 case AID_INET: 2435 case AID_VPN_IPv4: 2436 hostplen = 32; 2437 break; 2438 case AID_INET6: 2439 hostplen = 128; 2440 break; 2441 default: 2442 fatalx("rde_dump_ctx_new: unknown af"); 2443 } 2444 if (req->prefixlen == hostplen) 2445 re = rib_lookup(&ribs[id], &req->prefix); 2446 else 2447 re = rib_get(&ribs[id], &req->prefix, req->prefixlen); 2448 if (re) 2449 rde_dump_upcall(re, ctx); 2450 rde_dump_done(ctx); 2451 return; 2452 default: 2453 fatalx("rde_dump_ctx_new: unsupported imsg type"); 2454 } 2455 ctx->ribctx.ctx_done = rde_dump_done; 2456 ctx->ribctx.ctx_arg = ctx; 2457 ctx->ribctx.ctx_aid = ctx->req.aid; 2458 rib_dump_r(&ctx->ribctx); 2459 } 2460 2461 void 2462 rde_dump_done(void *arg) 2463 { 2464 struct rde_dump_ctx *ctx = arg; 2465 2466 imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, 2467 -1, NULL, 0); 2468 free(ctx); 2469 } 2470 2471 void 2472 rde_dump_mrt_new(struct mrt *mrt, pid_t pid, int fd) 2473 { 2474 struct rde_mrt_ctx *ctx; 2475 u_int16_t id; 2476 2477 if ((ctx = calloc(1, sizeof(*ctx))) == NULL) { 2478 log_warn("rde_dump_mrt_new"); 2479 return; 2480 } 2481 memcpy(&ctx->mrt, mrt, sizeof(struct mrt)); 2482 TAILQ_INIT(&ctx->mrt.wbuf.bufs); 2483 ctx->mrt.wbuf.fd = fd; 2484 ctx->mrt.state = MRT_STATE_RUNNING; 2485 id = rib_find(ctx->mrt.rib); 2486 if (id == RIB_FAILED) { 2487 log_warnx("non existing RIB %s for mrt dump", ctx->mrt.rib); 2488 free(ctx); 2489 return; 2490 } 2491 2492 if (ctx->mrt.type == MRT_TABLE_DUMP_V2) 2493 mrt_dump_v2_hdr(&ctx->mrt, conf, &peerlist); 2494 2495 ctx->ribctx.ctx_count = RDE_RUNNER_ROUNDS; 2496 ctx->ribctx.ctx_rib = &ribs[id]; 2497 ctx->ribctx.ctx_upcall = mrt_dump_upcall; 2498 ctx->ribctx.ctx_done = mrt_done; 2499 ctx->ribctx.ctx_arg = &ctx->mrt; 2500 ctx->ribctx.ctx_aid = AID_UNSPEC; 2501 LIST_INSERT_HEAD(&rde_mrts, ctx, entry); 2502 rde_mrt_cnt++; 2503 rib_dump_r(&ctx->ribctx); 2504 } 2505 2506 /* 2507 * kroute specific functions 2508 */ 2509 int 2510 rde_rdomain_import(struct rde_aspath *asp, struct rdomain *rd) 2511 { 2512 struct filter_set *s; 2513 2514 TAILQ_FOREACH(s, &rd->import, entry) { 2515 if (community_ext_match(asp, &s->action.ext_community, 0)) 2516 return (1); 2517 } 2518 return (0); 2519 } 2520 2521 void 2522 rde_send_kroute(struct prefix *new, struct prefix *old, u_int16_t ribid) 2523 { 2524 struct kroute_full kr; 2525 struct bgpd_addr addr; 2526 struct prefix *p; 2527 struct rdomain *rd; 2528 enum imsg_type type; 2529 2530 /* 2531 * Make sure that self announce prefixes are not commited to the 2532 * FIB. If both prefixes are unreachable no update is needed. 2533 */ 2534 if ((old == NULL || old->aspath->flags & F_PREFIX_ANNOUNCED) && 2535 (new == NULL || new->aspath->flags & F_PREFIX_ANNOUNCED)) 2536 return; 2537 2538 if (new == NULL || new->aspath->flags & F_PREFIX_ANNOUNCED) { 2539 type = IMSG_KROUTE_DELETE; 2540 p = old; 2541 } else { 2542 type = IMSG_KROUTE_CHANGE; 2543 p = new; 2544 } 2545 2546 pt_getaddr(p->prefix, &addr); 2547 bzero(&kr, sizeof(kr)); 2548 memcpy(&kr.prefix, &addr, sizeof(kr.prefix)); 2549 kr.prefixlen = p->prefix->prefixlen; 2550 if (p->aspath->flags & F_NEXTHOP_REJECT) 2551 kr.flags |= F_REJECT; 2552 if (p->aspath->flags & F_NEXTHOP_BLACKHOLE) 2553 kr.flags |= F_BLACKHOLE; 2554 if (type == IMSG_KROUTE_CHANGE) 2555 memcpy(&kr.nexthop, &p->aspath->nexthop->true_nexthop, 2556 sizeof(kr.nexthop)); 2557 strlcpy(kr.label, rtlabel_id2name(p->aspath->rtlabelid), 2558 sizeof(kr.label)); 2559 2560 switch (addr.aid) { 2561 case AID_VPN_IPv4: 2562 if (ribid != 1) 2563 /* not Loc-RIB, no update for VPNs */ 2564 break; 2565 2566 SIMPLEQ_FOREACH(rd, rdomains_l, entry) { 2567 if (!rde_rdomain_import(p->aspath, rd)) 2568 continue; 2569 /* must send exit_nexthop so that correct MPLS tunnel 2570 * is chosen 2571 */ 2572 if (type == IMSG_KROUTE_CHANGE) 2573 memcpy(&kr.nexthop, 2574 &p->aspath->nexthop->exit_nexthop, 2575 sizeof(kr.nexthop)); 2576 if (imsg_compose(ibuf_main, type, rd->rtableid, 0, -1, 2577 &kr, sizeof(kr)) == -1) 2578 fatal("imsg_compose error"); 2579 } 2580 break; 2581 default: 2582 if (imsg_compose(ibuf_main, type, ribs[ribid].rtableid, 0, -1, 2583 &kr, sizeof(kr)) == -1) 2584 fatal("imsg_compose error"); 2585 break; 2586 } 2587 } 2588 2589 /* 2590 * pf table specific functions 2591 */ 2592 void 2593 rde_send_pftable(u_int16_t id, struct bgpd_addr *addr, 2594 u_int8_t len, int del) 2595 { 2596 struct pftable_msg pfm; 2597 2598 if (id == 0) 2599 return; 2600 2601 /* do not run while cleaning up */ 2602 if (rde_quit) 2603 return; 2604 2605 bzero(&pfm, sizeof(pfm)); 2606 strlcpy(pfm.pftable, pftable_id2name(id), sizeof(pfm.pftable)); 2607 memcpy(&pfm.addr, addr, sizeof(pfm.addr)); 2608 pfm.len = len; 2609 2610 if (imsg_compose(ibuf_main, 2611 del ? IMSG_PFTABLE_REMOVE : IMSG_PFTABLE_ADD, 2612 0, 0, -1, &pfm, sizeof(pfm)) == -1) 2613 fatal("imsg_compose error"); 2614 } 2615 2616 void 2617 rde_send_pftable_commit(void) 2618 { 2619 /* do not run while cleaning up */ 2620 if (rde_quit) 2621 return; 2622 2623 if (imsg_compose(ibuf_main, IMSG_PFTABLE_COMMIT, 0, 0, -1, NULL, 0) == 2624 -1) 2625 fatal("imsg_compose error"); 2626 } 2627 2628 /* 2629 * nexthop specific functions 2630 */ 2631 void 2632 rde_send_nexthop(struct bgpd_addr *next, int valid) 2633 { 2634 int type; 2635 2636 if (valid) 2637 type = IMSG_NEXTHOP_ADD; 2638 else 2639 type = IMSG_NEXTHOP_REMOVE; 2640 2641 if (imsg_compose(ibuf_main, type, 0, 0, -1, next, 2642 sizeof(struct bgpd_addr)) == -1) 2643 fatal("imsg_compose error"); 2644 } 2645 2646 /* 2647 * soft reconfig specific functions 2648 */ 2649 void 2650 rde_softreconfig_out(struct rib_entry *re, void *ptr) 2651 { 2652 struct prefix *p = re->active; 2653 struct pt_entry *pt; 2654 struct rde_peer *peer; 2655 struct rde_aspath *oasp, *nasp; 2656 enum filter_actions oa, na; 2657 struct bgpd_addr addr; 2658 2659 if (p == NULL) 2660 return; 2661 2662 pt = re->prefix; 2663 pt_getaddr(pt, &addr); 2664 LIST_FOREACH(peer, &peerlist, peer_l) { 2665 if (peer->conf.id == 0) 2666 continue; 2667 if (peer->ribid != re->ribid) 2668 continue; 2669 if (peer->reconf_out == 0) 2670 continue; 2671 if (up_test_update(peer, p) != 1) 2672 continue; 2673 2674 oa = rde_filter(re->ribid, &oasp, rules_l, peer, p->aspath, 2675 &addr, pt->prefixlen, p->aspath->peer, DIR_OUT); 2676 na = rde_filter(re->ribid, &nasp, newrules, peer, p->aspath, 2677 &addr, pt->prefixlen, p->aspath->peer, DIR_OUT); 2678 oasp = oasp != NULL ? oasp : p->aspath; 2679 nasp = nasp != NULL ? nasp : p->aspath; 2680 2681 if (oa == ACTION_DENY && na == ACTION_DENY) 2682 /* nothing todo */ 2683 goto done; 2684 if (oa == ACTION_DENY && na == ACTION_ALLOW) { 2685 /* send update */ 2686 up_generate(peer, nasp, &addr, pt->prefixlen); 2687 goto done; 2688 } 2689 if (oa == ACTION_ALLOW && na == ACTION_DENY) { 2690 /* send withdraw */ 2691 up_generate(peer, NULL, &addr, pt->prefixlen); 2692 goto done; 2693 } 2694 if (oa == ACTION_ALLOW && na == ACTION_ALLOW) { 2695 if (path_compare(nasp, oasp) == 0) 2696 goto done; 2697 /* send update */ 2698 up_generate(peer, nasp, &addr, pt->prefixlen); 2699 } 2700 2701 done: 2702 if (oasp != p->aspath) 2703 path_put(oasp); 2704 if (nasp != p->aspath) 2705 path_put(nasp); 2706 } 2707 } 2708 2709 void 2710 rde_softreconfig_in(struct rib_entry *re, void *ptr) 2711 { 2712 struct prefix *p, *np; 2713 struct pt_entry *pt; 2714 struct rde_peer *peer; 2715 struct rde_aspath *asp, *oasp, *nasp; 2716 enum filter_actions oa, na; 2717 struct bgpd_addr addr; 2718 u_int16_t i; 2719 2720 pt = re->prefix; 2721 pt_getaddr(pt, &addr); 2722 for (p = LIST_FIRST(&re->prefix_h); p != NULL; p = np) { 2723 np = LIST_NEXT(p, rib_l); 2724 2725 /* store aspath as prefix may change till we're done */ 2726 asp = p->aspath; 2727 peer = asp->peer; 2728 2729 /* XXX how can this happen ??? */ 2730 if (peer->reconf_in == 0) 2731 continue; 2732 2733 for (i = 1; i < rib_size; i++) { 2734 /* only active ribs need a softreconfig rerun */ 2735 if (ribs[i].state != RECONF_KEEP) 2736 continue; 2737 2738 /* check if prefix changed */ 2739 oa = rde_filter(i, &oasp, rules_l, peer, asp, &addr, 2740 pt->prefixlen, peer, DIR_IN); 2741 na = rde_filter(i, &nasp, newrules, peer, asp, &addr, 2742 pt->prefixlen, peer, DIR_IN); 2743 oasp = oasp != NULL ? oasp : asp; 2744 nasp = nasp != NULL ? nasp : asp; 2745 2746 if (oa == ACTION_DENY && na == ACTION_DENY) 2747 /* nothing todo */ 2748 goto done; 2749 if (oa == ACTION_DENY && na == ACTION_ALLOW) { 2750 /* update Local-RIB */ 2751 path_update(&ribs[i], peer, nasp, &addr, 2752 pt->prefixlen); 2753 goto done; 2754 } 2755 if (oa == ACTION_ALLOW && na == ACTION_DENY) { 2756 /* remove from Local-RIB */ 2757 prefix_remove(&ribs[i], peer, &addr, 2758 pt->prefixlen, 0); 2759 goto done; 2760 } 2761 if (oa == ACTION_ALLOW && na == ACTION_ALLOW) { 2762 if (path_compare(nasp, oasp) == 0) 2763 goto done; 2764 /* send update */ 2765 path_update(&ribs[i], peer, nasp, &addr, 2766 pt->prefixlen); 2767 } 2768 2769 done: 2770 if (oasp != asp) 2771 path_put(oasp); 2772 if (nasp != asp) 2773 path_put(nasp); 2774 } 2775 } 2776 } 2777 2778 void 2779 rde_softreconfig_load(struct rib_entry *re, void *ptr) 2780 { 2781 struct rib *rib = ptr; 2782 struct prefix *p, *np; 2783 struct pt_entry *pt; 2784 struct rde_peer *peer; 2785 struct rde_aspath *asp, *nasp; 2786 enum filter_actions action; 2787 struct bgpd_addr addr; 2788 2789 pt = re->prefix; 2790 pt_getaddr(pt, &addr); 2791 for (p = LIST_FIRST(&re->prefix_h); p != NULL; p = np) { 2792 np = LIST_NEXT(p, rib_l); 2793 2794 /* store aspath as prefix may change till we're done */ 2795 asp = p->aspath; 2796 peer = asp->peer; 2797 2798 action = rde_filter(rib->id, &nasp, newrules, peer, asp, &addr, 2799 pt->prefixlen, peer, DIR_IN); 2800 nasp = nasp != NULL ? nasp : asp; 2801 2802 if (action == ACTION_ALLOW) { 2803 /* update Local-RIB */ 2804 path_update(rib, peer, nasp, &addr, pt->prefixlen); 2805 } 2806 2807 if (nasp != asp) 2808 path_put(nasp); 2809 } 2810 } 2811 2812 void 2813 rde_softreconfig_load_peer(struct rib_entry *re, void *ptr) 2814 { 2815 struct rde_peer *peer = ptr; 2816 struct prefix *p = re->active; 2817 struct pt_entry *pt; 2818 struct rde_aspath *nasp; 2819 enum filter_actions na; 2820 struct bgpd_addr addr; 2821 2822 pt = re->prefix; 2823 pt_getaddr(pt, &addr); 2824 2825 /* check if prefix was announced */ 2826 if (up_test_update(peer, p) != 1) 2827 return; 2828 2829 na = rde_filter(re->ribid, &nasp, newrules, peer, p->aspath, 2830 &addr, pt->prefixlen, p->aspath->peer, DIR_OUT); 2831 nasp = nasp != NULL ? nasp : p->aspath; 2832 2833 if (na == ACTION_DENY) 2834 /* nothing todo */ 2835 goto done; 2836 2837 /* send update */ 2838 up_generate(peer, nasp, &addr, pt->prefixlen); 2839 done: 2840 if (nasp != p->aspath) 2841 path_put(nasp); 2842 } 2843 2844 void 2845 rde_softreconfig_unload_peer(struct rib_entry *re, void *ptr) 2846 { 2847 struct rde_peer *peer = ptr; 2848 struct prefix *p = re->active; 2849 struct pt_entry *pt; 2850 struct rde_aspath *oasp; 2851 enum filter_actions oa; 2852 struct bgpd_addr addr; 2853 2854 pt = re->prefix; 2855 pt_getaddr(pt, &addr); 2856 2857 /* check if prefix was announced */ 2858 if (up_test_update(peer, p) != 1) 2859 return; 2860 2861 oa = rde_filter(re->ribid, &oasp, rules_l, peer, p->aspath, 2862 &addr, pt->prefixlen, p->aspath->peer, DIR_OUT); 2863 oasp = oasp != NULL ? oasp : p->aspath; 2864 2865 if (oa == ACTION_DENY) 2866 /* nothing todo */ 2867 goto done; 2868 2869 /* send withdraw */ 2870 up_generate(peer, NULL, &addr, pt->prefixlen); 2871 done: 2872 if (oasp != p->aspath) 2873 path_put(oasp); 2874 } 2875 2876 /* 2877 * update specific functions 2878 */ 2879 u_char queue_buf[4096]; 2880 2881 void 2882 rde_up_dump_upcall(struct rib_entry *re, void *ptr) 2883 { 2884 struct rde_peer *peer = ptr; 2885 2886 if (re->ribid != peer->ribid) 2887 fatalx("King Bula: monstrous evil horror."); 2888 if (re->active == NULL) 2889 return; 2890 up_generate_updates(rules_l, peer, re->active, NULL); 2891 } 2892 2893 void 2894 rde_generate_updates(u_int16_t ribid, struct prefix *new, struct prefix *old) 2895 { 2896 struct rde_peer *peer; 2897 2898 /* 2899 * If old is != NULL we know it was active and should be removed. 2900 * If new is != NULL we know it is reachable and then we should 2901 * generate an update. 2902 */ 2903 if (old == NULL && new == NULL) 2904 return; 2905 2906 LIST_FOREACH(peer, &peerlist, peer_l) { 2907 if (peer->conf.id == 0) 2908 continue; 2909 if (peer->ribid != ribid) 2910 continue; 2911 if (peer->state != PEER_UP) 2912 continue; 2913 up_generate_updates(rules_l, peer, new, old); 2914 } 2915 } 2916 2917 void 2918 rde_update_queue_runner(void) 2919 { 2920 struct rde_peer *peer; 2921 int r, sent, max = RDE_RUNNER_ROUNDS, eor = 0; 2922 u_int16_t len, wd_len, wpos; 2923 2924 len = sizeof(queue_buf) - MSGSIZE_HEADER; 2925 do { 2926 sent = 0; 2927 LIST_FOREACH(peer, &peerlist, peer_l) { 2928 if (peer->conf.id == 0) 2929 continue; 2930 if (peer->state != PEER_UP) 2931 continue; 2932 /* first withdraws */ 2933 wpos = 2; /* reserve space for the length field */ 2934 r = up_dump_prefix(queue_buf + wpos, len - wpos - 2, 2935 &peer->withdraws[AID_INET], peer); 2936 wd_len = r; 2937 /* write withdraws length filed */ 2938 wd_len = htons(wd_len); 2939 memcpy(queue_buf, &wd_len, 2); 2940 wpos += r; 2941 2942 /* now bgp path attributes */ 2943 r = up_dump_attrnlri(queue_buf + wpos, len - wpos, 2944 peer); 2945 switch (r) { 2946 case -1: 2947 eor = 1; 2948 if (wd_len == 0) { 2949 /* no withdraws queued just send EoR */ 2950 peer_send_eor(peer, AID_INET); 2951 continue; 2952 } 2953 break; 2954 case 2: 2955 if (wd_len == 0) { 2956 /* 2957 * No packet to send. No withdraws and 2958 * no path attributes. Skip. 2959 */ 2960 continue; 2961 } 2962 /* FALLTHROUGH */ 2963 default: 2964 wpos += r; 2965 break; 2966 } 2967 2968 /* finally send message to SE */ 2969 if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, 2970 0, -1, queue_buf, wpos) == -1) 2971 fatal("imsg_compose error"); 2972 sent++; 2973 if (eor) { 2974 eor = 0; 2975 peer_send_eor(peer, AID_INET); 2976 } 2977 } 2978 max -= sent; 2979 } while (sent != 0 && max > 0); 2980 } 2981 2982 void 2983 rde_update6_queue_runner(u_int8_t aid) 2984 { 2985 struct rde_peer *peer; 2986 u_char *b; 2987 int r, sent, max = RDE_RUNNER_ROUNDS / 2; 2988 u_int16_t len; 2989 2990 /* first withdraws ... */ 2991 do { 2992 sent = 0; 2993 LIST_FOREACH(peer, &peerlist, peer_l) { 2994 if (peer->conf.id == 0) 2995 continue; 2996 if (peer->state != PEER_UP) 2997 continue; 2998 len = sizeof(queue_buf) - MSGSIZE_HEADER; 2999 b = up_dump_mp_unreach(queue_buf, &len, peer, aid); 3000 3001 if (b == NULL) 3002 continue; 3003 /* finally send message to SE */ 3004 if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, 3005 0, -1, b, len) == -1) 3006 fatal("imsg_compose error"); 3007 sent++; 3008 } 3009 max -= sent; 3010 } while (sent != 0 && max > 0); 3011 3012 /* ... then updates */ 3013 max = RDE_RUNNER_ROUNDS / 2; 3014 do { 3015 sent = 0; 3016 LIST_FOREACH(peer, &peerlist, peer_l) { 3017 if (peer->conf.id == 0) 3018 continue; 3019 if (peer->state != PEER_UP) 3020 continue; 3021 len = sizeof(queue_buf) - MSGSIZE_HEADER; 3022 r = up_dump_mp_reach(queue_buf, &len, peer, aid); 3023 switch (r) { 3024 case -2: 3025 continue; 3026 case -1: 3027 peer_send_eor(peer, aid); 3028 continue; 3029 default: 3030 b = queue_buf + r; 3031 break; 3032 } 3033 3034 /* finally send message to SE */ 3035 if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, 3036 0, -1, b, len) == -1) 3037 fatal("imsg_compose error"); 3038 sent++; 3039 } 3040 max -= sent; 3041 } while (sent != 0 && max > 0); 3042 } 3043 3044 /* 3045 * generic helper function 3046 */ 3047 u_int32_t 3048 rde_local_as(void) 3049 { 3050 return (conf->as); 3051 } 3052 3053 int 3054 rde_noevaluate(void) 3055 { 3056 /* do not run while cleaning up */ 3057 if (rde_quit) 3058 return (1); 3059 3060 return (conf->flags & BGPD_FLAG_NO_EVALUATE); 3061 } 3062 3063 int 3064 rde_decisionflags(void) 3065 { 3066 return (conf->flags & BGPD_FLAG_DECISION_MASK); 3067 } 3068 3069 int 3070 rde_as4byte(struct rde_peer *peer) 3071 { 3072 return (peer->capa.as4byte); 3073 } 3074 3075 /* 3076 * peer functions 3077 */ 3078 struct peer_table { 3079 struct rde_peer_head *peer_hashtbl; 3080 u_int32_t peer_hashmask; 3081 } peertable; 3082 3083 #define PEER_HASH(x) \ 3084 &peertable.peer_hashtbl[(x) & peertable.peer_hashmask] 3085 3086 void 3087 peer_init(u_int32_t hashsize) 3088 { 3089 struct peer_config pc; 3090 u_int32_t hs, i; 3091 3092 for (hs = 1; hs < hashsize; hs <<= 1) 3093 ; 3094 peertable.peer_hashtbl = calloc(hs, sizeof(struct rde_peer_head)); 3095 if (peertable.peer_hashtbl == NULL) 3096 fatal("peer_init"); 3097 3098 for (i = 0; i < hs; i++) 3099 LIST_INIT(&peertable.peer_hashtbl[i]); 3100 LIST_INIT(&peerlist); 3101 3102 peertable.peer_hashmask = hs - 1; 3103 3104 bzero(&pc, sizeof(pc)); 3105 snprintf(pc.descr, sizeof(pc.descr), "LOCAL"); 3106 3107 peerself = peer_add(0, &pc); 3108 if (peerself == NULL) 3109 fatalx("peer_init add self"); 3110 3111 peerself->state = PEER_UP; 3112 } 3113 3114 void 3115 peer_shutdown(void) 3116 { 3117 u_int32_t i; 3118 3119 for (i = 0; i <= peertable.peer_hashmask; i++) 3120 if (!LIST_EMPTY(&peertable.peer_hashtbl[i])) 3121 log_warnx("peer_free: free non-free table"); 3122 3123 free(peertable.peer_hashtbl); 3124 } 3125 3126 struct rde_peer * 3127 peer_get(u_int32_t id) 3128 { 3129 struct rde_peer_head *head; 3130 struct rde_peer *peer; 3131 3132 head = PEER_HASH(id); 3133 3134 LIST_FOREACH(peer, head, hash_l) { 3135 if (peer->conf.id == id) 3136 return (peer); 3137 } 3138 return (NULL); 3139 } 3140 3141 struct rde_peer * 3142 peer_add(u_int32_t id, struct peer_config *p_conf) 3143 { 3144 struct rde_peer_head *head; 3145 struct rde_peer *peer; 3146 3147 if (peer_get(id)) 3148 return (NULL); 3149 3150 peer = calloc(1, sizeof(struct rde_peer)); 3151 if (peer == NULL) 3152 fatal("peer_add"); 3153 3154 LIST_INIT(&peer->path_h); 3155 memcpy(&peer->conf, p_conf, sizeof(struct peer_config)); 3156 peer->remote_bgpid = 0; 3157 peer->ribid = rib_find(peer->conf.rib); 3158 peer->state = PEER_NONE; 3159 up_init(peer); 3160 3161 head = PEER_HASH(id); 3162 3163 LIST_INSERT_HEAD(head, peer, hash_l); 3164 LIST_INSERT_HEAD(&peerlist, peer, peer_l); 3165 3166 return (peer); 3167 } 3168 3169 void 3170 peer_localaddrs(struct rde_peer *peer, struct bgpd_addr *laddr) 3171 { 3172 struct ifaddrs *ifap, *ifa, *match; 3173 3174 if (getifaddrs(&ifap) == -1) 3175 fatal("getifaddrs"); 3176 3177 for (match = ifap; match != NULL; match = match->ifa_next) 3178 if (sa_cmp(laddr, match->ifa_addr) == 0) 3179 break; 3180 3181 if (match == NULL) 3182 fatalx("peer_localaddrs: local address not found"); 3183 3184 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { 3185 if (ifa->ifa_addr->sa_family == AF_INET && 3186 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 3187 if (ifa->ifa_addr->sa_family == 3188 match->ifa_addr->sa_family) 3189 ifa = match; 3190 sa2addr(ifa->ifa_addr, &peer->local_v4_addr); 3191 break; 3192 } 3193 } 3194 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { 3195 if (ifa->ifa_addr->sa_family == AF_INET6 && 3196 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 3197 /* 3198 * only accept global scope addresses except explicitly 3199 * specified. 3200 */ 3201 if (ifa->ifa_addr->sa_family == 3202 match->ifa_addr->sa_family) 3203 ifa = match; 3204 else if (IN6_IS_ADDR_LINKLOCAL( 3205 &((struct sockaddr_in6 *)ifa-> 3206 ifa_addr)->sin6_addr) || 3207 IN6_IS_ADDR_SITELOCAL( 3208 &((struct sockaddr_in6 *)ifa-> 3209 ifa_addr)->sin6_addr)) 3210 continue; 3211 sa2addr(ifa->ifa_addr, &peer->local_v6_addr); 3212 break; 3213 } 3214 } 3215 3216 freeifaddrs(ifap); 3217 } 3218 3219 void 3220 peer_up(u_int32_t id, struct session_up *sup) 3221 { 3222 struct rde_peer *peer; 3223 u_int8_t i; 3224 3225 peer = peer_get(id); 3226 if (peer == NULL) { 3227 log_warnx("peer_up: unknown peer id %d", id); 3228 return; 3229 } 3230 3231 if (peer->state != PEER_DOWN && peer->state != PEER_NONE && 3232 peer->state != PEER_UP) 3233 fatalx("peer_up: bad state"); 3234 peer->remote_bgpid = ntohl(sup->remote_bgpid); 3235 peer->short_as = sup->short_as; 3236 memcpy(&peer->remote_addr, &sup->remote_addr, 3237 sizeof(peer->remote_addr)); 3238 memcpy(&peer->capa, &sup->capa, sizeof(peer->capa)); 3239 3240 peer_localaddrs(peer, &sup->local_addr); 3241 3242 peer->state = PEER_UP; 3243 up_init(peer); 3244 3245 if (rde_noevaluate()) 3246 /* 3247 * no need to dump the table to the peer, there are no active 3248 * prefixes anyway. This is a speed up hack. 3249 */ 3250 return; 3251 3252 for (i = 0; i < AID_MAX; i++) { 3253 if (peer->capa.mp[i]) 3254 peer_dump(id, i); 3255 } 3256 } 3257 3258 void 3259 peer_down(u_int32_t id) 3260 { 3261 struct rde_peer *peer; 3262 struct rde_aspath *asp, *nasp; 3263 3264 peer = peer_get(id); 3265 if (peer == NULL) { 3266 log_warnx("peer_down: unknown peer id %d", id); 3267 return; 3268 } 3269 peer->remote_bgpid = 0; 3270 peer->state = PEER_DOWN; 3271 up_down(peer); 3272 3273 /* walk through per peer RIB list and remove all prefixes. */ 3274 for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = nasp) { 3275 nasp = LIST_NEXT(asp, peer_l); 3276 path_remove(asp); 3277 } 3278 LIST_INIT(&peer->path_h); 3279 peer->prefix_cnt = 0; 3280 3281 /* Deletions are performed in path_remove() */ 3282 rde_send_pftable_commit(); 3283 3284 LIST_REMOVE(peer, hash_l); 3285 LIST_REMOVE(peer, peer_l); 3286 free(peer); 3287 } 3288 3289 /* 3290 * Flush all routes older then staletime. If staletime is 0 all routes will 3291 * be flushed. 3292 */ 3293 void 3294 peer_flush(struct rde_peer *peer, u_int8_t aid) 3295 { 3296 struct rde_aspath *asp, *nasp; 3297 3298 /* walk through per peer RIB list and remove all stale prefixes. */ 3299 for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = nasp) { 3300 nasp = LIST_NEXT(asp, peer_l); 3301 path_remove_stale(asp, aid); 3302 } 3303 3304 /* Deletions are performed in path_remove() */ 3305 rde_send_pftable_commit(); 3306 3307 /* flushed no need to keep staletime */ 3308 peer->staletime[aid] = 0; 3309 } 3310 3311 void 3312 peer_stale(u_int32_t id, u_int8_t aid) 3313 { 3314 struct rde_peer *peer; 3315 time_t now; 3316 3317 peer = peer_get(id); 3318 if (peer == NULL) { 3319 log_warnx("peer_stale: unknown peer id %d", id); 3320 return; 3321 } 3322 3323 if (peer->staletime[aid]) 3324 peer_flush(peer, aid); 3325 peer->staletime[aid] = now = time(NULL); 3326 3327 /* make sure new prefixes start on a higher timestamp */ 3328 do { 3329 sleep(1); 3330 } while (now >= time(NULL)); 3331 } 3332 3333 void 3334 peer_dump(u_int32_t id, u_int8_t aid) 3335 { 3336 struct rde_peer *peer; 3337 3338 peer = peer_get(id); 3339 if (peer == NULL) { 3340 log_warnx("peer_dump: unknown peer id %d", id); 3341 return; 3342 } 3343 3344 if (peer->conf.announce_type == ANNOUNCE_DEFAULT_ROUTE) 3345 up_generate_default(rules_l, peer, aid); 3346 else 3347 rib_dump(&ribs[peer->ribid], rde_up_dump_upcall, peer, aid); 3348 if (peer->capa.grestart.restart) 3349 up_generate_marker(peer, aid); 3350 } 3351 3352 /* End-of-RIB marker, RFC 4724 */ 3353 void 3354 peer_recv_eor(struct rde_peer *peer, u_int8_t aid) 3355 { 3356 peer->prefix_rcvd_eor++; 3357 3358 /* First notify SE to remove possible race with the timeout. */ 3359 if (imsg_compose(ibuf_se, IMSG_SESSION_RESTARTED, peer->conf.id, 3360 0, -1, &aid, sizeof(aid)) == -1) 3361 fatal("imsg_compose error"); 3362 } 3363 3364 void 3365 peer_send_eor(struct rde_peer *peer, u_int8_t aid) 3366 { 3367 u_int16_t afi; 3368 u_int8_t safi; 3369 3370 peer->prefix_sent_eor++; 3371 3372 if (aid == AID_INET) { 3373 u_char null[4]; 3374 3375 bzero(&null, 4); 3376 if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, 3377 0, -1, &null, 4) == -1) 3378 fatal("imsg_compose error in peer_send_eor"); 3379 } else { 3380 u_int16_t i; 3381 u_char buf[10]; 3382 3383 if (aid2afi(aid, &afi, &safi) == -1) 3384 fatalx("peer_send_eor: bad AID"); 3385 3386 i = 0; /* v4 withdrawn len */ 3387 bcopy(&i, &buf[0], sizeof(i)); 3388 i = htons(6); /* path attr len */ 3389 bcopy(&i, &buf[2], sizeof(i)); 3390 buf[4] = ATTR_OPTIONAL; 3391 buf[5] = ATTR_MP_UNREACH_NLRI; 3392 buf[6] = 3; /* withdrawn len */ 3393 i = htons(afi); 3394 bcopy(&i, &buf[7], sizeof(i)); 3395 buf[9] = safi; 3396 3397 if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, 3398 0, -1, &buf, 10) == -1) 3399 fatal("imsg_compose error in peer_send_eor"); 3400 } 3401 } 3402 3403 /* 3404 * network announcement stuff 3405 */ 3406 void 3407 network_add(struct network_config *nc, int flagstatic) 3408 { 3409 struct rdomain *rd; 3410 struct rde_aspath *asp; 3411 struct filter_set_head *vpnset = NULL; 3412 in_addr_t prefix4; 3413 u_int16_t i; 3414 3415 if (nc->rtableid) { 3416 SIMPLEQ_FOREACH(rd, rdomains_l, entry) { 3417 if (rd->rtableid != nc->rtableid) 3418 continue; 3419 switch (nc->prefix.aid) { 3420 case AID_INET: 3421 prefix4 = nc->prefix.v4.s_addr; 3422 bzero(&nc->prefix, sizeof(nc->prefix)); 3423 nc->prefix.aid = AID_VPN_IPv4; 3424 nc->prefix.vpn4.rd = rd->rd; 3425 nc->prefix.vpn4.addr.s_addr = prefix4; 3426 nc->prefix.vpn4.labellen = 3; 3427 nc->prefix.vpn4.labelstack[0] = 3428 (rd->label >> 12) & 0xff; 3429 nc->prefix.vpn4.labelstack[1] = 3430 (rd->label >> 4) & 0xff; 3431 nc->prefix.vpn4.labelstack[2] = 3432 (rd->label << 4) & 0xf0; 3433 nc->prefix.vpn4.labelstack[2] |= BGP_MPLS_BOS; 3434 vpnset = &rd->export; 3435 break; 3436 default: 3437 log_warnx("unable to VPNize prefix"); 3438 filterset_free(&nc->attrset); 3439 return; 3440 } 3441 } 3442 } 3443 3444 if (nc->type == NETWORK_MRTCLONE) { 3445 asp = nc->asp; 3446 } else { 3447 asp = path_get(); 3448 asp->aspath = aspath_get(NULL, 0); 3449 asp->origin = ORIGIN_IGP; 3450 asp->flags = F_ATTR_ORIGIN | F_ATTR_ASPATH | 3451 F_ATTR_LOCALPREF | F_PREFIX_ANNOUNCED; 3452 /* the nexthop is unset unless a default set overrides it */ 3453 } 3454 if (!flagstatic) 3455 asp->flags |= F_ANN_DYNAMIC; 3456 rde_apply_set(asp, &nc->attrset, nc->prefix.aid, peerself, peerself); 3457 if (vpnset) 3458 rde_apply_set(asp, vpnset, nc->prefix.aid, peerself, peerself); 3459 for (i = 1; i < rib_size; i++) 3460 path_update(&ribs[i], peerself, asp, &nc->prefix, 3461 nc->prefixlen); 3462 path_put(asp); 3463 filterset_free(&nc->attrset); 3464 } 3465 3466 void 3467 network_delete(struct network_config *nc, int flagstatic) 3468 { 3469 struct rdomain *rd; 3470 in_addr_t prefix4; 3471 u_int32_t flags = F_PREFIX_ANNOUNCED; 3472 u_int32_t i; 3473 3474 if (!flagstatic) 3475 flags |= F_ANN_DYNAMIC; 3476 3477 if (nc->rtableid) { 3478 SIMPLEQ_FOREACH(rd, rdomains_l, entry) { 3479 if (rd->rtableid != nc->rtableid) 3480 continue; 3481 switch (nc->prefix.aid) { 3482 case AID_INET: 3483 prefix4 = nc->prefix.v4.s_addr; 3484 bzero(&nc->prefix, sizeof(nc->prefix)); 3485 nc->prefix.aid = AID_VPN_IPv4; 3486 nc->prefix.vpn4.rd = rd->rd; 3487 nc->prefix.vpn4.addr.s_addr = prefix4; 3488 nc->prefix.vpn4.labellen = 3; 3489 nc->prefix.vpn4.labelstack[0] = 3490 (rd->label >> 12) & 0xff; 3491 nc->prefix.vpn4.labelstack[1] = 3492 (rd->label >> 4) & 0xff; 3493 nc->prefix.vpn4.labelstack[2] = 3494 (rd->label << 4) & 0xf0; 3495 nc->prefix.vpn4.labelstack[2] |= BGP_MPLS_BOS; 3496 break; 3497 default: 3498 log_warnx("unable to VPNize prefix"); 3499 return; 3500 } 3501 } 3502 } 3503 3504 for (i = rib_size - 1; i > 0; i--) 3505 prefix_remove(&ribs[i], peerself, &nc->prefix, nc->prefixlen, 3506 flags); 3507 } 3508 3509 void 3510 network_dump_upcall(struct rib_entry *re, void *ptr) 3511 { 3512 struct prefix *p; 3513 struct kroute_full k; 3514 struct bgpd_addr addr; 3515 struct rde_dump_ctx *ctx = ptr; 3516 3517 LIST_FOREACH(p, &re->prefix_h, rib_l) { 3518 if (!(p->aspath->flags & F_PREFIX_ANNOUNCED)) 3519 continue; 3520 pt_getaddr(p->prefix, &addr); 3521 3522 bzero(&k, sizeof(k)); 3523 memcpy(&k.prefix, &addr, sizeof(k.prefix)); 3524 if (p->aspath->nexthop == NULL || 3525 p->aspath->nexthop->state != NEXTHOP_REACH) 3526 k.nexthop.aid = k.prefix.aid; 3527 else 3528 memcpy(&k.nexthop, &p->aspath->nexthop->true_nexthop, 3529 sizeof(k.nexthop)); 3530 k.prefixlen = p->prefix->prefixlen; 3531 k.flags = F_KERNEL; 3532 if ((p->aspath->flags & F_ANN_DYNAMIC) == 0) 3533 k.flags = F_STATIC; 3534 if (imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NETWORK, 0, 3535 ctx->req.pid, -1, &k, sizeof(k)) == -1) 3536 log_warnx("network_dump_upcall: " 3537 "imsg_compose error"); 3538 } 3539 } 3540 3541 /* clean up */ 3542 void 3543 rde_shutdown(void) 3544 { 3545 struct rde_peer *p; 3546 struct filter_rule *r; 3547 u_int32_t i; 3548 3549 /* 3550 * the decision process is turned off if rde_quit = 1 and 3551 * rde_shutdown depends on this. 3552 */ 3553 3554 /* 3555 * All peers go down 3556 */ 3557 for (i = 0; i <= peertable.peer_hashmask; i++) 3558 while ((p = LIST_FIRST(&peertable.peer_hashtbl[i])) != NULL) 3559 peer_down(p->conf.id); 3560 3561 /* free filters */ 3562 while ((r = TAILQ_FIRST(rules_l)) != NULL) { 3563 TAILQ_REMOVE(rules_l, r, entry); 3564 filterset_free(&r->set); 3565 free(r); 3566 } 3567 free(rules_l); 3568 3569 nexthop_shutdown(); 3570 path_shutdown(); 3571 aspath_shutdown(); 3572 attr_shutdown(); 3573 pt_shutdown(); 3574 peer_shutdown(); 3575 } 3576 3577 int 3578 sa_cmp(struct bgpd_addr *a, struct sockaddr *b) 3579 { 3580 struct sockaddr_in *in_b; 3581 struct sockaddr_in6 *in6_b; 3582 3583 if (aid2af(a->aid) != b->sa_family) 3584 return (1); 3585 3586 switch (b->sa_family) { 3587 case AF_INET: 3588 in_b = (struct sockaddr_in *)b; 3589 if (a->v4.s_addr != in_b->sin_addr.s_addr) 3590 return (1); 3591 break; 3592 case AF_INET6: 3593 in6_b = (struct sockaddr_in6 *)b; 3594 #ifdef __KAME__ 3595 /* directly stolen from sbin/ifconfig/ifconfig.c */ 3596 if (IN6_IS_ADDR_LINKLOCAL(&in6_b->sin6_addr)) { 3597 in6_b->sin6_scope_id = 3598 ntohs(*(u_int16_t *)&in6_b->sin6_addr.s6_addr[2]); 3599 in6_b->sin6_addr.s6_addr[2] = 3600 in6_b->sin6_addr.s6_addr[3] = 0; 3601 } 3602 #endif 3603 if (bcmp(&a->v6, &in6_b->sin6_addr, 3604 sizeof(struct in6_addr))) 3605 return (1); 3606 break; 3607 default: 3608 fatal("king bula sez: unknown address family"); 3609 /* NOTREACHED */ 3610 } 3611 3612 return (0); 3613 } 3614