/* $OpenBSD: rde.c,v 1.352 2016/10/18 19:47:52 benno Exp $ */

/*
 * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
 * Copyright (c) 2016 Job Snijders <job@instituut.net>
 * Copyright (c) 2016 Peter Hessler <phessler@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/resource.h>

#include <errno.h>
#include <ifaddrs.h>
#include <pwd.h>
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <err.h>

#include "bgpd.h"
#include "mrt.h"
#include "rde.h"
#include "session.h"

#define PFD_PIPE_MAIN		0
#define PFD_PIPE_SESSION	1
#define PFD_PIPE_SESSION_CTL	2
#define PFD_PIPE_COUNT		3

void rde_sighdlr(int);
void rde_dispatch_imsg_session(struct imsgbuf *);
void rde_dispatch_imsg_parent(struct imsgbuf *);
int rde_update_dispatch(struct imsg *);
void rde_update_update(struct rde_peer *, struct rde_aspath *,
    struct bgpd_addr *, u_int8_t);
void rde_update_withdraw(struct rde_peer *, struct bgpd_addr *,
    u_int8_t);
int rde_attr_parse(u_char *, u_int16_t, struct rde_peer *,
    struct rde_aspath *, struct mpattr *);
int rde_attr_add(struct rde_aspath *, u_char *, u_int16_t);
u_int8_t rde_attr_missing(struct rde_aspath *, int, u_int16_t);
int rde_get_mp_nexthop(u_char *, u_int16_t, u_int8_t,
    struct rde_aspath *);
int rde_update_extract_prefix(u_char *, u_int16_t, void *,
    u_int8_t, u_int8_t);
int rde_update_get_prefix(u_char *, u_int16_t, struct bgpd_addr *,
    u_int8_t *);
int rde_update_get_prefix6(u_char *, u_int16_t, struct bgpd_addr *,
    u_int8_t *);
int rde_update_get_vpn4(u_char *, u_int16_t, struct bgpd_addr *,
    u_int8_t *);
void rde_update_err(struct rde_peer *, u_int8_t, u_int8_t,
    void *, u_int16_t);
void rde_update_log(const char *, u_int16_t,
    const struct rde_peer *, const struct bgpd_addr *,
    const struct bgpd_addr *, u_int8_t);
void rde_as4byte_fixup(struct rde_peer *, struct rde_aspath *);
void rde_reflector(struct rde_peer *, struct rde_aspath *);

void rde_dump_rib_as(struct prefix *, struct rde_aspath *, pid_t,
    int);
void rde_dump_filter(struct prefix *,
    struct ctl_show_rib_request *);
void rde_dump_filterout(struct rde_peer *, struct prefix *,
    struct ctl_show_rib_request *);
void rde_dump_upcall(struct rib_entry *, void *);
void rde_dump_prefix_upcall(struct rib_entry *, void *);
void rde_dump_ctx_new(struct ctl_show_rib_request *, pid_t,
    enum imsg_type);
void rde_dump_mrt_new(struct mrt *, pid_t, int);
void rde_dump_done(void *);

int rde_rdomain_import(struct rde_aspath *, struct rdomain *);
void rde_reload_done(void);
void rde_softreconfig_out(struct rib_entry *, void *);
void rde_softreconfig_in(struct rib_entry *, void *);
void rde_softreconfig_unload_peer(struct rib_entry *, void *);
void rde_up_dump_upcall(struct rib_entry *, void *);
void rde_update_queue_runner(void);
void rde_update6_queue_runner(u_int8_t);

void peer_init(u_int32_t);
void peer_shutdown(void);
int peer_localaddrs(struct rde_peer *, struct bgpd_addr *);
struct rde_peer *peer_add(u_int32_t, struct peer_config *);
struct rde_peer *peer_get(u_int32_t);
void peer_up(u_int32_t, struct session_up *);
void peer_down(u_int32_t);
void peer_flush(struct rde_peer *, u_int8_t);
void peer_stale(u_int32_t, u_int8_t);
void peer_recv_eor(struct rde_peer *, u_int8_t);
void peer_dump(u_int32_t, u_int8_t);
void peer_send_eor(struct rde_peer *, u_int8_t);

void network_add(struct network_config *, int);
void network_delete(struct network_config *, int);
void network_dump_upcall(struct rib_entry *, void *);

void rde_shutdown(void);
int sa_cmp(struct bgpd_addr *, struct sockaddr *);

volatile sig_atomic_t rde_quit = 0;
struct bgpd_config *conf, *nconf;
time_t reloadtime;
struct rde_peer_head peerlist;
struct rde_peer *peerself;
struct filter_head *out_rules, *out_rules_tmp;
struct rdomain_head *rdomains_l, *newdomains;
struct imsgbuf *ibuf_se;
struct imsgbuf *ibuf_se_ctl;
struct imsgbuf *ibuf_main;
struct rde_memstats rdemem;

struct rde_dump_ctx {
    struct rib_context ribctx;
    struct ctl_show_rib_request req;
    sa_family_t af;
};

struct rde_mrt_ctx {
    struct mrt mrt;
    struct rib_context ribctx;
    LIST_ENTRY(rde_mrt_ctx) entry;
};

LIST_HEAD(, rde_mrt_ctx) rde_mrts = LIST_HEAD_INITIALIZER(rde_mrts);
u_int rde_mrt_cnt;

void
rde_sighdlr(int sig)
{
    switch (sig) {
    case SIGINT:
    case SIGTERM:
        rde_quit = 1;
        break;
    }
}

u_int32_t peerhashsize = 64;
u_int32_t pathhashsize = 1024;
u_int32_t attrhashsize = 512;
u_int32_t nexthophashsize = 64;

void
rde_main(int debug, int verbose)
{
    struct passwd *pw;
    struct pollfd *pfd = NULL;
    struct rde_mrt_ctx *mctx, *xmctx;
    void *newp;
    u_int pfd_elms = 0, i, j;
    int timeout;
    u_int8_t aid;

    bgpd_process = PROC_RDE;
    log_procname = log_procnames[bgpd_process];

    log_init(debug);
    log_verbose(verbose);

    if ((pw = getpwnam(BGPD_USER)) == NULL)
        fatal("getpwnam");

    if (chroot(pw->pw_dir) == -1)
        fatal("chroot");
    if (chdir("/") == -1)
        fatal("chdir(\"/\")");

    setproctitle("route decision engine");

    if (setgroups(1, &pw->pw_gid) ||
        setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
        setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
        fatal("can't drop privileges");

    if (pledge("stdio route recvfd", NULL) == -1)
        fatal("pledge");

    signal(SIGTERM, rde_sighdlr);
    signal(SIGINT, rde_sighdlr);
    signal(SIGPIPE, SIG_IGN);
    signal(SIGHUP, SIG_IGN);
    signal(SIGALRM, SIG_IGN);
    signal(SIGUSR1, SIG_IGN);

    /* initialize the RIB structures */
    if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
        fatal(NULL);
    imsg_init(ibuf_main, 3);

    pt_init();
    path_init(pathhashsize);
    aspath_init(pathhashsize);
    attr_init(attrhashsize);
    nexthop_init(nexthophashsize);
    peer_init(peerhashsize);

    out_rules = calloc(1, sizeof(struct filter_head));
    if (out_rules == NULL)
        fatal(NULL);
    TAILQ_INIT(out_rules);
    rdomains_l = calloc(1, sizeof(struct rdomain_head));
    if (rdomains_l == NULL)
        fatal(NULL);
    SIMPLEQ_INIT(rdomains_l);
    if ((conf = calloc(1, sizeof(struct bgpd_config))) == NULL)
        fatal(NULL);
    log_info("route decision engine ready");
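    /*
     * Main event loop: the pollfd array holds the three fixed pipes
     * (parent, SE, SE control) followed by one descriptor per active
     * MRT dump.  Each iteration regrows the array if MRT dumps were
     * added, polls, dispatches received imsgs, flushes pending MRT
     * output, runs the update queue runners and resumes pending RIB
     * dumps once the control pipe has drained.
     */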
    while (rde_quit == 0) {
        if (pfd_elms < PFD_PIPE_COUNT + rde_mrt_cnt) {
            if ((newp = reallocarray(pfd,
                PFD_PIPE_COUNT + rde_mrt_cnt,
                sizeof(struct pollfd))) == NULL) {
                /* panic for now */
                log_warn("could not resize pfd from %u -> %u"
                    " entries", pfd_elms, PFD_PIPE_COUNT +
                    rde_mrt_cnt);
                fatalx("exiting");
            }
            pfd = newp;
            pfd_elms = PFD_PIPE_COUNT + rde_mrt_cnt;
        }
        timeout = INFTIM;
        bzero(pfd, sizeof(struct pollfd) * pfd_elms);

        set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
        set_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se);
        set_pollfd(&pfd[PFD_PIPE_SESSION_CTL], ibuf_se_ctl);

        if (rib_dump_pending() &&
            ibuf_se_ctl && ibuf_se_ctl->w.queued == 0)
            timeout = 0;

        i = PFD_PIPE_COUNT;
        for (mctx = LIST_FIRST(&rde_mrts); mctx != NULL; mctx = xmctx) {
            xmctx = LIST_NEXT(mctx, entry);
            if (mctx->mrt.wbuf.queued) {
                pfd[i].fd = mctx->mrt.wbuf.fd;
                pfd[i].events = POLLOUT;
                i++;
            } else if (mctx->mrt.state == MRT_STATE_REMOVE) {
                close(mctx->mrt.wbuf.fd);
                LIST_REMOVE(&mctx->ribctx, entry);
                LIST_REMOVE(mctx, entry);
                free(mctx);
                rde_mrt_cnt--;
            }
        }

        if (poll(pfd, i, timeout) == -1) {
            if (errno != EINTR)
                fatal("poll error");
            continue;
        }

        if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1)
            fatalx("Lost connection to parent");
        else
            rde_dispatch_imsg_parent(ibuf_main);

        if (handle_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se) == -1) {
            log_warnx("RDE: Lost connection to SE");
            msgbuf_clear(&ibuf_se->w);
            free(ibuf_se);
            ibuf_se = NULL;
        } else
            rde_dispatch_imsg_session(ibuf_se);

        if (handle_pollfd(&pfd[PFD_PIPE_SESSION_CTL], ibuf_se_ctl) ==
            -1) {
            log_warnx("RDE: Lost connection to SE control");
            msgbuf_clear(&ibuf_se_ctl->w);
            free(ibuf_se_ctl);
            ibuf_se_ctl = NULL;
        } else
            rde_dispatch_imsg_session(ibuf_se_ctl);

        for (j = PFD_PIPE_COUNT, mctx = LIST_FIRST(&rde_mrts);
            j < i && mctx != NULL; j++) {
            if (pfd[j].fd == mctx->mrt.wbuf.fd &&
                pfd[j].revents & POLLOUT)
                mrt_write(&mctx->mrt);
            mctx = LIST_NEXT(mctx, entry);
        }

        rde_update_queue_runner();
        for (aid = AID_INET6; aid < AID_MAX; aid++)
            rde_update6_queue_runner(aid);
        if (rib_dump_pending() &&
            ibuf_se_ctl && ibuf_se_ctl->w.queued <= 10)
            rib_dump_runner();
    }

    /* close pipes */
    if (ibuf_se) {
        msgbuf_clear(&ibuf_se->w);
        close(ibuf_se->fd);
        free(ibuf_se);
    }
    if (ibuf_se_ctl) {
        msgbuf_clear(&ibuf_se_ctl->w);
        close(ibuf_se_ctl->fd);
        free(ibuf_se_ctl);
    }
    msgbuf_clear(&ibuf_main->w);
    close(ibuf_main->fd);
    free(ibuf_main);

    /* do not clean up on shutdown in production, it takes ages */
    if (debug)
        rde_shutdown();

    while ((mctx = LIST_FIRST(&rde_mrts)) != NULL) {
        msgbuf_clear(&mctx->mrt.wbuf);
        close(mctx->mrt.wbuf.fd);
        LIST_REMOVE(&mctx->ribctx, entry);
        LIST_REMOVE(mctx, entry);
        free(mctx);
    }

    log_info("route decision engine exiting");
    exit(0);
}

struct network_config netconf_s, netconf_p;
struct filter_set_head *session_set, *parent_set;

void
rde_dispatch_imsg_session(struct imsgbuf *ibuf)
{
    struct imsg imsg;
    struct peer p;
    struct peer_config pconf;
    struct session_up sup;
    struct ctl_show_rib csr;
    struct ctl_show_rib_request req;
    struct rde_peer *peer;
    struct rde_aspath *asp;
    struct filter_set *s;
    struct nexthop *nh;
    u_int8_t *asdata;
    ssize_t n;
    int verbose;
    u_int16_t len;
    u_int8_t aid;

    while (ibuf) {
        if ((n = imsg_get(ibuf, &imsg)) == -1)
            fatal("rde_dispatch_imsg_session: imsg_get error");
        if (n == 0)
            break;

        switch (imsg.hdr.type) {
        case IMSG_UPDATE:
            rde_update_dispatch(&imsg);
            break;
        case IMSG_SESSION_ADD:
            if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(pconf))
                fatalx("incorrect size of session request");
            memcpy(&pconf, imsg.data, sizeof(pconf));
            peer_add(imsg.hdr.peerid, &pconf);
            break;
        case IMSG_SESSION_UP:
            if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(sup))
                fatalx("incorrect size of session request");
            memcpy(&sup, imsg.data, sizeof(sup));
            peer_up(imsg.hdr.peerid, &sup);
            break;
        case IMSG_SESSION_DOWN:
            peer_down(imsg.hdr.peerid);
            break;
        case IMSG_SESSION_STALE:
            if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
                log_warnx("rde_dispatch: wrong imsg len");
                break;
            }
            memcpy(&aid, imsg.data, sizeof(aid));
            if (aid >= AID_MAX)
                fatalx("IMSG_SESSION_STALE: bad AID");
            peer_stale(imsg.hdr.peerid, aid);
            break;
        case IMSG_SESSION_FLUSH:
            if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
                log_warnx("rde_dispatch: wrong imsg len");
                break;
            }
            memcpy(&aid, imsg.data, sizeof(aid));
            if (aid >= AID_MAX)
                fatalx("IMSG_SESSION_FLUSH: bad AID");
            if ((peer = peer_get(imsg.hdr.peerid)) == NULL) {
                log_warnx("rde_dispatch: unknown peer id %d",
                    imsg.hdr.peerid);
                break;
            }
            peer_flush(peer, aid);
            break;
        case IMSG_SESSION_RESTARTED:
            if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
                log_warnx("rde_dispatch: wrong imsg len");
                break;
            }
            memcpy(&aid, imsg.data, sizeof(aid));
            if (aid >= AID_MAX)
                fatalx("IMSG_SESSION_RESTARTED: bad AID");
            if ((peer = peer_get(imsg.hdr.peerid)) == NULL) {
                log_warnx("rde_dispatch: unknown peer id %d",
                    imsg.hdr.peerid);
                break;
            }
            if (peer->staletime[aid])
                peer_flush(peer, aid);
            break;
        case IMSG_REFRESH:
            if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
                log_warnx("rde_dispatch: wrong imsg len");
                break;
            }
            memcpy(&aid, imsg.data, sizeof(aid));
            if (aid >= AID_MAX)
                fatalx("IMSG_REFRESH: bad AID");
            peer_dump(imsg.hdr.peerid, aid);
            break;
        case IMSG_NETWORK_ADD:
            if (imsg.hdr.len - IMSG_HEADER_SIZE !=
                sizeof(struct network_config)) {
                log_warnx("rde_dispatch: wrong imsg len");
                break;
            }
            memcpy(&netconf_s, imsg.data, sizeof(netconf_s));
            TAILQ_INIT(&netconf_s.attrset);
            session_set = &netconf_s.attrset;
            break;
        case IMSG_NETWORK_ASPATH:
            if (imsg.hdr.len - IMSG_HEADER_SIZE <
                sizeof(struct ctl_show_rib)) {
                log_warnx("rde_dispatch: wrong imsg len");
                bzero(&netconf_s, sizeof(netconf_s));
                break;
            }
            asdata = imsg.data;
            asdata += sizeof(struct ctl_show_rib);
            memcpy(&csr, imsg.data, sizeof(csr));
            if (csr.aspath_len + sizeof(csr) > imsg.hdr.len -
                IMSG_HEADER_SIZE) {
                log_warnx("rde_dispatch: wrong aspath len");
                bzero(&netconf_s, sizeof(netconf_s));
                break;
            }
            asp = path_get();
            asp->lpref = csr.local_pref;
            asp->med = csr.med;
            asp->weight = csr.weight;
            asp->flags = csr.flags;
            asp->origin = csr.origin;
            asp->flags |= F_PREFIX_ANNOUNCED | F_ANN_DYNAMIC;
            asp->aspath = aspath_get(asdata, csr.aspath_len);
            netconf_s.asp = asp;
            break;
        case IMSG_NETWORK_ATTR:
            if (imsg.hdr.len <= IMSG_HEADER_SIZE) {
                log_warnx("rde_dispatch: wrong imsg len");
                break;
            }
            /* parse path attributes */
            len = imsg.hdr.len - IMSG_HEADER_SIZE;
            asp = netconf_s.asp;
            if (rde_attr_add(asp, imsg.data, len) == -1) {
                log_warnx("rde_dispatch: bad network "
                    "attribute");
                path_put(asp);
                bzero(&netconf_s, sizeof(netconf_s));
                break;
            }
            break;
        case IMSG_NETWORK_DONE:
            if (imsg.hdr.len != IMSG_HEADER_SIZE) {
                log_warnx("rde_dispatch: wrong imsg len");
                break;
            }
            session_set = NULL;
            switch (netconf_s.prefix.aid) {
            case AID_INET:
                if (netconf_s.prefixlen > 32)
                    goto badnet;
                network_add(&netconf_s, 0);
                break;
            case AID_INET6:
                if (netconf_s.prefixlen > 128)
                    goto badnet;
                network_add(&netconf_s, 0);
                break;
            case 0:
                /* something failed beforehand */
                break;
            default:
badnet:
                log_warnx("rde_dispatch: bad network");
                break;
            }
            break;
        case IMSG_NETWORK_REMOVE:
            if (imsg.hdr.len - IMSG_HEADER_SIZE !=
                sizeof(struct network_config)) {
                log_warnx("rde_dispatch: wrong imsg len");
                break;
            }
            memcpy(&netconf_s, imsg.data, sizeof(netconf_s));
            TAILQ_INIT(&netconf_s.attrset);
            network_delete(&netconf_s, 0);
            break;
        case IMSG_NETWORK_FLUSH:
            if (imsg.hdr.len != IMSG_HEADER_SIZE) {
                log_warnx("rde_dispatch: wrong imsg len");
                break;
            }
            prefix_network_clean(peerself, time(NULL),
                F_ANN_DYNAMIC);
            break;
        case IMSG_FILTER_SET:
            if (imsg.hdr.len - IMSG_HEADER_SIZE !=
                sizeof(struct filter_set)) {
                log_warnx("rde_dispatch: wrong imsg len");
                break;
            }
            if (session_set == NULL) {
                log_warnx("rde_dispatch: "
                    "IMSG_FILTER_SET unexpected");
                break;
            }
            if ((s = malloc(sizeof(struct filter_set))) == NULL)
                fatal(NULL);
            memcpy(s, imsg.data, sizeof(struct filter_set));
            TAILQ_INSERT_TAIL(session_set, s, entry);

            if (s->type == ACTION_SET_NEXTHOP) {
                nh = nexthop_get(&s->action.nexthop);
                nh->refcnt++;
            }
            break;
        case IMSG_CTL_SHOW_NETWORK:
        case IMSG_CTL_SHOW_RIB:
        case IMSG_CTL_SHOW_RIB_AS:
        case IMSG_CTL_SHOW_RIB_COMMUNITY:
        case IMSG_CTL_SHOW_RIB_LARGECOMMUNITY:
        case IMSG_CTL_SHOW_RIB_PREFIX:
            if (imsg.hdr.len != IMSG_HEADER_SIZE + sizeof(req)) {
                log_warnx("rde_dispatch: wrong imsg len");
                break;
            }
            memcpy(&req, imsg.data, sizeof(req));
            rde_dump_ctx_new(&req, imsg.hdr.pid, imsg.hdr.type);
            break;
        case IMSG_CTL_SHOW_NEIGHBOR:
            if (imsg.hdr.len - IMSG_HEADER_SIZE !=
                sizeof(struct peer)) {
                log_warnx("rde_dispatch: wrong imsg len");
                break;
            }
            memcpy(&p, imsg.data, sizeof(struct peer));
            peer = peer_get(p.conf.id);
            if (peer != NULL) {
                p.stats.prefix_cnt = peer->prefix_cnt;
                p.stats.prefix_rcvd_update =
                    peer->prefix_rcvd_update;
                p.stats.prefix_rcvd_withdraw =
                    peer->prefix_rcvd_withdraw;
                p.stats.prefix_rcvd_eor =
                    peer->prefix_rcvd_eor;
                p.stats.prefix_sent_update =
                    peer->prefix_sent_update;
                p.stats.prefix_sent_withdraw =
                    peer->prefix_sent_withdraw;
                p.stats.prefix_sent_eor =
                    peer->prefix_sent_eor;
            }
            imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NEIGHBOR, 0,
                imsg.hdr.pid, -1, &p, sizeof(struct peer));
            break;
        case IMSG_CTL_END:
            imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, imsg.hdr.pid,
                -1, NULL, 0);
            break;
        case IMSG_CTL_SHOW_RIB_MEM:
            imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_MEM, 0,
                imsg.hdr.pid, -1, &rdemem, sizeof(rdemem));
            break;
        case IMSG_CTL_LOG_VERBOSE:
            /* already checked by SE */
            memcpy(&verbose, imsg.data, sizeof(verbose));
            log_verbose(verbose);
            break;
        default:
            break;
        }
        imsg_free(&imsg);
    }
}

void
rde_dispatch_imsg_parent(struct imsgbuf *ibuf)
{
    static struct rdomain *rd;
    struct imsg imsg;
    struct mrt xmrt;
    struct rde_rib rn;
    struct imsgbuf *i;
    struct filter_head *nr;
    struct filter_rule *r;
    struct filter_set *s;
    struct nexthop *nh;
    int n, fd;
    u_int16_t rid;

    while (ibuf) {
        if ((n = imsg_get(ibuf, &imsg)) == -1)
            fatal("rde_dispatch_imsg_parent: imsg_get error");
        if (n == 0)
            break;

        switch (imsg.hdr.type) {
        case IMSG_SOCKET_CONN:
        case IMSG_SOCKET_CONN_CTL:
            if ((fd = imsg.fd) == -1) {
                log_warnx("expected to receive imsg fd to "
                    "SE but didn't receive any");
                break;
            }
            if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
                fatal(NULL);
            imsg_init(i, fd);
            if (imsg.hdr.type == IMSG_SOCKET_CONN) {
                if (ibuf_se) {
                    log_warnx("Unexpected imsg connection "
                        "to SE received");
                    msgbuf_clear(&ibuf_se->w);
                    free(ibuf_se);
                }
                ibuf_se = i;
            } else {
                if (ibuf_se_ctl) {
                    log_warnx("Unexpected imsg ctl "
                        "connection to SE received");
                    msgbuf_clear(&ibuf_se_ctl->w);
                    free(ibuf_se_ctl);
                }
                ibuf_se_ctl = i;
            }
            break;
        case IMSG_NETWORK_ADD:
            if (imsg.hdr.len - IMSG_HEADER_SIZE !=
                sizeof(struct network_config)) {
                log_warnx("rde_dispatch: wrong imsg len");
                break;
            }
            memcpy(&netconf_p, imsg.data, sizeof(netconf_p));
            TAILQ_INIT(&netconf_p.attrset);
            parent_set = &netconf_p.attrset;
            break;
        case IMSG_NETWORK_DONE:
            parent_set = NULL;
            network_add(&netconf_p, 1);
            break;
        case IMSG_NETWORK_REMOVE:
            if (imsg.hdr.len - IMSG_HEADER_SIZE !=
                sizeof(struct network_config)) {
                log_warnx("rde_dispatch: wrong imsg len");
                break;
            }
            memcpy(&netconf_p, imsg.data, sizeof(netconf_p));
            TAILQ_INIT(&netconf_p.attrset);
            network_delete(&netconf_p, 1);
            break;
        case IMSG_RECONF_CONF:
            if (imsg.hdr.len - IMSG_HEADER_SIZE !=
                sizeof(struct bgpd_config))
                fatalx("IMSG_RECONF_CONF bad len");
            reloadtime = time(NULL);
            out_rules_tmp = calloc(1, sizeof(struct filter_head));
            if (out_rules_tmp == NULL)
                fatal(NULL);
            TAILQ_INIT(out_rules_tmp);
            newdomains = calloc(1, sizeof(struct rdomain_head));
            if (newdomains == NULL)
                fatal(NULL);
            SIMPLEQ_INIT(newdomains);
            if ((nconf = malloc(sizeof(struct bgpd_config))) ==
                NULL)
                fatal(NULL);
            memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
            for (rid = 0; rid < rib_size; rid++) {
                if (*ribs[rid].name == '\0')
                    break;
                ribs[rid].state = RECONF_DELETE;
            }
            break;
        case IMSG_RECONF_RIB:
            if (imsg.hdr.len - IMSG_HEADER_SIZE !=
                sizeof(struct rde_rib))
                fatalx("IMSG_RECONF_RIB bad len");
            memcpy(&rn, imsg.data, sizeof(rn));
            rid = rib_find(rn.name);
            if (rid == RIB_FAILED)
                rib_new(rn.name, rn.rtableid, rn.flags);
            else if (ribs[rid].rtableid != rn.rtableid ||
                (ribs[rid].flags & F_RIB_HASNOFIB) !=
                (rn.flags & F_RIB_HASNOFIB)) {
                struct filter_head *in_rules;
                /*
                 * Big hammer in the F_RIB_HASNOFIB case but
                 * not often enough used to optimise it more.
                 * Need to save the filters so that they're not
                 * lost.
                 */
                in_rules = ribs[rid].in_rules;
                ribs[rid].in_rules = NULL;
                rib_free(&ribs[rid]);
                rib_new(rn.name, rn.rtableid, rn.flags);
                ribs[rid].in_rules = in_rules;
            } else
                ribs[rid].state = RECONF_KEEP;
            break;
        case IMSG_RECONF_FILTER:
            if (imsg.hdr.len - IMSG_HEADER_SIZE !=
                sizeof(struct filter_rule))
                fatalx("IMSG_RECONF_FILTER bad len");
            if ((r = malloc(sizeof(struct filter_rule))) == NULL)
                fatal(NULL);
            memcpy(r, imsg.data, sizeof(struct filter_rule));
            TAILQ_INIT(&r->set);
            if ((r->peer.ribid = rib_find(r->rib)) == RIB_FAILED) {
                log_warnx("IMSG_RECONF_FILTER: filter rule "
                    "for nonexistent rib %s", r->rib);
                parent_set = NULL;
                free(r);
                break;
            }
            parent_set = &r->set;
            if (r->dir == DIR_IN) {
                nr = ribs[r->peer.ribid].in_rules_tmp;
                if (nr == NULL) {
                    nr = calloc(1,
                        sizeof(struct filter_head));
                    if (nr == NULL)
                        fatal(NULL);
                    TAILQ_INIT(nr);
                    ribs[r->peer.ribid].in_rules_tmp = nr;
                }
                TAILQ_INSERT_TAIL(nr, r, entry);
            } else
                TAILQ_INSERT_TAIL(out_rules_tmp, r, entry);
            break;
        case IMSG_RECONF_RDOMAIN:
            if (imsg.hdr.len - IMSG_HEADER_SIZE !=
                sizeof(struct rdomain))
                fatalx("IMSG_RECONF_RDOMAIN bad len");
            if ((rd = malloc(sizeof(struct rdomain))) == NULL)
                fatal(NULL);
            memcpy(rd, imsg.data, sizeof(struct rdomain));
            TAILQ_INIT(&rd->import);
            TAILQ_INIT(&rd->export);
            SIMPLEQ_INSERT_TAIL(newdomains, rd, entry);
            break;
        case IMSG_RECONF_RDOMAIN_EXPORT:
            if (rd == NULL) {
                log_warnx("rde_dispatch_imsg_parent: "
                    "IMSG_RECONF_RDOMAIN_EXPORT unexpected");
                break;
            }
            parent_set = &rd->export;
            break;
        case IMSG_RECONF_RDOMAIN_IMPORT:
            if (rd == NULL) {
                log_warnx("rde_dispatch_imsg_parent: "
                    "IMSG_RECONF_RDOMAIN_IMPORT unexpected");
                break;
            }
            parent_set = &rd->import;
            break;
        case IMSG_RECONF_RDOMAIN_DONE:
            parent_set = NULL;
            break;
        case IMSG_RECONF_DONE:
            if (nconf == NULL)
                fatalx("got IMSG_RECONF_DONE but no config");
            parent_set = NULL;

            rde_reload_done();
            break;
        case IMSG_NEXTHOP_UPDATE:
            nexthop_update(imsg.data);
            break;
        case IMSG_FILTER_SET:
            if (imsg.hdr.len > IMSG_HEADER_SIZE +
                sizeof(struct filter_set))
                fatalx("IMSG_FILTER_SET bad len");
            if (parent_set == NULL) {
                log_warnx("rde_dispatch_imsg_parent: "
                    "IMSG_FILTER_SET unexpected");
                break;
            }
            if ((s = malloc(sizeof(struct filter_set))) == NULL)
                fatal(NULL);
            memcpy(s, imsg.data, sizeof(struct filter_set));
            TAILQ_INSERT_TAIL(parent_set, s, entry);

            if (s->type == ACTION_SET_NEXTHOP) {
                nh = nexthop_get(&s->action.nexthop);
                nh->refcnt++;
            }
            break;
        case IMSG_MRT_OPEN:
        case IMSG_MRT_REOPEN:
            if (imsg.hdr.len > IMSG_HEADER_SIZE +
                sizeof(struct mrt)) {
                log_warnx("wrong imsg len");
                break;
            }
            memcpy(&xmrt, imsg.data, sizeof(xmrt));
            if ((fd = imsg.fd) == -1)
                log_warnx("expected to receive fd for mrt dump "
                    "but didn't receive any");
            else if (xmrt.type == MRT_TABLE_DUMP ||
                xmrt.type == MRT_TABLE_DUMP_MP ||
                xmrt.type == MRT_TABLE_DUMP_V2) {
                rde_dump_mrt_new(&xmrt, imsg.hdr.pid, fd);
            } else
                close(fd);
            break;
        case IMSG_MRT_CLOSE:
            /* ignore end message because a dump is atomic */
            break;
        default:
            break;
        }
        imsg_free(&imsg);
    }
}
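/*
 * A BGP UPDATE message body (RFC 4271, section 4.3) is laid out as:
 *
 *    +----------------------------------------------------+
 *    | Withdrawn Routes Length (2 bytes)                  |
 *    | Withdrawn Routes (variable)                        |
 *    | Total Path Attribute Length (2 bytes)              |
 *    | Path Attributes (variable)                         |
 *    | Network Layer Reachability Information (variable)  |
 *    +----------------------------------------------------+
 *
 * rde_update_dispatch() below walks these sections in order; the NLRI
 * length is implicit, being whatever remains after the attributes.
 */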
/* handle routing updates from the session engine. */
int
rde_update_dispatch(struct imsg *imsg)
{
    struct bgpd_addr prefix;
    struct mpattr mpa;
    struct rde_peer *peer;
    struct rde_aspath *asp = NULL;
    u_char *p, *mpp = NULL;
    int error = -1, pos = 0;
    u_int16_t afi, len, mplen;
    u_int16_t withdrawn_len;
    u_int16_t attrpath_len;
    u_int16_t nlri_len;
    u_int8_t aid, prefixlen, safi, subtype;
    u_int32_t fas;

    peer = peer_get(imsg->hdr.peerid);
    if (peer == NULL) /* unknown peer, cannot happen */
        return (-1);
    if (peer->state != PEER_UP)
        return (-1); /* peer is not yet up, cannot happen */

    p = imsg->data;

    if (imsg->hdr.len < IMSG_HEADER_SIZE + 2) {
        rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
        return (-1);
    }

    memcpy(&len, p, 2);
    withdrawn_len = ntohs(len);
    p += 2;
    if (imsg->hdr.len < IMSG_HEADER_SIZE + 2 + withdrawn_len + 2) {
        rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
        return (-1);
    }

    p += withdrawn_len;
    memcpy(&len, p, 2);
    attrpath_len = len = ntohs(len);
    p += 2;
    if (imsg->hdr.len <
        IMSG_HEADER_SIZE + 2 + withdrawn_len + 2 + attrpath_len) {
        rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
        return (-1);
    }

    nlri_len =
        imsg->hdr.len - IMSG_HEADER_SIZE - 4 - withdrawn_len - attrpath_len;
    bzero(&mpa, sizeof(mpa));

    if (attrpath_len != 0) { /* 0 = no NLRI information in this message */
        /* parse path attributes */
        asp = path_get();
        while (len > 0) {
            if ((pos = rde_attr_parse(p, len, peer, asp,
                &mpa)) < 0)
                goto done;
            p += pos;
            len -= pos;
        }

        /* check for missing but necessary attributes */
        if ((subtype = rde_attr_missing(asp, peer->conf.ebgp,
            nlri_len))) {
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_MISSNG_WK_ATTR,
                &subtype, sizeof(u_int8_t));
            goto done;
        }

        rde_as4byte_fixup(peer, asp);

        /* enforce remote AS if requested */
        if (asp->flags & F_ATTR_ASPATH &&
            peer->conf.enforce_as == ENFORCE_AS_ON) {
            fas = aspath_neighbor(asp->aspath);
            if (peer->conf.remote_as != fas) {
                log_peer_warnx(&peer->conf, "bad path, "
                    "starting with %s, "
                    "enforce neighbor-as enabled", log_as(fas));
                rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
                    NULL, 0);
                goto done;
            }
        }

        rde_reflector(peer, asp);
    }

    p = imsg->data;
    len = withdrawn_len;
    p += 2;
    /* withdraw prefix */
    while (len > 0) {
        if ((pos = rde_update_get_prefix(p, len, &prefix,
            &prefixlen)) == -1) {
            /*
             * the RFC does not mention what we should do in
             * this case. Let's do the same as in the NLRI case.
             */
            log_peer_warnx(&peer->conf, "bad withdraw prefix");
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
                NULL, 0);
            goto done;
        }
        if (prefixlen > 32) {
            log_peer_warnx(&peer->conf, "bad withdraw prefix");
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
                NULL, 0);
            goto done;
        }

        p += pos;
        len -= pos;

        if (peer->capa.mp[AID_INET] == 0) {
            log_peer_warnx(&peer->conf,
                "bad withdraw, %s disabled", aid2str(AID_INET));
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
                NULL, 0);
            goto done;
        }

        rde_update_withdraw(peer, &prefix, prefixlen);
    }

    if (attrpath_len == 0) {
        /* 0 = no NLRI information in this message */
        if (nlri_len != 0) {
            /* crap at end of update which should not be there */
            rde_update_err(peer, ERR_UPDATE,
                ERR_UPD_ATTRLIST, NULL, 0);
            return (-1);
        }
        if (withdrawn_len == 0) {
            /* EoR marker */
            peer_recv_eor(peer, AID_INET);
        }
        return (0);
    }
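    /*
     * MP_UNREACH_NLRI (RFC 4760, section 4) carries a 2-byte AFI, a
     * 1-byte SAFI and then the withdrawn prefixes.  An UPDATE whose
     * only content is an empty MP_UNREACH_NLRI is the End-of-RIB
     * marker for that AFI/SAFI.
     */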
    /* withdraw MP_UNREACH_NLRI if available */
    if (mpa.unreach_len != 0) {
        mpp = mpa.unreach;
        mplen = mpa.unreach_len;
        memcpy(&afi, mpp, 2);
        mpp += 2;
        mplen -= 2;
        afi = ntohs(afi);
        safi = *mpp++;
        mplen--;

        if (afi2aid(afi, safi, &aid) == -1) {
            log_peer_warnx(&peer->conf,
                "bad AFI/SAFI pair in withdraw");
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
                NULL, 0);
            goto done;
        }

        if (peer->capa.mp[aid] == 0) {
            log_peer_warnx(&peer->conf,
                "bad withdraw, %s disabled", aid2str(aid));
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
                NULL, 0);
            goto done;
        }

        if ((asp->flags & ~F_ATTR_MP_UNREACH) == 0 && mplen == 0) {
            /* EoR marker */
            peer_recv_eor(peer, aid);
        }

        switch (aid) {
        case AID_INET6:
            while (mplen > 0) {
                if ((pos = rde_update_get_prefix6(mpp, mplen,
                    &prefix, &prefixlen)) == -1) {
                    log_peer_warnx(&peer->conf,
                        "bad IPv6 withdraw prefix");
                    rde_update_err(peer, ERR_UPDATE,
                        ERR_UPD_OPTATTR,
                        mpa.unreach, mpa.unreach_len);
                    goto done;
                }
                if (prefixlen > 128) {
                    log_peer_warnx(&peer->conf,
                        "bad IPv6 withdraw prefix");
                    rde_update_err(peer, ERR_UPDATE,
                        ERR_UPD_OPTATTR,
                        mpa.unreach, mpa.unreach_len);
                    goto done;
                }

                mpp += pos;
                mplen -= pos;

                rde_update_withdraw(peer, &prefix, prefixlen);
            }
            break;
        case AID_VPN_IPv4:
            while (mplen > 0) {
                if ((pos = rde_update_get_vpn4(mpp, mplen,
                    &prefix, &prefixlen)) == -1) {
                    log_peer_warnx(&peer->conf,
                        "bad VPNv4 withdraw prefix");
                    rde_update_err(peer, ERR_UPDATE,
                        ERR_UPD_OPTATTR,
                        mpa.unreach, mpa.unreach_len);
                    goto done;
                }
                if (prefixlen > 32) {
                    log_peer_warnx(&peer->conf,
                        "bad VPNv4 withdraw prefix");
                    rde_update_err(peer, ERR_UPDATE,
                        ERR_UPD_OPTATTR,
                        mpa.unreach, mpa.unreach_len);
                    goto done;
                }

                mpp += pos;
                mplen -= pos;

                rde_update_withdraw(peer, &prefix, prefixlen);
            }
            break;
        default:
            /* silently ignore unsupported multiprotocol AF */
            break;
        }

        if ((asp->flags & ~F_ATTR_MP_UNREACH) == 0) {
            error = 0;
            goto done;
        }
    }

    /* shift to NLRI information */
    p += 2 + attrpath_len;
    /* aspath needs to be loop free; note that this is not a hard error */
    if (peer->conf.ebgp && !aspath_loopfree(asp->aspath, conf->as))
        asp->flags |= F_ATTR_LOOP;

    /* parse nlri prefix */
    while (nlri_len > 0) {
        if ((pos = rde_update_get_prefix(p, nlri_len, &prefix,
            &prefixlen)) == -1) {
            log_peer_warnx(&peer->conf, "bad nlri prefix");
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
                NULL, 0);
            goto done;
        }
        if (prefixlen > 32) {
            log_peer_warnx(&peer->conf, "bad nlri prefix");
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
                NULL, 0);
            goto done;
        }

        p += pos;
        nlri_len -= pos;

        if (peer->capa.mp[AID_INET] == 0) {
            log_peer_warnx(&peer->conf,
                "bad update, %s disabled", aid2str(AID_INET));
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
                NULL, 0);
            goto done;
        }

        rde_update_update(peer, asp, &prefix, prefixlen);

        /* max prefix checker */
        if (peer->conf.max_prefix &&
            peer->prefix_cnt > peer->conf.max_prefix) {
            log_peer_warnx(&peer->conf, "prefix limit reached"
                " (>%u/%u)", peer->prefix_cnt, peer->conf.max_prefix);
            rde_update_err(peer, ERR_CEASE, ERR_CEASE_MAX_PREFIX,
                NULL, 0);
            goto done;
        }
    }
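    /*
     * MP_REACH_NLRI (RFC 4760, section 3) carries a 2-byte AFI, a
     * 1-byte SAFI, a 1-byte nexthop length, the nexthop itself, one
     * reserved byte and then the NLRI.  rde_get_mp_nexthop() consumes
     * everything up to and including the reserved byte.
     */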
    /* add MP_REACH_NLRI if available */
    if (mpa.reach_len != 0) {
        mpp = mpa.reach;
        mplen = mpa.reach_len;
        memcpy(&afi, mpp, 2);
        mpp += 2;
        mplen -= 2;
        afi = ntohs(afi);
        safi = *mpp++;
        mplen--;

        if (afi2aid(afi, safi, &aid) == -1) {
            log_peer_warnx(&peer->conf,
                "bad AFI/SAFI pair in update");
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
                NULL, 0);
            goto done;
        }

        if (peer->capa.mp[aid] == 0) {
            log_peer_warnx(&peer->conf,
                "bad update, %s disabled", aid2str(aid));
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
                NULL, 0);
            goto done;
        }

        /*
         * this works because asp is not linked.
         * But first unlock the previously locked nexthop.
         */
        if (asp->nexthop) {
            asp->nexthop->refcnt--;
            (void)nexthop_delete(asp->nexthop);
            asp->nexthop = NULL;
        }
        if ((pos = rde_get_mp_nexthop(mpp, mplen, aid, asp)) == -1) {
            log_peer_warnx(&peer->conf, "bad nlri prefix");
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
                mpa.reach, mpa.reach_len);
            goto done;
        }
        mpp += pos;
        mplen -= pos;

        switch (aid) {
        case AID_INET6:
            while (mplen > 0) {
                if ((pos = rde_update_get_prefix6(mpp, mplen,
                    &prefix, &prefixlen)) == -1) {
                    log_peer_warnx(&peer->conf,
                        "bad IPv6 nlri prefix");
                    rde_update_err(peer, ERR_UPDATE,
                        ERR_UPD_OPTATTR,
                        mpa.reach, mpa.reach_len);
                    goto done;
                }
                if (prefixlen > 128) {
                    rde_update_err(peer, ERR_UPDATE,
                        ERR_UPD_OPTATTR,
                        mpa.reach, mpa.reach_len);
                    goto done;
                }

                mpp += pos;
                mplen -= pos;

                rde_update_update(peer, asp, &prefix,
                    prefixlen);

                /* max prefix checker */
                if (peer->conf.max_prefix &&
                    peer->prefix_cnt > peer->conf.max_prefix) {
                    log_peer_warnx(&peer->conf,
                        "prefix limit reached"
                        " (>%u/%u)", peer->prefix_cnt,
                        peer->conf.max_prefix);
                    rde_update_err(peer, ERR_CEASE,
                        ERR_CEASE_MAX_PREFIX, NULL, 0);
                    goto done;
                }
            }
            break;
        case AID_VPN_IPv4:
            while (mplen > 0) {
                if ((pos = rde_update_get_vpn4(mpp, mplen,
                    &prefix, &prefixlen)) == -1) {
                    log_peer_warnx(&peer->conf,
                        "bad VPNv4 nlri prefix");
                    rde_update_err(peer, ERR_UPDATE,
                        ERR_UPD_OPTATTR,
                        mpa.reach, mpa.reach_len);
                    goto done;
                }
                if (prefixlen > 32) {
                    rde_update_err(peer, ERR_UPDATE,
                        ERR_UPD_OPTATTR,
                        mpa.reach, mpa.reach_len);
                    goto done;
                }

                mpp += pos;
                mplen -= pos;

                rde_update_update(peer, asp, &prefix,
                    prefixlen);

                /* max prefix checker */
                if (peer->conf.max_prefix &&
                    peer->prefix_cnt > peer->conf.max_prefix) {
                    log_peer_warnx(&peer->conf,
                        "prefix limit reached"
                        " (>%u/%u)", peer->prefix_cnt,
                        peer->conf.max_prefix);
                    rde_update_err(peer, ERR_CEASE,
                        ERR_CEASE_MAX_PREFIX, NULL, 0);
                    goto done;
                }
            }
            break;
        default:
            /* silently ignore unsupported multiprotocol AF */
            break;
        }
    }

done:
    if (attrpath_len != 0) {
        /* unlock the previously locked entry */
        if (asp->nexthop) {
            asp->nexthop->refcnt--;
            (void)nexthop_delete(asp->nexthop);
        }
        /* free allocated attribute memory that is no longer used */
        path_put(asp);
    }

    return (error);
}
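/*
 * ribs[0] is the unfiltered Adj-RIB-In; every other RIB receives the
 * path only after it passed that RIB's input filter.  A path that a
 * RIB previously accepted but now filters out must be withdrawn from
 * it again, which the prefix_remove() call below takes care of.
 */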
void
rde_update_update(struct rde_peer *peer, struct rde_aspath *asp,
    struct bgpd_addr *prefix, u_int8_t prefixlen)
{
    struct rde_aspath *fasp;
    enum filter_actions action;
    int r = 0, f = 0;
    u_int16_t i;

    peer->prefix_rcvd_update++;
    /* add original path to the Adj-RIB-In */
    if (peer->conf.softreconfig_in)
        r += path_update(&ribs[0], peer, asp, prefix, prefixlen);

    for (i = 1; i < rib_size; i++) {
        if (*ribs[i].name == '\0')
            break;
        /* input filter */
        action = rde_filter(ribs[i].in_rules, &fasp, peer, asp, prefix,
            prefixlen, peer);

        if (fasp == NULL)
            fasp = asp;

        if (action == ACTION_ALLOW) {
            rde_update_log("update", i, peer,
                &fasp->nexthop->exit_nexthop, prefix, prefixlen);
            r += path_update(&ribs[i], peer, fasp, prefix,
                prefixlen);
        } else if (prefix_remove(&ribs[i], peer, prefix, prefixlen,
            0)) {
            rde_update_log("filtered withdraw", i, peer,
                NULL, prefix, prefixlen);
            f++;
        }

        /* free modified aspath */
        if (fasp != asp)
            path_put(fasp);
    }

    if (r)
        peer->prefix_cnt++;
    else if (f)
        peer->prefix_cnt--;
}

void
rde_update_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix,
    u_int8_t prefixlen)
{
    int r = 0;
    u_int16_t i;

    peer->prefix_rcvd_withdraw++;

    for (i = rib_size - 1; ; i--) {
        if (*ribs[i].name == '\0')
            break;
        if (prefix_remove(&ribs[i], peer, prefix, prefixlen, 0)) {
            rde_update_log("withdraw", i, peer, NULL, prefix,
                prefixlen);
            r++;
        }
        if (i == 0)
            break;
    }

    if (r)
        peer->prefix_cnt--;
}

/*
 * BGP UPDATE parser functions
 */

/* attribute parser specific macros */
#define UPD_READ(t, p, plen, n) \
    do { \
        memcpy(t, p, n); \
        p += n; \
        plen += n; \
    } while (0)

#define CHECK_FLAGS(s, t, m) \
    (((s) & ~(ATTR_DEFMASK | (m))) == (t))
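/*
 * Each path attribute starts with a 1-byte flags field and a 1-byte
 * type code, followed by a 1-byte length, or a 2-byte length when the
 * ATTR_EXTLEN flag is set (RFC 4271, section 4.3).  UPD_READ() above
 * copies out a field while advancing the parse pointer and the count
 * of consumed bytes.
 */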
int
rde_attr_parse(u_char *p, u_int16_t len, struct rde_peer *peer,
    struct rde_aspath *a, struct mpattr *mpa)
{
    struct bgpd_addr nexthop;
    u_char *op = p, *npath;
    u_int32_t tmp32;
    int error;
    u_int16_t attr_len, nlen;
    u_int16_t plen = 0;
    u_int8_t flags;
    u_int8_t type;
    u_int8_t tmp8;

    if (len < 3) {
bad_len:
        rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLEN, op, len);
        return (-1);
    }

    UPD_READ(&flags, p, plen, 1);
    UPD_READ(&type, p, plen, 1);

    if (flags & ATTR_EXTLEN) {
        if (len - plen < 2)
            goto bad_len;
        UPD_READ(&attr_len, p, plen, 2);
        attr_len = ntohs(attr_len);
    } else {
        UPD_READ(&tmp8, p, plen, 1);
        attr_len = tmp8;
    }

    if (len - plen < attr_len)
        goto bad_len;

    /* adjust len to the actual attribute size including header */
    len = plen + attr_len;

    switch (type) {
    case ATTR_UNDEF:
        /* ignore and drop path attributes with a type code of 0 */
        plen += attr_len;
        break;
    case ATTR_ORIGIN:
        if (attr_len != 1)
            goto bad_len;

        if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0)) {
bad_flags:
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRFLAGS,
                op, len);
            return (-1);
        }

        UPD_READ(&a->origin, p, plen, 1);
        if (a->origin > ORIGIN_INCOMPLETE) {
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_ORIGIN,
                op, len);
            return (-1);
        }
        if (a->flags & F_ATTR_ORIGIN)
            goto bad_list;
        a->flags |= F_ATTR_ORIGIN;
        break;
    case ATTR_ASPATH:
        if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
            goto bad_flags;
        error = aspath_verify(p, attr_len, rde_as4byte(peer));
        if (error == AS_ERR_SOFT) {
            /*
             * soft errors like unexpected segment types are
             * not considered fatal and the path is just
             * marked invalid.
             */
            a->flags |= F_ATTR_PARSE_ERR;
            log_peer_warnx(&peer->conf, "bad ASPATH, "
                "path invalidated and prefix withdrawn");
        } else if (error != 0) {
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
                NULL, 0);
            return (-1);
        }
        if (a->flags & F_ATTR_ASPATH)
            goto bad_list;
        if (rde_as4byte(peer)) {
            npath = p;
            nlen = attr_len;
        } else
            npath = aspath_inflate(p, attr_len, &nlen);
        a->flags |= F_ATTR_ASPATH;
        a->aspath = aspath_get(npath, nlen);
        if (npath != p)
            free(npath);
        plen += attr_len;
        break;
    case ATTR_NEXTHOP:
        if (attr_len != 4)
            goto bad_len;
        if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
            goto bad_flags;
        if (a->flags & F_ATTR_NEXTHOP)
            goto bad_list;
        a->flags |= F_ATTR_NEXTHOP;

        bzero(&nexthop, sizeof(nexthop));
        nexthop.aid = AID_INET;
        UPD_READ(&nexthop.v4.s_addr, p, plen, 4);
        /*
         * Check if the nexthop is a valid IP address. We consider
         * multicast and experimental addresses as invalid.
         */
        tmp32 = ntohl(nexthop.v4.s_addr);
        if (IN_MULTICAST(tmp32) || IN_BADCLASS(tmp32)) {
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
                op, len);
            return (-1);
        }
        a->nexthop = nexthop_get(&nexthop);
        /*
         * lock the nexthop because it is not yet linked else
         * withdraws may remove this nexthop which in turn would
         * cause a use after free error.
         */
        a->nexthop->refcnt++;
        break;
    case ATTR_MED:
        if (attr_len != 4)
            goto bad_len;
        if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
            goto bad_flags;
        if (a->flags & F_ATTR_MED)
            goto bad_list;
        a->flags |= F_ATTR_MED;

        UPD_READ(&tmp32, p, plen, 4);
        a->med = ntohl(tmp32);
        break;
    case ATTR_LOCALPREF:
        if (attr_len != 4)
            goto bad_len;
        if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
            goto bad_flags;
        if (peer->conf.ebgp) {
            /* ignore local-pref attr on non ibgp peers */
            plen += 4;
            break;
        }
        if (a->flags & F_ATTR_LOCALPREF)
            goto bad_list;
        a->flags |= F_ATTR_LOCALPREF;

        UPD_READ(&tmp32, p, plen, 4);
        a->lpref = ntohl(tmp32);
        break;
    case ATTR_ATOMIC_AGGREGATE:
        if (attr_len != 0)
            goto bad_len;
        if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
            goto bad_flags;
        goto optattr;
    case ATTR_AGGREGATOR:
        if ((!rde_as4byte(peer) && attr_len != 6) ||
            (rde_as4byte(peer) && attr_len != 8)) {
            /*
             * ignore attribute in case of error as per
             * draft-ietf-idr-optional-transitive-00.txt
             * but only if partial bit is set
             */
            if ((flags & ATTR_PARTIAL) == 0)
                goto bad_len;
            log_peer_warnx(&peer->conf, "bad AGGREGATOR, "
                "partial attribute ignored");
            plen += attr_len;
            break;
        }
        if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
            ATTR_PARTIAL))
            goto bad_flags;
        if (!rde_as4byte(peer)) {
            /* need to inflate aggregator AS to 4-byte */
            u_char t[8];
            t[0] = t[1] = 0;
            UPD_READ(&t[2], p, plen, 2);
            UPD_READ(&t[4], p, plen, 4);
            if (attr_optadd(a, flags, type, t,
                sizeof(t)) == -1)
                goto bad_list;
            break;
        }
        /* 4-byte ready servers take the default route */
        goto optattr;
    case ATTR_COMMUNITIES:
        if (attr_len % 4 != 0) {
            /*
             * mark update as bad and withdraw all routes as per
             * draft-ietf-idr-optional-transitive-00.txt
             * but only if partial bit is set
             */
            if ((flags & ATTR_PARTIAL) == 0)
                goto bad_len;
            a->flags |= F_ATTR_PARSE_ERR;
            log_peer_warnx(&peer->conf, "bad COMMUNITIES, "
                "path invalidated and prefix withdrawn");
        }
        if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
            ATTR_PARTIAL))
            goto bad_flags;
        goto optattr;
    case ATTR_LARGE_COMMUNITIES:
        if (attr_len % 12 != 0) {
            /*
             * mark update as bad and withdraw all routes as per
             * draft-ietf-idr-optional-transitive-00.txt
             * but only if partial bit is set
             */
            if ((flags & ATTR_PARTIAL) == 0)
                goto bad_len;
            a->flags |= F_ATTR_PARSE_ERR;
            log_peer_warnx(&peer->conf, "bad LARGE COMMUNITIES, "
                "path invalidated and prefix withdrawn");
        }
        if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
            ATTR_PARTIAL))
            goto bad_flags;
        goto optattr;
    case ATTR_EXT_COMMUNITIES:
        if (attr_len % 8 != 0) {
            /*
             * mark update as bad and withdraw all routes as per
             * draft-ietf-idr-optional-transitive-00.txt
             * but only if partial bit is set
             */
            if ((flags & ATTR_PARTIAL) == 0)
                goto bad_len;
            a->flags |= F_ATTR_PARSE_ERR;
            log_peer_warnx(&peer->conf, "bad EXT_COMMUNITIES, "
                "path invalidated and prefix withdrawn");
        }
        if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
            ATTR_PARTIAL))
            goto bad_flags;
        goto optattr;
    case ATTR_ORIGINATOR_ID:
        if (attr_len != 4)
            goto bad_len;
        if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
            goto bad_flags;
        goto optattr;
    case ATTR_CLUSTER_LIST:
        if (attr_len % 4 != 0)
            goto bad_len;
        if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
            goto bad_flags;
        goto optattr;
    case ATTR_MP_REACH_NLRI:
        if (attr_len < 4)
            goto bad_len;
        if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
            goto bad_flags;
        /* the validity is checked in rde_update_dispatch() */
        if (a->flags & F_ATTR_MP_REACH)
            goto bad_list;
        a->flags |= F_ATTR_MP_REACH;

        mpa->reach = p;
        mpa->reach_len = attr_len;
        plen += attr_len;
        break;
    case ATTR_MP_UNREACH_NLRI:
        if (attr_len < 3)
            goto bad_len;
        if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
            goto bad_flags;
        /* the validity is checked in rde_update_dispatch() */
        if (a->flags & F_ATTR_MP_UNREACH)
            goto bad_list;
        a->flags |= F_ATTR_MP_UNREACH;

        mpa->unreach = p;
        mpa->unreach_len = attr_len;
        plen += attr_len;
        break;
    case ATTR_AS4_AGGREGATOR:
        if (attr_len != 8) {
            /* see ATTR_AGGREGATOR ... */
            if ((flags & ATTR_PARTIAL) == 0)
                goto bad_len;
            log_peer_warnx(&peer->conf, "bad AS4_AGGREGATOR, "
                "partial attribute ignored");
            plen += attr_len;
            break;
        }
        if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
            ATTR_PARTIAL))
            goto bad_flags;
        a->flags |= F_ATTR_AS4BYTE_NEW;
        goto optattr;
    case ATTR_AS4_PATH:
        if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
            ATTR_PARTIAL))
            goto bad_flags;
        if ((error = aspath_verify(p, attr_len, 1)) != 0) {
            /*
             * XXX RFC does not specify how to handle errors.
             * XXX Instead of dropping the session because of a
             * XXX bad path just mark the full update as having
             * XXX a parse error which makes the update no longer
             * XXX eligible and will not be considered for routing
             * XXX or redistribution.
             * XXX We follow draft-ietf-idr-optional-transitive
             * XXX by looking at the partial bit.
             * XXX Consider soft errors similar to a partial attr.
             */
            if (flags & ATTR_PARTIAL || error == AS_ERR_SOFT) {
                a->flags |= F_ATTR_PARSE_ERR;
                log_peer_warnx(&peer->conf, "bad AS4_PATH, "
                    "path invalidated and prefix withdrawn");
                goto optattr;
            } else {
                rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
                    NULL, 0);
                return (-1);
            }
        }
        a->flags |= F_ATTR_AS4BYTE_NEW;
        goto optattr;
    default:
        if ((flags & ATTR_OPTIONAL) == 0) {
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_UNKNWN_WK_ATTR,
                op, len);
            return (-1);
        }
optattr:
        if (attr_optadd(a, flags, type, p, attr_len) == -1) {
bad_list:
            rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST,
                NULL, 0);
            return (-1);
        }

        plen += attr_len;
        break;
    }

    return (plen);
}

int
rde_attr_add(struct rde_aspath *a, u_char *p, u_int16_t len)
{
    u_int16_t attr_len;
    u_int16_t plen = 0;
    u_int8_t flags;
    u_int8_t type;
    u_int8_t tmp8;

    if (a == NULL) /* no aspath, nothing to do */
        return (0);
    if (len < 3)
        return (-1);

    UPD_READ(&flags, p, plen, 1);
    UPD_READ(&type, p, plen, 1);

    if (flags & ATTR_EXTLEN) {
        if (len - plen < 2)
            return (-1);
        UPD_READ(&attr_len, p, plen, 2);
        attr_len = ntohs(attr_len);
    } else {
        UPD_READ(&tmp8, p, plen, 1);
        attr_len = tmp8;
    }

    if (len - plen < attr_len)
        return (-1);

    if (attr_optadd(a, flags, type, p, attr_len) == -1)
        return (-1);
    return (0);
}

#undef UPD_READ
#undef CHECK_FLAGS

u_int8_t
rde_attr_missing(struct rde_aspath *a, int ebgp, u_int16_t nlrilen)
{
    /* ATTR_MP_UNREACH_NLRI may be sent alone */
    if (nlrilen == 0 && a->flags & F_ATTR_MP_UNREACH &&
        (a->flags & F_ATTR_MP_REACH) == 0)
        return (0);

    if ((a->flags & F_ATTR_ORIGIN) == 0)
        return (ATTR_ORIGIN);
    if ((a->flags & F_ATTR_ASPATH) == 0)
        return (ATTR_ASPATH);
    if ((a->flags & F_ATTR_MP_REACH) == 0 &&
        (a->flags & F_ATTR_NEXTHOP) == 0)
        return (ATTR_NEXTHOP);
    if (!ebgp)
        if ((a->flags & F_ATTR_LOCALPREF) == 0)
            return (ATTR_LOCALPREF);
    return (0);
}

int
rde_get_mp_nexthop(u_char *data, u_int16_t len, u_int8_t aid,
    struct rde_aspath *asp)
{
    struct bgpd_addr nexthop;
    u_int8_t totlen, nhlen;

    if (len == 0)
        return (-1);

    nhlen = *data++;
    totlen = 1;
    len--;

    if (nhlen > len)
        return (-1);

    bzero(&nexthop, sizeof(nexthop));
    nexthop.aid = aid;
    switch (aid) {
    case AID_INET6:
        /*
         * RFC2545 describes that there may be a link-local
         * address carried in nexthop. Yikes!
         * This is not only silly, it is wrong and we just ignore
         * this link-local nexthop. The bgpd session doesn't run
         * over the link-local address, so why should all other
         * traffic?
         */
        if (nhlen != 16 && nhlen != 32) {
            log_warnx("bad multiprotocol nexthop, bad size");
            return (-1);
        }
        memcpy(&nexthop.v6.s6_addr, data, 16);
        break;
    case AID_VPN_IPv4:
        /*
         * Neither RFC4364 nor RFC3107 specify the format of the
         * nexthop in an explicit way. The quality of the RFCs went
         * down the toilet the larger the number got.
         * RFC4364 is very confusing about the VPN-IPv4 address and
         * the VPN-IPv4 prefix that also carries an MPLS label.
         * So the nexthop is a 12-byte address with a 64bit RD and
         * an IPv4 address following. In the nexthop case the RD can
         * be ignored.
         * Since the nexthop has to be in the main IPv4 table just
         * create an AID_INET nexthop. So we don't need to handle
         * AID_VPN_IPv4 in nexthop and kroute.
         */
        if (nhlen != 12) {
            log_warnx("bad multiprotocol nexthop, bad size");
            return (-1);
        }
        data += sizeof(u_int64_t);
        nexthop.aid = AID_INET;
        memcpy(&nexthop.v4, data, sizeof(nexthop.v4));
        break;
    default:
        log_warnx("bad multiprotocol nexthop, bad AID");
        return (-1);
    }

    asp->nexthop = nexthop_get(&nexthop);
    /*
     * lock the nexthop because it is not yet linked else
     * withdraws may remove this nexthop which in turn would
     * cause a use after free error.
     */
    asp->nexthop->refcnt++;

    /* ignore reserved (old SNPA) field as per RFC4760 */
    totlen += nhlen + 1;
    data += nhlen + 1;

    return (totlen);
}
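/*
 * NLRI prefixes are encoded as a 1-byte prefix length in bits followed
 * by the prefix truncated to the minimum number of whole bytes, i.e.
 * (pfxlen + 7) / 8 (RFC 4271, section 4.3).  Trailing bits beyond the
 * prefix length are masked off below.
 */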
int
rde_update_extract_prefix(u_char *p, u_int16_t len, void *va,
    u_int8_t pfxlen, u_int8_t max)
{
    static u_char addrmask[] = {
        0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff };
    u_char *a = va;
    int i;
    u_int16_t plen = 0;

    for (i = 0; pfxlen && i < max; i++) {
        if (len <= plen)
            return (-1);
        if (pfxlen < 8) {
            a[i] = *p++ & addrmask[pfxlen];
            plen++;
            break;
        } else {
            a[i] = *p++;
            plen++;
            pfxlen -= 8;
        }
    }
    return (plen);
}

int
rde_update_get_prefix(u_char *p, u_int16_t len, struct bgpd_addr *prefix,
    u_int8_t *prefixlen)
{
    u_int8_t pfxlen;
    int plen;

    if (len < 1)
        return (-1);

    pfxlen = *p++;
    len--;

    bzero(prefix, sizeof(struct bgpd_addr));
    prefix->aid = AID_INET;
    *prefixlen = pfxlen;

    if ((plen = rde_update_extract_prefix(p, len, &prefix->v4, pfxlen,
        sizeof(prefix->v4))) == -1)
        return (-1);

    return (plen + 1); /* pfxlen needs to be added */
}

int
rde_update_get_prefix6(u_char *p, u_int16_t len, struct bgpd_addr *prefix,
    u_int8_t *prefixlen)
{
    int plen;
    u_int8_t pfxlen;

    if (len < 1)
        return (-1);

    pfxlen = *p++;
    len--;

    bzero(prefix, sizeof(struct bgpd_addr));
    prefix->aid = AID_INET6;
    *prefixlen = pfxlen;

    if ((plen = rde_update_extract_prefix(p, len, &prefix->v6, pfxlen,
        sizeof(prefix->v6))) == -1)
        return (-1);

    return (plen + 1); /* pfxlen needs to be added */
}
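/*
 * A VPN-IPv4 NLRI (RFC 4364/RFC 3107) prepends to the IPv4 prefix a
 * stack of 3-byte MPLS labels, terminated by the bottom-of-stack bit,
 * and an 8-byte route distinguisher.  Both are counted as part of the
 * transmitted prefix length, so it is reduced accordingly while they
 * are peeled off.
 */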
int
rde_update_get_vpn4(u_char *p, u_int16_t len, struct bgpd_addr *prefix,
    u_int8_t *prefixlen)
{
    int rv, done = 0;
    u_int8_t pfxlen;
    u_int16_t plen;

    if (len < 1)
        return (-1);

    memcpy(&pfxlen, p, 1);
    p += 1;
    plen = 1;

    bzero(prefix, sizeof(struct bgpd_addr));

    /* label stack */
    do {
        if (len - plen < 3 || pfxlen < 3 * 8)
            return (-1);
        if (prefix->vpn4.labellen + 3U >
            sizeof(prefix->vpn4.labelstack))
            return (-1);
        prefix->vpn4.labelstack[prefix->vpn4.labellen++] = *p++;
        prefix->vpn4.labelstack[prefix->vpn4.labellen++] = *p++;
        prefix->vpn4.labelstack[prefix->vpn4.labellen] = *p++;
        if (prefix->vpn4.labelstack[prefix->vpn4.labellen] &
            BGP_MPLS_BOS)
            done = 1;
        prefix->vpn4.labellen++;
        plen += 3;
        pfxlen -= 3 * 8;
    } while (!done);

    /* RD */
    if (len - plen < (int)sizeof(u_int64_t) ||
        pfxlen < sizeof(u_int64_t) * 8)
        return (-1);
    memcpy(&prefix->vpn4.rd, p, sizeof(u_int64_t));
    pfxlen -= sizeof(u_int64_t) * 8;
    p += sizeof(u_int64_t);
    plen += sizeof(u_int64_t);

    /* prefix */
    prefix->aid = AID_VPN_IPv4;
    *prefixlen = pfxlen;

    if ((rv = rde_update_extract_prefix(p, len, &prefix->vpn4.addr,
        pfxlen, sizeof(prefix->vpn4.addr))) == -1)
        return (-1);

    return (plen + rv);
}

void
rde_update_err(struct rde_peer *peer, u_int8_t error, u_int8_t suberr,
    void *data, u_int16_t size)
{
    struct ibuf *wbuf;

    if ((wbuf = imsg_create(ibuf_se, IMSG_UPDATE_ERR, peer->conf.id, 0,
        size + sizeof(error) + sizeof(suberr))) == NULL)
        fatal("%s %d imsg_create error", __func__, __LINE__);
    if (imsg_add(wbuf, &error, sizeof(error)) == -1 ||
        imsg_add(wbuf, &suberr, sizeof(suberr)) == -1 ||
        imsg_add(wbuf, data, size) == -1)
        fatal("%s %d imsg_add error", __func__, __LINE__);
    imsg_close(ibuf_se, wbuf);
    peer->state = PEER_ERR;
}

void
rde_update_log(const char *message, u_int16_t rid,
    const struct rde_peer *peer, const struct bgpd_addr *next,
    const struct bgpd_addr *prefix, u_int8_t prefixlen)
{
    char *l = NULL;
    char *n = NULL;
    char *p = NULL;

    if (!((conf->log & BGPD_LOG_UPDATES) ||
        (peer->conf.flags & PEERFLAG_LOG_UPDATES)))
        return;

    if (next != NULL)
        if (asprintf(&n, " via %s", log_addr(next)) == -1)
            n = NULL;
    if (asprintf(&p, "%s/%u", log_addr(prefix), prefixlen) == -1)
        p = NULL;
    l = log_fmt_peer(&peer->conf);
    log_info("Rib %s: %s AS%s: %s %s%s", ribs[rid].name,
        l, log_as(peer->conf.remote_as), message,
        p ? p : "out of memory", n ? n : "");

    free(l);
    free(n);
    free(p);
}
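/*
 * With 2-byte ASN peers, 4-byte ASNs appear as AS_TRANS (23456) in
 * AS_PATH/AGGREGATOR while the real values travel in the optional
 * transitive AS4_PATH/AS4_AGGREGATOR attributes (RFC 6793).
 */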
/*
 * 4-Byte ASN helper function.
 * Two scenarios need to be considered:
 * - NEW session with NEW attributes present -> just remove the attributes
 * - OLD session with NEW attributes present -> try to merge them
 */
void
rde_as4byte_fixup(struct rde_peer *peer, struct rde_aspath *a)
{
    struct attr *nasp, *naggr, *oaggr;
    u_int32_t as;

    /*
     * if either ATTR_AS4_AGGREGATOR or ATTR_AS4_PATH is present
     * try to fixup the attributes.
     * Do not fixup if F_ATTR_PARSE_ERR is set.
     */
    if (!(a->flags & F_ATTR_AS4BYTE_NEW) || a->flags & F_ATTR_PARSE_ERR)
        return;

    /* first get the attributes */
    nasp = attr_optget(a, ATTR_AS4_PATH);
    naggr = attr_optget(a, ATTR_AS4_AGGREGATOR);

    if (rde_as4byte(peer)) {
        /* NEW session using 4-byte ASNs */
        if (nasp) {
            log_peer_warnx(&peer->conf, "uses 4-byte ASN "
                "but sent AS4_PATH attribute.");
            attr_free(a, nasp);
        }
        if (naggr) {
            log_peer_warnx(&peer->conf, "uses 4-byte ASN "
                "but sent AS4_AGGREGATOR attribute.");
            attr_free(a, naggr);
        }
        return;
    }
    /* OLD session using 2-byte ASNs */
    /* try to merge the new attributes into the old ones */
    if ((oaggr = attr_optget(a, ATTR_AGGREGATOR))) {
        memcpy(&as, oaggr->data, sizeof(as));
        if (ntohl(as) != AS_TRANS) {
            /* per RFC ignore AS4_PATH and AS4_AGGREGATOR */
            if (nasp)
                attr_free(a, nasp);
            if (naggr)
                attr_free(a, naggr);
            return;
        }
        if (naggr) {
            /* switch over to new AGGREGATOR */
            attr_free(a, oaggr);
            if (attr_optadd(a, ATTR_OPTIONAL | ATTR_TRANSITIVE,
                ATTR_AGGREGATOR, naggr->data, naggr->len))
                fatalx("attr_optadd failed but impossible");
        }
    }
    /* there is no need for AS4_AGGREGATOR any more */
    if (naggr)
        attr_free(a, naggr);

    /* merge AS4_PATH with ASPATH */
    if (nasp)
        aspath_merge(a, nasp);
}
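/*
 * RFC 4456 loop prevention: a route carrying our own BGP ID as
 * ORIGINATOR_ID, or our cluster ID in the CLUSTER_LIST, has looped
 * back and is marked ineligible; when reflecting, the missing
 * attributes are added and our cluster ID is prepended to the
 * CLUSTER_LIST.
 */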
2166 void 2167 rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags) 2168 { 2169 struct ctl_show_rib rib; 2170 struct ibuf *wbuf; 2171 struct attr *a; 2172 void *bp; 2173 time_t staletime; 2174 u_int8_t l; 2175 2176 bzero(&rib, sizeof(rib)); 2177 rib.lastchange = p->lastchange; 2178 rib.local_pref = asp->lpref; 2179 rib.med = asp->med; 2180 rib.weight = asp->weight; 2181 strlcpy(rib.descr, asp->peer->conf.descr, sizeof(rib.descr)); 2182 memcpy(&rib.remote_addr, &asp->peer->remote_addr, 2183 sizeof(rib.remote_addr)); 2184 rib.remote_id = asp->peer->remote_bgpid; 2185 if (asp->nexthop != NULL) { 2186 memcpy(&rib.true_nexthop, &asp->nexthop->true_nexthop, 2187 sizeof(rib.true_nexthop)); 2188 memcpy(&rib.exit_nexthop, &asp->nexthop->exit_nexthop, 2189 sizeof(rib.exit_nexthop)); 2190 } else { 2191 /* announced network may have a NULL nexthop */ 2192 bzero(&rib.true_nexthop, sizeof(rib.true_nexthop)); 2193 bzero(&rib.exit_nexthop, sizeof(rib.exit_nexthop)); 2194 rib.true_nexthop.aid = p->prefix->aid; 2195 rib.exit_nexthop.aid = p->prefix->aid; 2196 } 2197 pt_getaddr(p->prefix, &rib.prefix); 2198 rib.prefixlen = p->prefix->prefixlen; 2199 rib.origin = asp->origin; 2200 rib.flags = 0; 2201 if (p->rib->active == p) 2202 rib.flags |= F_PREF_ACTIVE; 2203 if (!asp->peer->conf.ebgp) 2204 rib.flags |= F_PREF_INTERNAL; 2205 if (asp->flags & F_PREFIX_ANNOUNCED) 2206 rib.flags |= F_PREF_ANNOUNCE; 2207 if (asp->nexthop == NULL || asp->nexthop->state == NEXTHOP_REACH) 2208 rib.flags |= F_PREF_ELIGIBLE; 2209 if (asp->flags & F_ATTR_LOOP) 2210 rib.flags &= ~F_PREF_ELIGIBLE; 2211 staletime = asp->peer->staletime[p->prefix->aid]; 2212 if (staletime && p->lastchange <= staletime) 2213 rib.flags |= F_PREF_STALE; 2214 rib.aspath_len = aspath_length(asp->aspath); 2215 2216 if ((wbuf = imsg_create(ibuf_se_ctl, IMSG_CTL_SHOW_RIB, 0, pid, 2217 sizeof(rib) + rib.aspath_len)) == NULL) 2218 return; 2219 if (imsg_add(wbuf, &rib, sizeof(rib)) == -1 || 2220 imsg_add(wbuf, aspath_dump(asp->aspath), 2221 rib.aspath_len) == -1) 2222 return; 2223 imsg_close(ibuf_se_ctl, wbuf); 2224 2225 if (flags & F_CTL_DETAIL) 2226 for (l = 0; l < asp->others_len; l++) { 2227 if ((a = asp->others[l]) == NULL) 2228 break; 2229 if ((wbuf = imsg_create(ibuf_se_ctl, 2230 IMSG_CTL_SHOW_RIB_ATTR, 0, pid, 2231 attr_optlen(a))) == NULL) 2232 return; 2233 if ((bp = ibuf_reserve(wbuf, attr_optlen(a))) == NULL) { 2234 ibuf_free(wbuf); 2235 return; 2236 } 2237 if (attr_write(bp, attr_optlen(a), a->flags, 2238 a->type, a->data, a->len) == -1) { 2239 ibuf_free(wbuf); 2240 return; 2241 } 2242 imsg_close(ibuf_se_ctl, wbuf); 2243 } 2244 } 2245 2246 void 2247 rde_dump_filterout(struct rde_peer *peer, struct prefix *p, 2248 struct ctl_show_rib_request *req) 2249 { 2250 struct bgpd_addr addr; 2251 struct rde_aspath *asp; 2252 enum filter_actions a; 2253 2254 if (up_test_update(peer, p) != 1) 2255 return; 2256 2257 pt_getaddr(p->prefix, &addr); 2258 a = rde_filter(out_rules, &asp, peer, p->aspath, &addr, 2259 p->prefix->prefixlen, p->aspath->peer); 2260 if (asp) 2261 asp->peer = p->aspath->peer; 2262 else 2263 asp = p->aspath; 2264 2265 if (a == ACTION_ALLOW) 2266 rde_dump_rib_as(p, asp, req->pid, req->flags); 2267 2268 if (asp != p->aspath) 2269 path_put(asp); 2270 } 2271 2272 void 2273 rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req) 2274 { 2275 struct rde_peer *peer; 2276 2277 if (req->flags & F_CTL_ADJ_IN || 2278 !(req->flags & (F_CTL_ADJ_IN|F_CTL_ADJ_OUT))) { 2279 if (req->peerid && req->peerid != 
p->aspath->peer->conf.id) 2280 return; 2281 if (req->type == IMSG_CTL_SHOW_RIB_AS && 2282 !aspath_match(p->aspath->aspath->data, 2283 p->aspath->aspath->len, &req->as, req->as.as)) 2284 return; 2285 if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY && 2286 !community_match(p->aspath, req->community.as, 2287 req->community.type)) 2288 return; 2289 if (req->type == IMSG_CTL_SHOW_RIB_LARGECOMMUNITY && 2290 !community_large_match(p->aspath, req->large_community.as, 2291 req->large_community.ld1, req->large_community.ld2)) 2292 return; 2293 if ((req->flags & F_CTL_ACTIVE) && p->rib->active != p) 2294 return; 2295 rde_dump_rib_as(p, p->aspath, req->pid, req->flags); 2296 } else if (req->flags & F_CTL_ADJ_OUT) { 2297 if (p->rib->active != p) 2298 /* only consider active prefix */ 2299 return; 2300 if (req->peerid) { 2301 if ((peer = peer_get(req->peerid)) != NULL) 2302 rde_dump_filterout(peer, p, req); 2303 return; 2304 } 2305 } 2306 } 2307 2308 void 2309 rde_dump_upcall(struct rib_entry *re, void *ptr) 2310 { 2311 struct prefix *p; 2312 struct rde_dump_ctx *ctx = ptr; 2313 2314 LIST_FOREACH(p, &re->prefix_h, rib_l) 2315 rde_dump_filter(p, &ctx->req); 2316 } 2317 2318 void 2319 rde_dump_prefix_upcall(struct rib_entry *re, void *ptr) 2320 { 2321 struct rde_dump_ctx *ctx = ptr; 2322 struct prefix *p; 2323 struct pt_entry *pt; 2324 struct bgpd_addr addr; 2325 2326 pt = re->prefix; 2327 pt_getaddr(pt, &addr); 2328 if (addr.aid != ctx->req.prefix.aid) 2329 return; 2330 if (ctx->req.prefixlen > pt->prefixlen) 2331 return; 2332 if (!prefix_compare(&ctx->req.prefix, &addr, ctx->req.prefixlen)) 2333 LIST_FOREACH(p, &re->prefix_h, rib_l) 2334 rde_dump_filter(p, &ctx->req); 2335 } 2336 2337 void 2338 rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid, 2339 enum imsg_type type) 2340 { 2341 struct rde_dump_ctx *ctx; 2342 struct rib_entry *re; 2343 u_int error; 2344 u_int16_t id; 2345 u_int8_t hostplen; 2346 2347 if ((ctx = calloc(1, sizeof(*ctx))) == NULL) { 2348 log_warn("rde_dump_ctx_new"); 2349 error = CTL_RES_NOMEM; 2350 imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error, 2351 sizeof(error)); 2352 return; 2353 } 2354 if ((id = rib_find(req->rib)) == RIB_FAILED) { 2355 log_warnx("rde_dump_ctx_new: no such rib %s", req->rib); 2356 error = CTL_RES_NOSUCHPEER; 2357 imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error, 2358 sizeof(error)); 2359 free(ctx); 2360 return; 2361 } 2362 2363 memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request)); 2364 ctx->req.pid = pid; 2365 ctx->req.type = type; 2366 ctx->ribctx.ctx_count = RDE_RUNNER_ROUNDS; 2367 ctx->ribctx.ctx_rib = &ribs[id]; 2368 switch (ctx->req.type) { 2369 case IMSG_CTL_SHOW_NETWORK: 2370 ctx->ribctx.ctx_upcall = network_dump_upcall; 2371 break; 2372 case IMSG_CTL_SHOW_RIB: 2373 case IMSG_CTL_SHOW_RIB_AS: 2374 case IMSG_CTL_SHOW_RIB_COMMUNITY: 2375 case IMSG_CTL_SHOW_RIB_LARGECOMMUNITY: 2376 ctx->ribctx.ctx_upcall = rde_dump_upcall; 2377 break; 2378 case IMSG_CTL_SHOW_RIB_PREFIX: 2379 if (req->flags & F_LONGER) { 2380 ctx->ribctx.ctx_upcall = rde_dump_prefix_upcall; 2381 break; 2382 } 2383 switch (req->prefix.aid) { 2384 case AID_INET: 2385 case AID_VPN_IPv4: 2386 hostplen = 32; 2387 break; 2388 case AID_INET6: 2389 hostplen = 128; 2390 break; 2391 default: 2392 fatalx("rde_dump_ctx_new: unknown af"); 2393 } 2394 if (req->prefixlen == hostplen) 2395 re = rib_lookup(&ribs[id], &req->prefix); 2396 else 2397 re = rib_get(&ribs[id], &req->prefix, req->prefixlen); 2398 if (re) 2399 rde_dump_upcall(re, ctx); 2400 rde_dump_done(ctx); 
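/*
 * the single-prefix lookup above is answered synchronously, so the
 * context is completed right here instead of through a rib_dump_r()
 * walk
 */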
2401 return; 2402 default: 2403 fatalx("rde_dump_ctx_new: unsupported imsg type"); 2404 } 2405 ctx->ribctx.ctx_done = rde_dump_done; 2406 ctx->ribctx.ctx_arg = ctx; 2407 ctx->ribctx.ctx_aid = ctx->req.aid; 2408 rib_dump_r(&ctx->ribctx); 2409 } 2410 2411 void 2412 rde_dump_done(void *arg) 2413 { 2414 struct rde_dump_ctx *ctx = arg; 2415 2416 imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, 2417 -1, NULL, 0); 2418 free(ctx); 2419 } 2420 2421 void 2422 rde_dump_mrt_new(struct mrt *mrt, pid_t pid, int fd) 2423 { 2424 struct rde_mrt_ctx *ctx; 2425 u_int16_t id; 2426 2427 if ((ctx = calloc(1, sizeof(*ctx))) == NULL) { 2428 log_warn("rde_dump_mrt_new"); 2429 return; 2430 } 2431 memcpy(&ctx->mrt, mrt, sizeof(struct mrt)); 2432 TAILQ_INIT(&ctx->mrt.wbuf.bufs); 2433 ctx->mrt.wbuf.fd = fd; 2434 ctx->mrt.state = MRT_STATE_RUNNING; 2435 id = rib_find(ctx->mrt.rib); 2436 if (id == RIB_FAILED) { 2437 log_warnx("non-existing RIB %s for mrt dump", ctx->mrt.rib); 2438 free(ctx); 2439 return; 2440 } 2441 2442 if (ctx->mrt.type == MRT_TABLE_DUMP_V2) 2443 mrt_dump_v2_hdr(&ctx->mrt, conf, &peerlist); 2444 2445 ctx->ribctx.ctx_count = RDE_RUNNER_ROUNDS; 2446 ctx->ribctx.ctx_rib = &ribs[id]; 2447 ctx->ribctx.ctx_upcall = mrt_dump_upcall; 2448 ctx->ribctx.ctx_done = mrt_done; 2449 ctx->ribctx.ctx_arg = &ctx->mrt; 2450 ctx->ribctx.ctx_aid = AID_UNSPEC; 2451 LIST_INSERT_HEAD(&rde_mrts, ctx, entry); 2452 rde_mrt_cnt++; 2453 rib_dump_r(&ctx->ribctx); 2454 } 2455 2456 /* 2457 * kroute specific functions 2458 */ 2459 int 2460 rde_rdomain_import(struct rde_aspath *asp, struct rdomain *rd) 2461 { 2462 struct filter_set *s; 2463 2464 TAILQ_FOREACH(s, &rd->import, entry) { 2465 if (community_ext_match(asp, &s->action.ext_community, 0)) 2466 return (1); 2467 } 2468 return (0); 2469 } 2470 2471 void 2472 rde_send_kroute(struct prefix *new, struct prefix *old, u_int16_t ribid) 2473 { 2474 struct kroute_full kr; 2475 struct bgpd_addr addr; 2476 struct prefix *p; 2477 struct rdomain *rd; 2478 enum imsg_type type; 2479 2480 /* 2481 * Make sure that self-announced prefixes are not committed to the 2482 * FIB. If both prefixes are unreachable, no update is needed. 
2483 */ 2484 if ((old == NULL || old->aspath->flags & F_PREFIX_ANNOUNCED) && 2485 (new == NULL || new->aspath->flags & F_PREFIX_ANNOUNCED)) 2486 return; 2487 2488 if (new == NULL || new->aspath->flags & F_PREFIX_ANNOUNCED) { 2489 type = IMSG_KROUTE_DELETE; 2490 p = old; 2491 } else { 2492 type = IMSG_KROUTE_CHANGE; 2493 p = new; 2494 } 2495 2496 pt_getaddr(p->prefix, &addr); 2497 bzero(&kr, sizeof(kr)); 2498 memcpy(&kr.prefix, &addr, sizeof(kr.prefix)); 2499 kr.prefixlen = p->prefix->prefixlen; 2500 if (p->aspath->flags & F_NEXTHOP_REJECT) 2501 kr.flags |= F_REJECT; 2502 if (p->aspath->flags & F_NEXTHOP_BLACKHOLE) 2503 kr.flags |= F_BLACKHOLE; 2504 if (type == IMSG_KROUTE_CHANGE) 2505 memcpy(&kr.nexthop, &p->aspath->nexthop->true_nexthop, 2506 sizeof(kr.nexthop)); 2507 strlcpy(kr.label, rtlabel_id2name(p->aspath->rtlabelid), 2508 sizeof(kr.label)); 2509 2510 switch (addr.aid) { 2511 case AID_VPN_IPv4: 2512 if (ribid != 1) 2513 /* not Loc-RIB, no update for VPNs */ 2514 break; 2515 2516 SIMPLEQ_FOREACH(rd, rdomains_l, entry) { 2517 if (!rde_rdomain_import(p->aspath, rd)) 2518 continue; 2519 /* must send exit_nexthop so that correct MPLS tunnel 2520 * is chosen 2521 */ 2522 if (type == IMSG_KROUTE_CHANGE) 2523 memcpy(&kr.nexthop, 2524 &p->aspath->nexthop->exit_nexthop, 2525 sizeof(kr.nexthop)); 2526 if (imsg_compose(ibuf_main, type, rd->rtableid, 0, -1, 2527 &kr, sizeof(kr)) == -1) 2528 fatal("%s %d imsg_compose error", __func__, 2529 __LINE__); 2530 } 2531 break; 2532 default: 2533 if (imsg_compose(ibuf_main, type, ribs[ribid].rtableid, 0, -1, 2534 &kr, sizeof(kr)) == -1) 2535 fatal("%s %d imsg_compose error", __func__, __LINE__); 2536 break; 2537 } 2538 } 2539 2540 /* 2541 * pf table specific functions 2542 */ 2543 void 2544 rde_send_pftable(u_int16_t id, struct bgpd_addr *addr, 2545 u_int8_t len, int del) 2546 { 2547 struct pftable_msg pfm; 2548 2549 if (id == 0) 2550 return; 2551 2552 /* do not run while cleaning up */ 2553 if (rde_quit) 2554 return; 2555 2556 bzero(&pfm, sizeof(pfm)); 2557 strlcpy(pfm.pftable, pftable_id2name(id), sizeof(pfm.pftable)); 2558 memcpy(&pfm.addr, addr, sizeof(pfm.addr)); 2559 pfm.len = len; 2560 2561 if (imsg_compose(ibuf_main, 2562 del ? 
IMSG_PFTABLE_REMOVE : IMSG_PFTABLE_ADD, 2563 0, 0, -1, &pfm, sizeof(pfm)) == -1) 2564 fatal("%s %d imsg_compose error", __func__, __LINE__); 2565 } 2566 2567 void 2568 rde_send_pftable_commit(void) 2569 { 2570 /* do not run while cleaning up */ 2571 if (rde_quit) 2572 return; 2573 2574 if (imsg_compose(ibuf_main, IMSG_PFTABLE_COMMIT, 0, 0, -1, NULL, 0) == 2575 -1) 2576 fatal("%s %d imsg_compose error", __func__, __LINE__); 2577 } 2578 2579 /* 2580 * nexthop specific functions 2581 */ 2582 void 2583 rde_send_nexthop(struct bgpd_addr *next, int valid) 2584 { 2585 int type; 2586 2587 if (valid) 2588 type = IMSG_NEXTHOP_ADD; 2589 else 2590 type = IMSG_NEXTHOP_REMOVE; 2591 2592 if (imsg_compose(ibuf_main, type, 0, 0, -1, next, 2593 sizeof(struct bgpd_addr)) == -1) 2594 fatal("%s %d imsg_compose error", __func__, __LINE__); 2595 } 2596 2597 /* 2598 * soft reconfig specific functions 2599 */ 2600 void 2601 rde_reload_done(void) 2602 { 2603 struct rdomain *rd; 2604 struct rde_peer *peer; 2605 struct filter_head *fh; 2606 u_int16_t rid; 2607 2608 /* first merge the main config */ 2609 if ((nconf->flags & BGPD_FLAG_NO_EVALUATE) 2610 != (conf->flags & BGPD_FLAG_NO_EVALUATE)) { 2611 log_warnx("change to/from route-collector " 2612 "mode ignored"); 2613 if (conf->flags & BGPD_FLAG_NO_EVALUATE) 2614 nconf->flags |= BGPD_FLAG_NO_EVALUATE; 2615 else 2616 nconf->flags &= ~BGPD_FLAG_NO_EVALUATE; 2617 } 2618 memcpy(conf, nconf, sizeof(struct bgpd_config)); 2619 conf->listen_addrs = NULL; 2620 conf->csock = NULL; 2621 conf->rcsock = NULL; 2622 free(nconf); 2623 nconf = NULL; 2624 2625 /* sync peerself with conf */ 2626 peerself->remote_bgpid = ntohl(conf->bgpid); 2627 peerself->conf.local_as = conf->as; 2628 peerself->conf.remote_as = conf->as; 2629 peerself->short_as = conf->short_as; 2630 2631 /* apply the new set of rdomains, sync will be done later */ 2632 while ((rd = SIMPLEQ_FIRST(rdomains_l)) != NULL) { 2633 SIMPLEQ_REMOVE_HEAD(rdomains_l, entry); 2634 filterset_free(&rd->import); 2635 filterset_free(&rd->export); 2636 free(rd); 2637 } 2638 free(rdomains_l); 2639 rdomains_l = newdomains; 2640 /* XXX WHERE IS THE SYNC ??? */ 2641 2642 rde_filter_calc_skip_steps(out_rules_tmp); 2643 2644 /* 2645 * make the new filter rules the active ones but keep the old for 2646 * softreconfig. This is needed so that changes happening during 2647 * the reload use the right filters.
2648 */ 2649 fh = out_rules; 2650 out_rules = out_rules_tmp; 2651 out_rules_tmp = fh; 2652 2653 /* check if filter changed */ 2654 LIST_FOREACH(peer, &peerlist, peer_l) { 2655 if (peer->conf.id == 0) 2656 continue; 2657 peer->reconf_out = 0; 2658 peer->reconf_rib = 0; 2659 if (peer->ribid != rib_find(peer->conf.rib)) { 2660 rib_dump(&ribs[peer->ribid], 2661 rde_softreconfig_unload_peer, peer, AID_UNSPEC); 2662 peer->ribid = rib_find(peer->conf.rib); 2663 if (peer->ribid == RIB_FAILED) 2664 fatalx("King Bula's peer met an unknown RIB"); 2665 peer->reconf_rib = 1; 2666 continue; 2667 } 2668 if (peer->conf.softreconfig_out && 2669 !rde_filter_equal(out_rules, out_rules_tmp, peer)) { 2670 peer->reconf_out = 1; 2671 } 2672 } 2673 /* bring ribs in sync */ 2674 for (rid = 0; rid < rib_size; rid++) { 2675 if (*ribs[rid].name == '\0') 2676 continue; 2677 rde_filter_calc_skip_steps(ribs[rid].in_rules_tmp); 2678 2679 /* flip rules, make new active */ 2680 fh = ribs[rid].in_rules; 2681 ribs[rid].in_rules = ribs[rid].in_rules_tmp; 2682 ribs[rid].in_rules_tmp = fh; 2683 2684 switch (ribs[rid].state) { 2685 case RECONF_DELETE: 2686 rib_free(&ribs[rid]); 2687 break; 2688 case RECONF_KEEP: 2689 if (rde_filter_equal(ribs[rid].in_rules, 2690 ribs[rid].in_rules_tmp, NULL)) 2691 /* rib is in sync */ 2692 break; 2693 ribs[rid].state = RECONF_RELOAD; 2694 /* FALLTHROUGH */ 2695 case RECONF_REINIT: 2696 rib_dump(&ribs[0], rde_softreconfig_in, &ribs[rid], 2697 AID_UNSPEC); 2698 break; 2699 case RECONF_RELOAD: 2700 log_warnx("Bad rib reload state"); 2701 /* FALLTHROUGH */ 2702 case RECONF_NONE: 2703 break; 2704 } 2705 } 2706 LIST_FOREACH(peer, &peerlist, peer_l) { 2707 if (peer->reconf_out) 2708 rib_dump(&ribs[peer->ribid], rde_softreconfig_out, 2709 peer, AID_UNSPEC); 2710 else if (peer->reconf_rib) 2711 /* dump the full table to neighbors that changed their rib */ 2712 peer_dump(peer->conf.id, AID_UNSPEC); 2713 } 2714 filterlist_free(out_rules_tmp); 2715 out_rules_tmp = NULL; 2716 for (rid = 0; rid < rib_size; rid++) { 2717 if (*ribs[rid].name == '\0') 2718 continue; 2719 filterlist_free(ribs[rid].in_rules_tmp); 2720 ribs[rid].in_rules_tmp = NULL; 2721 ribs[rid].state = RECONF_NONE; 2722 } 2723 2724 log_info("RDE reconfigured"); 2725 imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0, 2726 -1, NULL, 0); 2727 } 2728 2729 void 2730 rde_softreconfig_in(struct rib_entry *re, void *ptr) 2731 { 2732 struct rib *rib = ptr; 2733 struct prefix *p, *np; 2734 struct pt_entry *pt; 2735 struct rde_peer *peer; 2736 struct rde_aspath *asp, *oasp, *nasp; 2737 enum filter_actions oa, na; 2738 struct bgpd_addr addr; 2739 2740 pt = re->prefix; 2741 pt_getaddr(pt, &addr); 2742 for (p = LIST_FIRST(&re->prefix_h); p != NULL; p = np) { 2743 /* 2744 * prefix_remove() and path_update() may change the object 2745 * so cache the values. 2746 */ 2747 np = LIST_NEXT(p, rib_l); 2748 asp = p->aspath; 2749 peer = asp->peer; 2750 2751 /* check if prefix changed */ 2752 if (rib->state == RECONF_RELOAD) { 2753 oa = rde_filter(rib->in_rules_tmp, &oasp, peer, 2754 asp, &addr, pt->prefixlen, peer); 2755 oasp = oasp != NULL ? oasp : asp; 2756 } else { 2757 /* make sure we update everything for RECONF_REINIT */ 2758 oa = ACTION_DENY; 2759 oasp = asp; 2760 } 2761 na = rde_filter(rib->in_rules, &nasp, peer, asp, 2762 &addr, pt->prefixlen, peer); 2763 nasp = nasp != NULL ? 
nasp : asp; 2764 2765 /* go through all 4 possible combinations */ 2766 /* if (oa == ACTION_DENY && na == ACTION_DENY) */ 2767 /* nothing to do */ 2768 if (oa == ACTION_DENY && na == ACTION_ALLOW) { 2769 /* update Local-RIB */ 2770 path_update(rib, peer, nasp, &addr, pt->prefixlen); 2771 } else if (oa == ACTION_ALLOW && na == ACTION_DENY) { 2772 /* remove from Local-RIB */ 2773 prefix_remove(rib, peer, &addr, pt->prefixlen, 0); 2774 } else if (oa == ACTION_ALLOW && na == ACTION_ALLOW) { 2775 if (path_compare(nasp, oasp) != 0) 2776 /* send update */ 2777 path_update(rib, peer, nasp, &addr, 2778 pt->prefixlen); 2779 } 2780 2781 if (oasp != asp) 2782 path_put(oasp); 2783 if (nasp != asp) 2784 path_put(nasp); 2785 } 2786 } 2787 2788 void 2789 rde_softreconfig_out(struct rib_entry *re, void *ptr) 2790 { 2791 struct prefix *p = re->active; 2792 struct pt_entry *pt; 2793 struct rde_peer *peer = ptr; 2794 struct rde_aspath *oasp, *nasp; 2795 enum filter_actions oa, na; 2796 struct bgpd_addr addr; 2797 2798 if (peer->conf.id == 0) 2799 fatalx("King Bula troubled by bad peer"); 2800 2801 if (p == NULL) 2802 return; 2803 2804 pt = re->prefix; 2805 pt_getaddr(pt, &addr); 2806 2807 if (up_test_update(peer, p) != 1) 2808 return; 2809 2810 oa = rde_filter(out_rules_tmp, &oasp, peer, p->aspath, 2811 &addr, pt->prefixlen, p->aspath->peer); 2812 na = rde_filter(out_rules, &nasp, peer, p->aspath, 2813 &addr, pt->prefixlen, p->aspath->peer); 2814 oasp = oasp != NULL ? oasp : p->aspath; 2815 nasp = nasp != NULL ? nasp : p->aspath; 2816 2817 /* go through all 4 possible combinations */ 2818 /* if (oa == ACTION_DENY && na == ACTION_DENY) */ 2819 /* nothing to do */ 2820 if (oa == ACTION_DENY && na == ACTION_ALLOW) { 2821 /* send update */ 2822 up_generate(peer, nasp, &addr, pt->prefixlen); 2823 } else if (oa == ACTION_ALLOW && na == ACTION_DENY) { 2824 /* send withdraw */ 2825 up_generate(peer, NULL, &addr, pt->prefixlen); 2826 } else if (oa == ACTION_ALLOW && na == ACTION_ALLOW) { 2827 /* send update if path attributes changed */ 2828 if (path_compare(nasp, oasp) != 0) 2829 up_generate(peer, nasp, &addr, pt->prefixlen); 2830 } 2831 2832 if (oasp != p->aspath) 2833 path_put(oasp); 2834 if (nasp != p->aspath) 2835 path_put(nasp); 2836 } 2837 2838 void 2839 rde_softreconfig_unload_peer(struct rib_entry *re, void *ptr) 2840 { 2841 struct rde_peer *peer = ptr; 2842 struct prefix *p = re->active; 2843 struct pt_entry *pt; 2844 struct rde_aspath *oasp; 2845 enum filter_actions oa; 2846 struct bgpd_addr addr; 2847 2848 pt = re->prefix; 2849 pt_getaddr(pt, &addr); 2850 2851 /* check if prefix was announced */ 2852 if (up_test_update(peer, p) != 1) 2853 return; 2854 2855 oa = rde_filter(out_rules_tmp, &oasp, peer, p->aspath, 2856 &addr, pt->prefixlen, p->aspath->peer); 2857 oasp = oasp != NULL ? 
oasp : p->aspath; 2858 2859 if (oa == ACTION_DENY) 2860 /* nothing to do */ 2861 goto done; 2862 2863 /* send withdraw */ 2864 up_generate(peer, NULL, &addr, pt->prefixlen); 2865 done: 2866 if (oasp != p->aspath) 2867 path_put(oasp); 2868 } 2869 2870 /* 2871 * update specific functions 2872 */ 2873 u_char queue_buf[4096]; 2874 2875 void 2876 rde_up_dump_upcall(struct rib_entry *re, void *ptr) 2877 { 2878 struct rde_peer *peer = ptr; 2879 2880 if (re->ribid != peer->ribid) 2881 fatalx("King Bula: monstrous evil horror."); 2882 if (re->active == NULL) 2883 return; 2884 up_generate_updates(out_rules, peer, re->active, NULL); 2885 } 2886 2887 void 2888 rde_generate_updates(u_int16_t ribid, struct prefix *new, struct prefix *old) 2889 { 2890 struct rde_peer *peer; 2891 2892 /* 2893 * If old != NULL we know it was active and should be removed. 2894 * If new != NULL we know it is reachable and we should 2895 * generate an update. 2896 */ 2897 if (old == NULL && new == NULL) 2898 return; 2899 2900 LIST_FOREACH(peer, &peerlist, peer_l) { 2901 if (peer->conf.id == 0) 2902 continue; 2903 if (peer->ribid != ribid) 2904 continue; 2905 if (peer->state != PEER_UP) 2906 continue; 2907 up_generate_updates(out_rules, peer, new, old); 2908 } 2909 } 2910 2911 void 2912 rde_update_queue_runner(void) 2913 { 2914 struct rde_peer *peer; 2915 int r, sent, max = RDE_RUNNER_ROUNDS, eor = 0; 2916 u_int16_t len, wd_len, wpos; 2917 2918 len = sizeof(queue_buf) - MSGSIZE_HEADER; 2919 do { 2920 sent = 0; 2921 LIST_FOREACH(peer, &peerlist, peer_l) { 2922 if (peer->conf.id == 0) 2923 continue; 2924 if (peer->state != PEER_UP) 2925 continue; 2926 /* first withdraws */ 2927 wpos = 2; /* reserve space for the length field */ 2928 r = up_dump_prefix(queue_buf + wpos, len - wpos - 2, 2929 &peer->withdraws[AID_INET], peer); 2930 wd_len = r; 2931 /* write the withdraws length field */ 2932 wd_len = htons(wd_len); 2933 memcpy(queue_buf, &wd_len, 2); 2934 wpos += r; 2935 2936 /* now bgp path attributes */ 2937 r = up_dump_attrnlri(queue_buf + wpos, len - wpos, 2938 peer); 2939 switch (r) { 2940 case -1: 2941 eor = 1; 2942 if (wd_len == 0) { 2943 /* no withdraws queued, just send EoR */ 2944 peer_send_eor(peer, AID_INET); 2945 continue; 2946 } 2947 break; 2948 case 2: 2949 if (wd_len == 0) { 2950 /* 2951 * No packet to send. No withdraws and 2952 * no path attributes. Skip. 2953 */ 2954 continue; 2955 } 2956 /* FALLTHROUGH */ 2957 default: 2958 wpos += r; 2959 break; 2960 } 2961 2962 /* finally send message to SE */ 2963 if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, 2964 0, -1, queue_buf, wpos) == -1) 2965 fatal("%s %d imsg_compose error", __func__, 2966 __LINE__); 2967 sent++; 2968 if (eor) { 2969 eor = 0; 2970 peer_send_eor(peer, AID_INET); 2971 } 2972 } 2973 max -= sent; 2974 } while (sent != 0 && max > 0); 2975 } 2976 2977 void 2978 rde_update6_queue_runner(u_int8_t aid) 2979 { 2980 struct rde_peer *peer; 2981 u_char *b; 2982 int r, sent, max = RDE_RUNNER_ROUNDS / 2; 2983 u_int16_t len; 2984 2985 /* first withdraws ... 
*/ 2986 do { 2987 sent = 0; 2988 LIST_FOREACH(peer, &peerlist, peer_l) { 2989 if (peer->conf.id == 0) 2990 continue; 2991 if (peer->state != PEER_UP) 2992 continue; 2993 len = sizeof(queue_buf) - MSGSIZE_HEADER; 2994 b = up_dump_mp_unreach(queue_buf, &len, peer, aid); 2995 2996 if (b == NULL) 2997 continue; 2998 /* finally send message to SE */ 2999 if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, 3000 0, -1, b, len) == -1) 3001 fatal("%s %d imsg_compose error", __func__, 3002 __LINE__); 3003 sent++; 3004 } 3005 max -= sent; 3006 } while (sent != 0 && max > 0); 3007 3008 /* ... then updates */ 3009 max = RDE_RUNNER_ROUNDS / 2; 3010 do { 3011 sent = 0; 3012 LIST_FOREACH(peer, &peerlist, peer_l) { 3013 if (peer->conf.id == 0) 3014 continue; 3015 if (peer->state != PEER_UP) 3016 continue; 3017 len = sizeof(queue_buf) - MSGSIZE_HEADER; 3018 r = up_dump_mp_reach(queue_buf, &len, peer, aid); 3019 switch (r) { 3020 case -2: 3021 continue; 3022 case -1: 3023 peer_send_eor(peer, aid); 3024 continue; 3025 default: 3026 b = queue_buf + r; 3027 break; 3028 } 3029 3030 /* finally send message to SE */ 3031 if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, 3032 0, -1, b, len) == -1) 3033 fatal("%s %d imsg_compose error", __func__, 3034 __LINE__); 3035 sent++; 3036 } 3037 max -= sent; 3038 } while (sent != 0 && max > 0); 3039 } 3040 3041 /* 3042 * generic helper function 3043 */ 3044 u_int32_t 3045 rde_local_as(void) 3046 { 3047 return (conf->as); 3048 } 3049 3050 int 3051 rde_noevaluate(void) 3052 { 3053 /* do not run while cleaning up */ 3054 if (rde_quit) 3055 return (1); 3056 3057 return (conf->flags & BGPD_FLAG_NO_EVALUATE); 3058 } 3059 3060 int 3061 rde_decisionflags(void) 3062 { 3063 return (conf->flags & BGPD_FLAG_DECISION_MASK); 3064 } 3065 3066 int 3067 rde_as4byte(struct rde_peer *peer) 3068 { 3069 return (peer->capa.as4byte); 3070 } 3071 3072 /* 3073 * peer functions 3074 */ 3075 struct peer_table { 3076 struct rde_peer_head *peer_hashtbl; 3077 u_int32_t peer_hashmask; 3078 } peertable; 3079 3080 #define PEER_HASH(x) \ 3081 &peertable.peer_hashtbl[(x) & peertable.peer_hashmask] 3082 3083 void 3084 peer_init(u_int32_t hashsize) 3085 { 3086 struct peer_config pc; 3087 u_int32_t hs, i; 3088 3089 for (hs = 1; hs < hashsize; hs <<= 1) 3090 ; 3091 peertable.peer_hashtbl = calloc(hs, sizeof(struct rde_peer_head)); 3092 if (peertable.peer_hashtbl == NULL) 3093 fatal("peer_init"); 3094 3095 for (i = 0; i < hs; i++) 3096 LIST_INIT(&peertable.peer_hashtbl[i]); 3097 LIST_INIT(&peerlist); 3098 3099 peertable.peer_hashmask = hs - 1; 3100 3101 bzero(&pc, sizeof(pc)); 3102 snprintf(pc.descr, sizeof(pc.descr), "LOCAL"); 3103 3104 peerself = peer_add(0, &pc); 3105 if (peerself == NULL) 3106 fatalx("peer_init add self"); 3107 3108 peerself->state = PEER_UP; 3109 } 3110 3111 void 3112 peer_shutdown(void) 3113 { 3114 u_int32_t i; 3115 3116 for (i = 0; i <= peertable.peer_hashmask; i++) 3117 if (!LIST_EMPTY(&peertable.peer_hashtbl[i])) 3118 log_warnx("peer_free: free non-free table"); 3119 3120 free(peertable.peer_hashtbl); 3121 } 3122 3123 struct rde_peer * 3124 peer_get(u_int32_t id) 3125 { 3126 struct rde_peer_head *head; 3127 struct rde_peer *peer; 3128 3129 head = PEER_HASH(id); 3130 3131 LIST_FOREACH(peer, head, hash_l) { 3132 if (peer->conf.id == id) 3133 return (peer); 3134 } 3135 return (NULL); 3136 } 3137 3138 struct rde_peer * 3139 peer_add(u_int32_t id, struct peer_config *p_conf) 3140 { 3141 struct rde_peer_head *head; 3142 struct rde_peer *peer; 3143 3144 if ((peer = peer_get(id))) { 3145 
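/*
 * the peer already exists: refresh its config in place and return
 * NULL to signal the caller that no new peer was created
 */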
memcpy(&peer->conf, p_conf, sizeof(struct peer_config)); 3146 return (NULL); 3147 } 3148 3149 peer = calloc(1, sizeof(struct rde_peer)); 3150 if (peer == NULL) 3151 fatal("peer_add"); 3152 3153 LIST_INIT(&peer->path_h); 3154 memcpy(&peer->conf, p_conf, sizeof(struct peer_config)); 3155 peer->remote_bgpid = 0; 3156 peer->ribid = rib_find(peer->conf.rib); 3157 if (peer->ribid == RIB_FAILED) 3158 fatalx("King Bula's new peer met an unknown RIB"); 3159 peer->state = PEER_NONE; 3160 up_init(peer); 3161 3162 head = PEER_HASH(id); 3163 3164 LIST_INSERT_HEAD(head, peer, hash_l); 3165 LIST_INSERT_HEAD(&peerlist, peer, peer_l); 3166 3167 return (peer); 3168 } 3169 3170 int 3171 peer_localaddrs(struct rde_peer *peer, struct bgpd_addr *laddr) 3172 { 3173 struct ifaddrs *ifap, *ifa, *match; 3174 3175 if (getifaddrs(&ifap) == -1) 3176 fatal("getifaddrs"); 3177 3178 for (match = ifap; match != NULL; match = match->ifa_next) 3179 if (sa_cmp(laddr, match->ifa_addr) == 0) 3180 break; 3181 3182 if (match == NULL) { 3183 log_warnx("peer_localaddrs: local address not found"); 3184 return (-1); 3185 } 3186 3187 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { 3188 if (ifa->ifa_addr->sa_family == AF_INET && 3189 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 3190 if (ifa->ifa_addr->sa_family == 3191 match->ifa_addr->sa_family) 3192 ifa = match; 3193 sa2addr(ifa->ifa_addr, &peer->local_v4_addr); 3194 break; 3195 } 3196 } 3197 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { 3198 if (ifa->ifa_addr->sa_family == AF_INET6 && 3199 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 3200 /* 3201 * only accept global scope addresses unless explicitly 3202 * specified. 3203 */ 3204 if (ifa->ifa_addr->sa_family == 3205 match->ifa_addr->sa_family) 3206 ifa = match; 3207 else if (IN6_IS_ADDR_LINKLOCAL( 3208 &((struct sockaddr_in6 *)ifa-> 3209 ifa_addr)->sin6_addr) || 3210 IN6_IS_ADDR_SITELOCAL( 3211 &((struct sockaddr_in6 *)ifa-> 3212 ifa_addr)->sin6_addr)) 3213 continue; 3214 sa2addr(ifa->ifa_addr, &peer->local_v6_addr); 3215 break; 3216 } 3217 } 3218 3219 freeifaddrs(ifap); 3220 return (0); 3221 } 3222 3223 void 3224 peer_up(u_int32_t id, struct session_up *sup) 3225 { 3226 struct rde_peer *peer; 3227 u_int8_t i; 3228 3229 peer = peer_get(id); 3230 if (peer == NULL) { 3231 log_warnx("peer_up: unknown peer id %u", id); 3232 return; 3233 } 3234 3235 if (peer->state != PEER_DOWN && peer->state != PEER_NONE && 3236 peer->state != PEER_UP) { 3237 /* 3238 * There is a race condition when doing PEER_ERR -> PEER_DOWN. 3239 * So just do a full reset of the peer here. 3240 */ 3241 for (i = 0; i < AID_MAX; i++) { 3242 peer->staletime[i] = 0; 3243 peer_flush(peer, i); 3244 } 3245 up_down(peer); 3246 peer->prefix_cnt = 0; 3247 peer->state = PEER_DOWN; 3248 } 3249 peer->remote_bgpid = ntohl(sup->remote_bgpid); 3250 peer->short_as = sup->short_as; 3251 memcpy(&peer->remote_addr, &sup->remote_addr, 3252 sizeof(peer->remote_addr)); 3253 memcpy(&peer->capa, &sup->capa, sizeof(peer->capa)); 3254 3255 if (peer_localaddrs(peer, &sup->local_addr)) { 3256 peer->state = PEER_DOWN; 3257 imsg_compose(ibuf_se, IMSG_SESSION_DOWN, id, 0, -1, NULL, 0); 3258 return; 3259 } 3260 3261 peer->state = PEER_UP; 3262 up_init(peer); 3263 3264 if (rde_noevaluate()) 3265 /* 3266 * no need to dump the table to the peer, there are no active 3267 * prefixes anyway. This is a speed-up hack. 
3268 */ 3269 return; 3270 3271 for (i = 0; i < AID_MAX; i++) { 3272 if (peer->capa.mp[i]) 3273 peer_dump(id, i); 3274 } 3275 } 3276 3277 void 3278 peer_down(u_int32_t id) 3279 { 3280 struct rde_peer *peer; 3281 struct rde_aspath *asp, *nasp; 3282 3283 peer = peer_get(id); 3284 if (peer == NULL) { 3285 log_warnx("peer_down: unknown peer id %u", id); 3286 return; 3287 } 3288 peer->remote_bgpid = 0; 3289 peer->state = PEER_DOWN; 3290 up_down(peer); 3291 3292 /* walk through the per-peer RIB list and remove all prefixes. */ 3293 for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = nasp) { 3294 nasp = LIST_NEXT(asp, peer_l); 3295 path_remove(asp); 3296 } 3297 LIST_INIT(&peer->path_h); 3298 peer->prefix_cnt = 0; 3299 3300 /* Deletions are performed in path_remove() */ 3301 rde_send_pftable_commit(); 3302 3303 LIST_REMOVE(peer, hash_l); 3304 LIST_REMOVE(peer, peer_l); 3305 free(peer); 3306 } 3307 3308 /* 3309 * Flush all routes older than staletime. If staletime is 0, all routes will 3310 * be flushed. 3311 */ 3312 void 3313 peer_flush(struct rde_peer *peer, u_int8_t aid) 3314 { 3315 struct rde_aspath *asp, *nasp; 3316 u_int32_t rprefixes; 3317 3318 rprefixes = 0; 3319 /* walk through the per-peer RIB list and remove all stale prefixes. */ 3320 for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = nasp) { 3321 nasp = LIST_NEXT(asp, peer_l); 3322 rprefixes += path_remove_stale(asp, aid); 3323 } 3324 3325 /* Deletions are performed in path_remove() */ 3326 rde_send_pftable_commit(); 3327 3328 /* flushed, no need to keep staletime */ 3329 peer->staletime[aid] = 0; 3330 3331 if (peer->prefix_cnt > rprefixes) 3332 peer->prefix_cnt -= rprefixes; 3333 else 3334 peer->prefix_cnt = 0; 3335 } 3336 3337 void 3338 peer_stale(u_int32_t id, u_int8_t aid) 3339 { 3340 struct rde_peer *peer; 3341 time_t now; 3342 3343 peer = peer_get(id); 3344 if (peer == NULL) { 3345 log_warnx("peer_stale: unknown peer id %u", id); 3346 return; 3347 } 3348 3349 /* flush the now even staler routes out */ 3350 if (peer->staletime[aid]) 3351 peer_flush(peer, aid); 3352 peer->staletime[aid] = now = time(NULL); 3353 3354 /* make sure new prefixes start on a higher timestamp */ 3355 do { 3356 sleep(1); 3357 } while (now >= time(NULL)); 3358 } 3359 3360 void 3361 peer_dump(u_int32_t id, u_int8_t aid) 3362 { 3363 struct rde_peer *peer; 3364 3365 peer = peer_get(id); 3366 if (peer == NULL) { 3367 log_warnx("peer_dump: unknown peer id %u", id); 3368 return; 3369 } 3370 3371 if (peer->conf.announce_type == ANNOUNCE_DEFAULT_ROUTE) 3372 up_generate_default(out_rules, peer, aid); 3373 else 3374 rib_dump(&ribs[peer->ribid], rde_up_dump_upcall, peer, aid); 3375 if (peer->capa.grestart.restart) 3376 up_generate_marker(peer, aid); 3377 } 3378 3379 /* End-of-RIB marker, RFC 4724 */ 3380 void 3381 peer_recv_eor(struct rde_peer *peer, u_int8_t aid) 3382 { 3383 peer->prefix_rcvd_eor++; 3384 3385 /* 3386 * First notify SE to avert a possible race with the restart timeout. 3387 * If the timeout fires before this imsg is processed by the SE, it will 3388 * result in the same operation since the timeout issues a FLUSH which 3389 * does the same as the RESTARTED action (flushing stale routes). 3390 * The logic in the SE is such that only one of FLUSH or RESTARTED will 3391 * be sent back to the RDE and so peer_flush is only called once. 
3392 */ 3393 if (imsg_compose(ibuf_se, IMSG_SESSION_RESTARTED, peer->conf.id, 3394 0, -1, &aid, sizeof(aid)) == -1) 3395 fatal("%s %d imsg_compose error", __func__, __LINE__); 3396 } 3397 3398 void 3399 peer_send_eor(struct rde_peer *peer, u_int8_t aid) 3400 { 3401 u_int16_t afi; 3402 u_int8_t safi; 3403 3404 peer->prefix_sent_eor++; 3405 3406 if (aid == AID_INET) { 3407 u_char null[4]; 3408 3409 bzero(&null, 4); 3410 if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, 3411 0, -1, &null, 4) == -1) 3412 fatal("%s %d imsg_compose error in peer_send_eor", 3413 __func__, __LINE__); 3414 } else { 3415 u_int16_t i; 3416 u_char buf[10]; 3417 3418 if (aid2afi(aid, &afi, &safi) == -1) 3419 fatalx("peer_send_eor: bad AID"); 3420 3421 i = 0; /* v4 withdrawn len */ 3422 bcopy(&i, &buf[0], sizeof(i)); 3423 i = htons(6); /* path attr len */ 3424 bcopy(&i, &buf[2], sizeof(i)); 3425 buf[4] = ATTR_OPTIONAL; 3426 buf[5] = ATTR_MP_UNREACH_NLRI; 3427 buf[6] = 3; /* withdrawn len */ 3428 i = htons(afi); 3429 bcopy(&i, &buf[7], sizeof(i)); 3430 buf[9] = safi; 3431 3432 if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, 3433 0, -1, &buf, 10) == -1) 3434 fatal("%s %d imsg_compose error in peer_send_eor", 3435 __func__, __LINE__); 3436 } 3437 } 3438 3439 /* 3440 * network announcement stuff 3441 */ 3442 void 3443 network_add(struct network_config *nc, int flagstatic) 3444 { 3445 struct rdomain *rd; 3446 struct rde_aspath *asp; 3447 struct filter_set_head *vpnset = NULL; 3448 in_addr_t prefix4; 3449 u_int16_t i; 3450 3451 if (nc->rtableid) { 3452 SIMPLEQ_FOREACH(rd, rdomains_l, entry) { 3453 if (rd->rtableid != nc->rtableid) 3454 continue; 3455 switch (nc->prefix.aid) { 3456 case AID_INET: 3457 prefix4 = nc->prefix.v4.s_addr; 3458 bzero(&nc->prefix, sizeof(nc->prefix)); 3459 nc->prefix.aid = AID_VPN_IPv4; 3460 nc->prefix.vpn4.rd = rd->rd; 3461 nc->prefix.vpn4.addr.s_addr = prefix4; 3462 nc->prefix.vpn4.labellen = 3; 3463 nc->prefix.vpn4.labelstack[0] = 3464 (rd->label >> 12) & 0xff; 3465 nc->prefix.vpn4.labelstack[1] = 3466 (rd->label >> 4) & 0xff; 3467 nc->prefix.vpn4.labelstack[2] = 3468 (rd->label << 4) & 0xf0; 3469 nc->prefix.vpn4.labelstack[2] |= BGP_MPLS_BOS; 3470 vpnset = &rd->export; 3471 break; 3472 default: 3473 log_warnx("unable to VPNize prefix"); 3474 filterset_free(&nc->attrset); 3475 return; 3476 } 3477 break; 3478 } 3479 if (rd == NULL) { 3480 log_warnx("network_add: " 3481 "prefix %s/%u in non-existing rdomain %u", 3482 log_addr(&nc->prefix), nc->prefixlen, nc->rtableid); 3483 return; 3484 } 3485 } 3486 3487 if (nc->type == NETWORK_MRTCLONE) { 3488 asp = nc->asp; 3489 } else { 3490 asp = path_get(); 3491 asp->aspath = aspath_get(NULL, 0); 3492 asp->origin = ORIGIN_IGP; 3493 asp->flags = F_ATTR_ORIGIN | F_ATTR_ASPATH | 3494 F_ATTR_LOCALPREF | F_PREFIX_ANNOUNCED; 3495 /* the nexthop is unset unless a default set overrides it */ 3496 } 3497 if (!flagstatic) 3498 asp->flags |= F_ANN_DYNAMIC; 3499 rde_apply_set(asp, &nc->attrset, nc->prefix.aid, peerself, peerself); 3500 if (vpnset) 3501 rde_apply_set(asp, vpnset, nc->prefix.aid, peerself, peerself); 3502 for (i = 1; i < rib_size; i++) { 3503 if (*ribs[i].name == '\0') 3504 break; 3505 path_update(&ribs[i], peerself, asp, &nc->prefix, 3506 nc->prefixlen); 3507 } 3508 path_put(asp); 3509 filterset_free(&nc->attrset); 3510 } 3511 3512 void 3513 network_delete(struct network_config *nc, int flagstatic) 3514 { 3515 struct rdomain *rd; 3516 in_addr_t prefix4; 3517 u_int32_t flags = F_PREFIX_ANNOUNCED; 3518 u_int32_t i; 3519 3520 if (!flagstatic) 3521 
flags |= F_ANN_DYNAMIC; 3522 3523 if (nc->rtableid) { 3524 SIMPLEQ_FOREACH(rd, rdomains_l, entry) { 3525 if (rd->rtableid != nc->rtableid) 3526 continue; 3527 switch (nc->prefix.aid) { 3528 case AID_INET: 3529 prefix4 = nc->prefix.v4.s_addr; 3530 bzero(&nc->prefix, sizeof(nc->prefix)); 3531 nc->prefix.aid = AID_VPN_IPv4; 3532 nc->prefix.vpn4.rd = rd->rd; 3533 nc->prefix.vpn4.addr.s_addr = prefix4; 3534 nc->prefix.vpn4.labellen = 3; 3535 nc->prefix.vpn4.labelstack[0] = 3536 (rd->label >> 12) & 0xff; 3537 nc->prefix.vpn4.labelstack[1] = 3538 (rd->label >> 4) & 0xff; 3539 nc->prefix.vpn4.labelstack[2] = 3540 (rd->label << 4) & 0xf0; 3541 nc->prefix.vpn4.labelstack[2] |= BGP_MPLS_BOS; 3542 break; 3543 default: 3544 log_warnx("unable to VPNize prefix"); 3545 return; 3546 } 3547 } 3548 } 3549 3550 for (i = rib_size - 1; i > 0; i--) { 3551 if (*ribs[i].name == '\0') 3552 break; 3553 prefix_remove(&ribs[i], peerself, &nc->prefix, nc->prefixlen, 3554 flags); 3555 } 3556 } 3557 3558 void 3559 network_dump_upcall(struct rib_entry *re, void *ptr) 3560 { 3561 struct prefix *p; 3562 struct kroute_full k; 3563 struct bgpd_addr addr; 3564 struct rde_dump_ctx *ctx = ptr; 3565 3566 LIST_FOREACH(p, &re->prefix_h, rib_l) { 3567 if (!(p->aspath->flags & F_PREFIX_ANNOUNCED)) 3568 continue; 3569 pt_getaddr(p->prefix, &addr); 3570 3571 bzero(&k, sizeof(k)); 3572 memcpy(&k.prefix, &addr, sizeof(k.prefix)); 3573 if (p->aspath->nexthop == NULL || 3574 p->aspath->nexthop->state != NEXTHOP_REACH) 3575 k.nexthop.aid = k.prefix.aid; 3576 else 3577 memcpy(&k.nexthop, &p->aspath->nexthop->true_nexthop, 3578 sizeof(k.nexthop)); 3579 k.prefixlen = p->prefix->prefixlen; 3580 k.flags = F_KERNEL; 3581 if ((p->aspath->flags & F_ANN_DYNAMIC) == 0) 3582 k.flags = F_STATIC; 3583 if (imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NETWORK, 0, 3584 ctx->req.pid, -1, &k, sizeof(k)) == -1) 3585 log_warnx("network_dump_upcall: " 3586 "imsg_compose error"); 3587 } 3588 } 3589 3590 /* clean up */ 3591 void 3592 rde_shutdown(void) 3593 { 3594 struct rde_peer *p; 3595 u_int32_t i; 3596 3597 /* 3598 * the decision process is turned off if rde_quit = 1 and 3599 * rde_shutdown depends on this. 
3600 */ 3601 3602 /* 3603 * All peers go down 3604 */ 3605 for (i = 0; i <= peertable.peer_hashmask; i++) 3606 while ((p = LIST_FIRST(&peertable.peer_hashtbl[i])) != NULL) 3607 peer_down(p->conf.id); 3608 3609 /* free filters */ 3610 filterlist_free(out_rules); 3611 for (i = 0; i < rib_size; i++) { 3612 if (*ribs[i].name == '\0') 3613 break; 3614 filterlist_free(ribs[i].in_rules); 3615 } 3616 3617 nexthop_shutdown(); 3618 path_shutdown(); 3619 aspath_shutdown(); 3620 attr_shutdown(); 3621 pt_shutdown(); 3622 peer_shutdown(); 3623 } 3624 3625 int 3626 sa_cmp(struct bgpd_addr *a, struct sockaddr *b) 3627 { 3628 struct sockaddr_in *in_b; 3629 struct sockaddr_in6 *in6_b; 3630 3631 if (aid2af(a->aid) != b->sa_family) 3632 return (1); 3633 3634 switch (b->sa_family) { 3635 case AF_INET: 3636 in_b = (struct sockaddr_in *)b; 3637 if (a->v4.s_addr != in_b->sin_addr.s_addr) 3638 return (1); 3639 break; 3640 case AF_INET6: 3641 in6_b = (struct sockaddr_in6 *)b; 3642 #ifdef __KAME__ 3643 /* directly stolen from sbin/ifconfig/ifconfig.c */ 3644 if (IN6_IS_ADDR_LINKLOCAL(&in6_b->sin6_addr)) { 3645 in6_b->sin6_scope_id = 3646 ntohs(*(u_int16_t *)&in6_b->sin6_addr.s6_addr[2]); 3647 in6_b->sin6_addr.s6_addr[2] = 3648 in6_b->sin6_addr.s6_addr[3] = 0; 3649 } 3650 #endif 3651 if (bcmp(&a->v6, &in6_b->sin6_addr, 3652 sizeof(struct in6_addr))) 3653 return (1); 3654 break; 3655 default: 3656 fatal("king bula sez: unknown address family"); 3657 /* NOTREACHED */ 3658 } 3659 3660 return (0); 3661 } 3662
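/*
 * sa_cmp() above deliberately returns 0 on a match and 1 on a
 * mismatch so it can be used like a memcmp()-style predicate, as in
 * peer_localaddrs().  A minimal usage sketch (hypothetical, not part
 * of bgpd):
 */
#if 0
static int
example_addr_is_local(struct bgpd_addr *addr)
{
	struct ifaddrs	*ifap, *ifa;
	int		 found = 0;

	if (getifaddrs(&ifap) == -1)
		return (0);
	for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next)
		/* skip entries without an address, then compare */
		if (ifa->ifa_addr != NULL &&
		    sa_cmp(addr, ifa->ifa_addr) == 0) {
			found = 1;
			break;
		}
	freeifaddrs(ifap);
	return (found);
}
#endif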