1 /* $OpenBSD: session.c,v 1.478 2024/05/22 08:41:14 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org> 5 * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include <sys/types.h> 21 22 #include <sys/mman.h> 23 #include <sys/socket.h> 24 #include <sys/time.h> 25 #include <sys/resource.h> 26 #include <sys/un.h> 27 #include <netinet/in.h> 28 #include <netinet/ip.h> 29 #include <netinet/tcp.h> 30 #include <arpa/inet.h> 31 #include <limits.h> 32 33 #include <err.h> 34 #include <errno.h> 35 #include <fcntl.h> 36 #include <ifaddrs.h> 37 #include <poll.h> 38 #include <pwd.h> 39 #include <signal.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <syslog.h> 44 #include <unistd.h> 45 46 #include "bgpd.h" 47 #include "session.h" 48 #include "log.h" 49 50 #define PFD_PIPE_MAIN 0 51 #define PFD_PIPE_ROUTE 1 52 #define PFD_PIPE_ROUTE_CTL 2 53 #define PFD_SOCK_CTL 3 54 #define PFD_SOCK_RCTL 4 55 #define PFD_LISTENERS_START 5 56 57 void session_sighdlr(int); 58 int setup_listeners(u_int *); 59 void init_peer(struct peer *); 60 void start_timer_holdtime(struct peer *); 61 void start_timer_sendholdtime(struct peer *); 62 void start_timer_keepalive(struct peer *); 63 void session_close_connection(struct peer *); 64 void change_state(struct peer *, enum session_state, enum session_events); 65 int session_setup_socket(struct peer *); 66 void session_accept(int); 67 int session_connect(struct peer *); 68 void session_tcp_established(struct peer *); 69 int session_capa_add(struct ibuf *, uint8_t, uint8_t); 70 int session_capa_add_mp(struct ibuf *, uint8_t); 71 int session_capa_add_afi(struct ibuf *, uint8_t, uint8_t); 72 struct bgp_msg *session_newmsg(enum msg_type, uint16_t); 73 int session_sendmsg(struct bgp_msg *, struct peer *); 74 void session_open(struct peer *); 75 void session_keepalive(struct peer *); 76 void session_update(uint32_t, struct ibuf *); 77 void session_notification(struct peer *, uint8_t, uint8_t, struct ibuf *); 78 void session_notification_data(struct peer *, uint8_t, uint8_t, void *, 79 size_t); 80 void session_rrefresh(struct peer *, uint8_t, uint8_t); 81 int session_graceful_restart(struct peer *); 82 int session_graceful_stop(struct peer *); 83 int session_dispatch_msg(struct pollfd *, struct peer *); 84 void session_process_msg(struct peer *); 85 int parse_header(struct peer *, u_char *, uint16_t *, uint8_t *); 86 int parse_open(struct peer *); 87 int parse_update(struct peer *); 88 int parse_rrefresh(struct peer *); 89 void parse_notification(struct peer *); 90 int parse_capabilities(struct peer *, struct ibuf *, uint32_t *); 91 int capa_neg_calc(struct peer *); 92 void session_dispatch_imsg(struct imsgbuf *, int, u_int *); 93 void session_up(struct peer *); 94 void session_down(struct peer *); 95 int imsg_rde(int, uint32_t, void *, uint16_t); 96 void session_demote(struct peer *, int); 97 void merge_peers(struct bgpd_config *, struct bgpd_config *); 98 99 int la_cmp(struct listen_addr *, struct listen_addr *); 100 void session_template_clone(struct peer *, struct sockaddr *, 101 uint32_t, uint32_t); 102 int session_match_mask(struct peer *, struct bgpd_addr *); 103 104 static struct bgpd_config *conf, *nconf; 105 static struct imsgbuf *ibuf_rde; 106 static struct imsgbuf *ibuf_rde_ctl; 107 static struct imsgbuf *ibuf_main; 108 109 struct bgpd_sysdep sysdep; 110 volatile sig_atomic_t session_quit; 111 int pending_reconf; 112 int csock = -1, rcsock = -1; 113 u_int peer_cnt; 114 115 struct mrt_head mrthead; 116 time_t pauseaccept; 117 118 static const uint8_t marker[MSGSIZE_HEADER_MARKER] = { 119 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 120 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 121 }; 122 123 static inline int 124 peer_compare(const struct peer *a, const struct peer *b) 125 { 126 return a->conf.id - b->conf.id; 127 } 128 129 RB_GENERATE(peer_head, peer, entry, peer_compare); 130 131 void 132 session_sighdlr(int sig) 133 { 134 switch (sig) { 135 case SIGINT: 136 case SIGTERM: 137 session_quit = 1; 138 break; 139 } 140 } 141 142 int 143 setup_listeners(u_int *la_cnt) 144 { 145 int ttl = 255; 146 struct listen_addr *la; 147 u_int cnt = 0; 148 149 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 150 la->reconf = RECONF_NONE; 151 cnt++; 152 153 if (la->flags & LISTENER_LISTENING) 154 continue; 155 156 if (la->fd == -1) { 157 log_warn("cannot establish listener on %s: invalid fd", 158 log_sockaddr((struct sockaddr *)&la->sa, 159 la->sa_len)); 160 continue; 161 } 162 163 if (tcp_md5_prep_listener(la, &conf->peers) == -1) 164 fatal("tcp_md5_prep_listener"); 165 166 /* set ttl to 255 so that ttl-security works */ 167 if (la->sa.ss_family == AF_INET && setsockopt(la->fd, 168 IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) { 169 log_warn("setup_listeners setsockopt TTL"); 170 continue; 171 } 172 if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd, 173 IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) { 174 log_warn("setup_listeners setsockopt hoplimit"); 175 continue; 176 } 177 178 if (listen(la->fd, MAX_BACKLOG)) { 179 close(la->fd); 180 fatal("listen"); 181 } 182 183 la->flags |= LISTENER_LISTENING; 184 185 log_info("listening on %s", 186 log_sockaddr((struct sockaddr *)&la->sa, la->sa_len)); 187 } 188 189 *la_cnt = cnt; 190 191 return (0); 192 } 193 194 void 195 session_main(int debug, int verbose) 196 { 197 int timeout; 198 unsigned int i, j, idx_peers, idx_listeners, idx_mrts; 199 u_int pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0; 200 u_int listener_cnt, ctl_cnt, mrt_cnt; 201 u_int new_cnt; 202 struct passwd *pw; 203 struct peer *p, **peer_l = NULL, *next; 204 struct mrt *m, *xm, **mrt_l = NULL; 205 struct pollfd *pfd = NULL; 206 struct listen_addr *la; 207 void *newp; 208 time_t now; 209 short events; 210 211 log_init(debug, LOG_DAEMON); 212 log_setverbose(verbose); 213 214 log_procinit(log_procnames[PROC_SE]); 215 216 if ((pw = getpwnam(BGPD_USER)) == NULL) 217 fatal(NULL); 218 219 if (chroot(pw->pw_dir) == -1) 220 fatal("chroot"); 221 if (chdir("/") == -1) 222 fatal("chdir(\"/\")"); 223 224 setproctitle("session engine"); 225 226 if (setgroups(1, &pw->pw_gid) || 227 setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || 228 setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) 229 fatal("can't drop privileges"); 230 231 if (pledge("stdio inet recvfd", NULL) == -1) 232 fatal("pledge"); 233 234 signal(SIGTERM, session_sighdlr); 235 signal(SIGINT, session_sighdlr); 236 signal(SIGPIPE, SIG_IGN); 237 signal(SIGHUP, SIG_IGN); 238 signal(SIGALRM, SIG_IGN); 239 signal(SIGUSR1, SIG_IGN); 240 241 if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL) 242 fatal(NULL); 243 imsg_init(ibuf_main, 3); 244 245 LIST_INIT(&mrthead); 246 listener_cnt = 0; 247 peer_cnt = 0; 248 ctl_cnt = 0; 249 250 conf = new_config(); 251 log_info("session engine ready"); 252 253 while (session_quit == 0) { 254 /* check for peers to be initialized or deleted */ 255 if (!pending_reconf) { 256 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) { 257 /* cloned peer that idled out? */ 258 if (p->template && (p->state == STATE_IDLE || 259 p->state == STATE_ACTIVE) && 260 getmonotime() - p->stats.last_updown >= 261 INTERVAL_HOLD_CLONED) 262 p->reconf_action = RECONF_DELETE; 263 264 /* new peer that needs init? */ 265 if (p->state == STATE_NONE) 266 init_peer(p); 267 268 /* deletion due? */ 269 if (p->reconf_action == RECONF_DELETE) { 270 if (p->demoted) 271 session_demote(p, -1); 272 p->conf.demote_group[0] = 0; 273 session_stop(p, ERR_CEASE_PEER_UNCONF, 274 NULL); 275 timer_remove_all(&p->timers); 276 tcp_md5_del_listener(conf, p); 277 RB_REMOVE(peer_head, &conf->peers, p); 278 log_peer_warnx(&p->conf, "removed"); 279 free(p); 280 peer_cnt--; 281 continue; 282 } 283 p->reconf_action = RECONF_NONE; 284 } 285 } 286 287 if (peer_cnt > peer_l_elms) { 288 if ((newp = reallocarray(peer_l, peer_cnt, 289 sizeof(struct peer *))) == NULL) { 290 /* panic for now */ 291 log_warn("could not resize peer_l from %u -> %u" 292 " entries", peer_l_elms, peer_cnt); 293 fatalx("exiting"); 294 } 295 peer_l = newp; 296 peer_l_elms = peer_cnt; 297 } 298 299 mrt_cnt = 0; 300 for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) { 301 xm = LIST_NEXT(m, entry); 302 if (m->state == MRT_STATE_REMOVE) { 303 mrt_clean(m); 304 LIST_REMOVE(m, entry); 305 free(m); 306 continue; 307 } 308 if (m->wbuf.queued) 309 mrt_cnt++; 310 } 311 312 if (mrt_cnt > mrt_l_elms) { 313 if ((newp = reallocarray(mrt_l, mrt_cnt, 314 sizeof(struct mrt *))) == NULL) { 315 /* panic for now */ 316 log_warn("could not resize mrt_l from %u -> %u" 317 " entries", mrt_l_elms, mrt_cnt); 318 fatalx("exiting"); 319 } 320 mrt_l = newp; 321 mrt_l_elms = mrt_cnt; 322 } 323 324 new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt + 325 ctl_cnt + mrt_cnt; 326 if (new_cnt > pfd_elms) { 327 if ((newp = reallocarray(pfd, new_cnt, 328 sizeof(struct pollfd))) == NULL) { 329 /* panic for now */ 330 log_warn("could not resize pfd from %u -> %u" 331 " entries", pfd_elms, new_cnt); 332 fatalx("exiting"); 333 } 334 pfd = newp; 335 pfd_elms = new_cnt; 336 } 337 338 memset(pfd, 0, sizeof(struct pollfd) * pfd_elms); 339 340 set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main); 341 set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde); 342 set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl); 343 344 if (pauseaccept == 0) { 345 pfd[PFD_SOCK_CTL].fd = csock; 346 pfd[PFD_SOCK_CTL].events = POLLIN; 347 pfd[PFD_SOCK_RCTL].fd = rcsock; 348 pfd[PFD_SOCK_RCTL].events = POLLIN; 349 } else { 350 pfd[PFD_SOCK_CTL].fd = -1; 351 pfd[PFD_SOCK_RCTL].fd = -1; 352 } 353 354 i = PFD_LISTENERS_START; 355 TAILQ_FOREACH(la, conf->listen_addrs, entry) { 356 if (pauseaccept == 0) { 357 pfd[i].fd = la->fd; 358 pfd[i].events = POLLIN; 359 } else 360 pfd[i].fd = -1; 361 i++; 362 } 363 idx_listeners = i; 364 timeout = 240; /* loop every 240s at least */ 365 366 now = getmonotime(); 367 RB_FOREACH(p, peer_head, &conf->peers) { 368 time_t nextaction; 369 struct timer *pt; 370 371 /* check timers */ 372 if ((pt = timer_nextisdue(&p->timers, now)) != NULL) { 373 switch (pt->type) { 374 case Timer_Hold: 375 bgp_fsm(p, EVNT_TIMER_HOLDTIME); 376 break; 377 case Timer_SendHold: 378 bgp_fsm(p, EVNT_TIMER_SENDHOLD); 379 break; 380 case Timer_ConnectRetry: 381 bgp_fsm(p, EVNT_TIMER_CONNRETRY); 382 break; 383 case Timer_Keepalive: 384 bgp_fsm(p, EVNT_TIMER_KEEPALIVE); 385 break; 386 case Timer_IdleHold: 387 bgp_fsm(p, EVNT_START); 388 break; 389 case Timer_IdleHoldReset: 390 p->IdleHoldTime = 391 INTERVAL_IDLE_HOLD_INITIAL; 392 p->errcnt = 0; 393 timer_stop(&p->timers, 394 Timer_IdleHoldReset); 395 break; 396 case Timer_CarpUndemote: 397 timer_stop(&p->timers, 398 Timer_CarpUndemote); 399 if (p->demoted && 400 p->state == STATE_ESTABLISHED) 401 session_demote(p, -1); 402 break; 403 case Timer_RestartTimeout: 404 timer_stop(&p->timers, 405 Timer_RestartTimeout); 406 session_graceful_stop(p); 407 break; 408 default: 409 fatalx("King Bula lost in time"); 410 } 411 } 412 if ((nextaction = timer_nextduein(&p->timers, 413 now)) != -1 && nextaction < timeout) 414 timeout = nextaction; 415 416 /* are we waiting for a write? */ 417 events = POLLIN; 418 if (p->wbuf.queued > 0 || p->state == STATE_CONNECT) 419 events |= POLLOUT; 420 /* is there still work to do? */ 421 if (p->rpending && p->rbuf && p->rbuf->wpos) 422 timeout = 0; 423 424 /* poll events */ 425 if (p->fd != -1 && events != 0) { 426 pfd[i].fd = p->fd; 427 pfd[i].events = events; 428 peer_l[i - idx_listeners] = p; 429 i++; 430 } 431 } 432 433 idx_peers = i; 434 435 LIST_FOREACH(m, &mrthead, entry) 436 if (m->wbuf.queued) { 437 pfd[i].fd = m->wbuf.fd; 438 pfd[i].events = POLLOUT; 439 mrt_l[i - idx_peers] = m; 440 i++; 441 } 442 443 idx_mrts = i; 444 445 i += control_fill_pfds(pfd + i, pfd_elms -i); 446 447 if (i > pfd_elms) 448 fatalx("poll pfd overflow"); 449 450 if (pauseaccept && timeout > 1) 451 timeout = 1; 452 if (timeout < 0) 453 timeout = 0; 454 if (poll(pfd, i, timeout * 1000) == -1) { 455 if (errno == EINTR) 456 continue; 457 fatal("poll error"); 458 } 459 460 /* 461 * If we previously saw fd exhaustion, we stop accept() 462 * for 1 second to throttle the accept() loop. 463 */ 464 if (pauseaccept && getmonotime() > pauseaccept + 1) 465 pauseaccept = 0; 466 467 if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) { 468 log_warnx("SE: Lost connection to parent"); 469 session_quit = 1; 470 continue; 471 } else 472 session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN, 473 &listener_cnt); 474 475 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) { 476 log_warnx("SE: Lost connection to RDE"); 477 msgbuf_clear(&ibuf_rde->w); 478 free(ibuf_rde); 479 ibuf_rde = NULL; 480 } else 481 session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE, 482 &listener_cnt); 483 484 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) == 485 -1) { 486 log_warnx("SE: Lost connection to RDE control"); 487 msgbuf_clear(&ibuf_rde_ctl->w); 488 free(ibuf_rde_ctl); 489 ibuf_rde_ctl = NULL; 490 } else 491 session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL, 492 &listener_cnt); 493 494 if (pfd[PFD_SOCK_CTL].revents & POLLIN) 495 ctl_cnt += control_accept(csock, 0); 496 497 if (pfd[PFD_SOCK_RCTL].revents & POLLIN) 498 ctl_cnt += control_accept(rcsock, 1); 499 500 for (j = PFD_LISTENERS_START; j < idx_listeners; j++) 501 if (pfd[j].revents & POLLIN) 502 session_accept(pfd[j].fd); 503 504 for (; j < idx_peers; j++) 505 session_dispatch_msg(&pfd[j], 506 peer_l[j - idx_listeners]); 507 508 RB_FOREACH(p, peer_head, &conf->peers) 509 if (p->rbuf && p->rbuf->wpos) 510 session_process_msg(p); 511 512 for (; j < idx_mrts; j++) 513 if (pfd[j].revents & POLLOUT) 514 mrt_write(mrt_l[j - idx_peers]); 515 516 for (; j < i; j++) 517 ctl_cnt -= control_dispatch_msg(&pfd[j], &conf->peers); 518 } 519 520 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) { 521 session_stop(p, ERR_CEASE_ADMIN_DOWN, "bgpd shutting down"); 522 timer_remove_all(&p->timers); 523 tcp_md5_del_listener(conf, p); 524 RB_REMOVE(peer_head, &conf->peers, p); 525 free(p); 526 } 527 528 while ((m = LIST_FIRST(&mrthead)) != NULL) { 529 mrt_clean(m); 530 LIST_REMOVE(m, entry); 531 free(m); 532 } 533 534 free_config(conf); 535 free(peer_l); 536 free(mrt_l); 537 free(pfd); 538 539 /* close pipes */ 540 if (ibuf_rde) { 541 msgbuf_write(&ibuf_rde->w); 542 msgbuf_clear(&ibuf_rde->w); 543 close(ibuf_rde->fd); 544 free(ibuf_rde); 545 } 546 if (ibuf_rde_ctl) { 547 msgbuf_clear(&ibuf_rde_ctl->w); 548 close(ibuf_rde_ctl->fd); 549 free(ibuf_rde_ctl); 550 } 551 msgbuf_write(&ibuf_main->w); 552 msgbuf_clear(&ibuf_main->w); 553 close(ibuf_main->fd); 554 free(ibuf_main); 555 556 control_shutdown(csock); 557 control_shutdown(rcsock); 558 log_info("session engine exiting"); 559 exit(0); 560 } 561 562 void 563 init_peer(struct peer *p) 564 { 565 TAILQ_INIT(&p->timers); 566 p->fd = p->wbuf.fd = -1; 567 568 if (p->conf.if_depend[0]) 569 imsg_compose(ibuf_main, IMSG_SESSION_DEPENDON, 0, 0, -1, 570 p->conf.if_depend, sizeof(p->conf.if_depend)); 571 else 572 p->depend_ok = 1; 573 574 peer_cnt++; 575 576 change_state(p, STATE_IDLE, EVNT_NONE); 577 if (p->conf.down) 578 timer_stop(&p->timers, Timer_IdleHold); /* no autostart */ 579 else 580 timer_set(&p->timers, Timer_IdleHold, SESSION_CLEAR_DELAY); 581 582 p->stats.last_updown = getmonotime(); 583 584 /* 585 * on startup, demote if requested. 586 * do not handle new peers. they must reach ESTABLISHED beforehand. 587 * peers added at runtime have reconf_action set to RECONF_REINIT. 588 */ 589 if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0]) 590 session_demote(p, +1); 591 } 592 593 void 594 bgp_fsm(struct peer *peer, enum session_events event) 595 { 596 switch (peer->state) { 597 case STATE_NONE: 598 /* nothing */ 599 break; 600 case STATE_IDLE: 601 switch (event) { 602 case EVNT_START: 603 timer_stop(&peer->timers, Timer_Hold); 604 timer_stop(&peer->timers, Timer_SendHold); 605 timer_stop(&peer->timers, Timer_Keepalive); 606 timer_stop(&peer->timers, Timer_IdleHold); 607 608 /* allocate read buffer */ 609 peer->rbuf = calloc(1, sizeof(struct ibuf_read)); 610 if (peer->rbuf == NULL) 611 fatal(NULL); 612 613 /* init write buffer */ 614 msgbuf_init(&peer->wbuf); 615 616 if (!peer->depend_ok) 617 timer_stop(&peer->timers, Timer_ConnectRetry); 618 else if (peer->passive || peer->conf.passive || 619 peer->conf.template) { 620 change_state(peer, STATE_ACTIVE, event); 621 timer_stop(&peer->timers, Timer_ConnectRetry); 622 } else { 623 change_state(peer, STATE_CONNECT, event); 624 timer_set(&peer->timers, Timer_ConnectRetry, 625 conf->connectretry); 626 session_connect(peer); 627 } 628 peer->passive = 0; 629 break; 630 case EVNT_STOP: 631 timer_stop(&peer->timers, Timer_IdleHold); 632 break; 633 default: 634 /* ignore */ 635 break; 636 } 637 break; 638 case STATE_CONNECT: 639 switch (event) { 640 case EVNT_START: 641 /* ignore */ 642 break; 643 case EVNT_CON_OPEN: 644 session_tcp_established(peer); 645 session_open(peer); 646 timer_stop(&peer->timers, Timer_ConnectRetry); 647 peer->holdtime = INTERVAL_HOLD_INITIAL; 648 start_timer_holdtime(peer); 649 change_state(peer, STATE_OPENSENT, event); 650 break; 651 case EVNT_CON_OPENFAIL: 652 timer_set(&peer->timers, Timer_ConnectRetry, 653 conf->connectretry); 654 session_close_connection(peer); 655 change_state(peer, STATE_ACTIVE, event); 656 break; 657 case EVNT_TIMER_CONNRETRY: 658 timer_set(&peer->timers, Timer_ConnectRetry, 659 conf->connectretry); 660 session_connect(peer); 661 break; 662 default: 663 change_state(peer, STATE_IDLE, event); 664 break; 665 } 666 break; 667 case STATE_ACTIVE: 668 switch (event) { 669 case EVNT_START: 670 /* ignore */ 671 break; 672 case EVNT_CON_OPEN: 673 session_tcp_established(peer); 674 session_open(peer); 675 timer_stop(&peer->timers, Timer_ConnectRetry); 676 peer->holdtime = INTERVAL_HOLD_INITIAL; 677 start_timer_holdtime(peer); 678 change_state(peer, STATE_OPENSENT, event); 679 break; 680 case EVNT_CON_OPENFAIL: 681 timer_set(&peer->timers, Timer_ConnectRetry, 682 conf->connectretry); 683 session_close_connection(peer); 684 change_state(peer, STATE_ACTIVE, event); 685 break; 686 case EVNT_TIMER_CONNRETRY: 687 timer_set(&peer->timers, Timer_ConnectRetry, 688 peer->holdtime); 689 change_state(peer, STATE_CONNECT, event); 690 session_connect(peer); 691 break; 692 default: 693 change_state(peer, STATE_IDLE, event); 694 break; 695 } 696 break; 697 case STATE_OPENSENT: 698 switch (event) { 699 case EVNT_START: 700 /* ignore */ 701 break; 702 case EVNT_STOP: 703 change_state(peer, STATE_IDLE, event); 704 break; 705 case EVNT_CON_CLOSED: 706 session_close_connection(peer); 707 timer_set(&peer->timers, Timer_ConnectRetry, 708 conf->connectretry); 709 change_state(peer, STATE_ACTIVE, event); 710 break; 711 case EVNT_CON_FATAL: 712 change_state(peer, STATE_IDLE, event); 713 break; 714 case EVNT_TIMER_HOLDTIME: 715 session_notification(peer, ERR_HOLDTIMEREXPIRED, 716 0, NULL); 717 change_state(peer, STATE_IDLE, event); 718 break; 719 case EVNT_TIMER_SENDHOLD: 720 session_notification(peer, ERR_SENDHOLDTIMEREXPIRED, 721 0, NULL); 722 change_state(peer, STATE_IDLE, event); 723 break; 724 case EVNT_RCVD_OPEN: 725 /* parse_open calls change_state itself on failure */ 726 if (parse_open(peer)) 727 break; 728 session_keepalive(peer); 729 change_state(peer, STATE_OPENCONFIRM, event); 730 break; 731 case EVNT_RCVD_NOTIFICATION: 732 parse_notification(peer); 733 break; 734 default: 735 session_notification(peer, 736 ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL); 737 change_state(peer, STATE_IDLE, event); 738 break; 739 } 740 break; 741 case STATE_OPENCONFIRM: 742 switch (event) { 743 case EVNT_START: 744 /* ignore */ 745 break; 746 case EVNT_STOP: 747 change_state(peer, STATE_IDLE, event); 748 break; 749 case EVNT_CON_CLOSED: 750 case EVNT_CON_FATAL: 751 change_state(peer, STATE_IDLE, event); 752 break; 753 case EVNT_TIMER_HOLDTIME: 754 session_notification(peer, ERR_HOLDTIMEREXPIRED, 755 0, NULL); 756 change_state(peer, STATE_IDLE, event); 757 break; 758 case EVNT_TIMER_SENDHOLD: 759 session_notification(peer, ERR_SENDHOLDTIMEREXPIRED, 760 0, NULL); 761 change_state(peer, STATE_IDLE, event); 762 break; 763 case EVNT_TIMER_KEEPALIVE: 764 session_keepalive(peer); 765 break; 766 case EVNT_RCVD_KEEPALIVE: 767 start_timer_holdtime(peer); 768 change_state(peer, STATE_ESTABLISHED, event); 769 break; 770 case EVNT_RCVD_NOTIFICATION: 771 parse_notification(peer); 772 break; 773 default: 774 session_notification(peer, 775 ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL); 776 change_state(peer, STATE_IDLE, event); 777 break; 778 } 779 break; 780 case STATE_ESTABLISHED: 781 switch (event) { 782 case EVNT_START: 783 /* ignore */ 784 break; 785 case EVNT_STOP: 786 change_state(peer, STATE_IDLE, event); 787 break; 788 case EVNT_CON_CLOSED: 789 case EVNT_CON_FATAL: 790 change_state(peer, STATE_IDLE, event); 791 break; 792 case EVNT_TIMER_HOLDTIME: 793 session_notification(peer, ERR_HOLDTIMEREXPIRED, 794 0, NULL); 795 change_state(peer, STATE_IDLE, event); 796 break; 797 case EVNT_TIMER_SENDHOLD: 798 session_notification(peer, ERR_SENDHOLDTIMEREXPIRED, 799 0, NULL); 800 change_state(peer, STATE_IDLE, event); 801 break; 802 case EVNT_TIMER_KEEPALIVE: 803 session_keepalive(peer); 804 break; 805 case EVNT_RCVD_KEEPALIVE: 806 start_timer_holdtime(peer); 807 break; 808 case EVNT_RCVD_UPDATE: 809 start_timer_holdtime(peer); 810 if (parse_update(peer)) 811 change_state(peer, STATE_IDLE, event); 812 else 813 start_timer_holdtime(peer); 814 break; 815 case EVNT_RCVD_NOTIFICATION: 816 parse_notification(peer); 817 break; 818 default: 819 session_notification(peer, 820 ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL); 821 change_state(peer, STATE_IDLE, event); 822 break; 823 } 824 break; 825 } 826 } 827 828 void 829 start_timer_holdtime(struct peer *peer) 830 { 831 if (peer->holdtime > 0) 832 timer_set(&peer->timers, Timer_Hold, peer->holdtime); 833 else 834 timer_stop(&peer->timers, Timer_Hold); 835 } 836 837 void 838 start_timer_sendholdtime(struct peer *peer) 839 { 840 uint16_t holdtime = INTERVAL_HOLD; 841 842 if (peer->holdtime > INTERVAL_HOLD) 843 holdtime = peer->holdtime; 844 845 if (peer->holdtime > 0) 846 timer_set(&peer->timers, Timer_SendHold, holdtime); 847 else 848 timer_stop(&peer->timers, Timer_SendHold); 849 } 850 851 void 852 start_timer_keepalive(struct peer *peer) 853 { 854 if (peer->holdtime > 0) 855 timer_set(&peer->timers, Timer_Keepalive, peer->holdtime / 3); 856 else 857 timer_stop(&peer->timers, Timer_Keepalive); 858 } 859 860 void 861 session_close_connection(struct peer *peer) 862 { 863 if (peer->fd != -1) { 864 close(peer->fd); 865 pauseaccept = 0; 866 } 867 peer->fd = peer->wbuf.fd = -1; 868 } 869 870 void 871 change_state(struct peer *peer, enum session_state state, 872 enum session_events event) 873 { 874 struct mrt *mrt; 875 876 switch (state) { 877 case STATE_IDLE: 878 /* carp demotion first. new peers handled in init_peer */ 879 if (peer->state == STATE_ESTABLISHED && 880 peer->conf.demote_group[0] && !peer->demoted) 881 session_demote(peer, +1); 882 883 /* 884 * try to write out what's buffered (maybe a notification), 885 * don't bother if it fails 886 */ 887 if (peer->state >= STATE_OPENSENT && peer->wbuf.queued) 888 msgbuf_write(&peer->wbuf); 889 890 /* 891 * we must start the timer for the next EVNT_START 892 * if we are coming here due to an error and the 893 * session was not established successfully before, the 894 * starttimerinterval needs to be exponentially increased 895 */ 896 if (peer->IdleHoldTime == 0) 897 peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL; 898 peer->holdtime = INTERVAL_HOLD_INITIAL; 899 timer_stop(&peer->timers, Timer_ConnectRetry); 900 timer_stop(&peer->timers, Timer_Keepalive); 901 timer_stop(&peer->timers, Timer_Hold); 902 timer_stop(&peer->timers, Timer_SendHold); 903 timer_stop(&peer->timers, Timer_IdleHold); 904 timer_stop(&peer->timers, Timer_IdleHoldReset); 905 session_close_connection(peer); 906 msgbuf_clear(&peer->wbuf); 907 free(peer->rbuf); 908 peer->rbuf = NULL; 909 peer->rpending = 0; 910 memset(&peer->capa.peer, 0, sizeof(peer->capa.peer)); 911 if (!peer->template) 912 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 913 peer->conf.id, 0, -1, NULL, 0); 914 915 if (event != EVNT_STOP) { 916 timer_set(&peer->timers, Timer_IdleHold, 917 peer->IdleHoldTime); 918 if (event != EVNT_NONE && 919 peer->IdleHoldTime < MAX_IDLE_HOLD/2) 920 peer->IdleHoldTime *= 2; 921 } 922 if (peer->state == STATE_ESTABLISHED) { 923 if (peer->capa.neg.grestart.restart == 2 && 924 (event == EVNT_CON_CLOSED || 925 event == EVNT_CON_FATAL)) { 926 /* don't punish graceful restart */ 927 timer_set(&peer->timers, Timer_IdleHold, 0); 928 peer->IdleHoldTime /= 2; 929 session_graceful_restart(peer); 930 } else 931 session_down(peer); 932 } 933 if (peer->state == STATE_NONE || 934 peer->state == STATE_ESTABLISHED) { 935 /* initialize capability negotiation structures */ 936 memcpy(&peer->capa.ann, &peer->conf.capabilities, 937 sizeof(peer->capa.ann)); 938 } 939 break; 940 case STATE_CONNECT: 941 if (peer->state == STATE_ESTABLISHED && 942 peer->capa.neg.grestart.restart == 2) { 943 /* do the graceful restart dance */ 944 session_graceful_restart(peer); 945 peer->holdtime = INTERVAL_HOLD_INITIAL; 946 timer_stop(&peer->timers, Timer_ConnectRetry); 947 timer_stop(&peer->timers, Timer_Keepalive); 948 timer_stop(&peer->timers, Timer_Hold); 949 timer_stop(&peer->timers, Timer_SendHold); 950 timer_stop(&peer->timers, Timer_IdleHold); 951 timer_stop(&peer->timers, Timer_IdleHoldReset); 952 session_close_connection(peer); 953 msgbuf_clear(&peer->wbuf); 954 memset(&peer->capa.peer, 0, sizeof(peer->capa.peer)); 955 } 956 break; 957 case STATE_ACTIVE: 958 if (!peer->template) 959 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 960 peer->conf.id, 0, -1, NULL, 0); 961 break; 962 case STATE_OPENSENT: 963 break; 964 case STATE_OPENCONFIRM: 965 break; 966 case STATE_ESTABLISHED: 967 timer_set(&peer->timers, Timer_IdleHoldReset, 968 peer->IdleHoldTime); 969 if (peer->demoted) 970 timer_set(&peer->timers, Timer_CarpUndemote, 971 INTERVAL_HOLD_DEMOTED); 972 session_up(peer); 973 break; 974 default: /* something seriously fucked */ 975 break; 976 } 977 978 log_statechange(peer, state, event); 979 LIST_FOREACH(mrt, &mrthead, entry) { 980 if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT)) 981 continue; 982 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 983 mrt->peer_id == peer->conf.id || (mrt->group_id != 0 && 984 mrt->group_id == peer->conf.groupid)) 985 mrt_dump_state(mrt, peer->state, state, peer); 986 } 987 peer->prev_state = peer->state; 988 peer->state = state; 989 } 990 991 void 992 session_accept(int listenfd) 993 { 994 int connfd; 995 socklen_t len; 996 struct sockaddr_storage cliaddr; 997 struct peer *p = NULL; 998 999 len = sizeof(cliaddr); 1000 if ((connfd = accept4(listenfd, 1001 (struct sockaddr *)&cliaddr, &len, 1002 SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) { 1003 if (errno == ENFILE || errno == EMFILE) 1004 pauseaccept = getmonotime(); 1005 else if (errno != EWOULDBLOCK && errno != EINTR && 1006 errno != ECONNABORTED) 1007 log_warn("accept"); 1008 return; 1009 } 1010 1011 p = getpeerbyip(conf, (struct sockaddr *)&cliaddr); 1012 1013 if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) { 1014 if (timer_running(&p->timers, Timer_IdleHold, NULL)) { 1015 /* fast reconnect after clear */ 1016 p->passive = 1; 1017 bgp_fsm(p, EVNT_START); 1018 } 1019 } 1020 1021 if (p != NULL && 1022 (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) { 1023 if (p->fd != -1) { 1024 if (p->state == STATE_CONNECT) 1025 session_close_connection(p); 1026 else { 1027 close(connfd); 1028 return; 1029 } 1030 } 1031 1032 open: 1033 if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1034 log_peer_warnx(&p->conf, 1035 "ipsec or md5sig configured but not available"); 1036 close(connfd); 1037 return; 1038 } 1039 1040 if (tcp_md5_check(connfd, p) == -1) { 1041 close(connfd); 1042 return; 1043 } 1044 p->fd = p->wbuf.fd = connfd; 1045 if (session_setup_socket(p)) { 1046 close(connfd); 1047 return; 1048 } 1049 bgp_fsm(p, EVNT_CON_OPEN); 1050 return; 1051 } else if (p != NULL && p->state == STATE_ESTABLISHED && 1052 p->capa.neg.grestart.restart == 2) { 1053 /* first do the graceful restart dance */ 1054 change_state(p, STATE_CONNECT, EVNT_CON_CLOSED); 1055 /* then do part of the open dance */ 1056 goto open; 1057 } else { 1058 log_conn_attempt(p, (struct sockaddr *)&cliaddr, len); 1059 close(connfd); 1060 } 1061 } 1062 1063 int 1064 session_connect(struct peer *peer) 1065 { 1066 struct sockaddr *sa; 1067 struct bgpd_addr *bind_addr = NULL; 1068 socklen_t sa_len; 1069 1070 /* 1071 * we do not need the overcomplicated collision detection RFC 1771 1072 * describes; we simply make sure there is only ever one concurrent 1073 * tcp connection per peer. 1074 */ 1075 if (peer->fd != -1) 1076 return (-1); 1077 1078 if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), 1079 SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) { 1080 log_peer_warn(&peer->conf, "session_connect socket"); 1081 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1082 return (-1); 1083 } 1084 1085 if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) { 1086 log_peer_warnx(&peer->conf, 1087 "ipsec or md5sig configured but not available"); 1088 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1089 return (-1); 1090 } 1091 1092 tcp_md5_set(peer->fd, peer); 1093 peer->wbuf.fd = peer->fd; 1094 1095 /* if local-address is set we need to bind() */ 1096 switch (peer->conf.remote_addr.aid) { 1097 case AID_INET: 1098 bind_addr = &peer->conf.local_addr_v4; 1099 break; 1100 case AID_INET6: 1101 bind_addr = &peer->conf.local_addr_v6; 1102 break; 1103 } 1104 if ((sa = addr2sa(bind_addr, 0, &sa_len)) != NULL) { 1105 if (bind(peer->fd, sa, sa_len) == -1) { 1106 log_peer_warn(&peer->conf, "session_connect bind"); 1107 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1108 return (-1); 1109 } 1110 } 1111 1112 if (session_setup_socket(peer)) { 1113 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1114 return (-1); 1115 } 1116 1117 sa = addr2sa(&peer->conf.remote_addr, peer->conf.remote_port, &sa_len); 1118 if (connect(peer->fd, sa, sa_len) == -1) { 1119 if (errno != EINPROGRESS) { 1120 if (errno != peer->lasterr) 1121 log_peer_warn(&peer->conf, "connect"); 1122 peer->lasterr = errno; 1123 bgp_fsm(peer, EVNT_CON_OPENFAIL); 1124 return (-1); 1125 } 1126 } else 1127 bgp_fsm(peer, EVNT_CON_OPEN); 1128 1129 return (0); 1130 } 1131 1132 int 1133 session_setup_socket(struct peer *p) 1134 { 1135 int ttl = p->conf.distance; 1136 int pre = IPTOS_PREC_INTERNETCONTROL; 1137 int nodelay = 1; 1138 int bsize; 1139 1140 switch (p->conf.remote_addr.aid) { 1141 case AID_INET: 1142 /* set precedence, see RFC 1771 appendix 5 */ 1143 if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == 1144 -1) { 1145 log_peer_warn(&p->conf, 1146 "session_setup_socket setsockopt TOS"); 1147 return (-1); 1148 } 1149 1150 if (p->conf.ebgp) { 1151 /* 1152 * set TTL to foreign router's distance 1153 * 1=direct n=multihop with ttlsec, we always use 255 1154 */ 1155 if (p->conf.ttlsec) { 1156 ttl = 256 - p->conf.distance; 1157 if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, 1158 &ttl, sizeof(ttl)) == -1) { 1159 log_peer_warn(&p->conf, 1160 "session_setup_socket: " 1161 "setsockopt MINTTL"); 1162 return (-1); 1163 } 1164 ttl = 255; 1165 } 1166 1167 if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl, 1168 sizeof(ttl)) == -1) { 1169 log_peer_warn(&p->conf, 1170 "session_setup_socket setsockopt TTL"); 1171 return (-1); 1172 } 1173 } 1174 break; 1175 case AID_INET6: 1176 if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_TCLASS, &pre, 1177 sizeof(pre)) == -1) { 1178 log_peer_warn(&p->conf, "session_setup_socket " 1179 "setsockopt TCLASS"); 1180 return (-1); 1181 } 1182 1183 if (p->conf.ebgp) { 1184 /* 1185 * set hoplimit to foreign router's distance 1186 * 1=direct n=multihop with ttlsec, we always use 255 1187 */ 1188 if (p->conf.ttlsec) { 1189 ttl = 256 - p->conf.distance; 1190 if (setsockopt(p->fd, IPPROTO_IPV6, 1191 IPV6_MINHOPCOUNT, &ttl, sizeof(ttl)) 1192 == -1) { 1193 log_peer_warn(&p->conf, 1194 "session_setup_socket: " 1195 "setsockopt MINHOPCOUNT"); 1196 return (-1); 1197 } 1198 ttl = 255; 1199 } 1200 if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 1201 &ttl, sizeof(ttl)) == -1) { 1202 log_peer_warn(&p->conf, 1203 "session_setup_socket setsockopt hoplimit"); 1204 return (-1); 1205 } 1206 } 1207 break; 1208 } 1209 1210 /* set TCP_NODELAY */ 1211 if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay, 1212 sizeof(nodelay)) == -1) { 1213 log_peer_warn(&p->conf, 1214 "session_setup_socket setsockopt TCP_NODELAY"); 1215 return (-1); 1216 } 1217 1218 /* limit bufsize. no biggie if it fails */ 1219 bsize = 65535; 1220 setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize, sizeof(bsize)); 1221 setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize, sizeof(bsize)); 1222 1223 return (0); 1224 } 1225 1226 /* 1227 * compare the bgpd_addr with the sockaddr by converting the latter into 1228 * a bgpd_addr. Return true if the two are equal, including any scope 1229 */ 1230 static int 1231 sa_equal(struct bgpd_addr *ba, struct sockaddr *b) 1232 { 1233 struct bgpd_addr bb; 1234 1235 sa2addr(b, &bb, NULL); 1236 return (memcmp(ba, &bb, sizeof(*ba)) == 0); 1237 } 1238 1239 static void 1240 get_alternate_addr(struct bgpd_addr *local, struct bgpd_addr *remote, 1241 struct bgpd_addr *alt, unsigned int *scope) 1242 { 1243 struct ifaddrs *ifap, *ifa, *match; 1244 int connected = 0; 1245 u_int8_t plen; 1246 1247 if (getifaddrs(&ifap) == -1) 1248 fatal("getifaddrs"); 1249 1250 for (match = ifap; match != NULL; match = match->ifa_next) { 1251 if (match->ifa_addr == NULL) 1252 continue; 1253 if (match->ifa_addr->sa_family != AF_INET && 1254 match->ifa_addr->sa_family != AF_INET6) 1255 continue; 1256 if (sa_equal(local, match->ifa_addr)) { 1257 if (match->ifa_flags & IFF_POINTOPOINT && 1258 match->ifa_dstaddr != NULL) { 1259 if (sa_equal(remote, match->ifa_dstaddr)) 1260 connected = 1; 1261 } else if (match->ifa_netmask != NULL) { 1262 plen = mask2prefixlen( 1263 match->ifa_addr->sa_family, 1264 match->ifa_netmask); 1265 if (prefix_compare(local, remote, plen) == 0) 1266 connected = 1; 1267 } 1268 break; 1269 } 1270 } 1271 1272 if (match == NULL) { 1273 log_warnx("%s: local address not found", __func__); 1274 return; 1275 } 1276 if (connected) 1277 *scope = if_nametoindex(match->ifa_name); 1278 else 1279 *scope = 0; 1280 1281 switch (local->aid) { 1282 case AID_INET6: 1283 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { 1284 if (ifa->ifa_addr != NULL && 1285 ifa->ifa_addr->sa_family == AF_INET && 1286 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 1287 sa2addr(ifa->ifa_addr, alt, NULL); 1288 break; 1289 } 1290 } 1291 break; 1292 case AID_INET: 1293 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { 1294 if (ifa->ifa_addr != NULL && 1295 ifa->ifa_addr->sa_family == AF_INET6 && 1296 strcmp(ifa->ifa_name, match->ifa_name) == 0) { 1297 struct sockaddr_in6 *s = 1298 (struct sockaddr_in6 *)ifa->ifa_addr; 1299 1300 /* only accept global scope addresses */ 1301 if (IN6_IS_ADDR_LINKLOCAL(&s->sin6_addr) || 1302 IN6_IS_ADDR_SITELOCAL(&s->sin6_addr)) 1303 continue; 1304 sa2addr(ifa->ifa_addr, alt, NULL); 1305 break; 1306 } 1307 } 1308 break; 1309 default: 1310 log_warnx("%s: unsupported address family %s", __func__, 1311 aid2str(local->aid)); 1312 break; 1313 } 1314 1315 freeifaddrs(ifap); 1316 } 1317 1318 void 1319 session_tcp_established(struct peer *peer) 1320 { 1321 struct sockaddr_storage ss; 1322 socklen_t len; 1323 1324 len = sizeof(ss); 1325 if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1326 log_warn("getsockname"); 1327 sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port); 1328 len = sizeof(ss); 1329 if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1) 1330 log_warn("getpeername"); 1331 sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port); 1332 1333 get_alternate_addr(&peer->local, &peer->remote, &peer->local_alt, 1334 &peer->if_scope); 1335 } 1336 1337 int 1338 session_capa_add(struct ibuf *opb, uint8_t capa_code, uint8_t capa_len) 1339 { 1340 int errs = 0; 1341 1342 errs += ibuf_add_n8(opb, capa_code); 1343 errs += ibuf_add_n8(opb, capa_len); 1344 return (errs); 1345 } 1346 1347 int 1348 session_capa_add_mp(struct ibuf *buf, uint8_t aid) 1349 { 1350 uint16_t afi; 1351 uint8_t safi; 1352 int errs = 0; 1353 1354 if (aid2afi(aid, &afi, &safi) == -1) { 1355 log_warn("%s: bad AID", __func__); 1356 return (-1); 1357 } 1358 1359 errs += ibuf_add_n16(buf, afi); 1360 errs += ibuf_add_zero(buf, 1); 1361 errs += ibuf_add_n8(buf, safi); 1362 1363 return (errs); 1364 } 1365 1366 int 1367 session_capa_add_afi(struct ibuf *b, uint8_t aid, uint8_t flags) 1368 { 1369 u_int errs = 0; 1370 uint16_t afi; 1371 uint8_t safi; 1372 1373 if (aid2afi(aid, &afi, &safi)) { 1374 log_warn("%s: bad AID", __func__); 1375 return (-1); 1376 } 1377 1378 errs += ibuf_add_n16(b, afi); 1379 errs += ibuf_add_n8(b, safi); 1380 errs += ibuf_add_n8(b, flags); 1381 1382 return (errs); 1383 } 1384 1385 struct bgp_msg * 1386 session_newmsg(enum msg_type msgtype, uint16_t len) 1387 { 1388 struct bgp_msg *msg; 1389 struct ibuf *buf; 1390 int errs = 0; 1391 1392 if ((buf = ibuf_open(len)) == NULL) 1393 return (NULL); 1394 1395 errs += ibuf_add(buf, marker, sizeof(marker)); 1396 errs += ibuf_add_n16(buf, len); 1397 errs += ibuf_add_n8(buf, msgtype); 1398 1399 if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) { 1400 ibuf_free(buf); 1401 return (NULL); 1402 } 1403 1404 msg->buf = buf; 1405 msg->type = msgtype; 1406 msg->len = len; 1407 1408 return (msg); 1409 } 1410 1411 int 1412 session_sendmsg(struct bgp_msg *msg, struct peer *p) 1413 { 1414 struct mrt *mrt; 1415 1416 LIST_FOREACH(mrt, &mrthead, entry) { 1417 if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE && 1418 mrt->type == MRT_UPDATE_OUT))) 1419 continue; 1420 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 1421 mrt->peer_id == p->conf.id || (mrt->group_id != 0 && 1422 mrt->group_id == p->conf.groupid)) 1423 mrt_dump_bgp_msg(mrt, ibuf_data(msg->buf), msg->len, p, 1424 msg->type); 1425 } 1426 1427 ibuf_close(&p->wbuf, msg->buf); 1428 if (!p->throttled && p->wbuf.queued > SESS_MSG_HIGH_MARK) { 1429 if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1) 1430 log_peer_warn(&p->conf, "imsg_compose XOFF"); 1431 else 1432 p->throttled = 1; 1433 } 1434 1435 free(msg); 1436 return (0); 1437 } 1438 1439 /* 1440 * Translate between internal roles and the value expected by RFC 9234. 1441 */ 1442 static uint8_t 1443 role2capa(enum role role) 1444 { 1445 switch (role) { 1446 case ROLE_CUSTOMER: 1447 return CAPA_ROLE_CUSTOMER; 1448 case ROLE_PROVIDER: 1449 return CAPA_ROLE_PROVIDER; 1450 case ROLE_RS: 1451 return CAPA_ROLE_RS; 1452 case ROLE_RS_CLIENT: 1453 return CAPA_ROLE_RS_CLIENT; 1454 case ROLE_PEER: 1455 return CAPA_ROLE_PEER; 1456 default: 1457 fatalx("Unsupported role for role capability"); 1458 } 1459 } 1460 1461 static enum role 1462 capa2role(uint8_t val) 1463 { 1464 switch (val) { 1465 case CAPA_ROLE_PROVIDER: 1466 return ROLE_PROVIDER; 1467 case CAPA_ROLE_RS: 1468 return ROLE_RS; 1469 case CAPA_ROLE_RS_CLIENT: 1470 return ROLE_RS_CLIENT; 1471 case CAPA_ROLE_CUSTOMER: 1472 return ROLE_CUSTOMER; 1473 case CAPA_ROLE_PEER: 1474 return ROLE_PEER; 1475 default: 1476 return ROLE_NONE; 1477 } 1478 } 1479 1480 void 1481 session_open(struct peer *p) 1482 { 1483 struct bgp_msg *buf; 1484 struct ibuf *opb; 1485 size_t len, optparamlen; 1486 uint16_t holdtime; 1487 uint8_t i; 1488 int errs = 0, extlen = 0; 1489 int mpcapa = 0; 1490 1491 1492 if ((opb = ibuf_dynamic(0, UINT16_MAX - 3)) == NULL) { 1493 bgp_fsm(p, EVNT_CON_FATAL); 1494 return; 1495 } 1496 1497 /* multiprotocol extensions, RFC 4760 */ 1498 for (i = AID_MIN; i < AID_MAX; i++) 1499 if (p->capa.ann.mp[i]) { /* 4 bytes data */ 1500 errs += session_capa_add(opb, CAPA_MP, 4); 1501 errs += session_capa_add_mp(opb, i); 1502 mpcapa++; 1503 } 1504 1505 /* route refresh, RFC 2918 */ 1506 if (p->capa.ann.refresh) /* no data */ 1507 errs += session_capa_add(opb, CAPA_REFRESH, 0); 1508 1509 /* BGP open policy, RFC 9234, only for ebgp sessions */ 1510 if (p->conf.ebgp && p->capa.ann.policy && 1511 p->conf.role != ROLE_NONE && 1512 (p->capa.ann.mp[AID_INET] || p->capa.ann.mp[AID_INET6] || 1513 mpcapa == 0)) { 1514 errs += session_capa_add(opb, CAPA_ROLE, 1); 1515 errs += ibuf_add_n8(opb, role2capa(p->conf.role)); 1516 } 1517 1518 /* graceful restart and End-of-RIB marker, RFC 4724 */ 1519 if (p->capa.ann.grestart.restart) { 1520 int rst = 0; 1521 uint16_t hdr = 0; 1522 1523 for (i = AID_MIN; i < AID_MAX; i++) { 1524 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) 1525 rst++; 1526 } 1527 1528 /* Only set the R-flag if no graceful restart is ongoing */ 1529 if (!rst) 1530 hdr |= CAPA_GR_R_FLAG; 1531 errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr)); 1532 errs += ibuf_add_n16(opb, hdr); 1533 } 1534 1535 /* 4-bytes AS numbers, RFC6793 */ 1536 if (p->capa.ann.as4byte) { /* 4 bytes data */ 1537 errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(uint32_t)); 1538 errs += ibuf_add_n32(opb, p->conf.local_as); 1539 } 1540 1541 /* advertisement of multiple paths, RFC7911 */ 1542 if (p->capa.ann.add_path[AID_MIN]) { /* variable */ 1543 uint8_t aplen; 1544 1545 if (mpcapa) 1546 aplen = 4 * mpcapa; 1547 else /* AID_INET */ 1548 aplen = 4; 1549 errs += session_capa_add(opb, CAPA_ADD_PATH, aplen); 1550 if (mpcapa) { 1551 for (i = AID_MIN; i < AID_MAX; i++) { 1552 if (p->capa.ann.mp[i]) { 1553 errs += session_capa_add_afi(opb, 1554 i, p->capa.ann.add_path[i] & 1555 CAPA_AP_MASK); 1556 } 1557 } 1558 } else { /* AID_INET */ 1559 errs += session_capa_add_afi(opb, AID_INET, 1560 p->capa.ann.add_path[AID_INET] & CAPA_AP_MASK); 1561 } 1562 } 1563 1564 /* enhanced route-refresh, RFC7313 */ 1565 if (p->capa.ann.enhanced_rr) /* no data */ 1566 errs += session_capa_add(opb, CAPA_ENHANCED_RR, 0); 1567 1568 if (errs) { 1569 ibuf_free(opb); 1570 bgp_fsm(p, EVNT_CON_FATAL); 1571 return; 1572 } 1573 1574 optparamlen = ibuf_size(opb); 1575 len = MSGSIZE_OPEN_MIN + optparamlen; 1576 if (optparamlen == 0) { 1577 /* nothing */ 1578 } else if (optparamlen + 2 >= 255) { 1579 /* RFC9072: use 255 as magic size and request extra header */ 1580 optparamlen = 255; 1581 extlen = 1; 1582 /* 3 byte OPT_PARAM_EXT_LEN and OPT_PARAM_CAPABILITIES */ 1583 len += 2 * 3; 1584 } else { 1585 /* regular capabilities header */ 1586 optparamlen += 2; 1587 len += 2; 1588 } 1589 1590 if ((buf = session_newmsg(OPEN, len)) == NULL) { 1591 ibuf_free(opb); 1592 bgp_fsm(p, EVNT_CON_FATAL); 1593 return; 1594 } 1595 1596 if (p->conf.holdtime) 1597 holdtime = p->conf.holdtime; 1598 else 1599 holdtime = conf->holdtime; 1600 1601 errs += ibuf_add_n8(buf->buf, 4); 1602 errs += ibuf_add_n16(buf->buf, p->conf.local_short_as); 1603 errs += ibuf_add_n16(buf->buf, holdtime); 1604 /* is already in network byte order */ 1605 errs += ibuf_add_n32(buf->buf, conf->bgpid); 1606 errs += ibuf_add_n8(buf->buf, optparamlen); 1607 1608 if (extlen) { 1609 /* RFC9072 extra header which spans over the capabilities hdr */ 1610 errs += ibuf_add_n8(buf->buf, OPT_PARAM_EXT_LEN); 1611 errs += ibuf_add_n16(buf->buf, ibuf_size(opb) + 1 + 2); 1612 } 1613 1614 if (optparamlen) { 1615 errs += ibuf_add_n8(buf->buf, OPT_PARAM_CAPABILITIES); 1616 1617 if (extlen) { 1618 /* RFC9072: 2-byte extended length */ 1619 errs += ibuf_add_n16(buf->buf, ibuf_size(opb)); 1620 } else { 1621 errs += ibuf_add_n8(buf->buf, ibuf_size(opb)); 1622 } 1623 errs += ibuf_add_buf(buf->buf, opb); 1624 } 1625 1626 ibuf_free(opb); 1627 1628 if (errs) { 1629 ibuf_free(buf->buf); 1630 free(buf); 1631 bgp_fsm(p, EVNT_CON_FATAL); 1632 return; 1633 } 1634 1635 if (session_sendmsg(buf, p) == -1) { 1636 bgp_fsm(p, EVNT_CON_FATAL); 1637 return; 1638 } 1639 1640 p->stats.msg_sent_open++; 1641 } 1642 1643 void 1644 session_keepalive(struct peer *p) 1645 { 1646 struct bgp_msg *buf; 1647 1648 if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL || 1649 session_sendmsg(buf, p) == -1) { 1650 bgp_fsm(p, EVNT_CON_FATAL); 1651 return; 1652 } 1653 1654 start_timer_keepalive(p); 1655 p->stats.msg_sent_keepalive++; 1656 } 1657 1658 void 1659 session_update(uint32_t peerid, struct ibuf *ibuf) 1660 { 1661 struct peer *p; 1662 struct bgp_msg *buf; 1663 1664 if ((p = getpeerbyid(conf, peerid)) == NULL) { 1665 log_warnx("no such peer: id=%u", peerid); 1666 return; 1667 } 1668 1669 if (p->state != STATE_ESTABLISHED) 1670 return; 1671 1672 if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + ibuf_size(ibuf))) == 1673 NULL) { 1674 bgp_fsm(p, EVNT_CON_FATAL); 1675 return; 1676 } 1677 1678 if (ibuf_add_buf(buf->buf, ibuf)) { 1679 ibuf_free(buf->buf); 1680 free(buf); 1681 bgp_fsm(p, EVNT_CON_FATAL); 1682 return; 1683 } 1684 1685 if (session_sendmsg(buf, p) == -1) { 1686 bgp_fsm(p, EVNT_CON_FATAL); 1687 return; 1688 } 1689 1690 start_timer_keepalive(p); 1691 p->stats.msg_sent_update++; 1692 } 1693 1694 void 1695 session_notification_data(struct peer *p, uint8_t errcode, uint8_t subcode, 1696 void *data, size_t datalen) 1697 { 1698 struct ibuf ibuf; 1699 1700 ibuf_from_buffer(&ibuf, data, datalen); 1701 session_notification(p, errcode, subcode, &ibuf); 1702 } 1703 1704 void 1705 session_notification(struct peer *p, uint8_t errcode, uint8_t subcode, 1706 struct ibuf *ibuf) 1707 { 1708 struct bgp_msg *buf; 1709 int errs = 0; 1710 size_t datalen = 0; 1711 1712 switch (p->state) { 1713 case STATE_OPENSENT: 1714 case STATE_OPENCONFIRM: 1715 case STATE_ESTABLISHED: 1716 break; 1717 default: 1718 /* session not open, no need to send notification */ 1719 log_notification(p, errcode, subcode, ibuf, "dropping"); 1720 return; 1721 } 1722 1723 log_notification(p, errcode, subcode, ibuf, "sending"); 1724 1725 /* cap to maximum size */ 1726 if (ibuf != NULL) { 1727 if (ibuf_size(ibuf) > 1728 MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN) { 1729 log_peer_warnx(&p->conf, 1730 "oversized notification, data trunkated"); 1731 ibuf_truncate(ibuf, MAX_PKTSIZE - 1732 MSGSIZE_NOTIFICATION_MIN); 1733 } 1734 datalen = ibuf_size(ibuf); 1735 } 1736 1737 if ((buf = session_newmsg(NOTIFICATION, 1738 MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) { 1739 bgp_fsm(p, EVNT_CON_FATAL); 1740 return; 1741 } 1742 1743 errs += ibuf_add_n8(buf->buf, errcode); 1744 errs += ibuf_add_n8(buf->buf, subcode); 1745 1746 if (ibuf != NULL) 1747 errs += ibuf_add_buf(buf->buf, ibuf); 1748 1749 if (errs) { 1750 ibuf_free(buf->buf); 1751 free(buf); 1752 bgp_fsm(p, EVNT_CON_FATAL); 1753 return; 1754 } 1755 1756 if (session_sendmsg(buf, p) == -1) { 1757 bgp_fsm(p, EVNT_CON_FATAL); 1758 return; 1759 } 1760 1761 p->stats.msg_sent_notification++; 1762 p->stats.last_sent_errcode = errcode; 1763 p->stats.last_sent_suberr = subcode; 1764 } 1765 1766 int 1767 session_neighbor_rrefresh(struct peer *p) 1768 { 1769 uint8_t i; 1770 1771 if (!(p->capa.neg.refresh || p->capa.neg.enhanced_rr)) 1772 return (-1); 1773 1774 for (i = AID_MIN; i < AID_MAX; i++) { 1775 if (p->capa.neg.mp[i] != 0) 1776 session_rrefresh(p, i, ROUTE_REFRESH_REQUEST); 1777 } 1778 1779 return (0); 1780 } 1781 1782 void 1783 session_rrefresh(struct peer *p, uint8_t aid, uint8_t subtype) 1784 { 1785 struct bgp_msg *buf; 1786 int errs = 0; 1787 uint16_t afi; 1788 uint8_t safi; 1789 1790 switch (subtype) { 1791 case ROUTE_REFRESH_REQUEST: 1792 p->stats.refresh_sent_req++; 1793 break; 1794 case ROUTE_REFRESH_BEGIN_RR: 1795 case ROUTE_REFRESH_END_RR: 1796 /* requires enhanced route refresh */ 1797 if (!p->capa.neg.enhanced_rr) 1798 return; 1799 if (subtype == ROUTE_REFRESH_BEGIN_RR) 1800 p->stats.refresh_sent_borr++; 1801 else 1802 p->stats.refresh_sent_eorr++; 1803 break; 1804 default: 1805 fatalx("session_rrefresh: bad subtype %d", subtype); 1806 } 1807 1808 if (aid2afi(aid, &afi, &safi) == -1) 1809 fatalx("session_rrefresh: bad afi/safi pair"); 1810 1811 if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) { 1812 bgp_fsm(p, EVNT_CON_FATAL); 1813 return; 1814 } 1815 1816 errs += ibuf_add_n16(buf->buf, afi); 1817 errs += ibuf_add_n8(buf->buf, subtype); 1818 errs += ibuf_add_n8(buf->buf, safi); 1819 1820 if (errs) { 1821 ibuf_free(buf->buf); 1822 free(buf); 1823 bgp_fsm(p, EVNT_CON_FATAL); 1824 return; 1825 } 1826 1827 if (session_sendmsg(buf, p) == -1) { 1828 bgp_fsm(p, EVNT_CON_FATAL); 1829 return; 1830 } 1831 1832 p->stats.msg_sent_rrefresh++; 1833 } 1834 1835 int 1836 session_graceful_restart(struct peer *p) 1837 { 1838 uint8_t i; 1839 1840 timer_set(&p->timers, Timer_RestartTimeout, 1841 p->capa.neg.grestart.timeout); 1842 1843 for (i = AID_MIN; i < AID_MAX; i++) { 1844 if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) { 1845 if (imsg_rde(IMSG_SESSION_STALE, p->conf.id, 1846 &i, sizeof(i)) == -1) 1847 return (-1); 1848 log_peer_warnx(&p->conf, 1849 "graceful restart of %s, keeping routes", 1850 aid2str(i)); 1851 p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING; 1852 } else if (p->capa.neg.mp[i]) { 1853 if (imsg_rde(IMSG_SESSION_NOGRACE, p->conf.id, 1854 &i, sizeof(i)) == -1) 1855 return (-1); 1856 log_peer_warnx(&p->conf, 1857 "graceful restart of %s, flushing routes", 1858 aid2str(i)); 1859 } 1860 } 1861 return (0); 1862 } 1863 1864 int 1865 session_graceful_stop(struct peer *p) 1866 { 1867 uint8_t i; 1868 1869 for (i = AID_MIN; i < AID_MAX; i++) { 1870 /* 1871 * Only flush if the peer is restarting and the timeout fired. 1872 * In all other cases the session was already flushed when the 1873 * session went down or when the new open message was parsed. 1874 */ 1875 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) { 1876 log_peer_warnx(&p->conf, "graceful restart of %s, " 1877 "time-out, flushing", aid2str(i)); 1878 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 1879 &i, sizeof(i)) == -1) 1880 return (-1); 1881 } 1882 p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING; 1883 } 1884 return (0); 1885 } 1886 1887 int 1888 session_dispatch_msg(struct pollfd *pfd, struct peer *p) 1889 { 1890 ssize_t n; 1891 socklen_t len; 1892 int error; 1893 1894 if (p->state == STATE_CONNECT) { 1895 if (pfd->revents & POLLOUT) { 1896 if (pfd->revents & POLLIN) { 1897 /* error occurred */ 1898 len = sizeof(error); 1899 if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR, 1900 &error, &len) == -1 || error) { 1901 if (error) 1902 errno = error; 1903 if (errno != p->lasterr) { 1904 log_peer_warn(&p->conf, 1905 "socket error"); 1906 p->lasterr = errno; 1907 } 1908 bgp_fsm(p, EVNT_CON_OPENFAIL); 1909 return (1); 1910 } 1911 } 1912 bgp_fsm(p, EVNT_CON_OPEN); 1913 return (1); 1914 } 1915 if (pfd->revents & POLLHUP) { 1916 bgp_fsm(p, EVNT_CON_OPENFAIL); 1917 return (1); 1918 } 1919 if (pfd->revents & (POLLERR|POLLNVAL)) { 1920 bgp_fsm(p, EVNT_CON_FATAL); 1921 return (1); 1922 } 1923 return (0); 1924 } 1925 1926 if (pfd->revents & POLLHUP) { 1927 bgp_fsm(p, EVNT_CON_CLOSED); 1928 return (1); 1929 } 1930 if (pfd->revents & (POLLERR|POLLNVAL)) { 1931 bgp_fsm(p, EVNT_CON_FATAL); 1932 return (1); 1933 } 1934 1935 if (pfd->revents & POLLOUT && p->wbuf.queued) { 1936 if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) { 1937 if (error == 0) 1938 log_peer_warnx(&p->conf, "Connection closed"); 1939 else if (error == -1) 1940 log_peer_warn(&p->conf, "write error"); 1941 bgp_fsm(p, EVNT_CON_FATAL); 1942 return (1); 1943 } 1944 p->stats.last_write = getmonotime(); 1945 start_timer_sendholdtime(p); 1946 if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) { 1947 if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1) 1948 log_peer_warn(&p->conf, "imsg_compose XON"); 1949 else 1950 p->throttled = 0; 1951 } 1952 if (!(pfd->revents & POLLIN)) 1953 return (1); 1954 } 1955 1956 if (p->rbuf && pfd->revents & POLLIN) { 1957 if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos, 1958 sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) { 1959 if (errno != EINTR && errno != EAGAIN) { 1960 log_peer_warn(&p->conf, "read error"); 1961 bgp_fsm(p, EVNT_CON_FATAL); 1962 } 1963 return (1); 1964 } 1965 if (n == 0) { /* connection closed */ 1966 bgp_fsm(p, EVNT_CON_CLOSED); 1967 return (1); 1968 } 1969 1970 p->rbuf->wpos += n; 1971 p->stats.last_read = getmonotime(); 1972 return (1); 1973 } 1974 return (0); 1975 } 1976 1977 void 1978 session_process_msg(struct peer *p) 1979 { 1980 struct mrt *mrt; 1981 ssize_t rpos, av, left; 1982 int processed = 0; 1983 uint16_t msglen; 1984 uint8_t msgtype; 1985 1986 rpos = 0; 1987 av = p->rbuf->wpos; 1988 p->rpending = 0; 1989 1990 /* 1991 * session might drop to IDLE -> buffers deallocated 1992 * we MUST check rbuf != NULL before use 1993 */ 1994 for (;;) { 1995 if (p->rbuf == NULL) 1996 return; 1997 if (rpos + MSGSIZE_HEADER > av) 1998 break; 1999 if (parse_header(p, p->rbuf->buf + rpos, &msglen, 2000 &msgtype) == -1) 2001 return; 2002 if (rpos + msglen > av) 2003 break; 2004 p->rbuf->rptr = p->rbuf->buf + rpos; 2005 2006 /* dump to MRT as soon as we have a full packet */ 2007 LIST_FOREACH(mrt, &mrthead, entry) { 2008 if (!(mrt->type == MRT_ALL_IN || (msgtype == UPDATE && 2009 mrt->type == MRT_UPDATE_IN))) 2010 continue; 2011 if ((mrt->peer_id == 0 && mrt->group_id == 0) || 2012 mrt->peer_id == p->conf.id || (mrt->group_id != 0 && 2013 mrt->group_id == p->conf.groupid)) 2014 mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p, 2015 msgtype); 2016 } 2017 2018 switch (msgtype) { 2019 case OPEN: 2020 bgp_fsm(p, EVNT_RCVD_OPEN); 2021 p->stats.msg_rcvd_open++; 2022 break; 2023 case UPDATE: 2024 bgp_fsm(p, EVNT_RCVD_UPDATE); 2025 p->stats.msg_rcvd_update++; 2026 break; 2027 case NOTIFICATION: 2028 bgp_fsm(p, EVNT_RCVD_NOTIFICATION); 2029 p->stats.msg_rcvd_notification++; 2030 break; 2031 case KEEPALIVE: 2032 bgp_fsm(p, EVNT_RCVD_KEEPALIVE); 2033 p->stats.msg_rcvd_keepalive++; 2034 break; 2035 case RREFRESH: 2036 parse_rrefresh(p); 2037 p->stats.msg_rcvd_rrefresh++; 2038 break; 2039 default: /* cannot happen */ 2040 session_notification_data(p, ERR_HEADER, ERR_HDR_TYPE, 2041 &msgtype, 1); 2042 log_warnx("received message with unknown type %u", 2043 msgtype); 2044 bgp_fsm(p, EVNT_CON_FATAL); 2045 } 2046 rpos += msglen; 2047 if (++processed > MSG_PROCESS_LIMIT) { 2048 p->rpending = 1; 2049 break; 2050 } 2051 } 2052 2053 if (p->rbuf == NULL) 2054 return; 2055 if (rpos < av) { 2056 left = av - rpos; 2057 memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left); 2058 p->rbuf->wpos = left; 2059 } else 2060 p->rbuf->wpos = 0; 2061 } 2062 2063 int 2064 parse_header(struct peer *peer, u_char *data, uint16_t *len, uint8_t *type) 2065 { 2066 u_char *p; 2067 uint16_t olen; 2068 2069 /* caller MUST make sure we are getting 19 bytes! */ 2070 p = data; 2071 if (memcmp(p, marker, sizeof(marker))) { 2072 log_peer_warnx(&peer->conf, "sync error"); 2073 session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL); 2074 bgp_fsm(peer, EVNT_CON_FATAL); 2075 return (-1); 2076 } 2077 p += MSGSIZE_HEADER_MARKER; 2078 2079 memcpy(&olen, p, 2); 2080 *len = ntohs(olen); 2081 p += 2; 2082 memcpy(type, p, 1); 2083 2084 if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) { 2085 log_peer_warnx(&peer->conf, 2086 "received message: illegal length: %u byte", *len); 2087 session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN, 2088 &olen, sizeof(olen)); 2089 bgp_fsm(peer, EVNT_CON_FATAL); 2090 return (-1); 2091 } 2092 2093 switch (*type) { 2094 case OPEN: 2095 if (*len < MSGSIZE_OPEN_MIN) { 2096 log_peer_warnx(&peer->conf, 2097 "received OPEN: illegal len: %u byte", *len); 2098 session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN, 2099 &olen, sizeof(olen)); 2100 bgp_fsm(peer, EVNT_CON_FATAL); 2101 return (-1); 2102 } 2103 break; 2104 case NOTIFICATION: 2105 if (*len < MSGSIZE_NOTIFICATION_MIN) { 2106 log_peer_warnx(&peer->conf, 2107 "received NOTIFICATION: illegal len: %u byte", 2108 *len); 2109 session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN, 2110 &olen, sizeof(olen)); 2111 bgp_fsm(peer, EVNT_CON_FATAL); 2112 return (-1); 2113 } 2114 break; 2115 case UPDATE: 2116 if (*len < MSGSIZE_UPDATE_MIN) { 2117 log_peer_warnx(&peer->conf, 2118 "received UPDATE: illegal len: %u byte", *len); 2119 session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN, 2120 &olen, sizeof(olen)); 2121 bgp_fsm(peer, EVNT_CON_FATAL); 2122 return (-1); 2123 } 2124 break; 2125 case KEEPALIVE: 2126 if (*len != MSGSIZE_KEEPALIVE) { 2127 log_peer_warnx(&peer->conf, 2128 "received KEEPALIVE: illegal len: %u byte", *len); 2129 session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN, 2130 &olen, sizeof(olen)); 2131 bgp_fsm(peer, EVNT_CON_FATAL); 2132 return (-1); 2133 } 2134 break; 2135 case RREFRESH: 2136 if (*len < MSGSIZE_RREFRESH_MIN) { 2137 log_peer_warnx(&peer->conf, 2138 "received RREFRESH: illegal len: %u byte", *len); 2139 session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN, 2140 &olen, sizeof(olen)); 2141 bgp_fsm(peer, EVNT_CON_FATAL); 2142 return (-1); 2143 } 2144 break; 2145 default: 2146 log_peer_warnx(&peer->conf, 2147 "received msg with unknown type %u", *type); 2148 session_notification_data(peer, ERR_HEADER, ERR_HDR_TYPE, 2149 type, 1); 2150 bgp_fsm(peer, EVNT_CON_FATAL); 2151 return (-1); 2152 } 2153 return (0); 2154 } 2155 2156 int 2157 parse_open(struct peer *peer) 2158 { 2159 struct ibuf ibuf; 2160 u_char *p; 2161 uint8_t version, rversion; 2162 uint16_t short_as, msglen; 2163 uint16_t holdtime, myholdtime; 2164 uint32_t as, bgpid; 2165 uint8_t optparamlen; 2166 2167 p = peer->rbuf->rptr; 2168 p += MSGSIZE_HEADER_MARKER; 2169 memcpy(&msglen, p, sizeof(msglen)); 2170 msglen = ntohs(msglen); 2171 2172 p = peer->rbuf->rptr; 2173 p += MSGSIZE_HEADER; /* header is already checked */ 2174 msglen -= MSGSIZE_HEADER; 2175 2176 /* XXX */ 2177 ibuf_from_buffer(&ibuf, p, msglen); 2178 2179 if (ibuf_get_n8(&ibuf, &version) == -1 || 2180 ibuf_get_n16(&ibuf, &short_as) == -1 || 2181 ibuf_get_n16(&ibuf, &holdtime) == -1 || 2182 ibuf_get_n32(&ibuf, &bgpid) == -1 || 2183 ibuf_get_n8(&ibuf, &optparamlen) == -1) 2184 goto bad_len; 2185 2186 if (version != BGP_VERSION) { 2187 log_peer_warnx(&peer->conf, 2188 "peer wants unrecognized version %u", version); 2189 if (version > BGP_VERSION) 2190 rversion = version - BGP_VERSION; 2191 else 2192 rversion = BGP_VERSION; 2193 session_notification_data(peer, ERR_OPEN, ERR_OPEN_VERSION, 2194 &rversion, sizeof(rversion)); 2195 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2196 return (-1); 2197 } 2198 2199 as = peer->short_as = short_as; 2200 if (as == 0) { 2201 log_peer_warnx(&peer->conf, 2202 "peer requests unacceptable AS %u", as); 2203 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL); 2204 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2205 return (-1); 2206 } 2207 2208 if (holdtime && holdtime < peer->conf.min_holdtime) { 2209 log_peer_warnx(&peer->conf, 2210 "peer requests unacceptable holdtime %u", holdtime); 2211 session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME, NULL); 2212 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2213 return (-1); 2214 } 2215 2216 myholdtime = peer->conf.holdtime; 2217 if (!myholdtime) 2218 myholdtime = conf->holdtime; 2219 if (holdtime < myholdtime) 2220 peer->holdtime = holdtime; 2221 else 2222 peer->holdtime = myholdtime; 2223 2224 /* check bgpid for validity - just disallow 0 */ 2225 if (bgpid == 0) { 2226 log_peer_warnx(&peer->conf, "peer BGPID 0 unacceptable"); 2227 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL); 2228 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2229 return (-1); 2230 } 2231 peer->remote_bgpid = bgpid; 2232 2233 if (optparamlen != 0) { 2234 struct ibuf oparams, op; 2235 uint8_t ext_type, op_type; 2236 uint16_t ext_len, op_len; 2237 2238 ibuf_from_ibuf(&oparams, &ibuf); 2239 2240 /* check for RFC9072 encoding */ 2241 if (ibuf_get_n8(&oparams, &ext_type) == -1) 2242 goto bad_len; 2243 if (ext_type == OPT_PARAM_EXT_LEN) { 2244 if (ibuf_get_n16(&oparams, &ext_len) == -1) 2245 goto bad_len; 2246 /* skip RFC9072 header */ 2247 if (ibuf_skip(&ibuf, 3) == -1) 2248 goto bad_len; 2249 } else { 2250 ext_len = optparamlen; 2251 ibuf_rewind(&oparams); 2252 } 2253 2254 if (ibuf_truncate(&oparams, ext_len) == -1 || 2255 ibuf_skip(&ibuf, ext_len) == -1) 2256 goto bad_len; 2257 2258 while (ibuf_size(&oparams) > 0) { 2259 if (ibuf_get_n8(&oparams, &op_type) == -1) 2260 goto bad_len; 2261 2262 if (ext_type == OPT_PARAM_EXT_LEN) { 2263 if (ibuf_get_n16(&oparams, &op_len) == -1) 2264 goto bad_len; 2265 } else { 2266 uint8_t tmp; 2267 if (ibuf_get_n8(&oparams, &tmp) == -1) 2268 goto bad_len; 2269 op_len = tmp; 2270 } 2271 2272 if (ibuf_get_ibuf(&oparams, op_len, &op) == -1) 2273 goto bad_len; 2274 2275 switch (op_type) { 2276 case OPT_PARAM_CAPABILITIES: /* RFC 3392 */ 2277 if (parse_capabilities(peer, &op, &as) == -1) { 2278 session_notification(peer, ERR_OPEN, 0, 2279 NULL); 2280 change_state(peer, STATE_IDLE, 2281 EVNT_RCVD_OPEN); 2282 return (-1); 2283 } 2284 break; 2285 case OPT_PARAM_AUTH: /* deprecated */ 2286 default: 2287 /* 2288 * unsupported type 2289 * the RFCs tell us to leave the data section 2290 * empty and notify the peer with ERR_OPEN, 2291 * ERR_OPEN_OPT. How the peer should know 2292 * _which_ optional parameter we don't support 2293 * is beyond me. 2294 */ 2295 log_peer_warnx(&peer->conf, 2296 "received OPEN message with unsupported " 2297 "optional parameter: type %u", op_type); 2298 session_notification(peer, ERR_OPEN, 2299 ERR_OPEN_OPT, NULL); 2300 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2301 return (-1); 2302 } 2303 } 2304 } 2305 2306 if (ibuf_size(&ibuf) != 0) { 2307 bad_len: 2308 log_peer_warnx(&peer->conf, 2309 "corrupt OPEN message received: length mismatch"); 2310 session_notification(peer, ERR_OPEN, 0, NULL); 2311 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2312 return (-1); 2313 } 2314 2315 /* if remote-as is zero and it's a cloned neighbor, accept any */ 2316 if (peer->template && !peer->conf.remote_as && as != AS_TRANS) { 2317 peer->conf.remote_as = as; 2318 peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as); 2319 if (!peer->conf.ebgp) 2320 /* force enforce_as off for iBGP sessions */ 2321 peer->conf.enforce_as = ENFORCE_AS_OFF; 2322 } 2323 2324 if (peer->conf.remote_as != as) { 2325 log_peer_warnx(&peer->conf, "peer sent wrong AS %s", 2326 log_as(as)); 2327 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL); 2328 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2329 return (-1); 2330 } 2331 2332 /* on iBGP sessions check for bgpid collision */ 2333 if (!peer->conf.ebgp && peer->remote_bgpid == conf->bgpid) { 2334 struct in_addr ina; 2335 ina.s_addr = htonl(bgpid); 2336 log_peer_warnx(&peer->conf, "peer BGPID %s conflicts with ours", 2337 inet_ntoa(ina)); 2338 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL); 2339 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2340 return (-1); 2341 } 2342 2343 if (capa_neg_calc(peer) == -1) { 2344 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2345 return (-1); 2346 } 2347 2348 return (0); 2349 } 2350 2351 int 2352 parse_update(struct peer *peer) 2353 { 2354 u_char *p; 2355 uint16_t datalen; 2356 2357 /* 2358 * we pass the message verbatim to the rde. 2359 * in case of errors the whole session is reset with a 2360 * notification anyway, we only need to know the peer 2361 */ 2362 p = peer->rbuf->rptr; 2363 p += MSGSIZE_HEADER_MARKER; 2364 memcpy(&datalen, p, sizeof(datalen)); 2365 datalen = ntohs(datalen); 2366 2367 p = peer->rbuf->rptr; 2368 p += MSGSIZE_HEADER; /* header is already checked */ 2369 datalen -= MSGSIZE_HEADER; 2370 2371 if (imsg_rde(IMSG_UPDATE, peer->conf.id, p, datalen) == -1) 2372 return (-1); 2373 2374 return (0); 2375 } 2376 2377 int 2378 parse_rrefresh(struct peer *peer) 2379 { 2380 struct route_refresh rr; 2381 struct ibuf ibuf; 2382 uint16_t afi, datalen; 2383 uint8_t aid, safi, subtype; 2384 u_char *p; 2385 2386 p = peer->rbuf->rptr; 2387 p += MSGSIZE_HEADER_MARKER; 2388 memcpy(&datalen, p, sizeof(datalen)); 2389 datalen = ntohs(datalen); 2390 2391 p = peer->rbuf->rptr; 2392 p += MSGSIZE_HEADER; /* header is already checked */ 2393 datalen -= MSGSIZE_HEADER; 2394 2395 /* XXX */ 2396 ibuf_from_buffer(&ibuf, p, datalen); 2397 2398 if (ibuf_get_n16(&ibuf, &afi) == -1 || 2399 ibuf_get_n8(&ibuf, &subtype) == -1 || 2400 ibuf_get_n8(&ibuf, &safi) == -1) { 2401 /* minimum size checked in session_process_msg() */ 2402 fatalx("%s: message too small", __func__); 2403 } 2404 2405 /* check subtype if peer announced enhanced route refresh */ 2406 if (peer->capa.neg.enhanced_rr) { 2407 switch (subtype) { 2408 case ROUTE_REFRESH_REQUEST: 2409 /* no ORF support, so no oversized RREFRESH msgs */ 2410 if (datalen != MSGSIZE_RREFRESH) { 2411 log_peer_warnx(&peer->conf, 2412 "received RREFRESH: illegal len: %u byte", 2413 datalen); 2414 datalen = htons(datalen); 2415 session_notification_data(peer, ERR_HEADER, 2416 ERR_HDR_LEN, &datalen, sizeof(datalen)); 2417 bgp_fsm(peer, EVNT_CON_FATAL); 2418 return (-1); 2419 } 2420 peer->stats.refresh_rcvd_req++; 2421 break; 2422 case ROUTE_REFRESH_BEGIN_RR: 2423 case ROUTE_REFRESH_END_RR: 2424 /* special handling for RFC7313 */ 2425 if (datalen != MSGSIZE_RREFRESH) { 2426 log_peer_warnx(&peer->conf, 2427 "received RREFRESH: illegal len: %u byte", 2428 datalen); 2429 ibuf_rewind(&ibuf); 2430 session_notification(peer, ERR_RREFRESH, 2431 ERR_RR_INV_LEN, &ibuf); 2432 bgp_fsm(peer, EVNT_CON_FATAL); 2433 return (-1); 2434 } 2435 if (subtype == ROUTE_REFRESH_BEGIN_RR) 2436 peer->stats.refresh_rcvd_borr++; 2437 else 2438 peer->stats.refresh_rcvd_eorr++; 2439 break; 2440 default: 2441 log_peer_warnx(&peer->conf, "peer sent bad refresh, " 2442 "bad subtype %d", subtype); 2443 return (0); 2444 } 2445 } else { 2446 /* force subtype to default */ 2447 subtype = ROUTE_REFRESH_REQUEST; 2448 peer->stats.refresh_rcvd_req++; 2449 } 2450 2451 /* afi/safi unchecked - unrecognized values will be ignored anyway */ 2452 if (afi2aid(afi, safi, &aid) == -1) { 2453 log_peer_warnx(&peer->conf, "peer sent bad refresh, " 2454 "invalid afi/safi pair"); 2455 return (0); 2456 } 2457 2458 if (!peer->capa.neg.refresh && !peer->capa.neg.enhanced_rr) { 2459 log_peer_warnx(&peer->conf, "peer sent unexpected refresh"); 2460 return (0); 2461 } 2462 2463 rr.aid = aid; 2464 rr.subtype = subtype; 2465 2466 if (imsg_rde(IMSG_REFRESH, peer->conf.id, &rr, sizeof(rr)) == -1) 2467 return (-1); 2468 2469 return (0); 2470 } 2471 2472 void 2473 parse_notification(struct peer *peer) 2474 { 2475 struct ibuf ibuf; 2476 u_char *p; 2477 uint16_t datalen; 2478 uint8_t errcode, subcode; 2479 uint8_t reason_len; 2480 2481 /* just log */ 2482 p = peer->rbuf->rptr; 2483 p += MSGSIZE_HEADER_MARKER; 2484 memcpy(&datalen, p, sizeof(datalen)); 2485 datalen = ntohs(datalen); 2486 2487 p = peer->rbuf->rptr; 2488 p += MSGSIZE_HEADER; /* header is already checked */ 2489 datalen -= MSGSIZE_HEADER; 2490 2491 /* XXX */ 2492 ibuf_from_buffer(&ibuf, p, datalen); 2493 2494 if (ibuf_get_n8(&ibuf, &errcode) == -1 || 2495 ibuf_get_n8(&ibuf, &subcode) == -1) { 2496 log_peer_warnx(&peer->conf, "received bad notification"); 2497 goto done; 2498 } 2499 2500 peer->errcnt++; 2501 peer->stats.last_rcvd_errcode = errcode; 2502 peer->stats.last_rcvd_suberr = subcode; 2503 2504 log_notification(peer, errcode, subcode, &ibuf, "received"); 2505 2506 CTASSERT(sizeof(peer->stats.last_reason) > UINT8_MAX); 2507 memset(peer->stats.last_reason, 0, sizeof(peer->stats.last_reason)); 2508 if (errcode == ERR_CEASE && 2509 (subcode == ERR_CEASE_ADMIN_DOWN || 2510 subcode == ERR_CEASE_ADMIN_RESET)) { 2511 /* check if shutdown reason is included */ 2512 if (ibuf_get_n8(&ibuf, &reason_len) != -1 && reason_len != 0) { 2513 if (ibuf_get(&ibuf, peer->stats.last_reason, 2514 reason_len) == -1) 2515 log_peer_warnx(&peer->conf, 2516 "received truncated shutdown reason"); 2517 } 2518 } 2519 2520 done: 2521 change_state(peer, STATE_IDLE, EVNT_RCVD_NOTIFICATION); 2522 } 2523 2524 int 2525 parse_capabilities(struct peer *peer, struct ibuf *buf, uint32_t *as) 2526 { 2527 struct ibuf capabuf; 2528 uint16_t afi, gr_header; 2529 uint8_t capa_code, capa_len; 2530 uint8_t safi, aid, role, flags; 2531 2532 while (ibuf_size(buf) > 0) { 2533 if (ibuf_get_n8(buf, &capa_code) == -1 || 2534 ibuf_get_n8(buf, &capa_len) == -1) { 2535 log_peer_warnx(&peer->conf, "Bad capabilities attr " 2536 "length: too short"); 2537 return (-1); 2538 } 2539 if (ibuf_get_ibuf(buf, capa_len, &capabuf) == -1) { 2540 log_peer_warnx(&peer->conf, 2541 "Received bad capabilities attr length: " 2542 "len %zu smaller than capa_len %u", 2543 ibuf_size(buf), capa_len); 2544 return (-1); 2545 } 2546 2547 switch (capa_code) { 2548 case CAPA_MP: /* RFC 4760 */ 2549 if (capa_len != 4 || 2550 ibuf_get_n16(&capabuf, &afi) == -1 || 2551 ibuf_skip(&capabuf, 1) == -1 || 2552 ibuf_get_n8(&capabuf, &safi) == -1) { 2553 log_peer_warnx(&peer->conf, 2554 "Received bad multi protocol capability"); 2555 break; 2556 } 2557 if (afi2aid(afi, safi, &aid) == -1) { 2558 log_peer_warnx(&peer->conf, 2559 "Received multi protocol capability: " 2560 " unknown AFI %u, safi %u pair", 2561 afi, safi); 2562 break; 2563 } 2564 peer->capa.peer.mp[aid] = 1; 2565 break; 2566 case CAPA_REFRESH: 2567 peer->capa.peer.refresh = 1; 2568 break; 2569 case CAPA_ROLE: 2570 if (capa_len != 1 || 2571 ibuf_get_n8(&capabuf, &role) == -1) { 2572 log_peer_warnx(&peer->conf, 2573 "Received bad role capability"); 2574 break; 2575 } 2576 if (!peer->conf.ebgp) { 2577 log_peer_warnx(&peer->conf, 2578 "Received role capability on iBGP session"); 2579 break; 2580 } 2581 peer->capa.peer.policy = 1; 2582 peer->remote_role = capa2role(role); 2583 break; 2584 case CAPA_RESTART: 2585 if (capa_len == 2) { 2586 /* peer only supports EoR marker */ 2587 peer->capa.peer.grestart.restart = 1; 2588 peer->capa.peer.grestart.timeout = 0; 2589 break; 2590 } else if (capa_len % 4 != 2) { 2591 log_peer_warnx(&peer->conf, 2592 "Bad graceful restart capability"); 2593 peer->capa.peer.grestart.restart = 0; 2594 peer->capa.peer.grestart.timeout = 0; 2595 break; 2596 } 2597 2598 if (ibuf_get_n16(&capabuf, &gr_header) == -1) { 2599 bad_gr_restart: 2600 log_peer_warnx(&peer->conf, 2601 "Bad graceful restart capability"); 2602 peer->capa.peer.grestart.restart = 0; 2603 peer->capa.peer.grestart.timeout = 0; 2604 break; 2605 } 2606 2607 peer->capa.peer.grestart.timeout = 2608 gr_header & CAPA_GR_TIMEMASK; 2609 if (peer->capa.peer.grestart.timeout == 0) { 2610 log_peer_warnx(&peer->conf, "Received " 2611 "graceful restart with zero timeout"); 2612 peer->capa.peer.grestart.restart = 0; 2613 break; 2614 } 2615 2616 while (ibuf_size(&capabuf) > 0) { 2617 if (ibuf_get_n16(&capabuf, &afi) == -1 || 2618 ibuf_get_n8(&capabuf, &safi) == -1 || 2619 ibuf_get_n8(&capabuf, &flags) == -1) 2620 goto bad_gr_restart; 2621 if (afi2aid(afi, safi, &aid) == -1) { 2622 log_peer_warnx(&peer->conf, 2623 "Received graceful restart capa: " 2624 " unknown AFI %u, safi %u pair", 2625 afi, safi); 2626 continue; 2627 } 2628 peer->capa.peer.grestart.flags[aid] |= 2629 CAPA_GR_PRESENT; 2630 if (flags & CAPA_GR_F_FLAG) 2631 peer->capa.peer.grestart.flags[aid] |= 2632 CAPA_GR_FORWARD; 2633 if (gr_header & CAPA_GR_R_FLAG) 2634 peer->capa.peer.grestart.flags[aid] |= 2635 CAPA_GR_RESTART; 2636 peer->capa.peer.grestart.restart = 2; 2637 } 2638 break; 2639 case CAPA_AS4BYTE: 2640 if (capa_len != 4 || 2641 ibuf_get_n32(&capabuf, as) == -1) { 2642 log_peer_warnx(&peer->conf, 2643 "Received bad AS4BYTE capability"); 2644 peer->capa.peer.as4byte = 0; 2645 break; 2646 } 2647 if (*as == 0) { 2648 log_peer_warnx(&peer->conf, 2649 "peer requests unacceptable AS %u", *as); 2650 session_notification(peer, ERR_OPEN, 2651 ERR_OPEN_AS, NULL); 2652 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN); 2653 return (-1); 2654 } 2655 peer->capa.peer.as4byte = 1; 2656 break; 2657 case CAPA_ADD_PATH: 2658 if (capa_len % 4 != 0) { 2659 bad_add_path: 2660 log_peer_warnx(&peer->conf, 2661 "Received bad ADD-PATH capability"); 2662 memset(peer->capa.peer.add_path, 0, 2663 sizeof(peer->capa.peer.add_path)); 2664 break; 2665 } 2666 while (ibuf_size(&capabuf) > 0) { 2667 if (ibuf_get_n16(&capabuf, &afi) == -1 || 2668 ibuf_get_n8(&capabuf, &safi) == -1 || 2669 ibuf_get_n8(&capabuf, &flags) == -1) 2670 goto bad_add_path; 2671 if (afi2aid(afi, safi, &aid) == -1) { 2672 log_peer_warnx(&peer->conf, 2673 "Received ADD-PATH capa: " 2674 " unknown AFI %u, safi %u pair", 2675 afi, safi); 2676 memset(peer->capa.peer.add_path, 0, 2677 sizeof(peer->capa.peer.add_path)); 2678 break; 2679 } 2680 if (flags & ~CAPA_AP_BIDIR) { 2681 log_peer_warnx(&peer->conf, 2682 "Received ADD-PATH capa: " 2683 " bad flags %x", flags); 2684 memset(peer->capa.peer.add_path, 0, 2685 sizeof(peer->capa.peer.add_path)); 2686 break; 2687 } 2688 peer->capa.peer.add_path[aid] = flags; 2689 } 2690 break; 2691 case CAPA_ENHANCED_RR: 2692 peer->capa.peer.enhanced_rr = 1; 2693 break; 2694 default: 2695 break; 2696 } 2697 } 2698 2699 return (0); 2700 } 2701 2702 int 2703 capa_neg_calc(struct peer *p) 2704 { 2705 struct ibuf *ebuf; 2706 uint8_t i, hasmp = 0, capa_code, capa_len, capa_aid = 0; 2707 2708 /* a capability is accepted only if both sides announced it */ 2709 2710 p->capa.neg.refresh = 2711 (p->capa.ann.refresh && p->capa.peer.refresh) != 0; 2712 p->capa.neg.enhanced_rr = 2713 (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0; 2714 p->capa.neg.as4byte = 2715 (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0; 2716 2717 /* MP: both side must agree on the AFI,SAFI pair */ 2718 for (i = AID_MIN; i < AID_MAX; i++) { 2719 if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) 2720 p->capa.neg.mp[i] = 1; 2721 else 2722 p->capa.neg.mp[i] = 0; 2723 if (p->capa.ann.mp[i]) 2724 hasmp = 1; 2725 } 2726 /* if no MP capability present default to IPv4 unicast mode */ 2727 if (!hasmp) 2728 p->capa.neg.mp[AID_INET] = 1; 2729 2730 /* 2731 * graceful restart: the peer capabilities are of interest here. 2732 * It is necessary to compare the new values with the previous ones 2733 * and act accordingly. AFI/SAFI that are not part in the MP capability 2734 * are treated as not being present. 2735 * Also make sure that a flush happens if the session stopped 2736 * supporting graceful restart. 2737 */ 2738 2739 for (i = AID_MIN; i < AID_MAX; i++) { 2740 int8_t negflags; 2741 2742 /* disable GR if the AFI/SAFI is not present */ 2743 if ((p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT && 2744 p->capa.neg.mp[i] == 0)) 2745 p->capa.peer.grestart.flags[i] = 0; /* disable */ 2746 /* look at current GR state and decide what to do */ 2747 negflags = p->capa.neg.grestart.flags[i]; 2748 p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i]; 2749 if (negflags & CAPA_GR_RESTARTING) { 2750 if (p->capa.ann.grestart.restart != 0 && 2751 p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) { 2752 p->capa.neg.grestart.flags[i] |= 2753 CAPA_GR_RESTARTING; 2754 } else { 2755 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id, 2756 &i, sizeof(i)) == -1) { 2757 log_peer_warnx(&p->conf, 2758 "imsg send failed"); 2759 return (-1); 2760 } 2761 log_peer_warnx(&p->conf, "graceful restart of " 2762 "%s, not restarted, flushing", aid2str(i)); 2763 } 2764 } 2765 } 2766 p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout; 2767 p->capa.neg.grestart.restart = p->capa.peer.grestart.restart; 2768 if (p->capa.ann.grestart.restart == 0) 2769 p->capa.neg.grestart.restart = 0; 2770 2771 /* 2772 * ADD-PATH: set only those bits where both sides agree. 2773 * For this compare our send bit with the recv bit from the peer 2774 * and vice versa. 2775 * The flags are stored from this systems view point. 2776 * At index 0 the flags are set if any per-AID flag is set. 2777 */ 2778 memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path)); 2779 for (i = AID_MIN; i < AID_MAX; i++) { 2780 if (p->capa.neg.mp[i] == 0) 2781 continue; 2782 if ((p->capa.ann.add_path[i] & CAPA_AP_RECV) && 2783 (p->capa.peer.add_path[i] & CAPA_AP_SEND)) { 2784 p->capa.neg.add_path[i] |= CAPA_AP_RECV; 2785 p->capa.neg.add_path[0] |= CAPA_AP_RECV; 2786 } 2787 if ((p->capa.ann.add_path[i] & CAPA_AP_SEND) && 2788 (p->capa.peer.add_path[i] & CAPA_AP_RECV)) { 2789 p->capa.neg.add_path[i] |= CAPA_AP_SEND; 2790 p->capa.neg.add_path[0] |= CAPA_AP_SEND; 2791 } 2792 } 2793 2794 /* 2795 * Open policy: check that the policy is sensible. 2796 * 2797 * Make sure that the roles match and set the negotiated capability 2798 * to the role of the peer. So the RDE can inject the OTC attribute. 2799 * See RFC 9234, section 4.2. 2800 * These checks should only happen on ebgp sessions. 2801 */ 2802 if (p->capa.ann.policy != 0 && p->capa.peer.policy != 0 && 2803 p->conf.ebgp) { 2804 switch (p->conf.role) { 2805 case ROLE_PROVIDER: 2806 if (p->remote_role != ROLE_CUSTOMER) 2807 goto policyfail; 2808 break; 2809 case ROLE_RS: 2810 if (p->remote_role != ROLE_RS_CLIENT) 2811 goto policyfail; 2812 break; 2813 case ROLE_RS_CLIENT: 2814 if (p->remote_role != ROLE_RS) 2815 goto policyfail; 2816 break; 2817 case ROLE_CUSTOMER: 2818 if (p->remote_role != ROLE_PROVIDER) 2819 goto policyfail; 2820 break; 2821 case ROLE_PEER: 2822 if (p->remote_role != ROLE_PEER) 2823 goto policyfail; 2824 break; 2825 default: 2826 policyfail: 2827 log_peer_warnx(&p->conf, "open policy role mismatch: " 2828 "our role %s, their role %s", 2829 log_policy(p->conf.role), 2830 log_policy(p->remote_role)); 2831 session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL); 2832 return (-1); 2833 } 2834 p->capa.neg.policy = 1; 2835 } 2836 2837 /* enforce presence of open policy role capability */ 2838 if (p->capa.ann.policy == 2 && p->capa.peer.policy == 0 && 2839 p->conf.ebgp) { 2840 log_peer_warnx(&p->conf, "open policy role enforced but " 2841 "not present"); 2842 session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL); 2843 return (-1); 2844 } 2845 2846 /* enforce presence of other capabilities */ 2847 if (p->capa.ann.refresh == 2 && p->capa.neg.refresh == 0) { 2848 capa_code = CAPA_REFRESH; 2849 capa_len = 0; 2850 goto fail; 2851 } 2852 if (p->capa.ann.enhanced_rr == 2 && p->capa.neg.enhanced_rr == 0) { 2853 capa_code = CAPA_ENHANCED_RR; 2854 capa_len = 0; 2855 goto fail; 2856 } 2857 if (p->capa.ann.as4byte == 2 && p->capa.neg.as4byte == 0) { 2858 capa_code = CAPA_AS4BYTE; 2859 capa_len = 4; 2860 goto fail; 2861 } 2862 if (p->capa.ann.grestart.restart == 2 && 2863 p->capa.neg.grestart.restart == 0) { 2864 capa_code = CAPA_RESTART; 2865 capa_len = 2; 2866 goto fail; 2867 } 2868 for (i = AID_MIN; i < AID_MAX; i++) { 2869 if (p->capa.ann.mp[i] == 2 && p->capa.neg.mp[i] == 0) { 2870 capa_code = CAPA_MP; 2871 capa_len = 4; 2872 capa_aid = i; 2873 goto fail; 2874 } 2875 } 2876 2877 for (i = AID_MIN; i < AID_MAX; i++) { 2878 if (p->capa.neg.mp[i] == 0) 2879 continue; 2880 if ((p->capa.ann.add_path[i] & CAPA_AP_RECV_ENFORCE) && 2881 (p->capa.neg.add_path[i] & CAPA_AP_RECV) == 0) { 2882 capa_code = CAPA_ADD_PATH; 2883 capa_len = 4; 2884 capa_aid = i; 2885 goto fail; 2886 } 2887 if ((p->capa.ann.add_path[i] & CAPA_AP_SEND_ENFORCE) && 2888 (p->capa.neg.add_path[i] & CAPA_AP_SEND) == 0) { 2889 capa_code = CAPA_ADD_PATH; 2890 capa_len = 4; 2891 capa_aid = i; 2892 goto fail; 2893 } 2894 } 2895 2896 return (0); 2897 2898 fail: 2899 if ((ebuf = ibuf_dynamic(2, 256)) == NULL) 2900 return (-1); 2901 /* best effort, no problem if it fails */ 2902 session_capa_add(ebuf, capa_code, capa_len); 2903 if (capa_code == CAPA_MP) 2904 session_capa_add_mp(ebuf, capa_aid); 2905 else if (capa_code == CAPA_ADD_PATH) 2906 session_capa_add_afi(ebuf, capa_aid, 0); 2907 else if (capa_len > 0) 2908 ibuf_add_zero(ebuf, capa_len); 2909 2910 session_notification(p, ERR_OPEN, ERR_OPEN_CAPA, ebuf); 2911 ibuf_free(ebuf); 2912 return (-1); 2913 } 2914 2915 void 2916 session_dispatch_imsg(struct imsgbuf *imsgbuf, int idx, u_int *listener_cnt) 2917 { 2918 struct imsg imsg; 2919 struct ibuf ibuf; 2920 struct mrt xmrt; 2921 struct route_refresh rr; 2922 struct mrt *mrt; 2923 struct imsgbuf *i; 2924 struct peer *p; 2925 struct listen_addr *la, *next, nla; 2926 struct session_dependon sdon; 2927 struct bgpd_config tconf; 2928 size_t len; 2929 uint32_t peerid; 2930 int n, fd, depend_ok, restricted; 2931 uint16_t t; 2932 uint8_t aid, errcode, subcode; 2933 2934 while (imsgbuf) { 2935 if ((n = imsg_get(imsgbuf, &imsg)) == -1) 2936 fatal("session_dispatch_imsg: imsg_get error"); 2937 2938 if (n == 0) 2939 break; 2940 2941 peerid = imsg_get_id(&imsg); 2942 switch (imsg_get_type(&imsg)) { 2943 case IMSG_SOCKET_CONN: 2944 case IMSG_SOCKET_CONN_CTL: 2945 if (idx != PFD_PIPE_MAIN) 2946 fatalx("reconf request not from parent"); 2947 if ((fd = imsg_get_fd(&imsg)) == -1) { 2948 log_warnx("expected to receive imsg fd to " 2949 "RDE but didn't receive any"); 2950 break; 2951 } 2952 if ((i = malloc(sizeof(struct imsgbuf))) == NULL) 2953 fatal(NULL); 2954 imsg_init(i, fd); 2955 if (imsg_get_type(&imsg) == IMSG_SOCKET_CONN) { 2956 if (ibuf_rde) { 2957 log_warnx("Unexpected imsg connection " 2958 "to RDE received"); 2959 msgbuf_clear(&ibuf_rde->w); 2960 free(ibuf_rde); 2961 } 2962 ibuf_rde = i; 2963 } else { 2964 if (ibuf_rde_ctl) { 2965 log_warnx("Unexpected imsg ctl " 2966 "connection to RDE received"); 2967 msgbuf_clear(&ibuf_rde_ctl->w); 2968 free(ibuf_rde_ctl); 2969 } 2970 ibuf_rde_ctl = i; 2971 } 2972 break; 2973 case IMSG_RECONF_CONF: 2974 if (idx != PFD_PIPE_MAIN) 2975 fatalx("reconf request not from parent"); 2976 if (imsg_get_data(&imsg, &tconf, sizeof(tconf)) == -1) 2977 fatal("imsg_get_data"); 2978 2979 nconf = new_config(); 2980 copy_config(nconf, &tconf); 2981 pending_reconf = 1; 2982 break; 2983 case IMSG_RECONF_PEER: 2984 if (idx != PFD_PIPE_MAIN) 2985 fatalx("reconf request not from parent"); 2986 if ((p = calloc(1, sizeof(struct peer))) == NULL) 2987 fatal("new_peer"); 2988 if (imsg_get_data(&imsg, &p->conf, sizeof(p->conf)) == 2989 -1) 2990 fatal("imsg_get_data"); 2991 p->state = p->prev_state = STATE_NONE; 2992 p->reconf_action = RECONF_REINIT; 2993 if (RB_INSERT(peer_head, &nconf->peers, p) != NULL) 2994 fatalx("%s: peer tree is corrupt", __func__); 2995 break; 2996 case IMSG_RECONF_LISTENER: 2997 if (idx != PFD_PIPE_MAIN) 2998 fatalx("reconf request not from parent"); 2999 if (nconf == NULL) 3000 fatalx("IMSG_RECONF_LISTENER but no config"); 3001 if (imsg_get_data(&imsg, &nla, sizeof(nla)) == -1) 3002 fatal("imsg_get_data"); 3003 TAILQ_FOREACH(la, conf->listen_addrs, entry) 3004 if (!la_cmp(la, &nla)) 3005 break; 3006 3007 if (la == NULL) { 3008 if (nla.reconf != RECONF_REINIT) 3009 fatalx("king bula sez: " 3010 "expected REINIT"); 3011 3012 if ((nla.fd = imsg_get_fd(&imsg)) == -1) 3013 log_warnx("expected to receive fd for " 3014 "%s but didn't receive any", 3015 log_sockaddr((struct sockaddr *) 3016 &nla.sa, nla.sa_len)); 3017 3018 la = calloc(1, sizeof(struct listen_addr)); 3019 if (la == NULL) 3020 fatal(NULL); 3021 memcpy(&la->sa, &nla.sa, sizeof(la->sa)); 3022 la->flags = nla.flags; 3023 la->fd = nla.fd; 3024 la->reconf = RECONF_REINIT; 3025 TAILQ_INSERT_TAIL(nconf->listen_addrs, la, 3026 entry); 3027 } else { 3028 if (nla.reconf != RECONF_KEEP) 3029 fatalx("king bula sez: expected KEEP"); 3030 la->reconf = RECONF_KEEP; 3031 } 3032 3033 break; 3034 case IMSG_RECONF_CTRL: 3035 if (idx != PFD_PIPE_MAIN) 3036 fatalx("reconf request not from parent"); 3037 3038 if (imsg_get_data(&imsg, &restricted, 3039 sizeof(restricted)) == -1) 3040 fatal("imsg_get_data"); 3041 if ((fd = imsg_get_fd(&imsg)) == -1) { 3042 log_warnx("expected to receive fd for control " 3043 "socket but didn't receive any"); 3044 break; 3045 } 3046 if (restricted) { 3047 control_shutdown(rcsock); 3048 rcsock = fd; 3049 } else { 3050 control_shutdown(csock); 3051 csock = fd; 3052 } 3053 break; 3054 case IMSG_RECONF_DRAIN: 3055 switch (idx) { 3056 case PFD_PIPE_ROUTE: 3057 if (nconf != NULL) 3058 fatalx("got unexpected %s from RDE", 3059 "IMSG_RECONF_DONE"); 3060 imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0, 3061 -1, NULL, 0); 3062 break; 3063 case PFD_PIPE_MAIN: 3064 if (nconf == NULL) 3065 fatalx("got unexpected %s from parent", 3066 "IMSG_RECONF_DONE"); 3067 imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0, 3068 -1, NULL, 0); 3069 break; 3070 default: 3071 fatalx("reconf request not from parent or RDE"); 3072 } 3073 break; 3074 case IMSG_RECONF_DONE: 3075 if (idx != PFD_PIPE_MAIN) 3076 fatalx("reconf request not from parent"); 3077 if (nconf == NULL) 3078 fatalx("got IMSG_RECONF_DONE but no config"); 3079 copy_config(conf, nconf); 3080 merge_peers(conf, nconf); 3081 3082 /* delete old listeners */ 3083 TAILQ_FOREACH_SAFE(la, conf->listen_addrs, entry, 3084 next) { 3085 if (la->reconf == RECONF_NONE) { 3086 log_info("not listening on %s any more", 3087 log_sockaddr((struct sockaddr *) 3088 &la->sa, la->sa_len)); 3089 TAILQ_REMOVE(conf->listen_addrs, la, 3090 entry); 3091 close(la->fd); 3092 free(la); 3093 } 3094 } 3095 3096 /* add new listeners */ 3097 TAILQ_CONCAT(conf->listen_addrs, nconf->listen_addrs, 3098 entry); 3099 3100 setup_listeners(listener_cnt); 3101 free_config(nconf); 3102 nconf = NULL; 3103 pending_reconf = 0; 3104 log_info("SE reconfigured"); 3105 /* 3106 * IMSG_RECONF_DONE is sent when the RDE drained 3107 * the peer config sent in merge_peers(). 3108 */ 3109 break; 3110 case IMSG_SESSION_DEPENDON: 3111 if (idx != PFD_PIPE_MAIN) 3112 fatalx("IFINFO message not from parent"); 3113 if (imsg_get_data(&imsg, &sdon, sizeof(sdon)) == -1) 3114 fatalx("DEPENDON imsg with wrong len"); 3115 depend_ok = sdon.depend_state; 3116 3117 RB_FOREACH(p, peer_head, &conf->peers) 3118 if (!strcmp(p->conf.if_depend, sdon.ifname)) { 3119 if (depend_ok && !p->depend_ok) { 3120 p->depend_ok = depend_ok; 3121 bgp_fsm(p, EVNT_START); 3122 } else if (!depend_ok && p->depend_ok) { 3123 p->depend_ok = depend_ok; 3124 session_stop(p, 3125 ERR_CEASE_OTHER_CHANGE, 3126 NULL); 3127 } 3128 } 3129 break; 3130 case IMSG_MRT_OPEN: 3131 case IMSG_MRT_REOPEN: 3132 if (idx != PFD_PIPE_MAIN) 3133 fatalx("mrt request not from parent"); 3134 if (imsg_get_data(&imsg, &xmrt, sizeof(xmrt)) == -1) { 3135 log_warnx("mrt open, wrong imsg len"); 3136 break; 3137 } 3138 3139 if ((xmrt.wbuf.fd = imsg_get_fd(&imsg)) == -1) { 3140 log_warnx("expected to receive fd for mrt dump " 3141 "but didn't receive any"); 3142 break; 3143 } 3144 3145 mrt = mrt_get(&mrthead, &xmrt); 3146 if (mrt == NULL) { 3147 /* new dump */ 3148 mrt = calloc(1, sizeof(struct mrt)); 3149 if (mrt == NULL) 3150 fatal("session_dispatch_imsg"); 3151 memcpy(mrt, &xmrt, sizeof(struct mrt)); 3152 TAILQ_INIT(&mrt->wbuf.bufs); 3153 LIST_INSERT_HEAD(&mrthead, mrt, entry); 3154 } else { 3155 /* old dump reopened */ 3156 close(mrt->wbuf.fd); 3157 mrt->wbuf.fd = xmrt.wbuf.fd; 3158 } 3159 break; 3160 case IMSG_MRT_CLOSE: 3161 if (idx != PFD_PIPE_MAIN) 3162 fatalx("mrt request not from parent"); 3163 if (imsg_get_data(&imsg, &xmrt, sizeof(xmrt)) == -1) { 3164 log_warnx("mrt close, wrong imsg len"); 3165 break; 3166 } 3167 3168 mrt = mrt_get(&mrthead, &xmrt); 3169 if (mrt != NULL) 3170 mrt_done(mrt); 3171 break; 3172 case IMSG_CTL_KROUTE: 3173 case IMSG_CTL_KROUTE_ADDR: 3174 case IMSG_CTL_SHOW_NEXTHOP: 3175 case IMSG_CTL_SHOW_INTERFACE: 3176 case IMSG_CTL_SHOW_FIB_TABLES: 3177 case IMSG_CTL_SHOW_RTR: 3178 case IMSG_CTL_SHOW_TIMER: 3179 if (idx != PFD_PIPE_MAIN) 3180 fatalx("ctl kroute request not from parent"); 3181 control_imsg_relay(&imsg, NULL); 3182 break; 3183 case IMSG_CTL_SHOW_NEIGHBOR: 3184 if (idx != PFD_PIPE_ROUTE_CTL) 3185 fatalx("ctl rib request not from RDE"); 3186 p = getpeerbyid(conf, peerid); 3187 control_imsg_relay(&imsg, p); 3188 break; 3189 case IMSG_CTL_SHOW_RIB: 3190 case IMSG_CTL_SHOW_RIB_PREFIX: 3191 case IMSG_CTL_SHOW_RIB_COMMUNITIES: 3192 case IMSG_CTL_SHOW_RIB_ATTR: 3193 case IMSG_CTL_SHOW_RIB_MEM: 3194 case IMSG_CTL_SHOW_NETWORK: 3195 case IMSG_CTL_SHOW_FLOWSPEC: 3196 case IMSG_CTL_SHOW_SET: 3197 if (idx != PFD_PIPE_ROUTE_CTL) 3198 fatalx("ctl rib request not from RDE"); 3199 control_imsg_relay(&imsg, NULL); 3200 break; 3201 case IMSG_CTL_END: 3202 case IMSG_CTL_RESULT: 3203 control_imsg_relay(&imsg, NULL); 3204 break; 3205 case IMSG_UPDATE: 3206 if (idx != PFD_PIPE_ROUTE) 3207 fatalx("update request not from RDE"); 3208 len = imsg_get_len(&imsg); 3209 if (imsg_get_ibuf(&imsg, &ibuf) == -1 || 3210 len > MAX_PKTSIZE - MSGSIZE_HEADER || 3211 len < MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER) 3212 log_warnx("RDE sent invalid update"); 3213 else 3214 session_update(peerid, &ibuf); 3215 break; 3216 case IMSG_UPDATE_ERR: 3217 if (idx != PFD_PIPE_ROUTE) 3218 fatalx("update request not from RDE"); 3219 if ((p = getpeerbyid(conf, peerid)) == NULL) { 3220 log_warnx("no such peer: id=%u", peerid); 3221 break; 3222 } 3223 if (imsg_get_ibuf(&imsg, &ibuf) == -1 || 3224 ibuf_get_n8(&ibuf, &errcode) == -1 || 3225 ibuf_get_n8(&ibuf, &subcode) == -1) { 3226 log_warnx("RDE sent invalid notification"); 3227 break; 3228 } 3229 3230 session_notification(p, errcode, subcode, &ibuf); 3231 switch (errcode) { 3232 case ERR_CEASE: 3233 switch (subcode) { 3234 case ERR_CEASE_MAX_PREFIX: 3235 case ERR_CEASE_MAX_SENT_PREFIX: 3236 t = p->conf.max_out_prefix_restart; 3237 if (subcode == ERR_CEASE_MAX_PREFIX) 3238 t = p->conf.max_prefix_restart; 3239 3240 bgp_fsm(p, EVNT_STOP); 3241 if (t) 3242 timer_set(&p->timers, 3243 Timer_IdleHold, 60 * t); 3244 break; 3245 default: 3246 bgp_fsm(p, EVNT_CON_FATAL); 3247 break; 3248 } 3249 break; 3250 default: 3251 bgp_fsm(p, EVNT_CON_FATAL); 3252 break; 3253 } 3254 break; 3255 case IMSG_REFRESH: 3256 if (idx != PFD_PIPE_ROUTE) 3257 fatalx("route refresh request not from RDE"); 3258 if (imsg_get_data(&imsg, &rr, sizeof(rr)) == -1) { 3259 log_warnx("RDE sent invalid refresh msg"); 3260 break; 3261 } 3262 if ((p = getpeerbyid(conf, peerid)) == NULL) { 3263 log_warnx("no such peer: id=%u", peerid); 3264 break; 3265 } 3266 if (rr.aid < AID_MIN || rr.aid >= AID_MAX) 3267 fatalx("IMSG_REFRESH: bad AID"); 3268 session_rrefresh(p, rr.aid, rr.subtype); 3269 break; 3270 case IMSG_SESSION_RESTARTED: 3271 if (idx != PFD_PIPE_ROUTE) 3272 fatalx("session restart not from RDE"); 3273 if (imsg_get_data(&imsg, &aid, sizeof(aid)) == -1) { 3274 log_warnx("RDE sent invalid restart msg"); 3275 break; 3276 } 3277 if ((p = getpeerbyid(conf, peerid)) == NULL) { 3278 log_warnx("no such peer: id=%u", peerid); 3279 break; 3280 } 3281 if (aid < AID_MIN || aid >= AID_MAX) 3282 fatalx("IMSG_SESSION_RESTARTED: bad AID"); 3283 if (p->capa.neg.grestart.flags[aid] & 3284 CAPA_GR_RESTARTING) { 3285 log_peer_warnx(&p->conf, 3286 "graceful restart of %s finished", 3287 aid2str(aid)); 3288 p->capa.neg.grestart.flags[aid] &= 3289 ~CAPA_GR_RESTARTING; 3290 timer_stop(&p->timers, Timer_RestartTimeout); 3291 3292 /* signal back to RDE to cleanup stale routes */ 3293 if (imsg_rde(IMSG_SESSION_RESTARTED, 3294 peerid, &aid, sizeof(aid)) == -1) 3295 fatal("imsg_compose: " 3296 "IMSG_SESSION_RESTARTED"); 3297 } 3298 break; 3299 default: 3300 break; 3301 } 3302 imsg_free(&imsg); 3303 } 3304 } 3305 3306 int 3307 la_cmp(struct listen_addr *a, struct listen_addr *b) 3308 { 3309 struct sockaddr_in *in_a, *in_b; 3310 struct sockaddr_in6 *in6_a, *in6_b; 3311 3312 if (a->sa.ss_family != b->sa.ss_family) 3313 return (1); 3314 3315 switch (a->sa.ss_family) { 3316 case AF_INET: 3317 in_a = (struct sockaddr_in *)&a->sa; 3318 in_b = (struct sockaddr_in *)&b->sa; 3319 if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr) 3320 return (1); 3321 if (in_a->sin_port != in_b->sin_port) 3322 return (1); 3323 break; 3324 case AF_INET6: 3325 in6_a = (struct sockaddr_in6 *)&a->sa; 3326 in6_b = (struct sockaddr_in6 *)&b->sa; 3327 if (memcmp(&in6_a->sin6_addr, &in6_b->sin6_addr, 3328 sizeof(struct in6_addr))) 3329 return (1); 3330 if (in6_a->sin6_port != in6_b->sin6_port) 3331 return (1); 3332 break; 3333 default: 3334 fatal("king bula sez: unknown address family"); 3335 /* NOTREACHED */ 3336 } 3337 3338 return (0); 3339 } 3340 3341 struct peer * 3342 getpeerbydesc(struct bgpd_config *c, const char *descr) 3343 { 3344 struct peer *p, *res = NULL; 3345 int match = 0; 3346 3347 RB_FOREACH(p, peer_head, &c->peers) 3348 if (!strcmp(p->conf.descr, descr)) { 3349 res = p; 3350 match++; 3351 } 3352 3353 if (match > 1) 3354 log_info("neighbor description \"%s\" not unique, request " 3355 "aborted", descr); 3356 3357 if (match == 1) 3358 return (res); 3359 else 3360 return (NULL); 3361 } 3362 3363 struct peer * 3364 getpeerbyip(struct bgpd_config *c, struct sockaddr *ip) 3365 { 3366 struct bgpd_addr addr; 3367 struct peer *p, *newpeer, *loose = NULL; 3368 uint32_t id; 3369 3370 sa2addr(ip, &addr, NULL); 3371 3372 /* we might want a more effective way to find peers by IP */ 3373 RB_FOREACH(p, peer_head, &c->peers) 3374 if (!p->conf.template && 3375 !memcmp(&addr, &p->conf.remote_addr, sizeof(addr))) 3376 return (p); 3377 3378 /* try template matching */ 3379 RB_FOREACH(p, peer_head, &c->peers) 3380 if (p->conf.template && 3381 p->conf.remote_addr.aid == addr.aid && 3382 session_match_mask(p, &addr)) 3383 if (loose == NULL || loose->conf.remote_masklen < 3384 p->conf.remote_masklen) 3385 loose = p; 3386 3387 if (loose != NULL) { 3388 /* clone */ 3389 if ((newpeer = malloc(sizeof(struct peer))) == NULL) 3390 fatal(NULL); 3391 memcpy(newpeer, loose, sizeof(struct peer)); 3392 for (id = PEER_ID_DYN_MAX; id > PEER_ID_STATIC_MAX; id--) { 3393 if (getpeerbyid(c, id) == NULL) /* we found a free id */ 3394 break; 3395 } 3396 newpeer->template = loose; 3397 session_template_clone(newpeer, ip, id, 0); 3398 newpeer->state = newpeer->prev_state = STATE_NONE; 3399 newpeer->reconf_action = RECONF_KEEP; 3400 newpeer->rbuf = NULL; 3401 newpeer->rpending = 0; 3402 init_peer(newpeer); 3403 bgp_fsm(newpeer, EVNT_START); 3404 if (RB_INSERT(peer_head, &c->peers, newpeer) != NULL) 3405 fatalx("%s: peer tree is corrupt", __func__); 3406 return (newpeer); 3407 } 3408 3409 return (NULL); 3410 } 3411 3412 struct peer * 3413 getpeerbyid(struct bgpd_config *c, uint32_t peerid) 3414 { 3415 static struct peer lookup; 3416 3417 lookup.conf.id = peerid; 3418 3419 return RB_FIND(peer_head, &c->peers, &lookup); 3420 } 3421 3422 int 3423 peer_matched(struct peer *p, struct ctl_neighbor *n) 3424 { 3425 char *s; 3426 3427 if (n && n->addr.aid) { 3428 if (memcmp(&p->conf.remote_addr, &n->addr, 3429 sizeof(p->conf.remote_addr))) 3430 return 0; 3431 } else if (n && n->descr[0]) { 3432 s = n->is_group ? p->conf.group : p->conf.descr; 3433 /* cannot trust n->descr to be properly terminated */ 3434 if (strncmp(s, n->descr, sizeof(n->descr))) 3435 return 0; 3436 } 3437 return 1; 3438 } 3439 3440 void 3441 session_template_clone(struct peer *p, struct sockaddr *ip, uint32_t id, 3442 uint32_t as) 3443 { 3444 struct bgpd_addr remote_addr; 3445 3446 if (ip) 3447 sa2addr(ip, &remote_addr, NULL); 3448 else 3449 memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr)); 3450 3451 memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config)); 3452 3453 p->conf.id = id; 3454 3455 if (as) { 3456 p->conf.remote_as = as; 3457 p->conf.ebgp = (p->conf.remote_as != p->conf.local_as); 3458 if (!p->conf.ebgp) 3459 /* force enforce_as off for iBGP sessions */ 3460 p->conf.enforce_as = ENFORCE_AS_OFF; 3461 } 3462 3463 memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr)); 3464 switch (p->conf.remote_addr.aid) { 3465 case AID_INET: 3466 p->conf.remote_masklen = 32; 3467 break; 3468 case AID_INET6: 3469 p->conf.remote_masklen = 128; 3470 break; 3471 } 3472 p->conf.template = 0; 3473 } 3474 3475 int 3476 session_match_mask(struct peer *p, struct bgpd_addr *a) 3477 { 3478 struct bgpd_addr masked; 3479 3480 applymask(&masked, a, p->conf.remote_masklen); 3481 if (memcmp(&masked, &p->conf.remote_addr, sizeof(masked)) == 0) 3482 return (1); 3483 return (0); 3484 } 3485 3486 void 3487 session_down(struct peer *peer) 3488 { 3489 memset(&peer->capa.neg, 0, sizeof(peer->capa.neg)); 3490 peer->stats.last_updown = getmonotime(); 3491 /* 3492 * session_down is called in the exit code path so check 3493 * if the RDE is still around, if not there is no need to 3494 * send the message. 3495 */ 3496 if (ibuf_rde == NULL) 3497 return; 3498 if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1) 3499 fatalx("imsg_compose error"); 3500 } 3501 3502 void 3503 session_up(struct peer *p) 3504 { 3505 struct session_up sup; 3506 3507 /* clear last errors, now that the session is up */ 3508 p->stats.last_sent_errcode = 0; 3509 p->stats.last_sent_suberr = 0; 3510 p->stats.last_rcvd_errcode = 0; 3511 p->stats.last_rcvd_suberr = 0; 3512 memset(p->stats.last_reason, 0, sizeof(p->stats.last_reason)); 3513 3514 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3515 &p->conf, sizeof(p->conf)) == -1) 3516 fatalx("imsg_compose error"); 3517 3518 if (p->local.aid == AID_INET) { 3519 sup.local_v4_addr = p->local; 3520 sup.local_v6_addr = p->local_alt; 3521 } else { 3522 sup.local_v6_addr = p->local; 3523 sup.local_v4_addr = p->local_alt; 3524 } 3525 sup.remote_addr = p->remote; 3526 sup.if_scope = p->if_scope; 3527 3528 sup.remote_bgpid = p->remote_bgpid; 3529 sup.short_as = p->short_as; 3530 memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa)); 3531 p->stats.last_updown = getmonotime(); 3532 if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1) 3533 fatalx("imsg_compose error"); 3534 } 3535 3536 int 3537 imsg_ctl_parent(struct imsg *imsg) 3538 { 3539 return imsg_forward(ibuf_main, imsg); 3540 } 3541 3542 int 3543 imsg_ctl_rde(struct imsg *imsg) 3544 { 3545 if (ibuf_rde_ctl == NULL) 3546 return (0); 3547 /* 3548 * Use control socket to talk to RDE to bypass the queue of the 3549 * regular imsg socket. 3550 */ 3551 return imsg_forward(ibuf_rde_ctl, imsg); 3552 } 3553 3554 int 3555 imsg_ctl_rde_msg(int type, uint32_t peerid, pid_t pid) 3556 { 3557 if (ibuf_rde_ctl == NULL) 3558 return (0); 3559 3560 /* 3561 * Use control socket to talk to RDE to bypass the queue of the 3562 * regular imsg socket. 3563 */ 3564 return imsg_compose(ibuf_rde_ctl, type, peerid, pid, -1, NULL, 0); 3565 } 3566 3567 int 3568 imsg_rde(int type, uint32_t peerid, void *data, uint16_t datalen) 3569 { 3570 if (ibuf_rde == NULL) 3571 return (0); 3572 3573 return imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen); 3574 } 3575 3576 void 3577 session_demote(struct peer *p, int level) 3578 { 3579 struct demote_msg msg; 3580 3581 strlcpy(msg.demote_group, p->conf.demote_group, 3582 sizeof(msg.demote_group)); 3583 msg.level = level; 3584 if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1, 3585 &msg, sizeof(msg)) == -1) 3586 fatalx("imsg_compose error"); 3587 3588 p->demoted += level; 3589 } 3590 3591 void 3592 session_stop(struct peer *peer, uint8_t subcode, const char *reason) 3593 { 3594 struct ibuf *ibuf; 3595 3596 if (reason != NULL) 3597 strlcpy(peer->conf.reason, reason, sizeof(peer->conf.reason)); 3598 3599 ibuf = ibuf_dynamic(0, REASON_LEN); 3600 3601 if ((subcode == ERR_CEASE_ADMIN_DOWN || 3602 subcode == ERR_CEASE_ADMIN_RESET) && 3603 reason != NULL && *reason != '\0' && 3604 ibuf != NULL) { 3605 if (ibuf_add_n8(ibuf, strlen(reason)) == -1 || 3606 ibuf_add(ibuf, reason, strlen(reason))) { 3607 log_peer_warnx(&peer->conf, 3608 "trying to send overly long shutdown reason"); 3609 ibuf_free(ibuf); 3610 ibuf = NULL; 3611 } 3612 } 3613 switch (peer->state) { 3614 case STATE_OPENSENT: 3615 case STATE_OPENCONFIRM: 3616 case STATE_ESTABLISHED: 3617 session_notification(peer, ERR_CEASE, subcode, ibuf); 3618 break; 3619 default: 3620 /* session not open, no need to send notification */ 3621 if (subcode >= sizeof(suberr_cease_names) / sizeof(char *) || 3622 suberr_cease_names[subcode] == NULL) 3623 log_peer_warnx(&peer->conf, "session stop: %s, " 3624 "unknown subcode %u", errnames[ERR_CEASE], subcode); 3625 else 3626 log_peer_warnx(&peer->conf, "session stop: %s, %s", 3627 errnames[ERR_CEASE], suberr_cease_names[subcode]); 3628 break; 3629 } 3630 ibuf_free(ibuf); 3631 bgp_fsm(peer, EVNT_STOP); 3632 } 3633 3634 void 3635 merge_peers(struct bgpd_config *c, struct bgpd_config *nc) 3636 { 3637 struct peer *p, *np, *next; 3638 3639 RB_FOREACH(p, peer_head, &c->peers) { 3640 /* templates are handled specially */ 3641 if (p->template != NULL) 3642 continue; 3643 np = getpeerbyid(nc, p->conf.id); 3644 if (np == NULL) { 3645 p->reconf_action = RECONF_DELETE; 3646 continue; 3647 } 3648 3649 /* peer no longer uses TCP MD5SIG so deconfigure */ 3650 if (p->conf.auth.method == AUTH_MD5SIG && 3651 np->conf.auth.method != AUTH_MD5SIG) 3652 tcp_md5_del_listener(c, p); 3653 else if (np->conf.auth.method == AUTH_MD5SIG) 3654 tcp_md5_add_listener(c, np); 3655 3656 memcpy(&p->conf, &np->conf, sizeof(p->conf)); 3657 RB_REMOVE(peer_head, &nc->peers, np); 3658 free(np); 3659 3660 p->reconf_action = RECONF_KEEP; 3661 3662 /* had demotion, is demoted, demote removed? */ 3663 if (p->demoted && !p->conf.demote_group[0]) 3664 session_demote(p, -1); 3665 3666 /* if session is not open then refresh pfkey data */ 3667 if (p->state < STATE_OPENSENT && !p->template) 3668 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD, 3669 p->conf.id, 0, -1, NULL, 0); 3670 3671 /* sync the RDE in case we keep the peer */ 3672 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id, 3673 &p->conf, sizeof(struct peer_config)) == -1) 3674 fatalx("imsg_compose error"); 3675 3676 /* apply the config to all clones of a template */ 3677 if (p->conf.template) { 3678 struct peer *xp; 3679 RB_FOREACH(xp, peer_head, &c->peers) { 3680 if (xp->template != p) 3681 continue; 3682 session_template_clone(xp, NULL, xp->conf.id, 3683 xp->conf.remote_as); 3684 if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id, 3685 &xp->conf, sizeof(xp->conf)) == -1) 3686 fatalx("imsg_compose error"); 3687 } 3688 } 3689 } 3690 3691 if (imsg_rde(IMSG_RECONF_DRAIN, 0, NULL, 0) == -1) 3692 fatalx("imsg_compose error"); 3693 3694 /* pfkeys of new peers already loaded by the parent process */ 3695 RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) { 3696 RB_REMOVE(peer_head, &nc->peers, np); 3697 if (RB_INSERT(peer_head, &c->peers, np) != NULL) 3698 fatalx("%s: peer tree is corrupt", __func__); 3699 if (np->conf.auth.method == AUTH_MD5SIG) 3700 tcp_md5_add_listener(c, np); 3701 } 3702 } 3703