xref: /openbsd/usr.sbin/bgpd/session.c (revision 9e6efb0a)
1 /*	$OpenBSD: session.c,v 1.478 2024/05/22 08:41:14 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5  * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/types.h>
21 
22 #include <sys/mman.h>
23 #include <sys/socket.h>
24 #include <sys/time.h>
25 #include <sys/resource.h>
26 #include <sys/un.h>
27 #include <netinet/in.h>
28 #include <netinet/ip.h>
29 #include <netinet/tcp.h>
30 #include <arpa/inet.h>
31 #include <limits.h>
32 
33 #include <err.h>
34 #include <errno.h>
35 #include <fcntl.h>
36 #include <ifaddrs.h>
37 #include <poll.h>
38 #include <pwd.h>
39 #include <signal.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <syslog.h>
44 #include <unistd.h>
45 
46 #include "bgpd.h"
47 #include "session.h"
48 #include "log.h"
49 
/*
 * Fixed slots at the front of the pollfd array built by session_main().
 * The imsg pipes and the two control sockets always occupy these indexes;
 * listening sockets are filled in starting at PFD_LISTENERS_START.
 */
#define PFD_PIPE_MAIN		0
#define PFD_PIPE_ROUTE		1
#define PFD_PIPE_ROUTE_CTL	2
#define PFD_SOCK_CTL		3
#define PFD_SOCK_RCTL		4
#define PFD_LISTENERS_START	5
56 
57 void	session_sighdlr(int);
58 int	setup_listeners(u_int *);
59 void	init_peer(struct peer *);
60 void	start_timer_holdtime(struct peer *);
61 void	start_timer_sendholdtime(struct peer *);
62 void	start_timer_keepalive(struct peer *);
63 void	session_close_connection(struct peer *);
64 void	change_state(struct peer *, enum session_state, enum session_events);
65 int	session_setup_socket(struct peer *);
66 void	session_accept(int);
67 int	session_connect(struct peer *);
68 void	session_tcp_established(struct peer *);
69 int	session_capa_add(struct ibuf *, uint8_t, uint8_t);
70 int	session_capa_add_mp(struct ibuf *, uint8_t);
71 int	session_capa_add_afi(struct ibuf *, uint8_t, uint8_t);
72 struct bgp_msg	*session_newmsg(enum msg_type, uint16_t);
73 int	session_sendmsg(struct bgp_msg *, struct peer *);
74 void	session_open(struct peer *);
75 void	session_keepalive(struct peer *);
76 void	session_update(uint32_t, struct ibuf *);
77 void	session_notification(struct peer *, uint8_t, uint8_t, struct ibuf *);
78 void	session_notification_data(struct peer *, uint8_t, uint8_t, void *,
79 	    size_t);
80 void	session_rrefresh(struct peer *, uint8_t, uint8_t);
81 int	session_graceful_restart(struct peer *);
82 int	session_graceful_stop(struct peer *);
83 int	session_dispatch_msg(struct pollfd *, struct peer *);
84 void	session_process_msg(struct peer *);
85 int	parse_header(struct peer *, u_char *, uint16_t *, uint8_t *);
86 int	parse_open(struct peer *);
87 int	parse_update(struct peer *);
88 int	parse_rrefresh(struct peer *);
89 void	parse_notification(struct peer *);
90 int	parse_capabilities(struct peer *, struct ibuf *, uint32_t *);
91 int	capa_neg_calc(struct peer *);
92 void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
93 void	session_up(struct peer *);
94 void	session_down(struct peer *);
95 int	imsg_rde(int, uint32_t, void *, uint16_t);
96 void	session_demote(struct peer *, int);
97 void	merge_peers(struct bgpd_config *, struct bgpd_config *);
98 
99 int		 la_cmp(struct listen_addr *, struct listen_addr *);
100 void		 session_template_clone(struct peer *, struct sockaddr *,
101 		    uint32_t, uint32_t);
102 int		 session_match_mask(struct peer *, struct bgpd_addr *);
103 
/*
 * conf is the configuration the session engine currently runs with.
 * nconf is not used in this part of the file; presumably it holds the
 * configuration being received during a reload -- TODO confirm.
 */
static struct bgpd_config	*conf, *nconf;
/* imsg channels polled in the PFD_PIPE_ROUTE, _ROUTE_CTL and _MAIN slots */
static struct imsgbuf		*ibuf_rde;
static struct imsgbuf		*ibuf_rde_ctl;
static struct imsgbuf		*ibuf_main;

struct bgpd_sysdep	 sysdep;
/* set by session_sighdlr() or on loss of the parent pipe; ends the loop */
volatile sig_atomic_t	 session_quit;
/* while non-zero session_main() skips the peer init/delete pass */
int			 pending_reconf;
/* control sockets handed to control_accept(); -1 until assigned */
int			 csock = -1, rcsock = -1;
/* number of initialised peers: bumped in init_peer(), dropped on removal */
u_int			 peer_cnt;

struct mrt_head		 mrthead;
/* time accept() last hit fd exhaustion; non-zero pauses accepting */
time_t			 pauseaccept;

/* all-ones byte pattern; presumably the BGP message header marker */
static const uint8_t	 marker[MSGSIZE_HEADER_MARKER] = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};
122 
123 static inline int
124 peer_compare(const struct peer *a, const struct peer *b)
125 {
126 	return a->conf.id - b->conf.id;
127 }
128 
129 RB_GENERATE(peer_head, peer, entry, peer_compare);
130 
131 void
132 session_sighdlr(int sig)
133 {
134 	switch (sig) {
135 	case SIGINT:
136 	case SIGTERM:
137 		session_quit = 1;
138 		break;
139 	}
140 }
141 
142 int
143 setup_listeners(u_int *la_cnt)
144 {
145 	int			 ttl = 255;
146 	struct listen_addr	*la;
147 	u_int			 cnt = 0;
148 
149 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
150 		la->reconf = RECONF_NONE;
151 		cnt++;
152 
153 		if (la->flags & LISTENER_LISTENING)
154 			continue;
155 
156 		if (la->fd == -1) {
157 			log_warn("cannot establish listener on %s: invalid fd",
158 			    log_sockaddr((struct sockaddr *)&la->sa,
159 			    la->sa_len));
160 			continue;
161 		}
162 
163 		if (tcp_md5_prep_listener(la, &conf->peers) == -1)
164 			fatal("tcp_md5_prep_listener");
165 
166 		/* set ttl to 255 so that ttl-security works */
167 		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
168 		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
169 			log_warn("setup_listeners setsockopt TTL");
170 			continue;
171 		}
172 		if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd,
173 		    IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) {
174 			log_warn("setup_listeners setsockopt hoplimit");
175 			continue;
176 		}
177 
178 		if (listen(la->fd, MAX_BACKLOG)) {
179 			close(la->fd);
180 			fatal("listen");
181 		}
182 
183 		la->flags |= LISTENER_LISTENING;
184 
185 		log_info("listening on %s",
186 		    log_sockaddr((struct sockaddr *)&la->sa, la->sa_len));
187 	}
188 
189 	*la_cnt = cnt;
190 
191 	return (0);
192 }
193 
/*
 * Entry point of the session engine process; does not return.
 *
 * Initialises logging with the given debug and verbose settings, chroots
 * into the home directory of BGPD_USER, drops privileges, pledges and
 * installs the signal handlers.  It then runs the poll loop until
 * session_quit is set.  Each iteration:
 *   - initialises new peers and removes peers marked for deletion
 *     (skipped while a reconfiguration is pending),
 *   - grows the peer_l, mrt_l and pfd arrays when more slots are needed,
 *   - rebuilds the pollfd array: fixed pipe/control slots, listeners,
 *     peers, mrt dumps with queued data, control connections,
 *   - runs the next due timer of every peer through bgp_fsm(),
 *   - polls and dispatches whatever became ready.
 * After the loop all peers are stopped and freed, the pipes are flushed
 * and closed, and the process exits with status 0.
 */
void
session_main(int debug, int verbose)
{
	int			 timeout;
	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
	u_int			 new_cnt;
	struct passwd		*pw;
	struct peer		*p, **peer_l = NULL, *next;
	struct mrt		*m, *xm, **mrt_l = NULL;
	struct pollfd		*pfd = NULL;
	struct listen_addr	*la;
	void			*newp;
	time_t			 now;
	short			 events;

	log_init(debug, LOG_DAEMON);
	log_setverbose(verbose);

	log_procinit(log_procnames[PROC_SE]);

	if ((pw = getpwnam(BGPD_USER)) == NULL)
		fatal(NULL);

	if (chroot(pw->pw_dir) == -1)
		fatal("chroot");
	if (chdir("/") == -1)
		fatal("chdir(\"/\")");

	setproctitle("session engine");

	if (setgroups(1, &pw->pw_gid) ||
	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
		fatal("can't drop privileges");

	if (pledge("stdio inet recvfd", NULL) == -1)
		fatal("pledge");

	signal(SIGTERM, session_sighdlr);
	signal(SIGINT, session_sighdlr);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGALRM, SIG_IGN);
	signal(SIGUSR1, SIG_IGN);

	if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
		fatal(NULL);
	/* NOTE(review): fd 3 is presumably the pipe to the parent -- confirm */
	imsg_init(ibuf_main, 3);

	LIST_INIT(&mrthead);
	listener_cnt = 0;
	peer_cnt = 0;
	ctl_cnt = 0;

	conf = new_config();
	log_info("session engine ready");

	while (session_quit == 0) {
		/* check for peers to be initialized or deleted */
		if (!pending_reconf) {
			RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
				/* cloned peer that idled out? */
				if (p->template && (p->state == STATE_IDLE ||
				    p->state == STATE_ACTIVE) &&
				    getmonotime() - p->stats.last_updown >=
				    INTERVAL_HOLD_CLONED)
					p->reconf_action = RECONF_DELETE;

				/* new peer that needs init? */
				if (p->state == STATE_NONE)
					init_peer(p);

				/* deletion due? */
				if (p->reconf_action == RECONF_DELETE) {
					if (p->demoted)
						session_demote(p, -1);
					p->conf.demote_group[0] = 0;
					session_stop(p, ERR_CEASE_PEER_UNCONF,
					    NULL);
					timer_remove_all(&p->timers);
					tcp_md5_del_listener(conf, p);
					RB_REMOVE(peer_head, &conf->peers, p);
					log_peer_warnx(&p->conf, "removed");
					free(p);
					peer_cnt--;
					continue;
				}
				p->reconf_action = RECONF_NONE;
			}
		}

		/* peer_l maps pollfd slots back to peers; it only ever grows */
		if (peer_cnt > peer_l_elms) {
			if ((newp = reallocarray(peer_l, peer_cnt,
			    sizeof(struct peer *))) == NULL) {
				/* panic for now */
				log_warn("could not resize peer_l from %u -> %u"
				    " entries", peer_l_elms, peer_cnt);
				fatalx("exiting");
			}
			peer_l = newp;
			peer_l_elms = peer_cnt;
		}

		/* reap removed mrt entries and count those with queued data */
		mrt_cnt = 0;
		for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
			xm = LIST_NEXT(m, entry);
			if (m->state == MRT_STATE_REMOVE) {
				mrt_clean(m);
				LIST_REMOVE(m, entry);
				free(m);
				continue;
			}
			if (m->wbuf.queued)
				mrt_cnt++;
		}

		if (mrt_cnt > mrt_l_elms) {
			if ((newp = reallocarray(mrt_l, mrt_cnt,
			    sizeof(struct mrt *))) == NULL) {
				/* panic for now */
				log_warn("could not resize mrt_l from %u -> %u"
				    " entries", mrt_l_elms, mrt_cnt);
				fatalx("exiting");
			}
			mrt_l = newp;
			mrt_l_elms = mrt_cnt;
		}

		/* worst-case number of pollfd slots needed this round */
		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
		    ctl_cnt + mrt_cnt;
		if (new_cnt > pfd_elms) {
			if ((newp = reallocarray(pfd, new_cnt,
			    sizeof(struct pollfd))) == NULL) {
				/* panic for now */
				log_warn("could not resize pfd from %u -> %u"
				    " entries", pfd_elms, new_cnt);
				fatalx("exiting");
			}
			pfd = newp;
			pfd_elms = new_cnt;
		}

		memset(pfd, 0, sizeof(struct pollfd) * pfd_elms);

		set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
		set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde);
		set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl);

		/* while accepting is paused the accepting sockets are masked */
		if (pauseaccept == 0) {
			pfd[PFD_SOCK_CTL].fd = csock;
			pfd[PFD_SOCK_CTL].events = POLLIN;
			pfd[PFD_SOCK_RCTL].fd = rcsock;
			pfd[PFD_SOCK_RCTL].events = POLLIN;
		} else {
			pfd[PFD_SOCK_CTL].fd = -1;
			pfd[PFD_SOCK_RCTL].fd = -1;
		}

		i = PFD_LISTENERS_START;
		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
			if (pauseaccept == 0) {
				pfd[i].fd = la->fd;
				pfd[i].events = POLLIN;
			} else
				pfd[i].fd = -1;
			i++;
		}
		idx_listeners = i;
		timeout = 240;	/* loop every 240s at least */

		now = getmonotime();
		RB_FOREACH(p, peer_head, &conf->peers) {
			time_t	nextaction;
			struct timer *pt;

			/* check timers */
			if ((pt = timer_nextisdue(&p->timers, now)) != NULL) {
				switch (pt->type) {
				case Timer_Hold:
					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
					break;
				case Timer_SendHold:
					bgp_fsm(p, EVNT_TIMER_SENDHOLD);
					break;
				case Timer_ConnectRetry:
					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
					break;
				case Timer_Keepalive:
					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
					break;
				case Timer_IdleHold:
					bgp_fsm(p, EVNT_START);
					break;
				case Timer_IdleHoldReset:
					p->IdleHoldTime =
					    INTERVAL_IDLE_HOLD_INITIAL;
					p->errcnt = 0;
					timer_stop(&p->timers,
					    Timer_IdleHoldReset);
					break;
				case Timer_CarpUndemote:
					timer_stop(&p->timers,
					    Timer_CarpUndemote);
					if (p->demoted &&
					    p->state == STATE_ESTABLISHED)
						session_demote(p, -1);
					break;
				case Timer_RestartTimeout:
					timer_stop(&p->timers,
					    Timer_RestartTimeout);
					session_graceful_stop(p);
					break;
				default:
					fatalx("King Bula lost in time");
				}
			}
			/* shorten the poll timeout to the next timer expiry */
			if ((nextaction = timer_nextduein(&p->timers,
			    now)) != -1 && nextaction < timeout)
				timeout = nextaction;

			/* are we waiting for a write? */
			events = POLLIN;
			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
				events |= POLLOUT;
			/* is there still work to do? */
			if (p->rpending && p->rbuf && p->rbuf->wpos)
				timeout = 0;

			/* poll events */
			if (p->fd != -1 && events != 0) {
				pfd[i].fd = p->fd;
				pfd[i].events = events;
				peer_l[i - idx_listeners] = p;
				i++;
			}
		}

		idx_peers = i;

		LIST_FOREACH(m, &mrthead, entry)
			if (m->wbuf.queued) {
				pfd[i].fd = m->wbuf.fd;
				pfd[i].events = POLLOUT;
				mrt_l[i - idx_peers] = m;
				i++;
			}

		idx_mrts = i;

		/* control connections take the remaining slots */
		i += control_fill_pfds(pfd + i, pfd_elms -i);

		if (i > pfd_elms)
			fatalx("poll pfd overflow");

		/* wake up soon so a paused accept can be resumed */
		if (pauseaccept && timeout > 1)
			timeout = 1;
		if (timeout < 0)
			timeout = 0;
		if (poll(pfd, i, timeout * 1000) == -1) {
			if (errno == EINTR)
				continue;
			fatal("poll error");
		}

		/*
		 * If we previously saw fd exhaustion, we stop accept()
		 * for 1 second to throttle the accept() loop.
		 */
		if (pauseaccept && getmonotime() > pauseaccept + 1)
			pauseaccept = 0;

		if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) {
			log_warnx("SE: Lost connection to parent");
			session_quit = 1;
			continue;
		} else
			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
			    &listener_cnt);

		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) {
			log_warnx("SE: Lost connection to RDE");
			msgbuf_clear(&ibuf_rde->w);
			free(ibuf_rde);
			ibuf_rde = NULL;
		} else
			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
			    &listener_cnt);

		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) ==
		    -1) {
			log_warnx("SE: Lost connection to RDE control");
			msgbuf_clear(&ibuf_rde_ctl->w);
			free(ibuf_rde_ctl);
			ibuf_rde_ctl = NULL;
		} else
			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
			    &listener_cnt);

		if (pfd[PFD_SOCK_CTL].revents & POLLIN)
			ctl_cnt += control_accept(csock, 0);

		if (pfd[PFD_SOCK_RCTL].revents & POLLIN)
			ctl_cnt += control_accept(rcsock, 1);

		/*
		 * j walks the pollfd array once, in the order it was built:
		 * listeners, peers, mrt dumps, control connections.
		 */
		for (j = PFD_LISTENERS_START; j < idx_listeners; j++)
			if (pfd[j].revents & POLLIN)
				session_accept(pfd[j].fd);

		for (; j < idx_peers; j++)
			session_dispatch_msg(&pfd[j],
			    peer_l[j - idx_listeners]);

		/* process buffered input of every peer, polled or not */
		RB_FOREACH(p, peer_head, &conf->peers)
			if (p->rbuf && p->rbuf->wpos)
				session_process_msg(p);

		for (; j < idx_mrts; j++)
			if (pfd[j].revents & POLLOUT)
				mrt_write(mrt_l[j - idx_peers]);

		for (; j < i; j++)
			ctl_cnt -= control_dispatch_msg(&pfd[j], &conf->peers);
	}

	/* shutdown: stop and free every peer */
	RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
		session_stop(p, ERR_CEASE_ADMIN_DOWN, "bgpd shutting down");
		timer_remove_all(&p->timers);
		tcp_md5_del_listener(conf, p);
		RB_REMOVE(peer_head, &conf->peers, p);
		free(p);
	}

	while ((m = LIST_FIRST(&mrthead)) != NULL) {
		mrt_clean(m);
		LIST_REMOVE(m, entry);
		free(m);
	}

	free_config(conf);
	free(peer_l);
	free(mrt_l);
	free(pfd);

	/* close pipes */
	if (ibuf_rde) {
		msgbuf_write(&ibuf_rde->w);
		msgbuf_clear(&ibuf_rde->w);
		close(ibuf_rde->fd);
		free(ibuf_rde);
	}
	if (ibuf_rde_ctl) {
		msgbuf_clear(&ibuf_rde_ctl->w);
		close(ibuf_rde_ctl->fd);
		free(ibuf_rde_ctl);
	}
	msgbuf_write(&ibuf_main->w);
	msgbuf_clear(&ibuf_main->w);
	close(ibuf_main->fd);
	free(ibuf_main);

	control_shutdown(csock);
	control_shutdown(rcsock);
	log_info("session engine exiting");
	exit(0);
}
561 
562 void
563 init_peer(struct peer *p)
564 {
565 	TAILQ_INIT(&p->timers);
566 	p->fd = p->wbuf.fd = -1;
567 
568 	if (p->conf.if_depend[0])
569 		imsg_compose(ibuf_main, IMSG_SESSION_DEPENDON, 0, 0, -1,
570 		    p->conf.if_depend, sizeof(p->conf.if_depend));
571 	else
572 		p->depend_ok = 1;
573 
574 	peer_cnt++;
575 
576 	change_state(p, STATE_IDLE, EVNT_NONE);
577 	if (p->conf.down)
578 		timer_stop(&p->timers, Timer_IdleHold); /* no autostart */
579 	else
580 		timer_set(&p->timers, Timer_IdleHold, SESSION_CLEAR_DELAY);
581 
582 	p->stats.last_updown = getmonotime();
583 
584 	/*
585 	 * on startup, demote if requested.
586 	 * do not handle new peers. they must reach ESTABLISHED beforehand.
587 	 * peers added at runtime have reconf_action set to RECONF_REINIT.
588 	 */
589 	if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0])
590 		session_demote(p, +1);
591 }
592 
593 void
594 bgp_fsm(struct peer *peer, enum session_events event)
595 {
596 	switch (peer->state) {
597 	case STATE_NONE:
598 		/* nothing */
599 		break;
600 	case STATE_IDLE:
601 		switch (event) {
602 		case EVNT_START:
603 			timer_stop(&peer->timers, Timer_Hold);
604 			timer_stop(&peer->timers, Timer_SendHold);
605 			timer_stop(&peer->timers, Timer_Keepalive);
606 			timer_stop(&peer->timers, Timer_IdleHold);
607 
608 			/* allocate read buffer */
609 			peer->rbuf = calloc(1, sizeof(struct ibuf_read));
610 			if (peer->rbuf == NULL)
611 				fatal(NULL);
612 
613 			/* init write buffer */
614 			msgbuf_init(&peer->wbuf);
615 
616 			if (!peer->depend_ok)
617 				timer_stop(&peer->timers, Timer_ConnectRetry);
618 			else if (peer->passive || peer->conf.passive ||
619 			    peer->conf.template) {
620 				change_state(peer, STATE_ACTIVE, event);
621 				timer_stop(&peer->timers, Timer_ConnectRetry);
622 			} else {
623 				change_state(peer, STATE_CONNECT, event);
624 				timer_set(&peer->timers, Timer_ConnectRetry,
625 				    conf->connectretry);
626 				session_connect(peer);
627 			}
628 			peer->passive = 0;
629 			break;
630 		case EVNT_STOP:
631 			timer_stop(&peer->timers, Timer_IdleHold);
632 			break;
633 		default:
634 			/* ignore */
635 			break;
636 		}
637 		break;
638 	case STATE_CONNECT:
639 		switch (event) {
640 		case EVNT_START:
641 			/* ignore */
642 			break;
643 		case EVNT_CON_OPEN:
644 			session_tcp_established(peer);
645 			session_open(peer);
646 			timer_stop(&peer->timers, Timer_ConnectRetry);
647 			peer->holdtime = INTERVAL_HOLD_INITIAL;
648 			start_timer_holdtime(peer);
649 			change_state(peer, STATE_OPENSENT, event);
650 			break;
651 		case EVNT_CON_OPENFAIL:
652 			timer_set(&peer->timers, Timer_ConnectRetry,
653 			    conf->connectretry);
654 			session_close_connection(peer);
655 			change_state(peer, STATE_ACTIVE, event);
656 			break;
657 		case EVNT_TIMER_CONNRETRY:
658 			timer_set(&peer->timers, Timer_ConnectRetry,
659 			    conf->connectretry);
660 			session_connect(peer);
661 			break;
662 		default:
663 			change_state(peer, STATE_IDLE, event);
664 			break;
665 		}
666 		break;
667 	case STATE_ACTIVE:
668 		switch (event) {
669 		case EVNT_START:
670 			/* ignore */
671 			break;
672 		case EVNT_CON_OPEN:
673 			session_tcp_established(peer);
674 			session_open(peer);
675 			timer_stop(&peer->timers, Timer_ConnectRetry);
676 			peer->holdtime = INTERVAL_HOLD_INITIAL;
677 			start_timer_holdtime(peer);
678 			change_state(peer, STATE_OPENSENT, event);
679 			break;
680 		case EVNT_CON_OPENFAIL:
681 			timer_set(&peer->timers, Timer_ConnectRetry,
682 			    conf->connectretry);
683 			session_close_connection(peer);
684 			change_state(peer, STATE_ACTIVE, event);
685 			break;
686 		case EVNT_TIMER_CONNRETRY:
687 			timer_set(&peer->timers, Timer_ConnectRetry,
688 			    peer->holdtime);
689 			change_state(peer, STATE_CONNECT, event);
690 			session_connect(peer);
691 			break;
692 		default:
693 			change_state(peer, STATE_IDLE, event);
694 			break;
695 		}
696 		break;
697 	case STATE_OPENSENT:
698 		switch (event) {
699 		case EVNT_START:
700 			/* ignore */
701 			break;
702 		case EVNT_STOP:
703 			change_state(peer, STATE_IDLE, event);
704 			break;
705 		case EVNT_CON_CLOSED:
706 			session_close_connection(peer);
707 			timer_set(&peer->timers, Timer_ConnectRetry,
708 			    conf->connectretry);
709 			change_state(peer, STATE_ACTIVE, event);
710 			break;
711 		case EVNT_CON_FATAL:
712 			change_state(peer, STATE_IDLE, event);
713 			break;
714 		case EVNT_TIMER_HOLDTIME:
715 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
716 			    0, NULL);
717 			change_state(peer, STATE_IDLE, event);
718 			break;
719 		case EVNT_TIMER_SENDHOLD:
720 			session_notification(peer, ERR_SENDHOLDTIMEREXPIRED,
721 			    0, NULL);
722 			change_state(peer, STATE_IDLE, event);
723 			break;
724 		case EVNT_RCVD_OPEN:
725 			/* parse_open calls change_state itself on failure */
726 			if (parse_open(peer))
727 				break;
728 			session_keepalive(peer);
729 			change_state(peer, STATE_OPENCONFIRM, event);
730 			break;
731 		case EVNT_RCVD_NOTIFICATION:
732 			parse_notification(peer);
733 			break;
734 		default:
735 			session_notification(peer,
736 			    ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL);
737 			change_state(peer, STATE_IDLE, event);
738 			break;
739 		}
740 		break;
741 	case STATE_OPENCONFIRM:
742 		switch (event) {
743 		case EVNT_START:
744 			/* ignore */
745 			break;
746 		case EVNT_STOP:
747 			change_state(peer, STATE_IDLE, event);
748 			break;
749 		case EVNT_CON_CLOSED:
750 		case EVNT_CON_FATAL:
751 			change_state(peer, STATE_IDLE, event);
752 			break;
753 		case EVNT_TIMER_HOLDTIME:
754 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
755 			    0, NULL);
756 			change_state(peer, STATE_IDLE, event);
757 			break;
758 		case EVNT_TIMER_SENDHOLD:
759 			session_notification(peer, ERR_SENDHOLDTIMEREXPIRED,
760 			    0, NULL);
761 			change_state(peer, STATE_IDLE, event);
762 			break;
763 		case EVNT_TIMER_KEEPALIVE:
764 			session_keepalive(peer);
765 			break;
766 		case EVNT_RCVD_KEEPALIVE:
767 			start_timer_holdtime(peer);
768 			change_state(peer, STATE_ESTABLISHED, event);
769 			break;
770 		case EVNT_RCVD_NOTIFICATION:
771 			parse_notification(peer);
772 			break;
773 		default:
774 			session_notification(peer,
775 			    ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL);
776 			change_state(peer, STATE_IDLE, event);
777 			break;
778 		}
779 		break;
780 	case STATE_ESTABLISHED:
781 		switch (event) {
782 		case EVNT_START:
783 			/* ignore */
784 			break;
785 		case EVNT_STOP:
786 			change_state(peer, STATE_IDLE, event);
787 			break;
788 		case EVNT_CON_CLOSED:
789 		case EVNT_CON_FATAL:
790 			change_state(peer, STATE_IDLE, event);
791 			break;
792 		case EVNT_TIMER_HOLDTIME:
793 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
794 			    0, NULL);
795 			change_state(peer, STATE_IDLE, event);
796 			break;
797 		case EVNT_TIMER_SENDHOLD:
798 			session_notification(peer, ERR_SENDHOLDTIMEREXPIRED,
799 			    0, NULL);
800 			change_state(peer, STATE_IDLE, event);
801 			break;
802 		case EVNT_TIMER_KEEPALIVE:
803 			session_keepalive(peer);
804 			break;
805 		case EVNT_RCVD_KEEPALIVE:
806 			start_timer_holdtime(peer);
807 			break;
808 		case EVNT_RCVD_UPDATE:
809 			start_timer_holdtime(peer);
810 			if (parse_update(peer))
811 				change_state(peer, STATE_IDLE, event);
812 			else
813 				start_timer_holdtime(peer);
814 			break;
815 		case EVNT_RCVD_NOTIFICATION:
816 			parse_notification(peer);
817 			break;
818 		default:
819 			session_notification(peer,
820 			    ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL);
821 			change_state(peer, STATE_IDLE, event);
822 			break;
823 		}
824 		break;
825 	}
826 }
827 
828 void
829 start_timer_holdtime(struct peer *peer)
830 {
831 	if (peer->holdtime > 0)
832 		timer_set(&peer->timers, Timer_Hold, peer->holdtime);
833 	else
834 		timer_stop(&peer->timers, Timer_Hold);
835 }
836 
837 void
838 start_timer_sendholdtime(struct peer *peer)
839 {
840 	uint16_t holdtime = INTERVAL_HOLD;
841 
842 	if (peer->holdtime > INTERVAL_HOLD)
843 		holdtime = peer->holdtime;
844 
845 	if (peer->holdtime > 0)
846 		timer_set(&peer->timers, Timer_SendHold, holdtime);
847 	else
848 		timer_stop(&peer->timers, Timer_SendHold);
849 }
850 
851 void
852 start_timer_keepalive(struct peer *peer)
853 {
854 	if (peer->holdtime > 0)
855 		timer_set(&peer->timers, Timer_Keepalive, peer->holdtime / 3);
856 	else
857 		timer_stop(&peer->timers, Timer_Keepalive);
858 }
859 
860 void
861 session_close_connection(struct peer *peer)
862 {
863 	if (peer->fd != -1) {
864 		close(peer->fd);
865 		pauseaccept = 0;
866 	}
867 	peer->fd = peer->wbuf.fd = -1;
868 }
869 
/*
 * Move peer to the given state in response to event, performing the side
 * effects tied to entering that state.
 *
 * Entering STATE_IDLE tears the session down: timers are stopped, the
 * connection is closed, buffers are released and the IdleHold timer is
 * re-armed (unless the event was EVNT_STOP).  Entering STATE_CONNECT from
 * ESTABLISHED with graceful restart negotiated performs a partial
 * teardown.  Entering STATE_ESTABLISHED arms the IdleHoldReset timer and
 * announces the session as up.
 *
 * The transition is logged and handed to matching mrt dumps before
 * peer->prev_state and peer->state are updated, so the code above that
 * point still sees the old state in peer->state.
 */
void
change_state(struct peer *peer, enum session_state state,
    enum session_events event)
{
	struct mrt	*mrt;

	switch (state) {
	case STATE_IDLE:
		/* carp demotion first. new peers handled in init_peer */
		if (peer->state == STATE_ESTABLISHED &&
		    peer->conf.demote_group[0] && !peer->demoted)
			session_demote(peer, +1);

		/*
		 * try to write out what's buffered (maybe a notification),
		 * don't bother if it fails
		 */
		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
			msgbuf_write(&peer->wbuf);

		/*
		 * we must start the timer for the next EVNT_START
		 * if we are coming here due to an error and the
		 * session was not established successfully before, the
		 * starttimerinterval needs to be exponentially increased
		 */
		if (peer->IdleHoldTime == 0)
			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
		peer->holdtime = INTERVAL_HOLD_INITIAL;
		timer_stop(&peer->timers, Timer_ConnectRetry);
		timer_stop(&peer->timers, Timer_Keepalive);
		timer_stop(&peer->timers, Timer_Hold);
		timer_stop(&peer->timers, Timer_SendHold);
		timer_stop(&peer->timers, Timer_IdleHold);
		timer_stop(&peer->timers, Timer_IdleHoldReset);
		session_close_connection(peer);
		msgbuf_clear(&peer->wbuf);
		free(peer->rbuf);
		peer->rbuf = NULL;
		peer->rpending = 0;
		/* forget what the remote side announced */
		memset(&peer->capa.peer, 0, sizeof(peer->capa.peer));
		if (!peer->template)
			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
			    peer->conf.id, 0, -1, NULL, 0);

		if (event != EVNT_STOP) {
			timer_set(&peer->timers, Timer_IdleHold,
			    peer->IdleHoldTime);
			/* back off: double the hold time up to the cap */
			if (event != EVNT_NONE &&
			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
				peer->IdleHoldTime *= 2;
		}
		if (peer->state == STATE_ESTABLISHED) {
			if (peer->capa.neg.grestart.restart == 2 &&
			    (event == EVNT_CON_CLOSED ||
			    event == EVNT_CON_FATAL)) {
				/* don't punish graceful restart */
				timer_set(&peer->timers, Timer_IdleHold, 0);
				peer->IdleHoldTime /= 2;
				session_graceful_restart(peer);
			} else
				session_down(peer);
		}
		if (peer->state == STATE_NONE ||
		    peer->state == STATE_ESTABLISHED) {
			/* initialize capability negotiation structures */
			memcpy(&peer->capa.ann, &peer->conf.capabilities,
			    sizeof(peer->capa.ann));
		}
		break;
	case STATE_CONNECT:
		if (peer->state == STATE_ESTABLISHED &&
		    peer->capa.neg.grestart.restart == 2) {
			/* do the graceful restart dance */
			session_graceful_restart(peer);
			peer->holdtime = INTERVAL_HOLD_INITIAL;
			timer_stop(&peer->timers, Timer_ConnectRetry);
			timer_stop(&peer->timers, Timer_Keepalive);
			timer_stop(&peer->timers, Timer_Hold);
			timer_stop(&peer->timers, Timer_SendHold);
			timer_stop(&peer->timers, Timer_IdleHold);
			timer_stop(&peer->timers, Timer_IdleHoldReset);
			session_close_connection(peer);
			msgbuf_clear(&peer->wbuf);
			memset(&peer->capa.peer, 0, sizeof(peer->capa.peer));
		}
		break;
	case STATE_ACTIVE:
		if (!peer->template)
			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
			    peer->conf.id, 0, -1, NULL, 0);
		break;
	case STATE_OPENSENT:
		break;
	case STATE_OPENCONFIRM:
		break;
	case STATE_ESTABLISHED:
		timer_set(&peer->timers, Timer_IdleHoldReset,
		    peer->IdleHoldTime);
		if (peer->demoted)
			timer_set(&peer->timers, Timer_CarpUndemote,
			    INTERVAL_HOLD_DEMOTED);
		session_up(peer);
		break;
	default:		/* unknown target state: no entry actions */
		break;
	}

	log_statechange(peer, state, event);
	/* record the transition in "all" dumps that match this peer */
	LIST_FOREACH(mrt, &mrthead, entry) {
		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
			continue;
		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
		    mrt->group_id == peer->conf.groupid))
			mrt_dump_state(mrt, peer->state, state, peer);
	}
	peer->prev_state = peer->state;
	peer->state = state;
}
990 
991 void
992 session_accept(int listenfd)
993 {
994 	int			 connfd;
995 	socklen_t		 len;
996 	struct sockaddr_storage	 cliaddr;
997 	struct peer		*p = NULL;
998 
999 	len = sizeof(cliaddr);
1000 	if ((connfd = accept4(listenfd,
1001 	    (struct sockaddr *)&cliaddr, &len,
1002 	    SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) {
1003 		if (errno == ENFILE || errno == EMFILE)
1004 			pauseaccept = getmonotime();
1005 		else if (errno != EWOULDBLOCK && errno != EINTR &&
1006 		    errno != ECONNABORTED)
1007 			log_warn("accept");
1008 		return;
1009 	}
1010 
1011 	p = getpeerbyip(conf, (struct sockaddr *)&cliaddr);
1012 
1013 	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
1014 		if (timer_running(&p->timers, Timer_IdleHold, NULL)) {
1015 			/* fast reconnect after clear */
1016 			p->passive = 1;
1017 			bgp_fsm(p, EVNT_START);
1018 		}
1019 	}
1020 
1021 	if (p != NULL &&
1022 	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
1023 		if (p->fd != -1) {
1024 			if (p->state == STATE_CONNECT)
1025 				session_close_connection(p);
1026 			else {
1027 				close(connfd);
1028 				return;
1029 			}
1030 		}
1031 
1032 open:
1033 		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1034 			log_peer_warnx(&p->conf,
1035 			    "ipsec or md5sig configured but not available");
1036 			close(connfd);
1037 			return;
1038 		}
1039 
1040 		if (tcp_md5_check(connfd, p) == -1) {
1041 			close(connfd);
1042 			return;
1043 		}
1044 		p->fd = p->wbuf.fd = connfd;
1045 		if (session_setup_socket(p)) {
1046 			close(connfd);
1047 			return;
1048 		}
1049 		bgp_fsm(p, EVNT_CON_OPEN);
1050 		return;
1051 	} else if (p != NULL && p->state == STATE_ESTABLISHED &&
1052 	    p->capa.neg.grestart.restart == 2) {
1053 		/* first do the graceful restart dance */
1054 		change_state(p, STATE_CONNECT, EVNT_CON_CLOSED);
1055 		/* then do part of the open dance */
1056 		goto open;
1057 	} else {
1058 		log_conn_attempt(p, (struct sockaddr *)&cliaddr, len);
1059 		close(connfd);
1060 	}
1061 }
1062 
1063 int
1064 session_connect(struct peer *peer)
1065 {
1066 	struct sockaddr		*sa;
1067 	struct bgpd_addr	*bind_addr = NULL;
1068 	socklen_t		 sa_len;
1069 
1070 	/*
1071 	 * we do not need the overcomplicated collision detection RFC 1771
1072 	 * describes; we simply make sure there is only ever one concurrent
1073 	 * tcp connection per peer.
1074 	 */
1075 	if (peer->fd != -1)
1076 		return (-1);
1077 
1078 	if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid),
1079 	    SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) {
1080 		log_peer_warn(&peer->conf, "session_connect socket");
1081 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1082 		return (-1);
1083 	}
1084 
1085 	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1086 		log_peer_warnx(&peer->conf,
1087 		    "ipsec or md5sig configured but not available");
1088 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1089 		return (-1);
1090 	}
1091 
1092 	tcp_md5_set(peer->fd, peer);
1093 	peer->wbuf.fd = peer->fd;
1094 
1095 	/* if local-address is set we need to bind() */
1096 	switch (peer->conf.remote_addr.aid) {
1097 	case AID_INET:
1098 		bind_addr = &peer->conf.local_addr_v4;
1099 		break;
1100 	case AID_INET6:
1101 		bind_addr = &peer->conf.local_addr_v6;
1102 		break;
1103 	}
1104 	if ((sa = addr2sa(bind_addr, 0, &sa_len)) != NULL) {
1105 		if (bind(peer->fd, sa, sa_len) == -1) {
1106 			log_peer_warn(&peer->conf, "session_connect bind");
1107 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1108 			return (-1);
1109 		}
1110 	}
1111 
1112 	if (session_setup_socket(peer)) {
1113 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1114 		return (-1);
1115 	}
1116 
1117 	sa = addr2sa(&peer->conf.remote_addr, peer->conf.remote_port, &sa_len);
1118 	if (connect(peer->fd, sa, sa_len) == -1) {
1119 		if (errno != EINPROGRESS) {
1120 			if (errno != peer->lasterr)
1121 				log_peer_warn(&peer->conf, "connect");
1122 			peer->lasterr = errno;
1123 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1124 			return (-1);
1125 		}
1126 	} else
1127 		bgp_fsm(peer, EVNT_CON_OPEN);
1128 
1129 	return (0);
1130 }
1131 
1132 int
1133 session_setup_socket(struct peer *p)
1134 {
1135 	int	ttl = p->conf.distance;
1136 	int	pre = IPTOS_PREC_INTERNETCONTROL;
1137 	int	nodelay = 1;
1138 	int	bsize;
1139 
1140 	switch (p->conf.remote_addr.aid) {
1141 	case AID_INET:
1142 		/* set precedence, see RFC 1771 appendix 5 */
1143 		if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1144 		    -1) {
1145 			log_peer_warn(&p->conf,
1146 			    "session_setup_socket setsockopt TOS");
1147 			return (-1);
1148 		}
1149 
1150 		if (p->conf.ebgp) {
1151 			/*
1152 			 * set TTL to foreign router's distance
1153 			 * 1=direct n=multihop with ttlsec, we always use 255
1154 			 */
1155 			if (p->conf.ttlsec) {
1156 				ttl = 256 - p->conf.distance;
1157 				if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
1158 				    &ttl, sizeof(ttl)) == -1) {
1159 					log_peer_warn(&p->conf,
1160 					    "session_setup_socket: "
1161 					    "setsockopt MINTTL");
1162 					return (-1);
1163 				}
1164 				ttl = 255;
1165 			}
1166 
1167 			if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1168 			    sizeof(ttl)) == -1) {
1169 				log_peer_warn(&p->conf,
1170 				    "session_setup_socket setsockopt TTL");
1171 				return (-1);
1172 			}
1173 		}
1174 		break;
1175 	case AID_INET6:
1176 		if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_TCLASS, &pre,
1177 		    sizeof(pre)) == -1) {
1178 			log_peer_warn(&p->conf, "session_setup_socket "
1179 			    "setsockopt TCLASS");
1180 			return (-1);
1181 		}
1182 
1183 		if (p->conf.ebgp) {
1184 			/*
1185 			 * set hoplimit to foreign router's distance
1186 			 * 1=direct n=multihop with ttlsec, we always use 255
1187 			 */
1188 			if (p->conf.ttlsec) {
1189 				ttl = 256 - p->conf.distance;
1190 				if (setsockopt(p->fd, IPPROTO_IPV6,
1191 				    IPV6_MINHOPCOUNT, &ttl, sizeof(ttl))
1192 				    == -1) {
1193 					log_peer_warn(&p->conf,
1194 					    "session_setup_socket: "
1195 					    "setsockopt MINHOPCOUNT");
1196 					return (-1);
1197 				}
1198 				ttl = 255;
1199 			}
1200 			if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
1201 			    &ttl, sizeof(ttl)) == -1) {
1202 				log_peer_warn(&p->conf,
1203 				    "session_setup_socket setsockopt hoplimit");
1204 				return (-1);
1205 			}
1206 		}
1207 		break;
1208 	}
1209 
1210 	/* set TCP_NODELAY */
1211 	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1212 	    sizeof(nodelay)) == -1) {
1213 		log_peer_warn(&p->conf,
1214 		    "session_setup_socket setsockopt TCP_NODELAY");
1215 		return (-1);
1216 	}
1217 
1218 	/* limit bufsize. no biggie if it fails */
1219 	bsize = 65535;
1220 	setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize, sizeof(bsize));
1221 	setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize, sizeof(bsize));
1222 
1223 	return (0);
1224 }
1225 
1226 /*
1227  * compare the bgpd_addr with the sockaddr by converting the latter into
1228  * a bgpd_addr. Return true if the two are equal, including any scope
1229  */
1230 static int
1231 sa_equal(struct bgpd_addr *ba, struct sockaddr *b)
1232 {
1233 	struct bgpd_addr bb;
1234 
1235 	sa2addr(b, &bb, NULL);
1236 	return (memcmp(ba, &bb, sizeof(*ba)) == 0);
1237 }
1238 
1239 static void
1240 get_alternate_addr(struct bgpd_addr *local, struct bgpd_addr *remote,
1241     struct bgpd_addr *alt, unsigned int *scope)
1242 {
1243 	struct ifaddrs	*ifap, *ifa, *match;
1244 	int connected = 0;
1245 	u_int8_t plen;
1246 
1247 	if (getifaddrs(&ifap) == -1)
1248 		fatal("getifaddrs");
1249 
1250 	for (match = ifap; match != NULL; match = match->ifa_next) {
1251 		if (match->ifa_addr == NULL)
1252 			continue;
1253 		if (match->ifa_addr->sa_family != AF_INET &&
1254 		    match->ifa_addr->sa_family != AF_INET6)
1255 			continue;
1256 		if (sa_equal(local, match->ifa_addr)) {
1257 			if (match->ifa_flags & IFF_POINTOPOINT &&
1258 			    match->ifa_dstaddr != NULL) {
1259 				if (sa_equal(remote, match->ifa_dstaddr))
1260 					connected = 1;
1261 			} else if (match->ifa_netmask != NULL) {
1262 				plen = mask2prefixlen(
1263 				    match->ifa_addr->sa_family,
1264 				    match->ifa_netmask);
1265 				if (prefix_compare(local, remote, plen) == 0)
1266 					connected = 1;
1267 			}
1268 			break;
1269 		}
1270 	}
1271 
1272 	if (match == NULL) {
1273 		log_warnx("%s: local address not found", __func__);
1274 		return;
1275 	}
1276 	if (connected)
1277 		*scope = if_nametoindex(match->ifa_name);
1278 	else
1279 		*scope = 0;
1280 
1281 	switch (local->aid) {
1282 	case AID_INET6:
1283 		for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
1284 			if (ifa->ifa_addr != NULL &&
1285 			    ifa->ifa_addr->sa_family == AF_INET &&
1286 			    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
1287 				sa2addr(ifa->ifa_addr, alt, NULL);
1288 				break;
1289 			}
1290 		}
1291 		break;
1292 	case AID_INET:
1293 		for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
1294 			if (ifa->ifa_addr != NULL &&
1295 			    ifa->ifa_addr->sa_family == AF_INET6 &&
1296 			    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
1297 				struct sockaddr_in6 *s =
1298 				    (struct sockaddr_in6 *)ifa->ifa_addr;
1299 
1300 				/* only accept global scope addresses */
1301 				if (IN6_IS_ADDR_LINKLOCAL(&s->sin6_addr) ||
1302 				    IN6_IS_ADDR_SITELOCAL(&s->sin6_addr))
1303 					continue;
1304 				sa2addr(ifa->ifa_addr, alt, NULL);
1305 				break;
1306 			}
1307 		}
1308 		break;
1309 	default:
1310 		log_warnx("%s: unsupported address family %s", __func__,
1311 		    aid2str(local->aid));
1312 		break;
1313 	}
1314 
1315 	freeifaddrs(ifap);
1316 }
1317 
1318 void
1319 session_tcp_established(struct peer *peer)
1320 {
1321 	struct sockaddr_storage	ss;
1322 	socklen_t		len;
1323 
1324 	len = sizeof(ss);
1325 	if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1326 		log_warn("getsockname");
1327 	sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port);
1328 	len = sizeof(ss);
1329 	if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1330 		log_warn("getpeername");
1331 	sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port);
1332 
1333 	get_alternate_addr(&peer->local, &peer->remote, &peer->local_alt,
1334 	    &peer->if_scope);
1335 }
1336 
/*
 * Append a capability header (one byte code, one byte length) to opb.
 * Returns the sum of the two ibuf_add_n8() results, i.e. 0 if both
 * appends reported success.
 */
int
session_capa_add(struct ibuf *opb, uint8_t capa_code, uint8_t capa_len)
{
	int failed;

	failed = ibuf_add_n8(opb, capa_code);
	failed += ibuf_add_n8(opb, capa_len);

	return (failed);
}
1346 
/*
 * Append the 4 byte multiprotocol capability payload for aid to buf:
 * 16 bit AFI, one zero byte, 8 bit SAFI.
 * Returns -1 if aid cannot be mapped by aid2afi(), otherwise the sum of
 * the ibuf append results (0 when all of them succeeded).
 */
int
session_capa_add_mp(struct ibuf *buf, uint8_t aid)
{
	uint16_t	 afi;
	uint8_t		 safi;
	int		 failed;

	if (aid2afi(aid, &afi, &safi) == -1) {
		log_warn("%s: bad AID", __func__);
		return (-1);
	}

	failed = ibuf_add_n16(buf, afi);
	failed += ibuf_add_zero(buf, 1);
	failed += ibuf_add_n8(buf, safi);

	return (failed);
}
1365 
/*
 * Append a 4 byte <AFI, SAFI, flags> tuple for aid to b: 16 bit AFI,
 * 8 bit SAFI, 8 bit flags.
 * Returns -1 if aid2afi() rejects aid, otherwise the sum of the ibuf
 * append results (0 when all of them succeeded).
 *
 * The accumulator is a signed int, matching the return type and the other
 * session_capa_add*() helpers; summing error returns in an unsigned and
 * converting back to int on return relied on wrap-around.
 */
int
session_capa_add_afi(struct ibuf *b, uint8_t aid, uint8_t flags)
{
	int		errs = 0;
	uint16_t	afi;
	uint8_t		safi;

	if (aid2afi(aid, &afi, &safi)) {
		log_warn("%s: bad AID", __func__);
		return (-1);
	}

	errs += ibuf_add_n16(b, afi);
	errs += ibuf_add_n8(b, safi);
	errs += ibuf_add_n8(b, flags);

	return (errs);
}
1384 
1385 struct bgp_msg *
1386 session_newmsg(enum msg_type msgtype, uint16_t len)
1387 {
1388 	struct bgp_msg		*msg;
1389 	struct ibuf		*buf;
1390 	int			 errs = 0;
1391 
1392 	if ((buf = ibuf_open(len)) == NULL)
1393 		return (NULL);
1394 
1395 	errs += ibuf_add(buf, marker, sizeof(marker));
1396 	errs += ibuf_add_n16(buf, len);
1397 	errs += ibuf_add_n8(buf, msgtype);
1398 
1399 	if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
1400 		ibuf_free(buf);
1401 		return (NULL);
1402 	}
1403 
1404 	msg->buf = buf;
1405 	msg->type = msgtype;
1406 	msg->len = len;
1407 
1408 	return (msg);
1409 }
1410 
1411 int
1412 session_sendmsg(struct bgp_msg *msg, struct peer *p)
1413 {
1414 	struct mrt		*mrt;
1415 
1416 	LIST_FOREACH(mrt, &mrthead, entry) {
1417 		if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE &&
1418 		    mrt->type == MRT_UPDATE_OUT)))
1419 			continue;
1420 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1421 		    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1422 		    mrt->group_id == p->conf.groupid))
1423 			mrt_dump_bgp_msg(mrt, ibuf_data(msg->buf), msg->len, p,
1424 			    msg->type);
1425 	}
1426 
1427 	ibuf_close(&p->wbuf, msg->buf);
1428 	if (!p->throttled && p->wbuf.queued > SESS_MSG_HIGH_MARK) {
1429 		if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1)
1430 			log_peer_warn(&p->conf, "imsg_compose XOFF");
1431 		else
1432 			p->throttled = 1;
1433 	}
1434 
1435 	free(msg);
1436 	return (0);
1437 }
1438 
1439 /*
1440  * Translate between internal roles and the value expected by RFC 9234.
1441  */
1442 static uint8_t
1443 role2capa(enum role role)
1444 {
1445 	switch (role) {
1446 	case ROLE_CUSTOMER:
1447 		return CAPA_ROLE_CUSTOMER;
1448 	case ROLE_PROVIDER:
1449 		return CAPA_ROLE_PROVIDER;
1450 	case ROLE_RS:
1451 		return CAPA_ROLE_RS;
1452 	case ROLE_RS_CLIENT:
1453 		return CAPA_ROLE_RS_CLIENT;
1454 	case ROLE_PEER:
1455 		return CAPA_ROLE_PEER;
1456 	default:
1457 		fatalx("Unsupported role for role capability");
1458 	}
1459 }
1460 
1461 static enum role
1462 capa2role(uint8_t val)
1463 {
1464 	switch (val) {
1465 	case CAPA_ROLE_PROVIDER:
1466 		return ROLE_PROVIDER;
1467 	case CAPA_ROLE_RS:
1468 		return ROLE_RS;
1469 	case CAPA_ROLE_RS_CLIENT:
1470 		return ROLE_RS_CLIENT;
1471 	case CAPA_ROLE_CUSTOMER:
1472 		return ROLE_CUSTOMER;
1473 	case CAPA_ROLE_PEER:
1474 		return ROLE_PEER;
1475 	default:
1476 		return ROLE_NONE;
1477 	}
1478 }
1479 
1480 void
1481 session_open(struct peer *p)
1482 {
1483 	struct bgp_msg		*buf;
1484 	struct ibuf		*opb;
1485 	size_t			 len, optparamlen;
1486 	uint16_t		 holdtime;
1487 	uint8_t			 i;
1488 	int			 errs = 0, extlen = 0;
1489 	int			 mpcapa = 0;
1490 
1491 
1492 	if ((opb = ibuf_dynamic(0, UINT16_MAX - 3)) == NULL) {
1493 		bgp_fsm(p, EVNT_CON_FATAL);
1494 		return;
1495 	}
1496 
1497 	/* multiprotocol extensions, RFC 4760 */
1498 	for (i = AID_MIN; i < AID_MAX; i++)
1499 		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
1500 			errs += session_capa_add(opb, CAPA_MP, 4);
1501 			errs += session_capa_add_mp(opb, i);
1502 			mpcapa++;
1503 		}
1504 
1505 	/* route refresh, RFC 2918 */
1506 	if (p->capa.ann.refresh)	/* no data */
1507 		errs += session_capa_add(opb, CAPA_REFRESH, 0);
1508 
1509 	/* BGP open policy, RFC 9234, only for ebgp sessions */
1510 	if (p->conf.ebgp && p->capa.ann.policy &&
1511 	    p->conf.role != ROLE_NONE &&
1512 	    (p->capa.ann.mp[AID_INET] || p->capa.ann.mp[AID_INET6] ||
1513 	    mpcapa == 0)) {
1514 		errs += session_capa_add(opb, CAPA_ROLE, 1);
1515 		errs += ibuf_add_n8(opb, role2capa(p->conf.role));
1516 	}
1517 
1518 	/* graceful restart and End-of-RIB marker, RFC 4724 */
1519 	if (p->capa.ann.grestart.restart) {
1520 		int		rst = 0;
1521 		uint16_t	hdr = 0;
1522 
1523 		for (i = AID_MIN; i < AID_MAX; i++) {
1524 			if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING)
1525 				rst++;
1526 		}
1527 
1528 		/* Only set the R-flag if no graceful restart is ongoing */
1529 		if (!rst)
1530 			hdr |= CAPA_GR_R_FLAG;
1531 		errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr));
1532 		errs += ibuf_add_n16(opb, hdr);
1533 	}
1534 
1535 	/* 4-bytes AS numbers, RFC6793 */
1536 	if (p->capa.ann.as4byte) {	/* 4 bytes data */
1537 		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(uint32_t));
1538 		errs += ibuf_add_n32(opb, p->conf.local_as);
1539 	}
1540 
1541 	/* advertisement of multiple paths, RFC7911 */
1542 	if (p->capa.ann.add_path[AID_MIN]) {	/* variable */
1543 		uint8_t	aplen;
1544 
1545 		if (mpcapa)
1546 			aplen = 4 * mpcapa;
1547 		else	/* AID_INET */
1548 			aplen = 4;
1549 		errs += session_capa_add(opb, CAPA_ADD_PATH, aplen);
1550 		if (mpcapa) {
1551 			for (i = AID_MIN; i < AID_MAX; i++) {
1552 				if (p->capa.ann.mp[i]) {
1553 					errs += session_capa_add_afi(opb,
1554 					    i, p->capa.ann.add_path[i] &
1555 					    CAPA_AP_MASK);
1556 				}
1557 			}
1558 		} else {	/* AID_INET */
1559 			errs += session_capa_add_afi(opb, AID_INET,
1560 			    p->capa.ann.add_path[AID_INET] & CAPA_AP_MASK);
1561 		}
1562 	}
1563 
1564 	/* enhanced route-refresh, RFC7313 */
1565 	if (p->capa.ann.enhanced_rr)	/* no data */
1566 		errs += session_capa_add(opb, CAPA_ENHANCED_RR, 0);
1567 
1568 	if (errs) {
1569 		ibuf_free(opb);
1570 		bgp_fsm(p, EVNT_CON_FATAL);
1571 		return;
1572 	}
1573 
1574 	optparamlen = ibuf_size(opb);
1575 	len = MSGSIZE_OPEN_MIN + optparamlen;
1576 	if (optparamlen == 0) {
1577 		/* nothing */
1578 	} else if (optparamlen + 2 >= 255) {
1579 		/* RFC9072: use 255 as magic size and request extra header */
1580 		optparamlen = 255;
1581 		extlen = 1;
1582 		/* 3 byte OPT_PARAM_EXT_LEN and OPT_PARAM_CAPABILITIES */
1583 		len += 2 * 3;
1584 	} else {
1585 		/* regular capabilities header */
1586 		optparamlen += 2;
1587 		len += 2;
1588 	}
1589 
1590 	if ((buf = session_newmsg(OPEN, len)) == NULL) {
1591 		ibuf_free(opb);
1592 		bgp_fsm(p, EVNT_CON_FATAL);
1593 		return;
1594 	}
1595 
1596 	if (p->conf.holdtime)
1597 		holdtime = p->conf.holdtime;
1598 	else
1599 		holdtime = conf->holdtime;
1600 
1601 	errs += ibuf_add_n8(buf->buf, 4);
1602 	errs += ibuf_add_n16(buf->buf, p->conf.local_short_as);
1603 	errs += ibuf_add_n16(buf->buf, holdtime);
1604 	/* is already in network byte order */
1605 	errs += ibuf_add_n32(buf->buf, conf->bgpid);
1606 	errs += ibuf_add_n8(buf->buf, optparamlen);
1607 
1608 	if (extlen) {
1609 		/* RFC9072 extra header which spans over the capabilities hdr */
1610 		errs += ibuf_add_n8(buf->buf, OPT_PARAM_EXT_LEN);
1611 		errs += ibuf_add_n16(buf->buf, ibuf_size(opb) + 1 + 2);
1612 	}
1613 
1614 	if (optparamlen) {
1615 		errs += ibuf_add_n8(buf->buf, OPT_PARAM_CAPABILITIES);
1616 
1617 		if (extlen) {
1618 			/* RFC9072: 2-byte extended length */
1619 			errs += ibuf_add_n16(buf->buf, ibuf_size(opb));
1620 		} else {
1621 			errs += ibuf_add_n8(buf->buf, ibuf_size(opb));
1622 		}
1623 		errs += ibuf_add_buf(buf->buf, opb);
1624 	}
1625 
1626 	ibuf_free(opb);
1627 
1628 	if (errs) {
1629 		ibuf_free(buf->buf);
1630 		free(buf);
1631 		bgp_fsm(p, EVNT_CON_FATAL);
1632 		return;
1633 	}
1634 
1635 	if (session_sendmsg(buf, p) == -1) {
1636 		bgp_fsm(p, EVNT_CON_FATAL);
1637 		return;
1638 	}
1639 
1640 	p->stats.msg_sent_open++;
1641 }
1642 
1643 void
1644 session_keepalive(struct peer *p)
1645 {
1646 	struct bgp_msg		*buf;
1647 
1648 	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1649 	    session_sendmsg(buf, p) == -1) {
1650 		bgp_fsm(p, EVNT_CON_FATAL);
1651 		return;
1652 	}
1653 
1654 	start_timer_keepalive(p);
1655 	p->stats.msg_sent_keepalive++;
1656 }
1657 
1658 void
1659 session_update(uint32_t peerid, struct ibuf *ibuf)
1660 {
1661 	struct peer		*p;
1662 	struct bgp_msg		*buf;
1663 
1664 	if ((p = getpeerbyid(conf, peerid)) == NULL) {
1665 		log_warnx("no such peer: id=%u", peerid);
1666 		return;
1667 	}
1668 
1669 	if (p->state != STATE_ESTABLISHED)
1670 		return;
1671 
1672 	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + ibuf_size(ibuf))) ==
1673 	    NULL) {
1674 		bgp_fsm(p, EVNT_CON_FATAL);
1675 		return;
1676 	}
1677 
1678 	if (ibuf_add_buf(buf->buf, ibuf)) {
1679 		ibuf_free(buf->buf);
1680 		free(buf);
1681 		bgp_fsm(p, EVNT_CON_FATAL);
1682 		return;
1683 	}
1684 
1685 	if (session_sendmsg(buf, p) == -1) {
1686 		bgp_fsm(p, EVNT_CON_FATAL);
1687 		return;
1688 	}
1689 
1690 	start_timer_keepalive(p);
1691 	p->stats.msg_sent_update++;
1692 }
1693 
1694 void
1695 session_notification_data(struct peer *p, uint8_t errcode, uint8_t subcode,
1696     void *data, size_t datalen)
1697 {
1698 	struct ibuf ibuf;
1699 
1700 	ibuf_from_buffer(&ibuf, data, datalen);
1701 	session_notification(p, errcode, subcode, &ibuf);
1702 }
1703 
1704 void
1705 session_notification(struct peer *p, uint8_t errcode, uint8_t subcode,
1706     struct ibuf *ibuf)
1707 {
1708 	struct bgp_msg		*buf;
1709 	int			 errs = 0;
1710 	size_t			 datalen = 0;
1711 
1712 	switch (p->state) {
1713 	case STATE_OPENSENT:
1714 	case STATE_OPENCONFIRM:
1715 	case STATE_ESTABLISHED:
1716 		break;
1717 	default:
1718 		/* session not open, no need to send notification */
1719 		log_notification(p, errcode, subcode, ibuf, "dropping");
1720 		return;
1721 	}
1722 
1723 	log_notification(p, errcode, subcode, ibuf, "sending");
1724 
1725 	/* cap to maximum size */
1726 	if (ibuf != NULL) {
1727 		if (ibuf_size(ibuf) >
1728 		    MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN) {
1729 			log_peer_warnx(&p->conf,
1730 			    "oversized notification, data trunkated");
1731 			ibuf_truncate(ibuf, MAX_PKTSIZE -
1732 			    MSGSIZE_NOTIFICATION_MIN);
1733 		}
1734 		datalen = ibuf_size(ibuf);
1735 	}
1736 
1737 	if ((buf = session_newmsg(NOTIFICATION,
1738 	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1739 		bgp_fsm(p, EVNT_CON_FATAL);
1740 		return;
1741 	}
1742 
1743 	errs += ibuf_add_n8(buf->buf, errcode);
1744 	errs += ibuf_add_n8(buf->buf, subcode);
1745 
1746 	if (ibuf != NULL)
1747 		errs += ibuf_add_buf(buf->buf, ibuf);
1748 
1749 	if (errs) {
1750 		ibuf_free(buf->buf);
1751 		free(buf);
1752 		bgp_fsm(p, EVNT_CON_FATAL);
1753 		return;
1754 	}
1755 
1756 	if (session_sendmsg(buf, p) == -1) {
1757 		bgp_fsm(p, EVNT_CON_FATAL);
1758 		return;
1759 	}
1760 
1761 	p->stats.msg_sent_notification++;
1762 	p->stats.last_sent_errcode = errcode;
1763 	p->stats.last_sent_suberr = subcode;
1764 }
1765 
1766 int
1767 session_neighbor_rrefresh(struct peer *p)
1768 {
1769 	uint8_t	i;
1770 
1771 	if (!(p->capa.neg.refresh || p->capa.neg.enhanced_rr))
1772 		return (-1);
1773 
1774 	for (i = AID_MIN; i < AID_MAX; i++) {
1775 		if (p->capa.neg.mp[i] != 0)
1776 			session_rrefresh(p, i, ROUTE_REFRESH_REQUEST);
1777 	}
1778 
1779 	return (0);
1780 }
1781 
1782 void
1783 session_rrefresh(struct peer *p, uint8_t aid, uint8_t subtype)
1784 {
1785 	struct bgp_msg		*buf;
1786 	int			 errs = 0;
1787 	uint16_t		 afi;
1788 	uint8_t			 safi;
1789 
1790 	switch (subtype) {
1791 	case ROUTE_REFRESH_REQUEST:
1792 		p->stats.refresh_sent_req++;
1793 		break;
1794 	case ROUTE_REFRESH_BEGIN_RR:
1795 	case ROUTE_REFRESH_END_RR:
1796 		/* requires enhanced route refresh */
1797 		if (!p->capa.neg.enhanced_rr)
1798 			return;
1799 		if (subtype == ROUTE_REFRESH_BEGIN_RR)
1800 			p->stats.refresh_sent_borr++;
1801 		else
1802 			p->stats.refresh_sent_eorr++;
1803 		break;
1804 	default:
1805 		fatalx("session_rrefresh: bad subtype %d", subtype);
1806 	}
1807 
1808 	if (aid2afi(aid, &afi, &safi) == -1)
1809 		fatalx("session_rrefresh: bad afi/safi pair");
1810 
1811 	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1812 		bgp_fsm(p, EVNT_CON_FATAL);
1813 		return;
1814 	}
1815 
1816 	errs += ibuf_add_n16(buf->buf, afi);
1817 	errs += ibuf_add_n8(buf->buf, subtype);
1818 	errs += ibuf_add_n8(buf->buf, safi);
1819 
1820 	if (errs) {
1821 		ibuf_free(buf->buf);
1822 		free(buf);
1823 		bgp_fsm(p, EVNT_CON_FATAL);
1824 		return;
1825 	}
1826 
1827 	if (session_sendmsg(buf, p) == -1) {
1828 		bgp_fsm(p, EVNT_CON_FATAL);
1829 		return;
1830 	}
1831 
1832 	p->stats.msg_sent_rrefresh++;
1833 }
1834 
1835 int
1836 session_graceful_restart(struct peer *p)
1837 {
1838 	uint8_t	i;
1839 
1840 	timer_set(&p->timers, Timer_RestartTimeout,
1841 	    p->capa.neg.grestart.timeout);
1842 
1843 	for (i = AID_MIN; i < AID_MAX; i++) {
1844 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) {
1845 			if (imsg_rde(IMSG_SESSION_STALE, p->conf.id,
1846 			    &i, sizeof(i)) == -1)
1847 				return (-1);
1848 			log_peer_warnx(&p->conf,
1849 			    "graceful restart of %s, keeping routes",
1850 			    aid2str(i));
1851 			p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING;
1852 		} else if (p->capa.neg.mp[i]) {
1853 			if (imsg_rde(IMSG_SESSION_NOGRACE, p->conf.id,
1854 			    &i, sizeof(i)) == -1)
1855 				return (-1);
1856 			log_peer_warnx(&p->conf,
1857 			    "graceful restart of %s, flushing routes",
1858 			    aid2str(i));
1859 		}
1860 	}
1861 	return (0);
1862 }
1863 
1864 int
1865 session_graceful_stop(struct peer *p)
1866 {
1867 	uint8_t	i;
1868 
1869 	for (i = AID_MIN; i < AID_MAX; i++) {
1870 		/*
1871 		 * Only flush if the peer is restarting and the timeout fired.
1872 		 * In all other cases the session was already flushed when the
1873 		 * session went down or when the new open message was parsed.
1874 		 */
1875 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) {
1876 			log_peer_warnx(&p->conf, "graceful restart of %s, "
1877 			    "time-out, flushing", aid2str(i));
1878 			if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
1879 			    &i, sizeof(i)) == -1)
1880 				return (-1);
1881 		}
1882 		p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING;
1883 	}
1884 	return (0);
1885 }
1886 
1887 int
1888 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1889 {
1890 	ssize_t		n;
1891 	socklen_t	len;
1892 	int		error;
1893 
1894 	if (p->state == STATE_CONNECT) {
1895 		if (pfd->revents & POLLOUT) {
1896 			if (pfd->revents & POLLIN) {
1897 				/* error occurred */
1898 				len = sizeof(error);
1899 				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1900 				    &error, &len) == -1 || error) {
1901 					if (error)
1902 						errno = error;
1903 					if (errno != p->lasterr) {
1904 						log_peer_warn(&p->conf,
1905 						    "socket error");
1906 						p->lasterr = errno;
1907 					}
1908 					bgp_fsm(p, EVNT_CON_OPENFAIL);
1909 					return (1);
1910 				}
1911 			}
1912 			bgp_fsm(p, EVNT_CON_OPEN);
1913 			return (1);
1914 		}
1915 		if (pfd->revents & POLLHUP) {
1916 			bgp_fsm(p, EVNT_CON_OPENFAIL);
1917 			return (1);
1918 		}
1919 		if (pfd->revents & (POLLERR|POLLNVAL)) {
1920 			bgp_fsm(p, EVNT_CON_FATAL);
1921 			return (1);
1922 		}
1923 		return (0);
1924 	}
1925 
1926 	if (pfd->revents & POLLHUP) {
1927 		bgp_fsm(p, EVNT_CON_CLOSED);
1928 		return (1);
1929 	}
1930 	if (pfd->revents & (POLLERR|POLLNVAL)) {
1931 		bgp_fsm(p, EVNT_CON_FATAL);
1932 		return (1);
1933 	}
1934 
1935 	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1936 		if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) {
1937 			if (error == 0)
1938 				log_peer_warnx(&p->conf, "Connection closed");
1939 			else if (error == -1)
1940 				log_peer_warn(&p->conf, "write error");
1941 			bgp_fsm(p, EVNT_CON_FATAL);
1942 			return (1);
1943 		}
1944 		p->stats.last_write = getmonotime();
1945 		start_timer_sendholdtime(p);
1946 		if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) {
1947 			if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1)
1948 				log_peer_warn(&p->conf, "imsg_compose XON");
1949 			else
1950 				p->throttled = 0;
1951 		}
1952 		if (!(pfd->revents & POLLIN))
1953 			return (1);
1954 	}
1955 
1956 	if (p->rbuf && pfd->revents & POLLIN) {
1957 		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1958 		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1959 			if (errno != EINTR && errno != EAGAIN) {
1960 				log_peer_warn(&p->conf, "read error");
1961 				bgp_fsm(p, EVNT_CON_FATAL);
1962 			}
1963 			return (1);
1964 		}
1965 		if (n == 0) {	/* connection closed */
1966 			bgp_fsm(p, EVNT_CON_CLOSED);
1967 			return (1);
1968 		}
1969 
1970 		p->rbuf->wpos += n;
1971 		p->stats.last_read = getmonotime();
1972 		return (1);
1973 	}
1974 	return (0);
1975 }
1976 
1977 void
1978 session_process_msg(struct peer *p)
1979 {
1980 	struct mrt	*mrt;
1981 	ssize_t		rpos, av, left;
1982 	int		processed = 0;
1983 	uint16_t	msglen;
1984 	uint8_t		msgtype;
1985 
1986 	rpos = 0;
1987 	av = p->rbuf->wpos;
1988 	p->rpending = 0;
1989 
1990 	/*
1991 	 * session might drop to IDLE -> buffers deallocated
1992 	 * we MUST check rbuf != NULL before use
1993 	 */
1994 	for (;;) {
1995 		if (p->rbuf == NULL)
1996 			return;
1997 		if (rpos + MSGSIZE_HEADER > av)
1998 			break;
1999 		if (parse_header(p, p->rbuf->buf + rpos, &msglen,
2000 		    &msgtype) == -1)
2001 			return;
2002 		if (rpos + msglen > av)
2003 			break;
2004 		p->rbuf->rptr = p->rbuf->buf + rpos;
2005 
2006 		/* dump to MRT as soon as we have a full packet */
2007 		LIST_FOREACH(mrt, &mrthead, entry) {
2008 			if (!(mrt->type == MRT_ALL_IN || (msgtype == UPDATE &&
2009 			    mrt->type == MRT_UPDATE_IN)))
2010 				continue;
2011 			if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
2012 			    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
2013 			    mrt->group_id == p->conf.groupid))
2014 				mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p,
2015 				    msgtype);
2016 		}
2017 
2018 		switch (msgtype) {
2019 		case OPEN:
2020 			bgp_fsm(p, EVNT_RCVD_OPEN);
2021 			p->stats.msg_rcvd_open++;
2022 			break;
2023 		case UPDATE:
2024 			bgp_fsm(p, EVNT_RCVD_UPDATE);
2025 			p->stats.msg_rcvd_update++;
2026 			break;
2027 		case NOTIFICATION:
2028 			bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
2029 			p->stats.msg_rcvd_notification++;
2030 			break;
2031 		case KEEPALIVE:
2032 			bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
2033 			p->stats.msg_rcvd_keepalive++;
2034 			break;
2035 		case RREFRESH:
2036 			parse_rrefresh(p);
2037 			p->stats.msg_rcvd_rrefresh++;
2038 			break;
2039 		default:	/* cannot happen */
2040 			session_notification_data(p, ERR_HEADER, ERR_HDR_TYPE,
2041 			    &msgtype, 1);
2042 			log_warnx("received message with unknown type %u",
2043 			    msgtype);
2044 			bgp_fsm(p, EVNT_CON_FATAL);
2045 		}
2046 		rpos += msglen;
2047 		if (++processed > MSG_PROCESS_LIMIT) {
2048 			p->rpending = 1;
2049 			break;
2050 		}
2051 	}
2052 
2053 	if (p->rbuf == NULL)
2054 		return;
2055 	if (rpos < av) {
2056 		left = av - rpos;
2057 		memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left);
2058 		p->rbuf->wpos = left;
2059 	} else
2060 		p->rbuf->wpos = 0;
2061 }
2062 
2063 int
2064 parse_header(struct peer *peer, u_char *data, uint16_t *len, uint8_t *type)
2065 {
2066 	u_char			*p;
2067 	uint16_t		 olen;
2068 
2069 	/* caller MUST make sure we are getting 19 bytes! */
2070 	p = data;
2071 	if (memcmp(p, marker, sizeof(marker))) {
2072 		log_peer_warnx(&peer->conf, "sync error");
2073 		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL);
2074 		bgp_fsm(peer, EVNT_CON_FATAL);
2075 		return (-1);
2076 	}
2077 	p += MSGSIZE_HEADER_MARKER;
2078 
2079 	memcpy(&olen, p, 2);
2080 	*len = ntohs(olen);
2081 	p += 2;
2082 	memcpy(type, p, 1);
2083 
2084 	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
2085 		log_peer_warnx(&peer->conf,
2086 		    "received message: illegal length: %u byte", *len);
2087 		session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN,
2088 		    &olen, sizeof(olen));
2089 		bgp_fsm(peer, EVNT_CON_FATAL);
2090 		return (-1);
2091 	}
2092 
2093 	switch (*type) {
2094 	case OPEN:
2095 		if (*len < MSGSIZE_OPEN_MIN) {
2096 			log_peer_warnx(&peer->conf,
2097 			    "received OPEN: illegal len: %u byte", *len);
2098 			session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN,
2099 			    &olen, sizeof(olen));
2100 			bgp_fsm(peer, EVNT_CON_FATAL);
2101 			return (-1);
2102 		}
2103 		break;
2104 	case NOTIFICATION:
2105 		if (*len < MSGSIZE_NOTIFICATION_MIN) {
2106 			log_peer_warnx(&peer->conf,
2107 			    "received NOTIFICATION: illegal len: %u byte",
2108 			    *len);
2109 			session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN,
2110 			    &olen, sizeof(olen));
2111 			bgp_fsm(peer, EVNT_CON_FATAL);
2112 			return (-1);
2113 		}
2114 		break;
2115 	case UPDATE:
2116 		if (*len < MSGSIZE_UPDATE_MIN) {
2117 			log_peer_warnx(&peer->conf,
2118 			    "received UPDATE: illegal len: %u byte", *len);
2119 			session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN,
2120 			    &olen, sizeof(olen));
2121 			bgp_fsm(peer, EVNT_CON_FATAL);
2122 			return (-1);
2123 		}
2124 		break;
2125 	case KEEPALIVE:
2126 		if (*len != MSGSIZE_KEEPALIVE) {
2127 			log_peer_warnx(&peer->conf,
2128 			    "received KEEPALIVE: illegal len: %u byte", *len);
2129 			session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN,
2130 			    &olen, sizeof(olen));
2131 			bgp_fsm(peer, EVNT_CON_FATAL);
2132 			return (-1);
2133 		}
2134 		break;
2135 	case RREFRESH:
2136 		if (*len < MSGSIZE_RREFRESH_MIN) {
2137 			log_peer_warnx(&peer->conf,
2138 			    "received RREFRESH: illegal len: %u byte", *len);
2139 			session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN,
2140 			    &olen, sizeof(olen));
2141 			bgp_fsm(peer, EVNT_CON_FATAL);
2142 			return (-1);
2143 		}
2144 		break;
2145 	default:
2146 		log_peer_warnx(&peer->conf,
2147 		    "received msg with unknown type %u", *type);
2148 		session_notification_data(peer, ERR_HEADER, ERR_HDR_TYPE,
2149 		    type, 1);
2150 		bgp_fsm(peer, EVNT_CON_FATAL);
2151 		return (-1);
2152 	}
2153 	return (0);
2154 }
2155 
2156 int
2157 parse_open(struct peer *peer)
2158 {
2159 	struct ibuf	 ibuf;
2160 	u_char		*p;
2161 	uint8_t		 version, rversion;
2162 	uint16_t	 short_as, msglen;
2163 	uint16_t	 holdtime, myholdtime;
2164 	uint32_t	 as, bgpid;
2165 	uint8_t		 optparamlen;
2166 
2167 	p = peer->rbuf->rptr;
2168 	p += MSGSIZE_HEADER_MARKER;
2169 	memcpy(&msglen, p, sizeof(msglen));
2170 	msglen = ntohs(msglen);
2171 
2172 	p = peer->rbuf->rptr;
2173 	p += MSGSIZE_HEADER;	/* header is already checked */
2174 	msglen -= MSGSIZE_HEADER;
2175 
2176 	/* XXX */
2177 	ibuf_from_buffer(&ibuf, p, msglen);
2178 
2179 	if (ibuf_get_n8(&ibuf, &version) == -1 ||
2180 	    ibuf_get_n16(&ibuf, &short_as) == -1 ||
2181 	    ibuf_get_n16(&ibuf, &holdtime) == -1 ||
2182 	    ibuf_get_n32(&ibuf, &bgpid) == -1 ||
2183 	    ibuf_get_n8(&ibuf, &optparamlen) == -1)
2184 		goto bad_len;
2185 
2186 	if (version != BGP_VERSION) {
2187 		log_peer_warnx(&peer->conf,
2188 		    "peer wants unrecognized version %u", version);
2189 		if (version > BGP_VERSION)
2190 			rversion = version - BGP_VERSION;
2191 		else
2192 			rversion = BGP_VERSION;
2193 		session_notification_data(peer, ERR_OPEN, ERR_OPEN_VERSION,
2194 		    &rversion, sizeof(rversion));
2195 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2196 		return (-1);
2197 	}
2198 
2199 	as = peer->short_as = short_as;
2200 	if (as == 0) {
2201 		log_peer_warnx(&peer->conf,
2202 		    "peer requests unacceptable AS %u", as);
2203 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL);
2204 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2205 		return (-1);
2206 	}
2207 
2208 	if (holdtime && holdtime < peer->conf.min_holdtime) {
2209 		log_peer_warnx(&peer->conf,
2210 		    "peer requests unacceptable holdtime %u", holdtime);
2211 		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME, NULL);
2212 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2213 		return (-1);
2214 	}
2215 
2216 	myholdtime = peer->conf.holdtime;
2217 	if (!myholdtime)
2218 		myholdtime = conf->holdtime;
2219 	if (holdtime < myholdtime)
2220 		peer->holdtime = holdtime;
2221 	else
2222 		peer->holdtime = myholdtime;
2223 
2224 	/* check bgpid for validity - just disallow 0 */
2225 	if (bgpid == 0) {
2226 		log_peer_warnx(&peer->conf, "peer BGPID 0 unacceptable");
2227 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL);
2228 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2229 		return (-1);
2230 	}
2231 	peer->remote_bgpid = bgpid;
2232 
2233 	if (optparamlen != 0) {
2234 		struct ibuf oparams, op;
2235 		uint8_t ext_type, op_type;
2236 		uint16_t ext_len, op_len;
2237 
2238 		ibuf_from_ibuf(&oparams, &ibuf);
2239 
2240 		/* check for RFC9072 encoding */
2241 		if (ibuf_get_n8(&oparams, &ext_type) == -1)
2242 			goto bad_len;
2243 		if (ext_type == OPT_PARAM_EXT_LEN) {
2244 			if (ibuf_get_n16(&oparams, &ext_len) == -1)
2245 				goto bad_len;
2246 			/* skip RFC9072 header */
2247 			if (ibuf_skip(&ibuf, 3) == -1)
2248 				goto bad_len;
2249 		} else {
2250 			ext_len = optparamlen;
2251 			ibuf_rewind(&oparams);
2252 		}
2253 
2254 		if (ibuf_truncate(&oparams, ext_len) == -1 ||
2255 		    ibuf_skip(&ibuf, ext_len) == -1)
2256 			goto bad_len;
2257 
2258 		while (ibuf_size(&oparams) > 0) {
2259 			if (ibuf_get_n8(&oparams, &op_type) == -1)
2260 				goto bad_len;
2261 
2262 			if (ext_type == OPT_PARAM_EXT_LEN) {
2263 				if (ibuf_get_n16(&oparams, &op_len) == -1)
2264 					goto bad_len;
2265 			} else {
2266 				uint8_t tmp;
2267 				if (ibuf_get_n8(&oparams, &tmp) == -1)
2268 					goto bad_len;
2269 				op_len = tmp;
2270 			}
2271 
2272 			if (ibuf_get_ibuf(&oparams, op_len, &op) == -1)
2273 				goto bad_len;
2274 
2275 			switch (op_type) {
2276 			case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
2277 				if (parse_capabilities(peer, &op, &as) == -1) {
2278 					session_notification(peer, ERR_OPEN, 0,
2279 					    NULL);
2280 					change_state(peer, STATE_IDLE,
2281 					    EVNT_RCVD_OPEN);
2282 					return (-1);
2283 				}
2284 				break;
2285 			case OPT_PARAM_AUTH:			/* deprecated */
2286 			default:
2287 				/*
2288 				 * unsupported type
2289 				 * the RFCs tell us to leave the data section
2290 				 * empty and notify the peer with ERR_OPEN,
2291 				 * ERR_OPEN_OPT. How the peer should know
2292 				 * _which_ optional parameter we don't support
2293 				 * is beyond me.
2294 				 */
2295 				log_peer_warnx(&peer->conf,
2296 				    "received OPEN message with unsupported "
2297 				    "optional parameter: type %u", op_type);
2298 				session_notification(peer, ERR_OPEN,
2299 				    ERR_OPEN_OPT, NULL);
2300 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2301 				return (-1);
2302 			}
2303 		}
2304 	}
2305 
2306 	if (ibuf_size(&ibuf) != 0) {
2307  bad_len:
2308 		log_peer_warnx(&peer->conf,
2309 		    "corrupt OPEN message received: length mismatch");
2310 		session_notification(peer, ERR_OPEN, 0, NULL);
2311 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2312 		return (-1);
2313 	}
2314 
2315 	/* if remote-as is zero and it's a cloned neighbor, accept any */
2316 	if (peer->template && !peer->conf.remote_as && as != AS_TRANS) {
2317 		peer->conf.remote_as = as;
2318 		peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as);
2319 		if (!peer->conf.ebgp)
2320 			/* force enforce_as off for iBGP sessions */
2321 			peer->conf.enforce_as = ENFORCE_AS_OFF;
2322 	}
2323 
2324 	if (peer->conf.remote_as != as) {
2325 		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
2326 		    log_as(as));
2327 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL);
2328 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2329 		return (-1);
2330 	}
2331 
2332 	/* on iBGP sessions check for bgpid collision */
2333 	if (!peer->conf.ebgp && peer->remote_bgpid == conf->bgpid) {
2334 		struct in_addr ina;
2335 		ina.s_addr = htonl(bgpid);
2336 		log_peer_warnx(&peer->conf, "peer BGPID %s conflicts with ours",
2337 		    inet_ntoa(ina));
2338 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL);
2339 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2340 		return (-1);
2341 	}
2342 
2343 	if (capa_neg_calc(peer) == -1) {
2344 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2345 		return (-1);
2346 	}
2347 
2348 	return (0);
2349 }
2350 
2351 int
2352 parse_update(struct peer *peer)
2353 {
2354 	u_char		*p;
2355 	uint16_t	 datalen;
2356 
2357 	/*
2358 	 * we pass the message verbatim to the rde.
2359 	 * in case of errors the whole session is reset with a
2360 	 * notification anyway, we only need to know the peer
2361 	 */
2362 	p = peer->rbuf->rptr;
2363 	p += MSGSIZE_HEADER_MARKER;
2364 	memcpy(&datalen, p, sizeof(datalen));
2365 	datalen = ntohs(datalen);
2366 
2367 	p = peer->rbuf->rptr;
2368 	p += MSGSIZE_HEADER;	/* header is already checked */
2369 	datalen -= MSGSIZE_HEADER;
2370 
2371 	if (imsg_rde(IMSG_UPDATE, peer->conf.id, p, datalen) == -1)
2372 		return (-1);
2373 
2374 	return (0);
2375 }
2376 
2377 int
2378 parse_rrefresh(struct peer *peer)
2379 {
2380 	struct route_refresh rr;
2381 	struct ibuf ibuf;
2382 	uint16_t afi, datalen;
2383 	uint8_t aid, safi, subtype;
2384 	u_char *p;
2385 
2386 	p = peer->rbuf->rptr;
2387 	p += MSGSIZE_HEADER_MARKER;
2388 	memcpy(&datalen, p, sizeof(datalen));
2389 	datalen = ntohs(datalen);
2390 
2391 	p = peer->rbuf->rptr;
2392 	p += MSGSIZE_HEADER;	/* header is already checked */
2393 	datalen -= MSGSIZE_HEADER;
2394 
2395 	/* XXX */
2396 	ibuf_from_buffer(&ibuf, p, datalen);
2397 
2398 	if (ibuf_get_n16(&ibuf, &afi) == -1 ||
2399 	    ibuf_get_n8(&ibuf, &subtype) == -1 ||
2400 	    ibuf_get_n8(&ibuf, &safi) == -1) {
2401 		/* minimum size checked in session_process_msg() */
2402 		fatalx("%s: message too small", __func__);
2403 	}
2404 
2405 	/* check subtype if peer announced enhanced route refresh */
2406 	if (peer->capa.neg.enhanced_rr) {
2407 		switch (subtype) {
2408 		case ROUTE_REFRESH_REQUEST:
2409 			/* no ORF support, so no oversized RREFRESH msgs */
2410 			if (datalen != MSGSIZE_RREFRESH) {
2411 				log_peer_warnx(&peer->conf,
2412 				    "received RREFRESH: illegal len: %u byte",
2413 				    datalen);
2414 				datalen = htons(datalen);
2415 				session_notification_data(peer, ERR_HEADER,
2416 				    ERR_HDR_LEN, &datalen, sizeof(datalen));
2417 				bgp_fsm(peer, EVNT_CON_FATAL);
2418 				return (-1);
2419 			}
2420 			peer->stats.refresh_rcvd_req++;
2421 			break;
2422 		case ROUTE_REFRESH_BEGIN_RR:
2423 		case ROUTE_REFRESH_END_RR:
2424 			/* special handling for RFC7313 */
2425 			if (datalen != MSGSIZE_RREFRESH) {
2426 				log_peer_warnx(&peer->conf,
2427 				    "received RREFRESH: illegal len: %u byte",
2428 				    datalen);
2429 				ibuf_rewind(&ibuf);
2430 				session_notification(peer, ERR_RREFRESH,
2431 				    ERR_RR_INV_LEN, &ibuf);
2432 				bgp_fsm(peer, EVNT_CON_FATAL);
2433 				return (-1);
2434 			}
2435 			if (subtype == ROUTE_REFRESH_BEGIN_RR)
2436 				peer->stats.refresh_rcvd_borr++;
2437 			else
2438 				peer->stats.refresh_rcvd_eorr++;
2439 			break;
2440 		default:
2441 			log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2442 			    "bad subtype %d", subtype);
2443 			return (0);
2444 		}
2445 	} else {
2446 		/* force subtype to default */
2447 		subtype = ROUTE_REFRESH_REQUEST;
2448 		peer->stats.refresh_rcvd_req++;
2449 	}
2450 
2451 	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2452 	if (afi2aid(afi, safi, &aid) == -1) {
2453 		log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2454 		    "invalid afi/safi pair");
2455 		return (0);
2456 	}
2457 
2458 	if (!peer->capa.neg.refresh && !peer->capa.neg.enhanced_rr) {
2459 		log_peer_warnx(&peer->conf, "peer sent unexpected refresh");
2460 		return (0);
2461 	}
2462 
2463 	rr.aid = aid;
2464 	rr.subtype = subtype;
2465 
2466 	if (imsg_rde(IMSG_REFRESH, peer->conf.id, &rr, sizeof(rr)) == -1)
2467 		return (-1);
2468 
2469 	return (0);
2470 }
2471 
2472 void
2473 parse_notification(struct peer *peer)
2474 {
2475 	struct ibuf	 ibuf;
2476 	u_char		*p;
2477 	uint16_t	 datalen;
2478 	uint8_t		 errcode, subcode;
2479 	uint8_t		 reason_len;
2480 
2481 	/* just log */
2482 	p = peer->rbuf->rptr;
2483 	p += MSGSIZE_HEADER_MARKER;
2484 	memcpy(&datalen, p, sizeof(datalen));
2485 	datalen = ntohs(datalen);
2486 
2487 	p = peer->rbuf->rptr;
2488 	p += MSGSIZE_HEADER;	/* header is already checked */
2489 	datalen -= MSGSIZE_HEADER;
2490 
2491 	/* XXX */
2492 	ibuf_from_buffer(&ibuf, p, datalen);
2493 
2494 	if (ibuf_get_n8(&ibuf, &errcode) == -1 ||
2495 	    ibuf_get_n8(&ibuf, &subcode) == -1) {
2496 		log_peer_warnx(&peer->conf, "received bad notification");
2497 		goto done;
2498 	}
2499 
2500 	peer->errcnt++;
2501 	peer->stats.last_rcvd_errcode = errcode;
2502 	peer->stats.last_rcvd_suberr = subcode;
2503 
2504 	log_notification(peer, errcode, subcode, &ibuf, "received");
2505 
2506 	CTASSERT(sizeof(peer->stats.last_reason) > UINT8_MAX);
2507 	memset(peer->stats.last_reason, 0, sizeof(peer->stats.last_reason));
2508 	if (errcode == ERR_CEASE &&
2509 	    (subcode == ERR_CEASE_ADMIN_DOWN ||
2510 	     subcode == ERR_CEASE_ADMIN_RESET)) {
2511 		/* check if shutdown reason is included */
2512 		if (ibuf_get_n8(&ibuf, &reason_len) != -1 && reason_len != 0) {
2513 			if (ibuf_get(&ibuf, peer->stats.last_reason,
2514 			    reason_len) == -1)
2515 				log_peer_warnx(&peer->conf,
2516 				    "received truncated shutdown reason");
2517 		}
2518 	}
2519 
2520 done:
2521 	change_state(peer, STATE_IDLE, EVNT_RCVD_NOTIFICATION);
2522 }
2523 
2524 int
2525 parse_capabilities(struct peer *peer, struct ibuf *buf, uint32_t *as)
2526 {
2527 	struct ibuf	 capabuf;
2528 	uint16_t	 afi, gr_header;
2529 	uint8_t		 capa_code, capa_len;
2530 	uint8_t		 safi, aid, role, flags;
2531 
2532 	while (ibuf_size(buf) > 0) {
2533 		if (ibuf_get_n8(buf, &capa_code) == -1 ||
2534 		    ibuf_get_n8(buf, &capa_len) == -1) {
2535 			log_peer_warnx(&peer->conf, "Bad capabilities attr "
2536 			    "length: too short");
2537 			return (-1);
2538 		}
2539 		if (ibuf_get_ibuf(buf, capa_len, &capabuf) == -1) {
2540 			log_peer_warnx(&peer->conf,
2541 			    "Received bad capabilities attr length: "
2542 			    "len %zu smaller than capa_len %u",
2543 			    ibuf_size(buf), capa_len);
2544 			return (-1);
2545 		}
2546 
2547 		switch (capa_code) {
2548 		case CAPA_MP:			/* RFC 4760 */
2549 			if (capa_len != 4 ||
2550 			    ibuf_get_n16(&capabuf, &afi) == -1 ||
2551 			    ibuf_skip(&capabuf, 1) == -1 ||
2552 			    ibuf_get_n8(&capabuf, &safi) == -1) {
2553 				log_peer_warnx(&peer->conf,
2554 				    "Received bad multi protocol capability");
2555 				break;
2556 			}
2557 			if (afi2aid(afi, safi, &aid) == -1) {
2558 				log_peer_warnx(&peer->conf,
2559 				    "Received multi protocol capability: "
2560 				    " unknown AFI %u, safi %u pair",
2561 				    afi, safi);
2562 				break;
2563 			}
2564 			peer->capa.peer.mp[aid] = 1;
2565 			break;
2566 		case CAPA_REFRESH:
2567 			peer->capa.peer.refresh = 1;
2568 			break;
2569 		case CAPA_ROLE:
2570 			if (capa_len != 1 ||
2571 			    ibuf_get_n8(&capabuf, &role) == -1) {
2572 				log_peer_warnx(&peer->conf,
2573 				    "Received bad role capability");
2574 				break;
2575 			}
2576 			if (!peer->conf.ebgp) {
2577 				log_peer_warnx(&peer->conf,
2578 				    "Received role capability on iBGP session");
2579 				break;
2580 			}
2581 			peer->capa.peer.policy = 1;
2582 			peer->remote_role = capa2role(role);
2583 			break;
2584 		case CAPA_RESTART:
2585 			if (capa_len == 2) {
2586 				/* peer only supports EoR marker */
2587 				peer->capa.peer.grestart.restart = 1;
2588 				peer->capa.peer.grestart.timeout = 0;
2589 				break;
2590 			} else if (capa_len % 4 != 2) {
2591 				log_peer_warnx(&peer->conf,
2592 				    "Bad graceful restart capability");
2593 				peer->capa.peer.grestart.restart = 0;
2594 				peer->capa.peer.grestart.timeout = 0;
2595 				break;
2596 			}
2597 
2598 			if (ibuf_get_n16(&capabuf, &gr_header) == -1) {
2599  bad_gr_restart:
2600 				log_peer_warnx(&peer->conf,
2601 				    "Bad graceful restart capability");
2602 				peer->capa.peer.grestart.restart = 0;
2603 				peer->capa.peer.grestart.timeout = 0;
2604 				break;
2605 			}
2606 
2607 			peer->capa.peer.grestart.timeout =
2608 			    gr_header & CAPA_GR_TIMEMASK;
2609 			if (peer->capa.peer.grestart.timeout == 0) {
2610 				log_peer_warnx(&peer->conf, "Received "
2611 				    "graceful restart with zero timeout");
2612 				peer->capa.peer.grestart.restart = 0;
2613 				break;
2614 			}
2615 
2616 			while (ibuf_size(&capabuf) > 0) {
2617 				if (ibuf_get_n16(&capabuf, &afi) == -1 ||
2618 				    ibuf_get_n8(&capabuf, &safi) == -1 ||
2619 				    ibuf_get_n8(&capabuf, &flags) == -1)
2620 					goto bad_gr_restart;
2621 				if (afi2aid(afi, safi, &aid) == -1) {
2622 					log_peer_warnx(&peer->conf,
2623 					    "Received graceful restart capa: "
2624 					    " unknown AFI %u, safi %u pair",
2625 					    afi, safi);
2626 					continue;
2627 				}
2628 				peer->capa.peer.grestart.flags[aid] |=
2629 				    CAPA_GR_PRESENT;
2630 				if (flags & CAPA_GR_F_FLAG)
2631 					peer->capa.peer.grestart.flags[aid] |=
2632 					    CAPA_GR_FORWARD;
2633 				if (gr_header & CAPA_GR_R_FLAG)
2634 					peer->capa.peer.grestart.flags[aid] |=
2635 					    CAPA_GR_RESTART;
2636 				peer->capa.peer.grestart.restart = 2;
2637 			}
2638 			break;
2639 		case CAPA_AS4BYTE:
2640 			if (capa_len != 4 ||
2641 			    ibuf_get_n32(&capabuf, as) == -1) {
2642 				log_peer_warnx(&peer->conf,
2643 				    "Received bad AS4BYTE capability");
2644 				peer->capa.peer.as4byte = 0;
2645 				break;
2646 			}
2647 			if (*as == 0) {
2648 				log_peer_warnx(&peer->conf,
2649 				    "peer requests unacceptable AS %u", *as);
2650 				session_notification(peer, ERR_OPEN,
2651 				    ERR_OPEN_AS, NULL);
2652 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2653 				return (-1);
2654 			}
2655 			peer->capa.peer.as4byte = 1;
2656 			break;
2657 		case CAPA_ADD_PATH:
2658 			if (capa_len % 4 != 0) {
2659  bad_add_path:
2660 				log_peer_warnx(&peer->conf,
2661 				    "Received bad ADD-PATH capability");
2662 				memset(peer->capa.peer.add_path, 0,
2663 				    sizeof(peer->capa.peer.add_path));
2664 				break;
2665 			}
2666 			while (ibuf_size(&capabuf) > 0) {
2667 				if (ibuf_get_n16(&capabuf, &afi) == -1 ||
2668 				    ibuf_get_n8(&capabuf, &safi) == -1 ||
2669 				    ibuf_get_n8(&capabuf, &flags) == -1)
2670 					goto bad_add_path;
2671 				if (afi2aid(afi, safi, &aid) == -1) {
2672 					log_peer_warnx(&peer->conf,
2673 					    "Received ADD-PATH capa: "
2674 					    " unknown AFI %u, safi %u pair",
2675 					    afi, safi);
2676 					memset(peer->capa.peer.add_path, 0,
2677 					    sizeof(peer->capa.peer.add_path));
2678 					break;
2679 				}
2680 				if (flags & ~CAPA_AP_BIDIR) {
2681 					log_peer_warnx(&peer->conf,
2682 					    "Received ADD-PATH capa: "
2683 					    " bad flags %x", flags);
2684 					memset(peer->capa.peer.add_path, 0,
2685 					    sizeof(peer->capa.peer.add_path));
2686 					break;
2687 				}
2688 				peer->capa.peer.add_path[aid] = flags;
2689 			}
2690 			break;
2691 		case CAPA_ENHANCED_RR:
2692 			peer->capa.peer.enhanced_rr = 1;
2693 			break;
2694 		default:
2695 			break;
2696 		}
2697 	}
2698 
2699 	return (0);
2700 }
2701 
2702 int
2703 capa_neg_calc(struct peer *p)
2704 {
2705 	struct ibuf *ebuf;
2706 	uint8_t	i, hasmp = 0, capa_code, capa_len, capa_aid = 0;
2707 
2708 	/* a capability is accepted only if both sides announced it */
2709 
2710 	p->capa.neg.refresh =
2711 	    (p->capa.ann.refresh && p->capa.peer.refresh) != 0;
2712 	p->capa.neg.enhanced_rr =
2713 	    (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0;
2714 	p->capa.neg.as4byte =
2715 	    (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0;
2716 
2717 	/* MP: both side must agree on the AFI,SAFI pair */
2718 	for (i = AID_MIN; i < AID_MAX; i++) {
2719 		if (p->capa.ann.mp[i] && p->capa.peer.mp[i])
2720 			p->capa.neg.mp[i] = 1;
2721 		else
2722 			p->capa.neg.mp[i] = 0;
2723 		if (p->capa.ann.mp[i])
2724 			hasmp = 1;
2725 	}
2726 	/* if no MP capability present default to IPv4 unicast mode */
2727 	if (!hasmp)
2728 		p->capa.neg.mp[AID_INET] = 1;
2729 
2730 	/*
2731 	 * graceful restart: the peer capabilities are of interest here.
2732 	 * It is necessary to compare the new values with the previous ones
2733 	 * and act accordingly. AFI/SAFI that are not part in the MP capability
2734 	 * are treated as not being present.
2735 	 * Also make sure that a flush happens if the session stopped
2736 	 * supporting graceful restart.
2737 	 */
2738 
2739 	for (i = AID_MIN; i < AID_MAX; i++) {
2740 		int8_t	negflags;
2741 
2742 		/* disable GR if the AFI/SAFI is not present */
2743 		if ((p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT &&
2744 		    p->capa.neg.mp[i] == 0))
2745 			p->capa.peer.grestart.flags[i] = 0;	/* disable */
2746 		/* look at current GR state and decide what to do */
2747 		negflags = p->capa.neg.grestart.flags[i];
2748 		p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i];
2749 		if (negflags & CAPA_GR_RESTARTING) {
2750 			if (p->capa.ann.grestart.restart != 0 &&
2751 			    p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) {
2752 				p->capa.neg.grestart.flags[i] |=
2753 				    CAPA_GR_RESTARTING;
2754 			} else {
2755 				if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
2756 				    &i, sizeof(i)) == -1) {
2757 					log_peer_warnx(&p->conf,
2758 					    "imsg send failed");
2759 					return (-1);
2760 				}
2761 				log_peer_warnx(&p->conf, "graceful restart of "
2762 				    "%s, not restarted, flushing", aid2str(i));
2763 			}
2764 		}
2765 	}
2766 	p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout;
2767 	p->capa.neg.grestart.restart = p->capa.peer.grestart.restart;
2768 	if (p->capa.ann.grestart.restart == 0)
2769 		p->capa.neg.grestart.restart = 0;
2770 
2771 	/*
2772 	 * ADD-PATH: set only those bits where both sides agree.
2773 	 * For this compare our send bit with the recv bit from the peer
2774 	 * and vice versa.
2775 	 * The flags are stored from this systems view point.
2776 	 * At index 0 the flags are set if any per-AID flag is set.
2777 	 */
2778 	memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path));
2779 	for (i = AID_MIN; i < AID_MAX; i++) {
2780 		if (p->capa.neg.mp[i] == 0)
2781 			continue;
2782 		if ((p->capa.ann.add_path[i] & CAPA_AP_RECV) &&
2783 		    (p->capa.peer.add_path[i] & CAPA_AP_SEND)) {
2784 			p->capa.neg.add_path[i] |= CAPA_AP_RECV;
2785 			p->capa.neg.add_path[0] |= CAPA_AP_RECV;
2786 		}
2787 		if ((p->capa.ann.add_path[i] & CAPA_AP_SEND) &&
2788 		    (p->capa.peer.add_path[i] & CAPA_AP_RECV)) {
2789 			p->capa.neg.add_path[i] |= CAPA_AP_SEND;
2790 			p->capa.neg.add_path[0] |= CAPA_AP_SEND;
2791 		}
2792 	}
2793 
2794 	/*
2795 	 * Open policy: check that the policy is sensible.
2796 	 *
2797 	 * Make sure that the roles match and set the negotiated capability
2798 	 * to the role of the peer. So the RDE can inject the OTC attribute.
2799 	 * See RFC 9234, section 4.2.
2800 	 * These checks should only happen on ebgp sessions.
2801 	 */
2802 	if (p->capa.ann.policy != 0 && p->capa.peer.policy != 0 &&
2803 	    p->conf.ebgp) {
2804 		switch (p->conf.role) {
2805 		case ROLE_PROVIDER:
2806 			if (p->remote_role != ROLE_CUSTOMER)
2807 				goto policyfail;
2808 			break;
2809 		case ROLE_RS:
2810 			if (p->remote_role != ROLE_RS_CLIENT)
2811 				goto policyfail;
2812 			break;
2813 		case ROLE_RS_CLIENT:
2814 			if (p->remote_role != ROLE_RS)
2815 				goto policyfail;
2816 			break;
2817 		case ROLE_CUSTOMER:
2818 			if (p->remote_role != ROLE_PROVIDER)
2819 				goto policyfail;
2820 			break;
2821 		case ROLE_PEER:
2822 			if (p->remote_role != ROLE_PEER)
2823 				goto policyfail;
2824 			break;
2825 		default:
2826  policyfail:
2827 			log_peer_warnx(&p->conf, "open policy role mismatch: "
2828 			    "our role %s, their role %s",
2829 			    log_policy(p->conf.role),
2830 			    log_policy(p->remote_role));
2831 			session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL);
2832 			return (-1);
2833 		}
2834 		p->capa.neg.policy = 1;
2835 	}
2836 
2837 	/* enforce presence of open policy role capability */
2838 	if (p->capa.ann.policy == 2 && p->capa.peer.policy == 0 &&
2839 	    p->conf.ebgp) {
2840 		log_peer_warnx(&p->conf, "open policy role enforced but "
2841 		    "not present");
2842 		session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL);
2843 		return (-1);
2844 	}
2845 
2846 	/* enforce presence of other capabilities */
2847 	if (p->capa.ann.refresh == 2 && p->capa.neg.refresh == 0) {
2848 		capa_code = CAPA_REFRESH;
2849 		capa_len = 0;
2850 		goto fail;
2851 	}
2852 	if (p->capa.ann.enhanced_rr == 2 && p->capa.neg.enhanced_rr == 0) {
2853 		capa_code = CAPA_ENHANCED_RR;
2854 		capa_len = 0;
2855 		goto fail;
2856 	}
2857 	if (p->capa.ann.as4byte == 2 && p->capa.neg.as4byte == 0) {
2858 		capa_code = CAPA_AS4BYTE;
2859 		capa_len = 4;
2860 		goto fail;
2861 	}
2862 	if (p->capa.ann.grestart.restart == 2 &&
2863 	    p->capa.neg.grestart.restart == 0) {
2864 		capa_code = CAPA_RESTART;
2865 		capa_len = 2;
2866 		goto fail;
2867 	}
2868 	for (i = AID_MIN; i < AID_MAX; i++) {
2869 		if (p->capa.ann.mp[i] == 2 && p->capa.neg.mp[i] == 0) {
2870 			capa_code = CAPA_MP;
2871 			capa_len = 4;
2872 			capa_aid = i;
2873 			goto fail;
2874 		}
2875 	}
2876 
2877 	for (i = AID_MIN; i < AID_MAX; i++) {
2878 		if (p->capa.neg.mp[i] == 0)
2879 			continue;
2880 		if ((p->capa.ann.add_path[i] & CAPA_AP_RECV_ENFORCE) &&
2881 		    (p->capa.neg.add_path[i] & CAPA_AP_RECV) == 0) {
2882 			capa_code = CAPA_ADD_PATH;
2883 			capa_len = 4;
2884 			capa_aid = i;
2885 			goto fail;
2886 		}
2887 		if ((p->capa.ann.add_path[i] & CAPA_AP_SEND_ENFORCE) &&
2888 		    (p->capa.neg.add_path[i] & CAPA_AP_SEND) == 0) {
2889 			capa_code = CAPA_ADD_PATH;
2890 			capa_len = 4;
2891 			capa_aid = i;
2892 			goto fail;
2893 		}
2894 	}
2895 
2896 	return (0);
2897 
2898  fail:
2899 	if ((ebuf = ibuf_dynamic(2, 256)) == NULL)
2900 		return (-1);
2901 	/* best effort, no problem if it fails */
2902 	session_capa_add(ebuf, capa_code, capa_len);
2903 	if (capa_code == CAPA_MP)
2904 		session_capa_add_mp(ebuf, capa_aid);
2905 	else if (capa_code == CAPA_ADD_PATH)
2906 		session_capa_add_afi(ebuf, capa_aid, 0);
2907 	else if (capa_len > 0)
2908 		ibuf_add_zero(ebuf, capa_len);
2909 
2910 	session_notification(p, ERR_OPEN, ERR_OPEN_CAPA, ebuf);
2911 	ibuf_free(ebuf);
2912 	return (-1);
2913 }
2914 
2915 void
2916 session_dispatch_imsg(struct imsgbuf *imsgbuf, int idx, u_int *listener_cnt)
2917 {
2918 	struct imsg		 imsg;
2919 	struct ibuf		 ibuf;
2920 	struct mrt		 xmrt;
2921 	struct route_refresh	 rr;
2922 	struct mrt		*mrt;
2923 	struct imsgbuf		*i;
2924 	struct peer		*p;
2925 	struct listen_addr	*la, *next, nla;
2926 	struct session_dependon	 sdon;
2927 	struct bgpd_config	 tconf;
2928 	size_t			 len;
2929 	uint32_t		 peerid;
2930 	int			 n, fd, depend_ok, restricted;
2931 	uint16_t		 t;
2932 	uint8_t			 aid, errcode, subcode;
2933 
2934 	while (imsgbuf) {
2935 		if ((n = imsg_get(imsgbuf, &imsg)) == -1)
2936 			fatal("session_dispatch_imsg: imsg_get error");
2937 
2938 		if (n == 0)
2939 			break;
2940 
2941 		peerid = imsg_get_id(&imsg);
2942 		switch (imsg_get_type(&imsg)) {
2943 		case IMSG_SOCKET_CONN:
2944 		case IMSG_SOCKET_CONN_CTL:
2945 			if (idx != PFD_PIPE_MAIN)
2946 				fatalx("reconf request not from parent");
2947 			if ((fd = imsg_get_fd(&imsg)) == -1) {
2948 				log_warnx("expected to receive imsg fd to "
2949 				    "RDE but didn't receive any");
2950 				break;
2951 			}
2952 			if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
2953 				fatal(NULL);
2954 			imsg_init(i, fd);
2955 			if (imsg_get_type(&imsg) == IMSG_SOCKET_CONN) {
2956 				if (ibuf_rde) {
2957 					log_warnx("Unexpected imsg connection "
2958 					    "to RDE received");
2959 					msgbuf_clear(&ibuf_rde->w);
2960 					free(ibuf_rde);
2961 				}
2962 				ibuf_rde = i;
2963 			} else {
2964 				if (ibuf_rde_ctl) {
2965 					log_warnx("Unexpected imsg ctl "
2966 					    "connection to RDE received");
2967 					msgbuf_clear(&ibuf_rde_ctl->w);
2968 					free(ibuf_rde_ctl);
2969 				}
2970 				ibuf_rde_ctl = i;
2971 			}
2972 			break;
2973 		case IMSG_RECONF_CONF:
2974 			if (idx != PFD_PIPE_MAIN)
2975 				fatalx("reconf request not from parent");
2976 			if (imsg_get_data(&imsg, &tconf, sizeof(tconf)) == -1)
2977 				fatal("imsg_get_data");
2978 
2979 			nconf = new_config();
2980 			copy_config(nconf, &tconf);
2981 			pending_reconf = 1;
2982 			break;
2983 		case IMSG_RECONF_PEER:
2984 			if (idx != PFD_PIPE_MAIN)
2985 				fatalx("reconf request not from parent");
2986 			if ((p = calloc(1, sizeof(struct peer))) == NULL)
2987 				fatal("new_peer");
2988 			if (imsg_get_data(&imsg, &p->conf, sizeof(p->conf)) ==
2989 			    -1)
2990 				fatal("imsg_get_data");
2991 			p->state = p->prev_state = STATE_NONE;
2992 			p->reconf_action = RECONF_REINIT;
2993 			if (RB_INSERT(peer_head, &nconf->peers, p) != NULL)
2994 				fatalx("%s: peer tree is corrupt", __func__);
2995 			break;
2996 		case IMSG_RECONF_LISTENER:
2997 			if (idx != PFD_PIPE_MAIN)
2998 				fatalx("reconf request not from parent");
2999 			if (nconf == NULL)
3000 				fatalx("IMSG_RECONF_LISTENER but no config");
3001 			if (imsg_get_data(&imsg, &nla, sizeof(nla)) == -1)
3002 				fatal("imsg_get_data");
3003 			TAILQ_FOREACH(la, conf->listen_addrs, entry)
3004 				if (!la_cmp(la, &nla))
3005 					break;
3006 
3007 			if (la == NULL) {
3008 				if (nla.reconf != RECONF_REINIT)
3009 					fatalx("king bula sez: "
3010 					    "expected REINIT");
3011 
3012 				if ((nla.fd = imsg_get_fd(&imsg)) == -1)
3013 					log_warnx("expected to receive fd for "
3014 					    "%s but didn't receive any",
3015 					    log_sockaddr((struct sockaddr *)
3016 					    &nla.sa, nla.sa_len));
3017 
3018 				la = calloc(1, sizeof(struct listen_addr));
3019 				if (la == NULL)
3020 					fatal(NULL);
3021 				memcpy(&la->sa, &nla.sa, sizeof(la->sa));
3022 				la->flags = nla.flags;
3023 				la->fd = nla.fd;
3024 				la->reconf = RECONF_REINIT;
3025 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
3026 				    entry);
3027 			} else {
3028 				if (nla.reconf != RECONF_KEEP)
3029 					fatalx("king bula sez: expected KEEP");
3030 				la->reconf = RECONF_KEEP;
3031 			}
3032 
3033 			break;
3034 		case IMSG_RECONF_CTRL:
3035 			if (idx != PFD_PIPE_MAIN)
3036 				fatalx("reconf request not from parent");
3037 
3038 			if (imsg_get_data(&imsg, &restricted,
3039 			    sizeof(restricted)) == -1)
3040 				fatal("imsg_get_data");
3041 			if ((fd = imsg_get_fd(&imsg)) == -1) {
3042 				log_warnx("expected to receive fd for control "
3043 				    "socket but didn't receive any");
3044 				break;
3045 			}
3046 			if (restricted) {
3047 				control_shutdown(rcsock);
3048 				rcsock = fd;
3049 			} else {
3050 				control_shutdown(csock);
3051 				csock = fd;
3052 			}
3053 			break;
3054 		case IMSG_RECONF_DRAIN:
3055 			switch (idx) {
3056 			case PFD_PIPE_ROUTE:
3057 				if (nconf != NULL)
3058 					fatalx("got unexpected %s from RDE",
3059 					    "IMSG_RECONF_DONE");
3060 				imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
3061 				    -1, NULL, 0);
3062 				break;
3063 			case PFD_PIPE_MAIN:
3064 				if (nconf == NULL)
3065 					fatalx("got unexpected %s from parent",
3066 					    "IMSG_RECONF_DONE");
3067 				imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0,
3068 				    -1, NULL, 0);
3069 				break;
3070 			default:
3071 				fatalx("reconf request not from parent or RDE");
3072 			}
3073 			break;
3074 		case IMSG_RECONF_DONE:
3075 			if (idx != PFD_PIPE_MAIN)
3076 				fatalx("reconf request not from parent");
3077 			if (nconf == NULL)
3078 				fatalx("got IMSG_RECONF_DONE but no config");
3079 			copy_config(conf, nconf);
3080 			merge_peers(conf, nconf);
3081 
3082 			/* delete old listeners */
3083 			TAILQ_FOREACH_SAFE(la, conf->listen_addrs, entry,
3084 			    next) {
3085 				if (la->reconf == RECONF_NONE) {
3086 					log_info("not listening on %s any more",
3087 					    log_sockaddr((struct sockaddr *)
3088 					    &la->sa, la->sa_len));
3089 					TAILQ_REMOVE(conf->listen_addrs, la,
3090 					    entry);
3091 					close(la->fd);
3092 					free(la);
3093 				}
3094 			}
3095 
3096 			/* add new listeners */
3097 			TAILQ_CONCAT(conf->listen_addrs, nconf->listen_addrs,
3098 			    entry);
3099 
3100 			setup_listeners(listener_cnt);
3101 			free_config(nconf);
3102 			nconf = NULL;
3103 			pending_reconf = 0;
3104 			log_info("SE reconfigured");
3105 			/*
3106 			 * IMSG_RECONF_DONE is sent when the RDE drained
3107 			 * the peer config sent in merge_peers().
3108 			 */
3109 			break;
3110 		case IMSG_SESSION_DEPENDON:
3111 			if (idx != PFD_PIPE_MAIN)
3112 				fatalx("IFINFO message not from parent");
3113 			if (imsg_get_data(&imsg, &sdon, sizeof(sdon)) == -1)
3114 				fatalx("DEPENDON imsg with wrong len");
3115 			depend_ok = sdon.depend_state;
3116 
3117 			RB_FOREACH(p, peer_head, &conf->peers)
3118 				if (!strcmp(p->conf.if_depend, sdon.ifname)) {
3119 					if (depend_ok && !p->depend_ok) {
3120 						p->depend_ok = depend_ok;
3121 						bgp_fsm(p, EVNT_START);
3122 					} else if (!depend_ok && p->depend_ok) {
3123 						p->depend_ok = depend_ok;
3124 						session_stop(p,
3125 						    ERR_CEASE_OTHER_CHANGE,
3126 						    NULL);
3127 					}
3128 				}
3129 			break;
3130 		case IMSG_MRT_OPEN:
3131 		case IMSG_MRT_REOPEN:
3132 			if (idx != PFD_PIPE_MAIN)
3133 				fatalx("mrt request not from parent");
3134 			if (imsg_get_data(&imsg, &xmrt, sizeof(xmrt)) == -1) {
3135 				log_warnx("mrt open, wrong imsg len");
3136 				break;
3137 			}
3138 
3139 			if ((xmrt.wbuf.fd = imsg_get_fd(&imsg)) == -1) {
3140 				log_warnx("expected to receive fd for mrt dump "
3141 				    "but didn't receive any");
3142 				break;
3143 			}
3144 
3145 			mrt = mrt_get(&mrthead, &xmrt);
3146 			if (mrt == NULL) {
3147 				/* new dump */
3148 				mrt = calloc(1, sizeof(struct mrt));
3149 				if (mrt == NULL)
3150 					fatal("session_dispatch_imsg");
3151 				memcpy(mrt, &xmrt, sizeof(struct mrt));
3152 				TAILQ_INIT(&mrt->wbuf.bufs);
3153 				LIST_INSERT_HEAD(&mrthead, mrt, entry);
3154 			} else {
3155 				/* old dump reopened */
3156 				close(mrt->wbuf.fd);
3157 				mrt->wbuf.fd = xmrt.wbuf.fd;
3158 			}
3159 			break;
3160 		case IMSG_MRT_CLOSE:
3161 			if (idx != PFD_PIPE_MAIN)
3162 				fatalx("mrt request not from parent");
3163 			if (imsg_get_data(&imsg, &xmrt, sizeof(xmrt)) == -1) {
3164 				log_warnx("mrt close, wrong imsg len");
3165 				break;
3166 			}
3167 
3168 			mrt = mrt_get(&mrthead, &xmrt);
3169 			if (mrt != NULL)
3170 				mrt_done(mrt);
3171 			break;
3172 		case IMSG_CTL_KROUTE:
3173 		case IMSG_CTL_KROUTE_ADDR:
3174 		case IMSG_CTL_SHOW_NEXTHOP:
3175 		case IMSG_CTL_SHOW_INTERFACE:
3176 		case IMSG_CTL_SHOW_FIB_TABLES:
3177 		case IMSG_CTL_SHOW_RTR:
3178 		case IMSG_CTL_SHOW_TIMER:
3179 			if (idx != PFD_PIPE_MAIN)
3180 				fatalx("ctl kroute request not from parent");
3181 			control_imsg_relay(&imsg, NULL);
3182 			break;
3183 		case IMSG_CTL_SHOW_NEIGHBOR:
3184 			if (idx != PFD_PIPE_ROUTE_CTL)
3185 				fatalx("ctl rib request not from RDE");
3186 			p = getpeerbyid(conf, peerid);
3187 			control_imsg_relay(&imsg, p);
3188 			break;
3189 		case IMSG_CTL_SHOW_RIB:
3190 		case IMSG_CTL_SHOW_RIB_PREFIX:
3191 		case IMSG_CTL_SHOW_RIB_COMMUNITIES:
3192 		case IMSG_CTL_SHOW_RIB_ATTR:
3193 		case IMSG_CTL_SHOW_RIB_MEM:
3194 		case IMSG_CTL_SHOW_NETWORK:
3195 		case IMSG_CTL_SHOW_FLOWSPEC:
3196 		case IMSG_CTL_SHOW_SET:
3197 			if (idx != PFD_PIPE_ROUTE_CTL)
3198 				fatalx("ctl rib request not from RDE");
3199 			control_imsg_relay(&imsg, NULL);
3200 			break;
3201 		case IMSG_CTL_END:
3202 		case IMSG_CTL_RESULT:
3203 			control_imsg_relay(&imsg, NULL);
3204 			break;
3205 		case IMSG_UPDATE:
3206 			if (idx != PFD_PIPE_ROUTE)
3207 				fatalx("update request not from RDE");
3208 			len = imsg_get_len(&imsg);
3209 			if (imsg_get_ibuf(&imsg, &ibuf) == -1 ||
3210 			    len > MAX_PKTSIZE - MSGSIZE_HEADER ||
3211 			    len < MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
3212 				log_warnx("RDE sent invalid update");
3213 			else
3214 				session_update(peerid, &ibuf);
3215 			break;
3216 		case IMSG_UPDATE_ERR:
3217 			if (idx != PFD_PIPE_ROUTE)
3218 				fatalx("update request not from RDE");
3219 			if ((p = getpeerbyid(conf, peerid)) == NULL) {
3220 				log_warnx("no such peer: id=%u", peerid);
3221 				break;
3222 			}
3223 			if (imsg_get_ibuf(&imsg, &ibuf) == -1 ||
3224 			    ibuf_get_n8(&ibuf, &errcode) == -1 ||
3225 			    ibuf_get_n8(&ibuf, &subcode) == -1) {
3226 				log_warnx("RDE sent invalid notification");
3227 				break;
3228 			}
3229 
3230 			session_notification(p, errcode, subcode, &ibuf);
3231 			switch (errcode) {
3232 			case ERR_CEASE:
3233 				switch (subcode) {
3234 				case ERR_CEASE_MAX_PREFIX:
3235 				case ERR_CEASE_MAX_SENT_PREFIX:
3236 					t = p->conf.max_out_prefix_restart;
3237 					if (subcode == ERR_CEASE_MAX_PREFIX)
3238 						t = p->conf.max_prefix_restart;
3239 
3240 					bgp_fsm(p, EVNT_STOP);
3241 					if (t)
3242 						timer_set(&p->timers,
3243 						    Timer_IdleHold, 60 * t);
3244 					break;
3245 				default:
3246 					bgp_fsm(p, EVNT_CON_FATAL);
3247 					break;
3248 				}
3249 				break;
3250 			default:
3251 				bgp_fsm(p, EVNT_CON_FATAL);
3252 				break;
3253 			}
3254 			break;
3255 		case IMSG_REFRESH:
3256 			if (idx != PFD_PIPE_ROUTE)
3257 				fatalx("route refresh request not from RDE");
3258 			if (imsg_get_data(&imsg, &rr, sizeof(rr)) == -1) {
3259 				log_warnx("RDE sent invalid refresh msg");
3260 				break;
3261 			}
3262 			if ((p = getpeerbyid(conf, peerid)) == NULL) {
3263 				log_warnx("no such peer: id=%u", peerid);
3264 				break;
3265 			}
3266 			if (rr.aid < AID_MIN || rr.aid >= AID_MAX)
3267 				fatalx("IMSG_REFRESH: bad AID");
3268 			session_rrefresh(p, rr.aid, rr.subtype);
3269 			break;
3270 		case IMSG_SESSION_RESTARTED:
3271 			if (idx != PFD_PIPE_ROUTE)
3272 				fatalx("session restart not from RDE");
3273 			if (imsg_get_data(&imsg, &aid, sizeof(aid)) == -1) {
3274 				log_warnx("RDE sent invalid restart msg");
3275 				break;
3276 			}
3277 			if ((p = getpeerbyid(conf, peerid)) == NULL) {
3278 				log_warnx("no such peer: id=%u", peerid);
3279 				break;
3280 			}
3281 			if (aid < AID_MIN || aid >= AID_MAX)
3282 				fatalx("IMSG_SESSION_RESTARTED: bad AID");
3283 			if (p->capa.neg.grestart.flags[aid] &
3284 			    CAPA_GR_RESTARTING) {
3285 				log_peer_warnx(&p->conf,
3286 				    "graceful restart of %s finished",
3287 				    aid2str(aid));
3288 				p->capa.neg.grestart.flags[aid] &=
3289 				    ~CAPA_GR_RESTARTING;
3290 				timer_stop(&p->timers, Timer_RestartTimeout);
3291 
3292 				/* signal back to RDE to cleanup stale routes */
3293 				if (imsg_rde(IMSG_SESSION_RESTARTED,
3294 				    peerid, &aid, sizeof(aid)) == -1)
3295 					fatal("imsg_compose: "
3296 					    "IMSG_SESSION_RESTARTED");
3297 			}
3298 			break;
3299 		default:
3300 			break;
3301 		}
3302 		imsg_free(&imsg);
3303 	}
3304 }
3305 
3306 int
3307 la_cmp(struct listen_addr *a, struct listen_addr *b)
3308 {
3309 	struct sockaddr_in	*in_a, *in_b;
3310 	struct sockaddr_in6	*in6_a, *in6_b;
3311 
3312 	if (a->sa.ss_family != b->sa.ss_family)
3313 		return (1);
3314 
3315 	switch (a->sa.ss_family) {
3316 	case AF_INET:
3317 		in_a = (struct sockaddr_in *)&a->sa;
3318 		in_b = (struct sockaddr_in *)&b->sa;
3319 		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
3320 			return (1);
3321 		if (in_a->sin_port != in_b->sin_port)
3322 			return (1);
3323 		break;
3324 	case AF_INET6:
3325 		in6_a = (struct sockaddr_in6 *)&a->sa;
3326 		in6_b = (struct sockaddr_in6 *)&b->sa;
3327 		if (memcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
3328 		    sizeof(struct in6_addr)))
3329 			return (1);
3330 		if (in6_a->sin6_port != in6_b->sin6_port)
3331 			return (1);
3332 		break;
3333 	default:
3334 		fatal("king bula sez: unknown address family");
3335 		/* NOTREACHED */
3336 	}
3337 
3338 	return (0);
3339 }
3340 
3341 struct peer *
3342 getpeerbydesc(struct bgpd_config *c, const char *descr)
3343 {
3344 	struct peer	*p, *res = NULL;
3345 	int		 match = 0;
3346 
3347 	RB_FOREACH(p, peer_head, &c->peers)
3348 		if (!strcmp(p->conf.descr, descr)) {
3349 			res = p;
3350 			match++;
3351 		}
3352 
3353 	if (match > 1)
3354 		log_info("neighbor description \"%s\" not unique, request "
3355 		    "aborted", descr);
3356 
3357 	if (match == 1)
3358 		return (res);
3359 	else
3360 		return (NULL);
3361 }
3362 
3363 struct peer *
3364 getpeerbyip(struct bgpd_config *c, struct sockaddr *ip)
3365 {
3366 	struct bgpd_addr addr;
3367 	struct peer	*p, *newpeer, *loose = NULL;
3368 	uint32_t	 id;
3369 
3370 	sa2addr(ip, &addr, NULL);
3371 
3372 	/* we might want a more effective way to find peers by IP */
3373 	RB_FOREACH(p, peer_head, &c->peers)
3374 		if (!p->conf.template &&
3375 		    !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
3376 			return (p);
3377 
3378 	/* try template matching */
3379 	RB_FOREACH(p, peer_head, &c->peers)
3380 		if (p->conf.template &&
3381 		    p->conf.remote_addr.aid == addr.aid &&
3382 		    session_match_mask(p, &addr))
3383 			if (loose == NULL || loose->conf.remote_masklen <
3384 			    p->conf.remote_masklen)
3385 				loose = p;
3386 
3387 	if (loose != NULL) {
3388 		/* clone */
3389 		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
3390 			fatal(NULL);
3391 		memcpy(newpeer, loose, sizeof(struct peer));
3392 		for (id = PEER_ID_DYN_MAX; id > PEER_ID_STATIC_MAX; id--) {
3393 			if (getpeerbyid(c, id) == NULL)	/* we found a free id */
3394 				break;
3395 		}
3396 		newpeer->template = loose;
3397 		session_template_clone(newpeer, ip, id, 0);
3398 		newpeer->state = newpeer->prev_state = STATE_NONE;
3399 		newpeer->reconf_action = RECONF_KEEP;
3400 		newpeer->rbuf = NULL;
3401 		newpeer->rpending = 0;
3402 		init_peer(newpeer);
3403 		bgp_fsm(newpeer, EVNT_START);
3404 		if (RB_INSERT(peer_head, &c->peers, newpeer) != NULL)
3405 			fatalx("%s: peer tree is corrupt", __func__);
3406 		return (newpeer);
3407 	}
3408 
3409 	return (NULL);
3410 }
3411 
3412 struct peer *
3413 getpeerbyid(struct bgpd_config *c, uint32_t peerid)
3414 {
3415 	static struct peer lookup;
3416 
3417 	lookup.conf.id = peerid;
3418 
3419 	return RB_FIND(peer_head, &c->peers, &lookup);
3420 }
3421 
3422 int
3423 peer_matched(struct peer *p, struct ctl_neighbor *n)
3424 {
3425 	char *s;
3426 
3427 	if (n && n->addr.aid) {
3428 		if (memcmp(&p->conf.remote_addr, &n->addr,
3429 		    sizeof(p->conf.remote_addr)))
3430 			return 0;
3431 	} else if (n && n->descr[0]) {
3432 		s = n->is_group ? p->conf.group : p->conf.descr;
3433 		/* cannot trust n->descr to be properly terminated */
3434 		if (strncmp(s, n->descr, sizeof(n->descr)))
3435 			return 0;
3436 	}
3437 	return 1;
3438 }
3439 
3440 void
3441 session_template_clone(struct peer *p, struct sockaddr *ip, uint32_t id,
3442     uint32_t as)
3443 {
3444 	struct bgpd_addr	remote_addr;
3445 
3446 	if (ip)
3447 		sa2addr(ip, &remote_addr, NULL);
3448 	else
3449 		memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr));
3450 
3451 	memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config));
3452 
3453 	p->conf.id = id;
3454 
3455 	if (as) {
3456 		p->conf.remote_as = as;
3457 		p->conf.ebgp = (p->conf.remote_as != p->conf.local_as);
3458 		if (!p->conf.ebgp)
3459 			/* force enforce_as off for iBGP sessions */
3460 			p->conf.enforce_as = ENFORCE_AS_OFF;
3461 	}
3462 
3463 	memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr));
3464 	switch (p->conf.remote_addr.aid) {
3465 	case AID_INET:
3466 		p->conf.remote_masklen = 32;
3467 		break;
3468 	case AID_INET6:
3469 		p->conf.remote_masklen = 128;
3470 		break;
3471 	}
3472 	p->conf.template = 0;
3473 }
3474 
3475 int
3476 session_match_mask(struct peer *p, struct bgpd_addr *a)
3477 {
3478 	struct bgpd_addr masked;
3479 
3480 	applymask(&masked, a, p->conf.remote_masklen);
3481 	if (memcmp(&masked, &p->conf.remote_addr, sizeof(masked)) == 0)
3482 		return (1);
3483 	return (0);
3484 }
3485 
3486 void
3487 session_down(struct peer *peer)
3488 {
3489 	memset(&peer->capa.neg, 0, sizeof(peer->capa.neg));
3490 	peer->stats.last_updown = getmonotime();
3491 	/*
3492 	 * session_down is called in the exit code path so check
3493 	 * if the RDE is still around, if not there is no need to
3494 	 * send the message.
3495 	 */
3496 	if (ibuf_rde == NULL)
3497 		return;
3498 	if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1)
3499 		fatalx("imsg_compose error");
3500 }
3501 
3502 void
3503 session_up(struct peer *p)
3504 {
3505 	struct session_up	 sup;
3506 
3507 	/* clear last errors, now that the session is up */
3508 	p->stats.last_sent_errcode = 0;
3509 	p->stats.last_sent_suberr = 0;
3510 	p->stats.last_rcvd_errcode = 0;
3511 	p->stats.last_rcvd_suberr = 0;
3512 	memset(p->stats.last_reason, 0, sizeof(p->stats.last_reason));
3513 
3514 	if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3515 	    &p->conf, sizeof(p->conf)) == -1)
3516 		fatalx("imsg_compose error");
3517 
3518 	if (p->local.aid == AID_INET) {
3519 		sup.local_v4_addr = p->local;
3520 		sup.local_v6_addr = p->local_alt;
3521 	} else {
3522 		sup.local_v6_addr = p->local;
3523 		sup.local_v4_addr = p->local_alt;
3524 	}
3525 	sup.remote_addr = p->remote;
3526 	sup.if_scope = p->if_scope;
3527 
3528 	sup.remote_bgpid = p->remote_bgpid;
3529 	sup.short_as = p->short_as;
3530 	memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
3531 	p->stats.last_updown = getmonotime();
3532 	if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1)
3533 		fatalx("imsg_compose error");
3534 }
3535 
3536 int
3537 imsg_ctl_parent(struct imsg *imsg)
3538 {
3539 	return imsg_forward(ibuf_main, imsg);
3540 }
3541 
3542 int
3543 imsg_ctl_rde(struct imsg *imsg)
3544 {
3545 	if (ibuf_rde_ctl == NULL)
3546 		return (0);
3547 	/*
3548 	 * Use control socket to talk to RDE to bypass the queue of the
3549 	 * regular imsg socket.
3550 	 */
3551 	return imsg_forward(ibuf_rde_ctl, imsg);
3552 }
3553 
3554 int
3555 imsg_ctl_rde_msg(int type, uint32_t peerid, pid_t pid)
3556 {
3557 	if (ibuf_rde_ctl == NULL)
3558 		return (0);
3559 
3560 	/*
3561 	 * Use control socket to talk to RDE to bypass the queue of the
3562 	 * regular imsg socket.
3563 	 */
3564 	return imsg_compose(ibuf_rde_ctl, type, peerid, pid, -1, NULL, 0);
3565 }
3566 
3567 int
3568 imsg_rde(int type, uint32_t peerid, void *data, uint16_t datalen)
3569 {
3570 	if (ibuf_rde == NULL)
3571 		return (0);
3572 
3573 	return imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen);
3574 }
3575 
3576 void
3577 session_demote(struct peer *p, int level)
3578 {
3579 	struct demote_msg	msg;
3580 
3581 	strlcpy(msg.demote_group, p->conf.demote_group,
3582 	    sizeof(msg.demote_group));
3583 	msg.level = level;
3584 	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
3585 	    &msg, sizeof(msg)) == -1)
3586 		fatalx("imsg_compose error");
3587 
3588 	p->demoted += level;
3589 }
3590 
3591 void
3592 session_stop(struct peer *peer, uint8_t subcode, const char *reason)
3593 {
3594 	struct ibuf *ibuf;
3595 
3596 	if (reason != NULL)
3597 		strlcpy(peer->conf.reason, reason, sizeof(peer->conf.reason));
3598 
3599 	ibuf = ibuf_dynamic(0, REASON_LEN);
3600 
3601 	if ((subcode == ERR_CEASE_ADMIN_DOWN ||
3602 	    subcode == ERR_CEASE_ADMIN_RESET) &&
3603 	    reason != NULL && *reason != '\0' &&
3604 	    ibuf != NULL) {
3605 		if (ibuf_add_n8(ibuf, strlen(reason)) == -1 ||
3606 		    ibuf_add(ibuf, reason, strlen(reason))) {
3607 			log_peer_warnx(&peer->conf,
3608 			    "trying to send overly long shutdown reason");
3609 			ibuf_free(ibuf);
3610 			ibuf = NULL;
3611 		}
3612 	}
3613 	switch (peer->state) {
3614 	case STATE_OPENSENT:
3615 	case STATE_OPENCONFIRM:
3616 	case STATE_ESTABLISHED:
3617 		session_notification(peer, ERR_CEASE, subcode, ibuf);
3618 		break;
3619 	default:
3620 		/* session not open, no need to send notification */
3621 		if (subcode >= sizeof(suberr_cease_names) / sizeof(char *) ||
3622 		    suberr_cease_names[subcode] == NULL)
3623 			log_peer_warnx(&peer->conf, "session stop: %s, "
3624 			    "unknown subcode %u", errnames[ERR_CEASE], subcode);
3625 		else
3626 			log_peer_warnx(&peer->conf, "session stop: %s, %s",
3627 			    errnames[ERR_CEASE], suberr_cease_names[subcode]);
3628 		break;
3629 	}
3630 	ibuf_free(ibuf);
3631 	bgp_fsm(peer, EVNT_STOP);
3632 }
3633 
3634 void
3635 merge_peers(struct bgpd_config *c, struct bgpd_config *nc)
3636 {
3637 	struct peer *p, *np, *next;
3638 
3639 	RB_FOREACH(p, peer_head, &c->peers) {
3640 		/* templates are handled specially */
3641 		if (p->template != NULL)
3642 			continue;
3643 		np = getpeerbyid(nc, p->conf.id);
3644 		if (np == NULL) {
3645 			p->reconf_action = RECONF_DELETE;
3646 			continue;
3647 		}
3648 
3649 		/* peer no longer uses TCP MD5SIG so deconfigure */
3650 		if (p->conf.auth.method == AUTH_MD5SIG &&
3651 		    np->conf.auth.method != AUTH_MD5SIG)
3652 			tcp_md5_del_listener(c, p);
3653 		else if (np->conf.auth.method == AUTH_MD5SIG)
3654 			tcp_md5_add_listener(c, np);
3655 
3656 		memcpy(&p->conf, &np->conf, sizeof(p->conf));
3657 		RB_REMOVE(peer_head, &nc->peers, np);
3658 		free(np);
3659 
3660 		p->reconf_action = RECONF_KEEP;
3661 
3662 		/* had demotion, is demoted, demote removed? */
3663 		if (p->demoted && !p->conf.demote_group[0])
3664 			session_demote(p, -1);
3665 
3666 		/* if session is not open then refresh pfkey data */
3667 		if (p->state < STATE_OPENSENT && !p->template)
3668 			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
3669 			    p->conf.id, 0, -1, NULL, 0);
3670 
3671 		/* sync the RDE in case we keep the peer */
3672 		if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3673 		    &p->conf, sizeof(struct peer_config)) == -1)
3674 			fatalx("imsg_compose error");
3675 
3676 		/* apply the config to all clones of a template */
3677 		if (p->conf.template) {
3678 			struct peer *xp;
3679 			RB_FOREACH(xp, peer_head, &c->peers) {
3680 				if (xp->template != p)
3681 					continue;
3682 				session_template_clone(xp, NULL, xp->conf.id,
3683 				    xp->conf.remote_as);
3684 				if (imsg_rde(IMSG_SESSION_ADD, xp->conf.id,
3685 				    &xp->conf, sizeof(xp->conf)) == -1)
3686 					fatalx("imsg_compose error");
3687 			}
3688 		}
3689 	}
3690 
3691 	if (imsg_rde(IMSG_RECONF_DRAIN, 0, NULL, 0) == -1)
3692 		fatalx("imsg_compose error");
3693 
3694 	/* pfkeys of new peers already loaded by the parent process */
3695 	RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) {
3696 		RB_REMOVE(peer_head, &nc->peers, np);
3697 		if (RB_INSERT(peer_head, &c->peers, np) != NULL)
3698 			fatalx("%s: peer tree is corrupt", __func__);
3699 		if (np->conf.auth.method == AUTH_MD5SIG)
3700 			tcp_md5_add_listener(c, np);
3701 	}
3702 }
3703