xref: /openbsd/usr.sbin/bgpd/bgpd.c (revision eafe309e)
1 /*	$OpenBSD: bgpd.c,v 1.254 2022/08/17 15:15:25 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/socket.h>
21 #include <sys/wait.h>
22 #include <netinet/in.h>
23 #include <arpa/inet.h>
24 #include <err.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <poll.h>
28 #include <pwd.h>
29 #include <signal.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <syslog.h>
34 #include <unistd.h>
35 
36 #include "bgpd.h"
37 #include "session.h"
38 #include "log.h"
39 #include "version.h"
40 
41 void		sighdlr(int);
42 __dead void	usage(void);
43 int		main(int, char *[]);
44 pid_t		start_child(enum bgpd_process, char *, int, int, int);
45 int		send_filterset(struct imsgbuf *, struct filter_set_head *);
46 int		reconfigure(char *, struct bgpd_config *);
47 int		send_config(struct bgpd_config *);
48 int		dispatch_imsg(struct imsgbuf *, int, struct bgpd_config *);
49 int		control_setup(struct bgpd_config *);
50 static void	getsockpair(int [2]);
51 int		imsg_send_sockets(struct imsgbuf *, struct imsgbuf *,
52 		    struct imsgbuf *);
53 void		bgpd_rtr_connect(struct rtr_config *);
54 void		bgpd_rtr_connect_done(int, struct bgpd_config *);
55 
56 int			 cflags;
57 volatile sig_atomic_t	 mrtdump;
58 volatile sig_atomic_t	 quit;
59 volatile sig_atomic_t	 reconfig;
60 pid_t			 reconfpid;
61 int			 reconfpending;
62 struct imsgbuf		*ibuf_se;
63 struct imsgbuf		*ibuf_rde;
64 struct imsgbuf		*ibuf_rtr;
65 struct rib_names	 ribnames = SIMPLEQ_HEAD_INITIALIZER(ribnames);
66 char			*cname;
67 char			*rcname;
68 
69 struct connect_elm {
70 	TAILQ_ENTRY(connect_elm)	entry;
71 	uint32_t			id;
72 	int				fd;
73 };
74 
75 TAILQ_HEAD( ,connect_elm)	connect_queue = \
76 				    TAILQ_HEAD_INITIALIZER(connect_queue);
77 u_int				connect_cnt;
78 #define MAX_CONNECT_CNT		32
79 
80 void
81 sighdlr(int sig)
82 {
83 	switch (sig) {
84 	case SIGTERM:
85 	case SIGINT:
86 		quit = 1;
87 		break;
88 	case SIGHUP:
89 		reconfig = 1;
90 		break;
91 	case SIGALRM:
92 	case SIGUSR1:
93 		mrtdump = 1;
94 		break;
95 	}
96 }
97 
98 __dead void
99 usage(void)
100 {
101 	extern char *__progname;
102 
103 	fprintf(stderr, "usage: %s [-cdnvV] [-D macro=value] [-f file]\n",
104 	    __progname);
105 	exit(1);
106 }
107 
/*
 * Fixed slots of the pollfd array used by the main loop.  Slots from
 * PFD_CONNECT_START upward are filled from connect_queue.
 */
enum {
	PFD_PIPE_SESSION	= 0,
	PFD_PIPE_RDE		= 1,
	PFD_PIPE_RTR		= 2,
	PFD_SOCK_ROUTE		= 3,
	PFD_SOCK_PFKEY		= 4,
	PFD_CONNECT_START	= 5
};

/* upper bound in seconds for the poll timeout of the main loop */
#define MAX_TIMEOUT		3600

/* BGPD_OPT_* bits collected from the command line */
int	 cmd_opts;
117 
118 int
119 main(int argc, char *argv[])
120 {
121 	struct bgpd_config	*conf;
122 	enum bgpd_process	 proc = PROC_MAIN;
123 	struct rde_rib		*rr;
124 	struct peer		*p;
125 	struct pollfd		*pfd = NULL;
126 	struct connect_elm	*ce;
127 	time_t			 timeout;
128 	pid_t			 se_pid = 0, rde_pid = 0, rtr_pid = 0, pid;
129 	char			*conffile;
130 	char			*saved_argv0;
131 	u_int			 pfd_elms = 0, npfd, i;
132 	int			 debug = 0;
133 	int			 rfd, keyfd;
134 	int			 ch, status;
135 	int			 pipe_m2s[2];
136 	int			 pipe_m2r[2];
137 	int			 pipe_m2roa[2];
138 
139 	conffile = CONFFILE;
140 
141 	log_init(1, LOG_DAEMON);	/* log to stderr until daemonized */
142 	log_procinit(log_procnames[PROC_MAIN]);
143 	log_setverbose(1);
144 
145 	saved_argv0 = argv[0];
146 	if (saved_argv0 == NULL)
147 		saved_argv0 = "bgpd";
148 
149 	while ((ch = getopt(argc, argv, "cdD:f:nRSTvV")) != -1) {
150 		switch (ch) {
151 		case 'c':
152 			cmd_opts |= BGPD_OPT_FORCE_DEMOTE;
153 			break;
154 		case 'd':
155 			debug = 1;
156 			break;
157 		case 'D':
158 			if (cmdline_symset(optarg) < 0)
159 				log_warnx("could not parse macro definition %s",
160 				    optarg);
161 			break;
162 		case 'f':
163 			conffile = optarg;
164 			break;
165 		case 'n':
166 			cmd_opts |= BGPD_OPT_NOACTION;
167 			break;
168 		case 'v':
169 			if (cmd_opts & BGPD_OPT_VERBOSE)
170 				cmd_opts |= BGPD_OPT_VERBOSE2;
171 			cmd_opts |= BGPD_OPT_VERBOSE;
172 			break;
173 		case 'R':
174 			proc = PROC_RDE;
175 			break;
176 		case 'S':
177 			proc = PROC_SE;
178 			break;
179 		case 'T':
180 			proc = PROC_RTR;
181 			break;
182 		case 'V':
183 			fprintf(stderr, "OpenBGPD %s\n", BGPD_VERSION);
184 			return 0;
185 		default:
186 			usage();
187 			/* NOTREACHED */
188 		}
189 	}
190 
191 	argc -= optind;
192 	argv += optind;
193 	if (argc > 0)
194 		usage();
195 
196 	if (cmd_opts & BGPD_OPT_NOACTION) {
197 		if ((conf = parse_config(conffile, NULL, NULL)) == NULL)
198 			exit(1);
199 
200 		if (cmd_opts & BGPD_OPT_VERBOSE)
201 			print_config(conf, &ribnames);
202 		else
203 			fprintf(stderr, "configuration OK\n");
204 
205 		while ((rr = SIMPLEQ_FIRST(&ribnames)) != NULL) {
206 			SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
207 			free(rr);
208 		}
209 		free_config(conf);
210 		exit(0);
211 	}
212 
213 	switch (proc) {
214 	case PROC_MAIN:
215 		break;
216 	case PROC_RDE:
217 		rde_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
218 		/* NOTREACHED */
219 	case PROC_SE:
220 		session_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
221 		/* NOTREACHED */
222 	case PROC_RTR:
223 		rtr_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
224 		/* NOTREACHED */
225 	}
226 
227 	if (geteuid())
228 		errx(1, "need root privileges");
229 
230 	if (getpwnam(BGPD_USER) == NULL)
231 		errx(1, "unknown user %s", BGPD_USER);
232 
233 	if ((conf = parse_config(conffile, NULL, NULL)) == NULL) {
234 		log_warnx("config file %s has errors", conffile);
235 		exit(1);
236 	}
237 
238 	if (prepare_listeners(conf) == -1)
239 		exit(1);
240 
241 	log_init(debug, LOG_DAEMON);
242 	log_setverbose(cmd_opts & BGPD_OPT_VERBOSE);
243 
244 	if (!debug)
245 		daemon(1, 0);
246 
247 	log_info("startup");
248 
249 	getsockpair(pipe_m2s);
250 	getsockpair(pipe_m2r);
251 	getsockpair(pipe_m2roa);
252 
253 	/* fork children */
254 	rde_pid = start_child(PROC_RDE, saved_argv0, pipe_m2r[1], debug,
255 	    cmd_opts & BGPD_OPT_VERBOSE);
256 	se_pid = start_child(PROC_SE, saved_argv0, pipe_m2s[1], debug,
257 	    cmd_opts & BGPD_OPT_VERBOSE);
258 	rtr_pid = start_child(PROC_RTR, saved_argv0, pipe_m2roa[1], debug,
259 	    cmd_opts & BGPD_OPT_VERBOSE);
260 
261 	signal(SIGTERM, sighdlr);
262 	signal(SIGINT, sighdlr);
263 	signal(SIGHUP, sighdlr);
264 	signal(SIGALRM, sighdlr);
265 	signal(SIGUSR1, sighdlr);
266 	signal(SIGPIPE, SIG_IGN);
267 
268 	if ((ibuf_se = malloc(sizeof(struct imsgbuf))) == NULL ||
269 	    (ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL ||
270 	    (ibuf_rtr = malloc(sizeof(struct imsgbuf))) == NULL)
271 		fatal(NULL);
272 	imsg_init(ibuf_se, pipe_m2s[0]);
273 	imsg_init(ibuf_rde, pipe_m2r[0]);
274 	imsg_init(ibuf_rtr, pipe_m2roa[0]);
275 	mrt_init(ibuf_rde, ibuf_se);
276 	if (kr_init(&rfd, conf->fib_priority) == -1)
277 		quit = 1;
278 	keyfd = pfkey_init();
279 
280 	/*
281 	 * rpath, read config file
282 	 * cpath, unlink control socket
283 	 * fattr, chmod on control socket
284 	 * wpath, needed if we are doing mrt dumps
285 	 *
286 	 * pledge placed here because kr_init() does a setsockopt on the
287 	 * routing socket thats not allowed at all.
288 	 */
289 #if 0
290 	/*
291 	 * disabled because we do ioctls on /dev/pf and SIOCSIFGATTR
292 	 * this needs some redesign of bgpd to be fixed.
293 	 */
294 BROKEN	if (pledge("stdio rpath wpath cpath fattr unix route recvfd sendfd",
295 	    NULL) == -1)
296 		fatal("pledge");
297 #endif
298 
299 	if (imsg_send_sockets(ibuf_se, ibuf_rde, ibuf_rtr))
300 		fatal("could not establish imsg links");
301 	/* control setup needs to happen late since it sends imsgs */
302 	if (control_setup(conf) == -1)
303 		quit = 1;
304 	if (send_config(conf) != 0)
305 		quit = 1;
306 	if (pftable_clear_all() != 0)
307 		quit = 1;
308 
309 	while (quit == 0) {
310 		if (pfd_elms < PFD_CONNECT_START + connect_cnt) {
311 			struct pollfd *newp;
312 
313 			if ((newp = reallocarray(pfd,
314 			    PFD_CONNECT_START + connect_cnt,
315 			    sizeof(struct pollfd))) == NULL) {
316 				log_warn("could not resize pfd from %u -> %u"
317 				    " entries", pfd_elms, PFD_CONNECT_START +
318 				    connect_cnt);
319 				fatalx("exiting");
320 			}
321 			pfd = newp;
322 			pfd_elms = PFD_CONNECT_START + connect_cnt;
323 		}
324 		memset(pfd, 0, sizeof(struct pollfd) * pfd_elms);
325 
326 		timeout = mrt_timeout(conf->mrt);
327 
328 		pfd[PFD_SOCK_ROUTE].fd = rfd;
329 		pfd[PFD_SOCK_ROUTE].events = POLLIN;
330 
331 		pfd[PFD_SOCK_PFKEY].fd = keyfd;
332 		pfd[PFD_SOCK_PFKEY].events = POLLIN;
333 
334 		set_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se);
335 		set_pollfd(&pfd[PFD_PIPE_RDE], ibuf_rde);
336 		set_pollfd(&pfd[PFD_PIPE_RTR], ibuf_rtr);
337 
338 		npfd = PFD_CONNECT_START;
339 		TAILQ_FOREACH(ce, &connect_queue, entry) {
340 			pfd[npfd].fd = ce->fd;
341 			pfd[npfd++].events = POLLOUT;
342 			if (npfd > pfd_elms)
343 				fatalx("polli pfd overflow");
344 		}
345 
346 		if (timeout < 0 || timeout > MAX_TIMEOUT)
347 			timeout = MAX_TIMEOUT;
348 		if (poll(pfd, npfd, timeout * 1000) == -1) {
349 			if (errno != EINTR) {
350 				log_warn("poll error");
351 				quit = 1;
352 			}
353 			goto next_loop;
354 		}
355 
356 		if (handle_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se) == -1) {
357 			log_warnx("main: Lost connection to SE");
358 			msgbuf_clear(&ibuf_se->w);
359 			free(ibuf_se);
360 			ibuf_se = NULL;
361 			quit = 1;
362 		} else {
363 			if (dispatch_imsg(ibuf_se, PFD_PIPE_SESSION, conf) ==
364 			    -1)
365 				quit = 1;
366 		}
367 
368 		if (handle_pollfd(&pfd[PFD_PIPE_RDE], ibuf_rde) == -1) {
369 			log_warnx("main: Lost connection to RDE");
370 			msgbuf_clear(&ibuf_rde->w);
371 			free(ibuf_rde);
372 			ibuf_rde = NULL;
373 			quit = 1;
374 		} else {
375 			if (dispatch_imsg(ibuf_rde, PFD_PIPE_RDE, conf) == -1)
376 				quit = 1;
377 		}
378 
379 		if (handle_pollfd(&pfd[PFD_PIPE_RTR], ibuf_rtr) == -1) {
380 			log_warnx("main: Lost connection to RTR");
381 			msgbuf_clear(&ibuf_rtr->w);
382 			free(ibuf_rtr);
383 			ibuf_rtr = NULL;
384 			quit = 1;
385 		} else {
386 			if (dispatch_imsg(ibuf_rtr, PFD_PIPE_RTR, conf) == -1)
387 				quit = 1;
388 		}
389 
390 		if (pfd[PFD_SOCK_ROUTE].revents & POLLIN) {
391 			if (kr_dispatch_msg() == -1)
392 				quit = 1;
393 		}
394 
395 		if (pfd[PFD_SOCK_PFKEY].revents & POLLIN) {
396 			if (pfkey_read(keyfd, NULL) == -1) {
397 				log_warnx("pfkey_read failed, exiting...");
398 				quit = 1;
399 			}
400 		}
401 
402 		for (i = PFD_CONNECT_START; i < npfd; i++)
403 			if (pfd[i].revents != 0)
404 				bgpd_rtr_connect_done(pfd[i].fd, conf);
405 
406  next_loop:
407 		if (reconfig) {
408 			u_int	error;
409 
410 			reconfig = 0;
411 			switch (reconfigure(conffile, conf)) {
412 			case -1:	/* fatal error */
413 				quit = 1;
414 				break;
415 			case 0:		/* all OK */
416 				error = 0;
417 				break;
418 			case 2:
419 				log_info("previous reload still running");
420 				error = CTL_RES_PENDING;
421 				break;
422 			default:	/* parse error */
423 				log_warnx("config file %s has errors, "
424 				    "not reloading", conffile);
425 				error = CTL_RES_PARSE_ERROR;
426 				break;
427 			}
428 			if (reconfpid != 0) {
429 				send_imsg_session(IMSG_CTL_RESULT, reconfpid,
430 				    &error, sizeof(error));
431 				reconfpid = 0;
432 			}
433 		}
434 
435 		if (mrtdump) {
436 			mrtdump = 0;
437 			mrt_handler(conf->mrt);
438 		}
439 	}
440 
441 	/* close pipes */
442 	if (ibuf_se) {
443 		msgbuf_clear(&ibuf_se->w);
444 		close(ibuf_se->fd);
445 		free(ibuf_se);
446 		ibuf_se = NULL;
447 	}
448 	if (ibuf_rde) {
449 		msgbuf_clear(&ibuf_rde->w);
450 		close(ibuf_rde->fd);
451 		free(ibuf_rde);
452 		ibuf_rde = NULL;
453 	}
454 	if (ibuf_rtr) {
455 		msgbuf_clear(&ibuf_rtr->w);
456 		close(ibuf_rtr->fd);
457 		free(ibuf_rtr);
458 		ibuf_rtr = NULL;
459 	}
460 
461 	/* cleanup kernel data structures */
462 	carp_demote_shutdown();
463 	kr_shutdown();
464 	pftable_clear_all();
465 
466 	RB_FOREACH(p, peer_head, &conf->peers)
467 		pfkey_remove(p);
468 
469 	while ((rr = SIMPLEQ_FIRST(&ribnames)) != NULL) {
470 		SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
471 		free(rr);
472 	}
473 	free_config(conf);
474 
475 	log_debug("waiting for children to terminate");
476 	do {
477 		pid = wait(&status);
478 		if (pid == -1) {
479 			if (errno != EINTR && errno != ECHILD)
480 				fatal("wait");
481 		} else if (WIFSIGNALED(status)) {
482 			char *name = "unknown process";
483 			if (pid == rde_pid)
484 				name = "route decision engine";
485 			else if (pid == se_pid)
486 				name = "session engine";
487 			else if (pid == rtr_pid)
488 				name = "rtr engine";
489 			log_warnx("%s terminated; signal %d", name,
490 				WTERMSIG(status));
491 		}
492 	} while (pid != -1 || (pid == -1 && errno == EINTR));
493 
494 	free(rcname);
495 	free(cname);
496 
497 	log_info("terminating");
498 	return (0);
499 }
500 
501 pid_t
502 start_child(enum bgpd_process p, char *argv0, int fd, int debug, int verbose)
503 {
504 	char *argv[5];
505 	int argc = 0;
506 	pid_t pid;
507 
508 	switch (pid = fork()) {
509 	case -1:
510 		fatal("cannot fork");
511 	case 0:
512 		break;
513 	default:
514 		close(fd);
515 		return (pid);
516 	}
517 
518 	if (fd != 3) {
519 		if (dup2(fd, 3) == -1)
520 			fatal("cannot setup imsg fd");
521 	} else if (fcntl(fd, F_SETFD, 0) == -1)
522 		fatal("cannot setup imsg fd");
523 
524 	argv[argc++] = argv0;
525 	switch (p) {
526 	case PROC_MAIN:
527 		fatalx("Can not start main process");
528 	case PROC_RDE:
529 		argv[argc++] = "-R";
530 		break;
531 	case PROC_SE:
532 		argv[argc++] = "-S";
533 		break;
534 	case PROC_RTR:
535 		argv[argc++] = "-T";
536 		break;
537 	}
538 	if (debug)
539 		argv[argc++] = "-d";
540 	if (verbose)
541 		argv[argc++] = "-v";
542 	argv[argc++] = NULL;
543 
544 	execvp(argv0, argv);
545 	fatal("execvp");
546 }
547 
548 int
549 send_filterset(struct imsgbuf *i, struct filter_set_head *set)
550 {
551 	struct filter_set	*s;
552 
553 	TAILQ_FOREACH(s, set, entry)
554 		if (imsg_compose(i, IMSG_FILTER_SET, 0, 0, -1, s,
555 		    sizeof(struct filter_set)) == -1)
556 			return (-1);
557 	return (0);
558 }
559 
560 int
561 reconfigure(char *conffile, struct bgpd_config *conf)
562 {
563 	struct bgpd_config	*new_conf;
564 
565 	if (reconfpending)
566 		return (2);
567 
568 	log_info("rereading config");
569 	if ((new_conf = parse_config(conffile, &conf->peers,
570 	    &conf->rtrs)) == NULL)
571 		return (1);
572 
573 	merge_config(conf, new_conf);
574 
575 	if (prepare_listeners(conf) == -1) {
576 		return (1);
577 	}
578 
579 	if (control_setup(conf) == -1) {
580 		return (1);
581 	}
582 
583 	return send_config(conf);
584 }
585 
586 int
587 send_config(struct bgpd_config *conf)
588 {
589 	struct peer		*p;
590 	struct filter_rule	*r;
591 	struct listen_addr	*la;
592 	struct rde_rib		*rr;
593 	struct l3vpn		*vpn;
594 	struct as_set		*aset;
595 	struct prefixset	*ps;
596 	struct prefixset_item	*psi, *npsi;
597 	struct roa		*roa, *nroa;
598 	struct rtr_config	*rtr;
599 
600 	reconfpending = 3;	/* one per child */
601 
602 	expand_networks(conf, &conf->networks);
603 	SIMPLEQ_FOREACH(vpn, &conf->l3vpns, entry)
604 		expand_networks(conf, &vpn->net_l);
605 
606 	cflags = conf->flags;
607 
608 	/* start reconfiguration */
609 	if (imsg_compose(ibuf_se, IMSG_RECONF_CONF, 0, 0, -1,
610 	    conf, sizeof(*conf)) == -1)
611 		return (-1);
612 	if (imsg_compose(ibuf_rde, IMSG_RECONF_CONF, 0, 0, -1,
613 	    conf, sizeof(*conf)) == -1)
614 		return (-1);
615 	if (imsg_compose(ibuf_rtr, IMSG_RECONF_CONF, 0, 0, -1,
616 	    conf, sizeof(*conf)) == -1)
617 		return (-1);
618 
619 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
620 		if (imsg_compose(ibuf_se, IMSG_RECONF_LISTENER, 0, 0, la->fd,
621 		    la, sizeof(*la)) == -1)
622 			return (-1);
623 		la->fd = -1;
624 	}
625 
626 	/* adjust fib syncing on reload */
627 	ktable_preload();
628 
629 	/* RIBs for the RDE */
630 	while ((rr = SIMPLEQ_FIRST(&ribnames))) {
631 		SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
632 		if (ktable_update(rr->rtableid, rr->name, rr->flags) == -1) {
633 			log_warnx("failed to load routing table %d",
634 			    rr->rtableid);
635 			return (-1);
636 		}
637 		if (imsg_compose(ibuf_rde, IMSG_RECONF_RIB, 0, 0, -1,
638 		    rr, sizeof(*rr)) == -1)
639 			return (-1);
640 		free(rr);
641 	}
642 
643 	/* send peer list to the SE */
644 	RB_FOREACH(p, peer_head, &conf->peers) {
645 		if (imsg_compose(ibuf_se, IMSG_RECONF_PEER, p->conf.id, 0, -1,
646 		    &p->conf, sizeof(p->conf)) == -1)
647 			return (-1);
648 
649 		if (p->reconf_action == RECONF_REINIT)
650 			if (pfkey_establish(p) == -1)
651 				log_peer_warnx(&p->conf, "pfkey setup failed");
652 	}
653 
654 	/* networks go via kroute to the RDE */
655 	kr_net_reload(conf->default_tableid, 0, &conf->networks);
656 
657 	/* prefixsets for filters in the RDE */
658 	while ((ps = SIMPLEQ_FIRST(&conf->prefixsets)) != NULL) {
659 		SIMPLEQ_REMOVE_HEAD(&conf->prefixsets, entry);
660 		if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET, 0, 0, -1,
661 		    ps->name, sizeof(ps->name)) == -1)
662 			return (-1);
663 		RB_FOREACH_SAFE(psi, prefixset_tree, &ps->psitems, npsi) {
664 			RB_REMOVE(prefixset_tree, &ps->psitems, psi);
665 			if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET_ITEM,
666 			    0, 0, -1, psi, sizeof(*psi)) == -1)
667 				return (-1);
668 			free(psi);
669 		}
670 		free(ps);
671 	}
672 
673 	/* originsets for filters in the RDE */
674 	while ((ps = SIMPLEQ_FIRST(&conf->originsets)) != NULL) {
675 		SIMPLEQ_REMOVE_HEAD(&conf->originsets, entry);
676 		if (imsg_compose(ibuf_rde, IMSG_RECONF_ORIGIN_SET, 0, 0, -1,
677 		    ps->name, sizeof(ps->name)) == -1)
678 			return (-1);
679 		RB_FOREACH_SAFE(roa, roa_tree, &ps->roaitems, nroa) {
680 			RB_REMOVE(roa_tree, &ps->roaitems, roa);
681 			if (imsg_compose(ibuf_rde, IMSG_RECONF_ROA_ITEM, 0, 0,
682 			    -1, roa, sizeof(*roa)) == -1)
683 				return (-1);
684 			free(roa);
685 		}
686 		free(ps);
687 	}
688 
689 	/* roa table and rtr config are sent to the RTR engine */
690 	RB_FOREACH_SAFE(roa, roa_tree, &conf->roa, nroa) {
691 		RB_REMOVE(roa_tree, &conf->roa, roa);
692 		if (imsg_compose(ibuf_rtr, IMSG_RECONF_ROA_ITEM, 0, 0,
693 		    -1, roa, sizeof(*roa)) == -1)
694 			return (-1);
695 		free(roa);
696 	}
697 	SIMPLEQ_FOREACH(rtr, &conf->rtrs, entry) {
698 		if (imsg_compose(ibuf_rtr, IMSG_RECONF_RTR_CONFIG, rtr->id,
699 		    0, -1, rtr->descr, sizeof(rtr->descr)) == -1)
700 			return (-1);
701 	}
702 
703 	/* as-sets for filters in the RDE */
704 	while ((aset = SIMPLEQ_FIRST(&conf->as_sets)) != NULL) {
705 		struct ibuf *wbuf;
706 		uint32_t *as;
707 		size_t i, l, n;
708 
709 		SIMPLEQ_REMOVE_HEAD(&conf->as_sets, entry);
710 
711 		as = set_get(aset->set, &n);
712 		if ((wbuf = imsg_create(ibuf_rde, IMSG_RECONF_AS_SET, 0, 0,
713 		    sizeof(n) + sizeof(aset->name))) == NULL)
714 			return -1;
715 		if (imsg_add(wbuf, &n, sizeof(n)) == -1 ||
716 		    imsg_add(wbuf, aset->name, sizeof(aset->name)) == -1)
717 			return -1;
718 		imsg_close(ibuf_rde, wbuf);
719 
720 		for (i = 0; i < n; i += l) {
721 			l = (n - i > 1024 ? 1024 : n - i);
722 			if (imsg_compose(ibuf_rde, IMSG_RECONF_AS_SET_ITEMS,
723 			    0, 0, -1, as + i, l * sizeof(*as)) == -1)
724 				return -1;
725 		}
726 
727 		if (imsg_compose(ibuf_rde, IMSG_RECONF_AS_SET_DONE, 0, 0, -1,
728 		    NULL, 0) == -1)
729 			return -1;
730 
731 		set_free(aset->set);
732 		free(aset);
733 	}
734 
735 	/* filters for the RDE */
736 	while ((r = TAILQ_FIRST(conf->filters)) != NULL) {
737 		TAILQ_REMOVE(conf->filters, r, entry);
738 		if (send_filterset(ibuf_rde, &r->set) == -1)
739 			return (-1);
740 		if (imsg_compose(ibuf_rde, IMSG_RECONF_FILTER, 0, 0, -1,
741 		    r, sizeof(struct filter_rule)) == -1)
742 			return (-1);
743 		filterset_free(&r->set);
744 		free(r);
745 	}
746 
747 	while ((vpn = SIMPLEQ_FIRST(&conf->l3vpns)) != NULL) {
748 		SIMPLEQ_REMOVE_HEAD(&conf->l3vpns, entry);
749 		if (ktable_update(vpn->rtableid, vpn->descr, vpn->flags) ==
750 		    -1) {
751 			log_warnx("failed to load routing table %d",
752 			    vpn->rtableid);
753 			return (-1);
754 		}
755 		/* networks go via kroute to the RDE */
756 		kr_net_reload(vpn->rtableid, vpn->rd, &vpn->net_l);
757 
758 		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN, 0, 0, -1,
759 		    vpn, sizeof(*vpn)) == -1)
760 			return (-1);
761 
762 		/* export targets */
763 		if (send_filterset(ibuf_rde, &vpn->export) == -1)
764 			return (-1);
765 		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_EXPORT, 0, 0,
766 		    -1, NULL, 0) == -1)
767 			return (-1);
768 		filterset_free(&vpn->export);
769 
770 		/* import targets */
771 		if (send_filterset(ibuf_rde, &vpn->import) == -1)
772 			return (-1);
773 		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_IMPORT, 0, 0,
774 		    -1, NULL, 0) == -1)
775 			return (-1);
776 		filterset_free(&vpn->import);
777 
778 		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_DONE, 0, 0,
779 		    -1, NULL, 0) == -1)
780 			return (-1);
781 
782 		free(vpn);
783 	}
784 
785 	/* send a drain message to know when all messages where processed */
786 	if (imsg_compose(ibuf_se, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
787 		return (-1);
788 	if (imsg_compose(ibuf_rde, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
789 		return (-1);
790 	if (imsg_compose(ibuf_rtr, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
791 		return (-1);
792 
793 	/* mrt changes can be sent out of bound */
794 	mrt_reconfigure(conf->mrt);
795 	return (0);
796 }
797 
798 int
799 dispatch_imsg(struct imsgbuf *ibuf, int idx, struct bgpd_config *conf)
800 {
801 	struct imsg		 imsg;
802 	struct peer		*p;
803 	struct rtr_config	*r;
804 	ssize_t			 n;
805 	u_int			 rtableid;
806 	int			 rv, verbose;
807 
808 	rv = 0;
809 	while (ibuf) {
810 		if ((n = imsg_get(ibuf, &imsg)) == -1)
811 			return (-1);
812 
813 		if (n == 0)
814 			break;
815 
816 		switch (imsg.hdr.type) {
817 		case IMSG_KROUTE_CHANGE:
818 			if (idx != PFD_PIPE_RDE)
819 				log_warnx("route request not from RDE");
820 			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
821 			    sizeof(struct kroute_full))
822 				log_warnx("wrong imsg len");
823 			else if (kr_change(imsg.hdr.peerid, imsg.data))
824 				rv = -1;
825 			break;
826 		case IMSG_KROUTE_DELETE:
827 			if (idx != PFD_PIPE_RDE)
828 				log_warnx("route request not from RDE");
829 			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
830 			    sizeof(struct kroute_full))
831 				log_warnx("wrong imsg len");
832 			else if (kr_delete(imsg.hdr.peerid, imsg.data))
833 				rv = -1;
834 			break;
835 		case IMSG_KROUTE_FLUSH:
836 			if (idx != PFD_PIPE_RDE)
837 				log_warnx("route request not from RDE");
838 			else if (imsg.hdr.len != IMSG_HEADER_SIZE)
839 				log_warnx("wrong imsg len");
840 			else if (kr_flush(imsg.hdr.peerid))
841 				rv = -1;
842 			break;
843 		case IMSG_NEXTHOP_ADD:
844 			if (idx != PFD_PIPE_RDE)
845 				log_warnx("nexthop request not from RDE");
846 			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
847 			    sizeof(struct bgpd_addr))
848 				log_warnx("wrong imsg len");
849 			else {
850 				rtableid = conf->default_tableid;
851 				if (kr_nexthop_add(rtableid, imsg.data) == -1)
852 					rv = -1;
853 			}
854 			break;
855 		case IMSG_NEXTHOP_REMOVE:
856 			if (idx != PFD_PIPE_RDE)
857 				log_warnx("nexthop request not from RDE");
858 			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
859 			    sizeof(struct bgpd_addr))
860 				log_warnx("wrong imsg len");
861 			else {
862 				rtableid = conf->default_tableid;
863 				kr_nexthop_delete(rtableid, imsg.data);
864 			}
865 			break;
866 		case IMSG_PFTABLE_ADD:
867 			if (idx != PFD_PIPE_RDE)
868 				log_warnx("pftable request not from RDE");
869 			else
870 				if (imsg.hdr.len != IMSG_HEADER_SIZE +
871 				    sizeof(struct pftable_msg))
872 					log_warnx("wrong imsg len");
873 				else if (pftable_addr_add(imsg.data) != 0)
874 					rv = -1;
875 			break;
876 		case IMSG_PFTABLE_REMOVE:
877 			if (idx != PFD_PIPE_RDE)
878 				log_warnx("pftable request not from RDE");
879 			else
880 				if (imsg.hdr.len != IMSG_HEADER_SIZE +
881 				    sizeof(struct pftable_msg))
882 					log_warnx("wrong imsg len");
883 				else if (pftable_addr_remove(imsg.data) != 0)
884 					rv = -1;
885 			break;
886 		case IMSG_PFTABLE_COMMIT:
887 			if (idx != PFD_PIPE_RDE)
888 				log_warnx("pftable request not from RDE");
889 			else if (imsg.hdr.len != IMSG_HEADER_SIZE)
890 				log_warnx("wrong imsg len");
891 			else if (pftable_commit() != 0)
892 				rv = -1;
893 			break;
894 		case IMSG_PFKEY_RELOAD:
895 			if (idx != PFD_PIPE_SESSION) {
896 				log_warnx("pfkey reload request not from SE");
897 				break;
898 			}
899 			p = getpeerbyid(conf, imsg.hdr.peerid);
900 			if (p != NULL) {
901 				if (pfkey_establish(p) == -1)
902 					log_peer_warnx(&p->conf,
903 					    "pfkey setup failed");
904 			}
905 			break;
906 		case IMSG_CTL_RELOAD:
907 			if (idx != PFD_PIPE_SESSION)
908 				log_warnx("reload request not from SE");
909 			else {
910 				reconfig = 1;
911 				reconfpid = imsg.hdr.pid;
912 				if (imsg.hdr.len == IMSG_HEADER_SIZE +
913 				    REASON_LEN && ((char *)imsg.data)[0])
914 					log_info("reload due to: %s",
915 					    log_reason(imsg.data));
916 			}
917 			break;
918 		case IMSG_CTL_FIB_COUPLE:
919 			if (idx != PFD_PIPE_SESSION)
920 				log_warnx("couple request not from SE");
921 			else
922 				kr_fib_couple(imsg.hdr.peerid);
923 			break;
924 		case IMSG_CTL_FIB_DECOUPLE:
925 			if (idx != PFD_PIPE_SESSION)
926 				log_warnx("decouple request not from SE");
927 			else
928 				kr_fib_decouple(imsg.hdr.peerid);
929 			break;
930 		case IMSG_CTL_KROUTE:
931 		case IMSG_CTL_KROUTE_ADDR:
932 		case IMSG_CTL_SHOW_NEXTHOP:
933 		case IMSG_CTL_SHOW_INTERFACE:
934 		case IMSG_CTL_SHOW_FIB_TABLES:
935 			if (idx != PFD_PIPE_SESSION)
936 				log_warnx("kroute request not from SE");
937 			else
938 				kr_show_route(&imsg);
939 			break;
940 		case IMSG_SESSION_DEPENDON:
941 			if (idx != PFD_PIPE_SESSION)
942 				log_warnx("DEPENDON request not from SE");
943 			else if (imsg.hdr.len != IMSG_HEADER_SIZE + IFNAMSIZ)
944 				log_warnx("DEPENDON request with wrong len");
945 			else
946 				kr_ifinfo(imsg.data);
947 			break;
948 		case IMSG_DEMOTE:
949 			if (idx != PFD_PIPE_SESSION)
950 				log_warnx("demote request not from SE");
951 			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
952 			    sizeof(struct demote_msg))
953 				log_warnx("DEMOTE request with wrong len");
954 			else {
955 				struct demote_msg	*msg;
956 
957 				msg = imsg.data;
958 				carp_demote_set(msg->demote_group, msg->level);
959 			}
960 			break;
961 		case IMSG_CTL_LOG_VERBOSE:
962 			/* already checked by SE */
963 			memcpy(&verbose, imsg.data, sizeof(verbose));
964 			log_setverbose(verbose);
965 			break;
966 		case IMSG_RECONF_DONE:
967 			if (reconfpending == 0) {
968 				log_warnx("unexpected RECONF_DONE received");
969 				break;
970 			}
971 			if (idx == PFD_PIPE_SESSION) {
972 				imsg_compose(ibuf_rtr, IMSG_RECONF_DONE, 0,
973 				    0, -1, NULL, 0);
974 			} else if (idx == PFD_PIPE_RTR) {
975 				imsg_compose(ibuf_rde, IMSG_RECONF_DONE, 0,
976 				    0, -1, NULL, 0);
977 
978 				/* finally fix kroute information */
979 				ktable_postload();
980 
981 				/* redistribute list needs to be reloaded too */
982 				kr_reload();
983 			}
984 			reconfpending--;
985 			break;
986 		case IMSG_RECONF_DRAIN:
987 			if (reconfpending == 0) {
988 				log_warnx("unexpected RECONF_DRAIN received");
989 				break;
990 			}
991 			reconfpending--;
992 			if (reconfpending == 0) {
993 				/*
994 				 * SE goes first to bring templated neighbors
995 				 * in sync.
996 				 */
997 				imsg_compose(ibuf_se, IMSG_RECONF_DONE, 0,
998 				    0, -1, NULL, 0);
999 				reconfpending = 3; /* expecting 2 DONE msg */
1000 			}
1001 			break;
1002 		case IMSG_SOCKET_CONN:
1003 			if (idx != PFD_PIPE_RTR) {
1004 				log_warnx("connect request not from RTR");
1005 			} else {
1006 				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1007 					if (imsg.hdr.peerid == r->id)
1008 						break;
1009 				}
1010 				if (r == NULL)
1011 					log_warnx("unknown rtr id %d",
1012 					    imsg.hdr.peerid);
1013 				else
1014 					bgpd_rtr_connect(r);
1015 			}
1016 			break;
1017 		case IMSG_CTL_SHOW_RTR:
1018 			if (idx == PFD_PIPE_SESSION) {
1019 				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1020 					imsg_compose(ibuf_rtr, imsg.hdr.type,
1021 					    r->id, imsg.hdr.pid, -1, NULL, 0);
1022 				}
1023 				imsg_compose(ibuf_rtr, IMSG_CTL_END,
1024 				    0, imsg.hdr.pid, -1, NULL, 0);
1025 			} else if (imsg.hdr.len != IMSG_HEADER_SIZE +
1026 			    sizeof(struct ctl_show_rtr)) {
1027 				log_warnx("IMSG_CTL_SHOW_RTR with wrong len");
1028 			} else if (idx == PFD_PIPE_RTR) {
1029 				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1030 					if (imsg.hdr.peerid == r->id)
1031 						break;
1032 				}
1033 				if (r != NULL) {
1034 					struct ctl_show_rtr *msg;
1035 					msg = imsg.data;
1036 					strlcpy(msg->descr, r->descr,
1037 					    sizeof(msg->descr));
1038 					msg->local_addr = r->local_addr;
1039 					msg->remote_addr = r->remote_addr;
1040 					msg->remote_port = r->remote_port;
1041 
1042 					imsg_compose(ibuf_se, imsg.hdr.type,
1043 					    imsg.hdr.peerid, imsg.hdr.pid,
1044 					    -1, imsg.data,
1045 					    imsg.hdr.len - IMSG_HEADER_SIZE);
1046 				}
1047 			}
1048 			break;
1049 		case IMSG_CTL_END:
1050 		case IMSG_CTL_SHOW_TIMER:
1051 			if (idx != PFD_PIPE_RTR) {
1052 				log_warnx("connect request not from RTR");
1053 				break;
1054 			}
1055 			imsg_compose(ibuf_se, imsg.hdr.type, imsg.hdr.peerid,
1056 			    imsg.hdr.pid, -1, imsg.data,
1057 			    imsg.hdr.len - IMSG_HEADER_SIZE);
1058 			break;
1059 		default:
1060 			break;
1061 		}
1062 		imsg_free(&imsg);
1063 		if (rv != 0)
1064 			return (rv);
1065 	}
1066 	return (0);
1067 }
1068 
1069 void
1070 send_nexthop_update(struct kroute_nexthop *msg)
1071 {
1072 	char	*gw = NULL;
1073 
1074 	if (msg->gateway.aid)
1075 		if (asprintf(&gw, ": via %s",
1076 		    log_addr(&msg->gateway)) == -1) {
1077 			log_warn("send_nexthop_update");
1078 			quit = 1;
1079 		}
1080 
1081 	log_debug("nexthop %s now %s%s%s", log_addr(&msg->nexthop),
1082 	    msg->valid ? "valid" : "invalid",
1083 	    msg->connected ? ": directly connected" : "",
1084 	    msg->gateway.aid ? gw : "");
1085 
1086 	free(gw);
1087 
1088 	if (imsg_compose(ibuf_rde, IMSG_NEXTHOP_UPDATE, 0, 0, -1,
1089 	    msg, sizeof(struct kroute_nexthop)) == -1)
1090 		quit = 1;
1091 }
1092 
1093 void
1094 send_imsg_session(int type, pid_t pid, void *data, uint16_t datalen)
1095 {
1096 	imsg_compose(ibuf_se, type, 0, pid, -1, data, datalen);
1097 }
1098 
1099 int
1100 send_network(int type, struct network_config *net, struct filter_set_head *h)
1101 {
1102 	if (quit)
1103 		return (0);
1104 	if (imsg_compose(ibuf_rde, type, 0, 0, -1, net,
1105 	    sizeof(struct network_config)) == -1)
1106 		return (-1);
1107 	/* networks that get deleted don't need to send the filter set */
1108 	if (type == IMSG_NETWORK_REMOVE)
1109 		return (0);
1110 	if (send_filterset(ibuf_rde, h) == -1)
1111 		return (-1);
1112 	if (imsg_compose(ibuf_rde, IMSG_NETWORK_DONE, 0, 0, -1, NULL, 0) == -1)
1113 		return (-1);
1114 
1115 	return (0);
1116 }
1117 
1118 /*
1119  * Return true if a route can be used for nexthop resolution.
1120  */
1121 int
1122 bgpd_oknexthop(struct kroute_full *kf)
1123 {
1124 	if (kf->flags & F_BGPD)
1125 		return ((cflags & BGPD_FLAG_NEXTHOP_BGP) != 0);
1126 
1127 	if (kf->prefixlen == 0)
1128 		return ((cflags & BGPD_FLAG_NEXTHOP_DEFAULT) != 0);
1129 
1130 	/* any other route is fine */
1131 	return (1);
1132 }
1133 
1134 int
1135 control_setup(struct bgpd_config *conf)
1136 {
1137 	int fd, restricted;
1138 
1139 	/* control socket is outside chroot */
1140 	if (!cname || strcmp(cname, conf->csock)) {
1141 		if (cname) {
1142 			free(cname);
1143 		}
1144 		if ((cname = strdup(conf->csock)) == NULL)
1145 			fatal("strdup");
1146 		if (control_check(cname) == -1)
1147 			return (-1);
1148 		if ((fd = control_init(0, cname)) == -1)
1149 			fatalx("control socket setup failed");
1150 		if (control_listen(fd) == -1)
1151 			fatalx("control socket setup failed");
1152 		restricted = 0;
1153 		if (imsg_compose(ibuf_se, IMSG_RECONF_CTRL, 0, 0, fd,
1154 		    &restricted, sizeof(restricted)) == -1)
1155 			return (-1);
1156 	}
1157 	if (!conf->rcsock) {
1158 		/* remove restricted socket */
1159 		free(rcname);
1160 		rcname = NULL;
1161 	} else if (!rcname || strcmp(rcname, conf->rcsock)) {
1162 		if (rcname) {
1163 			free(rcname);
1164 		}
1165 		if ((rcname = strdup(conf->rcsock)) == NULL)
1166 			fatal("strdup");
1167 		if (control_check(rcname) == -1)
1168 			return (-1);
1169 		if ((fd = control_init(1, rcname)) == -1)
1170 			fatalx("control socket setup failed");
1171 		if (control_listen(fd) == -1)
1172 			fatalx("control socket setup failed");
1173 		restricted = 1;
1174 		if (imsg_compose(ibuf_se, IMSG_RECONF_CTRL, 0, 0, fd,
1175 		    &restricted, sizeof(restricted)) == -1)
1176 			return (-1);
1177 	}
1178 	return (0);
1179 }
1180 
1181 void
1182 set_pollfd(struct pollfd *pfd, struct imsgbuf *i)
1183 {
1184 	if (i == NULL || i->fd == -1) {
1185 		pfd->fd = -1;
1186 		return;
1187 	}
1188 	pfd->fd = i->fd;
1189 	pfd->events = POLLIN;
1190 	if (i->w.queued > 0)
1191 		pfd->events |= POLLOUT;
1192 }
1193 
1194 int
1195 handle_pollfd(struct pollfd *pfd, struct imsgbuf *i)
1196 {
1197 	ssize_t n;
1198 
1199 	if (i == NULL)
1200 		return (0);
1201 
1202 	if (pfd->revents & POLLOUT)
1203 		if (msgbuf_write(&i->w) <= 0 && errno != EAGAIN) {
1204 			log_warn("imsg write error");
1205 			close(i->fd);
1206 			i->fd = -1;
1207 			return (-1);
1208 		}
1209 
1210 	if (pfd->revents & POLLIN) {
1211 		if ((n = imsg_read(i)) == -1 && errno != EAGAIN) {
1212 			log_warn("imsg read error");
1213 			close(i->fd);
1214 			i->fd = -1;
1215 			return (-1);
1216 		}
1217 		if (n == 0) {
1218 			log_warnx("peer closed imsg connection");
1219 			close(i->fd);
1220 			i->fd = -1;
1221 			return (-1);
1222 		}
1223 	}
1224 	return (0);
1225 }
1226 
1227 static void
1228 getsockpair(int pipe[2])
1229 {
1230 	int bsize, i;
1231 
1232 	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK,
1233 	    PF_UNSPEC, pipe) == -1)
1234 		fatal("socketpair");
1235 
1236 	for (i = 0; i < 2; i++) {
1237 		for (bsize = MAX_SOCK_BUF; bsize >= 16 * 1024; bsize /= 2) {
1238 			if (setsockopt(pipe[i], SOL_SOCKET, SO_RCVBUF,
1239 			    &bsize, sizeof(bsize)) == -1) {
1240 				if (errno != ENOBUFS)
1241 					fatal("setsockopt(SO_RCVBUF, %d)",
1242 					    bsize);
1243 				log_warn("setsockopt(SO_RCVBUF, %d)", bsize);
1244 				continue;
1245 			}
1246 			break;
1247 		}
1248 	}
1249 	for (i = 0; i < 2; i++) {
1250 		for (bsize = MAX_SOCK_BUF; bsize >= 16 * 1024; bsize /= 2) {
1251 			if (setsockopt(pipe[i], SOL_SOCKET, SO_SNDBUF,
1252 			    &bsize, sizeof(bsize)) == -1) {
1253 				if (errno != ENOBUFS)
1254 					fatal("setsockopt(SO_SNDBUF, %d)",
1255 					    bsize);
1256 				log_warn("setsockopt(SO_SNDBUF, %d)", bsize);
1257 				continue;
1258 			}
1259 			break;
1260 		}
1261 	}
1262 }
1263 
1264 int
1265 imsg_send_sockets(struct imsgbuf *se, struct imsgbuf *rde, struct imsgbuf *roa)
1266 {
1267 	int pipe_s2r[2];
1268 	int pipe_s2r_ctl[2];
1269 	int pipe_r2r[2];
1270 
1271 	getsockpair(pipe_s2r);
1272 	getsockpair(pipe_s2r_ctl);
1273 	getsockpair(pipe_r2r);
1274 
1275 	if (imsg_compose(se, IMSG_SOCKET_CONN, 0, 0, pipe_s2r[0],
1276 	    NULL, 0) == -1)
1277 		return (-1);
1278 	if (imsg_compose(rde, IMSG_SOCKET_CONN, 0, 0, pipe_s2r[1],
1279 	    NULL, 0) == -1)
1280 		return (-1);
1281 
1282 	if (imsg_compose(se, IMSG_SOCKET_CONN_CTL, 0, 0, pipe_s2r_ctl[0],
1283 	    NULL, 0) == -1)
1284 		return (-1);
1285 	if (imsg_compose(rde, IMSG_SOCKET_CONN_CTL, 0, 0, pipe_s2r_ctl[1],
1286 	    NULL, 0) == -1)
1287 		return (-1);
1288 
1289 	if (imsg_compose(roa, IMSG_SOCKET_CONN_RTR, 0, 0, pipe_r2r[0],
1290 	    NULL, 0) == -1)
1291 		return (-1);
1292 	if (imsg_compose(rde, IMSG_SOCKET_CONN_RTR, 0, 0, pipe_r2r[1],
1293 	    NULL, 0) == -1)
1294 		return (-1);
1295 
1296 	return (0);
1297 }
1298 
1299 void
1300 bgpd_rtr_connect(struct rtr_config *r)
1301 {
1302 	struct connect_elm *ce;
1303 	struct sockaddr *sa;
1304 	socklen_t len;
1305 
1306 	if (connect_cnt >= MAX_CONNECT_CNT) {
1307 		log_warnx("rtr %s: too many concurrent connection requests",
1308 		    r->descr);
1309 		return;
1310 	}
1311 
1312 	if ((ce = calloc(1, sizeof(*ce))) == NULL) {
1313 		log_warn("rtr %s", r->descr);
1314 		return;
1315 	}
1316 
1317 	ce->id = r->id;
1318 	ce->fd = socket(aid2af(r->remote_addr.aid),
1319 	    SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP);
1320 	if (ce->fd == -1) {
1321 		log_warn("rtr %s", r->descr);
1322 		free(ce);
1323 		return;
1324 	}
1325 
1326 	if ((sa = addr2sa(&r->local_addr, 0, &len)) != NULL) {
1327 		if (bind(ce->fd, sa, len) == -1) {
1328 			log_warn("rtr %s: bind to %s", r->descr,
1329 			    log_addr(&r->local_addr));
1330 			close(ce->fd);
1331 			free(ce);
1332 			return;
1333 		}
1334 	}
1335 
1336 	sa = addr2sa(&r->remote_addr, r->remote_port, &len);
1337 	if (connect(ce->fd, sa, len) == -1) {
1338 		if (errno != EINPROGRESS) {
1339 			log_warn("rtr %s: connect to %s:%u", r->descr,
1340 			    log_addr(&r->remote_addr), r->remote_port);
1341 			close(ce->fd);
1342 			free(ce);
1343 			return;
1344 		}
1345 		TAILQ_INSERT_TAIL(&connect_queue, ce, entry);
1346 		connect_cnt++;
1347 		return;
1348 	}
1349 
1350 	imsg_compose(ibuf_rtr, IMSG_SOCKET_CONN, ce->id, 0, ce->fd, NULL, 0);
1351 	free(ce);
1352 }
1353 
1354 void
1355 bgpd_rtr_connect_done(int fd, struct bgpd_config *conf)
1356 {
1357 	struct rtr_config *r;
1358 	struct connect_elm *ce;
1359 	int error = 0;
1360 	socklen_t len;
1361 
1362 	TAILQ_FOREACH(ce, &connect_queue, entry) {
1363 		if (ce->fd == fd)
1364 			break;
1365 	}
1366 	if (ce == NULL)
1367 		fatalx("connect entry not found");
1368 
1369 	TAILQ_REMOVE(&connect_queue, ce, entry);
1370 	connect_cnt--;
1371 
1372 	SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1373 		if (ce->id == r->id)
1374 			break;
1375 	}
1376 	if (r == NULL) {
1377 		log_warnx("rtr id %d no longer exists", ce->id);
1378 		goto fail;
1379 	}
1380 
1381 	len = sizeof(error);
1382 	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &len) == -1) {
1383 		log_warn("rtr %s: getsockopt SO_ERROR", r->descr);
1384 		goto fail;
1385 	}
1386 
1387 	if (error != 0) {
1388 		errno = error;
1389 		log_warn("rtr %s: connect to %s:%u", r->descr,
1390 		    log_addr(&r->remote_addr), r->remote_port);
1391 		goto fail;
1392 	}
1393 
1394 	imsg_compose(ibuf_rtr, IMSG_SOCKET_CONN, ce->id, 0, ce->fd, NULL, 0);
1395 	free(ce);
1396 	return;
1397 
1398 fail:
1399 	close(fd);
1400 	free(ce);
1401 }
1402