xref: /openbsd/usr.sbin/bgpd/bgpd.c (revision 25bccace)
1 /*	$OpenBSD: bgpd.c,v 1.246 2022/06/15 10:10:03 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/socket.h>
21 #include <sys/wait.h>
22 #include <netinet/in.h>
23 #include <arpa/inet.h>
24 #include <err.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <poll.h>
28 #include <pwd.h>
29 #include <signal.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <syslog.h>
34 #include <unistd.h>
35 
36 #include "bgpd.h"
37 #include "session.h"
38 #include "log.h"
39 #include "version.h"
40 
/*
 * Prototypes for the routines of the parent (main) process.
 * Some of them are defined further down, past the dispatch code.
 */
void		sighdlr(int);
__dead void	usage(void);
int		main(int, char *[]);
pid_t		start_child(enum bgpd_process, char *, int, int, int);
int		send_filterset(struct imsgbuf *, struct filter_set_head *);
int		reconfigure(char *, struct bgpd_config *);
int		send_config(struct bgpd_config *);
int		dispatch_imsg(struct imsgbuf *, int, struct bgpd_config *);
int		control_setup(struct bgpd_config *);
static void	getsockpair(int [2]);
int		imsg_send_sockets(struct imsgbuf *, struct imsgbuf *,
		    struct imsgbuf *);
void		bgpd_rtr_connect(struct rtr_config *);
void		bgpd_rtr_connect_done(int, struct bgpd_config *);
55 
/* copy of conf->flags, refreshed by send_config() */
int			 cflags;
/* flags raised by sighdlr() and consumed by the main loop */
volatile sig_atomic_t	 mrtdump;
volatile sig_atomic_t	 quit;
volatile sig_atomic_t	 reconfig;
/* pid taken from the IMSG_CTL_RELOAD header; gets the reload result */
pid_t			 reconfpid;
/* outstanding DRAIN/DONE acknowledgements of a running reload */
int			 reconfpending;
/* imsg pipes to the session engine, the RDE and the RTR engine */
struct imsgbuf		*ibuf_se;
struct imsgbuf		*ibuf_rde;
struct imsgbuf		*ibuf_rtr;
/* RIB descriptions; emptied by send_config() while sending them to the RDE */
struct rib_names	 ribnames = SIMPLEQ_HEAD_INITIALIZER(ribnames);
/* names of the control socket and the restricted control socket */
char			*cname;
char			*rcname;
68 
/*
 * One entry of connect_queue. The main loop polls fd for POLLOUT and
 * hands it to bgpd_rtr_connect_done() once an event shows up.
 */
struct connect_elm {
	TAILQ_ENTRY(connect_elm)	entry;
	uint32_t			id;
	int				fd;
};

TAILQ_HEAD( ,connect_elm)	connect_queue = \
				    TAILQ_HEAD_INITIALIZER(connect_queue);
/* the main loop sizes its pollfd array as PFD_CONNECT_START + connect_cnt */
u_int				connect_cnt;
/* NOTE(review): presumably the cap on queued connects; the user is not visible here */
#define MAX_CONNECT_CNT		32
79 
80 void
81 sighdlr(int sig)
82 {
83 	switch (sig) {
84 	case SIGTERM:
85 	case SIGINT:
86 		quit = 1;
87 		break;
88 	case SIGHUP:
89 		reconfig = 1;
90 		break;
91 	case SIGALRM:
92 	case SIGUSR1:
93 		mrtdump = 1;
94 		break;
95 	}
96 }
97 
98 __dead void
99 usage(void)
100 {
101 	extern char *__progname;
102 
103 	fprintf(stderr, "usage: %s [-cdnvV] [-D macro=value] [-f file]\n",
104 	    __progname);
105 	exit(1);
106 }
107 
/*
 * Fixed slots of the pollfd array used by the main loop. Entries from
 * PFD_CONNECT_START onwards are filled from connect_queue.
 */
#define PFD_PIPE_SESSION	0
#define PFD_PIPE_RDE		1
#define PFD_PIPE_RTR		2
#define PFD_SOCK_ROUTE		3
#define PFD_SOCK_PFKEY		4
#define PFD_CONNECT_START	5
/* upper bound for the poll timeout, in seconds */
#define MAX_TIMEOUT		3600

/* BGPD_OPT_* flags collected from the command line */
int	 cmd_opts;
117 
118 int
119 main(int argc, char *argv[])
120 {
121 	struct bgpd_config	*conf;
122 	enum bgpd_process	 proc = PROC_MAIN;
123 	struct rde_rib		*rr;
124 	struct peer		*p;
125 	struct pollfd		*pfd = NULL;
126 	struct connect_elm	*ce;
127 	time_t			 timeout;
128 	pid_t			 se_pid = 0, rde_pid = 0, rtr_pid = 0, pid;
129 	char			*conffile;
130 	char			*saved_argv0;
131 	u_int			 pfd_elms = 0, npfd, i;
132 	int			 debug = 0;
133 	int			 rfd, keyfd;
134 	int			 ch, status;
135 	int			 pipe_m2s[2];
136 	int			 pipe_m2r[2];
137 	int			 pipe_m2roa[2];
138 
139 	conffile = CONFFILE;
140 
141 	log_init(1, LOG_DAEMON);	/* log to stderr until daemonized */
142 	log_procinit(log_procnames[PROC_MAIN]);
143 	log_setverbose(1);
144 
145 	saved_argv0 = argv[0];
146 	if (saved_argv0 == NULL)
147 		saved_argv0 = "bgpd";
148 
149 	while ((ch = getopt(argc, argv, "cdD:f:nRSTvV")) != -1) {
150 		switch (ch) {
151 		case 'c':
152 			cmd_opts |= BGPD_OPT_FORCE_DEMOTE;
153 			break;
154 		case 'd':
155 			debug = 1;
156 			break;
157 		case 'D':
158 			if (cmdline_symset(optarg) < 0)
159 				log_warnx("could not parse macro definition %s",
160 				    optarg);
161 			break;
162 		case 'f':
163 			conffile = optarg;
164 			break;
165 		case 'n':
166 			cmd_opts |= BGPD_OPT_NOACTION;
167 			break;
168 		case 'v':
169 			if (cmd_opts & BGPD_OPT_VERBOSE)
170 				cmd_opts |= BGPD_OPT_VERBOSE2;
171 			cmd_opts |= BGPD_OPT_VERBOSE;
172 			break;
173 		case 'R':
174 			proc = PROC_RDE;
175 			break;
176 		case 'S':
177 			proc = PROC_SE;
178 			break;
179 		case 'T':
180 			proc = PROC_RTR;
181 			break;
182 		case 'V':
183 			fprintf(stderr, "OpenBGPD %s\n", BGPD_VERSION);
184 			return 0;
185 		default:
186 			usage();
187 			/* NOTREACHED */
188 		}
189 	}
190 
191 	argc -= optind;
192 	argv += optind;
193 	if (argc > 0)
194 		usage();
195 
196 	if (cmd_opts & BGPD_OPT_NOACTION) {
197 		if ((conf = parse_config(conffile, NULL, NULL)) == NULL)
198 			exit(1);
199 
200 		if (cmd_opts & BGPD_OPT_VERBOSE)
201 			print_config(conf, &ribnames);
202 		else
203 			fprintf(stderr, "configuration OK\n");
204 
205 		while ((rr = SIMPLEQ_FIRST(&ribnames)) != NULL) {
206 			SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
207 			free(rr);
208 		}
209 		free_config(conf);
210 		exit(0);
211 	}
212 
213 	switch (proc) {
214 	case PROC_MAIN:
215 		break;
216 	case PROC_RDE:
217 		rde_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
218 		/* NOTREACHED */
219 	case PROC_SE:
220 		session_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
221 		/* NOTREACHED */
222 	case PROC_RTR:
223 		rtr_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
224 		/* NOTREACHED */
225 	}
226 
227 	if (geteuid())
228 		errx(1, "need root privileges");
229 
230 	if (getpwnam(BGPD_USER) == NULL)
231 		errx(1, "unknown user %s", BGPD_USER);
232 
233 	if ((conf = parse_config(conffile, NULL, NULL)) == NULL) {
234 		log_warnx("config file %s has errors", conffile);
235 		exit(1);
236 	}
237 
238 	if (prepare_listeners(conf) == -1)
239 		exit(1);
240 
241 	log_init(debug, LOG_DAEMON);
242 	log_setverbose(cmd_opts & BGPD_OPT_VERBOSE);
243 
244 	if (!debug)
245 		daemon(1, 0);
246 
247 	log_info("startup");
248 
249 	getsockpair(pipe_m2s);
250 	getsockpair(pipe_m2r);
251 	getsockpair(pipe_m2roa);
252 
253 	/* fork children */
254 	rde_pid = start_child(PROC_RDE, saved_argv0, pipe_m2r[1], debug,
255 	    cmd_opts & BGPD_OPT_VERBOSE);
256 	se_pid = start_child(PROC_SE, saved_argv0, pipe_m2s[1], debug,
257 	    cmd_opts & BGPD_OPT_VERBOSE);
258 	rtr_pid = start_child(PROC_RTR, saved_argv0, pipe_m2roa[1], debug,
259 	    cmd_opts & BGPD_OPT_VERBOSE);
260 
261 	signal(SIGTERM, sighdlr);
262 	signal(SIGINT, sighdlr);
263 	signal(SIGHUP, sighdlr);
264 	signal(SIGALRM, sighdlr);
265 	signal(SIGUSR1, sighdlr);
266 	signal(SIGPIPE, SIG_IGN);
267 
268 	if ((ibuf_se = malloc(sizeof(struct imsgbuf))) == NULL ||
269 	    (ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL ||
270 	    (ibuf_rtr = malloc(sizeof(struct imsgbuf))) == NULL)
271 		fatal(NULL);
272 	imsg_init(ibuf_se, pipe_m2s[0]);
273 	imsg_init(ibuf_rde, pipe_m2r[0]);
274 	imsg_init(ibuf_rtr, pipe_m2roa[0]);
275 	mrt_init(ibuf_rde, ibuf_se);
276 	if (kr_init(&rfd, conf->fib_priority) == -1)
277 		quit = 1;
278 	keyfd = pfkey_init();
279 
280 	/*
281 	 * rpath, read config file
282 	 * cpath, unlink control socket
283 	 * fattr, chmod on control socket
284 	 * wpath, needed if we are doing mrt dumps
285 	 *
286 	 * pledge placed here because kr_init() does a setsockopt on the
287 	 * routing socket thats not allowed at all.
288 	 */
289 #if 0
290 	/*
291 	 * disabled because we do ioctls on /dev/pf and SIOCSIFGATTR
292 	 * this needs some redesign of bgpd to be fixed.
293 	 */
294 BROKEN	if (pledge("stdio rpath wpath cpath fattr unix route recvfd sendfd",
295 	    NULL) == -1)
296 		fatal("pledge");
297 #endif
298 
299 	if (imsg_send_sockets(ibuf_se, ibuf_rde, ibuf_rtr))
300 		fatal("could not establish imsg links");
301 	/* control setup needs to happen late since it sends imsgs */
302 	if (control_setup(conf) == -1)
303 		quit = 1;
304 	if (send_config(conf) != 0)
305 		quit = 1;
306 	if (pftable_clear_all() != 0)
307 		quit = 1;
308 
309 	while (quit == 0) {
310 		if (pfd_elms < PFD_CONNECT_START + connect_cnt) {
311 			struct pollfd *newp;
312 
313 			if ((newp = reallocarray(pfd,
314 			    PFD_CONNECT_START + connect_cnt,
315 			    sizeof(struct pollfd))) == NULL) {
316 				log_warn("could not resize pfd from %u -> %u"
317 				    " entries", pfd_elms, PFD_CONNECT_START +
318 				    connect_cnt);
319 				fatalx("exiting");
320 			}
321 			pfd = newp;
322 			pfd_elms = PFD_CONNECT_START + connect_cnt;
323 		}
324 		bzero(pfd, sizeof(struct pollfd) * pfd_elms);
325 
326 		timeout = mrt_timeout(conf->mrt);
327 
328 		pfd[PFD_SOCK_ROUTE].fd = rfd;
329 		pfd[PFD_SOCK_ROUTE].events = POLLIN;
330 
331 		pfd[PFD_SOCK_PFKEY].fd = keyfd;
332 		pfd[PFD_SOCK_PFKEY].events = POLLIN;
333 
334 		set_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se);
335 		set_pollfd(&pfd[PFD_PIPE_RDE], ibuf_rde);
336 		set_pollfd(&pfd[PFD_PIPE_RTR], ibuf_rtr);
337 
338 		npfd = PFD_CONNECT_START;
339 		TAILQ_FOREACH(ce, &connect_queue, entry) {
340 			pfd[npfd].fd = ce->fd;
341 			pfd[npfd++].events = POLLOUT;
342 			if (npfd > pfd_elms)
343 				fatalx("polli pfd overflow");
344 		}
345 
346 		if (timeout < 0 || timeout > MAX_TIMEOUT)
347 			timeout = MAX_TIMEOUT;
348 		if (poll(pfd, npfd, timeout * 1000) == -1) {
349 			if (errno != EINTR) {
350 				log_warn("poll error");
351 				quit = 1;
352 			}
353 			goto next_loop;
354 		}
355 
356 		if (handle_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se) == -1) {
357 			log_warnx("main: Lost connection to SE");
358 			msgbuf_clear(&ibuf_se->w);
359 			free(ibuf_se);
360 			ibuf_se = NULL;
361 			quit = 1;
362 		} else {
363 			if (dispatch_imsg(ibuf_se, PFD_PIPE_SESSION, conf) ==
364 			    -1)
365 				quit = 1;
366 		}
367 
368 		if (handle_pollfd(&pfd[PFD_PIPE_RDE], ibuf_rde) == -1) {
369 			log_warnx("main: Lost connection to RDE");
370 			msgbuf_clear(&ibuf_rde->w);
371 			free(ibuf_rde);
372 			ibuf_rde = NULL;
373 			quit = 1;
374 		} else {
375 			if (dispatch_imsg(ibuf_rde, PFD_PIPE_RDE, conf) == -1)
376 				quit = 1;
377 		}
378 
379 		if (handle_pollfd(&pfd[PFD_PIPE_RTR], ibuf_rtr) == -1) {
380 			log_warnx("main: Lost connection to RTR");
381 			msgbuf_clear(&ibuf_rtr->w);
382 			free(ibuf_rtr);
383 			ibuf_rtr = NULL;
384 			quit = 1;
385 		} else {
386 			if (dispatch_imsg(ibuf_rtr, PFD_PIPE_RTR, conf) == -1)
387 				quit = 1;
388 		}
389 
390 		if (pfd[PFD_SOCK_ROUTE].revents & POLLIN) {
391 			if (kr_dispatch_msg() == -1)
392 				quit = 1;
393 		}
394 
395 		if (pfd[PFD_SOCK_PFKEY].revents & POLLIN) {
396 			if (pfkey_read(keyfd, NULL) == -1) {
397 				log_warnx("pfkey_read failed, exiting...");
398 				quit = 1;
399 			}
400 		}
401 
402 		for (i = PFD_CONNECT_START; i < npfd; i++)
403 			if (pfd[i].revents != 0)
404 				bgpd_rtr_connect_done(pfd[i].fd, conf);
405 
406  next_loop:
407 		if (reconfig) {
408 			u_int	error;
409 
410 			reconfig = 0;
411 			switch (reconfigure(conffile, conf)) {
412 			case -1:	/* fatal error */
413 				quit = 1;
414 				break;
415 			case 0:		/* all OK */
416 				error = 0;
417 				break;
418 			case 2:
419 				log_info("previous reload still running");
420 				error = CTL_RES_PENDING;
421 				break;
422 			default:	/* parse error */
423 				log_warnx("config file %s has errors, "
424 				    "not reloading", conffile);
425 				error = CTL_RES_PARSE_ERROR;
426 				break;
427 			}
428 			if (reconfpid != 0) {
429 				send_imsg_session(IMSG_CTL_RESULT, reconfpid,
430 				    &error, sizeof(error));
431 				reconfpid = 0;
432 			}
433 		}
434 
435 		if (mrtdump) {
436 			mrtdump = 0;
437 			mrt_handler(conf->mrt);
438 		}
439 	}
440 
441 	/* close pipes */
442 	if (ibuf_se) {
443 		msgbuf_clear(&ibuf_se->w);
444 		close(ibuf_se->fd);
445 		free(ibuf_se);
446 		ibuf_se = NULL;
447 	}
448 	if (ibuf_rde) {
449 		msgbuf_clear(&ibuf_rde->w);
450 		close(ibuf_rde->fd);
451 		free(ibuf_rde);
452 		ibuf_rde = NULL;
453 	}
454 	if (ibuf_rtr) {
455 		msgbuf_clear(&ibuf_rtr->w);
456 		close(ibuf_rtr->fd);
457 		free(ibuf_rtr);
458 		ibuf_rtr = NULL;
459 	}
460 
461 	/* cleanup kernel data structures */
462 	carp_demote_shutdown();
463 	kr_shutdown();
464 	pftable_clear_all();
465 
466 	RB_FOREACH(p, peer_head, &conf->peers)
467 		pfkey_remove(p);
468 
469 	while ((rr = SIMPLEQ_FIRST(&ribnames)) != NULL) {
470 		SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
471 		free(rr);
472 	}
473 	free_config(conf);
474 
475 	log_debug("waiting for children to terminate");
476 	do {
477 		pid = wait(&status);
478 		if (pid == -1) {
479 			if (errno != EINTR && errno != ECHILD)
480 				fatal("wait");
481 		} else if (WIFSIGNALED(status)) {
482 			char *name = "unknown process";
483 			if (pid == rde_pid)
484 				name = "route decision engine";
485 			else if (pid == se_pid)
486 				name = "session engine";
487 			else if (pid == rtr_pid)
488 				name = "rtr engine";
489 			log_warnx("%s terminated; signal %d", name,
490 				WTERMSIG(status));
491 		}
492 	} while (pid != -1 || (pid == -1 && errno == EINTR));
493 
494 	free(rcname);
495 	free(cname);
496 
497 	log_info("terminating");
498 	return (0);
499 }
500 
501 pid_t
502 start_child(enum bgpd_process p, char *argv0, int fd, int debug, int verbose)
503 {
504 	char *argv[5];
505 	int argc = 0;
506 	pid_t pid;
507 
508 	switch (pid = fork()) {
509 	case -1:
510 		fatal("cannot fork");
511 	case 0:
512 		break;
513 	default:
514 		close(fd);
515 		return (pid);
516 	}
517 
518 	if (fd != 3) {
519 		if (dup2(fd, 3) == -1)
520 			fatal("cannot setup imsg fd");
521 	} else if (fcntl(fd, F_SETFD, 0) == -1)
522 		fatal("cannot setup imsg fd");
523 
524 	argv[argc++] = argv0;
525 	switch (p) {
526 	case PROC_MAIN:
527 		fatalx("Can not start main process");
528 	case PROC_RDE:
529 		argv[argc++] = "-R";
530 		break;
531 	case PROC_SE:
532 		argv[argc++] = "-S";
533 		break;
534 	case PROC_RTR:
535 		argv[argc++] = "-T";
536 		break;
537 	}
538 	if (debug)
539 		argv[argc++] = "-d";
540 	if (verbose)
541 		argv[argc++] = "-v";
542 	argv[argc++] = NULL;
543 
544 	execvp(argv0, argv);
545 	fatal("execvp");
546 }
547 
548 int
549 send_filterset(struct imsgbuf *i, struct filter_set_head *set)
550 {
551 	struct filter_set	*s;
552 
553 	TAILQ_FOREACH(s, set, entry)
554 		if (imsg_compose(i, IMSG_FILTER_SET, 0, 0, -1, s,
555 		    sizeof(struct filter_set)) == -1)
556 			return (-1);
557 	return (0);
558 }
559 
560 int
561 reconfigure(char *conffile, struct bgpd_config *conf)
562 {
563 	struct bgpd_config	*new_conf;
564 
565 	if (reconfpending)
566 		return (2);
567 
568 	log_info("rereading config");
569 	if ((new_conf = parse_config(conffile, &conf->peers,
570 	    &conf->rtrs)) == NULL)
571 		return (1);
572 
573 	merge_config(conf, new_conf);
574 
575 	if (prepare_listeners(conf) == -1) {
576 		return (1);
577 	}
578 
579 	if (control_setup(conf) == -1) {
580 		return (1);
581 	}
582 
583 	return send_config(conf);
584 }
585 
586 int
587 send_config(struct bgpd_config *conf)
588 {
589 	struct peer		*p;
590 	struct filter_rule	*r;
591 	struct listen_addr	*la;
592 	struct rde_rib		*rr;
593 	struct l3vpn		*vpn;
594 	struct as_set		*aset;
595 	struct prefixset	*ps;
596 	struct prefixset_item	*psi, *npsi;
597 	struct roa		*roa, *nroa;
598 	struct rtr_config	*rtr;
599 
600 	reconfpending = 3;	/* one per child */
601 
602 	expand_networks(conf);
603 
604 	cflags = conf->flags;
605 
606 	/* start reconfiguration */
607 	if (imsg_compose(ibuf_se, IMSG_RECONF_CONF, 0, 0, -1,
608 	    conf, sizeof(*conf)) == -1)
609 		return (-1);
610 	if (imsg_compose(ibuf_rde, IMSG_RECONF_CONF, 0, 0, -1,
611 	    conf, sizeof(*conf)) == -1)
612 		return (-1);
613 	if (imsg_compose(ibuf_rtr, IMSG_RECONF_CONF, 0, 0, -1,
614 	    conf, sizeof(*conf)) == -1)
615 		return (-1);
616 
617 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
618 		if (imsg_compose(ibuf_se, IMSG_RECONF_LISTENER, 0, 0, la->fd,
619 		    la, sizeof(*la)) == -1)
620 			return (-1);
621 		la->fd = -1;
622 	}
623 
624 	/* adjust fib syncing on reload */
625 	ktable_preload();
626 
627 	/* RIBs for the RDE */
628 	while ((rr = SIMPLEQ_FIRST(&ribnames))) {
629 		SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
630 		if (ktable_update(rr->rtableid, rr->name, rr->flags) == -1) {
631 			log_warnx("failed to load routing table %d",
632 			    rr->rtableid);
633 			return (-1);
634 		}
635 		if (imsg_compose(ibuf_rde, IMSG_RECONF_RIB, 0, 0, -1,
636 		    rr, sizeof(*rr)) == -1)
637 			return (-1);
638 		free(rr);
639 	}
640 
641 	/* send peer list to the SE */
642 	RB_FOREACH(p, peer_head, &conf->peers) {
643 		if (imsg_compose(ibuf_se, IMSG_RECONF_PEER, p->conf.id, 0, -1,
644 		    &p->conf, sizeof(p->conf)) == -1)
645 			return (-1);
646 
647 		if (p->reconf_action == RECONF_REINIT)
648 			if (pfkey_establish(p) == -1)
649 				log_peer_warnx(&p->conf, "pfkey setup failed");
650 	}
651 
652 	/* networks go via kroute to the RDE */
653 	kr_net_reload(conf->default_tableid, 0, &conf->networks);
654 
655 	/* prefixsets for filters in the RDE */
656 	while ((ps = SIMPLEQ_FIRST(&conf->prefixsets)) != NULL) {
657 		SIMPLEQ_REMOVE_HEAD(&conf->prefixsets, entry);
658 		if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET, 0, 0, -1,
659 		    ps->name, sizeof(ps->name)) == -1)
660 			return (-1);
661 		RB_FOREACH_SAFE(psi, prefixset_tree, &ps->psitems, npsi) {
662 			RB_REMOVE(prefixset_tree, &ps->psitems, psi);
663 			if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET_ITEM,
664 			    0, 0, -1, psi, sizeof(*psi)) == -1)
665 				return (-1);
666 			free(psi);
667 		}
668 		free(ps);
669 	}
670 
671 	/* originsets for filters in the RDE */
672 	while ((ps = SIMPLEQ_FIRST(&conf->originsets)) != NULL) {
673 		SIMPLEQ_REMOVE_HEAD(&conf->originsets, entry);
674 		if (imsg_compose(ibuf_rde, IMSG_RECONF_ORIGIN_SET, 0, 0, -1,
675 		    ps->name, sizeof(ps->name)) == -1)
676 			return (-1);
677 		RB_FOREACH_SAFE(roa, roa_tree, &ps->roaitems, nroa) {
678 			RB_REMOVE(roa_tree, &ps->roaitems, roa);
679 			if (imsg_compose(ibuf_rde, IMSG_RECONF_ROA_ITEM, 0, 0,
680 			    -1, roa, sizeof(*roa)) == -1)
681 				return (-1);
682 			free(roa);
683 		}
684 		free(ps);
685 	}
686 
687 	/* roa table and rtr config are sent to the RTR engine */
688 	RB_FOREACH_SAFE(roa, roa_tree, &conf->roa, nroa) {
689 		RB_REMOVE(roa_tree, &conf->roa, roa);
690 		if (imsg_compose(ibuf_rtr, IMSG_RECONF_ROA_ITEM, 0, 0,
691 		    -1, roa, sizeof(*roa)) == -1)
692 			return (-1);
693 		free(roa);
694 	}
695 	SIMPLEQ_FOREACH(rtr, &conf->rtrs, entry) {
696 		if (imsg_compose(ibuf_rtr, IMSG_RECONF_RTR_CONFIG, rtr->id,
697 		    0, -1, rtr->descr, sizeof(rtr->descr)) == -1)
698 			return (-1);
699 	}
700 
701 	/* as-sets for filters in the RDE */
702 	while ((aset = SIMPLEQ_FIRST(&conf->as_sets)) != NULL) {
703 		struct ibuf *wbuf;
704 		uint32_t *as;
705 		size_t i, l, n;
706 
707 		SIMPLEQ_REMOVE_HEAD(&conf->as_sets, entry);
708 
709 		as = set_get(aset->set, &n);
710 		if ((wbuf = imsg_create(ibuf_rde, IMSG_RECONF_AS_SET, 0, 0,
711 		    sizeof(n) + sizeof(aset->name))) == NULL)
712 			return -1;
713 		if (imsg_add(wbuf, &n, sizeof(n)) == -1 ||
714 		    imsg_add(wbuf, aset->name, sizeof(aset->name)) == -1)
715 			return -1;
716 		imsg_close(ibuf_rde, wbuf);
717 
718 		for (i = 0; i < n; i += l) {
719 			l = (n - i > 1024 ? 1024 : n - i);
720 			if (imsg_compose(ibuf_rde, IMSG_RECONF_AS_SET_ITEMS,
721 			    0, 0, -1, as + i, l * sizeof(*as)) == -1)
722 				return -1;
723 		}
724 
725 		if (imsg_compose(ibuf_rde, IMSG_RECONF_AS_SET_DONE, 0, 0, -1,
726 		    NULL, 0) == -1)
727 			return -1;
728 
729 		set_free(aset->set);
730 		free(aset);
731 	}
732 
733 	/* filters for the RDE */
734 	while ((r = TAILQ_FIRST(conf->filters)) != NULL) {
735 		TAILQ_REMOVE(conf->filters, r, entry);
736 		if (send_filterset(ibuf_rde, &r->set) == -1)
737 			return (-1);
738 		if (imsg_compose(ibuf_rde, IMSG_RECONF_FILTER, 0, 0, -1,
739 		    r, sizeof(struct filter_rule)) == -1)
740 			return (-1);
741 		filterset_free(&r->set);
742 		free(r);
743 	}
744 
745 	while ((vpn = SIMPLEQ_FIRST(&conf->l3vpns)) != NULL) {
746 		SIMPLEQ_REMOVE_HEAD(&conf->l3vpns, entry);
747 		if (ktable_update(vpn->rtableid, vpn->descr, vpn->flags) ==
748 		    -1) {
749 			log_warnx("failed to load routing table %d",
750 			    vpn->rtableid);
751 			return (-1);
752 		}
753 		/* networks go via kroute to the RDE */
754 		kr_net_reload(vpn->rtableid, vpn->rd, &vpn->net_l);
755 
756 		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN, 0, 0, -1,
757 		    vpn, sizeof(*vpn)) == -1)
758 			return (-1);
759 
760 		/* export targets */
761 		if (send_filterset(ibuf_rde, &vpn->export) == -1)
762 			return (-1);
763 		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_EXPORT, 0, 0,
764 		    -1, NULL, 0) == -1)
765 			return (-1);
766 		filterset_free(&vpn->export);
767 
768 		/* import targets */
769 		if (send_filterset(ibuf_rde, &vpn->import) == -1)
770 			return (-1);
771 		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_IMPORT, 0, 0,
772 		    -1, NULL, 0) == -1)
773 			return (-1);
774 		filterset_free(&vpn->import);
775 
776 		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_DONE, 0, 0,
777 		    -1, NULL, 0) == -1)
778 			return (-1);
779 
780 		free(vpn);
781 	}
782 
783 	/* send a drain message to know when all messages where processed */
784 	if (imsg_compose(ibuf_se, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
785 		return (-1);
786 	if (imsg_compose(ibuf_rde, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
787 		return (-1);
788 	if (imsg_compose(ibuf_rtr, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
789 		return (-1);
790 
791 	/* mrt changes can be sent out of bound */
792 	mrt_reconfigure(conf->mrt);
793 	return (0);
794 }
795 
796 int
797 dispatch_imsg(struct imsgbuf *ibuf, int idx, struct bgpd_config *conf)
798 {
799 	struct imsg		 imsg;
800 	struct peer		*p;
801 	struct rtr_config	*r;
802 	ssize_t			 n;
803 	u_int			 rtableid;
804 	int			 rv, verbose;
805 
806 	rv = 0;
807 	while (ibuf) {
808 		if ((n = imsg_get(ibuf, &imsg)) == -1)
809 			return (-1);
810 
811 		if (n == 0)
812 			break;
813 
814 		switch (imsg.hdr.type) {
815 		case IMSG_KROUTE_CHANGE:
816 			if (idx != PFD_PIPE_RDE)
817 				log_warnx("route request not from RDE");
818 			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
819 			    sizeof(struct kroute_full))
820 				log_warnx("wrong imsg len");
821 			else if (kr_change(imsg.hdr.peerid, imsg.data))
822 				rv = -1;
823 			break;
824 		case IMSG_KROUTE_DELETE:
825 			if (idx != PFD_PIPE_RDE)
826 				log_warnx("route request not from RDE");
827 			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
828 			    sizeof(struct kroute_full))
829 				log_warnx("wrong imsg len");
830 			else if (kr_delete(imsg.hdr.peerid, imsg.data))
831 				rv = -1;
832 			break;
833 		case IMSG_KROUTE_FLUSH:
834 			if (idx != PFD_PIPE_RDE)
835 				log_warnx("route request not from RDE");
836 			else if (imsg.hdr.len != IMSG_HEADER_SIZE)
837 				log_warnx("wrong imsg len");
838 			else if (kr_flush(imsg.hdr.peerid))
839 				rv = -1;
840 			break;
841 		case IMSG_NEXTHOP_ADD:
842 			if (idx != PFD_PIPE_RDE)
843 				log_warnx("nexthop request not from RDE");
844 			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
845 			    sizeof(struct bgpd_addr))
846 				log_warnx("wrong imsg len");
847 			else {
848 				rtableid = conf->default_tableid;
849 				if (kr_nexthop_add(rtableid, imsg.data) == -1)
850 					rv = -1;
851 			}
852 			break;
853 		case IMSG_NEXTHOP_REMOVE:
854 			if (idx != PFD_PIPE_RDE)
855 				log_warnx("nexthop request not from RDE");
856 			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
857 			    sizeof(struct bgpd_addr))
858 				log_warnx("wrong imsg len");
859 			else {
860 				rtableid = conf->default_tableid;
861 				kr_nexthop_delete(rtableid, imsg.data);
862 			}
863 			break;
864 		case IMSG_PFTABLE_ADD:
865 			if (idx != PFD_PIPE_RDE)
866 				log_warnx("pftable request not from RDE");
867 			else
868 				if (imsg.hdr.len != IMSG_HEADER_SIZE +
869 				    sizeof(struct pftable_msg))
870 					log_warnx("wrong imsg len");
871 				else if (pftable_addr_add(imsg.data) != 0)
872 					rv = -1;
873 			break;
874 		case IMSG_PFTABLE_REMOVE:
875 			if (idx != PFD_PIPE_RDE)
876 				log_warnx("pftable request not from RDE");
877 			else
878 				if (imsg.hdr.len != IMSG_HEADER_SIZE +
879 				    sizeof(struct pftable_msg))
880 					log_warnx("wrong imsg len");
881 				else if (pftable_addr_remove(imsg.data) != 0)
882 					rv = -1;
883 			break;
884 		case IMSG_PFTABLE_COMMIT:
885 			if (idx != PFD_PIPE_RDE)
886 				log_warnx("pftable request not from RDE");
887 			else if (imsg.hdr.len != IMSG_HEADER_SIZE)
888 				log_warnx("wrong imsg len");
889 			else if (pftable_commit() != 0)
890 				rv = -1;
891 			break;
892 		case IMSG_PFKEY_RELOAD:
893 			if (idx != PFD_PIPE_SESSION) {
894 				log_warnx("pfkey reload request not from SE");
895 				break;
896 			}
897 			p = getpeerbyid(conf, imsg.hdr.peerid);
898 			if (p != NULL) {
899 				if (pfkey_establish(p) == -1)
900 					log_peer_warnx(&p->conf,
901 					    "pfkey setup failed");
902 			}
903 			break;
904 		case IMSG_CTL_RELOAD:
905 			if (idx != PFD_PIPE_SESSION)
906 				log_warnx("reload request not from SE");
907 			else {
908 				reconfig = 1;
909 				reconfpid = imsg.hdr.pid;
910 				if (imsg.hdr.len == IMSG_HEADER_SIZE +
911 				    REASON_LEN && ((char *)imsg.data)[0])
912 					log_info("reload due to: %s",
913 					    log_reason(imsg.data));
914 			}
915 			break;
916 		case IMSG_CTL_FIB_COUPLE:
917 			if (idx != PFD_PIPE_SESSION)
918 				log_warnx("couple request not from SE");
919 			else
920 				kr_fib_couple(imsg.hdr.peerid);
921 			break;
922 		case IMSG_CTL_FIB_DECOUPLE:
923 			if (idx != PFD_PIPE_SESSION)
924 				log_warnx("decouple request not from SE");
925 			else
926 				kr_fib_decouple(imsg.hdr.peerid);
927 			break;
928 		case IMSG_CTL_KROUTE:
929 		case IMSG_CTL_KROUTE_ADDR:
930 		case IMSG_CTL_SHOW_NEXTHOP:
931 		case IMSG_CTL_SHOW_INTERFACE:
932 		case IMSG_CTL_SHOW_FIB_TABLES:
933 			if (idx != PFD_PIPE_SESSION)
934 				log_warnx("kroute request not from SE");
935 			else
936 				kr_show_route(&imsg);
937 			break;
938 		case IMSG_IFINFO:
939 			if (idx != PFD_PIPE_SESSION)
940 				log_warnx("IFINFO request not from SE");
941 			else if (imsg.hdr.len != IMSG_HEADER_SIZE + IFNAMSIZ)
942 				log_warnx("IFINFO request with wrong len");
943 			else
944 				kr_ifinfo(imsg.data);
945 			break;
946 		case IMSG_DEMOTE:
947 			if (idx != PFD_PIPE_SESSION)
948 				log_warnx("demote request not from SE");
949 			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
950 			    sizeof(struct demote_msg))
951 				log_warnx("DEMOTE request with wrong len");
952 			else {
953 				struct demote_msg	*msg;
954 
955 				msg = imsg.data;
956 				carp_demote_set(msg->demote_group, msg->level);
957 			}
958 			break;
959 		case IMSG_CTL_LOG_VERBOSE:
960 			/* already checked by SE */
961 			memcpy(&verbose, imsg.data, sizeof(verbose));
962 			log_setverbose(verbose);
963 			break;
964 		case IMSG_RECONF_DONE:
965 			if (reconfpending == 0) {
966 				log_warnx("unexpected RECONF_DONE received");
967 				break;
968 			}
969 			if (idx == PFD_PIPE_SESSION) {
970 				imsg_compose(ibuf_rtr, IMSG_RECONF_DONE, 0,
971 				    0, -1, NULL, 0);
972 			} else if (idx == PFD_PIPE_RTR) {
973 				imsg_compose(ibuf_rde, IMSG_RECONF_DONE, 0,
974 				    0, -1, NULL, 0);
975 
976 				/* finally fix kroute information */
977 				ktable_postload();
978 
979 				/* redistribute list needs to be reloaded too */
980 				kr_reload();
981 			}
982 			reconfpending--;
983 			break;
984 		case IMSG_RECONF_DRAIN:
985 			if (reconfpending == 0) {
986 				log_warnx("unexpected RECONF_DRAIN received");
987 				break;
988 			}
989 			reconfpending--;
990 			if (reconfpending == 0) {
991 				/*
992 				 * SE goes first to bring templated neighbors
993 				 * in sync.
994 				 */
995 				imsg_compose(ibuf_se, IMSG_RECONF_DONE, 0,
996 				    0, -1, NULL, 0);
997 				reconfpending = 3; /* expecting 2 DONE msg */
998 			}
999 			break;
1000 		case IMSG_SOCKET_CONN:
1001 			if (idx != PFD_PIPE_RTR) {
1002 				log_warnx("connect request not from RTR");
1003 			} else {
1004 				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1005 					if (imsg.hdr.peerid == r->id)
1006 						break;
1007 				}
1008 				if (r == NULL)
1009 					log_warnx("unknown rtr id %d",
1010 					    imsg.hdr.peerid);
1011 				else
1012 					bgpd_rtr_connect(r);
1013 			}
1014 			break;
1015 		case IMSG_CTL_SHOW_RTR:
1016 			if (idx == PFD_PIPE_SESSION) {
1017 				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1018 					imsg_compose(ibuf_rtr, imsg.hdr.type,
1019 					    r->id, imsg.hdr.pid, -1, NULL, 0);
1020 				}
1021 				imsg_compose(ibuf_rtr, IMSG_CTL_END,
1022 				    0, imsg.hdr.pid, -1, NULL, 0);
1023 			} else if (imsg.hdr.len != IMSG_HEADER_SIZE +
1024 			    sizeof(struct ctl_show_rtr)) {
1025 				log_warnx("IMSG_CTL_SHOW_RTR with wrong len");
1026 			} else if (idx == PFD_PIPE_RTR) {
1027 				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1028 					if (imsg.hdr.peerid == r->id)
1029 						break;
1030 				}
1031 				if (r != NULL) {
1032 					struct ctl_show_rtr *msg;
1033 					msg = imsg.data;
1034 					strlcpy(msg->descr, r->descr,
1035 					    sizeof(msg->descr));
1036 					msg->local_addr = r->local_addr;
1037 					msg->remote_addr = r->remote_addr;
1038 					msg->remote_port = r->remote_port;
1039 
1040 					imsg_compose(ibuf_se, imsg.hdr.type,
1041 					    imsg.hdr.peerid, imsg.hdr.pid,
1042 					    -1, imsg.data,
1043 					    imsg.hdr.len - IMSG_HEADER_SIZE);
1044 				}
1045 			}
1046 			break;
1047 		case IMSG_CTL_END:
1048 		case IMSG_CTL_SHOW_TIMER:
1049 			if (idx != PFD_PIPE_RTR) {
1050 				log_warnx("connect request not from RTR");
1051 				break;
1052 			}
1053 			imsg_compose(ibuf_se, imsg.hdr.type, imsg.hdr.peerid,
1054 			    imsg.hdr.pid, -1, imsg.data,
1055 			    imsg.hdr.len - IMSG_HEADER_SIZE);
1056 			break;
1057 		default:
1058 			break;
1059 		}
1060 		imsg_free(&imsg);
1061 		if (rv != 0)
1062 			return (rv);
1063 	}
1064 	return (0);
1065 }
1066 
1067 void
1068 send_nexthop_update(struct kroute_nexthop *msg)
1069 {
1070 	char	*gw = NULL;
1071 
1072 	if (msg->gateway.aid)
1073 		if (asprintf(&gw, ": via %s",
1074 		    log_addr(&msg->gateway)) == -1) {
1075 			log_warn("send_nexthop_update");
1076 			quit = 1;
1077 		}
1078 
1079 	log_debug("nexthop %s now %s%s%s", log_addr(&msg->nexthop),
1080 	    msg->valid ? "valid" : "invalid",
1081 	    msg->connected ? ": directly connected" : "",
1082 	    msg->gateway.aid ? gw : "");
1083 
1084 	free(gw);
1085 
1086 	if (imsg_compose(ibuf_rde, IMSG_NEXTHOP_UPDATE, 0, 0, -1,
1087 	    msg, sizeof(struct kroute_nexthop)) == -1)
1088 		quit = 1;
1089 }
1090 
1091 void
1092 send_imsg_session(int type, pid_t pid, void *data, uint16_t datalen)
1093 {
1094 	imsg_compose(ibuf_se, type, 0, pid, -1, data, datalen);
1095 }
1096 
1097 int
1098 send_network(int type, struct network_config *net, struct filter_set_head *h)
1099 {
1100 	if (quit)
1101 		return (0);
1102 	if (imsg_compose(ibuf_rde, type, 0, 0, -1, net,
1103 	    sizeof(struct network_config)) == -1)
1104 		return (-1);
1105 	/* networks that get deleted don't need to send the filter set */
1106 	if (type == IMSG_NETWORK_REMOVE)
1107 		return (0);
1108 	if (send_filterset(ibuf_rde, h) == -1)
1109 		return (-1);
1110 	if (imsg_compose(ibuf_rde, IMSG_NETWORK_DONE, 0, 0, -1, NULL, 0) == -1)
1111 		return (-1);
1112 
1113 	return (0);
1114 }
1115 
1116 int
1117 bgpd_filternexthop(struct kroute *kr, struct kroute6 *kr6)
1118 {
1119 	/* kernel routes are never filtered */
1120 	if (kr && kr->flags & F_KERNEL && kr->prefixlen != 0)
1121 		return (0);
1122 	if (kr6 && kr6->flags & F_KERNEL && kr6->prefixlen != 0)
1123 		return (0);
1124 
1125 	if (cflags & BGPD_FLAG_NEXTHOP_BGP) {
1126 		if (kr && kr->flags & F_BGPD)
1127 			return (0);
1128 		if (kr6 && kr6->flags & F_BGPD)
1129 			return (0);
1130 	}
1131 
1132 	if (cflags & BGPD_FLAG_NEXTHOP_DEFAULT) {
1133 		if (kr && kr->prefixlen == 0)
1134 			return (0);
1135 		if (kr6 && kr6->prefixlen == 0)
1136 			return (0);
1137 	}
1138 
1139 	return (1);
1140 }
1141 
1142 int
1143 control_setup(struct bgpd_config *conf)
1144 {
1145 	int fd, restricted;
1146 
1147 	/* control socket is outside chroot */
1148 	if (!cname || strcmp(cname, conf->csock)) {
1149 		if (cname) {
1150 			free(cname);
1151 		}
1152 		if ((cname = strdup(conf->csock)) == NULL)
1153 			fatal("strdup");
1154 		if (control_check(cname) == -1)
1155 			return (-1);
1156 		if ((fd = control_init(0, cname)) == -1)
1157 			fatalx("control socket setup failed");
1158 		if (control_listen(fd) == -1)
1159 			fatalx("control socket setup failed");
1160 		restricted = 0;
1161 		if (imsg_compose(ibuf_se, IMSG_RECONF_CTRL, 0, 0, fd,
1162 		    &restricted, sizeof(restricted)) == -1)
1163 			return (-1);
1164 	}
1165 	if (!conf->rcsock) {
1166 		/* remove restricted socket */
1167 		free(rcname);
1168 		rcname = NULL;
1169 	} else if (!rcname || strcmp(rcname, conf->rcsock)) {
1170 		if (rcname) {
1171 			free(rcname);
1172 		}
1173 		if ((rcname = strdup(conf->rcsock)) == NULL)
1174 			fatal("strdup");
1175 		if (control_check(rcname) == -1)
1176 			return (-1);
1177 		if ((fd = control_init(1, rcname)) == -1)
1178 			fatalx("control socket setup failed");
1179 		if (control_listen(fd) == -1)
1180 			fatalx("control socket setup failed");
1181 		restricted = 1;
1182 		if (imsg_compose(ibuf_se, IMSG_RECONF_CTRL, 0, 0, fd,
1183 		    &restricted, sizeof(restricted)) == -1)
1184 			return (-1);
1185 	}
1186 	return (0);
1187 }
1188 
1189 void
1190 set_pollfd(struct pollfd *pfd, struct imsgbuf *i)
1191 {
1192 	if (i == NULL || i->fd == -1) {
1193 		pfd->fd = -1;
1194 		return;
1195 	}
1196 	pfd->fd = i->fd;
1197 	pfd->events = POLLIN;
1198 	if (i->w.queued > 0)
1199 		pfd->events |= POLLOUT;
1200 }
1201 
1202 int
1203 handle_pollfd(struct pollfd *pfd, struct imsgbuf *i)
1204 {
1205 	ssize_t n;
1206 
1207 	if (i == NULL)
1208 		return (0);
1209 
1210 	if (pfd->revents & POLLOUT)
1211 		if (msgbuf_write(&i->w) <= 0 && errno != EAGAIN) {
1212 			log_warn("imsg write error");
1213 			close(i->fd);
1214 			i->fd = -1;
1215 			return (-1);
1216 		}
1217 
1218 	if (pfd->revents & POLLIN) {
1219 		if ((n = imsg_read(i)) == -1 && errno != EAGAIN) {
1220 			log_warn("imsg read error");
1221 			close(i->fd);
1222 			i->fd = -1;
1223 			return (-1);
1224 		}
1225 		if (n == 0) {
1226 			log_warnx("peer closed imsg connection");
1227 			close(i->fd);
1228 			i->fd = -1;
1229 			return (-1);
1230 		}
1231 	}
1232 	return (0);
1233 }
1234 
1235 static void
1236 getsockpair(int pipe[2])
1237 {
1238 	int bsize, i;
1239 
1240 	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK,
1241 	    PF_UNSPEC, pipe) == -1)
1242 		fatal("socketpair");
1243 
1244 	for (i = 0; i < 2; i++) {
1245 		for (bsize = MAX_SOCK_BUF; bsize >= 16 * 1024; bsize /= 2) {
1246 			if (setsockopt(pipe[i], SOL_SOCKET, SO_RCVBUF,
1247 			    &bsize, sizeof(bsize)) == -1) {
1248 				if (errno != ENOBUFS)
1249 					fatal("setsockopt(SO_RCVBUF, %d)",
1250 					    bsize);
1251 				log_warn("setsockopt(SO_RCVBUF, %d)", bsize);
1252 				continue;
1253 			}
1254 			break;
1255 		}
1256 	}
1257 	for (i = 0; i < 2; i++) {
1258 		for (bsize = MAX_SOCK_BUF; bsize >= 16 * 1024; bsize /= 2) {
1259 			if (setsockopt(pipe[i], SOL_SOCKET, SO_SNDBUF,
1260 			    &bsize, sizeof(bsize)) == -1) {
1261 				if (errno != ENOBUFS)
1262 					fatal("setsockopt(SO_SNDBUF, %d)",
1263 					    bsize);
1264 				log_warn("setsockopt(SO_SNDBUF, %d)", bsize);
1265 				continue;
1266 			}
1267 			break;
1268 		}
1269 	}
1270 }
1271 
1272 int
1273 imsg_send_sockets(struct imsgbuf *se, struct imsgbuf *rde, struct imsgbuf *roa)
1274 {
1275 	int pipe_s2r[2];
1276 	int pipe_s2r_ctl[2];
1277 	int pipe_r2r[2];
1278 
1279 	getsockpair(pipe_s2r);
1280 	getsockpair(pipe_s2r_ctl);
1281 	getsockpair(pipe_r2r);
1282 
1283 	if (imsg_compose(se, IMSG_SOCKET_CONN, 0, 0, pipe_s2r[0],
1284 	    NULL, 0) == -1)
1285 		return (-1);
1286 	if (imsg_compose(rde, IMSG_SOCKET_CONN, 0, 0, pipe_s2r[1],
1287 	    NULL, 0) == -1)
1288 		return (-1);
1289 
1290 	if (imsg_compose(se, IMSG_SOCKET_CONN_CTL, 0, 0, pipe_s2r_ctl[0],
1291 	    NULL, 0) == -1)
1292 		return (-1);
1293 	if (imsg_compose(rde, IMSG_SOCKET_CONN_CTL, 0, 0, pipe_s2r_ctl[1],
1294 	    NULL, 0) == -1)
1295 		return (-1);
1296 
1297 	if (imsg_compose(roa, IMSG_SOCKET_CONN_RTR, 0, 0, pipe_r2r[0],
1298 	    NULL, 0) == -1)
1299 		return (-1);
1300 	if (imsg_compose(rde, IMSG_SOCKET_CONN_RTR, 0, 0, pipe_r2r[1],
1301 	    NULL, 0) == -1)
1302 		return (-1);
1303 
1304 	return (0);
1305 }
1306 
1307 void
1308 bgpd_rtr_connect(struct rtr_config *r)
1309 {
1310 	struct connect_elm *ce;
1311 	struct sockaddr *sa;
1312 	socklen_t len;
1313 
1314 	if (connect_cnt >= MAX_CONNECT_CNT) {
1315 		log_warnx("rtr %s: too many concurrent connection requests",
1316 		    r->descr);
1317 		return;
1318 	}
1319 
1320 	if ((ce = calloc(1, sizeof(*ce))) == NULL) {
1321 		log_warn("rtr %s", r->descr);
1322 		return;
1323 	}
1324 
1325 	ce->id = r->id;
1326 	ce->fd = socket(aid2af(r->remote_addr.aid),
1327 	     SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP);
1328 	if (ce->fd == -1) {
1329 		log_warn("rtr %s", r->descr);
1330 		free(ce);
1331 		return;
1332 	}
1333 
1334 	if ((sa = addr2sa(&r->local_addr, 0, &len)) != NULL) {
1335 		if (bind(ce->fd, sa, len) == -1) {
1336 			log_warn("rtr %s: bind to %s", r->descr,
1337 			    log_addr(&r->local_addr));
1338 			close(ce->fd);
1339 			free(ce);
1340 			return;
1341 		}
1342 	}
1343 
1344 	sa = addr2sa(&r->remote_addr, r->remote_port, &len);
1345 	if (connect(ce->fd, sa, len) == -1) {
1346 		if (errno != EINPROGRESS) {
1347 			log_warn("rtr %s: connect to %s:%u", r->descr,
1348 			    log_addr(&r->remote_addr), r->remote_port);
1349 			close(ce->fd);
1350 			free(ce);
1351 			return;
1352 		}
1353 		TAILQ_INSERT_TAIL(&connect_queue, ce, entry);
1354 		connect_cnt++;
1355 		return;
1356 	}
1357 
1358 	imsg_compose(ibuf_rtr, IMSG_SOCKET_CONN, ce->id, 0, ce->fd, NULL, 0);
1359 	free(ce);
1360 }
1361 
1362 void
1363 bgpd_rtr_connect_done(int fd, struct bgpd_config *conf)
1364 {
1365 	struct rtr_config *r;
1366 	struct connect_elm *ce;
1367 	int error = 0;
1368 	socklen_t len;
1369 
1370 	TAILQ_FOREACH(ce, &connect_queue, entry) {
1371 		if (ce->fd == fd)
1372 			break;
1373 	}
1374 	if (ce == NULL)
1375 		fatalx("connect entry not found");
1376 
1377 	TAILQ_REMOVE(&connect_queue, ce, entry);
1378 	connect_cnt--;
1379 
1380 	SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1381 		if (ce->id == r->id)
1382 			break;
1383 	}
1384 	if (r == NULL) {
1385 		log_warnx("rtr id %d no longer exists", ce->id);
1386 		goto fail;
1387 	}
1388 
1389 	len = sizeof(error);
1390 	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &len) == -1) {
1391 		log_warn("rtr %s: getsockopt SO_ERROR", r->descr);
1392 		goto fail;
1393 	}
1394 
1395 	if (error != 0) {
1396 		errno = error;
1397 		log_warn("rtr %s: connect to %s:%u", r->descr,
1398 		    log_addr(&r->remote_addr), r->remote_port);
1399 		goto fail;
1400 	}
1401 
1402 	imsg_compose(ibuf_rtr, IMSG_SOCKET_CONN, ce->id, 0, ce->fd, NULL, 0);
1403 	free(ce);
1404 	return;
1405 
1406 fail:
1407 	close(fd);
1408 	free(ce);
1409 }
1410