xref: /openbsd/usr.sbin/bgpd/bgpd.c (revision 39386878)
1 /*	$OpenBSD: bgpd.c,v 1.242 2022/02/06 09:51:19 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/socket.h>
21 #include <sys/wait.h>
22 #include <netinet/in.h>
23 #include <arpa/inet.h>
24 #include <err.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <poll.h>
28 #include <pwd.h>
29 #include <signal.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <syslog.h>
34 #include <unistd.h>
35 
36 #include "bgpd.h"
37 #include "session.h"
38 #include "log.h"
39 #include "version.h"
40 
/*
 * Forward declarations for the functions of the bgpd parent process.
 * Some of the definitions (getsockpair, imsg_send_sockets,
 * bgpd_rtr_connect, bgpd_rtr_connect_done) appear later in the file.
 */
void		sighdlr(int);
__dead void	usage(void);
int		main(int, char *[]);
pid_t		start_child(enum bgpd_process, char *, int, int, int);
int		send_filterset(struct imsgbuf *, struct filter_set_head *);
int		reconfigure(char *, struct bgpd_config *);
int		send_config(struct bgpd_config *);
int		dispatch_imsg(struct imsgbuf *, int, struct bgpd_config *);
int		control_setup(struct bgpd_config *);
static void	getsockpair(int [2]);
int		imsg_send_sockets(struct imsgbuf *, struct imsgbuf *,
		    struct imsgbuf *);
void		bgpd_rtr_connect(struct rtr_config *);
void		bgpd_rtr_connect_done(int, struct bgpd_config *);
55 
/* copy of conf->flags taken in send_config(); read by bgpd_filternexthop() */
int			 cflags;
/* set by sighdlr() on SIGALRM/SIGUSR1; main() then calls mrt_handler() */
volatile sig_atomic_t	 mrtdump;
/* set on SIGTERM/SIGINT or on a fatal error; ends the main poll loop */
volatile sig_atomic_t	 quit;
/* set on SIGHUP or by IMSG_CTL_RELOAD; main() then calls reconfigure() */
volatile sig_atomic_t	 reconfig;
/* pid taken from the IMSG_CTL_RELOAD header; addressee of IMSG_CTL_RESULT */
pid_t			 reconfpid;
/* outstanding RECONF_DRAIN/RECONF_DONE replies; non-zero blocks a reload */
int			 reconfpending;
/* imsg channels to the session engine, the RDE and the RTR process */
struct imsgbuf		*ibuf_se;
struct imsgbuf		*ibuf_rde;
struct imsgbuf		*ibuf_rtr;
/* queue of struct rde_rib; drained by send_config() and on exit */
struct rib_names	 ribnames = SIMPLEQ_HEAD_INITIALIZER(ribnames);
/* strdup'ed copies of the control socket paths, see control_setup() */
char			*cname;
char			*rcname;
68 
/*
 * One pending connect: main() polls fd for POLLOUT and hands it to
 * bgpd_rtr_connect_done() once poll reports an event on it.
 * NOTE(review): id presumably carries the rtr id the connect belongs to;
 * confirm against bgpd_rtr_connect().
 */
struct connect_elm {
	TAILQ_ENTRY(connect_elm)	entry;
	uint32_t			id;
	int				fd;
};

/* queue of pending connects, polled by main() */
TAILQ_HEAD( ,connect_elm)	connect_queue = \
				    TAILQ_HEAD_INITIALIZER(connect_queue);
/* main() sizes its pollfd array as PFD_CONNECT_START + connect_cnt */
u_int				connect_cnt;
/* NOTE(review): presumably the cap on connect_cnt; its use is not visible here */
#define MAX_CONNECT_CNT		32
79 
80 void
81 sighdlr(int sig)
82 {
83 	switch (sig) {
84 	case SIGTERM:
85 	case SIGINT:
86 		quit = 1;
87 		break;
88 	case SIGHUP:
89 		reconfig = 1;
90 		break;
91 	case SIGALRM:
92 	case SIGUSR1:
93 		mrtdump = 1;
94 		break;
95 	}
96 }
97 
98 __dead void
99 usage(void)
100 {
101 	extern char *__progname;
102 
103 	fprintf(stderr, "usage: %s [-cdnvV] [-D macro=value] [-f file]\n",
104 	    __progname);
105 	exit(1);
106 }
107 
/*
 * Fixed slots of the pollfd array used by main(): the three imsg pipes,
 * the routing socket and the pfkey socket.  Entries from
 * PFD_CONNECT_START on are filled from connect_queue.  The PFD_PIPE_*
 * values are also passed to dispatch_imsg() to identify the sender.
 */
#define PFD_PIPE_SESSION	0
#define PFD_PIPE_RDE		1
#define PFD_PIPE_RTR		2
#define PFD_SOCK_ROUTE		3
#define PFD_SOCK_PFKEY		4
#define PFD_CONNECT_START	5
/* upper bound, in seconds, for the poll timeout used by main() */
#define MAX_TIMEOUT		3600

/* bitmask of BGPD_OPT_* flags collected from the command line in main() */
int	 cmd_opts;
117 
118 int
119 main(int argc, char *argv[])
120 {
121 	struct bgpd_config	*conf;
122 	enum bgpd_process	 proc = PROC_MAIN;
123 	struct rde_rib		*rr;
124 	struct peer		*p;
125 	struct pollfd		*pfd = NULL;
126 	struct connect_elm	*ce;
127 	time_t			 timeout;
128 	pid_t			 se_pid = 0, rde_pid = 0, rtr_pid = 0, pid;
129 	char			*conffile;
130 	char			*saved_argv0;
131 	u_int			 pfd_elms = 0, npfd, i;
132 	int			 debug = 0;
133 	int			 rfd, keyfd;
134 	int			 ch, status;
135 	int			 pipe_m2s[2];
136 	int			 pipe_m2r[2];
137 	int			 pipe_m2roa[2];
138 
139 	conffile = CONFFILE;
140 
141 	log_init(1, LOG_DAEMON);	/* log to stderr until daemonized */
142 	log_procinit(log_procnames[PROC_MAIN]);
143 	log_setverbose(1);
144 
145 	saved_argv0 = argv[0];
146 	if (saved_argv0 == NULL)
147 		saved_argv0 = "bgpd";
148 
149 	while ((ch = getopt(argc, argv, "cdD:f:nRSTvV")) != -1) {
150 		switch (ch) {
151 		case 'c':
152 			cmd_opts |= BGPD_OPT_FORCE_DEMOTE;
153 			break;
154 		case 'd':
155 			debug = 1;
156 			break;
157 		case 'D':
158 			if (cmdline_symset(optarg) < 0)
159 				log_warnx("could not parse macro definition %s",
160 				    optarg);
161 			break;
162 		case 'f':
163 			conffile = optarg;
164 			break;
165 		case 'n':
166 			cmd_opts |= BGPD_OPT_NOACTION;
167 			break;
168 		case 'v':
169 			if (cmd_opts & BGPD_OPT_VERBOSE)
170 				cmd_opts |= BGPD_OPT_VERBOSE2;
171 			cmd_opts |= BGPD_OPT_VERBOSE;
172 			break;
173 		case 'R':
174 			proc = PROC_RDE;
175 			break;
176 		case 'S':
177 			proc = PROC_SE;
178 			break;
179 		case 'T':
180 			proc = PROC_RTR;
181 			break;
182 		case 'V':
183 			fprintf(stderr, "OpenBGPD %s\n", BGPD_VERSION);
184 			return 0;
185 		default:
186 			usage();
187 			/* NOTREACHED */
188 		}
189 	}
190 
191 	argc -= optind;
192 	argv += optind;
193 	if (argc > 0)
194 		usage();
195 
196 	if (cmd_opts & BGPD_OPT_NOACTION) {
197 		if ((conf = parse_config(conffile, NULL, NULL)) == NULL)
198 			exit(1);
199 
200 		if (cmd_opts & BGPD_OPT_VERBOSE)
201 			print_config(conf, &ribnames);
202 		else
203 			fprintf(stderr, "configuration OK\n");
204 
205 		while ((rr = SIMPLEQ_FIRST(&ribnames)) != NULL) {
206 			SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
207 			free(rr);
208 		}
209 		free_config(conf);
210 		exit(0);
211 	}
212 
213 	switch (proc) {
214 	case PROC_MAIN:
215 		break;
216 	case PROC_RDE:
217 		rde_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
218 		/* NOTREACHED */
219 	case PROC_SE:
220 		session_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
221 		/* NOTREACHED */
222 	case PROC_RTR:
223 		rtr_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
224 		/* NOTREACHED */
225 	}
226 
227 	if (geteuid())
228 		errx(1, "need root privileges");
229 
230 	if (getpwnam(BGPD_USER) == NULL)
231 		errx(1, "unknown user %s", BGPD_USER);
232 
233 	if ((conf = parse_config(conffile, NULL, NULL)) == NULL) {
234 		log_warnx("config file %s has errors", conffile);
235 		exit(1);
236 	}
237 
238 	if (prepare_listeners(conf) == -1)
239 		exit(1);
240 
241 	log_init(debug, LOG_DAEMON);
242 	log_setverbose(cmd_opts & BGPD_OPT_VERBOSE);
243 
244 	if (!debug)
245 		daemon(1, 0);
246 
247 	log_info("startup");
248 
249 	getsockpair(pipe_m2s);
250 	getsockpair(pipe_m2r);
251 	getsockpair(pipe_m2roa);
252 
253 	/* fork children */
254 	rde_pid = start_child(PROC_RDE, saved_argv0, pipe_m2r[1], debug,
255 	    cmd_opts & BGPD_OPT_VERBOSE);
256 	se_pid = start_child(PROC_SE, saved_argv0, pipe_m2s[1], debug,
257 	    cmd_opts & BGPD_OPT_VERBOSE);
258 	rtr_pid = start_child(PROC_RTR, saved_argv0, pipe_m2roa[1], debug,
259 	    cmd_opts & BGPD_OPT_VERBOSE);
260 
261 	signal(SIGTERM, sighdlr);
262 	signal(SIGINT, sighdlr);
263 	signal(SIGHUP, sighdlr);
264 	signal(SIGALRM, sighdlr);
265 	signal(SIGUSR1, sighdlr);
266 	signal(SIGPIPE, SIG_IGN);
267 
268 	if ((ibuf_se = malloc(sizeof(struct imsgbuf))) == NULL ||
269 	    (ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL ||
270 	    (ibuf_rtr = malloc(sizeof(struct imsgbuf))) == NULL)
271 		fatal(NULL);
272 	imsg_init(ibuf_se, pipe_m2s[0]);
273 	imsg_init(ibuf_rde, pipe_m2r[0]);
274 	imsg_init(ibuf_rtr, pipe_m2roa[0]);
275 	mrt_init(ibuf_rde, ibuf_se);
276 	if (kr_init(&rfd) == -1)
277 		quit = 1;
278 	keyfd = pfkey_init();
279 
280 	/*
281 	 * rpath, read config file
282 	 * cpath, unlink control socket
283 	 * fattr, chmod on control socket
284 	 * wpath, needed if we are doing mrt dumps
285 	 *
286 	 * pledge placed here because kr_init() does a setsockopt on the
287 	 * routing socket thats not allowed at all.
288 	 */
289 #if 0
290 	/*
291 	 * disabled because we do ioctls on /dev/pf and SIOCSIFGATTR
292 	 * this needs some redesign of bgpd to be fixed.
293 	 */
294 BROKEN	if (pledge("stdio rpath wpath cpath fattr unix route recvfd sendfd",
295 	    NULL) == -1)
296 		fatal("pledge");
297 #endif
298 
299 	if (imsg_send_sockets(ibuf_se, ibuf_rde, ibuf_rtr))
300 		fatal("could not establish imsg links");
301 	/* control setup needs to happen late since it sends imsgs */
302 	if (control_setup(conf) == -1)
303 		quit = 1;
304 	if (send_config(conf) != 0)
305 		quit = 1;
306 	if (pftable_clear_all() != 0)
307 		quit = 1;
308 
309 	while (quit == 0) {
310 		if (pfd_elms < PFD_CONNECT_START + connect_cnt) {
311 			struct pollfd *newp;
312 
313 			if ((newp = reallocarray(pfd,
314 			    PFD_CONNECT_START + connect_cnt,
315 			    sizeof(struct pollfd))) == NULL) {
316 				log_warn("could not resize pfd from %u -> %u"
317 				    " entries", pfd_elms, PFD_CONNECT_START +
318 				    connect_cnt);
319 				fatalx("exiting");
320 			}
321 			pfd = newp;
322 			pfd_elms = PFD_CONNECT_START + connect_cnt;
323 		}
324 		bzero(pfd, sizeof(struct pollfd) * pfd_elms);
325 
326 		timeout = mrt_timeout(conf->mrt);
327 
328 		pfd[PFD_SOCK_ROUTE].fd = rfd;
329 		pfd[PFD_SOCK_ROUTE].events = POLLIN;
330 
331 		pfd[PFD_SOCK_PFKEY].fd = keyfd;
332 		pfd[PFD_SOCK_PFKEY].events = POLLIN;
333 
334 		set_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se);
335 		set_pollfd(&pfd[PFD_PIPE_RDE], ibuf_rde);
336 		set_pollfd(&pfd[PFD_PIPE_RTR], ibuf_rtr);
337 
338 		npfd = PFD_CONNECT_START;
339 		TAILQ_FOREACH(ce, &connect_queue, entry) {
340 			pfd[npfd].fd = ce->fd;
341 			pfd[npfd++].events = POLLOUT;
342 			if (npfd > pfd_elms)
343 				fatalx("polli pfd overflow");
344 		}
345 
346 		if (timeout < 0 || timeout > MAX_TIMEOUT)
347 			timeout = MAX_TIMEOUT;
348 		if (poll(pfd, npfd, timeout * 1000) == -1) {
349 			if (errno != EINTR) {
350 				log_warn("poll error");
351 				quit = 1;
352 			}
353 			goto next_loop;
354 		}
355 
356 		if (handle_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se) == -1) {
357 			log_warnx("main: Lost connection to SE");
358 			msgbuf_clear(&ibuf_se->w);
359 			free(ibuf_se);
360 			ibuf_se = NULL;
361 			quit = 1;
362 		} else {
363 			if (dispatch_imsg(ibuf_se, PFD_PIPE_SESSION, conf) ==
364 			    -1)
365 				quit = 1;
366 		}
367 
368 		if (handle_pollfd(&pfd[PFD_PIPE_RDE], ibuf_rde) == -1) {
369 			log_warnx("main: Lost connection to RDE");
370 			msgbuf_clear(&ibuf_rde->w);
371 			free(ibuf_rde);
372 			ibuf_rde = NULL;
373 			quit = 1;
374 		} else {
375 			if (dispatch_imsg(ibuf_rde, PFD_PIPE_RDE, conf) == -1)
376 				quit = 1;
377 		}
378 
379 		if (handle_pollfd(&pfd[PFD_PIPE_RTR], ibuf_rtr) == -1) {
380 			log_warnx("main: Lost connection to RTR");
381 			msgbuf_clear(&ibuf_rtr->w);
382 			free(ibuf_rtr);
383 			ibuf_rtr = NULL;
384 			quit = 1;
385 		} else {
386 			if (dispatch_imsg(ibuf_rtr, PFD_PIPE_RTR, conf) == -1)
387 				quit = 1;
388 		}
389 
390 		if (pfd[PFD_SOCK_ROUTE].revents & POLLIN) {
391 			if (kr_dispatch_msg(conf->default_tableid) == -1)
392 				quit = 1;
393 		}
394 
395 		if (pfd[PFD_SOCK_PFKEY].revents & POLLIN) {
396 			if (pfkey_read(keyfd, NULL) == -1) {
397 				log_warnx("pfkey_read failed, exiting...");
398 				quit = 1;
399 			}
400 		}
401 
402 		for (i = PFD_CONNECT_START; i < npfd; i++)
403 			if (pfd[i].revents != 0)
404 				bgpd_rtr_connect_done(pfd[i].fd, conf);
405 
406  next_loop:
407 		if (reconfig) {
408 			u_int	error;
409 
410 			reconfig = 0;
411 			switch (reconfigure(conffile, conf)) {
412 			case -1:	/* fatal error */
413 				quit = 1;
414 				break;
415 			case 0:		/* all OK */
416 				error = 0;
417 				break;
418 			case 2:
419 				log_info("previous reload still running");
420 				error = CTL_RES_PENDING;
421 				break;
422 			default:	/* parse error */
423 				log_warnx("config file %s has errors, "
424 				    "not reloading", conffile);
425 				error = CTL_RES_PARSE_ERROR;
426 				break;
427 			}
428 			if (reconfpid != 0) {
429 				send_imsg_session(IMSG_CTL_RESULT, reconfpid,
430 				    &error, sizeof(error));
431 				reconfpid = 0;
432 			}
433 		}
434 
435 		if (mrtdump) {
436 			mrtdump = 0;
437 			mrt_handler(conf->mrt);
438 		}
439 	}
440 
441 	/* close pipes */
442 	if (ibuf_se) {
443 		msgbuf_clear(&ibuf_se->w);
444 		close(ibuf_se->fd);
445 		free(ibuf_se);
446 		ibuf_se = NULL;
447 	}
448 	if (ibuf_rde) {
449 		msgbuf_clear(&ibuf_rde->w);
450 		close(ibuf_rde->fd);
451 		free(ibuf_rde);
452 		ibuf_rde = NULL;
453 	}
454 	if (ibuf_rtr) {
455 		msgbuf_clear(&ibuf_rtr->w);
456 		close(ibuf_rtr->fd);
457 		free(ibuf_rtr);
458 		ibuf_rtr = NULL;
459 	}
460 
461 	/* cleanup kernel data structures */
462 	carp_demote_shutdown();
463 	kr_shutdown(conf->fib_priority, conf->default_tableid);
464 	pftable_clear_all();
465 
466 	RB_FOREACH(p, peer_head, &conf->peers)
467 		pfkey_remove(p);
468 
469 	while ((rr = SIMPLEQ_FIRST(&ribnames)) != NULL) {
470 		SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
471 		free(rr);
472 	}
473 	free_config(conf);
474 
475 	log_debug("waiting for children to terminate");
476 	do {
477 		pid = wait(&status);
478 		if (pid == -1) {
479 			if (errno != EINTR && errno != ECHILD)
480 				fatal("wait");
481 		} else if (WIFSIGNALED(status)) {
482 			char *name = "unknown process";
483 			if (pid == rde_pid)
484 				name = "route decision engine";
485 			else if (pid == se_pid)
486 				name = "session engine";
487 			else if (pid == rtr_pid)
488 				name = "rtr engine";
489 			log_warnx("%s terminated; signal %d", name,
490 				WTERMSIG(status));
491 		}
492 	} while (pid != -1 || (pid == -1 && errno == EINTR));
493 
494 	free(rcname);
495 	free(cname);
496 
497 	log_info("terminating");
498 	return (0);
499 }
500 
501 pid_t
502 start_child(enum bgpd_process p, char *argv0, int fd, int debug, int verbose)
503 {
504 	char *argv[5];
505 	int argc = 0;
506 	pid_t pid;
507 
508 	switch (pid = fork()) {
509 	case -1:
510 		fatal("cannot fork");
511 	case 0:
512 		break;
513 	default:
514 		close(fd);
515 		return (pid);
516 	}
517 
518 	if (fd != 3) {
519 		if (dup2(fd, 3) == -1)
520 			fatal("cannot setup imsg fd");
521 	} else if (fcntl(fd, F_SETFD, 0) == -1)
522 		fatal("cannot setup imsg fd");
523 
524 	argv[argc++] = argv0;
525 	switch (p) {
526 	case PROC_MAIN:
527 		fatalx("Can not start main process");
528 	case PROC_RDE:
529 		argv[argc++] = "-R";
530 		break;
531 	case PROC_SE:
532 		argv[argc++] = "-S";
533 		break;
534 	case PROC_RTR:
535 		argv[argc++] = "-T";
536 		break;
537 	}
538 	if (debug)
539 		argv[argc++] = "-d";
540 	if (verbose)
541 		argv[argc++] = "-v";
542 	argv[argc++] = NULL;
543 
544 	execvp(argv0, argv);
545 	fatal("execvp");
546 }
547 
548 int
549 send_filterset(struct imsgbuf *i, struct filter_set_head *set)
550 {
551 	struct filter_set	*s;
552 
553 	TAILQ_FOREACH(s, set, entry)
554 		if (imsg_compose(i, IMSG_FILTER_SET, 0, 0, -1, s,
555 		    sizeof(struct filter_set)) == -1)
556 			return (-1);
557 	return (0);
558 }
559 
560 int
561 reconfigure(char *conffile, struct bgpd_config *conf)
562 {
563 	struct bgpd_config	*new_conf;
564 
565 	if (reconfpending)
566 		return (2);
567 
568 	log_info("rereading config");
569 	if ((new_conf = parse_config(conffile, &conf->peers,
570 	    &conf->rtrs)) == NULL)
571 		return (1);
572 
573 	merge_config(conf, new_conf);
574 
575 	if (prepare_listeners(conf) == -1) {
576 		return (1);
577 	}
578 
579 	if (control_setup(conf) == -1) {
580 		return (1);
581 	}
582 
583 	return send_config(conf);
584 }
585 
586 int
587 send_config(struct bgpd_config *conf)
588 {
589 	struct peer		*p;
590 	struct filter_rule	*r;
591 	struct listen_addr	*la;
592 	struct rde_rib		*rr;
593 	struct l3vpn		*vpn;
594 	struct as_set		*aset;
595 	struct prefixset	*ps;
596 	struct prefixset_item	*psi, *npsi;
597 	struct roa		*roa, *nroa;
598 	struct rtr_config	*rtr;
599 
600 	reconfpending = 3;	/* one per child */
601 
602 	expand_networks(conf);
603 
604 	cflags = conf->flags;
605 
606 	/* start reconfiguration */
607 	if (imsg_compose(ibuf_se, IMSG_RECONF_CONF, 0, 0, -1,
608 	    conf, sizeof(*conf)) == -1)
609 		return (-1);
610 	if (imsg_compose(ibuf_rde, IMSG_RECONF_CONF, 0, 0, -1,
611 	    conf, sizeof(*conf)) == -1)
612 		return (-1);
613 	if (imsg_compose(ibuf_rtr, IMSG_RECONF_CONF, 0, 0, -1,
614 	    conf, sizeof(*conf)) == -1)
615 		return (-1);
616 
617 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
618 		if (imsg_compose(ibuf_se, IMSG_RECONF_LISTENER, 0, 0, la->fd,
619 		    la, sizeof(*la)) == -1)
620 			return (-1);
621 		la->fd = -1;
622 	}
623 
624 	/* adjust fib syncing on reload */
625 	ktable_preload();
626 
627 	/* RIBs for the RDE */
628 	while ((rr = SIMPLEQ_FIRST(&ribnames))) {
629 		SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
630 		if (ktable_update(rr->rtableid, rr->name, rr->flags,
631 		    conf->fib_priority) == -1) {
632 			log_warnx("failed to load rdomain %d",
633 			    rr->rtableid);
634 			return (-1);
635 		}
636 		if (imsg_compose(ibuf_rde, IMSG_RECONF_RIB, 0, 0, -1,
637 		    rr, sizeof(*rr)) == -1)
638 			return (-1);
639 		free(rr);
640 	}
641 
642 	/* send peer list to the SE */
643 	RB_FOREACH(p, peer_head, &conf->peers) {
644 		if (imsg_compose(ibuf_se, IMSG_RECONF_PEER, p->conf.id, 0, -1,
645 		    &p->conf, sizeof(p->conf)) == -1)
646 			return (-1);
647 
648 		if (p->reconf_action == RECONF_REINIT)
649 			if (pfkey_establish(p) == -1)
650 				log_peer_warnx(&p->conf, "pfkey setup failed");
651 	}
652 
653 	/* networks go via kroute to the RDE */
654 	kr_net_reload(conf->default_tableid, 0, &conf->networks);
655 
656 	/* prefixsets for filters in the RDE */
657 	while ((ps = SIMPLEQ_FIRST(&conf->prefixsets)) != NULL) {
658 		SIMPLEQ_REMOVE_HEAD(&conf->prefixsets, entry);
659 		if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET, 0, 0, -1,
660 		    ps->name, sizeof(ps->name)) == -1)
661 			return (-1);
662 		RB_FOREACH_SAFE(psi, prefixset_tree, &ps->psitems, npsi) {
663 			RB_REMOVE(prefixset_tree, &ps->psitems, psi);
664 			if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET_ITEM,
665 			    0, 0, -1, psi, sizeof(*psi)) == -1)
666 				return (-1);
667 			free(psi);
668 		}
669 		free(ps);
670 	}
671 
672 	/* originsets for filters in the RDE */
673 	while ((ps = SIMPLEQ_FIRST(&conf->originsets)) != NULL) {
674 		SIMPLEQ_REMOVE_HEAD(&conf->originsets, entry);
675 		if (imsg_compose(ibuf_rde, IMSG_RECONF_ORIGIN_SET, 0, 0, -1,
676 		    ps->name, sizeof(ps->name)) == -1)
677 			return (-1);
678 		RB_FOREACH_SAFE(roa, roa_tree, &ps->roaitems, nroa) {
679 			RB_REMOVE(roa_tree, &ps->roaitems, roa);
680 			if (imsg_compose(ibuf_rde, IMSG_RECONF_ROA_ITEM, 0, 0,
681 			    -1, roa, sizeof(*roa)) == -1)
682 				return (-1);
683 			free(roa);
684 		}
685 		free(ps);
686 	}
687 
688 	/* roa table and rtr config are sent to the RTR engine */
689 	RB_FOREACH_SAFE(roa, roa_tree, &conf->roa, nroa) {
690 		RB_REMOVE(roa_tree, &conf->roa, roa);
691 		if (imsg_compose(ibuf_rtr, IMSG_RECONF_ROA_ITEM, 0, 0,
692 		    -1, roa, sizeof(*roa)) == -1)
693 			return (-1);
694 		free(roa);
695 	}
696 	SIMPLEQ_FOREACH(rtr, &conf->rtrs, entry) {
697 		if (imsg_compose(ibuf_rtr, IMSG_RECONF_RTR_CONFIG, rtr->id,
698 		    0, -1, rtr->descr, sizeof(rtr->descr)) == -1)
699 			return (-1);
700 	}
701 
702 	/* as-sets for filters in the RDE */
703 	while ((aset = SIMPLEQ_FIRST(&conf->as_sets)) != NULL) {
704 		struct ibuf *wbuf;
705 		uint32_t *as;
706 		size_t i, l, n;
707 
708 		SIMPLEQ_REMOVE_HEAD(&conf->as_sets, entry);
709 
710 		as = set_get(aset->set, &n);
711 		if ((wbuf = imsg_create(ibuf_rde, IMSG_RECONF_AS_SET, 0, 0,
712 		    sizeof(n) + sizeof(aset->name))) == NULL)
713 			return -1;
714 		if (imsg_add(wbuf, &n, sizeof(n)) == -1 ||
715 		    imsg_add(wbuf, aset->name, sizeof(aset->name)) == -1)
716 			return -1;
717 		imsg_close(ibuf_rde, wbuf);
718 
719 		for (i = 0; i < n; i += l) {
720 			l = (n - i > 1024 ? 1024 : n - i);
721 			if (imsg_compose(ibuf_rde, IMSG_RECONF_AS_SET_ITEMS,
722 			    0, 0, -1, as + i, l * sizeof(*as)) == -1)
723 				return -1;
724 		}
725 
726 		if (imsg_compose(ibuf_rde, IMSG_RECONF_AS_SET_DONE, 0, 0, -1,
727 		    NULL, 0) == -1)
728 			return -1;
729 
730 		set_free(aset->set);
731 		free(aset);
732 	}
733 
734 	/* filters for the RDE */
735 	while ((r = TAILQ_FIRST(conf->filters)) != NULL) {
736 		TAILQ_REMOVE(conf->filters, r, entry);
737 		if (send_filterset(ibuf_rde, &r->set) == -1)
738 			return (-1);
739 		if (imsg_compose(ibuf_rde, IMSG_RECONF_FILTER, 0, 0, -1,
740 		    r, sizeof(struct filter_rule)) == -1)
741 			return (-1);
742 		filterset_free(&r->set);
743 		free(r);
744 	}
745 
746 	while ((vpn = SIMPLEQ_FIRST(&conf->l3vpns)) != NULL) {
747 		SIMPLEQ_REMOVE_HEAD(&conf->l3vpns, entry);
748 		if (ktable_update(vpn->rtableid, vpn->descr, vpn->flags,
749 		    conf->fib_priority) == -1) {
750 			log_warnx("failed to load rdomain %d",
751 			    vpn->rtableid);
752 			return (-1);
753 		}
754 		/* networks go via kroute to the RDE */
755 		kr_net_reload(vpn->rtableid, vpn->rd, &vpn->net_l);
756 
757 		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN, 0, 0, -1,
758 		    vpn, sizeof(*vpn)) == -1)
759 			return (-1);
760 
761 		/* export targets */
762 		if (send_filterset(ibuf_rde, &vpn->export) == -1)
763 			return (-1);
764 		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_EXPORT, 0, 0,
765 		    -1, NULL, 0) == -1)
766 			return (-1);
767 		filterset_free(&vpn->export);
768 
769 		/* import targets */
770 		if (send_filterset(ibuf_rde, &vpn->import) == -1)
771 			return (-1);
772 		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_IMPORT, 0, 0,
773 		    -1, NULL, 0) == -1)
774 			return (-1);
775 		filterset_free(&vpn->import);
776 
777 		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_DONE, 0, 0,
778 		    -1, NULL, 0) == -1)
779 			return (-1);
780 
781 		free(vpn);
782 	}
783 
784 	/* send a drain message to know when all messages where processed */
785 	if (imsg_compose(ibuf_se, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
786 		return (-1);
787 	if (imsg_compose(ibuf_rde, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
788 		return (-1);
789 	if (imsg_compose(ibuf_rtr, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
790 		return (-1);
791 
792 	/* mrt changes can be sent out of bound */
793 	mrt_reconfigure(conf->mrt);
794 	return (0);
795 }
796 
797 int
798 dispatch_imsg(struct imsgbuf *ibuf, int idx, struct bgpd_config *conf)
799 {
800 	struct imsg		 imsg;
801 	struct peer		*p;
802 	struct rtr_config	*r;
803 	ssize_t			 n;
804 	int			 rv, verbose;
805 
806 	rv = 0;
807 	while (ibuf) {
808 		if ((n = imsg_get(ibuf, &imsg)) == -1)
809 			return (-1);
810 
811 		if (n == 0)
812 			break;
813 
814 		switch (imsg.hdr.type) {
815 		case IMSG_KROUTE_CHANGE:
816 			if (idx != PFD_PIPE_RDE)
817 				log_warnx("route request not from RDE");
818 			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
819 			    sizeof(struct kroute_full))
820 				log_warnx("wrong imsg len");
821 			else if (kr_change(imsg.hdr.peerid, imsg.data,
822 			    conf->fib_priority))
823 				rv = -1;
824 			break;
825 		case IMSG_KROUTE_DELETE:
826 			if (idx != PFD_PIPE_RDE)
827 				log_warnx("route request not from RDE");
828 			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
829 			    sizeof(struct kroute_full))
830 				log_warnx("wrong imsg len");
831 			else if (kr_delete(imsg.hdr.peerid, imsg.data,
832 			    conf->fib_priority))
833 				rv = -1;
834 			break;
835 		case IMSG_KROUTE_FLUSH:
836 			if (idx != PFD_PIPE_RDE)
837 				log_warnx("route request not from RDE");
838 			else if (imsg.hdr.len != IMSG_HEADER_SIZE)
839 				log_warnx("wrong imsg len");
840 			else if (kr_flush(imsg.hdr.peerid))
841 				rv = -1;
842 			break;
843 		case IMSG_NEXTHOP_ADD:
844 			if (idx != PFD_PIPE_RDE)
845 				log_warnx("nexthop request not from RDE");
846 			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
847 			    sizeof(struct bgpd_addr))
848 				log_warnx("wrong imsg len");
849 			else if (kr_nexthop_add(imsg.hdr.peerid, imsg.data,
850 			    conf) == -1)
851 				rv = -1;
852 			break;
853 		case IMSG_NEXTHOP_REMOVE:
854 			if (idx != PFD_PIPE_RDE)
855 				log_warnx("nexthop request not from RDE");
856 			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
857 			    sizeof(struct bgpd_addr))
858 				log_warnx("wrong imsg len");
859 			else
860 				kr_nexthop_delete(imsg.hdr.peerid, imsg.data,
861 				    conf);
862 			break;
863 		case IMSG_PFTABLE_ADD:
864 			if (idx != PFD_PIPE_RDE)
865 				log_warnx("pftable request not from RDE");
866 			else
867 				if (imsg.hdr.len != IMSG_HEADER_SIZE +
868 				    sizeof(struct pftable_msg))
869 					log_warnx("wrong imsg len");
870 				else if (pftable_addr_add(imsg.data) != 0)
871 					rv = -1;
872 			break;
873 		case IMSG_PFTABLE_REMOVE:
874 			if (idx != PFD_PIPE_RDE)
875 				log_warnx("pftable request not from RDE");
876 			else
877 				if (imsg.hdr.len != IMSG_HEADER_SIZE +
878 				    sizeof(struct pftable_msg))
879 					log_warnx("wrong imsg len");
880 				else if (pftable_addr_remove(imsg.data) != 0)
881 					rv = -1;
882 			break;
883 		case IMSG_PFTABLE_COMMIT:
884 			if (idx != PFD_PIPE_RDE)
885 				log_warnx("pftable request not from RDE");
886 			else if (imsg.hdr.len != IMSG_HEADER_SIZE)
887 				log_warnx("wrong imsg len");
888 			else if (pftable_commit() != 0)
889 				rv = -1;
890 			break;
891 		case IMSG_PFKEY_RELOAD:
892 			if (idx != PFD_PIPE_SESSION) {
893 				log_warnx("pfkey reload request not from SE");
894 				break;
895 			}
896 			p = getpeerbyid(conf, imsg.hdr.peerid);
897 			if (p != NULL) {
898 				if (pfkey_establish(p) == -1)
899 					log_peer_warnx(&p->conf,
900 					    "pfkey setup failed");
901 			}
902 			break;
903 		case IMSG_CTL_RELOAD:
904 			if (idx != PFD_PIPE_SESSION)
905 				log_warnx("reload request not from SE");
906 			else {
907 				reconfig = 1;
908 				reconfpid = imsg.hdr.pid;
909 				if (imsg.hdr.len == IMSG_HEADER_SIZE +
910 				    REASON_LEN && ((char *)imsg.data)[0])
911 					log_info("reload due to: %s",
912 					    log_reason(imsg.data));
913 			}
914 			break;
915 		case IMSG_CTL_FIB_COUPLE:
916 			if (idx != PFD_PIPE_SESSION)
917 				log_warnx("couple request not from SE");
918 			else
919 				kr_fib_couple(imsg.hdr.peerid,
920 				    conf->fib_priority);
921 			break;
922 		case IMSG_CTL_FIB_DECOUPLE:
923 			if (idx != PFD_PIPE_SESSION)
924 				log_warnx("decouple request not from SE");
925 			else
926 				kr_fib_decouple(imsg.hdr.peerid,
927 				    conf->fib_priority);
928 			break;
929 		case IMSG_CTL_KROUTE:
930 		case IMSG_CTL_KROUTE_ADDR:
931 		case IMSG_CTL_SHOW_NEXTHOP:
932 		case IMSG_CTL_SHOW_INTERFACE:
933 		case IMSG_CTL_SHOW_FIB_TABLES:
934 			if (idx != PFD_PIPE_SESSION)
935 				log_warnx("kroute request not from SE");
936 			else
937 				kr_show_route(&imsg);
938 			break;
939 		case IMSG_IFINFO:
940 			if (idx != PFD_PIPE_SESSION)
941 				log_warnx("IFINFO request not from SE");
942 			else if (imsg.hdr.len != IMSG_HEADER_SIZE + IFNAMSIZ)
943 				log_warnx("IFINFO request with wrong len");
944 			else
945 				kr_ifinfo(imsg.data);
946 			break;
947 		case IMSG_DEMOTE:
948 			if (idx != PFD_PIPE_SESSION)
949 				log_warnx("demote request not from SE");
950 			else if (imsg.hdr.len != IMSG_HEADER_SIZE +
951 			    sizeof(struct demote_msg))
952 				log_warnx("DEMOTE request with wrong len");
953 			else {
954 				struct demote_msg	*msg;
955 
956 				msg = imsg.data;
957 				carp_demote_set(msg->demote_group, msg->level);
958 			}
959 			break;
960 		case IMSG_CTL_LOG_VERBOSE:
961 			/* already checked by SE */
962 			memcpy(&verbose, imsg.data, sizeof(verbose));
963 			log_setverbose(verbose);
964 			break;
965 		case IMSG_RECONF_DONE:
966 			if (reconfpending == 0) {
967 				log_warnx("unexpected RECONF_DONE received");
968 				break;
969 			}
970 			if (idx == PFD_PIPE_SESSION) {
971 				imsg_compose(ibuf_rtr, IMSG_RECONF_DONE, 0,
972 				    0, -1, NULL, 0);
973 			} else if (idx == PFD_PIPE_RTR) {
974 				imsg_compose(ibuf_rde, IMSG_RECONF_DONE, 0,
975 				    0, -1, NULL, 0);
976 
977 				/* finally fix kroute information */
978 				ktable_postload(conf->fib_priority);
979 
980 				/* redistribute list needs to be reloaded too */
981 				kr_reload();
982 			}
983 			reconfpending--;
984 			break;
985 		case IMSG_RECONF_DRAIN:
986 			if (reconfpending == 0) {
987 				log_warnx("unexpected RECONF_DRAIN received");
988 				break;
989 			}
990 			reconfpending--;
991 			if (reconfpending == 0) {
992 				/*
993 				 * SE goes first to bring templated neighbors
994 				 * in sync.
995 				 */
996 				imsg_compose(ibuf_se, IMSG_RECONF_DONE, 0,
997 				    0, -1, NULL, 0);
998 				reconfpending = 3; /* expecting 2 DONE msg */
999 			}
1000 			break;
1001 		case IMSG_SOCKET_CONN:
1002 			if (idx != PFD_PIPE_RTR) {
1003 				log_warnx("connect request not from RTR");
1004 			} else {
1005 				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1006 					if (imsg.hdr.peerid == r->id)
1007 						break;
1008 				}
1009 				if (r == NULL)
1010 					log_warnx("unknown rtr id %d",
1011 					    imsg.hdr.peerid);
1012 				else
1013 					bgpd_rtr_connect(r);
1014 			}
1015 			break;
1016 		case IMSG_CTL_SHOW_RTR:
1017 			if (idx == PFD_PIPE_SESSION) {
1018 				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1019 					imsg_compose(ibuf_rtr, imsg.hdr.type,
1020 					    r->id, imsg.hdr.pid, -1, NULL, 0);
1021 				}
1022 				imsg_compose(ibuf_rtr, IMSG_CTL_END,
1023 				    0, imsg.hdr.pid, -1, NULL, 0);
1024 			} else if (imsg.hdr.len != IMSG_HEADER_SIZE +
1025 			    sizeof(struct ctl_show_rtr)) {
1026 				log_warnx("IMSG_CTL_SHOW_RTR with wrong len");
1027 			} else if (idx == PFD_PIPE_RTR) {
1028 				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1029 					if (imsg.hdr.peerid == r->id)
1030 						break;
1031 				}
1032 				if (r != NULL) {
1033 					struct ctl_show_rtr *msg;
1034 					msg = imsg.data;
1035 					strlcpy(msg->descr, r->descr,
1036 					    sizeof(msg->descr));
1037 					msg->local_addr = r->local_addr;
1038 					msg->remote_addr = r->remote_addr;
1039 					msg->remote_port = r->remote_port;
1040 
1041 					imsg_compose(ibuf_se, imsg.hdr.type,
1042 					    imsg.hdr.peerid, imsg.hdr.pid,
1043 					    -1, imsg.data,
1044 					    imsg.hdr.len - IMSG_HEADER_SIZE);
1045 				}
1046 			}
1047 			break;
1048 		case IMSG_CTL_END:
1049 		case IMSG_CTL_SHOW_TIMER:
1050 			if (idx != PFD_PIPE_RTR) {
1051 				log_warnx("connect request not from RTR");
1052 				break;
1053 			}
1054 			imsg_compose(ibuf_se, imsg.hdr.type, imsg.hdr.peerid,
1055 			    imsg.hdr.pid, -1, imsg.data,
1056 			    imsg.hdr.len - IMSG_HEADER_SIZE);
1057 			break;
1058 		default:
1059 			break;
1060 		}
1061 		imsg_free(&imsg);
1062 		if (rv != 0)
1063 			return (rv);
1064 	}
1065 	return (0);
1066 }
1067 
1068 void
1069 send_nexthop_update(struct kroute_nexthop *msg)
1070 {
1071 	char	*gw = NULL;
1072 
1073 	if (msg->gateway.aid)
1074 		if (asprintf(&gw, ": via %s",
1075 		    log_addr(&msg->gateway)) == -1) {
1076 			log_warn("send_nexthop_update");
1077 			quit = 1;
1078 		}
1079 
1080 	log_debug("nexthop %s now %s%s%s", log_addr(&msg->nexthop),
1081 	    msg->valid ? "valid" : "invalid",
1082 	    msg->connected ? ": directly connected" : "",
1083 	    msg->gateway.aid ? gw : "");
1084 
1085 	free(gw);
1086 
1087 	if (imsg_compose(ibuf_rde, IMSG_NEXTHOP_UPDATE, 0, 0, -1,
1088 	    msg, sizeof(struct kroute_nexthop)) == -1)
1089 		quit = 1;
1090 }
1091 
1092 void
1093 send_imsg_session(int type, pid_t pid, void *data, uint16_t datalen)
1094 {
1095 	imsg_compose(ibuf_se, type, 0, pid, -1, data, datalen);
1096 }
1097 
1098 int
1099 send_network(int type, struct network_config *net, struct filter_set_head *h)
1100 {
1101 	if (quit)
1102 		return (0);
1103 	if (imsg_compose(ibuf_rde, type, 0, 0, -1, net,
1104 	    sizeof(struct network_config)) == -1)
1105 		return (-1);
1106 	/* networks that get deleted don't need to send the filter set */
1107 	if (type == IMSG_NETWORK_REMOVE)
1108 		return (0);
1109 	if (send_filterset(ibuf_rde, h) == -1)
1110 		return (-1);
1111 	if (imsg_compose(ibuf_rde, IMSG_NETWORK_DONE, 0, 0, -1, NULL, 0) == -1)
1112 		return (-1);
1113 
1114 	return (0);
1115 }
1116 
1117 int
1118 bgpd_filternexthop(struct kroute *kr, struct kroute6 *kr6)
1119 {
1120 	/* kernel routes are never filtered */
1121 	if (kr && kr->flags & F_KERNEL && kr->prefixlen != 0)
1122 		return (0);
1123 	if (kr6 && kr6->flags & F_KERNEL && kr6->prefixlen != 0)
1124 		return (0);
1125 
1126 	if (cflags & BGPD_FLAG_NEXTHOP_BGP) {
1127 		if (kr && kr->flags & F_BGPD_INSERTED)
1128 			return (0);
1129 		if (kr6 && kr6->flags & F_BGPD_INSERTED)
1130 			return (0);
1131 	}
1132 
1133 	if (cflags & BGPD_FLAG_NEXTHOP_DEFAULT) {
1134 		if (kr && kr->prefixlen == 0)
1135 			return (0);
1136 		if (kr6 && kr6->prefixlen == 0)
1137 			return (0);
1138 	}
1139 
1140 	return (1);
1141 }
1142 
1143 int
1144 control_setup(struct bgpd_config *conf)
1145 {
1146 	int fd, restricted;
1147 
1148 	/* control socket is outside chroot */
1149 	if (!cname || strcmp(cname, conf->csock)) {
1150 		if (cname) {
1151 			free(cname);
1152 		}
1153 		if ((cname = strdup(conf->csock)) == NULL)
1154 			fatal("strdup");
1155 		if (control_check(cname) == -1)
1156 			return (-1);
1157 		if ((fd = control_init(0, cname)) == -1)
1158 			fatalx("control socket setup failed");
1159 		if (control_listen(fd) == -1)
1160 			fatalx("control socket setup failed");
1161 		restricted = 0;
1162 		if (imsg_compose(ibuf_se, IMSG_RECONF_CTRL, 0, 0, fd,
1163 		    &restricted, sizeof(restricted)) == -1)
1164 			return (-1);
1165 	}
1166 	if (!conf->rcsock) {
1167 		/* remove restricted socket */
1168 		free(rcname);
1169 		rcname = NULL;
1170 	} else if (!rcname || strcmp(rcname, conf->rcsock)) {
1171 		if (rcname) {
1172 			free(rcname);
1173 		}
1174 		if ((rcname = strdup(conf->rcsock)) == NULL)
1175 			fatal("strdup");
1176 		if (control_check(rcname) == -1)
1177 			return (-1);
1178 		if ((fd = control_init(1, rcname)) == -1)
1179 			fatalx("control socket setup failed");
1180 		if (control_listen(fd) == -1)
1181 			fatalx("control socket setup failed");
1182 		restricted = 1;
1183 		if (imsg_compose(ibuf_se, IMSG_RECONF_CTRL, 0, 0, fd,
1184 		    &restricted, sizeof(restricted)) == -1)
1185 			return (-1);
1186 	}
1187 	return (0);
1188 }
1189 
1190 void
1191 set_pollfd(struct pollfd *pfd, struct imsgbuf *i)
1192 {
1193 	if (i == NULL || i->fd == -1) {
1194 		pfd->fd = -1;
1195 		return;
1196 	}
1197 	pfd->fd = i->fd;
1198 	pfd->events = POLLIN;
1199 	if (i->w.queued > 0)
1200 		pfd->events |= POLLOUT;
1201 }
1202 
1203 int
1204 handle_pollfd(struct pollfd *pfd, struct imsgbuf *i)
1205 {
1206 	ssize_t n;
1207 
1208 	if (i == NULL)
1209 		return (0);
1210 
1211 	if (pfd->revents & POLLOUT)
1212 		if (msgbuf_write(&i->w) <= 0 && errno != EAGAIN) {
1213 			log_warn("imsg write error");
1214 			close(i->fd);
1215 			i->fd = -1;
1216 			return (-1);
1217 		}
1218 
1219 	if (pfd->revents & POLLIN) {
1220 		if ((n = imsg_read(i)) == -1 && errno != EAGAIN) {
1221 			log_warn("imsg read error");
1222 			close(i->fd);
1223 			i->fd = -1;
1224 			return (-1);
1225 		}
1226 		if (n == 0) {
1227 			log_warnx("peer closed imsg connection");
1228 			close(i->fd);
1229 			i->fd = -1;
1230 			return (-1);
1231 		}
1232 	}
1233 	return (0);
1234 }
1235 
1236 static void
1237 getsockpair(int pipe[2])
1238 {
1239 	int bsize, i;
1240 
1241 	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK,
1242 	    PF_UNSPEC, pipe) == -1)
1243 		fatal("socketpair");
1244 
1245 	for (i = 0; i < 2; i++) {
1246 		for (bsize = MAX_SOCK_BUF; bsize >= 16 * 1024; bsize /= 2) {
1247 			if (setsockopt(pipe[i], SOL_SOCKET, SO_RCVBUF,
1248 			    &bsize, sizeof(bsize)) == -1) {
1249 				if (errno != ENOBUFS)
1250 					fatal("setsockopt(SO_RCVBUF, %d)",
1251 					    bsize);
1252 				log_warn("setsockopt(SO_RCVBUF, %d)", bsize);
1253 				continue;
1254 			}
1255 			break;
1256 		}
1257 	}
1258 	for (i = 0; i < 2; i++) {
1259 		for (bsize = MAX_SOCK_BUF; bsize >= 16 * 1024; bsize /= 2) {
1260 			if (setsockopt(pipe[i], SOL_SOCKET, SO_SNDBUF,
1261 			    &bsize, sizeof(bsize)) == -1) {
1262 				if (errno != ENOBUFS)
1263 					fatal("setsockopt(SO_SNDBUF, %d)",
1264 					    bsize);
1265 				log_warn("setsockopt(SO_SNDBUF, %d)", bsize);
1266 				continue;
1267 			}
1268 			break;
1269 		}
1270 	}
1271 }
1272 
1273 int
1274 imsg_send_sockets(struct imsgbuf *se, struct imsgbuf *rde, struct imsgbuf *roa)
1275 {
1276 	int pipe_s2r[2];
1277 	int pipe_s2r_ctl[2];
1278 	int pipe_r2r[2];
1279 
1280 	getsockpair(pipe_s2r);
1281 	getsockpair(pipe_s2r_ctl);
1282 	getsockpair(pipe_r2r);
1283 
1284 	if (imsg_compose(se, IMSG_SOCKET_CONN, 0, 0, pipe_s2r[0],
1285 	    NULL, 0) == -1)
1286 		return (-1);
1287 	if (imsg_compose(rde, IMSG_SOCKET_CONN, 0, 0, pipe_s2r[1],
1288 	    NULL, 0) == -1)
1289 		return (-1);
1290 
1291 	if (imsg_compose(se, IMSG_SOCKET_CONN_CTL, 0, 0, pipe_s2r_ctl[0],
1292 	    NULL, 0) == -1)
1293 		return (-1);
1294 	if (imsg_compose(rde, IMSG_SOCKET_CONN_CTL, 0, 0, pipe_s2r_ctl[1],
1295 	    NULL, 0) == -1)
1296 		return (-1);
1297 
1298 	if (imsg_compose(roa, IMSG_SOCKET_CONN_RTR, 0, 0, pipe_r2r[0],
1299 	    NULL, 0) == -1)
1300 		return (-1);
1301 	if (imsg_compose(rde, IMSG_SOCKET_CONN_RTR, 0, 0, pipe_r2r[1],
1302 	    NULL, 0) == -1)
1303 		return (-1);
1304 
1305 	return (0);
1306 }
1307 
1308 void
1309 bgpd_rtr_connect(struct rtr_config *r)
1310 {
1311 	struct connect_elm *ce;
1312 	struct sockaddr *sa;
1313 	socklen_t len;
1314 
1315 	if (connect_cnt >= MAX_CONNECT_CNT) {
1316 		log_warnx("rtr %s: too many concurrent connection requests",
1317 		    r->descr);
1318 		return;
1319 	}
1320 
1321 	if ((ce = calloc(1, sizeof(*ce))) == NULL) {
1322 		log_warn("rtr %s", r->descr);
1323 		return;
1324 	}
1325 
1326 	ce->id = r->id;
1327 	ce->fd = socket(aid2af(r->remote_addr.aid),
1328 	     SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP);
1329 	if (ce->fd == -1) {
1330 		log_warn("rtr %s", r->descr);
1331 		free(ce);
1332 		return;
1333 	}
1334 
1335 	if ((sa = addr2sa(&r->local_addr, 0, &len)) != NULL) {
1336 		if (bind(ce->fd, sa, len) == -1) {
1337 			log_warn("rtr %s: bind to %s", r->descr,
1338 			    log_addr(&r->local_addr));
1339 			close(ce->fd);
1340 			free(ce);
1341 			return;
1342 		}
1343 	}
1344 
1345 	sa = addr2sa(&r->remote_addr, r->remote_port, &len);
1346 	if (connect(ce->fd, sa, len) == -1) {
1347 		if (errno != EINPROGRESS) {
1348 			log_warn("rtr %s: connect to %s:%u", r->descr,
1349 			    log_addr(&r->remote_addr), r->remote_port);
1350 			close(ce->fd);
1351 			free(ce);
1352 			return;
1353 		}
1354 		TAILQ_INSERT_TAIL(&connect_queue, ce, entry);
1355 		connect_cnt++;
1356 		return;
1357 	}
1358 
1359 	imsg_compose(ibuf_rtr, IMSG_SOCKET_CONN, ce->id, 0, ce->fd, NULL, 0);
1360 	free(ce);
1361 }
1362 
1363 void
1364 bgpd_rtr_connect_done(int fd, struct bgpd_config *conf)
1365 {
1366 	struct rtr_config *r;
1367 	struct connect_elm *ce;
1368 	int error = 0;
1369 	socklen_t len;
1370 
1371 	TAILQ_FOREACH(ce, &connect_queue, entry) {
1372 		if (ce->fd == fd)
1373 			break;
1374 	}
1375 	if (ce == NULL)
1376 		fatalx("connect entry not found");
1377 
1378 	TAILQ_REMOVE(&connect_queue, ce, entry);
1379 	connect_cnt--;
1380 
1381 	SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1382 		if (ce->id == r->id)
1383 			break;
1384 	}
1385 	if (r == NULL) {
1386 		log_warnx("rtr id %d no longer exists", ce->id);
1387 		goto fail;
1388 	}
1389 
1390 	len = sizeof(error);
1391 	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &len) == -1) {
1392 		log_warn("rtr %s: getsockopt SO_ERROR", r->descr);
1393 		goto fail;
1394 	}
1395 
1396 	if (error != 0) {
1397 		errno = error;
1398 		log_warn("rtr %s: connect to %s:%u", r->descr,
1399 		    log_addr(&r->remote_addr), r->remote_port);
1400 		goto fail;
1401 	}
1402 
1403 	imsg_compose(ibuf_rtr, IMSG_SOCKET_CONN, ce->id, 0, ce->fd, NULL, 0);
1404 	free(ce);
1405 	return;
1406 
1407 fail:
1408 	close(fd);
1409 	free(ce);
1410 }
1411