xref: /openbsd/usr.sbin/bgpd/bgpd.c (revision 9ea232b5)
1 /*	$OpenBSD: bgpd.c,v 1.262 2024/01/09 13:41:32 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/socket.h>
21 #include <sys/wait.h>
22 #include <netinet/in.h>
23 #include <arpa/inet.h>
24 #include <err.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <poll.h>
28 #include <pwd.h>
29 #include <signal.h>
30 #include <stddef.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <syslog.h>
35 #include <unistd.h>
36 
37 #include "bgpd.h"
38 #include "session.h"
39 #include "log.h"
40 #include "version.h"
41 
/*
 * Prototypes for the functions of the parent process.  Not every definition
 * is necessarily located in the same part of the file as its first use.
 */
void		sighdlr(int);
__dead void	usage(void);
int		main(int, char *[]);
pid_t		start_child(enum bgpd_process, char *, int, int, int);
int		send_filterset(struct imsgbuf *, struct filter_set_head *);
int		reconfigure(char *, struct bgpd_config *);
int		send_config(struct bgpd_config *);
int		dispatch_imsg(struct imsgbuf *, int, struct bgpd_config *);
int		control_setup(struct bgpd_config *);
static void	getsockpair(int [2]);
int		imsg_send_sockets(struct imsgbuf *, struct imsgbuf *,
		    struct imsgbuf *);
void		bgpd_rtr_connect(struct rtr_config *);
void		bgpd_rtr_connect_done(int, struct bgpd_config *);
56 
/* copy of conf->flags, refreshed by send_config(), read by bgpd_oknexthop() */
int			 cflags;
/* flags raised by sighdlr() and consumed by the main loop */
volatile sig_atomic_t	 mrtdump;
volatile sig_atomic_t	 quit;
volatile sig_atomic_t	 reconfig;
/* pid taken from the IMSG_CTL_RELOAD request; 0 if no reply is owed */
pid_t			 reconfpid;
/* number of RECONF_DRAIN/RECONF_DONE replies still outstanding */
int			 reconfpending;
/* imsg channels to the session engine, the RDE and the RTR engine */
struct imsgbuf		*ibuf_se;
struct imsgbuf		*ibuf_rde;
struct imsgbuf		*ibuf_rtr;
/* RIB definitions; drained and sent to the RDE by send_config() */
struct rib_names	 ribnames = SIMPLEQ_HEAD_INITIALIZER(ribnames);
/* control socket path, strdup'd from conf->csock in control_setup() */
char			*cname;
/* NOTE(review): presumably the restricted control socket path - confirm */
char			*rcname;
69 
/*
 * One pending connection attempt.  The main loop polls every queued fd for
 * POLLOUT and passes it to bgpd_rtr_connect_done() once an event is reported.
 */
struct connect_elm {
	TAILQ_ENTRY(connect_elm)	entry;
	uint32_t			id;
	int				fd;
};

TAILQ_HEAD(, connect_elm)	connect_queue = \
				    TAILQ_HEAD_INITIALIZER(connect_queue);
/* used by main() to size the pollfd array (PFD_CONNECT_START + connect_cnt) */
u_int				connect_cnt;
/* NOTE(review): presumably the cap on connect_cnt; enforced elsewhere */
#define MAX_CONNECT_CNT		32
80 
81 void
82 sighdlr(int sig)
83 {
84 	switch (sig) {
85 	case SIGTERM:
86 	case SIGINT:
87 		quit = 1;
88 		break;
89 	case SIGHUP:
90 		reconfig = 1;
91 		break;
92 	case SIGALRM:
93 	case SIGUSR1:
94 		mrtdump = 1;
95 		break;
96 	}
97 }
98 
/*
 * Print the command line synopsis to stderr and terminate with exit
 * status 1.  Never returns.
 */
__dead void
usage(void)
{
	extern char *__progname;

	fprintf(stderr, "usage: %s [-cdnvV] [-D macro=value] [-f file]\n",
	    __progname);
	exit(1);
}
108 
/*
 * Fixed slots of the pollfd array used by main(); the fds of pending
 * connects follow from PFD_CONNECT_START onwards.
 */
#define PFD_PIPE_SESSION	0
#define PFD_PIPE_RDE		1
#define PFD_PIPE_RTR		2
#define PFD_SOCK_ROUTE		3
#define PFD_SOCK_PFKEY		4
#define PFD_CONNECT_START	5
/* upper bound for the poll timeout, in seconds (scaled by 1000 for poll) */
#define MAX_TIMEOUT		3600

/* BGPD_OPT_* bits collected from the command line */
int	 cmd_opts;
118 
119 int
120 main(int argc, char *argv[])
121 {
122 	struct bgpd_config	*conf;
123 	enum bgpd_process	 proc = PROC_MAIN;
124 	struct rde_rib		*rr;
125 	struct peer		*p;
126 	struct pollfd		*pfd = NULL;
127 	struct connect_elm	*ce;
128 	time_t			 timeout;
129 	pid_t			 se_pid = 0, rde_pid = 0, rtr_pid = 0, pid;
130 	char			*conffile;
131 	char			*saved_argv0;
132 	u_int			 pfd_elms = 0, npfd, i;
133 	int			 debug = 0;
134 	int			 rfd, keyfd;
135 	int			 ch, status;
136 	int			 pipe_m2s[2];
137 	int			 pipe_m2r[2];
138 	int			 pipe_m2roa[2];
139 
140 	conffile = CONFFILE;
141 
142 	log_init(1, LOG_DAEMON);	/* log to stderr until daemonized */
143 	log_procinit(log_procnames[PROC_MAIN]);
144 	log_setverbose(1);
145 
146 	saved_argv0 = argv[0];
147 	if (saved_argv0 == NULL)
148 		saved_argv0 = "bgpd";
149 
150 	while ((ch = getopt(argc, argv, "cdD:f:nRSTvV")) != -1) {
151 		switch (ch) {
152 		case 'c':
153 			cmd_opts |= BGPD_OPT_FORCE_DEMOTE;
154 			break;
155 		case 'd':
156 			debug = 1;
157 			break;
158 		case 'D':
159 			if (cmdline_symset(optarg) < 0)
160 				log_warnx("could not parse macro definition %s",
161 				    optarg);
162 			break;
163 		case 'f':
164 			conffile = optarg;
165 			break;
166 		case 'n':
167 			cmd_opts |= BGPD_OPT_NOACTION;
168 			break;
169 		case 'v':
170 			if (cmd_opts & BGPD_OPT_VERBOSE)
171 				cmd_opts |= BGPD_OPT_VERBOSE2;
172 			cmd_opts |= BGPD_OPT_VERBOSE;
173 			break;
174 		case 'R':
175 			proc = PROC_RDE;
176 			break;
177 		case 'S':
178 			proc = PROC_SE;
179 			break;
180 		case 'T':
181 			proc = PROC_RTR;
182 			break;
183 		case 'V':
184 			fprintf(stderr, "OpenBGPD %s\n", BGPD_VERSION);
185 			return 0;
186 		default:
187 			usage();
188 			/* NOTREACHED */
189 		}
190 	}
191 
192 	argc -= optind;
193 	argv += optind;
194 	if (argc > 0)
195 		usage();
196 
197 	if (cmd_opts & BGPD_OPT_NOACTION) {
198 		if ((conf = parse_config(conffile, NULL, NULL)) == NULL)
199 			exit(1);
200 
201 		if (cmd_opts & BGPD_OPT_VERBOSE)
202 			print_config(conf, &ribnames);
203 		else
204 			fprintf(stderr, "configuration OK\n");
205 
206 		while ((rr = SIMPLEQ_FIRST(&ribnames)) != NULL) {
207 			SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
208 			free(rr);
209 		}
210 		free_config(conf);
211 		exit(0);
212 	}
213 
214 	switch (proc) {
215 	case PROC_MAIN:
216 		break;
217 	case PROC_RDE:
218 		rde_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
219 		/* NOTREACHED */
220 	case PROC_SE:
221 		session_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
222 		/* NOTREACHED */
223 	case PROC_RTR:
224 		rtr_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
225 		/* NOTREACHED */
226 	}
227 
228 	if (geteuid())
229 		errx(1, "need root privileges");
230 
231 	if (getpwnam(BGPD_USER) == NULL)
232 		errx(1, "unknown user %s", BGPD_USER);
233 
234 	if ((conf = parse_config(conffile, NULL, NULL)) == NULL) {
235 		log_warnx("config file %s has errors", conffile);
236 		exit(1);
237 	}
238 
239 	if (prepare_listeners(conf) == -1)
240 		exit(1);
241 
242 	log_init(debug, LOG_DAEMON);
243 	log_setverbose(cmd_opts & BGPD_OPT_VERBOSE);
244 
245 	if (!debug)
246 		daemon(1, 0);
247 
248 	log_info("startup");
249 
250 	getsockpair(pipe_m2s);
251 	getsockpair(pipe_m2r);
252 	getsockpair(pipe_m2roa);
253 
254 	/* fork children */
255 	rde_pid = start_child(PROC_RDE, saved_argv0, pipe_m2r[1], debug,
256 	    cmd_opts & BGPD_OPT_VERBOSE);
257 	se_pid = start_child(PROC_SE, saved_argv0, pipe_m2s[1], debug,
258 	    cmd_opts & BGPD_OPT_VERBOSE);
259 	rtr_pid = start_child(PROC_RTR, saved_argv0, pipe_m2roa[1], debug,
260 	    cmd_opts & BGPD_OPT_VERBOSE);
261 
262 	signal(SIGTERM, sighdlr);
263 	signal(SIGINT, sighdlr);
264 	signal(SIGHUP, sighdlr);
265 	signal(SIGALRM, sighdlr);
266 	signal(SIGUSR1, sighdlr);
267 	signal(SIGPIPE, SIG_IGN);
268 
269 	if ((ibuf_se = malloc(sizeof(struct imsgbuf))) == NULL ||
270 	    (ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL ||
271 	    (ibuf_rtr = malloc(sizeof(struct imsgbuf))) == NULL)
272 		fatal(NULL);
273 	imsg_init(ibuf_se, pipe_m2s[0]);
274 	imsg_init(ibuf_rde, pipe_m2r[0]);
275 	imsg_init(ibuf_rtr, pipe_m2roa[0]);
276 	mrt_init(ibuf_rde, ibuf_se);
277 	if (kr_init(&rfd, conf->fib_priority) == -1)
278 		quit = 1;
279 	keyfd = pfkey_init();
280 
281 	/*
282 	 * rpath, read config file
283 	 * cpath, unlink control socket
284 	 * fattr, chmod on control socket
285 	 * wpath, needed if we are doing mrt dumps
286 	 *
287 	 * pledge placed here because kr_init() does a setsockopt on the
288 	 * routing socket thats not allowed at all.
289 	 */
290 #if 0
291 	/*
292 	 * disabled because we do ioctls on /dev/pf and SIOCSIFGATTR
293 	 * this needs some redesign of bgpd to be fixed.
294 	 */
295 BROKEN	if (pledge("stdio rpath wpath cpath fattr unix route recvfd sendfd",
296 	    NULL) == -1)
297 		fatal("pledge");
298 #endif
299 
300 	if (imsg_send_sockets(ibuf_se, ibuf_rde, ibuf_rtr))
301 		fatal("could not establish imsg links");
302 	/* control setup needs to happen late since it sends imsgs */
303 	if (control_setup(conf) == -1)
304 		quit = 1;
305 	if (send_config(conf) != 0)
306 		quit = 1;
307 	if (pftable_clear_all() != 0)
308 		quit = 1;
309 
310 	while (quit == 0) {
311 		if (pfd_elms < PFD_CONNECT_START + connect_cnt) {
312 			struct pollfd *newp;
313 
314 			if ((newp = reallocarray(pfd,
315 			    PFD_CONNECT_START + connect_cnt,
316 			    sizeof(struct pollfd))) == NULL) {
317 				log_warn("could not resize pfd from %u -> %u"
318 				    " entries", pfd_elms, PFD_CONNECT_START +
319 				    connect_cnt);
320 				fatalx("exiting");
321 			}
322 			pfd = newp;
323 			pfd_elms = PFD_CONNECT_START + connect_cnt;
324 		}
325 		memset(pfd, 0, sizeof(struct pollfd) * pfd_elms);
326 
327 		timeout = mrt_timeout(conf->mrt);
328 
329 		pfd[PFD_SOCK_ROUTE].fd = rfd;
330 		pfd[PFD_SOCK_ROUTE].events = POLLIN;
331 
332 		pfd[PFD_SOCK_PFKEY].fd = keyfd;
333 		pfd[PFD_SOCK_PFKEY].events = POLLIN;
334 
335 		set_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se);
336 		set_pollfd(&pfd[PFD_PIPE_RDE], ibuf_rde);
337 		set_pollfd(&pfd[PFD_PIPE_RTR], ibuf_rtr);
338 
339 		npfd = PFD_CONNECT_START;
340 		TAILQ_FOREACH(ce, &connect_queue, entry) {
341 			pfd[npfd].fd = ce->fd;
342 			pfd[npfd++].events = POLLOUT;
343 			if (npfd > pfd_elms)
344 				fatalx("polli pfd overflow");
345 		}
346 
347 		if (timeout < 0 || timeout > MAX_TIMEOUT)
348 			timeout = MAX_TIMEOUT;
349 		if (poll(pfd, npfd, timeout * 1000) == -1) {
350 			if (errno != EINTR) {
351 				log_warn("poll error");
352 				quit = 1;
353 			}
354 			goto next_loop;
355 		}
356 
357 		if (handle_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se) == -1) {
358 			log_warnx("main: Lost connection to SE");
359 			msgbuf_clear(&ibuf_se->w);
360 			free(ibuf_se);
361 			ibuf_se = NULL;
362 			quit = 1;
363 		} else {
364 			if (dispatch_imsg(ibuf_se, PFD_PIPE_SESSION, conf) ==
365 			    -1)
366 				quit = 1;
367 		}
368 
369 		if (handle_pollfd(&pfd[PFD_PIPE_RDE], ibuf_rde) == -1) {
370 			log_warnx("main: Lost connection to RDE");
371 			msgbuf_clear(&ibuf_rde->w);
372 			free(ibuf_rde);
373 			ibuf_rde = NULL;
374 			quit = 1;
375 		} else {
376 			if (dispatch_imsg(ibuf_rde, PFD_PIPE_RDE, conf) == -1)
377 				quit = 1;
378 		}
379 
380 		if (handle_pollfd(&pfd[PFD_PIPE_RTR], ibuf_rtr) == -1) {
381 			log_warnx("main: Lost connection to RTR");
382 			msgbuf_clear(&ibuf_rtr->w);
383 			free(ibuf_rtr);
384 			ibuf_rtr = NULL;
385 			quit = 1;
386 		} else {
387 			if (dispatch_imsg(ibuf_rtr, PFD_PIPE_RTR, conf) == -1)
388 				quit = 1;
389 		}
390 
391 		if (pfd[PFD_SOCK_ROUTE].revents & POLLIN) {
392 			if (kr_dispatch_msg() == -1)
393 				quit = 1;
394 		}
395 
396 		if (pfd[PFD_SOCK_PFKEY].revents & POLLIN) {
397 			if (pfkey_read(keyfd, NULL) == -1) {
398 				log_warnx("pfkey_read failed, exiting...");
399 				quit = 1;
400 			}
401 		}
402 
403 		for (i = PFD_CONNECT_START; i < npfd; i++)
404 			if (pfd[i].revents != 0)
405 				bgpd_rtr_connect_done(pfd[i].fd, conf);
406 
407  next_loop:
408 		if (reconfig) {
409 			u_int	error;
410 
411 			reconfig = 0;
412 			switch (reconfigure(conffile, conf)) {
413 			case -1:	/* fatal error */
414 				quit = 1;
415 				break;
416 			case 0:		/* all OK */
417 				error = 0;
418 				break;
419 			case 2:
420 				log_info("previous reload still running");
421 				error = CTL_RES_PENDING;
422 				break;
423 			default:	/* parse error */
424 				log_warnx("config file %s has errors, "
425 				    "not reloading", conffile);
426 				error = CTL_RES_PARSE_ERROR;
427 				break;
428 			}
429 			if (reconfpid != 0) {
430 				send_imsg_session(IMSG_CTL_RESULT, reconfpid,
431 				    &error, sizeof(error));
432 				reconfpid = 0;
433 			}
434 		}
435 
436 		if (mrtdump) {
437 			mrtdump = 0;
438 			mrt_handler(conf->mrt);
439 		}
440 	}
441 
442 	/* close pipes */
443 	if (ibuf_se) {
444 		msgbuf_clear(&ibuf_se->w);
445 		close(ibuf_se->fd);
446 		free(ibuf_se);
447 		ibuf_se = NULL;
448 	}
449 	if (ibuf_rde) {
450 		msgbuf_clear(&ibuf_rde->w);
451 		close(ibuf_rde->fd);
452 		free(ibuf_rde);
453 		ibuf_rde = NULL;
454 	}
455 	if (ibuf_rtr) {
456 		msgbuf_clear(&ibuf_rtr->w);
457 		close(ibuf_rtr->fd);
458 		free(ibuf_rtr);
459 		ibuf_rtr = NULL;
460 	}
461 
462 	/* cleanup kernel data structures */
463 	carp_demote_shutdown();
464 	kr_shutdown();
465 	pftable_clear_all();
466 
467 	RB_FOREACH(p, peer_head, &conf->peers)
468 		pfkey_remove(p);
469 
470 	while ((rr = SIMPLEQ_FIRST(&ribnames)) != NULL) {
471 		SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
472 		free(rr);
473 	}
474 	free_config(conf);
475 
476 	log_debug("waiting for children to terminate");
477 	do {
478 		pid = wait(&status);
479 		if (pid == -1) {
480 			if (errno != EINTR && errno != ECHILD)
481 				fatal("wait");
482 		} else if (WIFSIGNALED(status)) {
483 			char *name = "unknown process";
484 			if (pid == rde_pid)
485 				name = "route decision engine";
486 			else if (pid == se_pid)
487 				name = "session engine";
488 			else if (pid == rtr_pid)
489 				name = "rtr engine";
490 			log_warnx("%s terminated; signal %d", name,
491 				WTERMSIG(status));
492 		}
493 	} while (pid != -1 || (pid == -1 && errno == EINTR));
494 
495 	free(rcname);
496 	free(cname);
497 
498 	log_info("terminating");
499 	return (0);
500 }
501 
502 pid_t
503 start_child(enum bgpd_process p, char *argv0, int fd, int debug, int verbose)
504 {
505 	char *argv[5];
506 	int argc = 0;
507 	pid_t pid;
508 
509 	switch (pid = fork()) {
510 	case -1:
511 		fatal("cannot fork");
512 	case 0:
513 		break;
514 	default:
515 		close(fd);
516 		return (pid);
517 	}
518 
519 	if (fd != 3) {
520 		if (dup2(fd, 3) == -1)
521 			fatal("cannot setup imsg fd");
522 	} else if (fcntl(fd, F_SETFD, 0) == -1)
523 		fatal("cannot setup imsg fd");
524 
525 	argv[argc++] = argv0;
526 	switch (p) {
527 	case PROC_MAIN:
528 		fatalx("Can not start main process");
529 	case PROC_RDE:
530 		argv[argc++] = "-R";
531 		break;
532 	case PROC_SE:
533 		argv[argc++] = "-S";
534 		break;
535 	case PROC_RTR:
536 		argv[argc++] = "-T";
537 		break;
538 	}
539 	if (debug)
540 		argv[argc++] = "-d";
541 	if (verbose)
542 		argv[argc++] = "-v";
543 	argv[argc++] = NULL;
544 
545 	execvp(argv0, argv);
546 	fatal("execvp");
547 }
548 
549 int
550 send_filterset(struct imsgbuf *i, struct filter_set_head *set)
551 {
552 	struct filter_set	*s;
553 
554 	TAILQ_FOREACH(s, set, entry)
555 		if (imsg_compose(i, IMSG_FILTER_SET, 0, 0, -1, s,
556 		    sizeof(struct filter_set)) == -1)
557 			return (-1);
558 	return (0);
559 }
560 
561 int
562 reconfigure(char *conffile, struct bgpd_config *conf)
563 {
564 	struct bgpd_config	*new_conf;
565 
566 	if (reconfpending)
567 		return (2);
568 
569 	log_info("rereading config");
570 	if ((new_conf = parse_config(conffile, &conf->peers,
571 	    &conf->rtrs)) == NULL)
572 		return (1);
573 
574 	merge_config(conf, new_conf);
575 
576 	if (prepare_listeners(conf) == -1) {
577 		return (1);
578 	}
579 
580 	if (control_setup(conf) == -1) {
581 		return (1);
582 	}
583 
584 	return send_config(conf);
585 }
586 
/*
 * Transmit the configuration held in conf to the three child processes.
 *
 * The SE, RDE and RTR engine each receive IMSG_RECONF_CONF first and
 * IMSG_RECONF_DRAIN last; in between each process gets the objects meant
 * for it.  Most lists are consumed while being sent: ribnames, prefixsets,
 * originsets, the roa and aspa trees, as_sets, filters and l3vpns are
 * emptied/freed here, and listener fds are set to -1 once handed to the SE.
 *
 * Side effects: sets reconfpending to 3 and refreshes cflags.
 *
 * Returns 0 on success and -1 if a message could not be queued or a
 * routing table could not be loaded.
 * NOTE(review): the error returns leave elements that were already
 * unlinked from their list unfreed.
 */
int
send_config(struct bgpd_config *conf)
{
	struct peer		*p;
	struct filter_rule	*r;
	struct listen_addr	*la;
	struct rde_rib		*rr;
	struct l3vpn		*vpn;
	struct as_set		*aset;
	struct prefixset	*ps;
	struct prefixset_item	*psi, *npsi;
	struct roa		*roa;
	struct aspa_set		*aspa;
	struct rtr_config	*rtr;
	struct flowspec_config	*f, *nf;

	reconfpending = 3;	/* one per child */

	expand_networks(conf, &conf->networks);
	SIMPLEQ_FOREACH(vpn, &conf->l3vpns, entry)
		expand_networks(conf, &vpn->net_l);

	cflags = conf->flags;

	/* start reconfiguration */
	if (imsg_compose(ibuf_se, IMSG_RECONF_CONF, 0, 0, -1,
	    conf, sizeof(*conf)) == -1)
		return (-1);
	if (imsg_compose(ibuf_rde, IMSG_RECONF_CONF, 0, 0, -1,
	    conf, sizeof(*conf)) == -1)
		return (-1);
	if (imsg_compose(ibuf_rtr, IMSG_RECONF_CONF, 0, 0, -1,
	    conf, sizeof(*conf)) == -1)
		return (-1);

	/* listeners go to the SE together with their fd */
	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
		if (imsg_compose(ibuf_se, IMSG_RECONF_LISTENER, 0, 0, la->fd,
		    la, sizeof(*la)) == -1)
			return (-1);
		la->fd = -1;
	}

	/* adjust fib syncing on reload */
	ktable_preload();

	/* RIBs for the RDE */
	while ((rr = SIMPLEQ_FIRST(&ribnames))) {
		SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
		if (ktable_update(rr->rtableid, rr->name, rr->flags) == -1) {
			log_warnx("failed to load routing table %d",
			    rr->rtableid);
			return (-1);
		}
		if (imsg_compose(ibuf_rde, IMSG_RECONF_RIB, 0, 0, -1,
		    rr, sizeof(*rr)) == -1)
			return (-1);
		free(rr);
	}

	/* send peer list to the SE */
	RB_FOREACH(p, peer_head, &conf->peers) {
		if (imsg_compose(ibuf_se, IMSG_RECONF_PEER, p->conf.id, 0, -1,
		    &p->conf, sizeof(p->conf)) == -1)
			return (-1);

		/* a pfkey failure is only logged, it does not abort */
		if (p->reconf_action == RECONF_REINIT)
			if (pfkey_establish(p) == -1)
				log_peer_warnx(&p->conf, "pfkey setup failed");
	}

	/* networks go via kroute to the RDE */
	kr_net_reload(conf->default_tableid, 0, &conf->networks);

	/* flowspec goes directly to the RDE, also remove old objects */
	RB_FOREACH_SAFE(f, flowspec_tree, &conf->flowspecs, nf) {
		if (f->reconf_action != RECONF_DELETE) {
			if (imsg_compose(ibuf_rde, IMSG_FLOWSPEC_ADD, 0, 0, -1,
			    f->flow, FLOWSPEC_SIZE + f->flow->len) == -1)
				return (-1);
			if (send_filterset(ibuf_rde, &f->attrset) == -1)
				return (-1);
			if (imsg_compose(ibuf_rde, IMSG_FLOWSPEC_DONE, 0, 0, -1,
			    NULL, 0) == -1)
				return (-1);
		} else {
			if (imsg_compose(ibuf_rde, IMSG_FLOWSPEC_REMOVE, 0, 0,
			    -1, f->flow, FLOWSPEC_SIZE + f->flow->len) == -1)
				return (-1);
			RB_REMOVE(flowspec_tree, &conf->flowspecs, f);
			flowspec_free(f);
		}
	}

	/* prefixsets for filters in the RDE */
	while ((ps = SIMPLEQ_FIRST(&conf->prefixsets)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(&conf->prefixsets, entry);
		if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET, 0, 0, -1,
		    ps->name, sizeof(ps->name)) == -1)
			return (-1);
		RB_FOREACH_SAFE(psi, prefixset_tree, &ps->psitems, npsi) {
			RB_REMOVE(prefixset_tree, &ps->psitems, psi);
			if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET_ITEM,
			    0, 0, -1, psi, sizeof(*psi)) == -1)
				return (-1);
			free(psi);
		}
		free(ps);
	}

	/* originsets for filters in the RDE */
	while ((ps = SIMPLEQ_FIRST(&conf->originsets)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(&conf->originsets, entry);
		if (imsg_compose(ibuf_rde, IMSG_RECONF_ORIGIN_SET, 0, 0, -1,
		    ps->name, sizeof(ps->name)) == -1)
			return (-1);
		RB_FOREACH(roa, roa_tree, &ps->roaitems) {
			if (imsg_compose(ibuf_rde, IMSG_RECONF_ROA_ITEM, 0, 0,
			    -1, roa, sizeof(*roa)) == -1)
				return (-1);
		}
		free_roatree(&ps->roaitems);
		free(ps);
	}

	/* roa table, aspa table and rtr config are sent to the RTR engine */
	RB_FOREACH(roa, roa_tree, &conf->roa) {
		if (imsg_compose(ibuf_rtr, IMSG_RECONF_ROA_ITEM, 0, 0,
		    -1, roa, sizeof(*roa)) == -1)
			return (-1);
	}
	free_roatree(&conf->roa);
	RB_FOREACH(aspa, aspa_tree, &conf->aspa) {
		/* fixed part up to (not including) the tas pointer ... */
		if (imsg_compose(ibuf_rtr, IMSG_RECONF_ASPA, 0, 0,
		    -1, aspa, offsetof(struct aspa_set, tas)) == -1)
			return (-1);
		/* ... followed by the aspa->num elements tas points to */
		if (imsg_compose(ibuf_rtr, IMSG_RECONF_ASPA_TAS, 0, 0,
		    -1, aspa->tas, sizeof(*aspa->tas) * aspa->num) == -1)
			return (-1);
		if (imsg_compose(ibuf_rtr, IMSG_RECONF_ASPA_DONE, 0, 0, -1,
		    NULL, 0) == -1)
			return -1;
	}
	free_aspatree(&conf->aspa);
	SIMPLEQ_FOREACH(rtr, &conf->rtrs, entry) {
		if (imsg_compose(ibuf_rtr, IMSG_RECONF_RTR_CONFIG, rtr->id,
		    0, -1, rtr->descr, sizeof(rtr->descr)) == -1)
			return (-1);
	}

	/* as-sets for filters in the RDE */
	while ((aset = SIMPLEQ_FIRST(&conf->as_sets)) != NULL) {
		struct ibuf *wbuf;
		uint32_t *as;
		size_t i, l, n;

		SIMPLEQ_REMOVE_HEAD(&conf->as_sets, entry);

		/* header: element count followed by the set name */
		as = set_get(aset->set, &n);
		if ((wbuf = imsg_create(ibuf_rde, IMSG_RECONF_AS_SET, 0, 0,
		    sizeof(n) + sizeof(aset->name))) == NULL)
			return -1;
		if (imsg_add(wbuf, &n, sizeof(n)) == -1 ||
		    imsg_add(wbuf, aset->name, sizeof(aset->name)) == -1)
			return -1;
		imsg_close(ibuf_rde, wbuf);

		/* the members are sent in chunks of at most 1024 entries */
		for (i = 0; i < n; i += l) {
			l = (n - i > 1024 ? 1024 : n - i);
			if (imsg_compose(ibuf_rde, IMSG_RECONF_AS_SET_ITEMS,
			    0, 0, -1, as + i, l * sizeof(*as)) == -1)
				return -1;
		}

		if (imsg_compose(ibuf_rde, IMSG_RECONF_AS_SET_DONE, 0, 0, -1,
		    NULL, 0) == -1)
			return -1;

		set_free(aset->set);
		free(aset);
	}

	/* filters for the RDE */
	while ((r = TAILQ_FIRST(conf->filters)) != NULL) {
		TAILQ_REMOVE(conf->filters, r, entry);
		/* the rule's filter set precedes the rule itself */
		if (send_filterset(ibuf_rde, &r->set) == -1)
			return (-1);
		if (imsg_compose(ibuf_rde, IMSG_RECONF_FILTER, 0, 0, -1,
		    r, sizeof(struct filter_rule)) == -1)
			return (-1);
		filterset_free(&r->set);
		free(r);
	}

	/* l3vpns for the RDE */
	while ((vpn = SIMPLEQ_FIRST(&conf->l3vpns)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(&conf->l3vpns, entry);
		if (ktable_update(vpn->rtableid, vpn->descr, vpn->flags) ==
		    -1) {
			log_warnx("failed to load routing table %d",
			    vpn->rtableid);
			return (-1);
		}
		/* networks go via kroute to the RDE */
		kr_net_reload(vpn->rtableid, vpn->rd, &vpn->net_l);

		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN, 0, 0, -1,
		    vpn, sizeof(*vpn)) == -1)
			return (-1);

		/* export targets */
		if (send_filterset(ibuf_rde, &vpn->export) == -1)
			return (-1);
		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_EXPORT, 0, 0,
		    -1, NULL, 0) == -1)
			return (-1);
		filterset_free(&vpn->export);

		/* import targets */
		if (send_filterset(ibuf_rde, &vpn->import) == -1)
			return (-1);
		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_IMPORT, 0, 0,
		    -1, NULL, 0) == -1)
			return (-1);
		filterset_free(&vpn->import);

		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_DONE, 0, 0,
		    -1, NULL, 0) == -1)
			return (-1);

		free(vpn);
	}

	/* send a drain message to know when all messages were processed */
	if (imsg_compose(ibuf_se, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
		return (-1);
	if (imsg_compose(ibuf_rde, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
		return (-1);
	if (imsg_compose(ibuf_rtr, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
		return (-1);

	/* mrt changes can be sent out of band */
	mrt_reconfigure(conf->mrt);
	return (0);
}
830 
/*
 * Handle every complete imsg currently buffered on imsgbuf.
 *
 * idx is the PFD_PIPE_* slot of the child the buffer belongs to.  Most
 * message types are only accepted from one particular child; a request
 * arriving on the wrong pipe is logged and dropped.  Messages with an
 * unexpected payload length are logged and dropped as well.
 *
 * Returns 0 when all pending messages were handled (or imsgbuf is NULL),
 * and -1 if imsg_get() failed or a handler reported an error, in which case
 * processing stops after the offending message.
 */
int
dispatch_imsg(struct imsgbuf *imsgbuf, int idx, struct bgpd_config *conf)
{
	struct imsg		 imsg;
	struct peer		*p;
	struct rtr_config	*r;
	struct kroute_full	 kf;
	struct bgpd_addr	 addr;
	struct pftable_msg	 pfmsg;
	struct demote_msg	 demote;
	char			 reason[REASON_LEN], ifname[IFNAMSIZ];
	ssize_t			 n;
	u_int			 rtableid;
	int			 rv, verbose;

	rv = 0;
	while (imsgbuf) {
		if ((n = imsg_get(imsgbuf, &imsg)) == -1)
			return (-1);

		/* no further complete message buffered */
		if (n == 0)
			break;

		switch (imsg_get_type(&imsg)) {
		case IMSG_KROUTE_CHANGE:
			if (idx != PFD_PIPE_RDE)
				log_warnx("route request not from RDE");
			else if (imsg_get_data(&imsg, &kf, sizeof(kf)) == -1)
				log_warn("wrong imsg len");
			else if (kr_change(imsg_get_id(&imsg), &kf))
				rv = -1;
			break;
		case IMSG_KROUTE_DELETE:
			if (idx != PFD_PIPE_RDE)
				log_warnx("route request not from RDE");
			else if (imsg_get_data(&imsg, &kf, sizeof(kf)) == -1)
				log_warn("wrong imsg len");
			else if (kr_delete(imsg_get_id(&imsg), &kf))
				rv = -1;
			break;
		case IMSG_KROUTE_FLUSH:
			if (idx != PFD_PIPE_RDE)
				log_warnx("route request not from RDE");
			else if (kr_flush(imsg_get_id(&imsg)))
				rv = -1;
			break;
		case IMSG_NEXTHOP_ADD:
			if (idx != PFD_PIPE_RDE)
				log_warnx("nexthop request not from RDE");
			else if (imsg_get_data(&imsg, &addr, sizeof(addr)) ==
			    -1)
				log_warn("wrong imsg len");
			else {
				rtableid = conf->default_tableid;
				if (kr_nexthop_add(rtableid, &addr) == -1)
					rv = -1;
			}
			break;
		case IMSG_NEXTHOP_REMOVE:
			if (idx != PFD_PIPE_RDE)
				log_warnx("nexthop request not from RDE");
			else if (imsg_get_data(&imsg, &addr, sizeof(addr)) ==
			    -1)
				log_warn("wrong imsg len");
			else {
				rtableid = conf->default_tableid;
				kr_nexthop_delete(rtableid, &addr);
			}
			break;
		case IMSG_PFTABLE_ADD:
			if (idx != PFD_PIPE_RDE)
				log_warnx("pftable request not from RDE");
			else if (imsg_get_data(&imsg, &pfmsg, sizeof(pfmsg)) ==
			    -1)
				log_warn("wrong imsg len");
			else if (pftable_addr_add(&pfmsg) != 0)
				rv = -1;
			break;
		case IMSG_PFTABLE_REMOVE:
			if (idx != PFD_PIPE_RDE)
				log_warnx("pftable request not from RDE");
			else if (imsg_get_data(&imsg, &pfmsg, sizeof(pfmsg)) ==
			    -1)
				log_warn("wrong imsg len");
			else if (pftable_addr_remove(&pfmsg) != 0)
				rv = -1;
			break;
		case IMSG_PFTABLE_COMMIT:
			if (idx != PFD_PIPE_RDE)
				log_warnx("pftable request not from RDE");
			else if (pftable_commit() != 0)
				rv = -1;
			break;
		case IMSG_PFKEY_RELOAD:
			if (idx != PFD_PIPE_SESSION) {
				log_warnx("pfkey reload request not from SE");
				break;
			}
			/* unknown peer ids are silently ignored */
			p = getpeerbyid(conf, imsg_get_id(&imsg));
			if (p != NULL) {
				if (pfkey_establish(p) == -1)
					log_peer_warnx(&p->conf,
					    "pfkey setup failed");
			}
			break;
		case IMSG_CTL_RELOAD:
			if (idx != PFD_PIPE_SESSION)
				log_warnx("reload request not from SE");
			else {
				/*
				 * Only flag the reload; main() performs it
				 * and replies to reconfpid afterwards.
				 */
				reconfig = 1;
				reconfpid = imsg_get_pid(&imsg);
				if (imsg_get_data(&imsg, reason,
				    sizeof(reason)) == 0 && reason[0] != '\0')
					log_info("reload due to: %s",
					    log_reason(reason));
			}
			break;
		case IMSG_CTL_FIB_COUPLE:
			if (idx != PFD_PIPE_SESSION)
				log_warnx("couple request not from SE");
			else
				kr_fib_couple(imsg_get_id(&imsg));
			break;
		case IMSG_CTL_FIB_DECOUPLE:
			if (idx != PFD_PIPE_SESSION)
				log_warnx("decouple request not from SE");
			else
				kr_fib_decouple(imsg_get_id(&imsg));
			break;
		case IMSG_CTL_KROUTE:
		case IMSG_CTL_KROUTE_ADDR:
		case IMSG_CTL_SHOW_NEXTHOP:
		case IMSG_CTL_SHOW_INTERFACE:
		case IMSG_CTL_SHOW_FIB_TABLES:
			if (idx != PFD_PIPE_SESSION)
				log_warnx("kroute request not from SE");
			else
				kr_show_route(&imsg);
			break;
		case IMSG_SESSION_DEPENDON:
			if (idx != PFD_PIPE_SESSION)
				log_warnx("DEPENDON request not from SE");
			else if (imsg_get_data(&imsg, ifname, sizeof(ifname)) ==
			    -1)
				log_warn("wrong imsg len");
			else
				kr_ifinfo(ifname);
			break;
		case IMSG_DEMOTE:
			if (idx != PFD_PIPE_SESSION)
				log_warnx("demote request not from SE");
			else if (imsg_get_data(&imsg, &demote, sizeof(demote))
			    == -1)
				log_warn("wrong imsg len");
			else
				carp_demote_set(demote.demote_group,
				    demote.level);
			break;
		case IMSG_CTL_LOG_VERBOSE:
			/* already checked by SE */
			if (imsg_get_data(&imsg, &verbose, sizeof(verbose)) ==
			    -1)
				log_warn("wrong imsg len");
			else
				log_setverbose(verbose);
			break;
		case IMSG_RECONF_DONE:
			if (reconfpending == 0) {
				log_warnx("unexpected RECONF_DONE received");
				break;
			}
			if (idx == PFD_PIPE_SESSION) {
				/* RDE and RTR engine can reload concurrently */
				imsg_compose(ibuf_rtr, IMSG_RECONF_DONE, 0,
				    0, -1, NULL, 0);
				imsg_compose(ibuf_rde, IMSG_RECONF_DONE, 0,
				    0, -1, NULL, 0);

				/* finally fix kroute information */
				ktable_postload();

				/* redistribute list needs to be reloaded too */
				kr_reload();
			}
			/* every DONE, from whichever child, counts once */
			reconfpending--;
			break;
		case IMSG_RECONF_DRAIN:
			if (reconfpending == 0) {
				log_warnx("unexpected RECONF_DRAIN received");
				break;
			}
			reconfpending--;
			if (reconfpending == 0) {
				/*
				 * SE goes first to bring templated neighbors
				 * in sync.
				 */
				imsg_compose(ibuf_se, IMSG_RECONF_DONE, 0,
				    0, -1, NULL, 0);
				/* now count the DONE msgs, one per child */
				reconfpending = 3;
			}
			break;
		case IMSG_SOCKET_CONN:
			if (idx != PFD_PIPE_RTR) {
				log_warnx("connect request not from RTR");
			} else {
				/* look up the rtr config by the imsg id */
				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
					if (imsg_get_id(&imsg) == r->id)
						break;
				}
				if (r == NULL)
					log_warnx("unknown rtr id %d",
					    imsg_get_id(&imsg));
				else
					bgpd_rtr_connect(r);
			}
			break;
		case IMSG_CTL_SHOW_RTR:
			if (idx == PFD_PIPE_SESSION) {
				/*
				 * Request from the SE: ask the RTR engine
				 * about every configured rtr, then mark the
				 * end of the list.
				 */
				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
					imsg_compose(ibuf_rtr,
					    IMSG_CTL_SHOW_RTR, r->id,
					    imsg_get_pid(&imsg), -1, NULL, 0);
				}
				imsg_compose(ibuf_rtr, IMSG_CTL_END,
				    0, imsg_get_pid(&imsg), -1, NULL, 0);
			} else if (idx == PFD_PIPE_RTR) {
				/*
				 * Answer from the RTR engine: fill in the
				 * fields known from the config and pass the
				 * result on to the SE.
				 */
				struct ctl_show_rtr rtr;
				if (imsg_get_data(&imsg, &rtr, sizeof(rtr)) ==
				    -1) {
					log_warn("wrong imsg len");
					break;
				}

				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
					if (imsg_get_id(&imsg) == r->id)
						break;
				}
				if (r != NULL) {
					strlcpy(rtr.descr, r->descr,
					    sizeof(rtr.descr));
					rtr.local_addr = r->local_addr;
					rtr.remote_addr = r->remote_addr;
					rtr.remote_port = r->remote_port;

					imsg_compose(ibuf_se, IMSG_CTL_SHOW_RTR,
					    imsg_get_id(&imsg),
					    imsg_get_pid(&imsg), -1,
					    &rtr, sizeof(rtr));
				}
			}
			break;
		case IMSG_CTL_END:
		case IMSG_CTL_SHOW_TIMER:
			/* relayed unchanged from the RTR engine to the SE */
			if (idx != PFD_PIPE_RTR) {
				log_warnx("connect request not from RTR");
				break;
			}
			imsg_forward(ibuf_se, &imsg);
			break;
		default:
			break;
		}
		imsg_free(&imsg);
		if (rv != 0)
			return (rv);
	}
	return (0);
}
1100 
1101 void
1102 send_nexthop_update(struct kroute_nexthop *msg)
1103 {
1104 	char	*gw = NULL;
1105 
1106 	if (msg->gateway.aid)
1107 		if (asprintf(&gw, ": via %s",
1108 		    log_addr(&msg->gateway)) == -1) {
1109 			log_warn("send_nexthop_update");
1110 			quit = 1;
1111 		}
1112 
1113 	log_debug("nexthop %s now %s%s%s", log_addr(&msg->nexthop),
1114 	    msg->valid ? "valid" : "invalid",
1115 	    msg->connected ? ": directly connected" : "",
1116 	    msg->gateway.aid ? gw : "");
1117 
1118 	free(gw);
1119 
1120 	if (imsg_compose(ibuf_rde, IMSG_NEXTHOP_UPDATE, 0, 0, -1,
1121 	    msg, sizeof(struct kroute_nexthop)) == -1)
1122 		quit = 1;
1123 }
1124 
1125 void
1126 send_imsg_session(int type, pid_t pid, void *data, uint16_t datalen)
1127 {
1128 	imsg_compose(ibuf_se, type, 0, pid, -1, data, datalen);
1129 }
1130 
1131 int
1132 send_network(int type, struct network_config *net, struct filter_set_head *h)
1133 {
1134 	if (quit)
1135 		return (0);
1136 	if (imsg_compose(ibuf_rde, type, 0, 0, -1, net,
1137 	    sizeof(struct network_config)) == -1)
1138 		return (-1);
1139 	/* networks that get deleted don't need to send the filter set */
1140 	if (type == IMSG_NETWORK_REMOVE)
1141 		return (0);
1142 	if (send_filterset(ibuf_rde, h) == -1)
1143 		return (-1);
1144 	if (imsg_compose(ibuf_rde, IMSG_NETWORK_DONE, 0, 0, -1, NULL, 0) == -1)
1145 		return (-1);
1146 
1147 	return (0);
1148 }
1149 
1150 /*
1151  * Return true if a route can be used for nexthop resolution.
1152  */
1153 int
1154 bgpd_oknexthop(struct kroute_full *kf)
1155 {
1156 	if (kf->flags & F_BGPD)
1157 		return ((cflags & BGPD_FLAG_NEXTHOP_BGP) != 0);
1158 
1159 	if (kf->prefixlen == 0)
1160 		return ((cflags & BGPD_FLAG_NEXTHOP_DEFAULT) != 0);
1161 
1162 	/* any other route is fine */
1163 	return (1);
1164 }
1165 
1166 int
1167 control_setup(struct bgpd_config *conf)
1168 {
1169 	int fd, restricted;
1170 
1171 	/* control socket is outside chroot */
1172 	if (!cname || strcmp(cname, conf->csock)) {
1173 		if (cname) {
1174 			free(cname);
1175 		}
1176 		if ((cname = strdup(conf->csock)) == NULL)
1177 			fatal("strdup");
1178 		if (control_check(cname) == -1)
1179 			return (-1);
1180 		if ((fd = control_init(0, cname)) == -1)
1181 			fatalx("control socket setup failed");
1182 		if (control_listen(fd) == -1)
1183 			fatalx("control socket setup failed");
1184 		restricted = 0;
1185 		if (imsg_compose(ibuf_se, IMSG_RECONF_CTRL, 0, 0, fd,
1186 		    &restricted, sizeof(restricted)) == -1)
1187 			return (-1);
1188 	}
1189 	if (!conf->rcsock) {
1190 		/* remove restricted socket */
1191 		free(rcname);
1192 		rcname = NULL;
1193 	} else if (!rcname || strcmp(rcname, conf->rcsock)) {
1194 		if (rcname) {
1195 			free(rcname);
1196 		}
1197 		if ((rcname = strdup(conf->rcsock)) == NULL)
1198 			fatal("strdup");
1199 		if (control_check(rcname) == -1)
1200 			return (-1);
1201 		if ((fd = control_init(1, rcname)) == -1)
1202 			fatalx("control socket setup failed");
1203 		if (control_listen(fd) == -1)
1204 			fatalx("control socket setup failed");
1205 		restricted = 1;
1206 		if (imsg_compose(ibuf_se, IMSG_RECONF_CTRL, 0, 0, fd,
1207 		    &restricted, sizeof(restricted)) == -1)
1208 			return (-1);
1209 	}
1210 	return (0);
1211 }
1212 
1213 void
1214 set_pollfd(struct pollfd *pfd, struct imsgbuf *i)
1215 {
1216 	if (i == NULL || i->fd == -1) {
1217 		pfd->fd = -1;
1218 		return;
1219 	}
1220 	pfd->fd = i->fd;
1221 	pfd->events = POLLIN;
1222 	if (i->w.queued > 0)
1223 		pfd->events |= POLLOUT;
1224 }
1225 
1226 int
1227 handle_pollfd(struct pollfd *pfd, struct imsgbuf *i)
1228 {
1229 	ssize_t n;
1230 
1231 	if (i == NULL)
1232 		return (0);
1233 
1234 	if (pfd->revents & POLLOUT)
1235 		if (msgbuf_write(&i->w) <= 0 && errno != EAGAIN) {
1236 			log_warn("imsg write error");
1237 			close(i->fd);
1238 			i->fd = -1;
1239 			return (-1);
1240 		}
1241 
1242 	if (pfd->revents & POLLIN) {
1243 		if ((n = imsg_read(i)) == -1 && errno != EAGAIN) {
1244 			log_warn("imsg read error");
1245 			close(i->fd);
1246 			i->fd = -1;
1247 			return (-1);
1248 		}
1249 		if (n == 0) {
1250 			log_warnx("peer closed imsg connection");
1251 			close(i->fd);
1252 			i->fd = -1;
1253 			return (-1);
1254 		}
1255 	}
1256 	return (0);
1257 }
1258 
1259 static void
1260 getsockpair(int pipe[2])
1261 {
1262 	int bsize, i;
1263 
1264 	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK,
1265 	    PF_UNSPEC, pipe) == -1)
1266 		fatal("socketpair");
1267 
1268 	for (i = 0; i < 2; i++) {
1269 		for (bsize = MAX_SOCK_BUF; bsize >= 16 * 1024; bsize /= 2) {
1270 			if (setsockopt(pipe[i], SOL_SOCKET, SO_RCVBUF,
1271 			    &bsize, sizeof(bsize)) == -1) {
1272 				if (errno != ENOBUFS)
1273 					fatal("setsockopt(SO_RCVBUF, %d)",
1274 					    bsize);
1275 				log_warn("setsockopt(SO_RCVBUF, %d)", bsize);
1276 				continue;
1277 			}
1278 			break;
1279 		}
1280 	}
1281 	for (i = 0; i < 2; i++) {
1282 		for (bsize = MAX_SOCK_BUF; bsize >= 16 * 1024; bsize /= 2) {
1283 			if (setsockopt(pipe[i], SOL_SOCKET, SO_SNDBUF,
1284 			    &bsize, sizeof(bsize)) == -1) {
1285 				if (errno != ENOBUFS)
1286 					fatal("setsockopt(SO_SNDBUF, %d)",
1287 					    bsize);
1288 				log_warn("setsockopt(SO_SNDBUF, %d)", bsize);
1289 				continue;
1290 			}
1291 			break;
1292 		}
1293 	}
1294 }
1295 
1296 int
1297 imsg_send_sockets(struct imsgbuf *se, struct imsgbuf *rde, struct imsgbuf *rtr)
1298 {
1299 	int pipe_s2r[2];
1300 	int pipe_s2r_ctl[2];
1301 	int pipe_r2r[2];
1302 
1303 	getsockpair(pipe_s2r);
1304 	getsockpair(pipe_s2r_ctl);
1305 	getsockpair(pipe_r2r);
1306 
1307 	if (imsg_compose(se, IMSG_SOCKET_CONN, 0, 0, pipe_s2r[0],
1308 	    NULL, 0) == -1)
1309 		return (-1);
1310 	if (imsg_compose(rde, IMSG_SOCKET_CONN, 0, 0, pipe_s2r[1],
1311 	    NULL, 0) == -1)
1312 		return (-1);
1313 
1314 	if (imsg_compose(se, IMSG_SOCKET_CONN_CTL, 0, 0, pipe_s2r_ctl[0],
1315 	    NULL, 0) == -1)
1316 		return (-1);
1317 	if (imsg_compose(rde, IMSG_SOCKET_CONN_CTL, 0, 0, pipe_s2r_ctl[1],
1318 	    NULL, 0) == -1)
1319 		return (-1);
1320 
1321 	if (imsg_compose(rtr, IMSG_SOCKET_CONN_RTR, 0, 0, pipe_r2r[0],
1322 	    NULL, 0) == -1)
1323 		return (-1);
1324 	if (imsg_compose(rde, IMSG_SOCKET_CONN_RTR, 0, 0, pipe_r2r[1],
1325 	    NULL, 0) == -1)
1326 		return (-1);
1327 
1328 	return (0);
1329 }
1330 
1331 void
1332 bgpd_rtr_connect(struct rtr_config *r)
1333 {
1334 	struct connect_elm *ce;
1335 	struct sockaddr *sa;
1336 	socklen_t len;
1337 
1338 	if (connect_cnt >= MAX_CONNECT_CNT) {
1339 		log_warnx("rtr %s: too many concurrent connection requests",
1340 		    r->descr);
1341 		return;
1342 	}
1343 
1344 	if ((ce = calloc(1, sizeof(*ce))) == NULL) {
1345 		log_warn("rtr %s", r->descr);
1346 		return;
1347 	}
1348 
1349 	ce->id = r->id;
1350 	ce->fd = socket(aid2af(r->remote_addr.aid),
1351 	    SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP);
1352 	if (ce->fd == -1) {
1353 		log_warn("rtr %s", r->descr);
1354 		free(ce);
1355 		return;
1356 	}
1357 
1358 	if ((sa = addr2sa(&r->local_addr, 0, &len)) != NULL) {
1359 		if (bind(ce->fd, sa, len) == -1) {
1360 			log_warn("rtr %s: bind to %s", r->descr,
1361 			    log_addr(&r->local_addr));
1362 			close(ce->fd);
1363 			free(ce);
1364 			return;
1365 		}
1366 	}
1367 
1368 	sa = addr2sa(&r->remote_addr, r->remote_port, &len);
1369 	if (connect(ce->fd, sa, len) == -1) {
1370 		if (errno != EINPROGRESS) {
1371 			log_warn("rtr %s: connect to %s:%u", r->descr,
1372 			    log_addr(&r->remote_addr), r->remote_port);
1373 			close(ce->fd);
1374 			free(ce);
1375 			return;
1376 		}
1377 		TAILQ_INSERT_TAIL(&connect_queue, ce, entry);
1378 		connect_cnt++;
1379 		return;
1380 	}
1381 
1382 	imsg_compose(ibuf_rtr, IMSG_SOCKET_CONN, ce->id, 0, ce->fd, NULL, 0);
1383 	free(ce);
1384 }
1385 
1386 void
1387 bgpd_rtr_connect_done(int fd, struct bgpd_config *conf)
1388 {
1389 	struct rtr_config *r;
1390 	struct connect_elm *ce;
1391 	int error = 0;
1392 	socklen_t len;
1393 
1394 	TAILQ_FOREACH(ce, &connect_queue, entry) {
1395 		if (ce->fd == fd)
1396 			break;
1397 	}
1398 	if (ce == NULL)
1399 		fatalx("connect entry not found");
1400 
1401 	TAILQ_REMOVE(&connect_queue, ce, entry);
1402 	connect_cnt--;
1403 
1404 	SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1405 		if (ce->id == r->id)
1406 			break;
1407 	}
1408 	if (r == NULL) {
1409 		log_warnx("rtr id %d no longer exists", ce->id);
1410 		goto fail;
1411 	}
1412 
1413 	len = sizeof(error);
1414 	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &len) == -1) {
1415 		log_warn("rtr %s: getsockopt SO_ERROR", r->descr);
1416 		goto fail;
1417 	}
1418 
1419 	if (error != 0) {
1420 		errno = error;
1421 		log_warn("rtr %s: connect to %s:%u", r->descr,
1422 		    log_addr(&r->remote_addr), r->remote_port);
1423 		goto fail;
1424 	}
1425 
1426 	imsg_compose(ibuf_rtr, IMSG_SOCKET_CONN, ce->id, 0, ce->fd, NULL, 0);
1427 	free(ce);
1428 	return;
1429 
1430 fail:
1431 	close(fd);
1432 	free(ce);
1433 }
1434