xref: /openbsd/usr.sbin/bgpd/bgpd.c (revision f7421e07)
1 /*	$OpenBSD: bgpd.c,v 1.264 2024/05/15 09:09:38 job Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/socket.h>
21 #include <sys/wait.h>
22 #include <netinet/in.h>
23 #include <netinet/ip.h>
24 #include <netinet/tcp.h>
25 #include <arpa/inet.h>
26 #include <err.h>
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <poll.h>
30 #include <pwd.h>
31 #include <signal.h>
32 #include <stddef.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <syslog.h>
37 #include <unistd.h>
38 
39 #include "bgpd.h"
40 #include "session.h"
41 #include "log.h"
42 #include "version.h"
43 
/*
 * Prototypes for the main-process helpers declared in this file:
 * signal handling, child startup, (re)configuration and imsg dispatch,
 * plus the RTR connect helpers.
 */
44 void		sighdlr(int);
45 __dead void	usage(void);
46 int		main(int, char *[]);
47 pid_t		start_child(enum bgpd_process, char *, int, int, int);
48 int		send_filterset(struct imsgbuf *, struct filter_set_head *);
49 int		reconfigure(char *, struct bgpd_config *);
50 int		send_config(struct bgpd_config *);
51 int		dispatch_imsg(struct imsgbuf *, int, struct bgpd_config *);
52 int		control_setup(struct bgpd_config *);
53 static void	getsockpair(int [2]);
54 int		imsg_send_sockets(struct imsgbuf *, struct imsgbuf *,
55 		    struct imsgbuf *);
56 void		bgpd_rtr_connect(struct rtr_config *);
57 void		bgpd_rtr_connect_done(int, struct bgpd_config *);
58 
/* copy of conf->flags, refreshed by send_config() */
59 int			 cflags;
/* set by sighdlr() on SIGALRM/SIGUSR1; main() then runs mrt_handler() */
60 volatile sig_atomic_t	 mrtdump;
/* set on SIGTERM/SIGINT or on a fatal error; ends the main poll loop */
61 volatile sig_atomic_t	 quit;
/* set on SIGHUP or IMSG_CTL_RELOAD; main() then calls reconfigure() */
62 volatile sig_atomic_t	 reconfig;
/* pid taken from the IMSG_CTL_RELOAD request; gets the IMSG_CTL_RESULT */
63 pid_t			 reconfpid;
/* outstanding DRAIN/DONE replies; reconfigure() refuses while non-zero */
64 int			 reconfpending;
/* imsg channels to the session engine, the RDE and the RTR engine */
65 struct imsgbuf		*ibuf_se;
66 struct imsgbuf		*ibuf_rde;
67 struct imsgbuf		*ibuf_rtr;
/* queue of struct rde_rib; drained by send_config() and on exit */
68 struct rib_names	 ribnames = SIMPLEQ_HEAD_INITIALIZER(ribnames);
/* freed by main() on exit; assigned outside the code visible here */
69 char			*cname;
70 char			*rcname;
71 
/*
 * One entry per fd on connect_queue.  main() polls every queued fd for
 * POLLOUT and hands it to bgpd_rtr_connect_done() once it reports an event.
 */
72 struct connect_elm {
73 	TAILQ_ENTRY(connect_elm)	entry;
74 	uint32_t			id;
75 	int				fd;
76 };
77 
78 TAILQ_HEAD(, connect_elm)	connect_queue = \
79 				    TAILQ_HEAD_INITIALIZER(connect_queue);
/* main() sizes its pollfd array as PFD_CONNECT_START + connect_cnt */
80 u_int				connect_cnt;
/* NOTE(review): presumably the cap on connect_cnt; not used in this part */
81 #define MAX_CONNECT_CNT		32
82 
83 void
sighdlr(int sig)84 sighdlr(int sig)
85 {
86 	switch (sig) {
87 	case SIGTERM:
88 	case SIGINT:
89 		quit = 1;
90 		break;
91 	case SIGHUP:
92 		reconfig = 1;
93 		break;
94 	case SIGALRM:
95 	case SIGUSR1:
96 		mrtdump = 1;
97 		break;
98 	}
99 }
100 
101 __dead void
usage(void)102 usage(void)
103 {
104 	extern char *__progname;
105 
106 	fprintf(stderr, "usage: %s [-cdnvV] [-D macro=value] [-f file]\n",
107 	    __progname);
108 	exit(1);
109 }
110 
/*
 * Fixed slots in the pollfd array built by main(): the three imsg pipes,
 * the routing socket and the pfkey socket.  Slots from PFD_CONNECT_START
 * onwards are filled from connect_queue.
 */
111 #define PFD_PIPE_SESSION	0
112 #define PFD_PIPE_RDE		1
113 #define PFD_PIPE_RTR		2
114 #define PFD_SOCK_ROUTE		3
115 #define PFD_SOCK_PFKEY		4
116 #define PFD_CONNECT_START	5
/* upper bound for the poll timeout; main() multiplies it by 1000 for poll() */
117 #define MAX_TIMEOUT		3600
118 
/* BGPD_OPT_* bits collected from the command line in main() */
119 int	 cmd_opts;
120 
121 int
main(int argc,char * argv[])122 main(int argc, char *argv[])
123 {
124 	struct bgpd_config	*conf;
125 	enum bgpd_process	 proc = PROC_MAIN;
126 	struct rde_rib		*rr;
127 	struct peer		*p;
128 	struct pollfd		*pfd = NULL;
129 	struct connect_elm	*ce;
130 	time_t			 timeout;
131 	pid_t			 se_pid = 0, rde_pid = 0, rtr_pid = 0, pid;
132 	char			*conffile;
133 	char			*saved_argv0;
134 	u_int			 pfd_elms = 0, npfd, i;
135 	int			 debug = 0;
136 	int			 rfd, keyfd;
137 	int			 ch, status;
138 	int			 pipe_m2s[2];
139 	int			 pipe_m2r[2];
140 	int			 pipe_m2roa[2];
141 
142 	conffile = CONFFILE;
143 
144 	log_init(1, LOG_DAEMON);	/* log to stderr until daemonized */
145 	log_procinit(log_procnames[PROC_MAIN]);
146 	log_setverbose(1);
147 
148 	saved_argv0 = argv[0];
149 	if (saved_argv0 == NULL)
150 		saved_argv0 = "bgpd";
151 
152 	while ((ch = getopt(argc, argv, "cdD:f:nRSTvV")) != -1) {
153 		switch (ch) {
154 		case 'c':
155 			cmd_opts |= BGPD_OPT_FORCE_DEMOTE;
156 			break;
157 		case 'd':
158 			debug = 1;
159 			break;
160 		case 'D':
161 			if (cmdline_symset(optarg) < 0)
162 				log_warnx("could not parse macro definition %s",
163 				    optarg);
164 			break;
165 		case 'f':
166 			conffile = optarg;
167 			break;
168 		case 'n':
169 			cmd_opts |= BGPD_OPT_NOACTION;
170 			break;
171 		case 'v':
172 			if (cmd_opts & BGPD_OPT_VERBOSE)
173 				cmd_opts |= BGPD_OPT_VERBOSE2;
174 			cmd_opts |= BGPD_OPT_VERBOSE;
175 			break;
176 		case 'R':
177 			proc = PROC_RDE;
178 			break;
179 		case 'S':
180 			proc = PROC_SE;
181 			break;
182 		case 'T':
183 			proc = PROC_RTR;
184 			break;
185 		case 'V':
186 			fprintf(stderr, "OpenBGPD %s\n", BGPD_VERSION);
187 			return 0;
188 		default:
189 			usage();
190 			/* NOTREACHED */
191 		}
192 	}
193 
194 	argc -= optind;
195 	argv += optind;
196 	if (argc > 0)
197 		usage();
198 
199 	if (cmd_opts & BGPD_OPT_NOACTION) {
200 		if ((conf = parse_config(conffile, NULL, NULL)) == NULL)
201 			exit(1);
202 
203 		if (cmd_opts & BGPD_OPT_VERBOSE)
204 			print_config(conf, &ribnames);
205 		else
206 			fprintf(stderr, "configuration OK\n");
207 
208 		while ((rr = SIMPLEQ_FIRST(&ribnames)) != NULL) {
209 			SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
210 			free(rr);
211 		}
212 		free_config(conf);
213 		exit(0);
214 	}
215 
216 	switch (proc) {
217 	case PROC_MAIN:
218 		break;
219 	case PROC_RDE:
220 		rde_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
221 		/* NOTREACHED */
222 	case PROC_SE:
223 		session_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
224 		/* NOTREACHED */
225 	case PROC_RTR:
226 		rtr_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
227 		/* NOTREACHED */
228 	}
229 
230 	if (geteuid())
231 		errx(1, "need root privileges");
232 
233 	if (getpwnam(BGPD_USER) == NULL)
234 		errx(1, "unknown user %s", BGPD_USER);
235 
236 	if ((conf = parse_config(conffile, NULL, NULL)) == NULL) {
237 		log_warnx("config file %s has errors", conffile);
238 		exit(1);
239 	}
240 
241 	if (prepare_listeners(conf) == -1)
242 		exit(1);
243 
244 	log_init(debug, LOG_DAEMON);
245 	log_setverbose(cmd_opts & BGPD_OPT_VERBOSE);
246 
247 	if (!debug)
248 		daemon(1, 0);
249 
250 	log_info("startup");
251 
252 	getsockpair(pipe_m2s);
253 	getsockpair(pipe_m2r);
254 	getsockpair(pipe_m2roa);
255 
256 	/* fork children */
257 	rde_pid = start_child(PROC_RDE, saved_argv0, pipe_m2r[1], debug,
258 	    cmd_opts & BGPD_OPT_VERBOSE);
259 	se_pid = start_child(PROC_SE, saved_argv0, pipe_m2s[1], debug,
260 	    cmd_opts & BGPD_OPT_VERBOSE);
261 	rtr_pid = start_child(PROC_RTR, saved_argv0, pipe_m2roa[1], debug,
262 	    cmd_opts & BGPD_OPT_VERBOSE);
263 
264 	signal(SIGTERM, sighdlr);
265 	signal(SIGINT, sighdlr);
266 	signal(SIGHUP, sighdlr);
267 	signal(SIGALRM, sighdlr);
268 	signal(SIGUSR1, sighdlr);
269 	signal(SIGPIPE, SIG_IGN);
270 
271 	if ((ibuf_se = malloc(sizeof(struct imsgbuf))) == NULL ||
272 	    (ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL ||
273 	    (ibuf_rtr = malloc(sizeof(struct imsgbuf))) == NULL)
274 		fatal(NULL);
275 	imsg_init(ibuf_se, pipe_m2s[0]);
276 	imsg_init(ibuf_rde, pipe_m2r[0]);
277 	imsg_init(ibuf_rtr, pipe_m2roa[0]);
278 	mrt_init(ibuf_rde, ibuf_se);
279 	if (kr_init(&rfd, conf->fib_priority) == -1)
280 		quit = 1;
281 	keyfd = pfkey_init();
282 
283 	/*
284 	 * rpath, read config file
285 	 * cpath, unlink control socket
286 	 * fattr, chmod on control socket
287 	 * wpath, needed if we are doing mrt dumps
288 	 *
289 	 * pledge placed here because kr_init() does a setsockopt on the
290 	 * routing socket thats not allowed at all.
291 	 */
292 #if 0
293 	/*
294 	 * disabled because we do ioctls on /dev/pf and SIOCSIFGATTR
295 	 * this needs some redesign of bgpd to be fixed.
296 	 */
297 BROKEN	if (pledge("stdio rpath wpath cpath fattr unix route recvfd sendfd",
298 	    NULL) == -1)
299 		fatal("pledge");
300 #endif
301 
302 	if (imsg_send_sockets(ibuf_se, ibuf_rde, ibuf_rtr))
303 		fatal("could not establish imsg links");
304 	/* control setup needs to happen late since it sends imsgs */
305 	if (control_setup(conf) == -1)
306 		quit = 1;
307 	if (send_config(conf) != 0)
308 		quit = 1;
309 	if (pftable_clear_all() != 0)
310 		quit = 1;
311 
312 	while (quit == 0) {
313 		if (pfd_elms < PFD_CONNECT_START + connect_cnt) {
314 			struct pollfd *newp;
315 
316 			if ((newp = reallocarray(pfd,
317 			    PFD_CONNECT_START + connect_cnt,
318 			    sizeof(struct pollfd))) == NULL) {
319 				log_warn("could not resize pfd from %u -> %u"
320 				    " entries", pfd_elms, PFD_CONNECT_START +
321 				    connect_cnt);
322 				fatalx("exiting");
323 			}
324 			pfd = newp;
325 			pfd_elms = PFD_CONNECT_START + connect_cnt;
326 		}
327 		memset(pfd, 0, sizeof(struct pollfd) * pfd_elms);
328 
329 		timeout = mrt_timeout(conf->mrt);
330 
331 		pfd[PFD_SOCK_ROUTE].fd = rfd;
332 		pfd[PFD_SOCK_ROUTE].events = POLLIN;
333 
334 		pfd[PFD_SOCK_PFKEY].fd = keyfd;
335 		pfd[PFD_SOCK_PFKEY].events = POLLIN;
336 
337 		set_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se);
338 		set_pollfd(&pfd[PFD_PIPE_RDE], ibuf_rde);
339 		set_pollfd(&pfd[PFD_PIPE_RTR], ibuf_rtr);
340 
341 		npfd = PFD_CONNECT_START;
342 		TAILQ_FOREACH(ce, &connect_queue, entry) {
343 			pfd[npfd].fd = ce->fd;
344 			pfd[npfd++].events = POLLOUT;
345 			if (npfd > pfd_elms)
346 				fatalx("polli pfd overflow");
347 		}
348 
349 		if (timeout < 0 || timeout > MAX_TIMEOUT)
350 			timeout = MAX_TIMEOUT;
351 		if (poll(pfd, npfd, timeout * 1000) == -1) {
352 			if (errno != EINTR) {
353 				log_warn("poll error");
354 				quit = 1;
355 			}
356 			goto next_loop;
357 		}
358 
359 		if (handle_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se) == -1) {
360 			log_warnx("main: Lost connection to SE");
361 			msgbuf_clear(&ibuf_se->w);
362 			free(ibuf_se);
363 			ibuf_se = NULL;
364 			quit = 1;
365 		} else {
366 			if (dispatch_imsg(ibuf_se, PFD_PIPE_SESSION, conf) ==
367 			    -1)
368 				quit = 1;
369 		}
370 
371 		if (handle_pollfd(&pfd[PFD_PIPE_RDE], ibuf_rde) == -1) {
372 			log_warnx("main: Lost connection to RDE");
373 			msgbuf_clear(&ibuf_rde->w);
374 			free(ibuf_rde);
375 			ibuf_rde = NULL;
376 			quit = 1;
377 		} else {
378 			if (dispatch_imsg(ibuf_rde, PFD_PIPE_RDE, conf) == -1)
379 				quit = 1;
380 		}
381 
382 		if (handle_pollfd(&pfd[PFD_PIPE_RTR], ibuf_rtr) == -1) {
383 			log_warnx("main: Lost connection to RTR");
384 			msgbuf_clear(&ibuf_rtr->w);
385 			free(ibuf_rtr);
386 			ibuf_rtr = NULL;
387 			quit = 1;
388 		} else {
389 			if (dispatch_imsg(ibuf_rtr, PFD_PIPE_RTR, conf) == -1)
390 				quit = 1;
391 		}
392 
393 		if (pfd[PFD_SOCK_ROUTE].revents & POLLIN) {
394 			if (kr_dispatch_msg() == -1)
395 				quit = 1;
396 		}
397 
398 		if (pfd[PFD_SOCK_PFKEY].revents & POLLIN) {
399 			if (pfkey_read(keyfd, NULL) == -1) {
400 				log_warnx("pfkey_read failed, exiting...");
401 				quit = 1;
402 			}
403 		}
404 
405 		for (i = PFD_CONNECT_START; i < npfd; i++)
406 			if (pfd[i].revents != 0)
407 				bgpd_rtr_connect_done(pfd[i].fd, conf);
408 
409  next_loop:
410 		if (reconfig) {
411 			u_int	error;
412 
413 			reconfig = 0;
414 			switch (reconfigure(conffile, conf)) {
415 			case -1:	/* fatal error */
416 				quit = 1;
417 				break;
418 			case 0:		/* all OK */
419 				error = 0;
420 				break;
421 			case 2:
422 				log_info("previous reload still running");
423 				error = CTL_RES_PENDING;
424 				break;
425 			default:	/* parse error */
426 				log_warnx("config file %s has errors, "
427 				    "not reloading", conffile);
428 				error = CTL_RES_PARSE_ERROR;
429 				break;
430 			}
431 			if (reconfpid != 0) {
432 				send_imsg_session(IMSG_CTL_RESULT, reconfpid,
433 				    &error, sizeof(error));
434 				reconfpid = 0;
435 			}
436 		}
437 
438 		if (mrtdump) {
439 			mrtdump = 0;
440 			mrt_handler(conf->mrt);
441 		}
442 	}
443 
444 	/* close pipes */
445 	if (ibuf_se) {
446 		msgbuf_clear(&ibuf_se->w);
447 		close(ibuf_se->fd);
448 		free(ibuf_se);
449 		ibuf_se = NULL;
450 	}
451 	if (ibuf_rde) {
452 		msgbuf_clear(&ibuf_rde->w);
453 		close(ibuf_rde->fd);
454 		free(ibuf_rde);
455 		ibuf_rde = NULL;
456 	}
457 	if (ibuf_rtr) {
458 		msgbuf_clear(&ibuf_rtr->w);
459 		close(ibuf_rtr->fd);
460 		free(ibuf_rtr);
461 		ibuf_rtr = NULL;
462 	}
463 
464 	/* cleanup kernel data structures */
465 	carp_demote_shutdown();
466 	kr_shutdown();
467 	pftable_clear_all();
468 
469 	RB_FOREACH(p, peer_head, &conf->peers)
470 		pfkey_remove(p);
471 
472 	while ((rr = SIMPLEQ_FIRST(&ribnames)) != NULL) {
473 		SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
474 		free(rr);
475 	}
476 	free_config(conf);
477 
478 	log_debug("waiting for children to terminate");
479 	do {
480 		pid = wait(&status);
481 		if (pid == -1) {
482 			if (errno != EINTR && errno != ECHILD)
483 				fatal("wait");
484 		} else if (WIFSIGNALED(status)) {
485 			char *name = "unknown process";
486 			if (pid == rde_pid)
487 				name = "route decision engine";
488 			else if (pid == se_pid)
489 				name = "session engine";
490 			else if (pid == rtr_pid)
491 				name = "rtr engine";
492 			log_warnx("%s terminated; signal %d", name,
493 				WTERMSIG(status));
494 		}
495 	} while (pid != -1 || (pid == -1 && errno == EINTR));
496 
497 	free(rcname);
498 	free(cname);
499 
500 	log_info("terminating");
501 	return (0);
502 }
503 
504 pid_t
start_child(enum bgpd_process p,char * argv0,int fd,int debug,int verbose)505 start_child(enum bgpd_process p, char *argv0, int fd, int debug, int verbose)
506 {
507 	char *argv[5];
508 	int argc = 0;
509 	pid_t pid;
510 
511 	switch (pid = fork()) {
512 	case -1:
513 		fatal("cannot fork");
514 	case 0:
515 		break;
516 	default:
517 		close(fd);
518 		return (pid);
519 	}
520 
521 	if (fd != 3) {
522 		if (dup2(fd, 3) == -1)
523 			fatal("cannot setup imsg fd");
524 	} else if (fcntl(fd, F_SETFD, 0) == -1)
525 		fatal("cannot setup imsg fd");
526 
527 	argv[argc++] = argv0;
528 	switch (p) {
529 	case PROC_MAIN:
530 		fatalx("Can not start main process");
531 	case PROC_RDE:
532 		argv[argc++] = "-R";
533 		break;
534 	case PROC_SE:
535 		argv[argc++] = "-S";
536 		break;
537 	case PROC_RTR:
538 		argv[argc++] = "-T";
539 		break;
540 	}
541 	if (debug)
542 		argv[argc++] = "-d";
543 	if (verbose)
544 		argv[argc++] = "-v";
545 	argv[argc++] = NULL;
546 
547 	execvp(argv0, argv);
548 	fatal("execvp");
549 }
550 
551 int
send_filterset(struct imsgbuf * i,struct filter_set_head * set)552 send_filterset(struct imsgbuf *i, struct filter_set_head *set)
553 {
554 	struct filter_set	*s;
555 
556 	TAILQ_FOREACH(s, set, entry)
557 		if (imsg_compose(i, IMSG_FILTER_SET, 0, 0, -1, s,
558 		    sizeof(struct filter_set)) == -1)
559 			return (-1);
560 	return (0);
561 }
562 
563 int
reconfigure(char * conffile,struct bgpd_config * conf)564 reconfigure(char *conffile, struct bgpd_config *conf)
565 {
566 	struct bgpd_config	*new_conf;
567 
568 	if (reconfpending)
569 		return (2);
570 
571 	log_info("rereading config");
572 	if ((new_conf = parse_config(conffile, &conf->peers,
573 	    &conf->rtrs)) == NULL)
574 		return (1);
575 
576 	merge_config(conf, new_conf);
577 
578 	if (prepare_listeners(conf) == -1) {
579 		return (1);
580 	}
581 
582 	if (control_setup(conf) == -1) {
583 		return (1);
584 	}
585 
586 	return send_config(conf);
587 }
588 
589 int
send_config(struct bgpd_config * conf)590 send_config(struct bgpd_config *conf)
591 {
592 	struct peer		*p;
593 	struct filter_rule	*r;
594 	struct listen_addr	*la;
595 	struct rde_rib		*rr;
596 	struct l3vpn		*vpn;
597 	struct as_set		*aset;
598 	struct prefixset	*ps;
599 	struct prefixset_item	*psi, *npsi;
600 	struct roa		*roa;
601 	struct aspa_set		*aspa;
602 	struct rtr_config	*rtr;
603 	struct flowspec_config	*f, *nf;
604 
605 	reconfpending = 3;	/* one per child */
606 
607 	expand_networks(conf, &conf->networks);
608 	SIMPLEQ_FOREACH(vpn, &conf->l3vpns, entry)
609 		expand_networks(conf, &vpn->net_l);
610 
611 	cflags = conf->flags;
612 
613 	/* start reconfiguration */
614 	if (imsg_compose(ibuf_se, IMSG_RECONF_CONF, 0, 0, -1,
615 	    conf, sizeof(*conf)) == -1)
616 		return (-1);
617 	if (imsg_compose(ibuf_rde, IMSG_RECONF_CONF, 0, 0, -1,
618 	    conf, sizeof(*conf)) == -1)
619 		return (-1);
620 	if (imsg_compose(ibuf_rtr, IMSG_RECONF_CONF, 0, 0, -1,
621 	    conf, sizeof(*conf)) == -1)
622 		return (-1);
623 
624 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
625 		if (imsg_compose(ibuf_se, IMSG_RECONF_LISTENER, 0, 0, la->fd,
626 		    la, sizeof(*la)) == -1)
627 			return (-1);
628 		la->fd = -1;
629 	}
630 
631 	/* adjust fib syncing on reload */
632 	ktable_preload();
633 
634 	/* RIBs for the RDE */
635 	while ((rr = SIMPLEQ_FIRST(&ribnames))) {
636 		SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
637 		if (ktable_update(rr->rtableid, rr->name, rr->flags) == -1) {
638 			log_warnx("failed to load routing table %d",
639 			    rr->rtableid);
640 			return (-1);
641 		}
642 		if (imsg_compose(ibuf_rde, IMSG_RECONF_RIB, 0, 0, -1,
643 		    rr, sizeof(*rr)) == -1)
644 			return (-1);
645 		free(rr);
646 	}
647 
648 	/* send peer list to the SE */
649 	RB_FOREACH(p, peer_head, &conf->peers) {
650 		if (imsg_compose(ibuf_se, IMSG_RECONF_PEER, p->conf.id, 0, -1,
651 		    &p->conf, sizeof(p->conf)) == -1)
652 			return (-1);
653 
654 		if (p->reconf_action == RECONF_REINIT)
655 			if (pfkey_establish(p) == -1)
656 				log_peer_warnx(&p->conf, "pfkey setup failed");
657 	}
658 
659 	/* networks go via kroute to the RDE */
660 	kr_net_reload(conf->default_tableid, 0, &conf->networks);
661 
662 	/* flowspec goes directly to the RDE, also remove old objects */
663 	RB_FOREACH_SAFE(f, flowspec_tree, &conf->flowspecs, nf) {
664 		if (f->reconf_action != RECONF_DELETE) {
665 			if (imsg_compose(ibuf_rde, IMSG_FLOWSPEC_ADD, 0, 0, -1,
666 			    f->flow, FLOWSPEC_SIZE + f->flow->len) == -1)
667 				return (-1);
668 			if (send_filterset(ibuf_rde, &f->attrset) == -1)
669 				return (-1);
670 			if (imsg_compose(ibuf_rde, IMSG_FLOWSPEC_DONE, 0, 0, -1,
671 			    NULL, 0) == -1)
672 				return (-1);
673 		} else {
674 			if (imsg_compose(ibuf_rde, IMSG_FLOWSPEC_REMOVE, 0, 0,
675 			    -1, f->flow, FLOWSPEC_SIZE + f->flow->len) == -1)
676 				return (-1);
677 			RB_REMOVE(flowspec_tree, &conf->flowspecs, f);
678 			flowspec_free(f);
679 		}
680 	}
681 
682 	/* prefixsets for filters in the RDE */
683 	while ((ps = SIMPLEQ_FIRST(&conf->prefixsets)) != NULL) {
684 		SIMPLEQ_REMOVE_HEAD(&conf->prefixsets, entry);
685 		if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET, 0, 0, -1,
686 		    ps->name, sizeof(ps->name)) == -1)
687 			return (-1);
688 		RB_FOREACH_SAFE(psi, prefixset_tree, &ps->psitems, npsi) {
689 			RB_REMOVE(prefixset_tree, &ps->psitems, psi);
690 			if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET_ITEM,
691 			    0, 0, -1, psi, sizeof(*psi)) == -1)
692 				return (-1);
693 			free(psi);
694 		}
695 		free(ps);
696 	}
697 
698 	/* originsets for filters in the RDE */
699 	while ((ps = SIMPLEQ_FIRST(&conf->originsets)) != NULL) {
700 		SIMPLEQ_REMOVE_HEAD(&conf->originsets, entry);
701 		if (imsg_compose(ibuf_rde, IMSG_RECONF_ORIGIN_SET, 0, 0, -1,
702 		    ps->name, sizeof(ps->name)) == -1)
703 			return (-1);
704 		RB_FOREACH(roa, roa_tree, &ps->roaitems) {
705 			if (imsg_compose(ibuf_rde, IMSG_RECONF_ROA_ITEM, 0, 0,
706 			    -1, roa, sizeof(*roa)) == -1)
707 				return (-1);
708 		}
709 		free_roatree(&ps->roaitems);
710 		free(ps);
711 	}
712 
713 	/* roa table, aspa table and rtr config are sent to the RTR engine */
714 	RB_FOREACH(roa, roa_tree, &conf->roa) {
715 		if (imsg_compose(ibuf_rtr, IMSG_RECONF_ROA_ITEM, 0, 0,
716 		    -1, roa, sizeof(*roa)) == -1)
717 			return (-1);
718 	}
719 	free_roatree(&conf->roa);
720 	RB_FOREACH(aspa, aspa_tree, &conf->aspa) {
721 		/* XXX prevent oversized IMSG for now */
722 		if (aspa->num * sizeof(*aspa->tas) >
723 		    MAX_IMSGSIZE - IMSG_HEADER_SIZE) {
724 			log_warnx("oversized ASPA set for customer-as %s, %s",
725 			    log_as(aspa->as), "dropped");
726 			continue;
727 		}
728 
729 		if (imsg_compose(ibuf_rtr, IMSG_RECONF_ASPA, 0, 0,
730 		    -1, aspa, offsetof(struct aspa_set, tas)) == -1)
731 			return (-1);
732 		if (imsg_compose(ibuf_rtr, IMSG_RECONF_ASPA_TAS, 0, 0,
733 		    -1, aspa->tas, aspa->num * sizeof(*aspa->tas)) == -1)
734 			return (-1);
735 		if (imsg_compose(ibuf_rtr, IMSG_RECONF_ASPA_DONE, 0, 0, -1,
736 		    NULL, 0) == -1)
737 			return -1;
738 	}
739 	free_aspatree(&conf->aspa);
740 	SIMPLEQ_FOREACH(rtr, &conf->rtrs, entry) {
741 		if (imsg_compose(ibuf_rtr, IMSG_RECONF_RTR_CONFIG, rtr->id,
742 		    0, -1, rtr->descr, sizeof(rtr->descr)) == -1)
743 			return (-1);
744 	}
745 
746 	/* as-sets for filters in the RDE */
747 	while ((aset = SIMPLEQ_FIRST(&conf->as_sets)) != NULL) {
748 		struct ibuf *wbuf;
749 		uint32_t *as;
750 		size_t i, l, n;
751 
752 		SIMPLEQ_REMOVE_HEAD(&conf->as_sets, entry);
753 
754 		as = set_get(aset->set, &n);
755 		if ((wbuf = imsg_create(ibuf_rde, IMSG_RECONF_AS_SET, 0, 0,
756 		    sizeof(n) + sizeof(aset->name))) == NULL)
757 			return -1;
758 		if (imsg_add(wbuf, &n, sizeof(n)) == -1 ||
759 		    imsg_add(wbuf, aset->name, sizeof(aset->name)) == -1)
760 			return -1;
761 		imsg_close(ibuf_rde, wbuf);
762 
763 		for (i = 0; i < n; i += l) {
764 			l = (n - i > 1024 ? 1024 : n - i);
765 			if (imsg_compose(ibuf_rde, IMSG_RECONF_AS_SET_ITEMS,
766 			    0, 0, -1, as + i, l * sizeof(*as)) == -1)
767 				return -1;
768 		}
769 
770 		if (imsg_compose(ibuf_rde, IMSG_RECONF_AS_SET_DONE, 0, 0, -1,
771 		    NULL, 0) == -1)
772 			return -1;
773 
774 		set_free(aset->set);
775 		free(aset);
776 	}
777 
778 	/* filters for the RDE */
779 	while ((r = TAILQ_FIRST(conf->filters)) != NULL) {
780 		TAILQ_REMOVE(conf->filters, r, entry);
781 		if (send_filterset(ibuf_rde, &r->set) == -1)
782 			return (-1);
783 		if (imsg_compose(ibuf_rde, IMSG_RECONF_FILTER, 0, 0, -1,
784 		    r, sizeof(struct filter_rule)) == -1)
785 			return (-1);
786 		filterset_free(&r->set);
787 		free(r);
788 	}
789 
790 	while ((vpn = SIMPLEQ_FIRST(&conf->l3vpns)) != NULL) {
791 		SIMPLEQ_REMOVE_HEAD(&conf->l3vpns, entry);
792 		if (ktable_update(vpn->rtableid, vpn->descr, vpn->flags) ==
793 		    -1) {
794 			log_warnx("failed to load routing table %d",
795 			    vpn->rtableid);
796 			return (-1);
797 		}
798 		/* networks go via kroute to the RDE */
799 		kr_net_reload(vpn->rtableid, vpn->rd, &vpn->net_l);
800 
801 		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN, 0, 0, -1,
802 		    vpn, sizeof(*vpn)) == -1)
803 			return (-1);
804 
805 		/* export targets */
806 		if (send_filterset(ibuf_rde, &vpn->export) == -1)
807 			return (-1);
808 		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_EXPORT, 0, 0,
809 		    -1, NULL, 0) == -1)
810 			return (-1);
811 		filterset_free(&vpn->export);
812 
813 		/* import targets */
814 		if (send_filterset(ibuf_rde, &vpn->import) == -1)
815 			return (-1);
816 		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_IMPORT, 0, 0,
817 		    -1, NULL, 0) == -1)
818 			return (-1);
819 		filterset_free(&vpn->import);
820 
821 		if (imsg_compose(ibuf_rde, IMSG_RECONF_VPN_DONE, 0, 0,
822 		    -1, NULL, 0) == -1)
823 			return (-1);
824 
825 		free(vpn);
826 	}
827 
828 	/* send a drain message to know when all messages where processed */
829 	if (imsg_compose(ibuf_se, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
830 		return (-1);
831 	if (imsg_compose(ibuf_rde, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
832 		return (-1);
833 	if (imsg_compose(ibuf_rtr, IMSG_RECONF_DRAIN, 0, 0, -1, NULL, 0) == -1)
834 		return (-1);
835 
836 	/* mrt changes can be sent out of bound */
837 	mrt_reconfigure(conf->mrt);
838 	return (0);
839 }
840 
841 int
dispatch_imsg(struct imsgbuf * imsgbuf,int idx,struct bgpd_config * conf)842 dispatch_imsg(struct imsgbuf *imsgbuf, int idx, struct bgpd_config *conf)
843 {
844 	struct imsg		 imsg;
845 	struct peer		*p;
846 	struct rtr_config	*r;
847 	struct kroute_full	 kf;
848 	struct bgpd_addr	 addr;
849 	struct pftable_msg	 pfmsg;
850 	struct demote_msg	 demote;
851 	char			 reason[REASON_LEN], ifname[IFNAMSIZ];
852 	ssize_t			 n;
853 	u_int			 rtableid;
854 	int			 rv, verbose;
855 
856 	rv = 0;
857 	while (imsgbuf) {
858 		if ((n = imsg_get(imsgbuf, &imsg)) == -1)
859 			return (-1);
860 
861 		if (n == 0)
862 			break;
863 
864 		switch (imsg_get_type(&imsg)) {
865 		case IMSG_KROUTE_CHANGE:
866 			if (idx != PFD_PIPE_RDE)
867 				log_warnx("route request not from RDE");
868 			else if (imsg_get_data(&imsg, &kf, sizeof(kf)) == -1)
869 				log_warn("wrong imsg len");
870 			else if (kr_change(imsg_get_id(&imsg), &kf))
871 				rv = -1;
872 			break;
873 		case IMSG_KROUTE_DELETE:
874 			if (idx != PFD_PIPE_RDE)
875 				log_warnx("route request not from RDE");
876 			else if (imsg_get_data(&imsg, &kf, sizeof(kf)) == -1)
877 				log_warn("wrong imsg len");
878 			else if (kr_delete(imsg_get_id(&imsg), &kf))
879 				rv = -1;
880 			break;
881 		case IMSG_KROUTE_FLUSH:
882 			if (idx != PFD_PIPE_RDE)
883 				log_warnx("route request not from RDE");
884 			else if (kr_flush(imsg_get_id(&imsg)))
885 				rv = -1;
886 			break;
887 		case IMSG_NEXTHOP_ADD:
888 			if (idx != PFD_PIPE_RDE)
889 				log_warnx("nexthop request not from RDE");
890 			else if (imsg_get_data(&imsg, &addr, sizeof(addr)) ==
891 			    -1)
892 				log_warn("wrong imsg len");
893 			else {
894 				rtableid = conf->default_tableid;
895 				if (kr_nexthop_add(rtableid, &addr) == -1)
896 					rv = -1;
897 			}
898 			break;
899 		case IMSG_NEXTHOP_REMOVE:
900 			if (idx != PFD_PIPE_RDE)
901 				log_warnx("nexthop request not from RDE");
902 			else if (imsg_get_data(&imsg, &addr, sizeof(addr)) ==
903 			    -1)
904 				log_warn("wrong imsg len");
905 			else {
906 				rtableid = conf->default_tableid;
907 				kr_nexthop_delete(rtableid, &addr);
908 			}
909 			break;
910 		case IMSG_PFTABLE_ADD:
911 			if (idx != PFD_PIPE_RDE)
912 				log_warnx("pftable request not from RDE");
913 			else if (imsg_get_data(&imsg, &pfmsg, sizeof(pfmsg)) ==
914 			    -1)
915 				log_warn("wrong imsg len");
916 			else if (pftable_addr_add(&pfmsg) != 0)
917 				rv = -1;
918 			break;
919 		case IMSG_PFTABLE_REMOVE:
920 			if (idx != PFD_PIPE_RDE)
921 				log_warnx("pftable request not from RDE");
922 			else if (imsg_get_data(&imsg, &pfmsg, sizeof(pfmsg)) ==
923 			    -1)
924 				log_warn("wrong imsg len");
925 			else if (pftable_addr_remove(&pfmsg) != 0)
926 				rv = -1;
927 			break;
928 		case IMSG_PFTABLE_COMMIT:
929 			if (idx != PFD_PIPE_RDE)
930 				log_warnx("pftable request not from RDE");
931 			else if (pftable_commit() != 0)
932 				rv = -1;
933 			break;
934 		case IMSG_PFKEY_RELOAD:
935 			if (idx != PFD_PIPE_SESSION) {
936 				log_warnx("pfkey reload request not from SE");
937 				break;
938 			}
939 			p = getpeerbyid(conf, imsg_get_id(&imsg));
940 			if (p != NULL) {
941 				if (pfkey_establish(p) == -1)
942 					log_peer_warnx(&p->conf,
943 					    "pfkey setup failed");
944 			}
945 			break;
946 		case IMSG_CTL_RELOAD:
947 			if (idx != PFD_PIPE_SESSION)
948 				log_warnx("reload request not from SE");
949 			else {
950 				reconfig = 1;
951 				reconfpid = imsg_get_pid(&imsg);
952 				if (imsg_get_data(&imsg, reason,
953 				    sizeof(reason)) == 0 && reason[0] != '\0')
954 					log_info("reload due to: %s",
955 					    log_reason(reason));
956 			}
957 			break;
958 		case IMSG_CTL_FIB_COUPLE:
959 			if (idx != PFD_PIPE_SESSION)
960 				log_warnx("couple request not from SE");
961 			else
962 				kr_fib_couple(imsg_get_id(&imsg));
963 			break;
964 		case IMSG_CTL_FIB_DECOUPLE:
965 			if (idx != PFD_PIPE_SESSION)
966 				log_warnx("decouple request not from SE");
967 			else
968 				kr_fib_decouple(imsg_get_id(&imsg));
969 			break;
970 		case IMSG_CTL_KROUTE:
971 		case IMSG_CTL_KROUTE_ADDR:
972 		case IMSG_CTL_SHOW_NEXTHOP:
973 		case IMSG_CTL_SHOW_INTERFACE:
974 		case IMSG_CTL_SHOW_FIB_TABLES:
975 			if (idx != PFD_PIPE_SESSION)
976 				log_warnx("kroute request not from SE");
977 			else
978 				kr_show_route(&imsg);
979 			break;
980 		case IMSG_SESSION_DEPENDON:
981 			if (idx != PFD_PIPE_SESSION)
982 				log_warnx("DEPENDON request not from SE");
983 			else if (imsg_get_data(&imsg, ifname, sizeof(ifname)) ==
984 			    -1)
985 				log_warn("wrong imsg len");
986 			else
987 				kr_ifinfo(ifname);
988 			break;
989 		case IMSG_DEMOTE:
990 			if (idx != PFD_PIPE_SESSION)
991 				log_warnx("demote request not from SE");
992 			else if (imsg_get_data(&imsg, &demote, sizeof(demote))
993 			    == -1)
994 				log_warn("wrong imsg len");
995 			else
996 				carp_demote_set(demote.demote_group,
997 				    demote.level);
998 			break;
999 		case IMSG_CTL_LOG_VERBOSE:
1000 			/* already checked by SE */
1001 			if (imsg_get_data(&imsg, &verbose, sizeof(verbose)) ==
1002 			    -1)
1003 				log_warn("wrong imsg len");
1004 			else
1005 				log_setverbose(verbose);
1006 			break;
1007 		case IMSG_RECONF_DONE:
1008 			if (reconfpending == 0) {
1009 				log_warnx("unexpected RECONF_DONE received");
1010 				break;
1011 			}
1012 			if (idx == PFD_PIPE_SESSION) {
1013 				/* RDE and RTR engine can reload concurrently */
1014 				imsg_compose(ibuf_rtr, IMSG_RECONF_DONE, 0,
1015 				    0, -1, NULL, 0);
1016 				imsg_compose(ibuf_rde, IMSG_RECONF_DONE, 0,
1017 				    0, -1, NULL, 0);
1018 
1019 				/* finally fix kroute information */
1020 				ktable_postload();
1021 
1022 				/* redistribute list needs to be reloaded too */
1023 				kr_reload();
1024 			}
1025 			reconfpending--;
1026 			break;
1027 		case IMSG_RECONF_DRAIN:
1028 			if (reconfpending == 0) {
1029 				log_warnx("unexpected RECONF_DRAIN received");
1030 				break;
1031 			}
1032 			reconfpending--;
1033 			if (reconfpending == 0) {
1034 				/*
1035 				 * SE goes first to bring templated neighbors
1036 				 * in sync.
1037 				 */
1038 				imsg_compose(ibuf_se, IMSG_RECONF_DONE, 0,
1039 				    0, -1, NULL, 0);
1040 				reconfpending = 3; /* expecting 2 DONE msg */
1041 			}
1042 			break;
1043 		case IMSG_SOCKET_CONN:
1044 			if (idx != PFD_PIPE_RTR) {
1045 				log_warnx("connect request not from RTR");
1046 			} else {
1047 				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1048 					if (imsg_get_id(&imsg) == r->id)
1049 						break;
1050 				}
1051 				if (r == NULL)
1052 					log_warnx("unknown rtr id %d",
1053 					    imsg_get_id(&imsg));
1054 				else
1055 					bgpd_rtr_connect(r);
1056 			}
1057 			break;
1058 		case IMSG_CTL_SHOW_RTR:
1059 			if (idx == PFD_PIPE_SESSION) {
1060 				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1061 					imsg_compose(ibuf_rtr,
1062 					    IMSG_CTL_SHOW_RTR, r->id,
1063 					    imsg_get_pid(&imsg), -1, NULL, 0);
1064 				}
1065 				imsg_compose(ibuf_rtr, IMSG_CTL_END,
1066 				    0, imsg_get_pid(&imsg), -1, NULL, 0);
1067 			} else if (idx == PFD_PIPE_RTR) {
1068 				struct ctl_show_rtr rtr;
1069 				if (imsg_get_data(&imsg, &rtr, sizeof(rtr)) ==
1070 				    -1) {
1071 					log_warn("wrong imsg len");
1072 					break;
1073 				}
1074 
1075 				SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1076 					if (imsg_get_id(&imsg) == r->id)
1077 						break;
1078 				}
1079 				if (r != NULL) {
1080 					strlcpy(rtr.descr, r->descr,
1081 					    sizeof(rtr.descr));
1082 					rtr.local_addr = r->local_addr;
1083 					rtr.remote_addr = r->remote_addr;
1084 					rtr.remote_port = r->remote_port;
1085 
1086 					imsg_compose(ibuf_se, IMSG_CTL_SHOW_RTR,
1087 					    imsg_get_id(&imsg),
1088 					    imsg_get_pid(&imsg), -1,
1089 					    &rtr, sizeof(rtr));
1090 				}
1091 			}
1092 			break;
1093 		case IMSG_CTL_END:
1094 		case IMSG_CTL_SHOW_TIMER:
1095 			if (idx != PFD_PIPE_RTR) {
1096 				log_warnx("connect request not from RTR");
1097 				break;
1098 			}
1099 			imsg_forward(ibuf_se, &imsg);
1100 			break;
1101 		default:
1102 			break;
1103 		}
1104 		imsg_free(&imsg);
1105 		if (rv != 0)
1106 			return (rv);
1107 	}
1108 	return (0);
1109 }
1110 
1111 void
send_nexthop_update(struct kroute_nexthop * msg)1112 send_nexthop_update(struct kroute_nexthop *msg)
1113 {
1114 	char	*gw = NULL;
1115 
1116 	if (msg->gateway.aid)
1117 		if (asprintf(&gw, ": via %s",
1118 		    log_addr(&msg->gateway)) == -1) {
1119 			log_warn("send_nexthop_update");
1120 			quit = 1;
1121 		}
1122 
1123 	log_debug("nexthop %s now %s%s%s", log_addr(&msg->nexthop),
1124 	    msg->valid ? "valid" : "invalid",
1125 	    msg->connected ? ": directly connected" : "",
1126 	    msg->gateway.aid ? gw : "");
1127 
1128 	free(gw);
1129 
1130 	if (imsg_compose(ibuf_rde, IMSG_NEXTHOP_UPDATE, 0, 0, -1,
1131 	    msg, sizeof(struct kroute_nexthop)) == -1)
1132 		quit = 1;
1133 }
1134 
1135 void
send_imsg_session(int type,pid_t pid,void * data,uint16_t datalen)1136 send_imsg_session(int type, pid_t pid, void *data, uint16_t datalen)
1137 {
1138 	imsg_compose(ibuf_se, type, 0, pid, -1, data, datalen);
1139 }
1140 
1141 int
send_network(int type,struct network_config * net,struct filter_set_head * h)1142 send_network(int type, struct network_config *net, struct filter_set_head *h)
1143 {
1144 	if (quit)
1145 		return (0);
1146 	if (imsg_compose(ibuf_rde, type, 0, 0, -1, net,
1147 	    sizeof(struct network_config)) == -1)
1148 		return (-1);
1149 	/* networks that get deleted don't need to send the filter set */
1150 	if (type == IMSG_NETWORK_REMOVE)
1151 		return (0);
1152 	if (send_filterset(ibuf_rde, h) == -1)
1153 		return (-1);
1154 	if (imsg_compose(ibuf_rde, IMSG_NETWORK_DONE, 0, 0, -1, NULL, 0) == -1)
1155 		return (-1);
1156 
1157 	return (0);
1158 }
1159 
1160 /*
1161  * Return true if a route can be used for nexthop resolution.
1162  */
1163 int
bgpd_oknexthop(struct kroute_full * kf)1164 bgpd_oknexthop(struct kroute_full *kf)
1165 {
1166 	if (kf->flags & F_BGPD)
1167 		return ((cflags & BGPD_FLAG_NEXTHOP_BGP) != 0);
1168 
1169 	if (kf->prefixlen == 0)
1170 		return ((cflags & BGPD_FLAG_NEXTHOP_DEFAULT) != 0);
1171 
1172 	/* any other route is fine */
1173 	return (1);
1174 }
1175 
1176 int
control_setup(struct bgpd_config * conf)1177 control_setup(struct bgpd_config *conf)
1178 {
1179 	int fd, restricted;
1180 
1181 	/* control socket is outside chroot */
1182 	if (!cname || strcmp(cname, conf->csock)) {
1183 		if (cname) {
1184 			free(cname);
1185 		}
1186 		if ((cname = strdup(conf->csock)) == NULL)
1187 			fatal("strdup");
1188 		if (control_check(cname) == -1)
1189 			return (-1);
1190 		if ((fd = control_init(0, cname)) == -1)
1191 			fatalx("control socket setup failed");
1192 		if (control_listen(fd) == -1)
1193 			fatalx("control socket setup failed");
1194 		restricted = 0;
1195 		if (imsg_compose(ibuf_se, IMSG_RECONF_CTRL, 0, 0, fd,
1196 		    &restricted, sizeof(restricted)) == -1)
1197 			return (-1);
1198 	}
1199 	if (!conf->rcsock) {
1200 		/* remove restricted socket */
1201 		free(rcname);
1202 		rcname = NULL;
1203 	} else if (!rcname || strcmp(rcname, conf->rcsock)) {
1204 		if (rcname) {
1205 			free(rcname);
1206 		}
1207 		if ((rcname = strdup(conf->rcsock)) == NULL)
1208 			fatal("strdup");
1209 		if (control_check(rcname) == -1)
1210 			return (-1);
1211 		if ((fd = control_init(1, rcname)) == -1)
1212 			fatalx("control socket setup failed");
1213 		if (control_listen(fd) == -1)
1214 			fatalx("control socket setup failed");
1215 		restricted = 1;
1216 		if (imsg_compose(ibuf_se, IMSG_RECONF_CTRL, 0, 0, fd,
1217 		    &restricted, sizeof(restricted)) == -1)
1218 			return (-1);
1219 	}
1220 	return (0);
1221 }
1222 
1223 void
set_pollfd(struct pollfd * pfd,struct imsgbuf * i)1224 set_pollfd(struct pollfd *pfd, struct imsgbuf *i)
1225 {
1226 	if (i == NULL || i->fd == -1) {
1227 		pfd->fd = -1;
1228 		return;
1229 	}
1230 	pfd->fd = i->fd;
1231 	pfd->events = POLLIN;
1232 	if (i->w.queued > 0)
1233 		pfd->events |= POLLOUT;
1234 }
1235 
1236 int
handle_pollfd(struct pollfd * pfd,struct imsgbuf * i)1237 handle_pollfd(struct pollfd *pfd, struct imsgbuf *i)
1238 {
1239 	ssize_t n;
1240 
1241 	if (i == NULL)
1242 		return (0);
1243 
1244 	if (pfd->revents & POLLOUT)
1245 		if (msgbuf_write(&i->w) <= 0 && errno != EAGAIN) {
1246 			log_warn("imsg write error");
1247 			close(i->fd);
1248 			i->fd = -1;
1249 			return (-1);
1250 		}
1251 
1252 	if (pfd->revents & POLLIN) {
1253 		if ((n = imsg_read(i)) == -1 && errno != EAGAIN) {
1254 			log_warn("imsg read error");
1255 			close(i->fd);
1256 			i->fd = -1;
1257 			return (-1);
1258 		}
1259 		if (n == 0) {
1260 			log_warnx("peer closed imsg connection");
1261 			close(i->fd);
1262 			i->fd = -1;
1263 			return (-1);
1264 		}
1265 	}
1266 	return (0);
1267 }
1268 
1269 static void
getsockpair(int pipe[2])1270 getsockpair(int pipe[2])
1271 {
1272 	int bsize, i;
1273 
1274 	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK,
1275 	    PF_UNSPEC, pipe) == -1)
1276 		fatal("socketpair");
1277 
1278 	for (i = 0; i < 2; i++) {
1279 		for (bsize = MAX_SOCK_BUF; bsize >= 16 * 1024; bsize /= 2) {
1280 			if (setsockopt(pipe[i], SOL_SOCKET, SO_RCVBUF,
1281 			    &bsize, sizeof(bsize)) == -1) {
1282 				if (errno != ENOBUFS)
1283 					fatal("setsockopt(SO_RCVBUF, %d)",
1284 					    bsize);
1285 				log_warn("setsockopt(SO_RCVBUF, %d)", bsize);
1286 				continue;
1287 			}
1288 			break;
1289 		}
1290 	}
1291 	for (i = 0; i < 2; i++) {
1292 		for (bsize = MAX_SOCK_BUF; bsize >= 16 * 1024; bsize /= 2) {
1293 			if (setsockopt(pipe[i], SOL_SOCKET, SO_SNDBUF,
1294 			    &bsize, sizeof(bsize)) == -1) {
1295 				if (errno != ENOBUFS)
1296 					fatal("setsockopt(SO_SNDBUF, %d)",
1297 					    bsize);
1298 				log_warn("setsockopt(SO_SNDBUF, %d)", bsize);
1299 				continue;
1300 			}
1301 			break;
1302 		}
1303 	}
1304 }
1305 
1306 int
imsg_send_sockets(struct imsgbuf * se,struct imsgbuf * rde,struct imsgbuf * rtr)1307 imsg_send_sockets(struct imsgbuf *se, struct imsgbuf *rde, struct imsgbuf *rtr)
1308 {
1309 	int pipe_s2r[2];
1310 	int pipe_s2r_ctl[2];
1311 	int pipe_r2r[2];
1312 
1313 	getsockpair(pipe_s2r);
1314 	getsockpair(pipe_s2r_ctl);
1315 	getsockpair(pipe_r2r);
1316 
1317 	if (imsg_compose(se, IMSG_SOCKET_CONN, 0, 0, pipe_s2r[0],
1318 	    NULL, 0) == -1)
1319 		return (-1);
1320 	if (imsg_compose(rde, IMSG_SOCKET_CONN, 0, 0, pipe_s2r[1],
1321 	    NULL, 0) == -1)
1322 		return (-1);
1323 
1324 	if (imsg_compose(se, IMSG_SOCKET_CONN_CTL, 0, 0, pipe_s2r_ctl[0],
1325 	    NULL, 0) == -1)
1326 		return (-1);
1327 	if (imsg_compose(rde, IMSG_SOCKET_CONN_CTL, 0, 0, pipe_s2r_ctl[1],
1328 	    NULL, 0) == -1)
1329 		return (-1);
1330 
1331 	if (imsg_compose(rtr, IMSG_SOCKET_CONN_RTR, 0, 0, pipe_r2r[0],
1332 	    NULL, 0) == -1)
1333 		return (-1);
1334 	if (imsg_compose(rde, IMSG_SOCKET_CONN_RTR, 0, 0, pipe_r2r[1],
1335 	    NULL, 0) == -1)
1336 		return (-1);
1337 
1338 	return (0);
1339 }
1340 
1341 void
bgpd_rtr_connect(struct rtr_config * r)1342 bgpd_rtr_connect(struct rtr_config *r)
1343 {
1344 	struct connect_elm *ce;
1345 	struct sockaddr *sa;
1346 	socklen_t len;
1347 	int nodelay = 1;
1348 	int pre = IPTOS_PREC_INTERNETCONTROL;
1349 
1350 	if (connect_cnt >= MAX_CONNECT_CNT) {
1351 		log_warnx("rtr %s: too many concurrent connection requests",
1352 		    r->descr);
1353 		return;
1354 	}
1355 
1356 	if ((ce = calloc(1, sizeof(*ce))) == NULL) {
1357 		log_warn("rtr %s", r->descr);
1358 		return;
1359 	}
1360 
1361 	ce->id = r->id;
1362 	ce->fd = socket(aid2af(r->remote_addr.aid),
1363 	    SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP);
1364 	if (ce->fd == -1) {
1365 		log_warn("rtr %s", r->descr);
1366 		free(ce);
1367 		return;
1368 	}
1369 
1370 	if ((sa = addr2sa(&r->local_addr, 0, &len)) != NULL) {
1371 		if (bind(ce->fd, sa, len) == -1) {
1372 			log_warn("rtr %s: bind to %s", r->descr,
1373 			    log_addr(&r->local_addr));
1374 			close(ce->fd);
1375 			free(ce);
1376 			return;
1377 		}
1378 	}
1379 
1380 	sa = addr2sa(&r->remote_addr, r->remote_port, &len);
1381 	if (connect(ce->fd, sa, len) == -1) {
1382 		if (errno != EINPROGRESS) {
1383 			log_warn("rtr %s: connect to %s:%u", r->descr,
1384 			    log_addr(&r->remote_addr), r->remote_port);
1385 			close(ce->fd);
1386 			free(ce);
1387 			return;
1388 		}
1389 		TAILQ_INSERT_TAIL(&connect_queue, ce, entry);
1390 		connect_cnt++;
1391 		return;
1392 	}
1393 
1394 	switch (r->remote_addr.aid) {
1395 	case AID_INET:
1396 		if (setsockopt(ce->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1397 		    -1) {
1398 			log_warn("rtr %s: setsockopt IP_TOS", r->descr);
1399 			return;
1400 		}
1401 		break;
1402 	case AID_INET6:
1403 		if (setsockopt(ce->fd, IPPROTO_IPV6, IPV6_TCLASS, &pre,
1404 		    sizeof(pre)) == -1) {
1405 			log_warn("rtr %s: setsockopt IP_TOS", r->descr);
1406 			return;
1407 		}
1408 		break;
1409 	}
1410 
1411 	if (setsockopt(ce->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1412 	    sizeof(nodelay)) == -1) {
1413 		log_warn("rtr %s: setsockopt TCP_NODELAY", r->descr);
1414 		return;
1415 	}
1416 
1417 	imsg_compose(ibuf_rtr, IMSG_SOCKET_CONN, ce->id, 0, ce->fd, NULL, 0);
1418 	free(ce);
1419 }
1420 
1421 void
bgpd_rtr_connect_done(int fd,struct bgpd_config * conf)1422 bgpd_rtr_connect_done(int fd, struct bgpd_config *conf)
1423 {
1424 	struct rtr_config *r;
1425 	struct connect_elm *ce;
1426 	int error = 0;
1427 	socklen_t len;
1428 
1429 	TAILQ_FOREACH(ce, &connect_queue, entry) {
1430 		if (ce->fd == fd)
1431 			break;
1432 	}
1433 	if (ce == NULL)
1434 		fatalx("connect entry not found");
1435 
1436 	TAILQ_REMOVE(&connect_queue, ce, entry);
1437 	connect_cnt--;
1438 
1439 	SIMPLEQ_FOREACH(r, &conf->rtrs, entry) {
1440 		if (ce->id == r->id)
1441 			break;
1442 	}
1443 	if (r == NULL) {
1444 		log_warnx("rtr id %d no longer exists", ce->id);
1445 		goto fail;
1446 	}
1447 
1448 	len = sizeof(error);
1449 	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &len) == -1) {
1450 		log_warn("rtr %s: getsockopt SO_ERROR", r->descr);
1451 		goto fail;
1452 	}
1453 
1454 	if (error != 0) {
1455 		errno = error;
1456 		log_warn("rtr %s: connect to %s:%u", r->descr,
1457 		    log_addr(&r->remote_addr), r->remote_port);
1458 		goto fail;
1459 	}
1460 
1461 	imsg_compose(ibuf_rtr, IMSG_SOCKET_CONN, ce->id, 0, ce->fd, NULL, 0);
1462 	free(ce);
1463 	return;
1464 
1465 fail:
1466 	close(fd);
1467 	free(ce);
1468 }
1469