/*	$OpenBSD: rde.c,v 1.514 2021/01/25 09:15:24 claudio Exp $ */

/*
 * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
 * Copyright (c) 2016 Job Snijders <job@instituut.net>
 * Copyright (c) 2016 Peter Hessler <phessler@openbsd.org>
 * Copyright (c) 2018 Sebastian Benoit <benno@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>

#include <errno.h>
#include <pwd.h>
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <unistd.h>
#include <err.h>

#include "bgpd.h"
#include "rde.h"
#include "session.h"
#include "log.h"

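/* fixed pollfd slots; fds of active MRT dumps are appended after PFD_PIPE_COUNT */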
#define PFD_PIPE_MAIN		0
#define PFD_PIPE_SESSION	1
#define PFD_PIPE_SESSION_CTL	2
#define PFD_PIPE_COUNT		3

void		 rde_sighdlr(int);
void		 rde_dispatch_imsg_session(struct imsgbuf *);
void		 rde_dispatch_imsg_parent(struct imsgbuf *);
void		 rde_dispatch_imsg_peer(struct rde_peer *, void *);
void		 rde_update_dispatch(struct rde_peer *, struct imsg *);
int		 rde_update_update(struct rde_peer *, struct filterstate *,
		     struct bgpd_addr *, u_int8_t);
void		 rde_update_withdraw(struct rde_peer *, struct bgpd_addr *,
		     u_int8_t);
int		 rde_attr_parse(u_char *, u_int16_t, struct rde_peer *,
		     struct filterstate *, struct mpattr *);
int		 rde_attr_add(struct filterstate *, u_char *, u_int16_t);
u_int8_t	 rde_attr_missing(struct rde_aspath *, int, u_int16_t);
int		 rde_get_mp_nexthop(u_char *, u_int16_t, u_int8_t,
		     struct filterstate *);
void		 rde_as4byte_fixup(struct rde_peer *, struct rde_aspath *);
void		 rde_reflector(struct rde_peer *, struct rde_aspath *);

void		 rde_dump_ctx_new(struct ctl_show_rib_request *, pid_t,
		     enum imsg_type);
void		 rde_dump_ctx_throttle(pid_t, int);
void		 rde_dump_ctx_terminate(pid_t);
void		 rde_dump_mrt_new(struct mrt *, pid_t, int);

int		 rde_l3vpn_import(struct rde_community *, struct l3vpn *);
static void	 rde_commit_pftable(void);
void		 rde_reload_done(void);
static void	 rde_softreconfig_in_done(void *, u_int8_t);
static void	 rde_softreconfig_out_done(void *, u_int8_t);
static void	 rde_softreconfig_done(void);
static void	 rde_softreconfig_out(struct rib_entry *, void *);
static void	 rde_softreconfig_in(struct rib_entry *, void *);
static void	 rde_softreconfig_sync_reeval(struct rib_entry *, void *);
static void	 rde_softreconfig_sync_fib(struct rib_entry *, void *);
static void	 rde_softreconfig_sync_done(void *, u_int8_t);
static int	 rde_no_as_set(struct rde_peer *);
int		 rde_update_queue_pending(void);
void		 rde_update_queue_runner(void);
void		 rde_update6_queue_runner(u_int8_t);
struct rde_prefixset *rde_find_prefixset(char *, struct rde_prefixset_head *);
void		 rde_mark_prefixsets_dirty(struct rde_prefixset_head *,
		     struct rde_prefixset_head *);
u_int8_t	 rde_roa_validity(struct rde_prefixset *,
		     struct bgpd_addr *, u_int8_t, u_int32_t);

static void	 rde_peer_recv_eor(struct rde_peer *, u_int8_t);
static void	 rde_peer_send_eor(struct rde_peer *, u_int8_t);

void		 network_add(struct network_config *, struct filterstate *);
void		 network_delete(struct network_config *);
static void	 network_dump_upcall(struct rib_entry *, void *);
static void	 network_flush_upcall(struct rib_entry *, void *);

void		 rde_shutdown(void);
int		 ovs_match(struct prefix *, u_int32_t);

static struct imsgbuf		*ibuf_se;
static struct imsgbuf		*ibuf_se_ctl;
static struct imsgbuf		*ibuf_main;
static struct bgpd_config	*conf, *nconf;

volatile sig_atomic_t	 rde_quit = 0;
struct filter_head	*out_rules, *out_rules_tmp;
struct rde_memstats	 rdemem;
int			 softreconfig;

extern struct rde_peer_head	 peerlist;
extern struct rde_peer		*peerself;

struct rde_dump_ctx {
	LIST_ENTRY(rde_dump_ctx)	entry;
	struct ctl_show_rib_request	req;
	u_int32_t			peerid;
	u_int8_t			throttled;
};

LIST_HEAD(, rde_dump_ctx) rde_dump_h = LIST_HEAD_INITIALIZER(rde_dump_h);

struct rde_mrt_ctx {
	LIST_ENTRY(rde_mrt_ctx)	entry;
	struct mrt		mrt;
};

LIST_HEAD(, rde_mrt_ctx) rde_mrts = LIST_HEAD_INITIALIZER(rde_mrts);
u_int rde_mrt_cnt;

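/*
 * Minimal signal handler: just set the volatile sig_atomic_t flag and
 * let the rde_main() loop perform the actual shutdown, which keeps the
 * handler async-signal-safe.
 */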
void
rde_sighdlr(int sig)
{
	switch (sig) {
	case SIGINT:
	case SIGTERM:
		rde_quit = 1;
		break;
	}
}

u_int32_t	peerhashsize = 1024;
u_int32_t	pathhashsize = 128 * 1024;
u_int32_t	attrhashsize = 16 * 1024;
u_int32_t	nexthophashsize = 1024;

void
rde_main(int debug, int verbose)
{
	struct passwd		*pw;
	struct pollfd		*pfd = NULL;
	struct rde_mrt_ctx	*mctx, *xmctx;
	void			*newp;
	u_int			 pfd_elms = 0, i, j;
	int			 timeout;
	u_int8_t		 aid;

	log_init(debug, LOG_DAEMON);
	log_setverbose(verbose);

	log_procinit(log_procnames[PROC_RDE]);

	if ((pw = getpwnam(BGPD_USER)) == NULL)
		fatal("getpwnam");

	if (chroot(pw->pw_dir) == -1)
		fatal("chroot");
	if (chdir("/") == -1)
		fatal("chdir(\"/\")");

	setproctitle("route decision engine");

	if (setgroups(1, &pw->pw_gid) ||
	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
		fatal("can't drop privileges");

	if (pledge("stdio recvfd", NULL) == -1)
		fatal("pledge");

	signal(SIGTERM, rde_sighdlr);
	signal(SIGINT, rde_sighdlr);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGALRM, SIG_IGN);
	signal(SIGUSR1, SIG_IGN);

	if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
		fatal(NULL);
	imsg_init(ibuf_main, 3);

	/* initialize the RIB structures */
	pt_init();
	path_init(pathhashsize);
	aspath_init(pathhashsize);
	communities_init(attrhashsize);
	attr_init(attrhashsize);
	nexthop_init(nexthophashsize);
	peer_init(peerhashsize);

	/* make sure the default RIBs are setup */
	rib_new("Adj-RIB-In", 0, F_RIB_NOFIB | F_RIB_NOEVALUATE);

	out_rules = calloc(1, sizeof(struct filter_head));
	if (out_rules == NULL)
		fatal(NULL);
	TAILQ_INIT(out_rules);

	conf = new_config();
	log_info("route decision engine ready");

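	/*
	 * Main event loop: grow the pollfd array on demand, poll the
	 * pipes to the parent and session engine plus any active MRT
	 * dump fds, dispatch pending imsgs and finally drive the RIB
	 * dump, nexthop and update queue runners.
	 */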
	while (rde_quit == 0) {
		if (pfd_elms < PFD_PIPE_COUNT + rde_mrt_cnt) {
			if ((newp = reallocarray(pfd,
			    PFD_PIPE_COUNT + rde_mrt_cnt,
			    sizeof(struct pollfd))) == NULL) {
				/* panic for now */
				log_warn("could not resize pfd from %u -> %u"
				    " entries", pfd_elms, PFD_PIPE_COUNT +
				    rde_mrt_cnt);
				fatalx("exiting");
			}
			pfd = newp;
			pfd_elms = PFD_PIPE_COUNT + rde_mrt_cnt;
		}
		timeout = -1;
		bzero(pfd, sizeof(struct pollfd) * pfd_elms);

		set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
		set_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se);
		set_pollfd(&pfd[PFD_PIPE_SESSION_CTL], ibuf_se_ctl);

		i = PFD_PIPE_COUNT;
		for (mctx = LIST_FIRST(&rde_mrts); mctx != NULL; mctx = xmctx) {
			xmctx = LIST_NEXT(mctx, entry);

			if (i >= pfd_elms)
				fatalx("poll pfd too small");
			if (mctx->mrt.wbuf.queued) {
				pfd[i].fd = mctx->mrt.wbuf.fd;
				pfd[i].events = POLLOUT;
				i++;
			} else if (mctx->mrt.state == MRT_STATE_REMOVE) {
				close(mctx->mrt.wbuf.fd);
				LIST_REMOVE(mctx, entry);
				free(mctx);
				rde_mrt_cnt--;
			}
		}

		if (rib_dump_pending() || rde_update_queue_pending() ||
		    nexthop_pending() || peer_imsg_pending())
			timeout = 0;

		if (poll(pfd, i, timeout) == -1) {
			if (errno != EINTR)
				fatal("poll error");
			continue;
		}

		if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1)
			fatalx("Lost connection to parent");
		else
			rde_dispatch_imsg_parent(ibuf_main);

		if (handle_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se) == -1) {
			log_warnx("RDE: Lost connection to SE");
			msgbuf_clear(&ibuf_se->w);
			free(ibuf_se);
			ibuf_se = NULL;
		} else
			rde_dispatch_imsg_session(ibuf_se);

		if (handle_pollfd(&pfd[PFD_PIPE_SESSION_CTL], ibuf_se_ctl) ==
		    -1) {
			log_warnx("RDE: Lost connection to SE control");
			msgbuf_clear(&ibuf_se_ctl->w);
			free(ibuf_se_ctl);
			ibuf_se_ctl = NULL;
		} else
			rde_dispatch_imsg_session(ibuf_se_ctl);

		for (j = PFD_PIPE_COUNT, mctx = LIST_FIRST(&rde_mrts);
		    j < i && mctx != NULL; j++) {
			if (pfd[j].fd == mctx->mrt.wbuf.fd &&
			    pfd[j].revents & POLLOUT)
				mrt_write(&mctx->mrt);
			mctx = LIST_NEXT(mctx, entry);
		}

		peer_foreach(rde_dispatch_imsg_peer, NULL);
		rib_dump_runner();
		nexthop_runner();
		if (ibuf_se && ibuf_se->w.queued < SESS_MSG_HIGH_MARK) {
			rde_update_queue_runner();
			for (aid = AID_INET6; aid < AID_MAX; aid++)
				rde_update6_queue_runner(aid);
		}
		/* commit pftable once per poll loop */
		rde_commit_pftable();
	}

	/* do not clean up on shutdown in production, it takes ages. */
	if (debug)
		rde_shutdown();

	free_config(conf);
	free(pfd);

	/* close pipes */
	if (ibuf_se) {
		msgbuf_clear(&ibuf_se->w);
		close(ibuf_se->fd);
		free(ibuf_se);
	}
	if (ibuf_se_ctl) {
		msgbuf_clear(&ibuf_se_ctl->w);
		close(ibuf_se_ctl->fd);
		free(ibuf_se_ctl);
	}
	msgbuf_clear(&ibuf_main->w);
	close(ibuf_main->fd);
	free(ibuf_main);

	while ((mctx = LIST_FIRST(&rde_mrts)) != NULL) {
		msgbuf_clear(&mctx->mrt.wbuf);
		close(mctx->mrt.wbuf.fd);
		LIST_REMOVE(mctx, entry);
		free(mctx);
	}

	log_info("route decision engine exiting");
	exit(0);
}

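/*
 * Scratch state for network statements that arrive split over several
 * imsgs: netconf_s and session_set collect data sent by the session
 * engine, netconf_p and parent_set data sent by the parent process.
 */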
struct network_config	netconf_s, netconf_p;
struct filterstate	netconf_state;
struct filter_set_head	session_set = TAILQ_HEAD_INITIALIZER(session_set);
struct filter_set_head	parent_set = TAILQ_HEAD_INITIALIZER(parent_set);

void
rde_dispatch_imsg_session(struct imsgbuf *ibuf)
{
	struct imsg		 imsg;
	struct peer		 p;
	struct peer_config	 pconf;
	struct ctl_show_set	 cset;
	struct ctl_show_rib	 csr;
	struct ctl_show_rib_request	req;
	struct rde_peer		*peer;
	struct rde_aspath	*asp;
	struct rde_hashstats	 rdehash;
	struct filter_set	*s;
	struct as_set		*aset;
	struct rde_prefixset	*pset;
	u_int8_t		*asdata;
	ssize_t			 n;
	size_t			 aslen;
	int			 verbose;
	u_int16_t		 len;

	while (ibuf) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("rde_dispatch_imsg_session: imsg_get error");
		if (n == 0)
			break;

		switch (imsg.hdr.type) {
		case IMSG_UPDATE:
		case IMSG_SESSION_UP:
		case IMSG_SESSION_DOWN:
		case IMSG_SESSION_STALE:
		case IMSG_SESSION_FLUSH:
		case IMSG_SESSION_RESTARTED:
		case IMSG_REFRESH:
			if ((peer = peer_get(imsg.hdr.peerid)) == NULL) {
				log_warnx("rde_dispatch: unknown peer id %d",
				    imsg.hdr.peerid);
				break;
			}
			peer_imsg_push(peer, &imsg);
			break;
		case IMSG_SESSION_ADD:
			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(pconf))
				fatalx("incorrect size of session request");
			memcpy(&pconf, imsg.data, sizeof(pconf));
			peer_add(imsg.hdr.peerid, &pconf);
			break;
		case IMSG_NETWORK_ADD:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct network_config)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&netconf_s, imsg.data, sizeof(netconf_s));
			TAILQ_INIT(&netconf_s.attrset);
			rde_filterstate_prep(&netconf_state, NULL, NULL, NULL,
			    0);
			asp = &netconf_state.aspath;
			asp->aspath = aspath_get(NULL, 0);
			asp->origin = ORIGIN_IGP;
			asp->flags = F_ATTR_ORIGIN | F_ATTR_ASPATH |
			    F_ATTR_LOCALPREF | F_PREFIX_ANNOUNCED |
			    F_ANN_DYNAMIC;
			break;
		case IMSG_NETWORK_ASPATH:
			if (imsg.hdr.len - IMSG_HEADER_SIZE <
			    sizeof(csr)) {
				log_warnx("rde_dispatch: wrong imsg len");
				bzero(&netconf_s, sizeof(netconf_s));
				break;
			}
			aslen = imsg.hdr.len - IMSG_HEADER_SIZE - sizeof(csr);
			asdata = imsg.data;
			asdata += sizeof(struct ctl_show_rib);
			memcpy(&csr, imsg.data, sizeof(csr));
			asp = &netconf_state.aspath;
			asp->lpref = csr.local_pref;
			asp->med = csr.med;
			asp->weight = csr.weight;
			asp->flags = csr.flags;
			asp->origin = csr.origin;
			asp->flags |= F_PREFIX_ANNOUNCED | F_ANN_DYNAMIC;
			aspath_put(asp->aspath);
			asp->aspath = aspath_get(asdata, aslen);
			break;
		case IMSG_NETWORK_ATTR:
			if (imsg.hdr.len <= IMSG_HEADER_SIZE) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			/* parse optional path attributes */
			len = imsg.hdr.len - IMSG_HEADER_SIZE;
			if (rde_attr_add(&netconf_state, imsg.data,
			    len) == -1) {
				log_warnx("rde_dispatch: bad network "
				    "attribute");
				rde_filterstate_clean(&netconf_state);
				bzero(&netconf_s, sizeof(netconf_s));
				break;
			}
			break;
		case IMSG_NETWORK_DONE:
			if (imsg.hdr.len != IMSG_HEADER_SIZE) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			TAILQ_CONCAT(&netconf_s.attrset, &session_set, entry);
			switch (netconf_s.prefix.aid) {
			case AID_INET:
				if (netconf_s.prefixlen > 32)
					goto badnet;
				network_add(&netconf_s, &netconf_state);
				break;
			case AID_INET6:
				if (netconf_s.prefixlen > 128)
					goto badnet;
				network_add(&netconf_s, &netconf_state);
				break;
			case 0:
				/* something failed beforehand */
				break;
			default:
badnet:
				log_warnx("request to insert invalid network");
				break;
			}
			rde_filterstate_clean(&netconf_state);
			break;
		case IMSG_NETWORK_REMOVE:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct network_config)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&netconf_s, imsg.data, sizeof(netconf_s));
			TAILQ_INIT(&netconf_s.attrset);

			switch (netconf_s.prefix.aid) {
			case AID_INET:
				if (netconf_s.prefixlen > 32)
					goto badnetdel;
				network_delete(&netconf_s);
				break;
			case AID_INET6:
				if (netconf_s.prefixlen > 128)
					goto badnetdel;
				network_delete(&netconf_s);
				break;
			default:
badnetdel:
				log_warnx("request to remove invalid network");
				break;
			}
			break;
		case IMSG_NETWORK_FLUSH:
			if (imsg.hdr.len != IMSG_HEADER_SIZE) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC,
			    RDE_RUNNER_ROUNDS, peerself, network_flush_upcall,
			    NULL, NULL) == -1)
				log_warn("rde_dispatch: IMSG_NETWORK_FLUSH");
			break;
		case IMSG_FILTER_SET:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct filter_set)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			if ((s = malloc(sizeof(struct filter_set))) == NULL)
				fatal(NULL);
			memcpy(s, imsg.data, sizeof(struct filter_set));
			if (s->type == ACTION_SET_NEXTHOP) {
				s->action.nh_ref =
				    nexthop_get(&s->action.nexthop);
				s->type = ACTION_SET_NEXTHOP_REF;
			}
			TAILQ_INSERT_TAIL(&session_set, s, entry);
			break;
		case IMSG_CTL_SHOW_NETWORK:
		case IMSG_CTL_SHOW_RIB:
		case IMSG_CTL_SHOW_RIB_PREFIX:
			if (imsg.hdr.len != IMSG_HEADER_SIZE + sizeof(req)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&req, imsg.data, sizeof(req));
			rde_dump_ctx_new(&req, imsg.hdr.pid, imsg.hdr.type);
			break;
		case IMSG_CTL_SHOW_NEIGHBOR:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct peer)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&p, imsg.data, sizeof(struct peer));
			peer = peer_get(p.conf.id);
			if (peer != NULL) {
				p.stats.prefix_cnt = peer->prefix_cnt;
				p.stats.prefix_out_cnt = peer->prefix_out_cnt;
				p.stats.prefix_rcvd_update =
				    peer->prefix_rcvd_update;
				p.stats.prefix_rcvd_withdraw =
				    peer->prefix_rcvd_withdraw;
				p.stats.prefix_rcvd_eor =
				    peer->prefix_rcvd_eor;
				p.stats.prefix_sent_update =
				    peer->prefix_sent_update;
				p.stats.prefix_sent_withdraw =
				    peer->prefix_sent_withdraw;
				p.stats.prefix_sent_eor =
				    peer->prefix_sent_eor;
			}
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NEIGHBOR, 0,
			    imsg.hdr.pid, -1, &p, sizeof(struct peer));
			break;
		case IMSG_CTL_SHOW_RIB_MEM:
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_MEM, 0,
			    imsg.hdr.pid, -1, &rdemem, sizeof(rdemem));
			path_hash_stats(&rdehash);
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
			    imsg.hdr.pid, -1, &rdehash, sizeof(rdehash));
			aspath_hash_stats(&rdehash);
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
			    imsg.hdr.pid, -1, &rdehash, sizeof(rdehash));
			communities_hash_stats(&rdehash);
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
			    imsg.hdr.pid, -1, &rdehash, sizeof(rdehash));
			attr_hash_stats(&rdehash);
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
			    imsg.hdr.pid, -1, &rdehash, sizeof(rdehash));
			imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, imsg.hdr.pid,
			    -1, NULL, 0);
			break;
		case IMSG_CTL_SHOW_SET:
			/* first roa set */
			pset = &conf->rde_roa;
			memset(&cset, 0, sizeof(cset));
			cset.type = ROA_SET;
			strlcpy(cset.name, "RPKI ROA", sizeof(cset.name));
			cset.lastchange = pset->lastchange;
			cset.v4_cnt = pset->th.v4_cnt;
			cset.v6_cnt = pset->th.v6_cnt;
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_SET, 0,
			    imsg.hdr.pid, -1, &cset, sizeof(cset));

			SIMPLEQ_FOREACH(aset, &conf->as_sets, entry) {
				memset(&cset, 0, sizeof(cset));
				cset.type = ASNUM_SET;
				strlcpy(cset.name, aset->name,
				    sizeof(cset.name));
				cset.lastchange = aset->lastchange;
				cset.as_cnt = set_nmemb(aset->set);
				imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_SET, 0,
				    imsg.hdr.pid, -1, &cset, sizeof(cset));
			}
			SIMPLEQ_FOREACH(pset, &conf->rde_prefixsets, entry) {
				memset(&cset, 0, sizeof(cset));
				cset.type = PREFIX_SET;
				strlcpy(cset.name, pset->name,
				    sizeof(cset.name));
				cset.lastchange = pset->lastchange;
				cset.v4_cnt = pset->th.v4_cnt;
				cset.v6_cnt = pset->th.v6_cnt;
				imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_SET, 0,
				    imsg.hdr.pid, -1, &cset, sizeof(cset));
			}
			SIMPLEQ_FOREACH(pset, &conf->rde_originsets, entry) {
				memset(&cset, 0, sizeof(cset));
				cset.type = ORIGIN_SET;
				strlcpy(cset.name, pset->name,
				    sizeof(cset.name));
				cset.lastchange = pset->lastchange;
				cset.v4_cnt = pset->th.v4_cnt;
				cset.v6_cnt = pset->th.v6_cnt;
				imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_SET, 0,
				    imsg.hdr.pid, -1, &cset, sizeof(cset));
			}
			imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, imsg.hdr.pid,
			    -1, NULL, 0);
			break;
		case IMSG_CTL_LOG_VERBOSE:
			/* already checked by SE */
			memcpy(&verbose, imsg.data, sizeof(verbose));
			log_setverbose(verbose);
			break;
		case IMSG_CTL_END:
			imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, imsg.hdr.pid,
			    -1, NULL, 0);
			break;
		case IMSG_CTL_TERMINATE:
			rde_dump_ctx_terminate(imsg.hdr.pid);
			break;
		case IMSG_XON:
			if (imsg.hdr.peerid) {
				peer = peer_get(imsg.hdr.peerid);
				if (peer)
					peer->throttled = 0;
			} else {
				rde_dump_ctx_throttle(imsg.hdr.pid, 0);
			}
			break;
		case IMSG_XOFF:
			if (imsg.hdr.peerid) {
				peer = peer_get(imsg.hdr.peerid);
				if (peer)
					peer->throttled = 1;
			} else {
				rde_dump_ctx_throttle(imsg.hdr.pid, 1);
			}
			break;
		default:
			break;
		}
		imsg_free(&imsg);
	}
}

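/*
 * Messages from the parent process mostly carry staged configuration:
 * the IMSG_RECONF_* messages build up nconf piece by piece until
 * IMSG_RECONF_DONE makes rde_reload_done() swap it in.
 */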
void
rde_dispatch_imsg_parent(struct imsgbuf *ibuf)
{
	static struct rde_prefixset	*last_prefixset;
	static struct as_set	*last_as_set;
	static struct l3vpn	*vpn;
	struct imsg		 imsg;
	struct mrt		 xmrt;
	struct roa		 roa;
	struct rde_rib		 rr;
	struct filterstate	 state;
	struct imsgbuf		*i;
	struct filter_head	*nr;
	struct filter_rule	*r;
	struct filter_set	*s;
	struct rib		*rib;
	struct rde_prefixset	*ps;
	struct rde_aspath	*asp;
	struct prefixset_item	 psi;
	char			*name;
	size_t			 nmemb;
	int			 n, fd, rv;
	u_int16_t		 rid;

	while (ibuf) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("rde_dispatch_imsg_parent: imsg_get error");
		if (n == 0)
			break;

		switch (imsg.hdr.type) {
		case IMSG_SOCKET_CONN:
		case IMSG_SOCKET_CONN_CTL:
			if ((fd = imsg.fd) == -1) {
				log_warnx("expected to receive imsg fd to "
				    "SE but didn't receive any");
				break;
			}
			if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
				fatal(NULL);
			imsg_init(i, fd);
			if (imsg.hdr.type == IMSG_SOCKET_CONN) {
				if (ibuf_se) {
					log_warnx("Unexpected imsg connection "
					    "to SE received");
					msgbuf_clear(&ibuf_se->w);
					free(ibuf_se);
				}
				ibuf_se = i;
			} else {
				if (ibuf_se_ctl) {
					log_warnx("Unexpected imsg ctl "
					    "connection to SE received");
					msgbuf_clear(&ibuf_se_ctl->w);
					free(ibuf_se_ctl);
				}
				ibuf_se_ctl = i;
			}
			break;
		case IMSG_NETWORK_ADD:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct network_config)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&netconf_p, imsg.data, sizeof(netconf_p));
			TAILQ_INIT(&netconf_p.attrset);
			break;
		case IMSG_NETWORK_DONE:
			TAILQ_CONCAT(&netconf_p.attrset, &parent_set, entry);

			rde_filterstate_prep(&state, NULL, NULL, NULL, 0);
			asp = &state.aspath;
			asp->aspath = aspath_get(NULL, 0);
			asp->origin = ORIGIN_IGP;
			asp->flags = F_ATTR_ORIGIN | F_ATTR_ASPATH |
			    F_ATTR_LOCALPREF | F_PREFIX_ANNOUNCED;

			network_add(&netconf_p, &state);
			rde_filterstate_clean(&state);
			break;
		case IMSG_NETWORK_REMOVE:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct network_config)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&netconf_p, imsg.data, sizeof(netconf_p));
			TAILQ_INIT(&netconf_p.attrset);
			network_delete(&netconf_p);
			break;
		case IMSG_RECONF_CONF:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct bgpd_config))
				fatalx("IMSG_RECONF_CONF bad len");
			out_rules_tmp = calloc(1, sizeof(struct filter_head));
			if (out_rules_tmp == NULL)
				fatal(NULL);
			TAILQ_INIT(out_rules_tmp);
			nconf = new_config();
			copy_config(nconf, imsg.data);

			for (rid = 0; rid < rib_size; rid++) {
				if ((rib = rib_byid(rid)) == NULL)
					continue;
				rib->state = RECONF_DELETE;
				rib->fibstate = RECONF_NONE;
			}
			break;
		case IMSG_RECONF_RIB:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct rde_rib))
				fatalx("IMSG_RECONF_RIB bad len");
			memcpy(&rr, imsg.data, sizeof(rr));
			rib = rib_byid(rib_find(rr.name));
			if (rib == NULL) {
				rib = rib_new(rr.name, rr.rtableid, rr.flags);
			} else if (rib->flags == rr.flags &&
			    rib->rtableid == rr.rtableid) {
				/* no change to rib apart from filters */
				rib->state = RECONF_KEEP;
			} else {
				/* reload rib because something changed */
				rib->flags_tmp = rr.flags;
				rib->rtableid_tmp = rr.rtableid;
				rib->state = RECONF_RELOAD;
			}
			break;
		case IMSG_RECONF_FILTER:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct filter_rule))
				fatalx("IMSG_RECONF_FILTER bad len");
			if ((r = malloc(sizeof(struct filter_rule))) == NULL)
				fatal(NULL);
			memcpy(r, imsg.data, sizeof(struct filter_rule));
			if (r->match.prefixset.name[0] != '\0') {
				r->match.prefixset.ps =
				    rde_find_prefixset(r->match.prefixset.name,
					&nconf->rde_prefixsets);
				if (r->match.prefixset.ps == NULL)
					log_warnx("%s: no prefixset for %s",
					    __func__, r->match.prefixset.name);
			}
			if (r->match.originset.name[0] != '\0') {
				r->match.originset.ps =
				    rde_find_prefixset(r->match.originset.name,
					&nconf->rde_originsets);
				if (r->match.originset.ps == NULL)
					log_warnx("%s: no origin-set for %s",
					    __func__, r->match.originset.name);
			}
			if (r->match.as.flags & AS_FLAG_AS_SET_NAME) {
				struct as_set * aset;

				aset = as_sets_lookup(&nconf->as_sets,
				    r->match.as.name);
				if (aset == NULL) {
					log_warnx("%s: no as-set for %s",
					    __func__, r->match.as.name);
				} else {
					r->match.as.flags = AS_FLAG_AS_SET;
					r->match.as.aset = aset;
				}
			}
			TAILQ_INIT(&r->set);
			TAILQ_CONCAT(&r->set, &parent_set, entry);
			if ((rib = rib_byid(rib_find(r->rib))) == NULL) {
				log_warnx("IMSG_RECONF_FILTER: filter rule "
				    "for nonexistent rib %s", r->rib);
				free(r);
				break;
			}
			r->peer.ribid = rib->id;
			if (r->dir == DIR_IN) {
				nr = rib->in_rules_tmp;
				if (nr == NULL) {
					nr = calloc(1,
					    sizeof(struct filter_head));
					if (nr == NULL)
						fatal(NULL);
					TAILQ_INIT(nr);
					rib->in_rules_tmp = nr;
				}
				TAILQ_INSERT_TAIL(nr, r, entry);
			} else
				TAILQ_INSERT_TAIL(out_rules_tmp, r, entry);
			break;
		case IMSG_RECONF_PREFIX_SET:
		case IMSG_RECONF_ORIGIN_SET:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(ps->name))
				fatalx("IMSG_RECONF_PREFIX_SET bad len");
			ps = calloc(1, sizeof(struct rde_prefixset));
			if (ps == NULL)
				fatal(NULL);
			memcpy(ps->name, imsg.data, sizeof(ps->name));
			if (imsg.hdr.type == IMSG_RECONF_ORIGIN_SET) {
				SIMPLEQ_INSERT_TAIL(&nconf->rde_originsets, ps,
				    entry);
			} else {
				SIMPLEQ_INSERT_TAIL(&nconf->rde_prefixsets, ps,
				    entry);
			}
			last_prefixset = ps;
			break;
		case IMSG_RECONF_ROA_SET:
			strlcpy(nconf->rde_roa.name, "RPKI ROA",
			    sizeof(nconf->rde_roa.name));
			last_prefixset = &nconf->rde_roa;
			break;
		case IMSG_RECONF_ROA_ITEM:
			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(roa))
				fatalx("IMSG_RECONF_ROA_ITEM bad len");
			memcpy(&roa, imsg.data, sizeof(roa));
			rv = trie_roa_add(&last_prefixset->th, &roa);
			break;
		case IMSG_RECONF_PREFIX_SET_ITEM:
			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(psi))
				fatalx("IMSG_RECONF_PREFIX_SET_ITEM bad len");
			memcpy(&psi, imsg.data, sizeof(psi));
			if (last_prefixset == NULL)
				fatalx("King Bula has no prefixset");
			rv = trie_add(&last_prefixset->th,
			    &psi.p.addr, psi.p.len,
			    psi.p.len_min, psi.p.len_max);
			if (rv == -1)
				log_warnx("trie_add(%s, %s/%u) failed",
				    last_prefixset->name, log_addr(&psi.p.addr),
				    psi.p.len);
			break;
		case IMSG_RECONF_AS_SET:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(nmemb) + SET_NAME_LEN)
				fatalx("IMSG_RECONF_AS_SET bad len");
			memcpy(&nmemb, imsg.data, sizeof(nmemb));
			name = (char *)imsg.data + sizeof(nmemb);
			if (as_sets_lookup(&nconf->as_sets, name) != NULL)
				fatalx("duplicate as-set %s", name);
			last_as_set = as_sets_new(&nconf->as_sets, name, nmemb,
			    sizeof(u_int32_t));
			break;
		case IMSG_RECONF_AS_SET_ITEMS:
			nmemb = imsg.hdr.len - IMSG_HEADER_SIZE;
			nmemb /= sizeof(u_int32_t);
			if (set_add(last_as_set->set, imsg.data, nmemb) != 0)
				fatal(NULL);
			break;
		case IMSG_RECONF_AS_SET_DONE:
			set_prep(last_as_set->set);
			last_as_set = NULL;
			break;
		case IMSG_RECONF_VPN:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct l3vpn))
				fatalx("IMSG_RECONF_VPN bad len");
			if ((vpn = malloc(sizeof(struct l3vpn))) == NULL)
				fatal(NULL);
			memcpy(vpn, imsg.data, sizeof(struct l3vpn));
			TAILQ_INIT(&vpn->import);
			TAILQ_INIT(&vpn->export);
			TAILQ_INIT(&vpn->net_l);
			SIMPLEQ_INSERT_TAIL(&nconf->l3vpns, vpn, entry);
			break;
		case IMSG_RECONF_VPN_EXPORT:
			if (vpn == NULL) {
				log_warnx("rde_dispatch_imsg_parent: "
				    "IMSG_RECONF_VPN_EXPORT unexpected");
				break;
			}
			TAILQ_CONCAT(&vpn->export, &parent_set, entry);
			break;
		case IMSG_RECONF_VPN_IMPORT:
			if (vpn == NULL) {
				log_warnx("rde_dispatch_imsg_parent: "
				    "IMSG_RECONF_VPN_IMPORT unexpected");
				break;
			}
			TAILQ_CONCAT(&vpn->import, &parent_set, entry);
			break;
		case IMSG_RECONF_VPN_DONE:
			break;
		case IMSG_RECONF_DRAIN:
			imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0,
			    -1, NULL, 0);
			break;
		case IMSG_RECONF_DONE:
			if (nconf == NULL)
				fatalx("got IMSG_RECONF_DONE but no config");
			last_prefixset = NULL;

			rde_reload_done();
			break;
		case IMSG_NEXTHOP_UPDATE:
			nexthop_update(imsg.data);
			break;
		case IMSG_FILTER_SET:
			if (imsg.hdr.len > IMSG_HEADER_SIZE +
			    sizeof(struct filter_set))
				fatalx("IMSG_FILTER_SET bad len");
			if ((s = malloc(sizeof(struct filter_set))) == NULL)
				fatal(NULL);
			memcpy(s, imsg.data, sizeof(struct filter_set));
			if (s->type == ACTION_SET_NEXTHOP) {
				s->action.nh_ref =
				    nexthop_get(&s->action.nexthop);
				s->type = ACTION_SET_NEXTHOP_REF;
			}
			TAILQ_INSERT_TAIL(&parent_set, s, entry);
			break;
		case IMSG_MRT_OPEN:
		case IMSG_MRT_REOPEN:
			if (imsg.hdr.len > IMSG_HEADER_SIZE +
			    sizeof(struct mrt)) {
				log_warnx("wrong imsg len");
				break;
			}
			memcpy(&xmrt, imsg.data, sizeof(xmrt));
			if ((fd = imsg.fd) == -1)
				log_warnx("expected to receive fd for mrt dump "
				    "but didn't receive any");
			else if (xmrt.type == MRT_TABLE_DUMP ||
			    xmrt.type == MRT_TABLE_DUMP_MP ||
			    xmrt.type == MRT_TABLE_DUMP_V2) {
				rde_dump_mrt_new(&xmrt, imsg.hdr.pid, fd);
			} else
				close(fd);
			break;
		case IMSG_MRT_CLOSE:
			/* ignore end message because a dump is atomic */
			break;
		default:
			break;
		}
		imsg_free(&imsg);
	}
}

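/*
 * Process at most one queued imsg per call.  rde_main() invokes this
 * via peer_foreach() on every loop iteration, so pending updates are
 * drained round-robin across peers instead of one busy peer starving
 * the others.
 */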
void
rde_dispatch_imsg_peer(struct rde_peer *peer, void *bula)
{
	struct session_up sup;
	struct imsg imsg;
	u_int8_t aid;

	if (!peer_imsg_pop(peer, &imsg))
		return;

	switch (imsg.hdr.type) {
	case IMSG_UPDATE:
		rde_update_dispatch(peer, &imsg);
		break;
	case IMSG_SESSION_UP:
		if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(sup))
			fatalx("incorrect size of session request");
		memcpy(&sup, imsg.data, sizeof(sup));
		if (peer_up(peer, &sup) == -1) {
			peer->state = PEER_DOWN;
			imsg_compose(ibuf_se, IMSG_SESSION_DOWN, peer->conf.id,
			    0, -1, NULL, 0);
		}
		break;
	case IMSG_SESSION_DOWN:
		peer_down(peer, NULL);
		break;
	case IMSG_SESSION_STALE:
	case IMSG_SESSION_FLUSH:
	case IMSG_SESSION_RESTARTED:
	case IMSG_REFRESH:
		if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
			log_warnx("%s: wrong imsg len", __func__);
			break;
		}
		memcpy(&aid, imsg.data, sizeof(aid));
		if (aid >= AID_MAX) {
			log_warnx("%s: bad AID", __func__);
			break;
		}

		switch (imsg.hdr.type) {
		case IMSG_SESSION_STALE:
			peer_stale(peer, aid);
			break;
		case IMSG_SESSION_FLUSH:
			peer_flush(peer, aid, peer->staletime[aid]);
			break;
		case IMSG_SESSION_RESTARTED:
			if (peer->staletime[aid])
				peer_flush(peer, aid, peer->staletime[aid]);
			break;
		case IMSG_REFRESH:
			peer_dump(peer, aid);
			break;
		}
		break;
	default:
		log_warnx("%s: unhandled imsg type %d", __func__,
		    imsg.hdr.type);
		break;
	}

	imsg_free(&imsg);
}

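/*
 * A BGP UPDATE message (RFC 4271, section 4.3) is laid out as:
 *
 *	Withdrawn Routes Length			(2 octets)
 *	Withdrawn Routes			(variable)
 *	Total Path Attribute Length		(2 octets)
 *	Path Attributes				(variable)
 *	Network Layer Reachability Information	(variable)
 *
 * rde_update_dispatch() walks these sections in order; multiprotocol
 * reach/unreach NLRI (RFC 4760) travel inside the path attributes and
 * are handled after the plain IPv4 withdraw and NLRI sections.
 */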
/* handle routing updates from the session engine. */
void
rde_update_dispatch(struct rde_peer *peer, struct imsg *imsg)
{
	struct filterstate	 state;
	struct bgpd_addr	 prefix;
	struct mpattr		 mpa;
	u_char			*p, *mpp = NULL;
	int			 pos = 0;
	u_int16_t		 afi, len, mplen;
	u_int16_t		 withdrawn_len;
	u_int16_t		 attrpath_len;
	u_int16_t		 nlri_len;
	u_int8_t		 aid, prefixlen, safi, subtype;
	u_int32_t		 fas;

	p = imsg->data;

	if (imsg->hdr.len < IMSG_HEADER_SIZE + 2) {
		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
		return;
	}

	memcpy(&len, p, 2);
	withdrawn_len = ntohs(len);
	p += 2;
	if (imsg->hdr.len < IMSG_HEADER_SIZE + 2 + withdrawn_len + 2) {
		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
		return;
	}

	p += withdrawn_len;
	memcpy(&len, p, 2);
	attrpath_len = len = ntohs(len);
	p += 2;
	if (imsg->hdr.len <
	    IMSG_HEADER_SIZE + 2 + withdrawn_len + 2 + attrpath_len) {
		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
		return;
	}

	nlri_len =
	    imsg->hdr.len - IMSG_HEADER_SIZE - 4 - withdrawn_len - attrpath_len;

	if (attrpath_len == 0) {
		/* 0 = no NLRI information in this message */
		if (nlri_len != 0) {
			/* crap at end of update which should not be there */
			rde_update_err(peer, ERR_UPDATE,
			    ERR_UPD_ATTRLIST, NULL, 0);
			return;
		}
		if (withdrawn_len == 0) {
			/* EoR marker */
			rde_peer_recv_eor(peer, AID_INET);
			return;
		}
	}

	bzero(&mpa, sizeof(mpa));
	rde_filterstate_prep(&state, NULL, NULL, NULL, 0);
	if (attrpath_len != 0) { /* 0 = no NLRI information in this message */
		/* parse path attributes */
		while (len > 0) {
			if ((pos = rde_attr_parse(p, len, peer, &state,
			    &mpa)) < 0)
				goto done;
			p += pos;
			len -= pos;
		}

		/* check for missing but necessary attributes */
		if ((subtype = rde_attr_missing(&state.aspath, peer->conf.ebgp,
		    nlri_len))) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_MISSNG_WK_ATTR,
			    &subtype, sizeof(u_int8_t));
			goto done;
		}

		rde_as4byte_fixup(peer, &state.aspath);

		/* enforce remote AS if requested */
		if (state.aspath.flags & F_ATTR_ASPATH &&
		    peer->conf.enforce_as == ENFORCE_AS_ON) {
			fas = aspath_neighbor(state.aspath.aspath);
			if (peer->conf.remote_as != fas) {
				log_peer_warnx(&peer->conf, "bad path, "
				    "starting with %s, "
				    "enforce neighbor-as enabled", log_as(fas));
				rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
				    NULL, 0);
				goto done;
			}
		}

		/* aspath needs to be loop free. This is not a hard error. */
		if (state.aspath.flags & F_ATTR_ASPATH &&
		    peer->conf.ebgp &&
		    peer->conf.enforce_local_as == ENFORCE_AS_ON &&
		    !aspath_loopfree(state.aspath.aspath, peer->conf.local_as))
			state.aspath.flags |= F_ATTR_LOOP;

		rde_reflector(peer, &state.aspath);
	}

	p = imsg->data;
	len = withdrawn_len;
	p += 2;

	/* withdraw prefix */
	while (len > 0) {
		if ((pos = nlri_get_prefix(p, len, &prefix,
		    &prefixlen)) == -1) {
			/*
			 * the RFC does not mention what we should do in
			 * this case. Let's do the same as in the NLRI case.
			 */
			log_peer_warnx(&peer->conf, "bad withdraw prefix");
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
			    NULL, 0);
			goto done;
		}
		p += pos;
		len -= pos;

		if (peer->capa.mp[AID_INET] == 0) {
			log_peer_warnx(&peer->conf,
			    "bad withdraw, %s disabled", aid2str(AID_INET));
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		rde_update_withdraw(peer, &prefix, prefixlen);
	}

	/* withdraw MP_UNREACH_NLRI if available */
	if (mpa.unreach_len != 0) {
		mpp = mpa.unreach;
		mplen = mpa.unreach_len;
		memcpy(&afi, mpp, 2);
		mpp += 2;
		mplen -= 2;
		afi = ntohs(afi);
		safi = *mpp++;
		mplen--;

		if (afi2aid(afi, safi, &aid) == -1) {
			log_peer_warnx(&peer->conf,
			    "bad AFI/SAFI pair in withdraw");
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		if (peer->capa.mp[aid] == 0) {
			log_peer_warnx(&peer->conf,
			    "bad withdraw, %s disabled", aid2str(aid));
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		if ((state.aspath.flags & ~F_ATTR_MP_UNREACH) == 0 &&
		    mplen == 0) {
			/* EoR marker */
			rde_peer_recv_eor(peer, aid);
		}

		switch (aid) {
		case AID_INET6:
			while (mplen > 0) {
				if ((pos = nlri_get_prefix6(mpp, mplen,
				    &prefix, &prefixlen)) == -1) {
					log_peer_warnx(&peer->conf,
					    "bad IPv6 withdraw prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.unreach, mpa.unreach_len);
					goto done;
				}
				mpp += pos;
				mplen -= pos;

				rde_update_withdraw(peer, &prefix, prefixlen);
			}
			break;
		case AID_VPN_IPv4:
			while (mplen > 0) {
				if ((pos = nlri_get_vpn4(mpp, mplen,
				    &prefix, &prefixlen, 1)) == -1) {
					log_peer_warnx(&peer->conf,
					    "bad VPNv4 withdraw prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.unreach, mpa.unreach_len);
					goto done;
				}
				mpp += pos;
				mplen -= pos;

				rde_update_withdraw(peer, &prefix, prefixlen);
			}
			break;
		case AID_VPN_IPv6:
			while (mplen > 0) {
				if ((pos = nlri_get_vpn6(mpp, mplen,
				    &prefix, &prefixlen, 1)) == -1) {
					log_peer_warnx(&peer->conf,
					    "bad VPNv6 withdraw prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR, mpa.unreach,
					    mpa.unreach_len);
					goto done;
				}
				mpp += pos;
				mplen -= pos;

				rde_update_withdraw(peer, &prefix, prefixlen);
			}
			break;
		default:
			/* silently ignore unsupported multiprotocol AF */
			break;
		}

		if ((state.aspath.flags & ~F_ATTR_MP_UNREACH) == 0)
			goto done;
	}

	/* shift to NLRI information */
	p += 2 + attrpath_len;

	/* parse nlri prefix */
	while (nlri_len > 0) {
		if ((pos = nlri_get_prefix(p, nlri_len, &prefix,
		    &prefixlen)) == -1) {
			log_peer_warnx(&peer->conf, "bad nlri prefix");
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
			    NULL, 0);
			goto done;
		}
		p += pos;
		nlri_len -= pos;

		if (peer->capa.mp[AID_INET] == 0) {
			log_peer_warnx(&peer->conf,
			    "bad update, %s disabled", aid2str(AID_INET));
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		if (rde_update_update(peer, &state, &prefix, prefixlen) == -1)
			goto done;
	}

	/* add MP_REACH_NLRI if available */
	if (mpa.reach_len != 0) {
		mpp = mpa.reach;
		mplen = mpa.reach_len;
		memcpy(&afi, mpp, 2);
		mpp += 2;
		mplen -= 2;
		afi = ntohs(afi);
		safi = *mpp++;
		mplen--;

		if (afi2aid(afi, safi, &aid) == -1) {
			log_peer_warnx(&peer->conf,
			    "bad AFI/SAFI pair in update");
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		if (peer->capa.mp[aid] == 0) {
			log_peer_warnx(&peer->conf,
			    "bad update, %s disabled", aid2str(aid));
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		/* unlock the previously locked nexthop, it is no longer used */
		nexthop_unref(state.nexthop);
		state.nexthop = NULL;
		if ((pos = rde_get_mp_nexthop(mpp, mplen, aid, &state)) == -1) {
			log_peer_warnx(&peer->conf, "bad nlri nexthop");
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    mpa.reach, mpa.reach_len);
			goto done;
		}
		mpp += pos;
		mplen -= pos;

		switch (aid) {
		case AID_INET6:
			while (mplen > 0) {
				if ((pos = nlri_get_prefix6(mpp, mplen,
				    &prefix, &prefixlen)) == -1) {
					log_peer_warnx(&peer->conf,
					    "bad IPv6 nlri prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.reach, mpa.reach_len);
					goto done;
				}
				mpp += pos;
				mplen -= pos;

				if (rde_update_update(peer, &state, &prefix,
				    prefixlen) == -1)
					goto done;
			}
			break;
		case AID_VPN_IPv4:
			while (mplen > 0) {
				if ((pos = nlri_get_vpn4(mpp, mplen,
				    &prefix, &prefixlen, 0)) == -1) {
					log_peer_warnx(&peer->conf,
					    "bad VPNv4 nlri prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.reach, mpa.reach_len);
					goto done;
				}
				mpp += pos;
				mplen -= pos;

				if (rde_update_update(peer, &state, &prefix,
				    prefixlen) == -1)
					goto done;
			}
			break;
		case AID_VPN_IPv6:
			while (mplen > 0) {
				if ((pos = nlri_get_vpn6(mpp, mplen,
				    &prefix, &prefixlen, 0)) == -1) {
					log_peer_warnx(&peer->conf,
					    "bad VPNv6 nlri prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.reach, mpa.reach_len);
					goto done;
				}
				mpp += pos;
				mplen -= pos;

				if (rde_update_update(peer, &state, &prefix,
				    prefixlen) == -1)
					goto done;
			}
			break;
		default:
			/* silently ignore unsupported multiprotocol AF */
			break;
		}
	}

done:
	rde_filterstate_clean(&state);
}

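/*
 * Store a single prefix in the Adj-RIB-In and run it through the input
 * filters of every local RIB: an ACTION_ALLOW result updates that RIB,
 * anything else withdraws a previously accepted copy of the prefix.
 */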
int
rde_update_update(struct rde_peer *peer, struct filterstate *in,
    struct bgpd_addr *prefix, u_int8_t prefixlen)
{
	struct filterstate	 state;
	enum filter_actions	 action;
	u_int8_t		 vstate;
	u_int16_t		 i;
	const char		*wmsg = "filtered, withdraw";

	peer->prefix_rcvd_update++;
	vstate = rde_roa_validity(&conf->rde_roa, prefix, prefixlen,
	    aspath_origin(in->aspath.aspath));

	/* add original path to the Adj-RIB-In */
	if (prefix_update(rib_byid(RIB_ADJ_IN), peer, in, prefix, prefixlen,
	    vstate) == 1)
		peer->prefix_cnt++;

	/* max prefix checker */
	if (peer->conf.max_prefix && peer->prefix_cnt > peer->conf.max_prefix) {
		log_peer_warnx(&peer->conf, "prefix limit reached (>%u/%u)",
		    peer->prefix_cnt, peer->conf.max_prefix);
		rde_update_err(peer, ERR_CEASE, ERR_CEASE_MAX_PREFIX, NULL, 0);
		return (-1);
	}

	if (in->aspath.flags & F_ATTR_PARSE_ERR)
		wmsg = "path invalid, withdraw";

	for (i = RIB_LOC_START; i < rib_size; i++) {
		struct rib *rib = rib_byid(i);
		if (rib == NULL)
			continue;
		rde_filterstate_prep(&state, &in->aspath, &in->communities,
		    in->nexthop, in->nhflags);
		/* input filter */
		action = rde_filter(rib->in_rules, peer, peer, prefix,
		    prefixlen, vstate, &state);

		if (action == ACTION_ALLOW) {
			rde_update_log("update", i, peer,
			    &state.nexthop->exit_nexthop, prefix,
			    prefixlen);
			prefix_update(rib, peer, &state, prefix,
			    prefixlen, vstate);
		} else if (prefix_withdraw(rib, peer, prefix,
		    prefixlen)) {
			rde_update_log(wmsg, i, peer,
			    NULL, prefix, prefixlen);
		}

		/* clear state */
		rde_filterstate_clean(&state);
	}
	return (0);
}

void
rde_update_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix,
    u_int8_t prefixlen)
{
	u_int16_t i;

	for (i = RIB_LOC_START; i < rib_size; i++) {
		struct rib *rib = rib_byid(i);
		if (rib == NULL)
			continue;
		if (prefix_withdraw(rib, peer, prefix, prefixlen))
			rde_update_log("withdraw", i, peer, NULL, prefix,
			    prefixlen);
	}

	/* remove original path from the Adj-RIB-In */
	if (prefix_withdraw(rib_byid(RIB_ADJ_IN), peer, prefix, prefixlen))
		peer->prefix_cnt--;

	peer->prefix_rcvd_withdraw++;
}

/*
 * BGP UPDATE parser functions
 */

/* attribute parser specific macros */
#define UPD_READ(t, p, plen, n) \
	do { \
		memcpy(t, p, n); \
		p += n; \
		plen += n; \
	} while (0)

#define CHECK_FLAGS(s, t, m)	\
	(((s) & ~(ATTR_DEFMASK | (m))) == (t))

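/*
 * Every path attribute starts with a flags octet and a type octet,
 * followed by a one octet length, or a two octet length if ATTR_EXTLEN
 * is set in the flags (RFC 4271, section 4.3).  UPD_READ() copies the
 * next n octets and advances the parse position; CHECK_FLAGS() matches
 * the received flags against the expected value while masking out bits
 * that may legitimately differ (e.g. ATTR_PARTIAL).
 */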
int
rde_attr_parse(u_char *p, u_int16_t len, struct rde_peer *peer,
    struct filterstate *state, struct mpattr *mpa)
{
	struct bgpd_addr nexthop;
	struct rde_aspath *a = &state->aspath;
	u_char		*op = p, *npath;
	u_int32_t	 tmp32, zero = 0;
	int		 error;
	u_int16_t	 attr_len, nlen;
	u_int16_t	 plen = 0;
	u_int8_t	 flags;
	u_int8_t	 type;
	u_int8_t	 tmp8;

	if (len < 3) {
bad_len:
		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLEN, op, len);
		return (-1);
	}

	UPD_READ(&flags, p, plen, 1);
	UPD_READ(&type, p, plen, 1);

	if (flags & ATTR_EXTLEN) {
		if (len - plen < 2)
			goto bad_len;
		UPD_READ(&attr_len, p, plen, 2);
		attr_len = ntohs(attr_len);
	} else {
		UPD_READ(&tmp8, p, plen, 1);
		attr_len = tmp8;
	}

	if (len - plen < attr_len)
		goto bad_len;

	/* adjust len to the actual attribute size including header */
	len = plen + attr_len;

	switch (type) {
	case ATTR_UNDEF:
		/* ignore and drop path attributes with a type code of 0 */
		plen += attr_len;
		break;
	case ATTR_ORIGIN:
		if (attr_len != 1)
			goto bad_len;

		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0)) {
bad_flags:
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRFLAGS,
			    op, len);
			return (-1);
		}

		UPD_READ(&a->origin, p, plen, 1);
		if (a->origin > ORIGIN_INCOMPLETE) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ORIGIN,
			    op, len);
			return (-1);
		}
		if (a->flags & F_ATTR_ORIGIN)
			goto bad_list;
		a->flags |= F_ATTR_ORIGIN;
		break;
	case ATTR_ASPATH:
		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
			goto bad_flags;
		error = aspath_verify(p, attr_len, rde_as4byte(peer),
		    rde_no_as_set(peer));
		if (error == AS_ERR_SOFT) {
			/*
			 * soft errors like unexpected segment types are
			 * not considered fatal and the path is just
			 * marked invalid.
			 */
			a->flags |= F_ATTR_PARSE_ERR;
		} else if (error != 0) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
			    NULL, 0);
			return (-1);
		}
		if (a->flags & F_ATTR_ASPATH)
			goto bad_list;
		if (rde_as4byte(peer)) {
			npath = p;
			nlen = attr_len;
		} else {
			npath = aspath_inflate(p, attr_len, &nlen);
			if (npath == NULL)
				fatal("aspath_inflate");
		}
		if (error == AS_ERR_SOFT) {
			char *str;

			aspath_asprint(&str, npath, nlen);
			log_peer_warnx(&peer->conf, "bad ASPATH %s, "
			    "path invalidated and prefix withdrawn",
			    str ? str : "(bad aspath)");
			free(str);
		}
		a->flags |= F_ATTR_ASPATH;
		a->aspath = aspath_get(npath, nlen);
		if (npath != p)
			free(npath);
		plen += attr_len;
		break;
	case ATTR_NEXTHOP:
		if (attr_len != 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
			goto bad_flags;
		if (a->flags & F_ATTR_NEXTHOP)
			goto bad_list;
		a->flags |= F_ATTR_NEXTHOP;

		bzero(&nexthop, sizeof(nexthop));
		nexthop.aid = AID_INET;
		UPD_READ(&nexthop.v4.s_addr, p, plen, 4);
		/*
		 * Check if the nexthop is a valid IP address. We consider
		 * multicast and experimental addresses as invalid.
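		 * (IN_MULTICAST matches 224.0.0.0/4 and IN_BADCLASS the
		 * former class E range 240.0.0.0/4.)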
		 */
		tmp32 = ntohl(nexthop.v4.s_addr);
		if (IN_MULTICAST(tmp32) || IN_BADCLASS(tmp32)) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
			    op, len);
			return (-1);
		}
		nexthop_unref(state->nexthop);	/* just to be sure */
		state->nexthop = nexthop_get(&nexthop);
		break;
	case ATTR_MED:
		if (attr_len != 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
			goto bad_flags;
		if (a->flags & F_ATTR_MED)
			goto bad_list;
		a->flags |= F_ATTR_MED;

		UPD_READ(&tmp32, p, plen, 4);
		a->med = ntohl(tmp32);
		break;
	case ATTR_LOCALPREF:
		if (attr_len != 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
			goto bad_flags;
		if (peer->conf.ebgp) {
			/* ignore local-pref attr on non ibgp peers */
			plen += 4;
			break;
		}
		if (a->flags & F_ATTR_LOCALPREF)
			goto bad_list;
		a->flags |= F_ATTR_LOCALPREF;

		UPD_READ(&tmp32, p, plen, 4);
		a->lpref = ntohl(tmp32);
		break;
	case ATTR_ATOMIC_AGGREGATE:
		if (attr_len != 0)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
			goto bad_flags;
		goto optattr;
	case ATTR_AGGREGATOR:
		if ((!rde_as4byte(peer) && attr_len != 6) ||
		    (rde_as4byte(peer) && attr_len != 8)) {
			/*
			 * ignore attribute in case of error as per
			 * RFC 7606
			 */
			log_peer_warnx(&peer->conf, "bad AGGREGATOR, "
			    "partial attribute ignored");
			plen += attr_len;
			break;
		}
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
		    ATTR_PARTIAL))
			goto bad_flags;
		if (!rde_as4byte(peer)) {
			/* need to inflate aggregator AS to 4-byte */
			u_char	t[8];
			t[0] = t[1] = 0;
			UPD_READ(&t[2], p, plen, 2);
			UPD_READ(&t[4], p, plen, 4);
			if (memcmp(t, &zero, sizeof(u_int32_t)) == 0) {
				/* As per RFC7606 use "attribute discard". */
				log_peer_warnx(&peer->conf, "bad AGGREGATOR, "
				    "AS 0 not allowed, attribute discarded");
				break;
			}
			if (attr_optadd(a, flags, type, t,
			    sizeof(t)) == -1)
				goto bad_list;
			break;
		}
		/* 4-byte AS capable peers take the default path */
1727 		if (memcmp(p, &zero, sizeof(u_int32_t)) == 0) {
1728 			/* As per RFC7606 use "attribute discard" here. */
1729 			char *pfmt = log_fmt_peer(&peer->conf);
1730 			log_debug("%s: bad AGGREGATOR, "
1731 			    "AS 0 not allowed, attribute discarded", pfmt);
1732 			free(pfmt);
1733 			plen += attr_len;
1734 			break;
1735 		}
1736 		goto optattr;
1737 	case ATTR_COMMUNITIES:
1738 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
1739 		    ATTR_PARTIAL))
1740 			goto bad_flags;
1741 		if (community_add(&state->communities, flags, p,
1742 		    attr_len) == -1) {
1743 			/*
1744 			 * mark update as bad and withdraw all routes as per
1745 			 * RFC 7606
1746 			 */
1747 			a->flags |= F_ATTR_PARSE_ERR;
1748 			log_peer_warnx(&peer->conf, "bad COMMUNITIES, "
1749 			    "path invalidated and prefix withdrawn");
1750 			break;
1751 		}
1752 		plen += attr_len;
1753 		break;
1754 	case ATTR_LARGE_COMMUNITIES:
1755 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
1756 		    ATTR_PARTIAL))
1757 			goto bad_flags;
1758 		if (community_large_add(&state->communities, flags, p,
1759 		    attr_len) == -1) {
1760 			/*
1761 			 * mark update as bad and withdraw all routes as per
1762 			 * RFC 7606
1763 			 */
1764 			a->flags |= F_ATTR_PARSE_ERR;
1765 			log_peer_warnx(&peer->conf, "bad LARGE COMMUNITIES, "
1766 			    "path invalidated and prefix withdrawn");
1767 			break;
1768 		}
1769 		plen += attr_len;
1770 		break;
1771 	case ATTR_EXT_COMMUNITIES:
1772 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
1773 		    ATTR_PARTIAL))
1774 			goto bad_flags;
1775 		if (community_ext_add(&state->communities, flags, p,
1776 		    attr_len) == -1) {
1777 			/*
1778 			 * mark update as bad and withdraw all routes as per
1779 			 * RFC 7606
1780 			 */
1781 			a->flags |= F_ATTR_PARSE_ERR;
1782 			log_peer_warnx(&peer->conf, "bad EXT_COMMUNITIES, "
1783 			    "path invalidated and prefix withdrawn");
1784 			break;
1785 		}
1786 		plen += attr_len;
1787 		break;
1788 	case ATTR_ORIGINATOR_ID:
1789 		if (attr_len != 4)
1790 			goto bad_len;
1791 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
1792 			goto bad_flags;
1793 		goto optattr;
1794 	case ATTR_CLUSTER_LIST:
1795 		if (attr_len % 4 != 0)
1796 			goto bad_len;
1797 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
1798 			goto bad_flags;
1799 		goto optattr;
1800 	case ATTR_MP_REACH_NLRI:
1801 		if (attr_len < 4)
1802 			goto bad_len;
1803 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
1804 			goto bad_flags;
1805 		/* the validity is checked in rde_update_dispatch() */
1806 		if (a->flags & F_ATTR_MP_REACH)
1807 			goto bad_list;
1808 		a->flags |= F_ATTR_MP_REACH;
1809 
1810 		mpa->reach = p;
1811 		mpa->reach_len = attr_len;
1812 		plen += attr_len;
1813 		break;
1814 	case ATTR_MP_UNREACH_NLRI:
1815 		if (attr_len < 3)
1816 			goto bad_len;
1817 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
1818 			goto bad_flags;
1819 		/* the validity is checked in rde_update_dispatch() */
1820 		if (a->flags & F_ATTR_MP_UNREACH)
1821 			goto bad_list;
1822 		a->flags |= F_ATTR_MP_UNREACH;
1823 
1824 		mpa->unreach = p;
1825 		mpa->unreach_len = attr_len;
1826 		plen += attr_len;
1827 		break;
1828 	case ATTR_AS4_AGGREGATOR:
1829 		if (attr_len != 8) {
1830 			/* see ATTR_AGGREGATOR ... */
1831 			if ((flags & ATTR_PARTIAL) == 0)
1832 				goto bad_len;
1833 			log_peer_warnx(&peer->conf, "bad AS4_AGGREGATOR, "
1834 			    "partial attribute ignored");
1835 			plen += attr_len;
1836 			break;
1837 		}
1838 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
1839 		    ATTR_PARTIAL))
1840 			goto bad_flags;
1841 		if (memcmp(p, &zero, sizeof(u_int32_t)) == 0) {
1842 			/* As per RFC6793 use "attribute discard" here. */
1843 			log_peer_warnx(&peer->conf, "bad AS4_AGGREGATOR, "
1844 			    "AS 0 not allowed, attribute discarded");
1845 			plen += attr_len;
1846 			break;
1847 		}
1848 		a->flags |= F_ATTR_AS4BYTE_NEW;
1849 		goto optattr;
1850 	case ATTR_AS4_PATH:
1851 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
1852 		    ATTR_PARTIAL))
1853 			goto bad_flags;
1854 		if ((error = aspath_verify(p, attr_len, 1,
1855 		    rde_no_as_set(peer))) != 0) {
1856 			/*
1857 			 * XXX RFC does not specify how to handle errors.
1858 			 * XXX Instead of dropping the session because of a
1859 			 * XXX bad path just mark the full update as having
1860 			 * XXX a parse error which makes the update no longer
1861 			 * XXX eligible and will not be considered for routing
1862 			 * XXX or redistribution.
1863 			 * XXX We follow draft-ietf-idr-optional-transitive
1864 			 * XXX by looking at the partial bit.
1865 			 * XXX Consider soft errors similar to a partial attr.
1866 			 */
1867 			if (flags & ATTR_PARTIAL || error == AS_ERR_SOFT) {
1868 				a->flags |= F_ATTR_PARSE_ERR;
1869 				log_peer_warnx(&peer->conf, "bad AS4_PATH, "
1870 				    "path invalidated and prefix withdrawn");
1871 				goto optattr;
1872 			} else {
1873 				rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
1874 				    NULL, 0);
1875 				return (-1);
1876 			}
1877 		}
1878 		a->flags |= F_ATTR_AS4BYTE_NEW;
1879 		goto optattr;
1880 	default:
1881 		if ((flags & ATTR_OPTIONAL) == 0) {
1882 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_UNKNWN_WK_ATTR,
1883 			    op, len);
1884 			return (-1);
1885 		}
1886 optattr:
1887 		if (attr_optadd(a, flags, type, p, attr_len) == -1) {
1888 bad_list:
1889 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST,
1890 			    NULL, 0);
1891 			return (-1);
1892 		}
1893 
1894 		plen += attr_len;
1895 		break;
1896 	}
1897 
1898 	return (plen);
1899 }
1900 
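/*
 * Parse a single path attribute from p/len and merge it into the filter
 * state: communities of all flavors go into the community set, everything
 * else is stored as an optional attribute. Returns 0 on success, -1 on
 * failure.
 */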
1901 int
1902 rde_attr_add(struct filterstate *state, u_char *p, u_int16_t len)
1903 {
1904 	u_int16_t	 attr_len;
1905 	u_int16_t	 plen = 0;
1906 	u_int8_t	 flags;
1907 	u_int8_t	 type;
1908 	u_int8_t	 tmp8;
1909 
1910 	if (len < 3)
1911 		return (-1);
1912 
1913 	UPD_READ(&flags, p, plen, 1);
1914 	UPD_READ(&type, p, plen, 1);
1915 
1916 	if (flags & ATTR_EXTLEN) {
1917 		if (len - plen < 2)
1918 			return (-1);
1919 		UPD_READ(&attr_len, p, plen, 2);
1920 		attr_len = ntohs(attr_len);
1921 	} else {
1922 		UPD_READ(&tmp8, p, plen, 1);
1923 		attr_len = tmp8;
1924 	}
1925 
1926 	if (len - plen < attr_len)
1927 		return (-1);
1928 
1929 	switch (type) {
1930 	case ATTR_COMMUNITIES:
1931 		return community_add(&state->communities, flags, p, attr_len);
1932 	case ATTR_LARGE_COMMUNITIES:
1933 		return community_large_add(&state->communities, flags, p,
1934 		    attr_len);
1935 	case ATTR_EXT_COMMUNITIES:
1936 		return community_ext_add(&state->communities, flags, p,
1937 		    attr_len);
1938 	}
1939 
1940 	if (attr_optadd(&state->aspath, flags, type, p, attr_len) == -1)
1941 		return (-1);
1942 	return (0);
1943 }
1944 
1945 #undef UPD_READ
1946 #undef CHECK_FLAGS
1947 
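/*
 * Check that all mandatory well-known attributes are present; LOCALPREF
 * is only required on iBGP sessions. Returns the type of the first
 * missing attribute or 0 if none. An update carrying only an
 * MP_UNREACH_NLRI attribute is complete by itself.
 */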
1948 u_int8_t
1949 rde_attr_missing(struct rde_aspath *a, int ebgp, u_int16_t nlrilen)
1950 {
1951 	/* ATTR_MP_UNREACH_NLRI may be sent alone */
1952 	if (nlrilen == 0 && a->flags & F_ATTR_MP_UNREACH &&
1953 	    (a->flags & F_ATTR_MP_REACH) == 0)
1954 		return (0);
1955 
1956 	if ((a->flags & F_ATTR_ORIGIN) == 0)
1957 		return (ATTR_ORIGIN);
1958 	if ((a->flags & F_ATTR_ASPATH) == 0)
1959 		return (ATTR_ASPATH);
1960 	if ((a->flags & F_ATTR_MP_REACH) == 0 &&
1961 	    (a->flags & F_ATTR_NEXTHOP) == 0)
1962 		return (ATTR_NEXTHOP);
1963 	if (!ebgp)
1964 		if ((a->flags & F_ATTR_LOCALPREF) == 0)
1965 			return (ATTR_LOCALPREF);
1966 	return (0);
1967 }
1968 
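/*
 * Extract the nexthop from an MP_REACH_NLRI attribute (RFC 4760): one
 * length byte, the nexthop address itself and a reserved byte (the old
 * SNPA count) which is skipped below. Returns the total number of bytes
 * consumed or -1 on error.
 */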
1969 int
1970 rde_get_mp_nexthop(u_char *data, u_int16_t len, u_int8_t aid,
1971     struct filterstate *state)
1972 {
1973 	struct bgpd_addr	nexthop;
1974 	u_int8_t		totlen, nhlen;
1975 
1976 	if (len == 0)
1977 		return (-1);
1978 
1979 	nhlen = *data++;
1980 	totlen = 1;
1981 	len--;
1982 
1983 	if (nhlen > len)
1984 		return (-1);
1985 
1986 	bzero(&nexthop, sizeof(nexthop));
1987 	nexthop.aid = aid;
1988 	switch (aid) {
1989 	case AID_INET6:
1990 		/*
1991 		 * RFC2545 allows a link-local address to be carried in
1992 		 * the nexthop as well. Yikes!
1993 		 * This is not only silly, it is wrong and we just ignore
1994 		 * such a link-local nexthop. The bgpd session doesn't run
1995 		 * over the link-local address, so why should all other
1996 		 * traffic?
1997 		 */
1998 		if (nhlen != 16 && nhlen != 32) {
1999 			log_warnx("bad multiprotocol nexthop, bad size");
2000 			return (-1);
2001 		}
2002 		memcpy(&nexthop.v6.s6_addr, data, 16);
2003 		break;
2004 	case AID_VPN_IPv6:
2005 		if (nhlen != 24) {
2006 			log_warnx("bad multiprotocol nexthop, bad size %d",
2007 			    nhlen);
2008 			return (-1);
2009 		}
2010 		memcpy(&nexthop.v6, data + sizeof(u_int64_t),
2011 		    sizeof(nexthop.v6));
2012 		nexthop.aid = AID_INET6;
2013 		break;
2014 	case AID_VPN_IPv4:
2015 		/*
2016 		 * Neither RFC4364 nor RFC3107 specify the format of the
2017 		 * nexthop in an explicit way. The quality of the RFCs went
2018 		 * down the toilet the larger the numbers got.
2019 		 * RFC4364 is very confusing about the VPN-IPv4 address and
2020 		 * the VPN-IPv4 prefix that also carries an MPLS label.
2021 		 * So the nexthop is a 12-byte field: a 64-bit RD followed
2022 		 * by an IPv4 address. In the nexthop case the RD can be
2023 		 * ignored.
2024 		 * Since the nexthop has to be in the main IPv4 table just
2025 		 * create an AID_INET nexthop, so we don't need to handle
2026 		 * AID_VPN_IPv4 in nexthop and kroute.
2027 		 */
2028 		if (nhlen != 12) {
2029 			log_warnx("bad multiprotocol nexthop, bad size");
2030 			return (-1);
2031 		}
2032 		nexthop.aid = AID_INET;
2033 		memcpy(&nexthop.v4, data + sizeof(u_int64_t),
2034 		    sizeof(nexthop.v4));
2035 		break;
2036 	default:
2037 		log_warnx("bad multiprotocol nexthop, bad AID");
2038 		return (-1);
2039 	}
2040 
2041 	nexthop_unref(state->nexthop);	/* just to be sure */
2042 	state->nexthop = nexthop_get(&nexthop);
2043 
2044 	/* ignore reserved (old SNPA) field as per RFC4760 */
2045 	totlen += nhlen + 1;
2046 	data += nhlen + 1;
2047 
2048 	return (totlen);
2049 }
2050 
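/*
 * Hand an UPDATE error off to the session engine so it can send the
 * NOTIFICATION to the peer; the peer is marked as being in error state.
 */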
2051 void
2052 rde_update_err(struct rde_peer *peer, u_int8_t error, u_int8_t suberr,
2053     void *data, u_int16_t size)
2054 {
2055 	struct ibuf	*wbuf;
2056 
2057 	if ((wbuf = imsg_create(ibuf_se, IMSG_UPDATE_ERR, peer->conf.id, 0,
2058 	    size + sizeof(error) + sizeof(suberr))) == NULL)
2059 		fatal("%s %d imsg_create error", __func__, __LINE__);
2060 	if (imsg_add(wbuf, &error, sizeof(error)) == -1 ||
2061 	    imsg_add(wbuf, &suberr, sizeof(suberr)) == -1 ||
2062 	    imsg_add(wbuf, data, size) == -1)
2063 		fatal("%s %d imsg_add error", __func__, __LINE__);
2064 	imsg_close(ibuf_se, wbuf);
2065 	peer->state = PEER_ERR;
2066 }
2067 
2068 void
2069 rde_update_log(const char *message, u_int16_t rid,
2070     const struct rde_peer *peer, const struct bgpd_addr *next,
2071     const struct bgpd_addr *prefix, u_int8_t prefixlen)
2072 {
2073 	char		*l = NULL;
2074 	char		*n = NULL;
2075 	char		*p = NULL;
2076 
2077 	if (!((conf->log & BGPD_LOG_UPDATES) ||
2078 	    (peer->conf.flags & PEERFLAG_LOG_UPDATES)))
2079 		return;
2080 
2081 	if (next != NULL)
2082 		if (asprintf(&n, " via %s", log_addr(next)) == -1)
2083 			n = NULL;
2084 	if (asprintf(&p, "%s/%u", log_addr(prefix), prefixlen) == -1)
2085 		p = NULL;
2086 	l = log_fmt_peer(&peer->conf);
2087 	log_info("Rib %s: %s AS%s: %s %s%s", rib_byid(rid)->name,
2088 	    l, log_as(peer->conf.remote_as), message,
2089 	    p ? p : "out of memory", n ? n : "");
2090 
2091 	free(l);
2092 	free(n);
2093 	free(p);
2094 }
2095 
2096 /*
2097  * 4-Byte ASN helper function.
2098  * Two scenarios need to be considered:
2099  * - NEW session with NEW attributes present -> just remove the attributes
2100  * - OLD session with NEW attributes present -> try to merge them
2101  */
2102 void
2103 rde_as4byte_fixup(struct rde_peer *peer, struct rde_aspath *a)
2104 {
2105 	struct attr	*nasp, *naggr, *oaggr;
2106 	u_int32_t	 as;
2107 
2108 	/*
2109 	 * If either ATTR_AS4_AGGREGATOR or ATTR_AS4_PATH is present,
2110 	 * try to fix up the attributes.
2111 	 * Do not fix up if F_ATTR_PARSE_ERR is set.
2112 	 */
2113 	if (!(a->flags & F_ATTR_AS4BYTE_NEW) || a->flags & F_ATTR_PARSE_ERR)
2114 		return;
2115 
2116 	/* first get the attributes */
2117 	nasp = attr_optget(a, ATTR_AS4_PATH);
2118 	naggr = attr_optget(a, ATTR_AS4_AGGREGATOR);
2119 
2120 	if (rde_as4byte(peer)) {
2121 		/* NEW session using 4-byte ASNs */
2122 		if (nasp) {
2123 			log_peer_warnx(&peer->conf, "uses 4-byte ASN "
2124 			    "but sent AS4_PATH attribute.");
2125 			attr_free(a, nasp);
2126 		}
2127 		if (naggr) {
2128 			log_peer_warnx(&peer->conf, "uses 4-byte ASN "
2129 			    "but sent AS4_AGGREGATOR attribute.");
2130 			attr_free(a, naggr);
2131 		}
2132 		return;
2133 	}
2134 	/* OLD session using 2-byte ASNs */
2135 	/* try to merge the new attributes into the old ones */
2136 	if ((oaggr = attr_optget(a, ATTR_AGGREGATOR))) {
2137 		memcpy(&as, oaggr->data, sizeof(as));
2138 		if (ntohl(as) != AS_TRANS) {
2139 			/* per RFC ignore AS4_PATH and AS4_AGGREGATOR */
2140 			if (nasp)
2141 				attr_free(a, nasp);
2142 			if (naggr)
2143 				attr_free(a, naggr);
2144 			return;
2145 		}
2146 		if (naggr) {
2147 			/* switch over to new AGGREGATOR */
2148 			attr_free(a, oaggr);
2149 			if (attr_optadd(a, ATTR_OPTIONAL | ATTR_TRANSITIVE,
2150 			    ATTR_AGGREGATOR, naggr->data, naggr->len))
2151 				fatalx("attr_optadd failed but impossible");
2152 		}
2153 	}
2154 	/* there is no need for AS4_AGGREGATOR any more */
2155 	if (naggr)
2156 		attr_free(a, naggr);
2157 
2158 	/* merge AS4_PATH with ASPATH */
2159 	if (nasp)
2160 		aspath_merge(a, nasp);
2161 }
2162 
2163 
2164 /*
2165  * route reflector helper function
2166  */
2167 void
2168 rde_reflector(struct rde_peer *peer, struct rde_aspath *asp)
2169 {
2170 	struct attr	*a;
2171 	u_int8_t	*p;
2172 	u_int16_t	 len;
2173 	u_int32_t	 id;
2174 
2175 	/* do not consider updates with parse errors */
2176 	if (asp->flags & F_ATTR_PARSE_ERR)
2177 		return;
2178 
2179 	/* check the originator id: if it equals our router id, drop it */
2180 	if ((a = attr_optget(asp, ATTR_ORIGINATOR_ID)) != NULL) {
2181 		if (memcmp(&conf->bgpid, a->data, sizeof(conf->bgpid)) == 0) {
2182 			/* this is coming from myself */
2183 			asp->flags |= F_ATTR_LOOP;
2184 			return;
2185 		}
2186 	} else if (conf->flags & BGPD_FLAG_REFLECTOR) {
2187 		if (peer->conf.ebgp)
2188 			id = conf->bgpid;
2189 		else
2190 			id = htonl(peer->remote_bgpid);
2191 		if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_ORIGINATOR_ID,
2192 		    &id, sizeof(u_int32_t)) == -1)
2193 			fatalx("attr_optadd failed but impossible");
2194 	}
2195 
2196 	/* check for own id in the cluster list */
2197 	if (conf->flags & BGPD_FLAG_REFLECTOR) {
2198 		if ((a = attr_optget(asp, ATTR_CLUSTER_LIST)) != NULL) {
2199 			for (len = 0; len < a->len;
2200 			    len += sizeof(conf->clusterid))
2201 				/* check if coming from my cluster */
2202 				if (memcmp(&conf->clusterid, a->data + len,
2203 				    sizeof(conf->clusterid)) == 0) {
2204 					asp->flags |= F_ATTR_LOOP;
2205 					return;
2206 				}
2207 
2208 			/* prepend own clusterid by replacing attribute */
2209 			len = a->len + sizeof(conf->clusterid);
2210 			if (len < a->len)
2211 				fatalx("rde_reflector: cluster-list overflow");
2212 			if ((p = malloc(len)) == NULL)
2213 				fatal("rde_reflector");
2214 			memcpy(p, &conf->clusterid, sizeof(conf->clusterid));
2215 			memcpy(p + sizeof(conf->clusterid), a->data, a->len);
2216 			attr_free(asp, a);
2217 			if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_CLUSTER_LIST,
2218 			    p, len) == -1)
2219 				fatalx("attr_optadd failed but impossible");
2220 			free(p);
2221 		} else if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_CLUSTER_LIST,
2222 		    &conf->clusterid, sizeof(conf->clusterid)) == -1)
2223 			fatalx("attr_optadd failed but impossible");
2224 	}
2225 }
2226 
2227 /*
2228  * control specific functions
2229  */
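/*
 * Serialize one prefix into a ctl_show_rib message for bgpctl; with
 * F_CTL_DETAIL the communities and any unknown optional attributes are
 * sent in follow-up messages.
 */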
2230 static void
2231 rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags)
2232 {
2233 	struct ctl_show_rib	 rib;
2234 	struct ibuf		*wbuf;
2235 	struct attr		*a;
2236 	struct nexthop		*nexthop;
2237 	void			*bp;
2238 	time_t			 staletime;
2239 	size_t			 aslen;
2240 	u_int8_t		 l;
2241 
2242 	nexthop = prefix_nexthop(p);
2243 	bzero(&rib, sizeof(rib));
2244 	rib.age = getmonotime() - p->lastchange;
2245 	rib.local_pref = asp->lpref;
2246 	rib.med = asp->med;
2247 	rib.weight = asp->weight;
2248 	strlcpy(rib.descr, prefix_peer(p)->conf.descr, sizeof(rib.descr));
2249 	memcpy(&rib.remote_addr, &prefix_peer(p)->remote_addr,
2250 	    sizeof(rib.remote_addr));
2251 	rib.remote_id = prefix_peer(p)->remote_bgpid;
2252 	if (nexthop != NULL) {
2253 		memcpy(&rib.true_nexthop, &nexthop->true_nexthop,
2254 		    sizeof(rib.true_nexthop));
2255 		memcpy(&rib.exit_nexthop, &nexthop->exit_nexthop,
2256 		    sizeof(rib.exit_nexthop));
2257 	} else {
2258 		/* announced network may have a NULL nexthop */
2259 		bzero(&rib.true_nexthop, sizeof(rib.true_nexthop));
2260 		bzero(&rib.exit_nexthop, sizeof(rib.exit_nexthop));
2261 		rib.true_nexthop.aid = p->pt->aid;
2262 		rib.exit_nexthop.aid = p->pt->aid;
2263 	}
2264 	pt_getaddr(p->pt, &rib.prefix);
2265 	rib.prefixlen = p->pt->prefixlen;
2266 	rib.origin = asp->origin;
2267 	rib.validation_state = p->validation_state;
2268 	rib.flags = 0;
2269 	if (p->re != NULL && p->re->active == p)
2270 		rib.flags |= F_PREF_ACTIVE;
2271 	if (!prefix_peer(p)->conf.ebgp)
2272 		rib.flags |= F_PREF_INTERNAL;
2273 	if (asp->flags & F_PREFIX_ANNOUNCED)
2274 		rib.flags |= F_PREF_ANNOUNCE;
2275 	if (nexthop == NULL || nexthop->state == NEXTHOP_REACH)
2276 		rib.flags |= F_PREF_ELIGIBLE;
2277 	if (asp->flags & F_ATTR_LOOP)
2278 		rib.flags &= ~F_PREF_ELIGIBLE;
2279 	if (asp->flags & F_ATTR_PARSE_ERR)
2280 		rib.flags |= F_PREF_INVALID;
2281 	staletime = prefix_peer(p)->staletime[p->pt->aid];
2282 	if (staletime && p->lastchange <= staletime)
2283 		rib.flags |= F_PREF_STALE;
2284 	aslen = aspath_length(asp->aspath);
2285 
2286 	if ((wbuf = imsg_create(ibuf_se_ctl, IMSG_CTL_SHOW_RIB, 0, pid,
2287 	    sizeof(rib) + aslen)) == NULL)
2288 		return;
2289 	if (imsg_add(wbuf, &rib, sizeof(rib)) == -1 ||
2290 	    imsg_add(wbuf, aspath_dump(asp->aspath), aslen) == -1)
2291 		return;
2292 	imsg_close(ibuf_se_ctl, wbuf);
2293 
2294 	if (flags & F_CTL_DETAIL) {
2295 		struct rde_community *comm = prefix_communities(p);
2296 		size_t len = comm->nentries * sizeof(struct community);
2297 		if (comm->nentries > 0) {
2298 			if ((wbuf = imsg_create(ibuf_se_ctl,
2299 			    IMSG_CTL_SHOW_RIB_COMMUNITIES, 0, pid,
2300 			    len)) == NULL)
2301 				return;
2302 			if ((bp = ibuf_reserve(wbuf, len)) == NULL) {
2303 				ibuf_free(wbuf);
2304 				return;
2305 			}
2306 			memcpy(bp, comm->communities, len);
2307 			imsg_close(ibuf_se_ctl, wbuf);
2308 		}
2309 		for (l = 0; l < asp->others_len; l++) {
2310 			if ((a = asp->others[l]) == NULL)
2311 				break;
2312 			if ((wbuf = imsg_create(ibuf_se_ctl,
2313 			    IMSG_CTL_SHOW_RIB_ATTR, 0, pid,
2314 			    attr_optlen(a))) == NULL)
2315 				return;
2316 			if ((bp = ibuf_reserve(wbuf, attr_optlen(a))) == NULL) {
2317 				ibuf_free(wbuf);
2318 				return;
2319 			}
2320 			if (attr_write(bp, attr_optlen(a), a->flags,
2321 			    a->type, a->data, a->len) == -1) {
2322 				ibuf_free(wbuf);
2323 				return;
2324 			}
2325 			imsg_close(ibuf_se_ctl, wbuf);
2326 		}
2327 	}
2328 }
2329 
2330 int
2331 rde_match_peer(struct rde_peer *p, struct ctl_neighbor *n)
2332 {
2333 	char *s;
2334 
2335 	if (n && n->addr.aid) {
2336 		if (memcmp(&p->conf.remote_addr, &n->addr,
2337 		    sizeof(p->conf.remote_addr)))
2338 			return 0;
2339 	} else if (n && n->descr[0]) {
2340 		s = n->is_group ? p->conf.group : p->conf.descr;
2341 		if (strcmp(s, n->descr))
2342 			return 0;
2343 	}
2344 	return 1;
2345 }
2346 
2347 static void
2348 rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req)
2349 {
2350 	struct rde_aspath	*asp;
2351 
2352 	if (!rde_match_peer(prefix_peer(p), &req->neighbor))
2353 		return;
2354 
2355 	asp = prefix_aspath(p);
2356 	if (asp == NULL)	/* skip pending withdraw in Adj-RIB-Out */
2357 		return;
2358 	if ((req->flags & F_CTL_ACTIVE) && p->re->active != p)
2359 		return;
2360 	if ((req->flags & F_CTL_INVALID) &&
2361 	    (asp->flags & F_ATTR_PARSE_ERR) == 0)
2362 		return;
2363 	if (req->as.type != AS_UNDEF &&
2364 	    !aspath_match(asp->aspath, &req->as, 0))
2365 		return;
2366 	if (req->community.flags != 0) {
2367 		if (!community_match(prefix_communities(p), &req->community,
2368 		    NULL))
2369 			return;
2370 	}
2371 	if (!ovs_match(p, req->flags))
2372 		return;
2373 	rde_dump_rib_as(p, asp, req->pid, req->flags);
2374 }
2375 
2376 static void
2377 rde_dump_upcall(struct rib_entry *re, void *ptr)
2378 {
2379 	struct rde_dump_ctx	*ctx = ptr;
2380 	struct prefix		*p;
2381 
2382 	LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
2383 		rde_dump_filter(p, &ctx->req);
2384 }
2385 
2386 static void
2387 rde_dump_prefix_upcall(struct rib_entry *re, void *ptr)
2388 {
2389 	struct rde_dump_ctx	*ctx = ptr;
2390 	struct prefix		*p;
2391 	struct pt_entry		*pt;
2392 	struct bgpd_addr	 addr;
2393 
2394 	pt = re->prefix;
2395 	pt_getaddr(pt, &addr);
2396 	if (addr.aid != ctx->req.prefix.aid)
2397 		return;
2398 	if (ctx->req.flags & F_LONGER) {
2399 		if (ctx->req.prefixlen > pt->prefixlen)
2400 			return;
2401 		if (!prefix_compare(&ctx->req.prefix, &addr,
2402 		    ctx->req.prefixlen))
2403 			LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
2404 				rde_dump_filter(p, &ctx->req);
2405 	} else {
2406 		if (ctx->req.prefixlen < pt->prefixlen)
2407 			return;
2408 		if (!prefix_compare(&addr, &ctx->req.prefix,
2409 		    pt->prefixlen))
2410 			LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
2411 				rde_dump_filter(p, &ctx->req);
2412 	}
2413 }
2414 
2415 static void
2416 rde_dump_adjout_upcall(struct prefix *p, void *ptr)
2417 {
2418 	struct rde_dump_ctx	*ctx = ptr;
2419 
2420 	if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD))
2421 		return;
2422 	rde_dump_filter(p, &ctx->req);
2423 }
2424 
2425 static void
2426 rde_dump_adjout_prefix_upcall(struct prefix *p, void *ptr)
2427 {
2428 	struct rde_dump_ctx	*ctx = ptr;
2429 	struct bgpd_addr	 addr;
2430 
2431 	if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD))
2432 		return;
2433 
2434 	pt_getaddr(p->pt, &addr);
2435 	if (addr.aid != ctx->req.prefix.aid)
2436 		return;
2437 	if (ctx->req.flags & F_LONGER) {
2438 		if (ctx->req.prefixlen > p->pt->prefixlen)
2439 			return;
2440 		if (!prefix_compare(&ctx->req.prefix, &addr,
2441 		    ctx->req.prefixlen))
2442 			rde_dump_filter(p, &ctx->req);
2443 	} else {
2444 		if (ctx->req.prefixlen < p->pt->prefixlen)
2445 			return;
2446 		if (!prefix_compare(&addr, &ctx->req.prefix,
2447 		    p->pt->prefixlen))
2448 			rde_dump_filter(p, &ctx->req);
2449 	}
2450 }
2451 
2452 static int
2453 rde_dump_throttled(void *arg)
2454 {
2455 	struct rde_dump_ctx	*ctx = arg;
2456 
2457 	return (ctx->throttled != 0);
2458 }
2459 
2460 static void
2461 rde_dump_done(void *arg, u_int8_t aid)
2462 {
2463 	struct rde_dump_ctx	*ctx = arg;
2464 	struct rde_peer		*peer;
2465 	u_int			 error;
2466 
2467 	if (ctx->req.flags & F_CTL_ADJ_OUT) {
2468 		peer = peer_match(&ctx->req.neighbor, ctx->peerid);
2469 		if (peer == NULL)
2470 			goto done;
2471 		ctx->peerid = peer->conf.id;
2472 		switch (ctx->req.type) {
2473 		case IMSG_CTL_SHOW_RIB:
2474 			if (prefix_dump_new(peer, ctx->req.aid,
2475 			    CTL_MSG_HIGH_MARK, ctx, rde_dump_adjout_upcall,
2476 			    rde_dump_done, rde_dump_throttled) == -1)
2477 				goto nomem;
2478 			break;
2479 		case IMSG_CTL_SHOW_RIB_PREFIX:
2480 			if (prefix_dump_new(peer, ctx->req.aid,
2481 			    CTL_MSG_HIGH_MARK, ctx,
2482 			    rde_dump_adjout_prefix_upcall,
2483 			    rde_dump_done, rde_dump_throttled) == -1)
2484 				goto nomem;
2485 			break;
2486 		default:
2487 			fatalx("%s: unsupported imsg type", __func__);
2488 		}
2489 		return;
2490 	}
2491 done:
2492 	imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, -1, NULL, 0);
2493 	LIST_REMOVE(ctx, entry);
2494 	free(ctx);
2495 	return;
2496 
2497 nomem:
2498 	log_warn(__func__);
2499 	error = CTL_RES_NOMEM;
2500 	imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, ctx->req.pid, -1, &error,
2501 	    sizeof(error));
2502 	return;
2503 }
2504 
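/*
 * Start a RIB dump for a bgpctl request: depending on the request either
 * walk the Adj-RIB-Out of all matching peers or the requested RIB,
 * streaming the matching prefixes back to the client identified by pid.
 */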
2505 void
2506 rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid,
2507     enum imsg_type type)
2508 {
2509 	struct rde_dump_ctx	*ctx;
2510 	struct rib_entry	*re;
2511 	struct prefix		*p;
2512 	u_int			 error;
2513 	u_int8_t		 hostplen;
2514 	u_int16_t		 rid;
2515 
2516 	if ((ctx = calloc(1, sizeof(*ctx))) == NULL) {
2517  nomem:
2518 		log_warn(__func__);
2519 		error = CTL_RES_NOMEM;
2520 		imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
2521 		    sizeof(error));
2522 		return;
2523 	}
2524 
2525 	memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request));
2526 	ctx->req.pid = pid;
2527 	ctx->req.type = type;
2528 
2529 	if (req->flags & (F_CTL_ADJ_IN | F_CTL_INVALID)) {
2530 		rid = RIB_ADJ_IN;
2531 	} else if (req->flags & F_CTL_ADJ_OUT) {
2532 		struct rde_peer *peer;
2533 
2534 		peer = peer_match(&req->neighbor, 0);
2535 		if (peer == NULL) {
2536 			log_warnx("%s: no peer found for adj-rib-out",
2537 			    __func__);
2538 			error = CTL_RES_NOSUCHPEER;
2539 			imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1,
2540 			    &error, sizeof(error));
2541 			free(ctx);
2542 			return;
2543 		}
2544 		ctx->peerid = peer->conf.id;
2545 		switch (ctx->req.type) {
2546 		case IMSG_CTL_SHOW_RIB:
2547 			if (prefix_dump_new(peer, ctx->req.aid,
2548 			    CTL_MSG_HIGH_MARK, ctx, rde_dump_adjout_upcall,
2549 			    rde_dump_done, rde_dump_throttled) == -1)
2550 				goto nomem;
2551 			break;
2552 		case IMSG_CTL_SHOW_RIB_PREFIX:
2553 			if (req->flags & (F_LONGER|F_SHORTER)) {
2554 				if (prefix_dump_new(peer, ctx->req.aid,
2555 				    CTL_MSG_HIGH_MARK, ctx,
2556 				    rde_dump_adjout_prefix_upcall,
2557 				    rde_dump_done, rde_dump_throttled) == -1)
2558 					goto nomem;
2559 				break;
2560 			}
2561 			switch (req->prefix.aid) {
2562 			case AID_INET:
2563 			case AID_VPN_IPv4:
2564 				hostplen = 32;
2565 				break;
2566 			case AID_INET6:
2567 			case AID_VPN_IPv6:
2568 				hostplen = 128;
2569 				break;
2570 			default:
2571 				fatalx("%s: unknown af", __func__);
2572 			}
2573 
2574 			do {
2575 				if (req->prefixlen == hostplen)
2576 					p = prefix_match(peer, &req->prefix);
2577 				else
2578 					p = prefix_lookup(peer, &req->prefix,
2579 					    req->prefixlen);
2580 				if (p)
2581 					rde_dump_adjout_upcall(p, ctx);
2582 			} while ((peer = peer_match(&req->neighbor,
2583 			    peer->conf.id)));
2584 
2585 			imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid,
2586 			    -1, NULL, 0);
2587 			free(ctx);
2588 			return;
2589 		default:
2590 			fatalx("%s: unsupported imsg type", __func__);
2591 		}
2592 
2593 		LIST_INSERT_HEAD(&rde_dump_h, ctx, entry);
2594 		return;
2595 	} else if ((rid = rib_find(req->rib)) == RIB_NOTFOUND) {
2596 		log_warnx("%s: no such rib %s", __func__, req->rib);
2597 		error = CTL_RES_NOSUCHRIB;
2598 		imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
2599 		    sizeof(error));
2600 		free(ctx);
2601 		return;
2602 	}
2603 
2604 	switch (ctx->req.type) {
2605 	case IMSG_CTL_SHOW_NETWORK:
2606 		if (rib_dump_new(rid, ctx->req.aid, CTL_MSG_HIGH_MARK, ctx,
2607 		    network_dump_upcall, rde_dump_done,
2608 		    rde_dump_throttled) == -1)
2609 			goto nomem;
2610 		break;
2611 	case IMSG_CTL_SHOW_RIB:
2612 		if (rib_dump_new(rid, ctx->req.aid, CTL_MSG_HIGH_MARK, ctx,
2613 		    rde_dump_upcall, rde_dump_done, rde_dump_throttled) == -1)
2614 			goto nomem;
2615 		break;
2616 	case IMSG_CTL_SHOW_RIB_PREFIX:
2617 		if (req->flags & (F_LONGER|F_SHORTER)) {
2618 			if (rib_dump_new(rid, ctx->req.aid,
2619 			    CTL_MSG_HIGH_MARK, ctx, rde_dump_prefix_upcall,
2620 			    rde_dump_done, rde_dump_throttled) == -1)
2621 				goto nomem;
2622 			break;
2623 		}
2624 		switch (req->prefix.aid) {
2625 		case AID_INET:
2626 		case AID_VPN_IPv4:
2627 			hostplen = 32;
2628 			break;
2629 		case AID_INET6:
2630 		case AID_VPN_IPv6:
2631 			hostplen = 128;
2632 			break;
2633 		default:
2634 			fatalx("%s: unknown af", __func__);
2635 		}
2636 		if (req->prefixlen == hostplen)
2637 			re = rib_match(rib_byid(rid), &req->prefix);
2638 		else
2639 			re = rib_get(rib_byid(rid), &req->prefix,
2640 			    req->prefixlen);
2641 		if (re)
2642 			rde_dump_upcall(re, ctx);
2643 		imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid,
2644 		    -1, NULL, 0);
2645 		free(ctx);
2646 		return;
2647 	default:
2648 		fatalx("%s: unsupported imsg type", __func__);
2649 	}
2650 	LIST_INSERT_HEAD(&rde_dump_h, ctx, entry);
2651 }
2652 
2653 void
2654 rde_dump_ctx_throttle(pid_t pid, int throttle)
2655 {
2656 	struct rde_dump_ctx	*ctx;
2657 
2658 	LIST_FOREACH(ctx, &rde_dump_h, entry) {
2659 		if (ctx->req.pid == pid) {
2660 			ctx->throttled = throttle;
2661 			return;
2662 		}
2663 	}
2664 }
2665 
2666 void
2667 rde_dump_ctx_terminate(pid_t pid)
2668 {
2669 	struct rde_dump_ctx	*ctx;
2670 
2671 	LIST_FOREACH(ctx, &rde_dump_h, entry) {
2672 		if (ctx->req.pid == pid) {
2673 			rib_dump_terminate(ctx);
2674 			return;
2675 		}
2676 	}
2677 }
2678 
2679 static int
2680 rde_mrt_throttled(void *arg)
2681 {
2682 	struct mrt	*mrt = arg;
2683 
2684 	return (mrt->wbuf.queued > SESS_MSG_LOW_MARK);
2685 }
2686 
2687 static void
2688 rde_mrt_done(void *ptr, u_int8_t aid)
2689 {
2690 	mrt_done(ptr);
2691 }
2692 
2693 void
2694 rde_dump_mrt_new(struct mrt *mrt, pid_t pid, int fd)
2695 {
2696 	struct rde_mrt_ctx *ctx;
2697 	u_int16_t rid;
2698 
2699 	if ((ctx = calloc(1, sizeof(*ctx))) == NULL) {
2700 		log_warn("rde_dump_mrt_new");
2701 		return;
2702 	}
2703 	memcpy(&ctx->mrt, mrt, sizeof(struct mrt));
2704 	TAILQ_INIT(&ctx->mrt.wbuf.bufs);
2705 	ctx->mrt.wbuf.fd = fd;
2706 	ctx->mrt.state = MRT_STATE_RUNNING;
2707 	rid = rib_find(ctx->mrt.rib);
2708 	if (rid == RIB_NOTFOUND) {
2709 		log_warnx("non existing RIB %s for mrt dump", ctx->mrt.rib);
2710 		free(ctx);
2711 		return;
2712 	}
2713 
2714 	if (ctx->mrt.type == MRT_TABLE_DUMP_V2)
2715 		mrt_dump_v2_hdr(&ctx->mrt, conf, &peerlist);
2716 
2717 	if (rib_dump_new(rid, AID_UNSPEC, CTL_MSG_HIGH_MARK, &ctx->mrt,
2718 	    mrt_dump_upcall, rde_mrt_done, rde_mrt_throttled) == -1)
2719 		fatal("%s: rib_dump_new", __func__);
2720 
2721 	LIST_INSERT_HEAD(&rde_mrts, ctx, entry);
2722 	rde_mrt_cnt++;
2723 }
2724 
2725 /*
2726  * kroute specific functions
2727  */
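/*
 * Decide if a prefix should be imported into an L3VPN: return 1 if one
 * of the VPN's configured import communities matches.
 */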
2728 int
2729 rde_l3vpn_import(struct rde_community *comm, struct l3vpn *rd)
2730 {
2731 	struct filter_set	*s;
2732 
2733 	TAILQ_FOREACH(s, &rd->import, entry) {
2734 		if (community_match(comm, &s->action.community, 0))
2735 			return (1);
2736 	}
2737 	return (0);
2738 }
2739 
2740 void
2741 rde_send_kroute_flush(struct rib *rib)
2742 {
2743 	if (imsg_compose(ibuf_main, IMSG_KROUTE_FLUSH, rib->rtableid, 0, -1,
2744 	    NULL, 0) == -1)
2745 		fatal("%s %d imsg_compose error", __func__, __LINE__);
2746 }
2747 
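/*
 * Ship a FIB change to the parent process: depending on new/old this is
 * either a kroute change or a delete. VPN prefixes are instead sent to
 * the rtable of every L3VPN whose import communities match.
 */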
2748 void
2749 rde_send_kroute(struct rib *rib, struct prefix *new, struct prefix *old)
2750 {
2751 	struct kroute_full	 kr;
2752 	struct bgpd_addr	 addr;
2753 	struct prefix		*p;
2754 	struct rde_aspath	*asp;
2755 	struct l3vpn		*vpn;
2756 	enum imsg_type		 type;
2757 
2758 	/*
2759 	 * Make sure that self-announced prefixes are not committed to the
2760 	 * FIB. If both prefixes are unreachable, no update is needed.
2761 	 */
2762 	if ((old == NULL || prefix_aspath(old)->flags & F_PREFIX_ANNOUNCED) &&
2763 	    (new == NULL || prefix_aspath(new)->flags & F_PREFIX_ANNOUNCED))
2764 		return;
2765 
2766 	if (new == NULL || prefix_aspath(new)->flags & F_PREFIX_ANNOUNCED) {
2767 		type = IMSG_KROUTE_DELETE;
2768 		p = old;
2769 	} else {
2770 		type = IMSG_KROUTE_CHANGE;
2771 		p = new;
2772 	}
2773 
2774 	asp = prefix_aspath(p);
2775 	pt_getaddr(p->pt, &addr);
2776 	bzero(&kr, sizeof(kr));
2777 	memcpy(&kr.prefix, &addr, sizeof(kr.prefix));
2778 	kr.prefixlen = p->pt->prefixlen;
2779 	if (prefix_nhflags(p) == NEXTHOP_REJECT)
2780 		kr.flags |= F_REJECT;
2781 	if (prefix_nhflags(p) == NEXTHOP_BLACKHOLE)
2782 		kr.flags |= F_BLACKHOLE;
2783 	if (type == IMSG_KROUTE_CHANGE)
2784 		memcpy(&kr.nexthop, &prefix_nexthop(p)->true_nexthop,
2785 		    sizeof(kr.nexthop));
2786 	strlcpy(kr.label, rtlabel_id2name(asp->rtlabelid), sizeof(kr.label));
2787 
2788 	switch (addr.aid) {
2789 	case AID_VPN_IPv4:
2790 	case AID_VPN_IPv6:
2791 		if (!(rib->flags & F_RIB_LOCAL))
2792 			/* not Loc-RIB, no update for VPNs */
2793 			break;
2794 
2795 		SIMPLEQ_FOREACH(vpn, &conf->l3vpns, entry) {
2796 			if (!rde_l3vpn_import(prefix_communities(p), vpn))
2797 				continue;
2798 			/*
2799 			 * must send exit_nexthop to pick the correct MPLS tunnel
2800 			 */
2801 			if (type == IMSG_KROUTE_CHANGE)
2802 				memcpy(&kr.nexthop,
2803 				    &prefix_nexthop(p)->exit_nexthop,
2804 				    sizeof(kr.nexthop));
2805 			/* XXX not ideal but this will change */
2806 			kr.ifindex = if_nametoindex(vpn->ifmpe);
2807 			if (imsg_compose(ibuf_main, type, vpn->rtableid, 0, -1,
2808 			    &kr, sizeof(kr)) == -1)
2809 				fatal("%s %d imsg_compose error", __func__,
2810 				    __LINE__);
2811 		}
2812 		break;
2813 	default:
2814 		if (imsg_compose(ibuf_main, type, rib->rtableid, 0, -1,
2815 		    &kr, sizeof(kr)) == -1)
2816 			fatal("%s %d imsg_compose error", __func__, __LINE__);
2817 		break;
2818 	}
2819 }
2820 
2821 /*
2822  * update specific functions
2823  */
2824 void
2825 rde_generate_updates(struct rib *rib, struct prefix *new, struct prefix *old)
2826 {
2827 	struct rde_peer	*peer;
2828 	u_int8_t	 aid;
2829 
2830 	/*
2831 	 * If old != NULL it was the active prefix and needs to be removed.
2832 	 * If new != NULL it is reachable and an update should be
2833 	 * generated.
2834 	 */
2835 	if (old == NULL && new == NULL)
2836 		return;
2837 
2838 	if ((rib->flags & F_RIB_NOFIB) == 0)
2839 		rde_send_kroute(rib, new, old);
2840 
2841 	if (new)
2842 		aid = new->pt->aid;
2843 	else
2844 		aid = old->pt->aid;
2845 
2846 	LIST_FOREACH(peer, &peerlist, peer_l) {
2847 		if (peer->conf.id == 0)
2848 			continue;
2849 		if (peer->loc_rib_id != rib->id)
2850 			continue;
2851 		if (peer->state != PEER_UP)
2852 			continue;
2853 		/* check if peer actually supports the address family */
2854 		if (peer->capa.mp[aid] == 0)
2855 			continue;
2856 		/* skip peers with special export types */
2857 		if (peer->conf.export_type == EXPORT_NONE ||
2858 		    peer->conf.export_type == EXPORT_DEFAULT_ROUTE)
2859 			continue;
2860 
2861 		up_generate_updates(out_rules, peer, new, old);
2862 	}
2863 }
2864 
2865 static void
2866 rde_up_flush_upcall(struct prefix *p, void *ptr)
2867 {
2868 	up_generate_updates(out_rules, prefix_peer(p), NULL, p);
2869 }
2870 
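/* scratch buffer used by the update queue runners below */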
2871 u_char	queue_buf[4096];
2872 
2873 int
2874 rde_update_queue_pending(void)
2875 {
2876 	struct rde_peer *peer;
2877 	u_int8_t aid;
2878 
2879 	if (ibuf_se && ibuf_se->w.queued >= SESS_MSG_HIGH_MARK)
2880 		return 0;
2881 
2882 	LIST_FOREACH(peer, &peerlist, peer_l) {
2883 		if (peer->conf.id == 0)
2884 			continue;
2885 		if (peer->state != PEER_UP)
2886 			continue;
2887 		if (peer->throttled)
2888 			continue;
2889 		for (aid = 0; aid < AID_MAX; aid++) {
2890 			if (!RB_EMPTY(&peer->updates[aid]) ||
2891 			    !RB_EMPTY(&peer->withdraws[aid]))
2892 				return 1;
2893 		}
2894 	}
2895 	return 0;
2896 }
2897 
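/*
 * Drain the IPv4 update queues: for each peer dump the pending withdraws
 * first, then the path attributes and NLRI (or the EoR marker) and ship
 * the resulting UPDATE message to the session engine.
 */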
2898 void
2899 rde_update_queue_runner(void)
2900 {
2901 	struct rde_peer		*peer;
2902 	int			 r, sent, max = RDE_RUNNER_ROUNDS, eor;
2903 	u_int16_t		 len, wpos;
2904 
2905 	len = sizeof(queue_buf) - MSGSIZE_HEADER;
2906 	do {
2907 		sent = 0;
2908 		LIST_FOREACH(peer, &peerlist, peer_l) {
2909 			if (peer->conf.id == 0)
2910 				continue;
2911 			if (peer->state != PEER_UP)
2912 				continue;
2913 			if (peer->throttled)
2914 				continue;
2915 			eor = 0;
2916 			wpos = 0;
2917 			/* first withdraws, save 2 bytes for path attributes */
2918 			if ((r = up_dump_withdraws(queue_buf, len - 2, peer,
2919 			    AID_INET)) == -1)
2920 				continue;
2921 			wpos += r;
2922 
2923 			/* now the BGP path attributes, unless it is the EoR marker */
2924 			if (up_is_eor(peer, AID_INET)) {
2925 				eor = 1;
2926 				bzero(queue_buf + wpos, 2);
2927 				wpos += 2;
2928 			} else {
2929 				r = up_dump_attrnlri(queue_buf + wpos,
2930 				    len - wpos, peer);
2931 				wpos += r;
2932 			}
2933 
2934 			/* finally send to SE, but only if the UPDATE is non-empty */
2935 			if (wpos > 4) {
2936 				if (imsg_compose(ibuf_se, IMSG_UPDATE,
2937 				    peer->conf.id, 0, -1, queue_buf,
2938 				    wpos) == -1)
2939 					fatal("%s %d imsg_compose error",
2940 					    __func__, __LINE__);
2941 				sent++;
2942 			}
2943 			if (eor)
2944 				rde_peer_send_eor(peer, AID_INET);
2945 		}
2946 		max -= sent;
2947 	} while (sent != 0 && max > 0);
2948 }
2949 
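/*
 * Same as rde_update_queue_runner() but for multiprotocol AIDs:
 * withdraws (MP_UNREACH_NLRI) are flushed in a first pass, updates
 * (MP_REACH_NLRI) in a second one.
 */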
2950 void
2951 rde_update6_queue_runner(u_int8_t aid)
2952 {
2953 	struct rde_peer		*peer;
2954 	int			 r, sent, max = RDE_RUNNER_ROUNDS / 2;
2955 	u_int16_t		 len;
2956 
2957 	/* first withdraws ... */
2958 	do {
2959 		sent = 0;
2960 		LIST_FOREACH(peer, &peerlist, peer_l) {
2961 			if (peer->conf.id == 0)
2962 				continue;
2963 			if (peer->state != PEER_UP)
2964 				continue;
2965 			if (peer->throttled)
2966 				continue;
2967 			len = sizeof(queue_buf) - MSGSIZE_HEADER;
2968 			r = up_dump_mp_unreach(queue_buf, len, peer, aid);
2969 			if (r == -1)
2970 				continue;
2971 			/* finally send message to SE */
2972 			if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
2973 			    0, -1, queue_buf, r) == -1)
2974 				fatal("%s %d imsg_compose error", __func__,
2975 				    __LINE__);
2976 			sent++;
2977 		}
2978 		max -= sent;
2979 	} while (sent != 0 && max > 0);
2980 
2981 	/* ... then updates */
2982 	max = RDE_RUNNER_ROUNDS / 2;
2983 	do {
2984 		sent = 0;
2985 		LIST_FOREACH(peer, &peerlist, peer_l) {
2986 			if (peer->conf.id == 0)
2987 				continue;
2988 			if (peer->state != PEER_UP)
2989 				continue;
2990 			if (peer->throttled)
2991 				continue;
2992 			len = sizeof(queue_buf) - MSGSIZE_HEADER;
2993 			if (up_is_eor(peer, aid)) {
2994 				rde_peer_send_eor(peer, aid);
2995 				continue;
2996 			}
2997 			r = up_dump_mp_reach(queue_buf, len, peer, aid);
2998 			if (r == 0)
2999 				continue;
3000 
3001 			/* finally send message to SE */
3002 			if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
3003 			    0, -1, queue_buf, r) == -1)
3004 				fatal("%s %d imsg_compose error", __func__,
3005 				    __LINE__);
3006 			sent++;
3007 		}
3008 		max -= sent;
3009 	} while (sent != 0 && max > 0);
3010 }
3011 
3012 /*
3013  * pf table specific functions
3014  */
3015 struct rde_pftable_node {
3016 	RB_ENTRY(rde_pftable_node)	 entry;
3017 	struct pt_entry			*prefix;
3018 	int				 refcnt;
3019 	u_int16_t			 id;
3020 };
3021 RB_HEAD(rde_pftable_tree, rde_pftable_node);
3022 
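/* order tree nodes by pt_entry pointer, then by pftable id */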
3023 static inline int
3024 rde_pftable_cmp(struct rde_pftable_node *a, struct rde_pftable_node *b)
3025 {
3026 	if (a->prefix > b->prefix)
3027 		return 1;
3028 	if (a->prefix < b->prefix)
3029 		return -1;
3030 	return (a->id - b->id);
3031 }
3032 
3033 RB_GENERATE_STATIC(rde_pftable_tree, rde_pftable_node, entry, rde_pftable_cmp);
3034 
3035 struct rde_pftable_tree pftable_tree = RB_INITIALIZER(&pftable_tree);
3036 int need_commit;
3037 
3038 static void
3039 rde_pftable_send(u_int16_t id, struct pt_entry *pt, int del)
3040 {
3041 	struct pftable_msg pfm;
3042 
3043 	if (id == 0)
3044 		return;
3045 
3046 	/* do not run while cleaning up */
3047 	if (rde_quit)
3048 		return;
3049 
3050 	bzero(&pfm, sizeof(pfm));
3051 	strlcpy(pfm.pftable, pftable_id2name(id), sizeof(pfm.pftable));
3052 	pt_getaddr(pt, &pfm.addr);
3053 	pfm.len = pt->prefixlen;
3054 
3055 	if (imsg_compose(ibuf_main,
3056 	    del ? IMSG_PFTABLE_REMOVE : IMSG_PFTABLE_ADD,
3057 	    0, 0, -1, &pfm, sizeof(pfm)) == -1)
3058 		fatal("%s %d imsg_compose error", __func__, __LINE__);
3059 
3060 	need_commit = 1;
3061 }
3062 
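/*
 * Reference-count prefixes per pf table: only the first add and the last
 * delete of a (prefix, table) pair are forwarded to the parent process.
 */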
3063 void
3064 rde_pftable_add(u_int16_t id, struct prefix *p)
3065 {
3066 	struct rde_pftable_node *pfn, node;
3067 
3068 	memset(&node, 0, sizeof(node));
3069 	node.prefix = p->pt;
3070 	node.id = id;
3071 
3072 	pfn = RB_FIND(rde_pftable_tree, &pftable_tree, &node);
3073 	if (pfn == NULL) {
3074 		if ((pfn = calloc(1, sizeof(*pfn))) == NULL)
3075 			fatal("%s", __func__);
3076 		pfn->prefix = pt_ref(p->pt);
3077 		pfn->id = id;
3078 
3079 		if (RB_INSERT(rde_pftable_tree, &pftable_tree, pfn) != NULL)
3080 			fatalx("%s: tree corrupt", __func__);
3081 
3082 		rde_pftable_send(id, p->pt, 0);
3083 	}
3084 	pfn->refcnt++;
3085 }
3086 
3087 void
3088 rde_pftable_del(u_int16_t id, struct prefix *p)
3089 {
3090 	struct rde_pftable_node *pfn, node;
3091 
3092 	memset(&node, 0, sizeof(node));
3093 	node.prefix = p->pt;
3094 	node.id = id;
3095 
3096 	pfn = RB_FIND(rde_pftable_tree, &pftable_tree, &node);
3097 	if (pfn == NULL)
3098 		return;
3099 
3100 	if (--pfn->refcnt <= 0) {
3101 		rde_pftable_send(id, p->pt, 1);
3102 
3103 		if (RB_REMOVE(rde_pftable_tree, &pftable_tree, pfn) == NULL)
3104 			fatalx("%s: tree corrupt", __func__);
3105 
3106 		pt_unref(pfn->prefix);
3107 		free(pfn);
3108 	}
3109 }
3110 
3111 void
3112 rde_commit_pftable(void)
3113 {
3114 	/* do not run while cleaning up */
3115 	if (rde_quit)
3116 		return;
3117 
3118 	if (!need_commit)
3119 		return;
3120 
3121 	if (imsg_compose(ibuf_main, IMSG_PFTABLE_COMMIT, 0, 0, -1, NULL, 0) ==
3122 	    -1)
3123 		fatal("%s %d imsg_compose error", __func__, __LINE__);
3124 
3125 	need_commit = 0;
3126 }
3127 
3128 /*
3129  * nexthop specific functions
3130  */
3131 void
3132 rde_send_nexthop(struct bgpd_addr *next, int insert)
3133 {
3134 	int			 type;
3135 
3136 	if (insert)
3137 		type = IMSG_NEXTHOP_ADD;
3138 	else
3139 		type = IMSG_NEXTHOP_REMOVE;
3140 
3141 	if (imsg_compose(ibuf_main, type, 0, 0, -1, next,
3142 	    sizeof(struct bgpd_addr)) == -1)
3143 		fatal("%s %d imsg_compose error", __func__, __LINE__);
3144 }
3145 
3146 /*
3147  * soft reconfig specific functions
3148  */
3149 void
3150 rde_reload_done(void)
3151 {
3152 	struct rde_peer		*peer;
3153 	struct filter_head	*fh;
3154 	struct rde_prefixset_head prefixsets_old;
3155 	struct rde_prefixset_head originsets_old;
3156 	struct rde_prefixset	 roa_old;
3157 	struct as_set_head	 as_sets_old;
3158 	u_int16_t		 rid;
3159 	int			 reload = 0;
3160 
3161 	softreconfig = 0;
3162 
3163 	SIMPLEQ_INIT(&prefixsets_old);
3164 	SIMPLEQ_INIT(&originsets_old);
3165 	SIMPLEQ_INIT(&as_sets_old);
3166 	SIMPLEQ_CONCAT(&prefixsets_old, &conf->rde_prefixsets);
3167 	SIMPLEQ_CONCAT(&originsets_old, &conf->rde_originsets);
3168 	SIMPLEQ_CONCAT(&as_sets_old, &conf->as_sets);
3169 	roa_old = conf->rde_roa;
3170 
3171 	/* merge the main config */
3172 	copy_config(conf, nconf);
3173 
3174 	/* need to copy the sets and roa table and clear them in nconf */
3175 	SIMPLEQ_CONCAT(&conf->rde_prefixsets, &nconf->rde_prefixsets);
3176 	SIMPLEQ_CONCAT(&conf->rde_originsets, &nconf->rde_originsets);
3177 	SIMPLEQ_CONCAT(&conf->as_sets, &nconf->as_sets);
3178 
3179 	conf->rde_roa = nconf->rde_roa;
3180 	conf->rde_roa.lastchange = roa_old.lastchange;
3181 	memset(&nconf->rde_roa, 0, sizeof(nconf->rde_roa));
3182 
3183 	/* apply new set of l3vpn, sync will be done later */
3184 	free_l3vpns(&conf->l3vpns);
3185 	SIMPLEQ_CONCAT(&conf->l3vpns, &nconf->l3vpns);
3186 	/* XXX WHERE IS THE SYNC ??? */
3187 
3188 	free_config(nconf);
3189 	nconf = NULL;
3190 
3191 	/* sync peerself with conf */
3192 	peerself->remote_bgpid = ntohl(conf->bgpid);
3193 	peerself->conf.local_as = conf->as;
3194 	peerself->conf.remote_as = conf->as;
3195 	peerself->conf.remote_addr.aid = AID_INET;
3196 	peerself->conf.remote_addr.v4.s_addr = conf->bgpid;
3197 	peerself->conf.remote_masklen = 32;
3198 	peerself->short_as = conf->short_as;
3199 
3200 	/* check if roa changed */
3201 	if (trie_equal(&conf->rde_roa.th, &roa_old.th) == 0) {
3202 		log_debug("roa change: reloading Adj-RIB-In");
3203 		conf->rde_roa.dirty = 1;
3204 		conf->rde_roa.lastchange = getmonotime();
3205 		reload++;	/* run softreconf in */
3206 	}
3207 
3208 	trie_free(&roa_old.th);	/* old roa no longer needed */
3209 
3210 	rde_mark_prefixsets_dirty(&prefixsets_old, &conf->rde_prefixsets);
3211 	rde_mark_prefixsets_dirty(&originsets_old, &conf->rde_originsets);
3212 	as_sets_mark_dirty(&as_sets_old, &conf->as_sets);
3213 
3214 	/*
3215 	 * Make the new filter rules the active ones but keep the old ones
3216 	 * around for softreconfig. This is needed so that changes happening
3217 	 * during the reload use the right filters.
3218 	 */
3219 	fh = out_rules;
3220 	out_rules = out_rules_tmp;
3221 	out_rules_tmp = fh;
3222 
3223 	rde_filter_calc_skip_steps(out_rules);
3224 
3225 	/* check if filter changed */
3226 	LIST_FOREACH(peer, &peerlist, peer_l) {
3227 		if (peer->conf.id == 0)
3228 			continue;
3229 		peer->reconf_out = 0;
3230 		peer->reconf_rib = 0;
3231 		if (peer->loc_rib_id != rib_find(peer->conf.rib)) {
3232 			log_peer_info(&peer->conf, "rib change, reloading");
3233 			peer->loc_rib_id = rib_find(peer->conf.rib);
3234 			if (peer->loc_rib_id == RIB_NOTFOUND)
3235 				fatalx("King Bula's peer met an unknown RIB");
3236 			peer->reconf_rib = 1;
3237 			softreconfig++;
3238 			if (prefix_dump_new(peer, AID_UNSPEC,
3239 			    RDE_RUNNER_ROUNDS, NULL, rde_up_flush_upcall,
3240 			    rde_softreconfig_in_done, NULL) == -1)
3241 				fatal("%s: prefix_dump_new", __func__);
3242 			log_peer_info(&peer->conf, "flushing Adj-RIB-Out");
3243 			softreconfig++;	/* account for the running flush */
3244 			continue;
3245 		}
3246 		if (!rde_filter_equal(out_rules, out_rules_tmp, peer)) {
3247 			char *p = log_fmt_peer(&peer->conf);
3248 			log_debug("out filter change: reloading peer %s", p);
3249 			free(p);
3250 			peer->reconf_out = 1;
3251 		}
3252 	}
3253 	/* bring ribs in sync */
3254 	for (rid = 0; rid < rib_size; rid++) {
3255 		struct rib *rib = rib_byid(rid);
3256 		if (rib == NULL)
3257 			continue;
3258 		rde_filter_calc_skip_steps(rib->in_rules_tmp);
3259 
3260 		/* flip rules, make new active */
3261 		fh = rib->in_rules;
3262 		rib->in_rules = rib->in_rules_tmp;
3263 		rib->in_rules_tmp = fh;
3264 
3265 		switch (rib->state) {
3266 		case RECONF_DELETE:
3267 			rib_free(rib);
3268 			break;
3269 		case RECONF_RELOAD:
3270 			rib_update(rib);
3271 			rib->state = RECONF_KEEP;
3272 			/* FALLTHROUGH */
3273 		case RECONF_KEEP:
3274 			if (rde_filter_equal(rib->in_rules,
3275 			    rib->in_rules_tmp, NULL))
3276 				/* rib is in sync */
3277 				break;
3278 			log_debug("in filter change: reloading RIB %s",
3279 			    rib->name);
3280 			rib->state = RECONF_RELOAD;
3281 			reload++;
3282 			break;
3283 		case RECONF_REINIT:
3284 			/* new rib */
3285 			rib->state = RECONF_RELOAD;
3286 			reload++;
3287 			break;
3288 		case RECONF_NONE:
3289 			break;
3290 		}
3291 		filterlist_free(rib->in_rules_tmp);
3292 		rib->in_rules_tmp = NULL;
3293 	}
3294 
3295 	filterlist_free(out_rules_tmp);
3296 	out_rules_tmp = NULL;
3297 	/* old filters removed, free all sets */
3298 	free_rde_prefixsets(&prefixsets_old);
3299 	free_rde_prefixsets(&originsets_old);
3300 	as_sets_free(&as_sets_old);
3301 
3302 	log_info("RDE reconfigured");
3303 
3304 	if (reload > 0) {
3305 		softreconfig++;
3306 		if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC, RDE_RUNNER_ROUNDS,
3307 		    rib_byid(RIB_ADJ_IN), rde_softreconfig_in,
3308 		    rde_softreconfig_in_done, NULL) == -1)
3309 			fatal("%s: rib_dump_new", __func__);
3310 		log_info("running softreconfig in");
3311 	} else {
3312 		rde_softreconfig_in_done(NULL, AID_UNSPEC);
3313 	}
3314 }
3315 
3316 static void
3317 rde_softreconfig_in_done(void *arg, u_int8_t dummy)
3318 {
3319 	struct rde_peer	*peer;
3320 	u_int16_t	 i;
3321 
3322 	if (arg != NULL) {
3323 		softreconfig--;
3324 		/* one dump done but other dumps are still running */
3325 		if (softreconfig > 0)
3326 			return;
3327 
3328 		log_info("softreconfig in done");
3329 	}
3330 
3331 	/* now do the Adj-RIB-Out sync and a possible FIB sync */
3332 	softreconfig = 0;
3333 	for (i = 0; i < rib_size; i++) {
3334 		struct rib *rib = rib_byid(i);
3335 		if (rib == NULL)
3336 			continue;
3337 		rib->state = RECONF_NONE;
3338 		if (rib->fibstate == RECONF_RELOAD) {
3339 			if (rib_dump_new(i, AID_UNSPEC, RDE_RUNNER_ROUNDS,
3340 			    rib, rde_softreconfig_sync_fib,
3341 			    rde_softreconfig_sync_done, NULL) == -1)
3342 				fatal("%s: rib_dump_new", __func__);
3343 			softreconfig++;
3344 			log_info("starting fib sync for rib %s",
3345 			    rib->name);
3346 		} else if (rib->fibstate == RECONF_REINIT) {
3347 			if (rib_dump_new(i, AID_UNSPEC, RDE_RUNNER_ROUNDS,
3348 			    rib, rde_softreconfig_sync_reeval,
3349 			    rde_softreconfig_sync_done, NULL) == -1)
3350 				fatal("%s: rib_dump_new", __func__);
3351 			softreconfig++;
3352 			log_info("starting re-evaluation of rib %s",
3353 			    rib->name);
3354 		}
3355 	}
3356 
3357 	LIST_FOREACH(peer, &peerlist, peer_l) {
3358 		u_int8_t aid;
3359 
3360 		if (peer->reconf_out) {
3361 			if (peer->conf.export_type == EXPORT_NONE) {
3362 				/* nothing to do here */
3363 				peer->reconf_out = 0;
3364 			} else if (peer->conf.export_type ==
3365 			    EXPORT_DEFAULT_ROUTE) {
3366 				/* just resend the default route */
3367 				for (aid = 0; aid < AID_MAX; aid++) {
3368 					if (peer->capa.mp[aid])
3369 						up_generate_default(out_rules,
3370 						    peer, aid);
3371 				}
3372 				peer->reconf_out = 0;
3373 			} else
3374 				rib_byid(peer->loc_rib_id)->state =
3375 				    RECONF_RELOAD;
3376 		} else if (peer->reconf_rib) {
3377 			/* dump the full table to neighbors that changed rib */
3378 			for (aid = 0; aid < AID_MAX; aid++) {
3379 				if (peer->capa.mp[aid])
3380 					peer_dump(peer, aid);
3381 			}
3382 		}
3383 	}
3384 
3385 	for (i = 0; i < rib_size; i++) {
3386 		struct rib *rib = rib_byid(i);
3387 		if (rib == NULL)
3388 			continue;
3389 		if (rib->state == RECONF_RELOAD) {
3390 			if (rib_dump_new(i, AID_UNSPEC, RDE_RUNNER_ROUNDS,
3391 			    rib, rde_softreconfig_out,
3392 			    rde_softreconfig_out_done, NULL) == -1)
3393 				fatal("%s: rib_dump_new", __func__);
3394 			softreconfig++;
3395 			log_info("starting softreconfig out for rib %s",
3396 			    rib->name);
3397 		}
3398 	}
3399 
3400 	/* if nothing to do move to last stage */
3401 	if (softreconfig == 0)
3402 		rde_softreconfig_done();
3403 }
3404 
3405 static void
3406 rde_softreconfig_out_done(void *arg, u_int8_t aid)
3407 {
3408 	struct rib	*rib = arg;
3409 
3410 	/* this RIB dump is done */
3411 	log_info("softreconfig out done for %s", rib->name);
3412 
3413 	/* check if other dumps are still running */
3414 	if (--softreconfig == 0)
3415 		rde_softreconfig_done();
3416 }
3417 
3418 static void
3419 rde_softreconfig_done(void)
3420 {
3421 	u_int16_t	i;
3422 
3423 	for (i = 0; i < rib_size; i++) {
3424 		struct rib *rib = rib_byid(i);
3425 		if (rib == NULL)
3426 			continue;
3427 		rib->state = RECONF_NONE;
3428 	}
3429 
3430 	log_info("RDE soft reconfiguration done");
3431 	imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
3432 	    -1, NULL, 0);
3433 }
3434 
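/*
 * Re-run the input filters over one Adj-RIB-In entry, refreshing the ROA
 * validation state if the ROA table changed, and update or withdraw the
 * prefix in every Loc-RIB that is being reloaded.
 */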
3435 static void
3436 rde_softreconfig_in(struct rib_entry *re, void *bula)
3437 {
3438 	struct filterstate	 state;
3439 	struct rib		*rib;
3440 	struct prefix		*p;
3441 	struct pt_entry		*pt;
3442 	struct rde_peer		*peer;
3443 	struct rde_aspath	*asp;
3444 	enum filter_actions	 action;
3445 	struct bgpd_addr	 prefix;
3446 	int			 force_eval;
3447 	u_int8_t		 vstate;
3448 	u_int16_t		 i;
3449 
3450 	pt = re->prefix;
3451 	pt_getaddr(pt, &prefix);
3452 	LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
3453 		asp = prefix_aspath(p);
3454 		peer = prefix_peer(p);
3455 		force_eval = 0;
3456 
3457 		if (conf->rde_roa.dirty) {
3458 			/* ROA validation state update */
3459 			vstate = rde_roa_validity(&conf->rde_roa,
3460 			    &prefix, pt->prefixlen, aspath_origin(asp->aspath));
3461 			if (vstate != p->validation_state) {
3462 				force_eval = 1;
3463 				p->validation_state = vstate;
3464 			}
3465 		}
3466 
3467 		/* skip announced networks, they are never filtered */
3468 		if (asp->flags & F_PREFIX_ANNOUNCED)
3469 			continue;
3470 
3471 		for (i = RIB_LOC_START; i < rib_size; i++) {
3472 			rib = rib_byid(i);
3473 			if (rib == NULL)
3474 				continue;
3475 
3476 			if (rib->state != RECONF_RELOAD && !force_eval)
3477 				continue;
3478 
3479 			rde_filterstate_prep(&state, asp, prefix_communities(p),
3480 			    prefix_nexthop(p), prefix_nhflags(p));
3481 			action = rde_filter(rib->in_rules, peer, peer, &prefix,
3482 			    pt->prefixlen, p->validation_state, &state);
3483 
3484 			if (action == ACTION_ALLOW) {
3485 				/* update Local-RIB */
3486 				prefix_update(rib, peer, &state, &prefix,
3487 				    pt->prefixlen, p->validation_state);
3488 			} else if (action == ACTION_DENY) {
3489 				/* remove from Local-RIB */
3490 				prefix_withdraw(rib, peer, &prefix,
3491 				    pt->prefixlen);
3492 			}
3493 
3494 			rde_filterstate_clean(&state);
3495 		}
3496 	}
3497 }
3498 
3499 static void
3500 rde_softreconfig_out(struct rib_entry *re, void *bula)
3501 {
3502 	struct prefix		*p = re->active;
3503 	struct rde_peer		*peer;
3504 
3505 	if (p == NULL)
3506 		/* no valid path for prefix */
3507 		return;
3508 
3509 	LIST_FOREACH(peer, &peerlist, peer_l) {
3510 		if (peer->loc_rib_id == re->rib_id && peer->reconf_out)
3511 			/* Regenerate all updates. */
3512 			up_generate_updates(out_rules, peer, p, p);
3513 	}
3514 }
3515 
3516 static void
3517 rde_softreconfig_sync_reeval(struct rib_entry *re, void *arg)
3518 {
3519 	struct prefix_list	prefixes;
3520 	struct prefix		*p, *next;
3521 	struct rib		*rib = arg;
3522 
3523 	if (rib->flags & F_RIB_NOEVALUATE) {
3524 		/*
3525 		 * The evaluation process is turned off, so remove all
3526 		 * prefixes from the Adj-RIB-Out and also unlink the
3527 		 * nexthop if it was linked.
3528 		 */
3529 		LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
3530 			if (p->flags & PREFIX_NEXTHOP_LINKED)
3531 				nexthop_unlink(p);
3532 		}
3533 		if (re->active) {
3534 			rde_generate_updates(rib, NULL, re->active);
3535 			re->active = NULL;
3536 		}
3537 		return;
3538 	}
3539 
3540 	/* evaluation process is turned on, so evaluate all prefixes again */
3541 	re->active = NULL;
3542 	prefixes = re->prefix_h;
3543 	LIST_INIT(&re->prefix_h);
3544 
3545 	LIST_FOREACH_SAFE(p, &prefixes, entry.list.rib, next) {
3546 		/* need to re-link the nexthop if not already linked */
3547 		if ((p->flags & PREFIX_NEXTHOP_LINKED) == 0)
3548 			nexthop_link(p);
3549 		prefix_evaluate(re, p, p);
3550 	}
3551 }
3552 
3553 static void
3554 rde_softreconfig_sync_fib(struct rib_entry *re, void *bula)
3555 {
3556 	if (re->active)
3557 		rde_send_kroute(re_rib(re), re->active, NULL);
3558 }
3559 
3560 static void
3561 rde_softreconfig_sync_done(void *arg, u_int8_t aid)
3562 {
3563 	struct rib *rib = arg;
3564 
3565 	/* this RIB dump is done */
3566 	if (rib->fibstate == RECONF_RELOAD)
3567 		log_info("fib sync done for %s", rib->name);
3568 	else
3569 		log_info("re-evaluation done for %s", rib->name);
3570 	rib->fibstate = RECONF_NONE;
3571 
3572 	/* check if other dumps are still running */
3573 	if (--softreconfig == 0)
3574 		rde_softreconfig_done();
3575 }
3576 
3577 /*
3578  * generic helper function
3579  */
3580 u_int32_t
3581 rde_local_as(void)
3582 {
3583 	return (conf->as);
3584 }
3585 
3586 int
3587 rde_decisionflags(void)
3588 {
3589 	return (conf->flags & BGPD_FLAG_DECISION_MASK);
3590 }
3591 
3592 int
3593 rde_as4byte(struct rde_peer *peer)
3594 {
3595 	return (peer->capa.as4byte);
3596 }
3597 
3598 static int
3599 rde_no_as_set(struct rde_peer *peer)
3600 {
3601 	return (peer->conf.flags & PEERFLAG_NO_AS_SET);
3602 }
3603 
3604 /* End-of-RIB marker, RFC 4724 */
3605 static void
3606 rde_peer_recv_eor(struct rde_peer *peer, u_int8_t aid)
3607 {
3608 	peer->prefix_rcvd_eor++;
3609 
3610 	/*
3611 	 * First notify SE to avert a possible race with the restart timeout.
3612 	 * If the timeout fires before this imsg is processed by the SE it will
3613 	 * result in the same operation since the timeout issues a FLUSH which
3614 	 * does the same as the RESTARTED action (flushing stale routes).
3615 	 * The logic in the SE is so that only one of FLUSH or RESTARTED will
3616 	 * be sent back to the RDE and so peer_flush is only called once.
3617 	 */
3618 	if (imsg_compose(ibuf_se, IMSG_SESSION_RESTARTED, peer->conf.id,
3619 	    0, -1, &aid, sizeof(aid)) == -1)
3620 		fatal("imsg_compose error while receiving EoR");
3621 
3622 	log_peer_info(&peer->conf, "received %s EOR marker",
3623 	    aid2str(aid));
3624 }
3625 
3626 static void
3627 rde_peer_send_eor(struct rde_peer *peer, u_int8_t aid)
3628 {
3629 	u_int16_t	afi;
3630 	u_int8_t	safi;
3631 
3632 	peer->prefix_sent_eor++;
3633 
3634 	if (aid == AID_INET) {
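		/*
		 * IPv4 EoR marker: an UPDATE with zero withdrawn routes
		 * length and zero total path attribute length (RFC 4724).
		 */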
3635 		u_char null[4];
3636 
3637 		bzero(&null, 4);
3638 		if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
3639 		    0, -1, &null, 4) == -1)
3640 			fatal("imsg_compose error while sending EoR");
3641 	} else {
3642 		u_int16_t	i;
3643 		u_char		buf[10];
3644 
3645 		if (aid2afi(aid, &afi, &safi) == -1)
3646 			fatalx("peer_send_eor: bad AID");
3647 
3648 		i = 0;	/* v4 withdrawn len */
3649 		bcopy(&i, &buf[0], sizeof(i));
3650 		i = htons(6);	/* path attr len */
3651 		bcopy(&i, &buf[2], sizeof(i));
3652 		buf[4] = ATTR_OPTIONAL;
3653 		buf[5] = ATTR_MP_UNREACH_NLRI;
3654 		buf[6] = 3;	/* withdrawn len */
3655 		i = htons(afi);
3656 		bcopy(&i, &buf[7], sizeof(i));
3657 		buf[9] = safi;
3658 
3659 		if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
3660 		    0, -1, &buf, 10) == -1)
3661 			fatal("%s %d imsg_compose error in peer_send_eor",
3662 			    __func__, __LINE__);
3663 	}
3664 
3665 	log_peer_info(&peer->conf, "sending %s EOR marker",
3666 	    aid2str(aid));
3667 }
3668 
3669 /*
3670  * network announcement stuff
3671  */
3672 void
3673 network_add(struct network_config *nc, struct filterstate *state)
3674 {
3675 	struct l3vpn		*vpn;
3676 	struct filter_set_head	*vpnset = NULL;
3677 	struct in_addr		 prefix4;
3678 	struct in6_addr		 prefix6;
3679 	u_int8_t		 vstate;
3680 	u_int16_t		 i;
3681 
3682 	if (nc->rd != 0) {
3683 		SIMPLEQ_FOREACH(vpn, &conf->l3vpns, entry) {
3684 			if (vpn->rd != nc->rd)
3685 				continue;
3686 			switch (nc->prefix.aid) {
3687 			case AID_INET:
3688 				prefix4 = nc->prefix.v4;
3689 				memset(&nc->prefix, 0, sizeof(nc->prefix));
3690 				nc->prefix.aid = AID_VPN_IPv4;
3691 				nc->prefix.rd = vpn->rd;
3692 				nc->prefix.v4 = prefix4;
3693 				nc->prefix.labellen = 3;
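				/*
				 * 3-byte label stack entry: 20-bit label in
				 * the top bits, TC bits zero, S (bottom of
				 * stack) bit set.
				 */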
3694 				nc->prefix.labelstack[0] =
3695 				    (vpn->label >> 12) & 0xff;
3696 				nc->prefix.labelstack[1] =
3697 				    (vpn->label >> 4) & 0xff;
3698 				nc->prefix.labelstack[2] =
3699 				    (vpn->label << 4) & 0xf0;
3700 				nc->prefix.labelstack[2] |= BGP_MPLS_BOS;
3701 				vpnset = &vpn->export;
3702 				break;
3703 			case AID_INET6:
3704 				prefix6 = nc->prefix.v6;
3705 				memset(&nc->prefix, 0, sizeof(nc->prefix));
3706 				nc->prefix.aid = AID_VPN_IPv6;
3707 				nc->prefix.rd = vpn->rd;
3708 				nc->prefix.v6 = prefix6;
3709 				nc->prefix.labellen = 3;
3710 				nc->prefix.labelstack[0] =
3711 				    (vpn->label >> 12) & 0xff;
3712 				nc->prefix.labelstack[1] =
3713 				    (vpn->label >> 4) & 0xff;
3714 				nc->prefix.labelstack[2] =
3715 				    (vpn->label << 4) & 0xf0;
3716 				nc->prefix.labelstack[2] |= BGP_MPLS_BOS;
3717 				vpnset = &vpn->export;
3718 				break;
3719 			default:
3720 				log_warnx("unable to VPNize prefix");
3721 				filterset_free(&nc->attrset);
3722 				return;
3723 			}
3724 			break;
3725 		}
3726 		if (vpn == NULL) {
3727 			log_warnx("network_add: "
3728 			    "prefix %s/%u in non-existing l3vpn %s",
3729 			    log_addr(&nc->prefix), nc->prefixlen,
3730 			    log_rd(nc->rd));
3731 			return;
3732 		}
3733 	}
3734 
3735 	rde_apply_set(&nc->attrset, peerself, peerself, state, nc->prefix.aid);
3736 	if (vpnset)
3737 		rde_apply_set(vpnset, peerself, peerself, state,
3738 		    nc->prefix.aid);
3739 
3740 	vstate = rde_roa_validity(&conf->rde_roa, &nc->prefix,
3741 	    nc->prefixlen, aspath_origin(state->aspath.aspath));
3742 	if (prefix_update(rib_byid(RIB_ADJ_IN), peerself, state, &nc->prefix,
3743 	    nc->prefixlen, vstate) == 1)
3744 		peerself->prefix_cnt++;
3745 	for (i = RIB_LOC_START; i < rib_size; i++) {
3746 		struct rib *rib = rib_byid(i);
3747 		if (rib == NULL)
3748 			continue;
3749 		rde_update_log("announce", i, peerself,
3750 		    state->nexthop ? &state->nexthop->exit_nexthop : NULL,
3751 		    &nc->prefix, nc->prefixlen);
3752 		prefix_update(rib, peerself, state, &nc->prefix,
3753 		    nc->prefixlen, vstate);
3754 	}
3755 	filterset_free(&nc->attrset);
3756 }
3757 
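/*
 * Withdraw a previously announced network from the Adj-RIB-In and all
 * local RIBs, applying the same L3VPN prefix rewrite as network_add().
 */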
3758 void
3759 network_delete(struct network_config *nc)
3760 {
3761 	struct l3vpn	*vpn;
3762 	struct in_addr	 prefix4;
3763 	struct in6_addr	 prefix6;
3764 	u_int32_t	 i;
3765 
3766 	if (nc->rd) {
3767 		SIMPLEQ_FOREACH(vpn, &conf->l3vpns, entry) {
3768 			if (vpn->rd != nc->rd)
3769 				continue;
3770 			switch (nc->prefix.aid) {
3771 			case AID_INET:
3772 				prefix4 = nc->prefix.v4;
3773 				memset(&nc->prefix, 0, sizeof(nc->prefix));
3774 				nc->prefix.aid = AID_VPN_IPv4;
3775 				nc->prefix.rd = vpn->rd;
3776 				nc->prefix.v4 = prefix4;
3777 				nc->prefix.labellen = 3;
3778 				nc->prefix.labelstack[0] =
3779 				    (vpn->label >> 12) & 0xff;
3780 				nc->prefix.labelstack[1] =
3781 				    (vpn->label >> 4) & 0xff;
3782 				nc->prefix.labelstack[2] =
3783 				    (vpn->label << 4) & 0xf0;
3784 				nc->prefix.labelstack[2] |= BGP_MPLS_BOS;
3785 				break;
3786 			case AID_INET6:
3787 				prefix6 = nc->prefix.v6;
3788 				memset(&nc->prefix, 0, sizeof(nc->prefix));
3789 				nc->prefix.aid = AID_VPN_IPv6;
3790 				nc->prefix.rd = vpn->rd;
3791 				nc->prefix.v6 = prefix6;
3792 				nc->prefix.labellen = 3;
3793 				nc->prefix.labelstack[0] =
3794 				    (vpn->label >> 12) & 0xff;
3795 				nc->prefix.labelstack[1] =
3796 				    (vpn->label >> 4) & 0xff;
3797 				nc->prefix.labelstack[2] =
3798 				    (vpn->label << 4) & 0xf0;
3799 				nc->prefix.labelstack[2] |= BGP_MPLS_BOS;
3800 				break;
3801 			default:
3802 				log_warnx("unable to VPNize prefix");
3803 				return;
			}
			break;
3805 		}
3806 	}
3807 
3808 	for (i = RIB_LOC_START; i < rib_size; i++) {
3809 		struct rib *rib = rib_byid(i);
3810 		if (rib == NULL)
3811 			continue;
3812 		if (prefix_withdraw(rib, peerself, &nc->prefix,
3813 		    nc->prefixlen))
3814 			rde_update_log("withdraw announce", i, peerself,
3815 			    NULL, &nc->prefix, nc->prefixlen);
3816 	}
3817 	if (prefix_withdraw(rib_byid(RIB_ADJ_IN), peerself, &nc->prefix,
3818 	    nc->prefixlen))
3819 		peerself->prefix_cnt--;
3820 }
3821 
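/*
 * Upcall for IMSG_CTL_SHOW_NETWORK: report every locally announced
 * prefix of this RIB entry back to the control socket.
 */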
3822 static void
3823 network_dump_upcall(struct rib_entry *re, void *ptr)
3824 {
3825 	struct prefix		*p;
3826 	struct rde_aspath	*asp;
3827 	struct kroute_full	 k;
3828 	struct bgpd_addr	 addr;
3829 	struct rde_dump_ctx	*ctx = ptr;
3830 
3831 	LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
3832 		asp = prefix_aspath(p);
3833 		if (!(asp->flags & F_PREFIX_ANNOUNCED))
3834 			continue;
3835 		pt_getaddr(p->pt, &addr);
3836 
3837 		bzero(&k, sizeof(k));
3838 		memcpy(&k.prefix, &addr, sizeof(k.prefix));
3839 		if (prefix_nexthop(p) == NULL ||
3840 		    prefix_nexthop(p)->state != NEXTHOP_REACH)
3841 			k.nexthop.aid = k.prefix.aid;
3842 		else
3843 			memcpy(&k.nexthop, &prefix_nexthop(p)->true_nexthop,
3844 			    sizeof(k.nexthop));
3845 		k.prefixlen = p->pt->prefixlen;
3846 		k.flags = F_KERNEL;
3847 		if ((asp->flags & F_ANN_DYNAMIC) == 0)
3848 			k.flags = F_STATIC;
3849 		if (imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NETWORK, 0,
3850 		    ctx->req.pid, -1, &k, sizeof(k)) == -1)
3851 			log_warnx("network_dump_upcall: "
3852 			    "imsg_compose error");
3853 	}
3854 }
3855 
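/*
 * Upcall that withdraws a peer's dynamically announced (F_ANN_DYNAMIC)
 * prefix from all RIBs and the Adj-RIB-In.
 */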
3856 static void
3857 network_flush_upcall(struct rib_entry *re, void *ptr)
3858 {
3859 	struct rde_peer *peer = ptr;
3860 	struct bgpd_addr addr;
3861 	struct prefix *p;
3862 	u_int32_t i;
3863 	u_int8_t prefixlen;
3864 
3865 	p = prefix_bypeer(re, peer);
3866 	if (p == NULL)
3867 		return;
3868 	if ((prefix_aspath(p)->flags & F_ANN_DYNAMIC) != F_ANN_DYNAMIC)
3869 		return;
3870 
3871 	pt_getaddr(re->prefix, &addr);
3872 	prefixlen = re->prefix->prefixlen;
3873 
3874 	for (i = RIB_LOC_START; i < rib_size; i++) {
3875 		struct rib *rib = rib_byid(i);
3876 		if (rib == NULL)
3877 			continue;
3878 		if (prefix_withdraw(rib, peer, &addr, prefixlen) == 1)
3879 			rde_update_log("flush announce", i, peer,
3880 			    NULL, &addr, prefixlen);
3881 	}
3882 
3883 	if (prefix_withdraw(rib_byid(RIB_ADJ_IN), peer, &addr,
3884 	    prefixlen) == 1)
3885 		peer->prefix_cnt--;
3886 }
3887 
3888 /* clean up */
3889 void
3890 rde_shutdown(void)
3891 {
	/*
	 * The decision process is turned off once rde_quit is set to 1;
	 * rde_shutdown depends on this.
	 */
3896 
3897 	/* First all peers go down */
3898 	peer_foreach(peer_down, NULL);
3899 
3900 	/* free filters */
3901 	filterlist_free(out_rules);
3902 	filterlist_free(out_rules_tmp);
3903 
3904 	/* kill the VPN configs */
3905 	free_l3vpns(&conf->l3vpns);
3906 
	/* now shut down the remaining subsystems and check for leftovers */
3908 	rib_shutdown();
3909 	nexthop_shutdown();
3910 	path_shutdown();
3911 	aspath_shutdown();
3912 	attr_shutdown();
3913 	pt_shutdown();
3914 	peer_shutdown();
3915 }
3916 
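/* look up a prefix set by name, returns NULL if no set matches */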
3917 struct rde_prefixset *
3918 rde_find_prefixset(char *name, struct rde_prefixset_head *p)
3919 {
3920 	struct rde_prefixset *ps;
3921 
3922 	SIMPLEQ_FOREACH(ps, p, entry) {
3923 		if (!strcmp(ps->name, name))
3924 			return (ps);
3925 	}
3926 	return (NULL);
3927 }
3928 
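/*
 * Compare the new prefix sets against the old ones and mark a new set
 * dirty if it has no old counterpart or if the trie contents differ, so
 * that a config reload can detect the change.
 */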
3929 void
3930 rde_mark_prefixsets_dirty(struct rde_prefixset_head *psold,
3931     struct rde_prefixset_head *psnew)
3932 {
3933 	struct rde_prefixset *new, *old;
3934 
	SIMPLEQ_FOREACH(new, psnew, entry) {
		if (psold == NULL ||
		    (old = rde_find_prefixset(new->name, psold)) == NULL ||
		    trie_equal(&new->th, &old->th) == 0) {
			new->dirty = 1;
			new->lastchange = getmonotime();
		} else
			new->lastchange = old->lastchange;
	}
3948 }
3949 
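/*
 * Evaluate the ROA validation state (ROA_VALID, ROA_INVALID or
 * ROA_NOTFOUND) of a prefix announced by the given origin AS.
 */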
3950 u_int8_t
3951 rde_roa_validity(struct rde_prefixset *ps, struct bgpd_addr *prefix,
3952     u_int8_t plen, u_int32_t as)
3953 {
3954 	int r;
3955 
3956 	r = trie_roa_check(&ps->th, prefix, plen, as);
3957 	return (r & ROA_MASK);
3958 }
3959 
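/*
 * Match the origin validation state of a prefix against the OVS filter
 * flags of a control request.  Returns 1 if no OVS flag is set or the
 * prefix's state matches one of the requested states.
 */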
3960 int
3961 ovs_match(struct prefix *p, u_int32_t flag)
3962 {
3963 	if (flag & (F_CTL_OVS_VALID|F_CTL_OVS_INVALID|F_CTL_OVS_NOTFOUND)) {
3964 		switch (prefix_vstate(p)) {
3965 		case ROA_VALID:
3966 			if (!(flag & F_CTL_OVS_VALID))
3967 				return 0;
3968 			break;
3969 		case ROA_INVALID:
3970 			if (!(flag & F_CTL_OVS_INVALID))
3971 				return 0;
3972 			break;
3973 		case ROA_NOTFOUND:
3974 			if (!(flag & F_CTL_OVS_NOTFOUND))
3975 				return 0;
3976 			break;
3977 		default:
3978 			break;
3979 		}
3980 	}
3981 
3982 	return 1;
3983 }
3984