1 /* $OpenBSD: rde.c,v 1.517 2021/04/16 06:20:29 claudio Exp $ */
2
3 /*
4 * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
5 * Copyright (c) 2016 Job Snijders <job@instituut.net>
6 * Copyright (c) 2016 Peter Hessler <phessler@openbsd.org>
7 * Copyright (c) 2018 Sebastian Benoit <benno@openbsd.org>
8 *
9 * Permission to use, copy, modify, and distribute this software for any
10 * purpose with or without fee is hereby granted, provided that the above
11 * copyright notice and this permission notice appear in all copies.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
14 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
15 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
16 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
17 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
18 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
19 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 */
21
22 #include <sys/types.h>
23 #include <sys/time.h>
24 #include <sys/resource.h>
25
26 #include <errno.h>
27 #include <pwd.h>
28 #include <poll.h>
29 #include <signal.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <syslog.h>
34 #include <unistd.h>
35
36 #include "bgpd.h"
37 #include "rde.h"
38 #include "session.h"
39 #include "log.h"
40
41 #define PFD_PIPE_MAIN 0
42 #define PFD_PIPE_SESSION 1
43 #define PFD_PIPE_SESSION_CTL 2
44 #define PFD_PIPE_ROA 3
45 #define PFD_PIPE_COUNT 4
46
47 void rde_sighdlr(int);
48 void rde_dispatch_imsg_session(struct imsgbuf *);
49 void rde_dispatch_imsg_parent(struct imsgbuf *);
50 void rde_dispatch_imsg_rtr(struct imsgbuf *);
51 void rde_dispatch_imsg_peer(struct rde_peer *, void *);
52 void rde_update_dispatch(struct rde_peer *, struct imsg *);
53 int rde_update_update(struct rde_peer *, struct filterstate *,
54 struct bgpd_addr *, u_int8_t);
55 void rde_update_withdraw(struct rde_peer *, struct bgpd_addr *,
56 u_int8_t);
57 int rde_attr_parse(u_char *, u_int16_t, struct rde_peer *,
58 struct filterstate *, struct mpattr *);
59 int rde_attr_add(struct filterstate *, u_char *, u_int16_t);
60 u_int8_t rde_attr_missing(struct rde_aspath *, int, u_int16_t);
61 int rde_get_mp_nexthop(u_char *, u_int16_t, u_int8_t,
62 struct filterstate *);
63 void rde_as4byte_fixup(struct rde_peer *, struct rde_aspath *);
64 void rde_reflector(struct rde_peer *, struct rde_aspath *);
65
66 void rde_dump_ctx_new(struct ctl_show_rib_request *, pid_t,
67 enum imsg_type);
68 void rde_dump_ctx_throttle(pid_t, int);
69 void rde_dump_ctx_terminate(pid_t);
70 void rde_dump_mrt_new(struct mrt *, pid_t, int);
71
72 int rde_l3vpn_import(struct rde_community *, struct l3vpn *);
73 static void rde_commit_pftable(void);
74 void rde_reload_done(void);
75 static void rde_softreconfig_in_done(void *, u_int8_t);
76 static void rde_softreconfig_out_done(void *, u_int8_t);
77 static void rde_softreconfig_done(void);
78 static void rde_softreconfig_out(struct rib_entry *, void *);
79 static void rde_softreconfig_in(struct rib_entry *, void *);
80 static void rde_softreconfig_sync_reeval(struct rib_entry *, void *);
81 static void rde_softreconfig_sync_fib(struct rib_entry *, void *);
82 static void rde_softreconfig_sync_done(void *, u_int8_t);
83 static void rde_roa_reload(void);
84 static int rde_no_as_set(struct rde_peer *);
85 int rde_update_queue_pending(void);
86 void rde_update_queue_runner(void);
87 void rde_update6_queue_runner(u_int8_t);
88 struct rde_prefixset *rde_find_prefixset(char *, struct rde_prefixset_head *);
89 void rde_mark_prefixsets_dirty(struct rde_prefixset_head *,
90 struct rde_prefixset_head *);
91 u_int8_t rde_roa_validity(struct rde_prefixset *,
92 struct bgpd_addr *, u_int8_t, u_int32_t);
93
94 static void rde_peer_recv_eor(struct rde_peer *, u_int8_t);
95 static void rde_peer_send_eor(struct rde_peer *, u_int8_t);
96
97 void network_add(struct network_config *, struct filterstate *);
98 void network_delete(struct network_config *);
99 static void network_dump_upcall(struct rib_entry *, void *);
100 static void network_flush_upcall(struct rib_entry *, void *);
101
102 void rde_shutdown(void);
103 int ovs_match(struct prefix *, u_int32_t);
104
/* imsg pipes: session engine (data + control), RTR process, parent. */
static struct imsgbuf		*ibuf_se;
static struct imsgbuf		*ibuf_se_ctl;
static struct imsgbuf		*ibuf_rtr;
static struct imsgbuf		*ibuf_main;
/* running configuration and the staged one built during reconfig */
static struct bgpd_config	*conf, *nconf;
/* active ROA set and the staging set filled by the RTR dispatcher */
static struct rde_prefixset	 rde_roa, roa_new;

volatile sig_atomic_t	 rde_quit = 0;	/* set by rde_sighdlr() to exit loop */
struct filter_head	*out_rules, *out_rules_tmp;
struct rde_memstats	 rdemem;
int			 softreconfig;

extern struct rde_peer_head	 peerlist;
extern struct rde_peer		*peerself;

/* state of one in-progress "bgpctl show rib"-style dump */
struct rde_dump_ctx {
	LIST_ENTRY(rde_dump_ctx)	entry;
	struct ctl_show_rib_request	req;
	u_int32_t			peerid;
	u_int8_t			throttled;	/* paused by IMSG_XOFF */
};

LIST_HEAD(, rde_dump_ctx) rde_dump_h = LIST_HEAD_INITIALIZER(rde_dump_h);

/* state of one active MRT table dump */
struct rde_mrt_ctx {
	LIST_ENTRY(rde_mrt_ctx)	entry;
	struct mrt		mrt;
};

LIST_HEAD(, rde_mrt_ctx) rde_mrts = LIST_HEAD_INITIALIZER(rde_mrts);
u_int rde_mrt_cnt;	/* number of entries on rde_mrts, sizes the pollfd array */
136
137 void
rde_sighdlr(int sig)138 rde_sighdlr(int sig)
139 {
140 switch (sig) {
141 case SIGINT:
142 case SIGTERM:
143 rde_quit = 1;
144 break;
145 }
146 }
147
/* default sizes for the RDE hash tables, set up once in rde_main() */
u_int32_t	peerhashsize = 1024;
u_int32_t	pathhashsize = 128 * 1024;
u_int32_t	attrhashsize = 16 * 1024;
u_int32_t	nexthophashsize = 1024;
152
/*
 * Entry point of the Route Decision Engine process.
 *
 * Chroots to BGPD_USER's home directory, drops privileges, pledges down
 * to "stdio recvfd", initializes the RIB data structures and then runs
 * a poll(2) based event loop multiplexing the imsg pipes to the parent,
 * the session engine (data and control sockets), the RTR process and
 * any open MRT dump file descriptors.  Exits when rde_quit is set by
 * the signal handler.
 */
void
rde_main(int debug, int verbose)
{
	struct passwd		*pw;
	struct pollfd		*pfd = NULL;
	struct rde_mrt_ctx	*mctx, *xmctx;
	void			*newp;
	u_int			 pfd_elms = 0, i, j;
	int			 timeout;
	u_int8_t		 aid;

	log_init(debug, LOG_DAEMON);
	log_setverbose(verbose);

	log_procinit(log_procnames[PROC_RDE]);

	if ((pw = getpwnam(BGPD_USER)) == NULL)
		fatal("getpwnam");

	if (chroot(pw->pw_dir) == -1)
		fatal("chroot");
	if (chdir("/") == -1)
		fatal("chdir(\"/\")");

	setproctitle("route decision engine");

	if (setgroups(1, &pw->pw_gid) ||
	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
		fatal("can't drop privileges");

	/* "recvfd" is needed to accept sockets/mrt fds from the parent */
	if (pledge("stdio recvfd", NULL) == -1)
		fatal("pledge");

	signal(SIGTERM, rde_sighdlr);
	signal(SIGINT, rde_sighdlr);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGALRM, SIG_IGN);
	signal(SIGUSR1, SIG_IGN);

	/* fd 3 is the pre-established pipe to the parent process */
	if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
		fatal(NULL);
	imsg_init(ibuf_main, 3);

	/* initialize the RIB structures */
	pt_init();
	path_init(pathhashsize);
	aspath_init(pathhashsize);
	communities_init(attrhashsize);
	attr_init(attrhashsize);
	nexthop_init(nexthophashsize);
	peer_init(peerhashsize);

	/* make sure the default RIBs are setup */
	rib_new("Adj-RIB-In", 0, F_RIB_NOFIB | F_RIB_NOEVALUATE);

	out_rules = calloc(1, sizeof(struct filter_head));
	if (out_rules == NULL)
		fatal(NULL);
	TAILQ_INIT(out_rules);

	conf = new_config();
	log_info("route decision engine ready");

	while (rde_quit == 0) {
		/* grow the pollfd array whenever MRT dumps were added */
		if (pfd_elms < PFD_PIPE_COUNT + rde_mrt_cnt) {
			if ((newp = reallocarray(pfd,
			    PFD_PIPE_COUNT + rde_mrt_cnt,
			    sizeof(struct pollfd))) == NULL) {
				/* panic for now */
				log_warn("could not resize pfd from %u -> %u"
				    " entries", pfd_elms, PFD_PIPE_COUNT +
				    rde_mrt_cnt);
				fatalx("exiting");
			}
			pfd = newp;
			pfd_elms = PFD_PIPE_COUNT + rde_mrt_cnt;
		}
		timeout = -1;
		bzero(pfd, sizeof(struct pollfd) * pfd_elms);

		set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
		set_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se);
		set_pollfd(&pfd[PFD_PIPE_SESSION_CTL], ibuf_se_ctl);
		set_pollfd(&pfd[PFD_PIPE_ROA], ibuf_rtr);

		/*
		 * Add MRT dump fds that have queued output; finished
		 * (MRT_STATE_REMOVE) contexts are torn down here.
		 */
		i = PFD_PIPE_COUNT;
		for (mctx = LIST_FIRST(&rde_mrts); mctx != 0; mctx = xmctx) {
			xmctx = LIST_NEXT(mctx, entry);

			if (i >= pfd_elms)
				fatalx("poll pfd too small");
			if (mctx->mrt.wbuf.queued) {
				pfd[i].fd = mctx->mrt.wbuf.fd;
				pfd[i].events = POLLOUT;
				i++;
			} else if (mctx->mrt.state == MRT_STATE_REMOVE) {
				close(mctx->mrt.wbuf.fd);
				LIST_REMOVE(mctx, entry);
				free(mctx);
				rde_mrt_cnt--;
			}
		}

		/* don't block in poll if there is deferred work pending */
		if (rib_dump_pending() || rde_update_queue_pending() ||
		    nexthop_pending() || peer_imsg_pending())
			timeout = 0;

		if (poll(pfd, i, timeout) == -1) {
			if (errno != EINTR)
				fatal("poll error");
			continue;
		}

		/* losing the parent is fatal ... */
		if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1)
			fatalx("Lost connection to parent");
		else
			rde_dispatch_imsg_parent(ibuf_main);

		/* ... losing the SE pipes only tears them down */
		if (handle_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se) == -1) {
			log_warnx("RDE: Lost connection to SE");
			msgbuf_clear(&ibuf_se->w);
			free(ibuf_se);
			ibuf_se = NULL;
		} else
			rde_dispatch_imsg_session(ibuf_se);

		if (handle_pollfd(&pfd[PFD_PIPE_SESSION_CTL], ibuf_se_ctl) ==
		    -1) {
			log_warnx("RDE: Lost connection to SE control");
			msgbuf_clear(&ibuf_se_ctl->w);
			free(ibuf_se_ctl);
			ibuf_se_ctl = NULL;
		} else
			rde_dispatch_imsg_session(ibuf_se_ctl);

		if (handle_pollfd(&pfd[PFD_PIPE_ROA], ibuf_rtr) == -1) {
			log_warnx("RDE: Lost connection to ROA");
			msgbuf_clear(&ibuf_rtr->w);
			free(ibuf_rtr);
			ibuf_rtr = NULL;
		} else
			rde_dispatch_imsg_rtr(ibuf_rtr);

		/* flush MRT dump fds that polled writable */
		for (j = PFD_PIPE_COUNT, mctx = LIST_FIRST(&rde_mrts);
		    j < i && mctx != 0; j++) {
			if (pfd[j].fd == mctx->mrt.wbuf.fd &&
			    pfd[j].revents & POLLOUT)
				mrt_write(&mctx->mrt);
			mctx = LIST_NEXT(mctx, entry);
		}

		/* process deferred work: peer imsgs, dumps, nexthops */
		peer_foreach(rde_dispatch_imsg_peer, NULL);
		rib_dump_runner();
		nexthop_runner();
		/* only generate updates while the SE pipe is not congested */
		if (ibuf_se && ibuf_se->w.queued < SESS_MSG_HIGH_MARK) {
			rde_update_queue_runner();
			for (aid = AID_INET6; aid < AID_MAX; aid++)
				rde_update6_queue_runner(aid);
		}
		/* commit pftable once per poll loop */
		rde_commit_pftable();
	}

	/* do not clean up on shutdown on production, it takes ages. */
	if (debug)
		rde_shutdown();

	free_config(conf);
	free(pfd);

	/* close pipes */
	if (ibuf_se) {
		msgbuf_clear(&ibuf_se->w);
		close(ibuf_se->fd);
		free(ibuf_se);
	}
	if (ibuf_se_ctl) {
		msgbuf_clear(&ibuf_se_ctl->w);
		close(ibuf_se_ctl->fd);
		free(ibuf_se_ctl);
	}
	msgbuf_clear(&ibuf_main->w);
	close(ibuf_main->fd);
	free(ibuf_main);

	while ((mctx = LIST_FIRST(&rde_mrts)) != NULL) {
		msgbuf_clear(&mctx->mrt.wbuf);
		close(mctx->mrt.wbuf.fd);
		LIST_REMOVE(mctx, entry);
		free(mctx);
	}

	log_info("route decision engine exiting");
	exit(0);
}
350
/*
 * Staging areas for multi-imsg "network add/delete" sequences:
 * netconf_s/session_set for requests from the session engine,
 * netconf_p/parent_set for requests from the parent process.
 */
struct network_config	 netconf_s, netconf_p;
struct filterstate	 netconf_state;
struct filter_set_head	 session_set = TAILQ_HEAD_INITIALIZER(session_set);
struct filter_set_head	 parent_set = TAILQ_HEAD_INITIALIZER(parent_set);
355
/*
 * Dispatch imsgs arriving from the session engine (used for both the
 * regular and the control socket).  Per-peer messages (updates, session
 * state changes) are only validated and queued via peer_imsg_push();
 * they are processed later by rde_dispatch_imsg_peer().  Everything
 * else -- network statements, filter sets and bgpctl show requests --
 * is handled inline.
 */
void
rde_dispatch_imsg_session(struct imsgbuf *ibuf)
{
	struct imsg		 imsg;
	struct peer		 p;
	struct peer_config	 pconf;
	struct ctl_show_set	 cset;
	struct ctl_show_rib	 csr;
	struct ctl_show_rib_request	req;
	struct rde_peer		*peer;
	struct rde_aspath	*asp;
	struct rde_hashstats	 rdehash;
	struct filter_set	*s;
	struct as_set		*aset;
	struct rde_prefixset	*pset;
	u_int8_t		*asdata;
	ssize_t			 n;
	size_t			 aslen;
	int			 verbose;
	u_int16_t		 len;

	while (ibuf) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("rde_dispatch_imsg_session: imsg_get error");
		if (n == 0)
			break;

		switch (imsg.hdr.type) {
		case IMSG_UPDATE:
		case IMSG_SESSION_UP:
		case IMSG_SESSION_DOWN:
		case IMSG_SESSION_STALE:
		case IMSG_SESSION_FLUSH:
		case IMSG_SESSION_RESTARTED:
		case IMSG_REFRESH:
			/* peer-specific: queue for rde_dispatch_imsg_peer() */
			if ((peer = peer_get(imsg.hdr.peerid)) == NULL) {
				log_warnx("rde_dispatch: unknown peer id %d",
				    imsg.hdr.peerid);
				break;
			}
			peer_imsg_push(peer, &imsg);
			break;
		case IMSG_SESSION_ADD:
			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(pconf))
				fatalx("incorrect size of session request");
			memcpy(&pconf, imsg.data, sizeof(pconf));
			peer_add(imsg.hdr.peerid, &pconf);
			break;
		case IMSG_NETWORK_ADD:
			/*
			 * Start of a network add sequence; the prefix is
			 * staged in netconf_s/netconf_state until the
			 * matching IMSG_NETWORK_DONE arrives.
			 */
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct network_config)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&netconf_s, imsg.data, sizeof(netconf_s));
			TAILQ_INIT(&netconf_s.attrset);
			rde_filterstate_prep(&netconf_state, NULL, NULL, NULL,
			    0);
			asp = &netconf_state.aspath;
			asp->aspath = aspath_get(NULL, 0);
			asp->origin = ORIGIN_IGP;
			asp->flags = F_ATTR_ORIGIN | F_ATTR_ASPATH |
			    F_ATTR_LOCALPREF | F_PREFIX_ANNOUNCED |
			    F_ANN_DYNAMIC;
			break;
		case IMSG_NETWORK_ASPATH:
			/* optional: replace the staged path attributes */
			if (imsg.hdr.len - IMSG_HEADER_SIZE <
			    sizeof(csr)) {
				log_warnx("rde_dispatch: wrong imsg len");
				bzero(&netconf_s, sizeof(netconf_s));
				break;
			}
			/* the raw AS path data follows the ctl_show_rib */
			aslen = imsg.hdr.len - IMSG_HEADER_SIZE - sizeof(csr);
			asdata = imsg.data;
			asdata += sizeof(struct ctl_show_rib);
			memcpy(&csr, imsg.data, sizeof(csr));
			asp = &netconf_state.aspath;
			asp->lpref = csr.local_pref;
			asp->med = csr.med;
			asp->weight = csr.weight;
			asp->flags = csr.flags;
			asp->origin = csr.origin;
			asp->flags |= F_PREFIX_ANNOUNCED | F_ANN_DYNAMIC;
			/* drop the placeholder path set by IMSG_NETWORK_ADD */
			aspath_put(asp->aspath);
			asp->aspath = aspath_get(asdata, aslen);
			break;
		case IMSG_NETWORK_ATTR:
			if (imsg.hdr.len <= IMSG_HEADER_SIZE) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			/* parse optional path attributes */
			len = imsg.hdr.len - IMSG_HEADER_SIZE;
			if (rde_attr_add(&netconf_state, imsg.data,
			    len) == -1) {
				log_warnx("rde_dispatch: bad network "
				    "attribute");
				rde_filterstate_clean(&netconf_state);
				bzero(&netconf_s, sizeof(netconf_s));
				break;
			}
			break;
		case IMSG_NETWORK_DONE:
			/* end of sequence: validate and insert the prefix */
			if (imsg.hdr.len != IMSG_HEADER_SIZE) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			TAILQ_CONCAT(&netconf_s.attrset, &session_set, entry);
			switch (netconf_s.prefix.aid) {
			case AID_INET:
				if (netconf_s.prefixlen > 32)
					goto badnet;
				network_add(&netconf_s, &netconf_state);
				break;
			case AID_INET6:
				if (netconf_s.prefixlen > 128)
					goto badnet;
				network_add(&netconf_s, &netconf_state);
				break;
			case 0:
				/* something failed beforehands */
				break;
			default:
badnet:
				log_warnx("request to insert invalid network");
				break;
			}
			rde_filterstate_clean(&netconf_state);
			break;
		case IMSG_NETWORK_REMOVE:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct network_config)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&netconf_s, imsg.data, sizeof(netconf_s));
			TAILQ_INIT(&netconf_s.attrset);

			switch (netconf_s.prefix.aid) {
			case AID_INET:
				if (netconf_s.prefixlen > 32)
					goto badnetdel;
				network_delete(&netconf_s);
				break;
			case AID_INET6:
				if (netconf_s.prefixlen > 128)
					goto badnetdel;
				network_delete(&netconf_s);
				break;
			default:
badnetdel:
				log_warnx("request to remove invalid network");
				break;
			}
			break;
		case IMSG_NETWORK_FLUSH:
			/* remove all dynamically announced networks */
			if (imsg.hdr.len != IMSG_HEADER_SIZE) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC,
			    RDE_RUNNER_ROUNDS, peerself, network_flush_upcall,
			    NULL, NULL) == -1)
				log_warn("rde_dispatch: IMSG_NETWORK_FLUSH");
			break;
		case IMSG_FILTER_SET:
			/* filter set to attach to the staged network */
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct filter_set)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			if ((s = malloc(sizeof(struct filter_set))) == NULL)
				fatal(NULL);
			memcpy(s, imsg.data, sizeof(struct filter_set));
			/* resolve the nexthop once, into a reference */
			if (s->type == ACTION_SET_NEXTHOP) {
				s->action.nh_ref =
				    nexthop_get(&s->action.nexthop);
				s->type = ACTION_SET_NEXTHOP_REF;
			}
			TAILQ_INSERT_TAIL(&session_set, s, entry);
			break;
		case IMSG_CTL_SHOW_NETWORK:
		case IMSG_CTL_SHOW_RIB:
		case IMSG_CTL_SHOW_RIB_PREFIX:
			/* start an asynchronous RIB dump for bgpctl */
			if (imsg.hdr.len != IMSG_HEADER_SIZE + sizeof(req)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&req, imsg.data, sizeof(req));
			rde_dump_ctx_new(&req, imsg.hdr.pid, imsg.hdr.type);
			break;
		case IMSG_CTL_SHOW_NEIGHBOR:
			/* fill in the RDE-side prefix counters and echo back */
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct peer)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&p, imsg.data, sizeof(struct peer));
			peer = peer_get(p.conf.id);
			if (peer != NULL) {
				p.stats.prefix_cnt = peer->prefix_cnt;
				p.stats.prefix_out_cnt = peer->prefix_out_cnt;
				p.stats.prefix_rcvd_update =
				    peer->prefix_rcvd_update;
				p.stats.prefix_rcvd_withdraw =
				    peer->prefix_rcvd_withdraw;
				p.stats.prefix_rcvd_eor =
				    peer->prefix_rcvd_eor;
				p.stats.prefix_sent_update =
				    peer->prefix_sent_update;
				p.stats.prefix_sent_withdraw =
				    peer->prefix_sent_withdraw;
				p.stats.prefix_sent_eor =
				    peer->prefix_sent_eor;
			}
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NEIGHBOR, 0,
			    imsg.hdr.pid, -1, &p, sizeof(struct peer));
			break;
		case IMSG_CTL_SHOW_RIB_MEM:
			/* memory stats plus one hash stat block per table */
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_MEM, 0,
			    imsg.hdr.pid, -1, &rdemem, sizeof(rdemem));
			path_hash_stats(&rdehash);
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
			    imsg.hdr.pid, -1, &rdehash, sizeof(rdehash));
			aspath_hash_stats(&rdehash);
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
			    imsg.hdr.pid, -1, &rdehash, sizeof(rdehash));
			communities_hash_stats(&rdehash);
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
			    imsg.hdr.pid, -1, &rdehash, sizeof(rdehash));
			attr_hash_stats(&rdehash);
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
			    imsg.hdr.pid, -1, &rdehash, sizeof(rdehash));
			imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, imsg.hdr.pid,
			    -1, NULL, 0);
			break;
		case IMSG_CTL_SHOW_SET:
			/* first roa set */
			pset = &rde_roa;
			memset(&cset, 0, sizeof(cset));
			cset.type = ROA_SET;
			strlcpy(cset.name, "RPKI ROA", sizeof(cset.name));
			cset.lastchange = pset->lastchange;
			cset.v4_cnt = pset->th.v4_cnt;
			cset.v6_cnt = pset->th.v6_cnt;
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_SET, 0,
			    imsg.hdr.pid, -1, &cset, sizeof(cset));

			/* then as-sets, prefix-sets and origin-sets */
			SIMPLEQ_FOREACH(aset, &conf->as_sets, entry) {
				memset(&cset, 0, sizeof(cset));
				cset.type = ASNUM_SET;
				strlcpy(cset.name, aset->name,
				    sizeof(cset.name));
				cset.lastchange = aset->lastchange;
				cset.as_cnt = set_nmemb(aset->set);
				imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_SET, 0,
				    imsg.hdr.pid, -1, &cset, sizeof(cset));
			}
			SIMPLEQ_FOREACH(pset, &conf->rde_prefixsets, entry) {
				memset(&cset, 0, sizeof(cset));
				cset.type = PREFIX_SET;
				strlcpy(cset.name, pset->name,
				    sizeof(cset.name));
				cset.lastchange = pset->lastchange;
				cset.v4_cnt = pset->th.v4_cnt;
				cset.v6_cnt = pset->th.v6_cnt;
				imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_SET, 0,
				    imsg.hdr.pid, -1, &cset, sizeof(cset));
			}
			SIMPLEQ_FOREACH(pset, &conf->rde_originsets, entry) {
				memset(&cset, 0, sizeof(cset));
				cset.type = ORIGIN_SET;
				strlcpy(cset.name, pset->name,
				    sizeof(cset.name));
				cset.lastchange = pset->lastchange;
				cset.v4_cnt = pset->th.v4_cnt;
				cset.v6_cnt = pset->th.v6_cnt;
				imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_SET, 0,
				    imsg.hdr.pid, -1, &cset, sizeof(cset));
			}
			imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, imsg.hdr.pid,
			    -1, NULL, 0);
			break;
		case IMSG_CTL_LOG_VERBOSE:
			/* already checked by SE */
			memcpy(&verbose, imsg.data, sizeof(verbose));
			log_setverbose(verbose);
			break;
		case IMSG_CTL_END:
			imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, imsg.hdr.pid,
			    -1, NULL, 0);
			break;
		case IMSG_CTL_TERMINATE:
			rde_dump_ctx_terminate(imsg.hdr.pid);
			break;
		case IMSG_XON:
			/* resume a throttled peer or control dump */
			if (imsg.hdr.peerid) {
				peer = peer_get(imsg.hdr.peerid);
				if (peer)
					peer->throttled = 0;
			} else {
				rde_dump_ctx_throttle(imsg.hdr.pid, 0);
			}
			break;
		case IMSG_XOFF:
			/* pause a peer or control dump (flow control) */
			if (imsg.hdr.peerid) {
				peer = peer_get(imsg.hdr.peerid);
				if (peer)
					peer->throttled = 1;
			} else {
				rde_dump_ctx_throttle(imsg.hdr.pid, 1);
			}
			break;
		default:
			break;
		}
		imsg_free(&imsg);
	}
}
675
/*
 * Dispatch imsgs arriving from the parent process.  This covers the
 * handover of the SE/RTR socket fds, parent-originated network
 * statements, MRT dump requests, nexthop updates and -- most
 * importantly -- the multi-message reconfiguration protocol
 * (IMSG_RECONF_* building nconf until IMSG_RECONF_DONE).
 *
 * The static locals carry state between consecutive imsgs of one
 * reconfig sequence: last_prefixset/last_as_set point at the set the
 * following *_ITEM messages belong to, vpn at the l3vpn currently
 * being filled in.
 */
void
rde_dispatch_imsg_parent(struct imsgbuf *ibuf)
{
	static struct rde_prefixset	*last_prefixset;
	static struct as_set	*last_as_set;
	static struct l3vpn	*vpn;
	struct imsg		 imsg;
	struct mrt		 xmrt;
	struct roa		 roa;
	struct rde_rib		 rr;
	struct filterstate	 state;
	struct imsgbuf		*i;
	struct filter_head	*nr;
	struct filter_rule	*r;
	struct filter_set	*s;
	struct rib		*rib;
	struct rde_prefixset	*ps;
	struct rde_aspath	*asp;
	struct prefixset_item	 psi;
	char			*name;
	size_t			 nmemb;
	int			 n, fd, rv;
	u_int16_t		 rid;

	while (ibuf) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("rde_dispatch_imsg_parent: imsg_get error");
		if (n == 0)
			break;

		switch (imsg.hdr.type) {
		case IMSG_SOCKET_CONN:
		case IMSG_SOCKET_CONN_CTL:
		case IMSG_SOCKET_CONN_RTR:
			/* parent hands over a socket fd for SE/ctl/RTR pipe */
			if ((fd = imsg.fd) == -1) {
				log_warnx("expected to receive imsg fd "
				    "but didn't receive any");
				break;
			}
			if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
				fatal(NULL);
			imsg_init(i, fd);
			switch (imsg.hdr.type) {
			case IMSG_SOCKET_CONN:
				if (ibuf_se) {
					log_warnx("Unexpected imsg connection "
					    "to SE received");
					msgbuf_clear(&ibuf_se->w);
					free(ibuf_se);
				}
				ibuf_se = i;
				break;
			case IMSG_SOCKET_CONN_CTL:
				if (ibuf_se_ctl) {
					log_warnx("Unexpected imsg ctl "
					    "connection to SE received");
					msgbuf_clear(&ibuf_se_ctl->w);
					free(ibuf_se_ctl);
				}
				ibuf_se_ctl = i;
				break;
			case IMSG_SOCKET_CONN_RTR:
				if (ibuf_rtr) {
					log_warnx("Unexpected imsg ctl "
					    "connection to ROA received");
					msgbuf_clear(&ibuf_rtr->w);
					free(ibuf_rtr);
				}
				ibuf_rtr = i;
				break;
			}
			break;
		case IMSG_NETWORK_ADD:
			/* stage prefix until IMSG_NETWORK_DONE arrives */
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct network_config)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&netconf_p, imsg.data, sizeof(netconf_p));
			TAILQ_INIT(&netconf_p.attrset);
			break;
		case IMSG_NETWORK_DONE:
			/* attach collected filter sets, build path, insert */
			TAILQ_CONCAT(&netconf_p.attrset, &parent_set, entry);

			rde_filterstate_prep(&state, NULL, NULL, NULL, 0);
			asp = &state.aspath;
			asp->aspath = aspath_get(NULL, 0);
			asp->origin = ORIGIN_IGP;
			asp->flags = F_ATTR_ORIGIN | F_ATTR_ASPATH |
			    F_ATTR_LOCALPREF | F_PREFIX_ANNOUNCED;

			network_add(&netconf_p, &state);
			rde_filterstate_clean(&state);
			break;
		case IMSG_NETWORK_REMOVE:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct network_config)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&netconf_p, imsg.data, sizeof(netconf_p));
			TAILQ_INIT(&netconf_p.attrset);
			network_delete(&netconf_p);
			break;
		case IMSG_RECONF_CONF:
			/* start of a reconfiguration sequence */
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct bgpd_config))
				fatalx("IMSG_RECONF_CONF bad len");
			out_rules_tmp = calloc(1, sizeof(struct filter_head));
			if (out_rules_tmp == NULL)
				fatal(NULL);
			TAILQ_INIT(out_rules_tmp);
			nconf = new_config();
			copy_config(nconf, imsg.data);

			/* mark all RIBs for deletion unless re-announced */
			for (rid = 0; rid < rib_size; rid++) {
				if ((rib = rib_byid(rid)) == NULL)
					continue;
				rib->state = RECONF_DELETE;
				rib->fibstate = RECONF_NONE;
			}
			break;
		case IMSG_RECONF_RIB:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct rde_rib))
				fatalx("IMSG_RECONF_RIB bad len");
			memcpy(&rr, imsg.data, sizeof(rr));
			rib = rib_byid(rib_find(rr.name));
			if (rib == NULL) {
				rib = rib_new(rr.name, rr.rtableid, rr.flags);
			} else if (rib->flags == rr.flags &&
			    rib->rtableid == rr.rtableid) {
				/* no change to rib apart from filters */
				rib->state = RECONF_KEEP;
			} else {
				/* reload rib because somehing changed */
				rib->flags_tmp = rr.flags;
				rib->rtableid_tmp = rr.rtableid;
				rib->state = RECONF_RELOAD;
			}
			break;
		case IMSG_RECONF_FILTER:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct filter_rule))
				fatalx("IMSG_RECONF_FILTER bad len");
			if ((r = malloc(sizeof(struct filter_rule))) == NULL)
				fatal(NULL);
			memcpy(r, imsg.data, sizeof(struct filter_rule));
			/* resolve set names against the new config */
			if (r->match.prefixset.name[0] != '\0') {
				r->match.prefixset.ps =
				    rde_find_prefixset(r->match.prefixset.name,
				    &nconf->rde_prefixsets);
				if (r->match.prefixset.ps == NULL)
					log_warnx("%s: no prefixset for %s",
					    __func__, r->match.prefixset.name);
			}
			if (r->match.originset.name[0] != '\0') {
				r->match.originset.ps =
				    rde_find_prefixset(r->match.originset.name,
				    &nconf->rde_originsets);
				if (r->match.originset.ps == NULL)
					log_warnx("%s: no origin-set for %s",
					    __func__, r->match.originset.name);
			}
			if (r->match.as.flags & AS_FLAG_AS_SET_NAME) {
				struct as_set * aset;

				aset = as_sets_lookup(&nconf->as_sets,
				    r->match.as.name);
				if (aset == NULL) {
					log_warnx("%s: no as-set for %s",
					    __func__, r->match.as.name);
				} else {
					r->match.as.flags = AS_FLAG_AS_SET;
					r->match.as.aset = aset;
				}
			}
			/* adopt filter sets sent ahead via IMSG_FILTER_SET */
			TAILQ_INIT(&r->set);
			TAILQ_CONCAT(&r->set, &parent_set, entry);
			if ((rib = rib_byid(rib_find(r->rib))) == NULL) {
				log_warnx("IMSG_RECONF_FILTER: filter rule "
				    "for nonexistent rib %s", r->rib);
				free(r);
				break;
			}
			r->peer.ribid = rib->id;
			if (r->dir == DIR_IN) {
				/* lazily allocate the per-rib in-rule list */
				nr = rib->in_rules_tmp;
				if (nr == NULL) {
					nr = calloc(1,
					    sizeof(struct filter_head));
					if (nr == NULL)
						fatal(NULL);
					TAILQ_INIT(nr);
					rib->in_rules_tmp = nr;
				}
				TAILQ_INSERT_TAIL(nr, r, entry);
			} else
				TAILQ_INSERT_TAIL(out_rules_tmp, r, entry);
			break;
		case IMSG_RECONF_PREFIX_SET:
		case IMSG_RECONF_ORIGIN_SET:
			/* announce a new (still empty) prefix/origin set */
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(ps->name))
				fatalx("IMSG_RECONF_PREFIX_SET bad len");
			ps = calloc(1, sizeof(struct rde_prefixset));
			if (ps == NULL)
				fatal(NULL);
			memcpy(ps->name, imsg.data, sizeof(ps->name));
			if (imsg.hdr.type == IMSG_RECONF_ORIGIN_SET) {
				SIMPLEQ_INSERT_TAIL(&nconf->rde_originsets, ps,
				    entry);
			} else {
				SIMPLEQ_INSERT_TAIL(&nconf->rde_prefixsets, ps,
				    entry);
			}
			/* following *_ITEM messages fill this set */
			last_prefixset = ps;
			break;
		case IMSG_RECONF_ROA_ITEM:
			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(roa))
				fatalx("IMSG_RECONF_ROA_ITEM bad len");
			memcpy(&roa, imsg.data, sizeof(roa));
			rv = trie_roa_add(&last_prefixset->th, &roa);
			break;
		case IMSG_RECONF_PREFIX_SET_ITEM:
			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(psi))
				fatalx("IMSG_RECONF_PREFIX_SET_ITEM bad len");
			memcpy(&psi, imsg.data, sizeof(psi));
			if (last_prefixset == NULL)
				fatalx("King Bula has no prefixset");
			rv = trie_add(&last_prefixset->th,
			    &psi.p.addr, psi.p.len,
			    psi.p.len_min, psi.p.len_max);
			if (rv == -1)
				log_warnx("trie_add(%s) %s/%u failed",
				    last_prefixset->name, log_addr(&psi.p.addr),
				    psi.p.len);
			break;
		case IMSG_RECONF_AS_SET:
			/* payload: member count followed by the set name */
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(nmemb) + SET_NAME_LEN)
				fatalx("IMSG_RECONF_AS_SET bad len");
			memcpy(&nmemb, imsg.data, sizeof(nmemb));
			name = (char *)imsg.data + sizeof(nmemb);
			if (as_sets_lookup(&nconf->as_sets, name) != NULL)
				fatalx("duplicate as-set %s", name);
			last_as_set = as_sets_new(&nconf->as_sets, name, nmemb,
			    sizeof(u_int32_t));
			break;
		case IMSG_RECONF_AS_SET_ITEMS:
			/* batch of u_int32_t AS numbers for last_as_set */
			nmemb = imsg.hdr.len - IMSG_HEADER_SIZE;
			nmemb /= sizeof(u_int32_t);
			if (set_add(last_as_set->set, imsg.data, nmemb) != 0)
				fatal(NULL);
			break;
		case IMSG_RECONF_AS_SET_DONE:
			set_prep(last_as_set->set);
			last_as_set = NULL;
			break;
		case IMSG_RECONF_VPN:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct l3vpn))
				fatalx("IMSG_RECONF_VPN bad len");
			if ((vpn = malloc(sizeof(struct l3vpn))) == NULL)
				fatal(NULL);
			memcpy(vpn, imsg.data, sizeof(struct l3vpn));
			TAILQ_INIT(&vpn->import);
			TAILQ_INIT(&vpn->export);
			TAILQ_INIT(&vpn->net_l);
			SIMPLEQ_INSERT_TAIL(&nconf->l3vpns, vpn, entry);
			break;
		case IMSG_RECONF_VPN_EXPORT:
			if (vpn == NULL) {
				log_warnx("rde_dispatch_imsg_parent: "
				    "IMSG_RECONF_VPN_EXPORT unexpected");
				break;
			}
			TAILQ_CONCAT(&vpn->export, &parent_set, entry);
			break;
		case IMSG_RECONF_VPN_IMPORT:
			if (vpn == NULL) {
				log_warnx("rde_dispatch_imsg_parent: "
				    "IMSG_RECONF_VPN_IMPORT unexpected");
				break;
			}
			TAILQ_CONCAT(&vpn->import, &parent_set, entry);
			break;
		case IMSG_RECONF_VPN_DONE:
			break;
		case IMSG_RECONF_DRAIN:
			/* echo back so the parent knows the pipe is drained */
			imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0,
			    -1, NULL, 0);
			break;
		case IMSG_RECONF_DONE:
			/* end of sequence: switch over to the new config */
			if (nconf == NULL)
				fatalx("got IMSG_RECONF_DONE but no config");
			last_prefixset = NULL;

			rde_reload_done();
			break;
		case IMSG_NEXTHOP_UPDATE:
			nexthop_update(imsg.data);
			break;
		case IMSG_FILTER_SET:
			/* filter set for a following filter rule or vpn */
			if (imsg.hdr.len > IMSG_HEADER_SIZE +
			    sizeof(struct filter_set))
				fatalx("IMSG_FILTER_SET bad len");
			if ((s = malloc(sizeof(struct filter_set))) == NULL)
				fatal(NULL);
			memcpy(s, imsg.data, sizeof(struct filter_set));
			/* resolve the nexthop once, into a reference */
			if (s->type == ACTION_SET_NEXTHOP) {
				s->action.nh_ref =
				    nexthop_get(&s->action.nexthop);
				s->type = ACTION_SET_NEXTHOP_REF;
			}
			TAILQ_INSERT_TAIL(&parent_set, s, entry);
			break;
		case IMSG_MRT_OPEN:
		case IMSG_MRT_REOPEN:
			if (imsg.hdr.len > IMSG_HEADER_SIZE +
			    sizeof(struct mrt)) {
				log_warnx("wrong imsg len");
				break;
			}
			memcpy(&xmrt, imsg.data, sizeof(xmrt));
			if ((fd = imsg.fd) == -1)
				log_warnx("expected to receive fd for mrt dump "
				    "but didn't receive any");
			else if (xmrt.type == MRT_TABLE_DUMP ||
			    xmrt.type == MRT_TABLE_DUMP_MP ||
			    xmrt.type == MRT_TABLE_DUMP_V2) {
				rde_dump_mrt_new(&xmrt, imsg.hdr.pid, fd);
			} else
				close(fd);
			break;
		case IMSG_MRT_CLOSE:
			/* ignore end message because a dump is atomic */
			break;
		default:
			fatalx("unhandled IMSG %u", imsg.hdr.type);
		}
		imsg_free(&imsg);
	}
}
1020
1021 void
rde_dispatch_imsg_rtr(struct imsgbuf * ibuf)1022 rde_dispatch_imsg_rtr(struct imsgbuf *ibuf)
1023 {
1024 struct imsg imsg;
1025 struct roa roa;
1026 int n;
1027
1028 while (ibuf) {
1029 if ((n = imsg_get(ibuf, &imsg)) == -1)
1030 fatal("rde_dispatch_imsg_parent: imsg_get error");
1031 if (n == 0)
1032 break;
1033
1034 switch (imsg.hdr.type) {
1035 case IMSG_RECONF_ROA_SET:
1036 /* start of update */
1037 break;
1038 case IMSG_RECONF_ROA_ITEM:
1039 if (imsg.hdr.len - IMSG_HEADER_SIZE !=
1040 sizeof(roa))
1041 fatalx("IMSG_RECONF_ROA_ITEM bad len");
1042 memcpy(&roa, imsg.data, sizeof(roa));
1043 if (trie_roa_add(&roa_new.th, &roa) != 0) {
1044 struct bgpd_addr p = {
1045 .aid = roa.aid,
1046 .v6 = roa.prefix.inet6
1047 };
1048 log_warnx("trie_roa_add %s/%u failed",
1049 log_addr(&p), roa.prefixlen);
1050 }
1051 break;
1052 case IMSG_RECONF_DONE:
1053 /* end of update */
1054 rde_roa_reload();
1055 break;
1056 }
1057 imsg_free(&imsg);
1058 }
1059 }
1060
/*
 * Process at most one queued imsg for the given peer (called from
 * rde_main() via peer_foreach()).  Handles BGP updates and the
 * session life-cycle messages that rde_dispatch_imsg_session() queued
 * with peer_imsg_push(); returns immediately if the peer's queue is
 * empty.  The bula argument is unused (peer_foreach callback shape).
 */
void
rde_dispatch_imsg_peer(struct rde_peer *peer, void *bula)
{
	struct session_up sup;
	struct imsg imsg;
	u_int8_t aid;

	if (!peer_imsg_pop(peer, &imsg))
		return;

	switch (imsg.hdr.type) {
	case IMSG_UPDATE:
		rde_update_dispatch(peer, &imsg);
		break;
	case IMSG_SESSION_UP:
		if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(sup))
			fatalx("incorrect size of session request");
		memcpy(&sup, imsg.data, sizeof(sup));
		if (peer_up(peer, &sup) == -1) {
			/* bringing the peer up failed; tell the SE */
			peer->state = PEER_DOWN;
			imsg_compose(ibuf_se, IMSG_SESSION_DOWN, peer->conf.id,
			    0, -1, NULL, 0);
		}
		break;
	case IMSG_SESSION_DOWN:
		peer_down(peer, NULL);
		break;
	case IMSG_SESSION_STALE:
	case IMSG_SESSION_FLUSH:
	case IMSG_SESSION_RESTARTED:
	case IMSG_REFRESH:
		/* all four carry a single address family identifier */
		if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
			log_warnx("%s: wrong imsg len", __func__);
			break;
		}
		memcpy(&aid, imsg.data, sizeof(aid));
		if (aid >= AID_MAX) {
			log_warnx("%s: bad AID", __func__);
			break;
		}

		switch (imsg.hdr.type) {
		case IMSG_SESSION_STALE:
			peer_stale(peer, aid);
			break;
		case IMSG_SESSION_FLUSH:
			peer_flush(peer, aid, peer->staletime[aid]);
			break;
		case IMSG_SESSION_RESTARTED:
			/* only flush if graceful-restart marked paths stale */
			if (peer->staletime[aid])
				peer_flush(peer, aid, peer->staletime[aid]);
			break;
		case IMSG_REFRESH:
			peer_dump(peer, aid);
			break;
		}
		break;
	default:
		log_warnx("%s: unhandled imsg type %d", __func__,
		    imsg.hdr.type);
		break;
	}

	imsg_free(&imsg);
}
1126
/* handle routing updates from the session engine. */
/*
 * Parse one BGP UPDATE message (RFC 4271 section 4.3) carried in an
 * imsg: validate the withdrawn-routes and path-attribute length fields,
 * parse all path attributes, then process plain IPv4 withdraws and NLRI
 * plus any MP_UNREACH_NLRI / MP_REACH_NLRI payloads (RFC 4760).
 * On malformed input rde_update_err() queues a NOTIFICATION and puts
 * the peer into PEER_ERR; all exits funnel through the done label so
 * the filterstate is always cleaned up.
 */
void
rde_update_dispatch(struct rde_peer *peer, struct imsg *imsg)
{
	struct filterstate	 state;
	struct bgpd_addr	 prefix;
	struct mpattr		 mpa;
	u_char			*p, *mpp = NULL;
	int			 pos = 0;
	u_int16_t		 afi, len, mplen;
	u_int16_t		 withdrawn_len;
	u_int16_t		 attrpath_len;
	u_int16_t		 nlri_len;
	u_int8_t		 aid, prefixlen, safi, subtype;
	u_int32_t		 fas;

	p = imsg->data;

	/* need at least the 2-byte withdrawn-routes length field */
	if (imsg->hdr.len < IMSG_HEADER_SIZE + 2) {
		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
		return;
	}

	memcpy(&len, p, 2);
	withdrawn_len = ntohs(len);
	p += 2;
	/* withdrawn section plus the path-attribute length must fit */
	if (imsg->hdr.len < IMSG_HEADER_SIZE + 2 + withdrawn_len + 2) {
		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
		return;
	}

	p += withdrawn_len;
	memcpy(&len, p, 2);
	attrpath_len = len = ntohs(len);
	p += 2;
	if (imsg->hdr.len <
	    IMSG_HEADER_SIZE + 2 + withdrawn_len + 2 + attrpath_len) {
		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
		return;
	}

	/* whatever remains after both length-delimited sections is NLRI */
	nlri_len =
	    imsg->hdr.len - IMSG_HEADER_SIZE - 4 - withdrawn_len - attrpath_len;

	if (attrpath_len == 0) {
		/* 0 = no NLRI information in this message */
		if (nlri_len != 0) {
			/* crap at end of update which should not be there */
			rde_update_err(peer, ERR_UPDATE,
			    ERR_UPD_ATTRLIST, NULL, 0);
			return;
		}
		if (withdrawn_len == 0) {
			/* EoR marker */
			rde_peer_recv_eor(peer, AID_INET);
			return;
		}
	}

	bzero(&mpa, sizeof(mpa));
	rde_filterstate_prep(&state, NULL, NULL, NULL, 0);
	if (attrpath_len != 0) { /* 0 = no NLRI information in this message */
		/* parse path attributes */
		while (len > 0) {
			if ((pos = rde_attr_parse(p, len, peer, &state,
			    &mpa)) < 0)
				goto done;
			p += pos;
			len -= pos;
		}

		/* check for missing but necessary attributes */
		if ((subtype = rde_attr_missing(&state.aspath, peer->conf.ebgp,
		    nlri_len))) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_MISSNG_WK_ATTR,
			    &subtype, sizeof(u_int8_t));
			goto done;
		}

		/* merge AS4_PATH/AS4_AGGREGATOR for 2-byte-ASN sessions */
		rde_as4byte_fixup(peer, &state.aspath);

		/* enforce remote AS if requested */
		if (state.aspath.flags & F_ATTR_ASPATH &&
		    peer->conf.enforce_as == ENFORCE_AS_ON) {
			fas = aspath_neighbor(state.aspath.aspath);
			if (peer->conf.remote_as != fas) {
				log_peer_warnx(&peer->conf, "bad path, "
				    "starting with %s, "
				    "enforce neighbor-as enabled", log_as(fas));
				rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
				    NULL, 0);
				goto done;
			}
		}

		/* aspath needs to be loop free. This is not a hard error. */
		if (state.aspath.flags & F_ATTR_ASPATH &&
		    peer->conf.ebgp &&
		    peer->conf.enforce_local_as == ENFORCE_AS_ON &&
		    !aspath_loopfree(state.aspath.aspath, peer->conf.local_as))
			state.aspath.flags |= F_ATTR_LOOP;

		/* route-reflector ORIGINATOR_ID / CLUSTER_LIST handling */
		rde_reflector(peer, &state.aspath);
	}

	/* rewind to the start of the withdrawn-routes section */
	p = imsg->data;
	len = withdrawn_len;
	p += 2;

	/* withdraw prefix */
	while (len > 0) {
		if ((pos = nlri_get_prefix(p, len, &prefix,
		    &prefixlen)) == -1) {
			/*
			 * the RFC does not mention what we should do in
			 * this case. Let's do the same as in the NLRI case.
			 */
			log_peer_warnx(&peer->conf, "bad withdraw prefix");
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
			    NULL, 0);
			goto done;
		}
		p += pos;
		len -= pos;

		if (peer->capa.mp[AID_INET] == 0) {
			log_peer_warnx(&peer->conf,
			    "bad withdraw, %s disabled", aid2str(AID_INET));
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		rde_update_withdraw(peer, &prefix, prefixlen);
	}

	/* withdraw MP_UNREACH_NLRI if available */
	if (mpa.unreach_len != 0) {
		mpp = mpa.unreach;
		mplen = mpa.unreach_len;
		/* the attribute starts with AFI (2 bytes) and SAFI (1) */
		memcpy(&afi, mpp, 2);
		mpp += 2;
		mplen -= 2;
		afi = ntohs(afi);
		safi = *mpp++;
		mplen--;

		if (afi2aid(afi, safi, &aid) == -1) {
			log_peer_warnx(&peer->conf,
			    "bad AFI/SAFI pair in withdraw");
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		if (peer->capa.mp[aid] == 0) {
			log_peer_warnx(&peer->conf,
			    "bad withdraw, %s disabled", aid2str(aid));
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		/* MP_UNREACH_NLRI alone with empty payload is an EoR marker */
		if ((state.aspath.flags & ~F_ATTR_MP_UNREACH) == 0 &&
		    mplen == 0) {
			/* EoR marker */
			rde_peer_recv_eor(peer, aid);
		}

		switch (aid) {
		case AID_INET6:
			while (mplen > 0) {
				if ((pos = nlri_get_prefix6(mpp, mplen,
				    &prefix, &prefixlen)) == -1) {
					log_peer_warnx(&peer->conf,
					    "bad IPv6 withdraw prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.unreach, mpa.unreach_len);
					goto done;
				}
				mpp += pos;
				mplen -= pos;

				rde_update_withdraw(peer, &prefix, prefixlen);
			}
			break;
		case AID_VPN_IPv4:
			while (mplen > 0) {
				if ((pos = nlri_get_vpn4(mpp, mplen,
				    &prefix, &prefixlen, 1)) == -1) {
					log_peer_warnx(&peer->conf,
					    "bad VPNv4 withdraw prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.unreach, mpa.unreach_len);
					goto done;
				}
				mpp += pos;
				mplen -= pos;

				rde_update_withdraw(peer, &prefix, prefixlen);
			}
			break;
		case AID_VPN_IPv6:
			while (mplen > 0) {
				if ((pos = nlri_get_vpn6(mpp, mplen,
				    &prefix, &prefixlen, 1)) == -1) {
					log_peer_warnx(&peer->conf,
					    "bad VPNv6 withdraw prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR, mpa.unreach,
					    mpa.unreach_len);
					goto done;
				}
				mpp += pos;
				mplen -= pos;

				rde_update_withdraw(peer, &prefix, prefixlen);
			}
			break;
		default:
			/* silently ignore unsupported multiprotocol AF */
			break;
		}

		/* nothing but MP_UNREACH_NLRI was present: update is done */
		if ((state.aspath.flags & ~F_ATTR_MP_UNREACH) == 0)
			goto done;
	}

	/* shift to NLRI information */
	p += 2 + attrpath_len;

	/* parse nlri prefix */
	while (nlri_len > 0) {
		if ((pos = nlri_get_prefix(p, nlri_len, &prefix,
		    &prefixlen)) == -1) {
			log_peer_warnx(&peer->conf, "bad nlri prefix");
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
			    NULL, 0);
			goto done;
		}
		p += pos;
		nlri_len -= pos;

		if (peer->capa.mp[AID_INET] == 0) {
			log_peer_warnx(&peer->conf,
			    "bad update, %s disabled", aid2str(AID_INET));
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		if (rde_update_update(peer, &state, &prefix, prefixlen) == -1)
			goto done;

	}

	/* add MP_REACH_NLRI if available */
	if (mpa.reach_len != 0) {
		mpp = mpa.reach;
		mplen = mpa.reach_len;
		memcpy(&afi, mpp, 2);
		mpp += 2;
		mplen -= 2;
		afi = ntohs(afi);
		safi = *mpp++;
		mplen--;

		if (afi2aid(afi, safi, &aid) == -1) {
			log_peer_warnx(&peer->conf,
			    "bad AFI/SAFI pair in update");
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		if (peer->capa.mp[aid] == 0) {
			log_peer_warnx(&peer->conf,
			    "bad update, %s disabled", aid2str(aid));
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    NULL, 0);
			goto done;
		}

		/* unlock the previously locked nexthop, it is no longer used */
		nexthop_unref(state.nexthop);
		state.nexthop = NULL;
		if ((pos = rde_get_mp_nexthop(mpp, mplen, aid, &state)) == -1) {
			log_peer_warnx(&peer->conf, "bad nlri nexthop");
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
			    mpa.reach, mpa.reach_len);
			goto done;
		}
		mpp += pos;
		mplen -= pos;

		switch (aid) {
		case AID_INET6:
			while (mplen > 0) {
				if ((pos = nlri_get_prefix6(mpp, mplen,
				    &prefix, &prefixlen)) == -1) {
					log_peer_warnx(&peer->conf,
					    "bad IPv6 nlri prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.reach, mpa.reach_len);
					goto done;
				}
				mpp += pos;
				mplen -= pos;

				if (rde_update_update(peer, &state, &prefix,
				    prefixlen) == -1)
					goto done;
			}
			break;
		case AID_VPN_IPv4:
			while (mplen > 0) {
				if ((pos = nlri_get_vpn4(mpp, mplen,
				    &prefix, &prefixlen, 0)) == -1) {
					log_peer_warnx(&peer->conf,
					    "bad VPNv4 nlri prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.reach, mpa.reach_len);
					goto done;
				}
				mpp += pos;
				mplen -= pos;

				if (rde_update_update(peer, &state, &prefix,
				    prefixlen) == -1)
					goto done;
			}
			break;
		case AID_VPN_IPv6:
			while (mplen > 0) {
				if ((pos = nlri_get_vpn6(mpp, mplen,
				    &prefix, &prefixlen, 0)) == -1) {
					log_peer_warnx(&peer->conf,
					    "bad VPNv6 nlri prefix");
					rde_update_err(peer, ERR_UPDATE,
					    ERR_UPD_OPTATTR,
					    mpa.reach, mpa.reach_len);
					goto done;
				}
				mpp += pos;
				mplen -= pos;

				if (rde_update_update(peer, &state, &prefix,
				    prefixlen) == -1)
					goto done;
			}
			break;
		default:
			/* silently ignore unsupported multiprotocol AF */
			break;
		}
	}

done:
	rde_filterstate_clean(&state);
}
1491
/*
 * Process one received prefix: validate it against the ROA set, store
 * the unfiltered path in the Adj-RIB-In, enforce the per-peer max-prefix
 * limit, then run the input filters and update (or withdraw) the prefix
 * in every Loc-RIB.  Returns 0 on success or -1 if the max-prefix limit
 * was hit (caller must stop processing; the session is being torn down).
 */
int
rde_update_update(struct rde_peer *peer, struct filterstate *in,
    struct bgpd_addr *prefix, u_int8_t prefixlen)
{
	struct filterstate	 state;
	enum filter_actions	 action;
	u_int8_t		 vstate;
	u_int16_t		 i;
	const char		*wmsg = "filtered, withdraw";

	peer->prefix_rcvd_update++;
	/* RPKI origin validation state for this prefix/origin-AS pair */
	vstate = rde_roa_validity(&rde_roa, prefix, prefixlen,
	    aspath_origin(in->aspath.aspath));

	/* add original path to the Adj-RIB-In */
	if (prefix_update(rib_byid(RIB_ADJ_IN), peer, in, prefix, prefixlen,
	    vstate) == 1)
		peer->prefix_cnt++;

	/* max prefix checker */
	if (peer->conf.max_prefix && peer->prefix_cnt > peer->conf.max_prefix) {
		log_peer_warnx(&peer->conf, "prefix limit reached (>%u/%u)",
		    peer->prefix_cnt, peer->conf.max_prefix);
		rde_update_err(peer, ERR_CEASE, ERR_CEASE_MAX_PREFIX, NULL, 0);
		return (-1);
	}

	/* paths with parse errors stay in Adj-RIB-In but are never used */
	if (in->aspath.flags & F_ATTR_PARSE_ERR)
		wmsg = "path invalid, withdraw";

	for (i = RIB_LOC_START; i < rib_size; i++) {
		struct rib *rib = rib_byid(i);
		if (rib == NULL)
			continue;
		/* each RIB filters its own private copy of the state */
		rde_filterstate_prep(&state, &in->aspath, &in->communities,
		    in->nexthop, in->nhflags);
		/* input filter */
		action = rde_filter(rib->in_rules, peer, peer, prefix,
		    prefixlen, vstate, &state);

		if (action == ACTION_ALLOW) {
			/*
			 * NOTE(review): assumes state.nexthop is non-NULL
			 * whenever the filter returns ACTION_ALLOW — confirm
			 * filters always supply a nexthop here.
			 */
			rde_update_log("update", i, peer,
			    &state.nexthop->exit_nexthop, prefix,
			    prefixlen);
			prefix_update(rib, peer, &state, prefix,
			    prefixlen, vstate);
		} else if (prefix_withdraw(rib, peer, prefix,
		    prefixlen)) {
			rde_update_log(wmsg, i, peer,
			    NULL, prefix, prefixlen);
		}

		/* clear state */
		rde_filterstate_clean(&state);
	}
	return (0);
}
1549
1550 void
rde_update_withdraw(struct rde_peer * peer,struct bgpd_addr * prefix,u_int8_t prefixlen)1551 rde_update_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix,
1552 u_int8_t prefixlen)
1553 {
1554 u_int16_t i;
1555
1556 for (i = RIB_LOC_START; i < rib_size; i++) {
1557 struct rib *rib = rib_byid(i);
1558 if (rib == NULL)
1559 continue;
1560 if (prefix_withdraw(rib, peer, prefix, prefixlen))
1561 rde_update_log("withdraw", i, peer, NULL, prefix,
1562 prefixlen);
1563 }
1564
1565 /* remove original path form the Adj-RIB-In */
1566 if (prefix_withdraw(rib_byid(RIB_ADJ_IN), peer, prefix, prefixlen))
1567 peer->prefix_cnt--;
1568
1569 peer->prefix_rcvd_withdraw++;
1570 }
1571
/*
 * BGP UPDATE parser functions
 */

/* attribute parser specific macros */
/*
 * UPD_READ copies n bytes from the input pointer p into t, then advances
 * p and the consumed-byte counter plen by n.  Note both p and plen are
 * modified; wrapped in do { } while (0) so it acts as one statement.
 */
#define UPD_READ(t, p, plen, n) \
	do { \
		memcpy(t, p, n); \
		p += n; \
		plen += n; \
	} while (0)

/*
 * CHECK_FLAGS verifies that attribute flag byte s equals the expected
 * flags t once the default mask bits and the extra allowed bits m
 * (e.g. ATTR_PARTIAL) are ignored.
 */
#define CHECK_FLAGS(s, t, m)	\
	(((s) & ~(ATTR_DEFMASK | (m))) == (t))
1586
/*
 * Parse a single path attribute at p (len bytes remaining in the
 * attribute section) into state/mpa.  Returns the number of bytes
 * consumed (header plus payload) or -1 after sending the appropriate
 * NOTIFICATION via rde_update_err().  Per RFC 7606 some malformed
 * attributes are merely discarded or mark the path with
 * F_ATTR_PARSE_ERR ("treat-as-withdraw") instead of resetting the
 * session.
 */
int
rde_attr_parse(u_char *p, u_int16_t len, struct rde_peer *peer,
    struct filterstate *state, struct mpattr *mpa)
{
	struct bgpd_addr nexthop;
	struct rde_aspath *a = &state->aspath;
	u_char		*op = p, *npath;
	u_int32_t	 tmp32, zero = 0;
	int		 error;
	u_int16_t	 attr_len, nlen;
	u_int16_t	 plen = 0;
	u_int8_t	 flags;
	u_int8_t	 type;
	u_int8_t	 tmp8;

	/* need at least flags, type and a 1-byte length */
	if (len < 3) {
bad_len:
		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLEN, op, len);
		return (-1);
	}

	UPD_READ(&flags, p, plen, 1);
	UPD_READ(&type, p, plen, 1);

	/* length field is 2 bytes if the extended-length flag is set */
	if (flags & ATTR_EXTLEN) {
		if (len - plen < 2)
			goto bad_len;
		UPD_READ(&attr_len, p, plen, 2);
		attr_len = ntohs(attr_len);
	} else {
		UPD_READ(&tmp8, p, plen, 1);
		attr_len = tmp8;
	}

	if (len - plen < attr_len)
		goto bad_len;

	/* adjust len to the actual attribute size including header */
	len = plen + attr_len;

	switch (type) {
	case ATTR_UNDEF:
		/* ignore and drop path attributes with a type code of 0 */
		plen += attr_len;
		break;
	case ATTR_ORIGIN:
		if (attr_len != 1)
			goto bad_len;

		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0)) {
bad_flags:
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRFLAGS,
			    op, len);
			return (-1);
		}

		UPD_READ(&a->origin, p, plen, 1);
		if (a->origin > ORIGIN_INCOMPLETE) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ORIGIN,
			    op, len);
			return (-1);
		}
		/* duplicate attributes are an attribute-list error */
		if (a->flags & F_ATTR_ORIGIN)
			goto bad_list;
		a->flags |= F_ATTR_ORIGIN;
		break;
	case ATTR_ASPATH:
		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
			goto bad_flags;
		error = aspath_verify(p, attr_len, rde_as4byte(peer),
		    rde_no_as_set(peer));
		if (error == AS_ERR_SOFT) {
			/*
			 * soft errors like unexpected segment types are
			 * not considered fatal and the path is just
			 * marked invalid.
			 */
			a->flags |= F_ATTR_PARSE_ERR;
		} else if (error != 0) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
			    NULL, 0);
			return (-1);
		}
		if (a->flags & F_ATTR_ASPATH)
			goto bad_list;
		if (rde_as4byte(peer)) {
			npath = p;
			nlen = attr_len;
		} else {
			/* expand 2-byte ASNs to the internal 4-byte form */
			npath = aspath_inflate(p, attr_len, &nlen);
			if (npath == NULL)
				fatal("aspath_inflate");
		}
		if (error == AS_ERR_SOFT) {
			char *str;

			aspath_asprint(&str, npath, nlen);
			log_peer_warnx(&peer->conf, "bad ASPATH %s, "
			    "path invalidated and prefix withdrawn",
			    str ? str : "(bad aspath)");
			free(str);
		}
		a->flags |= F_ATTR_ASPATH;
		a->aspath = aspath_get(npath, nlen);
		/* npath was only allocated in the inflate case */
		if (npath != p)
			free(npath);
		plen += attr_len;
		break;
	case ATTR_NEXTHOP:
		if (attr_len != 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
			goto bad_flags;
		if (a->flags & F_ATTR_NEXTHOP)
			goto bad_list;
		a->flags |= F_ATTR_NEXTHOP;

		bzero(&nexthop, sizeof(nexthop));
		nexthop.aid = AID_INET;
		UPD_READ(&nexthop.v4.s_addr, p, plen, 4);
		/*
		 * Check if the nexthop is a valid IP address. We consider
		 * multicast and experimental addresses as invalid.
		 */
		tmp32 = ntohl(nexthop.v4.s_addr);
		if (IN_MULTICAST(tmp32) || IN_BADCLASS(tmp32)) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
			    op, len);
			return (-1);
		}
		nexthop_unref(state->nexthop);	/* just to be sure */
		state->nexthop = nexthop_get(&nexthop);
		break;
	case ATTR_MED:
		if (attr_len != 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
			goto bad_flags;
		if (a->flags & F_ATTR_MED)
			goto bad_list;
		a->flags |= F_ATTR_MED;

		UPD_READ(&tmp32, p, plen, 4);
		a->med = ntohl(tmp32);
		break;
	case ATTR_LOCALPREF:
		if (attr_len != 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
			goto bad_flags;
		if (peer->conf.ebgp) {
			/* ignore local-pref attr on non ibgp peers */
			plen += 4;
			break;
		}
		if (a->flags & F_ATTR_LOCALPREF)
			goto bad_list;
		a->flags |= F_ATTR_LOCALPREF;

		UPD_READ(&tmp32, p, plen, 4);
		a->lpref = ntohl(tmp32);
		break;
	case ATTR_ATOMIC_AGGREGATE:
		if (attr_len != 0)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
			goto bad_flags;
		goto optattr;
	case ATTR_AGGREGATOR:
		/* 6 bytes for 2-byte-ASN sessions, 8 bytes for 4-byte */
		if ((!rde_as4byte(peer) && attr_len != 6) ||
		    (rde_as4byte(peer) && attr_len != 8)) {
			/*
			 * ignore attribute in case of error as per
			 * RFC 7606
			 */
			log_peer_warnx(&peer->conf, "bad AGGREGATOR, "
			    "partial attribute ignored");
			plen += attr_len;
			break;
		}
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
		    ATTR_PARTIAL))
			goto bad_flags;
		if (!rde_as4byte(peer)) {
			/* need to inflate aggregator AS to 4-byte */
			u_char	t[8];
			t[0] = t[1] = 0;
			UPD_READ(&t[2], p, plen, 2);
			UPD_READ(&t[4], p, plen, 4);
			if (memcmp(t, &zero, sizeof(u_int32_t)) == 0) {
				/* As per RFC7606 use "attribute discard". */
				log_peer_warnx(&peer->conf, "bad AGGREGATOR, "
				    "AS 0 not allowed, attribute discarded");
				break;
			}
			if (attr_optadd(a, flags, type, t,
			    sizeof(t)) == -1)
				goto bad_list;
			break;
		}
		/* 4-byte ready server take the default route */
		if (memcmp(p, &zero, sizeof(u_int32_t)) == 0) {
			/* As per RFC7606 use "attribute discard" here. */
			char *pfmt = log_fmt_peer(&peer->conf);
			log_debug("%s: bad AGGREGATOR, "
			    "AS 0 not allowed, attribute discarded", pfmt);
			free(pfmt);
			plen += attr_len;
			break;
		}
		goto optattr;
	case ATTR_COMMUNITIES:
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
		    ATTR_PARTIAL))
			goto bad_flags;
		if (community_add(&state->communities, flags, p,
		    attr_len) == -1) {
			/*
			 * mark update as bad and withdraw all routes as per
			 * RFC 7606
			 */
			a->flags |= F_ATTR_PARSE_ERR;
			log_peer_warnx(&peer->conf, "bad COMMUNITIES, "
			    "path invalidated and prefix withdrawn");
			break;
		}
		plen += attr_len;
		break;
	case ATTR_LARGE_COMMUNITIES:
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
		    ATTR_PARTIAL))
			goto bad_flags;
		if (community_large_add(&state->communities, flags, p,
		    attr_len) == -1) {
			/*
			 * mark update as bad and withdraw all routes as per
			 * RFC 7606
			 */
			a->flags |= F_ATTR_PARSE_ERR;
			log_peer_warnx(&peer->conf, "bad LARGE COMMUNITIES, "
			    "path invalidated and prefix withdrawn");
			break;
		}
		plen += attr_len;
		break;
	case ATTR_EXT_COMMUNITIES:
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
		    ATTR_PARTIAL))
			goto bad_flags;
		if (community_ext_add(&state->communities, flags, p,
		    attr_len) == -1) {
			/*
			 * mark update as bad and withdraw all routes as per
			 * RFC 7606
			 */
			a->flags |= F_ATTR_PARSE_ERR;
			log_peer_warnx(&peer->conf, "bad EXT_COMMUNITIES, "
			    "path invalidated and prefix withdrawn");
			break;
		}
		plen += attr_len;
		break;
	case ATTR_ORIGINATOR_ID:
		if (attr_len != 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
			goto bad_flags;
		goto optattr;
	case ATTR_CLUSTER_LIST:
		/* list of 4-byte cluster IDs */
		if (attr_len % 4 != 0)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
			goto bad_flags;
		goto optattr;
	case ATTR_MP_REACH_NLRI:
		if (attr_len < 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
			goto bad_flags;
		/* the validity is checked in rde_update_dispatch() */
		if (a->flags & F_ATTR_MP_REACH)
			goto bad_list;
		a->flags |= F_ATTR_MP_REACH;

		/* store raw pointer/length; parsed later in dispatch */
		mpa->reach = p;
		mpa->reach_len = attr_len;
		plen += attr_len;
		break;
	case ATTR_MP_UNREACH_NLRI:
		if (attr_len < 3)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
			goto bad_flags;
		/* the validity is checked in rde_update_dispatch() */
		if (a->flags & F_ATTR_MP_UNREACH)
			goto bad_list;
		a->flags |= F_ATTR_MP_UNREACH;

		mpa->unreach = p;
		mpa->unreach_len = attr_len;
		plen += attr_len;
		break;
	case ATTR_AS4_AGGREGATOR:
		if (attr_len != 8) {
			/* see ATTR_AGGREGATOR ... */
			if ((flags & ATTR_PARTIAL) == 0)
				goto bad_len;
			log_peer_warnx(&peer->conf, "bad AS4_AGGREGATOR, "
			    "partial attribute ignored");
			plen += attr_len;
			break;
		}
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
		    ATTR_PARTIAL))
			goto bad_flags;
		if (memcmp(p, &zero, sizeof(u_int32_t)) == 0) {
			/* As per RFC6793 use "attribute discard" here. */
			log_peer_warnx(&peer->conf, "bad AS4_AGGREGATOR, "
			    "AS 0 not allowed, attribute discarded");
			plen += attr_len;
			break;
		}
		a->flags |= F_ATTR_AS4BYTE_NEW;
		goto optattr;
	case ATTR_AS4_PATH:
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
		    ATTR_PARTIAL))
			goto bad_flags;
		if ((error = aspath_verify(p, attr_len, 1,
		    rde_no_as_set(peer))) != 0) {
			/*
			 * XXX RFC does not specify how to handle errors.
			 * XXX Instead of dropping the session because of a
			 * XXX bad path just mark the full update as having
			 * XXX a parse error which makes the update no longer
			 * XXX eligible and will not be considered for routing
			 * XXX or redistribution.
			 * XXX We follow draft-ietf-idr-optional-transitive
			 * XXX by looking at the partial bit.
			 * XXX Consider soft errors similar to a partial attr.
			 */
			if (flags & ATTR_PARTIAL || error == AS_ERR_SOFT) {
				a->flags |= F_ATTR_PARSE_ERR;
				log_peer_warnx(&peer->conf, "bad AS4_PATH, "
				    "path invalidated and prefix withdrawn");
				goto optattr;
			} else {
				rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
				    NULL, 0);
				return (-1);
			}
		}
		a->flags |= F_ATTR_AS4BYTE_NEW;
		goto optattr;
	default:
		/* unknown well-known attributes are a hard error */
		if ((flags & ATTR_OPTIONAL) == 0) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_UNKNWN_WK_ATTR,
			    op, len);
			return (-1);
		}
optattr:
		/* store attribute verbatim in the optional-attribute list */
		if (attr_optadd(a, flags, type, p, attr_len) == -1) {
bad_list:
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST,
			    NULL, 0);
			return (-1);
		}

		plen += attr_len;
		break;
	}

	return (plen);
}
1961
/*
 * Add one wire-encoded path attribute (flags, type, length, payload at
 * p, len bytes available) to a filterstate.  Communities of all three
 * flavors go into state->communities; everything else is stored
 * verbatim in the optional-attribute list.  Returns 0 (or the
 * community_*_add result) on success, -1 on truncated or unstorable
 * input.  Used for attributes injected by configuration rather than
 * parsed from a peer, so no NOTIFICATION is sent on error.
 */
int
rde_attr_add(struct filterstate *state, u_char *p, u_int16_t len)
{
	u_int16_t	 attr_len;
	u_int16_t	 plen = 0;
	u_int8_t	 flags;
	u_int8_t	 type;
	u_int8_t	 tmp8;

	/* need at least flags, type and a 1-byte length */
	if (len < 3)
		return (-1);

	UPD_READ(&flags, p, plen, 1);
	UPD_READ(&type, p, plen, 1);

	/* extended-length flag selects a 2-byte length field */
	if (flags & ATTR_EXTLEN) {
		if (len - plen < 2)
			return (-1);
		UPD_READ(&attr_len, p, plen, 2);
		attr_len = ntohs(attr_len);
	} else {
		UPD_READ(&tmp8, p, plen, 1);
		attr_len = tmp8;
	}

	if (len - plen < attr_len)
		return (-1);

	switch (type) {
	case ATTR_COMMUNITIES:
		return community_add(&state->communities, flags, p, attr_len);
	case ATTR_LARGE_COMMUNITIES:
		return community_large_add(&state->communities, flags, p,
		    attr_len);
	case ATTR_EXT_COMMUNITIES:
		return community_ext_add(&state->communities, flags, p,
		    attr_len);
	}

	if (attr_optadd(&state->aspath, flags, type, p, attr_len) == -1)
		return (-1);
	return (0);
}

#undef UPD_READ
#undef CHECK_FLAGS
2008
2009 u_int8_t
rde_attr_missing(struct rde_aspath * a,int ebgp,u_int16_t nlrilen)2010 rde_attr_missing(struct rde_aspath *a, int ebgp, u_int16_t nlrilen)
2011 {
2012 /* ATTR_MP_UNREACH_NLRI may be sent alone */
2013 if (nlrilen == 0 && a->flags & F_ATTR_MP_UNREACH &&
2014 (a->flags & F_ATTR_MP_REACH) == 0)
2015 return (0);
2016
2017 if ((a->flags & F_ATTR_ORIGIN) == 0)
2018 return (ATTR_ORIGIN);
2019 if ((a->flags & F_ATTR_ASPATH) == 0)
2020 return (ATTR_ASPATH);
2021 if ((a->flags & F_ATTR_MP_REACH) == 0 &&
2022 (a->flags & F_ATTR_NEXTHOP) == 0)
2023 return (ATTR_NEXTHOP);
2024 if (!ebgp)
2025 if ((a->flags & F_ATTR_LOCALPREF) == 0)
2026 return (ATTR_LOCALPREF);
2027 return (0);
2028 }
2029
/*
 * Extract the nexthop from the front of an MP_REACH_NLRI payload
 * (RFC 4760: 1-byte nexthop length, nexthop, 1 reserved byte).
 * Stores a referenced nexthop in state->nexthop and returns the total
 * number of bytes consumed, or -1 on malformed input.
 */
int
rde_get_mp_nexthop(u_char *data, u_int16_t len, u_int8_t aid,
    struct filterstate *state)
{
	struct bgpd_addr	nexthop;
	u_int8_t		totlen, nhlen;

	if (len == 0)
		return (-1);

	nhlen = *data++;
	totlen = 1;
	len--;

	if (nhlen > len)
		return (-1);

	bzero(&nexthop, sizeof(nexthop));
	nexthop.aid = aid;
	switch (aid) {
	case AID_INET6:
		/*
		 * RFC2545 describes that there may be a link-local
		 * address carried in nexthop. Yikes!
		 * This is not only silly, it is wrong and we just ignore
		 * this link-local nexthop. The bgpd session doesn't run
		 * over the link-local address so why should all other
		 * traffic.
		 */
		/* 16 = global only, 32 = global + link-local (ignored) */
		if (nhlen != 16 && nhlen != 32) {
			log_warnx("bad multiprotocol nexthop, bad size");
			return (-1);
		}
		/* copy only the global address; skip any link-local part */
		memcpy(&nexthop.v6.s6_addr, data, 16);
		break;
	case AID_VPN_IPv6:
		/* 8-byte route distinguisher followed by the IPv6 address */
		if (nhlen != 24) {
			log_warnx("bad multiprotocol nexthop, bad size %d",
			    nhlen);
			return (-1);
		}
		memcpy(&nexthop.v6, data + sizeof(u_int64_t),
		    sizeof(nexthop.v6));
		nexthop.aid = AID_INET6;
		break;
	case AID_VPN_IPv4:
		/*
		 * Neither RFC4364 nor RFC3107 specify the format of the
		 * nexthop in an explicit way. The quality of RFC went down
		 * the toilet the larger the number got.
		 * RFC4364 is very confusing about VPN-IPv4 address and the
		 * VPN-IPv4 prefix that carries also a MPLS label.
		 * So the nexthop is a 12-byte address with a 64bit RD and
		 * an IPv4 address following. In the nexthop case the RD can
		 * be ignored.
		 * Since the nexthop has to be in the main IPv4 table just
		 * create an AID_INET nexthop. So we don't need to handle
		 * AID_VPN_IPv4 in nexthop and kroute.
		 */
		if (nhlen != 12) {
			log_warnx("bad multiprotocol nexthop, bad size");
			return (-1);
		}
		nexthop.aid = AID_INET;
		memcpy(&nexthop.v4, data + sizeof(u_int64_t),
		    sizeof(nexthop.v4));
		break;
	default:
		log_warnx("bad multiprotocol nexthop, bad AID");
		return (-1);
	}

	nexthop_unref(state->nexthop);	/* just to be sure */
	state->nexthop = nexthop_get(&nexthop);

	/* ignore reserved (old SNPA) field as per RFC4760 */
	totlen += nhlen + 1;
	data += nhlen + 1;

	return (totlen);
}
2111
/*
 * Queue an IMSG_UPDATE_ERR to the session engine so it sends a
 * NOTIFICATION (error/suberr with optional data) to the peer, and mark
 * the peer as being in error state so no further updates are processed.
 * Failure to build the imsg is fatal.
 */
void
rde_update_err(struct rde_peer *peer, u_int8_t error, u_int8_t suberr,
    void *data, u_int16_t size)
{
	struct ibuf	*wbuf;

	if ((wbuf = imsg_create(ibuf_se, IMSG_UPDATE_ERR, peer->conf.id, 0,
	    size + sizeof(error) + sizeof(suberr))) == NULL)
		fatal("%s %d imsg_create error", __func__, __LINE__);
	if (imsg_add(wbuf, &error, sizeof(error)) == -1 ||
	    imsg_add(wbuf, &suberr, sizeof(suberr)) == -1 ||
	    imsg_add(wbuf, data, size) == -1)
		fatal("%s %d imsg_add error", __func__, __LINE__);
	imsg_close(ibuf_se, wbuf);
	peer->state = PEER_ERR;
}
2128
2129 void
rde_update_log(const char * message,u_int16_t rid,const struct rde_peer * peer,const struct bgpd_addr * next,const struct bgpd_addr * prefix,u_int8_t prefixlen)2130 rde_update_log(const char *message, u_int16_t rid,
2131 const struct rde_peer *peer, const struct bgpd_addr *next,
2132 const struct bgpd_addr *prefix, u_int8_t prefixlen)
2133 {
2134 char *l = NULL;
2135 char *n = NULL;
2136 char *p = NULL;
2137
2138 if (!((conf->log & BGPD_LOG_UPDATES) ||
2139 (peer->conf.flags & PEERFLAG_LOG_UPDATES)))
2140 return;
2141
2142 if (next != NULL)
2143 if (asprintf(&n, " via %s", log_addr(next)) == -1)
2144 n = NULL;
2145 if (asprintf(&p, "%s/%u", log_addr(prefix), prefixlen) == -1)
2146 p = NULL;
2147 l = log_fmt_peer(&peer->conf);
2148 log_info("Rib %s: %s AS%s: %s %s%s", rib_byid(rid)->name,
2149 l, log_as(peer->conf.remote_as), message,
2150 p ? p : "out of memory", n ? n : "");
2151
2152 free(l);
2153 free(n);
2154 free(p);
2155 }
2156
/*
 * 4-Byte ASN helper function.
 * Two scenarios need to be considered:
 * - NEW session with NEW attributes present -> just remove the attributes
 * - OLD session with NEW attributes present -> try to merge them
 * (RFC 6793: AS4_PATH/AS4_AGGREGATOR only exist to tunnel 4-byte ASNs
 * through 2-byte-only speakers.)
 */
void
rde_as4byte_fixup(struct rde_peer *peer, struct rde_aspath *a)
{
	struct attr	*nasp, *naggr, *oaggr;
	u_int32_t	 as;

	/*
	 * if either ATTR_AS4_AGGREGATOR or ATTR_AS4_PATH is present
	 * try to fixup the attributes.
	 * Do not fixup if F_ATTR_PARSE_ERR is set.
	 */
	if (!(a->flags & F_ATTR_AS4BYTE_NEW) || a->flags & F_ATTR_PARSE_ERR)
		return;

	/* first get the attributes */
	nasp = attr_optget(a, ATTR_AS4_PATH);
	naggr = attr_optget(a, ATTR_AS4_AGGREGATOR);

	if (rde_as4byte(peer)) {
		/* NEW session using 4-byte ASNs */
		/* the AS4_* attributes must not be sent on such sessions */
		if (nasp) {
			log_peer_warnx(&peer->conf, "uses 4-byte ASN "
			    "but sent AS4_PATH attribute.");
			attr_free(a, nasp);
		}
		if (naggr) {
			log_peer_warnx(&peer->conf, "uses 4-byte ASN "
			    "but sent AS4_AGGREGATOR attribute.");
			attr_free(a, naggr);
		}
		return;
	}
	/* OLD session using 2-byte ASNs */
	/* try to merge the new attributes into the old ones */
	if ((oaggr = attr_optget(a, ATTR_AGGREGATOR))) {
		memcpy(&as, oaggr->data, sizeof(as));
		if (ntohl(as) != AS_TRANS) {
			/* per RFC ignore AS4_PATH and AS4_AGGREGATOR */
			if (nasp)
				attr_free(a, nasp);
			if (naggr)
				attr_free(a, naggr);
			return;
		}
		if (naggr) {
			/* switch over to new AGGREGATOR */
			attr_free(a, oaggr);
			if (attr_optadd(a, ATTR_OPTIONAL | ATTR_TRANSITIVE,
			    ATTR_AGGREGATOR, naggr->data, naggr->len))
				fatalx("attr_optadd failed but impossible");
		}
	}
	/* there is no need for AS4_AGGREGATOR any more */
	if (naggr)
		attr_free(a, naggr);

	/* merge AS4_PATH with ASPATH */
	if (nasp)
		aspath_merge(a, nasp);
}
2223
2224
2225 /*
2226 * route reflector helper function
2227 */
void
rde_reflector(struct rde_peer *peer, struct rde_aspath *asp)
{
	struct attr	*a;
	u_int8_t	*p;
	u_int16_t	 len;
	u_int32_t	 id;

	/* do not consider updates with parse errors */
	if (asp->flags & F_ATTR_PARSE_ERR)
		return;

	/* check for originator id if eq router_id drop */
	if ((a = attr_optget(asp, ATTR_ORIGINATOR_ID)) != NULL) {
		if (memcmp(&conf->bgpid, a->data, sizeof(conf->bgpid)) == 0) {
			/* this is coming from myself */
			asp->flags |= F_ATTR_LOOP;
			return;
		}
	} else if (conf->flags & BGPD_FLAG_REFLECTOR) {
		/* add ORIGINATOR_ID: own id for ebgp, the peer's otherwise */
		if (peer->conf.ebgp)
			id = conf->bgpid;
		else
			id = htonl(peer->remote_bgpid);
		if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_ORIGINATOR_ID,
		    &id, sizeof(u_int32_t)) == -1)
			fatalx("attr_optadd failed but impossible");
	}

	/* check for own id in the cluster list */
	if (conf->flags & BGPD_FLAG_REFLECTOR) {
		if ((a = attr_optget(asp, ATTR_CLUSTER_LIST)) != NULL) {
			for (len = 0; len < a->len;
			    len += sizeof(conf->clusterid))
				/* check if coming from my cluster */
				if (memcmp(&conf->clusterid, a->data + len,
				    sizeof(conf->clusterid)) == 0) {
					asp->flags |= F_ATTR_LOOP;
					return;
				}

			/* prepend own clusterid by replacing attribute */
			len = a->len + sizeof(conf->clusterid);
			/* u_int16_t addition may wrap around */
			if (len < a->len)
				fatalx("rde_reflector: cluster-list overflow");
			if ((p = malloc(len)) == NULL)
				fatal("rde_reflector");
			memcpy(p, &conf->clusterid, sizeof(conf->clusterid));
			memcpy(p + sizeof(conf->clusterid), a->data, a->len);
			attr_free(asp, a);
			if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_CLUSTER_LIST,
			    p, len) == -1)
				fatalx("attr_optadd failed but impossible");
			free(p);
		} else if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_CLUSTER_LIST,
		    &conf->clusterid, sizeof(conf->clusterid)) == -1)
			fatalx("attr_optadd failed but impossible");
	}
}
2287
2288 /*
2289 * control specific functions
2290 */
/*
 * Fill a ctl_show_rib from one prefix and its aspath and ship it to the
 * SE control socket; with F_CTL_DETAIL the communities and the remaining
 * optional path attributes follow in separate imsgs.
 */
static void
rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags)
{
	struct ctl_show_rib	 rib;
	struct ibuf		*wbuf;
	struct attr		*a;
	struct nexthop		*nexthop;
	void			*bp;
	time_t			 staletime;
	size_t			 aslen;
	u_int8_t		 l;

	nexthop = prefix_nexthop(p);
	bzero(&rib, sizeof(rib));
	rib.age = getmonotime() - p->lastchange;
	rib.local_pref = asp->lpref;
	rib.med = asp->med;
	rib.weight = asp->weight;
	strlcpy(rib.descr, prefix_peer(p)->conf.descr, sizeof(rib.descr));
	memcpy(&rib.remote_addr, &prefix_peer(p)->remote_addr,
	    sizeof(rib.remote_addr));
	rib.remote_id = prefix_peer(p)->remote_bgpid;
	if (nexthop != NULL) {
		memcpy(&rib.true_nexthop, &nexthop->true_nexthop,
		    sizeof(rib.true_nexthop));
		memcpy(&rib.exit_nexthop, &nexthop->exit_nexthop,
		    sizeof(rib.exit_nexthop));
	} else {
		/* announced network may have a NULL nexthop */
		bzero(&rib.true_nexthop, sizeof(rib.true_nexthop));
		bzero(&rib.exit_nexthop, sizeof(rib.exit_nexthop));
		rib.true_nexthop.aid = p->pt->aid;
		rib.exit_nexthop.aid = p->pt->aid;
	}
	pt_getaddr(p->pt, &rib.prefix);
	rib.prefixlen = p->pt->prefixlen;
	rib.origin = asp->origin;
	rib.validation_state = p->validation_state;
	rib.flags = 0;
	if (p->re != NULL && p->re->active == p)
		rib.flags |= F_PREF_ACTIVE;
	if (!prefix_peer(p)->conf.ebgp)
		rib.flags |= F_PREF_INTERNAL;
	if (asp->flags & F_PREFIX_ANNOUNCED)
		rib.flags |= F_PREF_ANNOUNCE;
	if (nexthop == NULL || nexthop->state == NEXTHOP_REACH)
		rib.flags |= F_PREF_ELIGIBLE;
	/* a looping path is never eligible, even if the nexthop is fine */
	if (asp->flags & F_ATTR_LOOP)
		rib.flags &= ~F_PREF_ELIGIBLE;
	if (asp->flags & F_ATTR_PARSE_ERR)
		rib.flags |= F_PREF_INVALID;
	staletime = prefix_peer(p)->staletime[p->pt->aid];
	if (staletime && p->lastchange <= staletime)
		rib.flags |= F_PREF_STALE;
	aslen = aspath_length(asp->aspath);

	/* fixed-size struct followed by the variable length aspath */
	if ((wbuf = imsg_create(ibuf_se_ctl, IMSG_CTL_SHOW_RIB, 0, pid,
	    sizeof(rib) + aslen)) == NULL)
		return;
	if (imsg_add(wbuf, &rib, sizeof(rib)) == -1 ||
	    imsg_add(wbuf, aspath_dump(asp->aspath), aslen) == -1)
		return;	/* NOTE(review): presumably imsg_add frees wbuf on error — confirm */
	imsg_close(ibuf_se_ctl, wbuf);

	if (flags & F_CTL_DETAIL) {
		struct rde_community *comm = prefix_communities(p);
		size_t len = comm->nentries * sizeof(struct community);
		if (comm->nentries > 0) {
			/* all communities in a single message */
			if ((wbuf = imsg_create(ibuf_se_ctl,
			    IMSG_CTL_SHOW_RIB_COMMUNITIES, 0, pid,
			    len)) == NULL)
				return;
			if ((bp = ibuf_reserve(wbuf, len)) == NULL) {
				ibuf_free(wbuf);
				return;
			}
			memcpy(bp, comm->communities, len);
			imsg_close(ibuf_se_ctl, wbuf);
		}
		/* remaining optional attributes, one message each */
		for (l = 0; l < asp->others_len; l++) {
			if ((a = asp->others[l]) == NULL)
				break;
			if ((wbuf = imsg_create(ibuf_se_ctl,
			    IMSG_CTL_SHOW_RIB_ATTR, 0, pid,
			    attr_optlen(a))) == NULL)
				return;
			if ((bp = ibuf_reserve(wbuf, attr_optlen(a))) == NULL) {
				ibuf_free(wbuf);
				return;
			}
			if (attr_write(bp, attr_optlen(a), a->flags,
			    a->type, a->data, a->len) == -1) {
				ibuf_free(wbuf);
				return;
			}
			imsg_close(ibuf_se_ctl, wbuf);
		}
	}
}
2390
2391 int
rde_match_peer(struct rde_peer * p,struct ctl_neighbor * n)2392 rde_match_peer(struct rde_peer *p, struct ctl_neighbor *n)
2393 {
2394 char *s;
2395
2396 if (n && n->addr.aid) {
2397 if (memcmp(&p->conf.remote_addr, &n->addr,
2398 sizeof(p->conf.remote_addr)))
2399 return 0;
2400 } else if (n && n->descr[0]) {
2401 s = n->is_group ? p->conf.group : p->conf.descr;
2402 if (strcmp(s, n->descr))
2403 return 0;
2404 }
2405 return 1;
2406 }
2407
/*
 * Apply the restrictions of a control request (neighbor, active-only,
 * invalid-only, AS, community, validation state) to a single prefix and
 * dump it only if every check passes.
 */
static void
rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req)
{
	struct rde_aspath	*asp;

	if (!rde_match_peer(prefix_peer(p), &req->neighbor))
		return;

	asp = prefix_aspath(p);
	if (asp == NULL)	/* skip pending withdraw in Adj-RIB-Out */
		return;
	if ((req->flags & F_CTL_ACTIVE) && p->re->active != p)
		return;
	if ((req->flags & F_CTL_INVALID) &&
	    (asp->flags & F_ATTR_PARSE_ERR) == 0)
		return;
	if (req->as.type != AS_UNDEF &&
	    !aspath_match(asp->aspath, &req->as, 0))
		return;
	if (req->community.flags != 0) {
		if (!community_match(prefix_communities(p), &req->community,
		    NULL))
			return;
	}
	if (!ovs_match(p, req->flags))
		return;
	rde_dump_rib_as(p, asp, req->pid, req->flags);
}
2436
2437 static void
rde_dump_upcall(struct rib_entry * re,void * ptr)2438 rde_dump_upcall(struct rib_entry *re, void *ptr)
2439 {
2440 struct rde_dump_ctx *ctx = ptr;
2441 struct prefix *p;
2442
2443 LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
2444 rde_dump_filter(p, &ctx->req);
2445 }
2446
/*
 * Upcall for prefix dumps with F_LONGER/F_SHORTER: dump all prefixes of
 * a RIB entry that are covered by (or that cover) the requested prefix.
 */
static void
rde_dump_prefix_upcall(struct rib_entry *re, void *ptr)
{
	struct rde_dump_ctx	*ctx = ptr;
	struct prefix		*p;
	struct pt_entry		*pt;
	struct bgpd_addr	 addr;

	pt = re->prefix;
	pt_getaddr(pt, &addr);
	if (addr.aid != ctx->req.prefix.aid)
		return;
	if (ctx->req.flags & F_LONGER) {
		/* more specifics: entry must lie within the request prefix */
		if (ctx->req.prefixlen > pt->prefixlen)
			return;
		if (!prefix_compare(&ctx->req.prefix, &addr,
		    ctx->req.prefixlen))
			LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
				rde_dump_filter(p, &ctx->req);
	} else {
		/* less specifics: entry must contain the request prefix */
		if (ctx->req.prefixlen < pt->prefixlen)
			return;
		/* note the swapped arguments and the entry's own length */
		if (!prefix_compare(&addr, &ctx->req.prefix,
		    pt->prefixlen))
			LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
				rde_dump_filter(p, &ctx->req);
	}
}
2475
2476 static void
rde_dump_adjout_upcall(struct prefix * p,void * ptr)2477 rde_dump_adjout_upcall(struct prefix *p, void *ptr)
2478 {
2479 struct rde_dump_ctx *ctx = ptr;
2480
2481 if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD))
2482 return;
2483 rde_dump_filter(p, &ctx->req);
2484 }
2485
/*
 * Adj-RIB-Out variant of rde_dump_prefix_upcall(): covers a single
 * prefix instead of a whole RIB entry and skips dying prefixes.
 */
static void
rde_dump_adjout_prefix_upcall(struct prefix *p, void *ptr)
{
	struct rde_dump_ctx	*ctx = ptr;
	struct bgpd_addr	 addr;

	if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD))
		return;

	pt_getaddr(p->pt, &addr);
	if (addr.aid != ctx->req.prefix.aid)
		return;
	if (ctx->req.flags & F_LONGER) {
		/* more specifics: prefix must lie within the request prefix */
		if (ctx->req.prefixlen > p->pt->prefixlen)
			return;
		if (!prefix_compare(&ctx->req.prefix, &addr,
		    ctx->req.prefixlen))
			rde_dump_filter(p, &ctx->req);
	} else {
		/* less specifics: prefix must contain the request prefix */
		if (ctx->req.prefixlen < p->pt->prefixlen)
			return;
		/* note the swapped arguments and the prefix's own length */
		if (!prefix_compare(&addr, &ctx->req.prefix,
		    p->pt->prefixlen))
			rde_dump_filter(p, &ctx->req);
	}
}
2512
2513 static int
rde_dump_throttled(void * arg)2514 rde_dump_throttled(void *arg)
2515 {
2516 struct rde_dump_ctx *ctx = arg;
2517
2518 return (ctx->throttled != 0);
2519 }
2520
/*
 * Completion callback for control dumps.  Adj-RIB-Out requests iterate
 * over all matching peers: advance to the next one and restart the dump.
 * Otherwise signal IMSG_CTL_END and dispose of the context.
 */
static void
rde_dump_done(void *arg, u_int8_t aid)
{
	struct rde_dump_ctx	*ctx = arg;
	struct rde_peer		*peer;
	u_int			 error;

	if (ctx->req.flags & F_CTL_ADJ_OUT) {
		/* continue with the next peer after the current one */
		peer = peer_match(&ctx->req.neighbor, ctx->peerid);
		if (peer == NULL)
			goto done;
		ctx->peerid = peer->conf.id;
		switch (ctx->req.type) {
		case IMSG_CTL_SHOW_RIB:
			if (prefix_dump_new(peer, ctx->req.aid,
			    CTL_MSG_HIGH_MARK, ctx, rde_dump_adjout_upcall,
			    rde_dump_done, rde_dump_throttled) == -1)
				goto nomem;
			break;
		case IMSG_CTL_SHOW_RIB_PREFIX:
			if (prefix_dump_new(peer, ctx->req.aid,
			    CTL_MSG_HIGH_MARK, ctx,
			    rde_dump_adjout_prefix_upcall,
			    rde_dump_done, rde_dump_throttled) == -1)
				goto nomem;
			break;
		default:
			fatalx("%s: unsupported imsg type", __func__);
		}
		return;
	}
done:
	imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid, -1, NULL, 0);
	LIST_REMOVE(ctx, entry);
	free(ctx);
	return;

nomem:
	log_warn(__func__);
	error = CTL_RES_NOMEM;
	imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, ctx->req.pid, -1, &error,
	    sizeof(error));
	/*
	 * NOTE(review): on this path ctx stays on rde_dump_h and is never
	 * freed, and no CTL_END is sent -- looks like a leak; confirm.
	 */
	return;
}
2565
2566 void
rde_dump_ctx_new(struct ctl_show_rib_request * req,pid_t pid,enum imsg_type type)2567 rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid,
2568 enum imsg_type type)
2569 {
2570 struct rde_dump_ctx *ctx;
2571 struct rib_entry *re;
2572 struct prefix *p;
2573 u_int error;
2574 u_int8_t hostplen;
2575 u_int16_t rid;
2576
2577 if ((ctx = calloc(1, sizeof(*ctx))) == NULL) {
2578 nomem:
2579 log_warn(__func__);
2580 error = CTL_RES_NOMEM;
2581 imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
2582 sizeof(error));
2583 return;
2584 }
2585
2586 memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request));
2587 ctx->req.pid = pid;
2588 ctx->req.type = type;
2589
2590 if (req->flags & (F_CTL_ADJ_IN | F_CTL_INVALID)) {
2591 rid = RIB_ADJ_IN;
2592 } else if (req->flags & F_CTL_ADJ_OUT) {
2593 struct rde_peer *peer;
2594
2595 peer = peer_match(&req->neighbor, 0);
2596 if (peer == NULL) {
2597 log_warnx("%s: no peer found for adj-rib-out",
2598 __func__);
2599 error = CTL_RES_NOSUCHPEER;
2600 imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1,
2601 &error, sizeof(error));
2602 free(ctx);
2603 return;
2604 }
2605 ctx->peerid = peer->conf.id;
2606 switch (ctx->req.type) {
2607 case IMSG_CTL_SHOW_RIB:
2608 if (prefix_dump_new(peer, ctx->req.aid,
2609 CTL_MSG_HIGH_MARK, ctx, rde_dump_adjout_upcall,
2610 rde_dump_done, rde_dump_throttled) == -1)
2611 goto nomem;
2612 break;
2613 case IMSG_CTL_SHOW_RIB_PREFIX:
2614 if (req->flags & (F_LONGER|F_SHORTER)) {
2615 if (prefix_dump_new(peer, ctx->req.aid,
2616 CTL_MSG_HIGH_MARK, ctx,
2617 rde_dump_adjout_prefix_upcall,
2618 rde_dump_done, rde_dump_throttled) == -1)
2619 goto nomem;
2620 break;
2621 }
2622 switch (req->prefix.aid) {
2623 case AID_INET:
2624 case AID_VPN_IPv4:
2625 hostplen = 32;
2626 break;
2627 case AID_INET6:
2628 case AID_VPN_IPv6:
2629 hostplen = 128;
2630 break;
2631 default:
2632 fatalx("%s: unknown af", __func__);
2633 }
2634
2635 do {
2636 if (req->prefixlen == hostplen)
2637 p = prefix_match(peer, &req->prefix);
2638 else
2639 p = prefix_lookup(peer, &req->prefix,
2640 req->prefixlen);
2641 if (p)
2642 rde_dump_adjout_upcall(p, ctx);
2643 } while ((peer = peer_match(&req->neighbor,
2644 peer->conf.id)));
2645
2646 imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid,
2647 -1, NULL, 0);
2648 free(ctx);
2649 return;
2650 default:
2651 fatalx("%s: unsupported imsg type", __func__);
2652 }
2653
2654 LIST_INSERT_HEAD(&rde_dump_h, ctx, entry);
2655 return;
2656 } else if ((rid = rib_find(req->rib)) == RIB_NOTFOUND) {
2657 log_warnx("%s: no such rib %s", __func__, req->rib);
2658 error = CTL_RES_NOSUCHRIB;
2659 imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
2660 sizeof(error));
2661 free(ctx);
2662 return;
2663 }
2664
2665 switch (ctx->req.type) {
2666 case IMSG_CTL_SHOW_NETWORK:
2667 if (rib_dump_new(rid, ctx->req.aid, CTL_MSG_HIGH_MARK, ctx,
2668 network_dump_upcall, rde_dump_done,
2669 rde_dump_throttled) == -1)
2670 goto nomem;
2671 break;
2672 case IMSG_CTL_SHOW_RIB:
2673 if (rib_dump_new(rid, ctx->req.aid, CTL_MSG_HIGH_MARK, ctx,
2674 rde_dump_upcall, rde_dump_done, rde_dump_throttled) == -1)
2675 goto nomem;
2676 break;
2677 case IMSG_CTL_SHOW_RIB_PREFIX:
2678 if (req->flags & (F_LONGER|F_SHORTER)) {
2679 if (rib_dump_new(rid, ctx->req.aid,
2680 CTL_MSG_HIGH_MARK, ctx, rde_dump_prefix_upcall,
2681 rde_dump_done, rde_dump_throttled) == -1)
2682 goto nomem;
2683 break;
2684 }
2685 switch (req->prefix.aid) {
2686 case AID_INET:
2687 case AID_VPN_IPv4:
2688 hostplen = 32;
2689 break;
2690 case AID_INET6:
2691 case AID_VPN_IPv6:
2692 hostplen = 128;
2693 break;
2694 default:
2695 fatalx("%s: unknown af", __func__);
2696 }
2697 if (req->prefixlen == hostplen)
2698 re = rib_match(rib_byid(rid), &req->prefix);
2699 else
2700 re = rib_get(rib_byid(rid), &req->prefix,
2701 req->prefixlen);
2702 if (re)
2703 rde_dump_upcall(re, ctx);
2704 imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid,
2705 -1, NULL, 0);
2706 free(ctx);
2707 return;
2708 default:
2709 fatalx("%s: unsupported imsg type", __func__);
2710 }
2711 LIST_INSERT_HEAD(&rde_dump_h, ctx, entry);
2712 }
2713
2714 void
rde_dump_ctx_throttle(pid_t pid,int throttle)2715 rde_dump_ctx_throttle(pid_t pid, int throttle)
2716 {
2717 struct rde_dump_ctx *ctx;
2718
2719 LIST_FOREACH(ctx, &rde_dump_h, entry) {
2720 if (ctx->req.pid == pid) {
2721 ctx->throttled = throttle;
2722 return;
2723 }
2724 }
2725 }
2726
2727 void
rde_dump_ctx_terminate(pid_t pid)2728 rde_dump_ctx_terminate(pid_t pid)
2729 {
2730 struct rde_dump_ctx *ctx;
2731
2732 LIST_FOREACH(ctx, &rde_dump_h, entry) {
2733 if (ctx->req.pid == pid) {
2734 rib_dump_terminate(ctx);
2735 return;
2736 }
2737 }
2738 }
2739
2740 static int
rde_mrt_throttled(void * arg)2741 rde_mrt_throttled(void *arg)
2742 {
2743 struct mrt *mrt = arg;
2744
2745 return (mrt->wbuf.queued > SESS_MSG_LOW_MARK);
2746 }
2747
2748 static void
rde_mrt_done(void * ptr,u_int8_t aid)2749 rde_mrt_done(void *ptr, u_int8_t aid)
2750 {
2751 mrt_done(ptr);
2752 }
2753
/*
 * Start an MRT table dump of the configured RIB into the file
 * descriptor handed over from the parent process.
 */
void
rde_dump_mrt_new(struct mrt *mrt, pid_t pid, int fd)
{
	struct rde_mrt_ctx	*ctx;
	u_int16_t		 rid;

	if ((ctx = calloc(1, sizeof(*ctx))) == NULL) {
		log_warn("rde_dump_mrt_new");
		return;
	}
	memcpy(&ctx->mrt, mrt, sizeof(struct mrt));
	TAILQ_INIT(&ctx->mrt.wbuf.bufs);
	ctx->mrt.wbuf.fd = fd;
	ctx->mrt.state = MRT_STATE_RUNNING;
	rid = rib_find(ctx->mrt.rib);
	if (rid == RIB_NOTFOUND) {
		/*
		 * NOTE(review): fd was already stored in ctx->mrt.wbuf.fd
		 * and does not appear to be closed here -- possible fd
		 * leak; confirm against the callers.
		 */
		log_warnx("non existing RIB %s for mrt dump", ctx->mrt.rib);
		free(ctx);
		return;
	}

	/* TABLE_DUMP_V2 requires the peer index table up front */
	if (ctx->mrt.type == MRT_TABLE_DUMP_V2)
		mrt_dump_v2_hdr(&ctx->mrt, conf, &peerlist);

	if (rib_dump_new(rid, AID_UNSPEC, CTL_MSG_HIGH_MARK, &ctx->mrt,
	    mrt_dump_upcall, rde_mrt_done, rde_mrt_throttled) == -1)
		fatal("%s: rib_dump_new", __func__);

	LIST_INSERT_HEAD(&rde_mrts, ctx, entry);
	rde_mrt_cnt++;
}
2785
2786 /*
2787 * kroute specific functions
2788 */
2789 int
rde_l3vpn_import(struct rde_community * comm,struct l3vpn * rd)2790 rde_l3vpn_import(struct rde_community *comm, struct l3vpn *rd)
2791 {
2792 struct filter_set *s;
2793
2794 TAILQ_FOREACH(s, &rd->import, entry) {
2795 if (community_match(comm, &s->action.community, 0))
2796 return (1);
2797 }
2798 return (0);
2799 }
2800
2801 void
rde_send_kroute_flush(struct rib * rib)2802 rde_send_kroute_flush(struct rib *rib)
2803 {
2804 if (imsg_compose(ibuf_main, IMSG_KROUTE_FLUSH, rib->rtableid, 0, -1,
2805 NULL, 0) == -1)
2806 fatal("%s %d imsg_compose error", __func__, __LINE__);
2807 }
2808
/*
 * Forward a change of the preferred prefix to the parent process as a
 * kernel route update.  Self-announced prefixes never touch the FIB.
 * VPN prefixes are fanned out to the rtables of all matching L3 VPNs
 * instead of the RIB's own rtable, and only from the Loc-RIB.
 */
void
rde_send_kroute(struct rib *rib, struct prefix *new, struct prefix *old)
{
	struct kroute_full	 kr;
	struct bgpd_addr	 addr;
	struct prefix		*p;
	struct rde_aspath	*asp;
	struct l3vpn		*vpn;
	enum imsg_type		 type;

	/*
	 * Make sure that self announce prefixes are not committed to the
	 * FIB. If both prefixes are unreachable no update is needed.
	 */
	if ((old == NULL || prefix_aspath(old)->flags & F_PREFIX_ANNOUNCED) &&
	    (new == NULL || prefix_aspath(new)->flags & F_PREFIX_ANNOUNCED))
		return;

	if (new == NULL || prefix_aspath(new)->flags & F_PREFIX_ANNOUNCED) {
		type = IMSG_KROUTE_DELETE;
		p = old;
	} else {
		type = IMSG_KROUTE_CHANGE;
		p = new;
	}

	asp = prefix_aspath(p);
	pt_getaddr(p->pt, &addr);
	bzero(&kr, sizeof(kr));
	memcpy(&kr.prefix, &addr, sizeof(kr.prefix));
	kr.prefixlen = p->pt->prefixlen;
	if (prefix_nhflags(p) == NEXTHOP_REJECT)
		kr.flags |= F_REJECT;
	if (prefix_nhflags(p) == NEXTHOP_BLACKHOLE)
		kr.flags |= F_BLACKHOLE;
	if (type == IMSG_KROUTE_CHANGE)
		memcpy(&kr.nexthop, &prefix_nexthop(p)->true_nexthop,
		    sizeof(kr.nexthop));
	strlcpy(kr.label, rtlabel_id2name(asp->rtlabelid), sizeof(kr.label));

	switch (addr.aid) {
	case AID_VPN_IPv4:
	case AID_VPN_IPv6:
		if (!(rib->flags & F_RIB_LOCAL))
			/* not Loc-RIB, no update for VPNs */
			break;

		SIMPLEQ_FOREACH(vpn, &conf->l3vpns, entry) {
			/* only VPNs whose import targets match */
			if (!rde_l3vpn_import(prefix_communities(p), vpn))
				continue;
			/* must send exit_nexthop so that correct MPLS tunnel
			 * is chosen
			 */
			if (type == IMSG_KROUTE_CHANGE)
				memcpy(&kr.nexthop,
				    &prefix_nexthop(p)->exit_nexthop,
				    sizeof(kr.nexthop));
			/* XXX not ideal but this will change */
			kr.ifindex = if_nametoindex(vpn->ifmpe);
			if (imsg_compose(ibuf_main, type, vpn->rtableid, 0, -1,
			    &kr, sizeof(kr)) == -1)
				fatal("%s %d imsg_compose error", __func__,
				    __LINE__);
		}
		break;
	default:
		if (imsg_compose(ibuf_main, type, rib->rtableid, 0, -1,
		    &kr, sizeof(kr)) == -1)
			fatal("%s %d imsg_compose error", __func__, __LINE__);
		break;
	}
}
2881
2882 /*
2883 * update specific functions
2884 */
/*
 * Nonzero when the last run of rde_generate_updates() encountered at
 * least one established peer with PEERFLAG_EVALUATE_ALL set.
 */
static int rde_eval_all;

/* Report whether the last update run had "evaluate all" peers. */
int
rde_evaluate_all(void)
{
	return rde_eval_all;
}
2892
/*
 * Fan out a change of the (best) prefix for a RIB entry to all peers
 * exporting that RIB and, unless called for a non-best path change
 * (eval_all), push the change into the FIB as well.  With eval_all set
 * only peers with PEERFLAG_EVALUATE_ALL are updated since the overall
 * best path did not change.
 */
void
rde_generate_updates(struct rib *rib, struct prefix *new, struct prefix *old,
    int eval_all)
{
	struct rde_peer	*peer;
	u_int8_t	 aid;

	/*
	 * If old is != NULL we know it was active and should be removed.
	 * If new is != NULL we know it is reachable and then we should
	 * generate an update.
	 */
	if (old == NULL && new == NULL)
		return;

	if (!eval_all && (rib->flags & F_RIB_NOFIB) == 0)
		rde_send_kroute(rib, new, old);

	if (new)
		aid = new->pt->aid;
	else
		aid = old->pt->aid;

	rde_eval_all = 0;
	LIST_FOREACH(peer, &peerlist, peer_l) {
		/* skip ourself */
		if (peer == peerself)
			continue;
		if (peer->state != PEER_UP)
			continue;
		/* handle evaluate all, keep track if it is needed */
		if (peer->conf.flags & PEERFLAG_EVALUATE_ALL)
			rde_eval_all = 1;
		else if (eval_all)
			/* skip default peers if the best path didn't change */
			continue;
		/* skip peers using a different rib */
		if (peer->loc_rib_id != rib->id)
			continue;
		/* check if peer actually supports the address family */
		if (peer->capa.mp[aid] == 0)
			continue;
		/* skip peers with special export types */
		if (peer->conf.export_type == EXPORT_NONE ||
		    peer->conf.export_type == EXPORT_DEFAULT_ROUTE)
			continue;

		up_generate_updates(out_rules, peer, new, old);
	}
}
2943
2944 static void
rde_up_flush_upcall(struct prefix * p,void * ptr)2945 rde_up_flush_upcall(struct prefix *p, void *ptr)
2946 {
2947 up_generate_updates(out_rules, prefix_peer(p), NULL, p);
2948 }
2949
/* scratch buffer the queue runners assemble outgoing UPDATE messages in */
u_char queue_buf[4096];
2951
2952 int
rde_update_queue_pending(void)2953 rde_update_queue_pending(void)
2954 {
2955 struct rde_peer *peer;
2956 u_int8_t aid;
2957
2958 if (ibuf_se && ibuf_se->w.queued >= SESS_MSG_HIGH_MARK)
2959 return 0;
2960
2961 LIST_FOREACH(peer, &peerlist, peer_l) {
2962 if (peer->conf.id == 0)
2963 continue;
2964 if (peer->state != PEER_UP)
2965 continue;
2966 if (peer->throttled)
2967 continue;
2968 for (aid = 0; aid < AID_MAX; aid++) {
2969 if (!RB_EMPTY(&peer->updates[aid]) ||
2970 !RB_EMPTY(&peer->withdraws[aid]))
2971 return 1;
2972 }
2973 }
2974 return 0;
2975 }
2976
/*
 * Drain pending IPv4 updates and withdraws of all established peers
 * into UPDATE messages for the session engine, at most
 * RDE_RUNNER_ROUNDS messages per call to keep the RDE responsive.
 */
void
rde_update_queue_runner(void)
{
	struct rde_peer	*peer;
	int		 r, sent, max = RDE_RUNNER_ROUNDS, eor;
	u_int16_t	 len, wpos;

	/* payload space available once the imsg/BGP header is accounted */
	len = sizeof(queue_buf) - MSGSIZE_HEADER;
	do {
		sent = 0;
		LIST_FOREACH(peer, &peerlist, peer_l) {
			if (peer->conf.id == 0)	/* no real session (e.g. self) */
				continue;
			if (peer->state != PEER_UP)
				continue;
			if (peer->throttled)
				continue;
			eor = 0;
			wpos = 0;
			/* first withdraws, save 2 bytes for path attributes */
			if ((r = up_dump_withdraws(queue_buf, len - 2, peer,
			    AID_INET)) == -1)
				continue;
			wpos += r;

			/* now bgp path attributes unless it is the EoR mark */
			if (up_is_eor(peer, AID_INET)) {
				eor = 1;
				/* EoR: zero-length total path attributes */
				bzero(queue_buf + wpos, 2);
				wpos += 2;
			} else {
				r = up_dump_attrnlri(queue_buf + wpos,
				    len - wpos, peer);
				wpos += r;
			}

			/*
			 * finally send message to SE; 4 bytes are just the
			 * two 2-byte length fields, i.e. an empty UPDATE
			 */
			if (wpos > 4) {
				if (imsg_compose(ibuf_se, IMSG_UPDATE,
				    peer->conf.id, 0, -1, queue_buf,
				    wpos) == -1)
					fatal("%s %d imsg_compose error",
					    __func__, __LINE__);
				sent++;
			}
			if (eor)
				rde_peer_send_eor(peer, AID_INET);
		}
		max -= sent;
	} while (sent != 0 && max > 0);
}
3028
/*
 * Drain pending updates and withdraws for a multiprotocol address
 * family: MP_UNREACH_NLRI withdraws are flushed first, then the
 * reachable announcements, each pass bounded by RDE_RUNNER_ROUNDS / 2.
 */
void
rde_update6_queue_runner(u_int8_t aid)
{
	struct rde_peer	*peer;
	int		 r, sent, max = RDE_RUNNER_ROUNDS / 2;
	u_int16_t	 len;

	/* first withdraws ... */
	do {
		sent = 0;
		LIST_FOREACH(peer, &peerlist, peer_l) {
			if (peer->conf.id == 0)	/* no real session (e.g. self) */
				continue;
			if (peer->state != PEER_UP)
				continue;
			if (peer->throttled)
				continue;
			len = sizeof(queue_buf) - MSGSIZE_HEADER;
			r = up_dump_mp_unreach(queue_buf, len, peer, aid);
			if (r == -1)	/* nothing to withdraw */
				continue;
			/* finally send message to SE */
			if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
			    0, -1, queue_buf, r) == -1)
				fatal("%s %d imsg_compose error", __func__,
				    __LINE__);
			sent++;
		}
		max -= sent;
	} while (sent != 0 && max > 0);

	/* ... then updates */
	max = RDE_RUNNER_ROUNDS / 2;
	do {
		sent = 0;
		LIST_FOREACH(peer, &peerlist, peer_l) {
			if (peer->conf.id == 0)
				continue;
			if (peer->state != PEER_UP)
				continue;
			if (peer->throttled)
				continue;
			len = sizeof(queue_buf) - MSGSIZE_HEADER;
			/* the EoR mark replaces a regular update */
			if (up_is_eor(peer, aid)) {
				rde_peer_send_eor(peer, aid);
				continue;
			}
			r = up_dump_mp_reach(queue_buf, len, peer, aid);
			if (r == 0)	/* nothing to announce */
				continue;

			/* finally send message to SE */
			if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
			    0, -1, queue_buf, r) == -1)
				fatal("%s %d imsg_compose error", __func__,
				    __LINE__);
			sent++;
		}
		max -= sent;
	} while (sent != 0 && max > 0);
}
3090
3091 /*
3092 * pf table specific functions
3093 */
/* reference-counted (prefix, pf table id) pair tracked in pftable_tree */
struct rde_pftable_node {
	RB_ENTRY(rde_pftable_node) entry;
	struct pt_entry *prefix;	/* referenced prefix table entry */
	int refcnt;			/* prefixes currently using this pair */
	u_int16_t id;			/* pf table id */
};
RB_HEAD(rde_pftable_tree, rde_pftable_node);
3101
3102 static inline int
rde_pftable_cmp(struct rde_pftable_node * a,struct rde_pftable_node * b)3103 rde_pftable_cmp(struct rde_pftable_node *a, struct rde_pftable_node *b)
3104 {
3105 if (a->prefix > b->prefix)
3106 return 1;
3107 if (a->prefix < b->prefix)
3108 return -1;
3109 return (a->id - b->id);
3110 }
3111
RB_GENERATE_STATIC(rde_pftable_tree, rde_pftable_node, entry, rde_pftable_cmp);

/* all (prefix, pf table) pairs currently exported to pf */
struct rde_pftable_tree pftable_tree = RB_INITIALIZER(&pftable_tree);
/* set when a pf table change still awaits an IMSG_PFTABLE_COMMIT */
int need_commit;
3116
3117 static void
rde_pftable_send(u_int16_t id,struct pt_entry * pt,int del)3118 rde_pftable_send(u_int16_t id, struct pt_entry *pt, int del)
3119 {
3120 struct pftable_msg pfm;
3121
3122 if (id == 0)
3123 return;
3124
3125 /* do not run while cleaning up */
3126 if (rde_quit)
3127 return;
3128
3129 bzero(&pfm, sizeof(pfm));
3130 strlcpy(pfm.pftable, pftable_id2name(id), sizeof(pfm.pftable));
3131 pt_getaddr(pt, &pfm.addr);
3132 pfm.len = pt->prefixlen;
3133
3134 if (imsg_compose(ibuf_main,
3135 del ? IMSG_PFTABLE_REMOVE : IMSG_PFTABLE_ADD,
3136 0, 0, -1, &pfm, sizeof(pfm)) == -1)
3137 fatal("%s %d imsg_compose error", __func__, __LINE__);
3138
3139 need_commit = 1;
3140 }
3141
/*
 * Account a prefix entering pf table id; the first reference of a
 * (prefix, id) pair sends an IMSG_PFTABLE_ADD to the parent.
 */
void
rde_pftable_add(u_int16_t id, struct prefix *p)
{
	struct rde_pftable_node *pfn, node;

	memset(&node, 0, sizeof(node));
	node.prefix = p->pt;
	node.id = id;

	pfn = RB_FIND(rde_pftable_tree, &pftable_tree, &node);
	if (pfn == NULL) {
		if ((pfn = calloc(1, sizeof(*pfn))) == NULL)
			fatal("%s", __func__);
		/* hold a reference on the prefix for the tree node */
		pfn->prefix = pt_ref(p->pt);
		pfn->id = id;

		if (RB_INSERT(rde_pftable_tree, &pftable_tree, pfn) != NULL)
			fatalx("%s: tree corrupt", __func__);

		rde_pftable_send(id, p->pt, 0);
	}
	pfn->refcnt++;
}
3165
/*
 * Drop one reference of a (prefix, id) pair; the last reference sends
 * an IMSG_PFTABLE_REMOVE to the parent and frees the tree node.
 */
void
rde_pftable_del(u_int16_t id, struct prefix *p)
{
	struct rde_pftable_node *pfn, node;

	memset(&node, 0, sizeof(node));
	node.prefix = p->pt;
	node.id = id;

	pfn = RB_FIND(rde_pftable_tree, &pftable_tree, &node);
	if (pfn == NULL)
		return;

	if (--pfn->refcnt <= 0) {
		rde_pftable_send(id, p->pt, 1);

		if (RB_REMOVE(rde_pftable_tree, &pftable_tree, pfn) == NULL)
			fatalx("%s: tree corrupt", __func__);

		/* release the reference taken in rde_pftable_add() */
		pt_unref(pfn->prefix);
		free(pfn);
	}
}
3189
3190 void
rde_commit_pftable(void)3191 rde_commit_pftable(void)
3192 {
3193 /* do not run while cleaning up */
3194 if (rde_quit)
3195 return;
3196
3197 if (!need_commit)
3198 return;
3199
3200 if (imsg_compose(ibuf_main, IMSG_PFTABLE_COMMIT, 0, 0, -1, NULL, 0) ==
3201 -1)
3202 fatal("%s %d imsg_compose error", __func__, __LINE__);
3203
3204 need_commit = 0;
3205 }
3206
3207 /*
3208 * nexthop specific functions
3209 */
3210 void
rde_send_nexthop(struct bgpd_addr * next,int insert)3211 rde_send_nexthop(struct bgpd_addr *next, int insert)
3212 {
3213 int type;
3214
3215 if (insert)
3216 type = IMSG_NEXTHOP_ADD;
3217 else
3218 type = IMSG_NEXTHOP_REMOVE;
3219
3220 if (imsg_compose(ibuf_main, type, 0, 0, -1, next,
3221 sizeof(struct bgpd_addr)) == -1)
3222 fatal("%s %d imsg_compose error", __func__, __LINE__);
3223 }
3224
3225 /*
3226 * soft reconfig specific functions
3227 */
3228 void
rde_reload_done(void)3229 rde_reload_done(void)
3230 {
3231 struct rde_peer *peer;
3232 struct filter_head *fh;
3233 struct rde_prefixset_head prefixsets_old;
3234 struct rde_prefixset_head originsets_old;
3235 struct as_set_head as_sets_old;
3236 u_int16_t rid;
3237 int reload = 0;
3238
3239 softreconfig = 0;
3240
3241 SIMPLEQ_INIT(&prefixsets_old);
3242 SIMPLEQ_INIT(&originsets_old);
3243 SIMPLEQ_INIT(&as_sets_old);
3244 SIMPLEQ_CONCAT(&prefixsets_old, &conf->rde_prefixsets);
3245 SIMPLEQ_CONCAT(&originsets_old, &conf->rde_originsets);
3246 SIMPLEQ_CONCAT(&as_sets_old, &conf->as_sets);
3247
3248 /* merge the main config */
3249 copy_config(conf, nconf);
3250
3251 /* need to copy the sets and roa table and clear them in nconf */
3252 SIMPLEQ_CONCAT(&conf->rde_prefixsets, &nconf->rde_prefixsets);
3253 SIMPLEQ_CONCAT(&conf->rde_originsets, &nconf->rde_originsets);
3254 SIMPLEQ_CONCAT(&conf->as_sets, &nconf->as_sets);
3255
3256 /* apply new set of l3vpn, sync will be done later */
3257 free_l3vpns(&conf->l3vpns);
3258 SIMPLEQ_CONCAT(&conf->l3vpns, &nconf->l3vpns);
3259 /* XXX WHERE IS THE SYNC ??? */
3260
3261 free_config(nconf);
3262 nconf = NULL;
3263
3264 /* sync peerself with conf */
3265 peerself->remote_bgpid = ntohl(conf->bgpid);
3266 peerself->conf.local_as = conf->as;
3267 peerself->conf.remote_as = conf->as;
3268 peerself->conf.remote_addr.aid = AID_INET;
3269 peerself->conf.remote_addr.v4.s_addr = conf->bgpid;
3270 peerself->conf.remote_masklen = 32;
3271 peerself->short_as = conf->short_as;
3272
3273 rde_mark_prefixsets_dirty(&prefixsets_old, &conf->rde_prefixsets);
3274 rde_mark_prefixsets_dirty(&originsets_old, &conf->rde_originsets);
3275 as_sets_mark_dirty(&as_sets_old, &conf->as_sets);
3276
3277 /*
3278 * make the new filter rules the active one but keep the old for
3279 * softrconfig. This is needed so that changes happening are using
3280 * the right filters.
3281 */
3282 fh = out_rules;
3283 out_rules = out_rules_tmp;
3284 out_rules_tmp = fh;
3285
3286 rde_filter_calc_skip_steps(out_rules);
3287
3288 /* check if filter changed */
3289 LIST_FOREACH(peer, &peerlist, peer_l) {
3290 if (peer->conf.id == 0)
3291 continue;
3292 peer->reconf_out = 0;
3293 peer->reconf_rib = 0;
3294 if (peer->loc_rib_id != rib_find(peer->conf.rib)) {
3295 log_peer_info(&peer->conf, "rib change, reloading");
3296 peer->loc_rib_id = rib_find(peer->conf.rib);
3297 if (peer->loc_rib_id == RIB_NOTFOUND)
3298 fatalx("King Bula's peer met an unknown RIB");
3299 peer->reconf_rib = 1;
3300 softreconfig++;
3301 if (prefix_dump_new(peer, AID_UNSPEC,
3302 RDE_RUNNER_ROUNDS, NULL, rde_up_flush_upcall,
3303 rde_softreconfig_in_done, NULL) == -1)
3304 fatal("%s: prefix_dump_new", __func__);
3305 log_peer_info(&peer->conf, "flushing Adj-RIB-Out");
3306 softreconfig++; /* account for the running flush */
3307 continue;
3308 }
3309 if (!rde_filter_equal(out_rules, out_rules_tmp, peer)) {
3310 char *p = log_fmt_peer(&peer->conf);
3311 log_debug("out filter change: reloading peer %s", p);
3312 free(p);
3313 peer->reconf_out = 1;
3314 }
3315 }
3316 /* bring ribs in sync */
3317 for (rid = 0; rid < rib_size; rid++) {
3318 struct rib *rib = rib_byid(rid);
3319 if (rib == NULL)
3320 continue;
3321 rde_filter_calc_skip_steps(rib->in_rules_tmp);
3322
3323 /* flip rules, make new active */
3324 fh = rib->in_rules;
3325 rib->in_rules = rib->in_rules_tmp;
3326 rib->in_rules_tmp = fh;
3327
3328 switch (rib->state) {
3329 case RECONF_DELETE:
3330 rib_free(rib);
3331 break;
3332 case RECONF_RELOAD:
3333 rib_update(rib);
3334 rib->state = RECONF_KEEP;
3335 /* FALLTHROUGH */
3336 case RECONF_KEEP:
3337 if (rde_filter_equal(rib->in_rules,
3338 rib->in_rules_tmp, NULL))
3339 /* rib is in sync */
3340 break;
3341 log_debug("in filter change: reloading RIB %s",
3342 rib->name);
3343 rib->state = RECONF_RELOAD;
3344 reload++;
3345 break;
3346 case RECONF_REINIT:
3347 /* new rib */
3348 rib->state = RECONF_RELOAD;
3349 reload++;
3350 break;
3351 case RECONF_NONE:
3352 break;
3353 }
3354 filterlist_free(rib->in_rules_tmp);
3355 rib->in_rules_tmp = NULL;
3356 }
3357
3358 filterlist_free(out_rules_tmp);
3359 out_rules_tmp = NULL;
3360 /* old filters removed, free all sets */
3361 free_rde_prefixsets(&prefixsets_old);
3362 free_rde_prefixsets(&originsets_old);
3363 as_sets_free(&as_sets_old);
3364
3365 log_info("RDE reconfigured");
3366
3367 if (reload > 0) {
3368 softreconfig++;
3369 if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC, RDE_RUNNER_ROUNDS,
3370 rib_byid(RIB_ADJ_IN), rde_softreconfig_in,
3371 rde_softreconfig_in_done, NULL) == -1)
3372 fatal("%s: rib_dump_new", __func__);
3373 log_info("running softreconfig in");
3374 } else {
3375 rde_softreconfig_in_done(NULL, AID_UNSPEC);
3376 }
3377 }
3378
/*
 * Second stage of a soft reconfiguration.  Called as the done-callback of
 * the softreconfig-in dumps (arg != NULL), or invoked directly with
 * arg == NULL when no Adj-RIB-In reload was needed.  Once all in-dumps
 * finished this starts the FIB syncs / RIB re-evaluations, regenerates
 * announcements for peers whose outbound config changed and kicks off the
 * softreconfig-out dumps.
 */
static void
rde_softreconfig_in_done(void *arg, u_int8_t dummy)
{
	struct rde_peer	*peer;
	u_int16_t	 i;

	if (arg != NULL) {
		softreconfig--;
		/* one guy done but other dumps are still running */
		if (softreconfig > 0)
			return;

		log_info("softreconfig in done");
	}

	/* now do the Adj-RIB-Out sync and a possible FIB sync */
	softreconfig = 0;
	for (i = 0; i < rib_size; i++) {
		struct rib *rib = rib_byid(i);
		if (rib == NULL)
			continue;
		rib->state = RECONF_NONE;
		if (rib->fibstate == RECONF_RELOAD) {
			/* push the currently active routes down to the FIB */
			if (rib_dump_new(i, AID_UNSPEC, RDE_RUNNER_ROUNDS,
			    rib, rde_softreconfig_sync_fib,
			    rde_softreconfig_sync_done, NULL) == -1)
				fatal("%s: rib_dump_new", __func__);
			softreconfig++;
			log_info("starting fib sync for rib %s",
			    rib->name);
		} else if (rib->fibstate == RECONF_REINIT) {
			/* evaluation settings changed: re-run the decision
			 * process for every prefix of this RIB */
			if (rib_dump_new(i, AID_UNSPEC, RDE_RUNNER_ROUNDS,
			    rib, rde_softreconfig_sync_reeval,
			    rde_softreconfig_sync_done, NULL) == -1)
				fatal("%s: rib_dump_new", __func__);
			softreconfig++;
			log_info("starting re-evaluation of rib %s",
			    rib->name);
		}
	}

	LIST_FOREACH(peer, &peerlist, peer_l) {
		u_int8_t aid;

		if (peer->reconf_out) {
			if (peer->conf.export_type == EXPORT_NONE) {
				/* nothing to do here */
				peer->reconf_out = 0;
			} else if (peer->conf.export_type ==
			    EXPORT_DEFAULT_ROUTE) {
				/* just resend the default route */
				for (aid = 0; aid < AID_MAX; aid++) {
					if (peer->capa.mp[aid])
						up_generate_default(out_rules,
						    peer, aid);
				}
				peer->reconf_out = 0;
			} else
				/* regular export: mark the peer's Local-RIB
				 * so the loop below dumps it once for all
				 * peers attached to it */
				rib_byid(peer->loc_rib_id)->state =
				    RECONF_RELOAD;
		} else if (peer->reconf_rib) {
			/* dump the full table to neighbors that changed rib */
			for (aid = 0; aid < AID_MAX; aid++) {
				if (peer->capa.mp[aid])
					peer_dump(peer, aid);
			}
		}
	}

	for (i = 0; i < rib_size; i++) {
		struct rib *rib = rib_byid(i);
		if (rib == NULL)
			continue;
		if (rib->state == RECONF_RELOAD) {
			if (rib_dump_new(i, AID_UNSPEC, RDE_RUNNER_ROUNDS,
			    rib, rde_softreconfig_out,
			    rde_softreconfig_out_done, NULL) == -1)
				fatal("%s: rib_dump_new", __func__);
			softreconfig++;
			log_info("starting softreconfig out for rib %s",
			    rib->name);
		}
	}

	/* if nothing to do move to last stage */
	if (softreconfig == 0)
		rde_softreconfig_done();
}
3467
3468 static void
rde_softreconfig_out_done(void * arg,u_int8_t aid)3469 rde_softreconfig_out_done(void *arg, u_int8_t aid)
3470 {
3471 struct rib *rib = arg;
3472
3473 /* this RIB dump is done */
3474 log_info("softreconfig out done for %s", rib->name);
3475
3476 /* check if other dumps are still running */
3477 if (--softreconfig == 0)
3478 rde_softreconfig_done();
3479 }
3480
3481 static void
rde_softreconfig_done(void)3482 rde_softreconfig_done(void)
3483 {
3484 u_int16_t i;
3485
3486 for (i = 0; i < rib_size; i++) {
3487 struct rib *rib = rib_byid(i);
3488 if (rib == NULL)
3489 continue;
3490 rib->state = RECONF_NONE;
3491 }
3492
3493 log_info("RDE soft reconfiguration done");
3494 imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
3495 -1, NULL, 0);
3496 }
3497
/*
 * rib_dump_new callback walking the Adj-RIB-In: re-run the input filters
 * for every prefix against each Local-RIB marked RECONF_RELOAD and update
 * or withdraw the prefix there according to the new filter verdict.
 */
static void
rde_softreconfig_in(struct rib_entry *re, void *bula)
{
	struct filterstate	 state;
	struct rib		*rib;
	struct prefix		*p;
	struct pt_entry		*pt;
	struct rde_peer		*peer;
	struct rde_aspath	*asp;
	enum filter_actions	 action;
	struct bgpd_addr	 prefix;
	u_int16_t		 i;

	pt = re->prefix;
	pt_getaddr(pt, &prefix);
	LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
		asp = prefix_aspath(p);
		peer = prefix_peer(p);

		/* skip announced networks, they are never filtered */
		if (asp->flags & F_PREFIX_ANNOUNCED)
			continue;

		for (i = RIB_LOC_START; i < rib_size; i++) {
			rib = rib_byid(i);
			if (rib == NULL)
				continue;

			/* only RIBs whose input filters changed are redone */
			if (rib->state != RECONF_RELOAD)
				continue;

			rde_filterstate_prep(&state, asp, prefix_communities(p),
			    prefix_nexthop(p), prefix_nhflags(p));
			action = rde_filter(rib->in_rules, peer, peer, &prefix,
			    pt->prefixlen, p->validation_state, &state);

			if (action == ACTION_ALLOW) {
				/* update Local-RIB */
				prefix_update(rib, peer, &state, &prefix,
				    pt->prefixlen, p->validation_state);
			} else if (action == ACTION_DENY) {
				/* remove from Local-RIB */
				prefix_withdraw(rib, peer, &prefix,
				    pt->prefixlen);
			}

			/* release references taken by rde_filterstate_prep() */
			rde_filterstate_clean(&state);
		}
	}
}
3548
3549 static void
rde_softreconfig_out(struct rib_entry * re,void * bula)3550 rde_softreconfig_out(struct rib_entry *re, void *bula)
3551 {
3552 struct prefix *p = re->active;
3553 struct rde_peer *peer;
3554
3555 if (p == NULL)
3556 /* no valid path for prefix */
3557 return;
3558
3559 LIST_FOREACH(peer, &peerlist, peer_l) {
3560 if (peer->loc_rib_id == re->rib_id && peer->reconf_out)
3561 /* Regenerate all updates. */
3562 up_generate_updates(out_rules, peer, p, p);
3563 }
3564 }
3565
/*
 * rib_dump_new callback used when a RIB's evaluation settings changed
 * (fibstate RECONF_REINIT): either take all prefixes of the entry out of
 * the decision process (F_RIB_NOEVALUATE) or re-evaluate every prefix to
 * elect a new best path.
 */
static void
rde_softreconfig_sync_reeval(struct rib_entry *re, void *arg)
{
	struct prefix_list	 prefixes;
	struct prefix		*p, *next;
	struct rib		*rib = arg;

	if (rib->flags & F_RIB_NOEVALUATE) {
		/*
		 * evaluation process is turned off
		 * so remove all prefixes from adj-rib-out
		 * also unlink nexthop if it was linked
		 */
		LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
			if (p->flags & PREFIX_NEXTHOP_LINKED)
				nexthop_unlink(p);
		}
		if (re->active) {
			/* withdraw the former best path from Adj-RIB-Out */
			rde_generate_updates(rib, NULL, re->active, 0);
			re->active = NULL;
		}
		return;
	}

	/* evaluation process is turned on, so evaluate all prefixes again */
	re->active = NULL;
	/* detach the prefix list; prefix_evaluate() presumably re-inserts
	 * each prefix into re->prefix_h — confirm in prefix_evaluate() */
	prefixes = re->prefix_h;
	LIST_INIT(&re->prefix_h);

	LIST_FOREACH_SAFE(p, &prefixes, entry.list.rib, next) {
		/* need to re-link the nexthop if not already linked */
		if ((p->flags & PREFIX_NEXTHOP_LINKED) == 0)
			nexthop_link(p);
		prefix_evaluate(re, p, p);
	}
}
3602
3603 static void
rde_softreconfig_sync_fib(struct rib_entry * re,void * bula)3604 rde_softreconfig_sync_fib(struct rib_entry *re, void *bula)
3605 {
3606 if (re->active)
3607 rde_send_kroute(re_rib(re), re->active, NULL);
3608 }
3609
3610 static void
rde_softreconfig_sync_done(void * arg,u_int8_t aid)3611 rde_softreconfig_sync_done(void *arg, u_int8_t aid)
3612 {
3613 struct rib *rib = arg;
3614
3615 /* this RIB dump is done */
3616 if (rib->fibstate == RECONF_RELOAD)
3617 log_info("fib sync done for %s", rib->name);
3618 else
3619 log_info("re-evaluation done for %s", rib->name);
3620 rib->fibstate = RECONF_NONE;
3621
3622 /* check if other dumps are still running */
3623 if (--softreconfig == 0)
3624 rde_softreconfig_done();
3625 }
3626
3627 /*
3628 * ROA specific functions. The roa set is updated independent of the config
3629 * so this runs outside of the softreconfig handlers.
3630 */
static void
rde_roa_softreload(struct rib_entry *re, void *bula)
{
	/*
	 * rib_dump_new callback for a ROA set change: recompute the ROA
	 * validation state of every Adj-RIB-In prefix and, where it changed,
	 * re-run the input filters for all Local-RIBs (the filter verdict
	 * takes the validation state as an input).
	 */
	struct filterstate	 state;
	struct rib		*rib;
	struct prefix		*p;
	struct pt_entry		*pt;
	struct rde_peer		*peer;
	struct rde_aspath	*asp;
	enum filter_actions	 action;
	struct bgpd_addr	 prefix;
	u_int8_t		 vstate;
	u_int16_t		 i;

	pt = re->prefix;
	pt_getaddr(pt, &prefix);
	LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
		asp = prefix_aspath(p);
		peer = prefix_peer(p);

		/* ROA validation state update */
		vstate = rde_roa_validity(&rde_roa,
		    &prefix, pt->prefixlen, aspath_origin(asp->aspath));
		/* unchanged state: the filter outcome cannot differ, skip */
		if (vstate == p->validation_state)
			continue;
		p->validation_state = vstate;

		/* skip announced networks, they are never filtered */
		if (asp->flags & F_PREFIX_ANNOUNCED)
			continue;

		/* unlike rde_softreconfig_in() this touches every Local-RIB,
		 * not just those marked RECONF_RELOAD */
		for (i = RIB_LOC_START; i < rib_size; i++) {
			rib = rib_byid(i);
			if (rib == NULL)
				continue;

			rde_filterstate_prep(&state, asp, prefix_communities(p),
			    prefix_nexthop(p), prefix_nhflags(p));
			action = rde_filter(rib->in_rules, peer, peer, &prefix,
			    pt->prefixlen, p->validation_state, &state);

			if (action == ACTION_ALLOW) {
				/* update Local-RIB */
				prefix_update(rib, peer, &state, &prefix,
				    pt->prefixlen, p->validation_state);
			} else if (action == ACTION_DENY) {
				/* remove from Local-RIB */
				prefix_withdraw(rib, peer, &prefix,
				    pt->prefixlen);
			}

			rde_filterstate_clean(&state);
		}
	}
}
3686
3687 static void
rde_roa_softreload_done(void * arg,u_int8_t aid)3688 rde_roa_softreload_done(void *arg, u_int8_t aid)
3689 {
3690 /* the roa update is done */
3691 log_info("ROA softreload done");
3692 }
3693
3694 static void
rde_roa_reload(void)3695 rde_roa_reload(void)
3696 {
3697 struct rde_prefixset roa_old;
3698
3699 roa_old = rde_roa;
3700 rde_roa = roa_new;
3701 memset(&roa_new, 0, sizeof(roa_new));
3702
3703 /* check if roa changed */
3704 if (trie_equal(&rde_roa.th, &roa_old.th)) {
3705 rde_roa.lastchange = roa_old.lastchange;
3706 trie_free(&roa_old.th); /* old roa no longer needed */
3707 return;
3708 }
3709
3710 rde_roa.lastchange = getmonotime();
3711 trie_free(&roa_old.th); /* old roa no longer needed */
3712
3713 log_debug("ROA change: reloading Adj-RIB-In");
3714 if (rib_dump_new(RIB_ADJ_IN, AID_UNSPEC, RDE_RUNNER_ROUNDS,
3715 rib_byid(RIB_ADJ_IN), rde_roa_softreload,
3716 rde_roa_softreload_done, NULL) == -1)
3717 fatal("%s: rib_dump_new", __func__);
3718 }
3719
3720 /*
3721 * generic helper function
3722 */
/* Return the local AS number from the current RDE config. */
u_int32_t
rde_local_as(void)
{
	return (conf->as);
}
3728
/* Return the decision-process related flags of the current config. */
int
rde_decisionflags(void)
{
	return (conf->flags & BGPD_FLAG_DECISION_MASK);
}
3734
/* Return non-zero if the peer negotiated the 4-byte AS number capability. */
int
rde_as4byte(struct rde_peer *peer)
{
	return (peer->capa.as4byte);
}
3740
/* Return non-zero if the peer is configured with the no-as-set flag. */
static int
rde_no_as_set(struct rde_peer *peer)
{
	return (peer->conf.flags & PEERFLAG_NO_AS_SET);
}
3746
/* End-of-RIB marker, RFC 4724 */

/*
 * Handle a received End-of-RIB marker for the given AID: account it and
 * forward the event to the session engine.
 */
static void
rde_peer_recv_eor(struct rde_peer *peer, u_int8_t aid)
{
	peer->prefix_rcvd_eor++;

	/*
	 * First notify SE to avert a possible race with the restart timeout.
	 * If the timeout fires before this imsg is processed by the SE it will
	 * result in the same operation since the timeout issues a FLUSH which
	 * does the same as the RESTARTED action (flushing stale routes).
	 * The logic in the SE is so that only one of FLUSH or RESTARTED will
	 * be sent back to the RDE and so peer_flush is only called once.
	 */
	if (imsg_compose(ibuf_se, IMSG_SESSION_RESTARTED, peer->conf.id,
	    0, -1, &aid, sizeof(aid)) == -1)
		fatal("imsg_compose error while receiving EoR");

	log_peer_info(&peer->conf, "received %s EOR marker",
	    aid2str(aid));
}
3768
3769 static void
rde_peer_send_eor(struct rde_peer * peer,u_int8_t aid)3770 rde_peer_send_eor(struct rde_peer *peer, u_int8_t aid)
3771 {
3772 u_int16_t afi;
3773 u_int8_t safi;
3774
3775 peer->prefix_sent_eor++;
3776
3777 if (aid == AID_INET) {
3778 u_char null[4];
3779
3780 bzero(&null, 4);
3781 if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
3782 0, -1, &null, 4) == -1)
3783 fatal("imsg_compose error while sending EoR");
3784 } else {
3785 u_int16_t i;
3786 u_char buf[10];
3787
3788 if (aid2afi(aid, &afi, &safi) == -1)
3789 fatalx("peer_send_eor: bad AID");
3790
3791 i = 0; /* v4 withdrawn len */
3792 bcopy(&i, &buf[0], sizeof(i));
3793 i = htons(6); /* path attr len */
3794 bcopy(&i, &buf[2], sizeof(i));
3795 buf[4] = ATTR_OPTIONAL;
3796 buf[5] = ATTR_MP_UNREACH_NLRI;
3797 buf[6] = 3; /* withdrawn len */
3798 i = htons(afi);
3799 bcopy(&i, &buf[7], sizeof(i));
3800 buf[9] = safi;
3801
3802 if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
3803 0, -1, &buf, 10) == -1)
3804 fatal("%s %d imsg_compose error in peer_send_eor",
3805 __func__, __LINE__);
3806 }
3807
3808 log_peer_info(&peer->conf, "sending %s EOR marker",
3809 aid2str(aid));
3810 }
3811
3812 /*
3813 * network announcement stuff
3814 */
/*
 * Announce a locally configured network: if it belongs to an l3vpn
 * (nc->rd != 0) rewrite the prefix into its VPN form first, apply the
 * configured filter sets, then insert it into the Adj-RIB-In and every
 * Local-RIB.  Consumes nc->attrset.
 */
void
network_add(struct network_config *nc, struct filterstate *state)
{
	struct l3vpn		*vpn;
	struct filter_set_head	*vpnset = NULL;
	struct in_addr		 prefix4;
	struct in6_addr		 prefix6;
	u_int8_t		 vstate;
	u_int16_t		 i;

	if (nc->rd != 0) {
		/* locate the l3vpn by route distinguisher */
		SIMPLEQ_FOREACH(vpn, &conf->l3vpns, entry) {
			if (vpn->rd != nc->rd)
				continue;
			switch (nc->prefix.aid) {
			case AID_INET:
				/* rebuild as a VPNv4 NLRI carrying the vpn's
				 * MPLS label as a one-entry label stack with
				 * the bottom-of-stack bit set */
				prefix4 = nc->prefix.v4;
				memset(&nc->prefix, 0, sizeof(nc->prefix));
				nc->prefix.aid = AID_VPN_IPv4;
				nc->prefix.rd = vpn->rd;
				nc->prefix.v4 = prefix4;
				nc->prefix.labellen = 3;
				nc->prefix.labelstack[0] =
				    (vpn->label >> 12) & 0xff;
				nc->prefix.labelstack[1] =
				    (vpn->label >> 4) & 0xff;
				nc->prefix.labelstack[2] =
				    (vpn->label << 4) & 0xf0;
				nc->prefix.labelstack[2] |= BGP_MPLS_BOS;
				vpnset = &vpn->export;
				break;
			case AID_INET6:
				/* same as above for a VPNv6 NLRI */
				prefix6 = nc->prefix.v6;
				memset(&nc->prefix, 0, sizeof(nc->prefix));
				nc->prefix.aid = AID_VPN_IPv6;
				nc->prefix.rd = vpn->rd;
				nc->prefix.v6 = prefix6;
				nc->prefix.labellen = 3;
				nc->prefix.labelstack[0] =
				    (vpn->label >> 12) & 0xff;
				nc->prefix.labelstack[1] =
				    (vpn->label >> 4) & 0xff;
				nc->prefix.labelstack[2] =
				    (vpn->label << 4) & 0xf0;
				nc->prefix.labelstack[2] |= BGP_MPLS_BOS;
				vpnset = &vpn->export;
				break;
			default:
				log_warnx("unable to VPNize prefix");
				filterset_free(&nc->attrset);
				return;
			}
			/* stop after the first matching l3vpn */
			break;
		}
		if (vpn == NULL) {
			log_warnx("network_add: "
			    "prefix %s/%u in non-existing l3vpn %s",
			    log_addr(&nc->prefix), nc->prefixlen,
			    log_rd(nc->rd));
			return;
		}
	}

	/* apply the network's attribute set and, if any, the vpn export set */
	rde_apply_set(&nc->attrset, peerself, peerself, state, nc->prefix.aid);
	if (vpnset)
		rde_apply_set(vpnset, peerself, peerself, state,
		    nc->prefix.aid);

	vstate = rde_roa_validity(&rde_roa, &nc->prefix,
	    nc->prefixlen, aspath_origin(state->aspath.aspath));
	/* count the prefix when newly added to the Adj-RIB-In (return 1,
	 * presumably "new prefix" — mirrors the decrement in network_delete) */
	if (prefix_update(rib_byid(RIB_ADJ_IN), peerself, state, &nc->prefix,
	    nc->prefixlen, vstate) == 1)
		peerself->prefix_cnt++;
	for (i = RIB_LOC_START; i < rib_size; i++) {
		struct rib *rib = rib_byid(i);
		if (rib == NULL)
			continue;
		rde_update_log("announce", i, peerself,
		    state->nexthop ? &state->nexthop->exit_nexthop : NULL,
		    &nc->prefix, nc->prefixlen);
		prefix_update(rib, peerself, state, &nc->prefix,
		    nc->prefixlen, vstate);
	}
	filterset_free(&nc->attrset);
}
3900
3901 void
network_delete(struct network_config * nc)3902 network_delete(struct network_config *nc)
3903 {
3904 struct l3vpn *vpn;
3905 struct in_addr prefix4;
3906 struct in6_addr prefix6;
3907 u_int32_t i;
3908
3909 if (nc->rd) {
3910 SIMPLEQ_FOREACH(vpn, &conf->l3vpns, entry) {
3911 if (vpn->rd != nc->rd)
3912 continue;
3913 switch (nc->prefix.aid) {
3914 case AID_INET:
3915 prefix4 = nc->prefix.v4;
3916 memset(&nc->prefix, 0, sizeof(nc->prefix));
3917 nc->prefix.aid = AID_VPN_IPv4;
3918 nc->prefix.rd = vpn->rd;
3919 nc->prefix.v4 = prefix4;
3920 nc->prefix.labellen = 3;
3921 nc->prefix.labelstack[0] =
3922 (vpn->label >> 12) & 0xff;
3923 nc->prefix.labelstack[1] =
3924 (vpn->label >> 4) & 0xff;
3925 nc->prefix.labelstack[2] =
3926 (vpn->label << 4) & 0xf0;
3927 nc->prefix.labelstack[2] |= BGP_MPLS_BOS;
3928 break;
3929 case AID_INET6:
3930 prefix6 = nc->prefix.v6;
3931 memset(&nc->prefix, 0, sizeof(nc->prefix));
3932 nc->prefix.aid = AID_VPN_IPv6;
3933 nc->prefix.rd = vpn->rd;
3934 nc->prefix.v6 = prefix6;
3935 nc->prefix.labellen = 3;
3936 nc->prefix.labelstack[0] =
3937 (vpn->label >> 12) & 0xff;
3938 nc->prefix.labelstack[1] =
3939 (vpn->label >> 4) & 0xff;
3940 nc->prefix.labelstack[2] =
3941 (vpn->label << 4) & 0xf0;
3942 nc->prefix.labelstack[2] |= BGP_MPLS_BOS;
3943 break;
3944 default:
3945 log_warnx("unable to VPNize prefix");
3946 return;
3947 }
3948 }
3949 }
3950
3951 for (i = RIB_LOC_START; i < rib_size; i++) {
3952 struct rib *rib = rib_byid(i);
3953 if (rib == NULL)
3954 continue;
3955 if (prefix_withdraw(rib, peerself, &nc->prefix,
3956 nc->prefixlen))
3957 rde_update_log("withdraw announce", i, peerself,
3958 NULL, &nc->prefix, nc->prefixlen);
3959 }
3960 if (prefix_withdraw(rib_byid(RIB_ADJ_IN), peerself, &nc->prefix,
3961 nc->prefixlen))
3962 peerself->prefix_cnt--;
3963 }
3964
3965 static void
network_dump_upcall(struct rib_entry * re,void * ptr)3966 network_dump_upcall(struct rib_entry *re, void *ptr)
3967 {
3968 struct prefix *p;
3969 struct rde_aspath *asp;
3970 struct kroute_full k;
3971 struct bgpd_addr addr;
3972 struct rde_dump_ctx *ctx = ptr;
3973
3974 LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
3975 asp = prefix_aspath(p);
3976 if (!(asp->flags & F_PREFIX_ANNOUNCED))
3977 continue;
3978 pt_getaddr(p->pt, &addr);
3979
3980 bzero(&k, sizeof(k));
3981 memcpy(&k.prefix, &addr, sizeof(k.prefix));
3982 if (prefix_nexthop(p) == NULL ||
3983 prefix_nexthop(p)->state != NEXTHOP_REACH)
3984 k.nexthop.aid = k.prefix.aid;
3985 else
3986 memcpy(&k.nexthop, &prefix_nexthop(p)->true_nexthop,
3987 sizeof(k.nexthop));
3988 k.prefixlen = p->pt->prefixlen;
3989 k.flags = F_KERNEL;
3990 if ((asp->flags & F_ANN_DYNAMIC) == 0)
3991 k.flags = F_STATIC;
3992 if (imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NETWORK, 0,
3993 ctx->req.pid, -1, &k, sizeof(k)) == -1)
3994 log_warnx("network_dump_upcall: "
3995 "imsg_compose error");
3996 }
3997 }
3998
3999 static void
network_flush_upcall(struct rib_entry * re,void * ptr)4000 network_flush_upcall(struct rib_entry *re, void *ptr)
4001 {
4002 struct rde_peer *peer = ptr;
4003 struct bgpd_addr addr;
4004 struct prefix *p;
4005 u_int32_t i;
4006 u_int8_t prefixlen;
4007
4008 p = prefix_bypeer(re, peer);
4009 if (p == NULL)
4010 return;
4011 if ((prefix_aspath(p)->flags & F_ANN_DYNAMIC) != F_ANN_DYNAMIC)
4012 return;
4013
4014 pt_getaddr(re->prefix, &addr);
4015 prefixlen = re->prefix->prefixlen;
4016
4017 for (i = RIB_LOC_START; i < rib_size; i++) {
4018 struct rib *rib = rib_byid(i);
4019 if (rib == NULL)
4020 continue;
4021 if (prefix_withdraw(rib, peer, &addr, prefixlen) == 1)
4022 rde_update_log("flush announce", i, peer,
4023 NULL, &addr, prefixlen);
4024 }
4025
4026 if (prefix_withdraw(rib_byid(RIB_ADJ_IN), peer, &addr,
4027 prefixlen) == 1)
4028 peer->prefix_cnt--;
4029 }
4030
/* clean up */

/*
 * Tear down the RDE on exit.  NOTE(review): the teardown order below
 * (peers first, then filters, vpns and finally the per-subsystem
 * shutdown calls) appears deliberate — do not reorder without checking
 * the dependencies between these subsystems.
 */
void
rde_shutdown(void)
{
	/*
	 * the decision process is turned off if rde_quit = 1 and
	 * rde_shutdown depends on this.
	 */

	/* First all peers go down */
	peer_foreach(peer_down, NULL);

	/* free filters */
	filterlist_free(out_rules);
	filterlist_free(out_rules_tmp);

	/* kill the VPN configs */
	free_l3vpns(&conf->l3vpns);

	/* now check everything */
	rib_shutdown();
	nexthop_shutdown();
	path_shutdown();
	aspath_shutdown();
	attr_shutdown();
	pt_shutdown();
	peer_shutdown();
}
4059
4060 struct rde_prefixset *
rde_find_prefixset(char * name,struct rde_prefixset_head * p)4061 rde_find_prefixset(char *name, struct rde_prefixset_head *p)
4062 {
4063 struct rde_prefixset *ps;
4064
4065 SIMPLEQ_FOREACH(ps, p, entry) {
4066 if (!strcmp(ps->name, name))
4067 return (ps);
4068 }
4069 return (NULL);
4070 }
4071
4072 void
rde_mark_prefixsets_dirty(struct rde_prefixset_head * psold,struct rde_prefixset_head * psnew)4073 rde_mark_prefixsets_dirty(struct rde_prefixset_head *psold,
4074 struct rde_prefixset_head *psnew)
4075 {
4076 struct rde_prefixset *new, *old;
4077
4078 SIMPLEQ_FOREACH(new, psnew, entry) {
4079 if ((psold == NULL) ||
4080 (old = rde_find_prefixset(new->name, psold)) == NULL) {
4081 new->dirty = 1;
4082 new->lastchange = getmonotime();
4083 } else {
4084 if (trie_equal(&new->th, &old->th) == 0) {
4085 new->dirty = 1;
4086 new->lastchange = getmonotime();
4087 } else
4088 new->lastchange = old->lastchange;
4089 }
4090 }
4091 }
4092
4093 u_int8_t
rde_roa_validity(struct rde_prefixset * ps,struct bgpd_addr * prefix,u_int8_t plen,u_int32_t as)4094 rde_roa_validity(struct rde_prefixset *ps, struct bgpd_addr *prefix,
4095 u_int8_t plen, u_int32_t as)
4096 {
4097 int r;
4098
4099 r = trie_roa_check(&ps->th, prefix, plen, as);
4100 return (r & ROA_MASK);
4101 }
4102
4103 int
ovs_match(struct prefix * p,u_int32_t flag)4104 ovs_match(struct prefix *p, u_int32_t flag)
4105 {
4106 if (flag & (F_CTL_OVS_VALID|F_CTL_OVS_INVALID|F_CTL_OVS_NOTFOUND)) {
4107 switch (prefix_vstate(p)) {
4108 case ROA_VALID:
4109 if (!(flag & F_CTL_OVS_VALID))
4110 return 0;
4111 break;
4112 case ROA_INVALID:
4113 if (!(flag & F_CTL_OVS_INVALID))
4114 return 0;
4115 break;
4116 case ROA_NOTFOUND:
4117 if (!(flag & F_CTL_OVS_NOTFOUND))
4118 return 0;
4119 break;
4120 default:
4121 break;
4122 }
4123 }
4124
4125 return 1;
4126 }
4127