1 /* $OpenBSD: rde_update.c,v 1.168 2024/05/30 08:29:30 claudio Exp $ */
2
3 /*
4 * Copyright (c) 2004 Claudio Jeker <claudio@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/tree.h>
21
22 #include <limits.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <stdio.h>
26
27 #include "bgpd.h"
28 #include "rde.h"
29 #include "log.h"
30
/*
 * Result of up_process_prefix() describing how a prefix was handled
 * on its way towards the Adj-RIB-Out.
 */
enum up_state {
	UP_OK,		/* prefix was added or updated in the Adj-RIB-Out */
	UP_ERR_LIMIT,	/* peer's outbound prefix limit was exceeded */
	UP_FILTERED,	/* denied by the output filters / open policy */
	UP_EXCLUDED,	/* excluded by the up_test_update() checks */
};
37
/*
 * Pre-built matchers for the well-known communities (RFC 1997) that
 * restrict route propagation; used by up_test_update().
 */
static struct community	comm_no_advertise = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_ADVERTISE
};
static struct community	comm_no_export = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_EXPORT
};
static struct community	comm_no_expsubconfed = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_EXPSUBCONFED
};
53
54 static void up_prep_adjout(struct rde_peer *, struct filterstate *, uint8_t);
55
56 static int
up_test_update(struct rde_peer * peer,struct prefix * p)57 up_test_update(struct rde_peer *peer, struct prefix *p)
58 {
59 struct rde_aspath *asp;
60 struct rde_community *comm;
61 struct rde_peer *frompeer;
62
63 frompeer = prefix_peer(p);
64 asp = prefix_aspath(p);
65 comm = prefix_communities(p);
66
67 if (asp == NULL || asp->flags & F_ATTR_PARSE_ERR)
68 fatalx("try to send out a botched path");
69 if (asp->flags & (F_ATTR_LOOP | F_ATTR_OTC_LEAK))
70 fatalx("try to send out a looped path");
71
72 if (peer == frompeer)
73 /* Do not send routes back to sender */
74 return (0);
75
76 if (!frompeer->conf.ebgp && !peer->conf.ebgp) {
77 /*
78 * route reflector redistribution rules:
79 * 1. if announce is set -> announce
80 * 2. from non-client, to non-client -> no
81 * 3. from client, to non-client -> yes
82 * 4. from non-client, to client -> yes
83 * 5. from client, to client -> yes
84 */
85 if (frompeer->conf.reflector_client == 0 &&
86 peer->conf.reflector_client == 0 &&
87 (asp->flags & F_PREFIX_ANNOUNCED) == 0)
88 /* Do not redistribute updates to ibgp peers */
89 return (0);
90 }
91
92 /* well known communities */
93 if (community_match(comm, &comm_no_advertise, NULL))
94 return (0);
95 if (peer->conf.ebgp) {
96 if (community_match(comm, &comm_no_export, NULL))
97 return (0);
98 if (community_match(comm, &comm_no_expsubconfed, NULL))
99 return (0);
100 }
101
102 return (1);
103 }
104
105 /* RFC9234 open policy handling */
106 static int
up_enforce_open_policy(struct rde_peer * peer,struct filterstate * state,uint8_t aid)107 up_enforce_open_policy(struct rde_peer *peer, struct filterstate *state,
108 uint8_t aid)
109 {
110 /* only for IPv4 and IPv6 unicast */
111 if (aid != AID_INET && aid != AID_INET6)
112 return 0;
113
114 /*
115 * do not propagate (consider it filtered) if OTC is present and
116 * local role is peer, customer or rs-client.
117 */
118 if (peer->role == ROLE_PEER || peer->role == ROLE_CUSTOMER ||
119 peer->role == ROLE_RS_CLIENT)
120 if (state->aspath.flags & F_ATTR_OTC)
121 return 1;
122
123 /*
124 * add OTC attribute if not present towards peers, customers and
125 * rs-clients (local roles peer, provider, rs).
126 */
127 if (peer->role == ROLE_PEER || peer->role == ROLE_PROVIDER ||
128 peer->role == ROLE_RS)
129 if ((state->aspath.flags & F_ATTR_OTC) == 0) {
130 uint32_t tmp;
131
132 tmp = htonl(peer->conf.local_as);
133 if (attr_optadd(&state->aspath,
134 ATTR_OPTIONAL|ATTR_TRANSITIVE, ATTR_OTC,
135 &tmp, sizeof(tmp)) == -1)
136 log_peer_warnx(&peer->conf,
137 "failed to add OTC attribute");
138 state->aspath.flags |= F_ATTR_OTC;
139 }
140
141 return 0;
142 }
143
/*
 * Process a single prefix by passing it through the various filter stages
 * and if not filtered out update the Adj-RIB-Out. Returns:
 * - UP_OK if prefix was added
 * - UP_ERR_LIMIT if the peer outbound prefix limit was reached
 * - UP_FILTERED if prefix was filtered out
 * - UP_EXCLUDED if prefix was excluded because of up_test_update()
 *
 * The special sentinel value (void *)-1 for p means that the matching
 * Adj-RIB-Out prefix is only looked up once the prefix is known to be
 * an actual update (saves the lookup in the filtered/excluded cases).
 */
static enum up_state
up_process_prefix(struct rde_peer *peer, struct prefix *new, struct prefix *p)
{
	struct filterstate	 state;
	struct bgpd_addr	 addr;
	int			 excluded = 0;

	/*
	 * up_test_update() needs to run before the output filters
	 * else the well known communities won't work properly.
	 * The output filters would not be able to add well known
	 * communities.
	 */
	if (!up_test_update(peer, new))
		excluded = 1;

	rde_filterstate_prep(&state, new);
	pt_getaddr(new->pt, &addr);
	if (rde_filter(peer->out_rules, peer, prefix_peer(new), &addr,
	    new->pt->prefixlen, &state) == ACTION_DENY) {
		rde_filterstate_clean(&state);
		return UP_FILTERED;
	}

	/* Open Policy Check: acts like an output filter */
	if (up_enforce_open_policy(peer, &state, new->pt->aid)) {
		rde_filterstate_clean(&state);
		return UP_FILTERED;
	}

	/* only after the filters ran can the exclusion be reported */
	if (excluded) {
		rde_filterstate_clean(&state);
		return UP_EXCLUDED;
	}

	/* from here on we know this is an update */
	if (p == (void *)-1)
		p = prefix_adjout_get(peer, new->path_id_tx, new->pt);

	up_prep_adjout(peer, &state, new->pt->aid);
	prefix_adjout_update(p, peer, &state, new->pt, new->path_id_tx);
	rde_filterstate_clean(&state);

	/* max prefix checker outbound */
	if (peer->conf.max_out_prefix &&
	    peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
		log_peer_warnx(&peer->conf,
		    "outbound prefix limit reached (>%u/%u)",
		    peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
		rde_update_err(peer, ERR_CEASE,
		    ERR_CEASE_MAX_SENT_PREFIX, NULL);
		return UP_ERR_LIMIT;
	}

	return UP_OK;
}
208
/*
 * Generate an update for a single peer from the given RIB entry.
 * Walks the prefix list starting at the best path and announces the
 * first path that makes it through the filters. With the
 * "rde evaluate all" peer flag set, filtered paths are skipped and the
 * next eligible path is tried instead. If nothing can be announced the
 * previously announced prefix (if any) is withdrawn.
 */
void
up_generate_updates(struct rde_peer *peer, struct rib_entry *re)
{
	struct prefix		*new, *p;

	p = prefix_adjout_first(peer, re->prefix);

	new = prefix_best(re);
	while (new != NULL) {
		switch (up_process_prefix(peer, new, p)) {
		case UP_OK:
		case UP_ERR_LIMIT:
			return;
		case UP_FILTERED:
			if (peer->flags & PEERFLAG_EVALUATE_ALL) {
				/* try the next eligible path instead */
				new = TAILQ_NEXT(new, entry.list.rib);
				if (new != NULL && prefix_eligible(new))
					continue;
			}
			goto done;
		case UP_EXCLUDED:
			goto done;
		}
	}

 done:
	/* nothing could be announced, withdraw the old prefix */
	if (p != NULL)
		prefix_adjout_withdraw(p);
}
239
/*
 * Generate updates for the add-path send case. Depending on the
 * peer eval settings prefixes are selected and distributed.
 * This highly depends on the Adj-RIB-Out to handle prefixes with no
 * changes gracefully. It may be possible to improve the API so that
 * less churn is needed.
 */
void
up_generate_addpath(struct rde_peer *peer, struct rib_entry *re)
{
	struct prefix		*head, *new, *p;
	int			 maxpaths = 0, extrapaths = 0, extra;
	int			 checkmode = 1;

	head = prefix_adjout_first(peer, re->prefix);

	/* mark all paths as stale */
	for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
		p->flags |= PREFIX_FLAG_STALE;

	/* update paths */
	new = prefix_best(re);
	while (new != NULL) {
		/* check limits and stop when a limit is reached */
		if (peer->eval.maxpaths != 0 &&
		    maxpaths >= peer->eval.maxpaths)
			break;
		if (peer->eval.extrapaths != 0 &&
		    extrapaths >= peer->eval.extrapaths)
			break;

		/*
		 * Paths inside the eval mode's decision-metric cutoff do
		 * not count as extra paths. Once a path falls outside the
		 * cutoff all following (worse) ones are extra as well, so
		 * the mode check can be skipped from then on (checkmode).
		 */
		extra = 1;
		if (checkmode) {
			switch (peer->eval.mode) {
			case ADDPATH_EVAL_BEST:
				if (new->dmetric == PREFIX_DMETRIC_BEST)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_ECMP:
				if (new->dmetric == PREFIX_DMETRIC_BEST ||
				    new->dmetric == PREFIX_DMETRIC_ECMP)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_AS_WIDE:
				if (new->dmetric == PREFIX_DMETRIC_BEST ||
				    new->dmetric == PREFIX_DMETRIC_ECMP ||
				    new->dmetric == PREFIX_DMETRIC_AS_WIDE)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_ALL:
				/* nothing to check */
				checkmode = 0;
				break;
			default:
				fatalx("unknown add-path eval mode");
			}
		}

		/* (void *)-1: let up_process_prefix do the adjout lookup */
		switch (up_process_prefix(peer, new, (void *)-1)) {
		case UP_OK:
			maxpaths++;
			extrapaths += extra;
			break;
		case UP_FILTERED:
		case UP_EXCLUDED:
			break;
		case UP_ERR_LIMIT:
			/* just give up */
			return;
		}

		/* only allow valid prefixes */
		new = TAILQ_NEXT(new, entry.list.rib);
		if (new == NULL || !prefix_eligible(new))
			break;
	}

	/* withdraw stale paths */
	for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
		if (p->flags & PREFIX_FLAG_STALE)
			prefix_adjout_withdraw(p);
	}
}
329
/*
 * Generate updates for the add-path send all case. Since all prefixes
 * are distributed just remove old and add new.
 * When both old and new are NULL the whole RIB entry is resynced:
 * everything currently in the Adj-RIB-Out is marked stale, all
 * eligible paths are re-added and leftover stale paths withdrawn.
 */
void
up_generate_addpath_all(struct rde_peer *peer, struct rib_entry *re,
    struct prefix *new, struct prefix *old)
{
	struct prefix	*p, *head = NULL;
	int		 all = 0;

	/*
	 * if old and new are NULL then insert all prefixes from best,
	 * clearing old routes in the process
	 */
	if (old == NULL && new == NULL) {
		/* mark all paths as stale */
		head = prefix_adjout_first(peer, re->prefix);
		for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
			p->flags |= PREFIX_FLAG_STALE;

		new = prefix_best(re);
		all = 1;
	}

	if (new != NULL && !prefix_eligible(new)) {
		/* only allow valid prefixes */
		new = NULL;
	}

	if (old != NULL) {
		/* withdraw stale paths */
		p = prefix_adjout_get(peer, old->path_id_tx, old->pt);
		if (p != NULL)
			prefix_adjout_withdraw(p);
	}

	/* add new path (or multiple if all is set) */
	while (new != NULL) {
		/* (void *)-1: let up_process_prefix do the adjout lookup */
		switch (up_process_prefix(peer, new, (void *)-1)) {
		case UP_OK:
		case UP_FILTERED:
		case UP_EXCLUDED:
			break;
		case UP_ERR_LIMIT:
			/* just give up */
			return;
		}

		if (!all)
			break;

		/* only allow valid prefixes */
		new = TAILQ_NEXT(new, entry.list.rib);
		if (new == NULL || !prefix_eligible(new))
			break;
	}

	if (all) {
		/* withdraw stale paths */
		for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
			if (p->flags & PREFIX_FLAG_STALE)
				prefix_adjout_withdraw(p);
		}
	}
}
396
/*
 * Send a default route for the given address family to the specified
 * peer. The route is originated locally (empty AS path, origin IGP)
 * and still passes through the regular outbound filters.
 */
void
up_generate_default(struct rde_peer *peer, uint8_t aid)
{
	extern struct rde_peer	*peerself;
	struct filterstate	 state;
	struct rde_aspath	*asp;
	struct prefix		*p;
	struct pt_entry		*pte;
	struct bgpd_addr	 addr;

	/* only if the peer negotiated this address family */
	if (peer->capa.mp[aid] == 0)
		return;

	rde_filterstate_init(&state);
	asp = &state.aspath;
	asp->aspath = aspath_get(NULL, 0);	/* empty AS path */
	asp->origin = ORIGIN_IGP;
	rde_filterstate_set_vstate(&state, ROA_NOTFOUND, ASPA_NEVER_KNOWN);
	/* the other default values are OK, nexthop is once again NULL */

	/*
	 * XXX apply default overrides. Not yet possible, mainly a parse.y
	 * problem.
	 */
	/* rde_apply_set(asp, peerself, peerself, set, af); */

	/* all-zero address with prefixlen 0 is the default route */
	memset(&addr, 0, sizeof(addr));
	addr.aid = aid;
	p = prefix_adjout_lookup(peer, &addr, 0);

	/* outbound filter as usual */
	if (rde_filter(peer->out_rules, peer, peerself, &addr, 0, &state) ==
	    ACTION_DENY) {
		rde_filterstate_clean(&state);
		return;
	}

	up_prep_adjout(peer, &state, addr.aid);
	/* can't use pt_fill here since prefix_adjout_update keeps a ref */
	pte = pt_get(&addr, 0);
	if (pte == NULL)
		pte = pt_add(&addr, 0);
	prefix_adjout_update(p, peer, &state, pte, 0);
	rde_filterstate_clean(&state);

	/* max prefix checker outbound */
	if (peer->conf.max_out_prefix &&
	    peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
		log_peer_warnx(&peer->conf,
		    "outbound prefix limit reached (>%u/%u)",
		    peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
		rde_update_err(peer, ERR_CEASE,
		    ERR_CEASE_MAX_SENT_PREFIX, NULL);
	}
}
453
/*
 * Select the nexthop address to announce to the peer depending on the
 * session type (ibgp, directly connected ebgp, ebgp multihop) and the
 * nexthop flags set by the filters. Returns a pointer to the address
 * to use, or NULL if no nexthop should be sent (flowspec) or no
 * suitable local address is known for this address family.
 */
static struct bgpd_addr *
up_get_nexthop(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
{
	struct bgpd_addr *peer_local = NULL;

	/* pick the local session address matching the address family */
	switch (aid) {
	case AID_INET:
	case AID_VPN_IPv4:
		if (peer->local_v4_addr.aid == AID_INET)
			peer_local = &peer->local_v4_addr;
		break;
	case AID_INET6:
	case AID_VPN_IPv6:
		if (peer->local_v6_addr.aid == AID_INET6)
			peer_local = &peer->local_v6_addr;
		break;
	case AID_FLOWSPECv4:
	case AID_FLOWSPECv6:
		/* flowspec has no nexthop */
		return (NULL);
	default:
		fatalx("%s, bad AID %s", __func__, aid2str(aid));
	}

	if (state->nhflags & NEXTHOP_SELF) {
		/*
		 * Forcing the nexthop to self is always possible
		 * and has precedence over other flags.
		 */
		return (peer_local);
	} else if (!peer->conf.ebgp) {
		/*
		 * in the ibgp case the nexthop is normally not
		 * modified unless it points at the peer itself.
		 */
		if (state->nexthop == NULL) {
			/* announced networks without explicit nexthop set */
			return (peer_local);
		}
		/*
		 * per RFC: if remote peer address is equal to the nexthop set
		 * the nexthop to our local address. This reduces the risk of
		 * routing loops. This overrides NEXTHOP_NOMODIFY.
		 */
		if (memcmp(&state->nexthop->exit_nexthop,
		    &peer->remote_addr, sizeof(peer->remote_addr)) == 0) {
			return (peer_local);
		}
		return (&state->nexthop->exit_nexthop);
	} else if (peer->conf.distance == 1) {
		/*
		 * In the ebgp directly connected case never send
		 * out a nexthop that is outside of the connected
		 * network of the peer. No matter what flags are
		 * set. This follows section 5.1.3 of RFC 4271.
		 * So just check if the nexthop is in the same net
		 * is enough here.
		 */
		if (state->nexthop != NULL &&
		    state->nexthop->flags & NEXTHOP_CONNECTED &&
		    prefix_compare(&peer->remote_addr,
		    &state->nexthop->nexthop_net,
		    state->nexthop->nexthop_netlen) == 0) {
			/* nexthop and peer are in the same net */
			return (&state->nexthop->exit_nexthop);
		}
		return (peer_local);
	} else {
		/*
		 * For ebgp multihop make it possible to overrule
		 * the sent nexthop by setting NEXTHOP_NOMODIFY.
		 * Similar to the ibgp case there is no same net check
		 * needed but still ensure that the nexthop is not
		 * pointing to the peer itself.
		 */
		if (state->nhflags & NEXTHOP_NOMODIFY &&
		    state->nexthop != NULL &&
		    memcmp(&state->nexthop->exit_nexthop,
		    &peer->remote_addr, sizeof(peer->remote_addr)) != 0) {
			/* no modify flag set and nexthop not peer addr */
			return (&state->nexthop->exit_nexthop);
		}
		return (peer_local);
	}
}
539
540 static void
up_prep_adjout(struct rde_peer * peer,struct filterstate * state,uint8_t aid)541 up_prep_adjout(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
542 {
543 struct bgpd_addr *nexthop;
544 struct nexthop *nh = NULL;
545 u_char *np;
546 uint16_t nl;
547
548 /* prepend local AS number for eBGP sessions. */
549 if (peer->conf.ebgp && (peer->flags & PEERFLAG_TRANS_AS) == 0) {
550 uint32_t prep_as = peer->conf.local_as;
551 np = aspath_prepend(state->aspath.aspath, prep_as, 1, &nl);
552 aspath_put(state->aspath.aspath);
553 state->aspath.aspath = aspath_get(np, nl);
554 free(np);
555 }
556
557 /* update nexthop */
558 nexthop = up_get_nexthop(peer, state, aid);
559 if (nexthop != NULL)
560 nh = nexthop_get(nexthop);
561 nexthop_unref(state->nexthop);
562 state->nexthop = nh;
563 state->nhflags = 0;
564 }
565
566
567 static int
up_generate_attr(struct ibuf * buf,struct rde_peer * peer,struct rde_aspath * asp,struct rde_community * comm,struct nexthop * nh,uint8_t aid)568 up_generate_attr(struct ibuf *buf, struct rde_peer *peer,
569 struct rde_aspath *asp, struct rde_community *comm, struct nexthop *nh,
570 uint8_t aid)
571 {
572 struct attr *oa = NULL, *newaggr = NULL;
573 u_char *pdata;
574 uint32_t tmp32;
575 int flags, neednewpath = 0, rv;
576 uint16_t plen;
577 uint8_t oalen = 0, type;
578
579 if (asp->others_len > 0)
580 oa = asp->others[oalen++];
581
582 /* dump attributes in ascending order */
583 for (type = ATTR_ORIGIN; type < 255; type++) {
584 while (oa && oa->type < type) {
585 if (oalen < asp->others_len)
586 oa = asp->others[oalen++];
587 else
588 oa = NULL;
589 }
590
591 switch (type) {
592 /*
593 * Attributes stored in rde_aspath
594 */
595 case ATTR_ORIGIN:
596 if (attr_writebuf(buf, ATTR_WELL_KNOWN,
597 ATTR_ORIGIN, &asp->origin, 1) == -1)
598 return -1;
599 break;
600 case ATTR_ASPATH:
601 plen = aspath_length(asp->aspath);
602 pdata = aspath_dump(asp->aspath);
603
604 if (!peer_has_as4byte(peer))
605 pdata = aspath_deflate(pdata, &plen,
606 &neednewpath);
607 rv = attr_writebuf(buf, ATTR_WELL_KNOWN,
608 ATTR_ASPATH, pdata, plen);
609 if (!peer_has_as4byte(peer))
610 free(pdata);
611
612 if (rv == -1)
613 return -1;
614 break;
615 case ATTR_NEXTHOP:
616 switch (aid) {
617 case AID_INET:
618 if (nh == NULL)
619 return -1;
620 if (attr_writebuf(buf, ATTR_WELL_KNOWN,
621 ATTR_NEXTHOP, &nh->exit_nexthop.v4,
622 sizeof(nh->exit_nexthop.v4)) == -1)
623 return -1;
624 break;
625 default:
626 break;
627 }
628 break;
629 case ATTR_MED:
630 /*
631 * The old MED from other peers MUST not be announced
632 * to others unless the MED is originating from us or
633 * the peer is an IBGP one. Only exception are routers
634 * with "transparent-as yes" set.
635 */
636 if (asp->flags & F_ATTR_MED && (!peer->conf.ebgp ||
637 asp->flags & F_ATTR_MED_ANNOUNCE ||
638 peer->flags & PEERFLAG_TRANS_AS)) {
639 tmp32 = htonl(asp->med);
640 if (attr_writebuf(buf, ATTR_OPTIONAL,
641 ATTR_MED, &tmp32, 4) == -1)
642 return -1;
643 }
644 break;
645 case ATTR_LOCALPREF:
646 if (!peer->conf.ebgp) {
647 /* local preference, only valid for ibgp */
648 tmp32 = htonl(asp->lpref);
649 if (attr_writebuf(buf, ATTR_WELL_KNOWN,
650 ATTR_LOCALPREF, &tmp32, 4) == -1)
651 return -1;
652 }
653 break;
654 /*
655 * Communities are stored in struct rde_community
656 */
657 case ATTR_COMMUNITIES:
658 case ATTR_EXT_COMMUNITIES:
659 case ATTR_LARGE_COMMUNITIES:
660 if (community_writebuf(comm, type, peer->conf.ebgp,
661 buf) == -1)
662 return -1;
663 break;
664 /*
665 * NEW to OLD conversion when sending stuff to a 2byte AS peer
666 */
667 case ATTR_AS4_PATH:
668 if (neednewpath) {
669 plen = aspath_length(asp->aspath);
670 pdata = aspath_dump(asp->aspath);
671
672 flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
673 if (!(asp->flags & F_PREFIX_ANNOUNCED))
674 flags |= ATTR_PARTIAL;
675 if (plen != 0)
676 if (attr_writebuf(buf, flags,
677 ATTR_AS4_PATH, pdata, plen) == -1)
678 return -1;
679 }
680 break;
681 case ATTR_AS4_AGGREGATOR:
682 if (newaggr) {
683 flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
684 if (!(asp->flags & F_PREFIX_ANNOUNCED))
685 flags |= ATTR_PARTIAL;
686 if (attr_writebuf(buf, flags,
687 ATTR_AS4_AGGREGATOR, newaggr->data,
688 newaggr->len) == -1)
689 return -1;
690 }
691 break;
692 /*
693 * multiprotocol attributes are handled elsewhere
694 */
695 case ATTR_MP_REACH_NLRI:
696 case ATTR_MP_UNREACH_NLRI:
697 break;
698 /*
699 * dump all other path attributes. Following rules apply:
700 * 1. well-known attrs: ATTR_ATOMIC_AGGREGATE and
701 * ATTR_AGGREGATOR pass unmodified (enforce flags
702 * to correct values). Actually ATTR_AGGREGATOR may be
703 * deflated for OLD 2-byte peers.
704 * 2. non-transitive attrs: don't re-announce to ebgp peers
705 * 3. transitive known attrs: announce unmodified
706 * 4. transitive unknown attrs: set partial bit and re-announce
707 */
708 case ATTR_ATOMIC_AGGREGATE:
709 if (oa == NULL || oa->type != type)
710 break;
711 if (attr_writebuf(buf, ATTR_WELL_KNOWN,
712 ATTR_ATOMIC_AGGREGATE, NULL, 0) == -1)
713 return -1;
714 break;
715 case ATTR_AGGREGATOR:
716 if (oa == NULL || oa->type != type)
717 break;
718 if ((!(oa->flags & ATTR_TRANSITIVE)) &&
719 peer->conf.ebgp)
720 break;
721 if (!peer_has_as4byte(peer)) {
722 /* need to deflate the aggregator */
723 uint8_t t[6];
724 uint16_t tas;
725
726 if ((!(oa->flags & ATTR_TRANSITIVE)) &&
727 peer->conf.ebgp)
728 break;
729
730 memcpy(&tmp32, oa->data, sizeof(tmp32));
731 if (ntohl(tmp32) > USHRT_MAX) {
732 tas = htons(AS_TRANS);
733 newaggr = oa;
734 } else
735 tas = htons(ntohl(tmp32));
736
737 memcpy(t, &tas, sizeof(tas));
738 memcpy(t + sizeof(tas),
739 oa->data + sizeof(tmp32),
740 oa->len - sizeof(tmp32));
741 if (attr_writebuf(buf, oa->flags,
742 oa->type, &t, sizeof(t)) == -1)
743 return -1;
744 } else {
745 if (attr_writebuf(buf, oa->flags, oa->type,
746 oa->data, oa->len) == -1)
747 return -1;
748 }
749 break;
750 case ATTR_ORIGINATOR_ID:
751 case ATTR_CLUSTER_LIST:
752 case ATTR_OTC:
753 if (oa == NULL || oa->type != type)
754 break;
755 if ((!(oa->flags & ATTR_TRANSITIVE)) &&
756 peer->conf.ebgp)
757 break;
758 if (attr_writebuf(buf, oa->flags, oa->type,
759 oa->data, oa->len) == -1)
760 return -1;
761 break;
762 default:
763 if (oa == NULL && type >= ATTR_FIRST_UNKNOWN)
764 /* there is no attribute left to dump */
765 return (0);
766
767 if (oa == NULL || oa->type != type)
768 break;
769 /* unknown attribute */
770 if (!(oa->flags & ATTR_TRANSITIVE)) {
771 /*
772 * RFC 1771:
773 * Unrecognized non-transitive optional
774 * attributes must be quietly ignored and
775 * not passed along to other BGP peers.
776 */
777 break;
778 }
779 if (attr_writebuf(buf, oa->flags | ATTR_PARTIAL,
780 oa->type, oa->data, oa->len) == -1)
781 return -1;
782 }
783 }
784 return 0;
785 }
786
/*
 * Check if the pending element is a EoR marker. If so remove it from the
 * tree and return 1, otherwise return 0.
 */
int
up_is_eor(struct rde_peer *peer, uint8_t aid)
{
	struct prefix	*head;

	head = RB_MIN(prefix_tree, &peer->updates[aid]);
	if (head == NULL || (head->flags & PREFIX_FLAG_EOR) == 0)
		return 0;

	/*
	 * Need to remove eor from update tree because
	 * prefix_adjout_destroy() can't handle that.
	 */
	RB_REMOVE(prefix_tree, &peer->updates[aid], head);
	head->flags &= ~PREFIX_FLAG_UPDATE;
	prefix_adjout_destroy(head);
	return 1;
}
809
/*
 * Minimal buffer size: must be bigger than the withdrawn routes length
 * field + the path attribute length field + an attribute header +
 * afi/safi, so a minimal (MP) UPDATE still fits.
 */
#define MIN_UPDATE_LEN 16
812
/*
 * Remove prefix p from the pending update/withdraw tree after it was
 * written out. A withdrawn prefix is destroyed completely, an updated
 * one stays in the Adj-RIB-Out. Adjusts the peer statistics.
 */
static void
up_prefix_free(struct prefix_tree *prefix_head, struct prefix *p,
    struct rde_peer *peer, int withdraw)
{
	if (!withdraw) {
		/* prefix still in Adj-RIB-Out, keep it */
		RB_REMOVE(prefix_tree, prefix_head, p);
		p->flags &= ~PREFIX_FLAG_UPDATE;
		peer->stats.pending_update--;
		peer->stats.prefix_sent_update++;
		return;
	}

	/* prefix no longer needed, remove it */
	prefix_adjout_destroy(p);
	peer->stats.prefix_sent_withdraw++;
}
829
/*
 * Write prefixes to buffer until either there is no more space or
 * the next prefix no longer shares the same path attributes.
 * Returns -1 if no prefix was written, else 0.
 */
static int
up_dump_prefix(struct ibuf *buf, struct prefix_tree *prefix_head,
    struct rde_peer *peer, int withdraw)
{
	struct prefix	*cur, *next;
	int		 wrote = 0, addpath = -1;

	RB_FOREACH_SAFE(cur, prefix_tree, prefix_head, next) {
		int last = 0;

		if (addpath == -1)
			addpath = peer_has_add_path(peer, cur->pt->aid,
			    CAPA_AP_SEND);
		if (pt_writebuf(buf, cur->pt, withdraw, addpath,
		    cur->path_id_tx) == -1)
			break;

		/* make sure we only dump prefixes which belong together */
		if (next == NULL ||
		    next->aspath != cur->aspath ||
		    next->communities != cur->communities ||
		    next->nexthop != cur->nexthop ||
		    next->nhflags != cur->nhflags ||
		    (next->flags & PREFIX_FLAG_EOR))
			last = 1;

		wrote = 1;
		up_prefix_free(prefix_head, cur, peer, withdraw);
		if (last)
			break;
	}
	return wrote ? 0 : -1;
}
866
/*
 * Write the MP_REACH_NLRI attribute (AFI/SAFI, nexthop and NLRI) for
 * the given AID into buf. The attribute length field is back-patched
 * once all prefixes have been written. Returns 0 on success and -1 if
 * the attribute or no prefix at all could be written.
 */
static int
up_generate_mp_reach(struct ibuf *buf, struct rde_peer *peer,
    struct nexthop *nh, uint8_t aid)
{
	struct bgpd_addr *nexthop;
	size_t off;
	uint16_t len, afi;
	uint8_t safi;

	/* attribute header, defaulting to extended length one */
	if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
		return -1;
	if (ibuf_add_n8(buf, ATTR_MP_REACH_NLRI) == -1)
		return -1;
	/* remember where the 2-byte length field lives, patched below */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		return -1;

	if (aid2afi(aid, &afi, &safi))
		fatalx("up_generate_mp_reach: bad AID");

	/* AFI + SAFI + NH LEN + NH + Reserved */
	if (ibuf_add_n16(buf, afi) == -1)
		return -1;
	if (ibuf_add_n8(buf, safi) == -1)
		return -1;

	switch (aid) {
	case AID_INET6:
		if (nh == NULL)
			return -1;
		/* NH LEN */
		if (ibuf_add_n8(buf, sizeof(struct in6_addr)) == -1)
			return -1;
		/* write nexthop */
		nexthop = &nh->exit_nexthop;
		if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1)
			return -1;
		break;
	case AID_VPN_IPv4:
		if (nh == NULL)
			return -1;
		/* NH LEN: VPN nexthops carry an 8-byte RD in front */
		if (ibuf_add_n8(buf,
		    sizeof(uint64_t) + sizeof(struct in_addr)) == -1)
			return -1;
		/* write zero rd */
		if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
			return -1;
		/* write nexthop */
		nexthop = &nh->exit_nexthop;
		if (ibuf_add(buf, &nexthop->v4, sizeof(struct in_addr)) == -1)
			return -1;
		break;
	case AID_VPN_IPv6:
		if (nh == NULL)
			return -1;
		/* NH LEN: VPN nexthops carry an 8-byte RD in front */
		if (ibuf_add_n8(buf,
		    sizeof(uint64_t) + sizeof(struct in6_addr)) == -1)
			return -1;
		/* write zero rd */
		if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
			return -1;
		/* write nexthop */
		nexthop = &nh->exit_nexthop;
		if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1)
			return -1;
		break;
	case AID_FLOWSPECv4:
	case AID_FLOWSPECv6:
		if (ibuf_add_zero(buf, 1) == -1) /* NH LEN MUST be 0 */
			return -1;
		/* no NH */
		break;
	default:
		fatalx("up_generate_mp_reach: unknown AID");
	}

	if (ibuf_add_zero(buf, 1) == -1) /* Reserved must be 0 */
		return -1;

	if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
		/* no prefixes written, fail update */
		return (-1);

	/* update MP_REACH attribute length field */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		return -1;

	return 0;
}
960
961 /*
962 * Generate UPDATE message containing either just withdraws or updates.
 * UPDATE messages are constructed like this:
964 *
965 * +-----------------------------------------------------+
966 * | Withdrawn Routes Length (2 octets) |
967 * +-----------------------------------------------------+
968 * | Withdrawn Routes (variable) |
969 * +-----------------------------------------------------+
970 * | Total Path Attribute Length (2 octets) |
971 * +-----------------------------------------------------+
972 * | Path Attributes (variable) |
973 * +-----------------------------------------------------+
974 * | Network Layer Reachability Information (variable) |
975 * +-----------------------------------------------------+
976 *
977 * Multiprotocol messages use MP_REACH_NLRI and MP_UNREACH_NLRI
978 * the latter will be the only path attribute in a message.
979 */
980
981 /*
982 * Write UPDATE message for withdrawn routes. The size of buf limits
 * how many routes can be added. Return 0 on success -1 on error which
984 * includes generating an empty withdraw message.
985 */
int
up_dump_withdraws(struct ibuf *buf, struct rde_peer *peer, uint8_t aid)
{
	size_t off;
	uint16_t afi, len;
	uint8_t safi;

	/*
	 * reserve space for the withdrawn routes length field;
	 * note that for non-IPv4 AIDs off is overwritten below and then
	 * points at the path attribute length field instead.
	 */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		return -1;

	if (aid != AID_INET) {
		/*
		 * non-IPv4 withdraws go into an MP_UNREACH_NLRI path
		 * attribute, the plain withdrawn routes length stays 0.
		 */
		/* reserve space for 2-byte path attribute length */
		off = ibuf_size(buf);
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			return -1;

		/* attribute header, defaulting to extended length one */
		if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
			return -1;
		if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
			return -1;
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			return -1;

		/* afi & safi */
		if (aid2afi(aid, &afi, &safi))
			fatalx("up_dump_mp_unreach: bad AID");
		if (ibuf_add_n16(buf, afi) == -1)
			return -1;
		if (ibuf_add_n8(buf, safi) == -1)
			return -1;
	}

	if (up_dump_prefix(buf, &peer->withdraws[aid], peer, 1) == -1)
		return -1;

	/* update length field (either withdrawn routes or attribute length) */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		return -1;

	if (aid != AID_INET) {
		/* write MP_UNREACH_NLRI attribute length (always extended) */
		len -= 4; /* skip attribute header */
		if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
			return -1;
	} else {
		/* no extra attributes so set attribute len to 0 */
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			return -1;
	}

	return 0;
}
1042
1043 /*
1044 * Write UPDATE message for changed and added routes. The size of buf limits
 * how many routes can be added. The function first dumps the path attributes
1046 * and then tries to add as many prefixes using these attributes.
1047 * Return 0 on success -1 on error which includes producing an empty message.
1048 */
int
up_dump_update(struct ibuf *buf, struct rde_peer *peer, uint8_t aid)
{
	struct bgpd_addr addr;
	struct prefix *p;
	size_t off;
	uint16_t len;

	/*
	 * the first pending prefix determines the shared path attributes
	 * for this message; up_dump_prefix() only emits prefixes that
	 * carry the same attributes.
	 */
	p = RB_MIN(prefix_tree, &peer->updates[aid]);
	if (p == NULL)
		return -1;

	/* withdrawn routes length field is 0 */
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		return -1;

	/* reserve space for 2-byte path attribute length */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		return -1;

	if (up_generate_attr(buf, peer, prefix_aspath(p),
	    prefix_communities(p), prefix_nexthop(p), aid) == -1)
		goto fail;

	if (aid != AID_INET) {
		/* write mp attribute including nlri */

		/*
		 * RFC 7606 wants this to be first but then we need
		 * to use multiple buffers with adjusted length to
		 * merge the attributes together in reverse order of
		 * creation.
		 */
		if (up_generate_mp_reach(buf, peer, prefix_nexthop(p), aid) ==
		    -1)
			goto fail;
	}

	/* update attribute length field */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		return -1;

	if (aid == AID_INET) {
		/* last but not least dump the IPv4 nlri */
		if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
			goto fail;
	}

	return 0;

 fail:
	/* Not enough space. Drop prefix, it will never fit. */
	pt_getaddr(p->pt, &addr);
	log_peer_warnx(&peer->conf, "dump of path attributes failed, "
	    "prefix %s/%d dropped", log_addr(&addr), p->pt->prefixlen);

	up_prefix_free(&peer->updates[aid], p, peer, 0);
	/* XXX should probably send a withdraw for this prefix */
	return -1;
}
1111