/* $OpenBSD: rde_update.c,v 1.169 2024/09/25 14:46:51 claudio Exp $ */

/*
 * Copyright (c) 2004 Claudio Jeker <claudio@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/tree.h>
21
22 #include <limits.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <stdio.h>
26
27 #include "bgpd.h"
28 #include "session.h"
29 #include "rde.h"
30 #include "log.h"
31
/* Result of pushing a single prefix through up_process_prefix(). */
enum up_state {
	UP_OK,		/* prefix was added/updated in the Adj-RIB-Out */
	UP_ERR_LIMIT,	/* peer's outbound prefix limit was reached */
	UP_FILTERED,	/* prefix was denied by the output filters */
	UP_EXCLUDED,	/* prefix was excluded by up_test_update() */
};
38
39 static struct community comm_no_advertise = {
40 .flags = COMMUNITY_TYPE_BASIC,
41 .data1 = COMMUNITY_WELLKNOWN,
42 .data2 = COMMUNITY_NO_ADVERTISE
43 };
44 static struct community comm_no_export = {
45 .flags = COMMUNITY_TYPE_BASIC,
46 .data1 = COMMUNITY_WELLKNOWN,
47 .data2 = COMMUNITY_NO_EXPORT
48 };
49 static struct community comm_no_expsubconfed = {
50 .flags = COMMUNITY_TYPE_BASIC,
51 .data1 = COMMUNITY_WELLKNOWN,
52 .data2 = COMMUNITY_NO_EXPSUBCONFED
53 };
54
55 static void up_prep_adjout(struct rde_peer *, struct filterstate *, uint8_t);
56
57 static int
up_test_update(struct rde_peer * peer,struct prefix * p)58 up_test_update(struct rde_peer *peer, struct prefix *p)
59 {
60 struct rde_aspath *asp;
61 struct rde_community *comm;
62 struct rde_peer *frompeer;
63
64 frompeer = prefix_peer(p);
65 asp = prefix_aspath(p);
66 comm = prefix_communities(p);
67
68 if (asp == NULL || asp->flags & F_ATTR_PARSE_ERR)
69 fatalx("try to send out a botched path");
70 if (asp->flags & (F_ATTR_LOOP | F_ATTR_OTC_LEAK))
71 fatalx("try to send out a looped path");
72
73 if (peer == frompeer)
74 /* Do not send routes back to sender */
75 return (0);
76
77 if (!frompeer->conf.ebgp && !peer->conf.ebgp) {
78 /*
79 * route reflector redistribution rules:
80 * 1. if announce is set -> announce
81 * 2. from non-client, to non-client -> no
82 * 3. from client, to non-client -> yes
83 * 4. from non-client, to client -> yes
84 * 5. from client, to client -> yes
85 */
86 if (frompeer->conf.reflector_client == 0 &&
87 peer->conf.reflector_client == 0 &&
88 (asp->flags & F_PREFIX_ANNOUNCED) == 0)
89 /* Do not redistribute updates to ibgp peers */
90 return (0);
91 }
92
93 /* well known communities */
94 if (community_match(comm, &comm_no_advertise, NULL))
95 return (0);
96 if (peer->conf.ebgp) {
97 if (community_match(comm, &comm_no_export, NULL))
98 return (0);
99 if (community_match(comm, &comm_no_expsubconfed, NULL))
100 return (0);
101 }
102
103 return (1);
104 }
105
106 /* RFC9234 open policy handling */
107 static int
up_enforce_open_policy(struct rde_peer * peer,struct filterstate * state,uint8_t aid)108 up_enforce_open_policy(struct rde_peer *peer, struct filterstate *state,
109 uint8_t aid)
110 {
111 /* only for IPv4 and IPv6 unicast */
112 if (aid != AID_INET && aid != AID_INET6)
113 return 0;
114
115 /*
116 * do not propagate (consider it filtered) if OTC is present and
117 * local role is peer, customer or rs-client.
118 */
119 if (peer->role == ROLE_PEER || peer->role == ROLE_CUSTOMER ||
120 peer->role == ROLE_RS_CLIENT)
121 if (state->aspath.flags & F_ATTR_OTC)
122 return 1;
123
124 /*
125 * add OTC attribute if not present towards peers, customers and
126 * rs-clients (local roles peer, provider, rs).
127 */
128 if (peer->role == ROLE_PEER || peer->role == ROLE_PROVIDER ||
129 peer->role == ROLE_RS)
130 if ((state->aspath.flags & F_ATTR_OTC) == 0) {
131 uint32_t tmp;
132
133 tmp = htonl(peer->conf.local_as);
134 if (attr_optadd(&state->aspath,
135 ATTR_OPTIONAL|ATTR_TRANSITIVE, ATTR_OTC,
136 &tmp, sizeof(tmp)) == -1)
137 log_peer_warnx(&peer->conf,
138 "failed to add OTC attribute");
139 state->aspath.flags |= F_ATTR_OTC;
140 }
141
142 return 0;
143 }
144
145 /*
146 * Process a single prefix by passing it through the various filter stages
147 * and if not filtered out update the Adj-RIB-Out. Returns:
148 * - UP_OK if prefix was added
149 * - UP_ERR_LIMIT if the peer outbound prefix limit was reached
150 * - UP_FILTERED if prefix was filtered out
151 * - UP_EXCLUDED if prefix was excluded because of up_test_update()
152 */
153 static enum up_state
up_process_prefix(struct rde_peer * peer,struct prefix * new,struct prefix * p)154 up_process_prefix(struct rde_peer *peer, struct prefix *new, struct prefix *p)
155 {
156 struct filterstate state;
157 struct bgpd_addr addr;
158 int excluded = 0;
159
160 /*
161 * up_test_update() needs to run before the output filters
162 * else the well known communities won't work properly.
163 * The output filters would not be able to add well known
164 * communities.
165 */
166 if (!up_test_update(peer, new))
167 excluded = 1;
168
169 rde_filterstate_prep(&state, new);
170 pt_getaddr(new->pt, &addr);
171 if (rde_filter(peer->out_rules, peer, prefix_peer(new), &addr,
172 new->pt->prefixlen, &state) == ACTION_DENY) {
173 rde_filterstate_clean(&state);
174 return UP_FILTERED;
175 }
176
177 /* Open Policy Check: acts like an output filter */
178 if (up_enforce_open_policy(peer, &state, new->pt->aid)) {
179 rde_filterstate_clean(&state);
180 return UP_FILTERED;
181 }
182
183 if (excluded) {
184 rde_filterstate_clean(&state);
185 return UP_EXCLUDED;
186 }
187
188 /* from here on we know this is an update */
189 if (p == (void *)-1)
190 p = prefix_adjout_get(peer, new->path_id_tx, new->pt);
191
192 up_prep_adjout(peer, &state, new->pt->aid);
193 prefix_adjout_update(p, peer, &state, new->pt, new->path_id_tx);
194 rde_filterstate_clean(&state);
195
196 /* max prefix checker outbound */
197 if (peer->conf.max_out_prefix &&
198 peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
199 log_peer_warnx(&peer->conf,
200 "outbound prefix limit reached (>%u/%u)",
201 peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
202 rde_update_err(peer, ERR_CEASE,
203 ERR_CEASE_MAX_SENT_PREFIX, NULL);
204 return UP_ERR_LIMIT;
205 }
206
207 return UP_OK;
208 }
209
210 void
up_generate_updates(struct rde_peer * peer,struct rib_entry * re)211 up_generate_updates(struct rde_peer *peer, struct rib_entry *re)
212 {
213 struct prefix *new, *p;
214
215 p = prefix_adjout_first(peer, re->prefix);
216
217 new = prefix_best(re);
218 while (new != NULL) {
219 switch (up_process_prefix(peer, new, p)) {
220 case UP_OK:
221 case UP_ERR_LIMIT:
222 return;
223 case UP_FILTERED:
224 if (peer->flags & PEERFLAG_EVALUATE_ALL) {
225 new = TAILQ_NEXT(new, entry.list.rib);
226 if (new != NULL && prefix_eligible(new))
227 continue;
228 }
229 goto done;
230 case UP_EXCLUDED:
231 goto done;
232 }
233 }
234
235 done:
236 /* withdraw prefix */
237 if (p != NULL)
238 prefix_adjout_withdraw(p);
239 }
240
241 /*
242 * Generate updates for the add-path send case. Depending on the
243 * peer eval settings prefixes are selected and distributed.
244 * This highly depends on the Adj-RIB-Out to handle prefixes with no
245 * changes gracefully. It may be possible to improve the API so that
246 * less churn is needed.
247 */
248 void
up_generate_addpath(struct rde_peer * peer,struct rib_entry * re)249 up_generate_addpath(struct rde_peer *peer, struct rib_entry *re)
250 {
251 struct prefix *head, *new, *p;
252 int maxpaths = 0, extrapaths = 0, extra;
253 int checkmode = 1;
254
255 head = prefix_adjout_first(peer, re->prefix);
256
257 /* mark all paths as stale */
258 for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
259 p->flags |= PREFIX_FLAG_STALE;
260
261 /* update paths */
262 new = prefix_best(re);
263 while (new != NULL) {
264 /* check limits and stop when a limit is reached */
265 if (peer->eval.maxpaths != 0 &&
266 maxpaths >= peer->eval.maxpaths)
267 break;
268 if (peer->eval.extrapaths != 0 &&
269 extrapaths >= peer->eval.extrapaths)
270 break;
271
272 extra = 1;
273 if (checkmode) {
274 switch (peer->eval.mode) {
275 case ADDPATH_EVAL_BEST:
276 if (new->dmetric == PREFIX_DMETRIC_BEST)
277 extra = 0;
278 else
279 checkmode = 0;
280 break;
281 case ADDPATH_EVAL_ECMP:
282 if (new->dmetric == PREFIX_DMETRIC_BEST ||
283 new->dmetric == PREFIX_DMETRIC_ECMP)
284 extra = 0;
285 else
286 checkmode = 0;
287 break;
288 case ADDPATH_EVAL_AS_WIDE:
289 if (new->dmetric == PREFIX_DMETRIC_BEST ||
290 new->dmetric == PREFIX_DMETRIC_ECMP ||
291 new->dmetric == PREFIX_DMETRIC_AS_WIDE)
292 extra = 0;
293 else
294 checkmode = 0;
295 break;
296 case ADDPATH_EVAL_ALL:
297 /* nothing to check */
298 checkmode = 0;
299 break;
300 default:
301 fatalx("unknown add-path eval mode");
302 }
303 }
304
305 switch (up_process_prefix(peer, new, (void *)-1)) {
306 case UP_OK:
307 maxpaths++;
308 extrapaths += extra;
309 break;
310 case UP_FILTERED:
311 case UP_EXCLUDED:
312 break;
313 case UP_ERR_LIMIT:
314 /* just give up */
315 return;
316 }
317
318 /* only allow valid prefixes */
319 new = TAILQ_NEXT(new, entry.list.rib);
320 if (new == NULL || !prefix_eligible(new))
321 break;
322 }
323
324 /* withdraw stale paths */
325 for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
326 if (p->flags & PREFIX_FLAG_STALE)
327 prefix_adjout_withdraw(p);
328 }
329 }
330
331 /*
332 * Generate updates for the add-path send all case. Since all prefixes
333 * are distributed just remove old and add new.
334 */
335 void
up_generate_addpath_all(struct rde_peer * peer,struct rib_entry * re,struct prefix * new,struct prefix * old)336 up_generate_addpath_all(struct rde_peer *peer, struct rib_entry *re,
337 struct prefix *new, struct prefix *old)
338 {
339 struct prefix *p, *head = NULL;
340 int all = 0;
341
342 /*
343 * if old and new are NULL then insert all prefixes from best,
344 * clearing old routes in the process
345 */
346 if (old == NULL && new == NULL) {
347 /* mark all paths as stale */
348 head = prefix_adjout_first(peer, re->prefix);
349 for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
350 p->flags |= PREFIX_FLAG_STALE;
351
352 new = prefix_best(re);
353 all = 1;
354 }
355
356 if (new != NULL && !prefix_eligible(new)) {
357 /* only allow valid prefixes */
358 new = NULL;
359 }
360
361 if (old != NULL) {
362 /* withdraw stale paths */
363 p = prefix_adjout_get(peer, old->path_id_tx, old->pt);
364 if (p != NULL)
365 prefix_adjout_withdraw(p);
366 }
367
368 /* add new path (or multiple if all is set) */
369 while (new != NULL) {
370 switch (up_process_prefix(peer, new, (void *)-1)) {
371 case UP_OK:
372 case UP_FILTERED:
373 case UP_EXCLUDED:
374 break;
375 case UP_ERR_LIMIT:
376 /* just give up */
377 return;
378 }
379
380 if (!all)
381 break;
382
383 /* only allow valid prefixes */
384 new = TAILQ_NEXT(new, entry.list.rib);
385 if (new == NULL || !prefix_eligible(new))
386 break;
387 }
388
389 if (all) {
390 /* withdraw stale paths */
391 for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
392 if (p->flags & PREFIX_FLAG_STALE)
393 prefix_adjout_withdraw(p);
394 }
395 }
396 }
397
398 /* send a default route to the specified peer */
399 void
up_generate_default(struct rde_peer * peer,uint8_t aid)400 up_generate_default(struct rde_peer *peer, uint8_t aid)
401 {
402 extern struct rde_peer *peerself;
403 struct filterstate state;
404 struct rde_aspath *asp;
405 struct prefix *p;
406 struct pt_entry *pte;
407 struct bgpd_addr addr;
408
409 if (peer->capa.mp[aid] == 0)
410 return;
411
412 rde_filterstate_init(&state);
413 asp = &state.aspath;
414 asp->aspath = aspath_get(NULL, 0);
415 asp->origin = ORIGIN_IGP;
416 rde_filterstate_set_vstate(&state, ROA_NOTFOUND, ASPA_NEVER_KNOWN);
417 /* the other default values are OK, nexthop is once again NULL */
418
419 /*
420 * XXX apply default overrides. Not yet possible, mainly a parse.y
421 * problem.
422 */
423 /* rde_apply_set(asp, peerself, peerself, set, af); */
424
425 memset(&addr, 0, sizeof(addr));
426 addr.aid = aid;
427 p = prefix_adjout_lookup(peer, &addr, 0);
428
429 /* outbound filter as usual */
430 if (rde_filter(peer->out_rules, peer, peerself, &addr, 0, &state) ==
431 ACTION_DENY) {
432 rde_filterstate_clean(&state);
433 return;
434 }
435
436 up_prep_adjout(peer, &state, addr.aid);
437 /* can't use pt_fill here since prefix_adjout_update keeps a ref */
438 pte = pt_get(&addr, 0);
439 if (pte == NULL)
440 pte = pt_add(&addr, 0);
441 prefix_adjout_update(p, peer, &state, pte, 0);
442 rde_filterstate_clean(&state);
443
444 /* max prefix checker outbound */
445 if (peer->conf.max_out_prefix &&
446 peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
447 log_peer_warnx(&peer->conf,
448 "outbound prefix limit reached (>%u/%u)",
449 peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
450 rde_update_err(peer, ERR_CEASE,
451 ERR_CEASE_MAX_SENT_PREFIX, NULL);
452 }
453 }
454
455 static struct bgpd_addr *
up_get_nexthop(struct rde_peer * peer,struct filterstate * state,uint8_t aid)456 up_get_nexthop(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
457 {
458 struct bgpd_addr *peer_local = NULL;
459
460 switch (aid) {
461 case AID_INET:
462 case AID_VPN_IPv4:
463 if (peer->local_v4_addr.aid == AID_INET)
464 peer_local = &peer->local_v4_addr;
465 break;
466 case AID_INET6:
467 case AID_VPN_IPv6:
468 if (peer->local_v6_addr.aid == AID_INET6)
469 peer_local = &peer->local_v6_addr;
470 break;
471 case AID_FLOWSPECv4:
472 case AID_FLOWSPECv6:
473 /* flowspec has no nexthop */
474 return (NULL);
475 default:
476 fatalx("%s, bad AID %s", __func__, aid2str(aid));
477 }
478
479 if (state->nhflags & NEXTHOP_SELF) {
480 /*
481 * Forcing the nexthop to self is always possible
482 * and has precedence over other flags.
483 */
484 return (peer_local);
485 } else if (!peer->conf.ebgp) {
486 /*
487 * in the ibgp case the nexthop is normally not
488 * modified unless it points at the peer itself.
489 */
490 if (state->nexthop == NULL) {
491 /* announced networks without explicit nexthop set */
492 return (peer_local);
493 }
494 /*
495 * per RFC: if remote peer address is equal to the nexthop set
496 * the nexthop to our local address. This reduces the risk of
497 * routing loops. This overrides NEXTHOP_NOMODIFY.
498 */
499 if (memcmp(&state->nexthop->exit_nexthop,
500 &peer->remote_addr, sizeof(peer->remote_addr)) == 0) {
501 return (peer_local);
502 }
503 return (&state->nexthop->exit_nexthop);
504 } else if (peer->conf.distance == 1) {
505 /*
506 * In the ebgp directly connected case never send
507 * out a nexthop that is outside of the connected
508 * network of the peer. No matter what flags are
509 * set. This follows section 5.1.3 of RFC 4271.
510 * So just check if the nexthop is in the same net
511 * is enough here.
512 */
513 if (state->nexthop != NULL &&
514 state->nexthop->flags & NEXTHOP_CONNECTED &&
515 prefix_compare(&peer->remote_addr,
516 &state->nexthop->nexthop_net,
517 state->nexthop->nexthop_netlen) == 0) {
518 /* nexthop and peer are in the same net */
519 return (&state->nexthop->exit_nexthop);
520 }
521 return (peer_local);
522 } else {
523 /*
524 * For ebgp multihop make it possible to overrule
525 * the sent nexthop by setting NEXTHOP_NOMODIFY.
526 * Similar to the ibgp case there is no same net check
527 * needed but still ensure that the nexthop is not
528 * pointing to the peer itself.
529 */
530 if (state->nhflags & NEXTHOP_NOMODIFY &&
531 state->nexthop != NULL &&
532 memcmp(&state->nexthop->exit_nexthop,
533 &peer->remote_addr, sizeof(peer->remote_addr)) != 0) {
534 /* no modify flag set and nexthop not peer addr */
535 return (&state->nexthop->exit_nexthop);
536 }
537 return (peer_local);
538 }
539 }
540
541 static void
up_prep_adjout(struct rde_peer * peer,struct filterstate * state,uint8_t aid)542 up_prep_adjout(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
543 {
544 struct bgpd_addr *nexthop;
545 struct nexthop *nh = NULL;
546 u_char *np;
547 uint16_t nl;
548
549 /* prepend local AS number for eBGP sessions. */
550 if (peer->conf.ebgp && (peer->flags & PEERFLAG_TRANS_AS) == 0) {
551 uint32_t prep_as = peer->conf.local_as;
552 np = aspath_prepend(state->aspath.aspath, prep_as, 1, &nl);
553 aspath_put(state->aspath.aspath);
554 state->aspath.aspath = aspath_get(np, nl);
555 free(np);
556 }
557
558 /* update nexthop */
559 nexthop = up_get_nexthop(peer, state, aid);
560 if (nexthop != NULL)
561 nh = nexthop_get(nexthop);
562 nexthop_unref(state->nexthop);
563 state->nexthop = nh;
564 state->nhflags = 0;
565 }
566
567
568 static int
up_generate_attr(struct ibuf * buf,struct rde_peer * peer,struct rde_aspath * asp,struct rde_community * comm,struct nexthop * nh,uint8_t aid)569 up_generate_attr(struct ibuf *buf, struct rde_peer *peer,
570 struct rde_aspath *asp, struct rde_community *comm, struct nexthop *nh,
571 uint8_t aid)
572 {
573 struct attr *oa = NULL, *newaggr = NULL;
574 u_char *pdata;
575 uint32_t tmp32;
576 int flags, neednewpath = 0, rv;
577 uint16_t plen;
578 uint8_t oalen = 0, type;
579
580 if (asp->others_len > 0)
581 oa = asp->others[oalen++];
582
583 /* dump attributes in ascending order */
584 for (type = ATTR_ORIGIN; type < 255; type++) {
585 while (oa && oa->type < type) {
586 if (oalen < asp->others_len)
587 oa = asp->others[oalen++];
588 else
589 oa = NULL;
590 }
591
592 switch (type) {
593 /*
594 * Attributes stored in rde_aspath
595 */
596 case ATTR_ORIGIN:
597 if (attr_writebuf(buf, ATTR_WELL_KNOWN,
598 ATTR_ORIGIN, &asp->origin, 1) == -1)
599 return -1;
600 break;
601 case ATTR_ASPATH:
602 plen = aspath_length(asp->aspath);
603 pdata = aspath_dump(asp->aspath);
604
605 if (!peer_has_as4byte(peer))
606 pdata = aspath_deflate(pdata, &plen,
607 &neednewpath);
608 rv = attr_writebuf(buf, ATTR_WELL_KNOWN,
609 ATTR_ASPATH, pdata, plen);
610 if (!peer_has_as4byte(peer))
611 free(pdata);
612
613 if (rv == -1)
614 return -1;
615 break;
616 case ATTR_NEXTHOP:
617 switch (aid) {
618 case AID_INET:
619 if (nh == NULL)
620 return -1;
621 if (attr_writebuf(buf, ATTR_WELL_KNOWN,
622 ATTR_NEXTHOP, &nh->exit_nexthop.v4,
623 sizeof(nh->exit_nexthop.v4)) == -1)
624 return -1;
625 break;
626 default:
627 break;
628 }
629 break;
630 case ATTR_MED:
631 /*
632 * The old MED from other peers MUST not be announced
633 * to others unless the MED is originating from us or
634 * the peer is an IBGP one. Only exception are routers
635 * with "transparent-as yes" set.
636 */
637 if (asp->flags & F_ATTR_MED && (!peer->conf.ebgp ||
638 asp->flags & F_ATTR_MED_ANNOUNCE ||
639 peer->flags & PEERFLAG_TRANS_AS)) {
640 tmp32 = htonl(asp->med);
641 if (attr_writebuf(buf, ATTR_OPTIONAL,
642 ATTR_MED, &tmp32, 4) == -1)
643 return -1;
644 }
645 break;
646 case ATTR_LOCALPREF:
647 if (!peer->conf.ebgp) {
648 /* local preference, only valid for ibgp */
649 tmp32 = htonl(asp->lpref);
650 if (attr_writebuf(buf, ATTR_WELL_KNOWN,
651 ATTR_LOCALPREF, &tmp32, 4) == -1)
652 return -1;
653 }
654 break;
655 /*
656 * Communities are stored in struct rde_community
657 */
658 case ATTR_COMMUNITIES:
659 case ATTR_EXT_COMMUNITIES:
660 case ATTR_LARGE_COMMUNITIES:
661 if (community_writebuf(comm, type, peer->conf.ebgp,
662 buf) == -1)
663 return -1;
664 break;
665 /*
666 * NEW to OLD conversion when sending stuff to a 2byte AS peer
667 */
668 case ATTR_AS4_PATH:
669 if (neednewpath) {
670 plen = aspath_length(asp->aspath);
671 pdata = aspath_dump(asp->aspath);
672
673 flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
674 if (!(asp->flags & F_PREFIX_ANNOUNCED))
675 flags |= ATTR_PARTIAL;
676 if (plen != 0)
677 if (attr_writebuf(buf, flags,
678 ATTR_AS4_PATH, pdata, plen) == -1)
679 return -1;
680 }
681 break;
682 case ATTR_AS4_AGGREGATOR:
683 if (newaggr) {
684 flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
685 if (!(asp->flags & F_PREFIX_ANNOUNCED))
686 flags |= ATTR_PARTIAL;
687 if (attr_writebuf(buf, flags,
688 ATTR_AS4_AGGREGATOR, newaggr->data,
689 newaggr->len) == -1)
690 return -1;
691 }
692 break;
693 /*
694 * multiprotocol attributes are handled elsewhere
695 */
696 case ATTR_MP_REACH_NLRI:
697 case ATTR_MP_UNREACH_NLRI:
698 break;
699 /*
700 * dump all other path attributes. Following rules apply:
701 * 1. well-known attrs: ATTR_ATOMIC_AGGREGATE and
702 * ATTR_AGGREGATOR pass unmodified (enforce flags
703 * to correct values). Actually ATTR_AGGREGATOR may be
704 * deflated for OLD 2-byte peers.
705 * 2. non-transitive attrs: don't re-announce to ebgp peers
706 * 3. transitive known attrs: announce unmodified
707 * 4. transitive unknown attrs: set partial bit and re-announce
708 */
709 case ATTR_ATOMIC_AGGREGATE:
710 if (oa == NULL || oa->type != type)
711 break;
712 if (attr_writebuf(buf, ATTR_WELL_KNOWN,
713 ATTR_ATOMIC_AGGREGATE, NULL, 0) == -1)
714 return -1;
715 break;
716 case ATTR_AGGREGATOR:
717 if (oa == NULL || oa->type != type)
718 break;
719 if ((!(oa->flags & ATTR_TRANSITIVE)) &&
720 peer->conf.ebgp)
721 break;
722 if (!peer_has_as4byte(peer)) {
723 /* need to deflate the aggregator */
724 uint8_t t[6];
725 uint16_t tas;
726
727 if ((!(oa->flags & ATTR_TRANSITIVE)) &&
728 peer->conf.ebgp)
729 break;
730
731 memcpy(&tmp32, oa->data, sizeof(tmp32));
732 if (ntohl(tmp32) > USHRT_MAX) {
733 tas = htons(AS_TRANS);
734 newaggr = oa;
735 } else
736 tas = htons(ntohl(tmp32));
737
738 memcpy(t, &tas, sizeof(tas));
739 memcpy(t + sizeof(tas),
740 oa->data + sizeof(tmp32),
741 oa->len - sizeof(tmp32));
742 if (attr_writebuf(buf, oa->flags,
743 oa->type, &t, sizeof(t)) == -1)
744 return -1;
745 } else {
746 if (attr_writebuf(buf, oa->flags, oa->type,
747 oa->data, oa->len) == -1)
748 return -1;
749 }
750 break;
751 case ATTR_ORIGINATOR_ID:
752 case ATTR_CLUSTER_LIST:
753 case ATTR_OTC:
754 if (oa == NULL || oa->type != type)
755 break;
756 if ((!(oa->flags & ATTR_TRANSITIVE)) &&
757 peer->conf.ebgp)
758 break;
759 if (attr_writebuf(buf, oa->flags, oa->type,
760 oa->data, oa->len) == -1)
761 return -1;
762 break;
763 default:
764 if (oa == NULL && type >= ATTR_FIRST_UNKNOWN)
765 /* there is no attribute left to dump */
766 return (0);
767
768 if (oa == NULL || oa->type != type)
769 break;
770 /* unknown attribute */
771 if (!(oa->flags & ATTR_TRANSITIVE)) {
772 /*
773 * RFC 1771:
774 * Unrecognized non-transitive optional
775 * attributes must be quietly ignored and
776 * not passed along to other BGP peers.
777 */
778 break;
779 }
780 if (attr_writebuf(buf, oa->flags | ATTR_PARTIAL,
781 oa->type, oa->data, oa->len) == -1)
782 return -1;
783 }
784 }
785 return 0;
786 }
787
788 /*
789 * Check if the pending element is a EoR marker. If so remove it from the
790 * tree and return 1.
791 */
792 int
up_is_eor(struct rde_peer * peer,uint8_t aid)793 up_is_eor(struct rde_peer *peer, uint8_t aid)
794 {
795 struct prefix *p;
796
797 p = RB_MIN(prefix_tree, &peer->updates[aid]);
798 if (p != NULL && (p->flags & PREFIX_FLAG_EOR)) {
799 /*
800 * Need to remove eor from update tree because
801 * prefix_adjout_destroy() can't handle that.
802 */
803 RB_REMOVE(prefix_tree, &peer->updates[aid], p);
804 p->flags &= ~PREFIX_FLAG_UPDATE;
805 prefix_adjout_destroy(p);
806 return 1;
807 }
808 return 0;
809 }
810
811 /* minimal buffer size > withdraw len + attr len + attr hdr + afi/safi */
812 #define MIN_UPDATE_LEN 16
813
814 static void
up_prefix_free(struct prefix_tree * prefix_head,struct prefix * p,struct rde_peer * peer,int withdraw)815 up_prefix_free(struct prefix_tree *prefix_head, struct prefix *p,
816 struct rde_peer *peer, int withdraw)
817 {
818 if (withdraw) {
819 /* prefix no longer needed, remove it */
820 prefix_adjout_destroy(p);
821 peer->stats.prefix_sent_withdraw++;
822 } else {
823 /* prefix still in Adj-RIB-Out, keep it */
824 RB_REMOVE(prefix_tree, prefix_head, p);
825 p->flags &= ~PREFIX_FLAG_UPDATE;
826 peer->stats.pending_update--;
827 peer->stats.prefix_sent_update++;
828 }
829 }
830
831 /*
832 * Write prefixes to buffer until either there is no more space or
833 * the next prefix has no longer the same ASPATH attributes.
834 * Returns -1 if no prefix was written else 0.
835 */
836 static int
up_dump_prefix(struct ibuf * buf,struct prefix_tree * prefix_head,struct rde_peer * peer,int withdraw)837 up_dump_prefix(struct ibuf *buf, struct prefix_tree *prefix_head,
838 struct rde_peer *peer, int withdraw)
839 {
840 struct prefix *p, *np;
841 int done = 0, has_ap = -1, rv = -1;
842
843 RB_FOREACH_SAFE(p, prefix_tree, prefix_head, np) {
844 if (has_ap == -1)
845 has_ap = peer_has_add_path(peer, p->pt->aid,
846 CAPA_AP_SEND);
847 if (pt_writebuf(buf, p->pt, withdraw, has_ap, p->path_id_tx) ==
848 -1)
849 break;
850
851 /* make sure we only dump prefixes which belong together */
852 if (np == NULL ||
853 np->aspath != p->aspath ||
854 np->communities != p->communities ||
855 np->nexthop != p->nexthop ||
856 np->nhflags != p->nhflags ||
857 (np->flags & PREFIX_FLAG_EOR))
858 done = 1;
859
860 rv = 0;
861 up_prefix_free(prefix_head, p, peer, withdraw);
862 if (done)
863 break;
864 }
865 return rv;
866 }
867
868 static int
up_generate_mp_reach(struct ibuf * buf,struct rde_peer * peer,struct nexthop * nh,uint8_t aid)869 up_generate_mp_reach(struct ibuf *buf, struct rde_peer *peer,
870 struct nexthop *nh, uint8_t aid)
871 {
872 struct bgpd_addr *nexthop;
873 size_t off;
874 uint16_t len, afi;
875 uint8_t safi;
876
877 /* attribute header, defaulting to extended length one */
878 if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
879 return -1;
880 if (ibuf_add_n8(buf, ATTR_MP_REACH_NLRI) == -1)
881 return -1;
882 off = ibuf_size(buf);
883 if (ibuf_add_zero(buf, sizeof(len)) == -1)
884 return -1;
885
886 if (aid2afi(aid, &afi, &safi))
887 fatalx("up_generate_mp_reach: bad AID");
888
889 /* AFI + SAFI + NH LEN + NH + Reserved */
890 if (ibuf_add_n16(buf, afi) == -1)
891 return -1;
892 if (ibuf_add_n8(buf, safi) == -1)
893 return -1;
894
895 switch (aid) {
896 case AID_INET6:
897 if (nh == NULL)
898 return -1;
899 /* NH LEN */
900 if (ibuf_add_n8(buf, sizeof(struct in6_addr)) == -1)
901 return -1;
902 /* write nexthop */
903 nexthop = &nh->exit_nexthop;
904 if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1)
905 return -1;
906 break;
907 case AID_VPN_IPv4:
908 if (nh == NULL)
909 return -1;
910 /* NH LEN */
911 if (ibuf_add_n8(buf,
912 sizeof(uint64_t) + sizeof(struct in_addr)) == -1)
913 return -1;
914 /* write zero rd */
915 if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
916 return -1;
917 /* write nexthop */
918 nexthop = &nh->exit_nexthop;
919 if (ibuf_add(buf, &nexthop->v4, sizeof(struct in_addr)) == -1)
920 return -1;
921 break;
922 case AID_VPN_IPv6:
923 if (nh == NULL)
924 return -1;
925 /* NH LEN */
926 if (ibuf_add_n8(buf,
927 sizeof(uint64_t) + sizeof(struct in6_addr)) == -1)
928 return -1;
929 /* write zero rd */
930 if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
931 return -1;
932 /* write nexthop */
933 nexthop = &nh->exit_nexthop;
934 if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1)
935 return -1;
936 break;
937 case AID_FLOWSPECv4:
938 case AID_FLOWSPECv6:
939 if (ibuf_add_zero(buf, 1) == -1) /* NH LEN MUST be 0 */
940 return -1;
941 /* no NH */
942 break;
943 default:
944 fatalx("up_generate_mp_reach: unknown AID");
945 }
946
947 if (ibuf_add_zero(buf, 1) == -1) /* Reserved must be 0 */
948 return -1;
949
950 if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
951 /* no prefixes written, fail update */
952 return -1;
953
954 /* update MP_REACH attribute length field */
955 len = ibuf_size(buf) - off - sizeof(len);
956 if (ibuf_set_n16(buf, off, len) == -1)
957 return -1;
958
959 return 0;
960 }
961
/*
 * Generate UPDATE message containing either just withdraws or updates.
 * UPDATE messages are constructed like this:
 *
 *    +-----------------------------------------------------+
 *    |    Withdrawn Routes Length (2 octets)               |
 *    +-----------------------------------------------------+
 *    |    Withdrawn Routes (variable)                      |
 *    +-----------------------------------------------------+
 *    |    Total Path Attribute Length (2 octets)           |
 *    +-----------------------------------------------------+
 *    |    Path Attributes (variable)                       |
 *    +-----------------------------------------------------+
 *    |    Network Layer Reachability Information (variable)|
 *    +-----------------------------------------------------+
 *
 * Multiprotocol messages use MP_REACH_NLRI and MP_UNREACH_NLRI
 * the latter will be the only path attribute in a message.
 */
981
982 /*
983 * Write UPDATE message for withdrawn routes. The size of buf limits
984 * how may routes can be added. Return 0 on success -1 on error which
985 * includes generating an empty withdraw message.
986 */
987 struct ibuf *
up_dump_withdraws(struct rde_peer * peer,uint8_t aid)988 up_dump_withdraws(struct rde_peer *peer, uint8_t aid)
989 {
990 struct ibuf *buf;
991 size_t off;
992 uint16_t afi, len;
993 uint8_t safi;
994
995 if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) == NULL)
996 goto fail;
997
998 /* reserve space for the withdrawn routes length field */
999 off = ibuf_size(buf);
1000 if (ibuf_add_zero(buf, sizeof(len)) == -1)
1001 goto fail;
1002
1003 if (aid != AID_INET) {
1004 /* reserve space for 2-byte path attribute length */
1005 off = ibuf_size(buf);
1006 if (ibuf_add_zero(buf, sizeof(len)) == -1)
1007 goto fail;
1008
1009 /* attribute header, defaulting to extended length one */
1010 if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
1011 goto fail;
1012 if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
1013 goto fail;
1014 if (ibuf_add_zero(buf, sizeof(len)) == -1)
1015 goto fail;
1016
1017 /* afi & safi */
1018 if (aid2afi(aid, &afi, &safi))
1019 fatalx("%s: bad AID", __func__);
1020 if (ibuf_add_n16(buf, afi) == -1)
1021 goto fail;
1022 if (ibuf_add_n8(buf, safi) == -1)
1023 goto fail;
1024 }
1025
1026 if (up_dump_prefix(buf, &peer->withdraws[aid], peer, 1) == -1)
1027 goto fail;
1028
1029 /* update length field (either withdrawn routes or attribute length) */
1030 len = ibuf_size(buf) - off - sizeof(len);
1031 if (ibuf_set_n16(buf, off, len) == -1)
1032 goto fail;
1033
1034 if (aid != AID_INET) {
1035 /* write MP_UNREACH_NLRI attribute length (always extended) */
1036 len -= 4; /* skip attribute header */
1037 if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
1038 goto fail;
1039 } else {
1040 /* no extra attributes so set attribute len to 0 */
1041 if (ibuf_add_zero(buf, sizeof(len)) == -1) {
1042 goto fail;
1043 }
1044 }
1045
1046 return buf;
1047
1048 fail:
1049 /* something went horribly wrong */
1050 log_peer_warn(&peer->conf, "generating withdraw failed, peer desynced");
1051 ibuf_free(buf);
1052 return NULL;
1053 }
1054
1055 /*
1056 * Withdraw a single prefix after an error.
1057 */
1058 static struct ibuf *
up_dump_withdraw_one(struct rde_peer * peer,struct prefix * p,struct ibuf * buf)1059 up_dump_withdraw_one(struct rde_peer *peer, struct prefix *p, struct ibuf *buf)
1060 {
1061 size_t off;
1062 int has_ap;
1063 uint16_t afi, len;
1064 uint8_t safi;
1065
1066 /* reset the buffer and start fresh */
1067 ibuf_truncate(buf, 0);
1068
1069 /* reserve space for the withdrawn routes length field */
1070 off = ibuf_size(buf);
1071 if (ibuf_add_zero(buf, sizeof(len)) == -1)
1072 goto fail;
1073
1074 if (p->pt->aid != AID_INET) {
1075 /* reserve space for 2-byte path attribute length */
1076 off = ibuf_size(buf);
1077 if (ibuf_add_zero(buf, sizeof(len)) == -1)
1078 goto fail;
1079
1080 /* attribute header, defaulting to extended length one */
1081 if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
1082 goto fail;
1083 if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
1084 goto fail;
1085 if (ibuf_add_zero(buf, sizeof(len)) == -1)
1086 goto fail;
1087
1088 /* afi & safi */
1089 if (aid2afi(p->pt->aid, &afi, &safi))
1090 fatalx("%s: bad AID", __func__);
1091 if (ibuf_add_n16(buf, afi) == -1)
1092 goto fail;
1093 if (ibuf_add_n8(buf, safi) == -1)
1094 goto fail;
1095 }
1096
1097 has_ap = peer_has_add_path(peer, p->pt->aid, CAPA_AP_SEND);
1098 if (pt_writebuf(buf, p->pt, 1, has_ap, p->path_id_tx) == -1)
1099 goto fail;
1100
1101 /* update length field (either withdrawn routes or attribute length) */
1102 len = ibuf_size(buf) - off - sizeof(len);
1103 if (ibuf_set_n16(buf, off, len) == -1)
1104 goto fail;
1105
1106 if (p->pt->aid != AID_INET) {
1107 /* write MP_UNREACH_NLRI attribute length (always extended) */
1108 len -= 4; /* skip attribute header */
1109 if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
1110 goto fail;
1111 } else {
1112 /* no extra attributes so set attribute len to 0 */
1113 if (ibuf_add_zero(buf, sizeof(len)) == -1) {
1114 goto fail;
1115 }
1116 }
1117
1118 return buf;
1119
1120 fail:
1121 /* something went horribly wrong */
1122 log_peer_warn(&peer->conf, "generating withdraw failed, peer desynced");
1123 ibuf_free(buf);
1124 return NULL;
1125 }
1126
1127 /*
1128 * Write UPDATE message for changed and added routes. The size of buf limits
1129 * how may routes can be added. The function first dumps the path attributes
1130 * and then tries to add as many prefixes using these attributes.
1131 * Return 0 on success -1 on error which includes producing an empty message.
1132 */
1133 struct ibuf *
up_dump_update(struct rde_peer * peer,uint8_t aid)1134 up_dump_update(struct rde_peer *peer, uint8_t aid)
1135 {
1136 struct ibuf *buf;
1137 struct bgpd_addr addr;
1138 struct prefix *p;
1139 size_t off;
1140 uint16_t len;
1141
1142 p = RB_MIN(prefix_tree, &peer->updates[aid]);
1143 if (p == NULL)
1144 return NULL;
1145
1146 if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) == NULL)
1147 goto fail;
1148
1149 /* withdrawn routes length field is 0 */
1150 if (ibuf_add_zero(buf, sizeof(len)) == -1)
1151 goto fail;
1152
1153 /* reserve space for 2-byte path attribute length */
1154 off = ibuf_size(buf);
1155 if (ibuf_add_zero(buf, sizeof(len)) == -1)
1156 goto fail;
1157
1158 if (up_generate_attr(buf, peer, prefix_aspath(p),
1159 prefix_communities(p), prefix_nexthop(p), aid) == -1)
1160 goto drop;
1161
1162 if (aid != AID_INET) {
1163 /* write mp attribute including nlri */
1164
1165 /*
1166 * RFC 7606 wants this to be first but then we need
1167 * to use multiple buffers with adjusted length to
1168 * merge the attributes together in reverse order of
1169 * creation.
1170 */
1171 if (up_generate_mp_reach(buf, peer, prefix_nexthop(p), aid) ==
1172 -1)
1173 goto drop;
1174 }
1175
1176 /* update attribute length field */
1177 len = ibuf_size(buf) - off - sizeof(len);
1178 if (ibuf_set_n16(buf, off, len) == -1)
1179 goto fail;
1180
1181 if (aid == AID_INET) {
1182 /* last but not least dump the IPv4 nlri */
1183 if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
1184 goto drop;
1185 }
1186
1187 return buf;
1188
1189 drop:
1190 /* Not enough space. Drop current prefix, it will never fit. */
1191 p = RB_MIN(prefix_tree, &peer->updates[aid]);
1192 pt_getaddr(p->pt, &addr);
1193 log_peer_warnx(&peer->conf, "generating update failed, "
1194 "prefix %s/%d dropped", log_addr(&addr), p->pt->prefixlen);
1195
1196 up_prefix_free(&peer->updates[aid], p, peer, 0);
1197 return up_dump_withdraw_one(peer, p, buf);
1198
1199 fail:
1200 /* something went horribly wrong */
1201 log_peer_warn(&peer->conf, "generating update failed, peer desynced");
1202 ibuf_free(buf);
1203 return NULL;
1204 }
1205