/* $OpenBSD: rde_update.c,v 1.174 2025/01/13 13:50:34 claudio Exp $ */

/*
 * Copyright (c) 2004 Claudio Jeker <claudio@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/tree.h>

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#include "bgpd.h"
#include "session.h"
#include "rde.h"
#include "log.h"

enum up_state {
	UP_OK,
	UP_ERR_LIMIT,
	UP_FILTERED,
	UP_EXCLUDED,
};

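/*
 * Matchers for the RFC 1997 well-known communities NO_EXPORT (0xFFFFFF01),
 * NO_ADVERTISE (0xFFFFFF02) and NO_EXPORT_SUBCONFED (0xFFFFFF03); data1
 * carries the well-known high 16 bits, data2 the low 16 bits. These are
 * honored on output by up_test_update() below.
 */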
static struct community comm_no_advertise = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_ADVERTISE
};
static struct community comm_no_export = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_EXPORT
};
static struct community comm_no_expsubconfed = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_EXPSUBCONFED
};

static void up_prep_adjout(struct rde_peer *, struct filterstate *, uint8_t);

static int
up_test_update(struct rde_peer *peer, struct prefix *p)
{
	struct rde_aspath *asp;
	struct rde_community *comm;
	struct rde_peer *frompeer;

	frompeer = prefix_peer(p);
	asp = prefix_aspath(p);
	comm = prefix_communities(p);

	if (asp == NULL || asp->flags & F_ATTR_PARSE_ERR)
		fatalx("try to send out a botched path");
	if (asp->flags & (F_ATTR_LOOP | F_ATTR_OTC_LEAK))
		fatalx("try to send out a looped path");

	if (peer == frompeer)
		/* Do not send routes back to sender */
		return (0);

	if (!frompeer->conf.ebgp && !peer->conf.ebgp) {
		/*
		 * route reflector redistribution rules:
		 * 1. if announce is set -> announce
		 * 2. from non-client, to non-client -> no
		 * 3. from client, to non-client -> yes
		 * 4. from non-client, to client -> yes
		 * 5. from client, to client -> yes
		 */
		if (frompeer->conf.reflector_client == 0 &&
		    peer->conf.reflector_client == 0 &&
		    (asp->flags & F_PREFIX_ANNOUNCED) == 0)
			/* Do not redistribute updates to ibgp peers */
			return (0);
	}

	/*
	 * With "transparent-as yes" set do not filter based on
	 * well-known communities. Instead pass them on to the client.
	 */
	if (peer->flags & PEERFLAG_TRANS_AS)
		return (1);

	/* well-known communities */
	if (community_match(comm, &comm_no_advertise, NULL))
		return (0);
	if (peer->conf.ebgp) {
		if (community_match(comm, &comm_no_export, NULL))
			return (0);
		if (community_match(comm, &comm_no_expsubconfed, NULL))
			return (0);
	}

	return (1);
}

/* RFC9234 open policy handling */
static int
up_enforce_open_policy(struct rde_peer *peer, struct filterstate *state,
    uint8_t aid)
{
	/* only for IPv4 and IPv6 unicast */
	if (aid != AID_INET && aid != AID_INET6)
		return 0;

	/*
	 * do not propagate (consider it filtered) if OTC is present and
	 * local role is peer, customer or rs-client.
	 */
	if (peer->role == ROLE_PEER || peer->role == ROLE_CUSTOMER ||
	    peer->role == ROLE_RS_CLIENT)
		if (state->aspath.flags & F_ATTR_OTC)
			return 1;

	/*
	 * add OTC attribute if not present towards peers, customers and
	 * rs-clients (local roles peer, provider, rs).
	 */
	if (peer->role == ROLE_PEER || peer->role == ROLE_PROVIDER ||
	    peer->role == ROLE_RS)
		if ((state->aspath.flags & F_ATTR_OTC) == 0) {
			uint32_t tmp;

			tmp = htonl(peer->conf.local_as);
			if (attr_optadd(&state->aspath,
			    ATTR_OPTIONAL|ATTR_TRANSITIVE, ATTR_OTC,
			    &tmp, sizeof(tmp)) == -1)
				log_peer_warnx(&peer->conf,
				    "failed to add OTC attribute");
			state->aspath.flags |= F_ATTR_OTC;
		}

	return 0;
}

/*
 * Process a single prefix by passing it through the various filter stages
 * and if not filtered out update the Adj-RIB-Out. Returns:
 * - UP_OK if prefix was added
 * - UP_ERR_LIMIT if the peer outbound prefix limit was reached
 * - UP_FILTERED if prefix was filtered out
 * - UP_EXCLUDED if prefix was excluded because of up_test_update()
 */
static enum up_state
up_process_prefix(struct rde_peer *peer, struct prefix *new, struct prefix *p)
{
	struct filterstate state;
	struct bgpd_addr addr;
	int excluded = 0;

	/*
	 * up_test_update() needs to run before the output filters
	 * else the well-known communities won't work properly.
	 * The output filters would not be able to add well-known
	 * communities.
	 */
	if (!up_test_update(peer, new))
		excluded = 1;

	rde_filterstate_prep(&state, new);
	pt_getaddr(new->pt, &addr);
	if (rde_filter(peer->out_rules, peer, prefix_peer(new), &addr,
	    new->pt->prefixlen, &state) == ACTION_DENY) {
		rde_filterstate_clean(&state);
		return UP_FILTERED;
	}

	/* Open Policy Check: acts like an output filter */
	if (up_enforce_open_policy(peer, &state, new->pt->aid)) {
		rde_filterstate_clean(&state);
		return UP_FILTERED;
	}

	if (excluded) {
		rde_filterstate_clean(&state);
		return UP_EXCLUDED;
	}

	/* from here on we know this is an update */
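	/*
	 * A p of (void *)-1 is a sentinel used by the add-path callers
	 * meaning no Adj-RIB-Out lookup was done yet, so do it here.
	 */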
	if (p == (void *)-1)
		p = prefix_adjout_get(peer, new->path_id_tx, new->pt);

	up_prep_adjout(peer, &state, new->pt->aid);
	prefix_adjout_update(p, peer, &state, new->pt, new->path_id_tx);
	rde_filterstate_clean(&state);

	/* max prefix checker outbound */
	if (peer->conf.max_out_prefix &&
	    peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
		log_peer_warnx(&peer->conf,
		    "outbound prefix limit reached (>%u/%u)",
		    peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
		rde_update_err(peer, ERR_CEASE,
		    ERR_CEASE_MAX_SENT_PREFIX, NULL);
		return UP_ERR_LIMIT;
	}

	return UP_OK;
}

void
up_generate_updates(struct rde_peer *peer, struct rib_entry *re)
{
	struct prefix *new, *p;

	p = prefix_adjout_first(peer, re->prefix);

	new = prefix_best(re);
	while (new != NULL) {
		switch (up_process_prefix(peer, new, p)) {
		case UP_OK:
		case UP_ERR_LIMIT:
			return;
		case UP_FILTERED:
			if (peer->flags & PEERFLAG_EVALUATE_ALL) {
				new = TAILQ_NEXT(new, entry.list.rib);
				if (new != NULL && prefix_eligible(new))
					continue;
			}
			goto done;
		case UP_EXCLUDED:
			goto done;
		}
	}

done:
	/* withdraw prefix */
	if (p != NULL)
		prefix_adjout_withdraw(p);
}

/*
 * Generate updates for the add-path send case. Depending on the peer's
 * eval settings prefixes are selected and distributed. This relies
 * heavily on the Adj-RIB-Out handling prefixes with no changes
 * gracefully. It may be possible to improve the API so that less churn
 * is needed.
 */
void
up_generate_addpath(struct rde_peer *peer, struct rib_entry *re)
{
	struct prefix *head, *new, *p;
	int maxpaths = 0, extrapaths = 0, extra;
	int checkmode = 1;

	head = prefix_adjout_first(peer, re->prefix);

	/* mark all paths as stale */
	for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
		p->flags |= PREFIX_FLAG_STALE;

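	/*
	 * Paths are visited in decision order, so once a path falls
	 * outside the dmetric set accepted by the eval mode, checkmode
	 * is cleared and all following paths are counted as extra paths.
	 */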
	/* update paths */
	new = prefix_best(re);
	while (new != NULL) {
		/* check limits and stop when a limit is reached */
		if (peer->eval.maxpaths != 0 &&
		    maxpaths >= peer->eval.maxpaths)
			break;
		if (peer->eval.extrapaths != 0 &&
		    extrapaths >= peer->eval.extrapaths)
			break;

		extra = 1;
		if (checkmode) {
			switch (peer->eval.mode) {
			case ADDPATH_EVAL_BEST:
				if (new->dmetric == PREFIX_DMETRIC_BEST)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_ECMP:
				if (new->dmetric == PREFIX_DMETRIC_BEST ||
				    new->dmetric == PREFIX_DMETRIC_ECMP)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_AS_WIDE:
				if (new->dmetric == PREFIX_DMETRIC_BEST ||
				    new->dmetric == PREFIX_DMETRIC_ECMP ||
				    new->dmetric == PREFIX_DMETRIC_AS_WIDE)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_ALL:
				/* nothing to check */
				checkmode = 0;
				break;
			default:
				fatalx("unknown add-path eval mode");
			}
		}

		switch (up_process_prefix(peer, new, (void *)-1)) {
		case UP_OK:
			maxpaths++;
			extrapaths += extra;
			break;
		case UP_FILTERED:
		case UP_EXCLUDED:
			break;
		case UP_ERR_LIMIT:
			/* just give up */
			return;
		}

		/* only allow valid prefixes */
		new = TAILQ_NEXT(new, entry.list.rib);
		if (new == NULL || !prefix_eligible(new))
			break;
	}

	/* withdraw stale paths */
	for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
		if (p->flags & PREFIX_FLAG_STALE)
			prefix_adjout_withdraw(p);
	}
}

/*
 * Generate updates for the add-path send all case. Since all prefixes
 * are distributed just remove old and add new.
 */
void
up_generate_addpath_all(struct rde_peer *peer, struct rib_entry *re,
    struct prefix *new, struct prefix *old)
{
	struct prefix *p, *head = NULL;
	int all = 0;

	/*
	 * if old and new are NULL then insert all prefixes from best,
	 * clearing old routes in the process
	 */
	if (old == NULL && new == NULL) {
		/* mark all paths as stale */
		head = prefix_adjout_first(peer, re->prefix);
		for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
			p->flags |= PREFIX_FLAG_STALE;

		new = prefix_best(re);
		all = 1;
	}

	if (new != NULL && !prefix_eligible(new)) {
		/* only allow valid prefixes */
		new = NULL;
	}

	if (old != NULL) {
		/* withdraw stale paths */
		p = prefix_adjout_get(peer, old->path_id_tx, old->pt);
		if (p != NULL)
			prefix_adjout_withdraw(p);
	}

	/* add new path (or multiple if all is set) */
	while (new != NULL) {
		switch (up_process_prefix(peer, new, (void *)-1)) {
		case UP_OK:
		case UP_FILTERED:
		case UP_EXCLUDED:
			break;
		case UP_ERR_LIMIT:
			/* just give up */
			return;
		}

		if (!all)
			break;

		/* only allow valid prefixes */
		new = TAILQ_NEXT(new, entry.list.rib);
		if (new == NULL || !prefix_eligible(new))
			break;
	}

	if (all) {
		/* withdraw stale paths */
		for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
			if (p->flags & PREFIX_FLAG_STALE)
				prefix_adjout_withdraw(p);
		}
	}
}

/* send a default route to the specified peer */
void
up_generate_default(struct rde_peer *peer, uint8_t aid)
{
	extern struct rde_peer *peerself;
	struct filterstate state;
	struct rde_aspath *asp;
	struct prefix *p;
	struct pt_entry *pte;
	struct bgpd_addr addr;

	if (peer->capa.mp[aid] == 0)
		return;

	rde_filterstate_init(&state);
	asp = &state.aspath;
	asp->aspath = aspath_get(NULL, 0);
	asp->origin = ORIGIN_IGP;
	rde_filterstate_set_vstate(&state, ROA_NOTFOUND, ASPA_NEVER_KNOWN);
	/* the other default values are OK, nexthop is once again NULL */

	/*
	 * XXX apply default overrides. Not yet possible, mainly a parse.y
	 * problem.
	 */
	/* rde_apply_set(asp, peerself, peerself, set, af); */

	memset(&addr, 0, sizeof(addr));
	addr.aid = aid;
	p = prefix_adjout_lookup(peer, &addr, 0);

	/* outbound filter as usual */
	if (rde_filter(peer->out_rules, peer, peerself, &addr, 0, &state) ==
	    ACTION_DENY) {
		rde_filterstate_clean(&state);
		return;
	}

	up_prep_adjout(peer, &state, addr.aid);
	/* can't use pt_fill here since prefix_adjout_update keeps a ref */
	pte = pt_get(&addr, 0);
	if (pte == NULL)
		pte = pt_add(&addr, 0);
	prefix_adjout_update(p, peer, &state, pte, 0);
	rde_filterstate_clean(&state);

	/* max prefix checker outbound */
	if (peer->conf.max_out_prefix &&
	    peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
		log_peer_warnx(&peer->conf,
		    "outbound prefix limit reached (>%u/%u)",
		    peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
		rde_update_err(peer, ERR_CEASE,
		    ERR_CEASE_MAX_SENT_PREFIX, NULL);
	}
}

static struct bgpd_addr *
up_get_nexthop(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
{
	struct bgpd_addr *peer_local = NULL;

	switch (aid) {
	case AID_INET:
	case AID_VPN_IPv4:
		if (peer_has_ext_nexthop(peer, aid) &&
		    peer->remote_addr.aid == AID_INET6)
			peer_local = &peer->local_v6_addr;
		else if (peer->local_v4_addr.aid == AID_INET)
			peer_local = &peer->local_v4_addr;
		break;
	case AID_INET6:
	case AID_VPN_IPv6:
		if (peer->local_v6_addr.aid == AID_INET6)
			peer_local = &peer->local_v6_addr;
		break;
	case AID_FLOWSPECv4:
	case AID_FLOWSPECv6:
		/* flowspec has no nexthop */
		return (NULL);
	default:
		fatalx("%s, bad AID %s", __func__, aid2str(aid));
	}

	if (state->nhflags & NEXTHOP_SELF) {
		/*
		 * Forcing the nexthop to self is always possible
		 * and has precedence over other flags.
		 */
		return (peer_local);
	} else if (!peer->conf.ebgp) {
		/*
		 * in the ibgp case the nexthop is normally not
		 * modified unless it points at the peer itself.
		 */
		if (state->nexthop == NULL) {
			/* announced networks without explicit nexthop set */
			return (peer_local);
		}
		/*
		 * per RFC: if remote peer address is equal to the nexthop set
		 * the nexthop to our local address. This reduces the risk of
		 * routing loops. This overrides NEXTHOP_NOMODIFY.
		 */
		if (memcmp(&state->nexthop->exit_nexthop,
		    &peer->remote_addr, sizeof(peer->remote_addr)) == 0) {
			return (peer_local);
		}
		return (&state->nexthop->exit_nexthop);
	} else if (peer->conf.distance == 1) {
		/*
		 * In the ebgp directly connected case never send
		 * out a nexthop that is outside of the connected
		 * network of the peer. No matter what flags are
		 * set. This follows section 5.1.3 of RFC 4271.
		 * So checking whether the nexthop is in the same
		 * net is enough here.
		 */
		if (state->nexthop != NULL &&
		    state->nexthop->flags & NEXTHOP_CONNECTED &&
		    prefix_compare(&peer->remote_addr,
		    &state->nexthop->nexthop_net,
		    state->nexthop->nexthop_netlen) == 0) {
			/* nexthop and peer are in the same net */
			return (&state->nexthop->exit_nexthop);
		}
		return (peer_local);
	} else {
		/*
		 * For ebgp multihop make it possible to overrule
		 * the sent nexthop by setting NEXTHOP_NOMODIFY.
		 * Similar to the ibgp case there is no same net check
		 * needed but still ensure that the nexthop is not
		 * pointing to the peer itself.
		 */
		if (state->nhflags & NEXTHOP_NOMODIFY &&
		    state->nexthop != NULL &&
		    memcmp(&state->nexthop->exit_nexthop,
		    &peer->remote_addr, sizeof(peer->remote_addr)) != 0) {
			/* no modify flag set and nexthop not peer addr */
			return (&state->nexthop->exit_nexthop);
		}
		return (peer_local);
	}
}

static void
up_prep_adjout(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
{
	struct bgpd_addr *nexthop;
	struct nexthop *nh = NULL;
	u_char *np;
	uint16_t nl;

	/* prepend local AS number for eBGP sessions. */
	if (peer->conf.ebgp && (peer->flags & PEERFLAG_TRANS_AS) == 0) {
		uint32_t prep_as = peer->conf.local_as;
		np = aspath_prepend(state->aspath.aspath, prep_as, 1, &nl);
		aspath_put(state->aspath.aspath);
		state->aspath.aspath = aspath_get(np, nl);
		free(np);
	}

	/* update nexthop */
	nexthop = up_get_nexthop(peer, state, aid);
	if (nexthop != NULL)
		nh = nexthop_get(nexthop);
	nexthop_unref(state->nexthop);
	state->nexthop = nh;
	state->nhflags = 0;
}

static int
up_generate_attr(struct ibuf *buf, struct rde_peer *peer,
    struct rde_aspath *asp, struct rde_community *comm, struct nexthop *nh,
    uint8_t aid)
{
	struct attr *oa = NULL, *newaggr = NULL;
	u_char *pdata;
	uint32_t tmp32;
	int flags, neednewpath = 0, rv;
	uint16_t plen;
	uint8_t oalen = 0, type;

	if (asp->others_len > 0)
		oa = asp->others[oalen++];

	/* dump attributes in ascending order */
	for (type = ATTR_ORIGIN; type < 255; type++) {
		while (oa && oa->type < type) {
			if (oalen < asp->others_len)
				oa = asp->others[oalen++];
			else
				oa = NULL;
		}

		switch (type) {
		/*
		 * Attributes stored in rde_aspath
		 */
		case ATTR_ORIGIN:
			if (attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ORIGIN, &asp->origin, 1) == -1)
				return -1;
			break;
		case ATTR_ASPATH:
			plen = aspath_length(asp->aspath);
			pdata = aspath_dump(asp->aspath);

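			/*
			 * For peers without 4-byte AS support the path is
			 * deflated: 4-byte ASNs are replaced with AS_TRANS
			 * (23456, RFC 6793) and neednewpath is set so that
			 * the full path is sent in AS4_PATH further down.
			 */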
			if (!peer_has_as4byte(peer))
				pdata = aspath_deflate(pdata, &plen,
				    &neednewpath);
			rv = attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ASPATH, pdata, plen);
			if (!peer_has_as4byte(peer))
				free(pdata);

			if (rv == -1)
				return -1;
			break;
		case ATTR_NEXTHOP:
			switch (aid) {
			case AID_INET:
				if (nh == NULL)
					return -1;
				if (nh->exit_nexthop.aid != AID_INET) {
					if (peer_has_ext_nexthop(peer, aid))
						break;
					return -1;
				}
				if (attr_writebuf(buf, ATTR_WELL_KNOWN,
				    ATTR_NEXTHOP, &nh->exit_nexthop.v4,
				    sizeof(nh->exit_nexthop.v4)) == -1)
					return -1;
				break;
			default:
				break;
			}
			break;
		case ATTR_MED:
			/*
			 * The old MED from other peers MUST NOT be announced
			 * to others unless the MED is originating from us or
			 * the peer is an IBGP one. The only exception are
			 * routers with "transparent-as yes" set.
			 */
			if (asp->flags & F_ATTR_MED && (!peer->conf.ebgp ||
			    asp->flags & F_ATTR_MED_ANNOUNCE ||
			    peer->flags & PEERFLAG_TRANS_AS)) {
				tmp32 = htonl(asp->med);
				if (attr_writebuf(buf, ATTR_OPTIONAL,
				    ATTR_MED, &tmp32, 4) == -1)
					return -1;
			}
			break;
		case ATTR_LOCALPREF:
			if (!peer->conf.ebgp) {
				/* local preference, only valid for ibgp */
				tmp32 = htonl(asp->lpref);
				if (attr_writebuf(buf, ATTR_WELL_KNOWN,
				    ATTR_LOCALPREF, &tmp32, 4) == -1)
					return -1;
			}
			break;
		/*
		 * Communities are stored in struct rde_community
		 */
		case ATTR_COMMUNITIES:
		case ATTR_EXT_COMMUNITIES:
		case ATTR_LARGE_COMMUNITIES:
			if (community_writebuf(comm, type, peer->conf.ebgp,
			    buf) == -1)
				return -1;
			break;
		/*
		 * NEW to OLD conversion when sending stuff to a 2-byte AS peer
		 */
		case ATTR_AS4_PATH:
			if (neednewpath) {
				plen = aspath_length(asp->aspath);
				pdata = aspath_dump(asp->aspath);

				flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
				if (!(asp->flags & F_PREFIX_ANNOUNCED))
					flags |= ATTR_PARTIAL;
				if (plen != 0)
					if (attr_writebuf(buf, flags,
					    ATTR_AS4_PATH, pdata, plen) == -1)
						return -1;
			}
			break;
		case ATTR_AS4_AGGREGATOR:
			if (newaggr) {
				flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
				if (!(asp->flags & F_PREFIX_ANNOUNCED))
					flags |= ATTR_PARTIAL;
				if (attr_writebuf(buf, flags,
				    ATTR_AS4_AGGREGATOR, newaggr->data,
				    newaggr->len) == -1)
					return -1;
			}
			break;
		/*
		 * multiprotocol attributes are handled elsewhere
		 */
		case ATTR_MP_REACH_NLRI:
		case ATTR_MP_UNREACH_NLRI:
			break;
		/*
		 * dump all other path attributes. Following rules apply:
		 * 1. well-known attrs: ATTR_ATOMIC_AGGREGATE and
		 *    ATTR_AGGREGATOR pass unmodified (enforce flags
		 *    to correct values). Actually ATTR_AGGREGATOR may be
		 *    deflated for OLD 2-byte peers.
		 * 2. non-transitive attrs: don't re-announce to ebgp peers
		 * 3. transitive known attrs: announce unmodified
		 * 4. transitive unknown attrs: set partial bit and re-announce
		 */
		case ATTR_ATOMIC_AGGREGATE:
			if (oa == NULL || oa->type != type)
				break;
			if (attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ATOMIC_AGGREGATE, NULL, 0) == -1)
				return -1;
			break;
		case ATTR_AGGREGATOR:
			if (oa == NULL || oa->type != type)
				break;
			if ((!(oa->flags & ATTR_TRANSITIVE)) &&
			    peer->conf.ebgp)
				break;
			if (!peer_has_as4byte(peer)) {
				/* need to deflate the aggregator */
				uint8_t t[6];
				uint16_t tas;

				if ((!(oa->flags & ATTR_TRANSITIVE)) &&
				    peer->conf.ebgp)
					break;

				memcpy(&tmp32, oa->data, sizeof(tmp32));
				if (ntohl(tmp32) > USHRT_MAX) {
					tas = htons(AS_TRANS);
					newaggr = oa;
				} else
					tas = htons(ntohl(tmp32));

				memcpy(t, &tas, sizeof(tas));
				memcpy(t + sizeof(tas),
				    oa->data + sizeof(tmp32),
				    oa->len - sizeof(tmp32));
				if (attr_writebuf(buf, oa->flags,
				    oa->type, &t, sizeof(t)) == -1)
					return -1;
			} else {
				if (attr_writebuf(buf, oa->flags, oa->type,
				    oa->data, oa->len) == -1)
					return -1;
			}
			break;
		case ATTR_ORIGINATOR_ID:
		case ATTR_CLUSTER_LIST:
		case ATTR_OTC:
			if (oa == NULL || oa->type != type)
				break;
			if ((!(oa->flags & ATTR_TRANSITIVE)) &&
			    peer->conf.ebgp)
				break;
			if (attr_writebuf(buf, oa->flags, oa->type,
			    oa->data, oa->len) == -1)
				return -1;
			break;
		default:
			if (oa == NULL && type >= ATTR_FIRST_UNKNOWN)
				/* there is no attribute left to dump */
				return (0);

			if (oa == NULL || oa->type != type)
				break;
			/* unknown attribute */
			if (!(oa->flags & ATTR_TRANSITIVE)) {
				/*
				 * RFC 1771:
				 * Unrecognized non-transitive optional
				 * attributes must be quietly ignored and
				 * not passed along to other BGP peers.
				 */
				break;
			}
			if (attr_writebuf(buf, oa->flags | ATTR_PARTIAL,
			    oa->type, oa->data, oa->len) == -1)
				return -1;
		}
	}
	return 0;
}

/*
 * Check if the pending element is an EoR marker. If so remove it from the
 * tree and return 1.
 */
int
up_is_eor(struct rde_peer *peer, uint8_t aid)
{
	struct prefix *p;

	p = RB_MIN(prefix_tree, &peer->updates[aid]);
	if (p != NULL && (p->flags & PREFIX_FLAG_EOR)) {
		/*
		 * Need to remove the EoR marker from the update tree because
		 * prefix_adjout_destroy() can't handle that.
		 */
		RB_REMOVE(prefix_tree, &peer->updates[aid], p);
		p->flags &= ~PREFIX_FLAG_UPDATE;
		prefix_adjout_destroy(p);
		return 1;
	}
	return 0;
}

/* minimal buffer size > withdraw len + attr len + attr hdr + afi/safi */
#define MIN_UPDATE_LEN	16

static void
up_prefix_free(struct prefix_tree *prefix_head, struct prefix *p,
    struct rde_peer *peer, int withdraw)
{
	if (withdraw) {
		/* prefix no longer needed, remove it */
		prefix_adjout_destroy(p);
		peer->stats.prefix_sent_withdraw++;
	} else {
		/* prefix still in Adj-RIB-Out, keep it */
		RB_REMOVE(prefix_tree, prefix_head, p);
		p->flags &= ~PREFIX_FLAG_UPDATE;
		peer->stats.pending_update--;
		peer->stats.prefix_sent_update++;
	}
}

/*
 * Write prefixes to buffer until either there is no more space or
 * the next prefix no longer has the same path attributes.
 * Returns -1 if no prefix was written, else 0.
 */
static int
up_dump_prefix(struct ibuf *buf, struct prefix_tree *prefix_head,
    struct rde_peer *peer, int withdraw)
{
	struct prefix *p, *np;
	int done = 0, has_ap = -1, rv = -1;

	RB_FOREACH_SAFE(p, prefix_tree, prefix_head, np) {
		if (has_ap == -1)
			has_ap = peer_has_add_path(peer, p->pt->aid,
			    CAPA_AP_SEND);
		if (pt_writebuf(buf, p->pt, withdraw, has_ap, p->path_id_tx) ==
		    -1)
			break;

		/* make sure we only dump prefixes which belong together */
		if (np == NULL ||
		    np->aspath != p->aspath ||
		    np->communities != p->communities ||
		    np->nexthop != p->nexthop ||
		    np->nhflags != p->nhflags ||
		    (np->flags & PREFIX_FLAG_EOR))
			done = 1;

		rv = 0;
		up_prefix_free(prefix_head, p, peer, withdraw);
		if (done)
			break;
	}
	return rv;
}

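/*
 * MP_REACH_NLRI attribute body layout (RFC 4760): 2-byte AFI, 1-byte SAFI,
 * 1-byte nexthop length, the nexthop itself (for the VPN SAFIs prefixed
 * with an 8-byte zero route distinguisher), one reserved zero byte and
 * then the NLRI. As an illustrative example, an IPv6 unicast update with
 * a single global nexthop starts 00 02 | 01 | 10 | <16 nexthop bytes> | 00.
 * The length fields are patched in below once the final size is known.
 */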
static int
up_generate_mp_reach(struct ibuf *buf, struct rde_peer *peer,
    struct nexthop *nh, uint8_t aid)
{
	struct bgpd_addr *nexthop;
	size_t off, nhoff;
	uint16_t len, afi;
	uint8_t safi;

	/* attribute header, defaulting to extended length one */
	if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
		return -1;
	if (ibuf_add_n8(buf, ATTR_MP_REACH_NLRI) == -1)
		return -1;
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		return -1;

	if (aid2afi(aid, &afi, &safi))
		fatalx("up_generate_mp_reach: bad AID");

	/* AFI + SAFI + NH LEN + NH + Reserved */
	if (ibuf_add_n16(buf, afi) == -1)
		return -1;
	if (ibuf_add_n8(buf, safi) == -1)
		return -1;
	nhoff = ibuf_size(buf);
	if (ibuf_add_zero(buf, 1) == -1)
		return -1;

	if (aid == AID_VPN_IPv4 || aid == AID_VPN_IPv6) {
		/* write zero rd */
		if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
			return -1;
	}

	switch (aid) {
	case AID_INET:
	case AID_VPN_IPv4:
		if (nh == NULL)
			return -1;
		nexthop = &nh->exit_nexthop;
		/* AID_INET must only use this path with an IPv6 nexthop */
		if (nexthop->aid == AID_INET && aid != AID_INET) {
			if (ibuf_add(buf, &nexthop->v4,
			    sizeof(nexthop->v4)) == -1)
				return -1;
			break;
		} else if (nexthop->aid == AID_INET6 &&
		    peer_has_ext_nexthop(peer, aid)) {
			if (ibuf_add(buf, &nexthop->v6,
			    sizeof(nexthop->v6)) == -1)
				return -1;
		} else {
			/* can't encode nexthop, give up and withdraw prefix */
			return -1;
		}
		break;
	case AID_INET6:
	case AID_VPN_IPv6:
		if (nh == NULL)
			return -1;
		nexthop = &nh->exit_nexthop;
		if (ibuf_add(buf, &nexthop->v6, sizeof(nexthop->v6)) == -1)
			return -1;
		break;
	case AID_FLOWSPECv4:
	case AID_FLOWSPECv6:
		/* no NH */
		break;
	default:
		fatalx("up_generate_mp_reach: unknown AID");
	}

	/* update nexthop len (for VPN AIDs this includes the zero RD) */
	len = ibuf_size(buf) - nhoff - 1;
	if (ibuf_set_n8(buf, nhoff, len) == -1)
		return -1;

	if (ibuf_add_zero(buf, 1) == -1) /* Reserved must be 0 */
		return -1;

	if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
		/* no prefixes written, fail update */
		return -1;

	/* update MP_REACH attribute length field */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		return -1;

	return 0;
}

/*
 * Generate UPDATE message containing either just withdraws or updates.
 * UPDATE messages are constructed like this:
 *
 *    +-----------------------------------------------------+
 *    | Withdrawn Routes Length (2 octets)                  |
 *    +-----------------------------------------------------+
 *    | Withdrawn Routes (variable)                         |
 *    +-----------------------------------------------------+
 *    | Total Path Attribute Length (2 octets)              |
 *    +-----------------------------------------------------+
 *    | Path Attributes (variable)                          |
 *    +-----------------------------------------------------+
 *    | Network Layer Reachability Information (variable)   |
 *    +-----------------------------------------------------+
 *
 * Multiprotocol messages use MP_REACH_NLRI and MP_UNREACH_NLRI instead;
 * the latter will be the only path attribute in a message.
 */

/*
 * Write an UPDATE message for withdrawn routes. The size of buf limits
 * how many routes can be added. Returns the message buffer, or NULL on
 * error, which includes generating an empty withdraw message.
 */
struct ibuf *
up_dump_withdraws(struct rde_peer *peer, uint8_t aid)
{
	struct ibuf *buf;
	size_t off, pkgsize = MAX_PKTSIZE;
	uint16_t afi, len;
	uint8_t safi;

	if (peer_has_ext_msg(peer))
		pkgsize = MAX_EXT_PKTSIZE;

	if ((buf = ibuf_dynamic(4, pkgsize - MSGSIZE_HEADER)) == NULL)
		goto fail;

	/* reserve space for the withdrawn routes length field */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		goto fail;

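	/*
	 * For multiprotocol AIDs the withdrawn routes length above stays
	 * zero; off is re-pointed at the path attribute length field and
	 * the withdraws travel in an MP_UNREACH_NLRI attribute instead.
	 */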
	if (aid != AID_INET) {
		/* reserve space for 2-byte path attribute length */
		off = ibuf_size(buf);
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			goto fail;

		/* attribute header, defaulting to extended length one */
		if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
			goto fail;
		if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
			goto fail;
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			goto fail;

		/* afi & safi */
		if (aid2afi(aid, &afi, &safi))
			fatalx("%s: bad AID", __func__);
		if (ibuf_add_n16(buf, afi) == -1)
			goto fail;
		if (ibuf_add_n8(buf, safi) == -1)
			goto fail;
	}

	if (up_dump_prefix(buf, &peer->withdraws[aid], peer, 1) == -1)
		goto fail;

	/* update length field (either withdrawn routes or attribute length) */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		goto fail;

	if (aid != AID_INET) {
		/* write MP_UNREACH_NLRI attribute length (always extended) */
		len -= 4; /* skip attribute header */
		if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
			goto fail;
	} else {
		/* no extra attributes so set attribute len to 0 */
		if (ibuf_add_zero(buf, sizeof(len)) == -1) {
			goto fail;
		}
	}

	return buf;

fail:
	/* something went horribly wrong */
	log_peer_warn(&peer->conf, "generating withdraw failed, peer desynced");
	ibuf_free(buf);
	return NULL;
}

/*
 * Withdraw a single prefix after an error.
 */
static struct ibuf *
up_dump_withdraw_one(struct rde_peer *peer, struct prefix *p, struct ibuf *buf)
{
	size_t off;
	int has_ap;
	uint16_t afi, len;
	uint8_t safi;

	/* reset the buffer and start fresh */
	ibuf_truncate(buf, 0);

	/* reserve space for the withdrawn routes length field */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		goto fail;

	if (p->pt->aid != AID_INET) {
		/* reserve space for 2-byte path attribute length */
		off = ibuf_size(buf);
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			goto fail;

		/* attribute header, defaulting to extended length one */
		if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
			goto fail;
		if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
			goto fail;
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			goto fail;

		/* afi & safi */
		if (aid2afi(p->pt->aid, &afi, &safi))
			fatalx("%s: bad AID", __func__);
		if (ibuf_add_n16(buf, afi) == -1)
			goto fail;
		if (ibuf_add_n8(buf, safi) == -1)
			goto fail;
	}

	has_ap = peer_has_add_path(peer, p->pt->aid, CAPA_AP_SEND);
	if (pt_writebuf(buf, p->pt, 1, has_ap, p->path_id_tx) == -1)
		goto fail;

	/* update length field (either withdrawn routes or attribute length) */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		goto fail;

	if (p->pt->aid != AID_INET) {
		/* write MP_UNREACH_NLRI attribute length (always extended) */
		len -= 4; /* skip attribute header */
		if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
			goto fail;
	} else {
		/* no extra attributes so set attribute len to 0 */
		if (ibuf_add_zero(buf, sizeof(len)) == -1) {
			goto fail;
		}
	}

	return buf;

fail:
	/* something went horribly wrong */
	log_peer_warn(&peer->conf, "generating withdraw failed, peer desynced");
	ibuf_free(buf);
	return NULL;
}

/*
 * Write an UPDATE message for changed and added routes. The size of buf
 * limits how many routes can be added. The function first dumps the path
 * attributes and then tries to add as many prefixes using these attributes
 * as possible. Returns the message buffer, or NULL on error, which includes
 * producing an empty message.
 */
struct ibuf *
up_dump_update(struct rde_peer *peer, uint8_t aid)
{
	struct ibuf *buf;
	struct bgpd_addr addr;
	struct prefix *p;
	size_t off, pkgsize = MAX_PKTSIZE;
	uint16_t len;
	int force_ip4mp = 0;

	p = RB_MIN(prefix_tree, &peer->updates[aid]);
	if (p == NULL)
		return NULL;

	if (peer_has_ext_msg(peer))
		pkgsize = MAX_EXT_PKTSIZE;

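	/*
	 * RFC 8950: an IPv4 prefix with an IPv6 nexthop cannot be encoded
	 * with the classic NEXTHOP attribute and must instead be sent in
	 * an MP_REACH_NLRI attribute, hence force_ip4mp.
	 */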
	if (aid == AID_INET && peer_has_ext_nexthop(peer, AID_INET)) {
		struct nexthop *nh = prefix_nexthop(p);
		if (nh != NULL && nh->exit_nexthop.aid == AID_INET6)
			force_ip4mp = 1;
	}

	if ((buf = ibuf_dynamic(4, pkgsize - MSGSIZE_HEADER)) == NULL)
		goto fail;

	/* withdrawn routes length field is 0 */
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		goto fail;

	/* reserve space for 2-byte path attribute length */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		goto fail;

	if (up_generate_attr(buf, peer, prefix_aspath(p),
	    prefix_communities(p), prefix_nexthop(p), aid) == -1)
		goto drop;

	if (aid != AID_INET || force_ip4mp) {
		/* write mp attribute including nlri */

		/*
		 * RFC 7606 wants this to be first but then we need
		 * to use multiple buffers with adjusted length to
		 * merge the attributes together in reverse order of
		 * creation.
		 */
		if (up_generate_mp_reach(buf, peer, prefix_nexthop(p), aid) ==
		    -1)
			goto drop;
	}

	/* update attribute length field */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		goto fail;

	if (aid == AID_INET && !force_ip4mp) {
		/* last but not least dump the IPv4 nlri */
		if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
			goto drop;
	}

	return buf;

drop:
	/* Not enough space. Drop current prefix, it will never fit. */
	p = RB_MIN(prefix_tree, &peer->updates[aid]);
	pt_getaddr(p->pt, &addr);
	log_peer_warnx(&peer->conf, "generating update failed, "
	    "prefix %s/%d dropped", log_addr(&addr), p->pt->prefixlen);

	up_prefix_free(&peer->updates[aid], p, peer, 0);
	return up_dump_withdraw_one(peer, p, buf);

fail:
	/* something went horribly wrong */
	log_peer_warn(&peer->conf, "generating update failed, peer desynced");
	ibuf_free(buf);
	return NULL;
}