xref: /openbsd/usr.sbin/bgpd/rde_update.c (revision de634ddd)
1 /*	$OpenBSD: rde_update.c,v 1.168 2024/05/30 08:29:30 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2004 Claudio Jeker <claudio@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/tree.h>
21 
22 #include <limits.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <stdio.h>
26 
27 #include "bgpd.h"
28 #include "rde.h"
29 #include "log.h"
30 
/* Result of up_process_prefix() for a single candidate prefix. */
enum up_state {
	UP_OK,		/* prefix was inserted into the Adj-RIB-Out */
	UP_ERR_LIMIT,	/* outbound max-prefix limit reached, session reset */
	UP_FILTERED,	/* denied by output filters or RFC 9234 policy */
	UP_EXCLUDED,	/* rejected by up_test_update() (communities, iBGP) */
};
37 
/*
 * Templates for the well known communities (RFC 1997) that restrict
 * route propagation; matched in up_test_update() before announcing.
 */
static struct community	comm_no_advertise = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_ADVERTISE
};
static struct community	comm_no_export = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_EXPORT
};
static struct community	comm_no_expsubconfed = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_EXPSUBCONFED
};

static void up_prep_adjout(struct rde_peer *, struct filterstate *, uint8_t);
55 
56 static int
up_test_update(struct rde_peer * peer,struct prefix * p)57 up_test_update(struct rde_peer *peer, struct prefix *p)
58 {
59 	struct rde_aspath	*asp;
60 	struct rde_community	*comm;
61 	struct rde_peer		*frompeer;
62 
63 	frompeer = prefix_peer(p);
64 	asp = prefix_aspath(p);
65 	comm = prefix_communities(p);
66 
67 	if (asp == NULL || asp->flags & F_ATTR_PARSE_ERR)
68 		fatalx("try to send out a botched path");
69 	if (asp->flags & (F_ATTR_LOOP | F_ATTR_OTC_LEAK))
70 		fatalx("try to send out a looped path");
71 
72 	if (peer == frompeer)
73 		/* Do not send routes back to sender */
74 		return (0);
75 
76 	if (!frompeer->conf.ebgp && !peer->conf.ebgp) {
77 		/*
78 		 * route reflector redistribution rules:
79 		 * 1. if announce is set                -> announce
80 		 * 2. from non-client, to non-client    -> no
81 		 * 3. from client, to non-client        -> yes
82 		 * 4. from non-client, to client        -> yes
83 		 * 5. from client, to client            -> yes
84 		 */
85 		if (frompeer->conf.reflector_client == 0 &&
86 		    peer->conf.reflector_client == 0 &&
87 		    (asp->flags & F_PREFIX_ANNOUNCED) == 0)
88 			/* Do not redistribute updates to ibgp peers */
89 			return (0);
90 	}
91 
92 	/* well known communities */
93 	if (community_match(comm, &comm_no_advertise, NULL))
94 		return (0);
95 	if (peer->conf.ebgp) {
96 		if (community_match(comm, &comm_no_export, NULL))
97 			return (0);
98 		if (community_match(comm, &comm_no_expsubconfed, NULL))
99 			return (0);
100 	}
101 
102 	return (1);
103 }
104 
105 /* RFC9234 open policy handling */
106 static int
up_enforce_open_policy(struct rde_peer * peer,struct filterstate * state,uint8_t aid)107 up_enforce_open_policy(struct rde_peer *peer, struct filterstate *state,
108     uint8_t aid)
109 {
110 	/* only for IPv4 and IPv6 unicast */
111 	if (aid != AID_INET && aid != AID_INET6)
112 		return 0;
113 
114 	/*
115 	 * do not propagate (consider it filtered) if OTC is present and
116 	 * local role is peer, customer or rs-client.
117 	 */
118 	if (peer->role == ROLE_PEER || peer->role == ROLE_CUSTOMER ||
119 	    peer->role == ROLE_RS_CLIENT)
120 		if (state->aspath.flags & F_ATTR_OTC)
121 			return 1;
122 
123 	/*
124 	 * add OTC attribute if not present towards peers, customers and
125 	 * rs-clients (local roles peer, provider, rs).
126 	 */
127 	if (peer->role == ROLE_PEER || peer->role == ROLE_PROVIDER ||
128 	    peer->role == ROLE_RS)
129 		if ((state->aspath.flags & F_ATTR_OTC) == 0) {
130 			uint32_t tmp;
131 
132 			tmp = htonl(peer->conf.local_as);
133 			if (attr_optadd(&state->aspath,
134 			    ATTR_OPTIONAL|ATTR_TRANSITIVE, ATTR_OTC,
135 			    &tmp, sizeof(tmp)) == -1)
136 				log_peer_warnx(&peer->conf,
137 				    "failed to add OTC attribute");
138 			state->aspath.flags |= F_ATTR_OTC;
139 		}
140 
141 	return 0;
142 }
143 
/*
 * Process a single prefix by passing it through the various filter stages
 * and if not filtered out update the Adj-RIB-Out. Returns:
 * - UP_OK if prefix was added
 * - UP_ERR_LIMIT if the peer outbound prefix limit was reached
 * - UP_FILTERED if prefix was filtered out
 * - UP_EXCLUDED if prefix was excluded because of up_test_update()
 */
static enum up_state
up_process_prefix(struct rde_peer *peer, struct prefix *new, struct prefix *p)
{
	struct filterstate state;
	struct bgpd_addr addr;
	int excluded = 0;

	/*
	 * up_test_update() needs to run before the output filters
	 * else the well known communities won't work properly.
	 * The output filters would not be able to add well known
	 * communities.
	 */
	if (!up_test_update(peer, new))
		excluded = 1;

	rde_filterstate_prep(&state, new);
	pt_getaddr(new->pt, &addr);
	if (rde_filter(peer->out_rules, peer, prefix_peer(new), &addr,
	    new->pt->prefixlen, &state) == ACTION_DENY) {
		rde_filterstate_clean(&state);
		return UP_FILTERED;
	}

	/* Open Policy Check: acts like an output filter */
	if (up_enforce_open_policy(peer, &state, new->pt->aid)) {
		rde_filterstate_clean(&state);
		return UP_FILTERED;
	}

	/* exclusion is checked after the filters so filter state is in sync */
	if (excluded) {
		rde_filterstate_clean(&state);
		return UP_EXCLUDED;
	}

	/* from here on we know this is an update */
	if (p == (void *)-1)
		/* caller did not look up the Adj-RIB-Out entry, do it now */
		p = prefix_adjout_get(peer, new->path_id_tx, new->pt);

	up_prep_adjout(peer, &state, new->pt->aid);
	prefix_adjout_update(p, peer, &state, new->pt, new->path_id_tx);
	rde_filterstate_clean(&state);

	/* max prefix checker outbound */
	if (peer->conf.max_out_prefix &&
	    peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
		log_peer_warnx(&peer->conf,
		    "outbound prefix limit reached (>%u/%u)",
		    peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
		rde_update_err(peer, ERR_CEASE,
		    ERR_CEASE_MAX_SENT_PREFIX, NULL);
		return UP_ERR_LIMIT;
	}

	return UP_OK;
}
208 
/*
 * Generate an update for the RIB entry "re" towards "peer". Walks the
 * prefixes of "re" starting at the best path and announces the first
 * prefix that passes the filters. If no prefix qualifies a previously
 * announced prefix (if any) is withdrawn.
 */
void
up_generate_updates(struct rde_peer *peer, struct rib_entry *re)
{
	struct prefix		*new, *p;

	/* currently announced Adj-RIB-Out entry, NULL if none */
	p = prefix_adjout_first(peer, re->prefix);

	new = prefix_best(re);
	while (new != NULL) {
		switch (up_process_prefix(peer, new, p)) {
		case UP_OK:
		case UP_ERR_LIMIT:
			return;
		case UP_FILTERED:
			if (peer->flags & PEERFLAG_EVALUATE_ALL) {
				/* try the next eligible path instead */
				new = TAILQ_NEXT(new, entry.list.rib);
				if (new != NULL && prefix_eligible(new))
					continue;
			}
			goto done;
		case UP_EXCLUDED:
			goto done;
		}
	}

done:
	/* withdraw prefix */
	if (p != NULL)
		prefix_adjout_withdraw(p);
}
239 
/*
 * Generate updates for the add-path send case. Depending on the
 * peer eval settings prefixes are selected and distributed.
 * This highly depends on the Adj-RIB-Out to handle prefixes with no
 * changes gracefully. It may be possible to improve the API so that
 * less churn is needed.
 */
void
up_generate_addpath(struct rde_peer *peer, struct rib_entry *re)
{
	struct prefix		*head, *new, *p;
	int			maxpaths = 0, extrapaths = 0, extra;
	int			checkmode = 1;

	head = prefix_adjout_first(peer, re->prefix);

	/* mark all paths as stale */
	for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
		p->flags |= PREFIX_FLAG_STALE;

	/* update paths */
	new = prefix_best(re);
	while (new != NULL) {
		/* check limits and stop when a limit is reached */
		if (peer->eval.maxpaths != 0 &&
		    maxpaths >= peer->eval.maxpaths)
			break;
		if (peer->eval.extrapaths != 0 &&
		    extrapaths >= peer->eval.extrapaths)
			break;

		/*
		 * Determine if this path counts against the extra-paths
		 * limit. Once a path no longer matches the eval mode all
		 * following (worse) paths are extra as well, so stop
		 * checking (checkmode = 0).
		 */
		extra = 1;
		if (checkmode) {
			switch (peer->eval.mode) {
			case ADDPATH_EVAL_BEST:
				if (new->dmetric == PREFIX_DMETRIC_BEST)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_ECMP:
				if (new->dmetric == PREFIX_DMETRIC_BEST ||
				    new->dmetric == PREFIX_DMETRIC_ECMP)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_AS_WIDE:
				if (new->dmetric == PREFIX_DMETRIC_BEST ||
				    new->dmetric == PREFIX_DMETRIC_ECMP ||
				    new->dmetric == PREFIX_DMETRIC_AS_WIDE)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_ALL:
				/* nothing to check */
				checkmode = 0;
				break;
			default:
				fatalx("unknown add-path eval mode");
			}
		}

		/* (void *)-1 tells up_process_prefix to do the lookup */
		switch (up_process_prefix(peer, new, (void *)-1)) {
		case UP_OK:
			maxpaths++;
			extrapaths += extra;
			break;
		case UP_FILTERED:
		case UP_EXCLUDED:
			break;
		case UP_ERR_LIMIT:
			/* just give up */
			return;
		}

		/* only allow valid prefixes */
		new = TAILQ_NEXT(new, entry.list.rib);
		if (new == NULL || !prefix_eligible(new))
			break;
	}

	/* withdraw stale paths */
	for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
		if (p->flags & PREFIX_FLAG_STALE)
			prefix_adjout_withdraw(p);
	}
}
329 
/*
 * Generate updates for the add-path send all case. Since all prefixes
 * are distributed just remove old and add new.
 */
void
up_generate_addpath_all(struct rde_peer *peer, struct rib_entry *re,
    struct prefix *new, struct prefix *old)
{
	struct prefix		*p, *head = NULL;
	int			all = 0;

	/*
	 * if old and new are NULL then insert all prefixes from best,
	 * clearing old routes in the process
	 */
	if (old == NULL && new == NULL) {
		/* mark all paths as stale */
		head = prefix_adjout_first(peer, re->prefix);
		for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
			p->flags |= PREFIX_FLAG_STALE;

		new = prefix_best(re);
		all = 1;
	}

	if (new != NULL && !prefix_eligible(new)) {
		/* only allow valid prefixes */
		new = NULL;
	}

	if (old != NULL) {
		/* withdraw stale paths */
		p = prefix_adjout_get(peer, old->path_id_tx, old->pt);
		if (p != NULL)
			prefix_adjout_withdraw(p);
	}

	/* add new path (or multiple if all is set) */
	while (new != NULL) {
		/* (void *)-1 tells up_process_prefix to do the lookup */
		switch (up_process_prefix(peer, new, (void *)-1)) {
		case UP_OK:
		case UP_FILTERED:
		case UP_EXCLUDED:
			break;
		case UP_ERR_LIMIT:
			/* just give up */
			return;
		}

		if (!all)
			break;

		/* only allow valid prefixes */
		new = TAILQ_NEXT(new, entry.list.rib);
		if (new == NULL || !prefix_eligible(new))
			break;
	}

	if (all) {
		/* withdraw stale paths */
		for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
			if (p->flags & PREFIX_FLAG_STALE)
				prefix_adjout_withdraw(p);
		}
	}
}
396 
/* send a default route to the specified peer */
void
up_generate_default(struct rde_peer *peer, uint8_t aid)
{
	extern struct rde_peer	*peerself;
	struct filterstate	 state;
	struct rde_aspath	*asp;
	struct prefix		*p;
	struct pt_entry		*pte;
	struct bgpd_addr	 addr;

	/* skip if this AFI/SAFI was not negotiated with the peer */
	if (peer->capa.mp[aid] == 0)
		return;

	/* build a locally originated path with an empty AS path */
	rde_filterstate_init(&state);
	asp = &state.aspath;
	asp->aspath = aspath_get(NULL, 0);
	asp->origin = ORIGIN_IGP;
	rde_filterstate_set_vstate(&state, ROA_NOTFOUND, ASPA_NEVER_KNOWN);
	/* the other default values are OK, nexthop is once again NULL */

	/*
	 * XXX apply default overrides. Not yet possible, mainly a parse.y
	 * problem.
	 */
	/* rde_apply_set(asp, peerself, peerself, set, af); */

	/* default route: all-zero address with prefixlen 0 */
	memset(&addr, 0, sizeof(addr));
	addr.aid = aid;
	p = prefix_adjout_lookup(peer, &addr, 0);

	/* outbound filter as usual */
	if (rde_filter(peer->out_rules, peer, peerself, &addr, 0, &state) ==
	    ACTION_DENY) {
		rde_filterstate_clean(&state);
		return;
	}

	up_prep_adjout(peer, &state, addr.aid);
	/* can't use pt_fill here since prefix_adjout_update keeps a ref */
	pte = pt_get(&addr, 0);
	if (pte == NULL)
		pte = pt_add(&addr, 0);
	prefix_adjout_update(p, peer, &state, pte, 0);
	rde_filterstate_clean(&state);

	/* max prefix checker outbound */
	if (peer->conf.max_out_prefix &&
	    peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
		log_peer_warnx(&peer->conf,
		    "outbound prefix limit reached (>%u/%u)",
		    peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
		rde_update_err(peer, ERR_CEASE,
		    ERR_CEASE_MAX_SENT_PREFIX, NULL);
	}
}
453 
/*
 * Select the nexthop address to announce to "peer" for the path in
 * "state". Returns a pointer to the address to use or NULL when no
 * nexthop is needed (flowspec). Covers nexthop-self, the iBGP case,
 * directly connected eBGP and eBGP multihop.
 */
static struct bgpd_addr *
up_get_nexthop(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
{
	struct bgpd_addr *peer_local = NULL;

	/* pick the local session address matching the address family */
	switch (aid) {
	case AID_INET:
	case AID_VPN_IPv4:
		if (peer->local_v4_addr.aid == AID_INET)
			peer_local = &peer->local_v4_addr;
		break;
	case AID_INET6:
	case AID_VPN_IPv6:
		if (peer->local_v6_addr.aid == AID_INET6)
			peer_local = &peer->local_v6_addr;
		break;
	case AID_FLOWSPECv4:
	case AID_FLOWSPECv6:
		/* flowspec has no nexthop */
		return (NULL);
	default:
		fatalx("%s, bad AID %s", __func__, aid2str(aid));
	}

	if (state->nhflags & NEXTHOP_SELF) {
		/*
		 * Forcing the nexthop to self is always possible
		 * and has precedence over other flags.
		 */
		return (peer_local);
	} else if (!peer->conf.ebgp) {
		/*
		 * in the ibgp case the nexthop is normally not
		 * modified unless it points at the peer itself.
		 */
		if (state->nexthop == NULL) {
			/* announced networks without explicit nexthop set */
			return (peer_local);
		}
		/*
		 * per RFC: if remote peer address is equal to the nexthop set
		 * the nexthop to our local address. This reduces the risk of
		 * routing loops. This overrides NEXTHOP_NOMODIFY.
		 */
		if (memcmp(&state->nexthop->exit_nexthop,
		    &peer->remote_addr, sizeof(peer->remote_addr)) == 0) {
			return (peer_local);
		}
		return (&state->nexthop->exit_nexthop);
	} else if (peer->conf.distance == 1) {
		/*
		 * In the ebgp directly connected case never send
		 * out a nexthop that is outside of the connected
		 * network of the peer. No matter what flags are
		 * set. This follows section 5.1.3 of RFC 4271.
		 * So just check if the nexthop is in the same net
		 * is enough here.
		 */
		if (state->nexthop != NULL &&
		    state->nexthop->flags & NEXTHOP_CONNECTED &&
		    prefix_compare(&peer->remote_addr,
		    &state->nexthop->nexthop_net,
		    state->nexthop->nexthop_netlen) == 0) {
			/* nexthop and peer are in the same net */
			return (&state->nexthop->exit_nexthop);
		}
		return (peer_local);
	} else {
		/*
		 * For ebgp multihop make it possible to overrule
		 * the sent nexthop by setting NEXTHOP_NOMODIFY.
		 * Similar to the ibgp case there is no same net check
		 * needed but still ensure that the nexthop is not
		 * pointing to the peer itself.
		 */
		if (state->nhflags & NEXTHOP_NOMODIFY &&
		    state->nexthop != NULL &&
		    memcmp(&state->nexthop->exit_nexthop,
		    &peer->remote_addr, sizeof(peer->remote_addr)) != 0) {
			/* no modify flag set and nexthop not peer addr */
			return (&state->nexthop->exit_nexthop);
		}
		return (peer_local);
	}
}
539 
540 static void
up_prep_adjout(struct rde_peer * peer,struct filterstate * state,uint8_t aid)541 up_prep_adjout(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
542 {
543 	struct bgpd_addr *nexthop;
544 	struct nexthop *nh = NULL;
545 	u_char *np;
546 	uint16_t nl;
547 
548 	/* prepend local AS number for eBGP sessions. */
549 	if (peer->conf.ebgp && (peer->flags & PEERFLAG_TRANS_AS) == 0) {
550 		uint32_t prep_as = peer->conf.local_as;
551 		np = aspath_prepend(state->aspath.aspath, prep_as, 1, &nl);
552 		aspath_put(state->aspath.aspath);
553 		state->aspath.aspath = aspath_get(np, nl);
554 		free(np);
555 	}
556 
557 	/* update nexthop */
558 	nexthop = up_get_nexthop(peer, state, aid);
559 	if (nexthop != NULL)
560 		nh = nexthop_get(nexthop);
561 	nexthop_unref(state->nexthop);
562 	state->nexthop = nh;
563 	state->nhflags = 0;
564 }
565 
566 
/*
 * Append the path attributes for the path described by "asp", "comm"
 * and "nh" to "buf". Attributes are emitted in ascending type order.
 * Handles 4-byte AS deflation (AS4_PATH / AS4_AGGREGATOR) for old
 * 2-byte peers. Returns 0 on success, -1 if the buffer is full.
 */
static int
up_generate_attr(struct ibuf *buf, struct rde_peer *peer,
    struct rde_aspath *asp, struct rde_community *comm, struct nexthop *nh,
    uint8_t aid)
{
	struct attr	*oa = NULL, *newaggr = NULL;
	u_char		*pdata;
	uint32_t	 tmp32;
	int		 flags, neednewpath = 0, rv;
	uint16_t	 plen;
	uint8_t		 oalen = 0, type;

	/* asp->others holds the optional attributes sorted by type */
	if (asp->others_len > 0)
		oa = asp->others[oalen++];

	/* dump attributes in ascending order */
	for (type = ATTR_ORIGIN; type < 255; type++) {
		/* advance oa to the first optional attr >= current type */
		while (oa && oa->type < type) {
			if (oalen < asp->others_len)
				oa = asp->others[oalen++];
			else
				oa = NULL;
		}

		switch (type) {
		/*
		 * Attributes stored in rde_aspath
		 */
		case ATTR_ORIGIN:
			if (attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ORIGIN, &asp->origin, 1) == -1)
				return -1;
			break;
		case ATTR_ASPATH:
			plen = aspath_length(asp->aspath);
			pdata = aspath_dump(asp->aspath);

			/* deflate 4-byte ASs to AS_TRANS for old peers */
			if (!peer_has_as4byte(peer))
				pdata = aspath_deflate(pdata, &plen,
				    &neednewpath);
			rv = attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ASPATH, pdata, plen);
			/* aspath_deflate() allocated a copy, free it */
			if (!peer_has_as4byte(peer))
				free(pdata);

			if (rv == -1)
				return -1;
			break;
		case ATTR_NEXTHOP:
			/* classic NEXTHOP only exists for plain IPv4 */
			switch (aid) {
			case AID_INET:
				if (nh == NULL)
					return -1;
				if (attr_writebuf(buf, ATTR_WELL_KNOWN,
				    ATTR_NEXTHOP, &nh->exit_nexthop.v4,
				    sizeof(nh->exit_nexthop.v4)) == -1)
					return -1;
				break;
			default:
				/* other AIDs use MP_REACH_NLRI instead */
				break;
			}
			break;
		case ATTR_MED:
			/*
			 * The old MED from other peers MUST not be announced
			 * to others unless the MED is originating from us or
			 * the peer is an IBGP one. Only exception are routers
			 * with "transparent-as yes" set.
			 */
			if (asp->flags & F_ATTR_MED && (!peer->conf.ebgp ||
			    asp->flags & F_ATTR_MED_ANNOUNCE ||
			    peer->flags & PEERFLAG_TRANS_AS)) {
				tmp32 = htonl(asp->med);
				if (attr_writebuf(buf, ATTR_OPTIONAL,
				    ATTR_MED, &tmp32, 4) == -1)
					return -1;
			}
			break;
		case ATTR_LOCALPREF:
			if (!peer->conf.ebgp) {
				/* local preference, only valid for ibgp */
				tmp32 = htonl(asp->lpref);
				if (attr_writebuf(buf, ATTR_WELL_KNOWN,
				    ATTR_LOCALPREF, &tmp32, 4) == -1)
					return -1;
			}
			break;
		/*
		 * Communities are stored in struct rde_community
		 */
		case ATTR_COMMUNITIES:
		case ATTR_EXT_COMMUNITIES:
		case ATTR_LARGE_COMMUNITIES:
			if (community_writebuf(comm, type, peer->conf.ebgp,
			    buf) == -1)
				return -1;
			break;
		/*
		 * NEW to OLD conversion when sending stuff to a 2byte AS peer
		 */
		case ATTR_AS4_PATH:
			/* only set if aspath_deflate() replaced an AS above */
			if (neednewpath) {
				plen = aspath_length(asp->aspath);
				pdata = aspath_dump(asp->aspath);

				flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
				if (!(asp->flags & F_PREFIX_ANNOUNCED))
					flags |= ATTR_PARTIAL;
				if (plen != 0)
					if (attr_writebuf(buf, flags,
					    ATTR_AS4_PATH, pdata, plen) == -1)
						return -1;
			}
			break;
		case ATTR_AS4_AGGREGATOR:
			/* only set if the AGGREGATOR below got deflated */
			if (newaggr) {
				flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
				if (!(asp->flags & F_PREFIX_ANNOUNCED))
					flags |= ATTR_PARTIAL;
				if (attr_writebuf(buf, flags,
				    ATTR_AS4_AGGREGATOR, newaggr->data,
				    newaggr->len) == -1)
					return -1;
			}
			break;
		/*
		 * multiprotocol attributes are handled elsewhere
		 */
		case ATTR_MP_REACH_NLRI:
		case ATTR_MP_UNREACH_NLRI:
			break;
		/*
		 * dump all other path attributes. Following rules apply:
		 *  1. well-known attrs: ATTR_ATOMIC_AGGREGATE and
		 *     ATTR_AGGREGATOR pass unmodified (enforce flags
		 *     to correct values). Actually ATTR_AGGREGATOR may be
		 *     deflated for OLD 2-byte peers.
		 *  2. non-transitive attrs: don't re-announce to ebgp peers
		 *  3. transitive known attrs: announce unmodified
		 *  4. transitive unknown attrs: set partial bit and re-announce
		 */
		case ATTR_ATOMIC_AGGREGATE:
			if (oa == NULL || oa->type != type)
				break;
			if (attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ATOMIC_AGGREGATE, NULL, 0) == -1)
				return -1;
			break;
		case ATTR_AGGREGATOR:
			if (oa == NULL || oa->type != type)
				break;
			if ((!(oa->flags & ATTR_TRANSITIVE)) &&
			    peer->conf.ebgp)
				break;
			if (!peer_has_as4byte(peer)) {
				/* need to deflate the aggregator */
				uint8_t		t[6];
				uint16_t	tas;

				/* NOTE(review): redundant, already checked
				 * the same condition just above */
				if ((!(oa->flags & ATTR_TRANSITIVE)) &&
				    peer->conf.ebgp)
					break;

				memcpy(&tmp32, oa->data, sizeof(tmp32));
				if (ntohl(tmp32) > USHRT_MAX) {
					/* AS > 16 bit: substitute AS_TRANS
					 * and emit AS4_AGGREGATOR later */
					tas = htons(AS_TRANS);
					newaggr = oa;
				} else
					tas = htons(ntohl(tmp32));

				memcpy(t, &tas, sizeof(tas));
				memcpy(t + sizeof(tas),
				    oa->data + sizeof(tmp32),
				    oa->len - sizeof(tmp32));
				if (attr_writebuf(buf, oa->flags,
				    oa->type, &t, sizeof(t)) == -1)
					return -1;
			} else {
				if (attr_writebuf(buf, oa->flags, oa->type,
				    oa->data, oa->len) == -1)
					return -1;
			}
			break;
		case ATTR_ORIGINATOR_ID:
		case ATTR_CLUSTER_LIST:
		case ATTR_OTC:
			if (oa == NULL || oa->type != type)
				break;
			if ((!(oa->flags & ATTR_TRANSITIVE)) &&
			    peer->conf.ebgp)
				break;
			if (attr_writebuf(buf, oa->flags, oa->type,
			    oa->data, oa->len) == -1)
				return -1;
			break;
		default:
			if (oa == NULL && type >= ATTR_FIRST_UNKNOWN)
				/* there is no attribute left to dump */
				return (0);

			if (oa == NULL || oa->type != type)
				break;
			/* unknown attribute */
			if (!(oa->flags & ATTR_TRANSITIVE)) {
				/*
				 * RFC 1771:
				 * Unrecognized non-transitive optional
				 * attributes must be quietly ignored and
				 * not passed along to other BGP peers.
				 */
				break;
			}
			if (attr_writebuf(buf, oa->flags | ATTR_PARTIAL,
			    oa->type, oa->data, oa->len) == -1)
				return -1;
		}
	}
	return 0;
}
786 
787 /*
788  * Check if the pending element is a EoR marker. If so remove it from the
789  * tree and return 1.
790  */
791 int
up_is_eor(struct rde_peer * peer,uint8_t aid)792 up_is_eor(struct rde_peer *peer, uint8_t aid)
793 {
794 	struct prefix *p;
795 
796 	p = RB_MIN(prefix_tree, &peer->updates[aid]);
797 	if (p != NULL && (p->flags & PREFIX_FLAG_EOR)) {
798 		/*
799 		 * Need to remove eor from update tree because
800 		 * prefix_adjout_destroy() can't handle that.
801 		 */
802 		RB_REMOVE(prefix_tree, &peer->updates[aid], p);
803 		p->flags &= ~PREFIX_FLAG_UPDATE;
804 		prefix_adjout_destroy(p);
805 		return 1;
806 	}
807 	return 0;
808 }
809 
810 /* minimal buffer size > withdraw len + attr len + attr hdr + afi/safi */
811 #define MIN_UPDATE_LEN	16
812 
813 static void
up_prefix_free(struct prefix_tree * prefix_head,struct prefix * p,struct rde_peer * peer,int withdraw)814 up_prefix_free(struct prefix_tree *prefix_head, struct prefix *p,
815     struct rde_peer *peer, int withdraw)
816 {
817 	if (withdraw) {
818 		/* prefix no longer needed, remove it */
819 		prefix_adjout_destroy(p);
820 		peer->stats.prefix_sent_withdraw++;
821 	} else {
822 		/* prefix still in Adj-RIB-Out, keep it */
823 		RB_REMOVE(prefix_tree, prefix_head, p);
824 		p->flags &= ~PREFIX_FLAG_UPDATE;
825 		peer->stats.pending_update--;
826 		peer->stats.prefix_sent_update++;
827 	}
828 }
829 
/*
 * Write prefixes to buffer until either there is no more space or
 * the next prefix has no longer the same ASPATH attributes.
 * Returns -1 if no prefix was written else 0.
 */
static int
up_dump_prefix(struct ibuf *buf, struct prefix_tree *prefix_head,
    struct rde_peer *peer, int withdraw)
{
	struct prefix	*p, *np;
	int		 done = 0, has_ap = -1, rv = -1;

	RB_FOREACH_SAFE(p, prefix_tree, prefix_head, np) {
		/* add-path send capability is the same for the whole tree */
		if (has_ap == -1)
			has_ap = peer_has_add_path(peer, p->pt->aid,
			    CAPA_AP_SEND);
		if (pt_writebuf(buf, p->pt, withdraw, has_ap, p->path_id_tx) ==
		    -1)
			/* buffer full, stop here */
			break;

		/* make sure we only dump prefixes which belong together */
		if (np == NULL ||
		    np->aspath != p->aspath ||
		    np->communities != p->communities ||
		    np->nexthop != p->nexthop ||
		    np->nhflags != p->nhflags ||
		    (np->flags & PREFIX_FLAG_EOR))
			done = 1;

		rv = 0;
		up_prefix_free(prefix_head, p, peer, withdraw);
		if (done)
			break;
	}
	return rv;
}
866 
/*
 * Append the MP_REACH_NLRI attribute (nexthop plus NLRI) for "aid" to
 * "buf", consuming prefixes from the peer's update tree. Returns 0 on
 * success, -1 on failure (buffer full or no prefix written).
 */
static int
up_generate_mp_reach(struct ibuf *buf, struct rde_peer *peer,
    struct nexthop *nh, uint8_t aid)
{
	struct bgpd_addr *nexthop;
	size_t off;
	uint16_t len, afi;
	uint8_t safi;

	/* attribute header, defaulting to extended length one */
	if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
		return -1;
	if (ibuf_add_n8(buf, ATTR_MP_REACH_NLRI) == -1)
		return -1;
	/* remember position of the 2-byte length, patched at the end */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		return -1;

	if (aid2afi(aid, &afi, &safi))
		fatalx("up_generate_mp_reach: bad AID");

	/* AFI + SAFI + NH LEN + NH + Reserved */
	if (ibuf_add_n16(buf, afi) == -1)
		return -1;
	if (ibuf_add_n8(buf, safi) == -1)
		return -1;

	switch (aid) {
	case AID_INET6:
		if (nh == NULL)
			return -1;
		/* NH LEN */
		if (ibuf_add_n8(buf, sizeof(struct in6_addr)) == -1)
			return -1;
		/* write nexthop */
		nexthop = &nh->exit_nexthop;
		if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1)
			return -1;
		break;
	case AID_VPN_IPv4:
		if (nh == NULL)
			return -1;
		/* NH LEN */
		if (ibuf_add_n8(buf,
		    sizeof(uint64_t) + sizeof(struct in_addr)) == -1)
			return -1;
		/* write zero rd */
		if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
			return -1;
		/* write nexthop */
		nexthop = &nh->exit_nexthop;
		if (ibuf_add(buf, &nexthop->v4, sizeof(struct in_addr)) == -1)
			return -1;
		break;
	case AID_VPN_IPv6:
		if (nh == NULL)
			return -1;
		/* NH LEN */
		if (ibuf_add_n8(buf,
		    sizeof(uint64_t) + sizeof(struct in6_addr)) == -1)
			return -1;
		/* write zero rd */
		if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
			return -1;
		/* write nexthop */
		nexthop = &nh->exit_nexthop;
		if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1)
			return -1;
		break;
	case AID_FLOWSPECv4:
	case AID_FLOWSPECv6:
		if (ibuf_add_zero(buf, 1) == -1) /* NH LEN MUST be 0 */
			return -1;
		/* no NH */
		break;
	default:
		fatalx("up_generate_mp_reach: unknown AID");
	}

	if (ibuf_add_zero(buf, 1) == -1) /* Reserved must be 0 */
		return -1;

	if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
		/* no prefixes written, fail update  */
		return (-1);

	/* update MP_REACH attribute length field */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		return -1;

	return 0;
}
960 
961 /*
962  * Generate UPDATE message containing either just withdraws or updates.
963  * UPDATE messages are contructed like this:
964  *
965  *    +-----------------------------------------------------+
966  *    |   Withdrawn Routes Length (2 octets)                |
967  *    +-----------------------------------------------------+
968  *    |   Withdrawn Routes (variable)                       |
969  *    +-----------------------------------------------------+
970  *    |   Total Path Attribute Length (2 octets)            |
971  *    +-----------------------------------------------------+
972  *    |   Path Attributes (variable)                        |
973  *    +-----------------------------------------------------+
974  *    |   Network Layer Reachability Information (variable) |
975  *    +-----------------------------------------------------+
976  *
977  * Multiprotocol messages use MP_REACH_NLRI and MP_UNREACH_NLRI
978  * the latter will be the only path attribute in a message.
979  */
980 
/*
 * Write UPDATE message for withdrawn routes. The size of buf limits
 * how may routes can be added. Return 0 on success -1 on error which
 * includes generating an empty withdraw message.
 */
int
up_dump_withdraws(struct ibuf *buf, struct rde_peer *peer, uint8_t aid)
{
	size_t off;
	uint16_t afi, len;
	uint8_t safi;

	/* reserve space for the withdrawn routes length field */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		return -1;

	if (aid != AID_INET) {
		/* non-IPv4 withdraws travel in an MP_UNREACH_NLRI attr,
		 * so the withdrawn routes length just written stays 0 */
		/* reserve space for 2-byte path attribute length */
		off = ibuf_size(buf);
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			return -1;

		/* attribute header, defaulting to extended length one */
		if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
			return -1;
		if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
			return -1;
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			return -1;

		/* afi & safi */
		if (aid2afi(aid, &afi, &safi))
			fatalx("up_dump_mp_unreach: bad AID");
		if (ibuf_add_n16(buf, afi) == -1)
			return -1;
		if (ibuf_add_n8(buf, safi) == -1)
			return -1;
	}

	if (up_dump_prefix(buf, &peer->withdraws[aid], peer, 1) == -1)
		return -1;

	/* update length field (either withdrawn routes or attribute length) */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		return -1;

	if (aid != AID_INET) {
		/* write MP_UNREACH_NLRI attribute length (always extended) */
		len -= 4; /* skip attribute header */
		if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
			return -1;
	} else {
		/* no extra attributes so set attribute len to 0 */
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			return -1;
	}

	return 0;
}
1042 
/*
 * Write UPDATE message for changed and added routes. The size of buf limits
 * how may routes can be added. The function first dumps the path attributes
 * and then tries to add as many prefixes using these attributes.
 * Return 0 on success -1 on error which includes producing an empty message.
 */
int
up_dump_update(struct ibuf *buf, struct rde_peer *peer, uint8_t aid)
{
	struct bgpd_addr addr;
	struct prefix *p;
	size_t off;
	uint16_t len;

	/* pick the first pending prefix, its attributes seed the message */
	p = RB_MIN(prefix_tree, &peer->updates[aid]);
	if (p == NULL)
		return -1;

	/* withdrawn routes length field is 0 */
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		return -1;

	/* reserve space for 2-byte path attribute length */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		return -1;

	if (up_generate_attr(buf, peer, prefix_aspath(p),
	    prefix_communities(p), prefix_nexthop(p), aid) == -1)
		goto fail;

	if (aid != AID_INET) {
		/* write mp attribute including nlri */

		/*
		 * RFC 7606 wants this to be first but then we need
		 * to use multiple buffers with adjusted length to
		 * merge the attributes together in reverse order of
		 * creation.
		 */
		if (up_generate_mp_reach(buf, peer, prefix_nexthop(p), aid) ==
		    -1)
			goto fail;
	}

	/* update attribute length field */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		return -1;

	if (aid == AID_INET) {
		/* last but not least dump the IPv4 nlri */
		if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
			goto fail;
	}

	return 0;

fail:
	/* Not enough space. Drop prefix, it will never fit. */
	pt_getaddr(p->pt, &addr);
	log_peer_warnx(&peer->conf, "dump of path attributes failed, "
	    "prefix %s/%d dropped", log_addr(&addr), p->pt->prefixlen);

	up_prefix_free(&peer->updates[aid], p, peer, 0);
	/* XXX should probably send a withdraw for this prefix */
	return -1;
}
1111