xref: /openbsd/usr.sbin/bgpd/rde_update.c (revision 6c19f566)
1 /*	$OpenBSD: rde_update.c,v 1.169 2024/09/25 14:46:51 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2004 Claudio Jeker <claudio@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/tree.h>
21 
22 #include <limits.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <stdio.h>
26 
27 #include "bgpd.h"
28 #include "session.h"
29 #include "rde.h"
30 #include "log.h"
31 
/* result codes of up_process_prefix() for a single prefix */
enum up_state {
	UP_OK,		/* prefix was added to the Adj-RIB-Out */
	UP_ERR_LIMIT,	/* outbound prefix limit reached, session reset */
	UP_FILTERED,	/* denied by the output filters / open policy */
	UP_EXCLUDED,	/* excluded by up_test_update() */
};
38 
/*
 * Well-known communities (RFC 1997) matched in up_test_update() to
 * suppress announcement of prefixes carrying them.
 */
static struct community	comm_no_advertise = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_ADVERTISE
};
static struct community	comm_no_export = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_EXPORT
};
static struct community	comm_no_expsubconfed = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_EXPSUBCONFED
};
54 
55 static void up_prep_adjout(struct rde_peer *, struct filterstate *, uint8_t);
56 
57 static int
up_test_update(struct rde_peer * peer,struct prefix * p)58 up_test_update(struct rde_peer *peer, struct prefix *p)
59 {
60 	struct rde_aspath	*asp;
61 	struct rde_community	*comm;
62 	struct rde_peer		*frompeer;
63 
64 	frompeer = prefix_peer(p);
65 	asp = prefix_aspath(p);
66 	comm = prefix_communities(p);
67 
68 	if (asp == NULL || asp->flags & F_ATTR_PARSE_ERR)
69 		fatalx("try to send out a botched path");
70 	if (asp->flags & (F_ATTR_LOOP | F_ATTR_OTC_LEAK))
71 		fatalx("try to send out a looped path");
72 
73 	if (peer == frompeer)
74 		/* Do not send routes back to sender */
75 		return (0);
76 
77 	if (!frompeer->conf.ebgp && !peer->conf.ebgp) {
78 		/*
79 		 * route reflector redistribution rules:
80 		 * 1. if announce is set                -> announce
81 		 * 2. from non-client, to non-client    -> no
82 		 * 3. from client, to non-client        -> yes
83 		 * 4. from non-client, to client        -> yes
84 		 * 5. from client, to client            -> yes
85 		 */
86 		if (frompeer->conf.reflector_client == 0 &&
87 		    peer->conf.reflector_client == 0 &&
88 		    (asp->flags & F_PREFIX_ANNOUNCED) == 0)
89 			/* Do not redistribute updates to ibgp peers */
90 			return (0);
91 	}
92 
93 	/* well known communities */
94 	if (community_match(comm, &comm_no_advertise, NULL))
95 		return (0);
96 	if (peer->conf.ebgp) {
97 		if (community_match(comm, &comm_no_export, NULL))
98 			return (0);
99 		if (community_match(comm, &comm_no_expsubconfed, NULL))
100 			return (0);
101 	}
102 
103 	return (1);
104 }
105 
106 /* RFC9234 open policy handling */
107 static int
up_enforce_open_policy(struct rde_peer * peer,struct filterstate * state,uint8_t aid)108 up_enforce_open_policy(struct rde_peer *peer, struct filterstate *state,
109     uint8_t aid)
110 {
111 	/* only for IPv4 and IPv6 unicast */
112 	if (aid != AID_INET && aid != AID_INET6)
113 		return 0;
114 
115 	/*
116 	 * do not propagate (consider it filtered) if OTC is present and
117 	 * local role is peer, customer or rs-client.
118 	 */
119 	if (peer->role == ROLE_PEER || peer->role == ROLE_CUSTOMER ||
120 	    peer->role == ROLE_RS_CLIENT)
121 		if (state->aspath.flags & F_ATTR_OTC)
122 			return 1;
123 
124 	/*
125 	 * add OTC attribute if not present towards peers, customers and
126 	 * rs-clients (local roles peer, provider, rs).
127 	 */
128 	if (peer->role == ROLE_PEER || peer->role == ROLE_PROVIDER ||
129 	    peer->role == ROLE_RS)
130 		if ((state->aspath.flags & F_ATTR_OTC) == 0) {
131 			uint32_t tmp;
132 
133 			tmp = htonl(peer->conf.local_as);
134 			if (attr_optadd(&state->aspath,
135 			    ATTR_OPTIONAL|ATTR_TRANSITIVE, ATTR_OTC,
136 			    &tmp, sizeof(tmp)) == -1)
137 				log_peer_warnx(&peer->conf,
138 				    "failed to add OTC attribute");
139 			state->aspath.flags |= F_ATTR_OTC;
140 		}
141 
142 	return 0;
143 }
144 
145 /*
146  * Process a single prefix by passing it through the various filter stages
147  * and if not filtered out update the Adj-RIB-Out. Returns:
148  * - UP_OK if prefix was added
149  * - UP_ERR_LIMIT if the peer outbound prefix limit was reached
150  * - UP_FILTERED if prefix was filtered out
151  * - UP_EXCLUDED if prefix was excluded because of up_test_update()
152  */
153 static enum up_state
up_process_prefix(struct rde_peer * peer,struct prefix * new,struct prefix * p)154 up_process_prefix(struct rde_peer *peer, struct prefix *new, struct prefix *p)
155 {
156 	struct filterstate state;
157 	struct bgpd_addr addr;
158 	int excluded = 0;
159 
160 	/*
161 	 * up_test_update() needs to run before the output filters
162 	 * else the well known communities won't work properly.
163 	 * The output filters would not be able to add well known
164 	 * communities.
165 	 */
166 	if (!up_test_update(peer, new))
167 		excluded = 1;
168 
169 	rde_filterstate_prep(&state, new);
170 	pt_getaddr(new->pt, &addr);
171 	if (rde_filter(peer->out_rules, peer, prefix_peer(new), &addr,
172 	    new->pt->prefixlen, &state) == ACTION_DENY) {
173 		rde_filterstate_clean(&state);
174 		return UP_FILTERED;
175 	}
176 
177 	/* Open Policy Check: acts like an output filter */
178 	if (up_enforce_open_policy(peer, &state, new->pt->aid)) {
179 		rde_filterstate_clean(&state);
180 		return UP_FILTERED;
181 	}
182 
183 	if (excluded) {
184 		rde_filterstate_clean(&state);
185 		return UP_EXCLUDED;
186 	}
187 
188 	/* from here on we know this is an update */
189 	if (p == (void *)-1)
190 		p = prefix_adjout_get(peer, new->path_id_tx, new->pt);
191 
192 	up_prep_adjout(peer, &state, new->pt->aid);
193 	prefix_adjout_update(p, peer, &state, new->pt, new->path_id_tx);
194 	rde_filterstate_clean(&state);
195 
196 	/* max prefix checker outbound */
197 	if (peer->conf.max_out_prefix &&
198 	    peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
199 		log_peer_warnx(&peer->conf,
200 		    "outbound prefix limit reached (>%u/%u)",
201 		    peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
202 		rde_update_err(peer, ERR_CEASE,
203 		    ERR_CEASE_MAX_SENT_PREFIX, NULL);
204 		return UP_ERR_LIMIT;
205 	}
206 
207 	return UP_OK;
208 }
209 
210 void
up_generate_updates(struct rde_peer * peer,struct rib_entry * re)211 up_generate_updates(struct rde_peer *peer, struct rib_entry *re)
212 {
213 	struct prefix		*new, *p;
214 
215 	p = prefix_adjout_first(peer, re->prefix);
216 
217 	new = prefix_best(re);
218 	while (new != NULL) {
219 		switch (up_process_prefix(peer, new, p)) {
220 		case UP_OK:
221 		case UP_ERR_LIMIT:
222 			return;
223 		case UP_FILTERED:
224 			if (peer->flags & PEERFLAG_EVALUATE_ALL) {
225 				new = TAILQ_NEXT(new, entry.list.rib);
226 				if (new != NULL && prefix_eligible(new))
227 					continue;
228 			}
229 			goto done;
230 		case UP_EXCLUDED:
231 			goto done;
232 		}
233 	}
234 
235 done:
236 	/* withdraw prefix */
237 	if (p != NULL)
238 		prefix_adjout_withdraw(p);
239 }
240 
/*
 * Generate updates for the add-path send case. Depending on the
 * peer eval settings prefixes are selected and distributed.
 * This highly depends on the Adj-RIB-Out to handle prefixes with no
 * changes gracefully. It may be possible to improve the API so that
 * less churn is needed.
 */
void
up_generate_addpath(struct rde_peer *peer, struct rib_entry *re)
{
	struct prefix		*head, *new, *p;
	int			maxpaths = 0, extrapaths = 0, extra;
	int			checkmode = 1;

	head = prefix_adjout_first(peer, re->prefix);

	/* mark all paths as stale */
	for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
		p->flags |= PREFIX_FLAG_STALE;

	/* update paths */
	new = prefix_best(re);
	while (new != NULL) {
		/* check limits and stop when a limit is reached */
		if (peer->eval.maxpaths != 0 &&
		    maxpaths >= peer->eval.maxpaths)
			break;
		if (peer->eval.extrapaths != 0 &&
		    extrapaths >= peer->eval.extrapaths)
			break;

		/*
		 * extra stays 1 for paths outside the set selected by the
		 * eval mode; only those count against the extrapaths limit.
		 * Once a path falls outside that set checkmode is cleared
		 * and all following paths keep extra == 1.
		 */
		extra = 1;
		if (checkmode) {
			switch (peer->eval.mode) {
			case ADDPATH_EVAL_BEST:
				if (new->dmetric == PREFIX_DMETRIC_BEST)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_ECMP:
				if (new->dmetric == PREFIX_DMETRIC_BEST ||
				    new->dmetric == PREFIX_DMETRIC_ECMP)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_AS_WIDE:
				if (new->dmetric == PREFIX_DMETRIC_BEST ||
				    new->dmetric == PREFIX_DMETRIC_ECMP ||
				    new->dmetric == PREFIX_DMETRIC_AS_WIDE)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_ALL:
				/* nothing to check */
				checkmode = 0;
				break;
			default:
				fatalx("unknown add-path eval mode");
			}
		}

		/* (void *)-1 makes up_process_prefix() look up the
		 * adjout prefix itself */
		switch (up_process_prefix(peer, new, (void *)-1)) {
		case UP_OK:
			maxpaths++;
			extrapaths += extra;
			break;
		case UP_FILTERED:
		case UP_EXCLUDED:
			break;
		case UP_ERR_LIMIT:
			/* just give up */
			return;
		}

		/* only allow valid prefixes */
		new = TAILQ_NEXT(new, entry.list.rib);
		if (new == NULL || !prefix_eligible(new))
			break;
	}

	/* withdraw stale paths */
	for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
		if (p->flags & PREFIX_FLAG_STALE)
			prefix_adjout_withdraw(p);
	}
}
330 
/*
 * Generate updates for the add-path send all case. Since all prefixes
 * are distributed just remove old and add new.
 *
 * When both new and old are NULL the whole rib_entry is resynced:
 * all currently announced paths are marked stale, every eligible path
 * is (re)announced and leftover stale paths are withdrawn at the end.
 * Otherwise only the single old path is withdrawn and/or the single
 * new path announced.
 */
void
up_generate_addpath_all(struct rde_peer *peer, struct rib_entry *re,
    struct prefix *new, struct prefix *old)
{
	struct prefix		*p, *head = NULL;
	int			all = 0;

	/*
	 * if old and new are NULL then insert all prefixes from best,
	 * clearing old routes in the process
	 */
	if (old == NULL && new == NULL) {
		/* mark all paths as stale */
		head = prefix_adjout_first(peer, re->prefix);
		for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
			p->flags |= PREFIX_FLAG_STALE;

		new = prefix_best(re);
		all = 1;
	}

	if (new != NULL && !prefix_eligible(new)) {
		/* only allow valid prefixes */
		new = NULL;
	}

	if (old != NULL) {
		/* withdraw stale paths */
		p = prefix_adjout_get(peer, old->path_id_tx, old->pt);
		if (p != NULL)
			prefix_adjout_withdraw(p);
	}

	/* add new path (or multiple if all is set) */
	while (new != NULL) {
		switch (up_process_prefix(peer, new, (void *)-1)) {
		case UP_OK:
		case UP_FILTERED:
		case UP_EXCLUDED:
			break;
		case UP_ERR_LIMIT:
			/* just give up */
			return;
		}

		if (!all)
			break;

		/* only allow valid prefixes */
		new = TAILQ_NEXT(new, entry.list.rib);
		if (new == NULL || !prefix_eligible(new))
			break;
	}

	if (all) {
		/* withdraw stale paths */
		for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
			if (p->flags & PREFIX_FLAG_STALE)
				prefix_adjout_withdraw(p);
		}
	}
}
397 
398 /* send a default route to the specified peer */
399 void
up_generate_default(struct rde_peer * peer,uint8_t aid)400 up_generate_default(struct rde_peer *peer, uint8_t aid)
401 {
402 	extern struct rde_peer	*peerself;
403 	struct filterstate	 state;
404 	struct rde_aspath	*asp;
405 	struct prefix		*p;
406 	struct pt_entry		*pte;
407 	struct bgpd_addr	 addr;
408 
409 	if (peer->capa.mp[aid] == 0)
410 		return;
411 
412 	rde_filterstate_init(&state);
413 	asp = &state.aspath;
414 	asp->aspath = aspath_get(NULL, 0);
415 	asp->origin = ORIGIN_IGP;
416 	rde_filterstate_set_vstate(&state, ROA_NOTFOUND, ASPA_NEVER_KNOWN);
417 	/* the other default values are OK, nexthop is once again NULL */
418 
419 	/*
420 	 * XXX apply default overrides. Not yet possible, mainly a parse.y
421 	 * problem.
422 	 */
423 	/* rde_apply_set(asp, peerself, peerself, set, af); */
424 
425 	memset(&addr, 0, sizeof(addr));
426 	addr.aid = aid;
427 	p = prefix_adjout_lookup(peer, &addr, 0);
428 
429 	/* outbound filter as usual */
430 	if (rde_filter(peer->out_rules, peer, peerself, &addr, 0, &state) ==
431 	    ACTION_DENY) {
432 		rde_filterstate_clean(&state);
433 		return;
434 	}
435 
436 	up_prep_adjout(peer, &state, addr.aid);
437 	/* can't use pt_fill here since prefix_adjout_update keeps a ref */
438 	pte = pt_get(&addr, 0);
439 	if (pte == NULL)
440 		pte = pt_add(&addr, 0);
441 	prefix_adjout_update(p, peer, &state, pte, 0);
442 	rde_filterstate_clean(&state);
443 
444 	/* max prefix checker outbound */
445 	if (peer->conf.max_out_prefix &&
446 	    peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
447 		log_peer_warnx(&peer->conf,
448 		    "outbound prefix limit reached (>%u/%u)",
449 		    peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
450 		rde_update_err(peer, ERR_CEASE,
451 		    ERR_CEASE_MAX_SENT_PREFIX, NULL);
452 	}
453 }
454 
455 static struct bgpd_addr *
up_get_nexthop(struct rde_peer * peer,struct filterstate * state,uint8_t aid)456 up_get_nexthop(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
457 {
458 	struct bgpd_addr *peer_local = NULL;
459 
460 	switch (aid) {
461 	case AID_INET:
462 	case AID_VPN_IPv4:
463 		if (peer->local_v4_addr.aid == AID_INET)
464 			peer_local = &peer->local_v4_addr;
465 		break;
466 	case AID_INET6:
467 	case AID_VPN_IPv6:
468 		if (peer->local_v6_addr.aid == AID_INET6)
469 			peer_local = &peer->local_v6_addr;
470 		break;
471 	case AID_FLOWSPECv4:
472 	case AID_FLOWSPECv6:
473 		/* flowspec has no nexthop */
474 		return (NULL);
475 	default:
476 		fatalx("%s, bad AID %s", __func__, aid2str(aid));
477 	}
478 
479 	if (state->nhflags & NEXTHOP_SELF) {
480 		/*
481 		 * Forcing the nexthop to self is always possible
482 		 * and has precedence over other flags.
483 		 */
484 		return (peer_local);
485 	} else if (!peer->conf.ebgp) {
486 		/*
487 		 * in the ibgp case the nexthop is normally not
488 		 * modified unless it points at the peer itself.
489 		 */
490 		if (state->nexthop == NULL) {
491 			/* announced networks without explicit nexthop set */
492 			return (peer_local);
493 		}
494 		/*
495 		 * per RFC: if remote peer address is equal to the nexthop set
496 		 * the nexthop to our local address. This reduces the risk of
497 		 * routing loops. This overrides NEXTHOP_NOMODIFY.
498 		 */
499 		if (memcmp(&state->nexthop->exit_nexthop,
500 		    &peer->remote_addr, sizeof(peer->remote_addr)) == 0) {
501 			return (peer_local);
502 		}
503 		return (&state->nexthop->exit_nexthop);
504 	} else if (peer->conf.distance == 1) {
505 		/*
506 		 * In the ebgp directly connected case never send
507 		 * out a nexthop that is outside of the connected
508 		 * network of the peer. No matter what flags are
509 		 * set. This follows section 5.1.3 of RFC 4271.
510 		 * So just check if the nexthop is in the same net
511 		 * is enough here.
512 		 */
513 		if (state->nexthop != NULL &&
514 		    state->nexthop->flags & NEXTHOP_CONNECTED &&
515 		    prefix_compare(&peer->remote_addr,
516 		    &state->nexthop->nexthop_net,
517 		    state->nexthop->nexthop_netlen) == 0) {
518 			/* nexthop and peer are in the same net */
519 			return (&state->nexthop->exit_nexthop);
520 		}
521 		return (peer_local);
522 	} else {
523 		/*
524 		 * For ebgp multihop make it possible to overrule
525 		 * the sent nexthop by setting NEXTHOP_NOMODIFY.
526 		 * Similar to the ibgp case there is no same net check
527 		 * needed but still ensure that the nexthop is not
528 		 * pointing to the peer itself.
529 		 */
530 		if (state->nhflags & NEXTHOP_NOMODIFY &&
531 		    state->nexthop != NULL &&
532 		    memcmp(&state->nexthop->exit_nexthop,
533 		    &peer->remote_addr, sizeof(peer->remote_addr)) != 0) {
534 			/* no modify flag set and nexthop not peer addr */
535 			return (&state->nexthop->exit_nexthop);
536 		}
537 		return (peer_local);
538 	}
539 }
540 
541 static void
up_prep_adjout(struct rde_peer * peer,struct filterstate * state,uint8_t aid)542 up_prep_adjout(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
543 {
544 	struct bgpd_addr *nexthop;
545 	struct nexthop *nh = NULL;
546 	u_char *np;
547 	uint16_t nl;
548 
549 	/* prepend local AS number for eBGP sessions. */
550 	if (peer->conf.ebgp && (peer->flags & PEERFLAG_TRANS_AS) == 0) {
551 		uint32_t prep_as = peer->conf.local_as;
552 		np = aspath_prepend(state->aspath.aspath, prep_as, 1, &nl);
553 		aspath_put(state->aspath.aspath);
554 		state->aspath.aspath = aspath_get(np, nl);
555 		free(np);
556 	}
557 
558 	/* update nexthop */
559 	nexthop = up_get_nexthop(peer, state, aid);
560 	if (nexthop != NULL)
561 		nh = nexthop_get(nexthop);
562 	nexthop_unref(state->nexthop);
563 	state->nexthop = nh;
564 	state->nhflags = 0;
565 }
566 
567 
/*
 * Serialize the path attributes for an UPDATE to the given peer into buf.
 * Attributes are written in ascending type order; the optional attributes
 * kept in asp->others are merged in at their proper position. Handles
 * NEW->OLD (4-byte to 2-byte AS) conversion for peers without the as4byte
 * capability. Returns 0 on success, -1 when writing to buf failed.
 */
static int
up_generate_attr(struct ibuf *buf, struct rde_peer *peer,
    struct rde_aspath *asp, struct rde_community *comm, struct nexthop *nh,
    uint8_t aid)
{
	struct attr	*oa = NULL, *newaggr = NULL;
	u_char		*pdata;
	uint32_t	 tmp32;
	int		 flags, neednewpath = 0, rv;
	uint16_t	 plen;
	uint8_t		 oalen = 0, type;

	if (asp->others_len > 0)
		oa = asp->others[oalen++];

	/* dump attributes in ascending order */
	for (type = ATTR_ORIGIN; type < 255; type++) {
		/* advance oa to the first "other" attribute with
		 * oa->type >= type */
		while (oa && oa->type < type) {
			if (oalen < asp->others_len)
				oa = asp->others[oalen++];
			else
				oa = NULL;
		}

		switch (type) {
		/*
		 * Attributes stored in rde_aspath
		 */
		case ATTR_ORIGIN:
			if (attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ORIGIN, &asp->origin, 1) == -1)
				return -1;
			break;
		case ATTR_ASPATH:
			plen = aspath_length(asp->aspath);
			pdata = aspath_dump(asp->aspath);

			/* 2-byte AS peers get a deflated copy of the path */
			if (!peer_has_as4byte(peer))
				pdata = aspath_deflate(pdata, &plen,
				    &neednewpath);
			rv = attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ASPATH, pdata, plen);
			/* the deflated copy was allocated, free it again */
			if (!peer_has_as4byte(peer))
				free(pdata);

			if (rv == -1)
				return -1;
			break;
		case ATTR_NEXTHOP:
			switch (aid) {
			case AID_INET:
				/* classic IPv4 nexthop attribute */
				if (nh == NULL)
					return -1;
				if (attr_writebuf(buf, ATTR_WELL_KNOWN,
				    ATTR_NEXTHOP, &nh->exit_nexthop.v4,
				    sizeof(nh->exit_nexthop.v4)) == -1)
					return -1;
				break;
			default:
				/* other AIDs carry the nexthop in
				 * MP_REACH_NLRI instead */
				break;
			}
			break;
		case ATTR_MED:
			/*
			 * The old MED from other peers MUST not be announced
			 * to others unless the MED is originating from us or
			 * the peer is an IBGP one. Only exception are routers
			 * with "transparent-as yes" set.
			 */
			if (asp->flags & F_ATTR_MED && (!peer->conf.ebgp ||
			    asp->flags & F_ATTR_MED_ANNOUNCE ||
			    peer->flags & PEERFLAG_TRANS_AS)) {
				tmp32 = htonl(asp->med);
				if (attr_writebuf(buf, ATTR_OPTIONAL,
				    ATTR_MED, &tmp32, 4) == -1)
					return -1;
			}
			break;
		case ATTR_LOCALPREF:
			if (!peer->conf.ebgp) {
				/* local preference, only valid for ibgp */
				tmp32 = htonl(asp->lpref);
				if (attr_writebuf(buf, ATTR_WELL_KNOWN,
				    ATTR_LOCALPREF, &tmp32, 4) == -1)
					return -1;
			}
			break;
		/*
		 * Communities are stored in struct rde_community
		 */
		case ATTR_COMMUNITIES:
		case ATTR_EXT_COMMUNITIES:
		case ATTR_LARGE_COMMUNITIES:
			if (community_writebuf(comm, type, peer->conf.ebgp,
			    buf) == -1)
				return -1;
			break;
		/*
		 * NEW to OLD conversion when sending stuff to a 2byte AS peer
		 */
		case ATTR_AS4_PATH:
			/* only set when aspath_deflate() replaced ASes */
			if (neednewpath) {
				plen = aspath_length(asp->aspath);
				pdata = aspath_dump(asp->aspath);

				flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
				if (!(asp->flags & F_PREFIX_ANNOUNCED))
					flags |= ATTR_PARTIAL;
				if (plen != 0)
					if (attr_writebuf(buf, flags,
					    ATTR_AS4_PATH, pdata, plen) == -1)
						return -1;
			}
			break;
		case ATTR_AS4_AGGREGATOR:
			/* only set when ATTR_AGGREGATOR below needed
			 * AS_TRANS */
			if (newaggr) {
				flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
				if (!(asp->flags & F_PREFIX_ANNOUNCED))
					flags |= ATTR_PARTIAL;
				if (attr_writebuf(buf, flags,
				    ATTR_AS4_AGGREGATOR, newaggr->data,
				    newaggr->len) == -1)
					return -1;
			}
			break;
		/*
		 * multiprotocol attributes are handled elsewhere
		 */
		case ATTR_MP_REACH_NLRI:
		case ATTR_MP_UNREACH_NLRI:
			break;
		/*
		 * dump all other path attributes. Following rules apply:
		 *  1. well-known attrs: ATTR_ATOMIC_AGGREGATE and
		 *     ATTR_AGGREGATOR pass unmodified (enforce flags
		 *     to correct values). Actually ATTR_AGGREGATOR may be
		 *     deflated for OLD 2-byte peers.
		 *  2. non-transitive attrs: don't re-announce to ebgp peers
		 *  3. transitive known attrs: announce unmodified
		 *  4. transitive unknown attrs: set partial bit and re-announce
		 */
		case ATTR_ATOMIC_AGGREGATE:
			if (oa == NULL || oa->type != type)
				break;
			if (attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ATOMIC_AGGREGATE, NULL, 0) == -1)
				return -1;
			break;
		case ATTR_AGGREGATOR:
			if (oa == NULL || oa->type != type)
				break;
			if ((!(oa->flags & ATTR_TRANSITIVE)) &&
			    peer->conf.ebgp)
				break;
			if (!peer_has_as4byte(peer)) {
				/* need to deflate the aggregator */
				uint8_t		t[6];
				uint16_t	tas;

				/* NOTE(review): this check is redundant,
				 * the identical test was already done just
				 * above before entering this branch */
				if ((!(oa->flags & ATTR_TRANSITIVE)) &&
				    peer->conf.ebgp)
					break;

				memcpy(&tmp32, oa->data, sizeof(tmp32));
				if (ntohl(tmp32) > USHRT_MAX) {
					/* AS does not fit in 2 bytes, send
					 * AS_TRANS and emit the original in
					 * ATTR_AS4_AGGREGATOR above */
					tas = htons(AS_TRANS);
					newaggr = oa;
				} else
					tas = htons(ntohl(tmp32));

				memcpy(t, &tas, sizeof(tas));
				memcpy(t + sizeof(tas),
				    oa->data + sizeof(tmp32),
				    oa->len - sizeof(tmp32));
				if (attr_writebuf(buf, oa->flags,
				    oa->type, &t, sizeof(t)) == -1)
					return -1;
			} else {
				if (attr_writebuf(buf, oa->flags, oa->type,
				    oa->data, oa->len) == -1)
					return -1;
			}
			break;
		case ATTR_ORIGINATOR_ID:
		case ATTR_CLUSTER_LIST:
		case ATTR_OTC:
			if (oa == NULL || oa->type != type)
				break;
			if ((!(oa->flags & ATTR_TRANSITIVE)) &&
			    peer->conf.ebgp)
				break;
			if (attr_writebuf(buf, oa->flags, oa->type,
			    oa->data, oa->len) == -1)
				return -1;
			break;
		default:
			if (oa == NULL && type >= ATTR_FIRST_UNKNOWN)
				/* there is no attribute left to dump */
				return (0);

			if (oa == NULL || oa->type != type)
				break;
			/* unknown attribute */
			if (!(oa->flags & ATTR_TRANSITIVE)) {
				/*
				 * RFC 1771:
				 * Unrecognized non-transitive optional
				 * attributes must be quietly ignored and
				 * not passed along to other BGP peers.
				 */
				break;
			}
			if (attr_writebuf(buf, oa->flags | ATTR_PARTIAL,
			    oa->type, oa->data, oa->len) == -1)
				return -1;
		}
	}
	return 0;
}
787 
788 /*
789  * Check if the pending element is a EoR marker. If so remove it from the
790  * tree and return 1.
791  */
792 int
up_is_eor(struct rde_peer * peer,uint8_t aid)793 up_is_eor(struct rde_peer *peer, uint8_t aid)
794 {
795 	struct prefix *p;
796 
797 	p = RB_MIN(prefix_tree, &peer->updates[aid]);
798 	if (p != NULL && (p->flags & PREFIX_FLAG_EOR)) {
799 		/*
800 		 * Need to remove eor from update tree because
801 		 * prefix_adjout_destroy() can't handle that.
802 		 */
803 		RB_REMOVE(prefix_tree, &peer->updates[aid], p);
804 		p->flags &= ~PREFIX_FLAG_UPDATE;
805 		prefix_adjout_destroy(p);
806 		return 1;
807 	}
808 	return 0;
809 }
810 
811 /* minimal buffer size > withdraw len + attr len + attr hdr + afi/safi */
812 #define MIN_UPDATE_LEN	16
813 
814 static void
up_prefix_free(struct prefix_tree * prefix_head,struct prefix * p,struct rde_peer * peer,int withdraw)815 up_prefix_free(struct prefix_tree *prefix_head, struct prefix *p,
816     struct rde_peer *peer, int withdraw)
817 {
818 	if (withdraw) {
819 		/* prefix no longer needed, remove it */
820 		prefix_adjout_destroy(p);
821 		peer->stats.prefix_sent_withdraw++;
822 	} else {
823 		/* prefix still in Adj-RIB-Out, keep it */
824 		RB_REMOVE(prefix_tree, prefix_head, p);
825 		p->flags &= ~PREFIX_FLAG_UPDATE;
826 		peer->stats.pending_update--;
827 		peer->stats.prefix_sent_update++;
828 	}
829 }
830 
831 /*
832  * Write prefixes to buffer until either there is no more space or
833  * the next prefix has no longer the same ASPATH attributes.
834  * Returns -1 if no prefix was written else 0.
835  */
836 static int
up_dump_prefix(struct ibuf * buf,struct prefix_tree * prefix_head,struct rde_peer * peer,int withdraw)837 up_dump_prefix(struct ibuf *buf, struct prefix_tree *prefix_head,
838     struct rde_peer *peer, int withdraw)
839 {
840 	struct prefix	*p, *np;
841 	int		 done = 0, has_ap = -1, rv = -1;
842 
843 	RB_FOREACH_SAFE(p, prefix_tree, prefix_head, np) {
844 		if (has_ap == -1)
845 			has_ap = peer_has_add_path(peer, p->pt->aid,
846 			    CAPA_AP_SEND);
847 		if (pt_writebuf(buf, p->pt, withdraw, has_ap, p->path_id_tx) ==
848 		    -1)
849 			break;
850 
851 		/* make sure we only dump prefixes which belong together */
852 		if (np == NULL ||
853 		    np->aspath != p->aspath ||
854 		    np->communities != p->communities ||
855 		    np->nexthop != p->nexthop ||
856 		    np->nhflags != p->nhflags ||
857 		    (np->flags & PREFIX_FLAG_EOR))
858 			done = 1;
859 
860 		rv = 0;
861 		up_prefix_free(prefix_head, p, peer, withdraw);
862 		if (done)
863 			break;
864 	}
865 	return rv;
866 }
867 
/*
 * Write the MP_REACH_NLRI attribute (nexthop plus NLRI) for the given
 * AID into buf. Returns 0 on success, -1 on failure; on failure buf may
 * contain a partially written attribute and must be discarded.
 */
static int
up_generate_mp_reach(struct ibuf *buf, struct rde_peer *peer,
    struct nexthop *nh, uint8_t aid)
{
	struct bgpd_addr *nexthop;
	size_t off;
	uint16_t len, afi;
	uint8_t safi;

	/* attribute header, defaulting to extended length one */
	if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
		return -1;
	if (ibuf_add_n8(buf, ATTR_MP_REACH_NLRI) == -1)
		return -1;
	/* remember position of the length field, it is patched at the end */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		return -1;

	if (aid2afi(aid, &afi, &safi))
		fatalx("up_generate_mp_reach: bad AID");

	/* AFI + SAFI + NH LEN + NH + Reserved */
	if (ibuf_add_n16(buf, afi) == -1)
		return -1;
	if (ibuf_add_n8(buf, safi) == -1)
		return -1;

	switch (aid) {
	case AID_INET6:
		if (nh == NULL)
			return -1;
		/* NH LEN */
		if (ibuf_add_n8(buf, sizeof(struct in6_addr)) == -1)
			return -1;
		/* write nexthop */
		nexthop = &nh->exit_nexthop;
		if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1)
			return -1;
		break;
	case AID_VPN_IPv4:
		if (nh == NULL)
			return -1;
		/* NH LEN */
		if (ibuf_add_n8(buf,
		    sizeof(uint64_t) + sizeof(struct in_addr)) == -1)
			return -1;
		/* write zero rd */
		if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
			return -1;
		/* write nexthop */
		nexthop = &nh->exit_nexthop;
		if (ibuf_add(buf, &nexthop->v4, sizeof(struct in_addr)) == -1)
			return -1;
		break;
	case AID_VPN_IPv6:
		if (nh == NULL)
			return -1;
		/* NH LEN */
		if (ibuf_add_n8(buf,
		    sizeof(uint64_t) + sizeof(struct in6_addr)) == -1)
			return -1;
		/* write zero rd */
		if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
			return -1;
		/* write nexthop */
		nexthop = &nh->exit_nexthop;
		if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1)
			return -1;
		break;
	case AID_FLOWSPECv4:
	case AID_FLOWSPECv6:
		if (ibuf_add_zero(buf, 1) == -1) /* NH LEN MUST be 0 */
			return -1;
		/* no NH */
		break;
	default:
		fatalx("up_generate_mp_reach: unknown AID");
	}

	if (ibuf_add_zero(buf, 1) == -1) /* Reserved must be 0 */
		return -1;

	if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
		/* no prefixes written, fail update  */
		return -1;

	/* update MP_REACH attribute length field */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		return -1;

	return 0;
}
961 
962 /*
963  * Generate UPDATE message containing either just withdraws or updates.
 * UPDATE messages are constructed like this:
965  *
966  *    +-----------------------------------------------------+
967  *    |   Withdrawn Routes Length (2 octets)                |
968  *    +-----------------------------------------------------+
969  *    |   Withdrawn Routes (variable)                       |
970  *    +-----------------------------------------------------+
971  *    |   Total Path Attribute Length (2 octets)            |
972  *    +-----------------------------------------------------+
973  *    |   Path Attributes (variable)                        |
974  *    +-----------------------------------------------------+
975  *    |   Network Layer Reachability Information (variable) |
976  *    +-----------------------------------------------------+
977  *
978  * Multiprotocol messages use MP_REACH_NLRI and MP_UNREACH_NLRI
979  * the latter will be the only path attribute in a message.
980  */
981 
982 /*
 * Write UPDATE message for withdrawn routes. The size of buf limits
 * how many routes can be added. Returns the new ibuf on success and
 * NULL on error, which includes generating an empty withdraw message.
986  */
struct ibuf *
up_dump_withdraws(struct rde_peer *peer, uint8_t aid)
{
	struct ibuf *buf;
	size_t off;
	uint16_t afi, len;
	uint8_t safi;

	if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) == NULL)
		goto fail;

	/* reserve space for the withdrawn routes length field */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		goto fail;

	if (aid != AID_INET) {
		/*
		 * multiprotocol withdraws go into MP_UNREACH_NLRI, so the
		 * IPv4 withdrawn routes length just written stays zero and
		 * off is repointed at the path attribute length instead
		 */
		/* reserve space for 2-byte path attribute length */
		off = ibuf_size(buf);
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			goto fail;

		/* attribute header, defaulting to extended length one */
		if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
			goto fail;
		if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
			goto fail;
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			goto fail;

		/* afi & safi */
		if (aid2afi(aid, &afi, &safi))
			fatalx("%s: bad AID", __func__);
		if (ibuf_add_n16(buf, afi) == -1)
			goto fail;
		if (ibuf_add_n8(buf, safi) == -1)
			goto fail;
	}

	/* failing here covers the empty withdraw case as well */
	if (up_dump_prefix(buf, &peer->withdraws[aid], peer, 1) == -1)
		goto fail;

	/* update length field (either withdrawn routes or attribute length) */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		goto fail;

	if (aid != AID_INET) {
		/* write MP_UNREACH_NLRI attribute length (always extended) */
		len -= 4; /* skip attribute header */
		if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
			goto fail;
	} else {
		/* no extra attributes so set attribute len to 0 */
		if (ibuf_add_zero(buf, sizeof(len)) == -1) {
			goto fail;
		}
	}

	return buf;

 fail:
	/* something went horribly wrong */
	log_peer_warn(&peer->conf, "generating withdraw failed, peer desynced");
	ibuf_free(buf);
	return NULL;
}
1054 
/*
 * Withdraw a single prefix after an error while building an UPDATE.
 * The passed buffer is truncated and reused to build a minimal UPDATE
 * that only withdraws p. Returns buf on success; on failure the buffer
 * is freed and NULL is returned.
 */
static struct ibuf *
up_dump_withdraw_one(struct rde_peer *peer, struct prefix *p, struct ibuf *buf)
{
	size_t off;
	int has_ap;
	uint16_t afi, len;
	uint8_t safi;

	/* reset the buffer and start fresh */
	ibuf_truncate(buf, 0);

	/* reserve space for the withdrawn routes length field */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		goto fail;

	if (p->pt->aid != AID_INET) {
		/*
		 * Non-IPv4 withdraws travel in an MP_UNREACH_NLRI path
		 * attribute; the plain withdrawn routes section stays
		 * empty (its zero length was written above).
		 */
		/* reserve space for 2-byte path attribute length */
		off = ibuf_size(buf);
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			goto fail;

		/* attribute header, defaulting to extended length one */
		if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
			goto fail;
		if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
			goto fail;
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			goto fail;

		/* afi & safi */
		if (aid2afi(p->pt->aid, &afi, &safi))
			fatalx("%s: bad AID", __func__);
		if (ibuf_add_n16(buf, afi) == -1)
			goto fail;
		if (ibuf_add_n8(buf, safi) == -1)
			goto fail;
	}

	/* include the send path-id if add-path is negotiated for this AID */
	has_ap = peer_has_add_path(peer, p->pt->aid, CAPA_AP_SEND);
	if (pt_writebuf(buf, p->pt, 1, has_ap, p->path_id_tx) == -1)
		goto fail;

	/* update length field (either withdrawn routes or attribute length) */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		goto fail;

	if (p->pt->aid != AID_INET) {
		/* write MP_UNREACH_NLRI attribute length (always extended) */
		len -= 4; /* skip attribute header: flags, type, 2-byte len */
		if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
			goto fail;
	} else {
		/* no extra attributes so set attribute len to 0 */
		if (ibuf_add_zero(buf, sizeof(len)) == -1) {
			goto fail;
		}
	}

	return buf;

 fail:
	/* something went horribly wrong */
	log_peer_warn(&peer->conf, "generating withdraw failed, peer desynced");
	ibuf_free(buf);
	return NULL;
}
1126 
/*
 * Write UPDATE message for changed and added routes. The size of buf limits
 * how many routes can be added. The function first dumps the path attributes
 * and then tries to add as many prefixes using these attributes.
 * Returns the generated ibuf, or NULL if nothing is queued for this AID or
 * on error. If even a single prefix does not fit, that prefix is dropped
 * from the update queue and the buffer is reused to withdraw it instead.
 */
struct ibuf *
up_dump_update(struct rde_peer *peer, uint8_t aid)
{
	struct ibuf *buf;
	struct bgpd_addr addr;
	struct prefix *p;
	size_t off;
	uint16_t len;

	/* nothing queued for this AID, no message to build */
	p = RB_MIN(prefix_tree, &peer->updates[aid]);
	if (p == NULL)
		return NULL;

	if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) == NULL)
		goto fail;

	/* withdrawn routes length field is 0 */
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		goto fail;

	/* reserve space for 2-byte path attribute length */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		goto fail;

	if (up_generate_attr(buf, peer, prefix_aspath(p),
	    prefix_communities(p), prefix_nexthop(p), aid) == -1)
		goto drop;

	if (aid != AID_INET) {
		/* write mp attribute including nlri */

		/*
		 * RFC 7606 wants this to be first but then we need
		 * to use multiple buffers with adjusted length to
		 * merge the attributes together in reverse order of
		 * creation.
		 */
		if (up_generate_mp_reach(buf, peer, prefix_nexthop(p), aid) ==
		    -1)
			goto drop;
	}

	/* update attribute length field */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		goto fail;

	if (aid == AID_INET) {
		/* last but not least dump the IPv4 nlri */
		if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
			goto drop;
	}

	return buf;

 drop:
	/* Not enough space. Drop current prefix, it will never fit. */
	p = RB_MIN(prefix_tree, &peer->updates[aid]);
	pt_getaddr(p->pt, &addr);
	log_peer_warnx(&peer->conf, "generating update failed, "
	    "prefix %s/%d dropped", log_addr(&addr), p->pt->prefixlen);

	/* unqueue the prefix and reuse buf to send a withdraw for it */
	up_prefix_free(&peer->updates[aid], p, peer, 0);
	return up_dump_withdraw_one(peer, p, buf);

 fail:
	/* something went horribly wrong */
	log_peer_warn(&peer->conf, "generating update failed, peer desynced");
	ibuf_free(buf);
	return NULL;
}
1205