xref: /openbsd/usr.sbin/bgpd/rde_update.c (revision 927cc6da)
1 /*	$OpenBSD: rde_update.c,v 1.174 2025/01/13 13:50:34 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2004 Claudio Jeker <claudio@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/tree.h>
21 
22 #include <limits.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <stdio.h>
26 
27 #include "bgpd.h"
28 #include "session.h"
29 #include "rde.h"
30 #include "log.h"
31 
/* result of up_process_prefix() for a single candidate prefix */
enum up_state {
	UP_OK,		/* prefix was added to the Adj-RIB-Out */
	UP_ERR_LIMIT,	/* outbound prefix limit was reached */
	UP_FILTERED,	/* denied by output filters / open policy */
	UP_EXCLUDED,	/* rejected by up_test_update() */
};
38 
/*
 * Well-known communities (RFC 1997) used by up_test_update() to
 * suppress announcement of prefixes carrying them.
 */
static struct community	comm_no_advertise = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_ADVERTISE
};
static struct community	comm_no_export = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_EXPORT
};
static struct community	comm_no_expsubconfed = {
	.flags = COMMUNITY_TYPE_BASIC,
	.data1 = COMMUNITY_WELLKNOWN,
	.data2 = COMMUNITY_NO_EXPSUBCONFED
};
54 
55 static void up_prep_adjout(struct rde_peer *, struct filterstate *, uint8_t);
56 
57 static int
up_test_update(struct rde_peer * peer,struct prefix * p)58 up_test_update(struct rde_peer *peer, struct prefix *p)
59 {
60 	struct rde_aspath	*asp;
61 	struct rde_community	*comm;
62 	struct rde_peer		*frompeer;
63 
64 	frompeer = prefix_peer(p);
65 	asp = prefix_aspath(p);
66 	comm = prefix_communities(p);
67 
68 	if (asp == NULL || asp->flags & F_ATTR_PARSE_ERR)
69 		fatalx("try to send out a botched path");
70 	if (asp->flags & (F_ATTR_LOOP | F_ATTR_OTC_LEAK))
71 		fatalx("try to send out a looped path");
72 
73 	if (peer == frompeer)
74 		/* Do not send routes back to sender */
75 		return (0);
76 
77 	if (!frompeer->conf.ebgp && !peer->conf.ebgp) {
78 		/*
79 		 * route reflector redistribution rules:
80 		 * 1. if announce is set                -> announce
81 		 * 2. from non-client, to non-client    -> no
82 		 * 3. from client, to non-client        -> yes
83 		 * 4. from non-client, to client        -> yes
84 		 * 5. from client, to client            -> yes
85 		 */
86 		if (frompeer->conf.reflector_client == 0 &&
87 		    peer->conf.reflector_client == 0 &&
88 		    (asp->flags & F_PREFIX_ANNOUNCED) == 0)
89 			/* Do not redistribute updates to ibgp peers */
90 			return (0);
91 	}
92 
93 	/*
94 	 * With "transparent-as yes" set do not filter based on
95 	 * well-known communities. Instead pass them on to the client.
96 	 */
97 	if (peer->flags & PEERFLAG_TRANS_AS)
98 		return (1);
99 
100 	/* well-known communities */
101 	if (community_match(comm, &comm_no_advertise, NULL))
102 		return (0);
103 	if (peer->conf.ebgp) {
104 		if (community_match(comm, &comm_no_export, NULL))
105 			return (0);
106 		if (community_match(comm, &comm_no_expsubconfed, NULL))
107 			return (0);
108 	}
109 
110 	return (1);
111 }
112 
113 /* RFC9234 open policy handling */
114 static int
up_enforce_open_policy(struct rde_peer * peer,struct filterstate * state,uint8_t aid)115 up_enforce_open_policy(struct rde_peer *peer, struct filterstate *state,
116     uint8_t aid)
117 {
118 	/* only for IPv4 and IPv6 unicast */
119 	if (aid != AID_INET && aid != AID_INET6)
120 		return 0;
121 
122 	/*
123 	 * do not propagate (consider it filtered) if OTC is present and
124 	 * local role is peer, customer or rs-client.
125 	 */
126 	if (peer->role == ROLE_PEER || peer->role == ROLE_CUSTOMER ||
127 	    peer->role == ROLE_RS_CLIENT)
128 		if (state->aspath.flags & F_ATTR_OTC)
129 			return 1;
130 
131 	/*
132 	 * add OTC attribute if not present towards peers, customers and
133 	 * rs-clients (local roles peer, provider, rs).
134 	 */
135 	if (peer->role == ROLE_PEER || peer->role == ROLE_PROVIDER ||
136 	    peer->role == ROLE_RS)
137 		if ((state->aspath.flags & F_ATTR_OTC) == 0) {
138 			uint32_t tmp;
139 
140 			tmp = htonl(peer->conf.local_as);
141 			if (attr_optadd(&state->aspath,
142 			    ATTR_OPTIONAL|ATTR_TRANSITIVE, ATTR_OTC,
143 			    &tmp, sizeof(tmp)) == -1)
144 				log_peer_warnx(&peer->conf,
145 				    "failed to add OTC attribute");
146 			state->aspath.flags |= F_ATTR_OTC;
147 		}
148 
149 	return 0;
150 }
151 
/*
 * Process a single prefix by passing it through the various filter stages
 * and if not filtered out update the Adj-RIB-Out. Returns:
 * - UP_OK if prefix was added
 * - UP_ERR_LIMIT if the peer outbound prefix limit was reached
 * - UP_FILTERED if prefix was filtered out
 * - UP_EXCLUDED if prefix was excluded because of up_test_update()
 *
 * p is the matching Adj-RIB-Out entry, or the sentinel (void *)-1 when
 * the caller wants it looked up here via the prefix's tx path-id.
 */
static enum up_state
up_process_prefix(struct rde_peer *peer, struct prefix *new, struct prefix *p)
{
	struct filterstate state;
	struct bgpd_addr addr;
	int excluded = 0;

	/*
	 * up_test_update() needs to run before the output filters
	 * else the well-known communities won't work properly.
	 * The output filters would not be able to add well-known
	 * communities.
	 */
	if (!up_test_update(peer, new))
		excluded = 1;

	rde_filterstate_prep(&state, new);
	pt_getaddr(new->pt, &addr);
	if (rde_filter(peer->out_rules, peer, prefix_peer(new), &addr,
	    new->pt->prefixlen, &state) == ACTION_DENY) {
		rde_filterstate_clean(&state);
		return UP_FILTERED;
	}

	/* Open Policy Check: acts like an output filter */
	if (up_enforce_open_policy(peer, &state, new->pt->aid)) {
		rde_filterstate_clean(&state);
		return UP_FILTERED;
	}

	/*
	 * The exclusion check is deferred until here so the filters and
	 * the open policy code always ran over the state.
	 */
	if (excluded) {
		rde_filterstate_clean(&state);
		return UP_EXCLUDED;
	}

	/* from here on we know this is an update */
	if (p == (void *)-1)
		p = prefix_adjout_get(peer, new->path_id_tx, new->pt);

	up_prep_adjout(peer, &state, new->pt->aid);
	prefix_adjout_update(p, peer, &state, new->pt, new->path_id_tx);
	rde_filterstate_clean(&state);

	/* max prefix checker outbound */
	if (peer->conf.max_out_prefix &&
	    peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
		log_peer_warnx(&peer->conf,
		    "outbound prefix limit reached (>%u/%u)",
		    peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
		rde_update_err(peer, ERR_CEASE,
		    ERR_CEASE_MAX_SENT_PREFIX, NULL);
		return UP_ERR_LIMIT;
	}

	return UP_OK;
}
216 
217 void
up_generate_updates(struct rde_peer * peer,struct rib_entry * re)218 up_generate_updates(struct rde_peer *peer, struct rib_entry *re)
219 {
220 	struct prefix		*new, *p;
221 
222 	p = prefix_adjout_first(peer, re->prefix);
223 
224 	new = prefix_best(re);
225 	while (new != NULL) {
226 		switch (up_process_prefix(peer, new, p)) {
227 		case UP_OK:
228 		case UP_ERR_LIMIT:
229 			return;
230 		case UP_FILTERED:
231 			if (peer->flags & PEERFLAG_EVALUATE_ALL) {
232 				new = TAILQ_NEXT(new, entry.list.rib);
233 				if (new != NULL && prefix_eligible(new))
234 					continue;
235 			}
236 			goto done;
237 		case UP_EXCLUDED:
238 			goto done;
239 		}
240 	}
241 
242 done:
243 	/* withdraw prefix */
244 	if (p != NULL)
245 		prefix_adjout_withdraw(p);
246 }
247 
/*
 * Generate updates for the add-path send case. Depending on the
 * peer eval settings prefixes are selected and distributed.
 * This highly depends on the Adj-RIB-Out to handle prefixes with no
 * changes gracefully. It may be possible to improve the API so that
 * less churn is needed.
 */
void
up_generate_addpath(struct rde_peer *peer, struct rib_entry *re)
{
	struct prefix		*head, *new, *p;
	int			maxpaths = 0, extrapaths = 0, extra;
	int			checkmode = 1;

	head = prefix_adjout_first(peer, re->prefix);

	/* mark all paths as stale */
	for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
		p->flags |= PREFIX_FLAG_STALE;

	/* update paths */
	new = prefix_best(re);
	while (new != NULL) {
		/* check limits and stop when a limit is reached */
		if (peer->eval.maxpaths != 0 &&
		    maxpaths >= peer->eval.maxpaths)
			break;
		if (peer->eval.extrapaths != 0 &&
		    extrapaths >= peer->eval.extrapaths)
			break;

		/*
		 * Decide if this path counts against the extrapaths
		 * limit. Paths are walked from best downwards; once one
		 * falls outside the class selected by the eval mode,
		 * checkmode is cleared so all following paths count as
		 * extra without re-checking (presumably dmetric only
		 * decreases along the list — confirm in prefix_best()).
		 */
		extra = 1;
		if (checkmode) {
			switch (peer->eval.mode) {
			case ADDPATH_EVAL_BEST:
				if (new->dmetric == PREFIX_DMETRIC_BEST)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_ECMP:
				if (new->dmetric == PREFIX_DMETRIC_BEST ||
				    new->dmetric == PREFIX_DMETRIC_ECMP)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_AS_WIDE:
				if (new->dmetric == PREFIX_DMETRIC_BEST ||
				    new->dmetric == PREFIX_DMETRIC_ECMP ||
				    new->dmetric == PREFIX_DMETRIC_AS_WIDE)
					extra = 0;
				else
					checkmode = 0;
				break;
			case ADDPATH_EVAL_ALL:
				/* nothing to check */
				checkmode = 0;
				break;
			default:
				fatalx("unknown add-path eval mode");
			}
		}

		/* sentinel: let up_process_prefix() look the entry up */
		switch (up_process_prefix(peer, new, (void *)-1)) {
		case UP_OK:
			maxpaths++;
			extrapaths += extra;
			break;
		case UP_FILTERED:
		case UP_EXCLUDED:
			break;
		case UP_ERR_LIMIT:
			/* just give up */
			return;
		}

		/* only allow valid prefixes */
		new = TAILQ_NEXT(new, entry.list.rib);
		if (new == NULL || !prefix_eligible(new))
			break;
	}

	/* withdraw stale paths */
	for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
		if (p->flags & PREFIX_FLAG_STALE)
			prefix_adjout_withdraw(p);
	}
}
337 
/*
 * Generate updates for the add-path send all case. Since all prefixes
 * are distributed just remove old and add new.
 *
 * Called either with an explicit old/new pair for an incremental change
 * or with both NULL to resynchronize the whole rib entry.
 */
void
up_generate_addpath_all(struct rde_peer *peer, struct rib_entry *re,
    struct prefix *new, struct prefix *old)
{
	struct prefix		*p, *head = NULL;
	int			all = 0;

	/*
	 * if old and new are NULL then insert all prefixes from best,
	 * clearing old routes in the process
	 */
	if (old == NULL && new == NULL) {
		/* mark all paths as stale */
		head = prefix_adjout_first(peer, re->prefix);
		for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
			p->flags |= PREFIX_FLAG_STALE;

		new = prefix_best(re);
		all = 1;
	}

	if (new != NULL && !prefix_eligible(new)) {
		/* only allow valid prefixes */
		new = NULL;
	}

	if (old != NULL) {
		/* withdraw stale paths */
		p = prefix_adjout_get(peer, old->path_id_tx, old->pt);
		if (p != NULL)
			prefix_adjout_withdraw(p);
	}

	/* add new path (or multiple if all is set) */
	while (new != NULL) {
		switch (up_process_prefix(peer, new, (void *)-1)) {
		case UP_OK:
		case UP_FILTERED:
		case UP_EXCLUDED:
			break;
		case UP_ERR_LIMIT:
			/* just give up */
			return;
		}

		if (!all)
			break;

		/* only allow valid prefixes */
		new = TAILQ_NEXT(new, entry.list.rib);
		if (new == NULL || !prefix_eligible(new))
			break;
	}

	if (all) {
		/* withdraw paths that are still stale after the resync */
		for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
			if (p->flags & PREFIX_FLAG_STALE)
				prefix_adjout_withdraw(p);
		}
	}
}
404 
/*
 * Send a default route to the specified peer. Builds a locally
 * originated empty-ASPATH announcement for 0/0 of the given aid,
 * runs it through the output filters and inserts it into the
 * Adj-RIB-Out. Does nothing if the peer did not negotiate the aid.
 */
void
up_generate_default(struct rde_peer *peer, uint8_t aid)
{
	extern struct rde_peer	*peerself;
	struct filterstate	 state;
	struct rde_aspath	*asp;
	struct prefix		*p;
	struct pt_entry		*pte;
	struct bgpd_addr	 addr;

	if (peer->capa.mp[aid] == 0)
		return;

	rde_filterstate_init(&state);
	asp = &state.aspath;
	asp->aspath = aspath_get(NULL, 0);
	asp->origin = ORIGIN_IGP;
	rde_filterstate_set_vstate(&state, ROA_NOTFOUND, ASPA_NEVER_KNOWN);
	/* the other default values are OK, nexthop is once again NULL */

	/*
	 * XXX apply default overrides. Not yet possible, mainly a parse.y
	 * problem.
	 */
	/* rde_apply_set(asp, peerself, peerself, set, af); */

	/* default route: all-zero address with prefixlen 0 */
	memset(&addr, 0, sizeof(addr));
	addr.aid = aid;
	p = prefix_adjout_lookup(peer, &addr, 0);

	/* outbound filter as usual */
	if (rde_filter(peer->out_rules, peer, peerself, &addr, 0, &state) ==
	    ACTION_DENY) {
		rde_filterstate_clean(&state);
		return;
	}

	up_prep_adjout(peer, &state, addr.aid);
	/* can't use pt_fill here since prefix_adjout_update keeps a ref */
	pte = pt_get(&addr, 0);
	if (pte == NULL)
		pte = pt_add(&addr, 0);
	prefix_adjout_update(p, peer, &state, pte, 0);
	rde_filterstate_clean(&state);

	/* max prefix checker outbound */
	if (peer->conf.max_out_prefix &&
	    peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
		log_peer_warnx(&peer->conf,
		    "outbound prefix limit reached (>%u/%u)",
		    peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
		rde_update_err(peer, ERR_CEASE,
		    ERR_CEASE_MAX_SENT_PREFIX, NULL);
	}
}
461 
/*
 * Figure out the nexthop to announce to peer for the given aid.
 * Returns a pointer to the address to use or NULL when no nexthop
 * is applicable (flowspec) or none could be determined.
 */
static struct bgpd_addr *
up_get_nexthop(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
{
	struct bgpd_addr *peer_local = NULL;

	/* pick the local address matching the announced address family */
	switch (aid) {
	case AID_INET:
	case AID_VPN_IPv4:
		/* extended nexthop capable v6 sessions use the v6 address */
		if (peer_has_ext_nexthop(peer, aid) &&
		    peer->remote_addr.aid == AID_INET6)
			peer_local = &peer->local_v6_addr;
		else if (peer->local_v4_addr.aid == AID_INET)
			peer_local = &peer->local_v4_addr;
		break;
	case AID_INET6:
	case AID_VPN_IPv6:
		if (peer->local_v6_addr.aid == AID_INET6)
			peer_local = &peer->local_v6_addr;
		break;
	case AID_FLOWSPECv4:
	case AID_FLOWSPECv6:
		/* flowspec has no nexthop */
		return (NULL);
	default:
		fatalx("%s, bad AID %s", __func__, aid2str(aid));
	}

	if (state->nhflags & NEXTHOP_SELF) {
		/*
		 * Forcing the nexthop to self is always possible
		 * and has precedence over other flags.
		 */
		return (peer_local);
	} else if (!peer->conf.ebgp) {
		/*
		 * in the ibgp case the nexthop is normally not
		 * modified unless it points at the peer itself.
		 */
		if (state->nexthop == NULL) {
			/* announced networks without explicit nexthop set */
			return (peer_local);
		}
		/*
		 * per RFC: if remote peer address is equal to the nexthop set
		 * the nexthop to our local address. This reduces the risk of
		 * routing loops. This overrides NEXTHOP_NOMODIFY.
		 */
		if (memcmp(&state->nexthop->exit_nexthop,
		    &peer->remote_addr, sizeof(peer->remote_addr)) == 0) {
			return (peer_local);
		}
		return (&state->nexthop->exit_nexthop);
	} else if (peer->conf.distance == 1) {
		/*
		 * In the ebgp directly connected case never send
		 * out a nexthop that is outside of the connected
		 * network of the peer. No matter what flags are
		 * set. This follows section 5.1.3 of RFC 4271.
		 * So just check if the nexthop is in the same net
		 * is enough here.
		 */
		if (state->nexthop != NULL &&
		    state->nexthop->flags & NEXTHOP_CONNECTED &&
		    prefix_compare(&peer->remote_addr,
		    &state->nexthop->nexthop_net,
		    state->nexthop->nexthop_netlen) == 0) {
			/* nexthop and peer are in the same net */
			return (&state->nexthop->exit_nexthop);
		}
		return (peer_local);
	} else {
		/*
		 * For ebgp multihop make it possible to overrule
		 * the sent nexthop by setting NEXTHOP_NOMODIFY.
		 * Similar to the ibgp case there is no same net check
		 * needed but still ensure that the nexthop is not
		 * pointing to the peer itself.
		 */
		if (state->nhflags & NEXTHOP_NOMODIFY &&
		    state->nexthop != NULL &&
		    memcmp(&state->nexthop->exit_nexthop,
		    &peer->remote_addr, sizeof(peer->remote_addr)) != 0) {
			/* no modify flag set and nexthop not peer addr */
			return (&state->nexthop->exit_nexthop);
		}
		return (peer_local);
	}
}
550 
551 static void
up_prep_adjout(struct rde_peer * peer,struct filterstate * state,uint8_t aid)552 up_prep_adjout(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
553 {
554 	struct bgpd_addr *nexthop;
555 	struct nexthop *nh = NULL;
556 	u_char *np;
557 	uint16_t nl;
558 
559 	/* prepend local AS number for eBGP sessions. */
560 	if (peer->conf.ebgp && (peer->flags & PEERFLAG_TRANS_AS) == 0) {
561 		uint32_t prep_as = peer->conf.local_as;
562 		np = aspath_prepend(state->aspath.aspath, prep_as, 1, &nl);
563 		aspath_put(state->aspath.aspath);
564 		state->aspath.aspath = aspath_get(np, nl);
565 		free(np);
566 	}
567 
568 	/* update nexthop */
569 	nexthop = up_get_nexthop(peer, state, aid);
570 	if (nexthop != NULL)
571 		nh = nexthop_get(nexthop);
572 	nexthop_unref(state->nexthop);
573 	state->nexthop = nh;
574 	state->nhflags = 0;
575 }
576 
577 
/*
 * Write the path attributes described by asp, comm and nh into buf.
 * Attributes are emitted in ascending type order, interleaving the
 * well-known attributes with the "others" array of asp. Returns 0 on
 * success, -1 when an attribute could not be written (e.g. missing or
 * unencodable nexthop) in which case the update must be aborted.
 */
static int
up_generate_attr(struct ibuf *buf, struct rde_peer *peer,
    struct rde_aspath *asp, struct rde_community *comm, struct nexthop *nh,
    uint8_t aid)
{
	struct attr	*oa = NULL, *newaggr = NULL;
	u_char		*pdata;
	uint32_t	 tmp32;
	int		 flags, neednewpath = 0, rv;
	uint16_t	 plen;
	uint8_t		 oalen = 0, type;

	if (asp->others_len > 0)
		oa = asp->others[oalen++];

	/* dump attributes in ascending order */
	for (type = ATTR_ORIGIN; type < 255; type++) {
		/* advance oa to the first "other" attribute >= type */
		while (oa && oa->type < type) {
			if (oalen < asp->others_len)
				oa = asp->others[oalen++];
			else
				oa = NULL;
		}

		switch (type) {
		/*
		 * Attributes stored in rde_aspath
		 */
		case ATTR_ORIGIN:
			if (attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ORIGIN, &asp->origin, 1) == -1)
				return -1;
			break;
		case ATTR_ASPATH:
			plen = aspath_length(asp->aspath);
			pdata = aspath_dump(asp->aspath);

			/*
			 * 2-byte AS peers get a deflated path; deflation
			 * allocates, so free pdata only in that case.
			 */
			if (!peer_has_as4byte(peer))
				pdata = aspath_deflate(pdata, &plen,
				    &neednewpath);
			rv = attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ASPATH, pdata, plen);
			if (!peer_has_as4byte(peer))
				free(pdata);

			if (rv == -1)
				return -1;
			break;
		case ATTR_NEXTHOP:
			switch (aid) {
			case AID_INET:
				if (nh == NULL)
					return -1;
				if (nh->exit_nexthop.aid != AID_INET) {
					/*
					 * IPv6 nexthop for IPv4 NLRI is
					 * carried in MP_REACH instead.
					 */
					if (peer_has_ext_nexthop(peer, aid))
						break;
					return -1;
				}
				if (attr_writebuf(buf, ATTR_WELL_KNOWN,
				    ATTR_NEXTHOP, &nh->exit_nexthop.v4,
				    sizeof(nh->exit_nexthop.v4)) == -1)
					return -1;
				break;
			default:
				/* non-INET aids use MP_REACH_NLRI */
				break;
			}
			break;
		case ATTR_MED:
			/*
			 * The old MED from other peers MUST not be announced
			 * to others unless the MED is originating from us or
			 * the peer is an IBGP one. Only exception are routers
			 * with "transparent-as yes" set.
			 */
			if (asp->flags & F_ATTR_MED && (!peer->conf.ebgp ||
			    asp->flags & F_ATTR_MED_ANNOUNCE ||
			    peer->flags & PEERFLAG_TRANS_AS)) {
				tmp32 = htonl(asp->med);
				if (attr_writebuf(buf, ATTR_OPTIONAL,
				    ATTR_MED, &tmp32, 4) == -1)
					return -1;
			}
			break;
		case ATTR_LOCALPREF:
			if (!peer->conf.ebgp) {
				/* local preference, only valid for ibgp */
				tmp32 = htonl(asp->lpref);
				if (attr_writebuf(buf, ATTR_WELL_KNOWN,
				    ATTR_LOCALPREF, &tmp32, 4) == -1)
					return -1;
			}
			break;
		/*
		 * Communities are stored in struct rde_community
		 */
		case ATTR_COMMUNITIES:
		case ATTR_EXT_COMMUNITIES:
		case ATTR_LARGE_COMMUNITIES:
			if (community_writebuf(comm, type, peer->conf.ebgp,
			    buf) == -1)
				return -1;
			break;
		/*
		 * NEW to OLD conversion when sending stuff to a 2byte AS peer
		 */
		case ATTR_AS4_PATH:
			/* only set when aspath_deflate() replaced 4-byte AS */
			if (neednewpath) {
				plen = aspath_length(asp->aspath);
				pdata = aspath_dump(asp->aspath);

				flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
				if (!(asp->flags & F_PREFIX_ANNOUNCED))
					flags |= ATTR_PARTIAL;
				if (plen != 0)
					if (attr_writebuf(buf, flags,
					    ATTR_AS4_PATH, pdata, plen) == -1)
						return -1;
			}
			break;
		case ATTR_AS4_AGGREGATOR:
			/* set below when the AGGREGATOR AS was deflated */
			if (newaggr) {
				flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
				if (!(asp->flags & F_PREFIX_ANNOUNCED))
					flags |= ATTR_PARTIAL;
				if (attr_writebuf(buf, flags,
				    ATTR_AS4_AGGREGATOR, newaggr->data,
				    newaggr->len) == -1)
					return -1;
			}
			break;
		/*
		 * multiprotocol attributes are handled elsewhere
		 */
		case ATTR_MP_REACH_NLRI:
		case ATTR_MP_UNREACH_NLRI:
			break;
		/*
		 * dump all other path attributes. Following rules apply:
		 *  1. well-known attrs: ATTR_ATOMIC_AGGREGATE and
		 *     ATTR_AGGREGATOR pass unmodified (enforce flags
		 *     to correct values). Actually ATTR_AGGREGATOR may be
		 *     deflated for OLD 2-byte peers.
		 *  2. non-transitive attrs: don't re-announce to ebgp peers
		 *  3. transitive known attrs: announce unmodified
		 *  4. transitive unknown attrs: set partial bit and re-announce
		 */
		case ATTR_ATOMIC_AGGREGATE:
			if (oa == NULL || oa->type != type)
				break;
			if (attr_writebuf(buf, ATTR_WELL_KNOWN,
			    ATTR_ATOMIC_AGGREGATE, NULL, 0) == -1)
				return -1;
			break;
		case ATTR_AGGREGATOR:
			if (oa == NULL || oa->type != type)
				break;
			if ((!(oa->flags & ATTR_TRANSITIVE)) &&
			    peer->conf.ebgp)
				break;
			if (!peer_has_as4byte(peer)) {
				/* need to deflate the aggregator */
				uint8_t		t[6];
				uint16_t	tas;

				/*
				 * NOTE(review): this transitive/ebgp check
				 * repeats the one a few lines above and can
				 * never trigger here.
				 */
				if ((!(oa->flags & ATTR_TRANSITIVE)) &&
				    peer->conf.ebgp)
					break;

				/* 4-byte AS becomes AS_TRANS for old peers */
				memcpy(&tmp32, oa->data, sizeof(tmp32));
				if (ntohl(tmp32) > USHRT_MAX) {
					tas = htons(AS_TRANS);
					newaggr = oa;
				} else
					tas = htons(ntohl(tmp32));

				memcpy(t, &tas, sizeof(tas));
				memcpy(t + sizeof(tas),
				    oa->data + sizeof(tmp32),
				    oa->len - sizeof(tmp32));
				if (attr_writebuf(buf, oa->flags,
				    oa->type, &t, sizeof(t)) == -1)
					return -1;
			} else {
				if (attr_writebuf(buf, oa->flags, oa->type,
				    oa->data, oa->len) == -1)
					return -1;
			}
			break;
		case ATTR_ORIGINATOR_ID:
		case ATTR_CLUSTER_LIST:
		case ATTR_OTC:
			if (oa == NULL || oa->type != type)
				break;
			if ((!(oa->flags & ATTR_TRANSITIVE)) &&
			    peer->conf.ebgp)
				break;
			if (attr_writebuf(buf, oa->flags, oa->type,
			    oa->data, oa->len) == -1)
				return -1;
			break;
		default:
			if (oa == NULL && type >= ATTR_FIRST_UNKNOWN)
				/* there is no attribute left to dump */
				return (0);

			if (oa == NULL || oa->type != type)
				break;
			/* unknown attribute */
			if (!(oa->flags & ATTR_TRANSITIVE)) {
				/*
				 * RFC 1771:
				 * Unrecognized non-transitive optional
				 * attributes must be quietly ignored and
				 * not passed along to other BGP peers.
				 */
				break;
			}
			if (attr_writebuf(buf, oa->flags | ATTR_PARTIAL,
			    oa->type, oa->data, oa->len) == -1)
				return -1;
		}
	}
	return 0;
}
802 
803 /*
804  * Check if the pending element is a EoR marker. If so remove it from the
805  * tree and return 1.
806  */
807 int
up_is_eor(struct rde_peer * peer,uint8_t aid)808 up_is_eor(struct rde_peer *peer, uint8_t aid)
809 {
810 	struct prefix *p;
811 
812 	p = RB_MIN(prefix_tree, &peer->updates[aid]);
813 	if (p != NULL && (p->flags & PREFIX_FLAG_EOR)) {
814 		/*
815 		 * Need to remove eor from update tree because
816 		 * prefix_adjout_destroy() can't handle that.
817 		 */
818 		RB_REMOVE(prefix_tree, &peer->updates[aid], p);
819 		p->flags &= ~PREFIX_FLAG_UPDATE;
820 		prefix_adjout_destroy(p);
821 		return 1;
822 	}
823 	return 0;
824 }
825 
826 /* minimal buffer size > withdraw len + attr len + attr hdr + afi/safi */
827 #define MIN_UPDATE_LEN	16
828 
829 static void
up_prefix_free(struct prefix_tree * prefix_head,struct prefix * p,struct rde_peer * peer,int withdraw)830 up_prefix_free(struct prefix_tree *prefix_head, struct prefix *p,
831     struct rde_peer *peer, int withdraw)
832 {
833 	if (withdraw) {
834 		/* prefix no longer needed, remove it */
835 		prefix_adjout_destroy(p);
836 		peer->stats.prefix_sent_withdraw++;
837 	} else {
838 		/* prefix still in Adj-RIB-Out, keep it */
839 		RB_REMOVE(prefix_tree, prefix_head, p);
840 		p->flags &= ~PREFIX_FLAG_UPDATE;
841 		peer->stats.pending_update--;
842 		peer->stats.prefix_sent_update++;
843 	}
844 }
845 
/*
 * Write prefixes to buffer until either there is no more space or
 * the next prefix has no longer the same ASPATH attributes.
 * Returns -1 if no prefix was written else 0.
 */
static int
up_dump_prefix(struct ibuf *buf, struct prefix_tree *prefix_head,
    struct rde_peer *peer, int withdraw)
{
	struct prefix	*p, *np;
	int		 done = 0, has_ap = -1, rv = -1;

	RB_FOREACH_SAFE(p, prefix_tree, prefix_head, np) {
		/* add-path send capability checked once, then cached */
		if (has_ap == -1)
			has_ap = peer_has_add_path(peer, p->pt->aid,
			    CAPA_AP_SEND);
		/* stop on a full buffer, pt_writebuf() fails then */
		if (pt_writebuf(buf, p->pt, withdraw, has_ap, p->path_id_tx) ==
		    -1)
			break;

		/* make sure we only dump prefixes which belong together */
		if (np == NULL ||
		    np->aspath != p->aspath ||
		    np->communities != p->communities ||
		    np->nexthop != p->nexthop ||
		    np->nhflags != p->nhflags ||
		    (np->flags & PREFIX_FLAG_EOR))
			done = 1;

		rv = 0;
		up_prefix_free(prefix_head, p, peer, withdraw);
		if (done)
			break;
	}
	return rv;
}
882 
/*
 * Write a complete MP_REACH_NLRI attribute (nexthop plus NLRI) for
 * the pending updates of the given aid into buf. The attribute and
 * nexthop length fields are back-patched once their size is known.
 * Returns 0 on success, -1 on failure (update must be aborted).
 */
static int
up_generate_mp_reach(struct ibuf *buf, struct rde_peer *peer,
    struct nexthop *nh, uint8_t aid)
{
	struct bgpd_addr *nexthop;
	size_t off, nhoff;
	uint16_t len, afi;
	uint8_t safi;

	/* attribute header, defaulting to extended length one */
	if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
		return -1;
	if (ibuf_add_n8(buf, ATTR_MP_REACH_NLRI) == -1)
		return -1;
	/* remember where the 2-byte attribute length goes */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		return -1;

	if (aid2afi(aid, &afi, &safi))
		fatalx("up_generate_mp_reach: bad AID");

	/* AFI + SAFI + NH LEN + NH + Reserved */
	if (ibuf_add_n16(buf, afi) == -1)
		return -1;
	if (ibuf_add_n8(buf, safi) == -1)
		return -1;
	/* remember where the 1-byte nexthop length goes */
	nhoff = ibuf_size(buf);
	if (ibuf_add_zero(buf, 1) == -1)
		return -1;

	if (aid == AID_VPN_IPv4 || aid == AID_VPN_IPv6) {
		/* write zero rd */
		if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
			return -1;
	}

	switch (aid) {
	case AID_INET:
	case AID_VPN_IPv4:
		if (nh == NULL)
			return -1;
		nexthop = &nh->exit_nexthop;
		/* AID_INET must only use this path with an IPv6 nexthop */
		if (nexthop->aid == AID_INET && aid != AID_INET) {
			if (ibuf_add(buf, &nexthop->v4,
			    sizeof(nexthop->v4)) == -1)
				return -1;
			break;
		} else if (nexthop->aid == AID_INET6 &&
		    peer_has_ext_nexthop(peer, aid)) {
			if (ibuf_add(buf, &nexthop->v6,
			    sizeof(nexthop->v6)) == -1)
				return -1;
		} else {
			/* can't encode nexthop, give up and withdraw prefix */
			return -1;
		}
		break;
	case AID_INET6:
	case AID_VPN_IPv6:
		if (nh == NULL)
			return -1;
		nexthop = &nh->exit_nexthop;
		if (ibuf_add(buf, &nexthop->v6, sizeof(nexthop->v6)) == -1)
			return -1;
		break;
	case AID_FLOWSPECv4:
	case AID_FLOWSPECv6:
		/* no NH */
		break;
	default:
		fatalx("up_generate_mp_reach: unknown AID");
	}

	/* update nexthop len */
	len = ibuf_size(buf) - nhoff - 1;
	if (ibuf_set_n8(buf, nhoff, len) == -1)
		return -1;

	if (ibuf_add_zero(buf, 1) == -1) /* Reserved must be 0 */
		return -1;

	if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
		/* no prefixes written, fail update  */
		return -1;

	/* update MP_REACH attribute length field */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		return -1;

	return 0;
}
976 
977 /*
978  * Generate UPDATE message containing either just withdraws or updates.
979  * UPDATE messages are contructed like this:
980  *
981  *    +-----------------------------------------------------+
982  *    |   Withdrawn Routes Length (2 octets)                |
983  *    +-----------------------------------------------------+
984  *    |   Withdrawn Routes (variable)                       |
985  *    +-----------------------------------------------------+
986  *    |   Total Path Attribute Length (2 octets)            |
987  *    +-----------------------------------------------------+
988  *    |   Path Attributes (variable)                        |
989  *    +-----------------------------------------------------+
990  *    |   Network Layer Reachability Information (variable) |
991  *    +-----------------------------------------------------+
992  *
993  * Multiprotocol messages use MP_REACH_NLRI and MP_UNREACH_NLRI
994  * the latter will be the only path attribute in a message.
995  */
996 
997 /*
998  * Write UPDATE message for withdrawn routes. The size of buf limits
999  * how may routes can be added. Return 0 on success -1 on error which
1000  * includes generating an empty withdraw message.
1001  */
1002 struct ibuf *
up_dump_withdraws(struct rde_peer * peer,uint8_t aid)1003 up_dump_withdraws(struct rde_peer *peer, uint8_t aid)
1004 {
1005 	struct ibuf *buf;
1006 	size_t off, pkgsize = MAX_PKTSIZE;
1007 	uint16_t afi, len;
1008 	uint8_t safi;
1009 
1010 	if (peer_has_ext_msg(peer))
1011 		pkgsize = MAX_EXT_PKTSIZE;
1012 
1013 	if ((buf = ibuf_dynamic(4, pkgsize - MSGSIZE_HEADER)) == NULL)
1014 		goto fail;
1015 
1016 	/* reserve space for the withdrawn routes length field */
1017 	off = ibuf_size(buf);
1018 	if (ibuf_add_zero(buf, sizeof(len)) == -1)
1019 		goto fail;
1020 
1021 	if (aid != AID_INET) {
1022 		/* reserve space for 2-byte path attribute length */
1023 		off = ibuf_size(buf);
1024 		if (ibuf_add_zero(buf, sizeof(len)) == -1)
1025 			goto fail;
1026 
1027 		/* attribute header, defaulting to extended length one */
1028 		if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
1029 			goto fail;
1030 		if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
1031 			goto fail;
1032 		if (ibuf_add_zero(buf, sizeof(len)) == -1)
1033 			goto fail;
1034 
1035 		/* afi & safi */
1036 		if (aid2afi(aid, &afi, &safi))
1037 			fatalx("%s: bad AID", __func__);
1038 		if (ibuf_add_n16(buf, afi) == -1)
1039 			goto fail;
1040 		if (ibuf_add_n8(buf, safi) == -1)
1041 			goto fail;
1042 	}
1043 
1044 	if (up_dump_prefix(buf, &peer->withdraws[aid], peer, 1) == -1)
1045 		goto fail;
1046 
1047 	/* update length field (either withdrawn routes or attribute length) */
1048 	len = ibuf_size(buf) - off - sizeof(len);
1049 	if (ibuf_set_n16(buf, off, len) == -1)
1050 		goto fail;
1051 
1052 	if (aid != AID_INET) {
1053 		/* write MP_UNREACH_NLRI attribute length (always extended) */
1054 		len -= 4; /* skip attribute header */
1055 		if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
1056 			goto fail;
1057 	} else {
1058 		/* no extra attributes so set attribute len to 0 */
1059 		if (ibuf_add_zero(buf, sizeof(len)) == -1) {
1060 			goto fail;
1061 		}
1062 	}
1063 
1064 	return buf;
1065 
1066  fail:
1067 	/* something went horribly wrong */
1068 	log_peer_warn(&peer->conf, "generating withdraw failed, peer desynced");
1069 	ibuf_free(buf);
1070 	return NULL;
1071 }
1072 
1073 /*
1074  * Withdraw a single prefix after an error.
1075  */
static struct ibuf *
up_dump_withdraw_one(struct rde_peer *peer, struct prefix *p, struct ibuf *buf)
{
	size_t off;
	int has_ap;
	uint16_t afi, len;
	uint8_t safi;

	/* reset the buffer and start fresh */
	ibuf_truncate(buf, 0);

	/* reserve space for the withdrawn routes length field */
	off = ibuf_size(buf);
	if (ibuf_add_zero(buf, sizeof(len)) == -1)
		goto fail;

	if (p->pt->aid != AID_INET) {
		/*
		 * Non-IPv4 withdraws go into an MP_UNREACH_NLRI attribute.
		 * Repoint off at the total path attribute length field so
		 * the common length patch below fixes up that field instead
		 * of the (zero) withdrawn routes length.
		 */
		/* reserve space for 2-byte path attribute length */
		off = ibuf_size(buf);
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			goto fail;

		/* attribute header, defaulting to extended length one */
		if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
			goto fail;
		if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
			goto fail;
		if (ibuf_add_zero(buf, sizeof(len)) == -1)
			goto fail;

		/* afi & safi */
		if (aid2afi(p->pt->aid, &afi, &safi))
			fatalx("%s: bad AID", __func__);
		if (ibuf_add_n16(buf, afi) == -1)
			goto fail;
		if (ibuf_add_n8(buf, safi) == -1)
			goto fail;
	}

	/* write the single prefix, with its path id when add-path is on */
	has_ap = peer_has_add_path(peer, p->pt->aid, CAPA_AP_SEND);
	if (pt_writebuf(buf, p->pt, 1, has_ap, p->path_id_tx) == -1)
		goto fail;

	/* update length field (either withdrawn routes or attribute length) */
	len = ibuf_size(buf) - off - sizeof(len);
	if (ibuf_set_n16(buf, off, len) == -1)
		goto fail;

	if (p->pt->aid != AID_INET) {
		/* write MP_UNREACH_NLRI attribute length (always extended) */
		len -= 4; /* skip attribute header */
		/* off + length field + flag/type bytes = attr length field */
		if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
			goto fail;
	} else {
		/* no extra attributes so set attribute len to 0 */
		if (ibuf_add_zero(buf, sizeof(len)) == -1) {
			goto fail;
		}
	}

	return buf;

 fail:
	/* something went horribly wrong */
	log_peer_warn(&peer->conf, "generating withdraw failed, peer desynced");
	ibuf_free(buf);
	return NULL;
}
1144 
/*
 * Write UPDATE message for changed and added routes. The size of buf limits
 * how many routes can be added. The function first dumps the path attributes
 * and then tries to add as many prefixes using these attributes.
 * Returns the message buffer on success or NULL on error; if the current
 * prefix can never fit it is dropped and a withdraw message for it is
 * returned instead.
 */
1151 struct ibuf *
up_dump_update(struct rde_peer * peer,uint8_t aid)1152 up_dump_update(struct rde_peer *peer, uint8_t aid)
1153 {
1154 	struct ibuf *buf;
1155 	struct bgpd_addr addr;
1156 	struct prefix *p;
1157 	size_t off, pkgsize = MAX_PKTSIZE;
1158 	uint16_t len;
1159 	int force_ip4mp = 0;
1160 
1161 	p = RB_MIN(prefix_tree, &peer->updates[aid]);
1162 	if (p == NULL)
1163 		return NULL;
1164 
1165 	if (peer_has_ext_msg(peer))
1166 		pkgsize = MAX_EXT_PKTSIZE;
1167 
1168 	if (aid == AID_INET && peer_has_ext_nexthop(peer, AID_INET)) {
1169 		struct nexthop *nh = prefix_nexthop(p);
1170 		if (nh != NULL && nh->exit_nexthop.aid == AID_INET6)
1171 			force_ip4mp = 1;
1172 	}
1173 
1174 	if ((buf = ibuf_dynamic(4, pkgsize - MSGSIZE_HEADER)) == NULL)
1175 		goto fail;
1176 
1177 	/* withdrawn routes length field is 0 */
1178 	if (ibuf_add_zero(buf, sizeof(len)) == -1)
1179 		goto fail;
1180 
1181 	/* reserve space for 2-byte path attribute length */
1182 	off = ibuf_size(buf);
1183 	if (ibuf_add_zero(buf, sizeof(len)) == -1)
1184 		goto fail;
1185 
1186 	if (up_generate_attr(buf, peer, prefix_aspath(p),
1187 	    prefix_communities(p), prefix_nexthop(p), aid) == -1)
1188 		goto drop;
1189 
1190 	if (aid != AID_INET || force_ip4mp) {
1191 		/* write mp attribute including nlri */
1192 
1193 		/*
1194 		 * RFC 7606 wants this to be first but then we need
1195 		 * to use multiple buffers with adjusted length to
1196 		 * merge the attributes together in reverse order of
1197 		 * creation.
1198 		 */
1199 		if (up_generate_mp_reach(buf, peer, prefix_nexthop(p), aid) ==
1200 		    -1)
1201 			goto drop;
1202 	}
1203 
1204 	/* update attribute length field */
1205 	len = ibuf_size(buf) - off - sizeof(len);
1206 	if (ibuf_set_n16(buf, off, len) == -1)
1207 		goto fail;
1208 
1209 	if (aid == AID_INET && !force_ip4mp) {
1210 		/* last but not least dump the IPv4 nlri */
1211 		if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
1212 			goto drop;
1213 	}
1214 
1215 	return buf;
1216 
1217  drop:
1218 	/* Not enough space. Drop current prefix, it will never fit. */
1219 	p = RB_MIN(prefix_tree, &peer->updates[aid]);
1220 	pt_getaddr(p->pt, &addr);
1221 	log_peer_warnx(&peer->conf, "generating update failed, "
1222 	    "prefix %s/%d dropped", log_addr(&addr), p->pt->prefixlen);
1223 
1224 	up_prefix_free(&peer->updates[aid], p, peer, 0);
1225 	return up_dump_withdraw_one(peer, p, buf);
1226 
1227  fail:
1228 	/* something went horribly wrong */
1229 	log_peer_warn(&peer->conf, "generating update failed, peer desynced");
1230 	ibuf_free(buf);
1231 	return NULL;
1232 }
1233