xref: /openbsd/usr.sbin/bgpd/rde_decide.c (revision 89ee02f7)
1 /*	$OpenBSD: rde_decide.c,v 1.103 2024/08/14 19:09:51 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org>
5  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/types.h>
21 #include <sys/queue.h>
22 
23 #include <string.h>
24 
25 #include "bgpd.h"
26 #include "rde.h"
27 #include "log.h"
28 
/* File-local helpers of the decision process, defined further down. */
int	prefix_cmp(struct prefix *p1, struct prefix *p2, int *testall);
void	prefix_set_dmetric(struct prefix *pp, struct prefix *np);
void	prefix_insert(struct prefix *new, struct prefix *ep,
	    struct rib_entry *re);
void	prefix_remove(struct prefix *old, struct rib_entry *re);
33 /*
34  * Decision Engine RFC implementation:
35  *  Phase 1:
36  *   - calculate LOCAL_PREF if needed -- EBGP or IGP learnt routes
37  *   - IBGP routes may either use LOCAL_PREF or the local system computes
38  *     the degree of preference
39  *   - If the route is ineligible, the route MAY NOT serve as an input to
40  *     the next phase of route selection
41  *   - if the route is eligible the computed value MUST be used as the
42  *     LOCAL_PREF value in any IBGP readvertisement
43  *
44  *  Phase 2:
45  *   - If the NEXT_HOP attribute of a BGP route depicts an address that is
46  *     not resolvable the BGP route MUST be excluded from the Phase 2 decision
47  *     function.
48  *   - If the AS_PATH attribute of a BGP route contains an AS loop, the BGP
49  *     route should be excluded from the Phase 2 decision function.
50  *   - The local BGP speaker identifies the route that has:
51  *     a) the highest degree of preference of any route to the same set
52  *        of destinations
53  *     b) is the only route to that destination
54  *     c) is selected as a result of the Phase 2 tie breaking rules
55  *   - The local speaker MUST determine the immediate next-hop address from
56  *     the NEXT_HOP attribute of the selected route.
57  *   - If either the immediate next hop or the IGP cost to the NEXT_HOP changes,
58  *     Phase 2 Route Selection MUST be performed again.
59  *
60  *  Route Resolvability Condition
61  *   - A route Rte1, referencing only the intermediate network address, is
62  *     considered resolvable if the Routing Table contains at least one
63  *     resolvable route Rte2 that matches Rte1's intermediate network address
64  *     and is not recursively resolved through Rte1.
65  *   - Routes referencing interfaces are considered resolvable if the state of
66  *     the referenced interface is up and IP processing is enabled.
67  *
68  *  Breaking Ties (Phase 2)
69  *   1. Remove from consideration all routes which are not tied for having the
70  *      smallest number of AS numbers present in their AS_PATH attributes.
71  *      Note, that when counting this number, an AS_SET counts as 1
72  *   2. Remove from consideration all routes which are not tied for having the
73  *      lowest Origin number in their Origin attribute.
74  *   3. Remove from consideration routes with less-preferred MULTI_EXIT_DISC
75  *      attributes. MULTI_EXIT_DISC is only comparable between routes learned
76  *      from the same neighboring AS.
77  *   4. If at least one of the candidate routes was received via EBGP,
78  *      remove from consideration all routes which were received via IBGP.
79  *   5. Remove from consideration any routes with less-preferred interior cost.
80  *      If the NEXT_HOP hop for a route is reachable, but no cost can be
81  *      determined, then this step should be skipped.
82  *   6. Remove from consideration all routes other than the route that was
83  *      advertised by the BGP speaker whose BGP Identifier has the lowest value.
84  *   7. Prefer the route received from the lowest peer address.
85  *
86  * Phase 3: Route Dissemination
87  *   - All routes in the Loc-RIB are processed into Adj-RIBs-Out according
88  *     to configured policy. A route SHALL NOT be installed in the Adj-Rib-Out
89  *     unless the destination and NEXT_HOP described by this route may be
90  *     forwarded appropriately by the Routing Table.
91  */
92 
93 /*
94  * Decision Engine OUR implementation:
95  * The filtering is done first. The filtering calculates the preference and
96  * stores it in LOCAL_PREF (Phase 1).
97  * Ineligible routes are flagged as ineligible via nexthop_add().
98  * Phase 3 is done together with Phase 2.
99  * In following cases a prefix needs to be reevaluated:
100  *  - update of a prefix (prefix_update)
101  *  - withdraw of a prefix (prefix_withdraw)
102  *  - state change of the nexthop (nexthop-{in}validate)
103  *  - state change of session (session down)
104  */
105 
106 /*
107  * Compare two prefixes with equal pt_entry. Returns an integer greater than or
108  * less than 0, according to whether the prefix p1 is more or less preferred
109  * than the prefix p2. p1 should be used for the new prefix and p2 for a
110  * already added prefix. The absolute value returned specifies the similarity
111  * of the prefixes.
112  *   1: prefixes differ because of validity
113  *   2: prefixes don't belong in any multipath set
114  *   3: prefixes belong only in the as-wide multipath set
115  *   4: prefixes belong in both the ecmp and as-wide multipath set
116  *   TODO: maybe we also need a strict ecmp set that requires
117  *   prefixes to e.g. equal ASPATH or equal neighbor-as (like for MED).
118  */
119 int
prefix_cmp(struct prefix * p1,struct prefix * p2,int * testall)120 prefix_cmp(struct prefix *p1, struct prefix *p2, int *testall)
121 {
122 	struct rde_aspath	*asp1, *asp2;
123 	struct rde_peer		*peer1, *peer2;
124 	struct attr		*a;
125 	uint32_t		 p1id, p2id;
126 	int			 p1cnt, p2cnt, i;
127 	int			 rv = 1;
128 
129 	/*
130 	 * If a match happens before the MED check then the list is
131 	 * correctly sorted. If a match happens after MED then further
132 	 * elements may need to be checked to ensure that all paths
133 	 * which could affect this path were considered. This only
134 	 * matters for strict MED evaluation and in that case testall
135 	 * is set to 1. If the check happens to be on the MED check
136 	 * itself testall is set to 2.
137 	 */
138 	*testall = 0;
139 
140 	if (p1 == NULL)
141 		return -rv;
142 	if (p2 == NULL)
143 		return rv;
144 
145 	asp1 = prefix_aspath(p1);
146 	asp2 = prefix_aspath(p2);
147 	peer1 = prefix_peer(p1);
148 	peer2 = prefix_peer(p2);
149 
150 	/* 1. check if prefix is eligible a.k.a reachable */
151 	if (!prefix_eligible(p2))
152 		return rv;
153 	if (!prefix_eligible(p1))
154 		return -rv;
155 
156 	/* bump rv, from here on prefix is considered valid */
157 	rv++;
158 
159 	/* 2. local preference of prefix, bigger is better */
160 	if (asp1->lpref > asp2->lpref)
161 		return rv;
162 	if (asp1->lpref < asp2->lpref)
163 		return -rv;
164 
165 	/* 3. aspath count, the shorter the better */
166 	if (asp1->aspath->ascnt < asp2->aspath->ascnt)
167 		return rv;
168 	if (asp1->aspath->ascnt > asp2->aspath->ascnt)
169 		return -rv;
170 
171 	/* 4. origin, the lower the better */
172 	if (asp1->origin < asp2->origin)
173 		return rv;
174 	if (asp1->origin > asp2->origin)
175 		return -rv;
176 
177 	/*
178 	 * 5. MED decision
179 	 * Only comparable between the same neighboring AS or if
180 	 * 'rde med compare always' is set. In the first case
181 	 * set the testall flag since further elements need to be
182 	 * evaluated as well.
183 	 */
184 	if ((rde_decisionflags() & BGPD_FLAG_DECISION_MED_ALWAYS) ||
185 	    aspath_neighbor(asp1->aspath) == aspath_neighbor(asp2->aspath)) {
186 		if (!(rde_decisionflags() & BGPD_FLAG_DECISION_MED_ALWAYS))
187 			*testall = 2;
188 		/* lowest value wins */
189 		if (asp1->med < asp2->med)
190 			return rv;
191 		if (asp1->med > asp2->med)
192 			return -rv;
193 	}
194 
195 	if (!(rde_decisionflags() & BGPD_FLAG_DECISION_MED_ALWAYS))
196 		*testall = 1;
197 
198 	/*
199 	 * 6. EBGP is cooler than IBGP
200 	 * It is absolutely important that the ebgp value in peer_config.ebgp
201 	 * is bigger than all other ones (IBGP, confederations)
202 	 */
203 	if (peer1->conf.ebgp != peer2->conf.ebgp) {
204 		if (peer1->conf.ebgp) /* peer1 is EBGP other is lower */
205 			return rv;
206 		else if (peer2->conf.ebgp) /* peer2 is EBGP */
207 			return -rv;
208 	}
209 
210 	/* bump rv, as-wide multipath */
211 	rv++;
212 
213 	/*
214 	 * 7. local tie-breaker, this weight is here to tip equal long AS
215 	 * paths in one or the other direction. It happens more and more
216 	 * that AS paths are equally long and so traffic engineering needs
217 	 * a metric that weights a prefix at a very late stage in the
218 	 * decision process.
219 	 */
220 	if (asp1->weight > asp2->weight)
221 		return rv;
222 	if (asp1->weight < asp2->weight)
223 		return -rv;
224 
225 	/* 8. nexthop costs. NOT YET -> IGNORE */
226 
227 	/* bump rv, equal cost multipath */
228 	rv++;
229 
230 	/*
231 	 * 9. older route (more stable) wins but only if route-age
232 	 * evaluation is enabled.
233 	 */
234 	if (rde_decisionflags() & BGPD_FLAG_DECISION_ROUTEAGE) {
235 		if (p1->lastchange < p2->lastchange) /* p1 is older */
236 			return rv;
237 		if (p1->lastchange > p2->lastchange)
238 			return -rv;
239 	}
240 
241 	/* 10. lowest BGP Id wins, use ORIGINATOR_ID if present */
242 	if ((a = attr_optget(asp1, ATTR_ORIGINATOR_ID)) != NULL) {
243 		memcpy(&p1id, a->data, sizeof(p1id));
244 		p1id = ntohl(p1id);
245 	} else
246 		p1id = peer1->remote_bgpid;
247 	if ((a = attr_optget(asp2, ATTR_ORIGINATOR_ID)) != NULL) {
248 		memcpy(&p2id, a->data, sizeof(p2id));
249 		p2id = ntohl(p2id);
250 	} else
251 		p2id = peer2->remote_bgpid;
252 	if (p1id < p2id)
253 		return rv;
254 	if (p1id > p2id)
255 		return -rv;
256 
257 	/* 11. compare CLUSTER_LIST length, shorter is better */
258 	p1cnt = p2cnt = 0;
259 	if ((a = attr_optget(asp1, ATTR_CLUSTER_LIST)) != NULL)
260 		p1cnt = a->len / sizeof(uint32_t);
261 	if ((a = attr_optget(asp2, ATTR_CLUSTER_LIST)) != NULL)
262 		p2cnt = a->len / sizeof(uint32_t);
263 	if (p1cnt < p2cnt)
264 		return rv;
265 	if (p1cnt > p2cnt)
266 		return -rv;
267 
268 	/* 12. lowest peer address wins (IPv4 is better than IPv6) */
269 	if (peer1->remote_addr.aid < peer2->remote_addr.aid)
270 		return rv;
271 	if (peer1->remote_addr.aid > peer2->remote_addr.aid)
272 		return -rv;
273 	switch (peer1->remote_addr.aid) {
274 	case AID_INET:
275 		i = memcmp(&peer1->remote_addr.v4, &peer2->remote_addr.v4,
276 		    sizeof(struct in_addr));
277 		break;
278 	case AID_INET6:
279 		i = memcmp(&peer1->remote_addr.v6, &peer2->remote_addr.v6,
280 		    sizeof(struct in6_addr));
281 		break;
282 	default:
283 		fatalx("%s: unknown af", __func__);
284 	}
285 	if (i < 0)
286 		return rv;
287 	if (i > 0)
288 		return -rv;
289 
290 	/* RFC7911 does not specify this but something like this is needed. */
291 	/* 13. lowest path identifier wins */
292 	if (p1->path_id < p2->path_id)
293 		return rv;
294 	if (p1->path_id > p2->path_id)
295 		return -rv;
296 
297 	fatalx("Uh, oh a politician in the decision process");
298 }
299 
300 /*
301  * set the dmetric value of np based on the return value of
302  * prefix_evaluate(pp, np) or set it to either PREFIX_DMETRIC_BEST
303  * or PREFIX_DMETRIC_INVALID for the first element.
304  */
305 void
prefix_set_dmetric(struct prefix * pp,struct prefix * np)306 prefix_set_dmetric(struct prefix *pp, struct prefix *np)
307 {
308 	int testall;
309 
310 	if (np != NULL) {
311 		if (pp == NULL)
312 			np->dmetric = prefix_eligible(np) ?
313 			    PREFIX_DMETRIC_BEST : PREFIX_DMETRIC_INVALID;
314 		else
315 			np->dmetric = prefix_cmp(pp, np, &testall);
316 		if (np->dmetric < 0) {
317 			struct bgpd_addr addr;
318 			pt_getaddr(np->pt, &addr);
319 			log_debug("bad dmetric in decision process: %s/%u",
320 			    log_addr(&addr), np->pt->prefixlen);
321 		}
322 	}
323 }
324 
325 /*
326  * Insert a prefix keeping the total order of the list. For routes
327  * that may depend on a MED selection the set is scanned until the
328  * condition is cleared. If a MED inversion is detected the respective
329  * prefix is taken of the rib list and put onto a redo queue. All
330  * prefixes on the redo queue are re-inserted at the end.
331  */
332 void
prefix_insert(struct prefix * new,struct prefix * ep,struct rib_entry * re)333 prefix_insert(struct prefix *new, struct prefix *ep, struct rib_entry *re)
334 {
335 	struct prefix_queue redo = TAILQ_HEAD_INITIALIZER(redo);
336 	struct prefix *xp, *np, *insertp = ep;
337 	int testall, preferred, selected = 0, removed = 0;
338 
339 	/* start scan at the entry point (ep) or the head if ep == NULL */
340 	if (ep == NULL)
341 		ep = TAILQ_FIRST(&re->prefix_h);
342 
343 	for (xp = ep; xp != NULL; xp = np) {
344 		np = TAILQ_NEXT(xp, entry.list.rib);
345 
346 		if ((preferred = (prefix_cmp(new, xp, &testall) > 0))) {
347 			/* new is preferred over xp */
348 			if (testall == 2) {
349 				/*
350 				 * MED inversion, take out prefix and
351 				 * put it onto redo queue.
352 				 */
353 				TAILQ_REMOVE(&re->prefix_h, xp, entry.list.rib);
354 				TAILQ_INSERT_TAIL(&redo, xp, entry.list.rib);
355 				removed = 1;
356 				continue;
357 			}
358 
359 			if (testall == 1) {
360 				/*
361 				 * lock insertion point and
362 				 * continue on with scan
363 				 */
364 				selected = 1;
365 			}
366 		} else {
367 			/*
368 			 * xp is preferred over new.
369 			 * Remember insertion point for later unless the
370 			 * traverse is just looking for a possible MED
371 			 * inversion (selected == 1).
372 			 * If the last comparison's tie-breaker was the MED
373 			 * check reset selected and with it insertp since
374 			 * this was an actual MED priority inversion.
375 			 */
376 			if (testall == 2)
377 				selected = 0;
378 			if (!selected)
379 				insertp = xp;
380 		}
381 
382 		/*
383 		 * If previous element(s) got removed, fixup the
384 		 * dmetric, now that it is clear that this element
385 		 * is on the list.
386 		 */
387 		if (removed) {
388 			prefix_set_dmetric(TAILQ_PREV(xp, prefix_queue,
389 			    entry.list.rib), xp);
390 			removed = 0;
391 		}
392 
393 		if (preferred && testall == 0)
394 			break;			/* we're done */
395 	}
396 
397 	if (insertp == NULL) {
398 		TAILQ_INSERT_HEAD(&re->prefix_h, new, entry.list.rib);
399 	} else {
400 		TAILQ_INSERT_AFTER(&re->prefix_h, insertp, new, entry.list.rib);
401 	}
402 
403 	prefix_set_dmetric(insertp, new);
404 	prefix_set_dmetric(new, TAILQ_NEXT(new, entry.list.rib));
405 
406 	/* Fixup MED order again. All elements are < new */
407 	while (!TAILQ_EMPTY(&redo)) {
408 		xp = TAILQ_FIRST(&redo);
409 		TAILQ_REMOVE(&redo, xp, entry.list.rib);
410 
411 		prefix_insert(xp, new, re);
412 	}
413 }
414 
415 /*
416  * Remove a prefix from the RIB list ensuring that the total order of the
417  * list remains intact. All routes that differ in the MED are taken of the
418  * list and put on the redo list. To figure out if a route could cause a
419  * resort because of a MED check the next prefix of the to-remove prefix
420  * is compared with the old prefix. A full scan is only done if the next
421  * route differs because of the MED or later checks.
422  * Again at the end all routes on the redo queue are reinserted.
423  */
424 void
prefix_remove(struct prefix * old,struct rib_entry * re)425 prefix_remove(struct prefix *old, struct rib_entry *re)
426 {
427 	struct prefix_queue redo = TAILQ_HEAD_INITIALIZER(redo);
428 	struct prefix *xp, *np, *pp;
429 	int testall, removed = 0;
430 
431 	xp = TAILQ_NEXT(old, entry.list.rib);
432 	pp = TAILQ_PREV(old, prefix_queue, entry.list.rib);
433 	TAILQ_REMOVE(&re->prefix_h, old, entry.list.rib);
434 
435 	/* check if a MED inversion could be possible */
436 	prefix_cmp(old, xp, &testall);
437 	if (testall > 0) {
438 		/* maybe MED route, scan tail for other possible routes */
439 		for (; xp != NULL; xp = np) {
440 			np = TAILQ_NEXT(xp, entry.list.rib);
441 
442 			/* only interested in the testall result */
443 			prefix_cmp(old, xp, &testall);
444 			if (testall == 2) {
445 				/*
446 				 * possible MED inversion, take out prefix and
447 				 * put it onto redo queue.
448 				 */
449 				TAILQ_REMOVE(&re->prefix_h, xp, entry.list.rib);
450 				TAILQ_INSERT_TAIL(&redo, xp, entry.list.rib);
451 				removed = 1;
452 				continue;
453 			}
454 			/*
455 			 * If previous element(s) got removed, fixup the
456 			 * dmetric, now that it is clear that this element
457 			 * is on the list.
458 			 */
459 			if (removed) {
460 				prefix_set_dmetric(TAILQ_PREV(xp, prefix_queue,
461 				    entry.list.rib), xp);
462 				removed = 0;
463 			}
464 			if (testall == 0)
465 				break;		/* we're done */
466 		}
467 	}
468 
469 	if (pp)
470 		prefix_set_dmetric(pp, TAILQ_NEXT(pp, entry.list.rib));
471 	else
472 		prefix_set_dmetric(NULL, TAILQ_FIRST(&re->prefix_h));
473 
474 	/* Fixup MED order again, reinsert prefixes from the start */
475 	while (!TAILQ_EMPTY(&redo)) {
476 		xp = TAILQ_FIRST(&redo);
477 		TAILQ_REMOVE(&redo, xp, entry.list.rib);
478 
479 		prefix_insert(xp, NULL, re);
480 	}
481 }
482 
483 /* helper function to check if a prefix is valid to be selected */
484 int
prefix_eligible(struct prefix * p)485 prefix_eligible(struct prefix *p)
486 {
487 	struct rde_aspath *asp = prefix_aspath(p);
488 
489 	/* prefix itself is marked ineligible */
490 	if (prefix_filtered(p))
491 		return 0;
492 
493 	/* The aspath needs to be loop and error free */
494 	if (asp == NULL ||
495 	    asp->flags & (F_ATTR_LOOP|F_ATTR_OTC_LEAK|F_ATTR_PARSE_ERR))
496 		return 0;
497 
498 	/* The nexthop must be valid. */
499 	if (!prefix_nhvalid(p))
500 		return 0;
501 
502 	return 1;
503 }
504 
505 struct prefix *
prefix_best(struct rib_entry * re)506 prefix_best(struct rib_entry *re)
507 {
508 	struct prefix	*xp;
509 	struct rib	*rib;
510 
511 	rib = re_rib(re);
512 	if (rib->flags & F_RIB_NOEVALUATE)
513 		/* decision process is turned off */
514 		return NULL;
515 
516 	xp = TAILQ_FIRST(&re->prefix_h);
517 	if (xp != NULL && !prefix_eligible(xp))
518 		xp = NULL;
519 	return xp;
520 }
521 
522 /*
523  * Find the correct place to insert the prefix in the prefix list.
524  * If the active prefix has changed we need to send an update also special
525  * treatment is needed if 'rde evaluate all' is used on some peers.
526  * To re-evaluate a prefix just call prefix_evaluate with old and new pointing
527  * to the same prefix.
528  */
529 void
prefix_evaluate(struct rib_entry * re,struct prefix * new,struct prefix * old)530 prefix_evaluate(struct rib_entry *re, struct prefix *new, struct prefix *old)
531 {
532 	struct prefix	*newbest, *oldbest;
533 	struct rib	*rib;
534 
535 	rib = re_rib(re);
536 	if (rib->flags & F_RIB_NOEVALUATE) {
537 		/* decision process is turned off */
538 		if (old != NULL)
539 			TAILQ_REMOVE(&re->prefix_h, old, entry.list.rib);
540 		if (new != NULL) {
541 			TAILQ_INSERT_HEAD(&re->prefix_h, new, entry.list.rib);
542 			new->dmetric = PREFIX_DMETRIC_INVALID;
543 		}
544 		return;
545 	}
546 
547 	oldbest = prefix_best(re);
548 	if (old != NULL)
549 		prefix_remove(old, re);
550 	if (new != NULL)
551 		prefix_insert(new, NULL, re);
552 	newbest = prefix_best(re);
553 
554 	/*
555 	 * If the active prefix changed or the active prefix was removed
556 	 * and added again then generate an update.
557 	 */
558 	if (oldbest != newbest || (old != NULL && newbest == old)) {
559 		/*
560 		 * Send update withdrawing oldbest and adding newbest
561 		 * but remember that newbest may be NULL aka ineligible.
562 		 * Additional decision may be made by the called functions.
563 		 */
564 		if ((rib->flags & F_RIB_NOFIB) == 0)
565 			rde_send_kroute(rib, newbest, oldbest);
566 		rde_generate_updates(re, new, old, EVAL_DEFAULT);
567 		return;
568 	}
569 
570 	/*
571 	 * If there are peers with 'rde evaluate all' every update needs
572 	 * to be passed on (not only a change of the best prefix).
573 	 * rde_generate_updates() will then take care of distribution.
574 	 */
575 	if (rde_evaluate_all()) {
576 		if (new != NULL && !prefix_eligible(new))
577 			new = NULL;
578 		if (new != NULL || old != NULL)
579 			rde_generate_updates(re, new, old, EVAL_ALL);
580 	}
581 }
582 
583 void
prefix_evaluate_nexthop(struct prefix * p,enum nexthop_state state,enum nexthop_state oldstate)584 prefix_evaluate_nexthop(struct prefix *p, enum nexthop_state state,
585     enum nexthop_state oldstate)
586 {
587 	struct rib_entry *re = prefix_re(p);
588 	struct prefix	*newbest, *oldbest, *new, *old;
589 	struct rib	*rib;
590 
591 	/* Skip non local-RIBs or RIBs that are flagged as noeval. */
592 	rib = re_rib(re);
593 	if (rib->flags & F_RIB_NOEVALUATE) {
594 		log_warnx("%s: prefix with F_RIB_NOEVALUATE hit", __func__);
595 		return;
596 	}
597 
598 	if (oldstate == state) {
599 		/*
600 		 * The state of the nexthop did not change. The only
601 		 * thing that may have changed is the true_nexthop
602 		 * or other internal infos. This will not change
603 		 * the routing decision so shortcut here.
604 		 * XXX needs to be changed for ECMP
605 		 */
606 		if (state == NEXTHOP_REACH) {
607 			if ((rib->flags & F_RIB_NOFIB) == 0 &&
608 			    p == prefix_best(re))
609 				rde_send_kroute(rib, p, NULL);
610 		}
611 		return;
612 	}
613 
614 	/*
615 	 * Re-evaluate the prefix by removing the prefix then updating the
616 	 * nexthop state and reinserting the prefix again.
617 	 */
618 	old = p;
619 	oldbest = prefix_best(re);
620 	prefix_remove(p, re);
621 
622 	if (state == NEXTHOP_REACH)
623 		p->nhflags |= NEXTHOP_VALID;
624 	else
625 		p->nhflags &= ~NEXTHOP_VALID;
626 
627 	prefix_insert(p, NULL, re);
628 	newbest = prefix_best(re);
629 	new = p;
630 	if (!prefix_eligible(new))
631 		new = NULL;
632 
633 	/*
634 	 * If the active prefix changed or the active prefix was removed
635 	 * and added again then generate an update.
636 	 */
637 	if (oldbest != newbest || newbest == p) {
638 		/*
639 		 * Send update withdrawing oldbest and adding newbest
640 		 * but remember that newbest may be NULL aka ineligible.
641 		 * Additional decision may be made by the called functions.
642 		 */
643 		if ((rib->flags & F_RIB_NOFIB) == 0)
644 			rde_send_kroute(rib, newbest, oldbest);
645 		rde_generate_updates(re, new, old, EVAL_DEFAULT);
646 		return;
647 	}
648 
649 	/*
650 	 * If there are peers with 'rde evaluate all' every update needs
651 	 * to be passed on (not only a change of the best prefix).
652 	 * rde_generate_updates() will then take care of distribution.
653 	 */
654 	if (rde_evaluate_all())
655 		rde_generate_updates(re, new, old, EVAL_ALL);
656 }
657