1 /* $OpenBSD: rde_rib.c,v 1.263 2024/08/14 19:09:51 claudio Exp $ */
2
3 /*
4 * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include <sys/types.h>
20 #include <sys/queue.h>
21
22 #include <limits.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <time.h>
26
27 #include "bgpd.h"
28 #include "rde.h"
29 #include "log.h"
30
31 /*
32 * BGP RIB -- Routing Information Base
33 *
* The RIB is built with one aspect in mind: speed -- actually, update speed.
* Therefore one thing needs to be absolutely avoided: long table walks.
36 * This is achieved by heavily linking the different parts together.
37 */
38 uint16_t rib_size;
39 struct rib **ribs;
40 struct rib flowrib = { .id = 1, .tree = RB_INITIALIZER(&flowrib.tree) };
41
42 struct rib_entry *rib_add(struct rib *, struct pt_entry *);
43 static inline int rib_compare(const struct rib_entry *,
44 const struct rib_entry *);
45 void rib_remove(struct rib_entry *);
46 int rib_empty(struct rib_entry *);
47 static void rib_dump_abort(uint16_t);
48
49 RB_PROTOTYPE(rib_tree, rib_entry, rib_e, rib_compare);
50 RB_GENERATE(rib_tree, rib_entry, rib_e, rib_compare);
51
52 struct rib_context {
53 LIST_ENTRY(rib_context) entry;
54 struct rib_entry *ctx_re;
55 struct prefix *ctx_p;
56 uint32_t ctx_id;
57 void (*ctx_rib_call)(struct rib_entry *, void *);
58 void (*ctx_prefix_call)(struct prefix *, void *);
59 void (*ctx_done)(void *, uint8_t);
60 int (*ctx_throttle)(void *);
61 void *ctx_arg;
62 struct bgpd_addr ctx_subtree;
63 unsigned int ctx_count;
64 uint8_t ctx_aid;
65 uint8_t ctx_subtreelen;
66 };
67 LIST_HEAD(, rib_context) rib_dumps = LIST_HEAD_INITIALIZER(rib_dumps);
68
69 static void prefix_dump_r(struct rib_context *);
70
71 static inline struct rib_entry *
re_lock(struct rib_entry *re)
73 {
74 if (re->lock != 0)
75 log_warnx("%s: entry already locked", __func__);
76 re->lock = 1;
77 return re;
78 }
79
80 static inline struct rib_entry *
re_unlock(struct rib_entry *re)
82 {
83 if (re->lock == 0)
84 log_warnx("%s: entry already unlocked", __func__);
85 re->lock = 0;
86 return re;
87 }
88
89 static inline int
re_is_locked(struct rib_entry *re)
91 {
92 return (re->lock != 0);
93 }
94
95 static inline struct prefix *
prefix_lock(struct prefix *p)
97 {
98 if (p->flags & PREFIX_FLAG_LOCKED)
99 fatalx("%s: locking locked prefix", __func__);
100 p->flags |= PREFIX_FLAG_LOCKED;
101 return p;
102 }
103
104 static inline struct prefix *
prefix_unlock(struct prefix *p)
106 {
107 if ((p->flags & PREFIX_FLAG_LOCKED) == 0)
108 fatalx("%s: unlocking unlocked prefix", __func__);
109 p->flags &= ~PREFIX_FLAG_LOCKED;
110 return p;
111 }
112
113 static inline int
prefix_is_locked(struct prefix *p)
115 {
116 return (p->flags & PREFIX_FLAG_LOCKED) != 0;
117 }
118
119 static inline int
prefix_is_dead(struct prefix *p)
121 {
122 return (p->flags & PREFIX_FLAG_DEAD) != 0;
123 }
124
125 static inline struct rib_tree *
rib_tree(struct rib *rib)
127 {
128 return (&rib->tree);
129 }
130
131 static inline int
rib_compare(const struct rib_entry *a, const struct rib_entry *b)
133 {
134 return (pt_prefix_cmp(a->prefix, b->prefix));
135 }
136
137 /* RIB specific functions */
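/*
 * Create a new RIB: take the first free slot in the global ribs array,
 * growing the array by 8 slots if none is free, then allocate the RIB,
 * set up its tree and input filter list and return it.
 */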
138 struct rib *
rib_new(char *name, u_int rtableid, uint16_t flags)
140 {
141 struct rib *new;
142 uint16_t id;
143
144 for (id = 0; id < rib_size; id++) {
145 if (ribs[id] == NULL)
146 break;
147 }
148
149 if (id >= rib_size) {
150 if ((ribs = recallocarray(ribs, id, id + 8,
151 sizeof(struct rib *))) == NULL)
152 fatal(NULL);
153 rib_size = id + 8;
154 }
155
156 if ((new = calloc(1, sizeof(*new))) == NULL)
157 fatal(NULL);
158
159 strlcpy(new->name, name, sizeof(new->name));
160 RB_INIT(rib_tree(new));
161 new->state = RECONF_REINIT;
162 new->id = id;
163 new->flags = flags;
164 new->rtableid = rtableid;
165
166 new->in_rules = calloc(1, sizeof(struct filter_head));
167 if (new->in_rules == NULL)
168 fatal(NULL);
169 TAILQ_INIT(new->in_rules);
170
171 ribs[id] = new;
172
173 log_debug("%s: %s -> %u", __func__, name, id);
174 return (new);
175 }
176
177 /*
178 * This function is only called when the FIB information of a RIB changed.
179 * It will flush the FIB if there was one previously and change the fibstate
180 * from RECONF_NONE (nothing to do) to either RECONF_RELOAD (reload the FIB)
181 * or RECONF_REINIT (rerun the route decision process for every element)
182 * depending on the new flags.
183 */
184 int
rib_update(struct rib *rib)
186 {
187 /* flush fib first if there was one */
188 if ((rib->flags & (F_RIB_NOFIB | F_RIB_NOEVALUATE)) == 0)
189 rde_send_kroute_flush(rib);
190
/* if the no-evaluate flag changed then a full reinit is needed */
192 if ((rib->flags & F_RIB_NOEVALUATE) !=
193 (rib->flags_tmp & F_RIB_NOEVALUATE))
194 rib->fibstate = RECONF_REINIT;
195
196 rib->flags = rib->flags_tmp;
197 rib->rtableid = rib->rtableid_tmp;
198
199 /* reload fib if there is no reinit pending and there will be a fib */
200 if (rib->fibstate != RECONF_REINIT &&
201 (rib->flags & (F_RIB_NOFIB | F_RIB_NOEVALUATE)) == 0)
202 rib->fibstate = RECONF_RELOAD;
203
204 return (rib->fibstate == RECONF_REINIT);
205 }
206
207 struct rib *
rib_byid(uint16_t id)
209 {
210 if (id == RIB_NOTFOUND || id >= rib_size || ribs[id] == NULL)
211 return NULL;
212 return ribs[id];
213 }
214
215 uint16_t
rib_find(char *name)
217 {
218 uint16_t id;
219
220 /* no name returns the first Loc-RIB */
221 if (name == NULL || *name == '\0')
222 return RIB_LOC_START;
223
224 for (id = 0; id < rib_size; id++) {
225 if (ribs[id] != NULL && !strcmp(ribs[id]->name, name))
226 return id;
227 }
228
229 return RIB_NOTFOUND;
230 }
231
232 void
rib_free(struct rib *rib)
234 {
235 struct rib_entry *re, *xre;
236 struct prefix *p;
237
238 rib_dump_abort(rib->id);
239
240 /*
241 * flush the rib, disable route evaluation and fib sync to speed up
242 * the prefix removal. Nothing depends on this data anymore.
243 */
244 if ((rib->flags & (F_RIB_NOFIB | F_RIB_NOEVALUATE)) == 0)
245 rde_send_kroute_flush(rib);
246 rib->flags |= F_RIB_NOEVALUATE | F_RIB_NOFIB;
247
248 for (re = RB_MIN(rib_tree, rib_tree(rib)); re != NULL; re = xre) {
249 xre = RB_NEXT(rib_tree, rib_tree(rib), re);
250
251 /*
252 * Removing the prefixes is tricky because the last one
253 * will remove the rib_entry as well and because we do
254 * an empty check in prefix_destroy() it is not possible to
255 * use the default for loop.
256 */
257 while ((p = TAILQ_FIRST(&re->prefix_h))) {
258 struct rde_aspath *asp = prefix_aspath(p);
259 if (asp && asp->pftableid)
260 rde_pftable_del(asp->pftableid, p);
261 prefix_destroy(p);
262 }
263 }
264 if (rib->id <= RIB_LOC_START)
265 return; /* never remove the default ribs */
266 filterlist_free(rib->in_rules_tmp);
267 filterlist_free(rib->in_rules);
268 ribs[rib->id] = NULL;
269 free(rib);
270 }
271
272 void
rib_shutdown(void)
274 {
275 struct rib *rib;
276 uint16_t id;
277
278 for (id = 0; id < rib_size; id++) {
279 rib = rib_byid(id);
280 if (rib == NULL)
281 continue;
282 if (!RB_EMPTY(rib_tree(ribs[id]))) {
283 log_warnx("%s: rib %s is not empty", __func__,
284 ribs[id]->name);
285 }
286 rib_free(ribs[id]);
287 }
288 for (id = 0; id <= RIB_LOC_START; id++) {
289 rib = rib_byid(id);
290 if (rib == NULL)
291 continue;
292 filterlist_free(rib->in_rules_tmp);
293 filterlist_free(rib->in_rules);
294 ribs[id] = NULL;
295 free(rib);
296 }
297 free(ribs);
298 }
299
300 struct rib_entry *
rib_get(struct rib *rib, struct pt_entry *pte)
302 {
303 struct rib_entry xre, *re;
304
305 memset(&xre, 0, sizeof(xre));
306 xre.prefix = pte;
307
308 re = RB_FIND(rib_tree, rib_tree(rib), &xre);
309 if (re && re->rib_id != rib->id)
310 fatalx("%s: Unexpected RIB %u != %u.", __func__,
311 re->rib_id, rib->id);
312 return re;
313 }
314
315 struct rib_entry *
rib_get_addr(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
317 {
318 return rib_get(rib, pt_fill(prefix, prefixlen));
319 }
320
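/*
 * Longest-prefix match: probe the RIB with decreasing prefix lengths,
 * starting at the maximum for the address family, until an entry is
 * found. Returns NULL if nothing matches.
 */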
321 struct rib_entry *
rib_match(struct rib *rib, struct bgpd_addr *addr)
323 {
324 struct rib_entry *re;
325 int i;
326
327 switch (addr->aid) {
328 case AID_INET:
329 case AID_VPN_IPv4:
330 for (i = 32; i >= 0; i--) {
331 re = rib_get_addr(rib, addr, i);
332 if (re != NULL)
333 return (re);
334 }
335 break;
336 case AID_INET6:
337 case AID_VPN_IPv6:
338 for (i = 128; i >= 0; i--) {
339 re = rib_get_addr(rib, addr, i);
340 if (re != NULL)
341 return (re);
342 }
343 break;
344 default:
345 fatalx("%s: unknown af", __func__);
346 }
347 return (NULL);
348 }
349
350
351 struct rib_entry *
rib_add(struct rib *rib, struct pt_entry *pte)
353 {
354 struct rib_entry *re;
355
356 if ((re = calloc(1, sizeof(*re))) == NULL)
357 fatal("rib_add");
358
359 TAILQ_INIT(&re->prefix_h);
360 re->prefix = pt_ref(pte);
361 re->rib_id = rib->id;
362
363 if (RB_INSERT(rib_tree, rib_tree(rib), re) != NULL) {
364 log_warnx("rib_add: insert failed");
365 free(re);
366 return (NULL);
367 }
368
369
370 rdemem.rib_cnt++;
371
372 return (re);
373 }
374
375 void
rib_remove(struct rib_entry *re)
377 {
378 if (!rib_empty(re))
379 fatalx("rib_remove: entry not empty");
380
381 if (re_is_locked(re))
382 /* entry is locked, don't free it. */
383 return;
384
385 pt_unref(re->prefix);
386
387 if (RB_REMOVE(rib_tree, rib_tree(re_rib(re)), re) == NULL)
388 log_warnx("rib_remove: remove failed.");
389
390 free(re);
391 rdemem.rib_cnt--;
392 }
393
394 int
rib_empty(struct rib_entry *re)
396 {
397 return TAILQ_EMPTY(&re->prefix_h);
398 }
399
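/*
 * Resume an interrupted RIB walk: unlock the entry stored in the context,
 * skip forward over empty entries and remove the previously locked entry
 * if it became empty while the walk was suspended.
 */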
400 static struct rib_entry *
rib_restart(struct rib_context *ctx)
402 {
403 struct rib_entry *re = NULL;
404
405 if (ctx->ctx_re)
406 re = re_unlock(ctx->ctx_re);
407
/* find first non-empty element */
409 while (re && rib_empty(re))
410 re = RB_NEXT(rib_tree, unused, re);
411
412 /* free the previously locked rib element if empty */
413 if (ctx->ctx_re && rib_empty(ctx->ctx_re))
414 rib_remove(ctx->ctx_re);
415 ctx->ctx_re = NULL;
416 return (re);
417 }
418
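/*
 * Re-entrant RIB walk. With a non-zero ctx_count the upcall is invoked for
 * at most ctx_count entries per invocation; the next entry is then locked
 * and the walk returns, to be resumed later by rib_dump_runner().
 * A count of 0 results in a full synchronous traversal.
 */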
419 static void
rib_dump_r(struct rib_context *ctx)
421 {
422 struct rib_entry *re, *next;
423 struct rib *rib;
424 unsigned int i;
425
426 rib = rib_byid(ctx->ctx_id);
427 if (rib == NULL)
428 fatalx("%s: rib id %u gone", __func__, ctx->ctx_id);
429
430 if (ctx->ctx_re == NULL && ctx->ctx_subtree.aid == AID_UNSPEC)
431 re = RB_MIN(rib_tree, rib_tree(rib));
432 else
433 re = rib_restart(ctx);
434
435 for (i = 0; re != NULL; re = next) {
436 next = RB_NEXT(rib_tree, unused, re);
437 if (re->rib_id != ctx->ctx_id)
438 fatalx("%s: Unexpected RIB %u != %u.", __func__,
439 re->rib_id, ctx->ctx_id);
440 if (ctx->ctx_aid != AID_UNSPEC &&
441 ctx->ctx_aid != re->prefix->aid)
442 continue;
443 if (ctx->ctx_subtree.aid != AID_UNSPEC) {
444 struct bgpd_addr addr;
445 pt_getaddr(re->prefix, &addr);
446 if (prefix_compare(&ctx->ctx_subtree, &addr,
447 ctx->ctx_subtreelen) != 0)
448 /* left subtree, walk is done */
449 break;
450 }
451 if (ctx->ctx_count && i++ >= ctx->ctx_count &&
452 !re_is_locked(re)) {
453 /* store and lock last element */
454 ctx->ctx_re = re_lock(re);
455 return;
456 }
457 ctx->ctx_rib_call(re, ctx->ctx_arg);
458 }
459
460 if (ctx->ctx_done)
461 ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
462 LIST_REMOVE(ctx, entry);
463 free(ctx);
464 }
465
466 int
rib_dump_pending(void)
468 {
469 struct rib_context *ctx;
470
471 /* return true if at least one context is not throttled */
472 LIST_FOREACH(ctx, &rib_dumps, entry) {
473 if (ctx->ctx_throttle && ctx->ctx_throttle(ctx->ctx_arg))
474 continue;
475 return 1;
476 }
477 return 0;
478 }
479
480 void
rib_dump_runner(void)
482 {
483 struct rib_context *ctx, *next;
484
485 LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) {
486 if (ctx->ctx_throttle && ctx->ctx_throttle(ctx->ctx_arg))
487 continue;
488 if (ctx->ctx_rib_call != NULL)
489 rib_dump_r(ctx);
490 else
491 prefix_dump_r(ctx);
492 }
493 }
494
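/*
 * Abort all pending dumps for the given RIB id: run the done callback,
 * release the locked rib_entry or prefix (destroying it if it became
 * empty or dead in the meantime) and free the context.
 */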
495 static void
rib_dump_abort(uint16_t id)
497 {
498 struct rib_context *ctx, *next;
499
500 LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) {
501 if (id != ctx->ctx_id)
502 continue;
503 if (ctx->ctx_done)
504 ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
505 if (ctx->ctx_re && rib_empty(re_unlock(ctx->ctx_re)))
506 rib_remove(ctx->ctx_re);
507 if (ctx->ctx_p && prefix_is_dead(prefix_unlock(ctx->ctx_p)))
508 prefix_adjout_destroy(ctx->ctx_p);
509 LIST_REMOVE(ctx, entry);
510 free(ctx);
511 }
512 }
513
514 void
rib_dump_terminate(void *arg)
516 {
517 struct rib_context *ctx, *next;
518
519 LIST_FOREACH_SAFE(ctx, &rib_dumps, entry, next) {
520 if (ctx->ctx_arg != arg)
521 continue;
522 if (ctx->ctx_done)
523 ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
524 if (ctx->ctx_re && rib_empty(re_unlock(ctx->ctx_re)))
525 rib_remove(ctx->ctx_re);
526 if (ctx->ctx_p && prefix_is_dead(prefix_unlock(ctx->ctx_p)))
527 prefix_adjout_destroy(ctx->ctx_p);
528 LIST_REMOVE(ctx, entry);
529 free(ctx);
530 }
531 }
532
533 int
rib_dump_new(uint16_t id, uint8_t aid, unsigned int count, void *arg,
535 void (*upcall)(struct rib_entry *, void *), void (*done)(void *, uint8_t),
536 int (*throttle)(void *))
537 {
538 struct rib_context *ctx;
539
540 if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
541 return -1;
542 ctx->ctx_id = id;
543 ctx->ctx_aid = aid;
544 ctx->ctx_count = count;
545 ctx->ctx_arg = arg;
546 ctx->ctx_rib_call = upcall;
547 ctx->ctx_done = done;
548 ctx->ctx_throttle = throttle;
549
550 LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
551
552 /* requested a sync traversal */
553 if (count == 0)
554 rib_dump_r(ctx);
555
556 return 0;
557 }
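/*
 * Illustrative use of the dump API (hypothetical callbacks my_upcall and
 * my_done, batch size chosen arbitrarily): walk all IPv4 entries of the
 * Loc-RIB, 64 entries per rib_dump_runner() pass.
 *
 *	if (rib_dump_new(RIB_LOC_START, AID_INET, 64, NULL,
 *	    my_upcall, my_done, NULL) == -1)
 *		log_warnx("rib_dump_new failed");
 *
 * With count == 0 the walk completes before rib_dump_new() returns.
 */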
558
559 int
rib_dump_subtree(uint16_t id, struct bgpd_addr *subtree, uint8_t subtreelen,
561 unsigned int count, void *arg, void (*upcall)(struct rib_entry *, void *),
562 void (*done)(void *, uint8_t), int (*throttle)(void *))
563 {
564 struct rib_context *ctx;
565 struct rib_entry xre;
566
567 if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
568 return -1;
569 ctx->ctx_id = id;
570 ctx->ctx_aid = subtree->aid;
571 ctx->ctx_count = count;
572 ctx->ctx_arg = arg;
573 ctx->ctx_rib_call = upcall;
574 ctx->ctx_done = done;
575 ctx->ctx_throttle = throttle;
576 ctx->ctx_subtree = *subtree;
577 ctx->ctx_subtreelen = subtreelen;
578
579 LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
580
581 /* lookup start of subtree */
582 memset(&xre, 0, sizeof(xre));
583 xre.prefix = pt_fill(subtree, subtreelen);
584 ctx->ctx_re = RB_NFIND(rib_tree, rib_tree(rib_byid(id)), &xre);
585 if (ctx->ctx_re)
586 re_lock(ctx->ctx_re);
587
588 /* requested a sync traversal */
589 if (count == 0)
590 rib_dump_r(ctx);
591
592 return 0;
593 }
594
595 /* path specific functions */
596
597 static struct rde_aspath *path_lookup(struct rde_aspath *);
598 static void path_link(struct rde_aspath *);
599 static void path_unlink(struct rde_aspath *);
600
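/*
 * Total order over rde_aspath used by the path RB tree: compare the
 * attribute flags (ignoring F_ATTR_LINKED), origin, MED, local-pref,
 * weight, rtlabel and pftable ids, then the AS path and finally the
 * remaining path attributes.
 */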
601 static inline int
path_compare(struct rde_aspath *a, struct rde_aspath *b)
603 {
604 int r;
605
606 if (a == NULL && b == NULL)
607 return (0);
608 else if (b == NULL)
609 return (1);
610 else if (a == NULL)
611 return (-1);
612 if ((a->flags & ~F_ATTR_LINKED) > (b->flags & ~F_ATTR_LINKED))
613 return (1);
614 if ((a->flags & ~F_ATTR_LINKED) < (b->flags & ~F_ATTR_LINKED))
615 return (-1);
616 if (a->origin > b->origin)
617 return (1);
618 if (a->origin < b->origin)
619 return (-1);
620 if (a->med > b->med)
621 return (1);
622 if (a->med < b->med)
623 return (-1);
624 if (a->lpref > b->lpref)
625 return (1);
626 if (a->lpref < b->lpref)
627 return (-1);
628 if (a->weight > b->weight)
629 return (1);
630 if (a->weight < b->weight)
631 return (-1);
632 if (a->rtlabelid > b->rtlabelid)
633 return (1);
634 if (a->rtlabelid < b->rtlabelid)
635 return (-1);
636 if (a->pftableid > b->pftableid)
637 return (1);
638 if (a->pftableid < b->pftableid)
639 return (-1);
640
641 /* no need to check aspa_state or aspa_generation */
642
643 r = aspath_compare(a->aspath, b->aspath);
644 if (r > 0)
645 return (1);
646 if (r < 0)
647 return (-1);
648
649 return (attr_compare(a, b));
650 }
651
652 RB_HEAD(path_tree, rde_aspath) pathtable = RB_INITIALIZER(&pathtable);
653 RB_GENERATE_STATIC(path_tree, rde_aspath, entry, path_compare);
654
655 static inline struct rde_aspath *
path_ref(struct rde_aspath *asp)
657 {
658 if ((asp->flags & F_ATTR_LINKED) == 0)
659 fatalx("%s: unlinked object", __func__);
660 asp->refcnt++;
661 rdemem.path_refs++;
662
663 return asp;
664 }
665
666 static inline void
path_unref(struct rde_aspath *asp)
668 {
669 if (asp == NULL)
670 return;
671 if ((asp->flags & F_ATTR_LINKED) == 0)
672 fatalx("%s: unlinked object", __func__);
673 asp->refcnt--;
674 rdemem.path_refs--;
675 if (asp->refcnt <= 0)
676 path_unlink(asp);
677 }
678
679 void
path_shutdown(void)
681 {
682 if (!RB_EMPTY(&pathtable))
683 log_warnx("path_free: free non-free table");
684 }
685
686 static struct rde_aspath *
path_lookup(struct rde_aspath *aspath)
688 {
689 return (RB_FIND(path_tree, &pathtable, aspath));
690 }
691
692 /*
693 * Link this aspath into the global table.
* The asp must have been allocated with path_get().
695 */
696 static void
path_link(struct rde_aspath *asp)
698 {
699 if (RB_INSERT(path_tree, &pathtable, asp) != NULL)
700 fatalx("%s: already linked object", __func__);
701 asp->flags |= F_ATTR_LINKED;
702 }
703
704 /*
* This function can only be called after all prefixes have been removed.
706 * Normally this happens directly out of the prefix removal functions.
707 */
708 static void
path_unlink(struct rde_aspath *asp)
710 {
711 if (asp == NULL)
712 return;
713
/* make sure no reference is held for this rde_aspath */
715 if (asp->refcnt != 0)
716 fatalx("%s: still holds references", __func__);
717
718 RB_REMOVE(path_tree, &pathtable, asp);
719 asp->flags &= ~F_ATTR_LINKED;
720
721 path_put(asp);
722 }
723
724 /*
725 * Copy asp to a new UNLINKED aspath.
* On dst either path_get() or path_prep() must have been called beforehand.
727 */
728 struct rde_aspath *
path_copy(struct rde_aspath *dst, const struct rde_aspath *src)
730 {
731 dst->aspath = aspath_copy(src->aspath);
732 dst->refcnt = 0;
733 dst->flags = src->flags & ~F_ATTR_LINKED;
734
735 dst->med = src->med;
736 dst->lpref = src->lpref;
737 dst->weight = src->weight;
738 dst->rtlabelid = rtlabel_ref(src->rtlabelid);
739 dst->pftableid = pftable_ref(src->pftableid);
740 dst->origin = src->origin;
741
742 attr_copy(dst, src);
743
744 return (dst);
745 }
746
747 /* initialize or prepare an aspath for use */
748 struct rde_aspath *
path_prep(struct rde_aspath *asp)
750 {
751 memset(asp, 0, sizeof(*asp));
752 asp->origin = ORIGIN_INCOMPLETE;
753 asp->lpref = DEFAULT_LPREF;
754
755 return (asp);
756 }
757
758 /* alloc and initialize new entry. May not fail. */
759 struct rde_aspath *
path_get(void)
761 {
762 struct rde_aspath *asp;
763
764 asp = malloc(sizeof(*asp));
765 if (asp == NULL)
766 fatal("path_get");
767 rdemem.path_cnt++;
768
769 return (path_prep(asp));
770 }
771
772 /* clean up an asp after use (frees all references to sub-objects) */
773 void
path_clean(struct rde_aspath *asp)
775 {
776 if (asp->flags & F_ATTR_LINKED)
777 fatalx("%s: linked object", __func__);
778
779 rtlabel_unref(asp->rtlabelid);
780 pftable_unref(asp->pftableid);
781 aspath_put(asp->aspath);
782 asp->aspath = NULL;
783 attr_freeall(asp);
784 }
785
786 /* free an unlinked element */
787 void
path_put(struct rde_aspath *asp)
789 {
790 if (asp == NULL)
791 return;
792
793 path_clean(asp);
794
795 rdemem.path_cnt--;
796 free(asp);
797 }
798
799 /* prefix specific functions */
800
801 static int prefix_add(struct bgpd_addr *, int, struct rib *,
802 struct rde_peer *, uint32_t, uint32_t, struct rde_aspath *,
803 struct rde_community *, struct nexthop *,
804 uint8_t, uint8_t, int);
805 static int prefix_move(struct prefix *, struct rde_peer *,
806 struct rde_aspath *, struct rde_community *,
807 struct nexthop *, uint8_t, uint8_t, int);
808
809 static void prefix_link(struct prefix *, struct rib_entry *,
810 struct pt_entry *, struct rde_peer *, uint32_t, uint32_t,
811 struct rde_aspath *, struct rde_community *,
812 struct nexthop *, uint8_t, uint8_t);
813 static void prefix_unlink(struct prefix *);
814
815 static struct prefix *prefix_alloc(void);
816 static void prefix_free(struct prefix *);
817
818 /* RB tree comparison function */
819 static inline int
prefix_index_cmp(struct prefix *a, struct prefix *b)
821 {
822 int r;
823 r = pt_prefix_cmp(a->pt, b->pt);
824 if (r != 0)
825 return r;
826
827 if (a->path_id_tx > b->path_id_tx)
828 return 1;
829 if (a->path_id_tx < b->path_id_tx)
830 return -1;
831 return 0;
832 }
833
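/*
 * Ordering of the per-peer update queue: EoR markers sort last; other
 * prefixes sharing the same (interned) aspath, communities, nexthop and
 * nexthop flags end up adjacent, with ties broken by the index order,
 * so updates carrying identical attributes can be grouped together.
 */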
834 static inline int
prefix_cmp(struct prefix *a, struct prefix *b)
836 {
837 if ((a->flags & PREFIX_FLAG_EOR) != (b->flags & PREFIX_FLAG_EOR))
838 return (a->flags & PREFIX_FLAG_EOR) ? 1 : -1;
839 /* if EOR marker no need to check the rest */
840 if (a->flags & PREFIX_FLAG_EOR)
841 return 0;
842
843 if (a->aspath != b->aspath)
844 return (a->aspath > b->aspath ? 1 : -1);
845 if (a->communities != b->communities)
846 return (a->communities > b->communities ? 1 : -1);
847 if (a->nexthop != b->nexthop)
848 return (a->nexthop > b->nexthop ? 1 : -1);
849 if (a->nhflags != b->nhflags)
850 return (a->nhflags > b->nhflags ? 1 : -1);
851 return prefix_index_cmp(a, b);
852 }
853
854 RB_GENERATE(prefix_tree, prefix, entry.tree.update, prefix_cmp)
855 RB_GENERATE_STATIC(prefix_index, prefix, entry.tree.index, prefix_index_cmp)
856
857 /*
858 * Search for specified prefix of a peer. Returns NULL if not found.
859 */
860 struct prefix *
prefix_get(struct rib *rib, struct rde_peer *peer, uint32_t path_id,
862 struct bgpd_addr *prefix, int prefixlen)
863 {
864 struct rib_entry *re;
865
866 re = rib_get_addr(rib, prefix, prefixlen);
867 if (re == NULL)
868 return (NULL);
869 return (prefix_bypeer(re, peer, path_id));
870 }
871
872 /*
873 * Search for specified prefix in the peer prefix_index.
874 * Returns NULL if not found.
875 */
876 struct prefix *
prefix_adjout_get(struct rde_peer *peer, uint32_t path_id_tx,
878 struct pt_entry *pte)
879 {
880 struct prefix xp;
881
882 memset(&xp, 0, sizeof(xp));
883 xp.pt = pte;
884 xp.path_id_tx = path_id_tx;
885
886 return RB_FIND(prefix_index, &peer->adj_rib_out, &xp);
887 }
888
889 /*
890 * Lookup a prefix without considering path_id in the peer prefix_index.
891 * Returns NULL if not found.
892 */
893 struct prefix *
prefix_adjout_first(struct rde_peer *peer, struct pt_entry *pte)
895 {
896 struct prefix xp, *np;
897
898 memset(&xp, 0, sizeof(xp));
899 xp.pt = pte;
900
901 np = RB_NFIND(prefix_index, &peer->adj_rib_out, &xp);
902 if (np == NULL || pt_prefix_cmp(np->pt, xp.pt) != 0)
903 return NULL;
904 return np;
905 }
906
907 /*
* Return the next prefix with the same pt_entry, to iterate over all paths
* of a prefix in the Adj-RIB-Out.
909 */
910 struct prefix *
prefix_adjout_next(struct rde_peer *peer, struct prefix *p)
912 {
913 struct prefix *np;
914
915 np = RB_NEXT(prefix_index, &peer->adj_rib_out, p);
916 if (np == NULL || np->pt != p->pt)
917 return NULL;
918 return np;
919 }
920
921 /*
922 * Lookup addr/prefixlen in the peer prefix_index. Returns first match.
923 * Returns NULL if not found.
924 */
925 struct prefix *
prefix_adjout_lookup(struct rde_peer *peer, struct bgpd_addr *addr, int plen)
927 {
928 return prefix_adjout_first(peer, pt_fill(addr, plen));
929 }
930
931 /*
932 * Lookup addr in the peer prefix_index. Returns first match.
933 * Returns NULL if not found.
934 */
935 struct prefix *
prefix_adjout_match(struct rde_peer *peer, struct bgpd_addr *addr)
937 {
938 struct prefix *p;
939 int i;
940
941 switch (addr->aid) {
942 case AID_INET:
943 case AID_VPN_IPv4:
944 for (i = 32; i >= 0; i--) {
945 p = prefix_adjout_lookup(peer, addr, i);
946 if (p != NULL)
947 return p;
948 }
949 break;
950 case AID_INET6:
951 case AID_VPN_IPv6:
952 for (i = 128; i >= 0; i--) {
953 p = prefix_adjout_lookup(peer, addr, i);
954 if (p != NULL)
955 return p;
956 }
957 break;
958 default:
959 fatalx("%s: unknown af", __func__);
960 }
961 return NULL;
962 }
963
964 /*
965 * Update a prefix.
966 * Return 1 if prefix was newly added, 0 if it was just changed.
967 */
968 int
prefix_update(struct rib *rib, struct rde_peer *peer, uint32_t path_id,
970 uint32_t path_id_tx, struct filterstate *state, int filtered,
971 struct bgpd_addr *prefix, int prefixlen)
972 {
973 struct rde_aspath *asp, *nasp = &state->aspath;
974 struct rde_community *comm, *ncomm = &state->communities;
975 struct prefix *p;
976
977 /*
978 * First try to find a prefix in the specified RIB.
979 */
980 if ((p = prefix_get(rib, peer, path_id, prefix, prefixlen)) != NULL) {
981 if (path_id_tx != p->path_id_tx)
982 fatalx("path_id mismatch");
983 if (prefix_nexthop(p) == state->nexthop &&
984 prefix_nhflags(p) == state->nhflags &&
985 communities_equal(ncomm, prefix_communities(p)) &&
986 path_compare(nasp, prefix_aspath(p)) == 0) {
987 /* no change, update last change */
988 p->lastchange = getmonotime();
989 p->validation_state = state->vstate;
990 if (filtered)
991 p->flags |= PREFIX_FLAG_FILTERED;
992 else
993 p->flags &= ~PREFIX_FLAG_FILTERED;
994 return (0);
995 }
996 }
997
998 /*
999 * Either the prefix does not exist or the path changed.
1000 * In both cases lookup the new aspath to make sure it is not
1001 * already in the RIB.
1002 */
1003 if ((asp = path_lookup(nasp)) == NULL) {
1004 /* Path not available, create and link a new one. */
1005 asp = path_copy(path_get(), nasp);
1006 path_link(asp);
1007 }
1008
1009 if ((comm = communities_lookup(ncomm)) == NULL) {
1010 /* Communities not available, create and link a new one. */
1011 comm = communities_link(ncomm);
1012 }
1013
1014 /* If the prefix was found move it else add it to the RIB. */
1015 if (p != NULL)
1016 return (prefix_move(p, peer, asp, comm, state->nexthop,
1017 state->nhflags, state->vstate, filtered));
1018 else
1019 return (prefix_add(prefix, prefixlen, rib, peer, path_id,
1020 path_id_tx, asp, comm, state->nexthop, state->nhflags,
1021 state->vstate, filtered));
1022 }
1023
1024 /*
1025 * Adds or updates a prefix.
1026 */
1027 static int
prefix_add(struct bgpd_addr *prefix, int prefixlen, struct rib *rib,
1029 struct rde_peer *peer, uint32_t path_id, uint32_t path_id_tx,
1030 struct rde_aspath *asp, struct rde_community *comm,
1031 struct nexthop *nexthop, uint8_t nhflags, uint8_t vstate, int filtered)
1032 {
1033 struct pt_entry *pte;
1034 struct prefix *p;
1035 struct rib_entry *re;
1036
1037 pte = pt_get(prefix, prefixlen);
1038 if (pte == NULL)
1039 pte = pt_add(prefix, prefixlen);
1040 re = rib_get(rib, pte);
1041 if (re == NULL)
1042 re = rib_add(rib, pte);
1043
1044 p = prefix_alloc();
1045 prefix_link(p, re, re->prefix, peer, path_id, path_id_tx, asp, comm,
1046 nexthop, nhflags, vstate);
1047
1048 if (filtered)
1049 p->flags |= PREFIX_FLAG_FILTERED;
1050
/* add possible pftable reference from aspath */
1052 if (asp && asp->pftableid)
1053 rde_pftable_add(asp->pftableid, p);
1054 /* make route decision */
1055 prefix_evaluate(re, p, NULL);
1056 return (1);
1057 }
1058
1059 /*
* Move the prefix to the specified aspath, removing the old one if needed.
1061 */
1062 static int
prefix_move(struct prefix *p, struct rde_peer *peer,
1064 struct rde_aspath *asp, struct rde_community *comm,
1065 struct nexthop *nexthop, uint8_t nhflags, uint8_t vstate, int filtered)
1066 {
1067 struct prefix *np;
1068
1069 if (p->flags & PREFIX_FLAG_ADJOUT)
1070 fatalx("%s: prefix with PREFIX_FLAG_ADJOUT hit", __func__);
1071
1072 if (peer != prefix_peer(p))
1073 fatalx("prefix_move: cross peer move");
1074
1075 /* add new prefix node */
1076 np = prefix_alloc();
1077 prefix_link(np, prefix_re(p), p->pt, peer, p->path_id, p->path_id_tx,
1078 asp, comm, nexthop, nhflags, vstate);
1079
1080 if (filtered)
1081 np->flags |= PREFIX_FLAG_FILTERED;
1082
1083 /* add possible pftable reference from new aspath */
1084 if (asp && asp->pftableid)
1085 rde_pftable_add(asp->pftableid, np);
1086
1087 /*
1088 * no need to update the peer prefix count because we are only moving
1089 * the prefix without changing the peer.
1090 */
1091 prefix_evaluate(prefix_re(np), np, p);
1092
1093 /* remove possible pftable reference from old path */
1094 if (p->aspath && p->aspath->pftableid)
1095 rde_pftable_del(p->aspath->pftableid, p);
1096
1097 /* remove old prefix node */
1098 prefix_unlink(p);
1099 prefix_free(p);
1100
1101 return (0);
1102 }
1103
1104 /*
1105 * Removes a prefix from the specified RIB. If the parent objects -- rib_entry
1106 * or pt_entry -- become empty remove them too.
1107 */
1108 int
prefix_withdraw(struct rib *rib, struct rde_peer *peer, uint32_t path_id,
1110 struct bgpd_addr *prefix, int prefixlen)
1111 {
1112 struct prefix *p;
1113 struct rde_aspath *asp;
1114
1115 p = prefix_get(rib, peer, path_id, prefix, prefixlen);
if (p == NULL) /* Got a dummy withdraw request. */
1117 return (0);
1118
1119 if (p->flags & PREFIX_FLAG_ADJOUT)
1120 fatalx("%s: prefix with PREFIX_FLAG_ADJOUT hit", __func__);
1121 asp = prefix_aspath(p);
1122 if (asp && asp->pftableid)
1123 /* only prefixes in the local RIB were pushed into pf */
1124 rde_pftable_del(asp->pftableid, p);
1125
1126 prefix_destroy(p);
1127
1128 return (1);
1129 }
1130
1131 /*
1132 * Special functions for flowspec until full integration is available.
1133 * This just directly feeds the prefixes into the Adj-RIB-Out bypassing
1134 * Adj-RIB-In and Loc-RIB for now.
1135 */
1136 int
prefix_flowspec_update(struct rde_peer *peer, struct filterstate *state,
1138 struct pt_entry *pte, uint32_t path_id_tx)
1139 {
1140 struct rde_aspath *asp, *nasp;
1141 struct rde_community *comm, *ncomm;
1142 struct rib_entry *re;
1143 struct prefix *new, *old;
1144
1145 re = rib_get(&flowrib, pte);
1146 if (re == NULL)
1147 re = rib_add(&flowrib, pte);
1148
1149 old = prefix_bypeer(re, peer, 0);
1150 new = prefix_alloc();
1151
1152 nasp = &state->aspath;
1153 ncomm = &state->communities;
1154 if ((asp = path_lookup(nasp)) == NULL) {
1155 /* Path not available, create and link a new one. */
1156 asp = path_copy(path_get(), nasp);
1157 path_link(asp);
1158 }
1159 if ((comm = communities_lookup(ncomm)) == NULL) {
1160 /* Communities not available, create and link a new one. */
1161 comm = communities_link(ncomm);
1162 }
1163
1164 prefix_link(new, re, re->prefix, peer, 0, path_id_tx, asp, comm,
1165 NULL, 0, 0);
1166 TAILQ_INSERT_HEAD(&re->prefix_h, new, entry.list.rib);
1167
1168 rde_generate_updates(re, new, old, EVAL_DEFAULT);
1169
1170 if (old != NULL) {
1171 TAILQ_REMOVE(&re->prefix_h, old, entry.list.rib);
1172 prefix_unlink(old);
1173 prefix_free(old);
1174 return 0;
1175 }
1176 return 1;
1177 }
1178
1179 /*
1180 * Remove a possible flowspec prefix from all Adj-RIB-Outs.
1181 */
1182 int
prefix_flowspec_withdraw(struct rde_peer *peer, struct pt_entry *pte)
1184 {
1185 struct rib_entry *re;
1186 struct prefix *p;
1187
1188 re = rib_get(&flowrib, pte);
1189 if (re == NULL)
1190 return 0;
1191 p = prefix_bypeer(re, peer, 0);
1192 if (p == NULL)
1193 return 0;
1194 rde_generate_updates(re, NULL, p, EVAL_DEFAULT);
1195 TAILQ_REMOVE(&re->prefix_h, p, entry.list.rib);
1196 prefix_unlink(p);
1197 prefix_free(p);
1198 return 1;
1199 }
1200
1201 /*
1202 * Push all flowspec rules into a newly available Adj-RIB-Out.
1203 */
1204 void
prefix_flowspec_dump(uint8_t aid, void *arg,
1206 void (*call)(struct rib_entry *, void *), void (*done)(void *, uint8_t))
1207 {
1208 struct rib_entry *re, *next;
1209
1210 RB_FOREACH_SAFE(re, rib_tree, rib_tree(&flowrib), next) {
1211 if (aid != AID_UNSPEC && aid != re->prefix->aid)
1212 continue;
1213 call(re, arg);
1214 }
1215 if (done != NULL)
1216 done(arg, aid);
1217 }
1218
1219 /*
1220 * Insert an End-of-RIB marker into the update queue.
1221 */
1222 void
prefix_add_eor(struct rde_peer *peer, uint8_t aid)
1224 {
1225 struct prefix *p;
1226
1227 p = prefix_alloc();
1228 p->flags = PREFIX_FLAG_ADJOUT | PREFIX_FLAG_UPDATE | PREFIX_FLAG_EOR;
1229 if (RB_INSERT(prefix_tree, &peer->updates[aid], p) != NULL)
1230 /* no need to add if EoR marker already present */
1231 prefix_free(p);
1232 /* EOR marker is not inserted into the adj_rib_out index */
1233 }
1234
1235 /*
1236 * Put a prefix from the Adj-RIB-Out onto the update queue.
1237 */
1238 void
prefix_adjout_update(struct prefix *p, struct rde_peer *peer,
1240 struct filterstate *state, struct pt_entry *pte, uint32_t path_id_tx)
1241 {
1242 struct rde_aspath *asp;
1243 struct rde_community *comm;
1244
1245 if (p == NULL) {
1246 p = prefix_alloc();
1247 /* initially mark DEAD so code below is skipped */
1248 p->flags |= PREFIX_FLAG_ADJOUT | PREFIX_FLAG_DEAD;
1249
1250 p->pt = pt_ref(pte);
1251 p->peer = peer;
1252 p->path_id_tx = path_id_tx;
1253
1254 if (RB_INSERT(prefix_index, &peer->adj_rib_out, p) != NULL)
1255 fatalx("%s: RB index invariant violated", __func__);
1256 }
1257
1258 if ((p->flags & PREFIX_FLAG_ADJOUT) == 0)
1259 fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__);
1260 if ((p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD)) == 0) {
1261 /*
1262 * XXX for now treat a different path_id_tx like different
1263 * attributes and force out an update. It is unclear how
1264 * common it is to have equivalent updates from alternative
1265 * paths.
1266 */
1267 if (p->path_id_tx == path_id_tx &&
1268 prefix_nhflags(p) == state->nhflags &&
1269 prefix_nexthop(p) == state->nexthop &&
1270 communities_equal(&state->communities,
1271 prefix_communities(p)) &&
1272 path_compare(&state->aspath, prefix_aspath(p)) == 0) {
1273 /* nothing changed */
1274 p->validation_state = state->vstate;
1275 p->lastchange = getmonotime();
1276 p->flags &= ~PREFIX_FLAG_STALE;
1277 return;
1278 }
1279
1280 /* if pending update unhook it before it is unlinked */
1281 if (p->flags & PREFIX_FLAG_UPDATE) {
1282 RB_REMOVE(prefix_tree, &peer->updates[pte->aid], p);
1283 peer->stats.pending_update--;
1284 }
1285
1286 /* unlink prefix so it can be relinked below */
1287 prefix_unlink(p);
1288 peer->stats.prefix_out_cnt--;
1289 }
1290 if (p->flags & PREFIX_FLAG_WITHDRAW) {
1291 RB_REMOVE(prefix_tree, &peer->withdraws[pte->aid], p);
1292 peer->stats.pending_withdraw--;
1293 }
1294
1295 /* nothing needs to be done for PREFIX_FLAG_DEAD and STALE */
1296 p->flags &= ~PREFIX_FLAG_MASK;
1297
1298 /* update path_id_tx now that the prefix is unlinked */
1299 if (p->path_id_tx != path_id_tx) {
1300 /* path_id_tx is part of the index so remove and re-insert p */
1301 RB_REMOVE(prefix_index, &peer->adj_rib_out, p);
1302 p->path_id_tx = path_id_tx;
1303 if (RB_INSERT(prefix_index, &peer->adj_rib_out, p) != NULL)
1304 fatalx("%s: RB index invariant violated", __func__);
1305 }
1306
1307 if ((asp = path_lookup(&state->aspath)) == NULL) {
1308 /* Path not available, create and link a new one. */
1309 asp = path_copy(path_get(), &state->aspath);
1310 path_link(asp);
1311 }
1312
1313 if ((comm = communities_lookup(&state->communities)) == NULL) {
1314 /* Communities not available, create and link a new one. */
1315 comm = communities_link(&state->communities);
1316 }
1317
1318 prefix_link(p, NULL, p->pt, peer, 0, p->path_id_tx, asp, comm,
1319 state->nexthop, state->nhflags, state->vstate);
1320 peer->stats.prefix_out_cnt++;
1321
1322 if (p->flags & PREFIX_FLAG_MASK)
1323 fatalx("%s: bad flags %x", __func__, p->flags);
1324 p->flags |= PREFIX_FLAG_UPDATE;
1325 if (RB_INSERT(prefix_tree, &peer->updates[pte->aid], p) != NULL)
1326 fatalx("%s: RB tree invariant violated", __func__);
1327 peer->stats.pending_update++;
1328 }
1329
1330 /*
1331 * Withdraw a prefix from the Adj-RIB-Out, this unlinks the aspath but leaves
1332 * the prefix in the RIB linked to the peer withdraw list.
1333 */
1334 void
prefix_adjout_withdraw(struct prefix *p)
1336 {
1337 struct rde_peer *peer = prefix_peer(p);
1338
1339 if ((p->flags & PREFIX_FLAG_ADJOUT) == 0)
1340 fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__);
1341
1342 /* already a withdraw, shortcut */
1343 if (p->flags & PREFIX_FLAG_WITHDRAW) {
1344 p->lastchange = getmonotime();
1345 p->flags &= ~PREFIX_FLAG_STALE;
1346 return;
1347 }
1348 /* pending update just got withdrawn */
1349 if (p->flags & PREFIX_FLAG_UPDATE) {
1350 RB_REMOVE(prefix_tree, &peer->updates[p->pt->aid], p);
1351 peer->stats.pending_update--;
1352 }
1353 /* unlink prefix if it was linked (not a withdraw or dead) */
1354 if ((p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD)) == 0) {
1355 prefix_unlink(p);
1356 peer->stats.prefix_out_cnt--;
1357 }
1358
1359 /* nothing needs to be done for PREFIX_FLAG_DEAD and STALE */
1360 p->flags &= ~PREFIX_FLAG_MASK;
1361 p->lastchange = getmonotime();
1362
1363 p->flags |= PREFIX_FLAG_WITHDRAW;
1364 if (RB_INSERT(prefix_tree, &peer->withdraws[p->pt->aid], p) != NULL)
1365 fatalx("%s: RB tree invariant violated", __func__);
1366 peer->stats.pending_withdraw++;
1367 }
1368
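/*
 * Completely remove a prefix from the Adj-RIB-Out: take it off the
 * withdraw/update queues and the per-peer index (EoR markers are simply
 * freed). If the prefix is locked by a running dump it is only marked
 * PREFIX_FLAG_DEAD and reaped later via prefix_restart().
 */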
1369 void
prefix_adjout_destroy(struct prefix *p)
1371 {
1372 struct rde_peer *peer = prefix_peer(p);
1373
1374 if ((p->flags & PREFIX_FLAG_ADJOUT) == 0)
1375 fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__);
1376
1377 if (p->flags & PREFIX_FLAG_EOR) {
1378 /* EOR marker is not linked in the index */
1379 prefix_free(p);
1380 return;
1381 }
1382
1383 if (p->flags & PREFIX_FLAG_WITHDRAW) {
1384 RB_REMOVE(prefix_tree, &peer->withdraws[p->pt->aid], p);
1385 peer->stats.pending_withdraw--;
1386 }
1387 if (p->flags & PREFIX_FLAG_UPDATE) {
1388 RB_REMOVE(prefix_tree, &peer->updates[p->pt->aid], p);
1389 peer->stats.pending_update--;
1390 }
1391 /* unlink prefix if it was linked (not a withdraw or dead) */
1392 if ((p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD)) == 0) {
1393 prefix_unlink(p);
1394 peer->stats.prefix_out_cnt--;
1395 }
1396
1397 /* nothing needs to be done for PREFIX_FLAG_DEAD and STALE */
1398 p->flags &= ~PREFIX_FLAG_MASK;
1399
1400 if (prefix_is_locked(p)) {
1401 /* mark prefix dead but leave it for prefix_restart */
1402 p->flags |= PREFIX_FLAG_DEAD;
1403 } else {
1404 RB_REMOVE(prefix_index, &peer->adj_rib_out, p);
1405 /* remove the last prefix reference before free */
1406 pt_unref(p->pt);
1407 prefix_free(p);
1408 }
1409 }
1410
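/*
 * Resume an interrupted Adj-RIB-Out walk: unlock the stored prefix and,
 * if it was destroyed while locked (marked dead), finish the removal and
 * continue with the next prefix in the index.
 */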
1411 static struct prefix *
prefix_restart(struct rib_context *ctx)
1413 {
1414 struct prefix *p = NULL;
1415
1416 if (ctx->ctx_p)
1417 p = prefix_unlock(ctx->ctx_p);
1418
1419 if (p && prefix_is_dead(p)) {
1420 struct prefix *next;
1421
1422 next = RB_NEXT(prefix_index, unused, p);
1423 prefix_adjout_destroy(p);
1424 p = next;
1425 }
1426 ctx->ctx_p = NULL;
1427 return p;
1428 }
1429
1430 static void
prefix_dump_r(struct rib_context *ctx)
1432 {
1433 struct prefix *p, *next;
1434 struct rde_peer *peer;
1435 unsigned int i;
1436
1437 if ((peer = peer_get(ctx->ctx_id)) == NULL)
1438 goto done;
1439
1440 if (ctx->ctx_p == NULL && ctx->ctx_subtree.aid == AID_UNSPEC)
1441 p = RB_MIN(prefix_index, &peer->adj_rib_out);
1442 else
1443 p = prefix_restart(ctx);
1444
1445 for (i = 0; p != NULL; p = next) {
1446 next = RB_NEXT(prefix_index, unused, p);
1447 if (prefix_is_dead(p))
1448 continue;
1449 if (ctx->ctx_aid != AID_UNSPEC &&
1450 ctx->ctx_aid != p->pt->aid)
1451 continue;
1452 if (ctx->ctx_subtree.aid != AID_UNSPEC) {
1453 struct bgpd_addr addr;
1454 pt_getaddr(p->pt, &addr);
1455 if (prefix_compare(&ctx->ctx_subtree, &addr,
1456 ctx->ctx_subtreelen) != 0)
1457 /* left subtree, walk is done */
1458 break;
1459 }
1460 if (ctx->ctx_count && i++ >= ctx->ctx_count &&
1461 !prefix_is_locked(p)) {
1462 /* store and lock last element */
1463 ctx->ctx_p = prefix_lock(p);
1464 return;
1465 }
1466 ctx->ctx_prefix_call(p, ctx->ctx_arg);
1467 }
1468
1469 done:
1470 if (ctx->ctx_done)
1471 ctx->ctx_done(ctx->ctx_arg, ctx->ctx_aid);
1472 LIST_REMOVE(ctx, entry);
1473 free(ctx);
1474 }
1475
1476 int
prefix_dump_new(struct rde_peer *peer, uint8_t aid, unsigned int count,
1478 void *arg, void (*upcall)(struct prefix *, void *),
1479 void (*done)(void *, uint8_t), int (*throttle)(void *))
1480 {
1481 struct rib_context *ctx;
1482
1483 if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
1484 return -1;
1485 ctx->ctx_id = peer->conf.id;
1486 ctx->ctx_aid = aid;
1487 ctx->ctx_count = count;
1488 ctx->ctx_arg = arg;
1489 ctx->ctx_prefix_call = upcall;
1490 ctx->ctx_done = done;
1491 ctx->ctx_throttle = throttle;
1492
1493 LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
1494
1495 /* requested a sync traversal */
1496 if (count == 0)
1497 prefix_dump_r(ctx);
1498
1499 return 0;
1500 }
1501
1502 int
prefix_dump_subtree(struct rde_peer *peer, struct bgpd_addr *subtree,
1504 uint8_t subtreelen, unsigned int count, void *arg,
1505 void (*upcall)(struct prefix *, void *), void (*done)(void *, uint8_t),
1506 int (*throttle)(void *))
1507 {
1508 struct rib_context *ctx;
1509 struct prefix xp;
1510
1511 if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
1512 return -1;
1513 ctx->ctx_id = peer->conf.id;
1514 ctx->ctx_aid = subtree->aid;
1515 ctx->ctx_count = count;
1516 ctx->ctx_arg = arg;
1517 ctx->ctx_prefix_call = upcall;
1518 ctx->ctx_done = done;
1519 ctx->ctx_throttle = throttle;
1520 ctx->ctx_subtree = *subtree;
1521 ctx->ctx_subtreelen = subtreelen;
1522
1523 LIST_INSERT_HEAD(&rib_dumps, ctx, entry);
1524
1525 /* lookup start of subtree */
1526 memset(&xp, 0, sizeof(xp));
1527 xp.pt = pt_fill(subtree, subtreelen);
1528 ctx->ctx_p = RB_NFIND(prefix_index, &peer->adj_rib_out, &xp);
1529 if (ctx->ctx_p)
1530 prefix_lock(ctx->ctx_p);
1531
1532 /* requested a sync traversal */
1533 if (count == 0)
1534 prefix_dump_r(ctx);
1535
1536 return 0;
1537 }
1538
1539 /*
* Search the prefix list of the specified rib_entry for a prefix entry
* belonging to the given peer and path_id. Returns NULL if no match is found.
1542 */
1543 struct prefix *
prefix_bypeer(struct rib_entry *re, struct rde_peer *peer, uint32_t path_id)
1545 {
1546 struct prefix *p;
1547
1548 TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib)
1549 if (prefix_peer(p) == peer && p->path_id == path_id)
1550 return (p);
1551 return (NULL);
1552 }
1553
1554 /* kill a prefix. */
1555 void
prefix_destroy(struct prefix *p)
1557 {
1558 /* make route decision */
1559 prefix_evaluate(prefix_re(p), NULL, p);
1560
1561 prefix_unlink(p);
1562 prefix_free(p);
1563 }
1564
1565 /*
1566 * Link a prefix into the different parent objects.
1567 */
1568 static void
prefix_link(struct prefix *p, struct rib_entry *re, struct pt_entry *pt,
1570 struct rde_peer *peer, uint32_t path_id, uint32_t path_id_tx,
1571 struct rde_aspath *asp, struct rde_community *comm,
1572 struct nexthop *nexthop, uint8_t nhflags, uint8_t vstate)
1573 {
1574 if (re)
1575 p->entry.list.re = re;
1576 p->aspath = path_ref(asp);
1577 p->communities = communities_ref(comm);
1578 p->peer = peer;
1579 p->pt = pt_ref(pt);
1580 p->path_id = path_id;
1581 p->path_id_tx = path_id_tx;
1582 p->validation_state = vstate;
1583 p->nhflags = nhflags;
1584 p->nexthop = nexthop_ref(nexthop);
1585 nexthop_link(p);
1586 p->lastchange = getmonotime();
1587 }
1588
1589 /*
1590 * Unlink a prefix from the different parent objects.
1591 */
1592 static void
prefix_unlink(struct prefix *p)
1594 {
1595 struct rib_entry *re = prefix_re(p);
1596
1597 /* destroy all references to other objects */
1598 /* remove nexthop ref ... */
1599 nexthop_unlink(p);
1600 nexthop_unref(p->nexthop);
1601 p->nexthop = NULL;
1602 p->nhflags = 0;
1603 /* ... communities ... */
1604 communities_unref(p->communities);
1605 p->communities = NULL;
1606 /* and unlink from aspath */
1607 path_unref(p->aspath);
1608 p->aspath = NULL;
1609
1610 if (re && rib_empty(re))
1611 rib_remove(re);
1612
1613 pt_unref(p->pt);
1614 }
1615
1616 /* alloc and zero new entry. May not fail. */
1617 static struct prefix *
prefix_alloc(void)
1619 {
1620 struct prefix *p;
1621
1622 p = calloc(1, sizeof(*p));
1623 if (p == NULL)
1624 fatal("prefix_alloc");
1625 rdemem.prefix_cnt++;
1626 return p;
1627 }
1628
1629 /* free a unlinked entry */
1630 static void
prefix_free(struct prefix *p)
1632 {
1633 rdemem.prefix_cnt--;
1634 free(p);
1635 }
1636
1637 /*
1638 * nexthop functions
1639 */
1640 struct nexthop *nexthop_lookup(struct bgpd_addr *);
1641
1642 /*
1643 * In BGP there exist two nexthops: the exit nexthop which was announced via
* BGP and the true nexthop which is used in the FIB -- the forwarding information
* base, a.k.a. the kernel routing table. When sending updates it is even more
1646 * confusing. In IBGP we pass the unmodified exit nexthop to the neighbors
1647 * while in EBGP normally the address of the router is sent. The exit nexthop
1648 * may be passed to the external neighbor if the neighbor and the exit nexthop
1649 * reside in the same subnet -- directly connected.
1650 */
1651
1652 TAILQ_HEAD(nexthop_queue, nexthop) nexthop_runners =
1653 TAILQ_HEAD_INITIALIZER(nexthop_runners);
1654
1655 RB_HEAD(nexthop_tree, nexthop) nexthoptable =
1656 RB_INITIALIZER(&nexthoptree);
1657
1658 static inline int nexthop_cmp(struct nexthop *, struct nexthop *);
1659
1660 RB_GENERATE_STATIC(nexthop_tree, nexthop, entry, nexthop_cmp);
1661
1662 void
nexthop_shutdown(void)
1664 {
1665 struct nexthop *nh, *nnh;
1666
1667 RB_FOREACH_SAFE(nh, nexthop_tree, &nexthoptable, nnh) {
1668 nh->state = NEXTHOP_UNREACH;
1669 nexthop_unref(nh);
1670 }
1671
1672 RB_FOREACH(nh, nexthop_tree, &nexthoptable) {
1673 log_warnx("%s: nexthop %s refcnt %d", __func__,
1674 log_addr(&nh->exit_nexthop), nh->refcnt);
1675 }
1676 }
1677
1678 int
nexthop_pending(void)
1680 {
1681 return !TAILQ_EMPTY(&nexthop_runners);
1682 }
1683
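/*
 * Process one nexthop from the runner queue: re-evaluate up to
 * RDE_RUNNER_ROUNDS prefixes linked to it and, if prefixes remain,
 * re-queue the nexthop at the tail so other work can make progress.
 */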
1684 void
nexthop_runner(void)
1686 {
1687 struct nexthop *nh;
1688 struct prefix *p;
1689 uint32_t j;
1690
1691 nh = TAILQ_FIRST(&nexthop_runners);
1692 if (nh == NULL)
1693 return;
1694
/* remove from runner queue */
1696 TAILQ_REMOVE(&nexthop_runners, nh, runner_l);
1697
1698 p = nh->next_prefix;
1699 for (j = 0; p != NULL && j < RDE_RUNNER_ROUNDS; j++) {
1700 prefix_evaluate_nexthop(p, nh->state, nh->oldstate);
1701 p = LIST_NEXT(p, entry.list.nexthop);
1702 }
1703
/* prep for next run; if not finished, re-add to tail of queue */
1705 nh->next_prefix = p;
1706 if (p != NULL)
1707 TAILQ_INSERT_TAIL(&nexthop_runners, nh, runner_l);
1708 else
1709 log_debug("nexthop %s update finished",
1710 log_addr(&nh->exit_nexthop));
1711 }
1712
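/*
 * Handle a nexthop state change reported by the kernel: record the new
 * reachability state, remember the true (FIB) nexthop and its network,
 * and queue the nexthop so all prefixes using it get re-evaluated.
 */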
1713 void
nexthop_update(struct kroute_nexthop *msg)
1715 {
1716 struct nexthop *nh;
1717
1718 nh = nexthop_lookup(&msg->nexthop);
1719 if (nh == NULL) {
1720 log_warnx("nexthop_update: non-existent nexthop %s",
1721 log_addr(&msg->nexthop));
1722 return;
1723 }
1724
1725 nh->oldstate = nh->state;
1726 if (msg->valid)
1727 nh->state = NEXTHOP_REACH;
1728 else
1729 nh->state = NEXTHOP_UNREACH;
1730
1731 if (nh->oldstate == NEXTHOP_LOOKUP)
/* drop the reference that was held during the lookup */
1733 if (nexthop_unref(nh))
1734 return; /* nh lost last ref, no work left */
1735
1736 if (nh->next_prefix) {
1737 /*
1738 * If nexthop_runner() is not finished with this nexthop
1739 * then ensure that all prefixes are updated by setting
1740 * the oldstate to NEXTHOP_FLAPPED.
1741 */
1742 nh->oldstate = NEXTHOP_FLAPPED;
1743 TAILQ_REMOVE(&nexthop_runners, nh, runner_l);
1744 }
1745
1746 if (msg->connected)
1747 nh->flags |= NEXTHOP_CONNECTED;
1748
1749 nh->true_nexthop = msg->gateway;
1750 nh->nexthop_net = msg->net;
1751 nh->nexthop_netlen = msg->netlen;
1752
1753 nh->next_prefix = LIST_FIRST(&nh->prefix_h);
1754 if (nh->next_prefix != NULL) {
1755 TAILQ_INSERT_HEAD(&nexthop_runners, nh, runner_l);
1756 log_debug("nexthop %s update starting",
1757 log_addr(&nh->exit_nexthop));
1758 }
1759 }
1760
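/*
 * Apply a "set nexthop" filter action: reject, blackhole, no-modify and
 * self are expressed via the flags byte, while an explicit nexthop
 * replaces the current reference unless the address families differ,
 * in which case the action is ignored.
 */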
1761 void
nexthop_modify(struct nexthop *setnh, enum action_types type, uint8_t aid,
1763 struct nexthop **nexthop, uint8_t *flags)
1764 {
1765 switch (type) {
1766 case ACTION_SET_NEXTHOP_REJECT:
1767 *flags = NEXTHOP_REJECT;
1768 break;
1769 case ACTION_SET_NEXTHOP_BLACKHOLE:
1770 *flags = NEXTHOP_BLACKHOLE;
1771 break;
1772 case ACTION_SET_NEXTHOP_NOMODIFY:
1773 *flags = NEXTHOP_NOMODIFY;
1774 break;
1775 case ACTION_SET_NEXTHOP_SELF:
1776 *flags = NEXTHOP_SELF;
1777 break;
1778 case ACTION_SET_NEXTHOP_REF:
1779 /*
1780 * it is possible that a prefix matches but has the wrong
1781 * address family for the set nexthop. In this case ignore it.
1782 */
1783 if (aid != setnh->exit_nexthop.aid)
1784 break;
1785 nexthop_unref(*nexthop);
1786 *nexthop = nexthop_ref(setnh);
1787 *flags = 0;
1788 break;
1789 default:
1790 break;
1791 }
1792 }
1793
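/*
 * Link a prefix to its nexthop. Adj-RIB-Out prefixes are always valid and
 * RIBs without a decision process are skipped; otherwise NEXTHOP_VALID is
 * set if the nexthop is reachable (or NULL, as for locally announced
 * networks) and the prefix is put on the nexthop's prefix list.
 */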
1794 void
nexthop_link(struct prefix *p)
1796 {
1797 p->nhflags &= ~NEXTHOP_VALID;
1798
1799 if (p->flags & PREFIX_FLAG_ADJOUT) {
1800 /* All nexthops are valid in Adj-RIB-Out */
1801 p->nhflags |= NEXTHOP_VALID;
1802 return;
1803 }
1804
1805 /* no need to link prefixes in RIBs that have no decision process */
1806 if (re_rib(prefix_re(p))->flags & F_RIB_NOEVALUATE)
1807 return;
1808
1809 /* self-announce networks use nexthop NULL and are valid as well */
1810 if (p->nexthop == NULL || p->nexthop->state == NEXTHOP_REACH)
1811 p->nhflags |= NEXTHOP_VALID;
1812
1813 if (p->nexthop == NULL)
1814 return;
1815 p->flags |= PREFIX_NEXTHOP_LINKED;
1816 LIST_INSERT_HEAD(&p->nexthop->prefix_h, p, entry.list.nexthop);
1817 }
1818
1819 void
nexthop_unlink(struct prefix *p)
1821 {
1822 p->nhflags &= ~NEXTHOP_VALID;
1823
1824 if (p->nexthop == NULL || (p->flags & PREFIX_NEXTHOP_LINKED) == 0)
1825 return;
1826
1827 if (p == p->nexthop->next_prefix) {
1828 p->nexthop->next_prefix = LIST_NEXT(p, entry.list.nexthop);
1829 /* remove nexthop from list if no prefixes left to update */
1830 if (p->nexthop->next_prefix == NULL) {
1831 TAILQ_REMOVE(&nexthop_runners, p->nexthop, runner_l);
1832 log_debug("nexthop %s update finished",
1833 log_addr(&p->nexthop->exit_nexthop));
1834 }
1835 }
1836
1837 p->flags &= ~PREFIX_NEXTHOP_LINKED;
1838 LIST_REMOVE(p, entry.list.nexthop);
1839 }
1840
1841 struct nexthop *
nexthop_get(struct bgpd_addr *nexthop)
1843 {
1844 struct nexthop *nh;
1845
1846 nh = nexthop_lookup(nexthop);
1847 if (nh == NULL) {
1848 nh = calloc(1, sizeof(*nh));
1849 if (nh == NULL)
1850 fatal("nexthop_get");
1851 rdemem.nexthop_cnt++;
1852
1853 LIST_INIT(&nh->prefix_h);
1854 nh->state = NEXTHOP_LOOKUP;
1855 nexthop_ref(nh); /* take reference for lookup */
1856 nh->exit_nexthop = *nexthop;
1857 if (RB_INSERT(nexthop_tree, &nexthoptable, nh) != NULL)
1858 fatalx("nexthop tree inconsistency");
1859
1860 rde_send_nexthop(&nh->exit_nexthop, 1);
1861 }
1862
1863 return nexthop_ref(nh);
1864 }
1865
1866 struct nexthop *
nexthop_ref(struct nexthop *nexthop)
1868 {
1869 if (nexthop)
1870 nexthop->refcnt++;
1871 return (nexthop);
1872 }
1873
1874 int
nexthop_unref(struct nexthop *nh)
1876 {
1877 if (nh == NULL)
1878 return (0);
1879 if (--nh->refcnt > 0)
1880 return (0);
1881
1882 /* sanity check */
1883 if (!LIST_EMPTY(&nh->prefix_h) || nh->state == NEXTHOP_LOOKUP)
1884 fatalx("%s: refcnt error", __func__);
1885
1886 /* is nexthop update running? impossible, that is a refcnt error */
1887 if (nh->next_prefix)
1888 fatalx("%s: next_prefix not NULL", __func__);
1889
1890 RB_REMOVE(nexthop_tree, &nexthoptable, nh);
1891 rde_send_nexthop(&nh->exit_nexthop, 0);
1892
1893 rdemem.nexthop_cnt--;
1894 free(nh);
1895 return (1);
1896 }
1897
1898 static inline int
nexthop_cmp(struct nexthop *na, struct nexthop *nb)
1900 {
1901 struct bgpd_addr *a, *b;
1902
1903 if (na == nb)
1904 return (0);
1905 if (na == NULL)
1906 return (-1);
1907 if (nb == NULL)
1908 return (1);
1909
1910 a = &na->exit_nexthop;
1911 b = &nb->exit_nexthop;
1912
1913 if (a->aid != b->aid)
1914 return (a->aid - b->aid);
1915
1916 switch (a->aid) {
1917 case AID_INET:
1918 if (ntohl(a->v4.s_addr) > ntohl(b->v4.s_addr))
1919 return (1);
1920 if (ntohl(a->v4.s_addr) < ntohl(b->v4.s_addr))
1921 return (-1);
1922 return (0);
1923 case AID_INET6:
1924 return (memcmp(&a->v6, &b->v6, sizeof(struct in6_addr)));
1925 default:
1926 fatalx("nexthop_cmp: %s is unsupported", aid2str(a->aid));
1927 }
1928 return (-1);
1929 }
1930
1931 struct nexthop *
nexthop_lookup(struct bgpd_addr *nexthop)
1933 {
1934 struct nexthop needle = { 0 };
1935
1936 needle.exit_nexthop = *nexthop;
1937 return RB_FIND(nexthop_tree, &nexthoptable, &needle);
1938 }
1939