1 /*
2 * BIRD -- Routing Tables
3 *
4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9 /**
10 * DOC: Routing tables
11 *
12 * Routing tables are probably the most important structures BIRD uses. They
13 * hold all the information about known networks, the associated routes and
14 * their attributes.
15 *
16 * There are multiple routing tables (a primary one together with any
17 * number of secondary ones if requested by the configuration). Each table
18 * is basically a FIB containing entries describing the individual
19 * destination networks. For each network (represented by structure &net),
20 * there is a one-way linked list of route entries (&rte), the first entry
21 * on the list being the best one (i.e., the one we currently use
22 * for routing); the order of the other entries is undetermined.
23 *
24 * The &rte contains information specific to the route (preference, protocol
25 * metrics, time of last modification etc.) and a pointer to a &rta structure
26 * (see the route attribute module for a precise explanation) holding the
27 * remaining route attributes which are expected to be shared by multiple
28 * routes in order to conserve memory.
29 */
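/*
 * An illustrative sketch (not part of the original sources) of how the
 * per-network route list described above is typically walked; &net and
 * &rte are the structures mentioned in the paragraph above:
 *
 *   net *n = net_find(tab, addr);            // look up a network in table tab
 *   for (rte *e = n ? n->routes : NULL; e; e = e->next)
 *     ;                                      // n->routes is the current best route,
 *                                            // the remaining entries follow in
 *                                            // undetermined order
 */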
30
31 #undef LOCAL_DEBUG
32
33 #include "nest/bird.h"
34 #include "nest/route.h"
35 #include "nest/protocol.h"
36 #include "nest/iface.h"
37 #include "lib/resource.h"
38 #include "lib/event.h"
39 #include "lib/timer.h"
40 #include "lib/string.h"
41 #include "conf/conf.h"
42 #include "filter/filter.h"
43 #include "filter/data.h"
44 #include "lib/hash.h"
46 #include "lib/alloca.h"
47
48 #ifdef CONFIG_BGP
49 #include "proto/bgp/bgp.h"
50 #endif
51
52 pool *rt_table_pool;
53
54 static slab *rte_slab;
55 static linpool *rte_update_pool;
56
57 list routing_tables;
58
59 static void rt_free_hostcache(rtable *tab);
60 static void rt_notify_hostcache(rtable *tab, net *net);
61 static void rt_update_hostcache(rtable *tab);
62 static void rt_next_hop_update(rtable *tab);
63 static inline void rt_prune_table(rtable *tab);
64 static inline void rt_schedule_notify(rtable *tab);
65
66
67 /* Like fib_route(), but skips empty net entries */
68 static inline void *
69 net_route_ip4(rtable *t, net_addr_ip4 *n)
70 {
71 net *r;
72
73 while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
74 {
75 n->pxlen--;
76 ip4_clrbit(&n->prefix, n->pxlen);
77 }
78
79 return r;
80 }
81
82 static inline void *
83 net_route_ip6(rtable *t, net_addr_ip6 *n)
84 {
85 net *r;
86
87 while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
88 {
89 n->pxlen--;
90 ip6_clrbit(&n->prefix, n->pxlen);
91 }
92
93 return r;
94 }
95
96 static inline void *
97 net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n)
98 {
99 struct fib_node *fn;
100
101 while (1)
102 {
103 net *best = NULL;
104 int best_pxlen = 0;
105
106 /* We need to do dst first matching. Since sadr addresses are hashed on dst
107 prefix only, find the hash table chain and go through it to find the
108 match with the smallest matching src prefix. */
109 for (fn = fib_get_chain(&t->fib, (net_addr *) n); fn; fn = fn->next)
110 {
111 net_addr_ip6_sadr *a = (void *) fn->addr;
112
113 if (net_equal_dst_ip6_sadr(n, a) &&
114 net_in_net_src_ip6_sadr(n, a) &&
115 (a->src_pxlen >= best_pxlen))
116 {
117 best = fib_node_to_user(&t->fib, fn);
118 best_pxlen = a->src_pxlen;
119 }
120 }
121
122 if (best)
123 return best;
124
125 if (!n->dst_pxlen)
126 break;
127
128 n->dst_pxlen--;
129 ip6_clrbit(&n->dst_prefix, n->dst_pxlen);
130 }
131
132 return NULL;
133 }
134
135 void *
136 net_route(rtable *tab, const net_addr *n)
137 {
138 ASSERT(tab->addr_type == n->type);
139
140 net_addr *n0 = alloca(n->length);
141 net_copy(n0, n);
142
143 switch (n->type)
144 {
145 case NET_IP4:
146 case NET_VPN4:
147 case NET_ROA4:
148 return net_route_ip4(tab, (net_addr_ip4 *) n0);
149
150 case NET_IP6:
151 case NET_VPN6:
152 case NET_ROA6:
153 return net_route_ip6(tab, (net_addr_ip6 *) n0);
154
155 case NET_IP6_SADR:
156 return net_route_ip6_sadr(tab, (net_addr_ip6_sadr *) n0);
157
158 default:
159 return NULL;
160 }
161 }
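/*
 * Hedged usage sketch (not from the original sources): net_route() performs a
 * longest-prefix match by repeatedly clearing the lowest prefix bit, so the
 * most specific non-empty &net wins; the helper name below is hypothetical.
 */
static inline rte *
example_best_route_for(rtable *tab, const net_addr *dst)
{
  net *n = net_route(tab, dst);   /* longest-prefix match, skips empty nets */
  return n ? n->routes : NULL;    /* first entry is the current best route */
}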
162
163
164 static int
165 net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
166 {
167 struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
168 struct fib_node *fn;
169 int anything = 0;
170
171 while (1)
172 {
173 for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
174 {
175 net_addr_roa4 *roa = (void *) fn->addr;
176 net *r = fib_node_to_user(&tab->fib, fn);
177
178 if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes))
179 {
180 anything = 1;
181 if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
182 return ROA_VALID;
183 }
184 }
185
186 if (n.pxlen == 0)
187 break;
188
189 n.pxlen--;
190 ip4_clrbit(&n.prefix, n.pxlen);
191 }
192
193 return anything ? ROA_INVALID : ROA_UNKNOWN;
194 }
195
196 static int
197 net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
198 {
199 struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
200 struct fib_node *fn;
201 int anything = 0;
202
203 while (1)
204 {
205 for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
206 {
207 net_addr_roa6 *roa = (void *) fn->addr;
208 net *r = fib_node_to_user(&tab->fib, fn);
209
210 if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes))
211 {
212 anything = 1;
213 if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
214 return ROA_VALID;
215 }
216 }
217
218 if (n.pxlen == 0)
219 break;
220
221 n.pxlen--;
222 ip6_clrbit(&n.prefix, n.pxlen);
223 }
224
225 return anything ? ROA_INVALID : ROA_UNKNOWN;
226 }
227
228 /**
229 * roa_check - check validity of route origination in a ROA table
230 * @tab: ROA table
231 * @n: network prefix to check
232 * @asn: AS number of network prefix
233 *
234 * Implements RFC 6483 route validation for the given network prefix. The
235 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
236 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
237 * a candidate ROA with matching ASN and maxlen field greater than or equal to
238 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
239 * the caller cannot determine the origin AS, 0 may be used (in that case ROA_VALID
240 * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n
241 * must have type NET_IP4 or NET_IP6, respectively.
242 */
243 int
244 net_roa_check(rtable *tab, const net_addr *n, u32 asn)
245 {
246 if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
247 return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn);
248 else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
249 return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn);
250 else
251 return ROA_UNKNOWN; /* Should not happen */
252 }
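/*
 * Hedged usage sketch (not from the original sources): mapping the
 * net_roa_check() result to a human-readable validation state, e.g. for a
 * protocol that validates received IPv4 prefixes against a NET_ROA4 table.
 */
static inline const char *
example_roa_state_name(rtable *roa_tab, const net_addr *n, u32 origin_asn)
{
  switch (net_roa_check(roa_tab, n, origin_asn))
  {
  case ROA_VALID:   return "valid";
  case ROA_INVALID: return "invalid";
  default:          return "unknown";        /* ROA_UNKNOWN */
  }
}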
253
254 /**
255 * rte_find - find a route
256 * @net: network node
257 * @src: route source
258 *
259 * The rte_find() function returns a route for destination @net
260 * which is from route source @src.
261 */
262 rte *
263 rte_find(net *net, struct rte_src *src)
264 {
265 rte *e = net->routes;
266
267 while (e && e->attrs->src != src)
268 e = e->next;
269 return e;
270 }
271
272 /**
273 * rte_get_temp - get a temporary &rte
274 * @a: attributes to assign to the new route (a &rta; in case it's
275 * un-cached, rte_update() will create a cached copy automatically)
276 *
277 * Create a temporary &rte and bind it with the attributes @a.
278 * Also set route preference to the default preference set for
279 * the protocol.
280 */
281 rte *
282 rte_get_temp(rta *a)
283 {
284 rte *e = sl_alloc(rte_slab);
285
286 e->attrs = a;
287 e->id = 0;
288 e->flags = 0;
289 e->pref = 0;
290 return e;
291 }
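/*
 * Hedged sketch of the announcement sequence mentioned above (the helper and
 * the attribute values are illustrative, not taken from any real protocol):
 * build a &rta, bind it to a temporary &rte and submit it via rte_update().
 * The &rta may stay un-cached, rte_update() caches it itself.
 */
static inline void
example_announce(struct proto *p, const net_addr *n, ip_addr gw, struct iface *i)
{
  rta a0 = {
    .src = p->main_source,
    .source = RTS_STATIC,       /* illustrative source value */
    .scope = SCOPE_UNIVERSE,
    .dest = RTD_UNICAST,
    .nh.gw = gw,
    .nh.iface = i,
  };

  rte *e = rte_get_temp(&a0);   /* e->pref is filled by rte_update() if zero */
  rte_update(p, n, e);
}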
292
293 rte *
294 rte_do_cow(rte *r)
295 {
296 rte *e = sl_alloc(rte_slab);
297
298 memcpy(e, r, sizeof(rte));
299 e->attrs = rta_clone(r->attrs);
300 e->flags = 0;
301 return e;
302 }
303
304 /**
305 * rte_cow_rta - get a private writable copy of &rte with writable &rta
306 * @r: a route entry to be copied
307 * @lp: a linpool from which to allocate &rta
308 *
309 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
310 * modification. There are three possibilities: First, both &rte and &rta are
311 * private copies; in that case they are returned unchanged. Second, &rte is a
312 * private copy, but &rta is cached; in that case &rta is duplicated using
313 * rta_do_cow(). Third, &rte is shared and &rta is cached; in that case
314 * both structures are duplicated by rte_do_cow() and rta_do_cow().
315 *
316 * Note that in the second case, cached &rta loses one reference, while private
317 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
318 * nexthops, ...) with it. To work properly, original shared &rta should have
319 * another reference during the life of created private copy.
320 *
321 * Result: a pointer to the new writable &rte with writable &rta.
322 */
323 rte *
324 rte_cow_rta(rte *r, linpool *lp)
325 {
326 if (!rta_is_cached(r->attrs))
327 return r;
328
329 r = rte_cow(r);
330 rta *a = rta_do_cow(r->attrs, lp);
331 rta_free(r->attrs);
332 r->attrs = a;
333 return r;
334 }
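/*
 * Hedged sketch of the reference handling described above (the helper is
 * hypothetical): keep an extra reference to the old cached &rta while its
 * shallow private copy is in use, re-cache the result and only then drop it.
 */
static inline rte *
example_modify_attrs(rte *r, linpool *lp)
{
  rta *old_attrs = rta_is_cached(r->attrs) ? rta_clone(r->attrs) : NULL;

  r = rte_cow_rta(r, lp);            /* r and r->attrs are now private */
  r->attrs->igp_metric = 0;          /* illustrative modification */

  r->attrs = rta_lookup(r->attrs);   /* cache the modified attributes */
  rta_free(old_attrs);               /* the extra reference can go now */
  return r;
}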
335
336
337 /**
338 * rte_init_tmp_attrs - initialize temporary ea_list for route
339 * @r: route entry to be modified
340 * @lp: linpool from which to allocate attributes
341 * @max: maximum number of added temporary attributes
342 *
343 * This function is supposed to be called from make_tmp_attrs() and
344 * store_tmp_attrs() hooks before rte_make_tmp_attr() / rte_store_tmp_attr()
345 * functions. It allocates &ea_list with length for @max items for temporary
346 * attributes and puts it on top of eattrs stack.
347 */
348 void
349 rte_init_tmp_attrs(rte *r, linpool *lp, uint max)
350 {
351 struct ea_list *e = lp_alloc(lp, sizeof(struct ea_list) + max * sizeof(eattr));
352
353 e->next = r->attrs->eattrs;
354 e->flags = EALF_SORTED | EALF_TEMP;
355 e->count = 0;
356
357 r->attrs->eattrs = e;
358 }
359
360 /**
361 * rte_make_tmp_attr - make temporary eattr from private route fields
362 * @r: route entry to be modified
363 * @id: attribute ID
364 * @type: attribute type
365 * @val: attribute value (u32 or adata ptr)
366 *
367 * This function is supposed to be called from make_tmp_attrs() hook for
368 * each temporary attribute, after temporary &ea_list was initialized by
369 * rte_init_tmp_attrs(). It checks whether temporary attribute is supposed to
370 * be defined (based on route pflags) and if so then it fills &eattr field in
371 * preallocated temporary &ea_list on top of route @r eattrs stack.
372 *
373 * Note that it may require a free &eattr slot in the temporary &ea_list, so it
374 * must not be called more times than the @max argument of rte_init_tmp_attrs().
375 */
376 void
377 rte_make_tmp_attr(rte *r, uint id, uint type, uintptr_t val)
378 {
379 if (r->pflags & EA_ID_FLAG(id))
380 {
381 ea_list *e = r->attrs->eattrs;
382 eattr *a = &e->attrs[e->count++];
383 a->id = id;
384 a->type = type;
385 a->flags = 0;
386
387 if (type & EAF_EMBEDDED)
388 a->u.data = (u32) val;
389 else
390 a->u.ptr = (struct adata *) val;
391 }
392 }
393
394 /**
395 * rte_store_tmp_attr - store temporary eattr to private route fields
396 * @r: route entry to be modified
397 * @id: attribute ID
398 *
399 * This function is supposed to be called from store_tmp_attrs() hook for
400 * each temporary attribute, after temporary &ea_list was initialized by
401 * rte_init_tmp_attrs(). It checks whether temporary attribute is defined in
402 * route @r eattrs stack, updates route pflags accordingly, undefines it by
403 * filling &eattr field in preallocated temporary &ea_list on top of the eattrs
404 * stack, and returns the value. Caller is supposed to store it in the
405 * appropriate private field.
406 *
407 * Note that it may require a free &eattr slot in the temporary &ea_list, so it
408 * must not be called more times than the @max argument of rte_init_tmp_attrs().
409 */
410 uintptr_t
411 rte_store_tmp_attr(rte *r, uint id)
412 {
413 ea_list *e = r->attrs->eattrs;
414 eattr *a = ea_find(e->next, id);
415
416 if (a)
417 {
418 e->attrs[e->count++] = (struct eattr) { .id = id, .type = EAF_TYPE_UNDEF };
419 r->pflags |= EA_ID_FLAG(id);
420 return (a->type & EAF_EMBEDDED) ? a->u.data : (uintptr_t) a->u.ptr;
421 }
422 else
423 {
424 r->pflags &= ~EA_ID_FLAG(id);
425 return 0;
426 }
427 }
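/*
 * Hedged sketch of protocol hooks built on the helpers above; the attribute
 * code EA_EXAMPLE_METRIC and the rt->u.example field are hypothetical (the
 * pattern mirrors what e.g. RIP does), hence the block is not compiled in.
 */
#if 0
static void
example_make_tmp_attrs(rte *rt, linpool *pool)
{
  rte_init_tmp_attrs(rt, pool, 1);
  rte_make_tmp_attr(rt, EA_EXAMPLE_METRIC, EAF_TYPE_INT, rt->u.example.metric);
}

static void
example_store_tmp_attrs(rte *rt, linpool *pool)
{
  rte_init_tmp_attrs(rt, pool, 1);
  rt->u.example.metric = rte_store_tmp_attr(rt, EA_EXAMPLE_METRIC);
}
#endif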
428
429 /**
430 * rte_make_tmp_attrs - prepare route by adding all relevant temporary route attributes
431 * @r: route entry to be modified (may be replaced if COW)
432 * @lp: linpool from which to allocate attributes
433 * @old_attrs: temporary ref to old &rta (may be NULL)
434 *
435 * This function expands privately stored protocol-dependent route attributes
436 * to a uniform &eattr / &ea_list representation. It is essentially a wrapper
437 * around protocol make_tmp_attrs() hook, which does some additional work like
438 * ensuring that route @r is writable.
439 *
440 * The route @r may be read-only (with %REF_COW flag), in that case rw copy is
441 * obtained by rte_cow() and @r is replaced. If @rte is originally rw, it may be
442 * directly modified (and it is never copied).
443 *
444 * If the @old_attrs ptr is supplied, the function obtains another reference to
445 * the old cached &rta, which is necessary in some cases (see rte_cow_rta() for
446 * details). It is freed by rte_store_tmp_attrs(), or manually by rta_free().
447 *
448 * Generally, if caller ensures that @r is read-only (e.g. in route export) then
449 * it may ignore @old_attrs (and set it to NULL), but must handle replacement of
450 * @r. If caller ensures that @r is writable (e.g. in route import) then it may
451 * ignore replacement of @r, but it must handle @old_attrs.
452 */
453 void
454 rte_make_tmp_attrs(rte **r, linpool *lp, rta **old_attrs)
455 {
456 void (*make_tmp_attrs)(rte *r, linpool *lp);
457 make_tmp_attrs = (*r)->attrs->src->proto->make_tmp_attrs;
458
459 if (!make_tmp_attrs)
460 return;
461
462 /* We may need to keep ref to old attributes, will be freed in rte_store_tmp_attrs() */
463 if (old_attrs)
464 *old_attrs = rta_is_cached((*r)->attrs) ? rta_clone((*r)->attrs) : NULL;
465
466 *r = rte_cow_rta(*r, lp);
467 make_tmp_attrs(*r, lp);
468 }
469
470 /**
471 * rte_store_tmp_attrs - store temporary route attributes back to private route fields
472 * @r: route entry to be modified
473 * @lp: linpool from which to allocate attributes
474 * @old_attrs: temporary ref to old &rta
475 *
476 * This function stores temporary route attributes that were expanded by
477 * rte_make_tmp_attrs() back to private route fields and also undefines them.
478 * It is essentially a wrapper around protocol store_tmp_attrs() hook, which
479 * does some additional work like shortcut if there is no change and cleanup
480 * of @old_attrs reference obtained by rte_make_tmp_attrs().
481 */
482 static void
483 rte_store_tmp_attrs(rte *r, linpool *lp, rta *old_attrs)
484 {
485 void (*store_tmp_attrs)(rte *rt, linpool *lp);
486 store_tmp_attrs = r->attrs->src->proto->store_tmp_attrs;
487
488 if (!store_tmp_attrs)
489 return;
490
491 ASSERT(!rta_is_cached(r->attrs));
492
493 /* If there is no new ea_list, we just skip the temporary ea_list */
494 ea_list *ea = r->attrs->eattrs;
495 if (ea && (ea->flags & EALF_TEMP))
496 r->attrs->eattrs = ea->next;
497 else
498 store_tmp_attrs(r, lp);
499
500 /* Free ref we got in rte_make_tmp_attrs(), have to do rta_lookup() first */
501 r->attrs = rta_lookup(r->attrs);
502 rta_free(old_attrs);
503 }
504
505
506 static int /* Actually better or at least as good as */
507 rte_better(rte *new, rte *old)
508 {
509 int (*better)(rte *, rte *);
510
511 if (!rte_is_valid(old))
512 return 1;
513 if (!rte_is_valid(new))
514 return 0;
515
516 if (new->pref > old->pref)
517 return 1;
518 if (new->pref < old->pref)
519 return 0;
520 if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
521 {
522 /*
523 * If the user has configured protocol preferences, so that two different protocols
524 * have the same preference, try to break the tie by comparing addresses. Not too
525 * useful, but keeps the ordering of routes unambiguous.
526 */
527 return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
528 }
529 if (better = new->attrs->src->proto->rte_better)
530 return better(new, old);
531 return 0;
532 }
533
534 static int
535 rte_mergable(rte *pri, rte *sec)
536 {
537 int (*mergable)(rte *, rte *);
538
539 if (!rte_is_valid(pri) || !rte_is_valid(sec))
540 return 0;
541
542 if (pri->pref != sec->pref)
543 return 0;
544
545 if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
546 return 0;
547
548 if (mergable = pri->attrs->src->proto->rte_mergable)
549 return mergable(pri, sec);
550
551 return 0;
552 }
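/*
 * Hedged sketch of a protocol-supplied rte_better() hook consulted above;
 * the rt->u.example.metric field is hypothetical, so the block is not
 * compiled in. Smaller metric wins once preferences and protocols match.
 */
#if 0
static int
example_rte_better(rte *new, rte *old)
{
  return new->u.example.metric < old->u.example.metric;
}
#endif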
553
554 static void
555 rte_trace(struct channel *c, rte *e, int dir, char *msg)
556 {
557 log(L_TRACE "%s.%s %c %s %N %s",
558 c->proto->name, c->name ?: "?", dir, msg, e->net->n.addr,
559 rta_dest_name(e->attrs->dest));
560 }
561
562 static inline void
563 rte_trace_in(uint flag, struct channel *c, rte *e, char *msg)
564 {
565 if ((c->debug & flag) || (c->proto->debug & flag))
566 rte_trace(c, e, '>', msg);
567 }
568
569 static inline void
570 rte_trace_out(uint flag, struct channel *c, rte *e, char *msg)
571 {
572 if ((c->debug & flag) || (c->proto->debug & flag))
573 rte_trace(c, e, '<', msg);
574 }
575
576 static rte *
577 export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int silent)
578 {
579 struct proto *p = c->proto;
580 const struct filter *filter = c->out_filter;
581 struct proto_stats *stats = &c->stats;
582 rte *rt;
583 int v;
584
585 rt = rt0;
586 *rt_free = NULL;
587
588 v = p->preexport ? p->preexport(p, &rt, pool) : 0;
589 if (v < 0)
590 {
591 if (silent)
592 goto reject;
593
594 stats->exp_updates_rejected++;
595 if (v == RIC_REJECT)
596 rte_trace_out(D_FILTERS, c, rt, "rejected by protocol");
597 goto reject;
598 }
599 if (v > 0)
600 {
601 if (!silent)
602 rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol");
603 goto accept;
604 }
605
606 rte_make_tmp_attrs(&rt, pool, NULL);
607
608 v = filter && ((filter == FILTER_REJECT) ||
609 (f_run(filter, &rt, pool,
610 (silent ? FF_SILENT : 0)) > F_ACCEPT));
611 if (v)
612 {
613 if (silent)
614 goto reject;
615
616 stats->exp_updates_filtered++;
617 rte_trace_out(D_FILTERS, c, rt, "filtered out");
618 goto reject;
619 }
620
621 accept:
622 if (rt != rt0)
623 *rt_free = rt;
624 return rt;
625
626 reject:
627 /* Discard temporary rte */
628 if (rt != rt0)
629 rte_free(rt);
630 return NULL;
631 }
632
633 static inline rte *
634 export_filter(struct channel *c, rte *rt0, rte **rt_free, int silent)
635 {
636 return export_filter_(c, rt0, rt_free, rte_update_pool, silent);
637 }
638
639 static void
640 do_rt_notify(struct channel *c, net *net, rte *new, rte *old, int refeed)
641 {
642 struct proto *p = c->proto;
643 struct proto_stats *stats = &c->stats;
644
645 if (refeed && new)
646 c->refeed_count++;
647
648 /* Apply export limit */
649 struct channel_limit *l = &c->out_limit;
650 if (l->action && !old && new)
651 {
652 if (stats->exp_routes >= l->limit)
653 channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);
654
655 if (l->state == PLS_BLOCKED)
656 {
657 stats->exp_updates_rejected++;
658 rte_trace_out(D_FILTERS, c, new, "rejected [limit]");
659 return;
660 }
661 }
662
663 /* Apply export table */
664 if (c->out_table && !rte_update_out(c, net->n.addr, new, old, refeed))
665 return;
666
667 if (new)
668 stats->exp_updates_accepted++;
669 else
670 stats->exp_withdraws_accepted++;
671
672 if (old)
673 {
674 bmap_clear(&c->export_map, old->id);
675 stats->exp_routes--;
676 }
677
678 if (new)
679 {
680 bmap_set(&c->export_map, new->id);
681 stats->exp_routes++;
682 }
683
684 if (p->debug & D_ROUTES)
685 {
686 if (new && old)
687 rte_trace_out(D_ROUTES, c, new, "replaced");
688 else if (new)
689 rte_trace_out(D_ROUTES, c, new, "added");
690 else if (old)
691 rte_trace_out(D_ROUTES, c, old, "removed");
692 }
693
694 p->rt_notify(p, c, net, new, old);
695 }
696
697 static void
698 rt_notify_basic(struct channel *c, net *net, rte *new, rte *old, int refeed)
699 {
700 // struct proto *p = c->proto;
701 rte *new_free = NULL;
702
703 if (new)
704 c->stats.exp_updates_received++;
705 else
706 c->stats.exp_withdraws_received++;
707
708 if (new)
709 new = export_filter(c, new, &new_free, 0);
710
711 if (old && !bmap_test(&c->export_map, old->id))
712 old = NULL;
713
714 if (!new && !old)
715 return;
716
717 do_rt_notify(c, net, new, old, refeed);
718
719 /* Discard temporary rte */
720 if (new_free)
721 rte_free(new_free);
722 }
723
724 static void
725 rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, int refeed)
726 {
727 // struct proto *p = c->proto;
728 rte *new_best = NULL;
729 rte *old_best = NULL;
730 rte *new_free = NULL;
731 int new_first = 0;
732
733 /*
734 * We assume that there are no changes in net route order except (added)
735 * new_changed and (removed) old_changed. Therefore, the function is not
736 * compatible with deterministic_med (where nontrivial reordering can happen
737 * as a result of a route change) and with recomputation of recursive routes
738 * due to next hop update (where many routes can be changed in one step).
739 *
740 * Note that we need this assumption just for optimizations, we could just
741 * run full new_best recomputation otherwise.
742 *
743 * There are three cases:
744 * feed or old_best is old_changed -> we need to recompute new_best
745 * old_best is before new_changed -> new_best is old_best, ignore
746 * old_best is after new_changed -> try new_changed, otherwise old_best
747 */
748
749 if (net->routes)
750 c->stats.exp_updates_received++;
751 else
752 c->stats.exp_withdraws_received++;
753
754 /* Find old_best - either old_changed, or route for net->routes */
755 if (old_changed && bmap_test(&c->export_map, old_changed->id))
756 old_best = old_changed;
757 else
758 {
759 for (rte *r = net->routes; rte_is_valid(r); r = r->next)
760 {
761 if (bmap_test(&c->export_map, r->id))
762 {
763 old_best = r;
764 break;
765 }
766
767 /* Note if new_changed found before old_best */
768 if (r == new_changed)
769 new_first = 1;
770 }
771 }
772
773 /* Find new_best */
774 if ((new_changed == old_changed) || (old_best == old_changed))
775 {
776 /* Feed or old_best changed -> find first accepted by filters */
777 for (rte *r = net->routes; rte_is_valid(r); r = r->next)
778 if (new_best = export_filter(c, r, &new_free, 0))
779 break;
780 }
781 else
782 {
783 /* Other cases -> either new_changed, or old_best (and nothing changed) */
784 if (new_first && (new_changed = export_filter(c, new_changed, &new_free, 0)))
785 new_best = new_changed;
786 else
787 return;
788 }
789
790 if (!new_best && !old_best)
791 return;
792
793 do_rt_notify(c, net, new_best, old_best, refeed);
794
795 /* Discard temporary rte */
796 if (new_free)
797 rte_free(new_free);
798 }
799
800
801 static struct nexthop *
802 nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max)
803 {
804 return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool);
805 }
806
807 rte *
808 rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent)
809 {
810 // struct proto *p = c->proto;
811 struct nexthop *nhs = NULL;
812 rte *best0, *best, *rt0, *rt, *tmp;
813
814 best0 = net->routes;
815 *rt_free = NULL;
816
817 if (!rte_is_valid(best0))
818 return NULL;
819
820 best = export_filter_(c, best0, rt_free, pool, silent);
821
822 if (!best || !rte_is_reachable(best))
823 return best;
824
825 for (rt0 = best0->next; rt0; rt0 = rt0->next)
826 {
827 if (!rte_mergable(best0, rt0))
828 continue;
829
830 rt = export_filter_(c, rt0, &tmp, pool, 1);
831
832 if (!rt)
833 continue;
834
835 if (rte_is_reachable(rt))
836 nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit);
837
838 if (tmp)
839 rte_free(tmp);
840 }
841
842 if (nhs)
843 {
844 nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit);
845
846 if (nhs->next)
847 {
848 best = rte_cow_rta(best, pool);
849 nexthop_link(best->attrs, nhs);
850 }
851 }
852
853 if (best != best0)
854 *rt_free = best;
855
856 return best;
857 }
858
859
860 static void
861 rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
862 rte *new_best, rte *old_best, int refeed)
863 {
864 // struct proto *p = c->proto;
865 rte *new_free = NULL;
866
867 /* We assume that all rte arguments are either NULL or rte_is_valid() */
868
869 /* This check should be done by the caller */
870 if (!new_best && !old_best)
871 return;
872
873 /* Check whether the change is relevant to the merged route */
874 if ((new_best == old_best) &&
875 (new_changed != old_changed) &&
876 !rte_mergable(new_best, new_changed) &&
877 !rte_mergable(old_best, old_changed))
878 return;
879
880 if (new_best)
881 c->stats.exp_updates_received++;
882 else
883 c->stats.exp_withdraws_received++;
884
885 /* Prepare new merged route */
886 if (new_best)
887 new_best = rt_export_merged(c, net, &new_free, rte_update_pool, 0);
888
889 /* Check old merged route */
890 if (old_best && !bmap_test(&c->export_map, old_best->id))
891 old_best = NULL;
892
893 if (!new_best && !old_best)
894 return;
895
896 do_rt_notify(c, net, new_best, old_best, refeed);
897
898 /* Discard temporary rte */
899 if (new_free)
900 rte_free(new_free);
901 }
902
903
904 /**
905 * rte_announce - announce a routing table change
906 * @tab: table the route has been added to
907 * @type: type of route announcement (RA_UNDEF or RA_ANY)
908 * @net: network in question
909 * @new: the new or changed route
910 * @old: the previous route replaced by the new one
911 * @new_best: the new best route for the same network
912 * @old_best: the previous best route for the same network
913 *
914 * This function gets a routing table update and announces it to all protocols
915 * that are connected to the same table by their channels.
916 *
917 * There are two ways of how routing table changes are announced. First, there
918 * is a change of just one route in @net (which may cause a change of the best
919 * route of the network). In this case @new and @old describe the changed route
920 * and @new_best and @old_best describe the best routes. Other routes are not
921 * affected, but in a sorted table the order of other routes might change.
922 *
923 * Second, there is a bulk change of multiple routes in @net, with shared best
924 * route selection. In such a case separate route changes are described using
925 * @type of %RA_ANY, with @new and @old specifying the changed route, while
926 * @new_best and @old_best are NULL. After that, another notification is done
927 * where @new_best and @old_best are filled (may be the same), but @new and @old
928 * are NULL.
929 *
930 * The function announces the change to all associated channels. For each
931 * channel, an appropriate preprocessing is done according to channel &ra_mode.
932 * For example, %RA_OPTIMAL channels receive just changes of best routes.
933 *
934 * In general, we first call preexport() hook of a protocol, which performs
935 * basic checks on the route (each protocol has a right to veto or force accept
936 * of the route before any filter is asked). Then we consult an export filter
937 * of the channel and verify the old route in an export map of the channel.
938 * Finally, the rt_notify() hook of the protocol gets called.
939 *
940 * Note that there are also calls of rt_notify() hooks due to feed, but that is
941 * done outside of scope of rte_announce().
942 */
943 static void
944 rte_announce(rtable *tab, uint type, net *net, rte *new, rte *old,
945 rte *new_best, rte *old_best)
946 {
947 if (!rte_is_valid(new))
948 new = NULL;
949
950 if (!rte_is_valid(old))
951 old = NULL;
952
953 if (!rte_is_valid(new_best))
954 new_best = NULL;
955
956 if (!rte_is_valid(old_best))
957 old_best = NULL;
958
959 if (!new && !old && !new_best && !old_best)
960 return;
961
962 if (new_best != old_best)
963 {
964 if (new_best)
965 new_best->sender->stats.pref_routes++;
966 if (old_best)
967 old_best->sender->stats.pref_routes--;
968
969 if (tab->hostcache)
970 rt_notify_hostcache(tab, net);
971 }
972
973 rt_schedule_notify(tab);
974
975 struct channel *c; node *n;
976 WALK_LIST2(c, n, tab->channels, table_node)
977 {
978 if (c->export_state == ES_DOWN)
979 continue;
980
981 if (type && (type != c->ra_mode))
982 continue;
983
984 switch (c->ra_mode)
985 {
986 case RA_OPTIMAL:
987 if (new_best != old_best)
988 rt_notify_basic(c, net, new_best, old_best, 0);
989 break;
990
991 case RA_ANY:
992 if (new != old)
993 rt_notify_basic(c, net, new, old, 0);
994 break;
995
996 case RA_ACCEPTED:
997 rt_notify_accepted(c, net, new, old, 0);
998 break;
999
1000 case RA_MERGED:
1001 rt_notify_merged(c, net, new, old, new_best, old_best, 0);
1002 break;
1003 }
1004 }
1005 }
1006
1007 static inline int
1008 rte_validate(rte *e)
1009 {
1010 int c;
1011 net *n = e->net;
1012
1013 if (!net_validate(n->n.addr))
1014 {
1015 log(L_WARN "Ignoring bogus prefix %N received via %s",
1016 n->n.addr, e->sender->proto->name);
1017 return 0;
1018 }
1019
1020 /* FIXME: better handling different nettypes */
1021 c = !net_is_flow(n->n.addr) ?
1022 net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE);
1023 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
1024 {
1025 log(L_WARN "Ignoring bogus route %N received via %s",
1026 n->n.addr, e->sender->proto->name);
1027 return 0;
1028 }
1029
1030 if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest)
1031 {
1032 log(L_WARN "Ignoring route %N with invalid dest %d received via %s",
1033 n->n.addr, e->attrs->dest, e->sender->proto->name);
1034 return 0;
1035 }
1036
1037 if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh)))
1038 {
1039 log(L_WARN "Ignoring unsorted multipath route %N received via %s",
1040 n->n.addr, e->sender->proto->name);
1041 return 0;
1042 }
1043
1044 return 1;
1045 }
1046
1047 /**
1048 * rte_free - delete a &rte
1049 * @e: &rte to be deleted
1050 *
1051 * rte_free() deletes the given &rte from the routing table it's linked to.
1052 */
1053 void
1054 rte_free(rte *e)
1055 {
1056 if (rta_is_cached(e->attrs))
1057 rta_free(e->attrs);
1058 sl_free(rte_slab, e);
1059 }
1060
1061 static inline void
1062 rte_free_quick(rte *e)
1063 {
1064 rta_free(e->attrs);
1065 sl_free(rte_slab, e);
1066 }
1067
1068 static int
1069 rte_same(rte *x, rte *y)
1070 {
1071 /* rte.flags are not checked, as they are mostly internal to rtable */
1072 return
1073 x->attrs == y->attrs &&
1074 x->pflags == y->pflags &&
1075 x->pref == y->pref &&
1076 (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y)) &&
1077 rte_is_filtered(x) == rte_is_filtered(y);
1078 }
1079
1080 static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
1081
1082 static void
1083 rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
1084 {
1085 struct proto *p = c->proto;
1086 struct rtable *table = c->table;
1087 struct proto_stats *stats = &c->stats;
1088 static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
1089 rte *before_old = NULL;
1090 rte *old_best = net->routes;
1091 rte *old = NULL;
1092 rte **k;
1093
1094 k = &net->routes; /* Find and remove original route from the same protocol */
1095 while (old = *k)
1096 {
1097 if (old->attrs->src == src)
1098 {
1099 /* If there is the same route in the routing table but from
1100 * a different sender, then there are two paths from the
1101 * source protocol to this routing table through transparent
1102 * pipes, which is not allowed.
1103 *
1104 * We log that and ignore the route. If it is a withdraw, we
1105 * ignore it completely (there might be 'spurious withdraws',
1106 * see FIXME in do_rte_announce())
1107 */
1108 if (old->sender->proto != p)
1109 {
1110 if (new)
1111 {
1112 log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
1113 net->n.addr, table->name);
1114 rte_free_quick(new);
1115 }
1116 return;
1117 }
1118
1119 if (new && rte_same(old, new))
1120 {
1121 /* No changes, ignore the new route and refresh the old one */
1122
1123 old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);
1124
1125 if (!rte_is_filtered(new))
1126 {
1127 stats->imp_updates_ignored++;
1128 rte_trace_in(D_ROUTES, c, new, "ignored");
1129 }
1130
1131 rte_free_quick(new);
1132 return;
1133 }
1134 *k = old->next;
1135 table->rt_count--;
1136 break;
1137 }
1138 k = &old->next;
1139 before_old = old;
1140 }
1141
1142 /* Save the last accessed position */
1143 rte **pos = k;
1144
1145 if (!old)
1146 before_old = NULL;
1147
1148 if (!old && !new)
1149 {
1150 stats->imp_withdraws_ignored++;
1151 return;
1152 }
1153
1154 int new_ok = rte_is_ok(new);
1155 int old_ok = rte_is_ok(old);
1156
1157 struct channel_limit *l = &c->rx_limit;
1158 if (l->action && !old && new && !c->in_table)
1159 {
1160 u32 all_routes = stats->imp_routes + stats->filt_routes;
1161
1162 if (all_routes >= l->limit)
1163 channel_notify_limit(c, l, PLD_RX, all_routes);
1164
1165 if (l->state == PLS_BLOCKED)
1166 {
1167 /* In receive limit the situation is simple, old is NULL so
1168 we just free new and exit like nothing happened */
1169
1170 stats->imp_updates_ignored++;
1171 rte_trace_in(D_FILTERS, c, new, "ignored [limit]");
1172 rte_free_quick(new);
1173 return;
1174 }
1175 }
1176
1177 l = &c->in_limit;
1178 if (l->action && !old_ok && new_ok)
1179 {
1180 if (stats->imp_routes >= l->limit)
1181 channel_notify_limit(c, l, PLD_IN, stats->imp_routes);
1182
1183 if (l->state == PLS_BLOCKED)
1184 {
1185 /* In import limit the situation is more complicated. We
1186 shouldn't just drop the route, we should handle it like
1187 it was filtered. We also have to continue the route
1188 processing if old or new is non-NULL, but we should exit
1189 if both are NULL as this case is probably assumed to be
1190 already handled. */
1191
1192 stats->imp_updates_ignored++;
1193 rte_trace_in(D_FILTERS, c, new, "ignored [limit]");
1194
1195 if (c->in_keep_filtered)
1196 new->flags |= REF_FILTERED;
1197 else
1198 { rte_free_quick(new); new = NULL; }
1199
1200 /* Note that old && !new could be possible when
1201 c->in_keep_filtered changed in the recent past. */
1202
1203 if (!old && !new)
1204 return;
1205
1206 new_ok = 0;
1207 goto skip_stats1;
1208 }
1209 }
1210
1211 if (new_ok)
1212 stats->imp_updates_accepted++;
1213 else if (old_ok)
1214 stats->imp_withdraws_accepted++;
1215 else
1216 stats->imp_withdraws_ignored++;
1217
1218 if (old_ok || new_ok)
1219 table->last_rt_change = current_time();
1220
1221 skip_stats1:
1222
1223 if (new)
1224 rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
1225 if (old)
1226 rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;
1227
1228 if (table->config->sorted)
1229 {
1230 /* If routes are sorted, just insert new route to appropriate position */
1231 if (new)
1232 {
1233 if (before_old && !rte_better(new, before_old))
1234 k = &before_old->next;
1235 else
1236 k = &net->routes;
1237
1238 for (; *k; k=&(*k)->next)
1239 if (rte_better(new, *k))
1240 break;
1241
1242 new->next = *k;
1243 *k = new;
1244
1245 table->rt_count++;
1246 }
1247 }
1248 else
1249 {
1250 /* If routes are not sorted, find the best route and move it on
1251 the first position. There are several optimized cases. */
1252
1253 if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
1254 goto do_recalculate;
1255
1256 if (new && rte_better(new, old_best))
1257 {
1258 /* The first case - the new route is clearly optimal,
1259 we link it at the first position */
1260
1261 new->next = net->routes;
1262 net->routes = new;
1263
1264 table->rt_count++;
1265 }
1266 else if (old == old_best)
1267 {
1268 /* The second case - the old best route disappeared, we add the
1269 new route (if we have any) to the list (we don't care about
1270 position) and then we elect the new optimal route and relink
1271 that route at the first position and announce it. New optimal
1272 route might be NULL if there are no more routes */
1273
1274 do_recalculate:
1275 /* Add the new route to the list */
1276 if (new)
1277 {
1278 new->next = *pos;
1279 *pos = new;
1280
1281 table->rt_count++;
1282 }
1283
1284 /* Find a new optimal route (if there is any) */
1285 if (net->routes)
1286 {
1287 rte **bp = &net->routes;
1288 for (k=&(*bp)->next; *k; k=&(*k)->next)
1289 if (rte_better(*k, *bp))
1290 bp = k;
1291
1292 /* And relink it */
1293 rte *best = *bp;
1294 *bp = best->next;
1295 best->next = net->routes;
1296 net->routes = best;
1297 }
1298 }
1299 else if (new)
1300 {
1301 /* The third case - the new route is not better than the old
1302 best route (therefore old_best != NULL) and the old best
1303 route was not removed (therefore old_best == net->routes).
1304 We just link the new route to the old/last position. */
1305
1306 new->next = *pos;
1307 *pos = new;
1308
1309 table->rt_count++;
1310 }
1311 /* The fourth (empty) case - suboptimal route was removed, nothing to do */
1312 }
1313
1314 if (new)
1315 {
1316 new->lastmod = current_time();
1317
1318 if (!old)
1319 {
1320 new->id = hmap_first_zero(&table->id_map);
1321 hmap_set(&table->id_map, new->id);
1322 }
1323 else
1324 new->id = old->id;
1325 }
1326
1327 /* Log the route change */
1328 if ((c->debug & D_ROUTES) || (p->debug & D_ROUTES))
1329 {
1330 if (new_ok)
1331 rte_trace(c, new, '>', new == net->routes ? "added [best]" : "added");
1332 else if (old_ok)
1333 {
1334 if (old != old_best)
1335 rte_trace(c, old, '>', "removed");
1336 else if (rte_is_ok(net->routes))
1337 rte_trace(c, old, '>', "removed [replaced]");
1338 else
1339 rte_trace(c, old, '>', "removed [sole]");
1340 }
1341 }
1342
1343 /* Propagate the route change */
1344 rte_announce(table, RA_UNDEF, net, new, old, net->routes, old_best);
1345
1346 if (!net->routes &&
1347 (table->gc_counter++ >= table->config->gc_max_ops) &&
1348 (table->gc_time + table->config->gc_min_time <= current_time()))
1349 rt_schedule_prune(table);
1350
1351 if (old_ok && p->rte_remove)
1352 p->rte_remove(net, old);
1353 if (new_ok && p->rte_insert)
1354 p->rte_insert(net, new);
1355
1356 if (old)
1357 {
1358 if (!new)
1359 hmap_clear(&table->id_map, old->id);
1360
1361 rte_free_quick(old);
1362 }
1363 }
1364
1365 static int rte_update_nest_cnt; /* Nesting counter to allow recursive updates */
1366
1367 static inline void
1368 rte_update_lock(void)
1369 {
1370 rte_update_nest_cnt++;
1371 }
1372
1373 static inline void
1374 rte_update_unlock(void)
1375 {
1376 if (!--rte_update_nest_cnt)
1377 lp_flush(rte_update_pool);
1378 }
1379
1380 static inline void
1381 rte_hide_dummy_routes(net *net, rte **dummy)
1382 {
1383 if (net->routes && net->routes->attrs->source == RTS_DUMMY)
1384 {
1385 *dummy = net->routes;
1386 net->routes = (*dummy)->next;
1387 }
1388 }
1389
1390 static inline void
1391 rte_unhide_dummy_routes(net *net, rte **dummy)
1392 {
1393 if (*dummy)
1394 {
1395 (*dummy)->next = net->routes;
1396 net->routes = *dummy;
1397 }
1398 }
1399
1400 /**
1401 * rte_update - enter a new update to a routing table
1402 * @table: table to be updated
1403 * @c: channel doing the update
1404 * @net: network node
1405 * @p: protocol submitting the update
1406 * @src: protocol originating the update
1407 * @new: a &rte representing the new route or %NULL for route removal.
1408 *
1409 * This function is called by the routing protocols whenever they discover
1410 * a new route or wish to update/remove an existing route. The right announcement
1411 * sequence is to build route attributes first (either un-cached with @aflags set
1412 * to zero or a cached one using rta_lookup(); in this case please note that
1413 * you need to increase the use count of the attributes yourself by calling
1414 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1415 * the appropriate data and finally submit the new &rte by calling rte_update().
1416 *
1417 * @src specifies the protocol that originally created the route and the meaning
1418 * of protocol-dependent data of @new. If @new is not %NULL, @src has to be the
1419 * same value as @new->attrs->src. @p specifies the protocol that called
1420 * rte_update(). In most cases it is the same protocol as @src. rte_update()
1421 * stores @p in @new->sender.
1422 *
1423 * When rte_update() gets any route, it automatically validates it (checks
1424 * whether the network and next hop address are valid IP addresses and also
1425 * whether a normal routing protocol doesn't try to smuggle a host or link
1426 * scope route to the table), converts all protocol dependent attributes stored
1427 * in the &rte to temporary extended attributes, consults import filters of the
1428 * protocol to see if the route should be accepted and/or its attributes modified,
1429 * and finally stores the temporary attributes back to the &rte.
1430 *
1431 * Now, having a "public" version of the route, we
1432 * automatically find any old route defined by the protocol @src
1433 * for network @n, replace it by the new one (or remove it if @new is %NULL),
1434 * recalculate the optimal route for this destination and finally broadcast
1435 * the change (if any) to all routing protocols by calling rte_announce().
1436 *
1437 * All memory used for attribute lists and other temporary allocations is taken
1438 * from a special linear pool @rte_update_pool and freed when rte_update()
1439 * finishes.
1440 */
1441
1442 void
1443 rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
1444 {
1445 // struct proto *p = c->proto;
1446 struct proto_stats *stats = &c->stats;
1447 const struct filter *filter = c->in_filter;
1448 rte *dummy = NULL;
1449 net *nn;
1450
1451 ASSERT(c->channel_state == CS_UP);
1452
1453 rte_update_lock();
1454 if (new)
1455 {
1456 /* Create a temporary table node */
1457 nn = alloca(sizeof(net) + n->length);
1458 memset(nn, 0, sizeof(net) + n->length);
1459 net_copy(nn->n.addr, n);
1460
1461 new->net = nn;
1462 new->sender = c;
1463
1464 if (!new->pref)
1465 new->pref = c->preference;
1466
1467 stats->imp_updates_received++;
1468 if (!rte_validate(new))
1469 {
1470 rte_trace_in(D_FILTERS, c, new, "invalid");
1471 stats->imp_updates_invalid++;
1472 goto drop;
1473 }
1474
1475 if (filter == FILTER_REJECT)
1476 {
1477 stats->imp_updates_filtered++;
1478 rte_trace_in(D_FILTERS, c, new, "filtered out");
1479
1480 if (! c->in_keep_filtered)
1481 goto drop;
1482
1483 /* new is a private copy, we can modify it */
1484 new->flags |= REF_FILTERED;
1485 }
1486 else if (filter)
1487 {
1488 rta *old_attrs = NULL;
1489 rte_make_tmp_attrs(&new, rte_update_pool, &old_attrs);
1490
1491 int fr = f_run(filter, &new, rte_update_pool, 0);
1492 if (fr > F_ACCEPT)
1493 {
1494 stats->imp_updates_filtered++;
1495 rte_trace_in(D_FILTERS, c, new, "filtered out");
1496
1497 if (! c->in_keep_filtered)
1498 {
1499 rta_free(old_attrs);
1500 goto drop;
1501 }
1502
1503 new->flags |= REF_FILTERED;
1504 }
1505
1506 rte_store_tmp_attrs(new, rte_update_pool, old_attrs);
1507 }
1508 if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
1509 new->attrs = rta_lookup(new->attrs);
1510 new->flags |= REF_COW;
1511
1512 /* Use the actual struct network, not the dummy one */
1513 nn = net_get(c->table, n);
1514 new->net = nn;
1515 }
1516 else
1517 {
1518 stats->imp_withdraws_received++;
1519
1520 if (!(nn = net_find(c->table, n)) || !src)
1521 {
1522 stats->imp_withdraws_ignored++;
1523 rte_update_unlock();
1524 return;
1525 }
1526 }
1527
1528 recalc:
1529 /* And recalculate the best route */
1530 rte_hide_dummy_routes(nn, &dummy);
1531 rte_recalculate(c, nn, new, src);
1532 rte_unhide_dummy_routes(nn, &dummy);
1533
1534 rte_update_unlock();
1535 return;
1536
1537 drop:
1538 rte_free(new);
1539 new = NULL;
1540 if (nn = net_find(c->table, n))
1541 goto recalc;
1542
1543 rte_update_unlock();
1544 }
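/*
 * Hedged sketch of a withdraw through the entry point above: passing %NULL
 * as the route removes whatever @src previously announced for @n; import
 * filters are not consulted for withdraws. The helper name is illustrative.
 */
static inline void
example_withdraw(struct channel *c, const net_addr *n, struct rte_src *src)
{
  rte_update2(c, n, NULL, src);
}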
1545
1546 /* Independent call to rte_announce(), used from next hop
1547 recalculation, outside of rte_update(). new must be non-NULL */
1548 static inline void
1549 rte_announce_i(rtable *tab, uint type, net *net, rte *new, rte *old,
1550 rte *new_best, rte *old_best)
1551 {
1552 rte_update_lock();
1553 rte_announce(tab, type, net, new, old, new_best, old_best);
1554 rte_update_unlock();
1555 }
1556
1557 static inline void
1558 rte_discard(rte *old) /* Non-filtered route deletion, used during garbage collection */
1559 {
1560 rte_update_lock();
1561 rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
1562 rte_update_unlock();
1563 }
1564
1565 /* Modify existing route by protocol hook, used for long-lived graceful restart */
1566 static inline void
1567 rte_modify(rte *old)
1568 {
1569 rte_update_lock();
1570
1571 rte *new = old->sender->proto->rte_modify(old, rte_update_pool);
1572 if (new != old)
1573 {
1574 if (new)
1575 {
1576 if (!rta_is_cached(new->attrs))
1577 new->attrs = rta_lookup(new->attrs);
1578 new->flags = (old->flags & ~REF_MODIFY) | REF_COW;
1579 }
1580
1581 rte_recalculate(old->sender, old->net, new, old->attrs->src);
1582 }
1583
1584 rte_update_unlock();
1585 }
1586
1587 /* Check rtable for best route to given net whether it would be exported to @p */
1588 int
1589 rt_examine(rtable *t, net_addr *a, struct proto *p, const struct filter *filter)
1590 {
1591 net *n = net_find(t, a);
1592 rte *rt = n ? n->routes : NULL;
1593
1594 if (!rte_is_valid(rt))
1595 return 0;
1596
1597 rte_update_lock();
1598
1599 /* Rest is stripped down export_filter() */
1600 int v = p->preexport ? p->preexport(p, &rt, rte_update_pool) : 0;
1601 if (v == RIC_PROCESS)
1602 {
1603 rte_make_tmp_attrs(&rt, rte_update_pool, NULL);
1604 v = (f_run(filter, &rt, rte_update_pool, FF_SILENT) <= F_ACCEPT);
1605 }
1606
1607 /* Discard temporary rte */
1608 if (rt != n->routes)
1609 rte_free(rt);
1610
1611 rte_update_unlock();
1612
1613 return v > 0;
1614 }
1615
1616
1617 /**
1618 * rt_refresh_begin - start a refresh cycle
1619 * @t: related routing table
1620 * @c: related channel
1621 *
1622 * This function starts a refresh cycle for given routing table and announce
1623 * hook. The refresh cycle is a sequence where the protocol sends all its valid
1624 * routes to the routing table (by rte_update()). After that, all protocol
1625 * routes (more precisely routes with @c as @sender) not sent during the
1626 * refresh cycle but still in the table from the past are pruned. This is
1627 * implemented by marking all related routes as stale by REF_STALE flag in
1628 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1629 * flag in rt_refresh_end() and then removing such routes in the prune loop.
1630 */
1631 void
1632 rt_refresh_begin(rtable *t, struct channel *c)
1633 {
1634 FIB_WALK(&t->fib, net, n)
1635 {
1636 rte *e;
1637 for (e = n->routes; e; e = e->next)
1638 if (e->sender == c)
1639 e->flags |= REF_STALE;
1640 }
1641 FIB_WALK_END;
1642 }
1643
1644 /**
1645 * rt_refresh_end - end a refresh cycle
1646 * @t: related routing table
1647 * @c: related channel
1648 *
1649 * This function ends a refresh cycle for given routing table and announce
1650 * hook. See rt_refresh_begin() for description of refresh cycles.
1651 */
1652 void
1653 rt_refresh_end(rtable *t, struct channel *c)
1654 {
1655 int prune = 0;
1656
1657 FIB_WALK(&t->fib, net, n)
1658 {
1659 rte *e;
1660 for (e = n->routes; e; e = e->next)
1661 if ((e->sender == c) && (e->flags & REF_STALE))
1662 {
1663 e->flags |= REF_DISCARD;
1664 prune = 1;
1665 }
1666 }
1667 FIB_WALK_END;
1668
1669 if (prune)
1670 rt_schedule_prune(t);
1671 }
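/*
 * Hedged sketch of a refresh cycle as described at rt_refresh_begin(): the
 * protocol brackets a full re-announcement of its routes; anything it does
 * not re-send in between is marked stale and pruned afterwards.
 */
static inline void
example_refresh_channel(struct channel *c)
{
  rt_refresh_begin(c->table, c);
  /* ... re-announce all currently valid routes with rte_update2() ... */
  rt_refresh_end(c->table, c);
}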
1672
1673 void
1674 rt_modify_stale(rtable *t, struct channel *c)
1675 {
1676 int prune = 0;
1677
1678 FIB_WALK(&t->fib, net, n)
1679 {
1680 rte *e;
1681 for (e = n->routes; e; e = e->next)
1682 if ((e->sender == c) && (e->flags & REF_STALE) && !(e->flags & REF_FILTERED))
1683 {
1684 e->flags |= REF_MODIFY;
1685 prune = 1;
1686 }
1687 }
1688 FIB_WALK_END;
1689
1690 if (prune)
1691 rt_schedule_prune(t);
1692 }
1693
1694 /**
1695 * rte_dump - dump a route
1696 * @e: &rte to be dumped
1697 *
1698 * This functions dumps contents of a &rte to debug output.
1699 */
1700 void
1701 rte_dump(rte *e)
1702 {
1703 net *n = e->net;
1704 debug("%-1N ", n->n.addr);
1705 debug("PF=%02x pref=%d ", e->pflags, e->pref);
1706 rta_dump(e->attrs);
1707 if (e->attrs->src->proto->proto->dump_attrs)
1708 e->attrs->src->proto->proto->dump_attrs(e);
1709 debug("\n");
1710 }
1711
1712 /**
1713 * rt_dump - dump a routing table
1714 * @t: routing table to be dumped
1715 *
1716 * This function dumps contents of a given routing table to debug output.
1717 */
1718 void
1719 rt_dump(rtable *t)
1720 {
1721 debug("Dump of routing table <%s>\n", t->name);
1722 #ifdef DEBUGGING
1723 fib_check(&t->fib);
1724 #endif
1725 FIB_WALK(&t->fib, net, n)
1726 {
1727 rte *e;
1728 for(e=n->routes; e; e=e->next)
1729 rte_dump(e);
1730 }
1731 FIB_WALK_END;
1732 debug("\n");
1733 }
1734
1735 /**
1736 * rt_dump_all - dump all routing tables
1737 *
1738 * This function dumps contents of all routing tables to debug output.
1739 */
1740 void
1741 rt_dump_all(void)
1742 {
1743 rtable *t;
1744
1745 WALK_LIST(t, routing_tables)
1746 rt_dump(t);
1747 }
1748
1749 static inline void
1750 rt_schedule_hcu(rtable *tab)
1751 {
1752 if (tab->hcu_scheduled)
1753 return;
1754
1755 tab->hcu_scheduled = 1;
1756 ev_schedule(tab->rt_event);
1757 }
1758
1759 static inline void
1760 rt_schedule_nhu(rtable *tab)
1761 {
1762 if (tab->nhu_state == NHU_CLEAN)
1763 ev_schedule(tab->rt_event);
1764
1765 /* state change:
1766 * NHU_CLEAN -> NHU_SCHEDULED
1767 * NHU_RUNNING -> NHU_DIRTY
1768 */
1769 tab->nhu_state |= NHU_SCHEDULED;
1770 }
1771
1772 void
1773 rt_schedule_prune(rtable *tab)
1774 {
1775 if (tab->prune_state == 0)
1776 ev_schedule(tab->rt_event);
1777
1778 /* state change 0->1, 2->3 */
1779 tab->prune_state |= 1;
1780 }
1781
1782
1783 static void
1784 rt_event(void *ptr)
1785 {
1786 rtable *tab = ptr;
1787
1788 rt_lock_table(tab);
1789
1790 if (tab->hcu_scheduled)
1791 rt_update_hostcache(tab);
1792
1793 if (tab->nhu_state)
1794 rt_next_hop_update(tab);
1795
1796 if (tab->prune_state)
1797 rt_prune_table(tab);
1798
1799 rt_unlock_table(tab);
1800 }
1801
1802
1803 static inline btime
1804 rt_settled_time(rtable *tab)
1805 {
1806 ASSUME(tab->base_settle_time != 0);
1807
1808 return MIN(tab->last_rt_change + tab->config->min_settle_time,
1809 tab->base_settle_time + tab->config->max_settle_time);
1810 }
1811
1812 static void
1813 rt_settle_timer(timer *t)
1814 {
1815 rtable *tab = t->data;
1816
1817 if (!tab->base_settle_time)
1818 return;
1819
1820 btime settled_time = rt_settled_time(tab);
1821 if (current_time() < settled_time)
1822 {
1823 tm_set(tab->settle_timer, settled_time);
1824 return;
1825 }
1826
1827 /* Settled */
1828 tab->base_settle_time = 0;
1829
1830 struct rt_subscription *s;
1831 WALK_LIST(s, tab->subscribers)
1832 s->hook(s);
1833 }
1834
1835 static void
1836 rt_kick_settle_timer(rtable *tab)
1837 {
1838 tab->base_settle_time = current_time();
1839
1840 if (!tab->settle_timer)
1841 tab->settle_timer = tm_new_init(rt_table_pool, rt_settle_timer, tab, 0, 0);
1842
1843 if (!tm_active(tab->settle_timer))
1844 tm_set(tab->settle_timer, rt_settled_time(tab));
1845 }
1846
1847 static inline void
1848 rt_schedule_notify(rtable *tab)
1849 {
1850 if (EMPTY_LIST(tab->subscribers))
1851 return;
1852
1853 if (tab->base_settle_time)
1854 return;
1855
1856 rt_kick_settle_timer(tab);
1857 }
1858
1859 void
1860 rt_subscribe(rtable *tab, struct rt_subscription *s)
1861 {
1862 s->tab = tab;
1863 rt_lock_table(tab);
1864 add_tail(&tab->subscribers, &s->n);
1865 }
1866
1867 void
1868 rt_unsubscribe(struct rt_subscription *s)
1869 {
1870 rem_node(&s->n);
1871 rt_unlock_table(s->tab);
1872 }
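/*
 * Hedged sketch of the subscription interface above (hook body illustrative):
 * @s must stay allocated until rt_unsubscribe(), the hook fires once the
 * table has settled down after a batch of changes.
 */
static void
example_settled_hook(struct rt_subscription *s UNUSED)
{
  /* react to the routing table having settled */
}

static inline void
example_subscribe(rtable *tab, struct rt_subscription *s)
{
  s->hook = example_settled_hook;
  rt_subscribe(tab, s);
}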
1873
1874 void
1875 rt_setup(pool *p, rtable *t, struct rtable_config *cf)
1876 {
1877 bzero(t, sizeof(*t));
1878 t->name = cf->name;
1879 t->config = cf;
1880 t->addr_type = cf->addr_type;
1881 fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
1882 init_list(&t->channels);
1883
1884 hmap_init(&t->id_map, p, 1024);
1885 hmap_set(&t->id_map, 0);
1886
1887 t->rt_event = ev_new_init(p, rt_event, t);
1888 t->last_rt_change = t->gc_time = current_time();
1889
1890 init_list(&t->subscribers);
1891 }
1892
1893 /**
1894 * rt_init - initialize routing tables
1895 *
1896 * This function is called during BIRD startup. It initializes the
1897 * routing table module.
1898 */
1899 void
1900 rt_init(void)
1901 {
1902 rta_init();
1903 rt_table_pool = rp_new(&root_pool, "Routing tables");
1904 rte_update_pool = lp_new_default(rt_table_pool);
1905 rte_slab = sl_new(rt_table_pool, sizeof(rte));
1906 init_list(&routing_tables);
1907 }
1908
1909
1910 /**
1911 * rt_prune_table - prune a routing table
1912 *
1913 * The prune loop scans routing tables and removes routes belonging to flushing
1914 * protocols, discarded routes and also stale network entries. It is called from
1915 * rt_event(). The event is rescheduled if the current iteration does not finish
1916 * the table. The pruning is directed by the prune state (@prune_state),
1917 * specifying whether the prune cycle is scheduled or running, and there
1918 * is also a persistent pruning iterator (@prune_fit).
1919 *
1920 * The prune loop is used also for channel flushing. For this purpose, the
1921 * channels to flush are marked before the iteration and notified after the
1922 * iteration.
1923 */
1924 static void
1925 rt_prune_table(rtable *tab)
1926 {
1927 struct fib_iterator *fit = &tab->prune_fit;
1928 int limit = 512;
1929
1930 struct channel *c;
1931 node *n, *x;
1932
1933 DBG("Pruning route table %s\n", tab->name);
1934 #ifdef DEBUGGING
1935 fib_check(&tab->fib);
1936 #endif
1937
1938 if (tab->prune_state == 0)
1939 return;
1940
1941 if (tab->prune_state == 1)
1942 {
1943 /* Mark channels to flush */
1944 WALK_LIST2(c, n, tab->channels, table_node)
1945 if (c->channel_state == CS_FLUSHING)
1946 c->flush_active = 1;
1947
1948 FIB_ITERATE_INIT(fit, &tab->fib);
1949 tab->prune_state = 2;
1950 }
1951
1952 again:
1953 FIB_ITERATE_START(&tab->fib, fit, net, n)
1954 {
1955 rte *e;
1956
1957 rescan:
1958 for (e=n->routes; e; e=e->next)
1959 {
1960 if (e->sender->flush_active || (e->flags & REF_DISCARD))
1961 {
1962 if (limit <= 0)
1963 {
1964 FIB_ITERATE_PUT(fit);
1965 ev_schedule(tab->rt_event);
1966 return;
1967 }
1968
1969 rte_discard(e);
1970 limit--;
1971
1972 goto rescan;
1973 }
1974
1975 if (e->flags & REF_MODIFY)
1976 {
1977 if (limit <= 0)
1978 {
1979 FIB_ITERATE_PUT(fit);
1980 ev_schedule(tab->rt_event);
1981 return;
1982 }
1983
1984 rte_modify(e);
1985 limit--;
1986
1987 goto rescan;
1988 }
1989 }
1990
1991 if (!n->routes) /* Orphaned FIB entry */
1992 {
1993 FIB_ITERATE_PUT(fit);
1994 fib_delete(&tab->fib, n);
1995 goto again;
1996 }
1997 }
1998 FIB_ITERATE_END;
1999
2000 #ifdef DEBUGGING
2001 fib_check(&tab->fib);
2002 #endif
2003
2004 tab->gc_counter = 0;
2005 tab->gc_time = current_time();
2006
2007 /* state change 2->0, 3->1 */
2008 tab->prune_state &= 1;
2009
2010 if (tab->prune_state > 0)
2011 ev_schedule(tab->rt_event);
2012
2013 /* FIXME: This should be handled in a better way */
2014 rt_prune_sources();
2015
2016 /* Close flushed channels */
2017 WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
2018 if (c->flush_active)
2019 {
2020 c->flush_active = 0;
2021 channel_set_state(c, CS_DOWN);
2022 }
2023
2024 return;
2025 }
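
/*
 * Illustrative note (not from the original sources): @prune_state is
 * effectively a two-bit value -- bit 0 means "a prune cycle is requested",
 * bit 1 means "a prune cycle is currently running":
 *
 *   0  clean                  nothing to do
 *   1  scheduled              rt_prune_table() starts a run and moves to 2
 *   2  running                the '&= 1' above ends the run in state 0
 *   3  running + rescheduled  the '&= 1' lands on 1 and the event is rerun
 *
 * A request arriving while a run is in progress therefore only sets bit 0
 * again; a sketch of such a scheduler (the real one lives elsewhere in this
 * module, so treat the name and body as an assumption) could look like:
 */
#if 0
static void
example_schedule_prune(rtable *tab)
{
  if (tab->prune_state == 0)
    ev_schedule(tab->rt_event);

  /* State change 0->1, 2->3: remember the request even while running */
  tab->prune_state |= 1;
}
#endif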
2026
2027 void
2028 rt_preconfig(struct config *c)
2029 {
2030 init_list(&c->tables);
2031
2032 rt_new_table(cf_get_symbol("master4"), NET_IP4);
2033 rt_new_table(cf_get_symbol("master6"), NET_IP6);
2034 }
2035
2036
2037 /*
2038 * Some functions for handling internal next hop updates
2039 * triggered by rt_schedule_nhu().
2040 */
2041
2042 static inline int
2043 rta_next_hop_outdated(rta *a)
2044 {
2045 struct hostentry *he = a->hostentry;
2046
2047 if (!he)
2048 return 0;
2049
2050 if (!he->src)
2051 return a->dest != RTD_UNREACHABLE;
2052
2053 return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
2054 (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh));
2055 }
2056
2057 void
2058 rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls)
2059 {
2060 a->hostentry = he;
2061 a->dest = he->dest;
2062 a->igp_metric = he->igp_metric;
2063
2064 if (a->dest != RTD_UNICAST)
2065 {
2066 /* No nexthop */
2067 no_nexthop:
2068 a->nh = (struct nexthop) {};
2069 if (mls)
2070 { /* Store the label stack for later changes */
2071 a->nh.labels_orig = a->nh.labels = mls->len;
2072 memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32));
2073 }
2074 return;
2075 }
2076
2077 if (((!mls) || (!mls->len)) && he->nexthop_linkable)
2078 { /* Just link the nexthop chain, no label append happens. */
2079 memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh)));
2080 return;
2081 }
2082
2083 struct nexthop *nhp = NULL, *nhr = NULL;
2084 int skip_nexthop = 0;
2085
2086 for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next)
2087 {
2088 if (skip_nexthop)
2089 skip_nexthop--;
2090 else
2091 {
2092 nhr = nhp;
2093 nhp = (nhp ? (nhp->next = lp_alloc(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh));
2094 }
2095
2096 memset(nhp, 0, NEXTHOP_MAX_SIZE);
2097 nhp->iface = nh->iface;
2098 nhp->weight = nh->weight;
2099
2100 if (mls)
2101 {
2102 nhp->labels = nh->labels + mls->len;
2103 nhp->labels_orig = mls->len;
2104 if (nhp->labels <= MPLS_MAX_LABEL_STACK)
2105 {
2106 memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */
2107 memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */
2108 }
2109 else
2110 {
2111 log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)",
2112 nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK);
2113 skip_nexthop++;
2114 continue;
2115 }
2116 }
2117 else if (nh->labels)
2118 {
2119 nhp->labels = nh->labels;
2120 nhp->labels_orig = 0;
2121 memcpy(nhp->label, nh->label, nh->labels * sizeof(u32));
2122 }
2123
2124 if (ipa_nonzero(nh->gw))
2125 {
2126 nhp->gw = nh->gw; /* Router nexthop */
2127 nhp->flags |= (nh->flags & RNF_ONLINK);
2128 }
2129 else if (!(nh->iface->flags & IF_MULTIACCESS) || (nh->iface->flags & IF_LOOPBACK))
2130 nhp->gw = IPA_NONE; /* PtP link - no need for nexthop */
2131 else if (ipa_nonzero(he->link))
2132 nhp->gw = he->link; /* Device nexthop with link-local address known */
2133 else
2134 nhp->gw = he->addr; /* Device nexthop with link-local address unknown */
2135 }
2136
2137 if (skip_nexthop)
2138 if (nhr)
2139 nhr->next = NULL;
2140 else
2141 {
2142 a->dest = RTD_UNREACHABLE;
2143 log(L_WARN "No valid nexthop remaining, setting route unreachable");
2144 goto no_nexthop;
2145 }
2146 }
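
/*
 * Worked example (illustrative, not from the original sources): if the
 * resolved hostentry next hop already carries one label (say an IGP/LDP
 * transport label) and the route supplies a two-label stack in @mls, the
 * merged next hop gets nhp->labels = 1 + 2 = 3 with the transport label on
 * top and the two route labels at the bottom, while nhp->labels_orig = 2
 * remembers how many labels came from the route so they can be re-applied
 * after a later next hop change. A merge whose sum would exceed
 * MPLS_MAX_LABEL_STACK is skipped with the warning above rather than
 * truncated.
 */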
2147
2148 static inline rte *
2149 rt_next_hop_update_rte(rtable *tab UNUSED, rte *old)
2150 {
2151 rta *a = alloca(RTA_MAX_SIZE);
2152 memcpy(a, old->attrs, rta_size(old->attrs));
2153
2154 mpls_label_stack mls = { .len = a->nh.labels_orig };
2155 memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32));
2156
2157 rta_apply_hostentry(a, old->attrs->hostentry, &mls);
2158 a->aflags = 0;
2159
2160 rte *e = sl_alloc(rte_slab);
2161 memcpy(e, old, sizeof(rte));
2162 e->attrs = rta_lookup(a);
2163
2164 return e;
2165 }
2166
2167 static inline int
2168 rt_next_hop_update_net(rtable *tab, net *n)
2169 {
2170 rte **k, *e, *new, *old_best, **new_best;
2171 int count = 0;
2172 int free_old_best = 0;
2173
2174 old_best = n->routes;
2175 if (!old_best)
2176 return 0;
2177
2178 for (k = &n->routes; e = *k; k = &e->next)
2179 if (rta_next_hop_outdated(e->attrs))
2180 {
2181 new = rt_next_hop_update_rte(tab, e);
2182 *k = new;
2183
2184 rte_trace_in(D_ROUTES, new->sender, new, "updated");
2185 rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
2186
2187 /* Call a pre-comparison hook */
2188 /* Not really an efficient way to compute this */
2189 if (e->attrs->src->proto->rte_recalculate)
2190 e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);
2191
2192 if (e != old_best)
2193 rte_free_quick(e);
2194 else /* Freeing of the old best rte is postponed */
2195 free_old_best = 1;
2196
2197 e = new;
2198 count++;
2199 }
2200
2201 if (!count)
2202 return 0;
2203
2204 /* Find the new best route */
2205 new_best = NULL;
2206 for (k = &n->routes; e = *k; k = &e->next)
2207 {
2208 if (!new_best || rte_better(e, *new_best))
2209 new_best = k;
2210 }
2211
2212 /* Relink the new best route to the first position */
2213 new = *new_best;
2214 if (new != n->routes)
2215 {
2216 *new_best = new->next;
2217 new->next = n->routes;
2218 n->routes = new;
2219 }
2220
2221 /* Announce the new best route */
2222 if (new != old_best)
2223 rte_trace_in(D_ROUTES, new->sender, new, "updated [best]");
2224
2225 /* Propagate changes */
2226 rte_announce_i(tab, RA_UNDEF, n, NULL, NULL, n->routes, old_best);
2227
2228 if (free_old_best)
2229 rte_free_quick(old_best);
2230
2231 return count;
2232 }
2233
2234 static void
2235 rt_next_hop_update(rtable *tab)
2236 {
2237 struct fib_iterator *fit = &tab->nhu_fit;
2238 int max_feed = 32;
2239
2240 if (tab->nhu_state == NHU_CLEAN)
2241 return;
2242
2243 if (tab->nhu_state == NHU_SCHEDULED)
2244 {
2245 FIB_ITERATE_INIT(fit, &tab->fib);
2246 tab->nhu_state = NHU_RUNNING;
2247 }
2248
2249 FIB_ITERATE_START(&tab->fib, fit, net, n)
2250 {
2251 if (max_feed <= 0)
2252 {
2253 FIB_ITERATE_PUT(fit);
2254 ev_schedule(tab->rt_event);
2255 return;
2256 }
2257 max_feed -= rt_next_hop_update_net(tab, n);
2258 }
2259 FIB_ITERATE_END;
2260
2261 /* State change:
2262 * NHU_DIRTY -> NHU_SCHEDULED
2263 * NHU_RUNNING -> NHU_CLEAN
2264 */
2265 tab->nhu_state &= 1;
2266
2267 if (tab->nhu_state != NHU_CLEAN)
2268 ev_schedule(tab->rt_event);
2269 }
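
/*
 * Illustrative note (not from the original sources): the NHU state mirrors
 * the prune state encoding -- bit 0 (NHU_SCHEDULED) means "an update is
 * requested", bit 1 (NHU_RUNNING) means "an update is in progress", so
 * NHU_DIRTY (3) is a request that arrived during a run. The '&= 1' above
 * turns a finished run into either NHU_CLEAN or NHU_SCHEDULED, and in the
 * latter case the event is rescheduled for another sweep over the table.
 */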
2270
2271
2272 struct rtable_config *
2273 rt_new_table(struct symbol *s, uint addr_type)
2274 {
2275 /* Hack that allows one to 'redefine' the master table */
2276 if ((s->class == SYM_TABLE) &&
2277 (s->table == new_config->def_tables[addr_type]) &&
2278 ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
2279 return s->table;
2280
2281 struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));
2282
2283 cf_define_symbol(s, SYM_TABLE, table, c);
2284 c->name = s->name;
2285 c->addr_type = addr_type;
2286 c->gc_max_ops = 1000;
2287 c->gc_min_time = 5;
2288 c->min_settle_time = 1 S;
2289 c->max_settle_time = 20 S;
2290
2291 add_tail(&new_config->tables, &c->n);
2292
2293 /* First table of each type is kept as default */
2294 if (! new_config->def_tables[addr_type])
2295 new_config->def_tables[addr_type] = c;
2296
2297 return c;
2298 }
2299
2300 /**
2301 * rt_lock_table - lock a routing table
2302 * @r: routing table to be locked
2303 *
2304 * Lock a routing table, because it's in use by a protocol,
2305 * preventing it from being freed when it gets undefined in a new
2306 * configuration.
2307 */
2308 void
2309 rt_lock_table(rtable *r)
2310 {
2311 r->use_count++;
2312 }
2313
2314 /**
2315 * rt_unlock_table - unlock a routing table
2316 * @r: routing table to be unlocked
2317 *
2318 * Unlock a routing table formerly locked by rt_lock_table(),
2319 * that is, decrease its use count and delete it if it's scheduled
2320 * for deletion by configuration changes.
2321 */
2322 void
2323 rt_unlock_table(rtable *r)
2324 {
2325 if (!--r->use_count && r->deleted)
2326 {
2327 struct config *conf = r->deleted;
2328 DBG("Deleting routing table %s\n", r->name);
2329 r->config->table = NULL;
2330 if (r->hostcache)
2331 rt_free_hostcache(r);
2332 rem_node(&r->n);
2333 fib_free(&r->fib);
2334 hmap_free(&r->id_map);
2335 rfree(r->rt_event);
2336 rfree(r->settle_timer);
2337 mb_free(r);
2338 config_del_obstacle(conf);
2339 }
2340 }
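
/*
 * Example (illustrative sketch, not part of the original sources): the usual
 * locking pattern is that anybody keeping a pointer to a table locks it for
 * the whole time the pointer is held, so a table removed from the
 * configuration survives until its last user calls rt_unlock_table(). The
 * caller structure below (struct my_state) is hypothetical.
 */
#if 0
struct my_state {
  rtable *tab;
};

static void
example_attach(struct my_state *s, rtable *tab)
{
  s->tab = tab;
  rt_lock_table(tab);
}

static void
example_detach(struct my_state *s)
{
  rt_unlock_table(s->tab);	/* May free the table if it was deleted meanwhile */
  s->tab = NULL;
}
#endif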
2341
2342 static struct rtable_config *
2343 rt_find_table_config(struct config *cf, char *name)
2344 {
2345 struct symbol *sym = cf_find_symbol(cf, name);
2346 return (sym && (sym->class == SYM_TABLE)) ? sym->table : NULL;
2347 }
2348
2349 /**
2350 * rt_commit - commit new routing table configuration
2351 * @new: new configuration
2352 * @old: original configuration or %NULL if it's boot time config
2353 *
2354 * Scan differences between @old and @new configuration and modify
2355 * the routing tables according to these changes. If @new defines a
2356 * previously unknown table, create it; if it omits a table existing
2357 * in @old, schedule it for deletion (it gets deleted when all protocols
2358 * disconnect from it by calling rt_unlock_table()); if it exists
2359 * in both configurations, leave it unchanged.
2360 */
2361 void
2362 rt_commit(struct config *new, struct config *old)
2363 {
2364 struct rtable_config *o, *r;
2365
2366 DBG("rt_commit:\n");
2367 if (old)
2368 {
2369 WALK_LIST(o, old->tables)
2370 {
2371 rtable *ot = o->table;
2372 if (!ot->deleted)
2373 {
2374 r = rt_find_table_config(new, o->name);
2375 if (r && (r->addr_type == o->addr_type) && !new->shutdown)
2376 {
2377 DBG("\t%s: same\n", o->name);
2378 r->table = ot;
2379 ot->name = r->name;
2380 ot->config = r;
2381 if (o->sorted != r->sorted)
2382 log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
2383 }
2384 else
2385 {
2386 DBG("\t%s: deleted\n", o->name);
2387 ot->deleted = old;
2388 config_add_obstacle(old);
2389 rt_lock_table(ot);
2390 rt_unlock_table(ot);
2391 }
2392 }
2393 }
2394 }
2395
2396 WALK_LIST(r, new->tables)
2397 if (!r->table)
2398 {
2399 rtable *t = mb_allocz(rt_table_pool, sizeof(struct rtable));
2400 DBG("\t%s: created\n", r->name);
2401 rt_setup(rt_table_pool, t, r);
2402 add_tail(&routing_tables, &t->n);
2403 r->table = t;
2404 }
2405 DBG("\tdone\n");
2406 }
2407
2408 static inline void
2409 do_feed_channel(struct channel *c, net *n, rte *e)
2410 {
2411 rte_update_lock();
2412 if (c->ra_mode == RA_ACCEPTED)
2413 rt_notify_accepted(c, n, NULL, NULL, c->refeeding);
2414 else if (c->ra_mode == RA_MERGED)
2415 rt_notify_merged(c, n, NULL, NULL, e, e, c->refeeding);
2416 else /* RA_BASIC */
2417 rt_notify_basic(c, n, e, e, c->refeeding);
2418 rte_update_unlock();
2419 }
2420
2421 /**
2422 * rt_feed_channel - advertise all routes to a channel
2423 * @c: channel to be fed
2424 *
2425 * This function performs one pass of advertisement of routes to a channel that
2426 * is in the ES_FEEDING state. It is called by the protocol code as long as it
2427 * has something to do. (We avoid transferring all the routes in a single pass in
2428 * order not to monopolize CPU time.)
2429 */
2430 int
2431 rt_feed_channel(struct channel *c)
2432 {
2433 struct fib_iterator *fit = &c->feed_fit;
2434 int max_feed = 256;
2435
2436 ASSERT(c->export_state == ES_FEEDING);
2437
2438 if (!c->feed_active)
2439 {
2440 FIB_ITERATE_INIT(fit, &c->table->fib);
2441 c->feed_active = 1;
2442 }
2443
2444 FIB_ITERATE_START(&c->table->fib, fit, net, n)
2445 {
2446 rte *e = n->routes;
2447 if (max_feed <= 0)
2448 {
2449 FIB_ITERATE_PUT(fit);
2450 return 0;
2451 }
2452
2453 if ((c->ra_mode == RA_OPTIMAL) ||
2454 (c->ra_mode == RA_ACCEPTED) ||
2455 (c->ra_mode == RA_MERGED))
2456 if (rte_is_valid(e))
2457 {
2458 /* In the meantime, the protocol may have gone down */
2459 if (c->export_state != ES_FEEDING)
2460 goto done;
2461
2462 do_feed_channel(c, n, e);
2463 max_feed--;
2464 }
2465
2466 if (c->ra_mode == RA_ANY)
2467 for(e = n->routes; e; e = e->next)
2468 {
2469 /* In the meantime, the protocol may have gone down */
2470 if (c->export_state != ES_FEEDING)
2471 goto done;
2472
2473 if (!rte_is_valid(e))
2474 continue;
2475
2476 do_feed_channel(c, n, e);
2477 max_feed--;
2478 }
2479 }
2480 FIB_ITERATE_END;
2481
2482 done:
2483 c->feed_active = 0;
2484 return 1;
2485 }
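
/*
 * Example (illustrative sketch, not part of the original sources): the
 * expected caller is an event hook in the generic channel code that keeps
 * invoking rt_feed_channel() in bounded passes -- 0 means "not done yet,
 * reschedule me", 1 means the whole table has been fed. The field name
 * c->feed_event and the wrap-up step are assumptions here.
 */
#if 0
static void
example_feed_loop(void *data)
{
  struct channel *c = data;

  if (c->export_state != ES_FEEDING)
    return;

  if (!rt_feed_channel(c))
    {
      ev_schedule(c->feed_event);	/* Continue in the next pass */
      return;
    }

  /* Feeding finished; advance the export state (handled by the channel code) */
}
#endif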
2486
2487 /**
2488 * rt_feed_channel_abort - abort protocol feeding
2489 * @c: channel
2490 *
2491 * This function is called by the protocol code when the protocol stops or
2492 * ceases to exist during the feeding.
2493 */
2494 void
2495 rt_feed_channel_abort(struct channel *c)
2496 {
2497 if (c->feed_active)
2498 {
2499 /* Unlink the iterator */
2500 fit_get(&c->table->fib, &c->feed_fit);
2501 c->feed_active = 0;
2502 }
2503 }
2504
2505
2506 /*
2507 * Import table
2508 */
2509
2510 int
2511 rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
2512 {
2513 struct rtable *tab = c->in_table;
2514 rte *old, **pos;
2515 net *net;
2516
2517 if (new)
2518 {
2519 net = net_get(tab, n);
2520
2521 if (!new->pref)
2522 new->pref = c->preference;
2523
2524 if (!rta_is_cached(new->attrs))
2525 new->attrs = rta_lookup(new->attrs);
2526 }
2527 else
2528 {
2529 net = net_find(tab, n);
2530
2531 if (!net)
2532 goto drop_withdraw;
2533 }
2534
2535 /* Find the old rte */
2536 for (pos = &net->routes; old = *pos; pos = &old->next)
2537 if (old->attrs->src == src)
2538 {
2539 if (new && rte_same(old, new))
2540 {
2541 /* Refresh the old rte, continue with update to main rtable */
2542 if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY))
2543 {
2544 old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);
2545 return 1;
2546 }
2547
2548 goto drop_update;
2549 }
2550
2551 /* Move iterator if needed */
2552 if (old == c->reload_next_rte)
2553 c->reload_next_rte = old->next;
2554
2555 /* Remove the old rte */
2556 *pos = old->next;
2557 rte_free_quick(old);
2558 tab->rt_count--;
2559
2560 break;
2561 }
2562
2563 if (!new)
2564 {
2565 if (!old)
2566 goto drop_withdraw;
2567
2568 return 1;
2569 }
2570
2571 struct channel_limit *l = &c->rx_limit;
2572 if (l->action && !old)
2573 {
2574 if (tab->rt_count >= l->limit)
2575 channel_notify_limit(c, l, PLD_RX, tab->rt_count);
2576
2577 if (l->state == PLS_BLOCKED)
2578 {
2579 /* Required by rte_trace_in() */
2580 new->net = net;
2581
2582 rte_trace_in(D_FILTERS, c, new, "ignored [limit]");
2583 goto drop_update;
2584 }
2585 }
2586
2587 /* Insert the new rte */
2588 rte *e = rte_do_cow(new);
2589 e->flags |= REF_COW;
2590 e->net = net;
2591 e->sender = c;
2592 e->lastmod = current_time();
2593 e->next = *pos;
2594 *pos = e;
2595 tab->rt_count++;
2596 return 1;
2597
2598 drop_update:
2599 c->stats.imp_updates_received++;
2600 c->stats.imp_updates_ignored++;
2601 rte_free(new);
2602 return 0;
2603
2604 drop_withdraw:
2605 c->stats.imp_withdraws_received++;
2606 c->stats.imp_withdraws_ignored++;
2607 return 0;
2608 }
2609
2610 int
2611 rt_reload_channel(struct channel *c)
2612 {
2613 struct rtable *tab = c->in_table;
2614 struct fib_iterator *fit = &c->reload_fit;
2615 int max_feed = 64;
2616
2617 ASSERT(c->channel_state == CS_UP);
2618
2619 if (!c->reload_active)
2620 {
2621 FIB_ITERATE_INIT(fit, &tab->fib);
2622 c->reload_active = 1;
2623 }
2624
2625 do {
2626 for (rte *e = c->reload_next_rte; e; e = e->next)
2627 {
2628 if (max_feed-- <= 0)
2629 {
2630 c->reload_next_rte = e;
2631 debug("%s channel reload burst split (max_feed=%d)", c->proto->name, max_feed);
2632 return 0;
2633 }
2634
2635 rte_update2(c, e->net->n.addr, rte_do_cow(e), e->attrs->src);
2636 }
2637
2638 c->reload_next_rte = NULL;
2639
2640 FIB_ITERATE_START(&tab->fib, fit, net, n)
2641 {
2642 if (c->reload_next_rte = n->routes)
2643 {
2644 FIB_ITERATE_PUT_NEXT(fit, &tab->fib);
2645 break;
2646 }
2647 }
2648 FIB_ITERATE_END;
2649 }
2650 while (c->reload_next_rte);
2651
2652 c->reload_active = 0;
2653 return 1;
2654 }
2655
2656 void
2657 rt_reload_channel_abort(struct channel *c)
2658 {
2659 if (c->reload_active)
2660 {
2661 /* Unlink the iterator */
2662 fit_get(&c->in_table->fib, &c->reload_fit);
2663 c->reload_next_rte = NULL;
2664 c->reload_active = 0;
2665 }
2666 }
2667
2668 void
2669 rt_prune_sync(rtable *t, int all)
2670 {
2671 FIB_WALK(&t->fib, net, n)
2672 {
2673 rte *e, **ee = &n->routes;
2674 while (e = *ee)
2675 {
2676 if (all || (e->flags & (REF_STALE | REF_DISCARD)))
2677 {
2678 *ee = e->next;
2679 rte_free_quick(e);
2680 t->rt_count--;
2681 }
2682 else
2683 ee = &e->next;
2684 }
2685 }
2686 FIB_WALK_END;
2687 }
2688
2689
2690 /*
2691 * Export table
2692 */
2693
2694 int
2695 rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, int refeed)
2696 {
2697 struct rtable *tab = c->out_table;
2698 struct rte_src *src;
2699 rte *old, **pos;
2700 net *net;
2701
2702 if (new)
2703 {
2704 net = net_get(tab, n);
2705 src = new->attrs->src;
2706
2707 rte_store_tmp_attrs(new, rte_update_pool, NULL);
2708
2709 if (!rta_is_cached(new->attrs))
2710 new->attrs = rta_lookup(new->attrs);
2711 }
2712 else
2713 {
2714 net = net_find(tab, n);
2715 src = old0->attrs->src;
2716
2717 if (!net)
2718 goto drop_withdraw;
2719 }
2720
2721 /* Find the old rte */
2722 for (pos = &net->routes; old = *pos; pos = &old->next)
2723 if ((c->ra_mode != RA_ANY) || (old->attrs->src == src))
2724 {
2725 if (new && rte_same(old, new))
2726 {
2727 /* REF_STALE / REF_DISCARD not used in export table */
2728 /*
2729 if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY))
2730 {
2731 old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);
2732 return 1;
2733 }
2734 */
2735
2736 goto drop_update;
2737 }
2738
2739 /* Remove the old rte */
2740 *pos = old->next;
2741 rte_free_quick(old);
2742 tab->rt_count--;
2743
2744 break;
2745 }
2746
2747 if (!new)
2748 {
2749 if (!old)
2750 goto drop_withdraw;
2751
2752 return 1;
2753 }
2754
2755 /* Insert the new rte */
2756 rte *e = rte_do_cow(new);
2757 e->flags |= REF_COW;
2758 e->net = net;
2759 e->sender = c;
2760 e->lastmod = current_time();
2761 e->next = *pos;
2762 *pos = e;
2763 tab->rt_count++;
2764 return 1;
2765
2766 drop_update:
2767 return refeed;
2768
2769 drop_withdraw:
2770 return 0;
2771 }
2772
2773
2774 /*
2775 * Hostcache
2776 */
2777
2778 static inline u32
2779 hc_hash(ip_addr a, rtable *dep)
2780 {
2781 return ipa_hash(a) ^ ptr_hash(dep);
2782 }
2783
2784 static inline void
2785 hc_insert(struct hostcache *hc, struct hostentry *he)
2786 {
2787 uint k = he->hash_key >> hc->hash_shift;
2788 he->next = hc->hash_table[k];
2789 hc->hash_table[k] = he;
2790 }
2791
2792 static inline void
2793 hc_remove(struct hostcache *hc, struct hostentry *he)
2794 {
2795 struct hostentry **hep;
2796 uint k = he->hash_key >> hc->hash_shift;
2797
2798 for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
2799 *hep = he->next;
2800 }
2801
2802 #define HC_DEF_ORDER 10
2803 #define HC_HI_MARK *4
2804 #define HC_HI_STEP 2
2805 #define HC_HI_ORDER 16 /* Must be at most 16 */
2806 #define HC_LO_MARK /5
2807 #define HC_LO_STEP 2
2808 #define HC_LO_ORDER 10
2809
2810 static void
2811 hc_alloc_table(struct hostcache *hc, unsigned order)
2812 {
2813 uint hsize = 1 << order;
2814 hc->hash_order = order;
2815 hc->hash_shift = 32 - order;
2816 hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK);
2817 hc->hash_min = (order <= HC_LO_ORDER) ? 0U : (hsize HC_LO_MARK);
2818
2819 hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
2820 }
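
/*
 * Worked example (illustrative, not from the original sources): with the
 * default order 10 the table has 1024 buckets and hash_shift is 22, so
 * hc_insert() indexes entries by the top 10 bits of their 32-bit key.
 * hash_max = 1024 * 4 = 4096 and, because order == HC_LO_ORDER, hash_min
 * stays 0. Exceeding 4096 entries grows the table by HC_HI_STEP to order 12
 * (4096 buckets, limits 16384 / 819), dropping below 819 entries shrinks it
 * back, and at HC_HI_ORDER (16) hash_max becomes ~0U so the table never
 * grows beyond 65536 buckets.
 */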
2821
2822 static void
2823 hc_resize(struct hostcache *hc, unsigned new_order)
2824 {
2825 struct hostentry **old_table = hc->hash_table;
2826 struct hostentry *he, *hen;
2827 uint old_size = 1 << hc->hash_order;
2828 uint i;
2829
2830 hc_alloc_table(hc, new_order);
2831 for (i = 0; i < old_size; i++)
2832 for (he = old_table[i]; he != NULL; he=hen)
2833 {
2834 hen = he->next;
2835 hc_insert(hc, he);
2836 }
2837 mb_free(old_table);
2838 }
2839
2840 static struct hostentry *
2841 hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
2842 {
2843 struct hostentry *he = sl_alloc(hc->slab);
2844
2845 *he = (struct hostentry) {
2846 .addr = a,
2847 .link = ll,
2848 .tab = dep,
2849 .hash_key = k,
2850 };
2851
2852 add_tail(&hc->hostentries, &he->ln);
2853 hc_insert(hc, he);
2854
2855 hc->hash_items++;
2856 if (hc->hash_items > hc->hash_max)
2857 hc_resize(hc, hc->hash_order + HC_HI_STEP);
2858
2859 return he;
2860 }
2861
2862 static void
2863 hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
2864 {
2865 rta_free(he->src);
2866
2867 rem_node(&he->ln);
2868 hc_remove(hc, he);
2869 sl_free(hc->slab, he);
2870
2871 hc->hash_items--;
2872 if (hc->hash_items < hc->hash_min)
2873 hc_resize(hc, hc->hash_order - HC_LO_STEP);
2874 }
2875
2876 static void
2877 rt_init_hostcache(rtable *tab)
2878 {
2879 struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
2880 init_list(&hc->hostentries);
2881
2882 hc->hash_items = 0;
2883 hc_alloc_table(hc, HC_DEF_ORDER);
2884 hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));
2885
2886 hc->lp = lp_new(rt_table_pool, LP_GOOD_SIZE(1024));
2887 hc->trie = f_new_trie(hc->lp, 0);
2888
2889 tab->hostcache = hc;
2890 }
2891
2892 static void
2893 rt_free_hostcache(rtable *tab)
2894 {
2895 struct hostcache *hc = tab->hostcache;
2896
2897 node *n;
2898 WALK_LIST(n, hc->hostentries)
2899 {
2900 struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
2901 rta_free(he->src);
2902
2903 if (he->uc)
2904 log(L_ERR "Hostcache is not empty in table %s", tab->name);
2905 }
2906
2907 rfree(hc->slab);
2908 rfree(hc->lp);
2909 mb_free(hc->hash_table);
2910 mb_free(hc);
2911 }
2912
2913 static void
2914 rt_notify_hostcache(rtable *tab, net *net)
2915 {
2916 if (tab->hcu_scheduled)
2917 return;
2918
2919 if (trie_match_net(tab->hostcache->trie, net->n.addr))
2920 rt_schedule_hcu(tab);
2921 }
2922
2923 static int
2924 if_local_addr(ip_addr a, struct iface *i)
2925 {
2926 struct ifa *b;
2927
2928 WALK_LIST(b, i->addrs)
2929 if (ipa_equal(a, b->ip))
2930 return 1;
2931
2932 return 0;
2933 }
2934
2935 u32
2936 rt_get_igp_metric(rte *rt)
2937 {
2938 eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);
2939
2940 if (ea)
2941 return ea->u.data;
2942
2943 rta *a = rt->attrs;
2944
2945 #ifdef CONFIG_OSPF
2946 if ((a->source == RTS_OSPF) ||
2947 (a->source == RTS_OSPF_IA) ||
2948 (a->source == RTS_OSPF_EXT1))
2949 return rt->u.ospf.metric1;
2950 #endif
2951
2952 #ifdef CONFIG_RIP
2953 if (a->source == RTS_RIP)
2954 return rt->u.rip.metric;
2955 #endif
2956
2957 #ifdef CONFIG_BGP
2958 if (a->source == RTS_BGP)
2959 {
2960 u64 metric = bgp_total_aigp_metric(rt);
2961 return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN);
2962 }
2963 #endif
2964
2965 #ifdef CONFIG_BABEL
2966 if (a->source == RTS_BABEL)
2967 return rt->u.babel.metric;
2968 #endif
2969
2970 if (a->source == RTS_DEVICE)
2971 return 0;
2972
2973 return IGP_METRIC_UNKNOWN;
2974 }
2975
2976 static int
2977 rt_update_hostentry(rtable *tab, struct hostentry *he)
2978 {
2979 rta *old_src = he->src;
2980 int direct = 0;
2981 int pxlen = 0;
2982
2983 /* Reset the hostentry */
2984 he->src = NULL;
2985 he->dest = RTD_UNREACHABLE;
2986 he->nexthop_linkable = 0;
2987 he->igp_metric = 0;
2988
2989 net_addr he_addr;
2990 net_fill_ip_host(&he_addr, he->addr);
2991 net *n = net_route(tab, &he_addr);
2992 if (n)
2993 {
2994 rte *e = n->routes;
2995 rta *a = e->attrs;
2996 pxlen = n->n.addr->pxlen;
2997
2998 if (a->hostentry)
2999 {
3000 /* Recursive route should not depend on another recursive route */
3001 log(L_WARN "Next hop address %I resolvable through recursive route for %N",
3002 he->addr, n->n.addr);
3003 goto done;
3004 }
3005
3006 if (a->dest == RTD_UNICAST)
3007 {
3008 for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
3009 if (ipa_zero(nh->gw))
3010 {
3011 if (if_local_addr(he->addr, nh->iface))
3012 {
3013 /* The host address is a local address, this is not valid */
3014 log(L_WARN "Next hop address %I is a local address of iface %s",
3015 he->addr, nh->iface->name);
3016 goto done;
3017 }
3018
3019 direct++;
3020 }
3021 }
3022
3023 he->src = rta_clone(a);
3024 he->dest = a->dest;
3025 he->nexthop_linkable = !direct;
3026 he->igp_metric = rt_get_igp_metric(e);
3027 }
3028
3029 done:
3030 /* Add a prefix range to the trie */
3031 trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);
3032
3033 rta_free(old_src);
3034 return old_src != he->src;
3035 }
3036
3037 static void
3038 rt_update_hostcache(rtable *tab)
3039 {
3040 struct hostcache *hc = tab->hostcache;
3041 struct hostentry *he;
3042 node *n, *x;
3043
3044 /* Reset the trie */
3045 lp_flush(hc->lp);
3046 hc->trie = f_new_trie(hc->lp, 0);
3047
3048 WALK_LIST_DELSAFE(n, x, hc->hostentries)
3049 {
3050 he = SKIP_BACK(struct hostentry, ln, n);
3051 if (!he->uc)
3052 {
3053 hc_delete_hostentry(hc, he);
3054 continue;
3055 }
3056
3057 if (rt_update_hostentry(tab, he))
3058 rt_schedule_nhu(he->tab);
3059 }
3060
3061 tab->hcu_scheduled = 0;
3062 }
3063
3064 struct hostentry *
3065 rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
3066 {
3067 struct hostentry *he;
3068
3069 if (!tab->hostcache)
3070 rt_init_hostcache(tab);
3071
3072 u32 k = hc_hash(a, dep);
3073 struct hostcache *hc = tab->hostcache;
3074 for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
3075 if (ipa_equal(he->addr, a) && (he->tab == dep))
3076 return he;
3077
3078 he = hc_new_hostentry(hc, a, ipa_zero(ll) ? a : ll, dep, k);
3079 rt_update_hostentry(tab, he);
3080 return he;
3081 }
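
/*
 * Example (illustrative sketch, not part of the original sources): a protocol
 * resolving a recursive next hop (e.g. a BGP gateway) would look up the
 * hostentry for the gateway in its IGP table and let rta_apply_hostentry()
 * copy the resolved next hop, destination and IGP metric into the route
 * attributes. @dep is the table whose routes depend on the resolution and
 * thus receive next hop updates when it changes; the wrapper name below is
 * hypothetical.
 */
#if 0
static void
example_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab,
			       ip_addr gw, ip_addr ll, mpls_label_stack *mls)
{
  rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ll, dep), mls);
}
#endif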
3082
3083
3084 /*
3085 * Documentation for functions declared inline in route.h
3086 */
3087 #if 0
3088
3089 /**
3090 * net_find - find a network entry
3091 * @tab: a routing table
3092 * @addr: address of the network
3093 *
3094 * net_find() looks up the given network in routing table @tab and
3095 * returns a pointer to its &net entry or %NULL if no such network
3096 * exists.
3097 */
3098 static inline net *net_find(rtable *tab, net_addr *addr)
3099 { DUMMY; }
3100
3101 /**
3102 * net_get - obtain a network entry
3103 * @tab: a routing table
3104 * @addr: address of the network
3105 *
3106 * net_get() looks up the given network in routing table @tab and
3107 * returns a pointer to its &net entry. If no such entry exists, it's
3108 * created.
3109 */
3110 static inline net *net_get(rtable *tab, net_addr *addr)
3111 { DUMMY; }
3112
3113 /**
3114 * rte_cow - copy a route for writing
3115 * @r: a route entry to be copied
3116 *
3117 * rte_cow() takes a &rte and prepares it for modification. The exact action
3118 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
3119 * just returned unchanged, else a new temporary entry with the same contents
3120 * is created.
3121 *
3122 * The primary use of this function is inside the filter machinery -- when
3123 * a filter wants to modify &rte contents (to change the preference or to
3124 * attach another set of attributes), it must ensure that the &rte is not
3125 * shared with anyone else (and especially that it isn't stored in any routing
3126 * table).
3127 *
3128 * Result: a pointer to the new writable &rte.
3129 */
3130 static inline rte * rte_cow(rte *r)
3131 { DUMMY; }
3132
3133 #endif
3134