/*
 *	BIRD -- Routing Tables
 *
 *	(c) 1998--2000 Martin Mares <mj@ucw.cz>
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

/**
 * DOC: Routing tables
 *
 * Routing tables are probably the most important structures BIRD uses. They
 * hold all the information about known networks, the associated routes and
 * their attributes.
 *
 * There are multiple routing tables (a primary one together with any
 * number of secondary ones if requested by the configuration). Each table
 * is basically a FIB containing entries describing the individual
 * destination networks. For each network (represented by structure &net),
 * there is a one-way linked list of route entries (&rte), the first entry
 * on the list being the best one (i.e., the one we currently use
 * for routing); the order of the other ones is undetermined.
 *
 * A &rte contains information specific to the route (preference, protocol
 * metrics, time of last modification etc.) and a pointer to a &rta structure
 * (see the route attribute module for a precise explanation) holding the
 * remaining route attributes which are expected to be shared by multiple
 * routes in order to conserve memory.
 */

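/*
 * Example (an illustrative sketch, not code used by BIRD itself): given a
 * network node @n, the best route is simply n->routes and the remaining
 * alternatives follow the ->next chain:
 *
 *   for (rte *e = n->routes; e; e = e->next)
 *     ...   // the first iteration sees the best route
 */
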
#undef LOCAL_DEBUG

#include "nest/bird.h"
#include "nest/route.h"
#include "nest/protocol.h"
#include "nest/iface.h"
#include "lib/resource.h"
#include "lib/event.h"
#include "lib/timer.h"
#include "lib/string.h"
#include "conf/conf.h"
#include "filter/filter.h"
#include "filter/data.h"
#include "lib/hash.h"
#include "lib/alloca.h"

#ifdef CONFIG_BGP
#include "proto/bgp/bgp.h"
#endif

pool *rt_table_pool;

static slab *rte_slab;
static linpool *rte_update_pool;

list routing_tables;

static void rt_free_hostcache(rtable *tab);
static void rt_notify_hostcache(rtable *tab, net *net);
static void rt_update_hostcache(rtable *tab);
static void rt_next_hop_update(rtable *tab);
static inline void rt_prune_table(rtable *tab);
static inline void rt_schedule_notify(rtable *tab);


/* Like fib_route(), but skips empty net entries */
static inline void *
net_route_ip4(rtable *t, net_addr_ip4 *n)
{
  net *r;

  while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
  {
    n->pxlen--;
    ip4_clrbit(&n->prefix, n->pxlen);
  }

  return r;
}

static inline void *
net_route_ip6(rtable *t, net_addr_ip6 *n)
{
  net *r;

  while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
  {
    n->pxlen--;
    ip6_clrbit(&n->prefix, n->pxlen);
  }

  return r;
}

static inline void *
net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n)
{
  struct fib_node *fn;

  while (1)
  {
    net *best = NULL;
    int best_pxlen = 0;

    /* We need to do dst first matching. Since sadr addresses are hashed on dst
       prefix only, find the hash table chain and go through it to find the
       match with the longest matching src prefix. */
    for (fn = fib_get_chain(&t->fib, (net_addr *) n); fn; fn = fn->next)
    {
      net_addr_ip6_sadr *a = (void *) fn->addr;

      if (net_equal_dst_ip6_sadr(n, a) &&
	  net_in_net_src_ip6_sadr(n, a) &&
	  (a->src_pxlen >= best_pxlen))
      {
	best = fib_node_to_user(&t->fib, fn);
	best_pxlen = a->src_pxlen;
      }
    }

    if (best)
      return best;

    if (!n->dst_pxlen)
      break;

    n->dst_pxlen--;
    ip6_clrbit(&n->dst_prefix, n->dst_pxlen);
  }

  return NULL;
}

void *
net_route(rtable *tab, const net_addr *n)
{
  ASSERT(tab->addr_type == n->type);

  net_addr *n0 = alloca(n->length);
  net_copy(n0, n);

  switch (n->type)
  {
  case NET_IP4:
  case NET_VPN4:
  case NET_ROA4:
    return net_route_ip4(tab, (net_addr_ip4 *) n0);

  case NET_IP6:
  case NET_VPN6:
  case NET_ROA6:
    return net_route_ip6(tab, (net_addr_ip6 *) n0);

  case NET_IP6_SADR:
    return net_route_ip6_sadr(tab, (net_addr_ip6_sadr *) n0);

  default:
    return NULL;
  }
}

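/*
 * Example (an illustrative sketch; @tab and @dst_addr are hypothetical):
 * longest-prefix lookup of a single destination address via net_route():
 *
 *   net_addr_ip4 a = NET_ADDR_IP4(dst_addr, 32);
 *   net *n = net_route(tab, (net_addr *) &a);
 *
 * net_route() works on a private copy of the address, stripping host bits
 * until a non-empty net entry is found, so @a itself is left untouched.
 */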

static int
net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
{
  struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
  struct fib_node *fn;
  int anything = 0;

  while (1)
  {
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_roa4 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes))
      {
	anything = 1;
	if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
	  return ROA_VALID;
      }
    }

    if (n.pxlen == 0)
      break;

    n.pxlen--;
    ip4_clrbit(&n.prefix, n.pxlen);
  }

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}

static int
net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
{
  struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
  struct fib_node *fn;
  int anything = 0;

  while (1)
  {
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_roa6 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes))
      {
	anything = 1;
	if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
	  return ROA_VALID;
      }
    }

    if (n.pxlen == 0)
      break;

    n.pxlen--;
    ip6_clrbit(&n.prefix, n.pxlen);
  }

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}

/**
 * roa_check - check validity of route origination in a ROA table
 * @tab: ROA table
 * @n: network prefix to check
 * @asn: AS number of network prefix
 *
 * Implements RFC 6483 route validation for the given network prefix. The
 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
 * a candidate ROA with matching ASN and maxlen field greater than or equal to
 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
 * the caller cannot determine the origin AS, 0 could be used (in that case
 * ROA_VALID cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6,
 * network @n must have type NET_IP4 or NET_IP6, respectively.
 */
int
net_roa_check(rtable *tab, const net_addr *n, u32 asn)
{
  if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
    return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn);
  else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
    return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn);
  else
    return ROA_UNKNOWN;	/* Should not happen */
}

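/*
 * Example (an illustrative sketch; @roa_tab is hypothetical): validating a
 * received prefix/origin pair:
 *
 *   net_addr_ip4 px = NET_ADDR_IP4(ip4_build(192, 0, 2, 0), 24);
 *   switch (net_roa_check(roa_tab, (net_addr *) &px, 64500))
 *   {
 *   case ROA_VALID:   ...	// covered by a ROA matching ASN and maxlen
 *   case ROA_INVALID: ...	// covered, but ASN or maxlen does not match
 *   case ROA_UNKNOWN: ...	// no covering ROA at all
 *   }
 */
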
/**
 * rte_find - find a route
 * @net: network node
 * @src: route source
 *
 * The rte_find() function returns a route for destination @net
 * which is from route source @src.
 */
rte *
rte_find(net *net, struct rte_src *src)
{
  rte *e = net->routes;

  while (e && e->attrs->src != src)
    e = e->next;
  return e;
}

/**
 * rte_get_temp - get a temporary &rte
 * @a: attributes to assign to the new route (a &rta; in case it's
 * un-cached, rte_update() will create a cached copy automatically)
 *
 * Create a temporary &rte and bind it with the attributes @a.
 * Also set route preference to the default preference set for
 * the protocol.
 */
rte *
rte_get_temp(rta *a)
{
  rte *e = sl_alloc(rte_slab);

  e->attrs = a;
  e->id = 0;
  e->flags = 0;
  e->pref = 0;
  return e;
}

rte *
rte_do_cow(rte *r)
{
  rte *e = sl_alloc(rte_slab);

  memcpy(e, r, sizeof(rte));
  e->attrs = rta_clone(r->attrs);
  e->flags = 0;
  return e;
}

/**
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
 * @r: a route entry to be copied
 * @lp: a linpool from which to allocate &rta
 *
 * rte_cow_rta() takes a &rte and prepares it and its associated &rta for
 * modification. There are three possibilities: First, both &rte and &rta are
 * private copies, in which case they are returned unchanged. Second, &rte is a
 * private copy, but &rta is cached, in which case &rta is duplicated using
 * rta_do_cow(). Third, &rte is shared and &rta is cached, in which case both
 * structures are duplicated by rte_do_cow() and rta_do_cow().
 *
 * Note that in the second case, the cached &rta loses one reference, while the
 * private copy created by rta_do_cow() is a shallow copy sharing indirect data
 * (eattrs, nexthops, ...) with it. To work properly, the original shared &rta
 * should have another reference during the life of the created private copy.
 *
 * Result: a pointer to the new writable &rte with writable &rta.
 */
rte *
rte_cow_rta(rte *r, linpool *lp)
{
  if (!rta_is_cached(r->attrs))
    return r;

  r = rte_cow(r);
  rta *a = rta_do_cow(r->attrs, lp);
  rta_free(r->attrs);
  r->attrs = a;
  return r;
}

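/*
 * Example (an illustrative sketch; the modified field is just an example):
 * preparing a route for in-place attribute modification:
 *
 *   r = rte_cow_rta(r, rte_update_pool);
 *   r->attrs->igp_metric = 10;	// safe, both rte and rta are now private
 *
 * The caller must continue with the returned pointer, as @r may be replaced.
 */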

/**
 * rte_init_tmp_attrs - initialize temporary ea_list for route
 * @r: route entry to be modified
 * @lp: linpool from which to allocate attributes
 * @max: maximum number of added temporary attributes
 *
 * This function is supposed to be called from make_tmp_attrs() and
 * store_tmp_attrs() hooks before the rte_make_tmp_attr() / rte_store_tmp_attr()
 * functions. It allocates an &ea_list with space for @max items of temporary
 * attributes and puts it on top of the eattrs stack.
 */
void
rte_init_tmp_attrs(rte *r, linpool *lp, uint max)
{
  struct ea_list *e = lp_alloc(lp, sizeof(struct ea_list) + max * sizeof(eattr));

  e->next = r->attrs->eattrs;
  e->flags = EALF_SORTED | EALF_TEMP;
  e->count = 0;

  r->attrs->eattrs = e;
}

/**
 * rte_make_tmp_attr - make temporary eattr from private route fields
 * @r: route entry to be modified
 * @id: attribute ID
 * @type: attribute type
 * @val: attribute value (u32 or adata ptr)
 *
 * This function is supposed to be called from the make_tmp_attrs() hook for
 * each temporary attribute, after the temporary &ea_list was initialized by
 * rte_init_tmp_attrs(). It checks whether the temporary attribute is supposed
 * to be defined (based on route pflags) and if so, it fills an &eattr field in
 * the preallocated temporary &ea_list on top of the route @r eattrs stack.
 *
 * Note that it may require a free &eattr slot in the temporary &ea_list, so it
 * must not be called more times than the @max argument of rte_init_tmp_attrs().
 */
void
rte_make_tmp_attr(rte *r, uint id, uint type, uintptr_t val)
{
  if (r->pflags & EA_ID_FLAG(id))
  {
    ea_list *e = r->attrs->eattrs;
    eattr *a = &e->attrs[e->count++];
    a->id = id;
    a->type = type;
    a->flags = 0;

    if (type & EAF_EMBEDDED)
      a->u.data = (u32) val;
    else
      a->u.ptr = (struct adata *) val;
  }
}

/**
 * rte_store_tmp_attr - store temporary eattr to private route fields
 * @r: route entry to be modified
 * @id: attribute ID
 *
 * This function is supposed to be called from the store_tmp_attrs() hook for
 * each temporary attribute, after the temporary &ea_list was initialized by
 * rte_init_tmp_attrs(). It checks whether the temporary attribute is defined
 * in the route @r eattrs stack, updates route pflags accordingly, undefines it
 * by filling an &eattr field in the preallocated temporary &ea_list on top of
 * the eattrs stack, and returns the value. The caller is supposed to store it
 * in the appropriate private field.
 *
 * Note that it may require a free &eattr slot in the temporary &ea_list, so it
 * must not be called more times than the @max argument of rte_init_tmp_attrs().
 */
uintptr_t
rte_store_tmp_attr(rte *r, uint id)
{
  ea_list *e = r->attrs->eattrs;
  eattr *a = ea_find(e->next, id);

  if (a)
  {
    e->attrs[e->count++] = (struct eattr) { .id = id, .type = EAF_TYPE_UNDEF };
    r->pflags |= EA_ID_FLAG(id);
    return (a->type & EAF_EMBEDDED) ? a->u.data : (uintptr_t) a->u.ptr;
  }
  else
  {
    r->pflags &= ~EA_ID_FLAG(id);
    return 0;
  }
}

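/*
 * Example (an illustrative sketch of a protocol's hooks; the attribute ID
 * EA_EXAMPLE_TAG and the private field rt->u.ex.tag are hypothetical):
 *
 *   static void
 *   example_make_tmp_attrs(rte *rt, linpool *lp)
 *   {
 *     rte_init_tmp_attrs(rt, lp, 1);
 *     rte_make_tmp_attr(rt, EA_EXAMPLE_TAG, EAF_TYPE_INT, rt->u.ex.tag);
 *   }
 *
 *   static void
 *   example_store_tmp_attrs(rte *rt, linpool *lp)
 *   {
 *     rte_init_tmp_attrs(rt, lp, 1);
 *     rt->u.ex.tag = rte_store_tmp_attr(rt, EA_EXAMPLE_TAG);
 *   }
 */
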
/**
 * rte_make_tmp_attrs - prepare route by adding all relevant temporary route attributes
 * @r: route entry to be modified (may be replaced if COW)
 * @lp: linpool from which to allocate attributes
 * @old_attrs: temporary ref to old &rta (may be NULL)
 *
 * This function expands privately stored protocol-dependent route attributes
 * to a uniform &eattr / &ea_list representation. It is essentially a wrapper
 * around the protocol make_tmp_attrs() hook, which does some additional work
 * like ensuring that route @r is writable.
 *
 * The route @r may be read-only (with the %REF_COW flag), in that case a rw
 * copy is obtained by rte_cow() and @r is replaced. If @r is originally rw, it
 * may be directly modified (and it is never copied).
 *
 * If the @old_attrs ptr is supplied, the function obtains another reference to
 * the old cached &rta, which is necessary in some cases (see rte_cow_rta() for
 * details). It is freed by rte_store_tmp_attrs(), or manually by rta_free().
 *
 * Generally, if the caller ensures that @r is read-only (e.g. in route export)
 * then it may ignore @old_attrs (and set it to NULL), but must handle
 * replacement of @r. If the caller ensures that @r is writable (e.g. in route
 * import) then it may ignore replacement of @r, but it must handle @old_attrs.
 */
void
rte_make_tmp_attrs(rte **r, linpool *lp, rta **old_attrs)
{
  void (*make_tmp_attrs)(rte *r, linpool *lp);
  make_tmp_attrs = (*r)->attrs->src->proto->make_tmp_attrs;

  if (!make_tmp_attrs)
    return;

  /* We may need to keep ref to old attributes, will be freed in rte_store_tmp_attrs() */
  if (old_attrs)
    *old_attrs = rta_is_cached((*r)->attrs) ? rta_clone((*r)->attrs) : NULL;

  *r = rte_cow_rta(*r, lp);
  make_tmp_attrs(*r, lp);
}

/**
 * rte_store_tmp_attrs - store temporary route attributes back to private route fields
 * @r: route entry to be modified
 * @lp: linpool from which to allocate attributes
 * @old_attrs: temporary ref to old &rta
 *
 * This function stores temporary route attributes that were expanded by
 * rte_make_tmp_attrs() back to private route fields and also undefines them.
 * It is essentially a wrapper around the protocol store_tmp_attrs() hook,
 * which does some additional work like taking a shortcut if there is no change
 * and cleaning up the @old_attrs reference obtained by rte_make_tmp_attrs().
 */
static void
rte_store_tmp_attrs(rte *r, linpool *lp, rta *old_attrs)
{
  void (*store_tmp_attrs)(rte *rt, linpool *lp);
  store_tmp_attrs = r->attrs->src->proto->store_tmp_attrs;

  if (!store_tmp_attrs)
    return;

  ASSERT(!rta_is_cached(r->attrs));

  /* If there is no new ea_list, we just skip the temporary ea_list */
  ea_list *ea = r->attrs->eattrs;
  if (ea && (ea->flags & EALF_TEMP))
    r->attrs->eattrs = ea->next;
  else
    store_tmp_attrs(r, lp);

  /* Free ref we got in rte_make_tmp_attrs(), have to do rta_lookup() first */
  r->attrs = rta_lookup(r->attrs);
  rta_free(old_attrs);
}


static int				/* Actually better or at least as good as */
rte_better(rte *new, rte *old)
{
  int (*better)(rte *, rte *);

  if (!rte_is_valid(old))
    return 1;
  if (!rte_is_valid(new))
    return 0;

  if (new->pref > old->pref)
    return 1;
  if (new->pref < old->pref)
    return 0;
  if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
    {
      /*
       *  If the user has configured protocol preferences, so that two different protocols
       *  have the same preference, try to break the tie by comparing addresses. Not too
       *  useful, but keeps the ordering of routes unambiguous.
       */
      return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
    }
  if (better = new->attrs->src->proto->rte_better)
    return better(new, old);
  return 0;
}

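/*
 * Example: with default protocol preferences, a static route (pref 200)
 * always beats an OSPF route (pref 150); the protocol's own rte_better()
 * hook is consulted only when both preference and protocol match.
 */
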
static int
rte_mergable(rte *pri, rte *sec)
{
  int (*mergable)(rte *, rte *);

  if (!rte_is_valid(pri) || !rte_is_valid(sec))
    return 0;

  if (pri->pref != sec->pref)
    return 0;

  if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
    return 0;

  if (mergable = pri->attrs->src->proto->rte_mergable)
    return mergable(pri, sec);

  return 0;
}

static void
rte_trace(struct channel *c, rte *e, int dir, char *msg)
{
  log(L_TRACE "%s.%s %c %s %N %s",
      c->proto->name, c->name ?: "?", dir, msg, e->net->n.addr,
      rta_dest_name(e->attrs->dest));
}

static inline void
rte_trace_in(uint flag, struct channel *c, rte *e, char *msg)
{
  if ((c->debug & flag) || (c->proto->debug & flag))
    rte_trace(c, e, '>', msg);
}

static inline void
rte_trace_out(uint flag, struct channel *c, rte *e, char *msg)
{
  if ((c->debug & flag) || (c->proto->debug & flag))
    rte_trace(c, e, '<', msg);
}

static rte *
export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int silent)
{
  struct proto *p = c->proto;
  const struct filter *filter = c->out_filter;
  struct proto_stats *stats = &c->stats;
  rte *rt;
  int v;

  rt = rt0;
  *rt_free = NULL;

  v = p->preexport ? p->preexport(p, &rt, pool) : 0;
  if (v < 0)
    {
      if (silent)
	goto reject;

      stats->exp_updates_rejected++;
      if (v == RIC_REJECT)
	rte_trace_out(D_FILTERS, c, rt, "rejected by protocol");
      goto reject;
    }
  if (v > 0)
    {
      if (!silent)
	rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol");
      goto accept;
    }

  rte_make_tmp_attrs(&rt, pool, NULL);

  v = filter && ((filter == FILTER_REJECT) ||
		 (f_run(filter, &rt, pool,
			(silent ? FF_SILENT : 0)) > F_ACCEPT));
  if (v)
    {
      if (silent)
	goto reject;

      stats->exp_updates_filtered++;
      rte_trace_out(D_FILTERS, c, rt, "filtered out");
      goto reject;
    }

 accept:
  if (rt != rt0)
    *rt_free = rt;
  return rt;

 reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}

static inline rte *
export_filter(struct channel *c, rte *rt0, rte **rt_free, int silent)
{
  return export_filter_(c, rt0, rt_free, rte_update_pool, silent);
}

static void
do_rt_notify(struct channel *c, net *net, rte *new, rte *old, int refeed)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;

  if (refeed && new)
    c->refeed_count++;

  /* Apply export limit */
  struct channel_limit *l = &c->out_limit;
  if (l->action && !old && new)
  {
    if (stats->exp_routes >= l->limit)
      channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);

    if (l->state == PLS_BLOCKED)
    {
      stats->exp_updates_rejected++;
      rte_trace_out(D_FILTERS, c, new, "rejected [limit]");
      return;
    }
  }

  /* Apply export table */
  if (c->out_table && !rte_update_out(c, net->n.addr, new, old, refeed))
    return;

  if (new)
    stats->exp_updates_accepted++;
  else
    stats->exp_withdraws_accepted++;

  if (old)
  {
    bmap_clear(&c->export_map, old->id);
    stats->exp_routes--;
  }

  if (new)
  {
    bmap_set(&c->export_map, new->id);
    stats->exp_routes++;
  }

  if (p->debug & D_ROUTES)
  {
    if (new && old)
      rte_trace_out(D_ROUTES, c, new, "replaced");
    else if (new)
      rte_trace_out(D_ROUTES, c, new, "added");
    else if (old)
      rte_trace_out(D_ROUTES, c, old, "removed");
  }

  p->rt_notify(p, c, net, new, old);
}

static void
rt_notify_basic(struct channel *c, net *net, rte *new, rte *old, int refeed)
{
  // struct proto *p = c->proto;
  rte *new_free = NULL;

  if (new)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  if (new)
    new = export_filter(c, new, &new_free, 0);

  if (old && !bmap_test(&c->export_map, old->id))
    old = NULL;

  if (!new && !old)
    return;

  do_rt_notify(c, net, new, old, refeed);

  /* Discard temporary rte */
  if (new_free)
    rte_free(new_free);
}

static void
rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, int refeed)
{
  // struct proto *p = c->proto;
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;
  int new_first = 0;

  /*
   * We assume that there are no changes in net route order except (added)
   * new_changed and (removed) old_changed. Therefore, the function is not
   * compatible with deterministic_med (where nontrivial reordering can happen
   * as a result of a route change) and with recomputation of recursive routes
   * due to next hop update (where many routes can be changed in one step).
   *
   * Note that we need this assumption just for optimizations, we could just
   * run full new_best recomputation otherwise.
   *
   * There are three cases:
   * feed or old_best is old_changed -> we need to recompute new_best
   * old_best is before new_changed -> new_best is old_best, ignore
   * old_best is after new_changed -> try new_changed, otherwise old_best
   */

  if (net->routes)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* Find old_best - either old_changed, or route for net->routes */
  if (old_changed && bmap_test(&c->export_map, old_changed->id))
    old_best = old_changed;
  else
  {
    for (rte *r = net->routes; rte_is_valid(r); r = r->next)
    {
      if (bmap_test(&c->export_map, r->id))
      {
	old_best = r;
	break;
      }

      /* Note if new_changed found before old_best */
      if (r == new_changed)
	new_first = 1;
    }
  }

  /* Find new_best */
  if ((new_changed == old_changed) || (old_best == old_changed))
  {
    /* Feed or old_best changed -> find first accepted by filters */
    for (rte *r = net->routes; rte_is_valid(r); r = r->next)
      if (new_best = export_filter(c, r, &new_free, 0))
	break;
  }
  else
  {
    /* Other cases -> either new_changed, or old_best (and nothing changed) */
    if (new_first && (new_changed = export_filter(c, new_changed, &new_free, 0)))
      new_best = new_changed;
    else
      return;
  }

  if (!new_best && !old_best)
    return;

  do_rt_notify(c, net, new_best, old_best, refeed);

  /* Discard temporary rte */
  if (new_free)
    rte_free(new_free);
}


static struct nexthop *
nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max)
{
  return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool);
}

rte *
rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent)
{
  // struct proto *p = c->proto;
  struct nexthop *nhs = NULL;
  rte *best0, *best, *rt0, *rt, *tmp;

  best0 = net->routes;
  *rt_free = NULL;

  if (!rte_is_valid(best0))
    return NULL;

  best = export_filter_(c, best0, rt_free, pool, silent);

  if (!best || !rte_is_reachable(best))
    return best;

  for (rt0 = best0->next; rt0; rt0 = rt0->next)
  {
    if (!rte_mergable(best0, rt0))
      continue;

    rt = export_filter_(c, rt0, &tmp, pool, 1);

    if (!rt)
      continue;

    if (rte_is_reachable(rt))
      nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit);

    if (tmp)
      rte_free(tmp);
  }

  if (nhs)
  {
    nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit);

    if (nhs->next)
    {
      best = rte_cow_rta(best, pool);
      nexthop_link(best->attrs, nhs);
    }
  }

  if (best != best0)
    *rt_free = best;

  return best;
}


static void
rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
		 rte *new_best, rte *old_best, int refeed)
{
  // struct proto *p = c->proto;
  rte *new_free = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if ((new_best == old_best) &&
      (new_changed != old_changed) &&
      !rte_mergable(new_best, new_changed) &&
      !rte_mergable(old_best, old_changed))
    return;

  if (new_best)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* Prepare new merged route */
  if (new_best)
    new_best = rt_export_merged(c, net, &new_free, rte_update_pool, 0);

  /* Check old merged route */
  if (old_best && !bmap_test(&c->export_map, old_best->id))
    old_best = NULL;

  if (!new_best && !old_best)
    return;

  do_rt_notify(c, net, new_best, old_best, refeed);

  /* Discard temporary rte */
  if (new_free)
    rte_free(new_free);
}


/**
 * rte_announce - announce a routing table change
 * @tab: table the route has been added to
 * @type: type of route announcement (RA_UNDEF or RA_ANY)
 * @net: network in question
 * @new: the new or changed route
 * @old: the previous route replaced by the new one
 * @new_best: the new best route for the same network
 * @old_best: the previous best route for the same network
 *
 * This function gets a routing table update and announces it to all protocols
 * that are connected to the same table by their channels.
 *
 * There are two ways of how routing table changes are announced. First, there
 * is a change of just one route in @net (which may have caused a change of the
 * best route of the network). In this case @new and @old describe the changed
 * route and @new_best and @old_best describe the best routes. Other routes are
 * not affected, but in sorted tables the order of other routes might change.
 *
 * Second, there is a bulk change of multiple routes in @net, with shared best
 * route selection. In such a case separate route changes are described using
 * @type of %RA_ANY, with @new and @old specifying the changed route, while
 * @new_best and @old_best are NULL. After that, another notification is done
 * where @new_best and @old_best are filled (may be the same), but @new and @old
 * are NULL.
 *
 * The function announces the change to all associated channels. For each
 * channel, an appropriate preprocessing is done according to the channel
 * &ra_mode. For example, %RA_OPTIMAL channels receive just changes of best
 * routes.
 *
 * In general, we first call the preexport() hook of a protocol, which performs
 * basic checks on the route (each protocol has a right to veto or force accept
 * of the route before any filter is asked). Then we consult an export filter
 * of the channel and verify the old route in an export map of the channel.
 * Finally, the rt_notify() hook of the protocol gets called.
 *
 * Note that there are also calls of rt_notify() hooks due to feed, but that is
 * done outside of the scope of rte_announce().
 */
static void
rte_announce(rtable *tab, uint type, net *net, rte *new, rte *old,
	     rte *new_best, rte *old_best)
{
  if (!rte_is_valid(new))
    new = NULL;

  if (!rte_is_valid(old))
    old = NULL;

  if (!rte_is_valid(new_best))
    new_best = NULL;

  if (!rte_is_valid(old_best))
    old_best = NULL;

  if (!new && !old && !new_best && !old_best)
    return;

  if (new_best != old_best)
  {
    if (new_best)
      new_best->sender->stats.pref_routes++;
    if (old_best)
      old_best->sender->stats.pref_routes--;

    if (tab->hostcache)
      rt_notify_hostcache(tab, net);
  }

  rt_schedule_notify(tab);

  struct channel *c; node *n;
  WALK_LIST2(c, n, tab->channels, table_node)
  {
    if (c->export_state == ES_DOWN)
      continue;

    if (type && (type != c->ra_mode))
      continue;

    switch (c->ra_mode)
    {
    case RA_OPTIMAL:
      if (new_best != old_best)
	rt_notify_basic(c, net, new_best, old_best, 0);
      break;

    case RA_ANY:
      if (new != old)
	rt_notify_basic(c, net, new, old, 0);
      break;

    case RA_ACCEPTED:
      rt_notify_accepted(c, net, new, old, 0);
      break;

    case RA_MERGED:
      rt_notify_merged(c, net, new, old, new_best, old_best, 0);
      break;
    }
  }
}

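/*
 * Example (an illustrative sketch of the calling convention described above,
 * not a verbatim BIRD call site): a bulk change of two routes followed by a
 * single best-route notification:
 *
 *   rte_announce(tab, RA_ANY, net, new1, old1, NULL, NULL);
 *   rte_announce(tab, RA_ANY, net, new2, old2, NULL, NULL);
 *   rte_announce(tab, RA_UNDEF, net, NULL, NULL, new_best, old_best);
 */
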
static inline int
rte_validate(rte *e)
{
  int c;
  net *n = e->net;

  if (!net_validate(n->n.addr))
  {
    log(L_WARN "Ignoring bogus prefix %N received via %s",
	n->n.addr, e->sender->proto->name);
    return 0;
  }

  /* FIXME: better handling different nettypes */
  c = !net_is_flow(n->n.addr) ?
    net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE);
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
  {
    log(L_WARN "Ignoring bogus route %N received via %s",
	n->n.addr, e->sender->proto->name);
    return 0;
  }

  if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest)
  {
    log(L_WARN "Ignoring route %N with invalid dest %d received via %s",
	n->n.addr, e->attrs->dest, e->sender->proto->name);
    return 0;
  }

  if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh)))
  {
    log(L_WARN "Ignoring unsorted multipath route %N received via %s",
	n->n.addr, e->sender->proto->name);
    return 0;
  }

  return 1;
}

/**
 * rte_free - delete a &rte
 * @e: &rte to be deleted
 *
 * rte_free() deletes the given &rte from the routing table it's linked to.
 */
void
rte_free(rte *e)
{
  if (rta_is_cached(e->attrs))
    rta_free(e->attrs);
  sl_free(rte_slab, e);
}

static inline void
rte_free_quick(rte *e)
{
  rta_free(e->attrs);
  sl_free(rte_slab, e);
}

static int
rte_same(rte *x, rte *y)
{
  /* rte.flags are not checked, as they are mostly internal to rtable */
  return
    x->attrs == y->attrs &&
    x->pflags == y->pflags &&
    x->pref == y->pref &&
    (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y)) &&
    rte_is_filtered(x) == rte_is_filtered(y);
}

static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }

static void
rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct rtable *table = c->table;
  struct proto_stats *stats = &c->stats;
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
  rte *before_old = NULL;
  rte *old_best = net->routes;
  rte *old = NULL;
  rte **k;

  k = &net->routes;			/* Find and remove original route from the same protocol */
  while (old = *k)
    {
      if (old->attrs->src == src)
	{
	  /* If there is the same route in the routing table but from
	   * a different sender, then there are two paths from the
	   * source protocol to this routing table through transparent
	   * pipes, which is not allowed.
	   *
	   * We log that and ignore the route. If it is a withdraw, we
	   * ignore it completely (there might be 'spurious withdraws',
	   * see FIXME in do_rte_announce())
	   */
	  if (old->sender->proto != p)
	    {
	      if (new)
		{
		  log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
		      net->n.addr, table->name);
		  rte_free_quick(new);
		}
	      return;
	    }

	  if (new && rte_same(old, new))
	    {
	      /* No changes, ignore the new route and refresh the old one */

	      old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);

	      if (!rte_is_filtered(new))
		{
		  stats->imp_updates_ignored++;
		  rte_trace_in(D_ROUTES, c, new, "ignored");
		}

	      rte_free_quick(new);
	      return;
	    }
	  *k = old->next;
	  table->rt_count--;
	  break;
	}
      k = &old->next;
      before_old = old;
    }

  /* Save the last accessed position */
  rte **pos = k;

  if (!old)
    before_old = NULL;

  if (!old && !new)
    {
      stats->imp_withdraws_ignored++;
      return;
    }

  int new_ok = rte_is_ok(new);
  int old_ok = rte_is_ok(old);

  struct channel_limit *l = &c->rx_limit;
  if (l->action && !old && new && !c->in_table)
    {
      u32 all_routes = stats->imp_routes + stats->filt_routes;

      if (all_routes >= l->limit)
	channel_notify_limit(c, l, PLD_RX, all_routes);

      if (l->state == PLS_BLOCKED)
	{
	  /* With the receive limit, the situation is simple: old is NULL,
	     so we just free new and exit like nothing happened */

	  stats->imp_updates_ignored++;
	  rte_trace_in(D_FILTERS, c, new, "ignored [limit]");
	  rte_free_quick(new);
	  return;
	}
    }

  l = &c->in_limit;
  if (l->action && !old_ok && new_ok)
    {
      if (stats->imp_routes >= l->limit)
	channel_notify_limit(c, l, PLD_IN, stats->imp_routes);

      if (l->state == PLS_BLOCKED)
	{
	  /* With the import limit, the situation is more complicated. We
	     shouldn't just drop the route, we should handle it like
	     it was filtered. We also have to continue the route
	     processing if old or new is non-NULL, but we should exit
	     if both are NULL as this case is probably assumed to be
	     already handled. */

	  stats->imp_updates_ignored++;
	  rte_trace_in(D_FILTERS, c, new, "ignored [limit]");

	  if (c->in_keep_filtered)
	    new->flags |= REF_FILTERED;
	  else
	    { rte_free_quick(new); new = NULL; }

	  /* Note that old && !new could be possible when
	     c->in_keep_filtered changed in the recent past. */

	  if (!old && !new)
	    return;

	  new_ok = 0;
	  goto skip_stats1;
	}
    }

  if (new_ok)
    stats->imp_updates_accepted++;
  else if (old_ok)
    stats->imp_withdraws_accepted++;
  else
    stats->imp_withdraws_ignored++;

  if (old_ok || new_ok)
    table->last_rt_change = current_time();

 skip_stats1:

  if (new)
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
  if (old)
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;

  if (table->config->sorted)
    {
      /* If routes are sorted, just insert the new route to the appropriate position */
      if (new)
	{
	  if (before_old && !rte_better(new, before_old))
	    k = &before_old->next;
	  else
	    k = &net->routes;

	  for (; *k; k=&(*k)->next)
	    if (rte_better(new, *k))
	      break;

	  new->next = *k;
	  *k = new;

	  table->rt_count++;
	}
    }
  else
    {
      /* If routes are not sorted, find the best route and move it to
	 the first position. There are several optimized cases. */

      if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
	goto do_recalculate;

      if (new && rte_better(new, old_best))
	{
	  /* The first case - the new route is clearly optimal,
	     we link it at the first position */

	  new->next = net->routes;
	  net->routes = new;

	  table->rt_count++;
	}
      else if (old == old_best)
	{
	  /* The second case - the old best route disappeared, we add the
	     new route (if we have any) to the list (we don't care about
	     position) and then we elect the new optimal route and relink
	     that route at the first position and announce it. The new
	     optimal route might be NULL if there are no more routes */

	do_recalculate:
	  /* Add the new route to the list */
	  if (new)
	    {
	      new->next = *pos;
	      *pos = new;

	      table->rt_count++;
	    }

	  /* Find a new optimal route (if there is any) */
	  if (net->routes)
	    {
	      rte **bp = &net->routes;
	      for (k=&(*bp)->next; *k; k=&(*k)->next)
		if (rte_better(*k, *bp))
		  bp = k;

	      /* And relink it */
	      rte *best = *bp;
	      *bp = best->next;
	      best->next = net->routes;
	      net->routes = best;
	    }
	}
      else if (new)
	{
	  /* The third case - the new route is not better than the old
	     best route (therefore old_best != NULL) and the old best
	     route was not removed (therefore old_best == net->routes).
	     We just link the new route to the old/last position. */

	  new->next = *pos;
	  *pos = new;

	  table->rt_count++;
	}
      /* The fourth (empty) case - a suboptimal route was removed, nothing to do */
    }

  if (new)
    {
      new->lastmod = current_time();

      if (!old)
        {
	  new->id = hmap_first_zero(&table->id_map);
	  hmap_set(&table->id_map, new->id);
	}
      else
	new->id = old->id;
    }

  /* Log the route change */
  if ((c->debug & D_ROUTES) || (p->debug & D_ROUTES))
    {
      if (new_ok)
	rte_trace(c, new, '>', new == net->routes ? "added [best]" : "added");
      else if (old_ok)
	{
	  if (old != old_best)
	    rte_trace(c, old, '>', "removed");
	  else if (rte_is_ok(net->routes))
	    rte_trace(c, old, '>', "removed [replaced]");
	  else
	    rte_trace(c, old, '>', "removed [sole]");
	}
    }

  /* Propagate the route change */
  rte_announce(table, RA_UNDEF, net, new, old, net->routes, old_best);

  if (!net->routes &&
      (table->gc_counter++ >= table->config->gc_max_ops) &&
      (table->gc_time + table->config->gc_min_time <= current_time()))
    rt_schedule_prune(table);

  if (old_ok && p->rte_remove)
    p->rte_remove(net, old);
  if (new_ok && p->rte_insert)
    p->rte_insert(net, new);

  if (old)
    {
      if (!new)
	hmap_clear(&table->id_map, old->id);

      rte_free_quick(old);
    }
}

static int rte_update_nest_cnt;		/* Nesting counter to allow recursive updates */

static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}

static inline void
rte_update_unlock(void)
{
  if (!--rte_update_nest_cnt)
    lp_flush(rte_update_pool);
}

static inline void
rte_hide_dummy_routes(net *net, rte **dummy)
{
  if (net->routes && net->routes->attrs->source == RTS_DUMMY)
  {
    *dummy = net->routes;
    net->routes = (*dummy)->next;
  }
}

static inline void
rte_unhide_dummy_routes(net *net, rte **dummy)
{
  if (*dummy)
  {
    (*dummy)->next = net->routes;
    net->routes = *dummy;
  }
}

/**
 * rte_update - enter a new update to a routing table
 * @table: table to be updated
 * @c: channel doing the update
 * @net: network node
 * @p: protocol submitting the update
 * @src: protocol originating the update
 * @new: a &rte representing the new route or %NULL for route removal.
 *
 * This function is called by the routing protocols whenever they discover
 * a new route or wish to update/remove an existing route. The right announcement
 * sequence is to build route attributes first (either un-cached with @aflags set
 * to zero or a cached one using rta_lookup(); in this case please note that
 * you need to increase the use count of the attributes yourself by calling
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
 * the appropriate data and finally submit the new &rte by calling rte_update().
 *
 * @src specifies the protocol that originally created the route and the meaning
 * of protocol-dependent data of @new. If @new is not %NULL, @src has to be the
 * same value as @new->attrs->src. @p specifies the protocol that called
 * rte_update(). In most cases it is the same protocol as @src. rte_update()
 * stores the channel @c in @new->sender.
 *
 * When rte_update() gets any route, it automatically validates it (checks
 * whether the network and next hop address are valid IP addresses and also
 * whether a normal routing protocol doesn't try to smuggle a host or link
 * scope route to the table), converts all protocol dependent attributes stored
 * in the &rte to temporary extended attributes, consults import filters of the
 * protocol to see if the route should be accepted and/or its attributes modified,
 * and stores the temporary attributes back to the &rte.
 *
 * Now, having a "public" version of the route, we
 * automatically find any old route defined by the protocol @src
 * for network @n, replace it by the new one (or remove it if @new is %NULL),
 * recalculate the optimal route for this destination and finally broadcast
 * the change (if any) to all routing protocols by calling rte_announce().
 *
 * All memory used for attribute lists and other temporary allocations is taken
 * from a special linear pool @rte_update_pool and freed when rte_update()
 * finishes.
 */

void
rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
{
  // struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;
  const struct filter *filter = c->in_filter;
  rte *dummy = NULL;
  net *nn;

  ASSERT(c->channel_state == CS_UP);

  rte_update_lock();
  if (new)
    {
      /* Create a temporary table node */
      nn = alloca(sizeof(net) + n->length);
      memset(nn, 0, sizeof(net) + n->length);
      net_copy(nn->n.addr, n);

      new->net = nn;
      new->sender = c;

      if (!new->pref)
	new->pref = c->preference;

      stats->imp_updates_received++;
      if (!rte_validate(new))
	{
	  rte_trace_in(D_FILTERS, c, new, "invalid");
	  stats->imp_updates_invalid++;
	  goto drop;
	}

      if (filter == FILTER_REJECT)
	{
	  stats->imp_updates_filtered++;
	  rte_trace_in(D_FILTERS, c, new, "filtered out");

	  if (! c->in_keep_filtered)
	    goto drop;

	  /* new is a private copy, we can modify it */
	  new->flags |= REF_FILTERED;
	}
      else if (filter)
	{
	  rta *old_attrs = NULL;
	  rte_make_tmp_attrs(&new, rte_update_pool, &old_attrs);

	  int fr = f_run(filter, &new, rte_update_pool, 0);
	  if (fr > F_ACCEPT)
	  {
	    stats->imp_updates_filtered++;
	    rte_trace_in(D_FILTERS, c, new, "filtered out");

	    if (! c->in_keep_filtered)
	    {
	      rta_free(old_attrs);
	      goto drop;
	    }

	    new->flags |= REF_FILTERED;
	  }

	  rte_store_tmp_attrs(new, rte_update_pool, old_attrs);
	}
      if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
	new->attrs = rta_lookup(new->attrs);
      new->flags |= REF_COW;

      /* Use the actual struct network, not the dummy one */
      nn = net_get(c->table, n);
      new->net = nn;
    }
  else
    {
      stats->imp_withdraws_received++;

      if (!(nn = net_find(c->table, n)) || !src)
	{
	  stats->imp_withdraws_ignored++;
	  rte_update_unlock();
	  return;
	}
    }

 recalc:
  /* And recalculate the best route */
  rte_hide_dummy_routes(nn, &dummy);
  rte_recalculate(c, nn, new, src);
  rte_unhide_dummy_routes(nn, &dummy);

  rte_update_unlock();
  return;

 drop:
  rte_free(new);
  new = NULL;
  if (nn = net_find(c->table, n))
    goto recalc;

  rte_update_unlock();
}

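/*
 * Example (an illustrative sketch of the announcement sequence described
 * above; the protocol/channel variables, gateway and interface are
 * placeholders):
 *
 *   rta a0 = {
 *     .src = p->main_source,
 *     .source = RTS_STATIC,
 *     .scope = SCOPE_UNIVERSE,
 *     .dest = RTD_UNICAST,
 *     .nh.gw = gw,
 *     .nh.iface = ifa,
 *   };
 *
 *   rte *e = rte_get_temp(&a0);	// un-cached rta, rte_update() caches it
 *   rte_update2(c, n, e, p->main_source);
 *
 * A withdrawal is submitted as rte_update2(c, n, NULL, p->main_source).
 */
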
/* Independent call to rte_announce(), used from next hop
   recalculation, outside of rte_update(). @new must be non-NULL */
static inline void
rte_announce_i(rtable *tab, uint type, net *net, rte *new, rte *old,
	       rte *new_best, rte *old_best)
{
  rte_update_lock();
  rte_announce(tab, type, net, new, old, new_best, old_best);
  rte_update_unlock();
}

static inline void
rte_discard(rte *old)	/* Non-filtered route deletion, used during garbage collection */
{
  rte_update_lock();
  rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
  rte_update_unlock();
}

/* Modify existing route by protocol hook, used for long-lived graceful restart */
static inline void
rte_modify(rte *old)
{
  rte_update_lock();

  rte *new = old->sender->proto->rte_modify(old, rte_update_pool);
  if (new != old)
  {
    if (new)
    {
      if (!rta_is_cached(new->attrs))
	new->attrs = rta_lookup(new->attrs);
      new->flags = (old->flags & ~REF_MODIFY) | REF_COW;
    }

    rte_recalculate(old->sender, old->net, new, old->attrs->src);
  }

  rte_update_unlock();
}

/* Check the rtable for the best route to a given net,
   whether it would be exported to @p */
int
rt_examine(rtable *t, net_addr *a, struct proto *p, const struct filter *filter)
{
  net *n = net_find(t, a);
  rte *rt = n ? n->routes : NULL;

  if (!rte_is_valid(rt))
    return 0;

  rte_update_lock();

  /* Rest is stripped down export_filter() */
  int v = p->preexport ? p->preexport(p, &rt, rte_update_pool) : 0;
  if (v == RIC_PROCESS)
  {
    rte_make_tmp_attrs(&rt, rte_update_pool, NULL);
    v = (f_run(filter, &rt, rte_update_pool, FF_SILENT) <= F_ACCEPT);
  }

  /* Discard temporary rte */
  if (rt != n->routes)
    rte_free(rt);

  rte_update_unlock();

  return v > 0;
}


/**
 * rt_refresh_begin - start a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function starts a refresh cycle for the given routing table and
 * channel. The refresh cycle is a sequence where the protocol sends all its
 * valid routes to the routing table (by rte_update()). After that, all
 * protocol routes (more precisely routes with @c as @sender) not sent during
 * the refresh cycle but still in the table from the past are pruned. This is
 * implemented by marking all related routes as stale by the REF_STALE flag in
 * rt_refresh_begin(), then marking all related stale routes with the
 * REF_DISCARD flag in rt_refresh_end() and then removing such routes in the
 * prune loop.
 */
void
rt_refresh_begin(rtable *t, struct channel *c)
{
  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for (e = n->routes; e; e = e->next)
	if (e->sender == c)
	  e->flags |= REF_STALE;
    }
  FIB_WALK_END;
}

/**
 * rt_refresh_end - end a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function ends a refresh cycle for the given routing table and channel.
 * See rt_refresh_begin() for a description of refresh cycles.
 */
void
rt_refresh_end(rtable *t, struct channel *c)
{
  int prune = 0;

  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for (e = n->routes; e; e = e->next)
	if ((e->sender == c) && (e->flags & REF_STALE))
	  {
	    e->flags |= REF_DISCARD;
	    prune = 1;
	  }
    }
  FIB_WALK_END;

  if (prune)
    rt_schedule_prune(t);
}

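/*
 * Example (an illustrative sketch): a protocol doing a route refresh
 * brackets its full re-announcement like this:
 *
 *   rt_refresh_begin(c->table, c);	// mark all our routes REF_STALE
 *   ... rte_update2() for every currently valid route ...
 *   rt_refresh_end(c->table, c);	// discard routes that stayed stale
 */
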
1673 void
rt_modify_stale(rtable * t,struct channel * c)1674 rt_modify_stale(rtable *t, struct channel *c)
1675 {
1676   int prune = 0;
1677 
1678   FIB_WALK(&t->fib, net, n)
1679     {
1680       rte *e;
1681       for (e = n->routes; e; e = e->next)
1682 	if ((e->sender == c) && (e->flags & REF_STALE) && !(e->flags & REF_FILTERED))
1683 	  {
1684 	    e->flags |= REF_MODIFY;
1685 	    prune = 1;
1686 	  }
1687     }
1688   FIB_WALK_END;
1689 
1690   if (prune)
1691     rt_schedule_prune(t);
1692 }
1693 
1694 /**
1695  * rte_dump - dump a route
1696  * @e: &rte to be dumped
1697  *
1698  * This functions dumps contents of a &rte to debug output.
1699  */
1700 void
rte_dump(rte * e)1701 rte_dump(rte *e)
1702 {
1703   net *n = e->net;
1704   debug("%-1N ", n->n.addr);
1705   debug("PF=%02x pref=%d ", e->pflags, e->pref);
1706   rta_dump(e->attrs);
1707   if (e->attrs->src->proto->proto->dump_attrs)
1708     e->attrs->src->proto->proto->dump_attrs(e);
1709   debug("\n");
1710 }
1711 
1712 /**
1713  * rt_dump - dump a routing table
1714  * @t: routing table to be dumped
1715  *
1716  * This function dumps contents of a given routing table to debug output.
1717  */
1718 void
rt_dump(rtable * t)1719 rt_dump(rtable *t)
1720 {
1721   debug("Dump of routing table <%s>\n", t->name);
1722 #ifdef DEBUGGING
1723   fib_check(&t->fib);
1724 #endif
1725   FIB_WALK(&t->fib, net, n)
1726     {
1727       rte *e;
1728       for(e=n->routes; e; e=e->next)
1729 	rte_dump(e);
1730     }
1731   FIB_WALK_END;
1732   debug("\n");
1733 }
1734 
1735 /**
1736  * rt_dump_all - dump all routing tables
1737  *
1738  * This function dumps contents of all routing tables to debug output.
1739  */
1740 void
rt_dump_all(void)1741 rt_dump_all(void)
1742 {
1743   rtable *t;
1744 
1745   WALK_LIST(t, routing_tables)
1746     rt_dump(t);
1747 }
1748 
1749 static inline void
rt_schedule_hcu(rtable * tab)1750 rt_schedule_hcu(rtable *tab)
1751 {
1752   if (tab->hcu_scheduled)
1753     return;
1754 
1755   tab->hcu_scheduled = 1;
1756   ev_schedule(tab->rt_event);
1757 }
1758 
1759 static inline void
rt_schedule_nhu(rtable * tab)1760 rt_schedule_nhu(rtable *tab)
1761 {
1762   if (tab->nhu_state == NHU_CLEAN)
1763     ev_schedule(tab->rt_event);
1764 
1765   /* state change:
1766    *   NHU_CLEAN   -> NHU_SCHEDULED
1767    *   NHU_RUNNING -> NHU_DIRTY
1768    */
1769   tab->nhu_state |= NHU_SCHEDULED;
1770 }
1771 
1772 void
rt_schedule_prune(rtable * tab)1773 rt_schedule_prune(rtable *tab)
1774 {
1775   if (tab->prune_state == 0)
1776     ev_schedule(tab->rt_event);
1777 
1778   /* state change 0->1, 2->3 */
1779   tab->prune_state |= 1;
1780 }


static void
rt_event(void *ptr)
{
  rtable *tab = ptr;

  rt_lock_table(tab);

  if (tab->hcu_scheduled)
    rt_update_hostcache(tab);

  if (tab->nhu_state)
    rt_next_hop_update(tab);

  if (tab->prune_state)
    rt_prune_table(tab);

  rt_unlock_table(tab);
}


static inline btime
rt_settled_time(rtable *tab)
{
  ASSUME(tab->base_settle_time != 0);

  return MIN(tab->last_rt_change + tab->config->min_settle_time,
	     tab->base_settle_time + tab->config->max_settle_time);
}
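
/*
 * A worked example of the MIN() above, assuming the defaults set in
 * rt_new_table() below (min_settle_time = 1 S, max_settle_time = 20 S):
 * after a single change, both terms expire one second past the change.
 * If changes keep arriving every half second, last_rt_change keeps moving
 * the first term forward, but the second term stays fixed, so subscribers
 * are notified no later than 20 seconds after base_settle_time was set.
 */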

static void
rt_settle_timer(timer *t)
{
  rtable *tab = t->data;

  if (!tab->base_settle_time)
    return;

  btime settled_time = rt_settled_time(tab);
  if (current_time() < settled_time)
  {
    tm_set(tab->settle_timer, settled_time);
    return;
  }

  /* Settled */
  tab->base_settle_time = 0;

  struct rt_subscription *s;
  WALK_LIST(s, tab->subscribers)
    s->hook(s);
}

static void
rt_kick_settle_timer(rtable *tab)
{
  tab->base_settle_time = current_time();

  if (!tab->settle_timer)
    tab->settle_timer = tm_new_init(rt_table_pool, rt_settle_timer, tab, 0, 0);

  if (!tm_active(tab->settle_timer))
    tm_set(tab->settle_timer, rt_settled_time(tab));
}

static inline void
rt_schedule_notify(rtable *tab)
{
  if (EMPTY_LIST(tab->subscribers))
    return;

  if (tab->base_settle_time)
    return;

  rt_kick_settle_timer(tab);
}

void
rt_subscribe(rtable *tab, struct rt_subscription *s)
{
  s->tab = tab;
  rt_lock_table(tab);
  add_tail(&tab->subscribers, &s->n);
}

void
rt_unsubscribe(struct rt_subscription *s)
{
  rem_node(&s->n);
  rt_unlock_table(s->tab);
}
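
/*
 * A minimal usage sketch of the subscription interface; the watcher
 * structure and both functions are hypothetical, only the hook signature
 * and rt_subscribe()/rt_unsubscribe() come from this module. The hook is
 * invoked from rt_settle_timer() once the table settles:
 */
#if 0
struct my_watcher {
  struct rt_subscription s;
  /* ... state derived from the table contents ... */
};

static void
my_settle_hook(struct rt_subscription *sub)
{
  struct my_watcher *w = SKIP_BACK(struct my_watcher, s, sub);
  /* The table has been quiet long enough; recompute derived state. */
}

static void
my_watch_table(struct my_watcher *w, rtable *tab)
{
  w->s.hook = my_settle_hook;
  rt_subscribe(tab, &w->s);	/* Keeps the table locked until rt_unsubscribe(&w->s) */
}
#endif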

void
rt_setup(pool *p, rtable *t, struct rtable_config *cf)
{
  bzero(t, sizeof(*t));
  t->name = cf->name;
  t->config = cf;
  t->addr_type = cf->addr_type;
  fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
  init_list(&t->channels);

  hmap_init(&t->id_map, p, 1024);
  hmap_set(&t->id_map, 0);

  t->rt_event = ev_new_init(p, rt_event, t);
  t->last_rt_change = t->gc_time = current_time();

  init_list(&t->subscribers);
}

/**
 * rt_init - initialize routing tables
 *
 * This function is called during BIRD startup. It initializes the
 * routing table module.
 */
void
rt_init(void)
{
  rta_init();
  rt_table_pool = rp_new(&root_pool, "Routing tables");
  rte_update_pool = lp_new_default(rt_table_pool);
  rte_slab = sl_new(rt_table_pool, sizeof(rte));
  init_list(&routing_tables);
}


/**
 * rt_prune_table - prune a routing table
 *
 * The prune loop scans routing tables and removes routes belonging to flushing
 * protocols, discarded routes and also stale network entries. It is called from
 * rt_event(). The event is rescheduled if the current iteration does not finish
 * the table. The pruning is directed by the prune state (@prune_state),
 * specifying whether the prune cycle is scheduled or running, and there
 * is also a persistent pruning iterator (@prune_fit).
 *
 * The prune loop is also used for channel flushing. For this purpose, the
 * channels to flush are marked before the iteration and notified after the
 * iteration.
 */
static void
rt_prune_table(rtable *tab)
{
  struct fib_iterator *fit = &tab->prune_fit;
  int limit = 512;

  struct channel *c;
  node *n, *x;

  DBG("Pruning route table %s\n", tab->name);
#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  if (tab->prune_state == 0)
    return;

  if (tab->prune_state == 1)
  {
    /* Mark channels to flush */
    WALK_LIST2(c, n, tab->channels, table_node)
      if (c->channel_state == CS_FLUSHING)
	c->flush_active = 1;

    FIB_ITERATE_INIT(fit, &tab->fib);
    tab->prune_state = 2;
  }

again:
  FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
      rte *e;

    rescan:
      for (e = n->routes; e; e = e->next)
      {
	if (e->sender->flush_active || (e->flags & REF_DISCARD))
	  {
	    if (limit <= 0)
	      {
		FIB_ITERATE_PUT(fit);
		ev_schedule(tab->rt_event);
		return;
	      }

	    rte_discard(e);
	    limit--;

	    goto rescan;
	  }

	if (e->flags & REF_MODIFY)
	  {
	    if (limit <= 0)
	      {
		FIB_ITERATE_PUT(fit);
		ev_schedule(tab->rt_event);
		return;
	      }

	    rte_modify(e);
	    limit--;

	    goto rescan;
	  }
      }

      if (!n->routes)		/* Orphaned FIB entry */
	{
	  FIB_ITERATE_PUT(fit);
	  fib_delete(&tab->fib, n);
	  goto again;
	}
    }
  FIB_ITERATE_END;

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  tab->gc_counter = 0;
  tab->gc_time = current_time();

  /* state change 2->0, 3->1 */
  tab->prune_state &= 1;

  if (tab->prune_state > 0)
    ev_schedule(tab->rt_event);

  /* FIXME: This should be handled in a better way */
  rt_prune_sources();

  /* Close flushed channels */
  WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
    if (c->flush_active)
      {
	c->flush_active = 0;
	channel_set_state(c, CS_DOWN);
      }

  return;
}

void
rt_preconfig(struct config *c)
{
  init_list(&c->tables);

  rt_new_table(cf_get_symbol("master4"), NET_IP4);
  rt_new_table(cf_get_symbol("master6"), NET_IP6);
}


/*
 * Some functions for handling internal next hop updates
 * triggered by rt_schedule_nhu().
 */

static inline int
rta_next_hop_outdated(rta *a)
{
  struct hostentry *he = a->hostentry;

  if (!he)
    return 0;

  if (!he->src)
    return a->dest != RTD_UNREACHABLE;

  return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
    (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh));
}

void
rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls)
{
  a->hostentry = he;
  a->dest = he->dest;
  a->igp_metric = he->igp_metric;

  if (a->dest != RTD_UNICAST)
  {
    /* No nexthop */
no_nexthop:
    a->nh = (struct nexthop) {};
    if (mls)
    { /* Store the label stack for later changes */
      a->nh.labels_orig = a->nh.labels = mls->len;
      memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32));
    }
    return;
  }

  if (((!mls) || (!mls->len)) && he->nexthop_linkable)
  { /* Just link the nexthop chain, no label append happens. */
    memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh)));
    return;
  }

  struct nexthop *nhp = NULL, *nhr = NULL;
  int skip_nexthop = 0;

  for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next)
  {
    if (skip_nexthop)
      skip_nexthop--;
    else
    {
      nhr = nhp;
      nhp = (nhp ? (nhp->next = lp_alloc(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh));
    }

    memset(nhp, 0, NEXTHOP_MAX_SIZE);
    nhp->iface = nh->iface;
    nhp->weight = nh->weight;

    if (mls)
    {
      nhp->labels = nh->labels + mls->len;
      nhp->labels_orig = mls->len;
      if (nhp->labels <= MPLS_MAX_LABEL_STACK)
      {
	memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */
	memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */
      }
      else
      {
	log(L_WARN "Sum of label stack sizes %d + %d = %d exceeds allowed maximum (%d)",
	    nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK);
	skip_nexthop++;
	continue;
      }
    }
    else if (nh->labels)
    {
      nhp->labels = nh->labels;
      nhp->labels_orig = 0;
      memcpy(nhp->label, nh->label, nh->labels * sizeof(u32));
    }

    if (ipa_nonzero(nh->gw))
    {
      nhp->gw = nh->gw;			/* Router nexthop */
      nhp->flags |= (nh->flags & RNF_ONLINK);
    }
    else if (!(nh->iface->flags & IF_MULTIACCESS) || (nh->iface->flags & IF_LOOPBACK))
      nhp->gw = IPA_NONE;		/* PtP link - no need for nexthop */
    else if (ipa_nonzero(he->link))
      nhp->gw = he->link;		/* Device nexthop with link-local address known */
    else
      nhp->gw = he->addr;		/* Device nexthop with link-local address unknown */
  }

  if (skip_nexthop)
    if (nhr)
      nhr->next = NULL;
    else
    {
      a->dest = RTD_UNREACHABLE;
      log(L_WARN "No valid nexthop remaining, setting route unreachable");
      goto no_nexthop;
    }
}
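
/*
 * A worked example of the label merge above (all values hypothetical):
 * a hostentry next hop carrying one transport label [100], combined with
 * a route's own stack mls = { .len = 2, .stack = { 200, 300 } }, yields
 * nhp->labels = 3, nhp->labels_orig = 2 and nhp->label[] = { 100, 200, 300 }
 * -- hostentry labels on top, the route's labels at the bottom -- so that
 * rt_next_hop_update_rte() can later peel the route's part off again.
 */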

static inline rte *
rt_next_hop_update_rte(rtable *tab UNUSED, rte *old)
{
  rta *a = alloca(RTA_MAX_SIZE);
  memcpy(a, old->attrs, rta_size(old->attrs));

  mpls_label_stack mls = { .len = a->nh.labels_orig };
  memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32));

  rta_apply_hostentry(a, old->attrs->hostentry, &mls);
  a->aflags = 0;

  rte *e = sl_alloc(rte_slab);
  memcpy(e, old, sizeof(rte));
  e->attrs = rta_lookup(a);

  return e;
}

static inline int
rt_next_hop_update_net(rtable *tab, net *n)
{
  rte **k, *e, *new, *old_best, **new_best;
  int count = 0;
  int free_old_best = 0;

  old_best = n->routes;
  if (!old_best)
    return 0;

  for (k = &n->routes; e = *k; k = &e->next)
    if (rta_next_hop_outdated(e->attrs))
      {
	new = rt_next_hop_update_rte(tab, e);
	*k = new;

	rte_trace_in(D_ROUTES, new->sender, new, "updated");
	rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);

	/* Call a pre-comparison hook */
	/* Not really an efficient way to compute this */
	if (e->attrs->src->proto->rte_recalculate)
	  e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);

	if (e != old_best)
	  rte_free_quick(e);
	else /* Freeing of the old best rte is postponed */
	  free_old_best = 1;

	e = new;
	count++;
      }

  if (!count)
    return 0;

  /* Find the new best route */
  new_best = NULL;
  for (k = &n->routes; e = *k; k = &e->next)
    {
      if (!new_best || rte_better(e, *new_best))
	new_best = k;
    }

  /* Relink the new best route to the first position */
  new = *new_best;
  if (new != n->routes)
    {
      *new_best = new->next;
      new->next = n->routes;
      n->routes = new;
    }

  /* Announce the new best route */
  if (new != old_best)
    rte_trace_in(D_ROUTES, new->sender, new, "updated [best]");

  /* Propagate changes */
  rte_announce_i(tab, RA_UNDEF, n, NULL, NULL, n->routes, old_best);

  if (free_old_best)
    rte_free_quick(old_best);

  return count;
}

static void
rt_next_hop_update(rtable *tab)
{
  struct fib_iterator *fit = &tab->nhu_fit;
  int max_feed = 32;

  if (tab->nhu_state == NHU_CLEAN)
    return;

  if (tab->nhu_state == NHU_SCHEDULED)
    {
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->nhu_state = NHU_RUNNING;
    }

  FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
      if (max_feed <= 0)
	{
	  FIB_ITERATE_PUT(fit);
	  ev_schedule(tab->rt_event);
	  return;
	}
      max_feed -= rt_next_hop_update_net(tab, n);
    }
  FIB_ITERATE_END;

  /* State change:
   *   NHU_DIRTY   -> NHU_SCHEDULED
   *   NHU_RUNNING -> NHU_CLEAN
   */
  tab->nhu_state &= 1;

  if (tab->nhu_state != NHU_CLEAN)
    ev_schedule(tab->rt_event);
}


struct rtable_config *
rt_new_table(struct symbol *s, uint addr_type)
{
  /* Hack that allows one to 'redefine' the master table */
  if ((s->class == SYM_TABLE) &&
      (s->table == new_config->def_tables[addr_type]) &&
      ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
    return s->table;

  struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));

  cf_define_symbol(s, SYM_TABLE, table, c);
  c->name = s->name;
  c->addr_type = addr_type;
  c->gc_max_ops = 1000;
  c->gc_min_time = 5;
  c->min_settle_time = 1 S;
  c->max_settle_time = 20 S;

  add_tail(&new_config->tables, &c->n);

  /* The first table of each type is kept as the default */
  if (!new_config->def_tables[addr_type])
    new_config->def_tables[addr_type] = c;

  return c;
}
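
/*
 * In the configuration language this corresponds to table declarations
 * like the following sketch (standard BIRD 2 syntax; the names are
 * arbitrary), on top of the implicit master4/master6 tables predefined
 * by rt_preconfig():
 *
 *   ipv4 table mytab4;
 *   ipv6 table mytab6;
 */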

/**
 * rt_lock_table - lock a routing table
 * @r: routing table to be locked
 *
 * Lock a routing table, because it's in use by a protocol,
 * preventing it from being freed when it gets undefined in a new
 * configuration.
 */
void
rt_lock_table(rtable *r)
{
  r->use_count++;
}

/**
 * rt_unlock_table - unlock a routing table
 * @r: routing table to be unlocked
 *
 * Unlock a routing table formerly locked by rt_lock_table(),
 * that is, decrease its use count and delete it if it's scheduled
 * for deletion by configuration changes.
 */
void
rt_unlock_table(rtable *r)
{
  if (!--r->use_count && r->deleted)
    {
      struct config *conf = r->deleted;
      DBG("Deleting routing table %s\n", r->name);
      r->config->table = NULL;
      if (r->hostcache)
	rt_free_hostcache(r);
      rem_node(&r->n);
      fib_free(&r->fib);
      hmap_free(&r->id_map);
      rfree(r->rt_event);
      rfree(r->settle_timer);
      mb_free(r);
      config_del_obstacle(conf);
    }
}

static struct rtable_config *
rt_find_table_config(struct config *cf, char *name)
{
  struct symbol *sym = cf_find_symbol(cf, name);
  return (sym && (sym->class == SYM_TABLE)) ? sym->table : NULL;
}

/**
 * rt_commit - commit new routing table configuration
 * @new: new configuration
 * @old: original configuration or %NULL if it's boot time config
 *
 * Scan differences between @old and @new configuration and modify
 * the routing tables according to these changes. If @new defines a
 * previously unknown table, create it; if it omits a table existing
 * in @old, schedule it for deletion (it gets deleted when all protocols
 * disconnect from it by calling rt_unlock_table()); if it exists
 * in both configurations, leave it unchanged.
 */
void
rt_commit(struct config *new, struct config *old)
{
  struct rtable_config *o, *r;

  DBG("rt_commit:\n");
  if (old)
    {
      WALK_LIST(o, old->tables)
	{
	  rtable *ot = o->table;
	  if (!ot->deleted)
	    {
	      r = rt_find_table_config(new, o->name);
	      if (r && (r->addr_type == o->addr_type) && !new->shutdown)
		{
		  DBG("\t%s: same\n", o->name);
		  r->table = ot;
		  ot->name = r->name;
		  ot->config = r;
		  if (o->sorted != r->sorted)
		    log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
		}
	      else
		{
		  DBG("\t%s: deleted\n", o->name);
		  ot->deleted = old;
		  config_add_obstacle(old);
		  rt_lock_table(ot);
		  rt_unlock_table(ot);
		}
	    }
	}
    }

  WALK_LIST(r, new->tables)
    if (!r->table)
      {
	rtable *t = mb_allocz(rt_table_pool, sizeof(struct rtable));
	DBG("\t%s: created\n", r->name);
	rt_setup(rt_table_pool, t, r);
	add_tail(&routing_tables, &t->n);
	r->table = t;
      }
  DBG("\tdone\n");
}

static inline void
do_feed_channel(struct channel *c, net *n, rte *e)
{
  rte_update_lock();
  if (c->ra_mode == RA_ACCEPTED)
    rt_notify_accepted(c, n, NULL, NULL, c->refeeding);
  else if (c->ra_mode == RA_MERGED)
    rt_notify_merged(c, n, NULL, NULL, e, e, c->refeeding);
  else /* RA_BASIC */
    rt_notify_basic(c, n, e, e, c->refeeding);
  rte_update_unlock();
}

/**
 * rt_feed_channel - advertise all routes to a channel
 * @c: channel to be fed
 *
 * This function performs one pass of advertisement of routes to a channel that
 * is in the ES_FEEDING state. It is called by the protocol code as long as it
 * has something to do. (We avoid transferring all the routes in a single pass in
 * order not to monopolize CPU time.)
 */
int
rt_feed_channel(struct channel *c)
{
  struct fib_iterator *fit = &c->feed_fit;
  int max_feed = 256;

  ASSERT(c->export_state == ES_FEEDING);

  if (!c->feed_active)
    {
      FIB_ITERATE_INIT(fit, &c->table->fib);
      c->feed_active = 1;
    }

  FIB_ITERATE_START(&c->table->fib, fit, net, n)
    {
      rte *e = n->routes;
      if (max_feed <= 0)
	{
	  FIB_ITERATE_PUT(fit);
	  return 0;
	}

      if ((c->ra_mode == RA_OPTIMAL) ||
	  (c->ra_mode == RA_ACCEPTED) ||
	  (c->ra_mode == RA_MERGED))
	if (rte_is_valid(e))
	  {
	    /* In the meantime, the protocol may have gone down */
	    if (c->export_state != ES_FEEDING)
	      goto done;

	    do_feed_channel(c, n, e);
	    max_feed--;
	  }

      if (c->ra_mode == RA_ANY)
	for (e = n->routes; e; e = e->next)
	  {
	    /* In the meantime, the protocol may have gone down */
	    if (c->export_state != ES_FEEDING)
	      goto done;

	    if (!rte_is_valid(e))
	      continue;

	    do_feed_channel(c, n, e);
	    max_feed--;
	  }
    }
  FIB_ITERATE_END;

done:
  c->feed_active = 0;
  return 1;
}
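
/*
 * The caller is expected to iterate on the return value -- 0 means the
 * batch limit was hit and the iterator was parked, 1 means the feed is
 * complete. A sketch of the driving event (the function name is
 * illustrative; the real loop lives in the protocol core and uses the
 * channel's feed_event):
 */
#if 0
static void
channel_feed_loop_sketch(void *data)
{
  struct channel *c = data;

  if (!rt_feed_channel(c))
  {
    ev_schedule(c->feed_event);	/* Batch limit hit, continue later */
    return;
  }

  /* All routes fed; the channel may leave ES_FEEDING */
}
#endif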

/**
 * rt_feed_channel_abort - abort protocol feeding
 * @c: channel
 *
 * This function is called by the protocol code when the protocol stops or
 * ceases to exist during the feeding.
 */
void
rt_feed_channel_abort(struct channel *c)
{
  if (c->feed_active)
    {
      /* Unlink the iterator */
      fit_get(&c->table->fib, &c->feed_fit);
      c->feed_active = 0;
    }
}


/*
 *	Import table
 */

int
rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
{
  struct rtable *tab = c->in_table;
  rte *old, **pos;
  net *net;

  if (new)
  {
    net = net_get(tab, n);

    if (!new->pref)
      new->pref = c->preference;

    if (!rta_is_cached(new->attrs))
      new->attrs = rta_lookup(new->attrs);
  }
  else
  {
    net = net_find(tab, n);

    if (!net)
      goto drop_withdraw;
  }

  /* Find the old rte */
  for (pos = &net->routes; old = *pos; pos = &old->next)
    if (old->attrs->src == src)
    {
      if (new && rte_same(old, new))
      {
	/* Refresh the old rte, continue with update to main rtable */
	if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY))
	{
	  old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);
	  return 1;
	}

	goto drop_update;
      }

      /* Move iterator if needed */
      if (old == c->reload_next_rte)
	c->reload_next_rte = old->next;

      /* Remove the old rte */
      *pos = old->next;
      rte_free_quick(old);
      tab->rt_count--;

      break;
    }

  if (!new)
  {
    if (!old)
      goto drop_withdraw;

    return 1;
  }

  struct channel_limit *l = &c->rx_limit;
  if (l->action && !old)
  {
    if (tab->rt_count >= l->limit)
      channel_notify_limit(c, l, PLD_RX, tab->rt_count);

    if (l->state == PLS_BLOCKED)
    {
      /* Required by rte_trace_in() */
      new->net = net;

      rte_trace_in(D_FILTERS, c, new, "ignored [limit]");
      goto drop_update;
    }
  }

  /* Insert the new rte */
  rte *e = rte_do_cow(new);
  e->flags |= REF_COW;
  e->net = net;
  e->sender = c;
  e->lastmod = current_time();
  e->next = *pos;
  *pos = e;
  tab->rt_count++;
  return 1;

drop_update:
  c->stats.imp_updates_received++;
  c->stats.imp_updates_ignored++;
  rte_free(new);
  return 0;

drop_withdraw:
  c->stats.imp_withdraws_received++;
  c->stats.imp_withdraws_ignored++;
  return 0;
}

int
rt_reload_channel(struct channel *c)
{
  struct rtable *tab = c->in_table;
  struct fib_iterator *fit = &c->reload_fit;
  int max_feed = 64;

  ASSERT(c->channel_state == CS_UP);

  if (!c->reload_active)
  {
    FIB_ITERATE_INIT(fit, &tab->fib);
    c->reload_active = 1;
  }

  do {
    for (rte *e = c->reload_next_rte; e; e = e->next)
    {
      if (max_feed-- <= 0)
      {
	c->reload_next_rte = e;
	debug("%s channel reload burst split (max_feed=%d)", c->proto->name, max_feed);
	return 0;
      }

      rte_update2(c, e->net->n.addr, rte_do_cow(e), e->attrs->src);
    }

    c->reload_next_rte = NULL;

    FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
      if (c->reload_next_rte = n->routes)
      {
	FIB_ITERATE_PUT_NEXT(fit, &tab->fib);
	break;
      }
    }
    FIB_ITERATE_END;
  }
  while (c->reload_next_rte);

  c->reload_active = 0;
  return 1;
}

void
rt_reload_channel_abort(struct channel *c)
{
  if (c->reload_active)
  {
    /* Unlink the iterator */
    fit_get(&c->in_table->fib, &c->reload_fit);
    c->reload_next_rte = NULL;
    c->reload_active = 0;
  }
}

void
rt_prune_sync(rtable *t, int all)
{
  FIB_WALK(&t->fib, net, n)
  {
    rte *e, **ee = &n->routes;
    while (e = *ee)
    {
      if (all || (e->flags & (REF_STALE | REF_DISCARD)))
      {
	*ee = e->next;
	rte_free_quick(e);
	t->rt_count--;
      }
      else
	ee = &e->next;
    }
  }
  FIB_WALK_END;
}


/*
 *	Export table
 */

int
rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, int refeed)
{
  struct rtable *tab = c->out_table;
  struct rte_src *src;
  rte *old, **pos;
  net *net;

  if (new)
  {
    net = net_get(tab, n);
    src = new->attrs->src;

    rte_store_tmp_attrs(new, rte_update_pool, NULL);

    if (!rta_is_cached(new->attrs))
      new->attrs = rta_lookup(new->attrs);
  }
  else
  {
    net = net_find(tab, n);
    src = old0->attrs->src;

    if (!net)
      goto drop_withdraw;
  }

  /* Find the old rte */
  for (pos = &net->routes; old = *pos; pos = &old->next)
    if ((c->ra_mode != RA_ANY) || (old->attrs->src == src))
    {
      if (new && rte_same(old, new))
      {
	/* REF_STALE / REF_DISCARD not used in export table */
	/*
	if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY))
	{
	  old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);
	  return 1;
	}
	*/

	goto drop_update;
      }

      /* Remove the old rte */
      *pos = old->next;
      rte_free_quick(old);
      tab->rt_count--;

      break;
    }

  if (!new)
  {
    if (!old)
      goto drop_withdraw;

    return 1;
  }

  /* Insert the new rte */
  rte *e = rte_do_cow(new);
  e->flags |= REF_COW;
  e->net = net;
  e->sender = c;
  e->lastmod = current_time();
  e->next = *pos;
  *pos = e;
  tab->rt_count++;
  return 1;

drop_update:
  return refeed;

drop_withdraw:
  return 0;
}


/*
 *	Hostcache
 */

static inline u32
hc_hash(ip_addr a, rtable *dep)
{
  return ipa_hash(a) ^ ptr_hash(dep);
}

static inline void
hc_insert(struct hostcache *hc, struct hostentry *he)
{
  uint k = he->hash_key >> hc->hash_shift;
  he->next = hc->hash_table[k];
  hc->hash_table[k] = he;
}

static inline void
hc_remove(struct hostcache *hc, struct hostentry *he)
{
  struct hostentry **hep;
  uint k = he->hash_key >> hc->hash_shift;

  for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
  *hep = he->next;
}

#define HC_DEF_ORDER 10
#define HC_HI_MARK *4
#define HC_HI_STEP 2
#define HC_HI_ORDER 16			/* Must be at most 16 */
#define HC_LO_MARK /5
#define HC_LO_STEP 2
#define HC_LO_ORDER 10
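
/*
 * The mark macros above are spliced textually after a size expression in
 * hc_alloc_table(), so "hsize HC_HI_MARK" expands to "hsize *4" and
 * "hsize HC_LO_MARK" to "hsize /5". A worked example: at the default
 * order 10, hsize = 1024, hash_max = 4096 and hash_min = 0 (orders at or
 * below HC_LO_ORDER never shrink); after growing by HC_HI_STEP to order
 * 12, hash_max = 16384 and hash_min = 4096/5 = 819.
 */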

static void
hc_alloc_table(struct hostcache *hc, unsigned order)
{
  uint hsize = 1 << order;
  hc->hash_order = order;
  hc->hash_shift = 32 - order;
  hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK);
  hc->hash_min = (order <= HC_LO_ORDER) ?  0U : (hsize HC_LO_MARK);

  hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
}

static void
hc_resize(struct hostcache *hc, unsigned new_order)
{
  struct hostentry **old_table = hc->hash_table;
  struct hostentry *he, *hen;
  uint old_size = 1 << hc->hash_order;
  uint i;

  hc_alloc_table(hc, new_order);
  for (i = 0; i < old_size; i++)
    for (he = old_table[i]; he != NULL; he = hen)
      {
	hen = he->next;
	hc_insert(hc, he);
      }
  mb_free(old_table);
}

static struct hostentry *
hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
{
  struct hostentry *he = sl_alloc(hc->slab);

  *he = (struct hostentry) {
    .addr = a,
    .link = ll,
    .tab = dep,
    .hash_key = k,
  };

  add_tail(&hc->hostentries, &he->ln);
  hc_insert(hc, he);

  hc->hash_items++;
  if (hc->hash_items > hc->hash_max)
    hc_resize(hc, hc->hash_order + HC_HI_STEP);

  return he;
}

static void
hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
{
  rta_free(he->src);

  rem_node(&he->ln);
  hc_remove(hc, he);
  sl_free(hc->slab, he);

  hc->hash_items--;
  if (hc->hash_items < hc->hash_min)
    hc_resize(hc, hc->hash_order - HC_LO_STEP);
}

static void
rt_init_hostcache(rtable *tab)
{
  struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
  init_list(&hc->hostentries);

  hc->hash_items = 0;
  hc_alloc_table(hc, HC_DEF_ORDER);
  hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));

  hc->lp = lp_new(rt_table_pool, LP_GOOD_SIZE(1024));
  hc->trie = f_new_trie(hc->lp, 0);

  tab->hostcache = hc;
}

static void
rt_free_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;

  node *n;
  WALK_LIST(n, hc->hostentries)
    {
      struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
      rta_free(he->src);

      if (he->uc)
	log(L_ERR "Hostcache is not empty in table %s", tab->name);
    }

  rfree(hc->slab);
  rfree(hc->lp);
  mb_free(hc->hash_table);
  mb_free(hc);
}

static void
rt_notify_hostcache(rtable *tab, net *net)
{
  if (tab->hcu_scheduled)
    return;

  if (trie_match_net(tab->hostcache->trie, net->n.addr))
    rt_schedule_hcu(tab);
}

static int
if_local_addr(ip_addr a, struct iface *i)
{
  struct ifa *b;

  WALK_LIST(b, i->addrs)
    if (ipa_equal(a, b->ip))
      return 1;

  return 0;
}

u32
rt_get_igp_metric(rte *rt)
{
  eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);

  if (ea)
    return ea->u.data;

  rta *a = rt->attrs;

#ifdef CONFIG_OSPF
  if ((a->source == RTS_OSPF) ||
      (a->source == RTS_OSPF_IA) ||
      (a->source == RTS_OSPF_EXT1))
    return rt->u.ospf.metric1;
#endif

#ifdef CONFIG_RIP
  if (a->source == RTS_RIP)
    return rt->u.rip.metric;
#endif

#ifdef CONFIG_BGP
  if (a->source == RTS_BGP)
  {
    u64 metric = bgp_total_aigp_metric(rt);
    return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN);
  }
#endif

#ifdef CONFIG_BABEL
  if (a->source == RTS_BABEL)
    return rt->u.babel.metric;
#endif

  if (a->source == RTS_DEVICE)
    return 0;

  return IGP_METRIC_UNKNOWN;
}

static int
rt_update_hostentry(rtable *tab, struct hostentry *he)
{
  rta *old_src = he->src;
  int direct = 0;
  int pxlen = 0;

  /* Reset the hostentry */
  he->src = NULL;
  he->dest = RTD_UNREACHABLE;
  he->nexthop_linkable = 0;
  he->igp_metric = 0;

  net_addr he_addr;
  net_fill_ip_host(&he_addr, he->addr);
  net *n = net_route(tab, &he_addr);
  if (n)
    {
      rte *e = n->routes;
      rta *a = e->attrs;
      pxlen = n->n.addr->pxlen;

      if (a->hostentry)
	{
	  /* Recursive route should not depend on another recursive route */
	  log(L_WARN "Next hop address %I resolvable through recursive route for %N",
	      he->addr, n->n.addr);
	  goto done;
	}

      if (a->dest == RTD_UNICAST)
	{
	  for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
	    if (ipa_zero(nh->gw))
	      {
		if (if_local_addr(he->addr, nh->iface))
		  {
		    /* The host address is a local address, this is not valid */
		    log(L_WARN "Next hop address %I is a local address of iface %s",
			he->addr, nh->iface->name);
		    goto done;
		  }

		direct++;
	      }
	}

      he->src = rta_clone(a);
      he->dest = a->dest;
      he->nexthop_linkable = !direct;
      he->igp_metric = rt_get_igp_metric(e);
    }

done:
  /* Add a prefix range to the trie */
  trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);

  rta_free(old_src);
  return old_src != he->src;
}
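
/*
 * An example of the trie bookkeeping above (addresses hypothetical):
 * if he->addr = 192.0.2.1 currently resolves through a route for
 * 192.0.2.0/24, the trie records that prefix with the pxlen range
 * /24../32. Any later change to a more specific covering route (say
 * 192.0.2.0/28) -- which could alter the resolution -- then matches in
 * rt_notify_hostcache() and triggers a hostcache update.
 */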

static void
rt_update_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;
  struct hostentry *he;
  node *n, *x;

  /* Reset the trie */
  lp_flush(hc->lp);
  hc->trie = f_new_trie(hc->lp, 0);

  WALK_LIST_DELSAFE(n, x, hc->hostentries)
    {
      he = SKIP_BACK(struct hostentry, ln, n);
      if (!he->uc)
	{
	  hc_delete_hostentry(hc, he);
	  continue;
	}

      if (rt_update_hostentry(tab, he))
	rt_schedule_nhu(he->tab);
    }

  tab->hcu_scheduled = 0;
}

struct hostentry *
rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
{
  struct hostentry *he;

  if (!tab->hostcache)
    rt_init_hostcache(tab);

  u32 k = hc_hash(a, dep);
  struct hostcache *hc = tab->hostcache;
  for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
    if (ipa_equal(he->addr, a) && (he->tab == dep))
      return he;

  he = hc_new_hostentry(hc, a, ipa_zero(ll) ? a : ll, dep, k);
  rt_update_hostentry(tab, he);
  return he;
}
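
/*
 * A hedged sketch of how a protocol with recursive next hops (cf. BGP)
 * might tie the hostcache pieces together; the function and variable
 * names are illustrative only:
 */
#if 0
static void
resolve_recursive_nexthop(rta *a, rtable *igp_table, rtable *dep_table,
			  ip_addr gw, ip_addr ll, mpls_label_stack *mls)
{
  /* Look up (or create) the hostentry for gw, resolved in igp_table;
     dep_table gets a next hop update whenever the resolution changes. */
  struct hostentry *he = rt_get_hostentry(igp_table, gw, ll, dep_table);

  /* Copy dest, IGP metric and the (possibly labeled) next hops
     into the route attributes. */
  rta_apply_hostentry(a, he, mls);
}
#endif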


/*
 *  Documentation for functions declared inline in route.h
 */
#if 0

/**
 * net_find - find a network entry
 * @tab: a routing table
 * @addr: address of the network
 *
 * net_find() looks up the given network in routing table @tab and
 * returns a pointer to its &net entry or %NULL if no such network
 * exists.
 */
static inline net *net_find(rtable *tab, net_addr *addr)
{ DUMMY; }

/**
 * net_get - obtain a network entry
 * @tab: a routing table
 * @addr: address of the network
 *
 * net_get() looks up the given network in routing table @tab and
 * returns a pointer to its &net entry. If no such entry exists, it's
 * created.
 */
static inline net *net_get(rtable *tab, net_addr *addr)
{ DUMMY; }

/**
 * rte_cow - copy a route for writing
 * @r: a route entry to be copied
 *
 * rte_cow() takes a &rte and prepares it for modification. The exact action
 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
 * just returned unchanged, else a new temporary entry with the same contents
 * is created.
 *
 * The primary use of this function is inside the filter machinery -- when
 * a filter wants to modify &rte contents (to change the preference or to
 * attach another set of attributes), it must ensure that the &rte is not
 * shared with anyone else (and especially that it isn't stored in any routing
 * table).
 *
 * Result: a pointer to the new writable &rte.
 */
static inline rte * rte_cow(rte *r)
{ DUMMY; }

#endif