1 /*
2 * BIRD -- Route Attribute Cache
3 *
4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9 /**
10 * DOC: Route attribute cache
11 *
12 * Each route entry carries a set of route attributes. Several of them
13 * vary from route to route, but most attributes are usually common
14 * for a large number of routes. To conserve memory, we've decided to
15 * store only the varying ones directly in the &rte and hold the rest
16 * in a special structure called &rta which is shared among all the
17 * &rte's with these attributes.
18 *
19 * Each &rta contains all the static attributes of the route (i.e.,
20 * those which are always present) as structure members and a list of
21 * dynamic attributes represented by a linked list of &ea_list
22 * structures, each of them consisting of an array of &eattr's containing
23 * the individual attributes. An attribute can be specified more than once
24 * in the &ea_list chain and in such case the first occurrence overrides
25 * the others. This semantics is used especially when someone (for example
26 * a filter) wishes to alter values of several dynamic attributes, but
27 * it wants to preserve the original attribute lists maintained by
28 * another module.
29 *
30 * Each &eattr contains an attribute identifier (split to protocol ID and
31 * per-protocol attribute ID), protocol dependent flags, a type code (consisting
32 * of several bit fields describing attribute characteristics) and either an
33 * embedded 32-bit value or a pointer to a &adata structure holding attribute
34 * contents.
35 *
36 * There exist two variants of &rta's -- cached and un-cached ones. Un-cached
37 * &rta's can have arbitrarily complex structure of &ea_list's and they
38 * can be modified by any module in the route processing chain. Cached
39 * &rta's have their attribute lists normalized (that means at most one
40 * &ea_list is present and its values are sorted in order to speed up
41 * searching), they are stored in a hash table to make fast lookup possible
42 * and they are provided with a use count to allow sharing.
43 *
44 * Routing tables always contain only cached &rta's.
45 */
46
47 #include "nest/bird.h"
48 #include "nest/route.h"
49 #include "nest/protocol.h"
50 #include "nest/iface.h"
51 #include "nest/cli.h"
52 #include "nest/attrs.h"
53 #include "lib/alloca.h"
54 #include "lib/hash.h"
55 #include "lib/idm.h"
56 #include "lib/resource.h"
57 #include "lib/string.h"
58
59 #include <stddef.h>
60
/* Shared, zero-initialized adata object, i.e. attribute data of length 0 */
const adata null_adata;
62
/* Printable names of route sources, indexed by the RTS_* value (see rta_show()) */
const char * const rta_src_names[RTS_MAX] = {
  [RTS_DUMMY]		= "",
  [RTS_STATIC]		= "static",
  [RTS_INHERIT]		= "inherit",
  [RTS_DEVICE]		= "device",
  [RTS_STATIC_DEVICE]	= "static-device",
  [RTS_REDIRECT]	= "redirect",
  [RTS_RIP]		= "RIP",
  [RTS_OSPF]		= "OSPF",
  [RTS_OSPF_IA]		= "OSPF-IA",
  [RTS_OSPF_EXT1]	= "OSPF-E1",
  [RTS_OSPF_EXT2]	= "OSPF-E2",
  [RTS_BGP]		= "BGP",
  [RTS_PIPE]		= "pipe",
  [RTS_BABEL]		= "Babel",
  [RTS_RPKI]		= "RPKI",
};
80
/* Printable names of route destination types, indexed by the RTD_* value */
const char * rta_dest_names[RTD_MAX] = {
  [RTD_NONE]		= "",
  [RTD_UNICAST]		= "unicast",
  [RTD_BLACKHOLE]	= "blackhole",
  [RTD_UNREACHABLE]	= "unreachable",
  [RTD_PROHIBIT]	= "prohibited",
};
88
/* Pool backing the attribute cache: slabs, hash tables and cached attribute data */
pool *rta_pool;

/*
 * Slabs for &rta's and for additional nexthops. rta_init() creates four of
 * each, sized for 0, 1, 2 and MPLS_MAX_LABEL_STACK labels; rta_slab() and
 * nexthop_slab() pick the right one according to the label count.
 */
static slab *rta_slab_[4];
static slab *nexthop_slab_[4];

/* Slab for struct rte_src */
static slab *rte_src_slab;

/* Allocator of global route source IDs (see rt_get_source()) */
static struct idm src_ids;
#define SRC_ID_INIT_SIZE 4	/* Initial size handed to idm_init() */
97
/* rte source hash */

/*
 * Parameters of the generic hash (lib/hash.h) instantiated for route sources.
 * A source is keyed by the pair (owning protocol, protocol-private ID).
 */
#define RSH_KEY(n)		n->proto, n->private_id
#define RSH_NEXT(n)		n->next
#define RSH_EQ(p1,n1,p2,n2)	p1 == p2 && n1 == n2
#define RSH_FN(p,n)		p->hash_key ^ u32_hash(n)

/* NOTE(review): the exact meaning of the resize parameters is defined by lib/hash.h */
#define RSH_REHASH		rte_src_rehash
#define RSH_PARAMS		/2, *2, 1, 1, 8, 20
#define RSH_INIT_ORDER		6

/* Hash table of all known route sources */
static HASH(struct rte_src) src_hash;
110
111 static void
rte_src_init(void)112 rte_src_init(void)
113 {
114 rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src));
115
116 idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE);
117
118 HASH_INIT(src_hash, rta_pool, RSH_INIT_ORDER);
119 }
120
121
/* Instantiate the rehash function for the rte source hash (presumably the one named by RSH_REHASH) */
HASH_DEFINE_REHASH_FN(RSH, struct rte_src)
123
124 struct rte_src *
125 rt_find_source(struct proto *p, u32 id)
126 {
127 return HASH_FIND(src_hash, RSH, p, id);
128 }
129
130 struct rte_src *
rt_get_source(struct proto * p,u32 id)131 rt_get_source(struct proto *p, u32 id)
132 {
133 struct rte_src *src = rt_find_source(p, id);
134
135 if (src)
136 return src;
137
138 src = sl_allocz(rte_src_slab);
139 src->proto = p;
140 src->private_id = id;
141 src->global_id = idm_alloc(&src_ids);
142 src->uc = 0;
143
144 HASH_INSERT2(src_hash, RSH, rta_pool, src);
145
146 return src;
147 }
148
149 void
rt_prune_sources(void)150 rt_prune_sources(void)
151 {
152 HASH_WALK_FILTER(src_hash, next, src, sp)
153 {
154 if (src->uc == 0)
155 {
156 HASH_DO_REMOVE(src_hash, RSH, sp);
157 idm_free(&src_ids, src->global_id);
158 sl_free(rte_src_slab, src);
159 }
160 }
161 HASH_WALK_FILTER_END;
162
163 HASH_MAY_RESIZE_DOWN(src_hash, RSH, rta_pool);
164 }
165
166
167 /*
168 * Multipath Next Hop
169 */
170
171 static inline u32
nexthop_hash(struct nexthop * x)172 nexthop_hash(struct nexthop *x)
173 {
174 u32 h = 0;
175 for (; x; x = x->next)
176 {
177 h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h >> 9);
178
179 for (int i = 0; i < x->labels; i++)
180 h ^= x->label[i] ^ (h << 6) ^ (h >> 7);
181 }
182
183 return h;
184 }
185
186 int
nexthop__same(struct nexthop * x,struct nexthop * y)187 nexthop__same(struct nexthop *x, struct nexthop *y)
188 {
189 for (; x && y; x = x->next, y = y->next)
190 {
191 if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) ||
192 (x->flags != y->flags) || (x->weight != y->weight) ||
193 (x->labels_orig != y->labels_orig) || (x->labels != y->labels))
194 return 0;
195
196 for (int i = 0; i < x->labels; i++)
197 if (x->label[i] != y->label[i])
198 return 0;
199 }
200
201 return x == y;
202 }
203
204 static int
nexthop_compare_node(const struct nexthop * x,const struct nexthop * y)205 nexthop_compare_node(const struct nexthop *x, const struct nexthop *y)
206 {
207 int r;
208
209 if (!x)
210 return 1;
211
212 if (!y)
213 return -1;
214
215 /* Should we also compare flags ? */
216
217 r = ((int) y->weight) - ((int) x->weight);
218 if (r)
219 return r;
220
221 r = ipa_compare(x->gw, y->gw);
222 if (r)
223 return r;
224
225 r = ((int) y->labels) - ((int) x->labels);
226 if (r)
227 return r;
228
229 for (int i = 0; i < y->labels; i++)
230 {
231 r = ((int) y->label[i]) - ((int) x->label[i]);
232 if (r)
233 return r;
234 }
235
236 return ((int) x->iface->index) - ((int) y->iface->index);
237 }
238
239 static inline struct nexthop *
nexthop_copy_node(const struct nexthop * src,linpool * lp)240 nexthop_copy_node(const struct nexthop *src, linpool *lp)
241 {
242 struct nexthop *n = lp_alloc(lp, nexthop_size(src));
243
244 memcpy(n, src, nexthop_size(src));
245 n->next = NULL;
246
247 return n;
248 }
249
250 /**
251 * nexthop_merge - merge nexthop lists
252 * @x: list 1
253 * @y: list 2
254 * @rx: reusability of list @x
255 * @ry: reusability of list @y
256 * @max: max number of nexthops
257 * @lp: linpool for allocating nexthops
258 *
259 * The nexthop_merge() function takes two nexthop lists @x and @y and merges them,
260 * eliminating possible duplicates. The input lists must be sorted and the
261 * result is sorted too. The number of nexthops in result is limited by @max.
262 * New nodes are allocated from linpool @lp.
263 *
264 * The arguments @rx and @ry specify whether corresponding input lists may be
265 * consumed by the function (i.e. their nodes reused in the resulting list), in
266 * that case the caller should not access these lists after that. To eliminate
267 * issues with deallocation of these lists, the caller should use some form of
268 * bulk deallocation (e.g. stack or linpool) to free these nodes when the
269 * resulting list is no longer needed. When reusability is not set, the
270 * corresponding lists are not modified nor linked from the resulting list.
271 */
272 struct nexthop *
nexthop_merge(struct nexthop * x,struct nexthop * y,int rx,int ry,int max,linpool * lp)273 nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp)
274 {
275 struct nexthop *root = NULL;
276 struct nexthop **n = &root;
277
278 while ((x || y) && max--)
279 {
280 int cmp = nexthop_compare_node(x, y);
281
282 if (cmp < 0)
283 {
284 ASSUME(x);
285 *n = rx ? x : nexthop_copy_node(x, lp);
286 x = x->next;
287 }
288 else if (cmp > 0)
289 {
290 ASSUME(y);
291 *n = ry ? y : nexthop_copy_node(y, lp);
292 y = y->next;
293 }
294 else
295 {
296 ASSUME(x && y);
297 *n = rx ? x : (ry ? y : nexthop_copy_node(x, lp));
298 x = x->next;
299 y = y->next;
300 }
301 n = &((*n)->next);
302 }
303 *n = NULL;
304
305 return root;
306 }
307
308 void
nexthop_insert(struct nexthop ** n,struct nexthop * x)309 nexthop_insert(struct nexthop **n, struct nexthop *x)
310 {
311 for (; *n; n = &((*n)->next))
312 {
313 int cmp = nexthop_compare_node(*n, x);
314
315 if (cmp < 0)
316 continue;
317 else if (cmp > 0)
318 break;
319 else
320 return;
321 }
322
323 x->next = *n;
324 *n = x;
325 }
326
327 struct nexthop *
nexthop_sort(struct nexthop * x)328 nexthop_sort(struct nexthop *x)
329 {
330 struct nexthop *s = NULL;
331
332 /* Simple insert-sort */
333 while (x)
334 {
335 struct nexthop *n = x;
336 x = n->next;
337 n->next = NULL;
338
339 nexthop_insert(&s, n);
340 }
341
342 return s;
343 }
344
345 int
nexthop_is_sorted(struct nexthop * x)346 nexthop_is_sorted(struct nexthop *x)
347 {
348 for (; x && x->next; x = x->next)
349 if (nexthop_compare_node(x, x->next) >= 0)
350 return 0;
351
352 return 1;
353 }
354
355 static inline slab *
nexthop_slab(struct nexthop * nh)356 nexthop_slab(struct nexthop *nh)
357 {
358 return nexthop_slab_[MIN(nh->labels, 3)];
359 }
360
361 static struct nexthop *
nexthop_copy(struct nexthop * o)362 nexthop_copy(struct nexthop *o)
363 {
364 struct nexthop *first = NULL;
365 struct nexthop **last = &first;
366
367 for (; o; o = o->next)
368 {
369 struct nexthop *n = sl_allocz(nexthop_slab(o));
370 n->gw = o->gw;
371 n->iface = o->iface;
372 n->next = NULL;
373 n->flags = o->flags;
374 n->weight = o->weight;
375 n->labels_orig = o->labels_orig;
376 n->labels = o->labels;
377 for (int i=0; i<o->labels; i++)
378 n->label[i] = o->label[i];
379
380 *last = n;
381 last = &(n->next);
382 }
383
384 return first;
385 }
386
387 static void
nexthop_free(struct nexthop * o)388 nexthop_free(struct nexthop *o)
389 {
390 struct nexthop *n;
391
392 while (o)
393 {
394 n = o->next;
395 sl_free(nexthop_slab(o), o);
396 o = n;
397 }
398 }
399
400
401 /*
402 * Extended Attributes
403 */
404
405 static inline eattr *
ea__find(ea_list * e,unsigned id)406 ea__find(ea_list *e, unsigned id)
407 {
408 eattr *a;
409 int l, r, m;
410
411 while (e)
412 {
413 if (e->flags & EALF_BISECT)
414 {
415 l = 0;
416 r = e->count - 1;
417 while (l <= r)
418 {
419 m = (l+r) / 2;
420 a = &e->attrs[m];
421 if (a->id == id)
422 return a;
423 else if (a->id < id)
424 l = m+1;
425 else
426 r = m-1;
427 }
428 }
429 else
430 for(m=0; m<e->count; m++)
431 if (e->attrs[m].id == id)
432 return &e->attrs[m];
433 e = e->next;
434 }
435 return NULL;
436 }
437
438 /**
439 * ea_find - find an extended attribute
440 * @e: attribute list to search in
441 * @id: attribute ID to search for
442 *
443 * Given an extended attribute list, ea_find() searches for a first
444 * occurrence of an attribute with specified ID, returning either a pointer
445 * to its &eattr structure or %NULL if no such attribute exists.
446 */
447 eattr *
ea_find(ea_list * e,unsigned id)448 ea_find(ea_list *e, unsigned id)
449 {
450 eattr *a = ea__find(e, id & EA_CODE_MASK);
451
452 if (a && (a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF &&
453 !(id & EA_ALLOW_UNDEF))
454 return NULL;
455 return a;
456 }
457
/**
 * ea_walk - walk through extended attributes
 * @s: walk state structure
 * @id: start of attribute ID interval
 * @max: length of attribute ID interval
 *
 * Given an extended attribute list, ea_walk() walks through the list looking
 * for first occurrences of attributes with ID in specified interval from @id to
 * (@id + @max - 1), returning pointers to found &eattr structures, storing its
 * walk state in @s for subsequent calls.
 *
 * The function ea_walk() is supposed to be called in a loop. The walk state
 * structure @s must be zeroed initially, with only its eattrs field set to the
 * extended attribute list to be walked. Each call returns one found attribute,
 * or %NULL when no other attribute exists. The extended attribute list or the
 * arguments should not be modified between calls. The maximum value of @max
 * is 128.
 */
eattr *
ea_walk(struct ea_walk_state *s, uint id, uint max)
{
  ea_list *e = s->eattrs;
  eattr *a = s->ea;
  eattr *a_max;

  /* From now on, @max is the exclusive upper bound of the ID interval */
  max = id + max;

  /*
   * Resuming a previous walk: jump right into the scan of the saved segment.
   * The saved attribute is already marked as visited, so it gets skipped.
   */
  if (a)
    goto step;

  for (; e; e = e->next)
  {
    if (e->flags & EALF_BISECT)
    {
      int l, r, m;

      /* Bisect for the first attribute with ID not less than @id */
      l = 0;
      r = e->count - 1;
      while (l < r)
      {
	m = (l+r) / 2;
	if (e->attrs[m].id < id)
	  l = m + 1;
	else
	  r = m;
      }
      a = e->attrs + l;
    }
    else
      a = e->attrs;

  step:
    a_max = e->attrs + e->count;
    for (; a < a_max; a++)
      if ((a->id >= id) && (a->id < max))
      {
	int n = a->id - id;

	/* Only the first occurrence of each ID counts */
	if (BIT32_TEST(s->visited, n))
	  continue;

	BIT32_SET(s->visited, n);

	/* Undefined attributes are not reported, but they hide later occurrences */
	if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
	  continue;

	/* Save the position for the next call */
	s->eattrs = e;
	s->ea = a;
	return a;
      }
      else if (e->flags & EALF_BISECT)
	break;	/* Out of the interval; stop scanning this bisectable segment */
  }

  return NULL;
}
533
534 /**
535 * ea_get_int - fetch an integer attribute
536 * @e: attribute list
537 * @id: attribute ID
538 * @def: default value
539 *
540 * This function is a shortcut for retrieving a value of an integer attribute
541 * by calling ea_find() to find the attribute, extracting its value or returning
542 * a provided default if no such attribute is present.
543 */
544 int
ea_get_int(ea_list * e,unsigned id,int def)545 ea_get_int(ea_list *e, unsigned id, int def)
546 {
547 eattr *a = ea_find(e, id);
548 if (!a)
549 return def;
550 return a->u.data;
551 }
552
/*
 * Sort attributes of a single &ea_list by ID, in place, using a scratch
 * buffer for @n attributes obtained by alloca(). Attributes with equal IDs
 * keep their relative order.
 */
static inline void
ea_do_sort(ea_list *e)
{
  unsigned n = e->count;
  eattr *a = e->attrs;
  eattr *b = alloca(n * sizeof(eattr));
  unsigned s, ss;

  /* We need to use a stable sorting algorithm, hence mergesort */
  do
  {
    /*
     * One pass over the array: neighbouring pairs of non-decreasing runs
     * are merged. @ss remembers where the last processed pair started.
     */
    s = ss = 0;
    while (s < n)
    {
      eattr *p, *q, *lo, *hi;
      p = b;
      ss = s;
      /* Copy the first run to the scratch buffer */
      *p++ = a[s++];
      while (s < n && p[-1].id <= a[s].id)
	*p++ = a[s++];
      if (s < n)
      {
	/* Copy the second run right behind the first one; @q is the boundary */
	q = p;
	*p++ = a[s++];
	while (s < n && p[-1].id <= a[s].id)
	  *p++ = a[s++];
	/* Merge both runs back; on a tie the first run wins (stability) */
	lo = b;
	hi = q;
	s = ss;
	while (lo < q && hi < p)
	  if (lo->id <= hi->id)
	    a[s++] = *lo++;
	  else
	    a[s++] = *hi++;
	while (lo < q)
	  a[s++] = *lo++;
	while (hi < p)
	  a[s++] = *hi++;
      }
    }
  }
  /* Done when the last pair started at index 0, i.e. it covered the whole array */
  while (ss);
}
596
597 /**
598 * In place discard duplicates and undefs in sorted ea_list. We use stable sort
599 * for this reason.
600 **/
601 static inline void
ea_do_prune(ea_list * e)602 ea_do_prune(ea_list *e)
603 {
604 eattr *s, *d, *l, *s0;
605 int i = 0;
606
607 s = d = e->attrs; /* Beginning of the list. @s is source, @d is destination. */
608 l = e->attrs + e->count; /* End of the list */
609
610 /* Walk from begin to end. */
611 while (s < l)
612 {
613 s0 = s++;
614 /* Find a consecutive block of the same attribute */
615 while (s < l && s->id == s[-1].id)
616 s++;
617
618 /* Now s0 is the most recent version, s[-1] the oldest one */
619 /* Drop undefs */
620 if ((s0->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
621 continue;
622
623 /* Copy the newest version to destination */
624 *d = *s0;
625
626 /* Preserve info whether it originated locally */
627 d->type = (d->type & ~(EAF_ORIGINATED|EAF_FRESH)) | (s[-1].type & EAF_ORIGINATED);
628
629 /* Next destination */
630 d++;
631 i++;
632 }
633
634 e->count = i;
635 }
636
637 /**
638 * ea_sort - sort an attribute list
639 * @e: list to be sorted
640 *
641 * This function takes a &ea_list chain and sorts the attributes
642 * within each of its entries.
643 *
644 * If an attribute occurs multiple times in a single &ea_list,
645 * ea_sort() leaves only the first (the only significant) occurrence.
646 */
647 void
ea_sort(ea_list * e)648 ea_sort(ea_list *e)
649 {
650 while (e)
651 {
652 if (!(e->flags & EALF_SORTED))
653 {
654 ea_do_sort(e);
655 ea_do_prune(e);
656 e->flags |= EALF_SORTED;
657 }
658 if (e->count > 5)
659 e->flags |= EALF_BISECT;
660 e = e->next;
661 }
662 }
663
664 /**
665 * ea_scan - estimate attribute list size
666 * @e: attribute list
667 *
668 * This function calculates an upper bound of the size of
669 * a given &ea_list after merging with ea_merge().
670 */
671 unsigned
ea_scan(ea_list * e)672 ea_scan(ea_list *e)
673 {
674 unsigned cnt = 0;
675
676 while (e)
677 {
678 cnt += e->count;
679 e = e->next;
680 }
681 return sizeof(ea_list) + sizeof(eattr)*cnt;
682 }
683
684 /**
685 * ea_merge - merge segments of an attribute list
686 * @e: attribute list
687 * @t: buffer to store the result to
688 *
689 * This function takes a possibly multi-segment attribute list
690 * and merges all of its segments to one.
691 *
692 * The primary use of this function is for &ea_list normalization:
693 * first call ea_scan() to determine how much memory will the result
694 * take, then allocate a buffer (usually using alloca()), merge the
695 * segments with ea_merge() and finally sort and prune the result
696 * by calling ea_sort().
697 */
698 void
ea_merge(ea_list * e,ea_list * t)699 ea_merge(ea_list *e, ea_list *t)
700 {
701 eattr *d = t->attrs;
702
703 t->flags = 0;
704 t->count = 0;
705 t->next = NULL;
706 while (e)
707 {
708 memcpy(d, e->attrs, sizeof(eattr)*e->count);
709 t->count += e->count;
710 d += e->count;
711 e = e->next;
712 }
713 }
714
715 /**
716 * ea_same - compare two &ea_list's
717 * @x: attribute list
718 * @y: attribute list
719 *
720 * ea_same() compares two normalized attribute lists @x and @y and returns
721 * 1 if they contain the same attributes, 0 otherwise.
722 */
723 int
ea_same(ea_list * x,ea_list * y)724 ea_same(ea_list *x, ea_list *y)
725 {
726 int c;
727
728 if (!x || !y)
729 return x == y;
730 ASSERT(!x->next && !y->next);
731 if (x->count != y->count)
732 return 0;
733 for(c=0; c<x->count; c++)
734 {
735 eattr *a = &x->attrs[c];
736 eattr *b = &y->attrs[c];
737
738 if (a->id != b->id ||
739 a->flags != b->flags ||
740 a->type != b->type ||
741 ((a->type & EAF_EMBEDDED) ? a->u.data != b->u.data : !adata_same(a->u.ptr, b->u.ptr)))
742 return 0;
743 }
744 return 1;
745 }
746
747 static inline ea_list *
ea_list_copy(ea_list * o)748 ea_list_copy(ea_list *o)
749 {
750 ea_list *n;
751 unsigned i, len;
752
753 if (!o)
754 return NULL;
755 ASSERT(!o->next);
756 len = sizeof(ea_list) + sizeof(eattr) * o->count;
757 n = mb_alloc(rta_pool, len);
758 memcpy(n, o, len);
759 n->flags |= EALF_CACHED;
760 for(i=0; i<o->count; i++)
761 {
762 eattr *a = &n->attrs[i];
763 if (!(a->type & EAF_EMBEDDED))
764 {
765 unsigned size = sizeof(struct adata) + a->u.ptr->length;
766 struct adata *d = mb_alloc(rta_pool, size);
767 memcpy(d, a->u.ptr, size);
768 a->u.ptr = d;
769 }
770 }
771 return n;
772 }
773
774 static inline void
ea_free(ea_list * o)775 ea_free(ea_list *o)
776 {
777 int i;
778
779 if (o)
780 {
781 ASSERT(!o->next);
782 for(i=0; i<o->count; i++)
783 {
784 eattr *a = &o->attrs[i];
785 if (!(a->type & EAF_EMBEDDED))
786 mb_free((void *) a->u.ptr);
787 }
788 mb_free(o);
789 }
790 }
791
792 static int
get_generic_attr(const eattr * a,byte ** buf,int buflen UNUSED)793 get_generic_attr(const eattr *a, byte **buf, int buflen UNUSED)
794 {
795 if (a->id == EA_GEN_IGP_METRIC)
796 {
797 *buf += bsprintf(*buf, "igp_metric");
798 return GA_NAME;
799 }
800
801 return GA_UNKNOWN;
802 }
803
804 void
ea_format_bitfield(const struct eattr * a,byte * buf,int bufsize,const char ** names,int min,int max)805 ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max)
806 {
807 byte *bound = buf + bufsize - 32;
808 u32 data = a->u.data;
809 int i;
810
811 for (i = min; i < max; i++)
812 if ((data & (1u << i)) && names[i])
813 {
814 if (buf > bound)
815 {
816 strcpy(buf, " ...");
817 return;
818 }
819
820 buf += bsprintf(buf, " %s", names[i]);
821 data &= ~(1u << i);
822 }
823
824 if (data)
825 bsprintf(buf, " %08x", data);
826
827 return;
828 }
829
830 static inline void
opaque_format(const struct adata * ad,byte * buf,uint size)831 opaque_format(const struct adata *ad, byte *buf, uint size)
832 {
833 byte *bound = buf + size - 10;
834 uint i;
835
836 for(i = 0; i < ad->length; i++)
837 {
838 if (buf > bound)
839 {
840 strcpy(buf, " ...");
841 return;
842 }
843 if (i)
844 *buf++ = ' ';
845
846 buf += bsprintf(buf, "%02x", ad->data[i]);
847 }
848
849 *buf = 0;
850 return;
851 }
852
853 static inline void
ea_show_int_set(struct cli * c,const struct adata * ad,int way,byte * pos,byte * buf,byte * end)854 ea_show_int_set(struct cli *c, const struct adata *ad, int way, byte *pos, byte *buf, byte *end)
855 {
856 int i = int_set_format(ad, way, 0, pos, end - pos);
857 cli_printf(c, -1012, "\t%s", buf);
858 while (i)
859 {
860 i = int_set_format(ad, way, i, buf, end - buf - 1);
861 cli_printf(c, -1012, "\t\t%s", buf);
862 }
863 }
864
865 static inline void
ea_show_ec_set(struct cli * c,const struct adata * ad,byte * pos,byte * buf,byte * end)866 ea_show_ec_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end)
867 {
868 int i = ec_set_format(ad, 0, pos, end - pos);
869 cli_printf(c, -1012, "\t%s", buf);
870 while (i)
871 {
872 i = ec_set_format(ad, i, buf, end - buf - 1);
873 cli_printf(c, -1012, "\t\t%s", buf);
874 }
875 }
876
877 static inline void
ea_show_lc_set(struct cli * c,const struct adata * ad,byte * pos,byte * buf,byte * end)878 ea_show_lc_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end)
879 {
880 int i = lc_set_format(ad, 0, pos, end - pos);
881 cli_printf(c, -1012, "\t%s", buf);
882 while (i)
883 {
884 i = lc_set_format(ad, i, buf, end - buf - 1);
885 cli_printf(c, -1012, "\t\t%s", buf);
886 }
887 }
888
889 /**
890 * ea_show - print an &eattr to CLI
891 * @c: destination CLI
892 * @e: attribute to be printed
893 *
894 * This function takes an extended attribute represented by its &eattr
895 * structure and prints it to the CLI according to the type information.
896 *
897 * If the protocol defining the attribute provides its own
898 * get_attr() hook, it's consulted first.
899 */
900 void
ea_show(struct cli * c,const eattr * e)901 ea_show(struct cli *c, const eattr *e)
902 {
903 struct protocol *p;
904 int status = GA_UNKNOWN;
905 const struct adata *ad = (e->type & EAF_EMBEDDED) ? NULL : e->u.ptr;
906 byte buf[CLI_MSG_SIZE];
907 byte *pos = buf, *end = buf + sizeof(buf);
908
909 if (EA_IS_CUSTOM(e->id))
910 {
911 const char *name = ea_custom_name(e->id);
912 if (name)
913 {
914 pos += bsprintf(pos, "%s", name);
915 status = GA_NAME;
916 }
917 else
918 pos += bsprintf(pos, "%02x.", EA_PROTO(e->id));
919 }
920 else if (p = class_to_protocol[EA_PROTO(e->id)])
921 {
922 pos += bsprintf(pos, "%s.", p->name);
923 if (p->get_attr)
924 status = p->get_attr(e, pos, end - pos);
925 pos += strlen(pos);
926 }
927 else if (EA_PROTO(e->id))
928 pos += bsprintf(pos, "%02x.", EA_PROTO(e->id));
929 else
930 status = get_generic_attr(e, &pos, end - pos);
931
932 if (status < GA_NAME)
933 pos += bsprintf(pos, "%02x", EA_ID(e->id));
934 if (status < GA_FULL)
935 {
936 *pos++ = ':';
937 *pos++ = ' ';
938 switch (e->type & EAF_TYPE_MASK)
939 {
940 case EAF_TYPE_INT:
941 bsprintf(pos, "%u", e->u.data);
942 break;
943 case EAF_TYPE_OPAQUE:
944 opaque_format(ad, pos, end - pos);
945 break;
946 case EAF_TYPE_IP_ADDRESS:
947 bsprintf(pos, "%I", *(ip_addr *) ad->data);
948 break;
949 case EAF_TYPE_ROUTER_ID:
950 bsprintf(pos, "%R", e->u.data);
951 break;
952 case EAF_TYPE_AS_PATH:
953 as_path_format(ad, pos, end - pos);
954 break;
955 case EAF_TYPE_BITFIELD:
956 bsprintf(pos, "%08x", e->u.data);
957 break;
958 case EAF_TYPE_INT_SET:
959 ea_show_int_set(c, ad, 1, pos, buf, end);
960 return;
961 case EAF_TYPE_EC_SET:
962 ea_show_ec_set(c, ad, pos, buf, end);
963 return;
964 case EAF_TYPE_LC_SET:
965 ea_show_lc_set(c, ad, pos, buf, end);
966 return;
967 case EAF_TYPE_UNDEF:
968 default:
969 bsprintf(pos, "<type %02x>", e->type);
970 }
971 }
972 cli_printf(c, -1012, "\t%s", buf);
973 }
974
975 /**
976 * ea_dump - dump an extended attribute
977 * @e: attribute to be dumped
978 *
979 * ea_dump() dumps contents of the extended attribute given to
980 * the debug output.
981 */
982 void
ea_dump(ea_list * e)983 ea_dump(ea_list *e)
984 {
985 int i;
986
987 if (!e)
988 {
989 debug("NONE");
990 return;
991 }
992 while (e)
993 {
994 debug("[%c%c%c]",
995 (e->flags & EALF_SORTED) ? 'S' : 's',
996 (e->flags & EALF_BISECT) ? 'B' : 'b',
997 (e->flags & EALF_CACHED) ? 'C' : 'c');
998 for(i=0; i<e->count; i++)
999 {
1000 eattr *a = &e->attrs[i];
1001 debug(" %02x:%02x.%02x", EA_PROTO(a->id), EA_ID(a->id), a->flags);
1002 debug("=%c", "?iO?I?P???S?????" [a->type & EAF_TYPE_MASK]);
1003 if (a->type & EAF_ORIGINATED)
1004 debug("o");
1005 if (a->type & EAF_EMBEDDED)
1006 debug(":%08x", a->u.data);
1007 else
1008 {
1009 int j, len = a->u.ptr->length;
1010 debug("[%d]:", len);
1011 for(j=0; j<len; j++)
1012 debug("%02x", a->u.ptr->data[j]);
1013 }
1014 }
1015 if (e = e->next)
1016 debug(" | ");
1017 }
1018 }
1019
1020 /**
1021 * ea_hash - calculate an &ea_list hash key
1022 * @e: attribute list
1023 *
1024 * ea_hash() takes an extended attribute list and calculated a hopefully
1025 * uniformly distributed hash value from its contents.
1026 */
1027 inline uint
ea_hash(ea_list * e)1028 ea_hash(ea_list *e)
1029 {
1030 const u64 mul = 0x68576150f3d6847;
1031 u64 h = 0xafcef24eda8b29;
1032 int i;
1033
1034 if (e) /* Assuming chain of length 1 */
1035 {
1036 for(i=0; i<e->count; i++)
1037 {
1038 struct eattr *a = &e->attrs[i];
1039 h ^= a->id; h *= mul;
1040 if (a->type & EAF_EMBEDDED)
1041 h ^= a->u.data;
1042 else
1043 {
1044 const struct adata *d = a->u.ptr;
1045 h ^= mem_hash(d->data, d->length);
1046 }
1047 h *= mul;
1048 }
1049 }
1050 return (h >> 32) ^ (h & 0xffffffff);
1051 }
1052
1053 /**
1054 * ea_append - concatenate &ea_list's
1055 * @to: destination list (can be %NULL)
1056 * @what: list to be appended (can be %NULL)
1057 *
1058 * This function appends the &ea_list @what at the end of
1059 * &ea_list @to and returns a pointer to the resulting list.
1060 */
1061 ea_list *
ea_append(ea_list * to,ea_list * what)1062 ea_append(ea_list *to, ea_list *what)
1063 {
1064 ea_list *res;
1065
1066 if (!to)
1067 return what;
1068 res = to;
1069 while (to->next)
1070 to = to->next;
1071 to->next = what;
1072 return res;
1073 }
1074
/*
 *	rta's
 */

static uint rta_cache_count;		/* Number of cached rta's */
static uint rta_cache_size = 32;	/* Number of hash buckets; doubled on each rehash */
static uint rta_cache_limit;		/* Rehash when rta_cache_count exceeds this */
static uint rta_cache_mask;		/* rta_cache_size - 1, maps a hash key to a bucket */
static rta **rta_hash_table;		/* Buckets: heads of lists linked by next/pprev */
1084
1085 static void
rta_alloc_hash(void)1086 rta_alloc_hash(void)
1087 {
1088 rta_hash_table = mb_allocz(rta_pool, sizeof(rta *) * rta_cache_size);
1089 if (rta_cache_size < 32768)
1090 rta_cache_limit = rta_cache_size * 2;
1091 else
1092 rta_cache_limit = ~0;
1093 rta_cache_mask = rta_cache_size - 1;
1094 }
1095
1096 static inline uint
rta_hash(rta * a)1097 rta_hash(rta *a)
1098 {
1099 u64 h;
1100 mem_hash_init(&h);
1101 #define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f));
1102 MIX(src);
1103 MIX(hostentry);
1104 MIX(from);
1105 MIX(igp_metric);
1106 MIX(source);
1107 MIX(scope);
1108 MIX(dest);
1109 #undef MIX
1110
1111 return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs);
1112 }
1113
1114 static inline int
rta_same(rta * x,rta * y)1115 rta_same(rta *x, rta *y)
1116 {
1117 return (x->src == y->src &&
1118 x->source == y->source &&
1119 x->scope == y->scope &&
1120 x->dest == y->dest &&
1121 x->igp_metric == y->igp_metric &&
1122 ipa_equal(x->from, y->from) &&
1123 x->hostentry == y->hostentry &&
1124 nexthop_same(&(x->nh), &(y->nh)) &&
1125 ea_same(x->eattrs, y->eattrs));
1126 }
1127
1128 static inline slab *
rta_slab(rta * a)1129 rta_slab(rta *a)
1130 {
1131 return rta_slab_[a->nh.labels > 2 ? 3 : a->nh.labels];
1132 }
1133
1134 static rta *
rta_copy(rta * o)1135 rta_copy(rta *o)
1136 {
1137 rta *r = sl_alloc(rta_slab(o));
1138
1139 memcpy(r, o, rta_size(o));
1140 r->uc = 1;
1141 r->nh.next = nexthop_copy(o->nh.next);
1142 r->eattrs = ea_list_copy(o->eattrs);
1143 return r;
1144 }
1145
1146 static inline void
rta_insert(rta * r)1147 rta_insert(rta *r)
1148 {
1149 uint h = r->hash_key & rta_cache_mask;
1150 r->next = rta_hash_table[h];
1151 if (r->next)
1152 r->next->pprev = &r->next;
1153 r->pprev = &rta_hash_table[h];
1154 rta_hash_table[h] = r;
1155 }
1156
1157 static void
rta_rehash(void)1158 rta_rehash(void)
1159 {
1160 uint ohs = rta_cache_size;
1161 uint h;
1162 rta *r, *n;
1163 rta **oht = rta_hash_table;
1164
1165 rta_cache_size = 2*rta_cache_size;
1166 DBG("Rehashing rta cache from %d to %d entries.\n", ohs, rta_cache_size);
1167 rta_alloc_hash();
1168 for(h=0; h<ohs; h++)
1169 for(r=oht[h]; r; r=n)
1170 {
1171 n = r->next;
1172 rta_insert(r);
1173 }
1174 mb_free(oht);
1175 }
1176
1177 /**
1178 * rta_lookup - look up a &rta in attribute cache
1179 * @o: a un-cached &rta
1180 *
1181 * rta_lookup() gets an un-cached &rta structure and returns its cached
1182 * counterpart. It starts with examining the attribute cache to see whether
1183 * there exists a matching entry. If such an entry exists, it's returned and
1184 * its use count is incremented, else a new entry is created with use count
1185 * set to 1.
1186 *
1187 * The extended attribute lists attached to the &rta are automatically
1188 * converted to the normalized form.
1189 */
1190 rta *
rta_lookup(rta * o)1191 rta_lookup(rta *o)
1192 {
1193 rta *r;
1194 uint h;
1195
1196 ASSERT(!(o->aflags & RTAF_CACHED));
1197 if (o->eattrs)
1198 ea_normalize(o->eattrs);
1199
1200 h = rta_hash(o);
1201 for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next)
1202 if (r->hash_key == h && rta_same(r, o))
1203 return rta_clone(r);
1204
1205 r = rta_copy(o);
1206 r->hash_key = h;
1207 r->aflags = RTAF_CACHED;
1208 rt_lock_source(r->src);
1209 rt_lock_hostentry(r->hostentry);
1210 rta_insert(r);
1211
1212 if (++rta_cache_count > rta_cache_limit)
1213 rta_rehash();
1214
1215 return r;
1216 }
1217
1218 void
rta__free(rta * a)1219 rta__free(rta *a)
1220 {
1221 ASSERT(rta_cache_count && (a->aflags & RTAF_CACHED));
1222 rta_cache_count--;
1223 *a->pprev = a->next;
1224 if (a->next)
1225 a->next->pprev = a->pprev;
1226 rt_unlock_hostentry(a->hostentry);
1227 rt_unlock_source(a->src);
1228 if (a->nh.next)
1229 nexthop_free(a->nh.next);
1230 ea_free(a->eattrs);
1231 a->aflags = 0; /* Poison the entry */
1232 sl_free(rta_slab(a), a);
1233 }
1234
1235 rta *
rta_do_cow(rta * o,linpool * lp)1236 rta_do_cow(rta *o, linpool *lp)
1237 {
1238 rta *r = lp_alloc(lp, rta_size(o));
1239 memcpy(r, o, rta_size(o));
1240 for (struct nexthop **nhn = &(r->nh.next), *nho = o->nh.next; nho; nho = nho->next)
1241 {
1242 *nhn = lp_alloc(lp, nexthop_size(nho));
1243 memcpy(*nhn, nho, nexthop_size(nho));
1244 nhn = &((*nhn)->next);
1245 }
1246 r->aflags = 0;
1247 r->uc = 0;
1248 return r;
1249 }
1250
1251 /**
1252 * rta_dump - dump route attributes
1253 * @a: attribute structure to dump
1254 *
1255 * This function takes a &rta and dumps its contents to the debug output.
1256 */
1257 void
rta_dump(rta * a)1258 rta_dump(rta *a)
1259 {
1260 static char *rts[] = { "RTS_DUMMY", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE",
1261 "RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP",
1262 "RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1",
1263 "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" };
1264 static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" };
1265
1266 debug("p=%s uc=%d %s %s%s h=%04x",
1267 a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope),
1268 rtd[a->dest], a->hash_key);
1269 if (!(a->aflags & RTAF_CACHED))
1270 debug(" !CACHED");
1271 debug(" <-%I", a->from);
1272 if (a->dest == RTD_UNICAST)
1273 for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
1274 {
1275 if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw);
1276 if (nh->labels) debug(" L %d", nh->label[0]);
1277 for (int i=1; i<nh->labels; i++)
1278 debug("/%d", nh->label[i]);
1279 debug(" [%s]", nh->iface ? nh->iface->name : "???");
1280 }
1281 if (a->eattrs)
1282 {
1283 debug(" EA: ");
1284 ea_dump(a->eattrs);
1285 }
1286 }
1287
1288 /**
1289 * rta_dump_all - dump attribute cache
1290 *
1291 * This function dumps the whole contents of route attribute cache
1292 * to the debug output.
1293 */
1294 void
rta_dump_all(void)1295 rta_dump_all(void)
1296 {
1297 rta *a;
1298 uint h;
1299
1300 debug("Route attribute cache (%d entries, rehash at %d):\n", rta_cache_count, rta_cache_limit);
1301 for(h=0; h<rta_cache_size; h++)
1302 for(a=rta_hash_table[h]; a; a=a->next)
1303 {
1304 debug("%p ", a);
1305 rta_dump(a);
1306 debug("\n");
1307 }
1308 debug("\n");
1309 }
1310
1311 void
rta_show(struct cli * c,rta * a)1312 rta_show(struct cli *c, rta *a)
1313 {
1314 cli_printf(c, -1008, "\tType: %s %s", rta_src_names[a->source], ip_scope_text(a->scope));
1315
1316 for(ea_list *eal = a->eattrs; eal; eal=eal->next)
1317 for(int i=0; i<eal->count; i++)
1318 ea_show(c, &eal->attrs[i]);
1319 }
1320
1321 /**
1322 * rta_init - initialize route attribute cache
1323 *
1324 * This function is called during initialization of the routing
1325 * table module to set up the internals of the attribute cache.
1326 */
1327 void
rta_init(void)1328 rta_init(void)
1329 {
1330 rta_pool = rp_new(&root_pool, "Attributes");
1331
1332 rta_slab_[0] = sl_new(rta_pool, sizeof(rta));
1333 rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32));
1334 rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2);
1335 rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK);
1336
1337 nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop));
1338 nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32));
1339 nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2);
1340 nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK);
1341
1342 rta_alloc_hash();
1343 rte_src_init();
1344 }
1345
/*
 *  Documentation for functions declared inline in route.h
 *
 *  The code below is never compiled (#if 0); it only carries
 *  the documentation comments.
 */
#if 0

/**
 * rta_clone - clone route attributes
 * @r: a &rta to be cloned
 *
 * rta_clone() takes a cached &rta and returns its identical cached
 * copy. Currently it works by just returning the original &rta with
 * its use count incremented.
 */
static inline rta *rta_clone(rta *r)
{ DUMMY; }

/**
 * rta_free - free route attributes
 * @r: a &rta to be freed
 *
 * If you stop using a &rta (for example when deleting a route which uses
 * it), you need to call rta_free() to notify the attribute cache that the
 * attribute is no longer in use and can be freed if you were the last
 * user (which rta_free() tests by inspecting the use count).
 */
static inline void rta_free(rta *r)
{ DUMMY; }

#endif
1375