1 /*
2 *
3 * Copyright 2009-2016, LabN Consulting, L.L.C.
4 *
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20 
21 /*
22  * File:	rfapi_import.c
23  * Purpose:	Handle import of routes from BGP to RFAPI
24  */
25 
26 #include "lib/zebra.h"
27 #include "lib/prefix.h"
28 #include "lib/agg_table.h"
29 #include "lib/vty.h"
30 #include "lib/memory.h"
31 #include "lib/log.h"
32 #include "lib/skiplist.h"
33 #include "lib/thread.h"
34 #include "lib/stream.h"
35 #include "lib/lib_errors.h"
36 
37 #include "bgpd/bgpd.h"
38 #include "bgpd/bgp_ecommunity.h"
39 #include "bgpd/bgp_attr.h"
40 #include "bgpd/bgp_route.h"
41 #include "bgpd/bgp_mplsvpn.h" /* prefix_rd2str() */
42 #include "bgpd/bgp_vnc_types.h"
43 #include "bgpd/bgp_rd.h"
44 
45 #include "bgpd/rfapi/rfapi.h"
46 #include "bgpd/rfapi/bgp_rfapi_cfg.h"
47 #include "bgpd/rfapi/rfapi_backend.h"
48 #include "bgpd/rfapi/rfapi_import.h"
49 #include "bgpd/rfapi/rfapi_private.h"
50 #include "bgpd/rfapi/rfapi_monitor.h"
51 #include "bgpd/rfapi/rfapi_nve_addr.h"
52 #include "bgpd/rfapi/rfapi_vty.h"
53 #include "bgpd/rfapi/vnc_export_bgp.h"
54 #include "bgpd/rfapi/vnc_export_bgp_p.h"
55 #include "bgpd/rfapi/vnc_zebra.h"
56 #include "bgpd/rfapi/vnc_import_bgp.h"
57 #include "bgpd/rfapi/vnc_import_bgp_p.h"
58 #include "bgpd/rfapi/rfapi_rib.h"
59 #include "bgpd/rfapi/rfapi_encap_tlv.h"
60 #include "bgpd/rfapi/vnc_debug.h"
61 
62 #ifdef HAVE_GLIBC_BACKTRACE
63 /* for backtrace and friends */
64 #include <execinfo.h>
65 #endif /* HAVE_GLIBC_BACKTRACE */
66 
67 #undef DEBUG_MONITOR_MOVE_SHORTER
68 #undef DEBUG_RETURNED_NHL
69 #undef DEBUG_ROUTE_COUNTERS
70 #undef DEBUG_ENCAP_MONITOR
71 #undef DEBUG_L2_EXTRA
72 #undef DEBUG_IT_NODES
73 #undef DEBUG_BI_SEARCH
74 
/*
 * Per-timer context for a pending route withdraw.
 *
 * Allocated for each withdraw timer instance; freed when the timer
 * expires or is canceled. The pointer is carried as the timer thread's
 * argument (rfapiBgpInfoChainFree() recovers it from t->arg in order to
 * free it before canceling the timer).
 */
struct rfapi_withdraw {
	/* presumably the import table that contains "node" -- confirm */
	struct rfapi_import_table *import_table;
	/* presumably the import-table node holding "info" -- confirm */
	struct agg_node *node;
	/* presumably the route whose removal is pending -- confirm */
	struct bgp_path_info *info;
	safi_t safi; /* used only for bulk operations */
	/*
	 * For import table node reference count checking (i.e., debugging).
	 * Normally when a timer expires, lockoffset should be 0. However, if
	 * the timer expiration function is called directly (e.g.,
	 * rfapiExpireVpnNow), the node could be locked by a preceding
	 * agg_route_top() or agg_route_next() in a loop, so we need to pass
	 * this value in.
	 */
	int lockoffset;
};
94 
/*
 * DEBUG FUNCTION
 * It's evil and fiendish. It's compiler-dependent.
 * ? Might need LDFLAGS -rdynamic to produce all function names
 *
 * Logs the current call stack, one frame per line, through
 * vnc_zlog_debug_verbose(). Compiles to an empty function unless
 * HAVE_GLIBC_BACKTRACE is defined.
 *
 * At most RFAPI_DEBUG_BACKTRACE_NENTRIES frames are reported. If the
 * stack cannot be captured or symbolized, nothing is logged.
 */
void rfapiDebugBacktrace(void)
{
#ifdef HAVE_GLIBC_BACKTRACE
#define RFAPI_DEBUG_BACKTRACE_NENTRIES	200
	void *buf[RFAPI_DEBUG_BACKTRACE_NENTRIES];
	char **syms;
	int nframes;
	size_t i;

	/* backtrace() returns an int frame count, never more than asked */
	nframes = backtrace(buf, RFAPI_DEBUG_BACKTRACE_NENTRIES);
	if (nframes <= 0)
		return;

	/* backtrace_symbols() mallocs the result and may return NULL */
	syms = backtrace_symbols(buf, nframes);
	if (!syms)
		return;

	for (i = 0; i < (size_t)nframes; ++i)
		vnc_zlog_debug_verbose("backtrace[%2zu]: %s", i, syms[i]);

	/* only the array is freed; the strings live inside it */
	free(syms);
#endif
}
120 
121 /*
122  * DEBUG FUNCTION
123  * Count remote routes and compare with actively-maintained values.
124  * Abort if they disagree.
125  */
rfapiCheckRouteCount(void)126 void rfapiCheckRouteCount(void)
127 {
128 	struct bgp *bgp = bgp_get_default();
129 	struct rfapi *h;
130 	struct rfapi_import_table *it;
131 	afi_t afi;
132 
133 	assert(bgp);
134 
135 	h = bgp->rfapi;
136 	assert(h);
137 
138 	for (it = h->imports; it; it = it->next) {
139 		for (afi = AFI_IP; afi < AFI_MAX; ++afi) {
140 
141 			struct agg_table *rt;
142 			struct agg_node *rn;
143 
144 			int holddown_count = 0;
145 			int local_count = 0;
146 			int imported_count = 0;
147 			int remote_count = 0;
148 
149 			rt = it->imported_vpn[afi];
150 
151 			for (rn = agg_route_top(rt); rn;
152 			     rn = agg_route_next(rn)) {
153 				struct bgp_path_info *bpi;
154 				struct bgp_path_info *next;
155 
156 				for (bpi = rn->info; bpi; bpi = next) {
157 					next = bpi->next;
158 
159 					if (CHECK_FLAG(bpi->flags,
160 						       BGP_PATH_REMOVED)) {
161 						++holddown_count;
162 
163 					} else {
164 						if (RFAPI_LOCAL_BI(bpi)) {
165 							++local_count;
166 						} else {
167 							if (RFAPI_DIRECT_IMPORT_BI(
168 								    bpi)) {
169 								++imported_count;
170 							} else {
171 								++remote_count;
172 							}
173 						}
174 					}
175 				}
176 			}
177 
178 			if (it->holddown_count[afi] != holddown_count) {
179 				vnc_zlog_debug_verbose(
180 					"%s: it->holddown_count %d != holddown_count %d",
181 					__func__, it->holddown_count[afi],
182 					holddown_count);
183 				assert(0);
184 			}
185 			if (it->remote_count[afi] != remote_count) {
186 				vnc_zlog_debug_verbose(
187 					"%s: it->remote_count %d != remote_count %d",
188 					__func__, it->remote_count[afi],
189 					remote_count);
190 				assert(0);
191 			}
192 			if (it->imported_count[afi] != imported_count) {
193 				vnc_zlog_debug_verbose(
194 					"%s: it->imported_count %d != imported_count %d",
195 					__func__, it->imported_count[afi],
196 					imported_count);
197 				assert(0);
198 			}
199 		}
200 	}
201 }
202 
203 #ifdef DEBUG_ROUTE_COUNTERS
204 #define VNC_ITRCCK do {rfapiCheckRouteCount();} while (0)
205 #else
206 #define VNC_ITRCCK
207 #endif
208 
209 /*
210  * Validate reference count for a node in an import table
211  *
212  * Normally lockoffset is 0 for nodes in quiescent state. However,
213  * agg_unlock_node will delete the node if it is called when
214  * node->lock == 1, and we have to validate the refcount before
215  * the node is deleted. In this case, we specify lockoffset 1.
216  */
rfapiCheckRefcount(struct agg_node * rn,safi_t safi,int lockoffset)217 void rfapiCheckRefcount(struct agg_node *rn, safi_t safi, int lockoffset)
218 {
219 	unsigned int count_bpi = 0;
220 	unsigned int count_monitor = 0;
221 	struct bgp_path_info *bpi;
222 	struct rfapi_monitor_encap *hme;
223 	struct rfapi_monitor_vpn *hmv;
224 
225 	for (bpi = rn->info; bpi; bpi = bpi->next)
226 		++count_bpi;
227 
228 
229 	if (rn->aggregate) {
230 		++count_monitor; /* rfapi_it_extra */
231 
232 		switch (safi) {
233 			void *cursor;
234 			int rc;
235 
236 		case SAFI_ENCAP:
237 			for (hme = RFAPI_MONITOR_ENCAP(rn); hme;
238 			     hme = hme->next)
239 				++count_monitor;
240 			break;
241 
242 		case SAFI_MPLS_VPN:
243 
244 			for (hmv = RFAPI_MONITOR_VPN(rn); hmv; hmv = hmv->next)
245 				++count_monitor;
246 
247 			if (RFAPI_MONITOR_EXTERIOR(rn)->source) {
248 				++count_monitor; /* sl */
249 				cursor = NULL;
250 				for (rc = skiplist_next(
251 					     RFAPI_MONITOR_EXTERIOR(rn)->source,
252 					     NULL, NULL, &cursor);
253 				     !rc;
254 				     rc = skiplist_next(
255 					     RFAPI_MONITOR_EXTERIOR(rn)->source,
256 					     NULL, NULL, &cursor)) {
257 
258 					++count_monitor; /* sl entry */
259 				}
260 			}
261 			break;
262 
263 		default:
264 			assert(0);
265 		}
266 	}
267 
268 	if (count_bpi + count_monitor + lockoffset != rn->lock) {
269 		vnc_zlog_debug_verbose(
270 			"%s: count_bpi=%d, count_monitor=%d, lockoffset=%d, rn->lock=%d",
271 			__func__, count_bpi, count_monitor, lockoffset,
272 			rn->lock);
273 		assert(0);
274 	}
275 }
276 
277 /*
278  * Perform deferred rfapi_close operations that were queued
279  * during callbacks.
280  */
rfapi_deferred_close_workfunc(struct work_queue * q,void * data)281 static wq_item_status rfapi_deferred_close_workfunc(struct work_queue *q,
282 						    void *data)
283 {
284 	struct rfapi_descriptor *rfd = data;
285 	struct rfapi *h = q->spec.data;
286 
287 	assert(!(h->flags & RFAPI_INCALLBACK));
288 	rfapi_close(rfd);
289 	vnc_zlog_debug_verbose("%s: completed deferred close on handle %p",
290 			       __func__, rfd);
291 	return WQ_SUCCESS;
292 }
293 
294 /*
295  * Extract layer 2 option from Encap TLVS in BGP attrs
296  */
rfapiGetL2o(struct attr * attr,struct rfapi_l2address_option * l2o)297 int rfapiGetL2o(struct attr *attr, struct rfapi_l2address_option *l2o)
298 {
299 	if (attr) {
300 
301 		struct bgp_attr_encap_subtlv *pEncap;
302 
303 		for (pEncap = attr->vnc_subtlvs; pEncap;
304 		     pEncap = pEncap->next) {
305 
306 			if (pEncap->type == BGP_VNC_SUBTLV_TYPE_RFPOPTION) {
307 				if (pEncap->value[0]
308 				    == RFAPI_VN_OPTION_TYPE_L2ADDR) {
309 
310 					if (pEncap->value[1] == 14) {
311 						memcpy(l2o->macaddr.octet,
312 						       pEncap->value + 2,
313 						       ETH_ALEN);
314 						l2o->label =
315 							((pEncap->value[10]
316 							  >> 4)
317 							 & 0x0f)
318 							+ ((pEncap->value[9]
319 							    << 4)
320 							   & 0xff0)
321 							+ ((pEncap->value[8]
322 							    << 12)
323 							   & 0xff000);
324 
325 						l2o->local_nve_id =
326 							pEncap->value[12];
327 
328 						l2o->logical_net_id =
329 							(pEncap->value[15]
330 							 & 0xff)
331 							+ ((pEncap->value[14]
332 							    << 8)
333 							   & 0xff00)
334 							+ ((pEncap->value[13]
335 							    << 16)
336 							   & 0xff0000);
337 					}
338 
339 					return 0;
340 				}
341 			}
342 		}
343 	}
344 
345 	return ENOENT;
346 }
347 
348 /*
349  * Extract the lifetime from the Tunnel Encap attribute of a route in
350  * an import table
351  */
rfapiGetVncLifetime(struct attr * attr,uint32_t * lifetime)352 int rfapiGetVncLifetime(struct attr *attr, uint32_t *lifetime)
353 {
354 	struct bgp_attr_encap_subtlv *pEncap;
355 
356 	*lifetime = RFAPI_INFINITE_LIFETIME; /* default to infinite */
357 
358 	if (attr) {
359 
360 		for (pEncap = attr->vnc_subtlvs; pEncap;
361 		     pEncap = pEncap->next) {
362 
363 			if (pEncap->type
364 			    == BGP_VNC_SUBTLV_TYPE_LIFETIME) { /* lifetime */
365 				if (pEncap->length == 4) {
366 					memcpy(lifetime, pEncap->value, 4);
367 					*lifetime = ntohl(*lifetime);
368 					return 0;
369 				}
370 			}
371 		}
372 	}
373 
374 	return ENOENT;
375 }
376 
377 /*
378  * Look for UN address in Encap attribute
379  */
rfapiGetVncTunnelUnAddr(struct attr * attr,struct prefix * p)380 int rfapiGetVncTunnelUnAddr(struct attr *attr, struct prefix *p)
381 {
382 	struct bgp_attr_encap_subtlv *pEncap;
383 	bgp_encap_types tun_type = BGP_ENCAP_TYPE_MPLS;/*Default tunnel type*/
384 
385 	bgp_attr_extcom_tunnel_type(attr, &tun_type);
386 	if (tun_type == BGP_ENCAP_TYPE_MPLS) {
387 		if (!p)
388 			return 0;
389 		/* MPLS carries UN address in next hop */
390 		rfapiNexthop2Prefix(attr, p);
391 		if (p->family != 0)
392 			return 0;
393 
394 		return ENOENT;
395 	}
396 	if (attr) {
397 		for (pEncap = attr->encap_subtlvs; pEncap;
398 		     pEncap = pEncap->next) {
399 
400 			if (pEncap->type
401 			    == BGP_ENCAP_SUBTLV_TYPE_REMOTE_ENDPOINT) { /* un
402 									   addr
403 									   */
404 				switch (pEncap->length) {
405 				case 8:
406 					if (p) {
407 						p->family = AF_INET;
408 						p->prefixlen = 32;
409 						memcpy(p->u.val, pEncap->value,
410 						       4);
411 					}
412 					return 0;
413 
414 				case 20:
415 					if (p) {
416 						p->family = AF_INET6;
417 						p->prefixlen = 128;
418 						memcpy(p->u.val, pEncap->value,
419 						       16);
420 					}
421 					return 0;
422 				}
423 			}
424 		}
425 	}
426 
427 	return ENOENT;
428 }
429 
430 /*
431  * Get UN address wherever it might be
432  */
rfapiGetUnAddrOfVpnBi(struct bgp_path_info * bpi,struct prefix * p)433 int rfapiGetUnAddrOfVpnBi(struct bgp_path_info *bpi, struct prefix *p)
434 {
435 	/* If it's in this route's VNC attribute, we're done */
436 	if (!rfapiGetVncTunnelUnAddr(bpi->attr, p))
437 		return 0;
438 	/*
439 	 * Otherwise, see if it's cached from a corresponding ENCAP SAFI
440 	 * advertisement
441 	 */
442 	if (bpi->extra) {
443 		switch (bpi->extra->vnc.import.un_family) {
444 		case AF_INET:
445 			if (p) {
446 				p->family = bpi->extra->vnc.import.un_family;
447 				p->u.prefix4 = bpi->extra->vnc.import.un.addr4;
448 				p->prefixlen = 32;
449 			}
450 			return 0;
451 		case AF_INET6:
452 			if (p) {
453 				p->family = bpi->extra->vnc.import.un_family;
454 				p->u.prefix6 = bpi->extra->vnc.import.un.addr6;
455 				p->prefixlen = 128;
456 			}
457 			return 0;
458 		default:
459 			if (p)
460 				p->family = 0;
461 #ifdef DEBUG_ENCAP_MONITOR
462 			vnc_zlog_debug_verbose(
463 				"%s: bpi->extra->vnc.import.un_family is 0, no UN addr",
464 				__func__);
465 #endif
466 			break;
467 		}
468 	}
469 
470 	return ENOENT;
471 }
472 
473 
474 /*
475  * Make a new bgp_path_info from gathered parameters
476  */
rfapiBgpInfoCreate(struct attr * attr,struct peer * peer,void * rfd,struct prefix_rd * prd,uint8_t type,uint8_t sub_type,uint32_t * label)477 static struct bgp_path_info *rfapiBgpInfoCreate(struct attr *attr,
478 						struct peer *peer, void *rfd,
479 						struct prefix_rd *prd,
480 						uint8_t type, uint8_t sub_type,
481 						uint32_t *label)
482 {
483 	struct bgp_path_info *new;
484 
485 	new = info_make(type, sub_type, 0, peer, attr, NULL);
486 
487 	new->attr = bgp_attr_intern(attr);
488 
489 	bgp_path_info_extra_get(new);
490 	if (prd) {
491 		new->extra->vnc.import.rd = *prd;
492 		rfapi_time(&new->extra->vnc.import.create_time);
493 	}
494 	if (label)
495 		encode_label(*label, &new->extra->label[0]);
496 
497 	peer_lock(peer);
498 
499 	return new;
500 }
501 
502 /*
503  * Frees bgp_path_info as used in import tables (parts are not
504  * allocated exactly the way they are in the main RIBs)
505  */
rfapiBgpInfoFree(struct bgp_path_info * goner)506 static void rfapiBgpInfoFree(struct bgp_path_info *goner)
507 {
508 	if (!goner)
509 		return;
510 
511 	if (goner->peer) {
512 		vnc_zlog_debug_verbose("%s: calling peer_unlock(%p), #%d",
513 				       __func__, goner->peer,
514 				       goner->peer->lock);
515 		peer_unlock(goner->peer);
516 	}
517 
518 	bgp_attr_unintern(&goner->attr);
519 
520 	if (goner->extra)
521 		bgp_path_info_extra_free(&goner->extra);
522 	XFREE(MTYPE_BGP_ROUTE, goner);
523 }
524 
rfapiMacImportTableGetNoAlloc(struct bgp * bgp,uint32_t lni)525 struct rfapi_import_table *rfapiMacImportTableGetNoAlloc(struct bgp *bgp,
526 							 uint32_t lni)
527 {
528 	struct rfapi *h;
529 	struct rfapi_import_table *it = NULL;
530 	uintptr_t lni_as_ptr = lni;
531 
532 	h = bgp->rfapi;
533 	if (!h)
534 		return NULL;
535 
536 	if (!h->import_mac)
537 		return NULL;
538 
539 	if (skiplist_search(h->import_mac, (void *)lni_as_ptr, (void **)&it))
540 		return NULL;
541 
542 	return it;
543 }
544 
rfapiMacImportTableGet(struct bgp * bgp,uint32_t lni)545 struct rfapi_import_table *rfapiMacImportTableGet(struct bgp *bgp, uint32_t lni)
546 {
547 	struct rfapi *h;
548 	struct rfapi_import_table *it = NULL;
549 	uintptr_t lni_as_ptr = lni;
550 
551 	h = bgp->rfapi;
552 	assert(h);
553 
554 	if (!h->import_mac) {
555 		/* default cmp is good enough for LNI */
556 		h->import_mac = skiplist_new(0, NULL, NULL);
557 	}
558 
559 	if (skiplist_search(h->import_mac, (void *)lni_as_ptr, (void **)&it)) {
560 
561 		struct ecommunity *enew;
562 		struct ecommunity_val eval;
563 		afi_t afi;
564 
565 		it = XCALLOC(MTYPE_RFAPI_IMPORTTABLE,
566 			     sizeof(struct rfapi_import_table));
567 		/* set RT list of new import table based on LNI */
568 		memset((char *)&eval, 0, sizeof(eval));
569 		eval.val[0] = 0; /* VNC L2VPN */
570 		eval.val[1] = 2; /* VNC L2VPN */
571 		eval.val[5] = (lni >> 16) & 0xff;
572 		eval.val[6] = (lni >> 8) & 0xff;
573 		eval.val[7] = (lni >> 0) & 0xff;
574 
575 		enew = ecommunity_new();
576 		ecommunity_add_val(enew, &eval, false, false);
577 		it->rt_import_list = enew;
578 
579 		for (afi = AFI_IP; afi < AFI_MAX; ++afi) {
580 			it->imported_vpn[afi] = agg_table_init();
581 			it->imported_encap[afi] = agg_table_init();
582 		}
583 
584 		it->l2_logical_net_id = lni;
585 
586 		skiplist_insert(h->import_mac, (void *)lni_as_ptr, it);
587 	}
588 
589 	assert(it);
590 	return it;
591 }
592 
/*
 * Implement MONITOR_MOVE_SHORTER(original_node) from
 * RFAPI-Import-Event-Handling.txt
 *
 * If original_vpn_node no longer has any route with a UN address, its
 * VPN monitors are moved to the nearest ancestor that does have such a
 * route, or failing that to the default (prefix length 0) node, which
 * is created if necessary. Node reference counts are adjusted so that
 * the destination gains one lock per moved monitor and the original
 * node loses one lock per moved monitor.
 *
 * original_vpn_node	node whose monitors may need to move
 * lockoffset		extra locks the caller expects on
 *			original_vpn_node; used only for the
 *			RFAPI_CHECK_REFCOUNT validation
 *
 * Returns pointer to the list of moved monitors, or NULL if the
 * original node still has a route with a valid UN address (nothing
 * moved).
 */
static struct rfapi_monitor_vpn *
rfapiMonitorMoveShorter(struct agg_node *original_vpn_node, int lockoffset)
{
	struct bgp_path_info *bpi;
	struct agg_node *par;
	struct rfapi_monitor_vpn *m;
	struct rfapi_monitor_vpn *mlast;
	struct rfapi_monitor_vpn *moved;
	int movecount = 0;
	/*
	 * Number of locks on "par" already taken in this function
	 * (presumably agg_route_table_top() and agg_node_get() return a
	 * locked node -- confirm against lib/agg_table.h)
	 */
	int parent_already_refcounted = 0;

	RFAPI_CHECK_REFCOUNT(original_vpn_node, SAFI_MPLS_VPN, lockoffset);

#ifdef DEBUG_MONITOR_MOVE_SHORTER
	{
		char buf[PREFIX_STRLEN];

		prefix2str(&original_vpn_node->p, buf, sizeof(buf));
		vnc_zlog_debug_verbose("%s: called with node pfx=%s", __func__,
				       buf);
	}
#endif

	/*
	 * 1. If there is at least one bpi (either regular route or
	 *    route marked as withdrawn, with a pending timer) at
	 *    original_node with a valid UN address, we're done. Return.
	 */
	for (bpi = original_vpn_node->info; bpi; bpi = bpi->next) {
		struct prefix pfx;

		if (!rfapiGetUnAddrOfVpnBi(bpi, &pfx)) {
#ifdef DEBUG_MONITOR_MOVE_SHORTER
			vnc_zlog_debug_verbose(
				"%s: have valid UN at original node, no change",
				__func__);
#endif
			return NULL;
		}
	}

	/*
	 * 2. Travel up the tree (toward less-specific prefixes) from
	 *    original_node to find the first node that has at least
	 *    one route (even if it is only a withdrawn route) with a
	 *    valid UN address. Call this node "Node P."
	 */
	for (par = agg_node_parent(original_vpn_node); par;
	     par = agg_node_parent(par)) {
		for (bpi = par->info; bpi; bpi = bpi->next) {
			struct prefix pfx;
			if (!rfapiGetUnAddrOfVpnBi(bpi, &pfx)) {
				break;
			}
		}
		/* bpi is non-NULL only if the inner loop found a UN address */
		if (bpi)
			break;
	}

	if (par) {
		RFAPI_CHECK_REFCOUNT(par, SAFI_MPLS_VPN, 0);
	}

	/*
	 * If no less-specific routes, try to use the 0/0 node
	 */
	if (!par) {
		const struct prefix *p;
		/* this isn't necessarily 0/0 */
		par = agg_route_table_top(original_vpn_node);

		/* p is assigned, and later read, only when par is non-NULL */
		if (par)
			p = agg_node_get_prefix(par);
		/*
		 * If we got the top node but it wasn't 0/0,
		 * ignore it
		 */
		if (par && p->prefixlen) {
			agg_unlock_node(par); /* maybe free */
			par = NULL;
		}

		if (par) {
			++parent_already_refcounted;
		}
	}

	/*
	 * Create 0/0 node if it isn't there
	 */
	if (!par) {
		struct prefix pfx_default;
		const struct prefix *p = agg_node_get_prefix(original_vpn_node);

		/* all-zero prefix of the same address family == default */
		memset(&pfx_default, 0, sizeof(pfx_default));
		pfx_default.family = p->family;

		/* creates default node if none exists */
		par = agg_node_get(agg_get_table(original_vpn_node),
				   &pfx_default);
		++parent_already_refcounted;
	}

	/*
	 * 3. Move each of the monitors found at original_node to Node P.
	 *    These are "Moved Monitors."
	 *
	 */

	/*
	 * Attach at end so that the list pointer we return points
	 * only to the moved routes
	 */
	for (m = RFAPI_MONITOR_VPN(par), mlast = NULL; m;
	     mlast = m, m = m->next)
		;

	if (mlast) {
		moved = mlast->next = RFAPI_MONITOR_VPN(original_vpn_node);
	} else {
		moved = RFAPI_MONITOR_VPN_W_ALLOC(par) =
			RFAPI_MONITOR_VPN(original_vpn_node);
	}
	if (RFAPI_MONITOR_VPN(
		    original_vpn_node)) /* check agg, so not allocated */
		RFAPI_MONITOR_VPN_W_ALLOC(original_vpn_node) = NULL;

	/*
	 * update the node pointers on the monitors
	 */
	for (m = moved; m; m = m->next) {
		++movecount;
		m->node = par;
	}

	/*
	 * Bring the number of locks held on par to exactly one per moved
	 * monitor; the refcount check runs before the adjustment, hence
	 * the (already held - movecount) offset.
	 */
	RFAPI_CHECK_REFCOUNT(par, SAFI_MPLS_VPN,
			     parent_already_refcounted - movecount);
	while (movecount > parent_already_refcounted) {
		agg_lock_node(par);
		++parent_already_refcounted;
	}
	while (movecount < parent_already_refcounted) {
		/* unlikely, but code defensively */
		agg_unlock_node(par);
		--parent_already_refcounted;
	}
	/*
	 * The original node still holds one lock per monitor that was
	 * just detached; release them.
	 */
	RFAPI_CHECK_REFCOUNT(original_vpn_node, SAFI_MPLS_VPN,
			     movecount + lockoffset);
	while (movecount--) {
		agg_unlock_node(original_vpn_node);
	}

#ifdef DEBUG_MONITOR_MOVE_SHORTER
	{
		char buf[PREFIX_STRLEN];

		prefix2str(&par->p, buf, sizeof(buf));
		vnc_zlog_debug_verbose("%s: moved to node pfx=%s", __func__,
				       buf);
	}
#endif


	return moved;
}
764 
/*
 * Implement MONITOR_MOVE_LONGER(new_node) from
 * RFAPI-Import-Event-Handling.txt
 *
 * If new_vpn_node has at least one route with a UN address, look at
 * the monitors attached to the nearest ancestor that has any, and move
 * to new_vpn_node every monitor whose prefix is covered by
 * new_vpn_node's prefix. Each move takes one lock on new_vpn_node and
 * releases one lock on the ancestor.
 *
 * Does nothing if the new node has no route with a UN address or if no
 * ancestor has monitors.
 */
static void rfapiMonitorMoveLonger(struct agg_node *new_vpn_node)
{
	struct rfapi_monitor_vpn *monitor;
	struct rfapi_monitor_vpn *mlast;
	struct bgp_path_info *bpi;
	struct agg_node *par;
	const struct prefix *new_vpn_node_p = agg_node_get_prefix(new_vpn_node);

	RFAPI_CHECK_REFCOUNT(new_vpn_node, SAFI_MPLS_VPN, 0);

	/*
	 * Make sure we have at least one valid route at the new node
	 */
	for (bpi = new_vpn_node->info; bpi; bpi = bpi->next) {
		struct prefix pfx;
		if (!rfapiGetUnAddrOfVpnBi(bpi, &pfx))
			break;
	}

	if (!bpi) {
		vnc_zlog_debug_verbose(
			"%s: no valid routes at node %p, so not attempting moves",
			__func__, new_vpn_node);
		return;
	}

	/*
	 * Find first parent node that has monitors
	 */
	for (par = agg_node_parent(new_vpn_node); par;
	     par = agg_node_parent(par)) {
		if (RFAPI_MONITOR_VPN(par))
			break;
	}

	if (!par) {
		vnc_zlog_debug_verbose(
			"%s: no parent nodes with monitors, done", __func__);
		return;
	}

	/*
	 * Check each of these monitors to see if their longest-match
	 * is now the updated node. Move any such monitors to the more-
	 * specific updated node
	 *
	 * mlast trails the scan: it is the last monitor left in place on
	 * par, or NULL while the head of par's list is being examined.
	 */
	for (mlast = NULL, monitor = RFAPI_MONITOR_VPN(par); monitor;) {
		/*
		 * If new longest match for monitor prefix is the new
		 * route's prefix, move monitor to new route's prefix
		 */
		if (prefix_match(new_vpn_node_p, &monitor->p)) {
			/* detach */
			if (mlast) {
				mlast->next = monitor->next;
			} else {
				RFAPI_MONITOR_VPN_W_ALLOC(par) = monitor->next;
			}


			/* attach (at the head of the new node's list) */
			monitor->next = RFAPI_MONITOR_VPN(new_vpn_node);
			RFAPI_MONITOR_VPN_W_ALLOC(new_vpn_node) = monitor;
			monitor->node = new_vpn_node;

			agg_lock_node(new_vpn_node); /* incr refcount */

			/* resume the scan at the element after the removed one */
			monitor = mlast ? mlast->next : RFAPI_MONITOR_VPN(par);

			RFAPI_CHECK_REFCOUNT(par, SAFI_MPLS_VPN, 1);
			/* decr refcount after we're done with par as this might
			 * free it */
			agg_unlock_node(par);

			continue;
		}
		mlast = monitor;
		monitor = monitor->next;
	}

	RFAPI_CHECK_REFCOUNT(new_vpn_node, SAFI_MPLS_VPN, 0);
}
851 
852 
rfapiBgpInfoChainFree(struct bgp_path_info * bpi)853 static void rfapiBgpInfoChainFree(struct bgp_path_info *bpi)
854 {
855 	struct bgp_path_info *next;
856 
857 	while (bpi) {
858 
859 		/*
860 		 * If there is a timer waiting to delete this bpi, cancel
861 		 * the timer and delete immediately
862 		 */
863 		if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)
864 		    && bpi->extra->vnc.import.timer) {
865 
866 			struct thread *t =
867 				(struct thread *)bpi->extra->vnc.import.timer;
868 			struct rfapi_withdraw *wcb = t->arg;
869 
870 			XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
871 			thread_cancel(t);
872 		}
873 
874 		next = bpi->next;
875 		bpi->next = NULL;
876 		rfapiBgpInfoFree(bpi);
877 		bpi = next;
878 	}
879 }
880 
rfapiImportTableFlush(struct rfapi_import_table * it)881 static void rfapiImportTableFlush(struct rfapi_import_table *it)
882 {
883 	afi_t afi;
884 
885 	/*
886 	 * Free ecommunity
887 	 */
888 	ecommunity_free(&it->rt_import_list);
889 	it->rt_import_list = NULL;
890 
891 	for (afi = AFI_IP; afi < AFI_MAX; ++afi) {
892 
893 		struct agg_node *rn;
894 
895 		for (rn = agg_route_top(it->imported_vpn[afi]); rn;
896 		     rn = agg_route_next(rn)) {
897 			/*
898 			 * Each route_node has:
899 			 * aggregate: points to rfapi_it_extra with monitor
900 			 * chain(s)
901 			 * info: points to chain of bgp_path_info
902 			 */
903 			/* free bgp_path_info and its children */
904 			rfapiBgpInfoChainFree(rn->info);
905 			rn->info = NULL;
906 
907 			rfapiMonitorExtraFlush(SAFI_MPLS_VPN, rn);
908 		}
909 
910 		for (rn = agg_route_top(it->imported_encap[afi]); rn;
911 		     rn = agg_route_next(rn)) {
912 			/* free bgp_path_info and its children */
913 			rfapiBgpInfoChainFree(rn->info);
914 			rn->info = NULL;
915 
916 			rfapiMonitorExtraFlush(SAFI_ENCAP, rn);
917 		}
918 
919 		agg_table_finish(it->imported_vpn[afi]);
920 		agg_table_finish(it->imported_encap[afi]);
921 	}
922 	if (it->monitor_exterior_orphans) {
923 		skiplist_free(it->monitor_exterior_orphans);
924 	}
925 }
926 
rfapiImportTableRefDelByIt(struct bgp * bgp,struct rfapi_import_table * it_target)927 void rfapiImportTableRefDelByIt(struct bgp *bgp,
928 				struct rfapi_import_table *it_target)
929 {
930 	struct rfapi *h;
931 	struct rfapi_import_table *it;
932 	struct rfapi_import_table *prev = NULL;
933 
934 	assert(it_target);
935 
936 	h = bgp->rfapi;
937 	assert(h);
938 
939 	for (it = h->imports; it; prev = it, it = it->next) {
940 		if (it == it_target)
941 			break;
942 	}
943 
944 	assert(it);
945 	assert(it->refcount);
946 
947 	it->refcount -= 1;
948 
949 	if (!it->refcount) {
950 		if (prev) {
951 			prev->next = it->next;
952 		} else {
953 			h->imports = it->next;
954 		}
955 		rfapiImportTableFlush(it);
956 		XFREE(MTYPE_RFAPI_IMPORTTABLE, it);
957 	}
958 }
959 
#ifdef RFAPI_REQUIRE_ENCAP_BEEC
/*
 * Look for magic BGP Encapsulation Extended Community value
 * Format in RFC 5512 Sect. 4.5
 *
 * An entry matches when it is an opaque extended community with the
 * encapsulation subtype and its last two octets hold "type" in network
 * byte order (octet 6 = high byte, octet 7 = low byte).
 *
 * Returns 1 if ecom contains such an entry, 0 otherwise (including
 * when ecom is NULL).
 */
static int rfapiEcommunitiesMatchBeec(struct ecommunity *ecom,
				      bgp_encap_types type)
{
	int i;

	if (!ecom)
		return 0;

	for (i = 0; i < (ecom->size * ECOMMUNITY_SIZE); i += ECOMMUNITY_SIZE) {

		uint8_t *ep;

		ep = ecom->val + i;

		/*
		 * The high octet must be extracted with a bitwise AND; a
		 * logical AND would reduce "type" to 0 or 1 before the
		 * shift, so the comparison would always be against 0.
		 */
		if (ep[0] == ECOMMUNITY_ENCODE_OPAQUE
		    && ep[1] == ECOMMUNITY_OPAQUE_SUBTYPE_ENCAP
		    && ep[6] == ((type & 0xff00) >> 8)
		    && ep[7] == (type & 0xff)) {

			return 1;
		}
	}
	return 0;
}
#endif
990 
rfapiEcommunitiesIntersect(struct ecommunity * e1,struct ecommunity * e2)991 int rfapiEcommunitiesIntersect(struct ecommunity *e1, struct ecommunity *e2)
992 {
993 	int i, j;
994 
995 	if (!e1 || !e2)
996 		return 0;
997 
998 	{
999 		char *s1, *s2;
1000 		s1 = ecommunity_ecom2str(e1, ECOMMUNITY_FORMAT_DISPLAY, 0);
1001 		s2 = ecommunity_ecom2str(e2, ECOMMUNITY_FORMAT_DISPLAY, 0);
1002 		vnc_zlog_debug_verbose("%s: e1[%s], e2[%s]", __func__, s1, s2);
1003 		XFREE(MTYPE_ECOMMUNITY_STR, s1);
1004 		XFREE(MTYPE_ECOMMUNITY_STR, s2);
1005 	}
1006 
1007 	for (i = 0; i < e1->size; ++i) {
1008 		for (j = 0; j < e2->size; ++j) {
1009 			if (!memcmp(e1->val + (i * ECOMMUNITY_SIZE),
1010 				    e2->val + (j * ECOMMUNITY_SIZE),
1011 				    ECOMMUNITY_SIZE)) {
1012 
1013 				return 1;
1014 			}
1015 		}
1016 	}
1017 	return 0;
1018 }
1019 
rfapiEcommunityGetLNI(struct ecommunity * ecom,uint32_t * lni)1020 int rfapiEcommunityGetLNI(struct ecommunity *ecom, uint32_t *lni)
1021 {
1022 	if (ecom) {
1023 		int i;
1024 		for (i = 0; i < ecom->size; ++i) {
1025 			uint8_t *p = ecom->val + (i * ECOMMUNITY_SIZE);
1026 
1027 			if ((*(p + 0) == 0x00) && (*(p + 1) == 0x02)) {
1028 
1029 				*lni = (*(p + 5) << 16) | (*(p + 6) << 8)
1030 				       | (*(p + 7));
1031 				return 0;
1032 			}
1033 		}
1034 	}
1035 	return ENOENT;
1036 }
1037 
rfapiEcommunityGetEthernetTag(struct ecommunity * ecom,uint16_t * tag_id)1038 int rfapiEcommunityGetEthernetTag(struct ecommunity *ecom, uint16_t *tag_id)
1039 {
1040 	struct bgp *bgp = bgp_get_default();
1041 	*tag_id = 0; /* default to untagged */
1042 	if (ecom) {
1043 		int i;
1044 		for (i = 0; i < ecom->size; ++i) {
1045 			as_t as = 0;
1046 			int encode = 0;
1047 			const uint8_t *p = ecom->val + (i * ECOMMUNITY_SIZE);
1048 
1049 			/* High-order octet of type. */
1050 			encode = *p++;
1051 
1052 			if (*p++ == ECOMMUNITY_ROUTE_TARGET) {
1053 				if (encode == ECOMMUNITY_ENCODE_AS4) {
1054 					p = ptr_get_be32(p, &as);
1055 				} else if (encode == ECOMMUNITY_ENCODE_AS) {
1056 					as = (*p++ << 8);
1057 					as |= (*p++);
1058 					p += 2; /* skip next two, tag/vid
1059 						   always in lowest bytes */
1060 				}
1061 				if (as == bgp->as) {
1062 					*tag_id = *p++ << 8;
1063 					*tag_id |= (*p++);
1064 					return 0;
1065 				}
1066 			}
1067 		}
1068 	}
1069 	return ENOENT;
1070 }
1071 
rfapiVpnBiNhEqualsPt(struct bgp_path_info * bpi,struct rfapi_ip_addr * hpt)1072 static int rfapiVpnBiNhEqualsPt(struct bgp_path_info *bpi,
1073 				struct rfapi_ip_addr *hpt)
1074 {
1075 	uint8_t family;
1076 
1077 	if (!hpt || !bpi)
1078 		return 0;
1079 
1080 	family = BGP_MP_NEXTHOP_FAMILY(bpi->attr->mp_nexthop_len);
1081 
1082 	if (hpt->addr_family != family)
1083 		return 0;
1084 
1085 	switch (family) {
1086 	case AF_INET:
1087 		if (bpi->attr->mp_nexthop_global_in.s_addr
1088 		    != hpt->addr.v4.s_addr)
1089 			return 0;
1090 		break;
1091 
1092 	case AF_INET6:
1093 		if (IPV6_ADDR_CMP(&bpi->attr->mp_nexthop_global, &hpt->addr.v6))
1094 			return 0;
1095 		break;
1096 
1097 	default:
1098 		return 0;
1099 	}
1100 
1101 	return 1;
1102 }
1103 
1104 
1105 /*
1106  * Compare 2 VPN BIs. Return true if they have the same VN and UN addresses
1107  */
rfapiVpnBiSamePtUn(struct bgp_path_info * bpi1,struct bgp_path_info * bpi2)1108 static int rfapiVpnBiSamePtUn(struct bgp_path_info *bpi1,
1109 			      struct bgp_path_info *bpi2)
1110 {
1111 	struct prefix pfx_un1;
1112 	struct prefix pfx_un2;
1113 
1114 	if (!bpi1 || !bpi2)
1115 		return 0;
1116 
1117 	/*
1118 	 * VN address comparisons
1119 	 */
1120 
1121 	if (BGP_MP_NEXTHOP_FAMILY(bpi1->attr->mp_nexthop_len)
1122 	    != BGP_MP_NEXTHOP_FAMILY(bpi2->attr->mp_nexthop_len)) {
1123 		return 0;
1124 	}
1125 
1126 	switch (BGP_MP_NEXTHOP_FAMILY(bpi1->attr->mp_nexthop_len)) {
1127 	case AF_INET:
1128 		if (bpi1->attr->mp_nexthop_global_in.s_addr
1129 		    != bpi2->attr->mp_nexthop_global_in.s_addr)
1130 			return 0;
1131 		break;
1132 
1133 	case AF_INET6:
1134 		if (IPV6_ADDR_CMP(&bpi1->attr->mp_nexthop_global,
1135 				  &bpi2->attr->mp_nexthop_global))
1136 			return 0;
1137 		break;
1138 
1139 	default:
1140 		return 0;
1141 	}
1142 
1143 	memset(&pfx_un1, 0, sizeof(pfx_un1));
1144 	memset(&pfx_un2, 0, sizeof(pfx_un2));
1145 
1146 	/*
1147 	 * UN address comparisons
1148 	 */
1149 	if (rfapiGetVncTunnelUnAddr(bpi1->attr, &pfx_un1)) {
1150 		if (bpi1->extra) {
1151 			pfx_un1.family = bpi1->extra->vnc.import.un_family;
1152 			switch (bpi1->extra->vnc.import.un_family) {
1153 			case AF_INET:
1154 				pfx_un1.u.prefix4 =
1155 					bpi1->extra->vnc.import.un.addr4;
1156 				break;
1157 			case AF_INET6:
1158 				pfx_un1.u.prefix6 =
1159 					bpi1->extra->vnc.import.un.addr6;
1160 				break;
1161 			default:
1162 				pfx_un1.family = 0;
1163 				break;
1164 			}
1165 		}
1166 	}
1167 
1168 	if (rfapiGetVncTunnelUnAddr(bpi2->attr, &pfx_un2)) {
1169 		if (bpi2->extra) {
1170 			pfx_un2.family = bpi2->extra->vnc.import.un_family;
1171 			switch (bpi2->extra->vnc.import.un_family) {
1172 			case AF_INET:
1173 				pfx_un2.u.prefix4 =
1174 					bpi2->extra->vnc.import.un.addr4;
1175 				break;
1176 			case AF_INET6:
1177 				pfx_un2.u.prefix6 =
1178 					bpi2->extra->vnc.import.un.addr6;
1179 				break;
1180 			default:
1181 				pfx_un2.family = 0;
1182 				break;
1183 			}
1184 		}
1185 	}
1186 
1187 	if (pfx_un1.family == 0 || pfx_un2.family == 0)
1188 		return 0;
1189 
1190 	if (pfx_un1.family != pfx_un2.family)
1191 		return 0;
1192 
1193 	switch (pfx_un1.family) {
1194 	case AF_INET:
1195 		if (!IPV4_ADDR_SAME(&pfx_un1.u.prefix4, &pfx_un2.u.prefix4))
1196 			return 0;
1197 		break;
1198 	case AF_INET6:
1199 		if (!IPV6_ADDR_SAME(&pfx_un1.u.prefix6, &pfx_un2.u.prefix6))
1200 			return 0;
1201 		break;
1202 	}
1203 
1204 
1205 	return 1;
1206 }
1207 
rfapiRfpCost(struct attr * attr)1208 uint8_t rfapiRfpCost(struct attr *attr)
1209 {
1210 	if (attr->flag & ATTR_FLAG_BIT(BGP_ATTR_LOCAL_PREF)) {
1211 		if (attr->local_pref > 255) {
1212 			return 0;
1213 		}
1214 		return 255 - attr->local_pref;
1215 	}
1216 
1217 	return 255;
1218 }
1219 
1220 /*------------------------------------------
1221  * rfapi_extract_l2o
1222  *
1223  * Find Layer 2 options in an option chain
1224  *
1225  * input:
1226  *	pHop		option chain
1227  *
1228  * output:
1229  *	l2o		layer 2 options extracted
1230  *
1231  * return value:
1232  *	0		OK
1233  *	1		no options found
1234  *
1235  --------------------------------------------*/
rfapi_extract_l2o(struct bgp_tea_options * pHop,struct rfapi_l2address_option * l2o)1236 int rfapi_extract_l2o(
1237 	struct bgp_tea_options *pHop,       /* chain of options */
1238 	struct rfapi_l2address_option *l2o) /* return extracted value */
1239 {
1240 	struct bgp_tea_options *p;
1241 
1242 	for (p = pHop; p; p = p->next) {
1243 		if ((p->type == RFAPI_VN_OPTION_TYPE_L2ADDR)
1244 		    && (p->length >= 8)) {
1245 
1246 			char *v = p->value;
1247 
1248 			memcpy(&l2o->macaddr, v, 6);
1249 
1250 			l2o->label = ((v[6] << 12) & 0xff000)
1251 				     + ((v[7] << 4) & 0xff0)
1252 				     + ((v[8] >> 4) & 0xf);
1253 
1254 			l2o->local_nve_id = (uint8_t)v[10];
1255 
1256 			l2o->logical_net_id =
1257 				(v[11] << 16) + (v[12] << 8) + (v[13] << 0);
1258 
1259 			return 0;
1260 		}
1261 	}
1262 	return 1;
1263 }
1264 
1265 static struct rfapi_next_hop_entry *
rfapiRouteInfo2NextHopEntry(struct rfapi_ip_prefix * rprefix,struct bgp_path_info * bpi,uint32_t lifetime,struct agg_node * rn)1266 rfapiRouteInfo2NextHopEntry(struct rfapi_ip_prefix *rprefix,
1267 			    struct bgp_path_info *bpi, /* route to encode */
1268 			    uint32_t lifetime,	 /* use this in nhe */
1269 			    struct agg_node *rn)       /* req for L2 eth addr */
1270 {
1271 	struct rfapi_next_hop_entry *new;
1272 	int have_vnc_tunnel_un = 0;
1273 	const struct prefix *p = agg_node_get_prefix(rn);
1274 
1275 #ifdef DEBUG_ENCAP_MONITOR
1276 	vnc_zlog_debug_verbose("%s: entry, bpi %p, rn %p", __func__, bpi, rn);
1277 #endif
1278 
1279 	new = XCALLOC(MTYPE_RFAPI_NEXTHOP, sizeof(struct rfapi_next_hop_entry));
1280 	assert(new);
1281 
1282 	new->prefix = *rprefix;
1283 
1284 	if (bpi->extra
1285 	    && decode_rd_type(bpi->extra->vnc.import.rd.val)
1286 		       == RD_TYPE_VNC_ETH) {
1287 		/* ethernet */
1288 
1289 		struct rfapi_vn_option *vo;
1290 
1291 		vo = XCALLOC(MTYPE_RFAPI_VN_OPTION,
1292 			     sizeof(struct rfapi_vn_option));
1293 		assert(vo);
1294 
1295 		vo->type = RFAPI_VN_OPTION_TYPE_L2ADDR;
1296 
1297 		memcpy(&vo->v.l2addr.macaddr, &p->u.prefix_eth.octet, ETH_ALEN);
1298 		/* only low 3 bytes of this are significant */
1299 		(void)rfapiEcommunityGetLNI(bpi->attr->ecommunity,
1300 					    &vo->v.l2addr.logical_net_id);
1301 		(void)rfapiEcommunityGetEthernetTag(bpi->attr->ecommunity,
1302 						    &vo->v.l2addr.tag_id);
1303 
1304 		/* local_nve_id comes from lower byte of RD type */
1305 		vo->v.l2addr.local_nve_id = bpi->extra->vnc.import.rd.val[1];
1306 
1307 		/* label comes from MP_REACH_NLRI label */
1308 		vo->v.l2addr.label = decode_label(&bpi->extra->label[0]);
1309 
1310 		new->vn_options = vo;
1311 
1312 		/*
1313 		 * If there is an auxiliary prefix (i.e., host IP address),
1314 		 * use it as the nexthop prefix instead of the query prefix
1315 		 */
1316 		if (bpi->extra->vnc.import.aux_prefix.family) {
1317 			rfapiQprefix2Rprefix(&bpi->extra->vnc.import.aux_prefix,
1318 					     &new->prefix);
1319 		}
1320 	}
1321 
1322 	bgp_encap_types tun_type = BGP_ENCAP_TYPE_MPLS; /*Default*/
1323 	new->prefix.cost = rfapiRfpCost(bpi->attr);
1324 
1325 	struct bgp_attr_encap_subtlv *pEncap;
1326 
1327 	switch (BGP_MP_NEXTHOP_FAMILY(bpi->attr->mp_nexthop_len)) {
1328 	case AF_INET:
1329 		new->vn_address.addr_family = AF_INET;
1330 		new->vn_address.addr.v4 = bpi->attr->mp_nexthop_global_in;
1331 		break;
1332 
1333 	case AF_INET6:
1334 		new->vn_address.addr_family = AF_INET6;
1335 		new->vn_address.addr.v6 = bpi->attr->mp_nexthop_global;
1336 		break;
1337 
1338 	default:
1339 		zlog_warn("%s: invalid vpn nexthop length: %d", __func__,
1340 			  bpi->attr->mp_nexthop_len);
1341 		rfapi_free_next_hop_list(new);
1342 		return NULL;
1343 	}
1344 
1345 	for (pEncap = bpi->attr->vnc_subtlvs; pEncap; pEncap = pEncap->next) {
1346 		switch (pEncap->type) {
1347 		case BGP_VNC_SUBTLV_TYPE_LIFETIME:
1348 			/* use configured lifetime, not attr lifetime */
1349 			break;
1350 
1351 		default:
1352 			zlog_warn("%s: unknown VNC option type %d", __func__,
1353 				  pEncap->type);
1354 
1355 			break;
1356 		}
1357 	}
1358 
1359 	bgp_attr_extcom_tunnel_type(bpi->attr, &tun_type);
1360 	if (tun_type == BGP_ENCAP_TYPE_MPLS) {
1361 		struct prefix p;
1362 		/* MPLS carries UN address in next hop */
1363 		rfapiNexthop2Prefix(bpi->attr, &p);
1364 		if (p.family != 0) {
1365 			rfapiQprefix2Raddr(&p, &new->un_address);
1366 			have_vnc_tunnel_un = 1;
1367 		}
1368 	}
1369 
1370 	for (pEncap = bpi->attr->encap_subtlvs; pEncap; pEncap = pEncap->next) {
1371 		switch (pEncap->type) {
1372 		case BGP_ENCAP_SUBTLV_TYPE_REMOTE_ENDPOINT:
1373 			/*
1374 			 * Overrides ENCAP UN address, if any
1375 			 */
1376 			switch (pEncap->length) {
1377 
1378 			case 8:
1379 				new->un_address.addr_family = AF_INET;
1380 				memcpy(&new->un_address.addr.v4, pEncap->value,
1381 				       4);
1382 				have_vnc_tunnel_un = 1;
1383 				break;
1384 
1385 			case 20:
1386 				new->un_address.addr_family = AF_INET6;
1387 				memcpy(&new->un_address.addr.v6, pEncap->value,
1388 				       16);
1389 				have_vnc_tunnel_un = 1;
1390 				break;
1391 
1392 			default:
1393 				zlog_warn(
1394 					"%s: invalid tunnel subtlv UN addr length (%d) for bpi %p",
1395 					__func__, pEncap->length, bpi);
1396 			}
1397 			break;
1398 
1399 		default:
1400 			zlog_warn("%s: unknown Encap Attribute option type %d",
1401 				  __func__, pEncap->type);
1402 			break;
1403 		}
1404 	}
1405 
1406 	new->un_options = rfapi_encap_tlv_to_un_option(bpi->attr);
1407 
1408 #ifdef DEBUG_ENCAP_MONITOR
1409 	vnc_zlog_debug_verbose("%s: line %d: have_vnc_tunnel_un=%d", __func__,
1410 			       __LINE__, have_vnc_tunnel_un);
1411 #endif
1412 
1413 	if (!have_vnc_tunnel_un && bpi->extra) {
1414 		/*
1415 		 * use cached UN address from ENCAP route
1416 		 */
1417 		new->un_address.addr_family = bpi->extra->vnc.import.un_family;
1418 		switch (new->un_address.addr_family) {
1419 		case AF_INET:
1420 			new->un_address.addr.v4 =
1421 				bpi->extra->vnc.import.un.addr4;
1422 			break;
1423 		case AF_INET6:
1424 			new->un_address.addr.v6 =
1425 				bpi->extra->vnc.import.un.addr6;
1426 			break;
1427 		default:
1428 			zlog_warn("%s: invalid UN addr family (%d) for bpi %p",
1429 				  __func__, new->un_address.addr_family, bpi);
1430 			rfapi_free_next_hop_list(new);
1431 			return NULL;
1432 		}
1433 	}
1434 
1435 	new->lifetime = lifetime;
1436 	return new;
1437 }
1438 
rfapiHasNonRemovedRoutes(struct agg_node * rn)1439 int rfapiHasNonRemovedRoutes(struct agg_node *rn)
1440 {
1441 	struct bgp_path_info *bpi;
1442 
1443 	for (bpi = rn->info; bpi; bpi = bpi->next) {
1444 		struct prefix pfx;
1445 
1446 		if (!CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)
1447 		    && (bpi->extra && !rfapiGetUnAddrOfVpnBi(bpi, &pfx))) {
1448 
1449 			return 1;
1450 		}
1451 	}
1452 	return 0;
1453 }
1454 
#ifdef DEBUG_IT_NODES
/*
 * DEBUG FUNCTION
 *
 * Log every route attached to rn. For each route, "ctrc" is the return
 * code of rfapiGetUnAddrOfVpnBi() and "nr" is 1 when the route is not
 * flagged BGP_PATH_REMOVED, has "extra" data and ctrc is 0.
 */
void rfapiDumpNode(struct agg_node *rn)
{
	struct bgp_path_info *bpi;

	vnc_zlog_debug_verbose("%s: rn=%p", __func__, rn);
	for (bpi = rn->info; bpi; bpi = bpi->next) {
		struct prefix pfx_un;
		int ctrc = rfapiGetUnAddrOfVpnBi(bpi, &pfx_un);
		int nr = 0;

		if (!CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED) && bpi->extra
		    && !ctrc)
			nr = 1;

		vnc_zlog_debug_verbose(
			"  bpi=%p, nr=%d, flags=0x%x, extra=%p, ctrc=%d", bpi,
			nr, bpi->flags, bpi->extra, ctrc);
	}
}
#endif
1483 
rfapiNhlAddNodeRoutes(struct agg_node * rn,struct rfapi_ip_prefix * rprefix,uint32_t lifetime,int removed,struct rfapi_next_hop_entry ** head,struct rfapi_next_hop_entry ** tail,struct rfapi_ip_addr * exclude_vnaddr,struct agg_node * rfd_rib_node,struct prefix * pfx_target_original)1484 static int rfapiNhlAddNodeRoutes(
1485 	struct agg_node *rn,		      /* in */
1486 	struct rfapi_ip_prefix *rprefix,      /* in */
1487 	uint32_t lifetime,		      /* in */
1488 	int removed,			      /* in */
1489 	struct rfapi_next_hop_entry **head,   /* in/out */
1490 	struct rfapi_next_hop_entry **tail,   /* in/out */
1491 	struct rfapi_ip_addr *exclude_vnaddr, /* omit routes to same NVE */
1492 	struct agg_node *rfd_rib_node,	/* preload this NVE rib node */
1493 	struct prefix *pfx_target_original)   /* query target */
1494 {
1495 	struct bgp_path_info *bpi;
1496 	struct rfapi_next_hop_entry *new;
1497 	struct prefix pfx_un;
1498 	struct skiplist *seen_nexthops;
1499 	int count = 0;
1500 	const struct prefix *p = agg_node_get_prefix(rn);
1501 	int is_l2 = (p->family == AF_ETHERNET);
1502 
1503 	if (rfd_rib_node) {
1504 		struct agg_table *atable = agg_get_table(rfd_rib_node);
1505 		struct rfapi_descriptor *rfd;
1506 
1507 		if (atable) {
1508 			rfd = agg_get_table_info(atable);
1509 
1510 			if (rfapiRibFTDFilterRecentPrefix(rfd, rn,
1511 							  pfx_target_original))
1512 				return 0;
1513 		}
1514 	}
1515 
1516 	seen_nexthops =
1517 		skiplist_new(0, vnc_prefix_cmp, prefix_free_lists);
1518 
1519 	for (bpi = rn->info; bpi; bpi = bpi->next) {
1520 
1521 		struct prefix pfx_vn;
1522 		struct prefix *newpfx;
1523 
1524 		if (removed && !CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)) {
1525 #ifdef DEBUG_RETURNED_NHL
1526 			vnc_zlog_debug_verbose(
1527 				"%s: want holddown, this route not holddown, skip",
1528 				__func__);
1529 #endif
1530 			continue;
1531 		}
1532 		if (!removed && CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)) {
1533 			continue;
1534 		}
1535 
1536 		if (!bpi->extra) {
1537 			continue;
1538 		}
1539 
1540 		/*
1541 		 * Check for excluded VN address
1542 		 */
1543 		if (rfapiVpnBiNhEqualsPt(bpi, exclude_vnaddr))
1544 			continue;
1545 
1546 		/*
1547 		 * Check for VN address (nexthop) copied already
1548 		 */
1549 		if (is_l2) {
1550 			/* L2 routes: semantic nexthop in aux_prefix; VN addr
1551 			 * ain't it */
1552 			pfx_vn = bpi->extra->vnc.import.aux_prefix;
1553 		} else {
1554 			rfapiNexthop2Prefix(bpi->attr, &pfx_vn);
1555 		}
1556 		if (!skiplist_search(seen_nexthops, &pfx_vn, NULL)) {
1557 #ifdef DEBUG_RETURNED_NHL
1558 			char buf[PREFIX_STRLEN];
1559 
1560 			prefix2str(&pfx_vn, buf, sizeof(buf));
1561 			vnc_zlog_debug_verbose(
1562 				"%s: already put VN/nexthop %s, skip", __func__,
1563 				buf);
1564 #endif
1565 			continue;
1566 		}
1567 
1568 		if (rfapiGetUnAddrOfVpnBi(bpi, &pfx_un)) {
1569 #ifdef DEBUG_ENCAP_MONITOR
1570 			vnc_zlog_debug_verbose(
1571 				"%s: failed to get UN address of this VPN bpi",
1572 				__func__);
1573 #endif
1574 			continue;
1575 		}
1576 
1577 		newpfx = prefix_new();
1578 		*newpfx = pfx_vn;
1579 		skiplist_insert(seen_nexthops, newpfx, newpfx);
1580 
1581 		new = rfapiRouteInfo2NextHopEntry(rprefix, bpi, lifetime, rn);
1582 		if (new) {
1583 			if (rfapiRibPreloadBi(rfd_rib_node, &pfx_vn, &pfx_un,
1584 					      lifetime, bpi)) {
1585 				/* duplicate filtered by RIB */
1586 				rfapi_free_next_hop_list(new);
1587 				new = NULL;
1588 			}
1589 		}
1590 
1591 		if (new) {
1592 			if (*tail) {
1593 				(*tail)->next = new;
1594 			} else {
1595 				*head = new;
1596 			}
1597 			*tail = new;
1598 			++count;
1599 		}
1600 	}
1601 
1602 	skiplist_free(seen_nexthops);
1603 
1604 	return count;
1605 }
1606 
1607 
1608 /*
1609  * Breadth-first
1610  *
1611  * omit_node is meant for the situation where we are adding a subtree
1612  * of a parent of some original requested node. The response already
1613  * contains the original requested node, and we don't want to duplicate
1614  * its routes in the list, so we skip it if the right or left node
1615  * matches (of course, we still travel down its child subtrees).
1616  */
rfapiNhlAddSubtree(struct agg_node * rn,uint32_t lifetime,struct rfapi_next_hop_entry ** head,struct rfapi_next_hop_entry ** tail,struct agg_node * omit_node,struct rfapi_ip_addr * exclude_vnaddr,struct agg_table * rfd_rib_table,struct prefix * pfx_target_original)1617 static int rfapiNhlAddSubtree(
1618 	struct agg_node *rn,		      /* in */
1619 	uint32_t lifetime,		      /* in */
1620 	struct rfapi_next_hop_entry **head,   /* in/out */
1621 	struct rfapi_next_hop_entry **tail,   /* in/out */
1622 	struct agg_node *omit_node,	   /* in */
1623 	struct rfapi_ip_addr *exclude_vnaddr, /* omit routes to same NVE */
1624 	struct agg_table *rfd_rib_table,      /* preload here */
1625 	struct prefix *pfx_target_original)   /* query target */
1626 {
1627 	struct rfapi_ip_prefix rprefix;
1628 	int rcount = 0;
1629 
1630 	/* FIXME: need to find a better way here to work without sticking our
1631 	 * hands in node->link */
1632 	if (agg_node_left(rn) && agg_node_left(rn) != omit_node) {
1633 		if (agg_node_left(rn)->info) {
1634 			const struct prefix *p =
1635 				agg_node_get_prefix(agg_node_left(rn));
1636 			int count = 0;
1637 			struct agg_node *rib_rn = NULL;
1638 
1639 			rfapiQprefix2Rprefix(p, &rprefix);
1640 			if (rfd_rib_table)
1641 				rib_rn = agg_node_get(rfd_rib_table, p);
1642 
1643 			count = rfapiNhlAddNodeRoutes(
1644 				agg_node_left(rn), &rprefix, lifetime, 0, head,
1645 				tail, exclude_vnaddr, rib_rn,
1646 				pfx_target_original);
1647 			if (!count) {
1648 				count = rfapiNhlAddNodeRoutes(
1649 					agg_node_left(rn), &rprefix, lifetime,
1650 					1, head, tail, exclude_vnaddr, rib_rn,
1651 					pfx_target_original);
1652 			}
1653 			rcount += count;
1654 			if (rib_rn)
1655 				agg_unlock_node(rib_rn);
1656 		}
1657 	}
1658 
1659 	if (agg_node_right(rn) && agg_node_right(rn) != omit_node) {
1660 		if (agg_node_right(rn)->info) {
1661 			const struct prefix *p =
1662 				agg_node_get_prefix(agg_node_right(rn));
1663 			int count = 0;
1664 			struct agg_node *rib_rn = NULL;
1665 
1666 			rfapiQprefix2Rprefix(p, &rprefix);
1667 			if (rfd_rib_table)
1668 				rib_rn = agg_node_get(rfd_rib_table, p);
1669 
1670 			count = rfapiNhlAddNodeRoutes(
1671 				agg_node_right(rn), &rprefix, lifetime, 0, head,
1672 				tail, exclude_vnaddr, rib_rn,
1673 				pfx_target_original);
1674 			if (!count) {
1675 				count = rfapiNhlAddNodeRoutes(
1676 					agg_node_right(rn), &rprefix, lifetime,
1677 					1, head, tail, exclude_vnaddr, rib_rn,
1678 					pfx_target_original);
1679 			}
1680 			rcount += count;
1681 			if (rib_rn)
1682 				agg_unlock_node(rib_rn);
1683 		}
1684 	}
1685 
1686 	if (agg_node_left(rn)) {
1687 		rcount += rfapiNhlAddSubtree(
1688 			agg_node_left(rn), lifetime, head, tail, omit_node,
1689 			exclude_vnaddr, rfd_rib_table, pfx_target_original);
1690 	}
1691 	if (agg_node_right(rn)) {
1692 		rcount += rfapiNhlAddSubtree(
1693 			agg_node_right(rn), lifetime, head, tail, omit_node,
1694 			exclude_vnaddr, rfd_rib_table, pfx_target_original);
1695 	}
1696 
1697 	return rcount;
1698 }
1699 
1700 /*
1701  * Implementation of ROUTE_LIST(node) from RFAPI-Import-Event-Handling.txt
1702  *
1703  * Construct an rfapi nexthop list based on the routes attached to
1704  * the specified node.
1705  *
1706  * If there are any routes that do NOT have BGP_PATH_REMOVED set,
1707  * return those only. If there are ONLY routes with BGP_PATH_REMOVED,
1708  * then return those, and also include all the non-removed routes from the
1709  * next less-specific node (i.e., this node's parent) at the end.
1710  */
rfapiRouteNode2NextHopList(struct agg_node * rn,uint32_t lifetime,struct rfapi_ip_addr * exclude_vnaddr,struct agg_table * rfd_rib_table,struct prefix * pfx_target_original)1711 struct rfapi_next_hop_entry *rfapiRouteNode2NextHopList(
1712 	struct agg_node *rn, uint32_t lifetime, /* put into nexthop entries */
1713 	struct rfapi_ip_addr *exclude_vnaddr,   /* omit routes to same NVE */
1714 	struct agg_table *rfd_rib_table,	/* preload here */
1715 	struct prefix *pfx_target_original)     /* query target */
1716 {
1717 	struct rfapi_ip_prefix rprefix;
1718 	struct rfapi_next_hop_entry *answer = NULL;
1719 	struct rfapi_next_hop_entry *last = NULL;
1720 	struct agg_node *parent;
1721 	const struct prefix *p = agg_node_get_prefix(rn);
1722 	int count = 0;
1723 	struct agg_node *rib_rn;
1724 
1725 #ifdef DEBUG_RETURNED_NHL
1726 	vnc_zlog_debug_verbose("%s: called with node pfx=%rRN", __func__, rn);
1727 	rfapiDebugBacktrace();
1728 #endif
1729 
1730 	rfapiQprefix2Rprefix(p, &rprefix);
1731 
1732 	rib_rn = rfd_rib_table ? agg_node_get(rfd_rib_table, p) : NULL;
1733 
1734 	/*
1735 	 * Add non-withdrawn routes at this node
1736 	 */
1737 	count = rfapiNhlAddNodeRoutes(rn, &rprefix, lifetime, 0, &answer, &last,
1738 				      exclude_vnaddr, rib_rn,
1739 				      pfx_target_original);
1740 
1741 	/*
1742 	 * If the list has at least one entry, it's finished
1743 	 */
1744 	if (count) {
1745 		count += rfapiNhlAddSubtree(rn, lifetime, &answer, &last, NULL,
1746 					    exclude_vnaddr, rfd_rib_table,
1747 					    pfx_target_original);
1748 		vnc_zlog_debug_verbose("%s: %d nexthops, answer=%p", __func__,
1749 				       count, answer);
1750 #ifdef DEBUG_RETURNED_NHL
1751 		rfapiPrintNhl(NULL, answer);
1752 #endif
1753 		if (rib_rn)
1754 			agg_unlock_node(rib_rn);
1755 		return answer;
1756 	}
1757 
1758 	/*
1759 	 * Add withdrawn routes at this node
1760 	 */
1761 	count = rfapiNhlAddNodeRoutes(rn, &rprefix, lifetime, 1, &answer, &last,
1762 				      exclude_vnaddr, rib_rn,
1763 				      pfx_target_original);
1764 	if (rib_rn)
1765 		agg_unlock_node(rib_rn);
1766 
1767 	// rfapiPrintNhl(NULL, answer);
1768 
1769 	/*
1770 	 * walk up the tree until we find a node with non-deleted
1771 	 * routes, then add them
1772 	 */
1773 	for (parent = agg_node_parent(rn); parent;
1774 	     parent = agg_node_parent(parent)) {
1775 		if (rfapiHasNonRemovedRoutes(parent)) {
1776 			break;
1777 		}
1778 	}
1779 
1780 	/*
1781 	 * Add non-withdrawn routes from less-specific prefix
1782 	 */
1783 	if (parent) {
1784 		const struct prefix *p = agg_node_get_prefix(parent);
1785 
1786 		rib_rn = rfd_rib_table ? agg_node_get(rfd_rib_table, p) : NULL;
1787 		rfapiQprefix2Rprefix(p, &rprefix);
1788 		count += rfapiNhlAddNodeRoutes(parent, &rprefix, lifetime, 0,
1789 					       &answer, &last, exclude_vnaddr,
1790 					       rib_rn, pfx_target_original);
1791 		count += rfapiNhlAddSubtree(parent, lifetime, &answer, &last,
1792 					    rn, exclude_vnaddr, rfd_rib_table,
1793 					    pfx_target_original);
1794 		if (rib_rn)
1795 			agg_unlock_node(rib_rn);
1796 	} else {
1797 		/*
1798 		 * There is no parent with non-removed routes. Still need to
1799 		 * add subtree of original node if it contributed routes to the
1800 		 * answer.
1801 		 */
1802 		if (count)
1803 			count += rfapiNhlAddSubtree(rn, lifetime, &answer,
1804 						    &last, rn, exclude_vnaddr,
1805 						    rfd_rib_table,
1806 						    pfx_target_original);
1807 	}
1808 
1809 	vnc_zlog_debug_verbose("%s: %d nexthops, answer=%p", __func__, count,
1810 			       answer);
1811 #ifdef DEBUG_RETURNED_NHL
1812 	rfapiPrintNhl(NULL, answer);
1813 #endif
1814 	return answer;
1815 }
1816 
1817 /*
1818  * Construct nexthop list of all routes in table
1819  */
rfapiRouteTable2NextHopList(struct agg_table * rt,uint32_t lifetime,struct rfapi_ip_addr * exclude_vnaddr,struct agg_table * rfd_rib_table,struct prefix * pfx_target_original)1820 struct rfapi_next_hop_entry *rfapiRouteTable2NextHopList(
1821 	struct agg_table *rt, uint32_t lifetime, /* put into nexthop entries */
1822 	struct rfapi_ip_addr *exclude_vnaddr,    /* omit routes to same NVE */
1823 	struct agg_table *rfd_rib_table,    /* preload this NVE rib table */
1824 	struct prefix *pfx_target_original) /* query target */
1825 {
1826 	struct agg_node *rn;
1827 	struct rfapi_next_hop_entry *biglist = NULL;
1828 	struct rfapi_next_hop_entry *nhl;
1829 	struct rfapi_next_hop_entry *tail = NULL;
1830 	int count = 0;
1831 
1832 	for (rn = agg_route_top(rt); rn; rn = agg_route_next(rn)) {
1833 
1834 		nhl = rfapiRouteNode2NextHopList(rn, lifetime, exclude_vnaddr,
1835 						 rfd_rib_table,
1836 						 pfx_target_original);
1837 		if (!tail) {
1838 			tail = biglist = nhl;
1839 			if (tail)
1840 				count = 1;
1841 		} else {
1842 			tail->next = nhl;
1843 		}
1844 		if (tail) {
1845 			while (tail->next) {
1846 				++count;
1847 				tail = tail->next;
1848 			}
1849 		}
1850 	}
1851 
1852 	vnc_zlog_debug_verbose("%s: returning %d routes", __func__, count);
1853 	return biglist;
1854 }
1855 
/*
 * Construct the nexthop list for a single node of an L2 (ethernet)
 * import table.
 *
 * Non-holddown routes are collected first; holddown (removed) routes are
 * collected only if the first pass produced nothing. Returns the head of
 * the list, or NULL if no entry was produced.
 *
 * NOTE(review): the non-holddown pass passes NULL rather than
 * exclude_vnaddr, so the exclusion applies only to the holddown pass.
 * Whether this is intentional cannot be determined from this function.
 */
struct rfapi_next_hop_entry *rfapiEthRouteNode2NextHopList(
	struct agg_node *rn, struct rfapi_ip_prefix *rprefix,
	uint32_t lifetime,		      /* put into nexthop entries */
	struct rfapi_ip_addr *exclude_vnaddr, /* omit routes to same NVE */
	struct agg_table *rfd_rib_table,      /* preload NVE rib table */
	struct prefix *pfx_target_original)   /* query target */
{
	struct rfapi_next_hop_entry *answer = NULL;
	struct rfapi_next_hop_entry *last = NULL;
	struct agg_node *rib_rn = NULL;
	int count;

	/* node obtained here is released with agg_unlock_node() below */
	if (rfd_rib_table)
		rib_rn = agg_node_get(rfd_rib_table, agg_node_get_prefix(rn));

	count = rfapiNhlAddNodeRoutes(rn, rprefix, lifetime, 0, &answer, &last,
				      NULL, rib_rn, pfx_target_original);

#ifdef DEBUG_ENCAP_MONITOR
	vnc_zlog_debug_verbose("%s: node %p: %d non-holddown routes", __func__,
			       rn, count);
#endif

	if (count == 0) {
		count = rfapiNhlAddNodeRoutes(rn, rprefix, lifetime, 1, &answer,
					      &last, exclude_vnaddr, rib_rn,
					      pfx_target_original);
		vnc_zlog_debug_verbose("%s: node %p: %d holddown routes",
				       __func__, rn, count);
	}

	if (rib_rn)
		agg_unlock_node(rib_rn);

#ifdef DEBUG_RETURNED_NHL
	rfapiPrintNhl(NULL, answer);
#endif

	return answer;
}
1897 
1898 
1899 /*
1900  * Construct nexthop list of all routes in table
1901  */
rfapiEthRouteTable2NextHopList(uint32_t logical_net_id,struct rfapi_ip_prefix * rprefix,uint32_t lifetime,struct rfapi_ip_addr * exclude_vnaddr,struct agg_table * rfd_rib_table,struct prefix * pfx_target_original)1902 struct rfapi_next_hop_entry *rfapiEthRouteTable2NextHopList(
1903 	uint32_t logical_net_id, struct rfapi_ip_prefix *rprefix,
1904 	uint32_t lifetime,		      /* put into nexthop entries */
1905 	struct rfapi_ip_addr *exclude_vnaddr, /* omit routes to same NVE */
1906 	struct agg_table *rfd_rib_table,      /* preload NVE rib node */
1907 	struct prefix *pfx_target_original)   /* query target */
1908 {
1909 	struct rfapi_import_table *it;
1910 	struct bgp *bgp = bgp_get_default();
1911 	struct agg_table *rt;
1912 	struct agg_node *rn;
1913 	struct rfapi_next_hop_entry *biglist = NULL;
1914 	struct rfapi_next_hop_entry *nhl;
1915 	struct rfapi_next_hop_entry *tail = NULL;
1916 	int count = 0;
1917 
1918 
1919 	it = rfapiMacImportTableGet(bgp, logical_net_id);
1920 	rt = it->imported_vpn[AFI_L2VPN];
1921 
1922 	for (rn = agg_route_top(rt); rn; rn = agg_route_next(rn)) {
1923 
1924 		nhl = rfapiEthRouteNode2NextHopList(
1925 			rn, rprefix, lifetime, exclude_vnaddr, rfd_rib_table,
1926 			pfx_target_original);
1927 		if (!tail) {
1928 			tail = biglist = nhl;
1929 			if (tail)
1930 				count = 1;
1931 		} else {
1932 			tail->next = nhl;
1933 		}
1934 		if (tail) {
1935 			while (tail->next) {
1936 				++count;
1937 				tail = tail->next;
1938 			}
1939 		}
1940 	}
1941 
1942 	vnc_zlog_debug_verbose("%s: returning %d routes", __func__, count);
1943 	return biglist;
1944 }
1945 
1946 /*
1947  * Insert a new bpi to the imported route table node,
1948  * keeping the list of BPIs sorted best route first
1949  */
rfapiBgpInfoAttachSorted(struct agg_node * rn,struct bgp_path_info * info_new,afi_t afi,safi_t safi)1950 static void rfapiBgpInfoAttachSorted(struct agg_node *rn,
1951 				     struct bgp_path_info *info_new, afi_t afi,
1952 				     safi_t safi)
1953 {
1954 	struct bgp *bgp;
1955 	struct bgp_path_info *prev;
1956 	struct bgp_path_info *next;
1957 	char pfx_buf[PREFIX2STR_BUFFER];
1958 
1959 
1960 	bgp = bgp_get_default(); /* assume 1 instance for now */
1961 
1962 	if (VNC_DEBUG(IMPORT_BI_ATTACH)) {
1963 		vnc_zlog_debug_verbose("%s: info_new->peer=%p", __func__,
1964 				       info_new->peer);
1965 		vnc_zlog_debug_verbose("%s: info_new->peer->su_remote=%p",
1966 				       __func__, info_new->peer->su_remote);
1967 	}
1968 
1969 	for (prev = NULL, next = rn->info; next;
1970 	     prev = next, next = next->next) {
1971 		enum bgp_path_selection_reason reason;
1972 
1973 		if (!bgp
1974 		    || (!CHECK_FLAG(info_new->flags, BGP_PATH_REMOVED)
1975 			&& CHECK_FLAG(next->flags, BGP_PATH_REMOVED))
1976 		    || bgp_path_info_cmp_compatible(bgp, info_new, next,
1977 						    pfx_buf, afi, safi,
1978 						    &reason)
1979 			       == -1) { /* -1 if 1st is better */
1980 			break;
1981 		}
1982 	}
1983 	vnc_zlog_debug_verbose("%s: prev=%p, next=%p", __func__, prev, next);
1984 	if (prev) {
1985 		prev->next = info_new;
1986 	} else {
1987 		rn->info = info_new;
1988 	}
1989 	info_new->prev = prev;
1990 	info_new->next = next;
1991 	if (next)
1992 		next->prev = info_new;
1993 	bgp_attr_intern(info_new->attr);
1994 }
1995 
rfapiBgpInfoDetach(struct agg_node * rn,struct bgp_path_info * bpi)1996 static void rfapiBgpInfoDetach(struct agg_node *rn, struct bgp_path_info *bpi)
1997 {
1998 	/*
1999 	 * Remove the route (doubly-linked)
2000 	 */
2001 	//  bgp_attr_unintern (&bpi->attr);
2002 	if (bpi->next)
2003 		bpi->next->prev = bpi->prev;
2004 	if (bpi->prev)
2005 		bpi->prev->next = bpi->next;
2006 	else
2007 		rn->info = bpi->next;
2008 }
2009 
2010 /*
2011  * For L3-indexed import tables
2012  */
rfapi_bi_peer_rd_cmp(const void * b1,const void * b2)2013 static int rfapi_bi_peer_rd_cmp(const void *b1, const void *b2)
2014 {
2015 	const struct bgp_path_info *bpi1 = b1;
2016 	const struct bgp_path_info *bpi2 = b2;
2017 
2018 	/*
2019 	 * Compare peers
2020 	 */
2021 	if (bpi1->peer < bpi2->peer)
2022 		return -1;
2023 	if (bpi1->peer > bpi2->peer)
2024 		return 1;
2025 
2026 	/*
2027 	 * compare RDs
2028 	 */
2029 	return vnc_prefix_cmp(
2030 		(const struct prefix *)&bpi1->extra->vnc.import.rd,
2031 		(const struct prefix *)&bpi2->extra->vnc.import.rd);
2032 }
2033 
2034 /*
2035  * For L2-indexed import tables
2036  * The BPIs in these tables should ALWAYS have an aux_prefix set because
2037  * they arrive via IPv4 or IPv6 advertisements.
2038  */
rfapi_bi_peer_rd_aux_cmp(const void * b1,const void * b2)2039 static int rfapi_bi_peer_rd_aux_cmp(const void *b1, const void *b2)
2040 {
2041 	const struct bgp_path_info *bpi1 = b1;
2042 	const struct bgp_path_info *bpi2 = b2;
2043 	int rc;
2044 
2045 	/*
2046 	 * Compare peers
2047 	 */
2048 	if (bpi1->peer < bpi2->peer)
2049 		return -1;
2050 	if (bpi1->peer > bpi2->peer)
2051 		return 1;
2052 
2053 	/*
2054 	 * compare RDs
2055 	 */
2056 	rc = vnc_prefix_cmp((struct prefix *)&bpi1->extra->vnc.import.rd,
2057 			    (struct prefix *)&bpi2->extra->vnc.import.rd);
2058 	if (rc) {
2059 		return rc;
2060 	}
2061 
2062 	/*
2063 	 * L2 import tables can have multiple entries with the
2064 	 * same MAC address, same RD, but different L3 addresses.
2065 	 *
2066 	 * Use presence of aux_prefix with AF=ethernet and prefixlen=1
2067 	 * as magic value to signify explicit wildcarding of the aux_prefix.
2068 	 * This magic value will not appear in bona fide bpi entries in
2069 	 * the import table, but is allowed in the "fake" bpi used to
2070 	 * probe the table when searching. (We have to test both b1 and b2
2071 	 * because there is no guarantee of the order the test key and
2072 	 * the real key will be passed)
2073 	 */
2074 	if ((bpi1->extra->vnc.import.aux_prefix.family == AF_ETHERNET
2075 	     && (bpi1->extra->vnc.import.aux_prefix.prefixlen == 1))
2076 	    || (bpi2->extra->vnc.import.aux_prefix.family == AF_ETHERNET
2077 		&& (bpi2->extra->vnc.import.aux_prefix.prefixlen == 1))) {
2078 
2079 		/*
2080 		 * wildcard aux address specified
2081 		 */
2082 		return 0;
2083 	}
2084 
2085 	return vnc_prefix_cmp(&bpi1->extra->vnc.import.aux_prefix,
2086 			      &bpi2->extra->vnc.import.aux_prefix);
2087 }
2088 
2089 
2090 /*
2091  * Index on RD and Peer
2092  */
rfapiItBiIndexAdd(struct agg_node * rn,struct bgp_path_info * bpi)2093 static void rfapiItBiIndexAdd(struct agg_node *rn, /* Import table VPN node */
2094 			      struct bgp_path_info *bpi) /* new BPI */
2095 {
2096 	struct skiplist *sl;
2097 	const struct prefix *p;
2098 
2099 	assert(rn);
2100 	assert(bpi);
2101 	assert(bpi->extra);
2102 
2103 	{
2104 		char buf[RD_ADDRSTRLEN];
2105 
2106 		vnc_zlog_debug_verbose("%s: bpi %p, peer %p, rd %s", __func__,
2107 				       bpi, bpi->peer,
2108 				       prefix_rd2str(&bpi->extra->vnc.import.rd,
2109 						     buf, sizeof(buf)));
2110 	}
2111 
2112 	sl = RFAPI_RDINDEX_W_ALLOC(rn);
2113 	if (!sl) {
2114 		p = agg_node_get_prefix(rn);
2115 		if (AF_ETHERNET == p->family) {
2116 			sl = skiplist_new(0, rfapi_bi_peer_rd_aux_cmp, NULL);
2117 		} else {
2118 			sl = skiplist_new(0, rfapi_bi_peer_rd_cmp, NULL);
2119 		}
2120 		RFAPI_IT_EXTRA_GET(rn)->u.vpn.idx_rd = sl;
2121 		agg_lock_node(rn); /* for skiplist */
2122 	}
2123 	assert(!skiplist_insert(sl, (void *)bpi, (void *)bpi));
2124 	agg_lock_node(rn); /* for skiplist entry */
2125 
2126 	/* NB: BPIs in import tables are not refcounted */
2127 }
2128 
rfapiItBiIndexDump(struct agg_node * rn)2129 static void rfapiItBiIndexDump(struct agg_node *rn)
2130 {
2131 	struct skiplist *sl;
2132 	void *cursor = NULL;
2133 	struct bgp_path_info *k;
2134 	struct bgp_path_info *v;
2135 	int rc;
2136 
2137 	sl = RFAPI_RDINDEX(rn);
2138 	if (!sl)
2139 		return;
2140 
2141 	for (rc = skiplist_next(sl, (void **)&k, (void **)&v, &cursor); !rc;
2142 	     rc = skiplist_next(sl, (void **)&k, (void **)&v, &cursor)) {
2143 
2144 		char buf[RD_ADDRSTRLEN];
2145 		char buf_aux_pfx[PREFIX_STRLEN];
2146 
2147 		prefix_rd2str(&k->extra->vnc.import.rd, buf, sizeof(buf));
2148 		if (k->extra->vnc.import.aux_prefix.family) {
2149 			prefix2str(&k->extra->vnc.import.aux_prefix,
2150 				   buf_aux_pfx, sizeof(buf_aux_pfx));
2151 		} else
2152 			strlcpy(buf_aux_pfx, "(none)", sizeof(buf_aux_pfx));
2153 
2154 		vnc_zlog_debug_verbose("bpi %p, peer %p, rd %s, aux_prefix %s",
2155 				       k, k->peer, buf, buf_aux_pfx);
2156 	}
2157 }
2158 
rfapiItBiIndexSearch(struct agg_node * rn,struct prefix_rd * prd,struct peer * peer,const struct prefix * aux_prefix)2159 static struct bgp_path_info *rfapiItBiIndexSearch(
2160 	struct agg_node *rn, /* Import table VPN node */
2161 	struct prefix_rd *prd, struct peer *peer,
2162 	const struct prefix *aux_prefix) /* optional L3 addr for L2 ITs */
2163 {
2164 	struct skiplist *sl;
2165 	int rc;
2166 	struct bgp_path_info bpi_fake = {0};
2167 	struct bgp_path_info_extra bpi_extra = {0};
2168 	struct bgp_path_info *bpi_result;
2169 
2170 	sl = RFAPI_RDINDEX(rn);
2171 	if (!sl)
2172 		return NULL;
2173 
2174 #ifdef DEBUG_BI_SEARCH
2175 	{
2176 		char buf[RD_ADDRSTRLEN];
2177 		char buf_aux_pfx[PREFIX_STRLEN];
2178 
2179 		if (aux_prefix) {
2180 			prefix2str(aux_prefix, buf_aux_pfx,
2181 				   sizeof(buf_aux_pfx));
2182 		} else
2183 			strlcpy(buf_aux_pfx, "(nil)", sizeof(buf_aux_pfx));
2184 
2185 		vnc_zlog_debug_verbose("%s want prd=%s, peer=%p, aux_prefix=%s",
2186 				       __func__,
2187 				       prefix_rd2str(prd, buf, sizeof(buf)),
2188 				       peer, buf_aux_pfx);
2189 		rfapiItBiIndexDump(rn);
2190 	}
2191 #endif
2192 
2193 	/* threshold is a WAG */
2194 	if (sl->count < 3) {
2195 #ifdef DEBUG_BI_SEARCH
2196 		vnc_zlog_debug_verbose("%s: short list algorithm", __func__);
2197 #endif
2198 		/* if short list, linear search might be faster */
2199 		for (bpi_result = rn->info; bpi_result;
2200 		     bpi_result = bpi_result->next) {
2201 #ifdef DEBUG_BI_SEARCH
2202 			{
2203 				char buf[RD_ADDRSTRLEN];
2204 
2205 				vnc_zlog_debug_verbose(
2206 					"%s: bpi has prd=%s, peer=%p", __func__,
2207 					prefix_rd2str(&bpi_result->extra->vnc
2208 							       .import.rd,
2209 						      buf, sizeof(buf)),
2210 					bpi_result->peer);
2211 			}
2212 #endif
2213 			if (peer == bpi_result->peer
2214 			    && !prefix_cmp((struct prefix *)&bpi_result->extra
2215 						   ->vnc.import.rd,
2216 					   (struct prefix *)prd)) {
2217 
2218 #ifdef DEBUG_BI_SEARCH
2219 				vnc_zlog_debug_verbose(
2220 					"%s: peer and RD same, doing aux_prefix check",
2221 					__func__);
2222 #endif
2223 				if (!aux_prefix
2224 				    || !prefix_cmp(
2225 					       aux_prefix,
2226 					       &bpi_result->extra->vnc.import
2227 							.aux_prefix)) {
2228 
2229 #ifdef DEBUG_BI_SEARCH
2230 					vnc_zlog_debug_verbose("%s: match",
2231 							       __func__);
2232 #endif
2233 					break;
2234 				}
2235 			}
2236 		}
2237 		return bpi_result;
2238 	}
2239 
2240 	bpi_fake.peer = peer;
2241 	bpi_fake.extra = &bpi_extra;
2242 	bpi_fake.extra->vnc.import.rd = *prd;
2243 	if (aux_prefix) {
2244 		bpi_fake.extra->vnc.import.aux_prefix = *aux_prefix;
2245 	} else {
2246 		/* wildcard */
2247 		bpi_fake.extra->vnc.import.aux_prefix.family = AF_ETHERNET;
2248 		bpi_fake.extra->vnc.import.aux_prefix.prefixlen = 1;
2249 	}
2250 
2251 	rc = skiplist_search(sl, (void *)&bpi_fake, (void *)&bpi_result);
2252 
2253 	if (rc) {
2254 #ifdef DEBUG_BI_SEARCH
2255 		vnc_zlog_debug_verbose("%s: no match", __func__);
2256 #endif
2257 		return NULL;
2258 	}
2259 
2260 #ifdef DEBUG_BI_SEARCH
2261 	vnc_zlog_debug_verbose("%s: matched bpi=%p", __func__, bpi_result);
2262 #endif
2263 
2264 	return bpi_result;
2265 }
2266 
rfapiItBiIndexDel(struct agg_node * rn,struct bgp_path_info * bpi)2267 static void rfapiItBiIndexDel(struct agg_node *rn, /* Import table VPN node */
2268 			      struct bgp_path_info *bpi) /* old BPI */
2269 {
2270 	struct skiplist *sl;
2271 	int rc;
2272 
2273 	{
2274 		char buf[RD_ADDRSTRLEN];
2275 
2276 		vnc_zlog_debug_verbose("%s: bpi %p, peer %p, rd %s", __func__,
2277 				       bpi, bpi->peer,
2278 				       prefix_rd2str(&bpi->extra->vnc.import.rd,
2279 						     buf, sizeof(buf)));
2280 	}
2281 
2282 	sl = RFAPI_RDINDEX(rn);
2283 	assert(sl);
2284 
2285 	rc = skiplist_delete(sl, (void *)(bpi), (void *)bpi);
2286 	if (rc) {
2287 		rfapiItBiIndexDump(rn);
2288 	}
2289 	assert(!rc);
2290 
2291 	agg_unlock_node(rn); /* for skiplist entry */
2292 
2293 	/* NB: BPIs in import tables are not refcounted */
2294 }
2295 
2296 /*
2297  * Add a backreference at the ENCAP node to the VPN route that
2298  * refers to it
2299  */
2300 static void
rfapiMonitorEncapAdd(struct rfapi_import_table * import_table,struct prefix * p,struct agg_node * vpn_rn,struct bgp_path_info * vpn_bpi)2301 rfapiMonitorEncapAdd(struct rfapi_import_table *import_table,
2302 		     struct prefix *p,		    /* VN address */
2303 		     struct agg_node *vpn_rn,       /* VPN node */
2304 		     struct bgp_path_info *vpn_bpi) /* VPN bpi/route */
2305 {
2306 	afi_t afi = family2afi(p->family);
2307 	struct agg_node *rn;
2308 	struct rfapi_monitor_encap *m;
2309 
2310 	assert(afi);
2311 	rn = agg_node_get(import_table->imported_encap[afi], p); /* locks rn */
2312 	assert(rn);
2313 
2314 	m = XCALLOC(MTYPE_RFAPI_MONITOR_ENCAP,
2315 		    sizeof(struct rfapi_monitor_encap));
2316 	assert(m);
2317 
2318 	m->node = vpn_rn;
2319 	m->bpi = vpn_bpi;
2320 	m->rn = rn;
2321 
2322 	/* insert to encap node's list */
2323 	m->next = RFAPI_MONITOR_ENCAP(rn);
2324 	if (m->next)
2325 		m->next->prev = m;
2326 	RFAPI_MONITOR_ENCAP_W_ALLOC(rn) = m;
2327 
2328 	/* for easy lookup when deleting vpn route */
2329 	vpn_bpi->extra->vnc.import.hme = m;
2330 
2331 	vnc_zlog_debug_verbose(
2332 		"%s: it=%p, vpn_bpi=%p, afi=%d, encap rn=%p, setting vpn_bpi->extra->vnc.import.hme=%p",
2333 		__func__, import_table, vpn_bpi, afi, rn, m);
2334 
2335 	RFAPI_CHECK_REFCOUNT(rn, SAFI_ENCAP, 0);
2336 	bgp_attr_intern(vpn_bpi->attr);
2337 }
2338 
rfapiMonitorEncapDelete(struct bgp_path_info * vpn_bpi)2339 static void rfapiMonitorEncapDelete(struct bgp_path_info *vpn_bpi)
2340 {
2341 	/*
2342 	 * Remove encap monitor
2343 	 */
2344 	vnc_zlog_debug_verbose("%s: vpn_bpi=%p", __func__, vpn_bpi);
2345 	if (vpn_bpi->extra) {
2346 		struct rfapi_monitor_encap *hme =
2347 			vpn_bpi->extra->vnc.import.hme;
2348 
2349 		if (hme) {
2350 
2351 			vnc_zlog_debug_verbose("%s: hme=%p", __func__, hme);
2352 
2353 			/* Refcount checking takes too long here */
2354 			// RFAPI_CHECK_REFCOUNT(hme->rn, SAFI_ENCAP, 0);
2355 			if (hme->next)
2356 				hme->next->prev = hme->prev;
2357 			if (hme->prev)
2358 				hme->prev->next = hme->next;
2359 			else
2360 				RFAPI_MONITOR_ENCAP_W_ALLOC(hme->rn) =
2361 					hme->next;
2362 			/* Refcount checking takes too long here */
2363 			// RFAPI_CHECK_REFCOUNT(hme->rn, SAFI_ENCAP, 1);
2364 
2365 			/* see if the struct rfapi_it_extra is empty and can be
2366 			 * freed */
2367 			rfapiMonitorExtraPrune(SAFI_ENCAP, hme->rn);
2368 
2369 			agg_unlock_node(hme->rn); /* decr ref count */
2370 			XFREE(MTYPE_RFAPI_MONITOR_ENCAP, hme);
2371 			vpn_bpi->extra->vnc.import.hme = NULL;
2372 		}
2373 	}
2374 }
2375 
2376 /*
2377  * quagga lib/thread.h says this must return int even though
2378  * it doesn't do anything with the return value
2379  */
rfapiWithdrawTimerVPN(struct thread * t)2380 static int rfapiWithdrawTimerVPN(struct thread *t)
2381 {
2382 	struct rfapi_withdraw *wcb = t->arg;
2383 	struct bgp_path_info *bpi = wcb->info;
2384 	struct bgp *bgp = bgp_get_default();
2385 	const struct prefix *p;
2386 	struct rfapi_monitor_vpn *moved;
2387 	afi_t afi;
2388 
2389 	if (bgp == NULL) {
2390 		vnc_zlog_debug_verbose(
2391                    "%s: NULL BGP pointer, assume shutdown race condition!!!",
2392                    __func__);
2393 		return 0;
2394 	}
2395 	if (CHECK_FLAG(bgp->flags, BGP_FLAG_DELETE_IN_PROGRESS)) {
2396 		vnc_zlog_debug_verbose(
2397 			"%s: BGP delete in progress, assume shutdown race condition!!!",
2398 			__func__);
2399 		return 0;
2400 	}
2401 	assert(wcb->node);
2402 	assert(bpi);
2403 	assert(wcb->import_table);
2404 	assert(bpi->extra);
2405 
2406 	RFAPI_CHECK_REFCOUNT(wcb->node, SAFI_MPLS_VPN, wcb->lockoffset);
2407 
2408 	vnc_zlog_debug_verbose("%s: removing bpi %p at prefix %pRN", __func__,
2409 			       bpi, wcb->node);
2410 
2411 	/*
2412 	 * Remove the route (doubly-linked)
2413 	 */
2414 	if (CHECK_FLAG(bpi->flags, BGP_PATH_VALID)
2415 	    && VALID_INTERIOR_TYPE(bpi->type))
2416 		RFAPI_MONITOR_EXTERIOR(wcb->node)->valid_interior_count--;
2417 
2418 	p = agg_node_get_prefix(wcb->node);
2419 	afi = family2afi(p->family);
2420 	wcb->import_table->holddown_count[afi] -= 1; /* keep count consistent */
2421 	rfapiItBiIndexDel(wcb->node, bpi);
2422 	rfapiBgpInfoDetach(wcb->node, bpi); /* with removed bpi */
2423 
2424 	vnc_import_bgp_exterior_del_route_interior(bgp, wcb->import_table,
2425 						   wcb->node, bpi);
2426 
2427 
2428 	/*
2429 	 * If VNC is configured to send response remove messages, AND
2430 	 * if the removed route had a UN address, do response removal
2431 	 * processing.
2432 	 */
2433 	if (!(bgp->rfapi_cfg->flags
2434 	      & BGP_VNC_CONFIG_RESPONSE_REMOVAL_DISABLE)) {
2435 
2436 		int has_valid_duplicate = 0;
2437 		struct bgp_path_info *bpii;
2438 
2439 		/*
2440 		 * First check if there are any OTHER routes at this node
2441 		 * that have the same nexthop and a valid UN address. If
2442 		 * there are (e.g., from other peers), then the route isn't
2443 		 * really gone, so skip sending a response removal message.
2444 		 */
2445 		for (bpii = wcb->node->info; bpii; bpii = bpii->next) {
2446 			if (rfapiVpnBiSamePtUn(bpi, bpii)) {
2447 				has_valid_duplicate = 1;
2448 				break;
2449 			}
2450 		}
2451 
2452 		vnc_zlog_debug_verbose("%s: has_valid_duplicate=%d", __func__,
2453 				       has_valid_duplicate);
2454 
2455 		if (!has_valid_duplicate) {
2456 			rfapiRibPendingDeleteRoute(bgp, wcb->import_table, afi,
2457 						   wcb->node);
2458 		}
2459 	}
2460 
2461 	rfapiMonitorEncapDelete(bpi);
2462 
2463 	/*
2464 	 * If there are no VPN monitors at this VPN Node A,
2465 	 * we are done
2466 	 */
2467 	if (!RFAPI_MONITOR_VPN(wcb->node)) {
2468 		vnc_zlog_debug_verbose("%s: no VPN monitors at this node",
2469 				       __func__);
2470 		goto done;
2471 	}
2472 
2473 	/*
2474 	 * rfapiMonitorMoveShorter only moves monitors if there are
2475 	 * no remaining valid routes at the current node
2476 	 */
2477 	moved = rfapiMonitorMoveShorter(wcb->node, 1);
2478 
2479 	if (moved) {
2480 		rfapiMonitorMovedUp(wcb->import_table, wcb->node, moved->node,
2481 				    moved);
2482 	}
2483 
2484 done:
2485 	/*
2486 	 * Free VPN bpi
2487 	 */
2488 	rfapiBgpInfoFree(bpi);
2489 	wcb->info = NULL;
2490 
2491 	/*
2492 	 * If route count at this node has gone to 0, withdraw exported prefix
2493 	 */
2494 	if (!wcb->node->info) {
2495 		/* see if the struct rfapi_it_extra is empty and can be freed */
2496 		rfapiMonitorExtraPrune(SAFI_MPLS_VPN, wcb->node);
2497 		vnc_direct_bgp_del_prefix(bgp, wcb->import_table, wcb->node);
2498 		vnc_zebra_del_prefix(bgp, wcb->import_table, wcb->node);
2499 	} else {
2500 		/*
2501 		 * nexthop change event
2502 		 * vnc_direct_bgp_add_prefix() will recompute the VN addr
2503 		 * ecommunity
2504 		 */
2505 		vnc_direct_bgp_add_prefix(bgp, wcb->import_table, wcb->node);
2506 	}
2507 
2508 	RFAPI_CHECK_REFCOUNT(wcb->node, SAFI_MPLS_VPN, 1 + wcb->lockoffset);
2509 	agg_unlock_node(wcb->node); /* decr ref count */
2510 	XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
2511 	return 0;
2512 }
2513 
2514 /*
2515  * This works for multiprotocol extension, but not for plain ol'
2516  * unicast IPv4 because that nexthop is stored in attr->nexthop
2517  */
rfapiNexthop2Prefix(struct attr * attr,struct prefix * p)2518 void rfapiNexthop2Prefix(struct attr *attr, struct prefix *p)
2519 {
2520 	assert(p);
2521 	assert(attr);
2522 
2523 	memset(p, 0, sizeof(struct prefix));
2524 
2525 	switch (p->family = BGP_MP_NEXTHOP_FAMILY(attr->mp_nexthop_len)) {
2526 	case AF_INET:
2527 		p->u.prefix4 = attr->mp_nexthop_global_in;
2528 		p->prefixlen = 32;
2529 		break;
2530 
2531 	case AF_INET6:
2532 		p->u.prefix6 = attr->mp_nexthop_global;
2533 		p->prefixlen = 128;
2534 		break;
2535 
2536 	default:
2537 		vnc_zlog_debug_verbose("%s: Family is unknown = %d", __func__,
2538 				       p->family);
2539 	}
2540 }
2541 
/*
 * Convert the nexthop of a unicast route into a host prefix.
 *
 * afi	address family of the route
 * attr	attribute holding the nexthop
 * p	output prefix; fully overwritten
 *
 * For AFI_IP the nexthop is taken from attr->nexthop and returned as
 * an AF_INET /32. Every other afi is handed to rfapiNexthop2Prefix(),
 * which reads the multiprotocol nexthop fields instead.
 */
void rfapiUnicastNexthop2Prefix(afi_t afi, struct attr *attr, struct prefix *p)
{
	if (afi != AFI_IP) {
		rfapiNexthop2Prefix(attr, p);
		return;
	}

	/*
	 * Start from an all-zero prefix, as rfapiNexthop2Prefix() does,
	 * so no stale bytes from the caller's buffer remain in the
	 * parts of *p that are not assigned below.
	 */
	memset(p, 0, sizeof(*p));
	p->family = AF_INET;
	p->prefixlen = 32;
	p->u.prefix4 = attr->nexthop;
}
2552 
rfapiAttrNexthopAddrDifferent(struct prefix * p1,struct prefix * p2)2553 static int rfapiAttrNexthopAddrDifferent(struct prefix *p1, struct prefix *p2)
2554 {
2555 	if (!p1 || !p2) {
2556 		vnc_zlog_debug_verbose("%s: p1 or p2 is NULL", __func__);
2557 		return 1;
2558 	}
2559 
2560 	/*
2561 	 * Are address families the same?
2562 	 */
2563 	if (p1->family != p2->family) {
2564 		return 1;
2565 	}
2566 
2567 	switch (p1->family) {
2568 	case AF_INET:
2569 		if (IPV4_ADDR_SAME(&p1->u.prefix4, &p2->u.prefix4))
2570 			return 0;
2571 		break;
2572 
2573 	case AF_INET6:
2574 		if (IPV6_ADDR_SAME(&p1->u.prefix6, &p2->u.prefix6))
2575 			return 0;
2576 		break;
2577 
2578 	default:
2579 		assert(1);
2580 	}
2581 
2582 	return 1;
2583 }
2584 
rfapiCopyUnEncap2VPN(struct bgp_path_info * encap_bpi,struct bgp_path_info * vpn_bpi)2585 static void rfapiCopyUnEncap2VPN(struct bgp_path_info *encap_bpi,
2586 				 struct bgp_path_info *vpn_bpi)
2587 {
2588 	if (!vpn_bpi || !vpn_bpi->extra) {
2589 		zlog_warn("%s: no vpn  bpi attr/extra, can't copy UN address",
2590 			  __func__);
2591 		return;
2592 	}
2593 
2594 	switch (BGP_MP_NEXTHOP_FAMILY(encap_bpi->attr->mp_nexthop_len)) {
2595 	case AF_INET:
2596 
2597 		/*
2598 		 * instrumentation to debug segfault of 091127
2599 		 */
2600 		vnc_zlog_debug_verbose("%s: vpn_bpi=%p", __func__, vpn_bpi);
2601 		if (vpn_bpi) {
2602 			vnc_zlog_debug_verbose("%s: vpn_bpi->extra=%p",
2603 					       __func__, vpn_bpi->extra);
2604 		}
2605 
2606 		vpn_bpi->extra->vnc.import.un_family = AF_INET;
2607 		vpn_bpi->extra->vnc.import.un.addr4 =
2608 			encap_bpi->attr->mp_nexthop_global_in;
2609 		break;
2610 
2611 	case AF_INET6:
2612 		vpn_bpi->extra->vnc.import.un_family = AF_INET6;
2613 		vpn_bpi->extra->vnc.import.un.addr6 =
2614 			encap_bpi->attr->mp_nexthop_global;
2615 		break;
2616 
2617 	default:
2618 		zlog_warn("%s: invalid encap nexthop length: %d", __func__,
2619 			  encap_bpi->attr->mp_nexthop_len);
2620 		vpn_bpi->extra->vnc.import.un_family = 0;
2621 		break;
2622 	}
2623 }
2624 
2625 /*
2626  * returns 0 on success, nonzero on error
2627  */
2628 static int
rfapiWithdrawEncapUpdateCachedUn(struct rfapi_import_table * import_table,struct bgp_path_info * encap_bpi,struct agg_node * vpn_rn,struct bgp_path_info * vpn_bpi)2629 rfapiWithdrawEncapUpdateCachedUn(struct rfapi_import_table *import_table,
2630 				 struct bgp_path_info *encap_bpi,
2631 				 struct agg_node *vpn_rn,
2632 				 struct bgp_path_info *vpn_bpi)
2633 {
2634 	if (!encap_bpi) {
2635 
2636 		/*
2637 		 * clear cached UN address
2638 		 */
2639 		if (!vpn_bpi || !vpn_bpi->extra) {
2640 			zlog_warn(
2641 				"%s: missing VPN bpi/extra, can't clear UN addr",
2642 				__func__);
2643 			return 1;
2644 		}
2645 		vpn_bpi->extra->vnc.import.un_family = 0;
2646 		memset(&vpn_bpi->extra->vnc.import.un, 0,
2647 		       sizeof(vpn_bpi->extra->vnc.import.un));
2648 		if (CHECK_FLAG(vpn_bpi->flags, BGP_PATH_VALID)) {
2649 			if (rfapiGetVncTunnelUnAddr(vpn_bpi->attr, NULL)) {
2650 				UNSET_FLAG(vpn_bpi->flags, BGP_PATH_VALID);
2651 				if (VALID_INTERIOR_TYPE(vpn_bpi->type))
2652 					RFAPI_MONITOR_EXTERIOR(vpn_rn)
2653 						->valid_interior_count--;
2654 				/* signal interior route withdrawal to
2655 				 * import-exterior */
2656 				vnc_import_bgp_exterior_del_route_interior(
2657 					bgp_get_default(), import_table, vpn_rn,
2658 					vpn_bpi);
2659 			}
2660 		}
2661 
2662 	} else {
2663 		if (!vpn_bpi) {
2664 			zlog_warn("%s: missing VPN bpi, can't clear UN addr",
2665 				  __func__);
2666 			return 1;
2667 		}
2668 		rfapiCopyUnEncap2VPN(encap_bpi, vpn_bpi);
2669 		if (!CHECK_FLAG(vpn_bpi->flags, BGP_PATH_VALID)) {
2670 			SET_FLAG(vpn_bpi->flags, BGP_PATH_VALID);
2671 			if (VALID_INTERIOR_TYPE(vpn_bpi->type))
2672 				RFAPI_MONITOR_EXTERIOR(vpn_rn)
2673 					->valid_interior_count++;
2674 			/* signal interior route withdrawal to import-exterior
2675 			 */
2676 			vnc_import_bgp_exterior_add_route_interior(
2677 				bgp_get_default(), import_table, vpn_rn,
2678 				vpn_bpi);
2679 		}
2680 	}
2681 	return 0;
2682 }
2683 
rfapiWithdrawTimerEncap(struct thread * t)2684 static int rfapiWithdrawTimerEncap(struct thread *t)
2685 {
2686 	struct rfapi_withdraw *wcb = t->arg;
2687 	struct bgp_path_info *bpi = wcb->info;
2688 	int was_first_route = 0;
2689 	struct rfapi_monitor_encap *em;
2690 	struct skiplist *vpn_node_sl = skiplist_new(0, NULL, NULL);
2691 
2692 	assert(wcb->node);
2693 	assert(bpi);
2694 	assert(wcb->import_table);
2695 
2696 	RFAPI_CHECK_REFCOUNT(wcb->node, SAFI_ENCAP, 0);
2697 
2698 	if (wcb->node->info == bpi)
2699 		was_first_route = 1;
2700 
2701 	/*
2702 	 * Remove the route/bpi and free it
2703 	 */
2704 	rfapiBgpInfoDetach(wcb->node, bpi);
2705 	rfapiBgpInfoFree(bpi);
2706 
2707 	if (!was_first_route)
2708 		goto done;
2709 
2710 	for (em = RFAPI_MONITOR_ENCAP(wcb->node); em; em = em->next) {
2711 
2712 		/*
2713 		 * Update monitoring VPN BPIs with new encap info at the
2714 		 * head of the encap bpi chain (which could be NULL after
2715 		 * removing the expiring bpi above)
2716 		 */
2717 		if (rfapiWithdrawEncapUpdateCachedUn(wcb->import_table,
2718 						     wcb->node->info, em->node,
2719 						     em->bpi))
2720 			continue;
2721 
2722 		/*
2723 		 * Build a list of unique VPN nodes referenced by these
2724 		 * monitors.
2725 		 * Use a skiplist for speed.
2726 		 */
2727 		skiplist_insert(vpn_node_sl, em->node, em->node);
2728 	}
2729 
2730 
2731 	/*
2732 	 * for each VPN node referenced in the ENCAP monitors:
2733 	 */
2734 	struct agg_node *rn;
2735 	while (!skiplist_first(vpn_node_sl, (void **)&rn, NULL)) {
2736 		if (!wcb->node->info) {
2737 			struct rfapi_monitor_vpn *moved;
2738 
2739 			moved = rfapiMonitorMoveShorter(rn, 0);
2740 			if (moved) {
2741 				// rfapiDoRouteCallback(wcb->import_table,
2742 				// moved->node, moved);
2743 				rfapiMonitorMovedUp(wcb->import_table, rn,
2744 						    moved->node, moved);
2745 			}
2746 		} else {
2747 			// rfapiDoRouteCallback(wcb->import_table, rn, NULL);
2748 			rfapiMonitorItNodeChanged(wcb->import_table, rn, NULL);
2749 		}
2750 		skiplist_delete_first(vpn_node_sl);
2751 	}
2752 
2753 done:
2754 	RFAPI_CHECK_REFCOUNT(wcb->node, SAFI_ENCAP, 1);
2755 	agg_unlock_node(wcb->node); /* decr ref count */
2756 	XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
2757 	skiplist_free(vpn_node_sl);
2758 	return 0;
2759 }
2760 
2761 
2762 /*
2763  * Works for both VPN and ENCAP routes; timer_service_func is different
2764  * in each case
2765  */
2766 static void
rfapiBiStartWithdrawTimer(struct rfapi_import_table * import_table,struct agg_node * rn,struct bgp_path_info * bpi,afi_t afi,safi_t safi,int (* timer_service_func)(struct thread *))2767 rfapiBiStartWithdrawTimer(struct rfapi_import_table *import_table,
2768 			  struct agg_node *rn, struct bgp_path_info *bpi,
2769 			  afi_t afi, safi_t safi,
2770 			  int (*timer_service_func)(struct thread *))
2771 {
2772 	uint32_t lifetime;
2773 	struct rfapi_withdraw *wcb;
2774 
2775 	if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)) {
2776 		/*
2777 		 * Already on the path to being withdrawn,
2778 		 * should already have a timer set up to
2779 		 * delete it.
2780 		 */
2781 		vnc_zlog_debug_verbose(
2782 			"%s: already being withdrawn, do nothing", __func__);
2783 		return;
2784 	}
2785 
2786 	rfapiGetVncLifetime(bpi->attr, &lifetime);
2787 	vnc_zlog_debug_verbose("%s: VNC lifetime is %u", __func__, lifetime);
2788 
2789 	/*
2790 	 * withdrawn routes get to hang around for a while
2791 	 */
2792 	SET_FLAG(bpi->flags, BGP_PATH_REMOVED);
2793 
2794 	/* set timer to remove the route later */
2795 	lifetime = rfapiGetHolddownFromLifetime(lifetime);
2796 	vnc_zlog_debug_verbose("%s: using timeout %u", __func__, lifetime);
2797 
2798 	/*
2799 	 * Stash import_table, node, and info for use by timer
2800 	 * service routine, which is supposed to free the wcb.
2801 	 */
2802 	wcb = XCALLOC(MTYPE_RFAPI_WITHDRAW, sizeof(struct rfapi_withdraw));
2803 	assert(wcb);
2804 	wcb->node = rn;
2805 	wcb->info = bpi;
2806 	wcb->import_table = import_table;
2807 	bgp_attr_intern(bpi->attr);
2808 
2809 	if (VNC_DEBUG(VERBOSE)) {
2810 		vnc_zlog_debug_verbose(
2811 			"%s: wcb values: node=%p, info=%p, import_table=%p (bpi follows)",
2812 			__func__, wcb->node, wcb->info, wcb->import_table);
2813 		rfapiPrintBi(NULL, bpi);
2814 	}
2815 
2816 
2817 	assert(bpi->extra);
2818 	if (lifetime > UINT32_MAX / 1001) {
2819 		/* sub-optimal case, but will probably never happen */
2820 		bpi->extra->vnc.import.timer = NULL;
2821 		thread_add_timer(bm->master, timer_service_func, wcb, lifetime,
2822 				 &bpi->extra->vnc.import.timer);
2823 	} else {
2824 		static uint32_t jitter;
2825 		uint32_t lifetime_msec;
2826 
2827 		/*
2828 		 * the goal here is to spread out the timers so they are
2829 		 * sortable in the skip list
2830 		 */
2831 		if (++jitter >= 1000)
2832 			jitter = 0;
2833 
2834 		lifetime_msec = (lifetime * 1000) + jitter;
2835 
2836 		bpi->extra->vnc.import.timer = NULL;
2837 		thread_add_timer_msec(bm->master, timer_service_func, wcb,
2838 				      lifetime_msec,
2839 				      &bpi->extra->vnc.import.timer);
2840 	}
2841 
2842 	/* re-sort route list (BGP_PATH_REMOVED routes are last) */
2843 	if (((struct bgp_path_info *)rn->info)->next) {
2844 		rfapiBgpInfoDetach(rn, bpi);
2845 		rfapiBgpInfoAttachSorted(rn, bpi, afi, safi);
2846 	}
2847 }
2848 
2849 
2850 typedef void(rfapi_bi_filtered_import_f)(struct rfapi_import_table *table,
2851 					 int action, struct peer *peer,
2852 					 void *rfd, const struct prefix *prefix,
2853 					 const struct prefix *aux_prefix,
2854 					 afi_t afi, struct prefix_rd *prd,
2855 					 struct attr *attr, uint8_t type,
2856 					 uint8_t sub_type, uint32_t *label);
2857 
2858 
rfapiExpireEncapNow(struct rfapi_import_table * it,struct agg_node * rn,struct bgp_path_info * bpi)2859 static void rfapiExpireEncapNow(struct rfapi_import_table *it,
2860 				struct agg_node *rn, struct bgp_path_info *bpi)
2861 {
2862 	struct rfapi_withdraw *wcb;
2863 	struct thread t;
2864 
2865 	/*
2866 	 * pretend we're an expiring timer
2867 	 */
2868 	wcb = XCALLOC(MTYPE_RFAPI_WITHDRAW, sizeof(struct rfapi_withdraw));
2869 	wcb->info = bpi;
2870 	wcb->node = rn;
2871 	wcb->import_table = it;
2872 	memset(&t, 0, sizeof(t));
2873 	t.arg = wcb;
2874 	rfapiWithdrawTimerEncap(&t); /* frees wcb */
2875 }
2876 
rfapiGetNexthop(struct attr * attr,struct prefix * prefix)2877 static int rfapiGetNexthop(struct attr *attr, struct prefix *prefix)
2878 {
2879 	switch (BGP_MP_NEXTHOP_FAMILY(attr->mp_nexthop_len)) {
2880 	case AF_INET:
2881 		prefix->family = AF_INET;
2882 		prefix->prefixlen = 32;
2883 		prefix->u.prefix4 = attr->mp_nexthop_global_in;
2884 		break;
2885 	case AF_INET6:
2886 		prefix->family = AF_INET6;
2887 		prefix->prefixlen = 128;
2888 		prefix->u.prefix6 = attr->mp_nexthop_global;
2889 		break;
2890 	default:
2891 		vnc_zlog_debug_verbose("%s: unknown attr->mp_nexthop_len %d",
2892 				       __func__, attr->mp_nexthop_len);
2893 		return EINVAL;
2894 	}
2895 	return 0;
2896 }
2897 
2898 /*
2899  * import a bgp_path_info if its route target list intersects with the
2900  * import table's route target list
2901  */
rfapiBgpInfoFilteredImportEncap(struct rfapi_import_table * import_table,int action,struct peer * peer,void * rfd,const struct prefix * p,const struct prefix * aux_prefix,afi_t afi,struct prefix_rd * prd,struct attr * attr,uint8_t type,uint8_t sub_type,uint32_t * label)2902 static void rfapiBgpInfoFilteredImportEncap(
2903 	struct rfapi_import_table *import_table, int action, struct peer *peer,
2904 	void *rfd, /* set for looped back routes */
2905 	const struct prefix *p,
2906 	const struct prefix *aux_prefix, /* Unused for encap routes */
2907 	afi_t afi, struct prefix_rd *prd,
2908 	struct attr *attr, /* part of bgp_path_info */
2909 	uint8_t type,	   /* part of bgp_path_info */
2910 	uint8_t sub_type,  /* part of bgp_path_info */
2911 	uint32_t *label)   /* part of bgp_path_info */
2912 {
2913 	struct agg_table *rt = NULL;
2914 	struct agg_node *rn;
2915 	struct bgp_path_info *info_new;
2916 	struct bgp_path_info *bpi;
2917 	struct bgp_path_info *next;
2918 	char buf[BUFSIZ];
2919 
2920 	struct prefix p_firstbpi_old;
2921 	struct prefix p_firstbpi_new;
2922 	int replacing = 0;
2923 	const char *action_str = NULL;
2924 	struct prefix un_prefix;
2925 
2926 	struct bgp *bgp;
2927 	bgp = bgp_get_default(); /* assume 1 instance for now */
2928 
2929 	switch (action) {
2930 	case FIF_ACTION_UPDATE:
2931 		action_str = "update";
2932 		break;
2933 	case FIF_ACTION_WITHDRAW:
2934 		action_str = "withdraw";
2935 		break;
2936 	case FIF_ACTION_KILL:
2937 		action_str = "kill";
2938 		break;
2939 	default:
2940 		assert(0);
2941 		break;
2942 	}
2943 
2944 	vnc_zlog_debug_verbose(
2945 		"%s: entry: %s: prefix %s/%d", __func__, action_str,
2946 		inet_ntop(p->family, &p->u.prefix, buf, BUFSIZ), p->prefixlen);
2947 
2948 	memset(&p_firstbpi_old, 0, sizeof(p_firstbpi_old));
2949 	memset(&p_firstbpi_new, 0, sizeof(p_firstbpi_new));
2950 
2951 	if (action == FIF_ACTION_UPDATE) {
2952 		/*
2953 		 * Compare rt lists. If no intersection, don't import this route
2954 		 * On a withdraw, peer and RD are sufficient to determine if
2955 		 * we should act.
2956 		 */
2957 		if (!attr || !attr->ecommunity) {
2958 
2959 			vnc_zlog_debug_verbose(
2960 				"%s: attr, extra, or ecommunity missing, not importing",
2961 				__func__);
2962 			return;
2963 		}
2964 #ifdef RFAPI_REQUIRE_ENCAP_BEEC
2965 		if (!rfapiEcommunitiesMatchBeec(attr->ecommunity)) {
2966 			vnc_zlog_debug_verbose(
2967 				"%s: it=%p: no match for BGP Encapsulation ecommunity",
2968 				__func__, import_table);
2969 			return;
2970 		}
2971 #endif
2972 		if (!rfapiEcommunitiesIntersect(import_table->rt_import_list,
2973 						attr->ecommunity)) {
2974 
2975 			vnc_zlog_debug_verbose(
2976 				"%s: it=%p: no ecommunity intersection",
2977 				__func__, import_table);
2978 			return;
2979 		}
2980 
2981 		/*
2982 		 * Updates must also have a nexthop address
2983 		 */
2984 		memset(&un_prefix, 0,
2985 		       sizeof(un_prefix)); /* keep valgrind happy */
2986 		if (rfapiGetNexthop(attr, &un_prefix)) {
2987 			vnc_zlog_debug_verbose("%s: missing nexthop address",
2988 					       __func__);
2989 			return;
2990 		}
2991 	}
2992 
2993 	/*
2994 	 * Figure out which radix tree the route would go into
2995 	 */
2996 	switch (afi) {
2997 	case AFI_IP:
2998 	case AFI_IP6:
2999 		rt = import_table->imported_encap[afi];
3000 		break;
3001 
3002 	default:
3003 		flog_err(EC_LIB_DEVELOPMENT, "%s: bad afi %d", __func__, afi);
3004 		return;
3005 	}
3006 
3007 	/*
3008 	 * agg_node_lookup returns a node only if there is at least
3009 	 * one route attached.
3010 	 */
3011 	rn = agg_node_lookup(rt, p);
3012 
3013 #ifdef DEBUG_ENCAP_MONITOR
3014 	vnc_zlog_debug_verbose("%s: initial encap lookup(it=%p) rn=%p",
3015 			       __func__, import_table, rn);
3016 #endif
3017 
3018 	if (rn) {
3019 
3020 		RFAPI_CHECK_REFCOUNT(rn, SAFI_ENCAP, 1);
3021 		agg_unlock_node(rn); /* undo lock in agg_node_lookup */
3022 
3023 
3024 		/*
3025 		 * capture nexthop of first bpi
3026 		 */
3027 		if (rn->info) {
3028 			rfapiNexthop2Prefix(
3029 				((struct bgp_path_info *)(rn->info))->attr,
3030 				&p_firstbpi_old);
3031 		}
3032 
3033 		for (bpi = rn->info; bpi; bpi = bpi->next) {
3034 
3035 			/*
3036 			 * Does this bgp_path_info refer to the same route
3037 			 * as we are trying to add?
3038 			 */
3039 			vnc_zlog_debug_verbose("%s: comparing BPI %p", __func__,
3040 					       bpi);
3041 
3042 
3043 			/*
3044 			 * Compare RDs
3045 			 *
3046 			 * RD of import table bpi is in
3047 			 * bpi->extra->vnc.import.rd RD of info_orig is in prd
3048 			 */
3049 			if (!bpi->extra) {
3050 				vnc_zlog_debug_verbose("%s: no bpi->extra",
3051 						       __func__);
3052 				continue;
3053 			}
3054 			if (prefix_cmp(
3055 				    (struct prefix *)&bpi->extra->vnc.import.rd,
3056 				    (struct prefix *)prd)) {
3057 
3058 				vnc_zlog_debug_verbose("%s: prd does not match",
3059 						       __func__);
3060 				continue;
3061 			}
3062 
3063 			/*
3064 			 * Compare peers
3065 			 */
3066 			if (bpi->peer != peer) {
3067 				vnc_zlog_debug_verbose(
3068 					"%s: peer does not match", __func__);
3069 				continue;
3070 			}
3071 
3072 			vnc_zlog_debug_verbose("%s: found matching bpi",
3073 					       __func__);
3074 
3075 			/* Same route. Delete this bpi, replace with new one */
3076 
3077 			if (action == FIF_ACTION_WITHDRAW) {
3078 
3079 				vnc_zlog_debug_verbose(
3080 					"%s: withdrawing at prefix %pRN",
3081 					__func__, rn);
3082 
3083 				rfapiBiStartWithdrawTimer(
3084 					import_table, rn, bpi, afi, SAFI_ENCAP,
3085 					rfapiWithdrawTimerEncap);
3086 
3087 			} else {
3088 				vnc_zlog_debug_verbose(
3089 					"%s: %s at prefix %pRN", __func__,
3090 					((action == FIF_ACTION_KILL)
3091 						 ? "killing"
3092 						 : "replacing"),
3093 					rn);
3094 
3095 				/*
3096 				 * If this route is waiting to be deleted
3097 				 * because of
3098 				 * a previous withdraw, we must cancel its
3099 				 * timer.
3100 				 */
3101 				if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)
3102 				    && bpi->extra->vnc.import.timer) {
3103 
3104 					struct thread *t =
3105 						(struct thread *)bpi->extra->vnc
3106 							.import.timer;
3107 					struct rfapi_withdraw *wcb = t->arg;
3108 
3109 					XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
3110 					thread_cancel(t);
3111 				}
3112 
3113 				if (action == FIF_ACTION_UPDATE) {
3114 					rfapiBgpInfoDetach(rn, bpi);
3115 					rfapiBgpInfoFree(bpi);
3116 					replacing = 1;
3117 				} else {
3118 					/*
3119 					 * Kill: do export stuff when removing
3120 					 * bpi
3121 					 */
3122 					struct rfapi_withdraw *wcb;
3123 					struct thread t;
3124 
3125 					/*
3126 					 * pretend we're an expiring timer
3127 					 */
3128 					wcb = XCALLOC(
3129 						MTYPE_RFAPI_WITHDRAW,
3130 						sizeof(struct rfapi_withdraw));
3131 					wcb->info = bpi;
3132 					wcb->node = rn;
3133 					wcb->import_table = import_table;
3134 					memset(&t, 0, sizeof(t));
3135 					t.arg = wcb;
3136 					rfapiWithdrawTimerEncap(
3137 						&t); /* frees wcb */
3138 				}
3139 			}
3140 
3141 			break;
3142 		}
3143 	}
3144 
3145 	if (rn)
3146 		RFAPI_CHECK_REFCOUNT(rn, SAFI_ENCAP, replacing ? 1 : 0);
3147 
3148 	if (action == FIF_ACTION_WITHDRAW || action == FIF_ACTION_KILL)
3149 		return;
3150 
3151 	info_new =
3152 		rfapiBgpInfoCreate(attr, peer, rfd, prd, type, sub_type, NULL);
3153 
3154 	if (rn) {
3155 		if (!replacing)
3156 			agg_lock_node(rn); /* incr ref count for new BPI */
3157 	} else {
3158 		rn = agg_node_get(rt, p);
3159 	}
3160 
3161 	vnc_zlog_debug_verbose("%s: (afi=%d, rn=%p) inserting at prefix %pRN",
3162 			       __func__, afi, rn, rn);
3163 
3164 	rfapiBgpInfoAttachSorted(rn, info_new, afi, SAFI_ENCAP);
3165 
3166 	/*
3167 	 * Delete holddown routes from same NVE. See details in
3168 	 * rfapiBgpInfoFilteredImportVPN()
3169 	 */
3170 	for (bpi = info_new->next; bpi; bpi = next) {
3171 
3172 		struct prefix pfx_un;
3173 		int un_match = 0;
3174 
3175 		next = bpi->next;
3176 		if (!CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED))
3177 			continue;
3178 
3179 		/*
3180 		 * We already match the VN address (it is the prefix
3181 		 * of the route node)
3182 		 */
3183 
3184 		if (!rfapiGetNexthop(bpi->attr, &pfx_un)
3185 		    && prefix_same(&pfx_un, &un_prefix)) {
3186 
3187 			un_match = 1;
3188 		}
3189 
3190 		if (!un_match)
3191 			continue;
3192 
3193 		vnc_zlog_debug_verbose(
3194 			"%s: removing holddown bpi matching NVE of new route",
3195 			__func__);
3196 		if (bpi->extra->vnc.import.timer) {
3197 			struct thread *t =
3198 				(struct thread *)bpi->extra->vnc.import.timer;
3199 			struct rfapi_withdraw *wcb = t->arg;
3200 
3201 			XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
3202 			thread_cancel(t);
3203 		}
3204 		rfapiExpireEncapNow(import_table, rn, bpi);
3205 	}
3206 
3207 	rfapiNexthop2Prefix(((struct bgp_path_info *)(rn->info))->attr,
3208 			    &p_firstbpi_new);
3209 
3210 	/*
3211 	 * If the nexthop address of the selected Encap route (i.e.,
3212 	 * the UN address) has changed, then we must update the VPN
3213 	 * routes that refer to this Encap route and possibly force
3214 	 * rfapi callbacks.
3215 	 */
3216 	if (rfapiAttrNexthopAddrDifferent(&p_firstbpi_old, &p_firstbpi_new)) {
3217 
3218 		struct rfapi_monitor_encap *m;
3219 		struct rfapi_monitor_encap *mnext;
3220 
3221 		struct agg_node *referenced_vpn_prefix;
3222 
3223 		/*
3224 		 * Optimized approach: build radix tree on the fly to
3225 		 * hold list of VPN nodes referenced by the ENCAP monitors
3226 		 *
3227 		 * The nodes in this table correspond to prefixes of VPN routes.
3228 		 * The "info" pointer of the node points to a chain of
3229 		 * struct rfapi_monitor_encap, each of which refers to a
3230 		 * specific VPN node.
3231 		 */
3232 		struct agg_table *referenced_vpn_table;
3233 
3234 		referenced_vpn_table = agg_table_init();
3235 		assert(referenced_vpn_table);
3236 
3237 /*
3238  * iterate over the set of monitors at this ENCAP node.
3239  */
3240 #ifdef DEBUG_ENCAP_MONITOR
3241 		vnc_zlog_debug_verbose("%s: examining monitors at rn=%p",
3242 				       __func__, rn);
3243 #endif
3244 		for (m = RFAPI_MONITOR_ENCAP(rn); m; m = m->next) {
3245 			const struct prefix *p;
3246 
3247 			/*
3248 			 * For each referenced bpi/route, copy the ENCAP route's
3249 			 * nexthop to the VPN route's cached UN address field
3250 			 * and set
3251 			 * the address family of the cached UN address field.
3252 			 */
3253 			rfapiCopyUnEncap2VPN(info_new, m->bpi);
3254 			if (!CHECK_FLAG(m->bpi->flags, BGP_PATH_VALID)) {
3255 				SET_FLAG(m->bpi->flags, BGP_PATH_VALID);
3256 				if (VALID_INTERIOR_TYPE(m->bpi->type))
3257 					RFAPI_MONITOR_EXTERIOR(m->node)
3258 						->valid_interior_count++;
3259 				vnc_import_bgp_exterior_add_route_interior(
3260 					bgp, import_table, m->node, m->bpi);
3261 			}
3262 
3263 			/*
3264 			 * Build a list of unique VPN nodes referenced by these
3265 			 * monitors
3266 			 *
3267 			 * There could be more than one VPN node here with a
3268 			 * given
3269 			 * prefix. Those are currently in an unsorted linear
3270 			 * list
3271 			 * per prefix.
3272 			 */
3273 			p = agg_node_get_prefix(m->node);
3274 			referenced_vpn_prefix =
3275 				agg_node_get(referenced_vpn_table, p);
3276 			assert(referenced_vpn_prefix);
3277 			for (mnext = referenced_vpn_prefix->info; mnext;
3278 			     mnext = mnext->next) {
3279 
3280 				if (mnext->node == m->node)
3281 					break;
3282 			}
3283 
3284 			if (mnext) {
3285 				/*
3286 				 * already have an entry for this VPN node
3287 				 */
3288 				agg_unlock_node(referenced_vpn_prefix);
3289 			} else {
3290 				mnext = XCALLOC(
3291 					MTYPE_RFAPI_MONITOR_ENCAP,
3292 					sizeof(struct rfapi_monitor_encap));
3293 				assert(mnext);
3294 				mnext->node = m->node;
3295 				mnext->next = referenced_vpn_prefix->info;
3296 				referenced_vpn_prefix->info = mnext;
3297 			}
3298 		}
3299 
3300 		/*
3301 		 * for each VPN node referenced in the ENCAP monitors:
3302 		 */
3303 		for (referenced_vpn_prefix =
3304 			     agg_route_top(referenced_vpn_table);
3305 		     referenced_vpn_prefix;
3306 		     referenced_vpn_prefix =
3307 			     agg_route_next(referenced_vpn_prefix)) {
3308 
3309 			while ((m = referenced_vpn_prefix->info)) {
3310 
3311 				struct agg_node *n;
3312 
3313 				rfapiMonitorMoveLonger(m->node);
3314 				for (n = m->node; n; n = agg_node_parent(n)) {
3315 					// rfapiDoRouteCallback(import_table, n,
3316 					// NULL);
3317 				}
3318 				rfapiMonitorItNodeChanged(import_table, m->node,
3319 							  NULL);
3320 
3321 				referenced_vpn_prefix->info = m->next;
3322 				agg_unlock_node(referenced_vpn_prefix);
3323 				XFREE(MTYPE_RFAPI_MONITOR_ENCAP, m);
3324 			}
3325 		}
3326 		agg_table_finish(referenced_vpn_table);
3327 	}
3328 
3329 	RFAPI_CHECK_REFCOUNT(rn, SAFI_ENCAP, 0);
3330 }
3331 
rfapiExpireVpnNow(struct rfapi_import_table * it,struct agg_node * rn,struct bgp_path_info * bpi,int lockoffset)3332 static void rfapiExpireVpnNow(struct rfapi_import_table *it,
3333 			      struct agg_node *rn, struct bgp_path_info *bpi,
3334 			      int lockoffset)
3335 {
3336 	struct rfapi_withdraw *wcb;
3337 	struct thread t;
3338 
3339 	/*
3340 	 * pretend we're an expiring timer
3341 	 */
3342 	wcb = XCALLOC(MTYPE_RFAPI_WITHDRAW, sizeof(struct rfapi_withdraw));
3343 	wcb->info = bpi;
3344 	wcb->node = rn;
3345 	wcb->import_table = it;
3346 	wcb->lockoffset = lockoffset;
3347 	memset(&t, 0, sizeof(t));
3348 	t.arg = wcb;
3349 	rfapiWithdrawTimerVPN(&t); /* frees wcb */
3350 }
3351 
3352 
3353 /*
3354  * import a bgp_path_info if its route target list intersects with the
3355  * import table's route target list
3356  */
rfapiBgpInfoFilteredImportVPN(struct rfapi_import_table * import_table,int action,struct peer * peer,void * rfd,const struct prefix * p,const struct prefix * aux_prefix,afi_t afi,struct prefix_rd * prd,struct attr * attr,uint8_t type,uint8_t sub_type,uint32_t * label)3357 void rfapiBgpInfoFilteredImportVPN(
3358 	struct rfapi_import_table *import_table, int action, struct peer *peer,
3359 	void *rfd, /* set for looped back routes */
3360 	const struct prefix *p,
3361 	const struct prefix *aux_prefix, /* AFI_L2VPN: optional IP */
3362 	afi_t afi, struct prefix_rd *prd,
3363 	struct attr *attr, /* part of bgp_path_info */
3364 	uint8_t type,	   /* part of bgp_path_info */
3365 	uint8_t sub_type,  /* part of bgp_path_info */
3366 	uint32_t *label)   /* part of bgp_path_info */
3367 {
3368 	struct agg_table *rt = NULL;
3369 	struct agg_node *rn;
3370 	struct agg_node *n;
3371 	struct bgp_path_info *info_new;
3372 	struct bgp_path_info *bpi;
3373 	struct bgp_path_info *next;
3374 	char buf[BUFSIZ];
3375 	struct prefix vn_prefix;
3376 	struct prefix un_prefix;
3377 	int un_prefix_valid = 0;
3378 	struct agg_node *ern;
3379 	int replacing = 0;
3380 	int original_had_routes = 0;
3381 	struct prefix original_nexthop;
3382 	const char *action_str = NULL;
3383 	int is_it_ce = 0;
3384 
3385 	struct bgp *bgp;
3386 	bgp = bgp_get_default(); /* assume 1 instance for now */
3387 
3388 	switch (action) {
3389 	case FIF_ACTION_UPDATE:
3390 		action_str = "update";
3391 		break;
3392 	case FIF_ACTION_WITHDRAW:
3393 		action_str = "withdraw";
3394 		break;
3395 	case FIF_ACTION_KILL:
3396 		action_str = "kill";
3397 		break;
3398 	default:
3399 		assert(0);
3400 		break;
3401 	}
3402 
3403 	if (import_table == bgp->rfapi->it_ce)
3404 		is_it_ce = 1;
3405 
3406 	vnc_zlog_debug_verbose("%s: entry: %s%s: prefix %s/%d: it %p, afi %s",
3407 			       __func__, (is_it_ce ? "CE-IT " : ""), action_str,
3408 			       rfapi_ntop(p->family, &p->u.prefix, buf, BUFSIZ),
3409 			       p->prefixlen, import_table, afi2str(afi));
3410 
3411 	VNC_ITRCCK;
3412 
3413 	/*
3414 	 * Compare rt lists. If no intersection, don't import this route
3415 	 * On a withdraw, peer and RD are sufficient to determine if
3416 	 * we should act.
3417 	 */
3418 	if (action == FIF_ACTION_UPDATE) {
3419 		if (!attr || !attr->ecommunity) {
3420 
3421 			vnc_zlog_debug_verbose(
3422 				"%s: attr, extra, or ecommunity missing, not importing",
3423 				__func__);
3424 			return;
3425 		}
3426 		if ((import_table != bgp->rfapi->it_ce)
3427 		    && !rfapiEcommunitiesIntersect(import_table->rt_import_list,
3428 						   attr->ecommunity)) {
3429 
3430 			vnc_zlog_debug_verbose(
3431 				"%s: it=%p: no ecommunity intersection",
3432 				__func__, import_table);
3433 			return;
3434 		}
3435 
3436 		memset(&vn_prefix, 0,
3437 		       sizeof(vn_prefix)); /* keep valgrind happy */
3438 		if (rfapiGetNexthop(attr, &vn_prefix)) {
3439 			/* missing nexthop address would be a bad, bad thing */
3440 			vnc_zlog_debug_verbose("%s: missing nexthop", __func__);
3441 			return;
3442 		}
3443 	}
3444 
3445 	/*
3446 	 * Figure out which radix tree the route would go into
3447 	 */
3448 	switch (afi) {
3449 	case AFI_IP:
3450 	case AFI_IP6:
3451 	case AFI_L2VPN:
3452 		rt = import_table->imported_vpn[afi];
3453 		break;
3454 
3455 	default:
3456 		flog_err(EC_LIB_DEVELOPMENT, "%s: bad afi %d", __func__, afi);
3457 		return;
3458 	}
3459 
3460 	/* clear it */
3461 	memset(&original_nexthop, 0, sizeof(original_nexthop));
3462 
3463 	/*
3464 	 * agg_node_lookup returns a node only if there is at least
3465 	 * one route attached.
3466 	 */
3467 	rn = agg_node_lookup(rt, p);
3468 
3469 	vnc_zlog_debug_verbose("%s: rn=%p", __func__, rn);
3470 
3471 	if (rn) {
3472 
3473 		RFAPI_CHECK_REFCOUNT(rn, SAFI_MPLS_VPN, 1);
3474 		agg_unlock_node(rn); /* undo lock in agg_node_lookup */
3475 
3476 		if (rn->info)
3477 			original_had_routes = 1;
3478 
3479 		if (VNC_DEBUG(VERBOSE)) {
3480 			vnc_zlog_debug_verbose("%s: showing IT node on entry",
3481 					       __func__);
3482 			rfapiShowItNode(NULL, rn); /* debug */
3483 		}
3484 
3485 		/*
3486 		 * Look for same route (will have same RD and peer)
3487 		 */
3488 		bpi = rfapiItBiIndexSearch(rn, prd, peer, aux_prefix);
3489 
3490 		if (bpi) {
3491 
3492 			/*
3493 			 * This was an old test when we iterated over the
3494 			 * BPIs linearly. Since we're now looking up with
3495 			 * RD and peer, comparing types should not be
3496 			 * needed. Changed to assertion.
3497 			 *
3498 			 * Compare types. Doing so prevents a RFP-originated
3499 			 * route from matching an imported route, for example.
3500 			 */
3501 			if (VNC_DEBUG(VERBOSE) && bpi->type != type)
3502 				/* should be handled by RDs, but warn for now */
3503 				zlog_warn("%s: type mismatch! (bpi=%d, arg=%d)",
3504 					  __func__, bpi->type, type);
3505 
3506 			vnc_zlog_debug_verbose("%s: found matching bpi",
3507 					       __func__);
3508 
3509 			/*
3510 			 * In the special CE table, withdrawals occur without
3511 			 * holddown
3512 			 */
3513 			if (import_table == bgp->rfapi->it_ce) {
3514 				vnc_direct_bgp_del_route_ce(bgp, rn, bpi);
3515 				if (action == FIF_ACTION_WITHDRAW)
3516 					action = FIF_ACTION_KILL;
3517 			}
3518 
3519 			if (action == FIF_ACTION_WITHDRAW) {
3520 
3521 				int washolddown = CHECK_FLAG(bpi->flags,
3522 							     BGP_PATH_REMOVED);
3523 
3524 				vnc_zlog_debug_verbose(
3525 					"%s: withdrawing at prefix %pRN%s",
3526 					__func__, rn,
3527 					(washolddown
3528 						 ? " (already being withdrawn)"
3529 						 : ""));
3530 
3531 				VNC_ITRCCK;
3532 				if (!washolddown) {
3533 					rfapiBiStartWithdrawTimer(
3534 						import_table, rn, bpi, afi,
3535 						SAFI_MPLS_VPN,
3536 						rfapiWithdrawTimerVPN);
3537 
3538 					RFAPI_UPDATE_ITABLE_COUNT(
3539 						bpi, import_table, afi, -1);
3540 					import_table->holddown_count[afi] += 1;
3541 				}
3542 				VNC_ITRCCK;
3543 			} else {
3544 				vnc_zlog_debug_verbose(
3545 					"%s: %s at prefix %pRN", __func__,
3546 					((action == FIF_ACTION_KILL)
3547 						 ? "killing"
3548 						 : "replacing"),
3549 					rn);
3550 
3551 				/*
3552 				 * If this route is waiting to be deleted
3553 				 * because of
3554 				 * a previous withdraw, we must cancel its
3555 				 * timer.
3556 				 */
3557 				if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)
3558 				    && bpi->extra->vnc.import.timer) {
3559 
3560 					struct thread *t =
3561 						(struct thread *)bpi->extra->vnc
3562 							.import.timer;
3563 					struct rfapi_withdraw *wcb = t->arg;
3564 
3565 					XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
3566 					thread_cancel(t);
3567 
3568 					import_table->holddown_count[afi] -= 1;
3569 					RFAPI_UPDATE_ITABLE_COUNT(
3570 						bpi, import_table, afi, 1);
3571 				}
3572 				/*
3573 				 * decrement remote count (if route is remote)
3574 				 * because
3575 				 * we are going to remove it below
3576 				 */
3577 				RFAPI_UPDATE_ITABLE_COUNT(bpi, import_table,
3578 							  afi, -1);
3579 				if (action == FIF_ACTION_UPDATE) {
3580 					replacing = 1;
3581 
3582 					/*
3583 					 * make copy of original nexthop so we
3584 					 * can see if it changed
3585 					 */
3586 					rfapiGetNexthop(bpi->attr,
3587 							&original_nexthop);
3588 
3589 					/*
3590 					 * remove bpi without doing any export
3591 					 * processing
3592 					 */
3593 					if (CHECK_FLAG(bpi->flags,
3594 						       BGP_PATH_VALID)
3595 					    && VALID_INTERIOR_TYPE(bpi->type))
3596 						RFAPI_MONITOR_EXTERIOR(rn)
3597 							->valid_interior_count--;
3598 					rfapiItBiIndexDel(rn, bpi);
3599 					rfapiBgpInfoDetach(rn, bpi);
3600 					rfapiMonitorEncapDelete(bpi);
3601 					vnc_import_bgp_exterior_del_route_interior(
3602 						bgp, import_table, rn, bpi);
3603 					rfapiBgpInfoFree(bpi);
3604 				} else {
3605 					/* Kill */
3606 					/*
3607 					 * remove bpi and do export processing
3608 					 */
3609 					import_table->holddown_count[afi] += 1;
3610 					rfapiExpireVpnNow(import_table, rn, bpi,
3611 							  0);
3612 				}
3613 			}
3614 		}
3615 	}
3616 
3617 	if (rn)
3618 		RFAPI_CHECK_REFCOUNT(rn, SAFI_MPLS_VPN, replacing ? 1 : 0);
3619 
3620 	if (action == FIF_ACTION_WITHDRAW || action == FIF_ACTION_KILL) {
3621 		VNC_ITRCCK;
3622 		return;
3623 	}
3624 
3625 	info_new =
3626 		rfapiBgpInfoCreate(attr, peer, rfd, prd, type, sub_type, label);
3627 
3628 	/*
3629 	 * lookup un address in encap table
3630 	 */
3631 	ern = agg_node_match(import_table->imported_encap[afi], &vn_prefix);
3632 	if (ern) {
3633 		rfapiCopyUnEncap2VPN(ern->info, info_new);
3634 		agg_unlock_node(ern); /* undo lock in route_note_match */
3635 	} else {
3636 		char bpf[PREFIX_STRLEN];
3637 
3638 		prefix2str(&vn_prefix, bpf, sizeof(bpf));
3639 		/* Not a big deal, just means VPN route got here first */
3640 		vnc_zlog_debug_verbose("%s: no encap route for vn addr %s",
3641 				       __func__, bpf);
3642 		info_new->extra->vnc.import.un_family = 0;
3643 	}
3644 
3645 	if (rn) {
3646 		if (!replacing)
3647 			agg_lock_node(rn);
3648 	} else {
3649 		/*
3650 		 * No need to increment reference count, so only "get"
3651 		 * if the node is not there already
3652 		 */
3653 		rn = agg_node_get(rt, p);
3654 	}
3655 
3656 	/*
3657 	 * For ethernet routes, if there is an accompanying IP address,
3658 	 * save it in the bpi
3659 	 */
3660 	if ((AFI_L2VPN == afi) && aux_prefix) {
3661 
3662 		vnc_zlog_debug_verbose("%s: setting BPI's aux_prefix",
3663 				       __func__);
3664 		info_new->extra->vnc.import.aux_prefix = *aux_prefix;
3665 	}
3666 
3667 	vnc_zlog_debug_verbose("%s: inserting bpi %p at prefix %pRN #%d",
3668 			       __func__, info_new, rn, rn->lock);
3669 
3670 	rfapiBgpInfoAttachSorted(rn, info_new, afi, SAFI_MPLS_VPN);
3671 	rfapiItBiIndexAdd(rn, info_new);
3672 	if (!rfapiGetUnAddrOfVpnBi(info_new, NULL)) {
3673 		if (VALID_INTERIOR_TYPE(info_new->type))
3674 			RFAPI_MONITOR_EXTERIOR(rn)->valid_interior_count++;
3675 		SET_FLAG(info_new->flags, BGP_PATH_VALID);
3676 	}
3677 	RFAPI_UPDATE_ITABLE_COUNT(info_new, import_table, afi, 1);
3678 	vnc_import_bgp_exterior_add_route_interior(bgp, import_table, rn,
3679 						   info_new);
3680 
3681 	if (import_table == bgp->rfapi->it_ce)
3682 		vnc_direct_bgp_add_route_ce(bgp, rn, info_new);
3683 
3684 	if (VNC_DEBUG(VERBOSE)) {
3685 		vnc_zlog_debug_verbose("%s: showing IT node", __func__);
3686 		rfapiShowItNode(NULL, rn); /* debug */
3687 	}
3688 
3689 	rfapiMonitorEncapAdd(import_table, &vn_prefix, rn, info_new);
3690 
3691 	if (!rfapiGetUnAddrOfVpnBi(info_new, &un_prefix)) {
3692 
3693 		/*
3694 		 * if we have a valid UN address (either via Encap route
3695 		 * or via tunnel attribute), then we should attempt
3696 		 * to move any monitors at less-specific nodes to this node
3697 		 */
3698 		rfapiMonitorMoveLonger(rn);
3699 
3700 		un_prefix_valid = 1;
3701 	}
3702 
3703 	/*
3704 	 * 101129 Enhancement: if we add a route (implication: it is not
3705 	 * in holddown), delete all other routes from this nve at this
3706 	 * node that are in holddown, regardless of peer.
3707 	 *
3708 	 * Reasons it's OK to do that:
3709 	 *
3710 	 * - if the holddown route being deleted originally came from BGP VPN,
3711 	 *   it is already gone from BGP (implication of holddown), so there
3712 	 *   won't be any added inconsistency with the BGP RIB.
3713 	 *
3714 	 * - once a fresh route is added at a prefix, any routes in holddown
3715 	 *   at that prefix will not show up in RFP responses, so deleting
3716 	 *   the holddown routes won't affect the contents of responses.
3717 	 *
3718 	 * - lifetimes are supposed to be consistent, so there should not
3719 	 *   be a case where the fresh route has a shorter lifetime than
3720 	 *   the holddown route, so we don't expect the fresh route to
3721 	 *   disappear and complete its holddown time before the existing
3722 	 *   holddown routes time out. Therefore, we won't have a situation
3723 	 *   where we expect the existing holddown routes to be hidden and
3724 	 *   then  to reappear sometime later (as holddown routes) in a
3725 	 *   RFP response.
3726 	 *
3727 	 * Among other things, this would enable us to skirt the problem
3728 	 * of local holddown routes that refer to NVE descriptors that
3729 	 * have already been closed (if the same NVE triggers a subsequent
3730 	 * rfapi_open(), the new peer is different and doesn't match the
3731 	 * peer of the holddown route, so the stale holddown route still
3732 	 * hangs around until it times out instead of just being replaced
3733 	 * by the fresh route).
3734 	 */
3735 	/*
3736 	 * We know that the new bpi will have been inserted before any routes
3737 	 * in holddown, so we can skip any that came before it
3738 	 */
3739 	for (bpi = info_new->next; bpi; bpi = next) {
3740 
3741 		struct prefix pfx_vn;
3742 		struct prefix pfx_un;
3743 		int un_match = 0;
3744 		int remote_peer_match = 0;
3745 
3746 		next = bpi->next;
3747 
3748 		/*
3749 		 * Must be holddown
3750 		 */
3751 		if (!CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED))
3752 			continue;
3753 
3754 		/*
3755 		 * Must match VN address (nexthop of VPN route)
3756 		 */
3757 		if (rfapiGetNexthop(bpi->attr, &pfx_vn))
3758 			continue;
3759 		if (!prefix_same(&pfx_vn, &vn_prefix))
3760 			continue;
3761 
3762 		if (un_prefix_valid && /* new route UN addr */
3763 		    !rfapiGetUnAddrOfVpnBi(bpi, &pfx_un)
3764 		    &&					/* old route UN addr */
3765 		    prefix_same(&pfx_un, &un_prefix)) { /* compare */
3766 			un_match = 1;
3767 		}
3768 		if (!RFAPI_LOCAL_BI(bpi) && !RFAPI_LOCAL_BI(info_new)
3769 		    && sockunion_same(&bpi->peer->su, &info_new->peer->su)) {
3770 			/* old & new are both remote, same peer */
3771 			remote_peer_match = 1;
3772 		}
3773 
3774 		if (!un_match & !remote_peer_match)
3775 			continue;
3776 
3777 		vnc_zlog_debug_verbose(
3778 			"%s: removing holddown bpi matching NVE of new route",
3779 			__func__);
3780 		if (bpi->extra->vnc.import.timer) {
3781 			struct thread *t =
3782 				(struct thread *)bpi->extra->vnc.import.timer;
3783 			struct rfapi_withdraw *wcb = t->arg;
3784 
3785 			XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
3786 			thread_cancel(t);
3787 		}
3788 		rfapiExpireVpnNow(import_table, rn, bpi, 0);
3789 	}
3790 
3791 	if (!original_had_routes) {
3792 		/*
3793 		 * We went from 0 usable routes to 1 usable route. Perform the
3794 		 * "Adding a Route" export process.
3795 		 */
3796 		vnc_direct_bgp_add_prefix(bgp, import_table, rn);
3797 		vnc_zebra_add_prefix(bgp, import_table, rn);
3798 	} else {
3799 		/*
3800 		 * Check for nexthop change event
3801 		 * Note: the prefix_same() test below detects two situations:
3802 		 * 1. route is replaced, new route has different nexthop
3803 		 * 2. new route is added (original_nexthop is 0)
3804 		 */
3805 		struct prefix new_nexthop;
3806 
3807 		rfapiGetNexthop(attr, &new_nexthop);
3808 		if (!prefix_same(&original_nexthop, &new_nexthop)) {
3809 			/*
3810 			 * nexthop change event
3811 			 * vnc_direct_bgp_add_prefix() will recompute VN addr
3812 			 * ecommunity
3813 			 */
3814 			vnc_direct_bgp_add_prefix(bgp, import_table, rn);
3815 		}
3816 	}
3817 
3818 	if (!(bgp->rfapi_cfg->flags & BGP_VNC_CONFIG_CALLBACK_DISABLE)) {
3819 		for (n = rn; n; n = agg_node_parent(n)) {
3820 			// rfapiDoRouteCallback(import_table, n, NULL);
3821 		}
3822 		rfapiMonitorItNodeChanged(import_table, rn, NULL);
3823 	}
3824 	RFAPI_CHECK_REFCOUNT(rn, SAFI_MPLS_VPN, 0);
3825 	VNC_ITRCCK;
3826 }
3827 
/*
 * Placeholder import handler returned for an unsupported SAFI.
 *
 * It has the same signature as the real filtered-import handlers,
 * ignores every argument, and only emits a verbose debug message.
 */
static void rfapiBgpInfoFilteredImportBadSafi(
	struct rfapi_import_table *import_table,
	int action,
	struct peer *peer,
	void *rfd,			 /* set for looped back routes */
	const struct prefix *p,
	const struct prefix *aux_prefix, /* AFI_L2VPN: optional IP */
	afi_t afi,
	struct prefix_rd *prd,
	struct attr *attr,		 /* part of bgp_path_info */
	uint8_t type,			 /* part of bgp_path_info */
	uint8_t sub_type,		 /* part of bgp_path_info */
	uint32_t *label)		 /* part of bgp_path_info */
{
	/* nothing is imported for an unexpected SAFI; just leave a trace */
	vnc_zlog_debug_verbose("%s: Error, bad safi", __func__);
}
3841 
3842 static rfapi_bi_filtered_import_f *
rfapiBgpInfoFilteredImportFunction(safi_t safi)3843 rfapiBgpInfoFilteredImportFunction(safi_t safi)
3844 {
3845 	switch (safi) {
3846 	case SAFI_MPLS_VPN:
3847 		return rfapiBgpInfoFilteredImportVPN;
3848 
3849 	case SAFI_ENCAP:
3850 		return rfapiBgpInfoFilteredImportEncap;
3851 
3852 	default:
3853 		/* not expected */
3854 		flog_err(EC_LIB_DEVELOPMENT, "%s: bad safi %d", __func__, safi);
3855 		return rfapiBgpInfoFilteredImportBadSafi;
3856 	}
3857 }
3858 
/*
 * Feed a route update into the RFAPI import tables.
 *
 * peer, prd	identify the route within an import-table node
 * rfd		set when looped from RFP/RFAPI
 * p		prefix of the route
 * attr		attributes of the route (dereferenced unconditionally
 *		on the L2 path below)
 * afi, safi	address family of the update; safi selects the import
 *		handler
 * type, sub_type, label
 *		passed through to the import handlers
 *
 * Steps:
 *  1. For SAFI_MPLS_VPN routes whose RD type is RD_TYPE_VNC_ETH, build
 *     an AF_ETHERNET/48 prefix from RD bytes 2..7 and, if an LNI can
 *     be extracted from the extended communities, import the route
 *     into that LNI's MAC-based import table with p as the auxiliary
 *     IP prefix. If the route's nexthop equals p, the route has no
 *     separate IP component and processing stops after this step.
 *  2. Run the SAFI-specific filtered import on every import table.
 *  3. For SAFI_MPLS_VPN, also do the direct-BGP "rh" export and import
 *     into the CE import table.
 */
void rfapiProcessUpdate(struct peer *peer,
			void *rfd, /* set when looped from RFP/RFAPI */
			const struct prefix *p, struct prefix_rd *prd,
			struct attr *attr, afi_t afi, safi_t safi, uint8_t type,
			uint8_t sub_type, uint32_t *label)
{
	struct bgp *bgp;
	struct rfapi *h;
	struct rfapi_import_table *it;
	int has_ip_route = 1;
	uint32_t lni = 0;

	bgp = bgp_get_default(); /* assume 1 instance for now */
	assert(bgp);

	h = bgp->rfapi;
	assert(h);

	/*
	 * look at high-order byte of RD. FF means MAC
	 * address is present (VNC L2VPN)
	 */
	if ((safi == SAFI_MPLS_VPN)
	    && (decode_rd_type(prd->val) == RD_TYPE_VNC_ETH)) {
		struct prefix pfx_mac_buf;
		struct prefix pfx_nexthop_buf;
		int rc;

		/*
		 * Set flag if prefix and nexthop are the same - don't
		 * add the route to normal IP-based import tables
		 */
		if (!rfapiGetNexthop(attr, &pfx_nexthop_buf)) {
			if (!prefix_cmp(&pfx_nexthop_buf, p)) {
				has_ip_route = 0;
			}
		}

		/* the MAC address is carried in the 6 bytes after the RD type */
		memset(&pfx_mac_buf, 0, sizeof(pfx_mac_buf));
		pfx_mac_buf.family = AF_ETHERNET;
		pfx_mac_buf.prefixlen = 48;
		memcpy(&pfx_mac_buf.u.prefix_eth.octet, prd->val + 2, 6);

		/*
		 * Find rt containing LNI (Logical Network ID), which
		 * _should_ always be present when mac address is present
		 */
		rc = rfapiEcommunityGetLNI(attr->ecommunity, &lni);

		/* lni is unsigned, so it is printed with %u */
		vnc_zlog_debug_verbose(
			"%s: rfapiEcommunityGetLNI returned %d, lni=%u, attr=%p",
			__func__, rc, lni, attr);
		if (!rc) {
			it = rfapiMacImportTableGet(bgp, lni);

			rfapiBgpInfoFilteredImportVPN(
				it, FIF_ACTION_UPDATE, peer, rfd,
				&pfx_mac_buf, /* prefix */
				p,	    /* aux prefix: IP addr */
				AFI_L2VPN, prd, attr, type, sub_type, label);
		}
	}

	/* pure L2 route: nothing to add to the IP-based tables */
	if (!has_ip_route)
		return;

	/*
	 * Iterate over all import tables; do a filtered import
	 * for the afi/safi combination
	 */
	for (it = h->imports; it; it = it->next) {
		(*rfapiBgpInfoFilteredImportFunction(safi))(
			it, FIF_ACTION_UPDATE, peer, rfd, p, /* prefix */
			NULL, afi, prd, attr, type, sub_type, label);
	}

	if (safi == SAFI_MPLS_VPN) {
		vnc_direct_bgp_rh_add_route(bgp, afi, p, peer, attr);
		rfapiBgpInfoFilteredImportVPN(
			bgp->rfapi->it_ce, FIF_ACTION_UPDATE, peer, rfd,
			p, /* prefix */
			NULL, afi, prd, attr, type, sub_type, label);
	}
}
3943 
3944 
/*
 * Feed a route withdraw into the RFAPI import tables.
 *
 * A non-zero kill selects FIF_ACTION_KILL for every import handler
 * called below; otherwise FIF_ACTION_WITHDRAW is used. sub_type and
 * label are unused for withdraws and are passed as 0 / NULL.
 *
 * Order of processing:
 *  1. VNC L2VPN routes (SAFI_MPLS_VPN with an RD of type
 *     RD_TYPE_VNC_ETH), when MAC-based import tables exist: withdraw
 *     from every MAC-based table, using p as the auxiliary IP prefix.
 *  2. Every regular import table, via the SAFI-specific handler.
 *  3. SAFI_MPLS_VPN only: direct-BGP "rh" route removal, then the CE
 *     import table.
 */
void rfapiProcessWithdraw(struct peer *peer, void *rfd, const struct prefix *p,
			  struct prefix_rd *prd, struct attr *attr, afi_t afi,
			  safi_t safi, uint8_t type, int kill)
{
	struct rfapi_import_table *it;
	struct rfapi *h;
	struct bgp *bgp;
	int fif_action;

	bgp = bgp_get_default(); /* assume 1 instance for now */
	assert(bgp);

	h = bgp->rfapi;
	assert(h);

	fif_action = kill ? FIF_ACTION_KILL : FIF_ACTION_WITHDRAW;

	/*
	 * look at high-order byte of RD. FF means MAC
	 * address is present (VNC L2VPN)
	 */
	if (h->import_mac != NULL && safi == SAFI_MPLS_VPN
	    && decode_rd_type(prd->val) == RD_TYPE_VNC_ETH) {
		struct prefix pfx_mac_buf;
		void *cursor = NULL;

		memset(&pfx_mac_buf, 0, sizeof(pfx_mac_buf));
		pfx_mac_buf.family = AF_ETHERNET;
		pfx_mac_buf.prefixlen = 48;
		memcpy(&pfx_mac_buf.u.prefix_eth, prd->val + 2, 6);

		/*
		 * withdraw does not contain attrs, so we don't have
		 * access to the route's LNI, which would ordinarily
		 * select the specific mac-based import table. Instead,
		 * we must iterate over all mac-based tables and rely
		 * on the RD to match.
		 *
		 * If this approach is too slow, add an index where
		 * key is {RD, peer} and value is the import table
		 */
		while (skiplist_next(h->import_mac, NULL, (void **)&it,
				     &cursor)
		       == 0) {

#ifdef DEBUG_L2_EXTRA
			vnc_zlog_debug_verbose(
				"%s: calling rfapiBgpInfoFilteredImportVPN(it=%p, afi=AFI_L2VPN)",
				__func__, it);
#endif

			/* sub_type & label unused for withdraw */
			rfapiBgpInfoFilteredImportVPN(
				it, fif_action, peer, rfd,
				&pfx_mac_buf, /* prefix */
				p,	      /* aux_prefix: IP */
				AFI_L2VPN, prd, attr, type, 0, NULL);
		}
	}

	/*
	 * XXX For the case where the withdraw involves an L2
	 * route with no IP information, we rely on the lack
	 * of RT-list intersection to filter out the withdraw
	 * from the IP-based import tables below
	 */

	/*
	 * Iterate over all import tables; do a filtered import
	 * for the afi/safi combination
	 */
	for (it = h->imports; it; it = it->next) {
		/* sub_type & label unused for withdraw */
		rfapiBgpInfoFilteredImportFunction(safi)(
			it, fif_action, peer, rfd, p, /* prefix */
			NULL, afi, prd, attr, type, 0, NULL);
	}

	/* everything below applies to VPN routes only */
	if (safi != SAFI_MPLS_VPN)
		return;

	/* TBD the deletion should happen after the lifetime expires */
	vnc_direct_bgp_rh_del_route(bgp, afi, p, peer);

	/* sub_type & label unused for withdraw */
	rfapiBgpInfoFilteredImportVPN(bgp->rfapi->it_ce, fif_action, peer, rfd,
				      p, /* prefix */
				      NULL, afi, prd, attr, type, 0, NULL);
}
4039 
4040 /*
4041  * TBD optimized withdraw timer algorithm for case of many
4042  * routes expiring at the same time due to peer drop.
4043  */
/*
 * 1. Visit all BPIs in all ENCAP import tables.
 *
 *    a. If a bpi's peer is the failed peer, remove the bpi.
 *    b. If the removed ENCAP bpi was first in the list of
 *       BPIs at this ENCAP node, loop over all monitors
 *       at this node:
 *
 *       (1) for each ENCAP monitor, loop over all its
 *           VPN node monitors and set their RFAPI_MON_FLAG_NEEDCALLBACK
 *           flags.
 *
 * 2. Visit all BPIs in all VPN import tables.
 *
 *    a. If a bpi's peer is the failed peer, remove the bpi.
 *    b. Loop over all the VPN node monitors and set their
 *       RFAPI_MON_FLAG_NEEDCALLBACK flags.
 *    c. If there are no BPIs left at this VPN node, ... (TBD: this
 *       step was left unfinished in the original notes)
 */
4063 
4064 
4065 /* surprise, this gets called from peer_delete(), from rfapi_close() */
rfapiProcessPeerDownRt(struct peer * peer,struct rfapi_import_table * import_table,afi_t afi,safi_t safi)4066 static void rfapiProcessPeerDownRt(struct peer *peer,
4067 				   struct rfapi_import_table *import_table,
4068 				   afi_t afi, safi_t safi)
4069 {
4070 	struct agg_node *rn;
4071 	struct bgp_path_info *bpi;
4072 	struct agg_table *rt;
4073 	int (*timer_service_func)(struct thread *);
4074 
4075 	assert(afi == AFI_IP || afi == AFI_IP6);
4076 
4077 	VNC_ITRCCK;
4078 
4079 	switch (safi) {
4080 	case SAFI_MPLS_VPN:
4081 		rt = import_table->imported_vpn[afi];
4082 		timer_service_func = rfapiWithdrawTimerVPN;
4083 		break;
4084 	case SAFI_ENCAP:
4085 		rt = import_table->imported_encap[afi];
4086 		timer_service_func = rfapiWithdrawTimerEncap;
4087 		break;
4088 	default:
4089 		/* Suppress uninitialized variable warning */
4090 		rt = NULL;
4091 		timer_service_func = NULL;
4092 		assert(0);
4093 	}
4094 
4095 
4096 	for (rn = agg_route_top(rt); rn; rn = agg_route_next(rn)) {
4097 		for (bpi = rn->info; bpi; bpi = bpi->next) {
4098 			if (bpi->peer == peer) {
4099 
4100 				if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)) {
4101 					/* already in holddown, skip */
4102 					continue;
4103 				}
4104 
4105 				if (safi == SAFI_MPLS_VPN) {
4106 					RFAPI_UPDATE_ITABLE_COUNT(
4107 						bpi, import_table, afi, -1);
4108 					import_table->holddown_count[afi] += 1;
4109 				}
4110 				rfapiBiStartWithdrawTimer(import_table, rn, bpi,
4111 							  afi, safi,
4112 							  timer_service_func);
4113 			}
4114 		}
4115 	}
4116 	VNC_ITRCCK;
4117 }
4118 
4119 /*
4120  * This gets called when a peer connection drops. We have to remove
4121  * all the routes from this peer.
4122  *
4123  * Current approach is crude. TBD Optimize by setting fewer timers and
4124  * grouping withdrawn routes so we can generate callbacks more
4125  * efficiently.
4126  */
rfapiProcessPeerDown(struct peer * peer)4127 void rfapiProcessPeerDown(struct peer *peer)
4128 {
4129 	struct bgp *bgp;
4130 	struct rfapi *h;
4131 	struct rfapi_import_table *it;
4132 
4133 	/*
4134 	 * If this peer is a "dummy" peer structure atached to a RFAPI
4135 	 * nve_descriptor, we don't need to walk the import tables
4136 	 * because the routes are already withdrawn by rfapi_close()
4137 	 */
4138 	if (CHECK_FLAG(peer->flags, PEER_FLAG_IS_RFAPI_HD))
4139 		return;
4140 
4141 	/*
4142 	 * 1. Visit all BPIs in all ENCAP import tables.
4143 	 *    Start withdraw timer on the BPIs that match peer.
4144 	 *
4145 	 * 2. Visit All BPIs in all VPN import tables.
4146 	 *    Start withdraw timer on the BPIs that match peer.
4147 	 */
4148 
4149 	bgp = bgp_get_default(); /* assume 1 instance for now */
4150 	if (!bgp)
4151 		return;
4152 
4153 	h = bgp->rfapi;
4154 	assert(h);
4155 
4156 	for (it = h->imports; it; it = it->next) {
4157 		rfapiProcessPeerDownRt(peer, it, AFI_IP, SAFI_ENCAP);
4158 		rfapiProcessPeerDownRt(peer, it, AFI_IP6, SAFI_ENCAP);
4159 		rfapiProcessPeerDownRt(peer, it, AFI_IP, SAFI_MPLS_VPN);
4160 		rfapiProcessPeerDownRt(peer, it, AFI_IP6, SAFI_MPLS_VPN);
4161 	}
4162 
4163 	if (h->it_ce) {
4164 		rfapiProcessPeerDownRt(peer, h->it_ce, AFI_IP, SAFI_MPLS_VPN);
4165 		rfapiProcessPeerDownRt(peer, h->it_ce, AFI_IP6, SAFI_MPLS_VPN);
4166 	}
4167 }
4168 
4169 /*
4170  * Import an entire RIB (for an afi/safi) to an import table RIB,
4171  * filtered according to the import table's RT list
4172  *
4173  * TBD: does this function need additions to match rfapiProcessUpdate()
4174  * for, e.g., L2 handling?
4175  */
rfapiBgpTableFilteredImport(struct bgp * bgp,struct rfapi_import_table * it,afi_t afi,safi_t safi)4176 static void rfapiBgpTableFilteredImport(struct bgp *bgp,
4177 					struct rfapi_import_table *it,
4178 					afi_t afi, safi_t safi)
4179 {
4180 	struct bgp_dest *dest1;
4181 	struct bgp_dest *dest2;
4182 
4183 	/* Only these SAFIs have 2-level RIBS */
4184 	assert(safi == SAFI_MPLS_VPN || safi == SAFI_ENCAP);
4185 
4186 	/*
4187 	 * Now visit all the rd nodes and the nodes of all the
4188 	 * route tables attached to them, and import the routes
4189 	 * if they have matching route targets
4190 	 */
4191 	for (dest1 = bgp_table_top(bgp->rib[afi][safi]); dest1;
4192 	     dest1 = bgp_route_next(dest1)) {
4193 
4194 		if (bgp_dest_has_bgp_path_info_data(dest1)) {
4195 
4196 			for (dest2 = bgp_table_top(
4197 				     bgp_dest_get_bgp_table_info(dest1));
4198 			     dest2; dest2 = bgp_route_next(dest2)) {
4199 
4200 				struct bgp_path_info *bpi;
4201 
4202 				for (bpi = bgp_dest_get_bgp_path_info(dest2);
4203 				     bpi; bpi = bpi->next) {
4204 					uint32_t label = 0;
4205 
4206 					if (CHECK_FLAG(bpi->flags,
4207 						       BGP_PATH_REMOVED))
4208 						continue;
4209 
4210 					if (bpi->extra)
4211 						label = decode_label(
4212 							&bpi->extra->label[0]);
4213 					(*rfapiBgpInfoFilteredImportFunction(
4214 						safi))(
4215 						it, /* which import table */
4216 						FIF_ACTION_UPDATE, bpi->peer,
4217 						NULL,
4218 						bgp_dest_get_prefix(dest2),
4219 						NULL, afi,
4220 						(struct prefix_rd *)
4221 							bgp_dest_get_prefix(
4222 								dest1),
4223 						bpi->attr, bpi->type,
4224 						bpi->sub_type, &label);
4225 				}
4226 			}
4227 		}
4228 	}
4229 }
4230 
4231 
4232 /* per-bgp-instance rfapi data */
bgp_rfapi_new(struct bgp * bgp)4233 struct rfapi *bgp_rfapi_new(struct bgp *bgp)
4234 {
4235 	struct rfapi *h;
4236 	afi_t afi;
4237 	struct rfapi_rfp_cfg *cfg = NULL;
4238 	struct rfapi_rfp_cb_methods *cbm = NULL;
4239 
4240 	assert(bgp->rfapi_cfg == NULL);
4241 
4242 	h = XCALLOC(MTYPE_RFAPI, sizeof(struct rfapi));
4243 
4244 	for (afi = AFI_IP; afi < AFI_MAX; afi++) {
4245 		h->un[afi] = agg_table_init();
4246 	}
4247 
4248 	/*
4249 	 * initialize the ce import table
4250 	 */
4251 	h->it_ce = XCALLOC(MTYPE_RFAPI_IMPORTTABLE,
4252 			   sizeof(struct rfapi_import_table));
4253 	h->it_ce->imported_vpn[AFI_IP] = agg_table_init();
4254 	h->it_ce->imported_vpn[AFI_IP6] = agg_table_init();
4255 	h->it_ce->imported_encap[AFI_IP] = agg_table_init();
4256 	h->it_ce->imported_encap[AFI_IP6] = agg_table_init();
4257 	rfapiBgpTableFilteredImport(bgp, h->it_ce, AFI_IP, SAFI_MPLS_VPN);
4258 	rfapiBgpTableFilteredImport(bgp, h->it_ce, AFI_IP6, SAFI_MPLS_VPN);
4259 
4260 	/*
4261 	 * Set up work queue for deferred rfapi_close operations
4262 	 */
4263 	h->deferred_close_q =
4264 		work_queue_new(bm->master, "rfapi deferred close");
4265 	h->deferred_close_q->spec.workfunc = rfapi_deferred_close_workfunc;
4266 	h->deferred_close_q->spec.data = h;
4267 
4268 	h->rfp = rfp_start(bm->master, &cfg, &cbm);
4269 	bgp->rfapi_cfg = bgp_rfapi_cfg_new(cfg);
4270 	if (cbm != NULL) {
4271 		h->rfp_methods = *cbm;
4272 	}
4273 	return h;
4274 }
4275 
bgp_rfapi_destroy(struct bgp * bgp,struct rfapi * h)4276 void bgp_rfapi_destroy(struct bgp *bgp, struct rfapi *h)
4277 {
4278 	afi_t afi;
4279 
4280 	if (bgp == NULL || h == NULL)
4281 		return;
4282 
4283 	if (h->resolve_nve_nexthop) {
4284 		skiplist_free(h->resolve_nve_nexthop);
4285 		h->resolve_nve_nexthop = NULL;
4286 	}
4287 
4288 	agg_table_finish(h->it_ce->imported_vpn[AFI_IP]);
4289 	agg_table_finish(h->it_ce->imported_vpn[AFI_IP6]);
4290 	agg_table_finish(h->it_ce->imported_encap[AFI_IP]);
4291 	agg_table_finish(h->it_ce->imported_encap[AFI_IP6]);
4292 
4293 	if (h->import_mac) {
4294 		struct rfapi_import_table *it;
4295 		void *cursor;
4296 		int rc;
4297 
4298 		for (cursor = NULL,
4299 		    rc = skiplist_next(h->import_mac, NULL, (void **)&it,
4300 				       &cursor);
4301 		     !rc; rc = skiplist_next(h->import_mac, NULL, (void **)&it,
4302 					     &cursor)) {
4303 
4304 			rfapiImportTableFlush(it);
4305 			XFREE(MTYPE_RFAPI_IMPORTTABLE, it);
4306 		}
4307 		skiplist_free(h->import_mac);
4308 		h->import_mac = NULL;
4309 	}
4310 
4311 	work_queue_free_and_null(&h->deferred_close_q);
4312 
4313 	if (h->rfp != NULL)
4314 		rfp_stop(h->rfp);
4315 
4316 	for (afi = AFI_IP; afi < AFI_MAX; afi++) {
4317 		agg_table_finish(h->un[afi]);
4318 	}
4319 
4320 	XFREE(MTYPE_RFAPI_IMPORTTABLE, h->it_ce);
4321 	XFREE(MTYPE_RFAPI, h);
4322 }
4323 
4324 struct rfapi_import_table *
rfapiImportTableRefAdd(struct bgp * bgp,struct ecommunity * rt_import_list,struct rfapi_nve_group_cfg * rfg)4325 rfapiImportTableRefAdd(struct bgp *bgp, struct ecommunity *rt_import_list,
4326 		       struct rfapi_nve_group_cfg *rfg)
4327 {
4328 	struct rfapi *h;
4329 	struct rfapi_import_table *it;
4330 	afi_t afi;
4331 
4332 	h = bgp->rfapi;
4333 	assert(h);
4334 
4335 	for (it = h->imports; it; it = it->next) {
4336 		if (ecommunity_cmp(it->rt_import_list, rt_import_list))
4337 			break;
4338 	}
4339 
4340 	vnc_zlog_debug_verbose("%s: matched it=%p", __func__, it);
4341 
4342 	if (!it) {
4343 		it = XCALLOC(MTYPE_RFAPI_IMPORTTABLE,
4344 			     sizeof(struct rfapi_import_table));
4345 		assert(it);
4346 		it->next = h->imports;
4347 		h->imports = it;
4348 
4349 		it->rt_import_list = ecommunity_dup(rt_import_list);
4350 		it->rfg = rfg;
4351 		it->monitor_exterior_orphans =
4352 			skiplist_new(0, NULL, prefix_free_lists);
4353 
4354 		/*
4355 		 * fill import route tables from RIBs
4356 		 *
4357 		 * Potential area for optimization. If this occurs when
4358 		 * tables are large (e.g., the operator adds a nve group
4359 		 * with a new RT list to a running system), it could take
4360 		 * a while.
4361 		 *
4362 		 */
4363 		for (afi = AFI_IP; afi < AFI_MAX; ++afi) {
4364 
4365 			it->imported_vpn[afi] = agg_table_init();
4366 			it->imported_encap[afi] = agg_table_init();
4367 
4368 			rfapiBgpTableFilteredImport(bgp, it, afi,
4369 						    SAFI_MPLS_VPN);
4370 			rfapiBgpTableFilteredImport(bgp, it, afi, SAFI_ENCAP);
4371 
4372 			vnc_import_bgp_exterior_redist_enable_it(bgp, afi, it);
4373 		}
4374 	}
4375 
4376 	it->refcount += 1;
4377 
4378 	return it;
4379 }
4380 
4381 /*
4382  * skiplist element free function
4383  */
delete_rem_pfx_na_free(void * na)4384 static void delete_rem_pfx_na_free(void *na)
4385 {
4386 	uint32_t *pCounter = ((struct rfapi_nve_addr *)na)->info;
4387 
4388 	*pCounter += 1;
4389 	XFREE(MTYPE_RFAPI_NVE_ADDR, na);
4390 }
4391 
/*
 * Common deleter for IP and MAC import tables
 *
 * Walks it->imported_vpn[afi] for every afi and deletes the routes
 * that pass all of the filters below.
 *
 * input:
 *	bgp			passed to vnc_direct_bgp_rh_del_route()
 *	it			import table to scan
 *	un			if set, route's UN address must be valid
 *				and satisfy prefix_match(un, ...)
 *	vn			if set, route's nexthop must be valid
 *				and satisfy prefix_match(vn, ...)
 *	p			if set, only the AFI of p->family is
 *				scanned and only nodes whose prefix
 *				compares equal (prefix_cmp) are used
 *	delete_active		nonzero: delete routes NOT flagged
 *				BGP_PATH_REMOVED
 *	delete_holddown		nonzero: delete routes flagged
 *				BGP_PATH_REMOVED
 *	uniq_active_nves	skiplists collecting the distinct (un, vn)
 *	uniq_holddown_nves	pairs of deleted active/holddown routes
 *
 * output:
 *	pARcount		incremented per active route deleted
 *	pHRcount		incremented per holddown route deleted
 *	pAHcount, pHHcount	not written here; stored in the info field
 *				of each new skiplist element
 *
 * pARcount and pHRcount are dereferenced unconditionally and so must
 * not be NULL.
 */
static void rfapiDeleteRemotePrefixesIt(
	struct bgp *bgp, struct rfapi_import_table *it, struct prefix *un,
	struct prefix *vn, struct prefix *p, int delete_active,
	int delete_holddown, uint32_t *pARcount, uint32_t *pAHcount,
	uint32_t *pHRcount, uint32_t *pHHcount,
	struct skiplist *uniq_active_nves, struct skiplist *uniq_holddown_nves)
{
	afi_t afi;

#ifdef DEBUG_L2_EXTRA
	{
		char buf_pfx[PREFIX_STRLEN];

		if (p) {
			prefix2str(p, buf_pfx, sizeof(buf_pfx));
		} else {
			buf_pfx[0] = '*';
			buf_pfx[1] = 0;
		}

		vnc_zlog_debug_verbose(
			"%s: entry, p=%s, delete_active=%d, delete_holddown=%d",
			__func__, buf_pfx, delete_active, delete_holddown);
	}
#endif

	for (afi = AFI_IP; afi < AFI_MAX; ++afi) {

		struct agg_table *rt;
		struct agg_node *rn;

		/* a prefix filter restricts the scan to its own AFI */
		if (p && (family2afi(p->family) != afi)) {
			continue;
		}

		rt = it->imported_vpn[afi];
		if (!rt)
			continue;

		vnc_zlog_debug_verbose("%s: scanning rt for afi=%d", __func__,
				       afi);

		for (rn = agg_route_top(rt); rn; rn = agg_route_next(rn)) {
			struct bgp_path_info *bpi;
			struct bgp_path_info *next;
			const struct prefix *rn_p = agg_node_get_prefix(rn);

			if (p && VNC_DEBUG(IMPORT_DEL_REMOTE)) {
				char p1line[PREFIX_STRLEN];

				prefix2str(p, p1line, sizeof(p1line));
				vnc_zlog_debug_any("%s: want %s, have %pRN",
						   __func__, p1line, rn);
			}

			/* prefix_cmp() is nonzero when the prefixes differ */
			if (p && prefix_cmp(p, rn_p))
				continue;

			vnc_zlog_debug_verbose("%s: rn pfx=%pRN", __func__, rn);

			/* TBD is this valid for afi == AFI_L2VPN? */
			RFAPI_CHECK_REFCOUNT(rn, SAFI_MPLS_VPN, 1);

			for (bpi = rn->info; bpi; bpi = next) {
				/*
				 * Fetch the successor up front; presumably
				 * bpi can be unlinked by rfapiExpireVpnNow()
				 * below -- TODO confirm.
				 */
				next = bpi->next;

				struct prefix qpt;
				struct prefix qct;
				int qpt_valid = 0;
				int qct_valid = 0;
				int is_active = 0;

				vnc_zlog_debug_verbose("%s: examining bpi %p",
						       __func__, bpi);

				/* qpt: nexthop (matched against vn below) */
				if (!rfapiGetNexthop(bpi->attr, &qpt))
					qpt_valid = 1;

				if (vn) {
					if (!qpt_valid
					    || !prefix_match(vn, &qpt)) {
#ifdef DEBUG_L2_EXTRA
						vnc_zlog_debug_verbose(
							"%s: continue at vn && !qpt_valid || !prefix_match(vn, &qpt)",
							__func__);
#endif
						continue;
					}
				}

				/* qct: UN address (matched against un below) */
				if (!rfapiGetUnAddrOfVpnBi(bpi, &qct))
					qct_valid = 1;

				if (un) {
					if (!qct_valid
					    || !prefix_match(un, &qct)) {
#ifdef DEBUG_L2_EXTRA
						vnc_zlog_debug_verbose(
							"%s: continue at un && !qct_valid || !prefix_match(un, &qct)",
							__func__);
#endif
						continue;
					}
				}


				/*
				 * Blow bpi away
				 */
				/*
				 * If this route is waiting to be deleted
				 * because of a previous withdraw, we must
				 * cancel its timer.
				 */
				if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)) {
					if (!delete_holddown)
						continue;
					if (bpi->extra->vnc.import.timer) {

						struct thread *t =
							(struct thread *)bpi
								->extra->vnc
								.import.timer;
						struct rfapi_withdraw *wcb =
							t->arg;

						/*
						 * Reverse the counter changes
						 * on the table recorded in the
						 * timer context, free that
						 * context, then cancel the
						 * timer.
						 */
						wcb->import_table
							->holddown_count[afi] -=
							1;
						RFAPI_UPDATE_ITABLE_COUNT(
							bpi, wcb->import_table,
							afi, 1);
						XFREE(MTYPE_RFAPI_WITHDRAW,
						      wcb);
						thread_cancel(t);
					}
				} else {
					if (!delete_active)
						continue;
					is_active = 1;
				}

				vnc_zlog_debug_verbose(
					"%s: deleting bpi %p (qct_valid=%d, qpt_valid=%d, delete_holddown=%d, delete_active=%d)",
					__func__, bpi, qct_valid, qpt_valid,
					delete_holddown, delete_active);


				/*
				 * add nve to list (only when both the UN
				 * and the VN address are known, and only
				 * if this pair is not in the list yet)
				 */
				if (qct_valid && qpt_valid) {

					struct rfapi_nve_addr na;
					struct rfapi_nve_addr *nap;

					memset(&na, 0, sizeof(na));
					assert(!rfapiQprefix2Raddr(&qct,
								   &na.un));
					assert(!rfapiQprefix2Raddr(&qpt,
								   &na.vn));

					/* nonzero result: pair not found */
					if (skiplist_search(
						    (is_active
							     ? uniq_active_nves
							     : uniq_holddown_nves),
						    &na, (void **)&nap)) {
						char line[BUFSIZ];

						nap = XCALLOC(
							MTYPE_RFAPI_NVE_ADDR,
							sizeof(struct
							       rfapi_nve_addr));
						assert(nap);
						*nap = na;
						/*
						 * Remember which counter this
						 * element belongs to.
						 */
						nap->info = is_active
								    ? pAHcount
								    : pHHcount;
						skiplist_insert(
							(is_active
								 ? uniq_active_nves
								 : uniq_holddown_nves),
							nap, nap);

						/*
						 * NOTE(review): "line" is
						 * filled in but not used
						 * afterwards in this block.
						 */
						rfapiNveAddr2Str(nap, line,
								 BUFSIZ);
					}
				}

				vnc_direct_bgp_rh_del_route(bgp, afi, rn_p,
							    bpi->peer);

				/*
				 * Move the route from its active counter to
				 * the holddown counter, then hand it to
				 * rfapiExpireVpnNow() (presumably expires it
				 * immediately -- TODO confirm).
				 */
				RFAPI_UPDATE_ITABLE_COUNT(bpi, it, afi, -1);
				it->holddown_count[afi] += 1;
				rfapiExpireVpnNow(it, rn, bpi, 1);

				vnc_zlog_debug_verbose(
					"%s: incrementing count (is_active=%d)",
					__func__, is_active);

				if (is_active)
					++*pARcount;
				else
					++*pHRcount;
			}
		}
	}
}
4604 
4605 
4606 /*
4607  * For use by the "clear vnc prefixes" command
4608  */
4609 /*------------------------------------------
4610  * rfapiDeleteRemotePrefixes
4611  *
4612  * UI helper: For use by the "clear vnc prefixes" command
4613  *
4614  * input:
4615  *	un			if set, tunnel must match this prefix
4616  *	vn			if set, nexthop prefix must match this prefix
4617  *	p			if set, prefix must match this prefix
4618  *      it                      if set, only look in this import table
4619  *
4620  * output
4621  *	pARcount		number of active routes deleted
4622  *	pAHcount		number of active nves deleted
4623  *	pHRcount		number of holddown routes deleted
4624  *	pHHcount		number of holddown nves deleted
4625  *
4626  * return value:
4627  *	void
4628  --------------------------------------------*/
rfapiDeleteRemotePrefixes(struct prefix * un,struct prefix * vn,struct prefix * p,struct rfapi_import_table * arg_it,int delete_active,int delete_holddown,uint32_t * pARcount,uint32_t * pAHcount,uint32_t * pHRcount,uint32_t * pHHcount)4629 void rfapiDeleteRemotePrefixes(struct prefix *un, struct prefix *vn,
4630 			       struct prefix *p,
4631 			       struct rfapi_import_table *arg_it,
4632 			       int delete_active, int delete_holddown,
4633 			       uint32_t *pARcount, uint32_t *pAHcount,
4634 			       uint32_t *pHRcount, uint32_t *pHHcount)
4635 {
4636 	struct bgp *bgp;
4637 	struct rfapi *h;
4638 	struct rfapi_import_table *it;
4639 	uint32_t deleted_holddown_route_count = 0;
4640 	uint32_t deleted_active_route_count = 0;
4641 	uint32_t deleted_holddown_nve_count = 0;
4642 	uint32_t deleted_active_nve_count = 0;
4643 	struct skiplist *uniq_holddown_nves;
4644 	struct skiplist *uniq_active_nves;
4645 
4646 	VNC_ITRCCK;
4647 
4648 	bgp = bgp_get_default(); /* assume 1 instance for now */
4649 	/* If no bgp instantiated yet, no vnc prefixes exist */
4650 	if (!bgp)
4651 		return;
4652 
4653 	h = bgp->rfapi;
4654 	assert(h);
4655 
4656 	uniq_holddown_nves =
4657 		skiplist_new(0, rfapi_nve_addr_cmp, delete_rem_pfx_na_free);
4658 	uniq_active_nves =
4659 		skiplist_new(0, rfapi_nve_addr_cmp, delete_rem_pfx_na_free);
4660 
4661 	/*
4662 	 * Iterate over all import tables; do a filtered import
4663 	 * for the afi/safi combination
4664 	 */
4665 
4666 	if (arg_it)
4667 		it = arg_it;
4668 	else
4669 		it = h->imports;
4670 	for (; it;) {
4671 
4672 		vnc_zlog_debug_verbose(
4673 			"%s: calling rfapiDeleteRemotePrefixesIt() on (IP) import %p",
4674 			__func__, it);
4675 
4676 		rfapiDeleteRemotePrefixesIt(
4677 			bgp, it, un, vn, p, delete_active, delete_holddown,
4678 			&deleted_active_route_count, &deleted_active_nve_count,
4679 			&deleted_holddown_route_count,
4680 			&deleted_holddown_nve_count, uniq_active_nves,
4681 			uniq_holddown_nves);
4682 
4683 		if (arg_it)
4684 			it = NULL;
4685 		else
4686 			it = it->next;
4687 	}
4688 
4689 	/*
4690 	 * Now iterate over L2 import tables
4691 	 */
4692 	if (h->import_mac && !(p && (p->family != AF_ETHERNET))) {
4693 
4694 		void *cursor = NULL;
4695 		int rc;
4696 
4697 		for (cursor = NULL,
4698 		    rc = skiplist_next(h->import_mac, NULL, (void **)&it,
4699 				       &cursor);
4700 		     !rc; rc = skiplist_next(h->import_mac, NULL, (void **)&it,
4701 					     &cursor)) {
4702 
4703 			vnc_zlog_debug_verbose(
4704 				"%s: calling rfapiDeleteRemotePrefixesIt() on import_mac %p",
4705 				__func__, it);
4706 
4707 			rfapiDeleteRemotePrefixesIt(
4708 				bgp, it, un, vn, p, delete_active,
4709 				delete_holddown, &deleted_active_route_count,
4710 				&deleted_active_nve_count,
4711 				&deleted_holddown_route_count,
4712 				&deleted_holddown_nve_count, uniq_active_nves,
4713 				uniq_holddown_nves);
4714 		}
4715 	}
4716 
4717 	/*
4718 	 * our custom element freeing function above counts as it deletes
4719 	 */
4720 	skiplist_free(uniq_holddown_nves);
4721 	skiplist_free(uniq_active_nves);
4722 
4723 	if (pARcount)
4724 		*pARcount = deleted_active_route_count;
4725 	if (pAHcount)
4726 		*pAHcount = deleted_active_nve_count;
4727 	if (pHRcount)
4728 		*pHRcount = deleted_holddown_route_count;
4729 	if (pHHcount)
4730 		*pHHcount = deleted_holddown_nve_count;
4731 
4732 	VNC_ITRCCK;
4733 }
4734 
4735 /*------------------------------------------
4736  * rfapiCountRemoteRoutes
4737  *
4738  * UI helper: count VRF routes from BGP side
4739  *
4740  * input:
4741  *
4742  * output
4743  *	pALRcount		count of active local routes
4744  *	pARRcount		count of active remote routes
4745  *	pHRcount		count of holddown routes
4746  *	pIRcount		count of direct imported routes
4747  *
4748  * return value:
4749  *	void
4750  --------------------------------------------*/
rfapiCountAllItRoutes(int * pALRcount,int * pARRcount,int * pHRcount,int * pIRcount)4751 void rfapiCountAllItRoutes(int *pALRcount, /* active local routes */
4752 			   int *pARRcount, /* active remote routes */
4753 			   int *pHRcount,  /* holddown routes */
4754 			   int *pIRcount)  /* imported routes */
4755 {
4756 	struct bgp *bgp;
4757 	struct rfapi *h;
4758 	struct rfapi_import_table *it;
4759 	afi_t afi;
4760 
4761 	int total_active_local = 0;
4762 	int total_active_remote = 0;
4763 	int total_holddown = 0;
4764 	int total_imported = 0;
4765 
4766 	bgp = bgp_get_default(); /* assume 1 instance for now */
4767 	assert(bgp);
4768 
4769 	h = bgp->rfapi;
4770 	assert(h);
4771 
4772 	/*
4773 	 * Iterate over all import tables; do a filtered import
4774 	 * for the afi/safi combination
4775 	 */
4776 
4777 	for (it = h->imports; it; it = it->next) {
4778 
4779 		for (afi = AFI_IP; afi < AFI_MAX; ++afi) {
4780 
4781 			total_active_local += it->local_count[afi];
4782 			total_active_remote += it->remote_count[afi];
4783 			total_holddown += it->holddown_count[afi];
4784 			total_imported += it->imported_count[afi];
4785 		}
4786 	}
4787 
4788 	void *cursor;
4789 	int rc;
4790 
4791 	if (h->import_mac) {
4792 		for (cursor = NULL,
4793 		    rc = skiplist_next(h->import_mac, NULL, (void **)&it,
4794 				       &cursor);
4795 		     !rc; rc = skiplist_next(h->import_mac, NULL, (void **)&it,
4796 					     &cursor)) {
4797 
4798 			total_active_local += it->local_count[AFI_L2VPN];
4799 			total_active_remote += it->remote_count[AFI_L2VPN];
4800 			total_holddown += it->holddown_count[AFI_L2VPN];
4801 			total_imported += it->imported_count[AFI_L2VPN];
4802 		}
4803 	}
4804 
4805 
4806 	if (pALRcount) {
4807 		*pALRcount = total_active_local;
4808 	}
4809 	if (pARRcount) {
4810 		*pARRcount = total_active_remote;
4811 	}
4812 	if (pHRcount) {
4813 		*pHRcount = total_holddown;
4814 	}
4815 	if (pIRcount) {
4816 		*pIRcount = total_imported;
4817 	}
4818 }
4819 
4820 /*------------------------------------------
4821  * rfapiGetHolddownFromLifetime
4822  *
4823  * calculate holddown value based on lifetime
4824  *
4825  * input:
4826  *     lifetime                lifetime
4827  *
4828  * return value:
4829  *     Holddown value based on lifetime, holddown_factor,
4830  *     and RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
4831  *
4832  --------------------------------------------*/
4833 /* hold down time maxes out at RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY */
rfapiGetHolddownFromLifetime(uint32_t lifetime)4834 uint32_t rfapiGetHolddownFromLifetime(uint32_t lifetime)
4835 {
4836 	uint32_t factor;
4837 	struct bgp *bgp;
4838 
4839 	bgp = bgp_get_default();
4840 	if (bgp && bgp->rfapi_cfg)
4841 		factor = bgp->rfapi_cfg->rfp_cfg.holddown_factor;
4842 	else
4843 		factor = RFAPI_RFP_CFG_DEFAULT_HOLDDOWN_FACTOR;
4844 
4845 	if (factor < 100 || lifetime < RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY)
4846 		lifetime = lifetime * factor / 100;
4847 	if (lifetime < RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY)
4848 		return lifetime;
4849 	else
4850 		return RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY;
4851 }
4852