1 /*
2  * Zebra GR related helper functions.
3  *
4  * Portions:
5  *	Copyright (C) 2019 VMware, Inc.
6  *	et al.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms of the GNU General Public License as published by the Free
10  * Software Foundation; either version 2 of the License, or (at your option)
11  * any later version.
12  *
13  * This program is distributed in the hope that it will be useful, but WITHOUT
14  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
16  * more details.
17  *
18  * You should have received a copy of the GNU General Public License along
19  * with this program; see the file COPYING; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include <zebra.h>
24 #include <libgen.h>
25 
26 #include "lib/prefix.h"
27 #include "lib/command.h"
28 #include "lib/if.h"
29 #include "lib/thread.h"
30 #include "lib/stream.h"
31 #include "lib/memory.h"
32 #include "lib/table.h"
33 #include "lib/network.h"
34 #include "lib/sockunion.h"
35 #include "lib/log.h"
36 #include "lib/zclient.h"
37 #include "lib/privs.h"
38 #include "lib/network.h"
39 #include "lib/buffer.h"
40 #include "lib/nexthop.h"
41 #include "lib/vrf.h"
42 #include "lib/libfrr.h"
43 #include "lib/sockopt.h"
44 
45 #include "zebra/zebra_router.h"
46 #include "zebra/debug.h"
47 #include "zebra/zapi_msg.h"
48 
49 
50 /*
51  * Forward declaration.
52  */
53 static struct zserv *zebra_gr_find_stale_client(struct zserv *client);
54 static int32_t zebra_gr_route_stale_delete_timer_expiry(struct thread *thread);
55 static int32_t zebra_gr_delete_stale_routes(struct client_gr_info *info);
56 static void zebra_gr_process_client_stale_routes(struct zserv *client,
57 						 vrf_id_t vrf_id);
58 
/*
 * Debug helper for this file.
 *
 * Hands a printf-style message to zlog_debug(), but only while
 * IS_ZEBRA_DEBUG_EVENT evaluates to true.  The arguments are not evaluated
 * at all when event debugging is off.
 */
#define LOG_GR(fmt, ...)                                                       \
	do {                                                                   \
		if (IS_ZEBRA_DEBUG_EVENT) {                                    \
			zlog_debug(fmt, ##__VA_ARGS__);                        \
		}                                                              \
	} while (0)
67 
68 
69 /*
70  * Client connection functions
71  */
72 
73 /*
74  * Function to clean all the stale clients,
75  * function will also clean up all per instance
76  * capabilities that are exchanged.
77  */
zebra_gr_stale_client_cleanup(struct list * client_list)78 void zebra_gr_stale_client_cleanup(struct list *client_list)
79 {
80 	struct listnode *node, *nnode;
81 	struct zserv *s_client = NULL;
82 	struct client_gr_info *info, *ninfo;
83 
84 	/* Find the stale client */
85 	for (ALL_LIST_ELEMENTS(client_list, node, nnode, s_client)) {
86 
87 		LOG_GR("%s: Stale client %s is being deleted", __func__,
88 		       zebra_route_string(s_client->proto));
89 
90 		TAILQ_FOREACH_SAFE (info, &s_client->gr_info_queue, gr_info,
91 				    ninfo) {
92 
93 			/* Cancel the stale timer */
94 			if (info->t_stale_removal != NULL) {
95 				THREAD_OFF(info->t_stale_removal);
96 				info->t_stale_removal = NULL;
97 				/* Process the stale routes */
98 				thread_execute(
99 				    zrouter.master,
100 				    zebra_gr_route_stale_delete_timer_expiry,
101 				    info, 1);
102 			}
103 		}
104 	}
105 }
106 
107 /*
108  * A helper function to create client info.
109  */
zebra_gr_client_info_create(struct zserv * client)110 static struct client_gr_info *zebra_gr_client_info_create(struct zserv *client)
111 {
112 	struct client_gr_info *info;
113 
114 	info = XCALLOC(MTYPE_TMP, sizeof(struct client_gr_info));
115 
116 	TAILQ_INSERT_TAIL(&(client->gr_info_queue), info, gr_info);
117 	return info;
118 }
119 
120 /*
121  * A helper function to delte and destory client info.
122  */
zebra_gr_client_info_delte(struct zserv * client,struct client_gr_info * info)123 static void zebra_gr_client_info_delte(struct zserv *client,
124 				       struct client_gr_info *info)
125 {
126 	TAILQ_REMOVE(&(client->gr_info_queue), info, gr_info);
127 
128 	THREAD_OFF(info->t_stale_removal);
129 
130 	XFREE(MTYPE_TMP, info->current_prefix);
131 
132 	LOG_GR("%s: Instance info is being deleted for client %s", __func__,
133 	       zebra_route_string(client->proto));
134 
135 	/* Delete all the stale routes. */
136 	info->do_delete = true;
137 	zebra_gr_delete_stale_routes(info);
138 
139 	XFREE(MTYPE_TMP, info);
140 }
141 
142 /*
143  * Function to handle client when it disconnect.
144  */
zebra_gr_client_disconnect(struct zserv * client)145 int32_t zebra_gr_client_disconnect(struct zserv *client)
146 {
147 	struct zserv *stale_client;
148 	struct timeval tv;
149 	struct client_gr_info *info = NULL;
150 
151 	/* Find the stale client */
152 	stale_client = zebra_gr_find_stale_client(client);
153 
154 	/*
155 	 * We should never be here.
156 	 */
157 	if (stale_client) {
158 		LOG_GR("%s: Stale client %s exist, we should not be here!",
159 		       __func__, zebra_route_string(client->proto));
160 		assert(0);
161 	}
162 
163 	client->restart_time = monotime(&tv);
164 
165 	/* For all the GR instance start the starle removal timer. */
166 	TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) {
167 		if (ZEBRA_CLIENT_GR_ENABLED(info->capabilities)
168 		    && (info->t_stale_removal == NULL)) {
169 			thread_add_timer(
170 				zrouter.master,
171 				zebra_gr_route_stale_delete_timer_expiry, info,
172 				info->stale_removal_time,
173 				&info->t_stale_removal);
174 			info->current_afi = AFI_IP;
175 			info->stale_client_ptr = client;
176 			info->stale_client = true;
177 			LOG_GR("%s: Client %s Stale timer update to %d",
178 			       __func__, zebra_route_string(client->proto),
179 			       info->stale_removal_time);
180 		}
181 	}
182 
183 	listnode_add(zrouter.stale_client_list, client);
184 
185 	return 0;
186 }
187 
188 /*
189  * Function to delete stale client
190  */
zebra_gr_delete_stale_client(struct client_gr_info * info)191 static void zebra_gr_delete_stale_client(struct client_gr_info *info)
192 {
193 	struct client_gr_info *bgp_info;
194 	struct zserv *s_client = NULL;
195 
196 	s_client = info->stale_client_ptr;
197 
198 	if (!s_client || !info->stale_client)
199 		return;
200 
201 	/*
202 	 * If there are bgp instances with the stale delete timer pending
203 	 * then stale client is not deleted
204 	 */
205 	if ((s_client->gr_instance_count > 0) && info->gr_enable)
206 		s_client->gr_instance_count--;
207 
208 	TAILQ_REMOVE(&(s_client->gr_info_queue), info, gr_info);
209 
210 	LOG_GR("%s: Client %s gr count %d", __func__,
211 	       zebra_route_string(s_client->proto),
212 	       s_client->gr_instance_count);
213 
214 	TAILQ_FOREACH (bgp_info, &s_client->gr_info_queue, gr_info) {
215 		if (bgp_info->t_stale_removal != NULL)
216 			return;
217 	}
218 
219 	LOG_GR("%s: Client %s is being deleted", __func__,
220 	       zebra_route_string(s_client->proto));
221 
222 	TAILQ_INIT(&(s_client->gr_info_queue));
223 	listnode_delete(zrouter.stale_client_list, s_client);
224 	if (info->stale_client)
225 		XFREE(MTYPE_TMP, s_client);
226 	XFREE(MTYPE_TMP, info);
227 }
228 
229 /*
230  * Function to find stale client.
231  */
zebra_gr_find_stale_client(struct zserv * client)232 static struct zserv *zebra_gr_find_stale_client(struct zserv *client)
233 {
234 	struct listnode *node, *nnode;
235 	struct zserv *stale_client;
236 
237 	/* Find the stale client */
238 	for (ALL_LIST_ELEMENTS(zrouter.stale_client_list, node, nnode,
239 			       stale_client)) {
240 		if (client->proto == stale_client->proto
241 		    && client->instance == stale_client->instance) {
242 			return stale_client;
243 		}
244 	}
245 
246 	return NULL;
247 }
248 
249 /*
250  * Function to handle reconnect of client post restart.
251  */
zebra_gr_client_reconnect(struct zserv * client)252 void zebra_gr_client_reconnect(struct zserv *client)
253 {
254 	struct listnode *node, *nnode;
255 	struct zserv *old_client = NULL;
256 	struct client_gr_info *info = NULL;
257 
258 	/* Find the stale client */
259 	for (ALL_LIST_ELEMENTS(zrouter.stale_client_list, node, nnode,
260 			       old_client)) {
261 		if (client->proto == old_client->proto
262 		    && client->instance == old_client->instance)
263 			break;
264 	}
265 
266 	/* Copy the timers */
267 	if (!old_client)
268 		return;
269 
270 	client->gr_instance_count = old_client->gr_instance_count;
271 	client->restart_time = old_client->restart_time;
272 
273 	LOG_GR("%s : old client %s, gr_instance_count %d", __func__,
274 	       zebra_route_string(old_client->proto),
275 	       old_client->gr_instance_count);
276 
277 	if (TAILQ_FIRST(&old_client->gr_info_queue)) {
278 		TAILQ_CONCAT(&client->gr_info_queue, &old_client->gr_info_queue,
279 			     gr_info);
280 		TAILQ_INIT(&old_client->gr_info_queue);
281 	}
282 
283 	TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) {
284 		info->stale_client_ptr = client;
285 		info->stale_client = false;
286 	}
287 
288 	/* Delete the stale client */
289 	listnode_delete(zrouter.stale_client_list, old_client);
290 	/* Delete old client */
291 	XFREE(MTYPE_TMP, old_client);
292 }
293 
294 /*
295  * Functions to deal with capabilities
296  */
297 
298 /*
299  * Update the graceful restart information
300  * for the client instance.
301  * This function handles all the capabilties that are received.
302  */
zebra_client_update_info(struct zserv * client,struct zapi_cap * api)303 static void zebra_client_update_info(struct zserv *client, struct zapi_cap *api)
304 {
305 	struct client_gr_info *info = NULL;
306 
307 	/* Find the bgp information for the specified vrf id */
308 	TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) {
309 		if (info->vrf_id == api->vrf_id)
310 			break;
311 	}
312 
313 
314 	/*
315 	 * If the command is delete, then cancel the stale timer and
316 	 * delete the bgp info
317 	 */
318 	switch (api->cap) {
319 	case ZEBRA_CLIENT_GR_DISABLE:
320 		if (!info)
321 			return;
322 
323 		LOG_GR("%s: Client %s instance GR disabled count %d", __func__,
324 		       zebra_route_string(client->proto),
325 		       client->gr_instance_count);
326 
327 		if ((info->gr_enable) && (client->gr_instance_count > 0))
328 			client->gr_instance_count--;
329 
330 		zebra_gr_client_info_delte(client, info);
331 		break;
332 	case ZEBRA_CLIENT_GR_CAPABILITIES:
333 		/* Allocate bgp info */
334 		if (!info)
335 			info = zebra_gr_client_info_create(client);
336 
337 		/* Udpate other parameters */
338 		if (!info->gr_enable) {
339 			client->gr_instance_count++;
340 
341 			LOG_GR("%s: Cient %s GR enabled count %d", __func__,
342 			       zebra_route_string(client->proto),
343 			       client->gr_instance_count);
344 
345 			info->capabilities = api->cap;
346 			info->stale_removal_time = api->stale_removal_time;
347 			info->vrf_id = api->vrf_id;
348 			info->gr_enable = true;
349 		}
350 		break;
351 	case ZEBRA_CLIENT_RIB_STALE_TIME:
352 		LOG_GR("%s: Client %s stale time update event", __func__,
353 		       zebra_route_string(client->proto));
354 
355 		/* Update the stale removal timer */
356 		if (info && info->t_stale_removal == NULL) {
357 
358 			LOG_GR("%s: Stale time: %d is now update to: %d",
359 			       __func__, info->stale_removal_time,
360 			       api->stale_removal_time);
361 
362 			info->stale_removal_time = api->stale_removal_time;
363 		}
364 
365 		break;
366 	case ZEBRA_CLIENT_ROUTE_UPDATE_COMPLETE:
367 		LOG_GR(
368 		   "%s: Client %s route update complete for AFI %d, SAFI %d",
369 		   __func__, zebra_route_string(client->proto), api->afi,
370 		   api->safi);
371 		if (info)
372 			info->route_sync[api->afi][api->safi] = true;
373 		break;
374 	case ZEBRA_CLIENT_ROUTE_UPDATE_PENDING:
375 		LOG_GR("%s: Client %s route update pending for AFI %d, SAFI %d",
376 		       __func__, zebra_route_string(client->proto), api->afi,
377 		       api->safi);
378 		if (info)
379 			info->af_enabled[api->afi][api->safi] = true;
380 		break;
381 	}
382 }
383 
384 /*
385  * Handler for capabilities that are received from client.
386  */
zebra_client_capabilities_handler(struct zserv * client,struct zapi_cap * api)387 static void zebra_client_capabilities_handler(struct zserv *client,
388 					      struct zapi_cap *api)
389 {
390 	switch (api->cap) {
391 	case ZEBRA_CLIENT_GR_CAPABILITIES:
392 	case ZEBRA_CLIENT_ROUTE_UPDATE_PENDING:
393 	case ZEBRA_CLIENT_GR_DISABLE:
394 	case ZEBRA_CLIENT_RIB_STALE_TIME:
395 		/*
396 		 * For all the cases we need to update the client info.
397 		 */
398 		zebra_client_update_info(client, api);
399 		break;
400 	case ZEBRA_CLIENT_ROUTE_UPDATE_COMPLETE:
401 		/*
402 		 * After client info has been updated delete all
403 		 * stale routes
404 		 */
405 		zebra_client_update_info(client, api);
406 		zebra_gr_process_client_stale_routes(client, api->vrf_id);
407 		break;
408 	}
409 }
410 
411 /*
412  * Function to decode and call appropriate functions
413  * to handle client capabilities.
414  */
zread_client_capabilities(ZAPI_HANDLER_ARGS)415 void zread_client_capabilities(ZAPI_HANDLER_ARGS)
416 {
417 	struct zapi_cap api;
418 	struct stream *s;
419 
420 	s = msg;
421 
422 	if (zapi_capabilities_decode(s, &api)) {
423 		LOG_GR("%s: Error in reading capabilities for client %s",
424 		       __func__, zebra_route_string(client->proto));
425 		return;
426 	}
427 
428 	/* GR only for dynamic clients */
429 	if (client->proto <= ZEBRA_ROUTE_CONNECT) {
430 		LOG_GR("%s: GR capabilities for client %s not supported",
431 		       __func__, zebra_route_string(client->proto));
432 		return;
433 	}
434 	/* Call the capabilities handler */
435 	zebra_client_capabilities_handler(client, &api);
436 }
437 
438 
439 /*
440  * Stale route handling
441  */
442 
443 /*
444  * Delete all the stale routes that have not been refreshed
445  * post restart.
446  */
zebra_gr_route_stale_delete_timer_expiry(struct thread * thread)447 static int32_t zebra_gr_route_stale_delete_timer_expiry(struct thread *thread)
448 {
449 	struct client_gr_info *info;
450 	int32_t cnt = 0;
451 	struct zserv *client;
452 
453 	info = THREAD_ARG(thread);
454 	info->t_stale_removal = NULL;
455 	client = (struct zserv *)info->stale_client_ptr;
456 
457 	/* Set the flag to indicate all stale route deletion */
458 	if (thread->u.val == 1)
459 		info->do_delete = true;
460 
461 	cnt = zebra_gr_delete_stale_routes(info);
462 
463 	/* Retsart the timer */
464 	if (cnt > 0) {
465 		LOG_GR("%s: Client %s processed %d routes. Start timer again",
466 		       __func__, zebra_route_string(client->proto), cnt);
467 
468 		thread_add_timer(zrouter.master,
469 				 zebra_gr_route_stale_delete_timer_expiry, info,
470 				 ZEBRA_DEFAULT_STALE_UPDATE_DELAY,
471 				 &info->t_stale_removal);
472 	} else {
473 		/* No routes to delete for the VRF */
474 		LOG_GR("%s: Client %s all starle routes processed", __func__,
475 		       zebra_route_string(client->proto));
476 
477 		XFREE(MTYPE_TMP, info->current_prefix);
478 		info->current_afi = 0;
479 		zebra_gr_delete_stale_client(info);
480 	}
481 	return 0;
482 }
483 
484 
485 /*
486  * Function to process to check if route entry is stale
487  * or has been updated.
488  */
zebra_gr_process_route_entry(struct zserv * client,struct route_node * rn,struct route_entry * re)489 static void zebra_gr_process_route_entry(struct zserv *client,
490 					 struct route_node *rn,
491 					 struct route_entry *re)
492 {
493 	char buf[PREFIX2STR_BUFFER];
494 
495 	if ((client == NULL) || (rn == NULL) || (re == NULL))
496 		return;
497 
498 	/* If the route is not refreshed after restart, delete the entry */
499 	if (re->uptime < client->restart_time) {
500 		if (IS_ZEBRA_DEBUG_RIB) {
501 			prefix2str(&rn->p, buf, sizeof(buf));
502 			zlog_debug("%s: Client %s stale route %s is deleted",
503 				   __func__, zebra_route_string(client->proto),
504 				   buf);
505 		}
506 		rib_delnode(rn, re);
507 	}
508 }
509 
510 /*
511  * This function walks through the route table for all vrf and deletes
512  * the stale routes for the restarted client specified by the protocol
513  * type
514  */
zebra_gr_delete_stale_route(struct client_gr_info * info,struct zebra_vrf * zvrf)515 static int32_t zebra_gr_delete_stale_route(struct client_gr_info *info,
516 					   struct zebra_vrf *zvrf)
517 {
518 	struct route_node *rn, *curr;
519 	struct route_entry *re;
520 	struct route_entry *next;
521 	struct route_table *table;
522 	int32_t n = 0;
523 	afi_t afi, curr_afi;
524 	uint8_t proto;
525 	uint16_t instance;
526 	struct zserv *s_client;
527 
528 	if ((info == NULL) || (zvrf == NULL))
529 		return -1;
530 
531 	s_client = info->stale_client_ptr;
532 	if (s_client == NULL) {
533 		LOG_GR("%s: Stale client not present", __func__);
534 		return -1;
535 	}
536 
537 	proto = s_client->proto;
538 	instance = s_client->instance;
539 	curr_afi = info->current_afi;
540 
541 	LOG_GR("%s: Client %s stale routes are being deleted", __func__,
542 	       zebra_route_string(proto));
543 
544 	/* Process routes for all AFI */
545 	for (afi = curr_afi; afi < AFI_MAX; afi++) {
546 		table = zvrf->table[afi][SAFI_UNICAST];
547 
548 		if (table) {
549 			/*
550 			 * If the current prefix is NULL then get the first
551 			 * route entry in the table
552 			 */
553 			if (info->current_prefix == NULL) {
554 				rn = route_top(table);
555 				if (rn == NULL)
556 					continue;
557 				curr = rn;
558 			} else
559 				/* Get the next route entry */
560 				curr = route_table_get_next(
561 					table, info->current_prefix);
562 
563 			for (rn = curr; rn; rn = srcdest_route_next(rn)) {
564 				RNODE_FOREACH_RE_SAFE (rn, re, next) {
565 					if (CHECK_FLAG(re->status,
566 						       ROUTE_ENTRY_REMOVED))
567 						continue;
568 					/* If the route refresh is received
569 					 * after restart then do not delete
570 					 * the route
571 					 */
572 					if (re->type == proto
573 					    && re->instance == instance) {
574 						zebra_gr_process_route_entry(
575 							s_client, rn, re);
576 						n++;
577 					}
578 
579 					/* If the max route count is reached
580 					 * then timer thread will be restarted
581 					 * Store the current prefix and afi
582 					 */
583 					if ((n >= ZEBRA_MAX_STALE_ROUTE_COUNT)
584 					    && (info->do_delete == false)) {
585 						info->current_afi = afi;
586 						info->current_prefix = XCALLOC(
587 							MTYPE_TMP,
588 							sizeof(struct prefix));
589 						prefix_copy(
590 							info->current_prefix,
591 							&rn->p);
592 						return n;
593 					}
594 				}
595 			}
596 		}
597 		/*
598 		 * Reset the current prefix to indicate processing completion
599 		 * of the current AFI
600 		 */
601 		XFREE(MTYPE_TMP, info->current_prefix);
602 	}
603 	return 0;
604 }
605 
606 /*
607  * Delete the stale routes when client is restarted and routes are not
608  * refreshed within the stale timeout
609  */
zebra_gr_delete_stale_routes(struct client_gr_info * info)610 static int32_t zebra_gr_delete_stale_routes(struct client_gr_info *info)
611 {
612 	struct vrf *vrf;
613 	struct zebra_vrf *zvrf;
614 	uint64_t cnt = 0;
615 
616 	if (info == NULL)
617 		return -1;
618 
619 	/* Get the current VRF */
620 	vrf = vrf_lookup_by_id(info->vrf_id);
621 	if (vrf == NULL) {
622 		LOG_GR("%s: Invalid VRF %d", __func__, info->vrf_id);
623 		return -1;
624 	}
625 
626 	zvrf = vrf->info;
627 	if (zvrf == NULL) {
628 		LOG_GR("%s: Invalid VRF entry %d", __func__, info->vrf_id);
629 		return -1;
630 	}
631 
632 	cnt = zebra_gr_delete_stale_route(info, zvrf);
633 	return cnt;
634 }
635 
636 /*
637  * This function checks if route update for all AFI, SAFI is completed
638  * and cancels the stale timer
639  */
zebra_gr_process_client_stale_routes(struct zserv * client,vrf_id_t vrf_id)640 static void zebra_gr_process_client_stale_routes(struct zserv *client,
641 						 vrf_id_t vrf_id)
642 {
643 	struct client_gr_info *info = NULL;
644 	afi_t afi;
645 	safi_t safi;
646 
647 	TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) {
648 		if (info->vrf_id == vrf_id)
649 			break;
650 	}
651 
652 	if (info == NULL)
653 		return;
654 
655 	/* Check if route update completed for all AFI, SAFI */
656 	for (afi = AFI_IP; afi < AFI_MAX; afi++)
657 		for (safi = SAFI_UNICAST; safi <= SAFI_MPLS_VPN; safi++) {
658 			if (info->af_enabled[afi][safi]) {
659 				if (!info->route_sync[afi][safi]) {
660 					LOG_GR(
661 					   "%s: Client %s route update not completed for AFI %d, SAFI %d",
662 					   __func__, zebra_route_string(
663 							    client->proto),
664 					   afi, safi);
665 					return;
666 				}
667 			}
668 		}
669 
670 	/*
671 	 * Route update completed for all AFI, SAFI
672 	 * Cancel the stale timer and process the routes
673 	 */
674 	if (info->t_stale_removal) {
675 		LOG_GR("%s: Client %s cancled stale delete timer vrf %d",
676 		       __func__, zebra_route_string(client->proto),
677 		       info->vrf_id);
678 		THREAD_OFF(info->t_stale_removal);
679 		thread_execute(zrouter.master,
680 			       zebra_gr_route_stale_delete_timer_expiry, info,
681 			       0);
682 	}
683 }
684