/* BGP Keepalives.
 * Implements a producer thread to generate BGP keepalives for peers.
 * Copyright (C) 2017 Cumulus Networks, Inc.
 * Quentin Young
 *
 * This file is part of FRRouting.
 *
 * FRRouting is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2, or (at your option) any later
 * version.
 *
 * FRRouting is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* clang-format off */
#include <zebra.h>
#include <pthread.h>		// for pthread_mutex_lock, pthread_mutex_unlock

#include "frr_pthread.h"        // for frr_pthread
#include "hash.h"		// for hash, hash_clean, hash_create_size...
#include "log.h"		// for zlog_debug
#include "memory.h"		// for MTYPE_TMP, XFREE, XCALLOC, XMALLOC
#include "monotime.h"		// for monotime, monotime_since

#include "bgpd/bgpd.h"          // for peer, PEER_THREAD_KEEPALIVES_ON, peer...
#include "bgpd/bgp_debug.h"	// for bgp_debug_neighbor_events
#include "bgpd/bgp_packet.h"	// for bgp_keepalive_send
#include "bgpd/bgp_keepalives.h"
/* clang-format on */

/*
 * Peer KeepAlive Timer.
 * Associates a peer with the time of its last keepalive.
 */
struct pkat {
	/* the peer to send keepalives to */
	struct peer *peer;
	/* absolute time of last keepalive sent */
	struct timeval last;
};

/* List of peers we are sending keepalives for, and associated mutex. */
static pthread_mutex_t *peerhash_mtx;
static pthread_cond_t *peerhash_cond;
static struct hash *peerhash;
54 

static struct pkat *pkat_new(struct peer *peer)
{
	struct pkat *pkat = XMALLOC(MTYPE_TMP, sizeof(struct pkat));
	pkat->peer = peer;
	monotime(&pkat->last);
	return pkat;
}

static void pkat_del(void *pkat)
{
	XFREE(MTYPE_TMP, pkat);
}

/*
 * Callback for hash_iterate. Determines if a peer needs a keepalive and, if
 * so, generates and sends it.
 *
 * For any given peer, if the elapsed time since its last keepalive exceeds its
 * configured keepalive timer, a keepalive is sent to the peer and its
 * last-sent time is reset. Additionally, if the elapsed time does not exceed
 * the configured keepalive timer but the time until the next keepalive is due
 * falls within a hardcoded tolerance, a keepalive is sent as if the configured
 * timer had been exceeded. Doing this helps alleviate nanosecond sleeps
 * between ticks by grouping together peers whose keepalives are due at
 * roughly the same time. This tolerance value is arbitrarily chosen to be
 * 100ms.
 *
 * In addition, this function calculates the maximum amount of time that the
 * keepalive thread can sleep before another tick needs to take place. This is
 * equivalent to the shortest time until a keepalive is due for any one peer.
 * That value is written into the timeval passed via arg (0 if the thread may
 * wait indefinitely).
 */
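/*
 * Example (illustrative numbers): with a 60s keepalive timer and 59.95s
 * elapsed since the last keepalive, diff is 50ms. That is below the 100ms
 * tolerance, so the keepalive is sent on this tick rather than waking the
 * thread again 50ms later.
 */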
static void peer_process(struct hash_bucket *hb, void *arg)
{
	struct pkat *pkat = hb->data;

	struct timeval *next_update = arg;

	static struct timeval elapsed;  // elapsed time since keepalive
	static struct timeval ka = {0}; // peer->v_keepalive as a timeval
	static struct timeval diff;     // ka - elapsed

	static const struct timeval tolerance = {0, 100000};

	uint32_t v_ka = atomic_load_explicit(&pkat->peer->v_keepalive,
					     memory_order_relaxed);

	/* 0 keepalive timer means no keepalives */
	if (v_ka == 0)
		return;

	/* calculate elapsed time since last keepalive */
	monotime_since(&pkat->last, &elapsed);

	/* calculate difference between elapsed time and configured time */
	ka.tv_sec = v_ka;
	timersub(&ka, &elapsed, &diff);

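	/* Send if the keepalive timer has already expired, or if it will
	 * expire within the 100ms tolerance window described above. */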
	int send_keepalive =
		elapsed.tv_sec >= ka.tv_sec || timercmp(&diff, &tolerance, <);

	if (send_keepalive) {
		if (bgp_debug_neighbor_events(pkat->peer))
			zlog_debug("%s [FSM] Timer (keepalive timer expire)",
				   pkat->peer->host);

		bgp_keepalive_send(pkat->peer);
		monotime(&pkat->last);
		memset(&elapsed, 0x00, sizeof(struct timeval));
		diff = ka;
	}

	/* if calculated next update for this peer < current delay, use it */
	if (next_update->tv_sec < 0 || timercmp(&diff, next_update, <))
		*next_update = diff;
}

static bool peer_hash_cmp(const void *f, const void *s)
{
	const struct pkat *p1 = f;
	const struct pkat *p2 = s;

	return p1->peer == p2->peer;
}

static unsigned int peer_hash_key(const void *arg)
{
	const struct pkat *pkat = arg;
	return (uintptr_t)pkat->peer;
}

/* Cleanup handler / deinitializer. */
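/* Registered with pthread_cleanup_push() in bgp_keepalives_start() and run
 * when the keepalive thread leaves its main loop. The peerhash mutex is held
 * for the lifetime of that loop, hence the unlock here before it is
 * destroyed. */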
static void bgp_keepalives_finish(void *arg)
{
	if (peerhash) {
		hash_clean(peerhash, pkat_del);
		hash_free(peerhash);
	}

	peerhash = NULL;

	pthread_mutex_unlock(peerhash_mtx);
	pthread_mutex_destroy(peerhash_mtx);
	pthread_cond_destroy(peerhash_cond);

	XFREE(MTYPE_TMP, peerhash_mtx);
	XFREE(MTYPE_TMP, peerhash_cond);
}

/*
 * Entry function for peer keepalive generation pthread.
 */
void *bgp_keepalives_start(void *arg)
{
	struct frr_pthread *fpt = arg;
	fpt->master->owner = pthread_self();

	struct timeval currtime = {0, 0};
	struct timeval aftertime = {0, 0};
	struct timeval next_update = {0, 0};
	struct timespec next_update_ts = {0, 0};

	peerhash_mtx = XCALLOC(MTYPE_TMP, sizeof(pthread_mutex_t));
	peerhash_cond = XCALLOC(MTYPE_TMP, sizeof(pthread_cond_t));

	/* initialize mutex */
	pthread_mutex_init(peerhash_mtx, NULL);

	/* use monotonic clock with condition variable */
	pthread_condattr_t attrs;
	pthread_condattr_init(&attrs);
	pthread_condattr_setclock(&attrs, CLOCK_MONOTONIC);
	pthread_cond_init(peerhash_cond, &attrs);
	pthread_condattr_destroy(&attrs);

	/*
	 * We are not using normal FRR pthread mechanics and are
	 * not using fpt_run
	 */
	frr_pthread_set_name(fpt);

	/* initialize peer hashtable */
	peerhash = hash_create_size(2048, peer_hash_key, peer_hash_cmp, NULL);
	pthread_mutex_lock(peerhash_mtx);

	/* register cleanup handler */
	pthread_cleanup_push(&bgp_keepalives_finish, NULL);

	/* notify anybody waiting on us that we are done starting up */
	frr_pthread_notify_running(fpt);

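	/* Main loop: sleep until the earliest keepalive deadline when peers
	 * are registered, or until signalled (a peer was added or we were
	 * asked to stop), then take a tick. */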
	while (atomic_load_explicit(&fpt->running, memory_order_relaxed)) {
		if (peerhash->count > 0)
			pthread_cond_timedwait(peerhash_cond, peerhash_mtx,
					       &next_update_ts);
		else
			while (peerhash->count == 0
			       && atomic_load_explicit(&fpt->running,
						       memory_order_relaxed))
				pthread_cond_wait(peerhash_cond, peerhash_mtx);

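		/* Tick: walk all peers, sending keepalives where due.
		 * next_update is seeded with -1 as a "no deadline yet"
		 * sentinel; peer_process() lowers it to the shortest time
		 * until any peer's next keepalive is due. */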
		monotime(&currtime);

		next_update.tv_sec = -1;

		hash_iterate(peerhash, peer_process, &next_update);
		if (next_update.tv_sec == -1)
			memset(&next_update, 0x00, sizeof(next_update));

		monotime_since(&currtime, &aftertime);

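		/* Convert the relative interval into an absolute deadline on
		 * the monotonic clock for the next pthread_cond_timedwait(). */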
		timeradd(&currtime, &next_update, &next_update);
		TIMEVAL_TO_TIMESPEC(&next_update, &next_update_ts);
	}

	/* clean up */
	pthread_cleanup_pop(1);

	return NULL;
}

/* --- thread external functions ------------------------------------------- */

void bgp_keepalives_on(struct peer *peer)
{
	if (CHECK_FLAG(peer->thread_flags, PEER_THREAD_KEEPALIVES_ON))
		return;

	struct frr_pthread *fpt = bgp_pth_ka;
	assert(fpt->running);

	/* placeholder bucket data to use for fast key lookups */
	static struct pkat holder = {0};

	/*
	 * We need to ensure that bgp_keepalives_init was called first
	 */
	assert(peerhash_mtx);

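	/* Register the peer in the hash if not already present (taking a
	 * reference via peer_lock()), then wake the keepalive thread so it
	 * recomputes its sleep deadline. */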
	frr_with_mutex(peerhash_mtx) {
		holder.peer = peer;
		if (!hash_lookup(peerhash, &holder)) {
			struct pkat *pkat = pkat_new(peer);
			hash_get(peerhash, pkat, hash_alloc_intern);
			peer_lock(peer);
		}
		SET_FLAG(peer->thread_flags, PEER_THREAD_KEEPALIVES_ON);
	}
	bgp_keepalives_wake();
}

void bgp_keepalives_off(struct peer *peer)
{
	if (!CHECK_FLAG(peer->thread_flags, PEER_THREAD_KEEPALIVES_ON))
		return;

	struct frr_pthread *fpt = bgp_pth_ka;
	assert(fpt->running);

	/* placeholder bucket data to use for fast key lookups */
	static struct pkat holder = {0};

	/*
	 * We need to ensure that bgp_keepalives_init was called first
	 */
	assert(peerhash_mtx);

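	/* Remove the peer from the hash, if present, and drop the reference
	 * taken in bgp_keepalives_on(). */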
	frr_with_mutex(peerhash_mtx) {
		holder.peer = peer;
		struct pkat *res = hash_release(peerhash, &holder);
		if (res) {
			pkat_del(res);
			peer_unlock(peer);
		}
		UNSET_FLAG(peer->thread_flags, PEER_THREAD_KEEPALIVES_ON);
	}
}

void bgp_keepalives_wake(void)
{
	frr_with_mutex(peerhash_mtx) {
		pthread_cond_signal(peerhash_cond);
	}
}

int bgp_keepalives_stop(struct frr_pthread *fpt, void **result)
{
	assert(fpt->running);

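	/* Clear the running flag, then signal the condition variable so the
	 * thread wakes up, observes the flag, exits its loop, and runs its
	 * cleanup handler; finally join it. */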
	atomic_store_explicit(&fpt->running, false, memory_order_relaxed);
	bgp_keepalives_wake();

	pthread_join(fpt->thread, result);
	return 0;
}