/* BGP network related functions
2  * Copyright (C) 1999 Kunihiro Ishiguro
3  *
4  * This file is part of GNU Zebra.
5  *
6  * GNU Zebra is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the
8  * Free Software Foundation; either version 2, or (at your option) any
9  * later version.
10  *
11  * GNU Zebra is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with this program; see the file COPYING; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <zebra.h>
22 
23 #include "thread.h"
24 #include "sockunion.h"
25 #include "sockopt.h"
26 #include "memory.h"
27 #include "log.h"
28 #include "if.h"
29 #include "prefix.h"
30 #include "command.h"
31 #include "privs.h"
32 #include "linklist.h"
33 #include "network.h"
34 #include "queue.h"
35 #include "hash.h"
36 #include "filter.h"
37 #include "ns.h"
38 #include "lib_errors.h"
39 #include "nexthop.h"
40 
41 #include "bgpd/bgpd.h"
42 #include "bgpd/bgp_open.h"
43 #include "bgpd/bgp_fsm.h"
44 #include "bgpd/bgp_attr.h"
45 #include "bgpd/bgp_debug.h"
46 #include "bgpd/bgp_errors.h"
47 #include "bgpd/bgp_network.h"
48 #include "bgpd/bgp_zebra.h"
49 
50 extern struct zebra_privs_t bgpd_privs;
51 
52 static char *bgp_get_bound_name(struct peer *peer);
53 
54 /* BGP listening socket. */
55 struct bgp_listener {
56 	int fd;
57 	union sockunion su;
58 	struct thread *thread;
59 	struct bgp *bgp;
60 	char *name;
61 };
62 
bgp_dump_listener_info(struct vty * vty)63 void bgp_dump_listener_info(struct vty *vty)
64 {
65 	struct listnode *node;
66 	struct bgp_listener *listener;
67 
68 	vty_out(vty, "Name             fd Address\n");
69 	vty_out(vty, "---------------------------\n");
70 	for (ALL_LIST_ELEMENTS_RO(bm->listen_sockets, node, listener)) {
71 		char buf[SU_ADDRSTRLEN];
72 
73 		vty_out(vty, "%-16s %d %s\n",
74 			listener->name ? listener->name : VRF_DEFAULT_NAME,
75 			listener->fd,
76 			sockunion2str(&listener->su, buf, sizeof(buf)));
77 	}
78 }
79 
80 /*
81  * Set MD5 key for the socket, for the given IPv4 peer address.
82  * If the password is NULL or zero-length, the option will be disabled.
83  */
bgp_md5_set_socket(int socket,union sockunion * su,uint16_t prefixlen,const char * password)84 static int bgp_md5_set_socket(int socket, union sockunion *su,
85 			      uint16_t prefixlen, const char *password)
86 {
87 	int ret = -1;
88 	int en = ENOSYS;
89 #if HAVE_DECL_TCP_MD5SIG
90 	union sockunion su2;
91 #endif /* HAVE_TCP_MD5SIG */
92 
93 	assert(socket >= 0);
94 
95 #if HAVE_DECL_TCP_MD5SIG
96 	/* Ensure there is no extraneous port information. */
97 	memcpy(&su2, su, sizeof(union sockunion));
98 	if (su2.sa.sa_family == AF_INET)
99 		su2.sin.sin_port = 0;
100 	else
101 		su2.sin6.sin6_port = 0;
102 
103 	/* For addresses, use the non-extended signature functionality */
104 	if ((su2.sa.sa_family == AF_INET && prefixlen == IPV4_MAX_PREFIXLEN)
105 	    || (su2.sa.sa_family == AF_INET6
106 		&& prefixlen == IPV6_MAX_PREFIXLEN))
107 		ret = sockopt_tcp_signature(socket, &su2, password);
108 	else
109 		ret = sockopt_tcp_signature_ext(socket, &su2, prefixlen,
110 						password);
111 	en = errno;
112 #endif /* HAVE_TCP_MD5SIG */
113 
114 	if (ret < 0) {
115 		char sabuf[SU_ADDRSTRLEN];
116 		sockunion2str(su, sabuf, sizeof(sabuf));
117 
118 		switch (ret) {
119 		case -2:
120 			flog_warn(
121 				EC_BGP_NO_TCP_MD5,
122 				"Unable to set TCP MD5 option on socket for peer %s (sock=%d): This platform does not support MD5 auth for prefixes",
123 				sabuf, socket);
124 			break;
125 		default:
126 			flog_warn(
127 				EC_BGP_NO_TCP_MD5,
128 				"Unable to set TCP MD5 option on socket for peer %s (sock=%d): %s",
129 				sabuf, socket, safe_strerror(en));
130 		}
131 	}
132 
133 	return ret;
134 }
135 
/*
 * Helper for bgp_connect(): set the MD5 key on an outbound socket with
 * privileges raised.  Returns the result of bgp_md5_set_socket(), or -1
 * when built without HAVE_DECL_TCP_MD5SIG.
 */
static int bgp_md5_set_connect(int socket, union sockunion *su,
			       uint16_t prefixlen, const char *password)
{
#if HAVE_DECL_TCP_MD5SIG
	int rc = -1;

	frr_with_privs(&bgpd_privs) {
		rc = bgp_md5_set_socket(socket, su, prefixlen, password);
	}

	return rc;
#else
	return -1;
#endif /* HAVE_DECL_TCP_MD5SIG */
}
150 
bgp_md5_set_password(struct peer * peer,const char * password)151 static int bgp_md5_set_password(struct peer *peer, const char *password)
152 {
153 	struct listnode *node;
154 	int ret = 0;
155 	struct bgp_listener *listener;
156 
157 	/*
158 	 * Set or unset the password on the listen socket(s). Outbound
159 	 * connections are taken care of in bgp_connect() below.
160 	 */
161 	frr_with_privs(&bgpd_privs) {
162 		for (ALL_LIST_ELEMENTS_RO(bm->listen_sockets, node, listener))
163 			if (listener->su.sa.sa_family ==
164 			    peer->su.sa.sa_family) {
165 				uint16_t prefixlen =
166 					peer->su.sa.sa_family == AF_INET
167 					? IPV4_MAX_PREFIXLEN
168 					: IPV6_MAX_PREFIXLEN;
169 
170 				/*
171 				 * if we have stored a BGP vrf instance in the
172 				 * listener it must match the bgp instance in
173 				 * the peer otherwise the peer bgp instance
174 				 * must be the default vrf or a view instance
175 				 */
176 				if (!listener->bgp) {
177 					if (peer->bgp->vrf_id != VRF_DEFAULT
178 					    && peer->bgp->inst_type
179 						       != BGP_INSTANCE_TYPE_VIEW)
180 						continue;
181 				} else if (listener->bgp != peer->bgp)
182 					continue;
183 
184 				ret = bgp_md5_set_socket(listener->fd,
185 							 &peer->su, prefixlen,
186 							 password);
187 				break;
188 			}
189 	}
190 	return ret;
191 }
192 
bgp_md5_set_prefix(struct bgp * bgp,struct prefix * p,const char * password)193 int bgp_md5_set_prefix(struct bgp *bgp, struct prefix *p, const char *password)
194 {
195 	int ret = 0;
196 	union sockunion su;
197 	struct listnode *node;
198 	struct bgp_listener *listener;
199 
200 	/* Set or unset the password on the listen socket(s). */
201 	frr_with_privs(&bgpd_privs) {
202 		for (ALL_LIST_ELEMENTS_RO(bm->listen_sockets, node, listener))
203 			if (listener->su.sa.sa_family == p->family
204 			    && ((bgp->vrf_id == VRF_DEFAULT)
205 				|| (listener->bgp == bgp))) {
206 				prefix2sockunion(p, &su);
207 				ret = bgp_md5_set_socket(listener->fd, &su,
208 							 p->prefixlen,
209 							 password);
210 				break;
211 			}
212 	}
213 
214 	return ret;
215 }
216 
/*
 * Clear the MD5 key for prefix 'p': same as bgp_md5_set_prefix() with a
 * NULL password.
 */
int bgp_md5_unset_prefix(struct bgp *bgp, struct prefix *p)
{
	const char *no_password = NULL;

	return bgp_md5_set_prefix(bgp, p, no_password);
}
221 
bgp_md5_set(struct peer * peer)222 int bgp_md5_set(struct peer *peer)
223 {
224 	/* Set the password from listen socket. */
225 	return bgp_md5_set_password(peer, peer->password);
226 }
227 
/* Remove the peer's password from the matching listen socket. */
int bgp_md5_unset(struct peer *peer)
{
	int rc = bgp_md5_set_password(peer, NULL);

	return rc;
}
233 
bgp_set_socket_ttl(struct peer * peer,int bgp_sock)234 int bgp_set_socket_ttl(struct peer *peer, int bgp_sock)
235 {
236 	char buf[INET_ADDRSTRLEN];
237 	int ret = 0;
238 
239 	/* In case of peer is EBGP, we should set TTL for this connection.  */
240 	if (!peer->gtsm_hops && (peer_sort_lookup(peer) == BGP_PEER_EBGP)) {
241 		ret = sockopt_ttl(peer->su.sa.sa_family, bgp_sock, peer->ttl);
242 		if (ret) {
243 			flog_err(
244 				EC_LIB_SOCKET,
245 				"%s: Can't set TxTTL on peer (rtrid %s) socket, err = %d",
246 				__func__,
247 				inet_ntop(AF_INET, &peer->remote_id, buf,
248 					  sizeof(buf)),
249 				errno);
250 			return ret;
251 		}
252 	} else if (peer->gtsm_hops) {
253 		/* On Linux, setting minttl without setting ttl seems to mess
254 		   with the
255 		   outgoing ttl. Therefore setting both.
256 		*/
257 		ret = sockopt_ttl(peer->su.sa.sa_family, bgp_sock, MAXTTL);
258 		if (ret) {
259 			flog_err(
260 				EC_LIB_SOCKET,
261 				"%s: Can't set TxTTL on peer (rtrid %s) socket, err = %d",
262 				__func__,
263 				inet_ntop(AF_INET, &peer->remote_id, buf,
264 					  sizeof(buf)),
265 				errno);
266 			return ret;
267 		}
268 		ret = sockopt_minttl(peer->su.sa.sa_family, bgp_sock,
269 				     MAXTTL + 1 - peer->gtsm_hops);
270 		if (ret) {
271 			flog_err(
272 				EC_LIB_SOCKET,
273 				"%s: Can't set MinTTL on peer (rtrid %s) socket, err = %d",
274 				__func__,
275 				inet_ntop(AF_INET, &peer->remote_id, buf,
276 					  sizeof(buf)),
277 				errno);
278 			return ret;
279 		}
280 	}
281 
282 	return ret;
283 }
284 
/*
 * Work out which BGP instance an accepted connection belongs to.  This
 * matters because several VRFs may use the same IP address space.  The
 * device the socket is bound to (a VRF device, or an interface that in turn
 * identifies the VRF) selects the instance.
 *
 * On success *bgp_inst is set and 0 is returned.  Note that when the socket
 * is not bound to any device the default instance is stored and 0 is
 * returned even if there is no default instance (*bgp_inst == NULL).
 * A non-zero return means no instance could be determined.
 */
static int bgp_get_instance_for_inc_conn(int sock, struct bgp **bgp_inst)
{
#ifndef SO_BINDTODEVICE
	/*
	 * Only Linux has SO_BINDTODEVICE, and the whole mechanism (using the
	 * device name returned by getsockopt() to identify the VRF,
	 * particularly with Linux's VRF l3master device) is Linux-specific.
	 * When other platforms add VRF support this will need handling here
	 * as well (or some restructuring).
	 */
	struct bgp *dflt = bgp_get_default();

	*bgp_inst = dflt;
	return dflt == NULL;

#else
	char devname[VRF_NAMSIZ + 1];
	socklen_t devname_len = VRF_NAMSIZ;
	struct listnode *ln, *next_ln;
	struct bgp *inst;

	*bgp_inst = NULL;
	devname[0] = '\0';

	if (getsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, devname,
		       &devname_len)
	    != 0) {
#if defined(HAVE_CUMULUS)
		flog_err(EC_LIB_SOCKET,
			 "[Error] BGP SO_BINDTODEVICE get failed (%s), sock %d",
			 safe_strerror(errno), sock);
		return -1;
#endif
	}

	/* Not bound to a device: default instance. */
	if (devname[0] == '\0') {
		*bgp_inst = bgp_get_default();
		return 0;
	}

	/* First try to match an instance by name ... */
	inst = bgp_lookup_by_name(devname);
	if (inst) {
		if (!inst->vrf_id) /* unexpected */
			return -1;
		*bgp_inst = inst;
		return 0;
	}

	/*
	 * ... then fall back to finding an instance whose VRF contains an
	 * interface of that name.
	 * TODO - This will be optimized once interfaces move into the NS
	 */
	for (ALL_LIST_ELEMENTS(bm->bgp, ln, next_ln, inst)) {
		if (inst->inst_type == BGP_INSTANCE_TYPE_VIEW)
			continue;

		if (if_lookup_by_name(devname, inst->vrf_id)) {
			*bgp_inst = inst;
			return 0;
		}
	}

	/* Neither an instance nor an interface matched. */
	return -1;
#endif
}
356 
bgp_socket_set_buffer_size(const int fd)357 static void bgp_socket_set_buffer_size(const int fd)
358 {
359 	if (getsockopt_so_sendbuf(fd) < (int)bm->socket_buffer)
360 		setsockopt_so_sendbuf(fd, bm->socket_buffer);
361 	if (getsockopt_so_recvbuf(fd) < (int)bm->socket_buffer)
362 		setsockopt_so_recvbuf(fd, bm->socket_buffer);
363 }
364 
365 /* Accept bgp connection. */
bgp_accept(struct thread * thread)366 static int bgp_accept(struct thread *thread)
367 {
368 	int bgp_sock;
369 	int accept_sock;
370 	union sockunion su;
371 	struct bgp_listener *listener = THREAD_ARG(thread);
372 	struct peer *peer;
373 	struct peer *peer1;
374 	char buf[SU_ADDRSTRLEN];
375 	struct bgp *bgp = NULL;
376 
377 	sockunion_init(&su);
378 
379 	bgp = bgp_lookup_by_name(listener->name);
380 
381 	/* Register accept thread. */
382 	accept_sock = THREAD_FD(thread);
383 	if (accept_sock < 0) {
384 		flog_err_sys(EC_LIB_SOCKET,
385 			     "[Error] BGP accept socket fd is negative: %d",
386 			     accept_sock);
387 		return -1;
388 	}
389 	listener->thread = NULL;
390 
391 	thread_add_read(bm->master, bgp_accept, listener, accept_sock,
392 			&listener->thread);
393 
394 	/* Accept client connection. */
395 	bgp_sock = sockunion_accept(accept_sock, &su);
396 	int save_errno = errno;
397 	if (bgp_sock < 0) {
398 		if (save_errno == EINVAL) {
399 			struct vrf *vrf =
400 				bgp ? vrf_lookup_by_id(bgp->vrf_id) : NULL;
401 
402 			/*
403 			 * It appears that sometimes, when VRFs are deleted on
404 			 * the system, it takes a little while for us to get
405 			 * notified about that. In the meantime we endlessly
406 			 * loop on accept(), because the socket, having been
407 			 * bound to a now-deleted VRF device, is in some weird
408 			 * state which causes accept() to fail.
409 			 *
410 			 * To avoid this, if we see accept() fail with EINVAL,
411 			 * we cancel ourselves and trust that when the VRF
412 			 * deletion notification comes in the event handler for
413 			 * that will take care of cleaning us up.
414 			 */
415 			flog_err_sys(
416 				EC_LIB_SOCKET,
417 				"[Error] accept() failed with error \"%s\" on BGP listener socket %d for BGP instance in VRF \"%s\"; refreshing socket",
418 				safe_strerror(save_errno), accept_sock,
419 				VRF_LOGNAME(vrf));
420 			THREAD_OFF(listener->thread);
421 		} else {
422 			flog_err_sys(
423 				EC_LIB_SOCKET,
424 				"[Error] BGP socket accept failed (%s); retrying",
425 				safe_strerror(save_errno));
426 		}
427 		return -1;
428 	}
429 	set_nonblocking(bgp_sock);
430 
431 	/* Obtain BGP instance this connection is meant for.
432 	 * - if it is a VRF netns sock, then BGP is in listener structure
433 	 * - otherwise, the bgp instance need to be demultiplexed
434 	 */
435 	if (listener->bgp)
436 		bgp = listener->bgp;
437 	else if (bgp_get_instance_for_inc_conn(bgp_sock, &bgp)) {
438 		if (bgp_debug_neighbor_events(NULL))
439 			zlog_debug(
440 				"[Event] Could not get instance for incoming conn from %s",
441 				inet_sutop(&su, buf));
442 		close(bgp_sock);
443 		return -1;
444 	}
445 
446 	bgp_socket_set_buffer_size(bgp_sock);
447 
448 	/* Check remote IP address */
449 	peer1 = peer_lookup(bgp, &su);
450 
451 	if (!peer1) {
452 		peer1 = peer_lookup_dynamic_neighbor(bgp, &su);
453 		if (peer1) {
454 			/* Dynamic neighbor has been created, let it proceed */
455 			peer1->fd = bgp_sock;
456 			bgp_fsm_change_status(peer1, Active);
457 			BGP_TIMER_OFF(
458 				peer1->t_start); /* created in peer_create() */
459 
460 			if (peer_active(peer1))
461 				BGP_EVENT_ADD(peer1, TCP_connection_open);
462 
463 			return 0;
464 		}
465 	}
466 
467 	if (!peer1) {
468 		if (bgp_debug_neighbor_events(NULL)) {
469 			zlog_debug(
470 				"[Event] %s connection rejected(%s:%u:%s) - not configured and not valid for dynamic",
471 				inet_sutop(&su, buf), bgp->name_pretty, bgp->as,
472 				VRF_LOGNAME(vrf_lookup_by_id(bgp->vrf_id)));
473 		}
474 		close(bgp_sock);
475 		return -1;
476 	}
477 
478 	if (CHECK_FLAG(peer1->flags, PEER_FLAG_SHUTDOWN)
479 	    || CHECK_FLAG(peer1->bgp->flags, BGP_FLAG_SHUTDOWN)) {
480 		if (bgp_debug_neighbor_events(peer1))
481 			zlog_debug(
482 				"[Event] connection from %s rejected(%s:%u:%s) due to admin shutdown",
483 				inet_sutop(&su, buf), bgp->name_pretty, bgp->as,
484 				VRF_LOGNAME(vrf_lookup_by_id(bgp->vrf_id)));
485 		close(bgp_sock);
486 		return -1;
487 	}
488 
489 	/*
490 	 * Do not accept incoming connections in Clearing state. This can result
491 	 * in incorect state transitions - e.g., the connection goes back to
492 	 * Established and then the Clearing_Completed event is generated. Also,
493 	 * block incoming connection in Deleted state.
494 	 */
495 	if (peer1->status == Clearing || peer1->status == Deleted) {
496 		if (bgp_debug_neighbor_events(peer1))
497 			zlog_debug(
498 				"[Event] Closing incoming conn for %s (%p) state %d",
499 				peer1->host, peer1, peer1->status);
500 		close(bgp_sock);
501 		return -1;
502 	}
503 
504 	/* Check that at least one AF is activated for the peer. */
505 	if (!peer_active(peer1)) {
506 		if (bgp_debug_neighbor_events(peer1))
507 			zlog_debug(
508 				"%s - incoming conn rejected - no AF activated for peer",
509 				peer1->host);
510 		close(bgp_sock);
511 		return -1;
512 	}
513 
514 	/* Do not try to reconnect if the peer reached maximum
515 	 * prefixes, restart timer is still running or the peer
516 	 * is shutdown.
517 	 */
518 	if (BGP_PEER_START_SUPPRESSED(peer1)) {
519 		if (bgp_debug_neighbor_events(peer1))
520 			zlog_debug(
521 				"[Event] Incoming BGP connection rejected from %s due to maximum-prefix or shutdown",
522 				peer1->host);
523 		close(bgp_sock);
524 		return -1;
525 	}
526 
527 	if (bgp_debug_neighbor_events(peer1))
528 		zlog_debug("[Event] BGP connection from host %s fd %d",
529 			   inet_sutop(&su, buf), bgp_sock);
530 
531 	if (peer1->doppelganger) {
532 		/* We have an existing connection. Kill the existing one and run
533 		   with this one.
534 		*/
535 		if (bgp_debug_neighbor_events(peer1))
536 			zlog_debug(
537 				"[Event] New active connection from peer %s, Killing previous active connection",
538 				peer1->host);
539 		peer_delete(peer1->doppelganger);
540 	}
541 
542 	if (bgp_set_socket_ttl(peer1, bgp_sock) < 0)
543 		if (bgp_debug_neighbor_events(peer1))
544 			zlog_debug(
545 				"[Event] Unable to set min/max TTL on peer %s, Continuing",
546 				peer1->host);
547 
548 	peer = peer_create(&su, peer1->conf_if, peer1->bgp, peer1->local_as,
549 			   peer1->as, peer1->as_type, 0, 0, NULL);
550 	hash_release(peer->bgp->peerhash, peer);
551 	hash_get(peer->bgp->peerhash, peer, hash_alloc_intern);
552 
553 	peer_xfer_config(peer, peer1);
554 	bgp_peer_gr_flags_update(peer);
555 
556 	BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(peer->bgp,
557 							  peer->bgp->peer);
558 
559 	if (bgp_peer_gr_mode_get(peer) == PEER_DISABLE) {
560 
561 		UNSET_FLAG(peer->sflags, PEER_STATUS_NSF_MODE);
562 
563 		if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT)) {
564 			peer_nsf_stop(peer);
565 		}
566 	}
567 
568 	UNSET_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE);
569 
570 	peer->doppelganger = peer1;
571 	peer1->doppelganger = peer;
572 	peer->fd = bgp_sock;
573 	vrf_bind(peer->bgp->vrf_id, bgp_sock, bgp_get_bound_name(peer));
574 	bgp_fsm_change_status(peer, Active);
575 	BGP_TIMER_OFF(peer->t_start); /* created in peer_create() */
576 
577 	SET_FLAG(peer->sflags, PEER_STATUS_ACCEPT_PEER);
578 	/* Make dummy peer until read Open packet. */
579 	if (peer1->status == Established
580 	    && CHECK_FLAG(peer1->sflags, PEER_STATUS_NSF_MODE)) {
581 		/* If we have an existing established connection with graceful
582 		 * restart
583 		 * capability announced with one or more address families, then
584 		 * drop
585 		 * existing established connection and move state to connect.
586 		 */
587 		peer1->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
588 
589 		if (CHECK_FLAG(peer1->flags, PEER_FLAG_GRACEFUL_RESTART)
590 		    || CHECK_FLAG(peer1->flags,
591 				  PEER_FLAG_GRACEFUL_RESTART_HELPER))
592 			SET_FLAG(peer1->sflags, PEER_STATUS_NSF_WAIT);
593 
594 		bgp_event_update(peer1, TCP_connection_closed);
595 	}
596 
597 	if (peer_active(peer)) {
598 		BGP_EVENT_ADD(peer, TCP_connection_open);
599 	}
600 
601 	return 0;
602 }
603 
604 /* BGP socket bind. */
bgp_get_bound_name(struct peer * peer)605 static char *bgp_get_bound_name(struct peer *peer)
606 {
607 	char *name = NULL;
608 
609 	if (!peer)
610 		return NULL;
611 
612 	if ((peer->bgp->vrf_id == VRF_DEFAULT) && !peer->ifname
613 	    && !peer->conf_if)
614 		return NULL;
615 
616 	if (peer->su.sa.sa_family != AF_INET
617 	    && peer->su.sa.sa_family != AF_INET6)
618 		return NULL; // unexpected
619 
620 	/* For IPv6 peering, interface (unnumbered or link-local with interface)
621 	 * takes precedence over VRF. For IPv4 peering, explicit interface or
622 	 * VRF are the situations to bind.
623 	 */
624 	if (peer->su.sa.sa_family == AF_INET6)
625 		name = (peer->conf_if ? peer->conf_if
626 				      : (peer->ifname ? peer->ifname
627 						      : peer->bgp->name));
628 	else
629 		name = peer->ifname ? peer->ifname : peer->bgp->name;
630 
631 	return name;
632 }
633 
bgp_update_address(struct interface * ifp,const union sockunion * dst,union sockunion * addr)634 static int bgp_update_address(struct interface *ifp, const union sockunion *dst,
635 			      union sockunion *addr)
636 {
637 	struct prefix *p, *sel, d;
638 	struct connected *connected;
639 	struct listnode *node;
640 	int common;
641 
642 	sockunion2hostprefix(dst, &d);
643 	sel = NULL;
644 	common = -1;
645 
646 	for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, connected)) {
647 		p = connected->address;
648 		if (p->family != d.family)
649 			continue;
650 		if (prefix_common_bits(p, &d) > common) {
651 			sel = p;
652 			common = prefix_common_bits(sel, &d);
653 		}
654 	}
655 
656 	if (!sel)
657 		return 1;
658 
659 	prefix2sockunion(sel, addr);
660 	return 0;
661 }
662 
663 /* Update source selection.  */
bgp_update_source(struct peer * peer)664 static int bgp_update_source(struct peer *peer)
665 {
666 	struct interface *ifp;
667 	union sockunion addr;
668 	int ret = 0;
669 
670 	sockunion_init(&addr);
671 
672 	/* Source is specified with interface name.  */
673 	if (peer->update_if) {
674 		ifp = if_lookup_by_name(peer->update_if, peer->bgp->vrf_id);
675 		if (!ifp)
676 			return -1;
677 
678 		if (bgp_update_address(ifp, &peer->su, &addr))
679 			return -1;
680 
681 		ret = sockunion_bind(peer->fd, &addr, 0, &addr);
682 	}
683 
684 	/* Source is specified with IP address.  */
685 	if (peer->update_source)
686 		ret = sockunion_bind(peer->fd, peer->update_source, 0,
687 				     peer->update_source);
688 
689 	return ret;
690 }
691 
692 /* BGP try to connect to the peer.  */
bgp_connect(struct peer * peer)693 int bgp_connect(struct peer *peer)
694 {
695 	assert(!CHECK_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON));
696 	assert(!CHECK_FLAG(peer->thread_flags, PEER_THREAD_READS_ON));
697 	ifindex_t ifindex = 0;
698 
699 	if (peer->conf_if && BGP_PEER_SU_UNSPEC(peer)) {
700 		zlog_debug("Peer address not learnt: Returning from connect");
701 		return 0;
702 	}
703 	frr_with_privs(&bgpd_privs) {
704 	/* Make socket for the peer. */
705 		peer->fd = vrf_sockunion_socket(&peer->su, peer->bgp->vrf_id,
706 						bgp_get_bound_name(peer));
707 	}
708 	if (peer->fd < 0)
709 		return -1;
710 
711 	set_nonblocking(peer->fd);
712 
713 	bgp_socket_set_buffer_size(peer->fd);
714 
715 	if (bgp_set_socket_ttl(peer, peer->fd) < 0)
716 		return -1;
717 
718 	sockopt_reuseaddr(peer->fd);
719 	sockopt_reuseport(peer->fd);
720 
721 #ifdef IPTOS_PREC_INTERNETCONTROL
722 	frr_with_privs(&bgpd_privs) {
723 		if (sockunion_family(&peer->su) == AF_INET)
724 			setsockopt_ipv4_tos(peer->fd,
725 					    IPTOS_PREC_INTERNETCONTROL);
726 		else if (sockunion_family(&peer->su) == AF_INET6)
727 			setsockopt_ipv6_tclass(peer->fd,
728 					       IPTOS_PREC_INTERNETCONTROL);
729 	}
730 #endif
731 
732 	if (peer->password) {
733 		uint16_t prefixlen = peer->su.sa.sa_family == AF_INET
734 					     ? IPV4_MAX_PREFIXLEN
735 					     : IPV6_MAX_PREFIXLEN;
736 
737 		bgp_md5_set_connect(peer->fd, &peer->su, prefixlen,
738 				    peer->password);
739 	}
740 
741 	/* Update source bind. */
742 	if (bgp_update_source(peer) < 0) {
743 		return connect_error;
744 	}
745 
746 	if (peer->conf_if || peer->ifname)
747 		ifindex = ifname2ifindex(peer->conf_if ? peer->conf_if
748 						       : peer->ifname,
749 					 peer->bgp->vrf_id);
750 
751 	if (bgp_debug_neighbor_events(peer))
752 		zlog_debug("%s [Event] Connect start to %s fd %d", peer->host,
753 			   peer->host, peer->fd);
754 
755 	/* Connect to the remote peer. */
756 	return sockunion_connect(peer->fd, &peer->su, htons(peer->port),
757 				 ifindex);
758 }
759 
760 /* After TCP connection is established.  Get local address and port. */
bgp_getsockname(struct peer * peer)761 int bgp_getsockname(struct peer *peer)
762 {
763 	if (peer->su_local) {
764 		sockunion_free(peer->su_local);
765 		peer->su_local = NULL;
766 	}
767 
768 	if (peer->su_remote) {
769 		sockunion_free(peer->su_remote);
770 		peer->su_remote = NULL;
771 	}
772 
773 	peer->su_local = sockunion_getsockname(peer->fd);
774 	if (!peer->su_local)
775 		return -1;
776 	peer->su_remote = sockunion_getpeername(peer->fd);
777 	if (!peer->su_remote)
778 		return -1;
779 
780 	if (!bgp_zebra_nexthop_set(peer->su_local, peer->su_remote,
781 				   &peer->nexthop, peer)) {
782 		flog_err(EC_BGP_NH_UPD,
783 			 "%s: nexthop_set failed, resetting connection - intf %p",
784 			 peer->host, peer->nexthop.ifp);
785 		return -1;
786 	}
787 	return 0;
788 }
789 
790 
bgp_listener(int sock,struct sockaddr * sa,socklen_t salen,struct bgp * bgp)791 static int bgp_listener(int sock, struct sockaddr *sa, socklen_t salen,
792 			struct bgp *bgp)
793 {
794 	struct bgp_listener *listener;
795 	int ret, en;
796 
797 	sockopt_reuseaddr(sock);
798 	sockopt_reuseport(sock);
799 
800 	frr_with_privs(&bgpd_privs) {
801 
802 #ifdef IPTOS_PREC_INTERNETCONTROL
803 		if (sa->sa_family == AF_INET)
804 			setsockopt_ipv4_tos(sock, IPTOS_PREC_INTERNETCONTROL);
805 		else if (sa->sa_family == AF_INET6)
806 			setsockopt_ipv6_tclass(sock,
807 					       IPTOS_PREC_INTERNETCONTROL);
808 #endif
809 
810 		sockopt_v6only(sa->sa_family, sock);
811 
812 		ret = bind(sock, sa, salen);
813 		en = errno;
814 	}
815 
816 	if (ret < 0) {
817 		flog_err_sys(EC_LIB_SOCKET, "bind: %s", safe_strerror(en));
818 		return ret;
819 	}
820 
821 	ret = listen(sock, SOMAXCONN);
822 	if (ret < 0) {
823 		flog_err_sys(EC_LIB_SOCKET, "listen: %s", safe_strerror(errno));
824 		return ret;
825 	}
826 
827 	listener = XCALLOC(MTYPE_BGP_LISTENER, sizeof(*listener));
828 	listener->fd = sock;
829 	listener->name = XSTRDUP(MTYPE_BGP_LISTENER, bgp->name);
830 
831 	/* this socket is in a vrf record bgp back pointer */
832 	if (bgp->vrf_id != VRF_DEFAULT
833 	    && bgp->inst_type != BGP_INSTANCE_TYPE_VIEW)
834 		listener->bgp = bgp;
835 
836 	memcpy(&listener->su, sa, salen);
837 	listener->thread = NULL;
838 	thread_add_read(bm->master, bgp_accept, listener, sock,
839 			&listener->thread);
840 	listnode_add(bm->listen_sockets, listener);
841 
842 	return 0;
843 }
844 
845 /* IPv6 supported version of BGP server socket setup.  */
bgp_socket(struct bgp * bgp,unsigned short port,const char * address)846 int bgp_socket(struct bgp *bgp, unsigned short port, const char *address)
847 {
848 	struct addrinfo *ainfo;
849 	struct addrinfo *ainfo_save;
850 	static const struct addrinfo req = {
851 		.ai_family = AF_UNSPEC,
852 		.ai_flags = AI_PASSIVE,
853 		.ai_socktype = SOCK_STREAM,
854 	};
855 	int ret, count;
856 	char port_str[BUFSIZ];
857 
858 	snprintf(port_str, sizeof(port_str), "%d", port);
859 	port_str[sizeof(port_str) - 1] = '\0';
860 
861 	frr_with_privs(&bgpd_privs) {
862 		ret = vrf_getaddrinfo(address, port_str, &req, &ainfo_save,
863 				      bgp->vrf_id);
864 	}
865 	if (ret != 0) {
866 		flog_err_sys(EC_LIB_SOCKET, "getaddrinfo: %s",
867 			     gai_strerror(ret));
868 		return -1;
869 	}
870 	if (bgp_option_check(BGP_OPT_NO_ZEBRA) &&
871 	    bgp->vrf_id != VRF_DEFAULT) {
872 		freeaddrinfo(ainfo_save);
873 		return -1;
874 	}
875 	count = 0;
876 	for (ainfo = ainfo_save; ainfo; ainfo = ainfo->ai_next) {
877 		int sock;
878 
879 		if (ainfo->ai_family != AF_INET && ainfo->ai_family != AF_INET6)
880 			continue;
881 
882 		frr_with_privs(&bgpd_privs) {
883 			sock = vrf_socket(ainfo->ai_family,
884 					  ainfo->ai_socktype,
885 					  ainfo->ai_protocol, bgp->vrf_id,
886 					  (bgp->inst_type
887 					   == BGP_INSTANCE_TYPE_VRF
888 					   ? bgp->name : NULL));
889 		}
890 		if (sock < 0) {
891 			flog_err_sys(EC_LIB_SOCKET, "socket: %s",
892 				     safe_strerror(errno));
893 			continue;
894 		}
895 
896 		/* if we intend to implement ttl-security, this socket needs
897 		 * ttl=255 */
898 		sockopt_ttl(ainfo->ai_family, sock, MAXTTL);
899 
900 		ret = bgp_listener(sock, ainfo->ai_addr, ainfo->ai_addrlen,
901 				   bgp);
902 		if (ret == 0)
903 			++count;
904 		else
905 			close(sock);
906 	}
907 	freeaddrinfo(ainfo_save);
908 	if (count == 0 && bgp->inst_type != BGP_INSTANCE_TYPE_VRF) {
909 		flog_err(
910 			EC_LIB_SOCKET,
911 			"%s: no usable addresses please check other programs usage of specified port %d",
912 			__func__, port);
913 		flog_err_sys(EC_LIB_SOCKET, "%s: Program cannot continue",
914 			     __func__);
915 		exit(-1);
916 	}
917 
918 	return 0;
919 }
920 
921 /* this function closes vrf socket
922  * this should be called only for vrf socket with netns backend
923  */
bgp_close_vrf_socket(struct bgp * bgp)924 void bgp_close_vrf_socket(struct bgp *bgp)
925 {
926 	struct listnode *node, *next;
927 	struct bgp_listener *listener;
928 
929 	if (!bgp)
930 		return;
931 
932 	if (bm->listen_sockets == NULL)
933 		return;
934 
935 	for (ALL_LIST_ELEMENTS(bm->listen_sockets, node, next, listener)) {
936 		if (listener->bgp == bgp) {
937 			THREAD_OFF(listener->thread);
938 			close(listener->fd);
939 			listnode_delete(bm->listen_sockets, listener);
940 			XFREE(MTYPE_BGP_LISTENER, listener->name);
941 			XFREE(MTYPE_BGP_LISTENER, listener);
942 		}
943 	}
944 }
945 
946 /* this function closes main socket
947  */
bgp_close(void)948 void bgp_close(void)
949 {
950 	struct listnode *node, *next;
951 	struct bgp_listener *listener;
952 
953 	if (bm->listen_sockets == NULL)
954 		return;
955 
956 	for (ALL_LIST_ELEMENTS(bm->listen_sockets, node, next, listener)) {
957 		if (listener->bgp)
958 			continue;
959 		THREAD_OFF(listener->thread);
960 		close(listener->fd);
961 		listnode_delete(bm->listen_sockets, listener);
962 		XFREE(MTYPE_BGP_LISTENER, listener->name);
963 		XFREE(MTYPE_BGP_LISTENER, listener);
964 	}
965 }
966