xref: /dragonfly/sys/net/wg/if_wg.c (revision dfbadd37)
1 /*-
2  * SPDX-License-Identifier: ISC
3  *
4  * Copyright (C) 2015-2021 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
5  * Copyright (C) 2019-2021 Matt Dunwoodie <ncon@noconroy.net>
6  * Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate)
7  * Copyright (c) 2021 Kyle Evans <kevans@FreeBSD.org>
8  * Copyright (c) 2022 The FreeBSD Foundation
9  * Copyright (c) 2023-2024 Aaron LI <aly@aaronly.me>
10  *
11  * Permission to use, copy, modify, and distribute this software for any
12  * purpose with or without fee is hereby granted, provided that the above
13  * copyright notice and this permission notice appear in all copies.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
16  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
17  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
18  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
20  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
21  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22  */
23 
24 #include "opt_inet6.h"
25 
26 #include <sys/param.h>
27 #include <sys/systm.h>
28 #include <sys/callout.h>
29 #include <sys/caps.h>
30 #include <sys/endian.h>
31 #include <sys/kernel.h>
32 #include <sys/lock.h>
33 #include <sys/malloc.h>
34 #include <sys/mbuf.h>
35 #include <sys/module.h>
36 #include <sys/objcache.h>
37 #include <sys/queue.h>
38 #include <sys/socket.h>
39 #include <sys/socketops.h> /* so_pru_*() functions */
40 #include <sys/socketvar.h>
41 #include <sys/sockio.h> /* SIOC* ioctl commands */
42 #include <sys/taskqueue.h>
43 #include <sys/time.h>
44 
45 #include <machine/atomic.h>
46 #include <machine/cpufunc.h> /* cpu_sfence() */
47 
48 #include <net/bpf.h>
49 #include <net/ethernet.h> /* ETHERMTU */
50 #include <net/if.h>
51 #include <net/if_clone.h>
52 #include <net/if_types.h> /* IFT_WIREGUARD */
53 #include <net/if_var.h>
54 #include <net/ifq_var.h>
55 #include <net/netisr.h>
56 #include <net/radix.h>
57 #include <net/route.h> /* struct rtentry */
58 
59 #include <netinet/in.h>
60 #include <netinet/ip.h>
61 #include <netinet/ip_icmp.h>
62 #include <netinet/ip6.h>
63 #include <netinet/icmp6.h>
64 #include <netinet6/in6_var.h> /* in6_mask2len() */
65 
66 #include "wg_cookie.h"
67 #include "wg_noise.h"
68 #include "if_wg.h"
69 
70 CTASSERT(WG_KEY_SIZE >= NOISE_PUBLIC_KEY_LEN);
71 CTASSERT(WG_KEY_SIZE >= NOISE_SYMMETRIC_KEY_LEN);
72 
73 #define DEFAULT_MTU		(ETHERMTU - 80)
74 #define MAX_MTU			(IF_MAXMTU - 80)
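/*
 * The 80 bytes reserved above cover the worst-case encapsulation overhead:
 * IPv6 (40) + UDP (8) + data-message header (16: type, receiver index,
 * counter) + authentication tag (16).
 */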
75 
76 #ifndef ENOKEY
77 #define ENOKEY			ENOENT
78 #endif
79 
80 /*
81  * mbuf flags to clear after in-place encryption/decryption, so that the
82  * mbuf can be reused for re-entering the network stack or delivering to
83  * the remote peer.
84  *
85  * For example, the M_HASH and M_LENCHECKED flags must be cleared for an
86  * inbound packet; otherwise, a panic is to be expected.
87  */
88 #define MBUF_CLEARFLAGS		(M_COPYFLAGS & ~(M_PKTHDR | M_EOR | M_PRIO))
89 
90 #define MAX_LOOPS		8 /* 0 means no loop allowed */
91 #define MAX_STAGED_PKT		128
92 #define MAX_QUEUED_PKT		1024
93 #define MAX_QUEUED_PKT_MASK	(MAX_QUEUED_PKT - 1)
94 #define MAX_QUEUED_HANDSHAKES	4096
95 
96 #define REKEY_TIMEOUT_JITTER	(karc4random() % 334) /* msec */
97 #define MAX_TIMER_HANDSHAKES	(90 / REKEY_TIMEOUT)
98 #define NEW_HANDSHAKE_TIMEOUT	(REKEY_TIMEOUT + KEEPALIVE_TIMEOUT)
99 #define UNDERLOAD_TIMEOUT	1
100 
101 /* First 4 bytes on the wire, indicating the packet type (little-endian) */
102 #define WG_PKT_INITIATION	htole32(1)
103 #define WG_PKT_RESPONSE		htole32(2)
104 #define WG_PKT_COOKIE		htole32(3)
105 #define WG_PKT_DATA		htole32(4)
106 
107 #define WG_PKT_PADDING		16
108 #define WG_PKT_WITH_PADDING(n)	\
109 	(((n) + (WG_PKT_PADDING - 1)) & ~(WG_PKT_PADDING - 1))
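/* e.g., WG_PKT_WITH_PADDING(100) == 112; multiples of 16 are unchanged. */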
110 #define WG_PKT_DATA_MINLEN	\
111 	(sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN)
112 
113 
114 #define DPRINTF(sc, ...)	\
115 	if (sc->sc_ifp->if_flags & IFF_DEBUG) \
116 		if_printf(sc->sc_ifp, ##__VA_ARGS__)
117 
118 #define BPF_MTAP_AF(_ifp, _m, _af) do { \
119 	if ((_ifp)->if_bpf != NULL) { \
120 		bpf_gettoken(); \
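		/* Re-check under the BPF token: the tap may have been detached. */ \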
121 		if ((_ifp)->if_bpf != NULL) { \
122 			/* Prepend the AF as a 4-byte field for DLT_NULL. */ \
123 			uint32_t __bpf_af = (uint32_t)(_af); \
124 			bpf_ptap((_ifp)->if_bpf, (_m), &__bpf_af, 4); \
125 		} \
126 		bpf_reltoken(); \
127 	} \
128 } while (0)
129 
130 
131 struct wg_pkt_initiation {
132 	uint32_t		t;
133 	uint32_t		s_idx;
134 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
135 	uint8_t			es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN];
136 	uint8_t			ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN];
137 	struct cookie_macs	m;
138 };
139 
140 struct wg_pkt_response {
141 	uint32_t		t;
142 	uint32_t		s_idx;
143 	uint32_t		r_idx;
144 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
145 	uint8_t			en[0 + NOISE_AUTHTAG_LEN];
146 	struct cookie_macs	m;
147 };
148 
149 struct wg_pkt_cookie {
150 	uint32_t		t;
151 	uint32_t		r_idx;
152 	uint8_t			nonce[COOKIE_NONCE_SIZE];
153 	uint8_t			ec[COOKIE_ENCRYPTED_SIZE];
154 };
155 
156 struct wg_pkt_data {
157 	uint32_t		t;
158 	uint32_t		r_idx;
159 	uint64_t		counter;
160 	uint8_t			buf[];
161 };
162 
163 struct wg_endpoint {
164 	union {
165 		struct sockaddr		r_sa;
166 		struct sockaddr_in	r_sin;
167 #ifdef INET6
168 		struct sockaddr_in6	r_sin6;
169 #endif
170 	} e_remote;
171 	/*
172 	 * NOTE: No 'e_local' on DragonFly, because the socket upcall
173 	 *       and so_pru_soreceive() cannot provide the local
174 	 *       (i.e., destination) address of a received packet.
175 	 */
176 };
177 
178 struct aip_addr {
179 	uint8_t		length; /* required by the radix code */
180 	union {
181 		uint8_t		bytes[16];
182 		uint32_t	ip;
183 		uint32_t	ip6[4];
184 		struct in_addr	in;
185 		struct in6_addr	in6;
186 	};
187 };
188 
189 struct wg_aip {
190 	struct radix_node	 a_nodes[2]; /* first node is used for casting to wg_aip */
191 	LIST_ENTRY(wg_aip)	 a_entry;
192 	struct aip_addr		 a_addr;
193 	struct aip_addr		 a_mask;
194 	struct wg_peer		*a_peer;
195 	sa_family_t		 a_af;
196 };
197 
198 enum wg_packet_state {
199 	WG_PACKET_DEAD,		/* to be dropped */
200 	WG_PACKET_UNCRYPTED,	/* before encryption/decryption */
201 	WG_PACKET_CRYPTED,	/* after encryption/decryption */
202 };
203 
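/*
 * A packet in flight.  The p_serial linkage is used for the peer's
 * serialized encrypt/decrypt queues (preserving per-peer ordering), while
 * p_parallel is used for the staged, handshake, and per-device parallel
 * crypto queues; see wg_queue_both().
 */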
204 struct wg_packet {
205 	STAILQ_ENTRY(wg_packet)	 p_serial;
206 	STAILQ_ENTRY(wg_packet)	 p_parallel;
207 	struct wg_endpoint	 p_endpoint;
208 	struct noise_keypair	*p_keypair;
209 	uint64_t		 p_counter;
210 	struct mbuf		*p_mbuf;
211 	int			 p_mtu;
212 	sa_family_t		 p_af;
213 	enum wg_packet_state	 p_state;
214 };
215 
216 STAILQ_HEAD(wg_packet_list, wg_packet);
217 
218 struct wg_queue {
219 	struct lock		 q_mtx;
220 	struct wg_packet_list	 q_queue;
221 	size_t			 q_len;
222 };
223 
224 struct wg_peer {
225 	TAILQ_ENTRY(wg_peer)	 p_entry;
226 	unsigned long		 p_id;
227 	struct wg_softc		*p_sc;
228 
229 	char			 p_description[WG_PEER_DESCR_SIZE];
230 
231 	struct noise_remote	*p_remote;
232 	struct cookie_maker	 p_cookie;
233 
234 	struct lock		 p_endpoint_lock;
235 	struct wg_endpoint	 p_endpoint;
236 
237 	struct wg_queue		 p_stage_queue;
238 	struct wg_queue		 p_encrypt_serial;
239 	struct wg_queue		 p_decrypt_serial;
240 
241 	bool			 p_enabled;
242 	bool			 p_need_another_keepalive;
243 	uint16_t		 p_persistent_keepalive_interval;
244 	struct callout		 p_new_handshake;
245 	struct callout		 p_send_keepalive;
246 	struct callout		 p_retry_handshake;
247 	struct callout		 p_zero_key_material;
248 	struct callout		 p_persistent_keepalive;
249 
250 	struct lock		 p_handshake_mtx;
251 	struct timespec		 p_handshake_complete; /* nanotime */
252 	int			 p_handshake_retries;
253 
254 	struct task		 p_send_task;
255 	struct task		 p_recv_task;
256 	struct taskqueue	*p_send_taskqueue;
257 	struct taskqueue	*p_recv_taskqueue;
258 
259 	uint64_t		*p_tx_bytes;
260 	uint64_t		*p_rx_bytes;
261 
262 	LIST_HEAD(, wg_aip)	 p_aips;
263 	size_t			 p_aips_num;
264 };
265 
266 struct wg_socket {
267 	struct lock	 so_lock;
268 	struct socket	*so_so4;
269 	struct socket	*so_so6;
270 	uint32_t	 so_user_cookie;
271 	in_port_t	 so_port;
272 };
273 
274 struct wg_softc {
275 	LIST_ENTRY(wg_softc)	 sc_entry;
276 	struct ifnet		*sc_ifp;
277 	int			 sc_flags;
278 
279 	struct wg_socket	 sc_socket;
280 
281 	TAILQ_HEAD(, wg_peer)	 sc_peers;
282 	size_t			 sc_peers_num;
283 
284 	struct noise_local	*sc_local;
285 	struct cookie_checker	 sc_cookie;
286 
287 	struct lock		 sc_aip_lock;
288 	struct radix_node_head	*sc_aip4;
289 	struct radix_node_head	*sc_aip6;
290 
291 	struct taskqueue	*sc_handshake_taskqueue;
292 	struct task		 sc_handshake_task;
293 	struct wg_queue		 sc_handshake_queue;
294 
295 	struct task		*sc_encrypt_tasks; /* one per CPU */
296 	struct task		*sc_decrypt_tasks; /* one per CPU */
297 	struct wg_queue		 sc_encrypt_parallel;
298 	struct wg_queue		 sc_decrypt_parallel;
299 	int			 sc_encrypt_last_cpu;
300 	int			 sc_decrypt_last_cpu;
301 
302 	struct lock		 sc_lock;
303 };
304 
305 
306 static MALLOC_DEFINE(M_WG, "WG", "wireguard");
307 static MALLOC_DEFINE(M_WG_PACKET, "WG packet", "wireguard packet");
308 
309 static const char wgname[] = "wg";
310 static volatile unsigned long peer_counter = 0;
311 
312 static struct objcache *wg_packet_zone;
313 static struct lock wg_mtx;
314 static struct taskqueue **wg_taskqueues; /* one taskqueue per CPU */
315 static struct radix_node_head *wg_maskhead; /* shared by all interfaces */
316 static LIST_HEAD(, wg_softc) wg_list = LIST_HEAD_INITIALIZER(wg_list);
317 
318 static int wg_clone_create(struct if_clone *, int, caddr_t, caddr_t);
319 static int wg_clone_destroy(struct ifnet *ifp);
320 static struct if_clone wg_cloner = IF_CLONE_INITIALIZER(
321 	wgname, wg_clone_create, wg_clone_destroy, 0, IF_MAXUNIT);
322 
323 
324 static int wg_socket_init(struct wg_softc *, in_port_t);
325 static void wg_socket_uninit(struct wg_softc *);
326 static int wg_socket_open(struct socket **, sa_family_t, in_port_t *, void *);
327 static int wg_socket_set_sockopt(struct socket *, struct socket *, int, void *, size_t);
328 static int wg_socket_set_cookie(struct wg_softc *, uint32_t);
329 static void wg_timers_enable(struct wg_peer *);
330 static void wg_timers_disable(struct wg_peer *);
331 static void wg_timers_set_persistent_keepalive(struct wg_peer *, uint16_t);
332 static bool wg_timers_get_persistent_keepalive(struct wg_peer *, uint16_t *);
333 static void wg_timers_get_last_handshake(struct wg_peer *, struct timespec *);
334 static void wg_timers_event_data_sent(struct wg_peer *);
335 static void wg_timers_event_data_received(struct wg_peer *);
336 static void wg_timers_event_any_authenticated_packet_sent(struct wg_peer *);
337 static void wg_timers_event_any_authenticated_packet_received(struct wg_peer *);
338 static void wg_timers_event_any_authenticated_packet_traversal(struct wg_peer *);
339 static void wg_timers_event_handshake_initiated(struct wg_peer *);
340 static void wg_timers_event_handshake_complete(struct wg_peer *);
341 static void wg_timers_event_session_derived(struct wg_peer *);
342 static void wg_timers_event_want_initiation(struct wg_peer *);
343 static void wg_timers_run_send_initiation(struct wg_peer *, bool);
344 static void wg_timers_run_retry_handshake(void *);
345 static void wg_timers_run_send_keepalive(void *);
346 static void wg_timers_run_new_handshake(void *);
347 static void wg_timers_run_zero_key_material(void *);
348 static void wg_timers_run_persistent_keepalive(void *);
349 static int wg_aip_add(struct wg_softc *, struct wg_peer *, sa_family_t, const void *, uint8_t);
350 static struct wg_peer *wg_aip_lookup(struct wg_softc *, sa_family_t, const void *);
351 static void wg_aip_remove_all(struct wg_softc *, struct wg_peer *);
352 static struct wg_peer *wg_peer_create(struct wg_softc *, const uint8_t [WG_KEY_SIZE]);
353 static void wg_peer_destroy(struct wg_peer *);
354 static void wg_peer_destroy_all(struct wg_softc *);
355 static void wg_peer_send_buf(struct wg_peer *, const void *, size_t);
356 static void wg_peer_send_staged(struct wg_peer *);
357 static void wg_peer_set_endpoint(struct wg_peer *, const struct wg_endpoint *);
358 static void wg_peer_get_endpoint(struct wg_peer *, struct wg_endpoint *);
359 static int wg_peer_set_sockaddr(struct wg_peer *, const struct sockaddr *);
360 static int wg_peer_get_sockaddr(struct wg_peer *, struct sockaddr *);
361 static int wg_send(struct wg_softc *, struct wg_endpoint *, struct mbuf *);
362 static void wg_send_buf(struct wg_softc *, struct wg_endpoint *, const void *, size_t);
363 static void wg_send_initiation(struct wg_peer *);
364 static void wg_send_response(struct wg_peer *);
365 static void wg_send_cookie(struct wg_softc *, struct cookie_macs *, uint32_t, struct wg_endpoint *);
366 static void wg_send_keepalive(struct wg_peer *);
367 static void wg_handshake(struct wg_softc *, struct wg_packet *);
368 static void wg_encrypt(struct wg_softc *, struct wg_packet *);
369 static void wg_decrypt(struct wg_softc *, struct wg_packet *);
370 static void wg_softc_handshake_receive(void *, int);
371 static void wg_softc_decrypt(void *, int);
372 static void wg_softc_encrypt(void *, int);
373 static void wg_encrypt_dispatch(struct wg_softc *);
374 static void wg_decrypt_dispatch(struct wg_softc *);
375 static void wg_deliver_out(void *, int);
376 static void wg_deliver_in(void *, int);
377 static struct wg_packet *wg_packet_alloc(struct mbuf *);
378 static void wg_packet_free(struct wg_packet *);
379 static void wg_queue_init(struct wg_queue *, const char *);
380 static void wg_queue_deinit(struct wg_queue *);
381 static size_t wg_queue_len(const struct wg_queue *);
382 static bool wg_queue_enqueue_handshake(struct wg_queue *, struct wg_packet *);
383 static struct wg_packet *wg_queue_dequeue_handshake(struct wg_queue *);
384 static void wg_queue_push_staged(struct wg_queue *, struct wg_packet *);
385 static void wg_queue_enlist_staged(struct wg_queue *, struct wg_packet_list *);
386 static void wg_queue_delist_staged(struct wg_queue *, struct wg_packet_list *);
387 static void wg_queue_purge(struct wg_queue *);
388 static bool wg_queue_both(struct wg_queue *, struct wg_queue *, struct wg_packet *);
389 static struct wg_packet *wg_queue_dequeue_serial(struct wg_queue *);
390 static struct wg_packet *wg_queue_dequeue_parallel(struct wg_queue *);
391 static void wg_upcall(struct socket *, void *, int);
392 static void wg_input(struct wg_softc *, struct mbuf *, const struct sockaddr *);
393 static int wg_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *);
394 static int wg_up(struct wg_softc *);
395 static void wg_down(struct wg_softc *);
396 static int wg_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
397 static int wg_ioctl_get(struct wg_softc *, struct wg_data_io *, bool);
398 static int wg_ioctl_set(struct wg_softc *, struct wg_data_io *);
399 static int wg_module_init(void);
400 static int wg_module_deinit(void);
401 static inline int determine_af_and_pullup(struct mbuf **, sa_family_t *);
402 
403 
404 /*----------------------------------------------------------------------------*/
405 /* Peer */
406 
407 static struct wg_peer *
408 wg_peer_create(struct wg_softc *sc, const uint8_t pub_key[WG_KEY_SIZE])
409 {
410 	struct wg_peer *peer;
411 
412 	KKASSERT(lockstatus(&sc->sc_lock, curthread) == LK_EXCLUSIVE);
413 
414 	peer = kmalloc(sizeof(*peer), M_WG, M_WAITOK | M_ZERO);
415 
416 	peer->p_remote = noise_remote_alloc(sc->sc_local, pub_key, peer);
417 	if (noise_remote_enable(peer->p_remote) != 0) {
418 		kfree(peer, M_WG);
419 		return (NULL);
420 	}
421 
422 	peer->p_id = peer_counter++;
423 	peer->p_sc = sc;
424 	peer->p_tx_bytes = kmalloc(sizeof(*peer->p_tx_bytes) * ncpus,
425 				   M_WG, M_WAITOK | M_ZERO);
426 	peer->p_rx_bytes = kmalloc(sizeof(*peer->p_rx_bytes) * ncpus,
427 				   M_WG, M_WAITOK | M_ZERO);
428 
429 	cookie_maker_init(&peer->p_cookie, pub_key);
430 
431 	lockinit(&peer->p_endpoint_lock, "wg_peer_endpoint", 0, 0);
432 	lockinit(&peer->p_handshake_mtx, "wg_peer_handshake", 0, 0);
433 
434 	wg_queue_init(&peer->p_stage_queue, "stageq");
435 	wg_queue_init(&peer->p_encrypt_serial, "txq");
436 	wg_queue_init(&peer->p_decrypt_serial, "rxq");
437 
438 	callout_init_mp(&peer->p_new_handshake);
439 	callout_init_mp(&peer->p_send_keepalive);
440 	callout_init_mp(&peer->p_retry_handshake);
441 	callout_init_mp(&peer->p_persistent_keepalive);
442 	callout_init_mp(&peer->p_zero_key_material);
443 
444 	TASK_INIT(&peer->p_send_task, 0, wg_deliver_out, peer);
445 	TASK_INIT(&peer->p_recv_task, 0, wg_deliver_in, peer);
446 
447 	/* Randomly choose the taskqueues to distribute the load. */
448 	peer->p_send_taskqueue = wg_taskqueues[karc4random() % ncpus];
449 	peer->p_recv_taskqueue = wg_taskqueues[karc4random() % ncpus];
450 
451 	LIST_INIT(&peer->p_aips);
452 
453 	TAILQ_INSERT_TAIL(&sc->sc_peers, peer, p_entry);
454 	sc->sc_peers_num++;
455 
456 	if (sc->sc_ifp->if_link_state == LINK_STATE_UP)
457 		wg_timers_enable(peer);
458 
459 	DPRINTF(sc, "Peer %ld created\n", peer->p_id);
460 	return (peer);
461 }
462 
463 static void
464 wg_peer_destroy(struct wg_peer *peer)
465 {
466 	struct wg_softc *sc = peer->p_sc;
467 
468 	KKASSERT(lockstatus(&sc->sc_lock, curthread) == LK_EXCLUSIVE);
469 
470 	/*
471 	 * Disable remote and timers.  This will prevent any new handshakes
472 	 * from occuring.
473 	 * from occurring.
474 	noise_remote_disable(peer->p_remote);
475 	wg_timers_disable(peer);
476 
477 	/*
478 	 * Remove all allowed IPs, so no more packets will be routed to
479 	 * this peer.
480 	 */
481 	wg_aip_remove_all(sc, peer);
482 
483 	/* Remove peer from the interface, then free. */
484 	sc->sc_peers_num--;
485 	TAILQ_REMOVE(&sc->sc_peers, peer, p_entry);
486 
487 	/*
488 	 * Although no references remain, the p_{send,recv}_task may still
489 	 * be executing (think empty queue, but wg_deliver_{in,out} still
490 	 * needs to check the queue).  Wait for them to complete before
491 	 * freeing the peer.
492 	 */
493 	taskqueue_drain(peer->p_recv_taskqueue, &peer->p_recv_task);
494 	taskqueue_drain(peer->p_send_taskqueue, &peer->p_send_task);
495 
496 	wg_queue_deinit(&peer->p_decrypt_serial);
497 	wg_queue_deinit(&peer->p_encrypt_serial);
498 	wg_queue_deinit(&peer->p_stage_queue);
499 
500 	kfree(peer->p_tx_bytes, M_WG);
501 	kfree(peer->p_rx_bytes, M_WG);
502 
503 	lockuninit(&peer->p_endpoint_lock);
504 	lockuninit(&peer->p_handshake_mtx);
505 
506 	noise_remote_free(peer->p_remote);
507 	cookie_maker_free(&peer->p_cookie);
508 
509 	DPRINTF(sc, "Peer %ld destroyed\n", peer->p_id);
510 	kfree(peer, M_WG);
511 }
512 
513 static void
514 wg_peer_destroy_all(struct wg_softc *sc)
515 {
516 	struct wg_peer *peer, *tpeer;
517 
518 	TAILQ_FOREACH_MUTABLE(peer, &sc->sc_peers, p_entry, tpeer)
519 		wg_peer_destroy(peer);
520 }
521 
522 static int
523 wg_peer_set_sockaddr(struct wg_peer *peer, const struct sockaddr *remote)
524 {
525 	int ret = 0;
526 
527 	lockmgr(&peer->p_endpoint_lock, LK_EXCLUSIVE);
528 
529 	memcpy(&peer->p_endpoint.e_remote, remote,
530 	       sizeof(peer->p_endpoint.e_remote));
531 	if (remote->sa_family == AF_INET)
532 		memcpy(&peer->p_endpoint.e_remote.r_sin, remote,
533 		       sizeof(peer->p_endpoint.e_remote.r_sin));
534 #ifdef INET6
535 	else if (remote->sa_family == AF_INET6)
536 		memcpy(&peer->p_endpoint.e_remote.r_sin6, remote,
537 		       sizeof(peer->p_endpoint.e_remote.r_sin6));
538 #endif
539 	else
540 		ret = EAFNOSUPPORT;
541 
542 	/* No 'e_local' to clear on DragonFly. */
543 
544 	lockmgr(&peer->p_endpoint_lock, LK_RELEASE);
545 	return (ret);
546 }
547 
548 static int
549 wg_peer_get_sockaddr(struct wg_peer *peer, struct sockaddr *remote)
550 {
551 	int ret = ENOENT;
552 
553 	lockmgr(&peer->p_endpoint_lock, LK_SHARED);
554 	if (peer->p_endpoint.e_remote.r_sa.sa_family != AF_UNSPEC) {
555 		memcpy(remote, &peer->p_endpoint.e_remote,
556 		       sizeof(peer->p_endpoint.e_remote));
557 		ret = 0;
558 	}
559 	lockmgr(&peer->p_endpoint_lock, LK_RELEASE);
560 	return (ret);
561 }
562 
563 static void
564 wg_peer_set_endpoint(struct wg_peer *peer, const struct wg_endpoint *e)
565 {
566 	KKASSERT(e->e_remote.r_sa.sa_family != AF_UNSPEC);
567 
568 	if (memcmp(e, &peer->p_endpoint, sizeof(*e)) == 0)
569 		return;
570 
571 	lockmgr(&peer->p_endpoint_lock, LK_EXCLUSIVE);
572 	peer->p_endpoint = *e;
573 	lockmgr(&peer->p_endpoint_lock, LK_RELEASE);
574 }
575 
576 static void
577 wg_peer_get_endpoint(struct wg_peer *peer, struct wg_endpoint *e)
578 {
579 	lockmgr(&peer->p_endpoint_lock, LK_SHARED);
580 	*e = peer->p_endpoint;
581 	lockmgr(&peer->p_endpoint_lock, LK_RELEASE);
582 }
583 
584 /*----------------------------------------------------------------------------*/
585 /* Allowed IP */
586 
587 static int
588 wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, sa_family_t af,
589 	   const void *addr, uint8_t cidr)
590 {
591 	struct radix_node_head	*head;
592 	struct radix_node	*node;
593 	struct wg_aip		*aip;
594 	int			 ret = 0;
595 
596 	aip = kmalloc(sizeof(*aip), M_WG, M_WAITOK | M_ZERO);
597 	aip->a_peer = peer;
598 	aip->a_af = af;
599 
600 	switch (af) {
601 	case AF_INET:
602 		if (cidr > 32)
603 			cidr = 32;
604 		head = sc->sc_aip4;
605 		aip->a_addr.in = *(const struct in_addr *)addr;
606 		aip->a_mask.ip =
607 		    htonl(~((1LL << (32 - cidr)) - 1) & 0xffffffff);
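		/* e.g., cidr == 24 yields a_mask.ip == htonl(0xffffff00). */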
608 		aip->a_addr.ip &= aip->a_mask.ip;
609 		aip->a_addr.length = aip->a_mask.length =
610 		    offsetof(struct aip_addr, in) + sizeof(struct in_addr);
611 		break;
612 #ifdef INET6
613 	case AF_INET6:
614 	{
615 		int i;
616 
617 		if (cidr > 128)
618 			cidr = 128;
619 		head = sc->sc_aip6;
620 		aip->a_addr.in6 = *(const struct in6_addr *)addr;
621 		in6_prefixlen2mask(&aip->a_mask.in6, cidr);
622 		for (i = 0; i < 4; i++)
623 			aip->a_addr.ip6[i] &= aip->a_mask.ip6[i];
624 		aip->a_addr.length = aip->a_mask.length =
625 		    offsetof(struct aip_addr, in6) + sizeof(struct in6_addr);
626 		break;
627 	}
628 #endif
629 	default:
630 		kfree(aip, M_WG);
631 		return (EAFNOSUPPORT);
632 	}
633 
634 	lockmgr(&sc->sc_aip_lock, LK_EXCLUSIVE);
635 	node = head->rnh_addaddr(&aip->a_addr, &aip->a_mask, head,
636 				 aip->a_nodes);
637 	if (node != NULL) {
638 		KKASSERT(node == aip->a_nodes);
639 		LIST_INSERT_HEAD(&peer->p_aips, aip, a_entry);
640 		peer->p_aips_num++;
641 	} else {
642 		/*
643 		 * Two possibilities:
644 		 * - out of memory failure
645 		 * - entry already exists
646 		 */
647 		node = head->rnh_lookup(&aip->a_addr, &aip->a_mask, head);
648 		if (node == NULL) {
649 			kfree(aip, M_WG);
650 			ret = ENOMEM;
651 		} else {
652 			KKASSERT(node != aip->a_nodes);
653 			kfree(aip, M_WG);
654 			aip = (struct wg_aip *)node;
655 			if (aip->a_peer != peer) {
656 				/* Replace the peer. */
657 				LIST_REMOVE(aip, a_entry);
658 				aip->a_peer->p_aips_num--;
659 				aip->a_peer = peer;
660 				LIST_INSERT_HEAD(&peer->p_aips, aip, a_entry);
661 				aip->a_peer->p_aips_num++;
662 			}
663 		}
664 	}
665 	lockmgr(&sc->sc_aip_lock, LK_RELEASE);
666 
667 	return (ret);
668 }
669 
670 static struct wg_peer *
671 wg_aip_lookup(struct wg_softc *sc, sa_family_t af, const void *a)
672 {
673 	struct radix_node_head	*head;
674 	struct radix_node	*node;
675 	struct wg_peer		*peer;
676 	struct aip_addr		 addr;
677 
678 	switch (af) {
679 	case AF_INET:
680 		head = sc->sc_aip4;
681 		memcpy(&addr.in, a, sizeof(addr.in));
682 		addr.length = offsetof(struct aip_addr, in) + sizeof(addr.in);
683 		break;
684 	case AF_INET6:
685 		head = sc->sc_aip6;
686 		memcpy(&addr.in6, a, sizeof(addr.in6));
687 		addr.length = offsetof(struct aip_addr, in6) + sizeof(addr.in6);
688 		break;
689 	default:
690 		return (NULL);
691 	}
692 
693 	lockmgr(&sc->sc_aip_lock, LK_SHARED);
694 	node = head->rnh_matchaddr(&addr, head);
695 	if (node != NULL) {
696 		peer = ((struct wg_aip *)node)->a_peer;
697 		noise_remote_ref(peer->p_remote);
698 	} else {
699 		peer = NULL;
700 	}
701 	lockmgr(&sc->sc_aip_lock, LK_RELEASE);
702 
703 	return (peer);
704 }
705 
706 static void
707 wg_aip_remove_all(struct wg_softc *sc, struct wg_peer *peer)
708 {
709 	struct radix_node_head	*head;
710 	struct radix_node	*node;
711 	struct wg_aip		*aip, *taip;
712 
713 	lockmgr(&sc->sc_aip_lock, LK_EXCLUSIVE);
714 
715 	LIST_FOREACH_MUTABLE(aip, &peer->p_aips, a_entry, taip) {
716 		switch (aip->a_af) {
717 		case AF_INET:
718 			head = sc->sc_aip4;
719 			break;
720 		case AF_INET6:
721 			head = sc->sc_aip6;
722 			break;
723 		default:
724 			panic("%s: impossible aip %p", __func__, aip);
725 		}
726 		node = head->rnh_deladdr(&aip->a_addr, &aip->a_mask, head);
727 		if (node == NULL)
728 			panic("%s: failed to delete aip %p", __func__, aip);
729 		LIST_REMOVE(aip, a_entry);
730 		peer->p_aips_num--;
731 		kfree(aip, M_WG);
732 	}
733 
734 	if (!LIST_EMPTY(&peer->p_aips) || peer->p_aips_num != 0)
735 		panic("%s: could not delete all aips for peer %ld",
736 		      __func__, peer->p_id);
737 
738 	lockmgr(&sc->sc_aip_lock, LK_RELEASE);
739 }
740 
741 /*----------------------------------------------------------------------------*/
742 /* Socket */
743 
744 static int
745 wg_socket_init(struct wg_softc *sc, in_port_t port)
746 {
747 	struct wg_socket	*so = &sc->sc_socket;
748 	struct socket		*so4 = NULL, *so6 = NULL;
749 	in_port_t		 bound_port = port;
750 	uint32_t		 cookie;
751 	int			 ret;
752 
753 	/*
754 	 * When a host or a jail doesn't support the AF, sobind() would
755 	 * return EADDRNOTAVAIL.  Handle this case in order to support such
756 	 * IPv4-only or IPv6-only environments.
757 	 *
758 	 * However, in a dual-stack environment, both the IPv4 and IPv6
759 	 * sockets must bind to the same port.
760 	 */
761 	ret = wg_socket_open(&so4, AF_INET, &bound_port, sc);
762 	if (ret != 0 && ret != EADDRNOTAVAIL)
763 		goto error;
764 
765 #ifdef INET6
766 	ret = wg_socket_open(&so6, AF_INET6, &bound_port, sc);
767 	if (ret != 0 && ret != EADDRNOTAVAIL)
768 		goto error;
769 #endif
770 
771 	if (so4 == NULL && so6 == NULL) {
772 		ret = EAFNOSUPPORT;
773 		goto error;
774 	}
775 
776 	cookie = so->so_user_cookie;
777 	if (cookie != 0) {
778 		ret = wg_socket_set_sockopt(so4, so6, SO_USER_COOKIE,
779 					    &cookie, sizeof(cookie));
780 		if (ret != 0)
781 			goto error;
782 	}
783 
784 	KKASSERT(lockstatus(&sc->sc_lock, curthread) == LK_EXCLUSIVE);
785 
786 	lockmgr(&so->so_lock, LK_EXCLUSIVE);
787 	if (so->so_so4 != NULL)
788 		soclose(so->so_so4, 0);
789 	if (so->so_so6 != NULL)
790 		soclose(so->so_so6, 0);
791 	so->so_so4 = so4;
792 	so->so_so6 = so6;
793 	so->so_port = bound_port;
794 	lockmgr(&so->so_lock, LK_RELEASE);
795 
796 	return (0);
797 
798 error:
799 	if (so4 != NULL)
800 		soclose(so4, 0);
801 	if (so6 != NULL)
802 		soclose(so6, 0);
803 	return (ret);
804 }
805 
806 static int
807 wg_socket_open(struct socket **so, sa_family_t af, in_port_t *port,
808 	       void *upcall_arg)
809 {
810 	struct sockaddr_in	 sin;
811 #ifdef INET6
812 	struct sockaddr_in6	 sin6;
813 #endif
814 	struct sockaddr		*sa, *bound_sa;
815 	int			 ret;
816 
817 	if (af == AF_INET) {
818 		bzero(&sin, sizeof(sin));
819 		sin.sin_len = sizeof(struct sockaddr_in);
820 		sin.sin_family = AF_INET;
821 		sin.sin_port = htons(*port);
822 		sa = sintosa(&sin);
823 #ifdef INET6
824 	} else if (af == AF_INET6) {
825 		bzero(&sin6, sizeof(sin6));
826 		sin6.sin6_len = sizeof(struct sockaddr_in6);
827 		sin6.sin6_family = AF_INET6;
828 		sin6.sin6_port = htons(*port);
829 		sa = sintosa(&sin6);
830 #endif
831 	} else {
832 		return (EAFNOSUPPORT);
833 	}
834 
835 	ret = socreate(af, so, SOCK_DGRAM, IPPROTO_UDP, curthread);
836 	if (ret != 0)
837 		return (ret);
838 
839 	(*so)->so_upcall = wg_upcall;
840 	(*so)->so_upcallarg = upcall_arg;
841 	atomic_set_int(&(*so)->so_rcv.ssb_flags, SSB_UPCALL);
842 
843 	ret = sobind(*so, sa, curthread);
844 	if (ret != 0)
845 		goto error;
846 
847 	if (*port == 0) {
848 		ret = so_pru_sockaddr(*so, &bound_sa);
849 		if (ret != 0)
850 			goto error;
851 		if (bound_sa->sa_family == AF_INET)
852 			*port = ntohs(satosin(bound_sa)->sin_port);
853 		else
854 			*port = ntohs(satosin6(bound_sa)->sin6_port);
855 		kfree(bound_sa, M_SONAME);
856 	}
857 
858 	return (0);
859 
860 error:
861 	if (*so != NULL) {
862 		soclose(*so, 0);
863 		*so = NULL;
864 	}
865 	return (ret);
866 }
867 
868 static void
869 wg_socket_uninit(struct wg_softc *sc)
870 {
871 	struct wg_socket *so = &sc->sc_socket;
872 
873 	KKASSERT(lockstatus(&sc->sc_lock, curthread) == LK_EXCLUSIVE);
874 
875 	lockmgr(&so->so_lock, LK_EXCLUSIVE);
876 
877 	if (so->so_so4 != NULL) {
878 		soclose(so->so_so4, 0);
879 		so->so_so4 = NULL;
880 	}
881 	if (so->so_so6 != NULL) {
882 		soclose(so->so_so6, 0);
883 		so->so_so6 = NULL;
884 	}
885 
886 	lockmgr(&so->so_lock, LK_RELEASE);
887 }
888 
889 static int
890 wg_socket_set_sockopt(struct socket *so4, struct socket *so6,
891 		      int name, void *val, size_t len)
892 {
893 	struct sockopt sopt = {
894 		.sopt_dir = SOPT_SET,
895 		.sopt_level = SOL_SOCKET,
896 		.sopt_name = name,
897 		.sopt_val = val,
898 		.sopt_valsize = len,
899 	};
900 	int ret;
901 
902 	if (so4 != NULL) {
903 		ret = sosetopt(so4, &sopt);
904 		if (ret != 0)
905 			return (ret);
906 	}
907 	if (so6 != NULL) {
908 		ret = sosetopt(so6, &sopt);
909 		if (ret != 0)
910 			return (ret);
911 	}
912 
913 	return (0);
914 }
915 
916 static int
917 wg_socket_set_cookie(struct wg_softc *sc, uint32_t user_cookie)
918 {
919 	struct wg_socket	*so;
920 	int			 ret;
921 
922 	KKASSERT(lockstatus(&sc->sc_lock, curthread) == LK_EXCLUSIVE);
923 
924 	so = &sc->sc_socket;
925 	lockmgr(&so->so_lock, LK_EXCLUSIVE);
926 
927 	ret = wg_socket_set_sockopt(so->so_so4, so->so_so6, SO_USER_COOKIE,
928 				    &user_cookie, sizeof(user_cookie));
929 	if (ret == 0)
930 		so->so_user_cookie = user_cookie;
931 
932 	lockmgr(&so->so_lock, LK_RELEASE);
933 	return (ret);
934 }
935 
936 static int
937 wg_send(struct wg_softc *sc, struct wg_endpoint *e, struct mbuf *m)
938 {
939 	struct wg_socket	*so;
940 	struct sockaddr		*sa;
941 	int			 len, ret;
942 
943 	so = &sc->sc_socket;
944 	sa = &e->e_remote.r_sa;
945 	len = m->m_pkthdr.len;
946 	ret = 0;
947 
948 	/*
949 	 * NOTE: DragonFly by default sends UDP packets asynchronously,
950 	 *       unless the 'net.inet.udp.sosend_async' sysctl MIB is set
951 	 *       to 0 or the 'MSG_SYNC' flag is set for so_pru_sosend().
952 	 *       In the async mode, an error code cannot be returned to the
953 	 *       caller, so so_pru_sosend() may return 0 even if the packet
954 	 *       fails to send.
955 	 */
956 	lockmgr(&so->so_lock, LK_SHARED);
957 	if (sa->sa_family == AF_INET && so->so_so4 != NULL) {
958 		ret = so_pru_sosend(so->so_so4, sa, NULL /* uio */,
959 				    m, NULL /* control */, 0 /* flags */,
960 				    curthread);
961 #ifdef INET6
962 	} else if (sa->sa_family == AF_INET6 && so->so_so6 != NULL) {
963 		ret = so_pru_sosend(so->so_so6, sa, NULL /* uio */,
964 				    m, NULL /* control */, 0 /* flags */,
965 				    curthread);
966 #endif
967 	} else {
968 		ret = ENOTCONN;
969 		m_freem(m);
970 	}
971 	lockmgr(&so->so_lock, LK_RELEASE);
972 
973 	if (ret == 0) {
974 		IFNET_STAT_INC(sc->sc_ifp, opackets, 1);
975 		IFNET_STAT_INC(sc->sc_ifp, obytes, len);
976 	}
977 
978 	return (ret);
979 }
980 
981 static void
982 wg_send_buf(struct wg_softc *sc, struct wg_endpoint *e, const void *buf,
983 	    size_t len)
984 {
985 	struct mbuf	*m;
986 	int		 ret;
987 
988 	/*
989 	 * This function only sends handshake packets of known lengths that
990 	 * are <= MHLEN, so it's safe to just use m_gethdr() and memcpy().
991 	 */
992 	KKASSERT(len <= MHLEN);
993 
994 	m = m_gethdr(M_NOWAIT, MT_DATA);
995 	if (m == NULL) {
996 		DPRINTF(sc, "Unable to allocate mbuf\n");
997 		return;
998 	}
999 
1000 	/* Just plain copy as it's a single mbuf. */
1001 	memcpy(mtod(m, void *), buf, len);
1002 	m->m_pkthdr.len = m->m_len = len;
1003 
1004 	/* Give high priority to the handshake packets. */
1005 	m->m_flags |= M_PRIO;
1006 
1007 	ret = wg_send(sc, e, m);
1008 	if (ret != 0)
1009 		DPRINTF(sc, "Unable to send packet: %d\n", ret);
1010 }
1011 
1012 /*----------------------------------------------------------------------------*/
1013 /* Timers */
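/*
 * Overview of the per-peer timers (all are callouts armed by the event
 * hooks below):
 *  - p_retry_handshake: re-sends an initiation every REKEY_TIMEOUT (plus
 *    jitter) until a response arrives or MAX_TIMER_HANDSHAKES is reached.
 *  - p_new_handshake: fires if data was sent but no authenticated reply
 *    was received within NEW_HANDSHAKE_TIMEOUT.
 *  - p_send_keepalive: sends an empty data packet KEEPALIVE_TIMEOUT after
 *    receiving data, unless an authenticated packet was sent meanwhile.
 *  - p_zero_key_material: wipes the keypairs REJECT_AFTER_TIME * 3 after
 *    the last session was derived.
 *  - p_persistent_keepalive: user-configured interval; keeps NAT and
 *    stateful-firewall mappings alive by sending keepalives unconditionally.
 */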
1014 
1015 static void
1016 wg_timers_enable(struct wg_peer *peer)
1017 {
1018 	atomic_store_bool(&peer->p_enabled, true);
1019 	wg_timers_run_persistent_keepalive(peer);
1020 }
1021 
1022 static void
1023 wg_timers_disable(struct wg_peer *peer)
1024 {
1025 	atomic_store_bool(&peer->p_enabled, false);
1026 	atomic_store_bool(&peer->p_need_another_keepalive, false);
1027 
1028 	/* Cancel the callouts and wait for them to complete. */
1029 	callout_drain(&peer->p_new_handshake);
1030 	callout_drain(&peer->p_send_keepalive);
1031 	callout_drain(&peer->p_retry_handshake);
1032 	callout_drain(&peer->p_persistent_keepalive);
1033 	callout_drain(&peer->p_zero_key_material);
1034 }
1035 
1036 static void
1037 wg_timers_set_persistent_keepalive(struct wg_peer *peer, uint16_t interval)
1038 {
1039 	atomic_store_16(&peer->p_persistent_keepalive_interval, interval);
1040 	if (atomic_load_bool(&peer->p_enabled))
1041 		wg_timers_run_persistent_keepalive(peer);
1042 }
1043 
1044 static bool
1045 wg_timers_get_persistent_keepalive(struct wg_peer *peer, uint16_t *interval)
1046 {
1047 	*interval = atomic_load_16(&peer->p_persistent_keepalive_interval);
1048 	return (*interval > 0);
1049 }
1050 
1051 static void
1052 wg_timers_get_last_handshake(struct wg_peer *peer, struct timespec *time)
1053 {
1054 	lockmgr(&peer->p_handshake_mtx, LK_EXCLUSIVE);
1055 	*time = peer->p_handshake_complete;
1056 	lockmgr(&peer->p_handshake_mtx, LK_RELEASE);
1057 }
1058 
1059 static void
1060 wg_timers_event_data_sent(struct wg_peer *peer)
1061 {
1062 	int ticks;
1063 
1064 	if (atomic_load_bool(&peer->p_enabled) &&
1065 	    !callout_pending(&peer->p_new_handshake)) {
1066 		ticks = NEW_HANDSHAKE_TIMEOUT * hz +
1067 			REKEY_TIMEOUT_JITTER * hz / 1000;
1068 		callout_reset(&peer->p_new_handshake, ticks,
1069 			      wg_timers_run_new_handshake, peer);
1070 	}
1071 }
1072 
1073 static void
1074 wg_timers_event_data_received(struct wg_peer *peer)
1075 {
1076 	if (atomic_load_bool(&peer->p_enabled)) {
1077 		if (!callout_pending(&peer->p_send_keepalive)) {
1078 			callout_reset(&peer->p_send_keepalive,
1079 				      KEEPALIVE_TIMEOUT * hz,
1080 				      wg_timers_run_send_keepalive, peer);
1081 		} else {
1082 			atomic_store_bool(&peer->p_need_another_keepalive,
1083 					  true);
1084 		}
1085 	}
1086 }
1087 
1088 static void
1089 wg_timers_event_any_authenticated_packet_sent(struct wg_peer *peer)
1090 {
1091 	callout_stop(&peer->p_send_keepalive);
1092 }
1093 
1094 static void
1095 wg_timers_event_any_authenticated_packet_received(struct wg_peer *peer)
1096 {
1097 	callout_stop(&peer->p_new_handshake);
1098 }
1099 
1100 static void
1101 wg_timers_event_any_authenticated_packet_traversal(struct wg_peer *peer)
1102 {
1103 	uint16_t interval;
1104 
1105 	interval = atomic_load_16(&peer->p_persistent_keepalive_interval);
1106 	if (atomic_load_bool(&peer->p_enabled) && interval > 0) {
1107 		callout_reset(&peer->p_persistent_keepalive, interval * hz,
1108 			      wg_timers_run_persistent_keepalive, peer);
1109 	}
1110 }
1111 
1112 static void
1113 wg_timers_event_handshake_initiated(struct wg_peer *peer)
1114 {
1115 	int ticks;
1116 
1117 	if (atomic_load_bool(&peer->p_enabled)) {
1118 		ticks = REKEY_TIMEOUT * hz + REKEY_TIMEOUT_JITTER * hz / 1000;
1119 		callout_reset(&peer->p_retry_handshake, ticks,
1120 			      wg_timers_run_retry_handshake, peer);
1121 	}
1122 }
1123 
1124 static void
1125 wg_timers_event_handshake_complete(struct wg_peer *peer)
1126 {
1127 	if (atomic_load_bool(&peer->p_enabled)) {
1128 		lockmgr(&peer->p_handshake_mtx, LK_EXCLUSIVE);
1129 		callout_stop(&peer->p_retry_handshake);
1130 		peer->p_handshake_retries = 0;
1131 		getnanotime(&peer->p_handshake_complete);
1132 		lockmgr(&peer->p_handshake_mtx, LK_RELEASE);
1133 
1134 		wg_timers_run_send_keepalive(peer);
1135 	}
1136 }
1137 
1138 static void
1139 wg_timers_event_session_derived(struct wg_peer *peer)
1140 {
1141 	if (atomic_load_bool(&peer->p_enabled)) {
1142 		callout_reset(&peer->p_zero_key_material,
1143 			      REJECT_AFTER_TIME * 3 * hz,
1144 			      wg_timers_run_zero_key_material, peer);
1145 	}
1146 }
1147 
1148 static void
1149 wg_timers_event_want_initiation(struct wg_peer *peer)
1150 {
1151 	if (atomic_load_bool(&peer->p_enabled))
1152 		wg_timers_run_send_initiation(peer, false);
1153 }
1154 
1155 static void
1156 wg_timers_run_send_initiation(struct wg_peer *peer, bool is_retry)
1157 {
1158 	if (!is_retry)
1159 		peer->p_handshake_retries = 0;
1160 	if (noise_remote_initiation_expired(peer->p_remote))
1161 		wg_send_initiation(peer);
1162 }
1163 
1164 static void
1165 wg_timers_run_retry_handshake(void *_peer)
1166 {
1167 	struct wg_peer *peer = _peer;
1168 
1169 	lockmgr(&peer->p_handshake_mtx, LK_EXCLUSIVE);
1170 	if (peer->p_handshake_retries <= MAX_TIMER_HANDSHAKES) {
1171 		peer->p_handshake_retries++;
1172 		lockmgr(&peer->p_handshake_mtx, LK_RELEASE);
1173 
1174 		DPRINTF(peer->p_sc, "Handshake for peer %ld did not complete "
1175 			"after %d seconds, retrying (try %d)\n", peer->p_id,
1176 			REKEY_TIMEOUT, peer->p_handshake_retries + 1);
1177 		wg_timers_run_send_initiation(peer, true);
1178 	} else {
1179 		lockmgr(&peer->p_handshake_mtx, LK_RELEASE);
1180 
1181 		DPRINTF(peer->p_sc, "Handshake for peer %ld did not complete "
1182 			"after %d retries, giving up\n", peer->p_id,
1183 			MAX_TIMER_HANDSHAKES + 2);
1184 		callout_stop(&peer->p_send_keepalive);
1185 		wg_queue_purge(&peer->p_stage_queue);
1186 		if (atomic_load_bool(&peer->p_enabled) &&
1187 		    !callout_pending(&peer->p_zero_key_material)) {
1188 			callout_reset(&peer->p_zero_key_material,
1189 				      REJECT_AFTER_TIME * 3 * hz,
1190 				      wg_timers_run_zero_key_material, peer);
1191 		}
1192 	}
1193 }
1194 
1195 static void
1196 wg_timers_run_send_keepalive(void *_peer)
1197 {
1198 	struct wg_peer *peer = _peer;
1199 
1200 	wg_send_keepalive(peer);
1201 
1202 	if (atomic_load_bool(&peer->p_enabled) &&
1203 	    atomic_load_bool(&peer->p_need_another_keepalive)) {
1204 		atomic_store_bool(&peer->p_need_another_keepalive, false);
1205 		callout_reset(&peer->p_send_keepalive, KEEPALIVE_TIMEOUT * hz,
1206 			      wg_timers_run_send_keepalive, peer);
1207 	}
1208 }
1209 
1210 static void
1211 wg_timers_run_persistent_keepalive(void *_peer)
1212 {
1213 	struct wg_peer *peer = _peer;
1214 
1215 	if (atomic_load_16(&peer->p_persistent_keepalive_interval) > 0)
1216 		wg_send_keepalive(peer);
1217 }
1218 
1219 static void
1220 wg_timers_run_new_handshake(void *_peer)
1221 {
1222 	struct wg_peer *peer = _peer;
1223 
1224 	DPRINTF(peer->p_sc, "Retrying handshake with peer %ld, "
1225 		"because we stopped hearing back after %d seconds\n",
1226 		peer->p_id, NEW_HANDSHAKE_TIMEOUT);
1227 	wg_timers_run_send_initiation(peer, false);
1228 }
1229 
1230 static void
1231 wg_timers_run_zero_key_material(void *_peer)
1232 {
1233 	struct wg_peer *peer = _peer;
1234 
1235 	DPRINTF(peer->p_sc, "Zeroing out keys for peer %ld, "
1236 		"since we haven't received a new one in %d seconds\n",
1237 		peer->p_id, REJECT_AFTER_TIME * 3);
1238 	noise_remote_keypairs_clear(peer->p_remote);
1239 }
1240 
1241 /*----------------------------------------------------------------------------*/
1242 /* Handshake */
1243 
1244 static void
1245 wg_peer_send_buf(struct wg_peer *peer, const void *buf, size_t len)
1246 {
1247 	struct wg_endpoint endpoint;
1248 
1249 	peer->p_tx_bytes[mycpuid] += len;
1250 
1251 	wg_timers_event_any_authenticated_packet_traversal(peer);
1252 	wg_timers_event_any_authenticated_packet_sent(peer);
1253 
1254 	wg_peer_get_endpoint(peer, &endpoint);
1255 	wg_send_buf(peer->p_sc, &endpoint, buf, len);
1256 }
1257 
1258 static void
1259 wg_send_initiation(struct wg_peer *peer)
1260 {
1261 	struct wg_pkt_initiation pkt;
1262 
1263 	if (!noise_create_initiation(peer->p_remote, &pkt.s_idx, pkt.ue,
1264 				     pkt.es, pkt.ets))
1265 		return;
1266 
1267 	DPRINTF(peer->p_sc, "Sending handshake initiation to peer %ld\n",
1268 		peer->p_id);
1269 
1270 	pkt.t = WG_PKT_INITIATION;
1271 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1272 			 sizeof(pkt) - sizeof(pkt.m));
1273 	wg_peer_send_buf(peer, &pkt, sizeof(pkt));
1274 	wg_timers_event_handshake_initiated(peer);
1275 }
1276 
1277 static void
1278 wg_send_response(struct wg_peer *peer)
1279 {
1280 	struct wg_pkt_response pkt;
1281 
1282 	if (!noise_create_response(peer->p_remote, &pkt.s_idx, &pkt.r_idx,
1283 				   pkt.ue, pkt.en))
1284 		return;
1285 
1286 	DPRINTF(peer->p_sc, "Sending handshake response to peer %ld\n",
1287 		peer->p_id);
1288 
1289 	wg_timers_event_session_derived(peer);
1290 	pkt.t = WG_PKT_RESPONSE;
1291 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1292 			 sizeof(pkt) - sizeof(pkt.m));
1293 	wg_peer_send_buf(peer, &pkt, sizeof(pkt));
1294 }
1295 
1296 static void
1297 wg_send_cookie(struct wg_softc *sc, struct cookie_macs *cm, uint32_t idx,
1298 	       struct wg_endpoint *e)
1299 {
1300 	struct wg_pkt_cookie pkt;
1301 
1302 	DPRINTF(sc, "Sending cookie response for denied handshake message\n");
1303 
1304 	pkt.t = WG_PKT_COOKIE;
1305 	pkt.r_idx = idx;
1306 
1307 	cookie_checker_create_payload(&sc->sc_cookie, cm, pkt.nonce,
1308 				      pkt.ec, &e->e_remote.r_sa);
1309 	wg_send_buf(sc, e, &pkt, sizeof(pkt));
1310 }
1311 
1312 static void
1313 wg_send_keepalive(struct wg_peer *peer)
1314 {
1315 	struct wg_packet *pkt;
1316 	struct mbuf *m;
1317 
1318 	if (wg_queue_len(&peer->p_stage_queue) > 0)
1319 		goto send;
1320 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1321 		return;
1322 	if ((pkt = wg_packet_alloc(m)) == NULL) {
1323 		m_freem(m);
1324 		return;
1325 	}
1326 
1327 	wg_queue_push_staged(&peer->p_stage_queue, pkt);
1328 	DPRINTF(peer->p_sc, "Sending keepalive packet to peer %ld\n",
1329 		peer->p_id);
1330 send:
1331 	wg_peer_send_staged(peer);
1332 }
1333 
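/*
 * Handle a received handshake message (initiation, response, or cookie).
 * "underload" is true when the handshake queue has grown beyond 1/8 of
 * MAX_QUEUED_HANDSHAKES (or did so within the last UNDERLOAD_TIMEOUT
 * seconds); cookie_checker_validate_macs() then also demands a valid
 * mac2, and EAGAIN causes a cookie reply to be sent instead.
 */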
1334 static void
1335 wg_handshake(struct wg_softc *sc, struct wg_packet *pkt)
1336 {
1337 	static struct timespec		 wg_last_underload; /* nanouptime */
1338 	struct wg_pkt_initiation	*init;
1339 	struct wg_pkt_response		*resp;
1340 	struct wg_pkt_cookie		*cook;
1341 	struct wg_endpoint		*e;
1342 	struct wg_peer			*peer;
1343 	struct mbuf			*m;
1344 	struct timespec			 now;
1345 	struct noise_remote		*remote = NULL;
1346 	bool				 underload;
1347 	int				 ret;
1348 
1349 	underload = (wg_queue_len(&sc->sc_handshake_queue) >=
1350 		     MAX_QUEUED_HANDSHAKES / 8);
1351 	if (underload) {
1352 		getnanouptime(&wg_last_underload);
1353 	} else if (timespecisset(&wg_last_underload)) {
1354 		getnanouptime(&now);
1355 		now.tv_sec -= UNDERLOAD_TIMEOUT;
1356 		underload = timespeccmp(&wg_last_underload, &now, >);
1357 		if (!underload)
1358 			timespecclear(&wg_last_underload);
1359 	}
1360 
1361 	m = pkt->p_mbuf;
1362 	e = &pkt->p_endpoint;
1363 
1364 	if ((pkt->p_mbuf = m = m_pullup(m, m->m_pkthdr.len)) == NULL)
1365 		goto error;
1366 
1367 	switch (*mtod(m, uint32_t *)) {
1368 	case WG_PKT_INITIATION:
1369 		init = mtod(m, struct wg_pkt_initiation *);
1370 
1371 		ret = cookie_checker_validate_macs(&sc->sc_cookie, &init->m,
1372 		    init, sizeof(*init) - sizeof(init->m), underload,
1373 		    &e->e_remote.r_sa);
1374 		if (ret == EINVAL) {
1375 			DPRINTF(sc, "Invalid initiation MAC\n");
1376 			goto error;
1377 		} else if (ret == ECONNREFUSED) {
1378 			DPRINTF(sc, "Handshake ratelimited\n");
1379 			goto error;
1380 		} else if (ret == EAGAIN) {
1381 			wg_send_cookie(sc, &init->m, init->s_idx, e);
1382 			goto error;
1383 		} else if (ret != 0) {
1384 			panic("%s: unexpected return: %d", __func__, ret);
1385 		}
1386 
1387 		remote = noise_consume_initiation(sc->sc_local, init->s_idx,
1388 						  init->ue, init->es,
1389 						  init->ets);
1390 		if (remote == NULL) {
1391 			DPRINTF(sc, "Invalid handshake initiation\n");
1392 			goto error;
1393 		}
1394 
1395 		peer = noise_remote_arg(remote);
1396 		DPRINTF(sc, "Receiving handshake initiation from peer %ld\n",
1397 			peer->p_id);
1398 
1399 		wg_peer_set_endpoint(peer, e);
1400 		wg_send_response(peer);
1401 		break;
1402 
1403 	case WG_PKT_RESPONSE:
1404 		resp = mtod(m, struct wg_pkt_response *);
1405 
1406 		ret = cookie_checker_validate_macs(&sc->sc_cookie, &resp->m,
1407 		    resp, sizeof(*resp) - sizeof(resp->m), underload,
1408 		    &e->e_remote.r_sa);
1409 		if (ret == EINVAL) {
1410 			DPRINTF(sc, "Invalid response MAC\n");
1411 			goto error;
1412 		} else if (ret == ECONNREFUSED) {
1413 			DPRINTF(sc, "Handshake ratelimited\n");
1414 			goto error;
1415 		} else if (ret == EAGAIN) {
1416 			wg_send_cookie(sc, &resp->m, resp->s_idx, e);
1417 			goto error;
1418 		} else if (ret != 0) {
1419 			panic("%s: unexpected return: %d", __func__, ret);
1420 		}
1421 
1422 		remote = noise_consume_response(sc->sc_local, resp->s_idx,
1423 						resp->r_idx, resp->ue,
1424 						resp->en);
1425 		if (remote == NULL) {
1426 			DPRINTF(sc, "Invalid handshake response\n");
1427 			goto error;
1428 		}
1429 
1430 		peer = noise_remote_arg(remote);
1431 		DPRINTF(sc, "Receiving handshake response from peer %ld\n",
1432 			peer->p_id);
1433 
1434 		wg_peer_set_endpoint(peer, e);
1435 		wg_timers_event_session_derived(peer);
1436 		wg_timers_event_handshake_complete(peer);
1437 		break;
1438 
1439 	case WG_PKT_COOKIE:
1440 		cook = mtod(m, struct wg_pkt_cookie *);
1441 
1442 		remote = noise_remote_index(sc->sc_local, cook->r_idx);
1443 		if (remote == NULL) {
1444 			DPRINTF(sc, "Unknown cookie index\n");
1445 			goto error;
1446 		}
1447 
1448 		peer = noise_remote_arg(remote);
1449 		if (cookie_maker_consume_payload(&peer->p_cookie, cook->nonce,
1450 						 cook->ec) == 0) {
1451 			DPRINTF(sc, "Receiving cookie response\n");
1452 		} else {
1453 			DPRINTF(sc, "Could not decrypt cookie response\n");
1454 			goto error;
1455 		}
1456 
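		/* Cookie replies are not authenticated; skip the timer events. */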
1457 		goto not_authenticated;
1458 
1459 	default:
1460 		panic("%s: invalid packet in handshake queue", __func__);
1461 	}
1462 
1463 	wg_timers_event_any_authenticated_packet_received(peer);
1464 	wg_timers_event_any_authenticated_packet_traversal(peer);
1465 
1466 not_authenticated:
1467 	peer->p_rx_bytes[mycpuid] += m->m_pkthdr.len;
1468 	IFNET_STAT_INC(sc->sc_ifp, ipackets, 1);
1469 	IFNET_STAT_INC(sc->sc_ifp, ibytes, m->m_pkthdr.len);
1470 error:
1471 	if (remote != NULL)
1472 		noise_remote_put(remote);
1473 	wg_packet_free(pkt);
1474 }
1475 
1476 static void
1477 wg_softc_handshake_receive(void *arg, int pending __unused)
1478 {
1479 	struct wg_softc		*sc = arg;
1480 	struct wg_queue		*queue = &sc->sc_handshake_queue;
1481 	struct wg_packet	*pkt;
1482 
1483 	while ((pkt = wg_queue_dequeue_handshake(queue)) != NULL)
1484 		wg_handshake(sc, pkt);
1485 }
1486 
1487 /*----------------------------------------------------------------------------*/
1488 /* Transport Packet Functions */
1489 
1490 static inline unsigned int
1491 calculate_padding(struct wg_packet *pkt)
1492 {
1493 	unsigned int padded_size, last_unit;
1494 
1495 	last_unit = pkt->p_mbuf->m_pkthdr.len;
1496 
1497 	/* Keepalive packets don't set p_mtu, but also have a length of zero. */
1498 	if (__predict_false(pkt->p_mtu == 0))
1499 		return WG_PKT_WITH_PADDING(last_unit) - last_unit;
1500 
1501 	/*
1502 	 * Just in case the packet is bigger than the MTU and would cause
1503 	 * the final subtraction to overflow.
1504 	 */
1505 	if (__predict_false(last_unit > pkt->p_mtu))
1506 		last_unit %= pkt->p_mtu;
1507 
1508 	padded_size = MIN(pkt->p_mtu, WG_PKT_WITH_PADDING(last_unit));
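	/* e.g., a 100-byte packet with p_mtu 1420 gets 12 bytes of padding. */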
1509 	return (padded_size - last_unit);
1510 }
1511 
1512 static void
1513 wg_encrypt(struct wg_softc *sc, struct wg_packet *pkt)
1514 {
1515 	static const uint8_t	 padding[WG_PKT_PADDING] = { 0 };
1516 	struct wg_pkt_data	*data;
1517 	struct wg_peer		*peer;
1518 	struct noise_remote	*remote;
1519 	struct mbuf		*m;
1520 	uint32_t		 idx;
1521 	unsigned int		 padlen;
1522 	enum wg_packet_state	 state = WG_PACKET_DEAD;
1523 
1524 	remote = noise_keypair_remote(pkt->p_keypair);
1525 	peer = noise_remote_arg(remote);
1526 	m = pkt->p_mbuf;
1527 
1528 	padlen = calculate_padding(pkt);
1529 	if (padlen != 0 && !m_append(m, padlen, padding))
1530 		goto out;
1531 
1532 	if (noise_keypair_encrypt(pkt->p_keypair, &idx, pkt->p_counter, m) != 0)
1533 		goto out;
1534 
1535 	M_PREPEND(m, sizeof(struct wg_pkt_data), M_NOWAIT);
1536 	if (m == NULL)
1537 		goto out;
1538 	data = mtod(m, struct wg_pkt_data *);
1539 	data->t = WG_PKT_DATA;
1540 	data->r_idx = idx;
1541 	data->counter = htole64(pkt->p_counter);
1542 
1543 	/* Reset mbuf flags. */
1544 	m->m_flags &= ~MBUF_CLEARFLAGS;
1545 
1546 	state = WG_PACKET_CRYPTED;
1547 
1548 out:
1549 	pkt->p_mbuf = m;
1550 	cpu_sfence(); /* Update p_state only after p_mbuf. */
1551 	pkt->p_state = state;
1552 	taskqueue_enqueue(peer->p_send_taskqueue, &peer->p_send_task);
1553 	noise_remote_put(remote);
1554 }
1555 
1556 static void
1557 wg_decrypt(struct wg_softc *sc, struct wg_packet *pkt)
1558 {
1559 	struct wg_peer		*peer, *allowed_peer;
1560 	struct noise_remote	*remote;
1561 	struct mbuf		*m;
1562 	int			 len;
1563 	enum wg_packet_state	 state = WG_PACKET_DEAD;
1564 
1565 	remote = noise_keypair_remote(pkt->p_keypair);
1566 	peer = noise_remote_arg(remote);
1567 	m = pkt->p_mbuf;
1568 
1569 	pkt->p_counter = le64toh(mtod(m, struct wg_pkt_data *)->counter);
1570 	m_adj(m, sizeof(struct wg_pkt_data));
1571 
1572 	if (noise_keypair_decrypt(pkt->p_keypair, pkt->p_counter, m) != 0)
1573 		goto out;
1574 
1575 	/* A packet with length 0 is a keepalive packet. */
1576 	if (__predict_false(m->m_pkthdr.len == 0)) {
1577 		DPRINTF(sc, "Receiving keepalive packet from peer %ld\n",
1578 			peer->p_id);
1579 		state = WG_PACKET_CRYPTED;
1580 		goto out;
1581 	}
1582 
1583 	/*
1584 	 * We can let the network stack handle the intricate validation of the
1585 	 * IP header; we just check the size and the version so that we can
1586 	 * read the source address for wg_aip_lookup().
1587 	 */
1588 	if (determine_af_and_pullup(&m, &pkt->p_af) == 0) {
1589 		KKASSERT(pkt->p_af == AF_INET || pkt->p_af == AF_INET6);
1590 		if (pkt->p_af == AF_INET) {
1591 			struct ip *ip = mtod(m, struct ip *);
1592 			allowed_peer = wg_aip_lookup(sc, AF_INET, &ip->ip_src);
1593 			len = ntohs(ip->ip_len);
1594 			if (len >= sizeof(struct ip) && len < m->m_pkthdr.len)
1595 				m_adj(m, len - m->m_pkthdr.len);
1596 		} else {
1597 			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1598 			allowed_peer = wg_aip_lookup(sc, AF_INET6, &ip6->ip6_src);
1599 			len = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);
1600 			if (len < m->m_pkthdr.len)
1601 				m_adj(m, len - m->m_pkthdr.len);
1602 		}
1603 	} else {
1604 		DPRINTF(sc, "Packet is neither IPv4 nor IPv6 from peer %ld\n",
1605 			peer->p_id);
1606 		goto out;
1607 	}
1608 
1609 	/* Drop the reference; allowed_peer is only used for the comparison below. */
1610 	if (allowed_peer != NULL)
1611 		noise_remote_put(allowed_peer->p_remote);
1612 
1613 	if (__predict_false(peer != allowed_peer)) {
1614 		DPRINTF(sc, "Packet has disallowed src IP from peer %ld\n",
1615 			peer->p_id);
1616 		goto out;
1617 	}
1618 
1619 	/* Reset mbuf flags. */
1620 	m->m_flags &= ~MBUF_CLEARFLAGS;
1621 	m->m_pkthdr.csum_flags = 0; /* Tunneled packet was not offloaded. */
1622 
1623 	state = WG_PACKET_CRYPTED;
1624 
1625 out:
1626 	pkt->p_mbuf = m;
1627 	cpu_sfence(); /* Update p_state only after p_mbuf. */
1628 	pkt->p_state = state;
1629 	taskqueue_enqueue(peer->p_recv_taskqueue, &peer->p_recv_task);
1630 	noise_remote_put(remote);
1631 }
1632 
1633 static void
1634 wg_softc_encrypt(void *arg, int pending __unused)
1635 {
1636 	struct wg_softc		*sc = arg;
1637 	struct wg_queue		*queue = &sc->sc_encrypt_parallel;
1638 	struct wg_packet	*pkt;
1639 
1640 	while ((pkt = wg_queue_dequeue_parallel(queue)) != NULL)
1641 		wg_encrypt(sc, pkt);
1642 }
1643 
1644 static void
1645 wg_softc_decrypt(void *arg, int pending __unused)
1646 {
1647 	struct wg_softc		*sc = arg;
1648 	struct wg_queue		*queue = &sc->sc_decrypt_parallel;
1649 	struct wg_packet	*pkt;
1650 
1651 	while ((pkt = wg_queue_dequeue_parallel(queue)) != NULL)
1652 		wg_decrypt(sc, pkt);
1653 }
1654 
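/*
 * Distribute the parallel crypto work round-robin across all CPUs.  Each
 * per-CPU task drains the shared sc_{en,de}crypt_parallel queue, so packets
 * queued from one CPU may still be processed by another.
 */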
1655 static void
1656 wg_encrypt_dispatch(struct wg_softc *sc)
1657 {
1658 	int cpu;
1659 
1660 	/*
1661 	 * The update to encrypt_last_cpu is racy such that we may
1662 	 * reschedule the task for the same CPU multiple times, but
1663 	 * the race doesn't really matter.
1664 	 */
1665 	cpu = (sc->sc_encrypt_last_cpu + 1) % ncpus;
1666 	sc->sc_encrypt_last_cpu = cpu;
1667 	taskqueue_enqueue(wg_taskqueues[cpu], &sc->sc_encrypt_tasks[cpu]);
1668 }
1669 
1670 static void
1671 wg_decrypt_dispatch(struct wg_softc *sc)
1672 {
1673 	int cpu;
1674 
1675 	cpu = (sc->sc_decrypt_last_cpu + 1) % ncpus;
1676 	sc->sc_decrypt_last_cpu = cpu;
1677 	taskqueue_enqueue(wg_taskqueues[cpu], &sc->sc_decrypt_tasks[cpu]);
1678 }
1679 
1680 static void
1681 wg_deliver_out(void *arg, int pending __unused)
1682 {
1683 	struct wg_peer		*peer = arg;
1684 	struct wg_softc		*sc = peer->p_sc;
1685 	struct wg_queue		*queue = &peer->p_encrypt_serial;
1686 	struct wg_endpoint	 endpoint;
1687 	struct wg_packet	*pkt;
1688 	struct mbuf		*m;
1689 	int			 ret, len;
1690 
1691 	while ((pkt = wg_queue_dequeue_serial(queue)) != NULL) {
1692 		if (pkt->p_state != WG_PACKET_CRYPTED)
1693 			goto error;
1694 
1695 		m = pkt->p_mbuf;
1696 		pkt->p_mbuf = NULL;
1697 		len = m->m_pkthdr.len;
1698 
1699 		wg_timers_event_any_authenticated_packet_traversal(peer);
1700 		wg_timers_event_any_authenticated_packet_sent(peer);
1701 		wg_peer_get_endpoint(peer, &endpoint);
1702 		ret = wg_send(sc, &endpoint, m);
1703 		if (ret != 0)
1704 			goto error;
1705 
1706 		peer->p_tx_bytes[mycpuid] += len;
1707 		if (len > WG_PKT_DATA_MINLEN)
1708 			wg_timers_event_data_sent(peer);
1709 		if (noise_keep_key_fresh_send(peer->p_remote))
1710 			wg_timers_event_want_initiation(peer);
1711 		wg_packet_free(pkt);
1712 		continue;
1713 
1714 error:
1715 		IFNET_STAT_INC(sc->sc_ifp, oerrors, 1);
1716 		wg_packet_free(pkt);
1717 	}
1718 }
1719 
1720 static void
1721 wg_deliver_in(void *arg, int pending __unused)
1722 {
1723 	struct wg_peer		*peer = arg;
1724 	struct wg_softc		*sc = peer->p_sc;
1725 	struct wg_queue		*queue = &peer->p_decrypt_serial;
1726 	struct wg_packet	*pkt;
1727 	struct ifnet		*ifp;
1728 	struct mbuf		*m;
1729 	size_t			 rx_bytes;
1730 
1731 	ifp = sc->sc_ifp;
1732 
1733 	while ((pkt = wg_queue_dequeue_serial(queue)) != NULL) {
1734 		if (pkt->p_state != WG_PACKET_CRYPTED ||
1735 		    !noise_keypair_counter_check(pkt->p_keypair,
1736 						 pkt->p_counter)) {
1737 			IFNET_STAT_INC(ifp, ierrors, 1);
1738 			wg_packet_free(pkt);
1739 			continue;
1740 		}
1741 
1742 		if (noise_keypair_received_with(pkt->p_keypair))
1743 			wg_timers_event_handshake_complete(peer);
1744 
1745 		wg_timers_event_any_authenticated_packet_received(peer);
1746 		wg_timers_event_any_authenticated_packet_traversal(peer);
1747 		wg_peer_set_endpoint(peer, &pkt->p_endpoint);
1748 
1749 		m = pkt->p_mbuf;
1750 		rx_bytes = m->m_pkthdr.len + sizeof(struct wg_pkt_data) +
1751 			   NOISE_AUTHTAG_LEN;
1752 		peer->p_rx_bytes[mycpuid] += rx_bytes;
1753 		IFNET_STAT_INC(ifp, ipackets, 1);
1754 		IFNET_STAT_INC(ifp, ibytes, rx_bytes);
1755 
1756 		if (m->m_pkthdr.len == 0)
1757 			goto done;
1758 
1759 		pkt->p_mbuf = NULL;
1760 		m->m_pkthdr.rcvif = ifp;
1761 		BPF_MTAP_AF(ifp, m, pkt->p_af);
1762 
1763 		KKASSERT(pkt->p_af == AF_INET || pkt->p_af == AF_INET6);
1764 		if (pkt->p_af == AF_INET)
1765 			netisr_queue(NETISR_IP, m);
1766 		else
1767 			netisr_queue(NETISR_IPV6, m);
1768 
1769 		wg_timers_event_data_received(peer);
1770 
1771 done:
1772 		if (noise_keep_key_fresh_recv(peer->p_remote))
1773 			wg_timers_event_want_initiation(peer);
1774 		wg_packet_free(pkt);
1775 	}
1776 }
1777 
1778 static struct wg_packet *
1779 wg_packet_alloc(struct mbuf *m)
1780 {
1781 	struct wg_packet *pkt;
1782 
1783 	if ((pkt = objcache_get(wg_packet_zone, M_NOWAIT)) == NULL)
1784 		return (NULL);
1785 	bzero(pkt, sizeof(*pkt)); /* objcache_get() doesn't ensure M_ZERO. */
1786 	pkt->p_mbuf = m;
1787 
1788 	return (pkt);
1789 }
1790 
1791 static void
1792 wg_packet_free(struct wg_packet *pkt)
1793 {
1794 	if (pkt->p_keypair != NULL)
1795 		noise_keypair_put(pkt->p_keypair);
1796 	if (pkt->p_mbuf != NULL)
1797 		m_freem(pkt->p_mbuf);
1798 	objcache_put(wg_packet_zone, pkt);
1799 }
1800 
1801 static void
1802 wg_queue_init(struct wg_queue *queue, const char *name)
1803 {
1804 	lockinit(&queue->q_mtx, name, 0, 0);
1805 	STAILQ_INIT(&queue->q_queue);
1806 	queue->q_len = 0;
1807 }
1808 
1809 static void
1810 wg_queue_deinit(struct wg_queue *queue)
1811 {
1812 	wg_queue_purge(queue);
1813 	lockuninit(&queue->q_mtx);
1814 }
1815 
1816 static size_t
1817 wg_queue_len(const struct wg_queue *queue)
1818 {
1819 	return (queue->q_len);
1820 }
1821 
1822 static bool
1823 wg_queue_enqueue_handshake(struct wg_queue *hs, struct wg_packet *pkt)
1824 {
1825 	bool ok = false;
1826 
1827 	lockmgr(&hs->q_mtx, LK_EXCLUSIVE);
1828 	if (hs->q_len < MAX_QUEUED_HANDSHAKES) {
1829 		STAILQ_INSERT_TAIL(&hs->q_queue, pkt, p_parallel);
1830 		hs->q_len++;
1831 		ok = true;
1832 	}
1833 	lockmgr(&hs->q_mtx, LK_RELEASE);
1834 
1835 	if (!ok)
1836 		wg_packet_free(pkt);
1837 
1838 	return (ok);
1839 }
1840 
1841 static struct wg_packet *
1842 wg_queue_dequeue_handshake(struct wg_queue *hs)
1843 {
1844 	struct wg_packet *pkt;
1845 
1846 	lockmgr(&hs->q_mtx, LK_EXCLUSIVE);
1847 	if ((pkt = STAILQ_FIRST(&hs->q_queue)) != NULL) {
1848 		STAILQ_REMOVE_HEAD(&hs->q_queue, p_parallel);
1849 		hs->q_len--;
1850 	}
1851 	lockmgr(&hs->q_mtx, LK_RELEASE);
1852 
1853 	return (pkt);
1854 }
1855 
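/*
 * Append a packet to the peer's staged queue.  When the queue is already
 * at MAX_STAGED_PKT, the oldest staged packet is dropped to make room,
 * so the queue holds only the most recently staged packets.
 */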
1856 static void
1857 wg_queue_push_staged(struct wg_queue *staged, struct wg_packet *pkt)
1858 {
1859 	struct wg_packet *old = NULL;
1860 
1861 	lockmgr(&staged->q_mtx, LK_EXCLUSIVE);
1862 	if (staged->q_len >= MAX_STAGED_PKT) {
1863 		old = STAILQ_FIRST(&staged->q_queue);
1864 		STAILQ_REMOVE_HEAD(&staged->q_queue, p_parallel);
1865 		staged->q_len--;
1866 	}
1867 	STAILQ_INSERT_TAIL(&staged->q_queue, pkt, p_parallel);
1868 	staged->q_len++;
1869 	lockmgr(&staged->q_mtx, LK_RELEASE);
1870 
1871 	if (old != NULL)
1872 		wg_packet_free(old);
1873 }
1874 
1875 static void
1876 wg_queue_enlist_staged(struct wg_queue *staged, struct wg_packet_list *list)
1877 {
1878 	struct wg_packet *pkt, *tpkt;
1879 
1880 	STAILQ_FOREACH_MUTABLE(pkt, list, p_parallel, tpkt)
1881 		wg_queue_push_staged(staged, pkt);
1882 }
1883 
1884 static void
1885 wg_queue_delist_staged(struct wg_queue *staged, struct wg_packet_list *list)
1886 {
1887 	STAILQ_INIT(list);
1888 	lockmgr(&staged->q_mtx, LK_EXCLUSIVE);
1889 	STAILQ_CONCAT(list, &staged->q_queue);
1890 	staged->q_len = 0;
1891 	lockmgr(&staged->q_mtx, LK_RELEASE);
1892 }
1893 
1894 static void
1895 wg_queue_purge(struct wg_queue *staged)
1896 {
1897 	struct wg_packet_list list;
1898 	struct wg_packet *pkt, *tpkt;
1899 
1900 	wg_queue_delist_staged(staged, &list);
1901 	STAILQ_FOREACH_MUTABLE(pkt, &list, p_parallel, tpkt)
1902 		wg_packet_free(pkt);
1903 }
1904 
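/*
 * Enqueue a packet on both the per-peer serial queue (which preserves the
 * original packet order) and the shared parallel queue (from which the
 * per-CPU crypto tasks pull work).  Returns false and drops the packet if
 * either queue is full; a packet that only made it onto the serial queue
 * is marked WG_PACKET_DEAD and reaped later by the deliver path.
 */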
1905 static bool
1906 wg_queue_both(struct wg_queue *parallel, struct wg_queue *serial,
1907 	      struct wg_packet *pkt)
1908 {
1909 	pkt->p_state = WG_PACKET_UNCRYPTED;
1910 
1911 	lockmgr(&serial->q_mtx, LK_EXCLUSIVE);
1912 	if (serial->q_len < MAX_QUEUED_PKT) {
1913 		serial->q_len++;
1914 		STAILQ_INSERT_TAIL(&serial->q_queue, pkt, p_serial);
1915 	} else {
1916 		lockmgr(&serial->q_mtx, LK_RELEASE);
1917 		wg_packet_free(pkt);
1918 		return (false);
1919 	}
1920 	lockmgr(&serial->q_mtx, LK_RELEASE);
1921 
1922 	lockmgr(&parallel->q_mtx, LK_EXCLUSIVE);
1923 	if (parallel->q_len < MAX_QUEUED_PKT) {
1924 		parallel->q_len++;
1925 		STAILQ_INSERT_TAIL(&parallel->q_queue, pkt, p_parallel);
1926 	} else {
1927 		lockmgr(&parallel->q_mtx, LK_RELEASE);
1928 		pkt->p_state = WG_PACKET_DEAD;
1929 		return (false);
1930 	}
1931 	lockmgr(&parallel->q_mtx, LK_RELEASE);
1932 
1933 	return (true);
1934 }
1935 
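/*
 * Dequeue from a serial queue only when the packet at its head has
 * finished encryption/decryption (i.e., is no longer WG_PACKET_UNCRYPTED),
 * so completed packets leave the queue in their original order.
 */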
1936 static struct wg_packet *
1937 wg_queue_dequeue_serial(struct wg_queue *serial)
1938 {
1939 	struct wg_packet *pkt = NULL;
1940 
1941 	lockmgr(&serial->q_mtx, LK_EXCLUSIVE);
1942 	if (serial->q_len > 0 &&
1943 	    STAILQ_FIRST(&serial->q_queue)->p_state != WG_PACKET_UNCRYPTED) {
1944 		serial->q_len--;
1945 		pkt = STAILQ_FIRST(&serial->q_queue);
1946 		STAILQ_REMOVE_HEAD(&serial->q_queue, p_serial);
1947 	}
1948 	lockmgr(&serial->q_mtx, LK_RELEASE);
1949 
1950 	return (pkt);
1951 }
1952 
1953 static struct wg_packet *
1954 wg_queue_dequeue_parallel(struct wg_queue *parallel)
1955 {
1956 	struct wg_packet *pkt = NULL;
1957 
1958 	lockmgr(&parallel->q_mtx, LK_EXCLUSIVE);
1959 	if (parallel->q_len > 0) {
1960 		parallel->q_len--;
1961 		pkt = STAILQ_FIRST(&parallel->q_queue);
1962 		STAILQ_REMOVE_HEAD(&parallel->q_queue, p_parallel);
1963 	}
1964 	lockmgr(&parallel->q_mtx, LK_RELEASE);
1965 
1966 	return (pkt);
1967 }
1968 
1969 static void
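/*
 * Receive upcall for the tunnel socket: drain every pending UDP datagram
 * and feed each one, together with its source address, to wg_input().
 */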
1970 wg_upcall(struct socket *so, void *arg, int waitflag __unused)
1971 {
1972 	struct wg_softc		*sc = arg;
1973 	struct sockaddr		*from;
1974 	struct sockbuf		 sio;
1975 	int			 ret, flags;
1976 
1977 	/*
1978 	 * For UDP, soreceive typically pulls just one packet,
1979 	 * so loop to get the whole batch.
1980 	 */
1981 	do {
1982 		sbinit(&sio, 1000000000); /* arbitrarily large, so everything pending is received */
1983 		flags = MSG_DONTWAIT;
1984 		ret = so_pru_soreceive(so, &from, NULL, &sio, NULL, &flags);
1985 		if (ret != 0 || sio.sb_mb == NULL) {
1986 			if (from != NULL)
1987 				kfree(from, M_SONAME);
1988 			break;
1989 		}
1990 		wg_input(sc, sio.sb_mb, from);
1991 		kfree(from, M_SONAME);
1992 	} while (sio.sb_mb != NULL);
1993 }
1994 
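/*
 * Classify a datagram received on the tunnel socket.  Handshake messages
 * (initiation, response, cookie) are queued for the handshake taskqueue;
 * data messages are matched to a keypair via their receiver index and
 * queued for parallel decryption.  Anything else is counted as an error.
 */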
1995 static void
1996 wg_input(struct wg_softc *sc, struct mbuf *m, const struct sockaddr *sa)
1997 {
1998 	struct noise_remote		*remote;
1999 	struct wg_pkt_data		*data;
2000 	struct wg_packet		*pkt;
2001 	struct wg_peer			*peer;
2002 	struct mbuf			*defragged;
2003 	uint32_t			 pkt_type;
2004 	size_t				 pkt_len;
2005 
2006 	defragged = m_defrag(m, M_NOWAIT);
2007 	if (defragged != NULL)
2008 		m = defragged;
2009 
2010 	m = m_unshare(m, M_NOWAIT);
2011 	if (m == NULL) {
2012 		IFNET_STAT_INC(sc->sc_ifp, iqdrops, 1);
2013 		return;
2014 	}
2015 
2016 	/* Pullup enough data to read the packet type. */
2017 	if ((m = m_pullup(m, sizeof(uint32_t))) == NULL) {
2018 		IFNET_STAT_INC(sc->sc_ifp, iqdrops, 1);
2019 		return;
2020 	}
2021 
2022 	if ((pkt = wg_packet_alloc(m)) == NULL) {
2023 		IFNET_STAT_INC(sc->sc_ifp, iqdrops, 1);
2024 		m_freem(m);
2025 		return;
2026 	}
2027 
2028 	/* Save the remote address and port for later use. */
2029 	switch (sa->sa_family) {
2030 	case AF_INET:
2031 		pkt->p_endpoint.e_remote.r_sin =
2032 		    *(const struct sockaddr_in *)sa;
2033 		break;
2034 #ifdef INET6
2035 	case AF_INET6:
2036 		pkt->p_endpoint.e_remote.r_sin6 =
2037 		    *(const struct sockaddr_in6 *)sa;
2038 		break;
2039 #endif
2040 	default:
2041 		goto error;
2042 	}
2043 
2044 	pkt_len = (size_t)m->m_pkthdr.len;
2045 	pkt_type = *mtod(m, uint32_t *);
2046 
2047 	if ((pkt_len == sizeof(struct wg_pkt_initiation) &&
2048 	     pkt_type == WG_PKT_INITIATION) ||
2049 	    (pkt_len == sizeof(struct wg_pkt_response) &&
2050 	     pkt_type == WG_PKT_RESPONSE) ||
2051 	    (pkt_len == sizeof(struct wg_pkt_cookie) &&
2052 	     pkt_type == WG_PKT_COOKIE))
2053 	{
2054 		if (!wg_queue_enqueue_handshake(&sc->sc_handshake_queue, pkt)) {
2055 			IFNET_STAT_INC(sc->sc_ifp, iqdrops, 1);
2056 			DPRINTF(sc, "Dropping handshake packet\n");
2057 		}
2058 		taskqueue_enqueue(sc->sc_handshake_taskqueue,
2059 				  &sc->sc_handshake_task);
2060 		return;
2061 	}
2062 
2063 	if (pkt_len >= WG_PKT_DATA_MINLEN && pkt_type == WG_PKT_DATA) {
2064 		/* Pullup the whole header to read r_idx below. */
2065 		pkt->p_mbuf = m_pullup(m, sizeof(struct wg_pkt_data));
2066 		if (pkt->p_mbuf == NULL)
2067 			goto error;
2068 
2069 		data = mtod(pkt->p_mbuf, struct wg_pkt_data *);
2070 		pkt->p_keypair = noise_keypair_lookup(sc->sc_local,
2071 						      data->r_idx);
2072 		if (pkt->p_keypair == NULL)
2073 			goto error;
2074 
2075 		remote = noise_keypair_remote(pkt->p_keypair);
2076 		peer = noise_remote_arg(remote);
2077 		if (!wg_queue_both(&sc->sc_decrypt_parallel,
2078 				   &peer->p_decrypt_serial, pkt))
2079 			IFNET_STAT_INC(sc->sc_ifp, iqdrops, 1);
2080 
2081 		wg_decrypt_dispatch(sc);
2082 		noise_remote_put(remote);
2083 		return;
2084 	}
2085 
2086 error:
2087 	IFNET_STAT_INC(sc->sc_ifp, ierrors, 1);
2088 	wg_packet_free(pkt);
2089 }
2090 
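/*
 * Move all of a peer's staged packets onto the encryption queues.  Each
 * packet is first assigned a send counter from the current keypair; if
 * there is no current keypair or it can no longer provide counters, the
 * packets are put back on the staged queue and a new handshake is
 * requested.
 */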
2091 static void
2092 wg_peer_send_staged(struct wg_peer *peer)
2093 {
2094 	struct wg_softc		*sc = peer->p_sc;
2095 	struct wg_packet	*pkt, *tpkt;
2096 	struct wg_packet_list	 list;
2097 	struct noise_keypair	*keypair;
2098 
2099 	wg_queue_delist_staged(&peer->p_stage_queue, &list);
2100 
2101 	if (STAILQ_EMPTY(&list))
2102 		return;
2103 
2104 	if ((keypair = noise_keypair_current(peer->p_remote)) == NULL)
2105 		goto error;
2106 
2107 	STAILQ_FOREACH(pkt, &list, p_parallel) {
2108 		if (!noise_keypair_counter_next(keypair, &pkt->p_counter))
2109 			goto error_keypair;
2110 	}
2111 	STAILQ_FOREACH_MUTABLE(pkt, &list, p_parallel, tpkt) {
2112 		pkt->p_keypair = noise_keypair_ref(keypair);
2113 		if (!wg_queue_both(&sc->sc_encrypt_parallel,
2114 				   &peer->p_encrypt_serial, pkt))
2115 			IFNET_STAT_INC(sc->sc_ifp, oqdrops, 1);
2116 	}
2117 	wg_encrypt_dispatch(sc);
2118 	noise_keypair_put(keypair);
2119 	return;
2120 
2121 error_keypair:
2122 	noise_keypair_put(keypair);
2123 error:
2124 	wg_queue_enlist_staged(&peer->p_stage_queue, &list);
2125 	wg_timers_event_want_initiation(peer);
2126 }
2127 
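/*
 * Transmit-error helper: bump the interface error counter and, for
 * IPv4/IPv6, generate a "host unreachable" ICMP error back to the sender
 * (which consumes the mbuf).  Any remaining packet/mbuf is then freed.
 */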
2128 static inline void
2129 xmit_err(struct ifnet *ifp, struct mbuf *m, struct wg_packet *pkt,
2130 	 sa_family_t af)
2131 {
2132 	IFNET_STAT_INC(ifp, oerrors, 1);
2133 
2134 	switch (af) {
2135 	case AF_INET:
2136 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
2137 		if (pkt != NULL)
2138 			pkt->p_mbuf = NULL;
2139 		m = NULL;
2140 		break;
2141 #ifdef INET6
2142 	case AF_INET6:
2143 		icmp6_error(m, ICMP6_DST_UNREACH, 0, 0);
2144 		if (pkt != NULL)
2145 			pkt->p_mbuf = NULL;
2146 		m = NULL;
2147 		break;
2148 #endif
2149 	}
2150 
2151 	if (pkt != NULL)
2152 		wg_packet_free(pkt);
2153 	else if (m != NULL)
2154 		m_freem(m);
2155 }
2156 
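/*
 * Determine the packet's address family from its IP version field and
 * pull up enough of the header for the caller to inspect it.  Returns
 * EAFNOSUPPORT for anything that is neither IPv4 nor IPv6, or ENOBUFS
 * if the pullup fails.
 */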
2157 static inline int
2158 determine_af_and_pullup(struct mbuf **m, sa_family_t *af)
2159 {
2160 	u_char ipv;
2161 
2162 	if ((*m)->m_pkthdr.len >= sizeof(struct ip6_hdr))
2163 		*m = m_pullup(*m, sizeof(struct ip6_hdr));
2164 	else if ((*m)->m_pkthdr.len >= sizeof(struct ip))
2165 		*m = m_pullup(*m, sizeof(struct ip));
2166 	else
2167 		return (EAFNOSUPPORT);
2168 	if (*m == NULL)
2169 		return (ENOBUFS);
2170 
2171 	ipv = mtod(*m, struct ip *)->ip_v;
2172 	if (ipv == 4)
2173 		*af = AF_INET;
2174 	else if (ipv == 6 && (*m)->m_pkthdr.len >= sizeof(struct ip6_hdr))
2175 		*af = AF_INET6;
2176 	else
2177 		return (EAFNOSUPPORT);
2178 
2179 	return (0);
2180 }
2181 
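/*
 * if_output() method: find the peer whose allowed IPs cover the
 * destination address, stage the packet on that peer and kick the
 * encryption machinery.  BPF-injected packets (AF_UNSPEC) carry a
 * DLT_NULL header holding the real address family.
 */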
2182 static int
2183 wg_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
2184 	  struct rtentry *rt)
2185 {
2186 	struct wg_softc		*sc = ifp->if_softc;
2187 	struct wg_packet	*pkt;
2188 	struct wg_peer		*peer;
2189 	struct mbuf		*defragged;
2190 	sa_family_t		 af, peer_af;
2191 	int			 error;
2192 
2193 	if (dst->sa_family == AF_UNSPEC) {
2194 		/*
2195 		 * Specially handle packets written/injected by BPF.
2196 		 * Such packets carry a DLT_NULL link-layer header, i.e.,
2197 		 * a 4-byte address family in host byte order (cf. bpfattach()).
2198 		 */
2199 		dst->sa_family = *(mtod(m, uint32_t *));
2200 		m_adj(m, sizeof(uint32_t));
2201 	}
2202 	if (dst->sa_family == AF_UNSPEC) {
2203 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2204 		error = EAFNOSUPPORT;
2205 		goto err;
2206 	}
2207 
2208 	BPF_MTAP_AF(ifp, m, dst->sa_family);
2209 
2210 	defragged = m_defrag(m, M_NOWAIT);
2211 	if (defragged != NULL)
2212 		m = defragged;
2213 
2214 	m = m_unshare(m, M_NOWAIT);
2215 	if (m == NULL) {
2216 		IFNET_STAT_INC(ifp, oqdrops, 1);
2217 		error = ENOBUFS;
2218 		goto err;
2219 	}
2220 
2221 	error = determine_af_and_pullup(&m, &af);
2222 	if (error) {
2223 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2224 		goto err;
2225 	}
2226 	if (af != dst->sa_family) {
2227 		xmit_err(ifp, m, NULL, AF_UNSPEC);
2228 		error = EAFNOSUPPORT;
2229 		goto err;
2230 	}
2231 
2232 	if ((pkt = wg_packet_alloc(m)) == NULL) {
2233 		error = ENOBUFS;
2234 		goto err_xmit;
2235 	}
2236 
2237 	pkt->p_af = af;
2238 	pkt->p_mtu = ifp->if_mtu;
2239 	if (rt != NULL && rt->rt_rmx.rmx_mtu > 0 &&
2240 	    rt->rt_rmx.rmx_mtu < pkt->p_mtu)
2241 		pkt->p_mtu = rt->rt_rmx.rmx_mtu;
2242 
2243 	if (af == AF_INET) {
2244 		peer = wg_aip_lookup(sc, AF_INET,
2245 		    &mtod(m, struct ip *)->ip_dst);
2246 	} else if (af == AF_INET6) {
2247 		peer = wg_aip_lookup(sc, AF_INET6,
2248 		    &mtod(m, struct ip6_hdr *)->ip6_dst);
2249 	} else {
2250 		error = EAFNOSUPPORT;
2251 		goto err_xmit;
2252 	}
2253 
2254 	if (__predict_false(peer == NULL)) {
2255 		error = ENOKEY;
2256 		goto err_xmit;
2257 	}
2258 
2259 	peer_af = peer->p_endpoint.e_remote.r_sa.sa_family;
2260 	if (__predict_false(peer_af != AF_INET && peer_af != AF_INET6)) {
2261 		DPRINTF(sc, "No valid endpoint has been configured or "
2262 			"discovered for peer %ld\n", peer->p_id);
2263 		error = EHOSTUNREACH;
2264 		goto err_peer;
2265 	}
2266 
2267 	if (__predict_false(m->m_pkthdr.loop_cnt++ > MAX_LOOPS)) {
2268 		DPRINTF(sc, "Packet looped\n");
2269 		error = ELOOP;
2270 		goto err_peer;
2271 	}
2272 
2273 	wg_queue_push_staged(&peer->p_stage_queue, pkt);
2274 	wg_peer_send_staged(peer);
2275 	noise_remote_put(peer->p_remote);
2276 
2277 	return (0);
2278 
2279 err_peer:
2280 	noise_remote_put(peer->p_remote);
2281 err_xmit:
2282 	xmit_err(ifp, m, pkt, af);
2283 err:
2284 	return (error);
2285 }
2286 
2287 /*----------------------------------------------------------------------------*/
2288 /* Interface Functions */
2289 
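/*
 * SIOCGWG handler: copy the interface, peer and allowed-IP configuration
 * out to userland.  If the supplied buffer is too small, only the
 * required size is reported.  Private keys and preshared keys are zeroed
 * for unprivileged callers.
 */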
2290 static int
2291 wg_ioctl_get(struct wg_softc *sc, struct wg_data_io *data, bool privileged)
2292 {
2293 	struct wg_interface_io	*iface_p, iface_o;
2294 	struct wg_peer_io	*peer_p, peer_o;
2295 	struct wg_aip_io	*aip_p, aip_o;
2296 	struct wg_peer		*peer;
2297 	struct wg_aip		*aip;
2298 	size_t			 size, peer_count, aip_count;
2299 	int			 cpu, ret = 0;
2300 
2301 	lockmgr(&sc->sc_lock, LK_SHARED);
2302 
2303 	/* Determine the required data size. */
2304 	size = sizeof(struct wg_interface_io);
2305 	size += sizeof(struct wg_peer_io) * sc->sc_peers_num;
2306 	TAILQ_FOREACH(peer, &sc->sc_peers, p_entry)
2307 		size += sizeof(struct wg_aip_io) * peer->p_aips_num;
2308 
2309 	/* Return the required size for userland allocation. */
2310 	if (data->wgd_size < size) {
2311 		data->wgd_size = size;
2312 		lockmgr(&sc->sc_lock, LK_RELEASE);
2313 		return (0);
2314 	}
2315 
2316 	iface_p = data->wgd_interface;
2317 	bzero(&iface_o, sizeof(iface_o));
2318 	/*
2319 	 * No need to acquire the 'sc_socket.so_lock', because 'sc_lock'
2320 	 * is acquired and that's enough to prevent modifications to
2321 	 * 'sc_socket' members.
2322 	 */
2323 	if (sc->sc_socket.so_port != 0) {
2324 		iface_o.i_port = sc->sc_socket.so_port;
2325 		iface_o.i_flags |= WG_INTERFACE_HAS_PORT;
2326 	}
2327 	if (sc->sc_socket.so_user_cookie != 0) {
2328 		iface_o.i_cookie = sc->sc_socket.so_user_cookie;
2329 		iface_o.i_flags |= WG_INTERFACE_HAS_COOKIE;
2330 	}
2331 	if (noise_local_keys(sc->sc_local, iface_o.i_public,
2332 			     iface_o.i_private)) {
2333 		iface_o.i_flags |= WG_INTERFACE_HAS_PUBLIC;
2334 		if (privileged)
2335 			iface_o.i_flags |= WG_INTERFACE_HAS_PRIVATE;
2336 		else
2337 			bzero(iface_o.i_private, sizeof(iface_o.i_private));
2338 	}
2339 
2340 	peer_count = 0;
2341 	peer_p = &iface_p->i_peers[0];
2342 	TAILQ_FOREACH(peer, &sc->sc_peers, p_entry) {
2343 		bzero(&peer_o, sizeof(peer_o));
2344 
2345 		peer_o.p_flags |= WG_PEER_HAS_PUBLIC;
2346 		if (noise_remote_keys(peer->p_remote, peer_o.p_public,
2347 				      peer_o.p_psk)) {
2348 			if (privileged)
2349 				peer_o.p_flags |= WG_PEER_HAS_PSK;
2350 			else
2351 				bzero(peer_o.p_psk, sizeof(peer_o.p_psk));
2352 		}
2353 		if (wg_timers_get_persistent_keepalive(peer, &peer_o.p_pka))
2354 			peer_o.p_flags |= WG_PEER_HAS_PKA;
2355 		if (wg_peer_get_sockaddr(peer, &peer_o.p_sa) == 0)
2356 			peer_o.p_flags |= WG_PEER_HAS_ENDPOINT;
2357 		for (cpu = 0; cpu < ncpus; cpu++) {
2358 			peer_o.p_rxbytes += peer->p_rx_bytes[cpu];
2359 			peer_o.p_txbytes += peer->p_tx_bytes[cpu];
2360 		}
2361 		wg_timers_get_last_handshake(peer, &peer_o.p_last_handshake);
2362 		peer_o.p_id = (uint64_t)peer->p_id;
2363 		strlcpy(peer_o.p_description, peer->p_description,
2364 			sizeof(peer_o.p_description));
2365 
2366 		aip_count = 0;
2367 		aip_p = &peer_p->p_aips[0];
2368 		LIST_FOREACH(aip, &peer->p_aips, a_entry) {
2369 			bzero(&aip_o, sizeof(aip_o));
2370 			aip_o.a_af = aip->a_af;
2371 			if (aip->a_af == AF_INET) {
2372 				aip_o.a_cidr = bitcount32(aip->a_mask.ip);
2373 				memcpy(&aip_o.a_ipv4, &aip->a_addr.in,
2374 				       sizeof(aip->a_addr.in));
2375 			} else if (aip->a_af == AF_INET6) {
2376 				aip_o.a_cidr = in6_mask2len(&aip->a_mask.in6,
2377 							    NULL);
2378 				memcpy(&aip_o.a_ipv6, &aip->a_addr.in6,
2379 				       sizeof(aip->a_addr.in6));
2380 			}
2381 
2382 			ret = copyout(&aip_o, aip_p, sizeof(aip_o));
2383 			if (ret != 0)
2384 				goto out;
2385 
2386 			aip_p++;
2387 			aip_count++;
2388 		}
2389 		KKASSERT(aip_count == peer->p_aips_num);
2390 		peer_o.p_aips_count = aip_count;
2391 
2392 		ret = copyout(&peer_o, peer_p, sizeof(peer_o));
2393 		if (ret != 0)
2394 			goto out;
2395 
2396 		peer_p = (struct wg_peer_io *)aip_p;
2397 		peer_count++;
2398 	}
2399 	KKASSERT(peer_count == sc->sc_peers_num);
2400 	iface_o.i_peers_count = peer_count;
2401 
2402 	ret = copyout(&iface_o, iface_p, sizeof(iface_o));
2403 
2404 out:
2405 	lockmgr(&sc->sc_lock, LK_RELEASE);
2406 	explicit_bzero(&iface_o, sizeof(iface_o));
2407 	explicit_bzero(&peer_o, sizeof(peer_o));
2408 	return (ret);
2409 }
2410 
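/*
 * SIOCSWG handler: apply a new configuration from userland, covering the
 * interface private key, listen port and user cookie, as well as the set
 * of peers with their endpoints, preshared keys, keepalive intervals and
 * allowed IPs.
 */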
2411 static int
2412 wg_ioctl_set(struct wg_softc *sc, struct wg_data_io *data)
2413 {
2414 	struct wg_interface_io	*iface_p, iface_o;
2415 	struct wg_peer_io	*peer_p, peer_o;
2416 	struct wg_aip_io	*aip_p, aip_o;
2417 	struct wg_peer		*peer;
2418 	struct noise_remote	*remote;
2419 	uint8_t			 public[WG_KEY_SIZE], private[WG_KEY_SIZE];
2420 	size_t			 i, j;
2421 	int			 ret;
2422 
2423 	remote = NULL;
2424 	lockmgr(&sc->sc_lock, LK_EXCLUSIVE);
2425 
2426 	iface_p = data->wgd_interface;
2427 	if ((ret = copyin(iface_p, &iface_o, sizeof(iface_o))) != 0)
2428 		goto error;
2429 
2430 	if (iface_o.i_flags & WG_INTERFACE_REPLACE_PEERS)
2431 		wg_peer_destroy_all(sc);
2432 
2433 	if ((iface_o.i_flags & WG_INTERFACE_HAS_PRIVATE) &&
2434 	    (!noise_local_keys(sc->sc_local, NULL, private) ||
2435 	     timingsafe_bcmp(private, iface_o.i_private, WG_KEY_SIZE) != 0)) {
2436 		if (curve25519_generate_public(public, iface_o.i_private)) {
2437 			remote = noise_remote_lookup(sc->sc_local, public);
2438 			if (remote != NULL) {
2439 				/* Remove the conflicting peer. */
2440 				peer = noise_remote_arg(remote);
2441 				wg_peer_destroy(peer);
2442 				noise_remote_put(remote);
2443 			}
2444 		}
2445 
2446 		/*
2447 		 * Set the private key.
2448 		 *
2449 		 * Note: we might be removing the private key.
2450 		 */
2451 		if (noise_local_set_private(sc->sc_local, iface_o.i_private))
2452 			cookie_checker_update(&sc->sc_cookie, public);
2453 		else
2454 			cookie_checker_update(&sc->sc_cookie, NULL);
2455 	}
2456 
2457 	if ((iface_o.i_flags & WG_INTERFACE_HAS_PORT) &&
2458 	    iface_o.i_port != sc->sc_socket.so_port) {
2459 		if (sc->sc_ifp->if_flags & IFF_RUNNING) {
2460 			ret = wg_socket_init(sc, iface_o.i_port);
2461 			if (ret != 0)
2462 				goto error;
2463 		} else {
2464 			sc->sc_socket.so_port = iface_o.i_port;
2465 		}
2466 	}
2467 
2468 	if (iface_o.i_flags & WG_INTERFACE_HAS_COOKIE) {
2469 		ret = wg_socket_set_cookie(sc, iface_o.i_cookie);
2470 		if (ret != 0)
2471 			goto error;
2472 	}
2473 
2474 	peer_p = &iface_p->i_peers[0];
2475 	for (i = 0; i < iface_o.i_peers_count; i++) {
2476 		if ((ret = copyin(peer_p, &peer_o, sizeof(peer_o))) != 0)
2477 			goto error;
2478 
2479 		/* Peer must have public key. */
2480 		if ((peer_o.p_flags & WG_PEER_HAS_PUBLIC) == 0)
2481 			goto next_peer;
2482 		/* Ignore a peer whose public key matches the interface's own. */
2483 		if (noise_local_keys(sc->sc_local, public, NULL) &&
2484 		    memcmp(public, peer_o.p_public, WG_KEY_SIZE) == 0)
2485 			goto next_peer;
2486 
2487 		/* Lookup peer, or create if it doesn't exist. */
2488 		remote = noise_remote_lookup(sc->sc_local, peer_o.p_public);
2489 		if (remote != NULL) {
2490 			peer = noise_remote_arg(remote);
2491 		} else {
2492 			if (peer_o.p_flags & (WG_PEER_REMOVE | WG_PEER_UPDATE))
2493 				goto next_peer;
2494 
2495 			peer = wg_peer_create(sc, peer_o.p_public);
2496 			if (peer == NULL) {
2497 				ret = ENOMEM;
2498 				goto error;
2499 			}
2500 		}
2501 
2502 		if (peer_o.p_flags & WG_PEER_REMOVE) {
2503 			wg_peer_destroy(peer);
2504 			goto next_peer;
2505 		}
2506 
2507 		if (peer_o.p_flags & WG_PEER_HAS_ENDPOINT) {
2508 			ret = wg_peer_set_sockaddr(peer, &peer_o.p_sa);
2509 			if (ret != 0)
2510 				goto error;
2511 		}
2512 		if (peer_o.p_flags & WG_PEER_HAS_PSK)
2513 			noise_remote_set_psk(peer->p_remote, peer_o.p_psk);
2514 		if (peer_o.p_flags & WG_PEER_HAS_PKA)
2515 			wg_timers_set_persistent_keepalive(peer, peer_o.p_pka);
2516 		if (peer_o.p_flags & WG_PEER_SET_DESCRIPTION)
2517 			strlcpy(peer->p_description, peer_o.p_description,
2518 				sizeof(peer->p_description));
2519 
2520 		if (peer_o.p_flags & WG_PEER_REPLACE_AIPS)
2521 			wg_aip_remove_all(sc, peer);
2522 
2523 		for (j = 0; j < peer_o.p_aips_count; j++) {
2524 			aip_p = &peer_p->p_aips[j];
2525 			if ((ret = copyin(aip_p, &aip_o, sizeof(aip_o))) != 0)
2526 				goto error;
2527 			ret = wg_aip_add(sc, peer, aip_o.a_af, &aip_o.a_addr,
2528 					 aip_o.a_cidr);
2529 			if (ret != 0)
2530 				goto error;
2531 		}
2532 
2533 	next_peer:
2534 		if (remote != NULL) {
2535 			noise_remote_put(remote);
2536 			remote = NULL;
2537 		}
2538 		aip_p = &peer_p->p_aips[peer_o.p_aips_count];
2539 		peer_p = (struct wg_peer_io *)aip_p;
2540 	}
2541 
2542 error:
2543 	if (remote != NULL)
2544 		noise_remote_put(remote);
2545 	lockmgr(&sc->sc_lock, LK_RELEASE);
2546 	explicit_bzero(&iface_o, sizeof(iface_o));
2547 	explicit_bzero(&peer_o, sizeof(peer_o));
2548 	explicit_bzero(&aip_o, sizeof(aip_o));
2549 	explicit_bzero(public, sizeof(public));
2550 	explicit_bzero(private, sizeof(private));
2551 	return (ret);
2552 }
2553 
2554 static int
2555 wg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cred)
2556 {
2557 	struct wg_data_io	*wgd;
2558 	struct wg_softc		*sc;
2559 	struct ifreq		*ifr;
2560 	bool			 privileged;
2561 	int			 ret;
2562 
2563 	sc = ifp->if_softc;
2564 	ret = 0;
2565 
2566 	switch (cmd) {
2567 	case SIOCSWG:
2568 		ret = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT);
2569 		if (ret == 0) {
2570 			wgd = (struct wg_data_io *)data;
2571 			ret = wg_ioctl_set(sc, wgd);
2572 		}
2573 		break;
2574 	case SIOCGWG:
2575 		privileged =
2576 		    (caps_priv_check(cred, SYSCAP_RESTRICTEDROOT) == 0);
2577 		wgd = (struct wg_data_io *)data;
2578 		ret = wg_ioctl_get(sc, wgd, privileged);
2579 		break;
2580 	/* Interface IOCTLs */
2581 	case SIOCSIFADDR:
2582 		/*
2583 		 * This differs from *BSD norms, but is more uniform with how
2584 		 * This differs from *BSD norms, but is more consistent with how
2585 		 */
2586 		break;
2587 	case SIOCSIFFLAGS:
2588 		if (ifp->if_flags & IFF_UP)
2589 			ret = wg_up(sc);
2590 		else
2591 			wg_down(sc);
2592 		break;
2593 	case SIOCSIFMTU:
2594 		ifr = (struct ifreq *)data;
2595 		if (ifr->ifr_mtu <= 0 || ifr->ifr_mtu > MAX_MTU)
2596 			ret = EINVAL;
2597 		else
2598 			ifp->if_mtu = ifr->ifr_mtu;
2599 		break;
2600 	case SIOCADDMULTI:
2601 	case SIOCDELMULTI:
2602 		break;
2603 	default:
2604 		ret = ENOTTY;
2605 	}
2606 
2607 	return (ret);
2608 }
2609 
2610 static int
2611 wg_up(struct wg_softc *sc)
2612 {
2613 	struct ifnet *ifp = sc->sc_ifp;
2614 	struct wg_peer *peer;
2615 	int ret = 0;
2616 
2617 	lockmgr(&sc->sc_lock, LK_EXCLUSIVE);
2618 
2619 	/* Silent success if we're already running. */
2620 	if (ifp->if_flags & IFF_RUNNING)
2621 		goto out;
2622 	ifp->if_flags |= IFF_RUNNING;
2623 
2624 	ret = wg_socket_init(sc, sc->sc_socket.so_port);
2625 	if (ret == 0) {
2626 		TAILQ_FOREACH(peer, &sc->sc_peers, p_entry)
2627 			wg_timers_enable(peer);
2628 		ifp->if_link_state = LINK_STATE_UP;
2629 		if_link_state_change(ifp);
2630 	} else {
2631 		ifp->if_flags &= ~IFF_RUNNING;
2632 		DPRINTF(sc, "Unable to initialize sockets: %d\n", ret);
2633 	}
2634 
2635 out:
2636 	lockmgr(&sc->sc_lock, LK_RELEASE);
2637 	return (ret);
2638 }
2639 
2640 static void
2641 wg_down(struct wg_softc *sc)
2642 {
2643 	struct ifnet *ifp = sc->sc_ifp;
2644 	struct wg_peer *peer;
2645 
2646 	lockmgr(&sc->sc_lock, LK_EXCLUSIVE);
2647 
2648 	if ((ifp->if_flags & IFF_RUNNING) == 0) {
2649 		lockmgr(&sc->sc_lock, LK_RELEASE);
2650 		return;
2651 	}
2652 	ifp->if_flags &= ~IFF_RUNNING;
2653 
2654 	TAILQ_FOREACH(peer, &sc->sc_peers, p_entry) {
2655 		wg_queue_purge(&peer->p_stage_queue);
2656 		wg_timers_disable(peer);
2657 	}
2658 
2659 	wg_queue_purge(&sc->sc_handshake_queue);
2660 
2661 	TAILQ_FOREACH(peer, &sc->sc_peers, p_entry) {
2662 		noise_remote_handshake_clear(peer->p_remote);
2663 		noise_remote_keypairs_clear(peer->p_remote);
2664 	}
2665 
2666 	ifp->if_link_state = LINK_STATE_DOWN;
2667 	if_link_state_change(ifp);
2668 	wg_socket_uninit(sc);
2669 
2670 	lockmgr(&sc->sc_lock, LK_RELEASE);
2671 }
2672 
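/*
 * Create a new wg interface: allocate the softc, the allowed-IP radix
 * trees and locks, the per-CPU encrypt/decrypt tasks and the handshake
 * task, then attach the ifnet with a DLT_NULL bpf header.
 */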
2673 static int
2674 wg_clone_create(struct if_clone *ifc __unused, int unit,
2675 		caddr_t params __unused, caddr_t data __unused)
2676 {
2677 	struct wg_softc *sc;
2678 	struct ifnet *ifp;
2679 	int i;
2680 
2681 	sc = kmalloc(sizeof(*sc), M_WG, M_WAITOK | M_ZERO);
2682 
2683 	if (!rn_inithead(&sc->sc_aip4, wg_maskhead,
2684 			 offsetof(struct aip_addr, in)) ||
2685 	    !rn_inithead(&sc->sc_aip6, wg_maskhead,
2686 			 offsetof(struct aip_addr, in6))) {
2687 		if (sc->sc_aip4 != NULL)
2688 			rn_freehead(sc->sc_aip4);
2689 		if (sc->sc_aip6 != NULL)
2690 			rn_freehead(sc->sc_aip6);
2691 		kfree(sc, M_WG);
2692 		return (ENOMEM);
2693 	}
2694 
2695 	lockinit(&sc->sc_lock, "wg softc lock", 0, 0);
2696 	lockinit(&sc->sc_aip_lock, "wg aip lock", 0, 0);
2697 	lockinit(&sc->sc_socket.so_lock, "wg socket lock", 0, 0);
2698 
2699 	sc->sc_local = noise_local_alloc();
2700 
2701 	TAILQ_INIT(&sc->sc_peers);
2702 	cookie_checker_init(&sc->sc_cookie);
2703 
2704 	sc->sc_handshake_taskqueue = wg_taskqueues[karc4random() % ncpus];
2705 	TASK_INIT(&sc->sc_handshake_task, 0, wg_softc_handshake_receive, sc);
2706 	wg_queue_init(&sc->sc_handshake_queue, "hsq");
2707 
2708 	sc->sc_encrypt_tasks = kmalloc(sizeof(*sc->sc_encrypt_tasks) * ncpus,
2709 				       M_WG, M_WAITOK | M_ZERO);
2710 	sc->sc_decrypt_tasks = kmalloc(sizeof(*sc->sc_decrypt_tasks) * ncpus,
2711 				       M_WG, M_WAITOK | M_ZERO);
2712 	for (i = 0; i < ncpus; i++) {
2713 		TASK_INIT(&sc->sc_encrypt_tasks[i], 0, wg_softc_encrypt, sc);
2714 		TASK_INIT(&sc->sc_decrypt_tasks[i], 0, wg_softc_decrypt, sc);
2715 	}
2716 	wg_queue_init(&sc->sc_encrypt_parallel, "encp");
2717 	wg_queue_init(&sc->sc_decrypt_parallel, "decp");
2718 
2719 	ifp = sc->sc_ifp = if_alloc(IFT_WIREGUARD);
2720 	if_initname(ifp, wgname, unit);
2721 	ifp->if_softc = sc;
2722 	ifp->if_mtu = DEFAULT_MTU;
2723 	ifp->if_flags = IFF_NOARP | IFF_MULTICAST;
2724 	ifp->if_output = wg_output;
2725 	ifp->if_ioctl = wg_ioctl;
2726 	ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
2727 	ifq_set_ready(&ifp->if_snd);
2728 
2729 	if_attach(ifp, NULL);
2730 
2731 	/* DLT_NULL link-layer header: a 4-byte field in host byte order */
2732 	bpfattach(ifp, DLT_NULL, sizeof(uint32_t));
2733 
2734 	lockmgr(&wg_mtx, LK_EXCLUSIVE);
2735 	LIST_INSERT_HEAD(&wg_list, sc, sc_entry);
2736 	lockmgr(&wg_mtx, LK_RELEASE);
2737 
2738 	return (0);
2739 }
2740 
2741 static int
2742 wg_clone_destroy(struct ifnet *ifp)
2743 {
2744 	struct wg_softc *sc = ifp->if_softc;
2745 	int i;
2746 
2747 	lockmgr(&sc->sc_lock, LK_EXCLUSIVE);
2748 
2749 	ifp->if_link_state = LINK_STATE_DOWN;
2750 	if_link_state_change(ifp);
2751 	if_purgeaddrs_nolink(ifp);
2752 
2753 	wg_socket_uninit(sc);
2754 	lockuninit(&sc->sc_socket.so_lock);
2755 
2756 	/* Cancel all tasks. */
2757 	while (taskqueue_cancel(sc->sc_handshake_taskqueue,
2758 				&sc->sc_handshake_task, NULL) != 0) {
2759 		taskqueue_drain(sc->sc_handshake_taskqueue,
2760 				&sc->sc_handshake_task);
2761 	}
2762 	for (i = 0; i < ncpus; i++) {
2763 		while (taskqueue_cancel(wg_taskqueues[i],
2764 					&sc->sc_encrypt_tasks[i], NULL) != 0) {
2765 			taskqueue_drain(wg_taskqueues[i],
2766 					&sc->sc_encrypt_tasks[i]);
2767 		}
2768 		while (taskqueue_cancel(wg_taskqueues[i],
2769 					&sc->sc_decrypt_tasks[i], NULL) != 0) {
2770 			taskqueue_drain(wg_taskqueues[i],
2771 					&sc->sc_decrypt_tasks[i]);
2772 		}
2773 	}
2774 
2775 	kfree(sc->sc_encrypt_tasks, M_WG);
2776 	kfree(sc->sc_decrypt_tasks, M_WG);
2777 	wg_queue_deinit(&sc->sc_handshake_queue);
2778 	wg_queue_deinit(&sc->sc_encrypt_parallel);
2779 	wg_queue_deinit(&sc->sc_decrypt_parallel);
2780 
2781 	wg_peer_destroy_all(sc);
2782 	rn_freehead(sc->sc_aip4);
2783 	rn_freehead(sc->sc_aip6);
2784 	lockuninit(&sc->sc_aip_lock);
2785 
2786 	cookie_checker_free(&sc->sc_cookie);
2787 	noise_local_free(sc->sc_local);
2788 
2789 	bpfdetach(ifp);
2790 	if_detach(ifp);
2791 	if_free(ifp);
2792 
2793 	lockmgr(&wg_mtx, LK_EXCLUSIVE);
2794 	LIST_REMOVE(sc, sc_entry);
2795 	lockmgr(&wg_mtx, LK_RELEASE);
2796 
2797 	lockmgr(&sc->sc_lock, LK_RELEASE);
2798 	lockuninit(&sc->sc_lock);
2799 	kfree(sc, M_WG);
2800 
2801 	return (0);
2802 }
2803 
2804 /*----------------------------------------------------------------------------*/
2805 /* Module Interface */
2806 
2807 #ifdef WG_SELFTESTS
2808 #include "selftest/allowedips.c"
2809 static bool
2810 wg_run_selftests(void)
2811 {
2812 	bool ret = true;
2813 
2814 	ret &= wg_allowedips_selftest();
2815 	ret &= noise_counter_selftest();
2816 	ret &= cookie_selftest();
2817 
2818 	kprintf("%s: %s\n", __func__, ret ? "pass" : "FAIL");
2819 	return (ret);
2820 }
2821 #else /* !WG_SELFTESTS */
2822 static inline bool
2823 wg_run_selftests(void)
2824 {
2825 	return (true);
2826 }
2827 #endif /* WG_SELFTESTS */
2828 
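/*
 * Module load: create the wg_packet objcache, one taskqueue per CPU, the
 * shared mask radix head and the cookie subsystem, attach the interface
 * cloner, and finally run the optional selftests.
 */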
2829 static int
2830 wg_module_init(void)
2831 {
2832 	int i, ret;
2833 
2834 	lockinit(&wg_mtx, "wg mtx lock", 0, 0);
2835 
2836 	wg_packet_zone = objcache_create_simple(M_WG_PACKET,
2837 						sizeof(struct wg_packet));
2838 	if (wg_packet_zone == NULL)
2839 		return (ENOMEM);
2840 
2841 	wg_taskqueues = kmalloc(sizeof(*wg_taskqueues) * ncpus, M_WG,
2842 				M_WAITOK | M_ZERO);
2843 	for (i = 0; i < ncpus; i++) {
2844 		wg_taskqueues[i] = taskqueue_create("wg_taskq", M_WAITOK,
2845 						    taskqueue_thread_enqueue,
2846 						    &wg_taskqueues[i]);
2847 		taskqueue_start_threads(&wg_taskqueues[i], 1,
2848 					TDPRI_KERN_DAEMON, i,
2849 					"wg_taskq_cpu_%d", i);
2850 	}
2851 
2852 	if (!rn_inithead(&wg_maskhead, NULL, 0))
2853 		return (ENOMEM);
2854 
2855 	ret = cookie_init();
2856 	if (ret != 0)
2857 		return (ret);
2858 
2859 	ret = if_clone_attach(&wg_cloner);
2860 	if (ret != 0)
2861 		return (ret);
2862 
2863 	if (!wg_run_selftests())
2864 		return (ENOTRECOVERABLE);
2865 
2866 	return (0);
2867 }
2868 
2869 static int
2870 wg_module_deinit(void)
2871 {
2872 	int i;
2873 
2874 	lockmgr(&wg_mtx, LK_EXCLUSIVE);
2875 
2876 	if (!LIST_EMPTY(&wg_list)) {
2877 		lockmgr(&wg_mtx, LK_RELEASE);
2878 		return (EBUSY);
2879 	}
2880 
2881 	if_clone_detach(&wg_cloner);
2882 
2883 	cookie_deinit();
2884 
2885 	for (i = 0; i < ncpus; i++)
2886 		taskqueue_free(wg_taskqueues[i]);
2887 	kfree(wg_taskqueues, M_WG);
2888 
2889 	rn_flush(wg_maskhead, rn_freemask);
2890 	rn_freehead(wg_maskhead);
2891 
2892 	if (wg_packet_zone != NULL)
2893 		objcache_destroy(wg_packet_zone);
2894 
2895 	lockmgr(&wg_mtx, LK_RELEASE);
2896 	lockuninit(&wg_mtx);
2897 
2898 	return (0);
2899 }
2900 
2901 static int
2902 wg_module_event_handler(module_t mod __unused, int what, void *arg __unused)
2903 {
2904 	switch (what) {
2905 	case MOD_LOAD:
2906 		return wg_module_init();
2907 	case MOD_UNLOAD:
2908 		return wg_module_deinit();
2909 	default:
2910 		return (EOPNOTSUPP);
2911 	}
2912 }
2913 
2914 static moduledata_t wg_moduledata = {
2915 	"if_wg",
2916 	wg_module_event_handler,
2917 	NULL
2918 };
2919 
2920 DECLARE_MODULE(if_wg, wg_moduledata, SI_SUB_PSEUDO, SI_ORDER_ANY);
2921 MODULE_VERSION(if_wg, 1); /* kernel module version */
2922 MODULE_DEPEND(if_wg, crypto, 1, 1, 1);
2923