1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2020 Cloudflare
3 /*
4  * Test suite for SOCKMAP/SOCKHASH holding listening sockets.
5  * Covers:
6  *  1. BPF map operations - bpf_map_{update,lookup,delete}_elem
7  *  2. BPF redirect helpers - bpf_{sk,msg}_redirect_map
8  *  3. BPF reuseport helper - bpf_sk_select_reuseport
9  */
10 
11 #include <linux/compiler.h>
12 #include <errno.h>
13 #include <error.h>
14 #include <limits.h>
15 #include <netinet/in.h>
16 #include <pthread.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/select.h>
20 #include <unistd.h>
21 #include <linux/vm_sockets.h>
22 
23 #include <bpf/bpf.h>
24 #include <bpf/libbpf.h>
25 
26 #include "bpf_util.h"
27 #include "test_progs.h"
28 #include "test_sockmap_listen.skel.h"
29 
30 #include "sockmap_helpers.h"
31 
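/* Values that do not name a usable socket must be rejected: -1 is not a
 * valid FD value (EINVAL), INT_MAX is not an open FD (EBADF).
 */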
32 static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused,
33 				int family, int sotype, int mapfd)
34 {
35 	u32 key = 0;
36 	u64 value;
37 	int err;
38 
39 	value = -1;
40 	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
41 	if (!err || errno != EINVAL)
42 		FAIL_ERRNO("map_update: expected EINVAL");
43 
44 	value = INT_MAX;
45 	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
46 	if (!err || errno != EBADF)
47 		FAIL_ERRNO("map_update: expected EBADF");
48 }
49 
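/* A freshly opened TCP socket that is neither listening nor established
 * cannot be inserted (EOPNOTSUPP); a UDP socket can be added right away.
 */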
50 static void test_insert_opened(struct test_sockmap_listen *skel __always_unused,
51 			       int family, int sotype, int mapfd)
52 {
53 	u32 key = 0;
54 	u64 value;
55 	int err, s;
56 
57 	s = xsocket(family, sotype, 0);
58 	if (s == -1)
59 		return;
60 
61 	errno = 0;
62 	value = s;
63 	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
64 	if (sotype == SOCK_STREAM) {
65 		if (!err || errno != EOPNOTSUPP)
66 			FAIL_ERRNO("map_update: expected EOPNOTSUPP");
67 	} else if (err)
68 		FAIL_ERRNO("map_update: expected success");
69 	xclose(s);
70 }
71 
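/* Binding alone is not enough for TCP: inserting a bound but not yet
 * listening socket must still fail with EOPNOTSUPP.
 */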
72 static void test_insert_bound(struct test_sockmap_listen *skel __always_unused,
73 			      int family, int sotype, int mapfd)
74 {
75 	struct sockaddr_storage addr;
76 	socklen_t len;
77 	u32 key = 0;
78 	u64 value;
79 	int err, s;
80 
81 	init_addr_loopback(family, &addr, &len);
82 
83 	s = xsocket(family, sotype, 0);
84 	if (s == -1)
85 		return;
86 
87 	err = xbind(s, sockaddr(&addr), len);
88 	if (err)
89 		goto close;
90 
91 	errno = 0;
92 	value = s;
93 	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
94 	if (!err || errno != EOPNOTSUPP)
95 		FAIL_ERRNO("map_update: expected EOPNOTSUPP");
96 close:
97 	xclose(s);
98 }
99 
100 static void test_insert(struct test_sockmap_listen *skel __always_unused,
101 			int family, int sotype, int mapfd)
102 {
103 	u64 value;
104 	u32 key;
105 	int s;
106 
107 	s = socket_loopback(family, sotype);
108 	if (s < 0)
109 		return;
110 
111 	key = 0;
112 	value = s;
113 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
114 	xclose(s);
115 }
116 
117 static void test_delete_after_insert(struct test_sockmap_listen *skel __always_unused,
118 				     int family, int sotype, int mapfd)
119 {
120 	u64 value;
121 	u32 key;
122 	int s;
123 
124 	s = socket_loopback(family, sotype);
125 	if (s < 0)
126 		return;
127 
128 	key = 0;
129 	value = s;
130 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
131 	xbpf_map_delete_elem(mapfd, &key);
132 	xclose(s);
133 }
134 
135 static void test_delete_after_close(struct test_sockmap_listen *skel __always_unused,
136 				    int family, int sotype, int mapfd)
137 {
138 	int err, s;
139 	u64 value;
140 	u32 key;
141 
142 	s = socket_loopback(family, sotype);
143 	if (s < 0)
144 		return;
145 
146 	key = 0;
147 	value = s;
148 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
149 
150 	xclose(s);
151 
152 	errno = 0;
153 	err = bpf_map_delete_elem(mapfd, &key);
154 	if (!err || (errno != EINVAL && errno != ENOENT))
155 		/* SOCKMAP and SOCKHASH return different error codes */
156 		FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
157 }
158 
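/* Lookup does not return the inserted FD but the socket cookie, which we
 * cross-check against getsockopt(SO_COOKIE).
 */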
159 static void test_lookup_after_insert(struct test_sockmap_listen *skel __always_unused,
160 				     int family, int sotype, int mapfd)
161 {
162 	u64 cookie, value;
163 	socklen_t len;
164 	u32 key;
165 	int s;
166 
167 	s = socket_loopback(family, sotype);
168 	if (s < 0)
169 		return;
170 
171 	key = 0;
172 	value = s;
173 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
174 
175 	len = sizeof(cookie);
176 	xgetsockopt(s, SOL_SOCKET, SO_COOKIE, &cookie, &len);
177 
178 	xbpf_map_lookup_elem(mapfd, &key, &value);
179 
180 	if (value != cookie) {
181 		FAIL("map_lookup: have %#llx, want %#llx",
182 		     (unsigned long long)value, (unsigned long long)cookie);
183 	}
184 
185 	xclose(s);
186 }
187 
188 static void test_lookup_after_delete(struct test_sockmap_listen *skel __always_unused,
189 				     int family, int sotype, int mapfd)
190 {
191 	int err, s;
192 	u64 value;
193 	u32 key;
194 
195 	s = socket_loopback(family, sotype);
196 	if (s < 0)
197 		return;
198 
199 	key = 0;
200 	value = s;
201 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
202 	xbpf_map_delete_elem(mapfd, &key);
203 
204 	errno = 0;
205 	err = bpf_map_lookup_elem(mapfd, &key, &value);
206 	if (!err || errno != ENOENT)
207 		FAIL_ERRNO("map_lookup: expected ENOENT");
208 
209 	xclose(s);
210 }
211 
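/* A SOCKMAP created with a 32-bit value size accepts updates, but lookup
 * needs room for the 64-bit socket cookie and fails with ENOSPC.
 */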
212 static void test_lookup_32_bit_value(struct test_sockmap_listen *skel __always_unused,
213 				     int family, int sotype, int mapfd)
214 {
215 	u32 key, value32;
216 	int err, s;
217 
218 	s = socket_loopback(family, sotype);
219 	if (s < 0)
220 		return;
221 
222 	mapfd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(key),
223 			       sizeof(value32), 1, NULL);
224 	if (mapfd < 0) {
225 		FAIL_ERRNO("map_create");
226 		goto close;
227 	}
228 
229 	key = 0;
230 	value32 = s;
231 	xbpf_map_update_elem(mapfd, &key, &value32, BPF_NOEXIST);
232 
233 	errno = 0;
234 	err = bpf_map_lookup_elem(mapfd, &key, &value32);
235 	if (!err || errno != ENOSPC)
236 		FAIL_ERRNO("map_lookup: expected ENOSPC");
237 
238 	xclose(mapfd);
239 close:
240 	xclose(s);
241 }
242 
243 static void test_update_existing(struct test_sockmap_listen *skel __always_unused,
244 				 int family, int sotype, int mapfd)
245 {
246 	int s1, s2;
247 	u64 value;
248 	u32 key;
249 
250 	s1 = socket_loopback(family, sotype);
251 	if (s1 < 0)
252 		return;
253 
254 	s2 = socket_loopback(family, sotype);
255 	if (s2 < 0)
256 		goto close_s1;
257 
258 	key = 0;
259 	value = s1;
260 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
261 
262 	value = s2;
263 	xbpf_map_update_elem(mapfd, &key, &value, BPF_EXIST);
264 	xclose(s2);
265 close_s1:
266 	xclose(s1);
267 }
268 
269 /* Exercise the code path where we destroy child sockets that never
270  * got accept()'ed, aka orphans, when parent socket gets closed.
271  */
272 static void do_destroy_orphan_child(int family, int sotype, int mapfd)
273 {
274 	struct sockaddr_storage addr;
275 	socklen_t len;
276 	int err, s, c;
277 	u64 value;
278 	u32 key;
279 
280 	s = socket_loopback(family, sotype);
281 	if (s < 0)
282 		return;
283 
284 	len = sizeof(addr);
285 	err = xgetsockname(s, sockaddr(&addr), &len);
286 	if (err)
287 		goto close_srv;
288 
289 	key = 0;
290 	value = s;
291 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
292 
293 	c = xsocket(family, sotype, 0);
294 	if (c == -1)
295 		goto close_srv;
296 
297 	xconnect(c, sockaddr(&addr), len);
298 	xclose(c);
299 close_srv:
300 	xclose(s);
301 }
302 
303 static void test_destroy_orphan_child(struct test_sockmap_listen *skel,
304 				      int family, int sotype, int mapfd)
305 {
306 	int msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
307 	int skb_verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
308 	const struct test {
309 		int progfd;
310 		enum bpf_attach_type atype;
311 	} tests[] = {
312 		{ -1, -1 },
313 		{ msg_verdict, BPF_SK_MSG_VERDICT },
314 		{ skb_verdict, BPF_SK_SKB_VERDICT },
315 	};
316 	const struct test *t;
317 
318 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
319 		if (t->progfd != -1 &&
320 		    xbpf_prog_attach(t->progfd, mapfd, t->atype, 0) != 0)
321 			return;
322 
323 		do_destroy_orphan_child(family, sotype, mapfd);
324 
325 		if (t->progfd != -1)
326 			xbpf_prog_detach2(t->progfd, mapfd, t->atype);
327 	}
328 }
329 
330 /* Perform a passive open after removing listening socket from SOCKMAP
331  * to ensure that callbacks get restored properly.
332  */
333 static void test_clone_after_delete(struct test_sockmap_listen *skel __always_unused,
334 				    int family, int sotype, int mapfd)
335 {
336 	struct sockaddr_storage addr;
337 	socklen_t len;
338 	int err, s, c;
339 	u64 value;
340 	u32 key;
341 
342 	s = socket_loopback(family, sotype);
343 	if (s < 0)
344 		return;
345 
346 	len = sizeof(addr);
347 	err = xgetsockname(s, sockaddr(&addr), &len);
348 	if (err)
349 		goto close_srv;
350 
351 	key = 0;
352 	value = s;
353 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
354 	xbpf_map_delete_elem(mapfd, &key);
355 
356 	c = xsocket(family, sotype, 0);
357 	if (c < 0)
358 		goto close_srv;
359 
360 	xconnect(c, sockaddr(&addr), len);
361 	xclose(c);
362 close_srv:
363 	xclose(s);
364 }
365 
366 /* Check that child socket that got created while parent was in a
367  * SOCKMAP, but got accept()'ed only after the parent has been removed
368  * from SOCKMAP, gets cloned without parent psock state or callbacks.
369  */
370 static void test_accept_after_delete(struct test_sockmap_listen *skel __always_unused,
371 				     int family, int sotype, int mapfd)
372 {
373 	struct sockaddr_storage addr;
374 	const u32 zero = 0;
375 	int err, s, c, p;
376 	socklen_t len;
377 	u64 value;
378 
379 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
380 	if (s == -1)
381 		return;
382 
383 	len = sizeof(addr);
384 	err = xgetsockname(s, sockaddr(&addr), &len);
385 	if (err)
386 		goto close_srv;
387 
388 	value = s;
389 	err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
390 	if (err)
391 		goto close_srv;
392 
393 	c = xsocket(family, sotype, 0);
394 	if (c == -1)
395 		goto close_srv;
396 
397 	/* Create child while parent is in sockmap */
398 	err = xconnect(c, sockaddr(&addr), len);
399 	if (err)
400 		goto close_cli;
401 
402 	/* Remove parent from sockmap */
403 	err = xbpf_map_delete_elem(mapfd, &zero);
404 	if (err)
405 		goto close_cli;
406 
407 	p = xaccept_nonblock(s, NULL, NULL);
408 	if (p == -1)
409 		goto close_cli;
410 
411 	/* Check that child sk_user_data is not set */
412 	value = p;
413 	xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
414 
415 	xclose(p);
416 close_cli:
417 	xclose(c);
418 close_srv:
419 	xclose(s);
420 }
421 
422 /* Check that child socket that got created and accepted while parent
423  * was in a SOCKMAP is cloned without parent psock state or callbacks.
424  */
425 static void test_accept_before_delete(struct test_sockmap_listen *skel __always_unused,
426 				      int family, int sotype, int mapfd)
427 {
428 	struct sockaddr_storage addr;
429 	const u32 zero = 0, one = 1;
430 	int err, s, c, p;
431 	socklen_t len;
432 	u64 value;
433 
434 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
435 	if (s == -1)
436 		return;
437 
438 	len = sizeof(addr);
439 	err = xgetsockname(s, sockaddr(&addr), &len);
440 	if (err)
441 		goto close_srv;
442 
443 	value = s;
444 	err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
445 	if (err)
446 		goto close_srv;
447 
448 	c = xsocket(family, sotype, 0);
449 	if (c == -1)
450 		goto close_srv;
451 
452 	/* Create & accept child while parent is in sockmap */
453 	err = xconnect(c, sockaddr(&addr), len);
454 	if (err)
455 		goto close_cli;
456 
457 	p = xaccept_nonblock(s, NULL, NULL);
458 	if (p == -1)
459 		goto close_cli;
460 
461 	/* Check that child sk_user_data is not set */
462 	value = p;
463 	xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST);
464 
465 	xclose(p);
466 close_cli:
467 	xclose(c);
468 close_srv:
469 	xclose(s);
470 }
471 
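/* Arguments and completion flag shared with the connect_accept_thread()
 * and listen_thread() helpers below.
 */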
472 struct connect_accept_ctx {
473 	int sockfd;
474 	unsigned int done;
475 	unsigned int nr_iter;
476 };
477 
478 static bool is_thread_done(struct connect_accept_ctx *ctx)
479 {
480 	return READ_ONCE(ctx->done);
481 }
482 
483 static void *connect_accept_thread(void *arg)
484 {
485 	struct connect_accept_ctx *ctx = arg;
486 	struct sockaddr_storage addr;
487 	int family, socktype;
488 	socklen_t len;
489 	int err, i, s;
490 
491 	s = ctx->sockfd;
492 
493 	len = sizeof(addr);
494 	err = xgetsockname(s, sockaddr(&addr), &len);
495 	if (err)
496 		goto done;
497 
498 	len = sizeof(family);
499 	err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len);
500 	if (err)
501 		goto done;
502 
503 	len = sizeof(socktype);
504 	err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len);
505 	if (err)
506 		goto done;
507 
508 	for (i = 0; i < ctx->nr_iter; i++) {
509 		int c, p;
510 
511 		c = xsocket(family, socktype, 0);
512 		if (c < 0)
513 			break;
514 
515 		err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr));
516 		if (err) {
517 			xclose(c);
518 			break;
519 		}
520 
521 		p = xaccept_nonblock(s, NULL, NULL);
522 		if (p < 0) {
523 			xclose(c);
524 			break;
525 		}
526 
527 		xclose(p);
528 		xclose(c);
529 	}
530 done:
531 	WRITE_ONCE(ctx->done, 1);
532 	return NULL;
533 }
534 
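/* Insert and delete the listener in a tight loop while a helper thread
 * keeps connecting and accepting, so map updates race with incoming
 * connections.
 */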
535 static void test_syn_recv_insert_delete(struct test_sockmap_listen *skel __always_unused,
536 					int family, int sotype, int mapfd)
537 {
538 	struct connect_accept_ctx ctx = { 0 };
539 	struct sockaddr_storage addr;
540 	socklen_t len;
541 	u32 zero = 0;
542 	pthread_t t;
543 	int err, s;
544 	u64 value;
545 
546 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
547 	if (s < 0)
548 		return;
549 
550 	len = sizeof(addr);
551 	err = xgetsockname(s, sockaddr(&addr), &len);
552 	if (err)
553 		goto close;
554 
555 	ctx.sockfd = s;
556 	ctx.nr_iter = 1000;
557 
558 	err = xpthread_create(&t, NULL, connect_accept_thread, &ctx);
559 	if (err)
560 		goto close;
561 
562 	value = s;
563 	while (!is_thread_done(&ctx)) {
564 		err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
565 		if (err)
566 			break;
567 
568 		err = xbpf_map_delete_elem(mapfd, &zero);
569 		if (err)
570 			break;
571 	}
572 
573 	xpthread_join(t, NULL);
574 close:
575 	xclose(s);
576 }
577 
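/* Toggle the socket between listening and closed in a loop;
 * connect(AF_UNSPEC) disconnects and unhashes it again.
 */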
578 static void *listen_thread(void *arg)
579 {
580 	struct sockaddr unspec = { AF_UNSPEC };
581 	struct connect_accept_ctx *ctx = arg;
582 	int err, i, s;
583 
584 	s = ctx->sockfd;
585 
586 	for (i = 0; i < ctx->nr_iter; i++) {
587 		err = xlisten(s, 1);
588 		if (err)
589 			break;
590 		err = xconnect(s, &unspec, sizeof(unspec));
591 		if (err)
592 			break;
593 	}
594 
595 	WRITE_ONCE(ctx->done, 1);
596 	return NULL;
597 }
598 
599 static void test_race_insert_listen(struct test_sockmap_listen *skel __always_unused,
600 				    int family, int socktype, int mapfd)
601 {
602 	struct connect_accept_ctx ctx = { 0 };
603 	const u32 zero = 0;
604 	const int one = 1;
605 	pthread_t t;
606 	int err, s;
607 	u64 value;
608 
609 	s = xsocket(family, socktype, 0);
610 	if (s < 0)
611 		return;
612 
613 	err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
614 	if (err)
615 		goto close;
616 
617 	ctx.sockfd = s;
618 	ctx.nr_iter = 10000;
619 
620 	err = xpthread_create(&t, NULL, listen_thread, &ctx);
621 	if (err)
622 		goto close;
623 
624 	value = s;
625 	while (!is_thread_done(&ctx)) {
626 		err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
627 		/* Expecting EOPNOTSUPP before listen() */
628 		if (err && errno != EOPNOTSUPP) {
629 			FAIL_ERRNO("map_update");
630 			break;
631 		}
632 
633 		err = bpf_map_delete_elem(mapfd, &zero);
634 		/* Expecting no entry after unhash on connect(AF_UNSPEC) */
635 		if (err && errno != EINVAL && errno != ENOENT) {
636 			FAIL_ERRNO("map_delete");
637 			break;
638 		}
639 	}
640 
641 	xpthread_join(t, NULL);
642 close:
643 	xclose(s);
644 }
645 
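/* Reset the SK_DROP and SK_PASS counters in the verdict map. */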
646 static void zero_verdict_count(int mapfd)
647 {
648 	unsigned int zero = 0;
649 	int key;
650 
651 	key = SK_DROP;
652 	xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
653 	key = SK_PASS;
654 	xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
655 }
656 
657 enum redir_mode {
658 	REDIR_INGRESS,
659 	REDIR_EGRESS,
660 };
661 
662 static const char *redir_mode_str(enum redir_mode mode)
663 {
664 	switch (mode) {
665 	case REDIR_INGRESS:
666 		return "ingress";
667 	case REDIR_EGRESS:
668 		return "egress";
669 	default:
670 		return "unknown";
671 	}
672 }
673 
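/* Add both peer sockets of two established connections to the map, write
 * one byte so the verdict program redirects it, and expect to read it back
 * on the other connection's client end (c0).
 */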
674 static void redir_to_connected(int family, int sotype, int sock_mapfd,
675 			       int verd_mapfd, enum redir_mode mode)
676 {
677 	const char *log_prefix = redir_mode_str(mode);
678 	int s, c0, c1, p0, p1;
679 	unsigned int pass;
680 	int err, n;
681 	u32 key;
682 	char b;
683 
684 	zero_verdict_count(verd_mapfd);
685 
686 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
687 	if (s < 0)
688 		return;
689 
690 	err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
691 	if (err)
692 		goto close_srv;
693 
694 	err = add_to_sockmap(sock_mapfd, p0, p1);
695 	if (err)
696 		goto close;
697 
698 	n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
699 	if (n < 0)
700 		FAIL_ERRNO("%s: write", log_prefix);
701 	if (n == 0)
702 		FAIL("%s: incomplete write", log_prefix);
703 	if (n < 1)
704 		goto close;
705 
706 	key = SK_PASS;
707 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
708 	if (err)
709 		goto close;
710 	if (pass != 1)
711 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
712 	n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
713 	if (n < 0)
714 		FAIL_ERRNO("%s: recv_timeout", log_prefix);
715 	if (n == 0)
716 		FAIL("%s: incomplete recv", log_prefix);
717 
718 close:
719 	xclose(p1);
720 	xclose(c1);
721 	xclose(p0);
722 	xclose(c0);
723 close_srv:
724 	xclose(s);
725 }
726 
727 static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
728 					struct bpf_map *inner_map, int family,
729 					int sotype)
730 {
731 	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
732 	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
733 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
734 	int sock_map = bpf_map__fd(inner_map);
735 	int err;
736 
737 	err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
738 	if (err)
739 		return;
740 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
741 	if (err)
742 		goto detach;
743 
744 	redir_to_connected(family, sotype, sock_map, verdict_map,
745 			   REDIR_INGRESS);
746 
747 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
748 detach:
749 	xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
750 }
751 
752 static void test_msg_redir_to_connected(struct test_sockmap_listen *skel,
753 					struct bpf_map *inner_map, int family,
754 					int sotype)
755 {
756 	int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
757 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
758 	int sock_map = bpf_map__fd(inner_map);
759 	int err;
760 
761 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
762 	if (err)
763 		return;
764 
765 	redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
766 
767 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
768 }
769 
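/* Redirecting to a listening socket must be rejected: the verdict program
 * drops the packet (SK_DROP count of 1) and the sender may see EACCES.
 */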
770 static void redir_to_listening(int family, int sotype, int sock_mapfd,
771 			       int verd_mapfd, enum redir_mode mode)
772 {
773 	const char *log_prefix = redir_mode_str(mode);
774 	struct sockaddr_storage addr;
775 	int s, c, p, err, n;
776 	unsigned int drop;
777 	socklen_t len;
778 	u32 key;
779 
780 	zero_verdict_count(verd_mapfd);
781 
782 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
783 	if (s < 0)
784 		return;
785 
786 	len = sizeof(addr);
787 	err = xgetsockname(s, sockaddr(&addr), &len);
788 	if (err)
789 		goto close_srv;
790 
791 	c = xsocket(family, sotype, 0);
792 	if (c < 0)
793 		goto close_srv;
794 	err = xconnect(c, sockaddr(&addr), len);
795 	if (err)
796 		goto close_cli;
797 
798 	p = xaccept_nonblock(s, NULL, NULL);
799 	if (p < 0)
800 		goto close_cli;
801 
802 	err = add_to_sockmap(sock_mapfd, s, p);
803 	if (err)
804 		goto close_peer;
805 
806 	n = write(mode == REDIR_INGRESS ? c : p, "a", 1);
807 	if (n < 0 && errno != EACCES)
808 		FAIL_ERRNO("%s: write", log_prefix);
809 	if (n == 0)
810 		FAIL("%s: incomplete write", log_prefix);
811 	if (n < 1)
812 		goto close_peer;
813 
814 	key = SK_DROP;
815 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop);
816 	if (err)
817 		goto close_peer;
818 	if (drop != 1)
819 		FAIL("%s: want drop count 1, have %d", log_prefix, drop);
820 
821 close_peer:
822 	xclose(p);
823 close_cli:
824 	xclose(c);
825 close_srv:
826 	xclose(s);
827 }
828 
829 static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
830 					struct bpf_map *inner_map, int family,
831 					int sotype)
832 {
833 	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
834 	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
835 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
836 	int sock_map = bpf_map__fd(inner_map);
837 	int err;
838 
839 	err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
840 	if (err)
841 		return;
842 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
843 	if (err)
844 		goto detach;
845 
846 	redir_to_listening(family, sotype, sock_map, verdict_map,
847 			   REDIR_INGRESS);
848 
849 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
850 detach:
851 	xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
852 }
853 
854 static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
855 					struct bpf_map *inner_map, int family,
856 					int sotype)
857 {
858 	int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
859 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
860 	int sock_map = bpf_map__fd(inner_map);
861 	int err;
862 
863 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
864 	if (err)
865 		return;
866 
867 	redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
868 
869 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
870 }
871 
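/* The reuseport program selects the listening socket stored at key 0; the
 * TCP connection (or UDP datagram) must go through and count as SK_PASS.
 */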
872 static void test_reuseport_select_listening(int family, int sotype,
873 					    int sock_map, int verd_map,
874 					    int reuseport_prog)
875 {
876 	struct sockaddr_storage addr;
877 	unsigned int pass;
878 	int s, c, err;
879 	socklen_t len;
880 	u64 value;
881 	u32 key;
882 
883 	zero_verdict_count(verd_map);
884 
885 	s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK,
886 				      reuseport_prog);
887 	if (s < 0)
888 		return;
889 
890 	len = sizeof(addr);
891 	err = xgetsockname(s, sockaddr(&addr), &len);
892 	if (err)
893 		goto close_srv;
894 
895 	key = 0;
896 	value = s;
897 	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
898 	if (err)
899 		goto close_srv;
900 
901 	c = xsocket(family, sotype, 0);
902 	if (c < 0)
903 		goto close_srv;
904 	err = xconnect(c, sockaddr(&addr), len);
905 	if (err)
906 		goto close_cli;
907 
908 	if (sotype == SOCK_STREAM) {
909 		int p;
910 
911 		p = xaccept_nonblock(s, NULL, NULL);
912 		if (p < 0)
913 			goto close_cli;
914 		xclose(p);
915 	} else {
916 		char b = 'a';
917 		ssize_t n;
918 
919 		n = xsend(c, &b, sizeof(b), 0);
920 		if (n == -1)
921 			goto close_cli;
922 
923 		n = xrecv_nonblock(s, &b, sizeof(b), 0);
924 		if (n == -1)
925 			goto close_cli;
926 	}
927 
928 	key = SK_PASS;
929 	err = xbpf_map_lookup_elem(verd_map, &key, &pass);
930 	if (err)
931 		goto close_cli;
932 	if (pass != 1)
933 		FAIL("want pass count 1, have %d", pass);
934 
935 close_cli:
936 	xclose(c);
937 close_srv:
938 	xclose(s);
939 }
940 
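/* Selecting a connected, non-listening socket from the reuseport program
 * must fail: the client sees ECONNREFUSED and SK_DROP is incremented.
 */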
941 static void test_reuseport_select_connected(int family, int sotype,
942 					    int sock_map, int verd_map,
943 					    int reuseport_prog)
944 {
945 	struct sockaddr_storage addr;
946 	int s, c0, c1, p0, err;
947 	unsigned int drop;
948 	socklen_t len;
949 	u64 value;
950 	u32 key;
951 
952 	zero_verdict_count(verd_map);
953 
954 	s = socket_loopback_reuseport(family, sotype, reuseport_prog);
955 	if (s < 0)
956 		return;
957 
958 	/* Populate sock_map[0] to avoid ENOENT on first connection */
959 	key = 0;
960 	value = s;
961 	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
962 	if (err)
963 		goto close_srv;
964 
965 	len = sizeof(addr);
966 	err = xgetsockname(s, sockaddr(&addr), &len);
967 	if (err)
968 		goto close_srv;
969 
970 	c0 = xsocket(family, sotype, 0);
971 	if (c0 < 0)
972 		goto close_srv;
973 
974 	err = xconnect(c0, sockaddr(&addr), len);
975 	if (err)
976 		goto close_cli0;
977 
978 	if (sotype == SOCK_STREAM) {
979 		p0 = xaccept_nonblock(s, NULL, NULL);
980 		if (p0 < 0)
981 			goto close_cli0;
982 	} else {
983 		p0 = xsocket(family, sotype, 0);
984 		if (p0 < 0)
985 			goto close_cli0;
986 
987 		len = sizeof(addr);
988 		err = xgetsockname(c0, sockaddr(&addr), &len);
989 		if (err)
990 			goto close_cli0;
991 
992 		err = xconnect(p0, sockaddr(&addr), len);
993 		if (err)
994 			goto close_cli0;
995 	}
996 
997 	/* Update sock_map[0] to redirect to a connected socket */
998 	key = 0;
999 	value = p0;
1000 	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST);
1001 	if (err)
1002 		goto close_peer0;
1003 
1004 	c1 = xsocket(family, sotype, 0);
1005 	if (c1 < 0)
1006 		goto close_peer0;
1007 
1008 	len = sizeof(addr);
1009 	err = xgetsockname(s, sockaddr(&addr), &len);
1010 	if (err)
1011 		goto close_cli1;
1012 
1013 	errno = 0;
1014 	err = connect(c1, sockaddr(&addr), len);
1015 	if (sotype == SOCK_DGRAM) {
1016 		char b = 'a';
1017 		ssize_t n;
1018 
1019 		n = xsend(c1, &b, sizeof(b), 0);
1020 		if (n == -1)
1021 			goto close_cli1;
1022 
1023 		n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
1024 		err = n == -1;
1025 	}
1026 	if (!err || errno != ECONNREFUSED)
1027 		FAIL_ERRNO("connect: expected ECONNREFUSED");
1028 
1029 	key = SK_DROP;
1030 	err = xbpf_map_lookup_elem(verd_map, &key, &drop);
1031 	if (err)
1032 		goto close_cli1;
1033 	if (drop != 1)
1034 		FAIL("want drop count 1, have %d", drop);
1035 
1036 close_cli1:
1037 	xclose(c1);
1038 close_peer0:
1039 	xclose(p0);
1040 close_cli0:
1041 	xclose(c0);
1042 close_srv:
1043 	xclose(s);
1044 }
1045 
1046 /* Check that redirecting across reuseport groups is not allowed. */
1047 static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
1048 					int verd_map, int reuseport_prog)
1049 {
1050 	struct sockaddr_storage addr;
1051 	int s1, s2, c, err;
1052 	unsigned int drop;
1053 	socklen_t len;
1054 	u32 key;
1055 
1056 	zero_verdict_count(verd_map);
1057 
1058 	/* Create two listeners, each in its own reuseport group */
1059 	s1 = socket_loopback_reuseport(family, sotype, reuseport_prog);
1060 	if (s1 < 0)
1061 		return;
1062 
1063 	s2 = socket_loopback_reuseport(family, sotype, reuseport_prog);
1064 	if (s2 < 0)
1065 		goto close_srv1;
1066 
1067 	err = add_to_sockmap(sock_map, s1, s2);
1068 	if (err)
1069 		goto close_srv2;
1070 
1071 	/* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
1072 	len = sizeof(addr);
1073 	err = xgetsockname(s2, sockaddr(&addr), &len);
1074 	if (err)
1075 		goto close_srv2;
1076 
1077 	c = xsocket(family, sotype, 0);
1078 	if (c < 0)
1079 		goto close_srv2;
1080 
1081 	err = connect(c, sockaddr(&addr), len);
1082 	if (sotype == SOCK_DGRAM) {
1083 		char b = 'a';
1084 		ssize_t n;
1085 
1086 		n = xsend(c, &b, sizeof(b), 0);
1087 		if (n == -1)
1088 			goto close_cli;
1089 
1090 		n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
1091 		err = n == -1;
1092 	}
1093 	if (!err || errno != ECONNREFUSED) {
1094 		FAIL_ERRNO("connect: expected ECONNREFUSED");
1095 		goto close_cli;
1096 	}
1097 
1098 	/* Expect drop, can't redirect outside of reuseport group */
1099 	key = SK_DROP;
1100 	err = xbpf_map_lookup_elem(verd_map, &key, &drop);
1101 	if (err)
1102 		goto close_cli;
1103 	if (drop != 1)
1104 		FAIL("want drop count 1, have %d", drop);
1105 
1106 close_cli:
1107 	xclose(c);
1108 close_srv2:
1109 	xclose(s2);
1110 close_srv1:
1111 	xclose(s1);
1112 }
1113 
1114 #define TEST(fn, ...)                                                          \
1115 	{                                                                      \
1116 		fn, #fn, __VA_ARGS__                                           \
1117 	}
1118 
1119 static void test_ops_cleanup(const struct bpf_map *map)
1120 {
1121 	int err, mapfd;
1122 	u32 key;
1123 
1124 	mapfd = bpf_map__fd(map);
1125 
1126 	for (key = 0; key < bpf_map__max_entries(map); key++) {
1127 		err = bpf_map_delete_elem(mapfd, &key);
1128 		if (err && errno != EINVAL && errno != ENOENT)
1129 			FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
1130 	}
1131 }
1132 
1133 static const char *family_str(sa_family_t family)
1134 {
1135 	switch (family) {
1136 	case AF_INET:
1137 		return "IPv4";
1138 	case AF_INET6:
1139 		return "IPv6";
1140 	case AF_UNIX:
1141 		return "Unix";
1142 	case AF_VSOCK:
1143 		return "VSOCK";
1144 	default:
1145 		return "unknown";
1146 	}
1147 }
1148 
1149 static const char *map_type_str(const struct bpf_map *map)
1150 {
1151 	int type;
1152 
1153 	if (!map)
1154 		return "invalid";
1155 	type = bpf_map__type(map);
1156 
1157 	switch (type) {
1158 	case BPF_MAP_TYPE_SOCKMAP:
1159 		return "sockmap";
1160 	case BPF_MAP_TYPE_SOCKHASH:
1161 		return "sockhash";
1162 	default:
1163 		return "unknown";
1164 	}
1165 }
1166 
1167 static const char *sotype_str(int sotype)
1168 {
1169 	switch (sotype) {
1170 	case SOCK_DGRAM:
1171 		return "UDP";
1172 	case SOCK_STREAM:
1173 		return "TCP";
1174 	default:
1175 		return "unknown";
1176 	}
1177 }
1178 
1179 static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
1180 		     int family, int sotype)
1181 {
1182 	const struct op_test {
1183 		void (*fn)(struct test_sockmap_listen *skel,
1184 			   int family, int sotype, int mapfd);
1185 		const char *name;
1186 		int sotype;
1187 	} tests[] = {
1188 		/* insert */
1189 		TEST(test_insert_invalid),
1190 		TEST(test_insert_opened),
1191 		TEST(test_insert_bound, SOCK_STREAM),
1192 		TEST(test_insert),
1193 		/* delete */
1194 		TEST(test_delete_after_insert),
1195 		TEST(test_delete_after_close),
1196 		/* lookup */
1197 		TEST(test_lookup_after_insert),
1198 		TEST(test_lookup_after_delete),
1199 		TEST(test_lookup_32_bit_value),
1200 		/* update */
1201 		TEST(test_update_existing),
1202 		/* races with insert/delete */
1203 		TEST(test_destroy_orphan_child, SOCK_STREAM),
1204 		TEST(test_syn_recv_insert_delete, SOCK_STREAM),
1205 		TEST(test_race_insert_listen, SOCK_STREAM),
1206 		/* child clone */
1207 		TEST(test_clone_after_delete, SOCK_STREAM),
1208 		TEST(test_accept_after_delete, SOCK_STREAM),
1209 		TEST(test_accept_before_delete, SOCK_STREAM),
1210 	};
1211 	const char *family_name, *map_name, *sotype_name;
1212 	const struct op_test *t;
1213 	char s[MAX_TEST_NAME];
1214 	int map_fd;
1215 
1216 	family_name = family_str(family);
1217 	map_name = map_type_str(map);
1218 	sotype_name = sotype_str(sotype);
1219 	map_fd = bpf_map__fd(map);
1220 
1221 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1222 		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1223 			 sotype_name, t->name);
1224 
1225 		if (t->sotype != 0 && t->sotype != sotype)
1226 			continue;
1227 
1228 		if (!test__start_subtest(s))
1229 			continue;
1230 
1231 		t->fn(skel, family, sotype, map_fd);
1232 		test_ops_cleanup(map);
1233 	}
1234 }
1235 
1236 static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1237 		       int family, int sotype)
1238 {
1239 	const struct redir_test {
1240 		void (*fn)(struct test_sockmap_listen *skel,
1241 			   struct bpf_map *map, int family, int sotype);
1242 		const char *name;
1243 	} tests[] = {
1244 		TEST(test_skb_redir_to_connected),
1245 		TEST(test_skb_redir_to_listening),
1246 		TEST(test_msg_redir_to_connected),
1247 		TEST(test_msg_redir_to_listening),
1248 	};
1249 	const char *family_name, *map_name;
1250 	const struct redir_test *t;
1251 	char s[MAX_TEST_NAME];
1252 
1253 	family_name = family_str(family);
1254 	map_name = map_type_str(map);
1255 
1256 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1257 		snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
1258 			 t->name);
1259 
1260 		if (!test__start_subtest(s))
1261 			continue;
1262 
1263 		t->fn(skel, map, family, sotype);
1264 	}
1265 }
1266 
1267 static void unix_redir_to_connected(int sotype, int sock_mapfd,
1268 			       int verd_mapfd, enum redir_mode mode)
1269 {
1270 	const char *log_prefix = redir_mode_str(mode);
1271 	int c0, c1, p0, p1;
1272 	unsigned int pass;
1273 	int err, n;
1274 	int sfd[2];
1275 	u32 key;
1276 	char b;
1277 
1278 	zero_verdict_count(verd_mapfd);
1279 
1280 	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
1281 		return;
1282 	c0 = sfd[0], p0 = sfd[1];
1283 
1284 	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
1285 		goto close0;
1286 	c1 = sfd[0], p1 = sfd[1];
1287 
1288 	err = add_to_sockmap(sock_mapfd, p0, p1);
1289 	if (err)
1290 		goto close;
1291 
1292 	n = write(c1, "a", 1);
1293 	if (n < 0)
1294 		FAIL_ERRNO("%s: write", log_prefix);
1295 	if (n == 0)
1296 		FAIL("%s: incomplete write", log_prefix);
1297 	if (n < 1)
1298 		goto close;
1299 
1300 	key = SK_PASS;
1301 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1302 	if (err)
1303 		goto close;
1304 	if (pass != 1)
1305 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1306 
1307 	n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1308 	if (n < 0)
1309 		FAIL_ERRNO("%s: recv_timeout", log_prefix);
1310 	if (n == 0)
1311 		FAIL("%s: incomplete recv", log_prefix);
1312 
1313 close:
1314 	xclose(c1);
1315 	xclose(p1);
1316 close0:
1317 	xclose(c0);
1318 	xclose(p0);
1319 }
1320 
1321 static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1322 					struct bpf_map *inner_map, int sotype)
1323 {
1324 	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1325 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1326 	int sock_map = bpf_map__fd(inner_map);
1327 	int err;
1328 
1329 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1330 	if (err)
1331 		return;
1332 
1333 	skel->bss->test_ingress = false;
1334 	unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
1335 	skel->bss->test_ingress = true;
1336 	unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
1337 
1338 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1339 }
1340 
1341 static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1342 			    int sotype)
1343 {
1344 	const char *family_name, *map_name;
1345 	char s[MAX_TEST_NAME];
1346 
1347 	family_name = family_str(AF_UNIX);
1348 	map_name = map_type_str(map);
1349 	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1350 	if (!test__start_subtest(s))
1351 		return;
1352 	unix_skb_redir_to_connected(skel, map, sotype);
1353 }
1354 
1355 /* Returns two connected loopback vsock sockets */
1356 static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
1357 {
1358 	struct sockaddr_storage addr;
1359 	socklen_t len = sizeof(addr);
1360 	int s, p, c;
1361 
1362 	s = socket_loopback(AF_VSOCK, sotype);
1363 	if (s < 0)
1364 		return -1;
1365 
1366 	c = xsocket(AF_VSOCK, sotype | SOCK_NONBLOCK, 0);
1367 	if (c == -1)
1368 		goto close_srv;
1369 
1370 	if (getsockname(s, sockaddr(&addr), &len) < 0)
1371 		goto close_cli;
1372 
1373 	if (connect(c, sockaddr(&addr), len) < 0 && errno != EINPROGRESS) {
1374 		FAIL_ERRNO("connect");
1375 		goto close_cli;
1376 	}
1377 
1378 	len = sizeof(addr);
1379 	p = accept_timeout(s, sockaddr(&addr), &len, IO_TIMEOUT_SEC);
1380 	if (p < 0)
1381 		goto close_cli;
1382 
1383 	*v0 = p;
1384 	*v1 = c;
1385 
1386 	return 0;
1387 
1388 close_cli:
1389 	close(c);
1390 close_srv:
1391 	close(s);
1392 
1393 	return -1;
1394 }
1395 
1396 static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
1397 					 enum redir_mode mode, int sotype)
1398 {
1399 	const char *log_prefix = redir_mode_str(mode);
1400 	char a = 'a', b = 'b';
1401 	int u0, u1, v0, v1;
1402 	int sfd[2];
1403 	unsigned int pass;
1404 	int err, n;
1405 	u32 key;
1406 
1407 	zero_verdict_count(verd_mapfd);
1408 
1409 	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd))
1410 		return;
1411 
1412 	u0 = sfd[0];
1413 	u1 = sfd[1];
1414 
1415 	err = vsock_socketpair_connectible(sotype, &v0, &v1);
1416 	if (err) {
1417 		FAIL("vsock_socketpair_connectible() failed");
1418 		goto close_uds;
1419 	}
1420 
1421 	err = add_to_sockmap(sock_mapfd, u0, v0);
1422 	if (err) {
1423 		FAIL("add_to_sockmap failed");
1424 		goto close_vsock;
1425 	}
1426 
1427 	n = write(v1, &a, sizeof(a));
1428 	if (n < 0)
1429 		FAIL_ERRNO("%s: write", log_prefix);
1430 	if (n == 0)
1431 		FAIL("%s: incomplete write", log_prefix);
1432 	if (n < 1)
1433 		goto out;
1434 
1435 	n = recv(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), MSG_DONTWAIT);
1436 	if (n < 0)
1437 		FAIL("%s: recv() err, errno=%d", log_prefix, errno);
1438 	if (n == 0)
1439 		FAIL("%s: incomplete recv", log_prefix);
1440 	if (b != a)
1441 		FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b);
1442 
1443 	key = SK_PASS;
1444 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1445 	if (err)
1446 		goto out;
1447 	if (pass != 1)
1448 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1449 out:
1450 	key = 0;
1451 	bpf_map_delete_elem(sock_mapfd, &key);
1452 	key = 1;
1453 	bpf_map_delete_elem(sock_mapfd, &key);
1454 
1455 close_vsock:
1456 	close(v0);
1457 	close(v1);
1458 
1459 close_uds:
1460 	close(u0);
1461 	close(u1);
1462 }
1463 
1464 static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel,
1465 					     struct bpf_map *inner_map,
1466 					     int sotype)
1467 {
1468 	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1469 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1470 	int sock_map = bpf_map__fd(inner_map);
1471 	int err;
1472 
1473 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1474 	if (err)
1475 		return;
1476 
1477 	skel->bss->test_ingress = false;
1478 	vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype);
1479 	skel->bss->test_ingress = true;
1480 	vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype);
1481 
1482 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1483 }
1484 
1485 static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map)
1486 {
1487 	const char *family_name, *map_name;
1488 	char s[MAX_TEST_NAME];
1489 
1490 	family_name = family_str(AF_VSOCK);
1491 	map_name = map_type_str(map);
1492 	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1493 	if (!test__start_subtest(s))
1494 		return;
1495 
1496 	vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM);
1497 	vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET);
1498 }
1499 
1500 static void test_reuseport(struct test_sockmap_listen *skel,
1501 			   struct bpf_map *map, int family, int sotype)
1502 {
1503 	const struct reuseport_test {
1504 		void (*fn)(int family, int sotype, int socket_map,
1505 			   int verdict_map, int reuseport_prog);
1506 		const char *name;
1507 		int sotype;
1508 	} tests[] = {
1509 		TEST(test_reuseport_select_listening),
1510 		TEST(test_reuseport_select_connected),
1511 		TEST(test_reuseport_mixed_groups),
1512 	};
1513 	int socket_map, verdict_map, reuseport_prog;
1514 	const char *family_name, *map_name, *sotype_name;
1515 	const struct reuseport_test *t;
1516 	char s[MAX_TEST_NAME];
1517 
1518 	family_name = family_str(family);
1519 	map_name = map_type_str(map);
1520 	sotype_name = sotype_str(sotype);
1521 
1522 	socket_map = bpf_map__fd(map);
1523 	verdict_map = bpf_map__fd(skel->maps.verdict_map);
1524 	reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);
1525 
1526 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1527 		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1528 			 sotype_name, t->name);
1529 
1530 		if (t->sotype != 0 && t->sotype != sotype)
1531 			continue;
1532 
1533 		if (!test__start_subtest(s))
1534 			continue;
1535 
1536 		t->fn(family, sotype, socket_map, verdict_map, reuseport_prog);
1537 	}
1538 }
1539 
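/* Build a pair of connected loopback sockets without accept(): the socket
 * from socket_loopback() (p0) and a client (c0) are connect()ed to each
 * other. Used with SOCK_DGRAM by the UDP and UDP/UNIX redirect tests.
 */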
1540 static int inet_socketpair(int family, int type, int *s, int *c)
1541 {
1542 	struct sockaddr_storage addr;
1543 	socklen_t len;
1544 	int p0, c0;
1545 	int err;
1546 
1547 	p0 = socket_loopback(family, type | SOCK_NONBLOCK);
1548 	if (p0 < 0)
1549 		return p0;
1550 
1551 	len = sizeof(addr);
1552 	err = xgetsockname(p0, sockaddr(&addr), &len);
1553 	if (err)
1554 		goto close_peer0;
1555 
1556 	c0 = xsocket(family, type | SOCK_NONBLOCK, 0);
1557 	if (c0 < 0) {
1558 		err = c0;
1559 		goto close_peer0;
1560 	}
1561 	err = xconnect(c0, sockaddr(&addr), len);
1562 	if (err)
1563 		goto close_cli0;
1564 	err = xgetsockname(c0, sockaddr(&addr), &len);
1565 	if (err)
1566 		goto close_cli0;
1567 	err = xconnect(p0, sockaddr(&addr), len);
1568 	if (err)
1569 		goto close_cli0;
1570 
1571 	*s = p0;
1572 	*c = c0;
1573 	return 0;
1574 
1575 close_cli0:
1576 	xclose(c0);
1577 close_peer0:
1578 	xclose(p0);
1579 	return err;
1580 }
1581 
1582 static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
1583 				   enum redir_mode mode)
1584 {
1585 	const char *log_prefix = redir_mode_str(mode);
1586 	int c0, c1, p0, p1;
1587 	unsigned int pass;
1588 	int err, n;
1589 	u32 key;
1590 	char b;
1591 
1592 	zero_verdict_count(verd_mapfd);
1593 
1594 	err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
1595 	if (err)
1596 		return;
1597 	err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
1598 	if (err)
1599 		goto close_cli0;
1600 
1601 	err = add_to_sockmap(sock_mapfd, p0, p1);
1602 	if (err)
1603 		goto close_cli1;
1604 
1605 	n = write(c1, "a", 1);
1606 	if (n < 0)
1607 		FAIL_ERRNO("%s: write", log_prefix);
1608 	if (n == 0)
1609 		FAIL("%s: incomplete write", log_prefix);
1610 	if (n < 1)
1611 		goto close_cli1;
1612 
1613 	key = SK_PASS;
1614 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1615 	if (err)
1616 		goto close_cli1;
1617 	if (pass != 1)
1618 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1619 
1620 	n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1621 	if (n < 0)
1622 		FAIL_ERRNO("%s: recv_timeout", log_prefix);
1623 	if (n == 0)
1624 		FAIL("%s: incomplete recv", log_prefix);
1625 
1626 close_cli1:
1627 	xclose(c1);
1628 	xclose(p1);
1629 close_cli0:
1630 	xclose(c0);
1631 	xclose(p0);
1632 }
1633 
1634 static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
1635 				       struct bpf_map *inner_map, int family)
1636 {
1637 	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1638 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1639 	int sock_map = bpf_map__fd(inner_map);
1640 	int err;
1641 
1642 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1643 	if (err)
1644 		return;
1645 
1646 	skel->bss->test_ingress = false;
1647 	udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
1648 	skel->bss->test_ingress = true;
1649 	udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
1650 
1651 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1652 }
1653 
1654 static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1655 			   int family)
1656 {
1657 	const char *family_name, *map_name;
1658 	char s[MAX_TEST_NAME];
1659 
1660 	family_name = family_str(family);
1661 	map_name = map_type_str(map);
1662 	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1663 	if (!test__start_subtest(s))
1664 		return;
1665 	udp_skb_redir_to_connected(skel, map, family);
1666 }
1667 
1668 static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
1669 					int verd_mapfd, enum redir_mode mode)
1670 {
1671 	const char *log_prefix = redir_mode_str(mode);
1672 	int c0, c1, p0, p1;
1673 	unsigned int pass;
1674 	int err, n;
1675 	int sfd[2];
1676 	u32 key;
1677 	char b;
1678 
1679 	zero_verdict_count(verd_mapfd);
1680 
1681 	if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
1682 		return;
1683 	c0 = sfd[0], p0 = sfd[1];
1684 
1685 	err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
1686 	if (err)
1687 		goto close;
1688 
1689 	err = add_to_sockmap(sock_mapfd, p0, p1);
1690 	if (err)
1691 		goto close_cli1;
1692 
1693 	n = write(c1, "a", 1);
1694 	if (n < 0)
1695 		FAIL_ERRNO("%s: write", log_prefix);
1696 	if (n == 0)
1697 		FAIL("%s: incomplete write", log_prefix);
1698 	if (n < 1)
1699 		goto close_cli1;
1700 
1701 	key = SK_PASS;
1702 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1703 	if (err)
1704 		goto close_cli1;
1705 	if (pass != 1)
1706 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1707 
1708 	n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1709 	if (n < 0)
1710 		FAIL_ERRNO("%s: recv_timeout", log_prefix);
1711 	if (n == 0)
1712 		FAIL("%s: incomplete recv", log_prefix);
1713 
1714 close_cli1:
1715 	xclose(c1);
1716 	xclose(p1);
1717 close:
1718 	xclose(c0);
1719 	xclose(p0);
1720 }
1721 
1722 static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1723 					    struct bpf_map *inner_map, int family)
1724 {
1725 	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1726 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1727 	int sock_map = bpf_map__fd(inner_map);
1728 	int err;
1729 
1730 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1731 	if (err)
1732 		return;
1733 
1734 	skel->bss->test_ingress = false;
1735 	inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1736 				    REDIR_EGRESS);
1737 	inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1738 				    REDIR_EGRESS);
1739 	skel->bss->test_ingress = true;
1740 	inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1741 				    REDIR_INGRESS);
1742 	inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1743 				    REDIR_INGRESS);
1744 
1745 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1746 }
1747 
1748 static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
1749 					int verd_mapfd, enum redir_mode mode)
1750 {
1751 	const char *log_prefix = redir_mode_str(mode);
1752 	int c0, c1, p0, p1;
1753 	unsigned int pass;
1754 	int err, n;
1755 	int sfd[2];
1756 	u32 key;
1757 	char b;
1758 
1759 	zero_verdict_count(verd_mapfd);
1760 
1761 	err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
1762 	if (err)
1763 		return;
1764 
1765 	if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
1766 		goto close_cli0;
1767 	c1 = sfd[0], p1 = sfd[1];
1768 
1769 	err = add_to_sockmap(sock_mapfd, p0, p1);
1770 	if (err)
1771 		goto close;
1772 
1773 	n = write(c1, "a", 1);
1774 	if (n < 0)
1775 		FAIL_ERRNO("%s: write", log_prefix);
1776 	if (n == 0)
1777 		FAIL("%s: incomplete write", log_prefix);
1778 	if (n < 1)
1779 		goto close;
1780 
1781 	key = SK_PASS;
1782 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1783 	if (err)
1784 		goto close;
1785 	if (pass != 1)
1786 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1787 
1788 	n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1789 	if (n < 0)
1790 		FAIL_ERRNO("%s: recv_timeout", log_prefix);
1791 	if (n == 0)
1792 		FAIL("%s: incomplete recv", log_prefix);
1793 
1794 close:
1795 	xclose(c1);
1796 	xclose(p1);
1797 close_cli0:
1798 	xclose(c0);
1799 	xclose(p0);
1800 
1801 }
1802 
1803 static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
1804 					    struct bpf_map *inner_map, int family)
1805 {
1806 	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1807 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1808 	int sock_map = bpf_map__fd(inner_map);
1809 	int err;
1810 
1811 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1812 	if (err)
1813 		return;
1814 
1815 	skel->bss->test_ingress = false;
1816 	unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1817 				     REDIR_EGRESS);
1818 	unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1819 				     REDIR_EGRESS);
1820 	skel->bss->test_ingress = true;
1821 	unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1822 				     REDIR_INGRESS);
1823 	unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1824 				     REDIR_INGRESS);
1825 
1826 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1827 }
1828 
1829 static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1830 				int family)
1831 {
1832 	const char *family_name, *map_name;
1833 	char s[MAX_TEST_NAME];
1834 
1835 	family_name = family_str(family);
1836 	map_name = map_type_str(map);
1837 	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1838 	if (!test__start_subtest(s))
1839 		return;
1840 	inet_unix_skb_redir_to_connected(skel, map, family);
1841 	unix_inet_skb_redir_to_connected(skel, map, family);
1842 }
1843 
1844 static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
1845 		      int family)
1846 {
1847 	test_ops(skel, map, family, SOCK_STREAM);
1848 	test_ops(skel, map, family, SOCK_DGRAM);
1849 	test_redir(skel, map, family, SOCK_STREAM);
1850 	test_reuseport(skel, map, family, SOCK_STREAM);
1851 	test_reuseport(skel, map, family, SOCK_DGRAM);
1852 	test_udp_redir(skel, map, family);
1853 	test_udp_unix_redir(skel, map, family);
1854 }
1855 
1856 void serial_test_sockmap_listen(void)
1857 {
1858 	struct test_sockmap_listen *skel;
1859 
1860 	skel = test_sockmap_listen__open_and_load();
1861 	if (!skel) {
1862 		FAIL("skeleton open/load failed");
1863 		return;
1864 	}
1865 
1866 	skel->bss->test_sockmap = true;
1867 	run_tests(skel, skel->maps.sock_map, AF_INET);
1868 	run_tests(skel, skel->maps.sock_map, AF_INET6);
1869 	test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
1870 	test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
1871 	test_vsock_redir(skel, skel->maps.sock_map);
1872 
1873 	skel->bss->test_sockmap = false;
1874 	run_tests(skel, skel->maps.sock_hash, AF_INET);
1875 	run_tests(skel, skel->maps.sock_hash, AF_INET6);
1876 	test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
1877 	test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
1878 	test_vsock_redir(skel, skel->maps.sock_hash);
1879 
1880 	test_sockmap_listen__destroy(skel);
1881 }
1882