1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2020 Cloudflare
3 /*
4  * Test suite for SOCKMAP/SOCKHASH holding listening sockets.
5  * Covers:
 *  1. BPF map operations - bpf_map_{update,lookup,delete}_elem
7  *  2. BPF redirect helpers - bpf_{sk,msg}_redirect_map
8  *  3. BPF reuseport helper - bpf_sk_select_reuseport
9  */
10 
11 #include <linux/compiler.h>
12 #include <errno.h>
13 #include <error.h>
14 #include <limits.h>
15 #include <netinet/in.h>
16 #include <pthread.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/select.h>
20 #include <unistd.h>
21 
22 #include <bpf/bpf.h>
23 #include <bpf/libbpf.h>
24 
25 #include "bpf_util.h"
26 #include "test_progs.h"
27 #include "test_sockmap_listen.skel.h"
28 
/* Timeout, in seconds, handed to the accept()/recv() timeout wrappers. */
#define IO_TIMEOUT_SEC 30
/* Size of the scratch buffer FAIL_LIBBPF() passes to libbpf_strerror(). */
#define MAX_STRERR_LEN 256
/* NOTE(review): not referenced in this part of the file - presumably bounds
 * test-name buffers further down; confirm.
 */
#define MAX_TEST_NAME 80
32 
/*
 * Print a diagnostic through error_at_line() (status 0, so execution
 * continues), using the calling function's name and line, then invoke
 * CHECK_FAIL(true). @errnum is forwarded as error_at_line()'s error number.
 */
#define _FAIL(errnum, ...) \
	({ \
		error_at_line(0, (errnum), __func__, __LINE__, __VA_ARGS__); \
		CHECK_FAIL(true); \
	})
/* Failure report with no error number attached. */
#define FAIL(...) _FAIL(0, __VA_ARGS__)
/* Failure report carrying the current value of errno. */
#define FAIL_ERRNO(...) _FAIL(errno, __VA_ARGS__)
/*
 * Report a libbpf error: @err is rendered with libbpf_strerror() into a
 * local buffer and printed as "<msg>: <error text>" via FAIL().
 */
#define FAIL_LIBBPF(err, msg) \
	({ \
		char __errstr[MAX_STRERR_LEN]; \
		libbpf_strerror((err), __errstr, sizeof(__errstr)); \
		FAIL("%s: %s", (msg), __errstr); \
	})
46 
47 /* Wrappers that fail the test on error and report it. */
48 
/*
 * accept_timeout() bounded by IO_TIMEOUT_SEC. Evaluates to its return
 * value; a result of -1 is reported with FAIL_ERRNO().
 */
#define xaccept_nonblock(fd, addr, len) \
	({ \
		int __rv = accept_timeout((fd), (addr), (len), \
					  IO_TIMEOUT_SEC); \
		if (__rv == -1) { \
			FAIL_ERRNO("accept"); \
		} \
		__rv; \
	})
57 
/* bind() that reports a -1 result with FAIL_ERRNO(); evaluates to bind()'s result. */
#define xbind(fd, addr, len) \
	({ \
		int __rv = bind((fd), (addr), (len)); \
		if (__rv == -1) { \
			FAIL_ERRNO("bind"); \
		} \
		__rv; \
	})
65 
/* close() that reports a -1 result with FAIL_ERRNO(); evaluates to close()'s result. */
#define xclose(fd) \
	({ \
		int __rv = close((fd)); \
		if (__rv == -1) { \
			FAIL_ERRNO("close"); \
		} \
		__rv; \
	})
73 
/* connect() that reports a -1 result with FAIL_ERRNO(); evaluates to connect()'s result. */
#define xconnect(fd, addr, len) \
	({ \
		int __rv = connect((fd), (addr), (len)); \
		if (__rv == -1) { \
			FAIL_ERRNO("connect"); \
		} \
		__rv; \
	})
81 
/* getsockname() that reports a -1 result with FAIL_ERRNO(); evaluates to its result. */
#define xgetsockname(fd, addr, len) \
	({ \
		int __rv = getsockname((fd), (addr), (len)); \
		if (__rv == -1) { \
			FAIL_ERRNO("getsockname"); \
		} \
		__rv; \
	})
89 
/*
 * getsockopt() that reports a -1 result with FAIL_ERRNO(); the option name
 * is stringified into the message. Evaluates to getsockopt()'s result.
 */
#define xgetsockopt(fd, level, name, val, len) \
	({ \
		int __rv = getsockopt((fd), (level), (name), (val), (len)); \
		if (__rv == -1) { \
			FAIL_ERRNO("getsockopt(" #name ")"); \
		} \
		__rv; \
	})
97 
/* listen() that reports a -1 result with FAIL_ERRNO(); evaluates to listen()'s result. */
#define xlisten(fd, backlog) \
	({ \
		int __rv = listen((fd), (backlog)); \
		if (__rv == -1) { \
			FAIL_ERRNO("listen"); \
		} \
		__rv; \
	})
105 
/*
 * setsockopt() that reports a -1 result with FAIL_ERRNO(); the option name
 * is stringified into the message. Evaluates to setsockopt()'s result.
 */
#define xsetsockopt(fd, level, name, val, len) \
	({ \
		int __rv = setsockopt((fd), (level), (name), (val), (len)); \
		if (__rv == -1) { \
			FAIL_ERRNO("setsockopt(" #name ")"); \
		} \
		__rv; \
	})
113 
/* send() that reports a -1 result with FAIL_ERRNO(); evaluates to the ssize_t result. */
#define xsend(fd, buf, len, flags) \
	({ \
		ssize_t __rv = send((fd), (buf), (len), (flags)); \
		if (__rv == -1) { \
			FAIL_ERRNO("send"); \
		} \
		__rv; \
	})
121 
/*
 * recv_timeout() bounded by IO_TIMEOUT_SEC. Evaluates to its result
 * (held in an ssize_t); a result of -1 is reported with FAIL_ERRNO().
 */
#define xrecv_nonblock(fd, buf, len, flags) \
	({ \
		ssize_t __rv = recv_timeout((fd), (buf), (len), (flags), \
					    IO_TIMEOUT_SEC); \
		if (__rv == -1) { \
			FAIL_ERRNO("recv"); \
		} \
		__rv; \
	})
130 
/*
 * socket() that reports a -1 result with FAIL_ERRNO(); evaluates to the new
 * descriptor or -1. Arguments are parenthesized like in the other wrappers
 * so that expression arguments (e.g. "sotype | SOCK_NONBLOCK") expand safely.
 */
#define xsocket(family, sotype, flags) \
	({ \
		int __ret = socket((family), (sotype), (flags)); \
		if (__ret == -1) \
			FAIL_ERRNO("socket"); \
		__ret; \
	})
138 
/* bpf_map_delete_elem() that reports a -1 result with FAIL_ERRNO(); evaluates to its result. */
#define xbpf_map_delete_elem(fd, key) \
	({ \
		int __rv = bpf_map_delete_elem((fd), (key)); \
		if (__rv == -1) { \
			FAIL_ERRNO("map_delete"); \
		} \
		__rv; \
	})
146 
/* bpf_map_lookup_elem() that reports a -1 result with FAIL_ERRNO(); evaluates to its result. */
#define xbpf_map_lookup_elem(fd, key, val) \
	({ \
		int __rv = bpf_map_lookup_elem((fd), (key), (val)); \
		if (__rv == -1) { \
			FAIL_ERRNO("map_lookup"); \
		} \
		__rv; \
	})
154 
/* bpf_map_update_elem() that reports a -1 result with FAIL_ERRNO(); evaluates to its result. */
#define xbpf_map_update_elem(fd, key, val, flags) \
	({ \
		int __rv = bpf_map_update_elem((fd), (key), (val), (flags)); \
		if (__rv == -1) { \
			FAIL_ERRNO("map_update"); \
		} \
		__rv; \
	})
162 
/*
 * bpf_prog_attach() that reports a -1 result with FAIL_ERRNO(); the attach
 * type is stringified into the message. Evaluates to the call's result.
 */
#define xbpf_prog_attach(prog, target, type, flags) \
	({ \
		int __rv = bpf_prog_attach((prog), (target), (type), (flags)); \
		if (__rv == -1) { \
			FAIL_ERRNO("prog_attach(" #type ")"); \
		} \
		__rv; \
	})
171 
/*
 * bpf_prog_detach2() that reports a -1 result with FAIL_ERRNO(); the attach
 * type is stringified into the message. Evaluates to the call's result.
 */
#define xbpf_prog_detach2(prog, target, type) \
	({ \
		int __rv = bpf_prog_detach2((prog), (target), (type)); \
		if (__rv == -1) { \
			FAIL_ERRNO("prog_detach2(" #type ")"); \
		} \
		__rv; \
	})
179 
/*
 * pthread_create() wrapper. The returned error number is copied into errno
 * unconditionally (so errno becomes 0 on success) and a non-zero result is
 * reported with FAIL_ERRNO(). Evaluates to pthread_create()'s result.
 */
#define xpthread_create(thread, attr, func, arg) \
	({ \
		int __rv = pthread_create((thread), (attr), (func), (arg)); \
		errno = __rv; \
		if (__rv != 0) { \
			FAIL_ERRNO("pthread_create"); \
		} \
		__rv; \
	})
188 
/*
 * pthread_join() wrapper. The returned error number is copied into errno
 * unconditionally (so errno becomes 0 on success) and a non-zero result is
 * reported with FAIL_ERRNO(). Evaluates to pthread_join()'s result.
 */
#define xpthread_join(thread, retval) \
	({ \
		int __rv = pthread_join((thread), (retval)); \
		errno = __rv; \
		if (__rv != 0) { \
			FAIL_ERRNO("pthread_join"); \
		} \
		__rv; \
	})
197 
/*
 * Wait with select() until @fd is readable or @timeout_sec seconds pass.
 *
 * Returns 0 when select() reports exactly one ready descriptor, -1
 * otherwise. On timeout errno is set to ETIME; when select() itself fails
 * errno is left as select() set it.
 */
static int poll_read(int fd, unsigned int timeout_sec)
{
	struct timeval tv = { .tv_sec = timeout_sec };
	fd_set readable;
	int nready;

	FD_ZERO(&readable);
	FD_SET(fd, &readable);

	nready = select(fd + 1, &readable, NULL, NULL, &tv);
	if (nready == 1)
		return 0;

	if (nready == 0)
		errno = ETIME;
	return -1;
}
213 
/*
 * accept() on @fd, but only after poll_read() reports it readable within
 * @timeout_sec seconds. Returns -1 when poll_read() fails, otherwise
 * whatever accept() returns.
 */
static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
			  unsigned int timeout_sec)
{
	int not_ready = poll_read(fd, timeout_sec);

	return not_ready ? -1 : accept(fd, addr, len);
}
222 
/*
 * recv() on @fd, but only after poll_read() reports it readable within
 * @timeout_sec seconds. Returns -1 when poll_read() fails, otherwise
 * whatever recv() returns (converted to int).
 */
static int recv_timeout(int fd, void *buf, size_t len, int flags,
			unsigned int timeout_sec)
{
	int not_ready = poll_read(fd, timeout_sec);

	if (not_ready)
		return -1;
	return recv(fd, buf, len, flags);
}
231 
/*
 * Zero @ss and fill it in as the IPv4 loopback address with port 0.
 * *@len receives sizeof(struct sockaddr_in).
 */
static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len)
{
	struct sockaddr_in *sin;

	memset(ss, 0, sizeof(*ss));
	sin = (struct sockaddr_in *)ss;

	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	sin->sin_port = 0;

	*len = sizeof(*sin);
}
241 
/*
 * Zero @ss and fill it in as the IPv6 loopback address with port 0.
 * *@len receives sizeof(struct sockaddr_in6).
 */
static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len)
{
	struct sockaddr_in6 *sin6;

	memset(ss, 0, sizeof(*ss));
	sin6 = (struct sockaddr_in6 *)ss;

	sin6->sin6_family = AF_INET6;
	sin6->sin6_addr = in6addr_loopback;
	sin6->sin6_port = 0;

	*len = sizeof(*sin6);
}
251 
/*
 * Fill @ss / *@len with the loopback address for @family (AF_INET or
 * AF_INET6). Any other family is reported with FAIL() and leaves both
 * outputs untouched.
 */
static void init_addr_loopback(int family, struct sockaddr_storage *ss,
			       socklen_t *len)
{
	if (family == AF_INET)
		init_addr_loopback4(ss, len);
	else if (family == AF_INET6)
		init_addr_loopback6(ss, len);
	else
		FAIL("unsupported address family %d", family);
}
266 
/* View a sockaddr_storage as the generic struct sockaddr the socket calls take. */
static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
{
	struct sockaddr *generic = (struct sockaddr *)ss;

	return generic;
}
271 
enable_reuseport(int s,int progfd)272 static int enable_reuseport(int s, int progfd)
273 {
274 	int err, one = 1;
275 
276 	err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
277 	if (err)
278 		return -1;
279 	err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
280 			  sizeof(progfd));
281 	if (err)
282 		return -1;
283 
284 	return 0;
285 }
286 
/*
 * Create a socket of @family/@sotype bound to the loopback address on an
 * ephemeral port (port 0). Sockets whose type has the SOCK_DGRAM bit set are
 * returned right after bind(); all others are also put into the listening
 * state.
 *
 * When @progfd is non-negative, reuseport is enabled and that program is
 * attached before binding. A failure there now aborts the setup instead of
 * being ignored, so callers never run against a socket that lacks the
 * requested reuseport configuration.
 *
 * Returns the socket descriptor, or -1 on any failure (already reported by
 * the x*() wrappers); the socket is closed on the failure paths.
 */
static int socket_loopback_reuseport(int family, int sotype, int progfd)
{
	struct sockaddr_storage addr;
	socklen_t len;
	int err, s;

	init_addr_loopback(family, &addr, &len);

	s = xsocket(family, sotype, 0);
	if (s == -1)
		return -1;

	if (progfd >= 0) {
		err = enable_reuseport(s, progfd);
		if (err)
			goto close;
	}

	err = xbind(s, sockaddr(&addr), len);
	if (err)
		goto close;

	/* Nothing to listen() on for datagram sockets */
	if (sotype & SOCK_DGRAM)
		return s;

	err = xlisten(s, SOMAXCONN);
	if (err)
		goto close;

	return s;
close:
	xclose(s);
	return -1;
}
318 
/*
 * Loopback socket without a reuseport program: forwards to
 * socket_loopback_reuseport() with a program fd of -1.
 */
static int socket_loopback(int family, int sotype)
{
	const int no_prog = -1;

	return socket_loopback_reuseport(family, sotype, no_prog);
}
323 
test_insert_invalid(int family,int sotype,int mapfd)324 static void test_insert_invalid(int family, int sotype, int mapfd)
325 {
326 	u32 key = 0;
327 	u64 value;
328 	int err;
329 
330 	value = -1;
331 	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
332 	if (!err || errno != EINVAL)
333 		FAIL_ERRNO("map_update: expected EINVAL");
334 
335 	value = INT_MAX;
336 	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
337 	if (!err || errno != EBADF)
338 		FAIL_ERRNO("map_update: expected EBADF");
339 }
340 
test_insert_opened(int family,int sotype,int mapfd)341 static void test_insert_opened(int family, int sotype, int mapfd)
342 {
343 	u32 key = 0;
344 	u64 value;
345 	int err, s;
346 
347 	s = xsocket(family, sotype, 0);
348 	if (s == -1)
349 		return;
350 
351 	errno = 0;
352 	value = s;
353 	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
354 	if (!err || errno != EOPNOTSUPP)
355 		FAIL_ERRNO("map_update: expected EOPNOTSUPP");
356 
357 	xclose(s);
358 }
359 
test_insert_bound(int family,int sotype,int mapfd)360 static void test_insert_bound(int family, int sotype, int mapfd)
361 {
362 	struct sockaddr_storage addr;
363 	socklen_t len;
364 	u32 key = 0;
365 	u64 value;
366 	int err, s;
367 
368 	init_addr_loopback(family, &addr, &len);
369 
370 	s = xsocket(family, sotype, 0);
371 	if (s == -1)
372 		return;
373 
374 	err = xbind(s, sockaddr(&addr), len);
375 	if (err)
376 		goto close;
377 
378 	errno = 0;
379 	value = s;
380 	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
381 	if (!err || errno != EOPNOTSUPP)
382 		FAIL_ERRNO("map_update: expected EOPNOTSUPP");
383 close:
384 	xclose(s);
385 }
386 
test_insert(int family,int sotype,int mapfd)387 static void test_insert(int family, int sotype, int mapfd)
388 {
389 	u64 value;
390 	u32 key;
391 	int s;
392 
393 	s = socket_loopback(family, sotype);
394 	if (s < 0)
395 		return;
396 
397 	key = 0;
398 	value = s;
399 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
400 	xclose(s);
401 }
402 
test_delete_after_insert(int family,int sotype,int mapfd)403 static void test_delete_after_insert(int family, int sotype, int mapfd)
404 {
405 	u64 value;
406 	u32 key;
407 	int s;
408 
409 	s = socket_loopback(family, sotype);
410 	if (s < 0)
411 		return;
412 
413 	key = 0;
414 	value = s;
415 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
416 	xbpf_map_delete_elem(mapfd, &key);
417 	xclose(s);
418 }
419 
test_delete_after_close(int family,int sotype,int mapfd)420 static void test_delete_after_close(int family, int sotype, int mapfd)
421 {
422 	int err, s;
423 	u64 value;
424 	u32 key;
425 
426 	s = socket_loopback(family, sotype);
427 	if (s < 0)
428 		return;
429 
430 	key = 0;
431 	value = s;
432 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
433 
434 	xclose(s);
435 
436 	errno = 0;
437 	err = bpf_map_delete_elem(mapfd, &key);
438 	if (!err || (errno != EINVAL && errno != ENOENT))
439 		/* SOCKMAP and SOCKHASH return different error codes */
440 		FAIL_ERRNO("map_delete: expected EINVAL/EINVAL");
441 }
442 
test_lookup_after_insert(int family,int sotype,int mapfd)443 static void test_lookup_after_insert(int family, int sotype, int mapfd)
444 {
445 	u64 cookie, value;
446 	socklen_t len;
447 	u32 key;
448 	int s;
449 
450 	s = socket_loopback(family, sotype);
451 	if (s < 0)
452 		return;
453 
454 	key = 0;
455 	value = s;
456 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
457 
458 	len = sizeof(cookie);
459 	xgetsockopt(s, SOL_SOCKET, SO_COOKIE, &cookie, &len);
460 
461 	xbpf_map_lookup_elem(mapfd, &key, &value);
462 
463 	if (value != cookie) {
464 		FAIL("map_lookup: have %#llx, want %#llx",
465 		     (unsigned long long)value, (unsigned long long)cookie);
466 	}
467 
468 	xclose(s);
469 }
470 
test_lookup_after_delete(int family,int sotype,int mapfd)471 static void test_lookup_after_delete(int family, int sotype, int mapfd)
472 {
473 	int err, s;
474 	u64 value;
475 	u32 key;
476 
477 	s = socket_loopback(family, sotype);
478 	if (s < 0)
479 		return;
480 
481 	key = 0;
482 	value = s;
483 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
484 	xbpf_map_delete_elem(mapfd, &key);
485 
486 	errno = 0;
487 	err = bpf_map_lookup_elem(mapfd, &key, &value);
488 	if (!err || errno != ENOENT)
489 		FAIL_ERRNO("map_lookup: expected ENOENT");
490 
491 	xclose(s);
492 }
493 
test_lookup_32_bit_value(int family,int sotype,int mapfd)494 static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
495 {
496 	u32 key, value32;
497 	int err, s;
498 
499 	s = socket_loopback(family, sotype);
500 	if (s < 0)
501 		return;
502 
503 	mapfd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(key),
504 			       sizeof(value32), 1, 0);
505 	if (mapfd < 0) {
506 		FAIL_ERRNO("map_create");
507 		goto close;
508 	}
509 
510 	key = 0;
511 	value32 = s;
512 	xbpf_map_update_elem(mapfd, &key, &value32, BPF_NOEXIST);
513 
514 	errno = 0;
515 	err = bpf_map_lookup_elem(mapfd, &key, &value32);
516 	if (!err || errno != ENOSPC)
517 		FAIL_ERRNO("map_lookup: expected ENOSPC");
518 
519 	xclose(mapfd);
520 close:
521 	xclose(s);
522 }
523 
test_update_existing(int family,int sotype,int mapfd)524 static void test_update_existing(int family, int sotype, int mapfd)
525 {
526 	int s1, s2;
527 	u64 value;
528 	u32 key;
529 
530 	s1 = socket_loopback(family, sotype);
531 	if (s1 < 0)
532 		return;
533 
534 	s2 = socket_loopback(family, sotype);
535 	if (s2 < 0)
536 		goto close_s1;
537 
538 	key = 0;
539 	value = s1;
540 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
541 
542 	value = s2;
543 	xbpf_map_update_elem(mapfd, &key, &value, BPF_EXIST);
544 	xclose(s2);
545 close_s1:
546 	xclose(s1);
547 }
548 
549 /* Exercise the code path where we destroy child sockets that never
550  * got accept()'ed, aka orphans, when parent socket gets closed.
551  */
test_destroy_orphan_child(int family,int sotype,int mapfd)552 static void test_destroy_orphan_child(int family, int sotype, int mapfd)
553 {
554 	struct sockaddr_storage addr;
555 	socklen_t len;
556 	int err, s, c;
557 	u64 value;
558 	u32 key;
559 
560 	s = socket_loopback(family, sotype);
561 	if (s < 0)
562 		return;
563 
564 	len = sizeof(addr);
565 	err = xgetsockname(s, sockaddr(&addr), &len);
566 	if (err)
567 		goto close_srv;
568 
569 	key = 0;
570 	value = s;
571 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
572 
573 	c = xsocket(family, sotype, 0);
574 	if (c == -1)
575 		goto close_srv;
576 
577 	xconnect(c, sockaddr(&addr), len);
578 	xclose(c);
579 close_srv:
580 	xclose(s);
581 }
582 
583 /* Perform a passive open after removing listening socket from SOCKMAP
584  * to ensure that callbacks get restored properly.
585  */
test_clone_after_delete(int family,int sotype,int mapfd)586 static void test_clone_after_delete(int family, int sotype, int mapfd)
587 {
588 	struct sockaddr_storage addr;
589 	socklen_t len;
590 	int err, s, c;
591 	u64 value;
592 	u32 key;
593 
594 	s = socket_loopback(family, sotype);
595 	if (s < 0)
596 		return;
597 
598 	len = sizeof(addr);
599 	err = xgetsockname(s, sockaddr(&addr), &len);
600 	if (err)
601 		goto close_srv;
602 
603 	key = 0;
604 	value = s;
605 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
606 	xbpf_map_delete_elem(mapfd, &key);
607 
608 	c = xsocket(family, sotype, 0);
609 	if (c < 0)
610 		goto close_srv;
611 
612 	xconnect(c, sockaddr(&addr), len);
613 	xclose(c);
614 close_srv:
615 	xclose(s);
616 }
617 
618 /* Check that child socket that got created while parent was in a
619  * SOCKMAP, but got accept()'ed only after the parent has been removed
620  * from SOCKMAP, gets cloned without parent psock state or callbacks.
621  */
test_accept_after_delete(int family,int sotype,int mapfd)622 static void test_accept_after_delete(int family, int sotype, int mapfd)
623 {
624 	struct sockaddr_storage addr;
625 	const u32 zero = 0;
626 	int err, s, c, p;
627 	socklen_t len;
628 	u64 value;
629 
630 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
631 	if (s == -1)
632 		return;
633 
634 	len = sizeof(addr);
635 	err = xgetsockname(s, sockaddr(&addr), &len);
636 	if (err)
637 		goto close_srv;
638 
639 	value = s;
640 	err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
641 	if (err)
642 		goto close_srv;
643 
644 	c = xsocket(family, sotype, 0);
645 	if (c == -1)
646 		goto close_srv;
647 
648 	/* Create child while parent is in sockmap */
649 	err = xconnect(c, sockaddr(&addr), len);
650 	if (err)
651 		goto close_cli;
652 
653 	/* Remove parent from sockmap */
654 	err = xbpf_map_delete_elem(mapfd, &zero);
655 	if (err)
656 		goto close_cli;
657 
658 	p = xaccept_nonblock(s, NULL, NULL);
659 	if (p == -1)
660 		goto close_cli;
661 
662 	/* Check that child sk_user_data is not set */
663 	value = p;
664 	xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
665 
666 	xclose(p);
667 close_cli:
668 	xclose(c);
669 close_srv:
670 	xclose(s);
671 }
672 
673 /* Check that child socket that got created and accepted while parent
674  * was in a SOCKMAP is cloned without parent psock state or callbacks.
675  */
test_accept_before_delete(int family,int sotype,int mapfd)676 static void test_accept_before_delete(int family, int sotype, int mapfd)
677 {
678 	struct sockaddr_storage addr;
679 	const u32 zero = 0, one = 1;
680 	int err, s, c, p;
681 	socklen_t len;
682 	u64 value;
683 
684 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
685 	if (s == -1)
686 		return;
687 
688 	len = sizeof(addr);
689 	err = xgetsockname(s, sockaddr(&addr), &len);
690 	if (err)
691 		goto close_srv;
692 
693 	value = s;
694 	err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
695 	if (err)
696 		goto close_srv;
697 
698 	c = xsocket(family, sotype, 0);
699 	if (c == -1)
700 		goto close_srv;
701 
702 	/* Create & accept child while parent is in sockmap */
703 	err = xconnect(c, sockaddr(&addr), len);
704 	if (err)
705 		goto close_cli;
706 
707 	p = xaccept_nonblock(s, NULL, NULL);
708 	if (p == -1)
709 		goto close_cli;
710 
711 	/* Check that child sk_user_data is not set */
712 	value = p;
713 	xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST);
714 
715 	xclose(p);
716 close_cli:
717 	xclose(c);
718 close_srv:
719 	xclose(s);
720 }
721 
/* State shared between a test's main loop and its helper thread. */
struct connect_accept_ctx {
	/* Socket the helper thread operates on */
	int sockfd;
	/* Written as 1 by the thread on exit; polled via is_thread_done() */
	unsigned int done;
	/* Number of loop iterations the thread should run */
	unsigned int nr_iter;
};
727 
is_thread_done(struct connect_accept_ctx * ctx)728 static bool is_thread_done(struct connect_accept_ctx *ctx)
729 {
730 	return READ_ONCE(ctx->done);
731 }
732 
connect_accept_thread(void * arg)733 static void *connect_accept_thread(void *arg)
734 {
735 	struct connect_accept_ctx *ctx = arg;
736 	struct sockaddr_storage addr;
737 	int family, socktype;
738 	socklen_t len;
739 	int err, i, s;
740 
741 	s = ctx->sockfd;
742 
743 	len = sizeof(addr);
744 	err = xgetsockname(s, sockaddr(&addr), &len);
745 	if (err)
746 		goto done;
747 
748 	len = sizeof(family);
749 	err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len);
750 	if (err)
751 		goto done;
752 
753 	len = sizeof(socktype);
754 	err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len);
755 	if (err)
756 		goto done;
757 
758 	for (i = 0; i < ctx->nr_iter; i++) {
759 		int c, p;
760 
761 		c = xsocket(family, socktype, 0);
762 		if (c < 0)
763 			break;
764 
765 		err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr));
766 		if (err) {
767 			xclose(c);
768 			break;
769 		}
770 
771 		p = xaccept_nonblock(s, NULL, NULL);
772 		if (p < 0) {
773 			xclose(c);
774 			break;
775 		}
776 
777 		xclose(p);
778 		xclose(c);
779 	}
780 done:
781 	WRITE_ONCE(ctx->done, 1);
782 	return NULL;
783 }
784 
test_syn_recv_insert_delete(int family,int sotype,int mapfd)785 static void test_syn_recv_insert_delete(int family, int sotype, int mapfd)
786 {
787 	struct connect_accept_ctx ctx = { 0 };
788 	struct sockaddr_storage addr;
789 	socklen_t len;
790 	u32 zero = 0;
791 	pthread_t t;
792 	int err, s;
793 	u64 value;
794 
795 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
796 	if (s < 0)
797 		return;
798 
799 	len = sizeof(addr);
800 	err = xgetsockname(s, sockaddr(&addr), &len);
801 	if (err)
802 		goto close;
803 
804 	ctx.sockfd = s;
805 	ctx.nr_iter = 1000;
806 
807 	err = xpthread_create(&t, NULL, connect_accept_thread, &ctx);
808 	if (err)
809 		goto close;
810 
811 	value = s;
812 	while (!is_thread_done(&ctx)) {
813 		err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
814 		if (err)
815 			break;
816 
817 		err = xbpf_map_delete_elem(mapfd, &zero);
818 		if (err)
819 			break;
820 	}
821 
822 	xpthread_join(t, NULL);
823 close:
824 	xclose(s);
825 }
826 
listen_thread(void * arg)827 static void *listen_thread(void *arg)
828 {
829 	struct sockaddr unspec = { AF_UNSPEC };
830 	struct connect_accept_ctx *ctx = arg;
831 	int err, i, s;
832 
833 	s = ctx->sockfd;
834 
835 	for (i = 0; i < ctx->nr_iter; i++) {
836 		err = xlisten(s, 1);
837 		if (err)
838 			break;
839 		err = xconnect(s, &unspec, sizeof(unspec));
840 		if (err)
841 			break;
842 	}
843 
844 	WRITE_ONCE(ctx->done, 1);
845 	return NULL;
846 }
847 
test_race_insert_listen(int family,int socktype,int mapfd)848 static void test_race_insert_listen(int family, int socktype, int mapfd)
849 {
850 	struct connect_accept_ctx ctx = { 0 };
851 	const u32 zero = 0;
852 	const int one = 1;
853 	pthread_t t;
854 	int err, s;
855 	u64 value;
856 
857 	s = xsocket(family, socktype, 0);
858 	if (s < 0)
859 		return;
860 
861 	err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
862 	if (err)
863 		goto close;
864 
865 	ctx.sockfd = s;
866 	ctx.nr_iter = 10000;
867 
868 	err = pthread_create(&t, NULL, listen_thread, &ctx);
869 	if (err)
870 		goto close;
871 
872 	value = s;
873 	while (!is_thread_done(&ctx)) {
874 		err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
875 		/* Expecting EOPNOTSUPP before listen() */
876 		if (err && errno != EOPNOTSUPP) {
877 			FAIL_ERRNO("map_update");
878 			break;
879 		}
880 
881 		err = bpf_map_delete_elem(mapfd, &zero);
882 		/* Expecting no entry after unhash on connect(AF_UNSPEC) */
883 		if (err && errno != EINVAL && errno != ENOENT) {
884 			FAIL_ERRNO("map_delete");
885 			break;
886 		}
887 	}
888 
889 	xpthread_join(t, NULL);
890 close:
891 	xclose(s);
892 }
893 
zero_verdict_count(int mapfd)894 static void zero_verdict_count(int mapfd)
895 {
896 	unsigned int zero = 0;
897 	int key;
898 
899 	key = SK_DROP;
900 	xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
901 	key = SK_PASS;
902 	xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
903 }
904 
/* Redirect direction exercised by the redir_to_*() tests. */
enum redir_mode {
	REDIR_INGRESS,
	REDIR_EGRESS,
};

/*
 * Name of @mode for log prefixes: "ingress", "egress", or "unknown" for
 * any other value.
 */
static const char *redir_mode_str(enum redir_mode mode)
{
	if (mode == REDIR_INGRESS)
		return "ingress";
	if (mode == REDIR_EGRESS)
		return "egress";
	return "unknown";
}
921 
redir_to_connected(int family,int sotype,int sock_mapfd,int verd_mapfd,enum redir_mode mode)922 static void redir_to_connected(int family, int sotype, int sock_mapfd,
923 			       int verd_mapfd, enum redir_mode mode)
924 {
925 	const char *log_prefix = redir_mode_str(mode);
926 	struct sockaddr_storage addr;
927 	int s, c0, c1, p0, p1;
928 	unsigned int pass;
929 	socklen_t len;
930 	int err, n;
931 	u64 value;
932 	u32 key;
933 	char b;
934 
935 	zero_verdict_count(verd_mapfd);
936 
937 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
938 	if (s < 0)
939 		return;
940 
941 	len = sizeof(addr);
942 	err = xgetsockname(s, sockaddr(&addr), &len);
943 	if (err)
944 		goto close_srv;
945 
946 	c0 = xsocket(family, sotype, 0);
947 	if (c0 < 0)
948 		goto close_srv;
949 	err = xconnect(c0, sockaddr(&addr), len);
950 	if (err)
951 		goto close_cli0;
952 
953 	p0 = xaccept_nonblock(s, NULL, NULL);
954 	if (p0 < 0)
955 		goto close_cli0;
956 
957 	c1 = xsocket(family, sotype, 0);
958 	if (c1 < 0)
959 		goto close_peer0;
960 	err = xconnect(c1, sockaddr(&addr), len);
961 	if (err)
962 		goto close_cli1;
963 
964 	p1 = xaccept_nonblock(s, NULL, NULL);
965 	if (p1 < 0)
966 		goto close_cli1;
967 
968 	key = 0;
969 	value = p0;
970 	err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
971 	if (err)
972 		goto close_peer1;
973 
974 	key = 1;
975 	value = p1;
976 	err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
977 	if (err)
978 		goto close_peer1;
979 
980 	n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
981 	if (n < 0)
982 		FAIL_ERRNO("%s: write", log_prefix);
983 	if (n == 0)
984 		FAIL("%s: incomplete write", log_prefix);
985 	if (n < 1)
986 		goto close_peer1;
987 
988 	key = SK_PASS;
989 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
990 	if (err)
991 		goto close_peer1;
992 	if (pass != 1)
993 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
994 
995 	n = read(c0, &b, 1);
996 	if (n < 0)
997 		FAIL_ERRNO("%s: read", log_prefix);
998 	if (n == 0)
999 		FAIL("%s: incomplete read", log_prefix);
1000 
1001 close_peer1:
1002 	xclose(p1);
1003 close_cli1:
1004 	xclose(c1);
1005 close_peer0:
1006 	xclose(p0);
1007 close_cli0:
1008 	xclose(c0);
1009 close_srv:
1010 	xclose(s);
1011 }
1012 
test_skb_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)1013 static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
1014 					struct bpf_map *inner_map, int family,
1015 					int sotype)
1016 {
1017 	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
1018 	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
1019 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1020 	int sock_map = bpf_map__fd(inner_map);
1021 	int err;
1022 
1023 	err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
1024 	if (err)
1025 		return;
1026 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
1027 	if (err)
1028 		goto detach;
1029 
1030 	redir_to_connected(family, sotype, sock_map, verdict_map,
1031 			   REDIR_INGRESS);
1032 
1033 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
1034 detach:
1035 	xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
1036 }
1037 
test_msg_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)1038 static void test_msg_redir_to_connected(struct test_sockmap_listen *skel,
1039 					struct bpf_map *inner_map, int family,
1040 					int sotype)
1041 {
1042 	int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
1043 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1044 	int sock_map = bpf_map__fd(inner_map);
1045 	int err;
1046 
1047 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
1048 	if (err)
1049 		return;
1050 
1051 	redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
1052 
1053 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
1054 }
1055 
redir_to_listening(int family,int sotype,int sock_mapfd,int verd_mapfd,enum redir_mode mode)1056 static void redir_to_listening(int family, int sotype, int sock_mapfd,
1057 			       int verd_mapfd, enum redir_mode mode)
1058 {
1059 	const char *log_prefix = redir_mode_str(mode);
1060 	struct sockaddr_storage addr;
1061 	int s, c, p, err, n;
1062 	unsigned int drop;
1063 	socklen_t len;
1064 	u64 value;
1065 	u32 key;
1066 
1067 	zero_verdict_count(verd_mapfd);
1068 
1069 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
1070 	if (s < 0)
1071 		return;
1072 
1073 	len = sizeof(addr);
1074 	err = xgetsockname(s, sockaddr(&addr), &len);
1075 	if (err)
1076 		goto close_srv;
1077 
1078 	c = xsocket(family, sotype, 0);
1079 	if (c < 0)
1080 		goto close_srv;
1081 	err = xconnect(c, sockaddr(&addr), len);
1082 	if (err)
1083 		goto close_cli;
1084 
1085 	p = xaccept_nonblock(s, NULL, NULL);
1086 	if (p < 0)
1087 		goto close_cli;
1088 
1089 	key = 0;
1090 	value = s;
1091 	err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
1092 	if (err)
1093 		goto close_peer;
1094 
1095 	key = 1;
1096 	value = p;
1097 	err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
1098 	if (err)
1099 		goto close_peer;
1100 
1101 	n = write(mode == REDIR_INGRESS ? c : p, "a", 1);
1102 	if (n < 0 && errno != EACCES)
1103 		FAIL_ERRNO("%s: write", log_prefix);
1104 	if (n == 0)
1105 		FAIL("%s: incomplete write", log_prefix);
1106 	if (n < 1)
1107 		goto close_peer;
1108 
1109 	key = SK_DROP;
1110 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop);
1111 	if (err)
1112 		goto close_peer;
1113 	if (drop != 1)
1114 		FAIL("%s: want drop count 1, have %d", log_prefix, drop);
1115 
1116 close_peer:
1117 	xclose(p);
1118 close_cli:
1119 	xclose(c);
1120 close_srv:
1121 	xclose(s);
1122 }
1123 
test_skb_redir_to_listening(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)1124 static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
1125 					struct bpf_map *inner_map, int family,
1126 					int sotype)
1127 {
1128 	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
1129 	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
1130 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1131 	int sock_map = bpf_map__fd(inner_map);
1132 	int err;
1133 
1134 	err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
1135 	if (err)
1136 		return;
1137 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
1138 	if (err)
1139 		goto detach;
1140 
1141 	redir_to_listening(family, sotype, sock_map, verdict_map,
1142 			   REDIR_INGRESS);
1143 
1144 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
1145 detach:
1146 	xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
1147 }
1148 
test_msg_redir_to_listening(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)1149 static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
1150 					struct bpf_map *inner_map, int family,
1151 					int sotype)
1152 {
1153 	int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
1154 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1155 	int sock_map = bpf_map__fd(inner_map);
1156 	int err;
1157 
1158 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
1159 	if (err)
1160 		return;
1161 
1162 	redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
1163 
1164 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
1165 }
1166 
test_reuseport_select_listening(int family,int sotype,int sock_map,int verd_map,int reuseport_prog)1167 static void test_reuseport_select_listening(int family, int sotype,
1168 					    int sock_map, int verd_map,
1169 					    int reuseport_prog)
1170 {
1171 	struct sockaddr_storage addr;
1172 	unsigned int pass;
1173 	int s, c, err;
1174 	socklen_t len;
1175 	u64 value;
1176 	u32 key;
1177 
1178 	zero_verdict_count(verd_map);
1179 
1180 	s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK,
1181 				      reuseport_prog);
1182 	if (s < 0)
1183 		return;
1184 
1185 	len = sizeof(addr);
1186 	err = xgetsockname(s, sockaddr(&addr), &len);
1187 	if (err)
1188 		goto close_srv;
1189 
1190 	key = 0;
1191 	value = s;
1192 	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
1193 	if (err)
1194 		goto close_srv;
1195 
1196 	c = xsocket(family, sotype, 0);
1197 	if (c < 0)
1198 		goto close_srv;
1199 	err = xconnect(c, sockaddr(&addr), len);
1200 	if (err)
1201 		goto close_cli;
1202 
1203 	if (sotype == SOCK_STREAM) {
1204 		int p;
1205 
1206 		p = xaccept_nonblock(s, NULL, NULL);
1207 		if (p < 0)
1208 			goto close_cli;
1209 		xclose(p);
1210 	} else {
1211 		char b = 'a';
1212 		ssize_t n;
1213 
1214 		n = xsend(c, &b, sizeof(b), 0);
1215 		if (n == -1)
1216 			goto close_cli;
1217 
1218 		n = xrecv_nonblock(s, &b, sizeof(b), 0);
1219 		if (n == -1)
1220 			goto close_cli;
1221 	}
1222 
1223 	key = SK_PASS;
1224 	err = xbpf_map_lookup_elem(verd_map, &key, &pass);
1225 	if (err)
1226 		goto close_cli;
1227 	if (pass != 1)
1228 		FAIL("want pass count 1, have %d", pass);
1229 
1230 close_cli:
1231 	xclose(c);
1232 close_srv:
1233 	xclose(s);
1234 }
1235 
/*
 * Point index 0 of @sock_map at a connected socket and check that a new
 * connection attempt to the reuseport listener is refused and counted as
 * exactly one SK_DROP in @verd_map.
 *
 * Steps:
 *  - create reuseport listener s and store it at sock_map[0],
 *  - connect client c0 to it; p0 is the accepted socket for SOCK_STREAM, or
 *    a fresh socket connected back to c0's address for other socket types,
 *  - overwrite sock_map[0] with p0 (BPF_EXIST),
 *  - connect a second client c1 to the listener address and expect
 *    ECONNREFUSED, reported by connect() itself or, for SOCK_DGRAM, by the
 *    receive that follows a send.
 *
 * @family, @sotype: handed to the socket helpers that create the sockets.
 * @sock_map: fd of the map under test.
 * @verd_map: verdict counters, zeroed on entry and read back under SK_DROP.
 * @reuseport_prog: program fd passed to socket_loopback_reuseport().
 *
 * Every socket opened here is closed on every exit path.
 */
static void test_reuseport_select_connected(int family, int sotype,
					    int sock_map, int verd_map,
					    int reuseport_prog)
{
	struct sockaddr_storage addr;
	int s, c0, c1, p0, err;
	unsigned int drop;
	socklen_t len;
	u64 value;
	u32 key;

	zero_verdict_count(verd_map);

	s = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (s < 0)
		return;

	/* Populate sock_map[0] to avoid ENOENT on first connection */
	key = 0;
	value = s;
	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
	if (err)
		goto close_srv;

	len = sizeof(addr);
	err = xgetsockname(s, sockaddr(&addr), &len);
	if (err)
		goto close_srv;

	c0 = xsocket(family, sotype, 0);
	if (c0 < 0)
		goto close_srv;

	err = xconnect(c0, sockaddr(&addr), len);
	if (err)
		goto close_cli0;

	if (sotype == SOCK_STREAM) {
		p0 = xaccept_nonblock(s, NULL, NULL);
		if (p0 < 0)
			goto close_cli0;
	} else {
		p0 = xsocket(family, sotype, 0);
		if (p0 < 0)
			goto close_cli0;

		/* p0 is open from here on: failures must also close it. */
		len = sizeof(addr);
		err = xgetsockname(c0, sockaddr(&addr), &len);
		if (err)
			goto close_peer0;

		err = xconnect(p0, sockaddr(&addr), len);
		if (err)
			goto close_peer0;
	}

	/* Update sock_map[0] to redirect to a connected socket */
	key = 0;
	value = p0;
	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST);
	if (err)
		goto close_peer0;

	c1 = xsocket(family, sotype, 0);
	if (c1 < 0)
		goto close_peer0;

	/* c1, p0 and c0 are all open here: unwind through the full chain. */
	len = sizeof(addr);
	err = xgetsockname(s, sockaddr(&addr), &len);
	if (err)
		goto close_cli1;

	/* Clear errno so a stale value cannot pass for ECONNREFUSED. */
	errno = 0;
	err = connect(c1, sockaddr(&addr), len);
	if (sotype == SOCK_DGRAM) {
		char b = 'a';
		ssize_t n;

		n = xsend(c1, &b, sizeof(b), 0);
		if (n == -1)
			goto close_cli1;

		/* For datagrams the refusal shows up on the receive. */
		n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
		err = n == -1;
	}
	if (!err || errno != ECONNREFUSED)
		FAIL_ERRNO("connect: expected ECONNREFUSED");

	key = SK_DROP;
	err = xbpf_map_lookup_elem(verd_map, &key, &drop);
	if (err)
		goto close_cli1;
	if (drop != 1)
		FAIL("want drop count 1, have %d", drop);

close_cli1:
	xclose(c1);
close_peer0:
	xclose(p0);
close_cli0:
	xclose(c0);
close_srv:
	xclose(s);
}
1340 
/*
 * Check that redirecting across reuseport groups is not allowed.
 *
 * Two reuseport sockets s1 and s2 are created and stored at sock_map[0] and
 * sock_map[1]. A client then targets s2's address; the attempt must fail
 * with ECONNREFUSED (from connect() itself or, for SOCK_DGRAM, from the
 * receive that follows a send) and @verd_map must have counted exactly one
 * SK_DROP.
 *
 * @family, @sotype: handed to the socket helpers that create the sockets.
 * @sock_map: fd of the map under test.
 * @verd_map: verdict counters, zeroed on entry and read back under SK_DROP.
 * @reuseport_prog: program fd passed to socket_loopback_reuseport().
 */
static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
					int verd_map, int reuseport_prog)
{
	struct sockaddr_storage addr;
	int s1, s2, c, err;
	unsigned int drop;
	socklen_t len;
	u64 value;
	u32 key;

	zero_verdict_count(verd_map);

	/* Create two listeners, each in its own reuseport group */
	s1 = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (s1 < 0)
		return;

	s2 = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (s2 < 0)
		goto close_srv1;

	key = 0;
	value = s1;
	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
	if (err)
		goto close_srv2;

	key = 1;
	value = s2;
	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
	/* Do not carry on with a half-populated map. */
	if (err)
		goto close_srv2;

	/* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
	len = sizeof(addr);
	err = xgetsockname(s2, sockaddr(&addr), &len);
	if (err)
		goto close_srv2;

	c = xsocket(family, sotype, 0);
	if (c < 0)
		goto close_srv2;

	/* Clear errno so a stale value cannot pass for ECONNREFUSED. */
	errno = 0;
	err = connect(c, sockaddr(&addr), len);
	if (sotype == SOCK_DGRAM) {
		char b = 'a';
		ssize_t n;

		n = xsend(c, &b, sizeof(b), 0);
		if (n == -1)
			goto close_cli;

		/* For datagrams the refusal shows up on the receive. */
		n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
		err = n == -1;
	}
	if (!err || errno != ECONNREFUSED) {
		FAIL_ERRNO("connect: expected ECONNREFUSED");
		goto close_cli;
	}

	/* Expect drop, can't redirect outside of reuseport group */
	key = SK_DROP;
	err = xbpf_map_lookup_elem(verd_map, &key, &drop);
	if (err)
		goto close_cli;
	if (drop != 1)
		FAIL("want drop count 1, have %d", drop);

close_cli:
	xclose(c);
close_srv2:
	xclose(s2);
close_srv1:
	xclose(s1);
}
1415 
/*
 * Build one test table entry: the function, its name as a string literal,
 * then any further initializers supplied by the caller.
 */
#define TEST(fn, ...) { fn, #fn, __VA_ARGS__ }
1420 
test_ops_cleanup(const struct bpf_map * map)1421 static void test_ops_cleanup(const struct bpf_map *map)
1422 {
1423 	const struct bpf_map_def *def;
1424 	int err, mapfd;
1425 	u32 key;
1426 
1427 	def = bpf_map__def(map);
1428 	mapfd = bpf_map__fd(map);
1429 
1430 	for (key = 0; key < def->max_entries; key++) {
1431 		err = bpf_map_delete_elem(mapfd, &key);
1432 		if (err && errno != EINVAL && errno != ENOENT)
1433 			FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
1434 	}
1435 }
1436 
/*
 * Map an address family to the label used in subtest names:
 * "IPv4" for AF_INET, "IPv6" for AF_INET6, "unknown" otherwise.
 */
static const char *family_str(sa_family_t family)
{
	if (family == AF_INET)
		return "IPv4";
	if (family == AF_INET6)
		return "IPv6";
	return "unknown";
}
1448 
map_type_str(const struct bpf_map * map)1449 static const char *map_type_str(const struct bpf_map *map)
1450 {
1451 	const struct bpf_map_def *def;
1452 
1453 	def = bpf_map__def(map);
1454 	if (IS_ERR(def))
1455 		return "invalid";
1456 
1457 	switch (def->type) {
1458 	case BPF_MAP_TYPE_SOCKMAP:
1459 		return "sockmap";
1460 	case BPF_MAP_TYPE_SOCKHASH:
1461 		return "sockhash";
1462 	default:
1463 		return "unknown";
1464 	}
1465 }
1466 
/*
 * Map a socket type to the label used in subtest names:
 * "UDP" for SOCK_DGRAM, "TCP" for SOCK_STREAM, "unknown" otherwise.
 */
static const char *sotype_str(int sotype)
{
	if (sotype == SOCK_DGRAM)
		return "UDP";
	if (sotype == SOCK_STREAM)
		return "TCP";
	return "unknown";
}
1478 
test_ops(struct test_sockmap_listen * skel,struct bpf_map * map,int family,int sotype)1479 static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
1480 		     int family, int sotype)
1481 {
1482 	const struct op_test {
1483 		void (*fn)(int family, int sotype, int mapfd);
1484 		const char *name;
1485 		int sotype;
1486 	} tests[] = {
1487 		/* insert */
1488 		TEST(test_insert_invalid),
1489 		TEST(test_insert_opened),
1490 		TEST(test_insert_bound, SOCK_STREAM),
1491 		TEST(test_insert),
1492 		/* delete */
1493 		TEST(test_delete_after_insert),
1494 		TEST(test_delete_after_close),
1495 		/* lookup */
1496 		TEST(test_lookup_after_insert),
1497 		TEST(test_lookup_after_delete),
1498 		TEST(test_lookup_32_bit_value),
1499 		/* update */
1500 		TEST(test_update_existing),
1501 		/* races with insert/delete */
1502 		TEST(test_destroy_orphan_child, SOCK_STREAM),
1503 		TEST(test_syn_recv_insert_delete, SOCK_STREAM),
1504 		TEST(test_race_insert_listen, SOCK_STREAM),
1505 		/* child clone */
1506 		TEST(test_clone_after_delete, SOCK_STREAM),
1507 		TEST(test_accept_after_delete, SOCK_STREAM),
1508 		TEST(test_accept_before_delete, SOCK_STREAM),
1509 	};
1510 	const char *family_name, *map_name, *sotype_name;
1511 	const struct op_test *t;
1512 	char s[MAX_TEST_NAME];
1513 	int map_fd;
1514 
1515 	family_name = family_str(family);
1516 	map_name = map_type_str(map);
1517 	sotype_name = sotype_str(sotype);
1518 	map_fd = bpf_map__fd(map);
1519 
1520 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1521 		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1522 			 sotype_name, t->name);
1523 
1524 		if (t->sotype != 0 && t->sotype != sotype)
1525 			continue;
1526 
1527 		if (!test__start_subtest(s))
1528 			continue;
1529 
1530 		t->fn(family, sotype, map_fd);
1531 		test_ops_cleanup(map);
1532 	}
1533 }
1534 
test_redir(struct test_sockmap_listen * skel,struct bpf_map * map,int family,int sotype)1535 static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1536 		       int family, int sotype)
1537 {
1538 	const struct redir_test {
1539 		void (*fn)(struct test_sockmap_listen *skel,
1540 			   struct bpf_map *map, int family, int sotype);
1541 		const char *name;
1542 	} tests[] = {
1543 		TEST(test_skb_redir_to_connected),
1544 		TEST(test_skb_redir_to_listening),
1545 		TEST(test_msg_redir_to_connected),
1546 		TEST(test_msg_redir_to_listening),
1547 	};
1548 	const char *family_name, *map_name;
1549 	const struct redir_test *t;
1550 	char s[MAX_TEST_NAME];
1551 
1552 	family_name = family_str(family);
1553 	map_name = map_type_str(map);
1554 
1555 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1556 		snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
1557 			 t->name);
1558 
1559 		if (!test__start_subtest(s))
1560 			continue;
1561 
1562 		t->fn(skel, map, family, sotype);
1563 	}
1564 }
1565 
test_reuseport(struct test_sockmap_listen * skel,struct bpf_map * map,int family,int sotype)1566 static void test_reuseport(struct test_sockmap_listen *skel,
1567 			   struct bpf_map *map, int family, int sotype)
1568 {
1569 	const struct reuseport_test {
1570 		void (*fn)(int family, int sotype, int socket_map,
1571 			   int verdict_map, int reuseport_prog);
1572 		const char *name;
1573 		int sotype;
1574 	} tests[] = {
1575 		TEST(test_reuseport_select_listening),
1576 		TEST(test_reuseport_select_connected),
1577 		TEST(test_reuseport_mixed_groups),
1578 	};
1579 	int socket_map, verdict_map, reuseport_prog;
1580 	const char *family_name, *map_name, *sotype_name;
1581 	const struct reuseport_test *t;
1582 	char s[MAX_TEST_NAME];
1583 
1584 	family_name = family_str(family);
1585 	map_name = map_type_str(map);
1586 	sotype_name = sotype_str(sotype);
1587 
1588 	socket_map = bpf_map__fd(map);
1589 	verdict_map = bpf_map__fd(skel->maps.verdict_map);
1590 	reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);
1591 
1592 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1593 		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1594 			 sotype_name, t->name);
1595 
1596 		if (t->sotype != 0 && t->sotype != sotype)
1597 			continue;
1598 
1599 		if (!test__start_subtest(s))
1600 			continue;
1601 
1602 		t->fn(family, sotype, socket_map, verdict_map, reuseport_prog);
1603 	}
1604 }
1605 
udp_redir_to_connected(int family,int sotype,int sock_mapfd,int verd_mapfd,enum redir_mode mode)1606 static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
1607 				   int verd_mapfd, enum redir_mode mode)
1608 {
1609 	const char *log_prefix = redir_mode_str(mode);
1610 	struct sockaddr_storage addr;
1611 	int c0, c1, p0, p1;
1612 	unsigned int pass;
1613 	socklen_t len;
1614 	int err, n;
1615 	u64 value;
1616 	u32 key;
1617 	char b;
1618 
1619 	zero_verdict_count(verd_mapfd);
1620 
1621 	p0 = socket_loopback(family, sotype | SOCK_NONBLOCK);
1622 	if (p0 < 0)
1623 		return;
1624 	len = sizeof(addr);
1625 	err = xgetsockname(p0, sockaddr(&addr), &len);
1626 	if (err)
1627 		goto close_peer0;
1628 
1629 	c0 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
1630 	if (c0 < 0)
1631 		goto close_peer0;
1632 	err = xconnect(c0, sockaddr(&addr), len);
1633 	if (err)
1634 		goto close_cli0;
1635 	err = xgetsockname(c0, sockaddr(&addr), &len);
1636 	if (err)
1637 		goto close_cli0;
1638 	err = xconnect(p0, sockaddr(&addr), len);
1639 	if (err)
1640 		goto close_cli0;
1641 
1642 	p1 = socket_loopback(family, sotype | SOCK_NONBLOCK);
1643 	if (p1 < 0)
1644 		goto close_cli0;
1645 	err = xgetsockname(p1, sockaddr(&addr), &len);
1646 	if (err)
1647 		goto close_cli0;
1648 
1649 	c1 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
1650 	if (c1 < 0)
1651 		goto close_peer1;
1652 	err = xconnect(c1, sockaddr(&addr), len);
1653 	if (err)
1654 		goto close_cli1;
1655 	err = xgetsockname(c1, sockaddr(&addr), &len);
1656 	if (err)
1657 		goto close_cli1;
1658 	err = xconnect(p1, sockaddr(&addr), len);
1659 	if (err)
1660 		goto close_cli1;
1661 
1662 	key = 0;
1663 	value = p0;
1664 	err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
1665 	if (err)
1666 		goto close_cli1;
1667 
1668 	key = 1;
1669 	value = p1;
1670 	err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
1671 	if (err)
1672 		goto close_cli1;
1673 
1674 	n = write(c1, "a", 1);
1675 	if (n < 0)
1676 		FAIL_ERRNO("%s: write", log_prefix);
1677 	if (n == 0)
1678 		FAIL("%s: incomplete write", log_prefix);
1679 	if (n < 1)
1680 		goto close_cli1;
1681 
1682 	key = SK_PASS;
1683 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1684 	if (err)
1685 		goto close_cli1;
1686 	if (pass != 1)
1687 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1688 
1689 	n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
1690 	if (n < 0)
1691 		FAIL_ERRNO("%s: read", log_prefix);
1692 	if (n == 0)
1693 		FAIL("%s: incomplete read", log_prefix);
1694 
1695 close_cli1:
1696 	xclose(c1);
1697 close_peer1:
1698 	xclose(p1);
1699 close_cli0:
1700 	xclose(c0);
1701 close_peer0:
1702 	xclose(p0);
1703 }
1704 
udp_skb_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family)1705 static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
1706 				       struct bpf_map *inner_map, int family)
1707 {
1708 	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1709 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1710 	int sock_map = bpf_map__fd(inner_map);
1711 	int err;
1712 
1713 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1714 	if (err)
1715 		return;
1716 
1717 	skel->bss->test_ingress = false;
1718 	udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1719 			       REDIR_EGRESS);
1720 	skel->bss->test_ingress = true;
1721 	udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1722 			       REDIR_INGRESS);
1723 
1724 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1725 }
1726 
test_udp_redir(struct test_sockmap_listen * skel,struct bpf_map * map,int family)1727 static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1728 			   int family)
1729 {
1730 	const char *family_name, *map_name;
1731 	char s[MAX_TEST_NAME];
1732 
1733 	family_name = family_str(family);
1734 	map_name = map_type_str(map);
1735 	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1736 	if (!test__start_subtest(s))
1737 		return;
1738 	udp_skb_redir_to_connected(skel, map, family);
1739 }
1740 
/*
 * Run every test group against @map for one address family: map operations
 * and reuseport tests for both SOCK_STREAM and SOCK_DGRAM, the redirect
 * tests for SOCK_STREAM only, and finally the UDP redirect test.
 */
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
		      int family)
{
	static const int sotypes[] = { SOCK_STREAM, SOCK_DGRAM };
	size_t i;

	for (i = 0; i < ARRAY_SIZE(sotypes); i++)
		test_ops(skel, map, family, sotypes[i]);

	test_redir(skel, map, family, SOCK_STREAM);

	for (i = 0; i < ARRAY_SIZE(sotypes); i++)
		test_reuseport(skel, map, family, sotypes[i]);

	test_udp_redir(skel, map, family);
}
1751 
test_sockmap_listen(void)1752 void test_sockmap_listen(void)
1753 {
1754 	struct test_sockmap_listen *skel;
1755 
1756 	skel = test_sockmap_listen__open_and_load();
1757 	if (!skel) {
1758 		FAIL("skeleton open/load failed");
1759 		return;
1760 	}
1761 
1762 	skel->bss->test_sockmap = true;
1763 	run_tests(skel, skel->maps.sock_map, AF_INET);
1764 	run_tests(skel, skel->maps.sock_map, AF_INET6);
1765 
1766 	skel->bss->test_sockmap = false;
1767 	run_tests(skel, skel->maps.sock_hash, AF_INET);
1768 	run_tests(skel, skel->maps.sock_hash, AF_INET6);
1769 
1770 	test_sockmap_listen__destroy(skel);
1771 }
1772