1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2020 Intel Corporation. */
3 
4 /*
5  * Some functions in this program are taken from
6  * Linux kernel samples/bpf/xdpsock* and modified
7  * for use.
8  *
9  * See test_xsk.sh for detailed information on test topology
10  * and prerequisite network setup.
11  *
12  * This test program contains two threads, each thread is single socket with
13  * a unique UMEM. It validates in-order packet delivery and packet content
14  * by sending packets to each other.
15  *
16  * Tests Information:
17  * ------------------
18  * These selftests test AF_XDP SKB and Native/DRV modes using veth
19  * Virtual Ethernet interfaces.
20  *
21  * For each mode, the following tests are run:
22  *    a. nopoll - soft-irq processing
23  *    b. poll - using poll() syscall
24  *    c. Socket Teardown
25  *       Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy
26  *       both sockets, then repeat multiple times. Only nopoll mode is used
27  *    d. Bi-directional sockets
28  *       Configure sockets as bi-directional tx/rx sockets, sets up fill and
29  *       completion rings on each socket, tx/rx in both directions. Only nopoll
30  *       mode is used
31  *    e. Statistics
32  *       Trigger some error conditions and ensure that the appropriate statistics
33  *       are incremented. Within this test, the following statistics are tested:
34  *       i.   rx dropped
35  *            Increase the UMEM frame headroom to a value which results in
36  *            insufficient space in the rx buffer for both the packet and the headroom.
37  *       ii.  tx invalid
38  *            Set the 'len' field of tx descriptors to an invalid value (umem frame
39  *            size + 1).
40  *       iii. rx ring full
41  *            Reduce the size of the RX ring to a fraction of the fill ring size.
42  *       iv.  fill queue empty
43  *            Do not populate the fill queue and then try to receive pkts.
44  *    f. bpf_link resource persistence
 *       Configure sockets at indexes 0 and 1, run traffic on queue id 0,
 *       then remove xsk sockets from queue 0 on both veth interfaces and
 *       finally run traffic on queue id 1
48  *
49  * Total tests: 12
50  *
51  * Flow:
52  * -----
53  * - Single process spawns two threads: Tx and Rx
54  * - Each of these two threads attach to a veth interface within their assigned
55  *   namespaces
56  * - Each thread Creates one AF_XDP socket connected to a unique umem for each
57  *   veth interface
58  * - Tx thread Transmits 10k packets from veth<xxxx> to veth<yyyy>
59  * - Rx thread verifies if all 10k packets were received and delivered in-order,
60  *   and have the right content
61  *
62  * Enable/disable packet dump mode:
63  * --------------------------
64  * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add
65  * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D")
66  */
67 
68 #define _GNU_SOURCE
69 #include <fcntl.h>
70 #include <errno.h>
71 #include <getopt.h>
72 #include <asm/barrier.h>
73 typedef __u16 __sum16;
74 #include <linux/if_link.h>
75 #include <linux/if_ether.h>
76 #include <linux/ip.h>
77 #include <linux/udp.h>
78 #include <arpa/inet.h>
79 #include <net/if.h>
80 #include <locale.h>
81 #include <poll.h>
82 #include <pthread.h>
83 #include <signal.h>
84 #include <stdbool.h>
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <string.h>
88 #include <stddef.h>
89 #include <sys/mman.h>
90 #include <sys/resource.h>
91 #include <sys/types.h>
92 #include <sys/queue.h>
93 #include <time.h>
94 #include <unistd.h>
95 #include <stdatomic.h>
96 #include <bpf/xsk.h>
97 #include "xdpxceiver.h"
98 #include "../kselftest.h"
99 
/* MAC addresses used by the test, stored as 6 raw bytes each. */
static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
/* IPv4 addresses used by the test, in dotted-quad text form. */
static const char *IP1 = "192.168.100.162";
static const char *IP2 = "192.168.100.161";
/* UDP port numbers used by the test. */
static const u16 UDP_PORT1 = 2020;
static const u16 UDP_PORT2 = 2121;
106 
__exit_with_error(int error,const char * file,const char * func,int line)107 static void __exit_with_error(int error, const char *file, const char *func, int line)
108 {
109 	if (configured_mode == TEST_MODE_UNCONFIGURED) {
110 		ksft_exit_fail_msg
111 		("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
112 	} else {
113 		ksft_test_result_fail
114 		("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
115 		ksft_exit_xfail();
116 	}
117 }
118 
/* Wrapper around __exit_with_error() that supplies the caller's file, function and line. */
#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
120 
/*
 * Emit a kselftest PASS line describing the test that just ran.
 *
 * The mode is printed as "DRV" when configured_mode is non-zero and "SKB"
 * otherwise. The second field is "POLL" only for TEST_TYPE_POLL; every other
 * test type prints "NOPOLL". At most one of the trailing labels is non-empty,
 * chosen by test_type.
 */
#define print_ksft_result(void)\
	(ksft_test_result_pass("PASS: %s %s %s%s%s%s\n", configured_mode ? "DRV" : "SKB",\
			       test_type == TEST_TYPE_POLL ? "POLL" : "NOPOLL",\
			       test_type == TEST_TYPE_TEARDOWN ? "Socket Teardown" : "",\
			       test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : "",\
			       test_type == TEST_TYPE_STATS ? "Stats" : "",\
			       test_type == TEST_TYPE_BPF_RES ? "BPF RES" : ""))
128 
/*
 * Fill a buffer with a repeated 32-bit pattern in network byte order.
 *
 * @dest: buffer to fill; no alignment is required
 * @val:  pattern in host byte order; converted with htonl() before use
 * @size: number of bytes to write
 *
 * Whole 4-byte words are written first; a trailing remainder of 1-3 bytes
 * receives the leading bytes of the converted pattern.
 *
 * Returns @dest.
 */
static void *memset32_htonl(void *dest, u32 val, u32 size)
{
	unsigned char *out = dest;
	u32 words_end = size & ~(u32)0x3;
	u32 i;

	val = htonl(val);

	/* memcpy() instead of a u32 store: dest may not be 4-byte aligned. */
	for (i = 0; i < words_end; i += 4)
		memcpy(out + i, &val, sizeof(val));

	for (; i < size; i++)
		out[i] = ((unsigned char *)&val)[i & 3];

	return dest;
}
144 
145 /*
146  * Fold a partial checksum
147  * This function code has been taken from
148  * Linux kernel include/asm-generic/checksum.h
149  */
csum_fold(__u32 csum)150 static __u16 csum_fold(__u32 csum)
151 {
152 	u32 sum = (__force u32)csum;
153 
154 	sum = (sum & 0xffff) + (sum >> 16);
155 	sum = (sum & 0xffff) + (sum >> 16);
156 	return (__force __u16)~sum;
157 }
158 
159 /*
160  * This function code has been taken from
161  * Linux kernel lib/checksum.c
162  */
from64to32(u64 x)163 static u32 from64to32(u64 x)
164 {
165 	/* add up 32-bit and 32-bit for 32+c bit */
166 	x = (x & 0xffffffff) + (x >> 32);
167 	/* add up carry.. */
168 	x = (x & 0xffffffff) + (x >> 32);
169 	return (u32)x;
170 }
171 
172 /*
173  * This function code has been taken from
174  * Linux kernel lib/checksum.c
175  */
/*
 * Add the pseudo-header fields (source address, destination address,
 * protocol and length) to a partial checksum, without the final fold.
 *
 * @sum is the partial checksum accumulated so far. The result is reduced to
 * 32 bits with from64to32().
 */
static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
{
	unsigned long long acc = (__force u32)sum;
	__u32 proto_len = proto + len;

	acc += (__force u32)saddr;
	acc += (__force u32)daddr;
#ifdef __BIG_ENDIAN__
	acc += proto_len;
#else
	acc += proto_len << 8;
#endif
	return (__force __u32)from64to32(acc);
}
189 
190 /*
191  * This function has been taken from
192  * Linux kernel include/asm-generic/checksum.h
193  */
csum_tcpudp_magic(__be32 saddr,__be32 daddr,__u32 len,__u8 proto,__u32 sum)194 static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
195 {
196 	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
197 }
198 
/*
 * Compute the UDP checksum over @len bytes starting at @udp_pkt.
 *
 * The data is summed as 16-bit words; the pseudo-header built from @saddr,
 * @daddr, @len and @proto is then added and folded by csum_tcpudp_magic().
 */
static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
{
	const u16 *word = udp_pkt;
	u32 partial = 0;
	u32 off;

	/* udp hdr and data, two bytes at a time */
	for (off = 0; off < len; off += 2)
		partial += *word++;

	return csum_tcpudp_magic(saddr, daddr, len, proto, partial);
}
210 
gen_eth_hdr(struct ifobject * ifobject,struct ethhdr * eth_hdr)211 static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr)
212 {
213 	memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN);
214 	memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN);
215 	eth_hdr->h_proto = htons(ETH_P_IP);
216 }
217 
gen_ip_hdr(struct ifobject * ifobject,struct iphdr * ip_hdr)218 static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr)
219 {
220 	ip_hdr->version = IP_PKT_VER;
221 	ip_hdr->ihl = 0x5;
222 	ip_hdr->tos = IP_PKT_TOS;
223 	ip_hdr->tot_len = htons(IP_PKT_SIZE);
224 	ip_hdr->id = 0;
225 	ip_hdr->frag_off = 0;
226 	ip_hdr->ttl = IPDEFTTL;
227 	ip_hdr->protocol = IPPROTO_UDP;
228 	ip_hdr->saddr = ifobject->src_ip;
229 	ip_hdr->daddr = ifobject->dst_ip;
230 	ip_hdr->check = 0;
231 }
232 
gen_udp_hdr(struct generic_data * data,struct ifobject * ifobject,struct udphdr * udp_hdr)233 static void gen_udp_hdr(struct generic_data *data, struct ifobject *ifobject,
234 			struct udphdr *udp_hdr)
235 {
236 	udp_hdr->source = htons(ifobject->src_port);
237 	udp_hdr->dest = htons(ifobject->dst_port);
238 	udp_hdr->len = htons(UDP_PKT_SIZE);
239 	memset32_htonl(pkt_data + PKT_HDR_SIZE, htonl(data->seqnum), UDP_PKT_DATA_SIZE);
240 }
241 
gen_udp_csum(struct udphdr * udp_hdr,struct iphdr * ip_hdr)242 static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
243 {
244 	udp_hdr->check = 0;
245 	udp_hdr->check =
246 	    udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
247 }
248 
/* Copy the global packet template (pkt_data, PKT_SIZE bytes) into the umem at @addr. */
static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
{
	void *frame = xsk_umem__get_data(umem->buffer, addr);

	memcpy(frame, pkt_data, PKT_SIZE);
}
253 
xsk_configure_umem(struct ifobject * data,void * buffer,int idx)254 static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
255 {
256 	struct xsk_umem_config cfg = {
257 		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
258 		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
259 		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
260 		.frame_headroom = frame_headroom,
261 		.flags = XSK_UMEM__DEFAULT_FLAGS
262 	};
263 	int size = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
264 	struct xsk_umem_info *umem;
265 	int ret;
266 
267 	umem = calloc(1, sizeof(struct xsk_umem_info));
268 	if (!umem)
269 		exit_with_error(errno);
270 
271 	ret = xsk_umem__create(&umem->umem, buffer, size,
272 			       &umem->fq, &umem->cq, &cfg);
273 	if (ret)
274 		exit_with_error(ret);
275 
276 	umem->buffer = buffer;
277 
278 	data->umem_arr[idx] = umem;
279 }
280 
xsk_populate_fill_ring(struct xsk_umem_info * umem)281 static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
282 {
283 	int ret, i;
284 	u32 idx = 0;
285 
286 	ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
287 	if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
288 		exit_with_error(ret);
289 	for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
290 		*xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
291 	xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
292 }
293 
xsk_configure_socket(struct ifobject * ifobject,int idx)294 static int xsk_configure_socket(struct ifobject *ifobject, int idx)
295 {
296 	struct xsk_socket_config cfg;
297 	struct xsk_socket_info *xsk;
298 	struct xsk_ring_cons *rxr;
299 	struct xsk_ring_prod *txr;
300 	int ret;
301 
302 	xsk = calloc(1, sizeof(struct xsk_socket_info));
303 	if (!xsk)
304 		exit_with_error(errno);
305 
306 	xsk->umem = ifobject->umem;
307 	cfg.rx_size = rxqsize;
308 	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
309 	cfg.libbpf_flags = 0;
310 	cfg.xdp_flags = xdp_flags;
311 	cfg.bind_flags = xdp_bind_flags;
312 
313 	if (test_type != TEST_TYPE_BIDI) {
314 		rxr = (ifobject->fv.vector == rx) ? &xsk->rx : NULL;
315 		txr = (ifobject->fv.vector == tx) ? &xsk->tx : NULL;
316 	} else {
317 		rxr = &xsk->rx;
318 		txr = &xsk->tx;
319 	}
320 
321 	ret = xsk_socket__create(&xsk->xsk, ifobject->ifname, idx,
322 				 ifobject->umem->umem, rxr, txr, &cfg);
323 	if (ret)
324 		return 1;
325 
326 	ifobject->xsk_arr[idx] = xsk;
327 
328 	return 0;
329 }
330 
/*
 * Long command-line options. The argument requirements of --dump-pkts and
 * --tx-pkt-count mirror their short forms in the "i:DC:v" optstring: -D
 * takes no argument and -C requires one. With optional_argument,
 * "--tx-pkt-count 5" would leave optarg NULL.
 *
 * NOTE(review): --queue is listed but 'q' has no case in
 * parse_command_line(), so it falls through to the usage/exit path.
 */
static struct option long_options[] = {
	{"interface", required_argument, 0, 'i'},
	{"queue", optional_argument, 0, 'q'},
	{"dump-pkts", no_argument, 0, 'D'},
	{"verbose", no_argument, 0, 'v'},
	{"tx-pkt-count", required_argument, 0, 'C'},
	{0, 0, 0, 0}
};
339 
/* Print the command-line help text, using @prog as the program name. */
static void usage(const char *prog)
{
	ksft_print_msg("  Usage: %s [OPTIONS]\n"
		       "  Options:\n"
		       "  -i, --interface      Use interface\n"
		       "  -q, --queue=n        Use queue n (default 0)\n"
		       "  -D, --dump-pkts      Dump packets L2 - L5\n"
		       "  -v, --verbose        Verbose output\n"
		       "  -C, --tx-pkt-count=n Number of packets to send\n",
		       prog);
}
352 
/*
 * Move the calling thread into the network namespace named @nsname.
 *
 * The namespace file is looked up under /var/run/netns/; the name is
 * appended to a 26-byte path buffer and is truncated if it does not fit.
 *
 * Returns the open namespace file descriptor, or -1 when @nsname is NULL or
 * empty (no switch is attempted). Exits through exit_with_error() if the
 * file cannot be opened or setns() fails.
 */
static int switch_namespace(const char *nsname)
{
	char fqns[26] = "/var/run/netns/";
	size_t room;
	int nsfd;

	if (!nsname || nsname[0] == '\0')
		return -1;

	room = sizeof(fqns) - strlen(fqns) - 1;
	strncat(fqns, nsname, room);

	nsfd = open(fqns, O_RDONLY);
	if (nsfd == -1)
		exit_with_error(errno);

	if (setns(nsfd, 0) == -1)
		exit_with_error(errno);

	print_verbose("NS switched: %s\n", nsname);

	return nsfd;
}
374 
validate_interfaces(void)375 static int validate_interfaces(void)
376 {
377 	bool ret = true;
378 
379 	for (int i = 0; i < MAX_INTERFACES; i++) {
380 		if (!strcmp(ifdict[i]->ifname, "")) {
381 			ret = false;
382 			ksft_test_result_fail("ERROR: interfaces: -i <int>,<ns> -i <int>,<ns>.");
383 		}
384 	}
385 	return ret;
386 }
387 
parse_command_line(int argc,char ** argv)388 static void parse_command_line(int argc, char **argv)
389 {
390 	int option_index, interface_index = 0, c;
391 
392 	opterr = 0;
393 
394 	for (;;) {
395 		c = getopt_long(argc, argv, "i:DC:v", long_options, &option_index);
396 
397 		if (c == -1)
398 			break;
399 
400 		switch (c) {
401 		case 'i':
402 			if (interface_index == MAX_INTERFACES)
403 				break;
404 			char *sptr, *token;
405 
406 			sptr = strndupa(optarg, strlen(optarg));
407 			memcpy(ifdict[interface_index]->ifname,
408 			       strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS);
409 			token = strsep(&sptr, ",");
410 			if (token)
411 				memcpy(ifdict[interface_index]->nsname, token,
412 				       MAX_INTERFACES_NAMESPACE_CHARS);
413 			interface_index++;
414 			break;
415 		case 'D':
416 			debug_pkt_dump = 1;
417 			break;
418 		case 'C':
419 			opt_pkt_count = atoi(optarg);
420 			break;
421 		case 'v':
422 			opt_verbose = 1;
423 			break;
424 		default:
425 			usage(basename(argv[0]));
426 			ksft_exit_xfail();
427 		}
428 	}
429 
430 	if (!opt_pkt_count) {
431 		print_verbose("No tx-pkt-count specified, using default %u\n", DEFAULT_PKT_CNT);
432 		opt_pkt_count = DEFAULT_PKT_CNT;
433 	}
434 
435 	if (!validate_interfaces()) {
436 		usage(basename(argv[0]));
437 		ksft_exit_xfail();
438 	}
439 }
440 
kick_tx(struct xsk_socket_info * xsk)441 static void kick_tx(struct xsk_socket_info *xsk)
442 {
443 	int ret;
444 
445 	ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
446 	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN)
447 		return;
448 	exit_with_error(errno);
449 }
450 
complete_tx_only(struct xsk_socket_info * xsk,int batch_size)451 static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
452 {
453 	unsigned int rcvd;
454 	u32 idx;
455 
456 	if (!xsk->outstanding_tx)
457 		return;
458 
459 	if (xsk_ring_prod__needs_wakeup(&xsk->tx))
460 		kick_tx(xsk);
461 
462 	rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
463 	if (rcvd) {
464 		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
465 		xsk->outstanding_tx -= rcvd;
466 		xsk->tx_npkts += rcvd;
467 	}
468 }
469 
rx_pkt(struct xsk_socket_info * xsk,struct pollfd * fds)470 static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds)
471 {
472 	unsigned int rcvd, i;
473 	u32 idx_rx = 0, idx_fq = 0;
474 	int ret;
475 
476 	rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
477 	if (!rcvd) {
478 		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
479 			ret = poll(fds, 1, POLL_TMOUT);
480 			if (ret < 0)
481 				exit_with_error(ret);
482 		}
483 		return;
484 	}
485 
486 	ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
487 	while (ret != rcvd) {
488 		if (ret < 0)
489 			exit_with_error(ret);
490 		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
491 			ret = poll(fds, 1, POLL_TMOUT);
492 			if (ret < 0)
493 				exit_with_error(ret);
494 		}
495 		ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
496 	}
497 
498 	for (i = 0; i < rcvd; i++) {
499 		u64 addr, orig;
500 
501 		addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
502 		xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
503 		orig = xsk_umem__extract_addr(addr);
504 
505 		addr = xsk_umem__add_offset_to_addr(addr);
506 		pkt_node_rx = malloc(sizeof(struct pkt) + PKT_SIZE);
507 		if (!pkt_node_rx)
508 			exit_with_error(errno);
509 
510 		pkt_node_rx->pkt_frame = malloc(PKT_SIZE);
511 		if (!pkt_node_rx->pkt_frame)
512 			exit_with_error(errno);
513 
514 		memcpy(pkt_node_rx->pkt_frame, xsk_umem__get_data(xsk->umem->buffer, addr),
515 		       PKT_SIZE);
516 
517 		TAILQ_INSERT_HEAD(&head, pkt_node_rx, pkt_nodes);
518 
519 		*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
520 	}
521 
522 	xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
523 	xsk_ring_cons__release(&xsk->rx, rcvd);
524 	xsk->rx_npkts += rcvd;
525 }
526 
/*
 * Queue @batch_size descriptors on the tx ring and reap completions.
 *
 * @frameptr holds the index of the first frame to send; it is advanced by
 * @batch_size and wrapped at num_frames. In the tx-invalid statistics test
 * every descriptor gets a length one byte larger than the frame size, the
 * batch is not counted as outstanding, and kick_tx() is called if the ring
 * reports that a wakeup is needed.
 */
static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
{
	bool tx_invalid_test = stat_test_type == STAT_TEST_TX_INVALID;
	unsigned int i;
	u32 idx = 0;
	u32 len;

	if (tx_invalid_test)
		len = XSK_UMEM__DEFAULT_FRAME_SIZE + 1;
	else
		len = PKT_SIZE;

	/* Reap completions until the tx ring has room for the whole batch. */
	while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size)
		complete_tx_only(xsk, batch_size);

	for (i = 0; i < batch_size; i++) {
		struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);

		tx_desc->len = len;
		tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
	}

	xsk_ring_prod__submit(&xsk->tx, batch_size);

	if (tx_invalid_test) {
		if (xsk_ring_prod__needs_wakeup(&xsk->tx))
			kick_tx(xsk);
	} else {
		xsk->outstanding_tx += batch_size;
	}

	*frameptr += batch_size;
	*frameptr %= num_frames;
	complete_tx_only(xsk, batch_size);
}
554 
get_batch_size(int pkt_cnt)555 static int get_batch_size(int pkt_cnt)
556 {
557 	if (!opt_pkt_count)
558 		return BATCH_SIZE;
559 
560 	if (pkt_cnt + BATCH_SIZE <= opt_pkt_count)
561 		return BATCH_SIZE;
562 
563 	return opt_pkt_count - pkt_cnt;
564 }
565 
complete_tx_only_all(struct ifobject * ifobject)566 static void complete_tx_only_all(struct ifobject *ifobject)
567 {
568 	bool pending;
569 
570 	do {
571 		pending = false;
572 		if (ifobject->xsk->outstanding_tx) {
573 			complete_tx_only(ifobject->xsk, BATCH_SIZE);
574 			pending = !!ifobject->xsk->outstanding_tx;
575 		}
576 	} while (pending);
577 }
578 
tx_only_all(struct ifobject * ifobject)579 static void tx_only_all(struct ifobject *ifobject)
580 {
581 	struct pollfd fds[MAX_SOCKS] = { };
582 	u32 frame_nb = 0;
583 	int pkt_cnt = 0;
584 	int ret;
585 
586 	fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
587 	fds[0].events = POLLOUT;
588 
589 	while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
590 		int batch_size = get_batch_size(pkt_cnt);
591 
592 		if (test_type == TEST_TYPE_POLL) {
593 			ret = poll(fds, 1, POLL_TMOUT);
594 			if (ret <= 0)
595 				continue;
596 
597 			if (!(fds[0].revents & POLLOUT))
598 				continue;
599 		}
600 
601 		tx_only(ifobject->xsk, &frame_nb, batch_size);
602 		pkt_cnt += batch_size;
603 	}
604 
605 	if (opt_pkt_count)
606 		complete_tx_only_all(ifobject);
607 }
608 
worker_pkt_dump(void)609 static void worker_pkt_dump(void)
610 {
611 	struct ethhdr *ethhdr;
612 	struct iphdr *iphdr;
613 	struct udphdr *udphdr;
614 	char s[128];
615 	int payload;
616 	void *ptr;
617 
618 	fprintf(stdout, "---------------------------------------\n");
619 	for (int iter = 0; iter < num_frames - 1; iter++) {
620 		ptr = pkt_buf[iter]->payload;
621 		ethhdr = ptr;
622 		iphdr = ptr + sizeof(*ethhdr);
623 		udphdr = ptr + sizeof(*ethhdr) + sizeof(*iphdr);
624 
625 		/*extract L2 frame */
626 		fprintf(stdout, "DEBUG>> L2: dst mac: ");
627 		for (int i = 0; i < ETH_ALEN; i++)
628 			fprintf(stdout, "%02X", ethhdr->h_dest[i]);
629 
630 		fprintf(stdout, "\nDEBUG>> L2: src mac: ");
631 		for (int i = 0; i < ETH_ALEN; i++)
632 			fprintf(stdout, "%02X", ethhdr->h_source[i]);
633 
634 		/*extract L3 frame */
635 		fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
636 		fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
637 			inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
638 		fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
639 			inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
640 		/*extract L4 frame */
641 		fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
642 		fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
643 		/*extract L5 frame */
644 		payload = *((uint32_t *)(ptr + PKT_HDR_SIZE));
645 
646 		if (payload == EOT) {
647 			print_verbose("End-of-transmission frame received\n");
648 			fprintf(stdout, "---------------------------------------\n");
649 			break;
650 		}
651 		fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
652 		fprintf(stdout, "---------------------------------------\n");
653 	}
654 }
655 
worker_stats_validate(struct ifobject * ifobject)656 static void worker_stats_validate(struct ifobject *ifobject)
657 {
658 	struct xdp_statistics stats;
659 	socklen_t optlen;
660 	int err;
661 	struct xsk_socket *xsk = stat_test_type == STAT_TEST_TX_INVALID ?
662 							ifdict[!ifobject->ifdict_index]->xsk->xsk :
663 							ifobject->xsk->xsk;
664 	int fd = xsk_socket__fd(xsk);
665 	unsigned long xsk_stat = 0, expected_stat = opt_pkt_count;
666 
667 	sigvar = 0;
668 
669 	optlen = sizeof(stats);
670 	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
671 	if (err)
672 		return;
673 
674 	if (optlen == sizeof(struct xdp_statistics)) {
675 		switch (stat_test_type) {
676 		case STAT_TEST_RX_DROPPED:
677 			xsk_stat = stats.rx_dropped;
678 			break;
679 		case STAT_TEST_TX_INVALID:
680 			xsk_stat = stats.tx_invalid_descs;
681 			break;
682 		case STAT_TEST_RX_FULL:
683 			xsk_stat = stats.rx_ring_full;
684 			expected_stat -= RX_FULL_RXQSIZE;
685 			break;
686 		case STAT_TEST_RX_FILL_EMPTY:
687 			xsk_stat = stats.rx_fill_ring_empty_descs;
688 			break;
689 		default:
690 			break;
691 		}
692 
693 		if (xsk_stat == expected_stat)
694 			sigvar = 1;
695 	}
696 }
697 
worker_pkt_validate(void)698 static void worker_pkt_validate(void)
699 {
700 	u32 payloadseqnum = -2;
701 	struct iphdr *iphdr;
702 
703 	while (1) {
704 		pkt_node_rx_q = TAILQ_LAST(&head, head_s);
705 		if (!pkt_node_rx_q)
706 			break;
707 
708 		iphdr = (struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr));
709 
710 		/*do not increment pktcounter if !(tos=0x9 and ipv4) */
711 		if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
712 			payloadseqnum = *((uint32_t *)(pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE));
713 			if (debug_pkt_dump && payloadseqnum != EOT) {
714 				pkt_obj = malloc(sizeof(*pkt_obj));
715 				pkt_obj->payload = malloc(PKT_SIZE);
716 				memcpy(pkt_obj->payload, pkt_node_rx_q->pkt_frame, PKT_SIZE);
717 				pkt_buf[payloadseqnum] = pkt_obj;
718 			}
719 
720 			if (payloadseqnum == EOT) {
721 				print_verbose("End-of-transmission frame received: PASS\n");
722 				sigvar = 1;
723 				break;
724 			}
725 
726 			if (prev_pkt + 1 != payloadseqnum) {
727 				ksft_test_result_fail
728 				    ("ERROR: [%s] prev_pkt [%d], payloadseqnum [%d]\n",
729 				     __func__, prev_pkt, payloadseqnum);
730 				ksft_exit_xfail();
731 			}
732 
733 			prev_pkt = payloadseqnum;
734 			pkt_counter++;
735 		} else {
736 			ksft_print_msg("Invalid frame received: ");
737 			ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version,
738 				       iphdr->tos);
739 		}
740 
741 		TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes);
742 		free(pkt_node_rx_q->pkt_frame);
743 		free(pkt_node_rx_q);
744 		pkt_node_rx_q = NULL;
745 	}
746 }
747 
thread_common_ops(struct ifobject * ifobject,void * bufs)748 static void thread_common_ops(struct ifobject *ifobject, void *bufs)
749 {
750 	int umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
751 	int ctr = 0;
752 	int ret;
753 
754 	ifobject->ns_fd = switch_namespace(ifobject->nsname);
755 
756 	if (test_type == TEST_TYPE_BPF_RES)
757 		umem_sz *= 2;
758 
759 	bufs = mmap(NULL, umem_sz,
760 		    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
761 	if (bufs == MAP_FAILED)
762 		exit_with_error(errno);
763 
764 	xsk_configure_umem(ifobject, bufs, 0);
765 	ifobject->umem = ifobject->umem_arr[0];
766 	ret = xsk_configure_socket(ifobject, 0);
767 
768 	/* Retry Create Socket if it fails as xsk_socket__create()
769 	 * is asynchronous
770 	 */
771 	while (ret && ctr < SOCK_RECONF_CTR) {
772 		xsk_configure_umem(ifobject, bufs, 0);
773 		ifobject->umem = ifobject->umem_arr[0];
774 		ret = xsk_configure_socket(ifobject, 0);
775 		usleep(USLEEP_MAX);
776 		ctr++;
777 	}
778 
779 	if (ctr >= SOCK_RECONF_CTR)
780 		exit_with_error(ret);
781 
782 	ifobject->umem = ifobject->umem_arr[0];
783 	ifobject->xsk = ifobject->xsk_arr[0];
784 
785 	if (test_type == TEST_TYPE_BPF_RES) {
786 		xsk_configure_umem(ifobject, (u8 *)bufs + (umem_sz / 2), 1);
787 		ifobject->umem = ifobject->umem_arr[1];
788 		ret = xsk_configure_socket(ifobject, 1);
789 	}
790 
791 	ifobject->umem = ifobject->umem_arr[0];
792 	ifobject->xsk = ifobject->xsk_arr[0];
793 	print_verbose("Interface [%s] vector [%s]\n",
794 		      ifobject->ifname, ifobject->fv.vector == tx ? "Tx" : "Rx");
795 }
796 
testapp_is_test_two_stepped(void)797 static bool testapp_is_test_two_stepped(void)
798 {
799 	return (test_type != TEST_TYPE_BIDI && test_type != TEST_TYPE_BPF_RES) || second_step;
800 }
801 
testapp_cleanup_xsk_res(struct ifobject * ifobj)802 static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
803 {
804 	if (testapp_is_test_two_stepped()) {
805 		xsk_socket__delete(ifobj->xsk->xsk);
806 		(void)xsk_umem__delete(ifobj->umem->umem);
807 	}
808 }
809 
worker_testapp_validate_tx(void * arg)810 static void *worker_testapp_validate_tx(void *arg)
811 {
812 	struct udphdr *udp_hdr =
813 	    (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr));
814 	struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr));
815 	struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
816 	struct ifobject *ifobject = (struct ifobject *)arg;
817 	struct generic_data data;
818 	void *bufs = NULL;
819 
820 	if (!second_step)
821 		thread_common_ops(ifobject, bufs);
822 
823 	for (int i = 0; i < num_frames; i++) {
824 		/*send EOT frame */
825 		if (i == (num_frames - 1))
826 			data.seqnum = -1;
827 		else
828 			data.seqnum = i;
829 		gen_udp_hdr(&data, ifobject, udp_hdr);
830 		gen_ip_hdr(ifobject, ip_hdr);
831 		gen_udp_csum(udp_hdr, ip_hdr);
832 		gen_eth_hdr(ifobject, eth_hdr);
833 		gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE);
834 	}
835 
836 	print_verbose("Sending %d packets on interface %s\n",
837 		      (opt_pkt_count - 1), ifobject->ifname);
838 	tx_only_all(ifobject);
839 
840 	testapp_cleanup_xsk_res(ifobject);
841 	pthread_exit(NULL);
842 }
843 
worker_testapp_validate_rx(void * arg)844 static void *worker_testapp_validate_rx(void *arg)
845 {
846 	struct ifobject *ifobject = (struct ifobject *)arg;
847 	struct pollfd fds[MAX_SOCKS] = { };
848 	void *bufs = NULL;
849 
850 	if (!second_step)
851 		thread_common_ops(ifobject, bufs);
852 
853 	if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
854 		xsk_populate_fill_ring(ifobject->umem);
855 
856 	TAILQ_INIT(&head);
857 	if (debug_pkt_dump) {
858 		pkt_buf = calloc(num_frames, sizeof(*pkt_buf));
859 		if (!pkt_buf)
860 			exit_with_error(errno);
861 	}
862 
863 	fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
864 	fds[0].events = POLLIN;
865 
866 	pthread_barrier_wait(&barr);
867 
868 	while (1) {
869 		if (test_type != TEST_TYPE_STATS) {
870 			rx_pkt(ifobject->xsk, fds);
871 			worker_pkt_validate();
872 		} else {
873 			worker_stats_validate(ifobject);
874 		}
875 		if (sigvar)
876 			break;
877 	}
878 
879 	print_verbose("Received %d packets on interface %s\n",
880 		      pkt_counter, ifobject->ifname);
881 
882 	if (test_type == TEST_TYPE_TEARDOWN)
883 		print_verbose("Destroying socket\n");
884 
885 	testapp_cleanup_xsk_res(ifobject);
886 	pthread_exit(NULL);
887 }
888 
testapp_validate(void)889 static void testapp_validate(void)
890 {
891 	bool bidi = test_type == TEST_TYPE_BIDI;
892 	bool bpf = test_type == TEST_TYPE_BPF_RES;
893 
894 	if (pthread_barrier_init(&barr, NULL, 2))
895 		exit_with_error(errno);
896 
897 	/*Spawn RX thread */
898 	pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx);
899 
900 	pthread_barrier_wait(&barr);
901 	if (pthread_barrier_destroy(&barr))
902 		exit_with_error(errno);
903 
904 	/*Spawn TX thread */
905 	pthread_create(&t1, NULL, ifdict_tx->func_ptr, ifdict_tx);
906 
907 	pthread_join(t1, NULL);
908 	pthread_join(t0, NULL);
909 
910 	if (debug_pkt_dump && test_type != TEST_TYPE_STATS) {
911 		worker_pkt_dump();
912 		for (int iter = 0; iter < num_frames - 1; iter++) {
913 			free(pkt_buf[iter]->payload);
914 			free(pkt_buf[iter]);
915 		}
916 		free(pkt_buf);
917 	}
918 
919 	if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !bpf && !(test_type == TEST_TYPE_STATS))
920 		print_ksft_result();
921 }
922 
testapp_teardown(void)923 static void testapp_teardown(void)
924 {
925 	int i;
926 
927 	for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
928 		pkt_counter = 0;
929 		prev_pkt = -1;
930 		sigvar = 0;
931 		print_verbose("Creating socket\n");
932 		testapp_validate();
933 	}
934 
935 	print_ksft_result();
936 }
937 
swap_vectors(struct ifobject * ifobj1,struct ifobject * ifobj2)938 static void swap_vectors(struct ifobject *ifobj1, struct ifobject *ifobj2)
939 {
940 	void *(*tmp_func_ptr)(void *) = ifobj1->func_ptr;
941 	enum fvector tmp_vector = ifobj1->fv.vector;
942 
943 	ifobj1->func_ptr = ifobj2->func_ptr;
944 	ifobj1->fv.vector = ifobj2->fv.vector;
945 
946 	ifobj2->func_ptr = tmp_func_ptr;
947 	ifobj2->fv.vector = tmp_vector;
948 
949 	ifdict_tx = ifobj1;
950 	ifdict_rx = ifobj2;
951 }
952 
testapp_bidi(void)953 static void testapp_bidi(void)
954 {
955 	for (int i = 0; i < MAX_BIDI_ITER; i++) {
956 		pkt_counter = 0;
957 		prev_pkt = -1;
958 		sigvar = 0;
959 		print_verbose("Creating socket\n");
960 		testapp_validate();
961 		if (!second_step) {
962 			print_verbose("Switching Tx/Rx vectors\n");
963 			swap_vectors(ifdict[1], ifdict[0]);
964 		}
965 		second_step = true;
966 	}
967 
968 	swap_vectors(ifdict[0], ifdict[1]);
969 
970 	print_ksft_result();
971 }
972 
swap_xsk_res(void)973 static void swap_xsk_res(void)
974 {
975 	xsk_socket__delete(ifdict_tx->xsk->xsk);
976 	xsk_umem__delete(ifdict_tx->umem->umem);
977 	xsk_socket__delete(ifdict_rx->xsk->xsk);
978 	xsk_umem__delete(ifdict_rx->umem->umem);
979 	ifdict_tx->umem = ifdict_tx->umem_arr[1];
980 	ifdict_tx->xsk = ifdict_tx->xsk_arr[1];
981 	ifdict_rx->umem = ifdict_rx->umem_arr[1];
982 	ifdict_rx->xsk = ifdict_rx->xsk_arr[1];
983 }
984 
testapp_bpf_res(void)985 static void testapp_bpf_res(void)
986 {
987 	int i;
988 
989 	for (i = 0; i < MAX_BPF_ITER; i++) {
990 		pkt_counter = 0;
991 		prev_pkt = -1;
992 		sigvar = 0;
993 		print_verbose("Creating socket\n");
994 		testapp_validate();
995 		if (!second_step)
996 			swap_xsk_res();
997 		second_step = true;
998 	}
999 
1000 	print_ksft_result();
1001 }
1002 
testapp_stats(void)1003 static void testapp_stats(void)
1004 {
1005 	for (int i = 0; i < STAT_TEST_TYPE_MAX; i++) {
1006 		stat_test_type = i;
1007 
1008 		/* reset defaults */
1009 		rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
1010 		frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
1011 
1012 		switch (stat_test_type) {
1013 		case STAT_TEST_RX_DROPPED:
1014 			frame_headroom = XSK_UMEM__DEFAULT_FRAME_SIZE -
1015 						XDP_PACKET_HEADROOM - 1;
1016 			break;
1017 		case STAT_TEST_RX_FULL:
1018 			rxqsize = RX_FULL_RXQSIZE;
1019 			break;
1020 		default:
1021 			break;
1022 		}
1023 		testapp_validate();
1024 	}
1025 
1026 	print_ksft_result();
1027 }
1028 
init_iface(struct ifobject * ifobj,const char * dst_mac,const char * src_mac,const char * dst_ip,const char * src_ip,const u16 dst_port,const u16 src_port,enum fvector vector)1029 static void init_iface(struct ifobject *ifobj, const char *dst_mac,
1030 		       const char *src_mac, const char *dst_ip,
1031 		       const char *src_ip, const u16 dst_port,
1032 		       const u16 src_port, enum fvector vector)
1033 {
1034 	struct in_addr ip;
1035 
1036 	memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN);
1037 	memcpy(ifobj->src_mac, src_mac, ETH_ALEN);
1038 
1039 	inet_aton(dst_ip, &ip);
1040 	ifobj->dst_ip = ip.s_addr;
1041 
1042 	inet_aton(src_ip, &ip);
1043 	ifobj->src_ip = ip.s_addr;
1044 
1045 	ifobj->dst_port = dst_port;
1046 	ifobj->src_port = src_port;
1047 
1048 	if (vector == tx) {
1049 		ifobj->fv.vector = tx;
1050 		ifobj->func_ptr = worker_testapp_validate_tx;
1051 		ifdict_tx = ifobj;
1052 	} else {
1053 		ifobj->fv.vector = rx;
1054 		ifobj->func_ptr = worker_testapp_validate_rx;
1055 		ifdict_rx = ifobj;
1056 	}
1057 }
1058 
run_pkt_test(int mode,int type)1059 static void run_pkt_test(int mode, int type)
1060 {
1061 	test_type = type;
1062 
1063 	/* reset defaults after potential previous test */
1064 	xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
1065 	pkt_counter = 0;
1066 	second_step = 0;
1067 	prev_pkt = -1;
1068 	sigvar = 0;
1069 	stat_test_type = -1;
1070 	rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
1071 	frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
1072 
1073 	configured_mode = mode;
1074 
1075 	switch (mode) {
1076 	case (TEST_MODE_SKB):
1077 		xdp_flags |= XDP_FLAGS_SKB_MODE;
1078 		break;
1079 	case (TEST_MODE_DRV):
1080 		xdp_flags |= XDP_FLAGS_DRV_MODE;
1081 		break;
1082 	default:
1083 		break;
1084 	}
1085 
1086 	switch (test_type) {
1087 	case TEST_TYPE_STATS:
1088 		testapp_stats();
1089 		break;
1090 	case TEST_TYPE_TEARDOWN:
1091 		testapp_teardown();
1092 		break;
1093 	case TEST_TYPE_BIDI:
1094 		testapp_bidi();
1095 		break;
1096 	case TEST_TYPE_BPF_RES:
1097 		testapp_bpf_res();
1098 		break;
1099 	default:
1100 		testapp_validate();
1101 		break;
1102 	}
1103 }
1104 
main(int argc,char ** argv)1105 int main(int argc, char **argv)
1106 {
1107 	struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
1108 	bool failure = false;
1109 	int i, j;
1110 
1111 	if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
1112 		exit_with_error(errno);
1113 
1114 	for (int i = 0; i < MAX_INTERFACES; i++) {
1115 		ifdict[i] = malloc(sizeof(struct ifobject));
1116 		if (!ifdict[i])
1117 			exit_with_error(errno);
1118 
1119 		ifdict[i]->ifdict_index = i;
1120 		ifdict[i]->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
1121 		if (!ifdict[i]->xsk_arr) {
1122 			failure = true;
1123 			goto cleanup;
1124 		}
1125 		ifdict[i]->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
1126 		if (!ifdict[i]->umem_arr) {
1127 			failure = true;
1128 			goto cleanup;
1129 		}
1130 	}
1131 
1132 	setlocale(LC_ALL, "");
1133 
1134 	parse_command_line(argc, argv);
1135 
1136 	num_frames = ++opt_pkt_count;
1137 
1138 	init_iface(ifdict[0], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
1139 	init_iface(ifdict[1], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
1140 
1141 	ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
1142 
1143 	for (i = 0; i < TEST_MODE_MAX; i++) {
1144 		for (j = 0; j < TEST_TYPE_MAX; j++)
1145 			run_pkt_test(i, j);
1146 	}
1147 
1148 cleanup:
1149 	for (int i = 0; i < MAX_INTERFACES; i++) {
1150 		if (ifdict[i]->ns_fd != -1)
1151 			close(ifdict[i]->ns_fd);
1152 		free(ifdict[i]->xsk_arr);
1153 		free(ifdict[i]->umem_arr);
1154 		free(ifdict[i]);
1155 	}
1156 
1157 	if (failure)
1158 		exit_with_error(errno);
1159 
1160 	ksft_exit_pass();
1161 
1162 	return 0;
1163 }
1164