// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
static const char *__doc__ =
	" XDP redirect with a CPU-map type \"BPF_MAP_TYPE_CPUMAP\"";
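
/* Example invocation (assumes the sample binary is built as
 * xdp_redirect_cpu; the device name is a placeholder for a local NIC):
 *
 *   ./xdp_redirect_cpu --dev eth0 --cpu 2 --cpu 3 \
 *           --progname xdp_cpu_map5_lb_hash_ip_pairs
 */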

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <locale.h>
#include <sys/resource.h>
#include <sys/sysinfo.h>
#include <getopt.h>
#include <net/if.h>
#include <time.h>
#include <linux/limits.h>

#include <arpa/inet.h>
#include <linux/if_link.h>

/* How many xdp_progs are defined in _kern.c */
#define MAX_PROG 6

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

#include "bpf_util.h"

static int ifindex = -1;
static char ifname_buf[IF_NAMESIZE];
static char *ifname;
static __u32 prog_id;

static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static int n_cpus;

enum map_type {
	CPU_MAP,
	RX_CNT,
	REDIRECT_ERR_CNT,
	CPUMAP_ENQUEUE_CNT,
	CPUMAP_KTHREAD_CNT,
	CPUS_AVAILABLE,
	CPUS_COUNT,
	CPUS_ITERATOR,
	EXCEPTION_CNT,
};

static const char *const map_type_strings[] = {
	[CPU_MAP] = "cpu_map",
	[RX_CNT] = "rx_cnt",
	[REDIRECT_ERR_CNT] = "redirect_err_cnt",
	[CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt",
	[CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt",
	[CPUS_AVAILABLE] = "cpus_available",
	[CPUS_COUNT] = "cpus_count",
	[CPUS_ITERATOR] = "cpus_iterator",
	[EXCEPTION_CNT] = "exception_cnt",
};
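
/* The strings above must match the map names used in the _kern.c
 * object file; init_map_fds() below resolves each one via
 * bpf_object__find_map_fd_by_name().  Keep the enum, this table and
 * NUM_MAP in sync when adding a map.
 */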

#define NUM_TP 5
#define NUM_MAP 9
struct bpf_link *tp_links[NUM_TP] = {};
static int map_fds[NUM_MAP];
static int tp_cnt = 0;

/* Exit return codes */
#define EXIT_OK		0
#define EXIT_FAIL		1
#define EXIT_FAIL_OPTION	2
#define EXIT_FAIL_XDP		3
#define EXIT_FAIL_BPF		4
#define EXIT_FAIL_MEM		5

static const struct option long_options[] = {
	{"help",	no_argument,		NULL, 'h' },
	{"dev",		required_argument,	NULL, 'd' },
	{"skb-mode",	no_argument,		NULL, 'S' },
	{"sec",		required_argument,	NULL, 's' },
	{"progname",	required_argument,	NULL, 'p' },
	{"qsize",	required_argument,	NULL, 'q' },
	{"cpu",		required_argument,	NULL, 'c' },
	{"stress-mode", no_argument,		NULL, 'x' },
	{"no-separators", no_argument,		NULL, 'z' },
	{"force",	no_argument,		NULL, 'F' },
	{"mprog-disable", no_argument,		NULL, 'n' },
	{"mprog-name",	required_argument,	NULL, 'e' },
	{"mprog-filename", required_argument,	NULL, 'f' },
	{"redirect-device", required_argument,	NULL, 'r' },
	{"redirect-map", required_argument,	NULL, 'm' },
	{0, 0, NULL,  0 }
};

static void int_exit(int sig)
{
	__u32 curr_prog_id = 0;

	if (ifindex > -1) {
		if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
			printf("bpf_get_link_xdp_id failed\n");
			exit(EXIT_FAIL);
		}
		if (prog_id == curr_prog_id) {
			fprintf(stderr,
				"Interrupted: Removing XDP program on ifindex:%d device:%s\n",
				ifindex, ifname);
			bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
		} else if (!curr_prog_id) {
			printf("couldn't find a prog id on the given iface\n");
		} else {
			printf("program on interface changed, not removing\n");
		}
	}
	/* Detach tracepoints */
	while (tp_cnt)
		bpf_link__destroy(tp_links[--tp_cnt]);

	exit(EXIT_OK);
}

static void print_avail_progs(struct bpf_object *obj)
{
	struct bpf_program *pos;

	bpf_object__for_each_program(pos, obj) {
		if (bpf_program__is_xdp(pos))
			printf(" %s\n", bpf_program__section_name(pos));
	}
}

static void usage(char *argv[], struct bpf_object *obj)
{
	int i;

	printf("\nDOCUMENTATION:\n%s\n", __doc__);
	printf("\n");
	printf(" Usage: %s (options-see-below)\n", argv[0]);
	printf(" Listing options:\n");
	for (i = 0; long_options[i].name != 0; i++) {
		printf(" --%-12s", long_options[i].name);
		if (long_options[i].flag != NULL)
			printf(" flag (internal value:%d)",
				*long_options[i].flag);
		else
			printf(" short-option: -%c",
				long_options[i].val);
		printf("\n");
	}
	printf("\n Programs to be used for --progname:\n");
	print_avail_progs(obj);
	printf("\n");
}

/* gettime returns a monotonic timestamp in nanoseconds.
 * Cost: clock_gettime (ns) => 26ns (CLOCK_MONOTONIC)
 *       clock_gettime (ns) =>  9ns (CLOCK_MONOTONIC_COARSE)
 */
#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
static __u64 gettime(void)
{
	struct timespec t;
	int res;

	res = clock_gettime(CLOCK_MONOTONIC, &t);
	if (res < 0) {
		fprintf(stderr, "Error with clock_gettime! (%i)\n", res);
		exit(EXIT_FAIL);
	}
	return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
}

/* Common stats data record shared with _kern.c */
struct datarec {
	__u64 processed;
	__u64 dropped;
	__u64 issue;
	__u64 xdp_pass;
	__u64 xdp_drop;
	__u64 xdp_redirect;
};
struct record {
	__u64 timestamp;
	struct datarec total;
	struct datarec *cpu;
};
struct stats_record {
	struct record rx_cnt;
	struct record redir_err;
	struct record kthread;
	struct record exception;
	struct record enq[];
};
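
/* Note: struct stats_record ends in a flexible array member; enq[]
 * holds one cpumap-enqueue record per destination CPU, and
 * alloc_stats_record() sizes the allocation accordingly as
 * sizeof(*rec) + n_cpus * sizeof(struct record).
 */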

static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
{
	/* For percpu maps, userspace gets a value per possible CPU,
	 * which can be more than the configured CPU count (n_cpus).
	 */
	unsigned int nr_cpus = bpf_num_possible_cpus();
	struct datarec values[nr_cpus];
	__u64 sum_xdp_redirect = 0;
	__u64 sum_xdp_pass = 0;
	__u64 sum_xdp_drop = 0;
	__u64 sum_processed = 0;
	__u64 sum_dropped = 0;
	__u64 sum_issue = 0;
	int i;

	if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
		fprintf(stderr,
			"ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
		return false;
	}
	/* Get time as close as possible to reading map contents */
	rec->timestamp = gettime();

	/* Record and sum values from each CPU */
	for (i = 0; i < nr_cpus; i++) {
		rec->cpu[i].processed = values[i].processed;
		sum_processed        += values[i].processed;
		rec->cpu[i].dropped = values[i].dropped;
		sum_dropped        += values[i].dropped;
		rec->cpu[i].issue = values[i].issue;
		sum_issue        += values[i].issue;
		rec->cpu[i].xdp_pass = values[i].xdp_pass;
		sum_xdp_pass += values[i].xdp_pass;
		rec->cpu[i].xdp_drop = values[i].xdp_drop;
		sum_xdp_drop += values[i].xdp_drop;
		rec->cpu[i].xdp_redirect = values[i].xdp_redirect;
		sum_xdp_redirect += values[i].xdp_redirect;
	}
	rec->total.processed = sum_processed;
	rec->total.dropped   = sum_dropped;
	rec->total.issue     = sum_issue;
	rec->total.xdp_pass  = sum_xdp_pass;
	rec->total.xdp_drop  = sum_xdp_drop;
	rec->total.xdp_redirect = sum_xdp_redirect;
	return true;
}

static struct datarec *alloc_record_per_cpu(void)
{
	unsigned int nr_cpus = bpf_num_possible_cpus();
	struct datarec *array;

	array = calloc(nr_cpus, sizeof(struct datarec));
	if (!array) {
		fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
		exit(EXIT_FAIL_MEM);
	}
	return array;
}

static struct stats_record *alloc_stats_record(void)
{
	struct stats_record *rec;
	int i, size;

	size = sizeof(*rec) + n_cpus * sizeof(struct record);
	rec = malloc(size);
	if (!rec) {
		fprintf(stderr, "Mem alloc error\n");
		exit(EXIT_FAIL_MEM);
	}
	memset(rec, 0, size);
	rec->rx_cnt.cpu    = alloc_record_per_cpu();
	rec->redir_err.cpu = alloc_record_per_cpu();
	rec->kthread.cpu   = alloc_record_per_cpu();
	rec->exception.cpu = alloc_record_per_cpu();
	for (i = 0; i < n_cpus; i++)
		rec->enq[i].cpu = alloc_record_per_cpu();

	return rec;
}

static void free_stats_record(struct stats_record *r)
{
	int i;

	for (i = 0; i < n_cpus; i++)
		free(r->enq[i].cpu);
	free(r->exception.cpu);
	free(r->kthread.cpu);
	free(r->redir_err.cpu);
	free(r->rx_cnt.cpu);
	free(r);
}

static double calc_period(struct record *r, struct record *p)
{
	double period_ = 0;
	__u64 period = 0;

	period = r->timestamp - p->timestamp;
	if (period > 0)
		period_ = ((double) period / NANOSEC_PER_SEC);

	return period_;
}

static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
{
	__u64 packets = 0;
	__u64 pps = 0;

	if (period_ > 0) {
		packets = r->processed - p->processed;
		pps = packets / period_;
	}
	return pps;
}

static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_)
{
	__u64 packets = 0;
	__u64 pps = 0;

	if (period_ > 0) {
		packets = r->dropped - p->dropped;
		pps = packets / period_;
	}
	return pps;
}

static __u64 calc_errs_pps(struct datarec *r,
			    struct datarec *p, double period_)
{
	__u64 packets = 0;
	__u64 pps = 0;

	if (period_ > 0) {
		packets = r->issue - p->issue;
		pps = packets / period_;
	}
	return pps;
}

static void calc_xdp_pps(struct datarec *r, struct datarec *p,
			 double *xdp_pass, double *xdp_drop,
			 double *xdp_redirect, double period_)
{
	*xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0;
	if (period_ > 0) {
		*xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_;
		*xdp_pass = (r->xdp_pass - p->xdp_pass) / period_;
		*xdp_drop = (r->xdp_drop - p->xdp_drop) / period_;
	}
}

static void stats_print(struct stats_record *stats_rec,
			struct stats_record *stats_prev,
			char *prog_name, char *mprog_name, int mprog_fd)
{
	unsigned int nr_cpus = bpf_num_possible_cpus();
	double pps = 0, drop = 0, err = 0;
	bool mprog_enabled = false;
	struct record *rec, *prev;
	int to_cpu;
	double t;
	int i;

	if (mprog_fd > 0)
		mprog_enabled = true;

	/* Header */
	printf("Running XDP/eBPF prog_name:%s\n", prog_name);
	printf("%-15s %-7s %-14s %-11s %-9s\n",
	       "XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info");

	/* XDP rx_cnt */
	{
		char *fmt_rx = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
		char *fm2_rx = "%-15s %-7s %'-14.0f %'-11.0f\n";
		char *errstr = "";

		rec  = &stats_rec->rx_cnt;
		prev = &stats_prev->rx_cnt;
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			pps = calc_pps(r, p, t);
			drop = calc_drop_pps(r, p, t);
			err  = calc_errs_pps(r, p, t);
			if (err > 0)
				errstr = "cpu-dest/err";
			if (pps > 0)
				printf(fmt_rx, "XDP-RX",
					i, pps, drop, err, errstr);
		}
		pps  = calc_pps(&rec->total, &prev->total, t);
		drop = calc_drop_pps(&rec->total, &prev->total, t);
		err  = calc_errs_pps(&rec->total, &prev->total, t);
		printf(fm2_rx, "XDP-RX", "total", pps, drop);
	}

	/* cpumap enqueue stats */
	for (to_cpu = 0; to_cpu < n_cpus; to_cpu++) {
		char *fmt = "%-15s %3d:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
		char *fm2 = "%-15s %3s:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
		char *errstr = "";

		rec  = &stats_rec->enq[to_cpu];
		prev = &stats_prev->enq[to_cpu];
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			pps  = calc_pps(r, p, t);
			drop = calc_drop_pps(r, p, t);
			err  = calc_errs_pps(r, p, t);
			if (err > 0) {
				errstr = "bulk-average";
				err = pps / err; /* calc average bulk size */
			}
			if (pps > 0)
				printf(fmt, "cpumap-enqueue",
				       i, to_cpu, pps, drop, err, errstr);
		}
		pps = calc_pps(&rec->total, &prev->total, t);
		if (pps > 0) {
			drop = calc_drop_pps(&rec->total, &prev->total, t);
			err  = calc_errs_pps(&rec->total, &prev->total, t);
			if (err > 0) {
				errstr = "bulk-average";
				err = pps / err; /* calc average bulk size */
			}
			printf(fm2, "cpumap-enqueue",
			       "sum", to_cpu, pps, drop, err, errstr);
		}
	}

	/* cpumap kthread stats */
	{
		char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
		char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f %s\n";
		char *e_str = "";

		rec  = &stats_rec->kthread;
		prev = &stats_prev->kthread;
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			pps  = calc_pps(r, p, t);
			drop = calc_drop_pps(r, p, t);
			err  = calc_errs_pps(r, p, t);
			if (err > 0)
				e_str = "sched";
			if (pps > 0)
				printf(fmt_k, "cpumap_kthread",
				       i, pps, drop, err, e_str);
		}
		pps = calc_pps(&rec->total, &prev->total, t);
		drop = calc_drop_pps(&rec->total, &prev->total, t);
		err  = calc_errs_pps(&rec->total, &prev->total, t);
		if (err > 0)
			e_str = "sched-sum";
		printf(fm2_k, "cpumap_kthread", "total", pps, drop, err, e_str);
	}

	/* XDP redirect err tracepoints (very unlikely) */
	{
		char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
		char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";

		rec  = &stats_rec->redir_err;
		prev = &stats_prev->redir_err;
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			pps  = calc_pps(r, p, t);
			drop = calc_drop_pps(r, p, t);
			if (pps > 0)
				printf(fmt_err, "redirect_err", i, pps, drop);
		}
		pps = calc_pps(&rec->total, &prev->total, t);
		drop = calc_drop_pps(&rec->total, &prev->total, t);
		printf(fm2_err, "redirect_err", "total", pps, drop);
	}

	/* XDP general exception tracepoints */
	{
		char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
		char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";

		rec  = &stats_rec->exception;
		prev = &stats_prev->exception;
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			pps  = calc_pps(r, p, t);
			drop = calc_drop_pps(r, p, t);
			if (pps > 0)
				printf(fmt_err, "xdp_exception", i, pps, drop);
		}
		pps = calc_pps(&rec->total, &prev->total, t);
		drop = calc_drop_pps(&rec->total, &prev->total, t);
		printf(fm2_err, "xdp_exception", "total", pps, drop);
	}

	/* CPUMAP attached XDP program that runs on remote/destination CPU */
	if (mprog_enabled) {
		char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f\n";
		char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f\n";
		double xdp_pass, xdp_drop, xdp_redirect;

		printf("\n2nd remote XDP/eBPF prog_name: %s\n", mprog_name);
		printf("%-15s %-7s %-14s %-11s %-9s\n",
		       "XDP-cpumap", "CPU:to", "xdp-pass", "xdp-drop", "xdp-redir");

		rec  = &stats_rec->kthread;
		prev = &stats_prev->kthread;
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			calc_xdp_pps(r, p, &xdp_pass, &xdp_drop,
				     &xdp_redirect, t);
			if (xdp_pass > 0 || xdp_drop > 0 || xdp_redirect > 0)
				printf(fmt_k, "xdp-in-kthread", i, xdp_pass, xdp_drop,
				       xdp_redirect);
		}
		calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop,
			     &xdp_redirect, t);
		printf(fm2_k, "xdp-in-kthread", "total", xdp_pass, xdp_drop, xdp_redirect);
	}

	printf("\n");
	fflush(stdout);
}

static void stats_collect(struct stats_record *rec)
{
	int fd, i;

	fd = map_fds[RX_CNT];
	map_collect_percpu(fd, 0, &rec->rx_cnt);

	fd = map_fds[REDIRECT_ERR_CNT];
	map_collect_percpu(fd, 1, &rec->redir_err);

	fd = map_fds[CPUMAP_ENQUEUE_CNT];
	for (i = 0; i < n_cpus; i++)
		map_collect_percpu(fd, i, &rec->enq[i]);

	fd = map_fds[CPUMAP_KTHREAD_CNT];
	map_collect_percpu(fd, 0, &rec->kthread);

	fd = map_fds[EXCEPTION_CNT];
	map_collect_percpu(fd, 0, &rec->exception);
}

/* Pointer swap trick: avoids copying all per-CPU records each interval */
static inline void swap(struct stats_record **a, struct stats_record **b)
{
	struct stats_record *tmp;

	tmp = *a;
	*a = *b;
	*b = tmp;
}

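/* struct bpf_cpumap_val (from the bpf UAPI headers) describes a cpumap
 * entry: the queue size for the kthread (qsize) and, optionally, the
 * fd of a second XDP program to run on the destination CPU
 * (bpf_prog.fd; 0 means none).
 */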
static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
			    __u32 avail_idx, bool new)
{
	__u32 curr_cpus_count = 0;
	__u32 key = 0;
	int ret;

	/* Add a CPU entry to cpumap, as this allocates a cpu entry in
	 * the kernel for the cpu.
	 */
	ret = bpf_map_update_elem(map_fds[CPU_MAP], &cpu, value, 0);
	if (ret) {
		fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
		exit(EXIT_FAIL_BPF);
	}

	/* Inform bpf_progs that a new CPU is available to select
	 * from via some control maps.
	 */
	ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &avail_idx, &cpu, 0);
	if (ret) {
		fprintf(stderr, "Add to avail CPUs failed\n");
		exit(EXIT_FAIL_BPF);
	}

	/* When not replacing/updating existing entry, bump the count */
	ret = bpf_map_lookup_elem(map_fds[CPUS_COUNT], &key, &curr_cpus_count);
	if (ret) {
		fprintf(stderr, "Failed reading curr cpus_count\n");
		exit(EXIT_FAIL_BPF);
	}
	if (new) {
		curr_cpus_count++;
		ret = bpf_map_update_elem(map_fds[CPUS_COUNT], &key,
					  &curr_cpus_count, 0);
		if (ret) {
			fprintf(stderr, "Failed write curr cpus_count\n");
			exit(EXIT_FAIL_BPF);
		}
	}
	/* map_fds[CPUS_ITERATOR] is only used by the bpf-side progs */
	printf("%s CPU:%u as idx:%u qsize:%d prog_fd: %d (cpus_count:%u)\n",
	       new ? "Add-new":"Replace", cpu, avail_idx,
	       value->qsize, value->bpf_prog.fd, curr_cpus_count);

	return 0;
}

/* CPUs are zero-indexed. Thus, add a special sentinel default value
 * in map cpus_available to mark CPU indexes that are not configured.
 */
static void mark_cpus_unavailable(void)
{
	__u32 invalid_cpu = n_cpus;
	int ret, i;

	for (i = 0; i < n_cpus; i++) {
		ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &i,
					  &invalid_cpu, 0);
		if (ret) {
			fprintf(stderr, "Failed marking CPU unavailable\n");
			exit(EXIT_FAIL_BPF);
		}
	}
}

/* Stress cpumap management code by concurrently changing underlying cpumap */
static void stress_cpumap(struct bpf_cpumap_val *value)
{
	/* Changing qsize will cause kernel to free and alloc a new
	 * bpf_cpu_map_entry, with an associated/complicated tear-down
	 * procedure.
	 */
	value->qsize = 1024;
	create_cpu_entry(1, value, 0, false);
	value->qsize = 8;
	create_cpu_entry(1, value, 0, false);
	value->qsize = 16000;
	create_cpu_entry(1, value, 0, false);
}

static void stats_poll(int interval, bool use_separators, char *prog_name,
		       char *mprog_name, struct bpf_cpumap_val *value,
		       bool stress_mode)
{
	struct stats_record *record, *prev;
	int mprog_fd;

	record = alloc_stats_record();
	prev   = alloc_stats_record();
	stats_collect(record);

	/* Trick: the %' printf modifier needs a locale to emit
	 * thousands separators in the pretty-printed stats.
	 */
	if (use_separators)
		setlocale(LC_NUMERIC, "en_US");

	while (1) {
		swap(&prev, &record);
		mprog_fd = value->bpf_prog.fd;
		stats_collect(record);
		stats_print(record, prev, prog_name, mprog_name, mprog_fd);
		sleep(interval);
		if (stress_mode)
			stress_cpumap(value);
	}

	free_stats_record(record);
	free_stats_record(prev);
}

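/* Attach every tracepoint program found in the BPF object.  This
 * assumes _kern.c defines at most NUM_TP tracepoint programs, as
 * tp_links[] is not bounds-checked here.
 */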
static int init_tracepoints(struct bpf_object *obj)
{
	struct bpf_program *prog;

	bpf_object__for_each_program(prog, obj) {
		if (!bpf_program__is_tracepoint(prog))
			continue;

		tp_links[tp_cnt] = bpf_program__attach(prog);
		if (libbpf_get_error(tp_links[tp_cnt])) {
			tp_links[tp_cnt] = NULL;
			return -EINVAL;
		}
		tp_cnt++;
	}

	return 0;
}

static int init_map_fds(struct bpf_object *obj)
{
	enum map_type type;

	for (type = 0; type < NUM_MAP; type++) {
		map_fds[type] =
			bpf_object__find_map_fd_by_name(obj,
							map_type_strings[type]);

		if (map_fds[type] < 0)
			return -ENOENT;
	}

	return 0;
}

static int load_cpumap_prog(char *file_name, char *prog_name,
			    char *redir_interface, char *redir_map)
{
	struct bpf_prog_load_attr prog_load_attr = {
		.prog_type		= BPF_PROG_TYPE_XDP,
		.expected_attach_type	= BPF_XDP_CPUMAP,
		.file = file_name,
	};
	struct bpf_program *prog;
	struct bpf_object *obj;
	int fd;

	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &fd))
		return -1;

	if (fd < 0) {
		fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
			strerror(errno));
		return fd;
	}

	if (redir_interface && redir_map) {
		int err, map_fd, ifindex_out, key = 0;

		map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
		if (map_fd < 0)
			return map_fd;

		ifindex_out = if_nametoindex(redir_interface);
		if (!ifindex_out)
			return -1;

		err = bpf_map_update_elem(map_fd, &key, &ifindex_out, 0);
		if (err < 0)
			return err;
	}

	prog = bpf_object__find_program_by_title(obj, prog_name);
	if (!prog) {
		fprintf(stderr, "bpf_object__find_program_by_title failed\n");
		return EXIT_FAIL;
	}

	return bpf_program__fd(prog);
}

int main(int argc, char **argv)
{
	char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs";
	char *mprog_filename = "xdp_redirect_kern.o";
	char *redir_interface = NULL, *redir_map = NULL;
	char *mprog_name = "xdp_redirect_dummy";
	bool mprog_disable = false;
	struct bpf_prog_load_attr prog_load_attr = {
		.prog_type	= BPF_PROG_TYPE_UNSPEC,
	};
	struct bpf_prog_info info = {};
	__u32 info_len = sizeof(info);
	struct bpf_cpumap_val value;
	bool use_separators = true;
	bool stress_mode = false;
	struct bpf_program *prog;
	struct bpf_object *obj;
	int err = EXIT_FAIL;
	char filename[256];
	int added_cpus = 0;
	int longindex = 0;
	int interval = 2;
	int add_cpu = -1;
	int opt, prog_fd;
	int *cpu, i;
	__u32 qsize;

	n_cpus = get_nprocs_conf();

	/* Notice: choosing the queue size is very important with the
	 * ixgbe driver, because its page recycling trick depends on
	 * pages being returned quickly.  The number of outstanding
	 * packets in the system must be less than 2x the RX-ring size.
	 */
	qsize = 128+64;

	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
	prog_load_attr.file = filename;

	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
		return err;

	if (prog_fd < 0) {
		fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
			strerror(errno));
		return err;
	}

	if (init_tracepoints(obj) < 0) {
		fprintf(stderr, "ERR: bpf_program__attach failed\n");
		return err;
	}

	if (init_map_fds(obj) < 0) {
		fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n");
		return err;
	}
	mark_cpus_unavailable();

	cpu = malloc(n_cpus * sizeof(int));
	if (!cpu) {
		fprintf(stderr, "failed to allocate cpu array\n");
		return err;
	}
	memset(cpu, 0, n_cpus * sizeof(int));

	/* Parse command line args */
	while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:",
				  long_options, &longindex)) != -1) {
		switch (opt) {
		case 'd':
			if (strlen(optarg) >= IF_NAMESIZE) {
				fprintf(stderr, "ERR: --dev name too long\n");
				goto error;
			}
			ifname = (char *)&ifname_buf;
			strncpy(ifname, optarg, IF_NAMESIZE);
			ifindex = if_nametoindex(ifname);
			if (ifindex == 0) {
				fprintf(stderr,
					"ERR: --dev name unknown err(%d):%s\n",
					errno, strerror(errno));
				goto error;
			}
			break;
		case 's':
			interval = atoi(optarg);
			break;
		case 'S':
			xdp_flags |= XDP_FLAGS_SKB_MODE;
			break;
		case 'x':
			stress_mode = true;
			break;
		case 'z':
			use_separators = false;
			break;
		case 'p':
			/* Selecting eBPF prog to load */
			prog_name = optarg;
			break;
		case 'n':
			mprog_disable = true;
			break;
		case 'f':
			mprog_filename = optarg;
			break;
		case 'e':
			mprog_name = optarg;
			break;
		case 'r':
			redir_interface = optarg;
			break;
		case 'm':
			redir_map = optarg;
			break;
		case 'c':
			/* Add multiple CPUs */
			add_cpu = strtoul(optarg, NULL, 0);
			if (add_cpu >= n_cpus) {
				fprintf(stderr,
				"--cpu nr too large for cpumap err(%d):%s\n",
					errno, strerror(errno));
				goto error;
			}
			cpu[added_cpus++] = add_cpu;
			break;
		case 'q':
			qsize = atoi(optarg);
			break;
		case 'F':
			xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
			break;
		case 'h':
		error:
		default:
			free(cpu);
			usage(argv, obj);
			return EXIT_FAIL_OPTION;
		}
	}

	if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
		xdp_flags |= XDP_FLAGS_DRV_MODE;

	/* Required option */
	if (ifindex == -1) {
		fprintf(stderr, "ERR: required option --dev missing\n");
		usage(argv, obj);
		err = EXIT_FAIL_OPTION;
		goto out;
	}
	/* Required option */
	if (add_cpu == -1) {
		fprintf(stderr, "ERR: required option --cpu missing\n");
		fprintf(stderr, " Specify multiple --cpu options to add more\n");
		usage(argv, obj);
		err = EXIT_FAIL_OPTION;
		goto out;
	}

	value.bpf_prog.fd = 0;
	if (!mprog_disable)
		value.bpf_prog.fd = load_cpumap_prog(mprog_filename, mprog_name,
						     redir_interface, redir_map);
	if (value.bpf_prog.fd < 0) {
		err = value.bpf_prog.fd;
		goto out;
	}
	value.qsize = qsize;

	for (i = 0; i < added_cpus; i++)
		create_cpu_entry(cpu[i], &value, i, true);

	/* Remove XDP program when program is interrupted or killed */
	signal(SIGINT, int_exit);
	signal(SIGTERM, int_exit);

	prog = bpf_object__find_program_by_title(obj, prog_name);
	if (!prog) {
		fprintf(stderr, "bpf_object__find_program_by_title failed\n");
		goto out;
	}

	prog_fd = bpf_program__fd(prog);
	if (prog_fd < 0) {
		fprintf(stderr, "bpf_program__fd failed\n");
		goto out;
	}

	if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
		fprintf(stderr, "link set xdp fd failed\n");
		err = EXIT_FAIL_XDP;
		goto out;
	}

	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
	if (err) {
		printf("can't get prog info - %s\n", strerror(errno));
		goto out;
	}
	prog_id = info.id;

	stats_poll(interval, use_separators, prog_name, mprog_name,
		   &value, stress_mode);

	err = EXIT_OK;
out:
	free(cpu);
	return err;
}