xref: /freebsd/tools/tools/netmap/pkt-gen.c (revision a3557ef0)
1 /*
2  * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
3  * Copyright (C) 2013-2015 Universita` di Pisa. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *   1. Redistributions of source code must retain the above copyright
9  *      notice, this list of conditions and the following disclaimer.
10  *   2. Redistributions in binary form must reproduce the above copyright
11  *      notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * $FreeBSD$
29  * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $
30  *
31  * Example program to show how to build a multithreaded packet
32  * source/sink using the netmap device.
33  *
34  * In this example we create a programmable number of threads
35  * to take care of all the queues of the interface used to
36  * send or receive traffic.
37  *
38  */
39 
40 #define _GNU_SOURCE	/* for CPU_SET() */
41 #include <stdio.h>
42 #define NETMAP_WITH_LIBS
43 #include <net/netmap_user.h>
44 
45 
46 #include <ctype.h>	// isprint()
47 #include <unistd.h>	// sysconf()
48 #include <sys/poll.h>
49 #include <arpa/inet.h>	/* ntohs */
50 #ifndef _WIN32
51 #include <sys/sysctl.h>	/* sysctl */
52 #endif
53 #include <ifaddrs.h>	/* getifaddrs */
54 #include <net/ethernet.h>
55 #include <netinet/in.h>
56 #include <netinet/ip.h>
57 #include <netinet/udp.h>
58 #include <netinet/ip6.h>
59 #ifdef linux
60 #define IPV6_VERSION	0x60
61 #define IPV6_DEFHLIM	64
62 #endif
63 #include <assert.h>
64 #include <math.h>
65 
66 #include <pthread.h>
67 
68 #ifndef NO_PCAP
69 #include <pcap/pcap.h>
70 #endif
71 
72 #include "ctrs.h"
73 
74 static void usage(int);
75 
76 #ifdef _WIN32
77 #define cpuset_t        DWORD_PTR   //uint64_t
78 static inline void CPU_ZERO(cpuset_t *p)
79 {
80 	*p = 0;
81 }
82 
83 static inline void CPU_SET(uint32_t i, cpuset_t *p)
84 {
85 	*p |= 1<< (i & 0x3f);
86 }
87 
88 #define pthread_setaffinity_np(a, b, c) !SetThreadAffinityMask(a, *c)    //((void)a, 0)
89 #define TAP_CLONEDEV	"/dev/tap"
90 #define AF_LINK	18	//defined in winsocks.h
91 #define CLOCK_REALTIME_PRECISE CLOCK_REALTIME
92 #include <net/if_dl.h>
93 
94 /*
95  * Convert an ASCII representation of an ethernet address to
96  * binary form.
97  */
98 struct ether_addr *
99 ether_aton(const char *a)
100 {
101 	int i;
102 	static struct ether_addr o;
103 	unsigned int o0, o1, o2, o3, o4, o5;
104 
105 	i = sscanf(a, "%x:%x:%x:%x:%x:%x", &o0, &o1, &o2, &o3, &o4, &o5);
106 
107 	if (i != 6)
108 		return (NULL);
109 
110 	o.octet[0]=o0;
111 	o.octet[1]=o1;
112 	o.octet[2]=o2;
113 	o.octet[3]=o3;
114 	o.octet[4]=o4;
115 	o.octet[5]=o5;
116 
117 	return ((struct ether_addr *)&o);
118 }
119 
120 /*
121  * Convert a binary representation of an ethernet address to
122  * an ASCII string.
123  */
124 char *
125 ether_ntoa(const struct ether_addr *n)
126 {
127 	int i;
128 	static char a[18];
129 
130 	i = sprintf(a, "%02x:%02x:%02x:%02x:%02x:%02x",
131 	    n->octet[0], n->octet[1], n->octet[2],
132 	    n->octet[3], n->octet[4], n->octet[5]);
133 	return (i < 17 ? NULL : (char *)&a);
134 }
135 #endif /* _WIN32 */
136 
137 #ifdef linux
138 
139 #define cpuset_t        cpu_set_t
140 
141 #define ifr_flagshigh  ifr_flags        /* only the low 16 bits here */
142 #define IFF_PPROMISC   IFF_PROMISC      /* IFF_PPROMISC does not exist */
143 #include <linux/ethtool.h>
144 #include <linux/sockios.h>
145 
146 #define CLOCK_REALTIME_PRECISE CLOCK_REALTIME
147 #include <netinet/ether.h>      /* ether_aton */
148 #include <linux/if_packet.h>    /* sockaddr_ll */
149 #endif  /* linux */
150 
151 #ifdef __FreeBSD__
152 #include <sys/endian.h> /* le64toh */
153 #include <machine/param.h>
154 
155 #include <pthread_np.h> /* pthread w/ affinity */
156 #include <sys/cpuset.h> /* cpu_set */
157 #include <net/if_dl.h>  /* LLADDR */
158 #endif  /* __FreeBSD__ */
159 
160 #ifdef __APPLE__
161 
#define cpuset_t        uint64_t        // XXX

/* Clear every CPU bit in the affinity mask. */
static inline void CPU_ZERO(cpuset_t *p)
{
	*p = 0;
}

/*
 * Add CPU number i (only its low six bits are used) to the mask.
 * The constant must be widened to cpuset_t before shifting: shifting
 * a plain int by 31 or more bits is undefined behaviour and cannot
 * reach the upper half of the 64-bit mask.
 */
static inline void CPU_SET(uint32_t i, cpuset_t *p)
{
	*p |= (cpuset_t)1 << (i & 0x3f);
}
172 
173 #define pthread_setaffinity_np(a, b, c) ((void)a, 0)
174 
175 #define ifr_flagshigh  ifr_flags        // XXX
176 #define IFF_PPROMISC   IFF_PROMISC
177 #include <net/if_dl.h>  /* LLADDR */
178 #define clock_gettime(a,b)      \
179 	do {struct timespec t0 = {0,0}; *(b) = t0; } while (0)
180 #endif  /* __APPLE__ */
181 
/* Text replicated into the body of generated packets by default. */
const char *default_payload =
	"netmap pkt-gen DIRECT payload\n"
	"http://info.iet.unipi.it/~luigi/netmap/ ";

/* Text used instead of default_payload when OPT_INDIRECT is set. */
const char *indirect_payload =
	"netmap pkt-gen indirect payload\n"
	"http://info.iet.unipi.it/~luigi/netmap/ ";

/* Non-zero enables the extra D() diagnostics guarded by "if (verbose)". */
int verbose = 0;

int normalize = 1;
190 
#define VIRT_HDR_1	10	/* length of a base vnet-hdr */
#define VIRT_HDR_2	12	/* length of the extended vnet-hdr */
#define VIRT_HDR_MAX	VIRT_HDR_2

/* Raw storage sized for the longest of the vnet headers above. */
struct virt_header {
	uint8_t fields[VIRT_HDR_MAX];
};
197 
198 #define MAX_BODYSIZE	65536
199 
200 struct pkt {
201 	struct virt_header vh;
202 	struct ether_header eh;
203 	union {
204 		struct {
205 			struct ip ip;
206 			struct udphdr udp;
207 			uint8_t body[MAX_BODYSIZE];	/* hardwired */
208 		} ipv4;
209 		struct {
210 			struct ip6_hdr ip;
211 			struct udphdr udp;
212 			uint8_t body[MAX_BODYSIZE];	/* hardwired */
213 		} ipv6;
214 	};
215 } __attribute__((__packed__));
216 
217 #define	PKT(p, f, af)	\
218     ((af) == AF_INET ? (p)->ipv4.f: (p)->ipv6.f)
219 
/*
 * A range of IP endpoints. name holds the textual specification;
 * the parsed extremes go in the ipv4 or the ipv6 member of the union
 * (the two overlay each other), the port extremes in port0/port1.
 */
struct ip_range {
	char *name;
	union {
		struct {
			/* same as struct in_addr */
			uint32_t start;
			uint32_t end;
		} ipv4;
		struct {
			struct in6_addr start;
			struct in6_addr end;
			/* index of the 16-bit address word that is varied */
			uint8_t sgroup;
			uint8_t egroup;
		} ipv6;
	};
	uint16_t port0;
	uint16_t port1;
};
233 
/* A range of ethernet addresses: textual form plus parsed extremes. */
struct mac_range {
	char *name;
	struct ether_addr start;
	struct ether_addr end;
};
238 
239 /* ifname can be netmap:foo-xxxx */
240 #define MAX_IFNAMELEN	64	/* our buffer for ifname */
241 #define MAX_PKTSIZE	MAX_BODYSIZE	/* XXX: + IP_HDR + ETH_HDR */
242 
/*
 * Compact timestamp (seconds + nanoseconds, 32 bits each) small enough
 * to fit into a 60 byte packet; enough to obtain the RTT.
 */
struct tstamp {
	uint32_t sec;
	uint32_t nsec;
};
248 
249 /*
250  * global arguments for all threads
251  */
252 
253 struct glob_arg {
254 	int af;		/* address family AF_INET/AF_INET6 */
255 	struct ip_range src_ip;
256 	struct ip_range dst_ip;
257 	struct mac_range dst_mac;
258 	struct mac_range src_mac;
259 	int pkt_size;
260 	int pkt_min_size;
261 	int burst;
262 	int forever;
263 	uint64_t npackets;	/* total packets to send */
264 	int frags;		/* fragments per packet */
265 	u_int frag_size;	/* size of each fragment */
266 	int nthreads;
267 	int cpus;	/* cpus used for running */
268 	int system_cpus;	/* cpus on the system */
269 
270 	int options;	/* testing */
271 #define OPT_PREFETCH	1
272 #define OPT_ACCESS	2
273 #define OPT_COPY	4
274 #define OPT_MEMCPY	8
275 #define OPT_TS		16	/* add a timestamp */
276 #define OPT_INDIRECT	32	/* use indirect buffers, tx only */
277 #define OPT_DUMP	64	/* dump rx/tx traffic */
278 #define OPT_RUBBISH	256	/* send wathever the buffers contain */
279 #define OPT_RANDOM_SRC  512
280 #define OPT_RANDOM_DST  1024
281 #define OPT_PPS_STATS   2048
282 	int dev_type;
283 #ifndef NO_PCAP
284 	pcap_t *p;
285 #endif
286 
287 	int tx_rate;
288 	struct timespec tx_period;
289 
290 	int affinity;
291 	int main_fd;
292 	struct nm_desc *nmd;
293 	int report_interval;		/* milliseconds between prints */
294 	void *(*td_body)(void *);
295 	int td_type;
296 	void *mmap_addr;
297 	char ifname[MAX_IFNAMELEN];
298 	char *nmr_config;
299 	int dummy_send;
300 	int virt_header;	/* send also the virt_header */
301 	char *packet_file;	/* -P option */
302 #define	STATS_WIN	15
303 	int win_idx;
304 	int64_t win[STATS_WIN];
305 	int wait_link;
306 	int framing;		/* #bits of framing (for bw output) */
307 };
/* Kinds of device a run can operate on. */
enum dev_type {
	DEV_NONE,
	DEV_NETMAP,
	DEV_PCAP,
	DEV_TAP
};

/* Thread roles; numbering starts at 1. */
enum {
	TD_TYPE_SENDER = 1,
	TD_TYPE_RECEIVER,
	TD_TYPE_OTHER,
};
315 
316 /*
317  * Arguments for a new thread. The same structure is used by
318  * the source and the sink
319  */
320 struct targ {
321 	struct glob_arg *g;
322 	int used;
323 	int completed;
324 	int cancel;
325 	int fd;
326 	struct nm_desc *nmd;
327 	/* these ought to be volatile, but they are
328 	 * only sampled and errors should not accumulate
329 	 */
330 	struct my_ctrs ctr;
331 
332 	struct timespec tic, toc;
333 	int me;
334 	pthread_t thread;
335 	int affinity;
336 
337 	struct pkt pkt;
338 	void *frame;
339 	uint16_t seed[3];
340 	u_int frags;
341 	u_int frag_size;
342 };
343 
/*
 * One's complement addition of two 16-bit values: the carry out of
 * bit 15, if any, is added back into the result.
 */
static __inline uint16_t
cksum_add(uint16_t sum, uint16_t a)
{
	const uint32_t wide = (uint32_t)sum + (uint32_t)a;

	/* low 16 bits plus the (0 or 1) carry; cannot overflow again */
	return (uint16_t)((wide & 0xffff) + (wide >> 16));
}
352 
/*
 * Parse "a.b.c.d[:port]" from name.
 *
 * name is modified in place: the ':' introducing the port, if any, is
 * replaced by a NUL. *port is written only when a port is present.
 * *addr receives the address in host byte order; when the address part
 * is not a valid dotted quad, *addr is set to 0 instead of being
 * loaded from an uninitialized struct in_addr.
 */
static void
extract_ipv4_addr(char *name, uint32_t *addr, uint16_t *port)
{
	struct in_addr a;
	char *pp;

	pp = strchr(name, ':');
	if (pp != NULL) {	/* do we have ports ? */
		*pp++ = '\0';
		*port = (uint16_t)strtol(pp, NULL, 0);
	}

	/* inet_pton() leaves its output untouched unless it returns 1 */
	if (inet_pton(AF_INET, name, &a) != 1)
		a.s_addr = 0;
	*addr = ntohl(a.s_addr);
}
368 
/*
 * Parse an IPv6 endpoint from name, which is modified in place.
 *
 * We accept IPv6 address in the following form:
 *  group@[2001:DB8::1001]:port	(w/ brackets and port)
 *  group@[2001:DB8::1]		(w/ brackets and w/o port)
 *  group@2001:DB8::1234	(w/o brackets and w/o port)
 *
 * *group and *port are written only when the corresponding part is
 * present. The group is clamped to 7 when it lies outside 0..7; the
 * clamp is applied before narrowing to uint8_t so that values such
 * as 256 do not wrap to a small index. When the address text is not
 * a valid IPv6 address, *addr is zeroed.
 */
static void
extract_ipv6_addr(char *name, struct in6_addr *addr, uint16_t *port,
    uint8_t *group)
{
	char *pp;
	long g;

	pp = strchr(name, '@');
	if (pp != NULL) {
		*pp++ = '\0';
		g = strtol(name, NULL, 0);
		*group = (g < 0 || g > 7) ? 7 : (uint8_t)g;
		name = pp;
	}
	if (name[0] == '[')
		name++;
	pp = strchr(name, ']');
	if (pp != NULL) {
		*pp++ = '\0';
		/* a port is only accepted right after the closing bracket */
		if (*pp == ':') {
			*pp++ = '\0';
			*port = (uint16_t)strtol(pp, NULL, 0);
		}
	}
	if (inet_pton(AF_INET6, name, addr) != 1)
		memset(addr, 0, sizeof(*addr));
}
402 /*
403  * extract the extremes from a range of ipv4 addresses.
404  * addr_lo[-addr_hi][:port_lo[-port_hi]]
405  */
406 static int
407 extract_ip_range(struct ip_range *r, int af)
408 {
409 	char *name, *ap, start[INET6_ADDRSTRLEN];
410 	char end[INET6_ADDRSTRLEN];
411 	struct in_addr a;
412 	uint32_t tmp;
413 
414 	if (verbose)
415 		D("extract IP range from %s", r->name);
416 
417 	name = strdup(r->name);
418 	if (name == NULL) {
419 		D("strdup failed");
420 		usage(-1);
421 	}
422 	/* the first - splits start/end of range */
423 	ap = strchr(name, '-');
424 	if (ap != NULL)
425 		*ap++ = '\0';
426 	r->port0 = 1234;	/* default port */
427 	if (af == AF_INET6) {
428 		r->ipv6.sgroup = 7; /* default group */
429 		extract_ipv6_addr(name, &r->ipv6.start, &r->port0,
430 		    &r->ipv6.sgroup);
431 	} else
432 		extract_ipv4_addr(name, &r->ipv4.start, &r->port0);
433 
434 	r->port1 = r->port0;
435 	if (af == AF_INET6) {
436 		if (ap != NULL) {
437 			r->ipv6.egroup = r->ipv6.sgroup;
438 			extract_ipv6_addr(ap, &r->ipv6.end, &r->port1,
439 			    &r->ipv6.egroup);
440 		} else {
441 			r->ipv6.end = r->ipv6.start;
442 			r->ipv6.egroup = r->ipv6.sgroup;
443 		}
444 	} else {
445 		if (ap != NULL) {
446 			extract_ipv4_addr(ap, &r->ipv4.end, &r->port1);
447 			if (r->ipv4.start > r->ipv4.end) {
448 				tmp = r->ipv4.end;
449 				r->ipv4.end = r->ipv4.start;
450 				r->ipv4.start = tmp;
451 			}
452 		} else
453 			r->ipv4.end = r->ipv4.start;
454 	}
455 
456 	if (r->port0 > r->port1) {
457 		tmp = r->port0;
458 		r->port0 = r->port1;
459 		r->port1 = tmp;
460 	}
461 	if (af == AF_INET) {
462 		a.s_addr = htonl(r->ipv4.start);
463 		inet_ntop(af, &a, start, sizeof(start));
464 		a.s_addr = htonl(r->ipv4.end);
465 		inet_ntop(af, &a, end, sizeof(end));
466 	} else {
467 		inet_ntop(af, &r->ipv6.start, start, sizeof(start));
468 		inet_ntop(af, &r->ipv6.end, end, sizeof(end));
469 	}
470 	if (af == AF_INET)
471 		D("range is %s:%d to %s:%d", start, r->port0, end, r->port1);
472 	else
473 		D("range is %d@[%s]:%d to %d@[%s]:%d", r->ipv6.sgroup,
474 		    start, r->port0, r->ipv6.egroup, end, r->port1);
475 
476 	free(name);
477 	if (r->port0 != r->port1 ||
478 	    (af == AF_INET && r->ipv4.start != r->ipv4.end) ||
479 	    (af == AF_INET6 &&
480 		!IN6_ARE_ADDR_EQUAL(&r->ipv6.start, &r->ipv6.end)))
481 		return (OPT_COPY);
482 	return (0);
483 }
484 
485 static int
486 extract_mac_range(struct mac_range *r)
487 {
488 	struct ether_addr *e;
489 	if (verbose)
490 	    D("extract MAC range from %s", r->name);
491 
492 	e = ether_aton(r->name);
493 	if (e == NULL) {
494 		D("invalid MAC address '%s'", r->name);
495 		return 1;
496 	}
497 	bcopy(e, &r->start, 6);
498 	bcopy(e, &r->end, 6);
499 #if 0
500 	bcopy(targ->src_mac, eh->ether_shost, 6);
501 	p = index(targ->g->src_mac, '-');
502 	if (p)
503 		targ->src_mac_range = atoi(p+1);
504 
505 	bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6);
506 	bcopy(targ->dst_mac, eh->ether_dhost, 6);
507 	p = index(targ->g->dst_mac, '-');
508 	if (p)
509 		targ->dst_mac_range = atoi(p+1);
510 #endif
511 	if (verbose)
512 		D("%s starts at %s", r->name, ether_ntoa(&r->start));
513 	return 0;
514 }
515 
516 static int
517 get_if_mtu(const struct glob_arg *g)
518 {
519 	char ifname[IFNAMSIZ];
520 	struct ifreq ifreq;
521 	int s, ret;
522 
523 	if (!strncmp(g->ifname, "netmap:", 7) && !strchr(g->ifname, '{')
524 			&& !strchr(g->ifname, '}')) {
525 		/* Parse the interface name and ask the kernel for the
526 		 * MTU value. */
527 		strncpy(ifname, g->ifname+7, IFNAMSIZ-1);
528 		ifname[strcspn(ifname, "-*^{}/@")] = '\0';
529 
530 		s = socket(AF_INET, SOCK_DGRAM, 0);
531 		if (s < 0) {
532 			D("socket() failed: %s", strerror(errno));
533 			return s;
534 		}
535 
536 		memset(&ifreq, 0, sizeof(ifreq));
537 		strncpy(ifreq.ifr_name, ifname, IFNAMSIZ);
538 
539 		ret = ioctl(s, SIOCGIFMTU, &ifreq);
540 		if (ret) {
541 			D("ioctl(SIOCGIFMTU) failed: %s", strerror(errno));
542 		}
543 
544 		return ifreq.ifr_mtu;
545 	}
546 
547 	/* This is a pipe or a VALE port, where the MTU is very large,
548 	 * so we use some practical limit. */
549 	return 65536;
550 }
551 
552 static struct targ *targs;
553 static int global_nthreads;
554 
555 /* control-C handler */
556 static void
557 sigint_h(int sig)
558 {
559 	int i;
560 
561 	(void)sig;	/* UNUSED */
562 	D("received control-C on thread %p", (void *)pthread_self());
563 	for (i = 0; i < global_nthreads; i++) {
564 		targs[i].cancel = 1;
565 	}
566 }
567 
/*
 * Return the number of CPUs reported by the system: hw.ncpu via
 * sysctl() on FreeBSD, sysconf(_SC_NPROCESSORS_ONLN) on Linux,
 * GetSystemInfo() on Windows, and 1 elsewhere. If the query fails or
 * reports less than one CPU, 1 is returned.
 */
static int
system_ncpus(void)
{
	int ncpus = 1;
#if defined (__FreeBSD__)
	int mib[2] = { CTL_HW, HW_NCPU };
	/* len describes the output buffer, i.e. ncpus, not the mib */
	size_t len = sizeof(ncpus);

	if (sysctl(mib, 2, &ncpus, &len, NULL, 0) != 0)
		ncpus = 1;
#elif defined(linux) || defined(__linux__)
	/* "linux" is not predefined in strict ISO modes, "__linux__" is */
	long online = sysconf(_SC_NPROCESSORS_ONLN);

	if (online > 0)
		ncpus = (int)online;
#elif defined(_WIN32)
	{
		SYSTEM_INFO sysinfo;
		GetSystemInfo(&sysinfo);
		ncpus = sysinfo.dwNumberOfProcessors;
	}
#endif /* others keep the default of 1 */
	if (ncpus < 1)
		ncpus = 1;
	return (ncpus);
}
590 
#ifdef __linux__
/* Map the BSD link-level socket address names onto the Linux ones. */
#define sockaddr_dl    sockaddr_ll
#define sdl_family     sll_family
#define AF_LINK        AF_PACKET
/*
 * Hardware address bytes of a struct sockaddr_ll pointer. Argument and
 * expansion are parenthesized and carry no trailing ';', so the macro
 * can be used inside any expression.
 */
#define LLADDR(s)      ((s)->sll_addr)
#include <linux/if_tun.h>
#define TAP_CLONEDEV	"/dev/net/tun"
#endif /* __linux__ */
599 
600 #ifdef __FreeBSD__
601 #include <net/if_tun.h>
602 #define TAP_CLONEDEV	"/dev/tap"
603 #endif /* __FreeBSD */
604 
605 #ifdef __APPLE__
606 // #warning TAP not supported on apple ?
607 #include <net/if_utun.h>
608 #define TAP_CLONEDEV	"/dev/tap"
609 #endif /* __APPLE__ */
610 
611 
612 /*
613  * parse the vale configuration in conf and put it in nmr.
614  * Return the flag set if necessary.
615  * The configuration may consist of 1 to 4 numbers separated
616  * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings.
617  * Missing numbers or zeroes stand for default values.
618  * As an additional convenience, if exactly one number
619  * is specified, then this is assigned to both #tx-slots and #rx-slots.
620  * If there is no 4th number, then the 3rd is assigned to both #tx-rings
621  * and #rx-rings.
622  */
623 int
624 parse_nmr_config(const char* conf, struct nmreq *nmr)
625 {
626 	char *w, *tok;
627 	int i, v;
628 
629 	if (conf == NULL || ! *conf)
630 		return 0;
631 	nmr->nr_tx_rings = nmr->nr_rx_rings = 0;
632 	nmr->nr_tx_slots = nmr->nr_rx_slots = 0;
633 	w = strdup(conf);
634 	for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) {
635 		v = atoi(tok);
636 		switch (i) {
637 		case 0:
638 			nmr->nr_tx_slots = nmr->nr_rx_slots = v;
639 			break;
640 		case 1:
641 			nmr->nr_rx_slots = v;
642 			break;
643 		case 2:
644 			nmr->nr_tx_rings = nmr->nr_rx_rings = v;
645 			break;
646 		case 3:
647 			nmr->nr_rx_rings = v;
648 			break;
649 		default:
650 			D("ignored config: %s", tok);
651 			break;
652 		}
653 	}
654 	D("txr %d txd %d rxr %d rxd %d",
655 			nmr->nr_tx_rings, nmr->nr_tx_slots,
656 			nmr->nr_rx_rings, nmr->nr_rx_slots);
657 	free(w);
658 	return (nmr->nr_tx_rings || nmr->nr_tx_slots ||
659 		nmr->nr_rx_rings || nmr->nr_rx_slots) ?
660 		NM_OPEN_RING_CFG : 0;
661 }
662 
663 
664 /*
665  * locate the src mac address for our interface, put it
666  * into the user-supplied buffer. return 0 if ok, -1 on error.
667  */
668 static int
669 source_hwaddr(const char *ifname, char *buf)
670 {
671 	struct ifaddrs *ifaphead, *ifap;
672 
673 	if (getifaddrs(&ifaphead) != 0) {
674 		D("getifaddrs %s failed", ifname);
675 		return (-1);
676 	}
677 
678 	for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) {
679 		struct sockaddr_dl *sdl =
680 			(struct sockaddr_dl *)ifap->ifa_addr;
681 		uint8_t *mac;
682 
683 		if (!sdl || sdl->sdl_family != AF_LINK)
684 			continue;
685 		if (strncmp(ifap->ifa_name, ifname, IFNAMSIZ) != 0)
686 			continue;
687 		mac = (uint8_t *)LLADDR(sdl);
688 		sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x",
689 			mac[0], mac[1], mac[2],
690 			mac[3], mac[4], mac[5]);
691 		if (verbose)
692 			D("source hwaddr %s", buf);
693 		break;
694 	}
695 	freeifaddrs(ifaphead);
696 	return ifap ? 0 : 1;
697 }
698 
699 
700 /* set the thread affinity. */
701 static int
702 setaffinity(pthread_t me, int i)
703 {
704 	cpuset_t cpumask;
705 
706 	if (i == -1)
707 		return 0;
708 
709 	/* Set thread affinity affinity.*/
710 	CPU_ZERO(&cpumask);
711 	CPU_SET(i, &cpumask);
712 
713 	if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) {
714 		D("Unable to set affinity: %s", strerror(errno));
715 		return 1;
716 	}
717 	return 0;
718 }
719 
720 
/*
 * Accumulate the 16-bit one's complement sum of len bytes at data
 * into sum and return the new partial sum.
 *
 * The bytes are taken as big-endian 16-bit words; an odd trailing
 * byte counts as the high byte of a final word. After each addition
 * the sum is reduced by 0xFFFF if it exceeds 0xFFFF, so calls can be
 * chained by passing a previous result as sum. The result is not
 * complemented: see wrapsum().
 *
 * The words are assembled byte by byte, so data needs no particular
 * alignment and the result does not depend on host byte order.
 */
static uint32_t
checksum(const void *data, uint16_t len, uint32_t sum)
{
	const uint8_t *addr = data;
	uint32_t i;

	/* Checksum all the pairs of bytes first... */
	for (i = 0; i < (len & ~1U); i += 2) {
		sum += ((uint32_t)addr[i] << 8) | (uint32_t)addr[i + 1];
		if (sum > 0xFFFF)
			sum -= 0xFFFF;
	}
	/*
	 * If there's a single byte left over, checksum it, too.
	 * Network byte order is big-endian, so the remaining byte is
	 * the high byte.
	 */
	if (i < len) {
		sum += (uint32_t)addr[i] << 8;
		if (sum > 0xFFFF)
			sum -= 0xFFFF;
	}
	return sum;
}
746 
/*
 * Finish a checksum: complement the low 16 bits of the partial sum
 * and convert the result to network byte order.
 */
static uint16_t
wrapsum(uint32_t sum)
{
	const uint16_t complemented = (uint16_t)(~sum & 0xFFFF);

	return (htons(complemented));
}
753 
754 /* Check the payload of the packet for errors (use it for debug).
755  * Look for consecutive ascii representations of the size of the packet.
756  */
757 static void
758 dump_payload(const char *_p, int len, struct netmap_ring *ring, int cur)
759 {
760 	char buf[128];
761 	int i, j, i0;
762 	const unsigned char *p = (const unsigned char *)_p;
763 
764 	/* get the length in ASCII of the length of the packet. */
765 
766 	printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n",
767 		ring, cur, ring->slot[cur].buf_idx,
768 		ring->slot[cur].flags, len);
769 	/* hexdump routine */
770 	for (i = 0; i < len; ) {
771 		memset(buf, ' ', sizeof(buf));
772 		sprintf(buf, "%5d: ", i);
773 		i0 = i;
774 		for (j=0; j < 16 && i < len; i++, j++)
775 			sprintf(buf+7+j*3, "%02x ", (uint8_t)(p[i]));
776 		i = i0;
777 		for (j=0; j < 16 && i < len; i++, j++)
778 			sprintf(buf+7+j + 48, "%c",
779 				isprint(p[i]) ? p[i] : '.');
780 		printf("%s\n", buf);
781 	}
782 }
783 
784 /*
785  * Fill a packet with some payload.
786  * We create a UDP packet so the payload starts at
787  *	14+20+8 = 42 bytes.
788  */
789 #ifdef __linux__
790 #define uh_sport source
791 #define uh_dport dest
792 #define uh_ulen len
793 #define uh_sum check
794 #endif /* linux */
795 
796 static void
797 update_ip(struct pkt *pkt, struct targ *t)
798 {
799 	struct glob_arg *g = t->g;
800 	struct ip ip;
801 	struct udphdr udp;
802 	uint32_t oaddr, naddr;
803 	uint16_t oport, nport;
804 	uint16_t ip_sum, udp_sum;
805 
806 	memcpy(&ip, &pkt->ipv4.ip, sizeof(ip));
807 	memcpy(&udp, &pkt->ipv4.udp, sizeof(udp));
808 	do {
809 		ip_sum = udp_sum = 0;
810 		naddr = oaddr = ntohl(ip.ip_src.s_addr);
811 		nport = oport = ntohs(udp.uh_sport);
812 		if (g->options & OPT_RANDOM_SRC) {
813 			ip.ip_src.s_addr = nrand48(t->seed);
814 			udp.uh_sport = nrand48(t->seed);
815 			naddr = ntohl(ip.ip_src.s_addr);
816 			nport = ntohs(udp.uh_sport);
817 			break;
818 		}
819 		if (oport < g->src_ip.port1) {
820 			nport = oport + 1;
821 			udp.uh_sport = htons(nport);
822 			break;
823 		}
824 		nport = g->src_ip.port0;
825 		udp.uh_sport = htons(nport);
826 		if (oaddr < g->src_ip.ipv4.end) {
827 			naddr = oaddr + 1;
828 			ip.ip_src.s_addr = htonl(naddr);
829 			break;
830 		}
831 		naddr = g->src_ip.ipv4.start;
832 		ip.ip_src.s_addr = htonl(naddr);
833 	} while (0);
834 	/* update checksums if needed */
835 	if (oaddr != naddr) {
836 		ip_sum = cksum_add(ip_sum, ~oaddr >> 16);
837 		ip_sum = cksum_add(ip_sum, ~oaddr & 0xffff);
838 		ip_sum = cksum_add(ip_sum, naddr >> 16);
839 		ip_sum = cksum_add(ip_sum, naddr & 0xffff);
840 	}
841 	if (oport != nport) {
842 		udp_sum = cksum_add(udp_sum, ~oport);
843 		udp_sum = cksum_add(udp_sum, nport);
844 	}
845 	do {
846 		naddr = oaddr = ntohl(ip.ip_dst.s_addr);
847 		nport = oport = ntohs(udp.uh_dport);
848 		if (g->options & OPT_RANDOM_DST) {
849 			ip.ip_dst.s_addr = nrand48(t->seed);
850 			udp.uh_dport = nrand48(t->seed);
851 			naddr = ntohl(ip.ip_dst.s_addr);
852 			nport = ntohs(udp.uh_dport);
853 			break;
854 		}
855 		if (oport < g->dst_ip.port1) {
856 			nport = oport + 1;
857 			udp.uh_dport = htons(nport);
858 			break;
859 		}
860 		nport = g->dst_ip.port0;
861 		udp.uh_dport = htons(nport);
862 		if (oaddr < g->dst_ip.ipv4.end) {
863 			naddr = oaddr + 1;
864 			ip.ip_dst.s_addr = htonl(naddr);
865 			break;
866 		}
867 		naddr = g->dst_ip.ipv4.start;
868 		ip.ip_dst.s_addr = htonl(naddr);
869 	} while (0);
870 	/* update checksums */
871 	if (oaddr != naddr) {
872 		ip_sum = cksum_add(ip_sum, ~oaddr >> 16);
873 		ip_sum = cksum_add(ip_sum, ~oaddr & 0xffff);
874 		ip_sum = cksum_add(ip_sum, naddr >> 16);
875 		ip_sum = cksum_add(ip_sum, naddr & 0xffff);
876 	}
877 	if (oport != nport) {
878 		udp_sum = cksum_add(udp_sum, ~oport);
879 		udp_sum = cksum_add(udp_sum, nport);
880 	}
881 	if (udp_sum != 0)
882 		udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(udp_sum));
883 	if (ip_sum != 0) {
884 		ip.ip_sum = ~cksum_add(~ip.ip_sum, htons(ip_sum));
885 		udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(ip_sum));
886 	}
887 	memcpy(&pkt->ipv4.ip, &ip, sizeof(ip));
888 	memcpy(&pkt->ipv4.udp, &udp, sizeof(udp));
889 }
890 
891 #ifndef s6_addr16
892 #define	s6_addr16	__u6_addr.__u6_addr16
893 #endif
894 static void
895 update_ip6(struct pkt *pkt, struct targ *t)
896 {
897 	struct glob_arg *g = t->g;
898 	struct ip6_hdr ip6;
899 	struct udphdr udp;
900 	uint16_t udp_sum;
901 	uint16_t oaddr, naddr;
902 	uint16_t oport, nport;
903 	uint8_t group;
904 
905 	memcpy(&ip6, &pkt->ipv6.ip, sizeof(ip6));
906 	memcpy(&udp, &pkt->ipv6.udp, sizeof(udp));
907 	do {
908 		udp_sum = 0;
909 		group = g->src_ip.ipv6.sgroup;
910 		naddr = oaddr = ntohs(ip6.ip6_src.s6_addr16[group]);
911 		nport = oport = ntohs(udp.uh_sport);
912 		if (g->options & OPT_RANDOM_SRC) {
913 			ip6.ip6_src.s6_addr16[group] = nrand48(t->seed);
914 			udp.uh_sport = nrand48(t->seed);
915 			naddr = ntohs(ip6.ip6_src.s6_addr16[group]);
916 			nport = ntohs(udp.uh_sport);
917 			break;
918 		}
919 		if (oport < g->src_ip.port1) {
920 			nport = oport + 1;
921 			udp.uh_sport = htons(nport);
922 			break;
923 		}
924 		nport = g->src_ip.port0;
925 		udp.uh_sport = htons(nport);
926 		if (oaddr < ntohs(g->src_ip.ipv6.end.s6_addr16[group])) {
927 			naddr = oaddr + 1;
928 			ip6.ip6_src.s6_addr16[group] = htons(naddr);
929 			break;
930 		}
931 		naddr = ntohs(g->src_ip.ipv6.start.s6_addr16[group]);
932 		ip6.ip6_src.s6_addr16[group] = htons(naddr);
933 	} while (0);
934 	/* update checksums if needed */
935 	if (oaddr != naddr)
936 		udp_sum = cksum_add(~oaddr, naddr);
937 	if (oport != nport)
938 		udp_sum = cksum_add(udp_sum,
939 		    cksum_add(~oport, nport));
940 	do {
941 		group = g->dst_ip.ipv6.egroup;
942 		naddr = oaddr = ntohs(ip6.ip6_dst.s6_addr16[group]);
943 		nport = oport = ntohs(udp.uh_dport);
944 		if (g->options & OPT_RANDOM_DST) {
945 			ip6.ip6_dst.s6_addr16[group] = nrand48(t->seed);
946 			udp.uh_dport = nrand48(t->seed);
947 			naddr = ntohs(ip6.ip6_dst.s6_addr16[group]);
948 			nport = ntohs(udp.uh_dport);
949 			break;
950 		}
951 		if (oport < g->dst_ip.port1) {
952 			nport = oport + 1;
953 			udp.uh_dport = htons(nport);
954 			break;
955 		}
956 		nport = g->dst_ip.port0;
957 		udp.uh_dport = htons(nport);
958 		if (oaddr < ntohs(g->dst_ip.ipv6.end.s6_addr16[group])) {
959 			naddr = oaddr + 1;
960 			ip6.ip6_dst.s6_addr16[group] = htons(naddr);
961 			break;
962 		}
963 		naddr = ntohs(g->dst_ip.ipv6.start.s6_addr16[group]);
964 		ip6.ip6_dst.s6_addr16[group] = htons(naddr);
965 	} while (0);
966 	/* update checksums */
967 	if (oaddr != naddr)
968 		udp_sum = cksum_add(udp_sum,
969 		    cksum_add(~oaddr, naddr));
970 	if (oport != nport)
971 		udp_sum = cksum_add(udp_sum,
972 		    cksum_add(~oport, nport));
973 	if (udp_sum != 0)
974 		udp.uh_sum = ~cksum_add(~udp.uh_sum, udp_sum);
975 	memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6));
976 	memcpy(&pkt->ipv6.udp, &udp, sizeof(udp));
977 }
978 
979 static void
980 update_addresses(struct pkt *pkt, struct targ *t)
981 {
982 
983 	if (t->g->af == AF_INET)
984 		update_ip(pkt, t);
985 	else
986 		update_ip6(pkt, t);
987 }
988 /*
989  * initialize one packet and prepare for the next one.
990  * The copy could be done better instead of repeating it each time.
991  */
992 static void
993 initialize_packet(struct targ *targ)
994 {
995 	struct pkt *pkt = &targ->pkt;
996 	struct ether_header *eh;
997 	struct ip6_hdr ip6;
998 	struct ip ip;
999 	struct udphdr udp;
1000 	void *udp_ptr;
1001 	uint16_t paylen;
1002 	uint32_t csum = 0;
1003 	const char *payload = targ->g->options & OPT_INDIRECT ?
1004 		indirect_payload : default_payload;
1005 	int i, l0 = strlen(payload);
1006 
1007 #ifndef NO_PCAP
1008 	char errbuf[PCAP_ERRBUF_SIZE];
1009 	pcap_t *file;
1010 	struct pcap_pkthdr *header;
1011 	const unsigned char *packet;
1012 
1013 	/* Read a packet from a PCAP file if asked. */
1014 	if (targ->g->packet_file != NULL) {
1015 		if ((file = pcap_open_offline(targ->g->packet_file,
1016 			    errbuf)) == NULL)
1017 			D("failed to open pcap file %s",
1018 			    targ->g->packet_file);
1019 		if (pcap_next_ex(file, &header, &packet) < 0)
1020 			D("failed to read packet from %s",
1021 			    targ->g->packet_file);
1022 		if ((targ->frame = malloc(header->caplen)) == NULL)
1023 			D("out of memory");
1024 		bcopy(packet, (unsigned char *)targ->frame, header->caplen);
1025 		targ->g->pkt_size = header->caplen;
1026 		pcap_close(file);
1027 		return;
1028 	}
1029 #endif
1030 
1031 	paylen = targ->g->pkt_size - sizeof(*eh) -
1032 	    (targ->g->af == AF_INET ? sizeof(ip): sizeof(ip6));
1033 
1034 	/* create a nice NUL-terminated string */
1035 	for (i = 0; i < paylen; i += l0) {
1036 		if (l0 > paylen - i)
1037 			l0 = paylen - i; // last round
1038 		bcopy(payload, PKT(pkt, body, targ->g->af) + i, l0);
1039 	}
1040 	PKT(pkt, body, targ->g->af)[i - 1] = '\0';
1041 
1042 	/* prepare the headers */
1043 	eh = &pkt->eh;
1044 	bcopy(&targ->g->src_mac.start, eh->ether_shost, 6);
1045 	bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6);
1046 
1047 	if (targ->g->af == AF_INET) {
1048 		eh->ether_type = htons(ETHERTYPE_IP);
1049 		memcpy(&ip, &pkt->ipv4.ip, sizeof(ip));
1050 		udp_ptr = &pkt->ipv4.udp;
1051 		ip.ip_v = IPVERSION;
1052 		ip.ip_hl = sizeof(ip) >> 2;
1053 		ip.ip_id = 0;
1054 		ip.ip_tos = IPTOS_LOWDELAY;
1055 		ip.ip_len = htons(targ->g->pkt_size - sizeof(*eh));
1056 		ip.ip_id = 0;
1057 		ip.ip_off = htons(IP_DF); /* Don't fragment */
1058 		ip.ip_ttl = IPDEFTTL;
1059 		ip.ip_p = IPPROTO_UDP;
1060 		ip.ip_dst.s_addr = htonl(targ->g->dst_ip.ipv4.start);
1061 		ip.ip_src.s_addr = htonl(targ->g->src_ip.ipv4.start);
1062 		ip.ip_sum = wrapsum(checksum(&ip, sizeof(ip), 0));
1063 		memcpy(&pkt->ipv4.ip, &ip, sizeof(ip));
1064 	} else {
1065 		eh->ether_type = htons(ETHERTYPE_IPV6);
1066 		memcpy(&ip6, &pkt->ipv4.ip, sizeof(ip6));
1067 		udp_ptr = &pkt->ipv6.udp;
1068 		ip6.ip6_flow = 0;
1069 		ip6.ip6_plen = htons(paylen);
1070 		ip6.ip6_vfc = IPV6_VERSION;
1071 		ip6.ip6_nxt = IPPROTO_UDP;
1072 		ip6.ip6_hlim = IPV6_DEFHLIM;
1073 		ip6.ip6_src = targ->g->src_ip.ipv6.start;
1074 		ip6.ip6_dst = targ->g->dst_ip.ipv6.start;
1075 	}
1076 	memcpy(&udp, udp_ptr, sizeof(udp));
1077 
1078 	udp.uh_sport = htons(targ->g->src_ip.port0);
1079 	udp.uh_dport = htons(targ->g->dst_ip.port0);
1080 	udp.uh_ulen = htons(paylen);
1081 	if (targ->g->af == AF_INET) {
1082 		/* Magic: taken from sbin/dhclient/packet.c */
1083 		udp.uh_sum = wrapsum(
1084 		    checksum(&udp, sizeof(udp),	/* udp header */
1085 		    checksum(pkt->ipv4.body,	/* udp payload */
1086 		    paylen - sizeof(udp),
1087 		    checksum(&pkt->ipv4.ip.ip_src, /* pseudo header */
1088 			2 * sizeof(pkt->ipv4.ip.ip_src),
1089 			IPPROTO_UDP + (u_int32_t)ntohs(udp.uh_ulen)))));
1090 		memcpy(&pkt->ipv4.ip, &ip, sizeof(ip));
1091 	} else {
1092 		/* Save part of pseudo header checksum into csum */
1093 		csum = IPPROTO_UDP << 24;
1094 		csum = checksum(&csum, sizeof(csum), paylen);
1095 		udp.uh_sum = wrapsum(
1096 		    checksum(udp_ptr, sizeof(udp),	/* udp header */
1097 		    checksum(pkt->ipv6.body,	/* udp payload */
1098 		    paylen - sizeof(udp),
1099 		    checksum(&pkt->ipv6.ip.ip6_src, /* pseudo header */
1100 			2 * sizeof(pkt->ipv6.ip.ip6_src), csum))));
1101 		memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6));
1102 	}
1103 	memcpy(udp_ptr, &udp, sizeof(udp));
1104 
1105 	bzero(&pkt->vh, sizeof(pkt->vh));
1106 	// dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0);
1107 }
1108 
1109 static void
1110 get_vnet_hdr_len(struct glob_arg *g)
1111 {
1112 	struct nmreq req;
1113 	int err;
1114 
1115 	memset(&req, 0, sizeof(req));
1116 	bcopy(g->nmd->req.nr_name, req.nr_name, sizeof(req.nr_name));
1117 	req.nr_version = NETMAP_API;
1118 	req.nr_cmd = NETMAP_VNET_HDR_GET;
1119 	err = ioctl(g->main_fd, NIOCREGIF, &req);
1120 	if (err) {
1121 		D("Unable to get virtio-net header length");
1122 		return;
1123 	}
1124 
1125 	g->virt_header = req.nr_arg1;
1126 	if (g->virt_header) {
1127 		D("Port requires virtio-net header, length = %d",
1128 		  g->virt_header);
1129 	}
1130 }
1131 
1132 static void
1133 set_vnet_hdr_len(struct glob_arg *g)
1134 {
1135 	int err, l = g->virt_header;
1136 	struct nmreq req;
1137 
1138 	if (l == 0)
1139 		return;
1140 
1141 	memset(&req, 0, sizeof(req));
1142 	bcopy(g->nmd->req.nr_name, req.nr_name, sizeof(req.nr_name));
1143 	req.nr_version = NETMAP_API;
1144 	req.nr_cmd = NETMAP_BDG_VNET_HDR;
1145 	req.nr_arg1 = l;
1146 	err = ioctl(g->main_fd, NIOCREGIF, &req);
1147 	if (err) {
1148 		D("Unable to set virtio-net header length %d", l);
1149 	}
1150 }
1151 
1152 /*
1153  * create and enqueue a batch of packets on a ring.
1154  * On the last one set NS_REPORT to tell the driver to generate
1155  * an interrupt when done.
1156  */
1157 static int
1158 send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame,
1159 		int size, struct targ *t, u_int count, int options)
1160 {
1161 	u_int n, sent, head = ring->head;
1162 	u_int frags = t->frags;
1163 	u_int frag_size = t->frag_size;
1164 	struct netmap_slot *slot = &ring->slot[head];
1165 
1166 	n = nm_ring_space(ring);
1167 #if 0
1168 	if (options & (OPT_COPY | OPT_PREFETCH) ) {
1169 		for (sent = 0; sent < count; sent++) {
1170 			struct netmap_slot *slot = &ring->slot[head];
1171 			char *p = NETMAP_BUF(ring, slot->buf_idx);
1172 
1173 			__builtin_prefetch(p);
1174 			head = nm_ring_next(ring, head);
1175 		}
1176 		head = ring->head;
1177 	}
1178 #endif
1179 	for (sent = 0; sent < count && n >= frags; sent++, n--) {
1180 		char *p;
1181 		int buf_changed;
1182 		u_int tosend = size;
1183 
1184 		slot = &ring->slot[head];
1185 		p = NETMAP_BUF(ring, slot->buf_idx);
1186 		buf_changed = slot->flags & NS_BUF_CHANGED;
1187 
1188 		slot->flags = 0;
1189 		if (options & OPT_RUBBISH) {
1190 			/* do nothing */
1191 		} else if (options & OPT_INDIRECT) {
1192 			slot->flags |= NS_INDIRECT;
1193 			slot->ptr = (uint64_t)((uintptr_t)frame);
1194 		} else if (frags > 1) {
1195 			u_int i;
1196 			const char *f = frame;
1197 			char *fp = p;
1198 			for (i = 0; i < frags - 1; i++) {
1199 				memcpy(fp, f, frag_size);
1200 				slot->len = frag_size;
1201 				slot->flags = NS_MOREFRAG;
1202 				if (options & OPT_DUMP)
1203 					dump_payload(fp, frag_size, ring, head);
1204 				tosend -= frag_size;
1205 				f += frag_size;
1206 				head = nm_ring_next(ring, head);
1207 				slot = &ring->slot[head];
1208 				fp = NETMAP_BUF(ring, slot->buf_idx);
1209 			}
1210 			n -= (frags - 1);
1211 			p = fp;
1212 			slot->flags = 0;
1213 			memcpy(p, f, tosend);
1214 			update_addresses(pkt, t);
1215 		} else if ((options & (OPT_COPY | OPT_MEMCPY)) || buf_changed) {
1216 			if (options & OPT_COPY)
1217 				nm_pkt_copy(frame, p, size);
1218 			else
1219 				memcpy(p, frame, size);
1220 			update_addresses(pkt, t);
1221 		} else if (options & OPT_PREFETCH) {
1222 			__builtin_prefetch(p);
1223 		}
1224 		slot->len = tosend;
1225 		if (options & OPT_DUMP)
1226 			dump_payload(p, tosend, ring, head);
1227 		head = nm_ring_next(ring, head);
1228 	}
1229 	if (sent) {
1230 		slot->flags |= NS_REPORT;
1231 		ring->head = ring->cur = head;
1232 	}
1233 	if (sent < count) {
1234 		/* tell netmap that we need more slots */
1235 		ring->cur = ring->tail;
1236 	}
1237 
1238 	return (sent);
1239 }
1240 
/*
 * Index (0..63) of the highest bit set in x.
 * Note that 0 is returned both for x == 1 and for x == 0.
 */
uint32_t
msb64(uint64_t x)
{
	uint32_t pos = 0;

	/* drop one low bit at a time until nothing is left above it */
	while ((x >>= 1) != 0)
		pos++;
	return pos;
}
1255 
1256 /*
1257  * wait until ts, either busy or sleeping if more than 1ms.
1258  * Return wakeup time.
1259  */
1260 static struct timespec
1261 wait_time(struct timespec ts)
1262 {
1263 	for (;;) {
1264 		struct timespec w, cur;
1265 		clock_gettime(CLOCK_REALTIME_PRECISE, &cur);
1266 		w = timespec_sub(ts, cur);
1267 		if (w.tv_sec < 0)
1268 			return cur;
1269 		else if (w.tv_sec > 0 || w.tv_nsec > 1000000)
1270 			poll(NULL, 0, 1);
1271 	}
1272 }
1273 
1274 /*
1275  * Send a packet, and wait for a response.
1276  * The payload (after UDP header, ofs 42) has a 4-byte sequence
1277  * followed by a struct timeval (or bintime?)
1278  */
1279 
1280 static void *
1281 ping_body(void *data)
1282 {
1283 	struct targ *targ = (struct targ *) data;
1284 	struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
1285 	struct netmap_if *nifp = targ->nmd->nifp;
1286 	int i, m, rx = 0;
1287 	void *frame;
1288 	int size;
1289 	struct timespec ts, now, last_print;
1290 	struct timespec nexttime = {0, 0}; /* silence compiler */
1291 	uint64_t sent = 0, n = targ->g->npackets;
1292 	uint64_t count = 0, t_cur, t_min = ~0, av = 0;
1293 	uint64_t g_min = ~0, g_av = 0;
1294 	uint64_t buckets[64];	/* bins for delays, ns */
1295 	int rate_limit = targ->g->tx_rate, tosend = 0;
1296 
1297 	frame = (char*)&targ->pkt + sizeof(targ->pkt.vh) - targ->g->virt_header;
1298 	size = targ->g->pkt_size + targ->g->virt_header;
1299 
1300 
1301 	if (targ->g->nthreads > 1) {
1302 		D("can only ping with 1 thread");
1303 		return NULL;
1304 	}
1305 
1306 	bzero(&buckets, sizeof(buckets));
1307 	clock_gettime(CLOCK_REALTIME_PRECISE, &last_print);
1308 	now = last_print;
1309 	if (rate_limit) {
1310 		targ->tic = timespec_add(now, (struct timespec){2,0});
1311 		targ->tic.tv_nsec = 0;
1312 		wait_time(targ->tic);
1313 		nexttime = targ->tic;
1314 	}
1315 	while (!targ->cancel && (n == 0 || sent < n)) {
1316 		struct netmap_ring *ring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring);
1317 		struct netmap_slot *slot;
1318 		char *p;
1319 		int rv;
1320 		uint64_t limit, event = 0;
1321 
1322 		if (rate_limit && tosend <= 0) {
1323 			tosend = targ->g->burst;
1324 			nexttime = timespec_add(nexttime, targ->g->tx_period);
1325 			wait_time(nexttime);
1326 		}
1327 
1328 		limit = rate_limit ? tosend : targ->g->burst;
1329 		if (n > 0 && n - sent < limit)
1330 			limit = n - sent;
1331 		for (m = 0; (unsigned)m < limit; m++) {
1332 			slot = &ring->slot[ring->head];
1333 			slot->len = size;
1334 			p = NETMAP_BUF(ring, slot->buf_idx);
1335 
1336 			if (nm_ring_empty(ring)) {
1337 				D("-- ouch, cannot send");
1338 				break;
1339 			} else {
1340 				struct tstamp *tp;
1341 				nm_pkt_copy(frame, p, size);
1342 				clock_gettime(CLOCK_REALTIME_PRECISE, &ts);
1343 				bcopy(&sent, p+42, sizeof(sent));
1344 				tp = (struct tstamp *)(p+46);
1345 				tp->sec = (uint32_t)ts.tv_sec;
1346 				tp->nsec = (uint32_t)ts.tv_nsec;
1347 				sent++;
1348 				ring->head = ring->cur = nm_ring_next(ring, ring->head);
1349 			}
1350 		}
1351 		if (m > 0)
1352 			event++;
1353 		targ->ctr.pkts = sent;
1354 		targ->ctr.bytes = sent*size;
1355 		targ->ctr.events = event;
1356 		if (rate_limit)
1357 			tosend -= m;
1358 #ifdef BUSYWAIT
1359 		rv = ioctl(pfd.fd, NIOCTXSYNC, NULL);
1360 		if (rv < 0) {
1361 			D("TXSYNC error on queue %d: %s", targ->me,
1362 				strerror(errno));
1363 		}
1364 	again:
1365 		ioctl(pfd.fd, NIOCRXSYNC, NULL);
1366 #else
1367 		/* should use a parameter to decide how often to send */
1368 		if ( (rv = poll(&pfd, 1, 3000)) <= 0) {
1369 			D("poll error on queue %d: %s", targ->me,
1370 				(rv ? strerror(errno) : "timeout"));
1371 			continue;
1372 		}
1373 #endif /* BUSYWAIT */
1374 		/* see what we got back */
1375 		rx = 0;
1376 		for (i = targ->nmd->first_rx_ring;
1377 			i <= targ->nmd->last_rx_ring; i++) {
1378 			ring = NETMAP_RXRING(nifp, i);
1379 			while (!nm_ring_empty(ring)) {
1380 				uint32_t seq;
1381 				struct tstamp *tp;
1382 				int pos;
1383 
1384 				slot = &ring->slot[ring->head];
1385 				p = NETMAP_BUF(ring, slot->buf_idx);
1386 
1387 				clock_gettime(CLOCK_REALTIME_PRECISE, &now);
1388 				bcopy(p+42, &seq, sizeof(seq));
1389 				tp = (struct tstamp *)(p+46);
1390 				ts.tv_sec = (time_t)tp->sec;
1391 				ts.tv_nsec = (long)tp->nsec;
1392 				ts.tv_sec = now.tv_sec - ts.tv_sec;
1393 				ts.tv_nsec = now.tv_nsec - ts.tv_nsec;
1394 				if (ts.tv_nsec < 0) {
1395 					ts.tv_nsec += 1000000000;
1396 					ts.tv_sec--;
1397 				}
1398 				if (0) D("seq %d/%llu delta %d.%09d", seq,
1399 					(unsigned long long)sent,
1400 					(int)ts.tv_sec, (int)ts.tv_nsec);
1401 				t_cur = ts.tv_sec * 1000000000UL + ts.tv_nsec;
1402 				if (t_cur < t_min)
1403 					t_min = t_cur;
1404 				count ++;
1405 				av += t_cur;
1406 				pos = msb64(t_cur);
1407 				buckets[pos]++;
1408 				/* now store it in a bucket */
1409 				ring->head = ring->cur = nm_ring_next(ring, ring->head);
1410 				rx++;
1411 			}
1412 		}
1413 		//D("tx %d rx %d", sent, rx);
1414 		//usleep(100000);
1415 		ts.tv_sec = now.tv_sec - last_print.tv_sec;
1416 		ts.tv_nsec = now.tv_nsec - last_print.tv_nsec;
1417 		if (ts.tv_nsec < 0) {
1418 			ts.tv_nsec += 1000000000;
1419 			ts.tv_sec--;
1420 		}
1421 		if (ts.tv_sec >= 1) {
1422 			D("count %d RTT: min %d av %d ns",
1423 				(int)count, (int)t_min, (int)(av/count));
1424 			int k, j, kmin, off;
1425 			char buf[512];
1426 
1427 			for (kmin = 0; kmin < 64; kmin ++)
1428 				if (buckets[kmin])
1429 					break;
1430 			for (k = 63; k >= kmin; k--)
1431 				if (buckets[k])
1432 					break;
1433 			buf[0] = '\0';
1434 			off = 0;
1435 			for (j = kmin; j <= k; j++) {
1436 				off += sprintf(buf + off, " %5d", (int)buckets[j]);
1437 			}
1438 			D("k: %d .. %d\n\t%s", 1<<kmin, 1<<k, buf);
1439 			bzero(&buckets, sizeof(buckets));
1440 			count = 0;
1441 			g_av += av;
1442 			av = 0;
1443 			if (t_min < g_min)
1444 				g_min = t_min;
1445 			t_min = ~0;
1446 			last_print = now;
1447 		}
1448 #ifdef BUSYWAIT
1449 		if (rx < m && ts.tv_sec <= 3 && !targ->cancel)
1450 			goto again;
1451 #endif /* BUSYWAIT */
1452 	}
1453 
1454 	if (sent > 0) {
1455 		D("RTT over %llu packets: min %d av %d ns",
1456 			(long long unsigned)sent, (int)g_min,
1457 			(int)((double)g_av/sent));
1458 	}
1459 	targ->completed = 1;
1460 
1461 	/* reset the ``used`` flag. */
1462 	targ->used = 0;
1463 
1464 	return NULL;
1465 }
1466 
1467 
1468 /*
1469  * reply to ping requests
1470  */
1471 static void *
1472 pong_body(void *data)
1473 {
1474 	struct targ *targ = (struct targ *) data;
1475 	struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
1476 	struct netmap_if *nifp = targ->nmd->nifp;
1477 	struct netmap_ring *txring, *rxring;
1478 	int i, rx = 0;
1479 	uint64_t sent = 0, n = targ->g->npackets;
1480 
1481 	if (targ->g->nthreads > 1) {
1482 		D("can only reply ping with 1 thread");
1483 		return NULL;
1484 	}
1485 	if (n > 0)
1486 		D("understood ponger %llu but don't know how to do it",
1487 			(unsigned long long)n);
1488 	while (!targ->cancel && (n == 0 || sent < n)) {
1489 		uint32_t txhead, txavail;
1490 //#define BUSYWAIT
1491 #ifdef BUSYWAIT
1492 		ioctl(pfd.fd, NIOCRXSYNC, NULL);
1493 #else
1494 		int rv;
1495 		if ( (rv = poll(&pfd, 1, 1000)) <= 0) {
1496 			D("poll error on queue %d: %s", targ->me,
1497 				rv ? strerror(errno) : "timeout");
1498 			continue;
1499 		}
1500 #endif
1501 		txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring);
1502 		txhead = txring->head;
1503 		txavail = nm_ring_space(txring);
1504 		/* see what we got back */
1505 		for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) {
1506 			rxring = NETMAP_RXRING(nifp, i);
1507 			while (!nm_ring_empty(rxring)) {
1508 				uint16_t *spkt, *dpkt;
1509 				uint32_t head = rxring->head;
1510 				struct netmap_slot *slot = &rxring->slot[head];
1511 				char *src, *dst;
1512 				src = NETMAP_BUF(rxring, slot->buf_idx);
1513 				//D("got pkt %p of size %d", src, slot->len);
1514 				rxring->head = rxring->cur = nm_ring_next(rxring, head);
1515 				rx++;
1516 				if (txavail == 0)
1517 					continue;
1518 				dst = NETMAP_BUF(txring,
1519 				    txring->slot[txhead].buf_idx);
1520 				/* copy... */
1521 				dpkt = (uint16_t *)dst;
1522 				spkt = (uint16_t *)src;
1523 				nm_pkt_copy(src, dst, slot->len);
1524 				/* swap source and destination MAC */
1525 				dpkt[0] = spkt[3];
1526 				dpkt[1] = spkt[4];
1527 				dpkt[2] = spkt[5];
1528 				dpkt[3] = spkt[0];
1529 				dpkt[4] = spkt[1];
1530 				dpkt[5] = spkt[2];
1531 				txring->slot[txhead].len = slot->len;
1532 				txhead = nm_ring_next(txring, txhead);
1533 				txavail--;
1534 				sent++;
1535 			}
1536 		}
1537 		txring->head = txring->cur = txhead;
1538 		targ->ctr.pkts = sent;
1539 #ifdef BUSYWAIT
1540 		ioctl(pfd.fd, NIOCTXSYNC, NULL);
1541 #endif
1542 		//D("tx %d rx %d", sent, rx);
1543 	}
1544 
1545 	targ->completed = 1;
1546 
1547 	/* reset the ``used`` flag. */
1548 	targ->used = 0;
1549 
1550 	return NULL;
1551 }
1552 
1553 
1554 static void *
1555 sender_body(void *data)
1556 {
1557 	struct targ *targ = (struct targ *) data;
1558 	struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT };
1559 	struct netmap_if *nifp;
1560 	struct netmap_ring *txring = NULL;
1561 	int i;
1562 	uint64_t n = targ->g->npackets / targ->g->nthreads;
1563 	uint64_t sent = 0;
1564 	uint64_t event = 0;
1565 	int options = targ->g->options | OPT_COPY;
1566 	struct timespec nexttime = { 0, 0}; // XXX silence compiler
1567 	int rate_limit = targ->g->tx_rate;
1568 	struct pkt *pkt = &targ->pkt;
1569 	void *frame;
1570 	int size;
1571 
1572 	if (targ->frame == NULL) {
1573 		frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header;
1574 		size = targ->g->pkt_size + targ->g->virt_header;
1575 	} else {
1576 		frame = targ->frame;
1577 		size = targ->g->pkt_size;
1578 	}
1579 
1580 	D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd);
1581 	if (setaffinity(targ->thread, targ->affinity))
1582 		goto quit;
1583 
1584 	/* main loop.*/
1585 	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
1586 	if (rate_limit) {
1587 		targ->tic = timespec_add(targ->tic, (struct timespec){2,0});
1588 		targ->tic.tv_nsec = 0;
1589 		wait_time(targ->tic);
1590 		nexttime = targ->tic;
1591 	}
1592 	if (targ->g->dev_type == DEV_TAP) {
1593 	    D("writing to file desc %d", targ->g->main_fd);
1594 
1595 	    for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
1596 		if (write(targ->g->main_fd, frame, size) != -1)
1597 			sent++;
1598 		update_addresses(pkt, targ);
1599 		if (i > 10000) {
1600 			targ->ctr.pkts = sent;
1601 			targ->ctr.bytes = sent*size;
1602 			targ->ctr.events = sent;
1603 			i = 0;
1604 		}
1605 	    }
1606 #ifndef NO_PCAP
1607     } else if (targ->g->dev_type == DEV_PCAP) {
1608 	    pcap_t *p = targ->g->p;
1609 
1610 	    for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
1611 		if (pcap_inject(p, frame, size) != -1)
1612 			sent++;
1613 		update_addresses(pkt, targ);
1614 		if (i > 10000) {
1615 			targ->ctr.pkts = sent;
1616 			targ->ctr.bytes = sent*size;
1617 			targ->ctr.events = sent;
1618 			i = 0;
1619 		}
1620 	    }
1621 #endif /* NO_PCAP */
1622     } else {
1623 	int tosend = 0;
1624 	u_int bufsz, frag_size = targ->g->frag_size;
1625 
1626 	nifp = targ->nmd->nifp;
1627 	txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring);
1628 	bufsz = txring->nr_buf_size;
1629 	if (bufsz < frag_size)
1630 		frag_size = bufsz;
1631 	targ->frag_size = targ->g->pkt_size / targ->frags;
1632 	if (targ->frag_size > frag_size) {
1633 		targ->frags = targ->g->pkt_size / frag_size;
1634 		targ->frag_size = frag_size;
1635 		if (targ->g->pkt_size % frag_size != 0)
1636 			targ->frags++;
1637 	}
1638 	D("frags %u frag_size %u", targ->frags, targ->frag_size);
1639 	while (!targ->cancel && (n == 0 || sent < n)) {
1640 		int rv;
1641 
1642 		if (rate_limit && tosend <= 0) {
1643 			tosend = targ->g->burst;
1644 			nexttime = timespec_add(nexttime, targ->g->tx_period);
1645 			wait_time(nexttime);
1646 		}
1647 
1648 		/*
1649 		 * wait for available room in the send queue(s)
1650 		 */
1651 #ifdef BUSYWAIT
1652 		(void)rv;
1653 		if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) {
1654 			D("ioctl error on queue %d: %s", targ->me,
1655 					strerror(errno));
1656 			goto quit;
1657 		}
1658 #else /* !BUSYWAIT */
1659 		if ( (rv = poll(&pfd, 1, 2000)) <= 0) {
1660 			if (targ->cancel)
1661 				break;
1662 			D("poll error on queue %d: %s", targ->me,
1663 				rv ? strerror(errno) : "timeout");
1664 			// goto quit;
1665 		}
1666 		if (pfd.revents & POLLERR) {
1667 			D("poll error on %d ring %d-%d", pfd.fd,
1668 				targ->nmd->first_tx_ring, targ->nmd->last_tx_ring);
1669 			goto quit;
1670 		}
1671 #endif /* !BUSYWAIT */
1672 		/*
1673 		 * scan our queues and send on those with room
1674 		 */
1675 		if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) {
1676 			D("drop copy");
1677 			options &= ~OPT_COPY;
1678 		}
1679 		for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
1680 			int m;
1681 			uint64_t limit = rate_limit ?  tosend : targ->g->burst;
1682 
1683 			if (n > 0 && n == sent)
1684 				break;
1685 
1686 			if (n > 0 && n - sent < limit)
1687 				limit = n - sent;
1688 			txring = NETMAP_TXRING(nifp, i);
1689 			if (nm_ring_empty(txring))
1690 				continue;
1691 
1692 			if (targ->g->pkt_min_size > 0) {
1693 				size = nrand48(targ->seed) %
1694 					(targ->g->pkt_size - targ->g->pkt_min_size) +
1695 					targ->g->pkt_min_size;
1696 			}
1697 			m = send_packets(txring, pkt, frame, size, targ,
1698 					 limit, options);
1699 			ND("limit %lu tail %d m %d",
1700 				limit, txring->tail, m);
1701 			sent += m;
1702 			if (m > 0) //XXX-ste: can m be 0?
1703 				event++;
1704 			targ->ctr.pkts = sent;
1705 			targ->ctr.bytes += m*size;
1706 			targ->ctr.events = event;
1707 			if (rate_limit) {
1708 				tosend -= m;
1709 				if (tosend <= 0)
1710 					break;
1711 			}
1712 		}
1713 	}
1714 	/* flush any remaining packets */
1715 	if (txring != NULL) {
1716 		D("flush tail %d head %d on thread %p",
1717 			txring->tail, txring->head,
1718 			(void *)pthread_self());
1719 		ioctl(pfd.fd, NIOCTXSYNC, NULL);
1720 	}
1721 
1722 	/* final part: wait all the TX queues to be empty. */
1723 	for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
1724 		txring = NETMAP_TXRING(nifp, i);
1725 		while (!targ->cancel && nm_tx_pending(txring)) {
1726 			RD(5, "pending tx tail %d head %d on ring %d",
1727 				txring->tail, txring->head, i);
1728 			ioctl(pfd.fd, NIOCTXSYNC, NULL);
1729 			usleep(1); /* wait 1 tick */
1730 		}
1731 	}
1732     } /* end DEV_NETMAP */
1733 
1734 	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
1735 	targ->completed = 1;
1736 	targ->ctr.pkts = sent;
1737 	targ->ctr.bytes = sent*size;
1738 	targ->ctr.events = event;
1739 quit:
1740 	/* reset the ``used`` flag. */
1741 	targ->used = 0;
1742 
1743 	return (NULL);
1744 }
1745 
1746 
1747 #ifndef NO_PCAP
1748 static void
1749 receive_pcap(u_char *user, const struct pcap_pkthdr * h,
1750 	const u_char * bytes)
1751 {
1752 	struct my_ctrs *ctr = (struct my_ctrs *)user;
1753 	(void)bytes;	/* UNUSED */
1754 	ctr->bytes += h->len;
1755 	ctr->pkts++;
1756 }
1757 #endif /* !NO_PCAP */
1758 
1759 
1760 static int
1761 receive_packets(struct netmap_ring *ring, u_int limit, int dump, uint64_t *bytes)
1762 {
1763 	u_int head, rx, n;
1764 	uint64_t b = 0;
1765 	u_int complete = 0;
1766 
1767 	if (bytes == NULL)
1768 		bytes = &b;
1769 
1770 	head = ring->head;
1771 	n = nm_ring_space(ring);
1772 	if (n < limit)
1773 		limit = n;
1774 	for (rx = 0; rx < limit; rx++) {
1775 		struct netmap_slot *slot = &ring->slot[head];
1776 		char *p = NETMAP_BUF(ring, slot->buf_idx);
1777 
1778 		*bytes += slot->len;
1779 		if (dump)
1780 			dump_payload(p, slot->len, ring, head);
1781 		if (!(slot->flags & NS_MOREFRAG))
1782 			complete++;
1783 
1784 		head = nm_ring_next(ring, head);
1785 	}
1786 	ring->head = ring->cur = head;
1787 
1788 	return (complete);
1789 }
1790 
1791 static void *
1792 receiver_body(void *data)
1793 {
1794 	struct targ *targ = (struct targ *) data;
1795 	struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
1796 	struct netmap_if *nifp;
1797 	struct netmap_ring *rxring;
1798 	int i;
1799 	struct my_ctrs cur;
1800 
1801 	memset(&cur, 0, sizeof(cur));
1802 
1803 	if (setaffinity(targ->thread, targ->affinity))
1804 		goto quit;
1805 
1806 	D("reading from %s fd %d main_fd %d",
1807 		targ->g->ifname, targ->fd, targ->g->main_fd);
1808 	/* unbounded wait for the first packet. */
1809 	for (;!targ->cancel;) {
1810 		i = poll(&pfd, 1, 1000);
1811 		if (i > 0 && !(pfd.revents & POLLERR))
1812 			break;
1813 		if (i < 0) {
1814 			D("poll() error: %s", strerror(errno));
1815 			goto quit;
1816 		}
1817 		if (pfd.revents & POLLERR) {
1818 			D("fd error");
1819 			goto quit;
1820 		}
1821 		RD(1, "waiting for initial packets, poll returns %d %d",
1822 			i, pfd.revents);
1823 	}
1824 	/* main loop, exit after 1s silence */
1825 	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
1826     if (targ->g->dev_type == DEV_TAP) {
1827 	while (!targ->cancel) {
1828 		char buf[MAX_BODYSIZE];
1829 		/* XXX should we poll ? */
1830 		i = read(targ->g->main_fd, buf, sizeof(buf));
1831 		if (i > 0) {
1832 			targ->ctr.pkts++;
1833 			targ->ctr.bytes += i;
1834 			targ->ctr.events++;
1835 		}
1836 	}
1837 #ifndef NO_PCAP
1838     } else if (targ->g->dev_type == DEV_PCAP) {
1839 	while (!targ->cancel) {
1840 		/* XXX should we poll ? */
1841 		pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap,
1842 			(u_char *)&targ->ctr);
1843 		targ->ctr.events++;
1844 	}
1845 #endif /* !NO_PCAP */
1846     } else {
1847 	int dump = targ->g->options & OPT_DUMP;
1848 
1849 	nifp = targ->nmd->nifp;
1850 	while (!targ->cancel) {
1851 		/* Once we started to receive packets, wait at most 1 seconds
1852 		   before quitting. */
1853 #ifdef BUSYWAIT
1854 		if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) {
1855 			D("ioctl error on queue %d: %s", targ->me,
1856 					strerror(errno));
1857 			goto quit;
1858 		}
1859 #else /* !BUSYWAIT */
1860 		if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) {
1861 			clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
1862 			targ->toc.tv_sec -= 1; /* Subtract timeout time. */
1863 			goto out;
1864 		}
1865 
1866 		if (pfd.revents & POLLERR) {
1867 			D("poll err");
1868 			goto quit;
1869 		}
1870 #endif /* !BUSYWAIT */
1871 		uint64_t cur_space = 0;
1872 		for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) {
1873 			int m;
1874 
1875 			rxring = NETMAP_RXRING(nifp, i);
1876 			/* compute free space in the ring */
1877 			m = rxring->head + rxring->num_slots - rxring->tail;
1878 			if (m >= (int) rxring->num_slots)
1879 				m -= rxring->num_slots;
1880 			cur_space += m;
1881 			if (nm_ring_empty(rxring))
1882 				continue;
1883 
1884 			m = receive_packets(rxring, targ->g->burst, dump, &cur.bytes);
1885 			cur.pkts += m;
1886 			if (m > 0)
1887 				cur.events++;
1888 		}
1889 		cur.min_space = targ->ctr.min_space;
1890 		if (cur_space < cur.min_space)
1891 			cur.min_space = cur_space;
1892 		targ->ctr = cur;
1893 	}
1894     }
1895 
1896 	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
1897 
1898 #if !defined(BUSYWAIT)
1899 out:
1900 #endif
1901 	targ->completed = 1;
1902 	targ->ctr = cur;
1903 
1904 quit:
1905 	/* reset the ``used`` flag. */
1906 	targ->used = 0;
1907 
1908 	return (NULL);
1909 }
1910 
1911 static void *
1912 txseq_body(void *data)
1913 {
1914 	struct targ *targ = (struct targ *) data;
1915 	struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT };
1916 	struct netmap_ring *ring;
1917 	int64_t sent = 0;
1918 	uint64_t event = 0;
1919 	int options = targ->g->options | OPT_COPY;
1920 	struct timespec nexttime = {0, 0};
1921 	int rate_limit = targ->g->tx_rate;
1922 	struct pkt *pkt = &targ->pkt;
1923 	int frags = targ->g->frags;
1924 	uint32_t sequence = 0;
1925 	int budget = 0;
1926 	void *frame;
1927 	int size;
1928 
1929 	if (targ->g->nthreads > 1) {
1930 		D("can only txseq ping with 1 thread");
1931 		return NULL;
1932 	}
1933 
1934 	if (targ->g->npackets > 0) {
1935 		D("Ignoring -n argument");
1936 	}
1937 
1938 	frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header;
1939 	size = targ->g->pkt_size + targ->g->virt_header;
1940 
1941 	D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd);
1942 	if (setaffinity(targ->thread, targ->affinity))
1943 		goto quit;
1944 
1945 	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
1946 	if (rate_limit) {
1947 		targ->tic = timespec_add(targ->tic, (struct timespec){2,0});
1948 		targ->tic.tv_nsec = 0;
1949 		wait_time(targ->tic);
1950 		nexttime = targ->tic;
1951 	}
1952 
1953 	/* Only use the first queue. */
1954 	ring = NETMAP_TXRING(targ->nmd->nifp, targ->nmd->first_tx_ring);
1955 
1956 	while (!targ->cancel) {
1957 		int64_t limit;
1958 		unsigned int space;
1959 		unsigned int head;
1960 		int fcnt;
1961 		uint16_t sum = 0;
1962 		int rv;
1963 
1964 		if (!rate_limit) {
1965 			budget = targ->g->burst;
1966 
1967 		} else if (budget <= 0) {
1968 			budget = targ->g->burst;
1969 			nexttime = timespec_add(nexttime, targ->g->tx_period);
1970 			wait_time(nexttime);
1971 		}
1972 
1973 		/* wait for available room in the send queue */
1974 #ifdef BUSYWAIT
1975 		(void)rv;
1976 		if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) {
1977 			D("ioctl error on queue %d: %s", targ->me,
1978 					strerror(errno));
1979 			goto quit;
1980 		}
1981 #else /* !BUSYWAIT */
1982 		if ( (rv = poll(&pfd, 1, 2000)) <= 0) {
1983 			if (targ->cancel)
1984 				break;
1985 			D("poll error on queue %d: %s", targ->me,
1986 				rv ? strerror(errno) : "timeout");
1987 			// goto quit;
1988 		}
1989 		if (pfd.revents & POLLERR) {
1990 			D("poll error on %d ring %d-%d", pfd.fd,
1991 				targ->nmd->first_tx_ring, targ->nmd->last_tx_ring);
1992 			goto quit;
1993 		}
1994 #endif /* !BUSYWAIT */
1995 
1996 		/* If no room poll() again. */
1997 		space = nm_ring_space(ring);
1998 		if (!space) {
1999 			continue;
2000 		}
2001 
2002 		limit = budget;
2003 
2004 		if (space < limit) {
2005 			limit = space;
2006 		}
2007 
2008 		/* Cut off ``limit`` to make sure is multiple of ``frags``. */
2009 		if (frags > 1) {
2010 			limit = (limit / frags) * frags;
2011 		}
2012 
2013 		limit = sent + limit; /* Convert to absolute. */
2014 
2015 		for (fcnt = frags, head = ring->head;
2016 				sent < limit; sent++, sequence++) {
2017 			struct netmap_slot *slot = &ring->slot[head];
2018 			char *p = NETMAP_BUF(ring, slot->buf_idx);
2019 			uint16_t *w = (uint16_t *)PKT(pkt, body, targ->g->af), t;
2020 
2021 			memcpy(&sum, targ->g->af == AF_INET ? &pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, sizeof(sum));
2022 
2023 			slot->flags = 0;
2024 			t = *w;
2025 			PKT(pkt, body, targ->g->af)[0] = sequence >> 24;
2026 			PKT(pkt, body, targ->g->af)[1] = (sequence >> 16) & 0xff;
2027 			sum = ~cksum_add(~sum, cksum_add(~t, *w));
2028 			t = *++w;
2029 			PKT(pkt, body, targ->g->af)[2] = (sequence >> 8) & 0xff;
2030 			PKT(pkt, body, targ->g->af)[3] = sequence & 0xff;
2031 			sum = ~cksum_add(~sum, cksum_add(~t, *w));
2032 			memcpy(targ->g->af == AF_INET ? &pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, &sum, sizeof(sum));
2033 			nm_pkt_copy(frame, p, size);
2034 			if (fcnt == frags) {
2035 				update_addresses(pkt, targ);
2036 			}
2037 
2038 			if (options & OPT_DUMP) {
2039 				dump_payload(p, size, ring, head);
2040 			}
2041 
2042 			slot->len = size;
2043 
2044 			if (--fcnt > 0) {
2045 				slot->flags |= NS_MOREFRAG;
2046 			} else {
2047 				fcnt = frags;
2048 			}
2049 
2050 			if (sent == limit - 1) {
2051 				/* Make sure we don't push an incomplete
2052 				 * packet. */
2053 				assert(!(slot->flags & NS_MOREFRAG));
2054 				slot->flags |= NS_REPORT;
2055 			}
2056 
2057 			head = nm_ring_next(ring, head);
2058 			if (rate_limit) {
2059 				budget--;
2060 			}
2061 		}
2062 
2063 		ring->cur = ring->head = head;
2064 
2065 		event ++;
2066 		targ->ctr.pkts = sent;
2067 		targ->ctr.bytes = sent * size;
2068 		targ->ctr.events = event;
2069 	}
2070 
2071 	/* flush any remaining packets */
2072 	D("flush tail %d head %d on thread %p",
2073 		ring->tail, ring->head,
2074 		(void *)pthread_self());
2075 	ioctl(pfd.fd, NIOCTXSYNC, NULL);
2076 
2077 	/* final part: wait the TX queues to become empty. */
2078 	while (!targ->cancel && nm_tx_pending(ring)) {
2079 		RD(5, "pending tx tail %d head %d on ring %d",
2080 				ring->tail, ring->head, targ->nmd->first_tx_ring);
2081 		ioctl(pfd.fd, NIOCTXSYNC, NULL);
2082 		usleep(1); /* wait 1 tick */
2083 	}
2084 
2085 	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
2086 	targ->completed = 1;
2087 	targ->ctr.pkts = sent;
2088 	targ->ctr.bytes = sent * size;
2089 	targ->ctr.events = event;
2090 quit:
2091 	/* reset the ``used`` flag. */
2092 	targ->used = 0;
2093 
2094 	return (NULL);
2095 }
2096 
2097 
2098 static char *
2099 multi_slot_to_string(struct netmap_ring *ring, unsigned int head,
2100 		     unsigned int nfrags, char *strbuf, size_t strbuflen)
2101 {
2102 	unsigned int f;
2103 	char *ret = strbuf;
2104 
2105 	for (f = 0; f < nfrags; f++) {
2106 		struct netmap_slot *slot = &ring->slot[head];
2107 		int m = snprintf(strbuf, strbuflen, "|%u,%x|", slot->len,
2108 				 slot->flags);
2109 		if (m >= (int)strbuflen) {
2110 			break;
2111 		}
2112 		strbuf += m;
2113 		strbuflen -= m;
2114 
2115 		head = nm_ring_next(ring, head);
2116 	}
2117 
2118 	return ret;
2119 }
2120 
2121 static void *
2122 rxseq_body(void *data)
2123 {
2124 	struct targ *targ = (struct targ *) data;
2125 	struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
2126 	int dump = targ->g->options & OPT_DUMP;
2127 	struct netmap_ring *ring;
2128 	unsigned int frags_exp = 1;
2129 	struct my_ctrs cur;
2130 	unsigned int frags = 0;
2131 	int first_packet = 1;
2132 	int first_slot = 1;
2133 	int i, j, af, nrings;
2134 	uint32_t seq, *seq_exp = NULL;
2135 
2136 	memset(&cur, 0, sizeof(cur));
2137 
2138 	if (setaffinity(targ->thread, targ->affinity))
2139 		goto quit;
2140 
2141 	nrings = targ->nmd->last_rx_ring - targ->nmd->first_rx_ring + 1;
2142 	seq_exp = calloc(nrings, sizeof(uint32_t));
2143 	if (seq_exp == NULL) {
2144 		D("failed to allocate seq array");
2145 		goto quit;
2146 	}
2147 
2148 	D("reading from %s fd %d main_fd %d",
2149 		targ->g->ifname, targ->fd, targ->g->main_fd);
2150 	/* unbounded wait for the first packet. */
2151 	for (;!targ->cancel;) {
2152 		i = poll(&pfd, 1, 1000);
2153 		if (i > 0 && !(pfd.revents & POLLERR))
2154 			break;
2155 		RD(1, "waiting for initial packets, poll returns %d %d",
2156 			i, pfd.revents);
2157 	}
2158 
2159 	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
2160 
2161 
2162 	while (!targ->cancel) {
2163 		unsigned int head;
2164 		int limit;
2165 
2166 #ifdef BUSYWAIT
2167 		if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) {
2168 			D("ioctl error on queue %d: %s", targ->me,
2169 					strerror(errno));
2170 			goto quit;
2171 		}
2172 #else /* !BUSYWAIT */
2173 		if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) {
2174 			clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
2175 			targ->toc.tv_sec -= 1; /* Subtract timeout time. */
2176 			goto out;
2177 		}
2178 
2179 		if (pfd.revents & POLLERR) {
2180 			D("poll err");
2181 			goto quit;
2182 		}
2183 #endif /* !BUSYWAIT */
2184 
2185 		for (j = targ->nmd->first_rx_ring; j <= targ->nmd->last_rx_ring; j++) {
2186 			ring = NETMAP_RXRING(targ->nmd->nifp, j);
2187 			if (nm_ring_empty(ring))
2188 				continue;
2189 
2190 			limit = nm_ring_space(ring);
2191 			if (limit > targ->g->burst)
2192 				limit = targ->g->burst;
2193 
2194 #if 0
2195 			/* Enable this if
2196 			 *     1) we remove the early-return optimization from
2197 			 *        the netmap poll implementation, or
2198 			 *     2) pipes get NS_MOREFRAG support.
2199 			 * With the current netmap implementation, an experiment like
2200 			 *    pkt-gen -i vale:1{1 -f txseq -F 9
2201 			 *    pkt-gen -i vale:1}1 -f rxseq
2202 			 * would get stuck as soon as we find nm_ring_space(ring) < 9,
2203 			 * since here limit is rounded to 0 and
2204 			 * pipe rxsync is not called anymore by the poll() of this loop.
2205 			 */
2206 			if (frags_exp > 1) {
2207 				int o = limit;
2208 				/* Cut off to the closest smaller multiple. */
2209 				limit = (limit / frags_exp) * frags_exp;
2210 				RD(2, "LIMIT %d --> %d", o, limit);
2211 			}
2212 #endif
2213 
2214 			for (head = ring->head, i = 0; i < limit; i++) {
2215 				struct netmap_slot *slot = &ring->slot[head];
2216 				char *p = NETMAP_BUF(ring, slot->buf_idx);
2217 				int len = slot->len;
2218 				struct pkt *pkt;
2219 
2220 				if (dump) {
2221 					dump_payload(p, slot->len, ring, head);
2222 				}
2223 
2224 				frags++;
2225 				if (!(slot->flags & NS_MOREFRAG)) {
2226 					if (first_packet) {
2227 						first_packet = 0;
2228 					} else if (frags != frags_exp) {
2229 						char prbuf[512];
2230 						RD(1, "Received packets with %u frags, "
2231 								"expected %u, '%s'", frags, frags_exp,
2232 								multi_slot_to_string(ring, head-frags+1,
2233 							       	frags,
2234 									prbuf, sizeof(prbuf)));
2235 					}
2236 					first_packet = 0;
2237 					frags_exp = frags;
2238 					frags = 0;
2239 				}
2240 
2241 				p -= sizeof(pkt->vh) - targ->g->virt_header;
2242 				len += sizeof(pkt->vh) - targ->g->virt_header;
2243 				pkt = (struct pkt *)p;
2244 				if (ntohs(pkt->eh.ether_type) == ETHERTYPE_IP)
2245 					af = AF_INET;
2246 				else
2247 					af = AF_INET6;
2248 
2249 				if ((char *)pkt + len < ((char *)PKT(pkt, body, af)) +
2250 						sizeof(seq)) {
2251 					RD(1, "%s: packet too small (len=%u)", __func__,
2252 							slot->len);
2253 				} else {
2254 					seq = (PKT(pkt, body, af)[0] << 24) |
2255 						(PKT(pkt, body, af)[1] << 16) |
2256 						(PKT(pkt, body, af)[2] << 8) |
2257 						PKT(pkt, body, af)[3];
2258 					if (first_slot) {
2259 						/* Grab the first one, whatever it
2260 						   is. */
2261 						seq_exp[j] = seq;
2262 						first_slot = 0;
2263 					} else if (seq != seq_exp[j]) {
2264 						uint32_t delta = seq - seq_exp[j];
2265 
2266 						if (delta < (0xFFFFFFFF >> 1)) {
2267 							RD(2, "Sequence GAP: exp %u found %u",
2268 									seq_exp[j], seq);
2269 						} else {
2270 							RD(2, "Sequence OUT OF ORDER: "
2271 									"exp %u found %u", seq_exp[j], seq);
2272 						}
2273 						seq_exp[j] = seq;
2274 					}
2275 					seq_exp[j]++;
2276 				}
2277 
2278 				cur.bytes += slot->len;
2279 				head = nm_ring_next(ring, head);
2280 				cur.pkts++;
2281 			}
2282 
2283 			ring->cur = ring->head = head;
2284 
2285 			cur.events++;
2286 			targ->ctr = cur;
2287 		}
2288 	}
2289 	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
2290 
2291 #ifndef BUSYWAIT
2292 out:
2293 #endif /* !BUSYWAIT */
2294 	targ->completed = 1;
2295 	targ->ctr = cur;
2296 
2297 quit:
2298 	if (seq_exp != NULL)
2299 		free(seq_exp);
2300 	/* reset the ``used`` flag. */
2301 	targ->used = 0;
2302 
2303 	return (NULL);
2304 }
2305 
2306 
2307 static void
2308 tx_output(struct glob_arg *g, struct my_ctrs *cur, double delta, const char *msg)
2309 {
2310 	double bw, raw_bw, pps, abs;
2311 	char b1[40], b2[80], b3[80];
2312 	int size;
2313 
2314 	if (cur->pkts == 0) {
2315 		printf("%s nothing.\n", msg);
2316 		return;
2317 	}
2318 
2319 	size = (int)(cur->bytes / cur->pkts);
2320 
2321 	printf("%s %llu packets %llu bytes %llu events %d bytes each in %.2f seconds.\n",
2322 		msg,
2323 		(unsigned long long)cur->pkts,
2324 		(unsigned long long)cur->bytes,
2325 		(unsigned long long)cur->events, size, delta);
2326 	if (delta == 0)
2327 		delta = 1e-6;
2328 	if (size < 60)		/* correct for min packet size */
2329 		size = 60;
2330 	pps = cur->pkts / delta;
2331 	bw = (8.0 * cur->bytes) / delta;
2332 	raw_bw = (8.0 * cur->bytes + cur->pkts * g->framing) / delta;
2333 	abs = cur->pkts / (double)(cur->events);
2334 
2335 	printf("Speed: %spps Bandwidth: %sbps (raw %sbps). Average batch: %.2f pkts\n",
2336 		norm(b1, pps, normalize), norm(b2, bw, normalize), norm(b3, raw_bw, normalize), abs);
2337 }
2338 
/*
 * Write the command line help to stderr, then terminate the process
 * with exit status errcode. This function does not return.
 *
 * The option descriptions are generated from the pkt-gen man page:
 *   $ man pkt-gen > x
 * and pasted here adding the string terminators and endlines with simple
 * regular expressions.
 */
static void
usage(int errcode)
{
	static const char options_help[] =
"     -h      Show program usage and exit.\n"
"\n"
"     -i interface\n"
"             Name of the network interface that pkt-gen operates on.  It can be a system network interface\n"
"             (e.g., em0), the name of a vale(4) port (e.g., valeSSS:PPP), the name of a netmap pipe or\n"
"             monitor, or any valid netmap port name accepted by the nm_open library function, as docu-\n"
"             mented in netmap(4) (NIOCREGIF section).\n"
"\n"
"     -f function\n"
"             The function to be executed by pkt-gen.  Specify tx for transmission, rx for reception, ping\n"
"             for client-side ping-pong operation, and pong for server-side ping-pong operation.\n"
"\n"
"     -n count\n"
"             Number of iterations of the pkt-gen function, with 0 meaning infinite).  In case of tx or rx,\n"
"             count is the number of packets to receive or transmit.  In case of ping or pong, count is the\n"
"             number of ping-pong transactions.\n"
"\n"
"     -l pkt_size\n"
"             Packet size in bytes excluding CRC.  If passed a second time, use random sizes larger or\n"
"             equal than the second one and lower than the first one.\n"
"\n"
"     -b burst_size\n"
"             Transmit or receive up to burst_size packets at a time.\n"
"\n"
"     -4      Use IPv4 addresses.\n"
"\n"
"     -6      Use IPv6 addresses.\n"
"\n"
"     -d dst_ip[:port[-dst_ip:port]]\n"
"             Destination IPv4/IPv6 address and port, single or range.\n"
"\n"
"     -s src_ip[:port[-src_ip:port]]\n"
"             Source IPv4/IPv6 address and port, single or range.\n"
"\n"
"     -D dst_mac\n"
"             Destination MAC address in colon notation (e.g., aa:bb:cc:dd:ee:00).\n"
"\n"
"     -S src_mac\n"
"             Source MAC address in colon notation.\n"
"\n"
"     -a cpu_id\n"
"             Pin the first thread of pkt-gen to a particular CPU using pthread_setaffinity_np(3).  If more\n"
"             threads are used, they are pinned to the subsequent CPUs, one per thread.\n"
"\n"
"     -c cpus\n"
"             Maximum number of CPUs to use (0 means to use all the available ones).\n"
"\n"
"     -p threads\n"
"             Number of threads to use.  By default, only a single thread is used to handle all the netmap\n"
"             rings.  If threads is larger than one, each thread handles a single TX ring (in tx mode), a\n"
"             single RX ring (in rx mode), or a TX/RX ring couple.  The number of threads must be less or\n"
"             equal than the number of TX (or RX) ring available in the device specified by interface.\n"
"\n"
"     -T report_ms\n"
"             Number of milliseconds between reports.\n"
"\n"
"     -w wait_for_link_time\n"
"             Number of seconds to wait before starting the pkt-gen function, useuful to make sure that the\n"
"             network link is up.  A network device driver may take some time to enter netmap mode, or to\n"
"             create a new transmit/receive ring pair when netmap(4) requests one.\n"
"\n"
"     -R rate\n"
"             Packet transmission rate.  Not setting the packet transmission rate tells pkt-gen to transmit\n"
"             packets as quickly as possible.  On servers from 2010 on-wards netmap(4) is able to com-\n"
"             pletely use all of the bandwidth of a 10 or 40Gbps link, so this option should be used unless\n"
"             your intention is to saturate the link.\n"
"\n"
"     -X      Dump payload of each packet transmitted or received.\n"
"\n"
"     -H len  Add empty virtio-net-header with size 'len'.  Valid sizes are 0, 10 and 12.  This option is\n"
"             only used with Virtual Machine technologies that use virtio as a network interface.\n"
"\n"
"     -P file\n"
"             Load the packet to be transmitted from a pcap file rather than constructing it within\n"
"             pkt-gen.\n"
"\n"
"     -z      Use random IPv4/IPv6 src address/port.\n"
"\n"
"     -Z      Use random IPv4/IPv6 dst address/port.\n"
"\n"
"     -N      Do not normalize units (i.e., use bps, pps instead of Mbps, Kpps, etc.).\n"
"\n"
"     -F num_frags\n"
"             Send multi-slot packets, each one with num_frags fragments.  A multi-slot packet is repre-\n"
"             sented by two or more consecutive netmap slots with the NS_MOREFRAG flag set (except for the\n"
"             last slot).  This is useful to transmit or receive packets larger than the netmap buffer\n"
"             size.\n"
"\n"
"     -M frag_size\n"
"             In multi-slot mode, frag_size specifies the size of each fragment, if smaller than the packet\n"
"             length divided by num_frags.\n"
"\n"
"     -I      Use indirect buffers.  It is only valid for transmitting on VALE ports, and it is implemented\n"
"             by setting the NS_INDIRECT flag in the netmap slots.\n"
"\n"
"     -W      Exit immediately if all the RX rings are empty the first time they are examined.\n"
"\n"
"     -v      Increase the verbosity level.\n"
"\n"
"     -r      In tx mode, do not initialize packets, but send whatever the content of the uninitialized\n"
"             netmap buffers is (rubbish mode).\n"
"\n"
"     -A      Compute mean and standard deviation (over a sliding window) for the transmit or receive rate.\n"
"\n"
"     -B      Take Ethernet framing and CRC into account when computing the average bps.  This adds 4 bytes\n"
"             of CRC and 20 bytes of framing to each packet.\n"
"\n"
"     -C tx_slots[,rx_slots[,tx_rings[,rx_rings]]]\n"
"             Configuration in terms of number of rings and slots to be used when opening the netmap port.\n"
"             Such configuration has effect on software ports created on the fly, such as VALE ports and\n"
"             netmap pipes.  The configuration may consist of 1 to 4 numbers separated by commas: tx_slots,\n"
"             rx_slots, tx_rings, rx_rings.  Missing numbers or zeroes stand for default values.  As an\n"
"             additional convenience, if exactly one number is specified, then this is assigned to both\n"
"             tx_slots and rx_slots.  If there is no fourth number, then the third one is assigned to both\n"
"             tx_rings and rx_rings.\n"
"\n"
"     -o options		data generation options (parsed using atoi)\n"
"				OPT_PREFETCH	1\n"
"				OPT_ACCESS	2\n"
"				OPT_COPY	4\n"
"				OPT_MEMCPY	8\n"
"				OPT_TS		16 (add a timestamp)\n"
"				OPT_INDIRECT	32 (use indirect buffers)\n"
"				OPT_DUMP	64 (dump rx/tx traffic)\n"
"				OPT_RUBBISH	256\n"
"					(send wathever the buffers contain)\n"
"				OPT_RANDOM_SRC  512\n"
"				OPT_RANDOM_DST  1024\n"
"				OPT_PPS_STATS   2048\n";
	const char *prog = "pkt-gen";

	/* header line first, then the (format-free) option descriptions */
	fprintf(stderr, "Usage:\n%s arguments\n", prog);
	fputs(options_help, stderr);
	exit(errcode);
}
2483 
2484 static void
2485 start_threads(struct glob_arg *g) {
2486 	int i;
2487 
2488 	targs = calloc(g->nthreads, sizeof(*targs));
2489 	struct targ *t;
2490 	/*
2491 	 * Now create the desired number of threads, each one
2492 	 * using a single descriptor.
2493 	 */
2494 	for (i = 0; i < g->nthreads; i++) {
2495 		uint64_t seed = time(0) | (time(0) << 32);
2496 		t = &targs[i];
2497 
2498 		bzero(t, sizeof(*t));
2499 		t->fd = -1; /* default, with pcap */
2500 		t->g = g;
2501 		memcpy(t->seed, &seed, sizeof(t->seed));
2502 
2503 		if (g->dev_type == DEV_NETMAP) {
2504 			struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */
2505 			uint64_t nmd_flags = 0;
2506 			nmd.self = &nmd;
2507 
2508 			if (i > 0) {
2509 				/* the first thread uses the fd opened by the main
2510 				 * thread, the other threads re-open /dev/netmap
2511 				 */
2512 				if (g->nthreads > 1) {
2513 					nmd.req.nr_flags =
2514 						g->nmd->req.nr_flags & ~NR_REG_MASK;
2515 					nmd.req.nr_flags |= NR_REG_ONE_NIC;
2516 					nmd.req.nr_ringid = i;
2517 				}
2518 				/* Only touch one of the rings (rx is already ok) */
2519 				if (g->td_type == TD_TYPE_RECEIVER)
2520 					nmd_flags |= NETMAP_NO_TX_POLL;
2521 
2522 				/* register interface. Override ifname and ringid etc. */
2523 				t->nmd = nm_open(t->g->ifname, NULL, nmd_flags |
2524 						NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, &nmd);
2525 				if (t->nmd == NULL) {
2526 					D("Unable to open %s: %s",
2527 							t->g->ifname, strerror(errno));
2528 					continue;
2529 				}
2530 			} else {
2531 				t->nmd = g->nmd;
2532 			}
2533 			t->fd = t->nmd->fd;
2534 			t->frags = g->frags;
2535 		} else {
2536 			targs[i].fd = g->main_fd;
2537 		}
2538 		t->used = 1;
2539 		t->me = i;
2540 		if (g->affinity >= 0) {
2541 			t->affinity = (g->affinity + i) % g->cpus;
2542 		} else {
2543 			t->affinity = -1;
2544 		}
2545 		/* default, init packets */
2546 		initialize_packet(t);
2547 	}
2548 	/* Wait for PHY reset. */
2549 	D("Wait %d secs for phy reset", g->wait_link);
2550 	sleep(g->wait_link);
2551 	D("Ready...");
2552 
2553 	for (i = 0; i < g->nthreads; i++) {
2554 		t = &targs[i];
2555 		if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) {
2556 			D("Unable to create thread %d: %s", i, strerror(errno));
2557 			t->used = 0;
2558 		}
2559 	}
2560 }
2561 
2562 static void
2563 main_thread(struct glob_arg *g)
2564 {
2565 	int i;
2566 
2567 	struct my_ctrs prev, cur;
2568 	double delta_t;
2569 	struct timeval tic, toc;
2570 
2571 	prev.pkts = prev.bytes = prev.events = 0;
2572 	gettimeofday(&prev.t, NULL);
2573 	for (;;) {
2574 		char b1[40], b2[40], b3[40], b4[100];
2575 		uint64_t pps, usec;
2576 		struct my_ctrs x;
2577 		double abs;
2578 		int done = 0;
2579 
2580 		usec = wait_for_next_report(&prev.t, &cur.t,
2581 				g->report_interval);
2582 
2583 		cur.pkts = cur.bytes = cur.events = 0;
2584 		cur.min_space = 0;
2585 		if (usec < 10000) /* too short to be meaningful */
2586 			continue;
2587 		/* accumulate counts for all threads */
2588 		for (i = 0; i < g->nthreads; i++) {
2589 			cur.pkts += targs[i].ctr.pkts;
2590 			cur.bytes += targs[i].ctr.bytes;
2591 			cur.events += targs[i].ctr.events;
2592 			cur.min_space += targs[i].ctr.min_space;
2593 			targs[i].ctr.min_space = 99999;
2594 			if (targs[i].used == 0)
2595 				done++;
2596 		}
2597 		x.pkts = cur.pkts - prev.pkts;
2598 		x.bytes = cur.bytes - prev.bytes;
2599 		x.events = cur.events - prev.events;
2600 		pps = (x.pkts*1000000 + usec/2) / usec;
2601 		abs = (x.events > 0) ? (x.pkts / (double) x.events) : 0;
2602 
2603 		if (!(g->options & OPT_PPS_STATS)) {
2604 			strcpy(b4, "");
2605 		} else {
2606 			/* Compute some pps stats using a sliding window. */
2607 			double ppsavg = 0.0, ppsdev = 0.0;
2608 			int nsamples = 0;
2609 
2610 			g->win[g->win_idx] = pps;
2611 			g->win_idx = (g->win_idx + 1) % STATS_WIN;
2612 
2613 			for (i = 0; i < STATS_WIN; i++) {
2614 				ppsavg += g->win[i];
2615 				if (g->win[i]) {
2616 					nsamples ++;
2617 				}
2618 			}
2619 			ppsavg /= nsamples;
2620 
2621 			for (i = 0; i < STATS_WIN; i++) {
2622 				if (g->win[i] == 0) {
2623 					continue;
2624 				}
2625 				ppsdev += (g->win[i] - ppsavg) * (g->win[i] - ppsavg);
2626 			}
2627 			ppsdev /= nsamples;
2628 			ppsdev = sqrt(ppsdev);
2629 
2630 			snprintf(b4, sizeof(b4), "[avg/std %s/%s pps]",
2631 				 norm(b1, ppsavg, normalize), norm(b2, ppsdev, normalize));
2632 		}
2633 
2634 		D("%spps %s(%spkts %sbps in %llu usec) %.2f avg_batch %d min_space",
2635 			norm(b1, pps, normalize), b4,
2636 			norm(b2, (double)x.pkts, normalize),
2637 			norm(b3, 1000000*((double)x.bytes*8+(double)x.pkts*g->framing)/usec, normalize),
2638 			(unsigned long long)usec,
2639 			abs, (int)cur.min_space);
2640 		prev = cur;
2641 
2642 		if (done == g->nthreads)
2643 			break;
2644 	}
2645 
2646 	timerclear(&tic);
2647 	timerclear(&toc);
2648 	cur.pkts = cur.bytes = cur.events = 0;
2649 	/* final round */
2650 	for (i = 0; i < g->nthreads; i++) {
2651 		struct timespec t_tic, t_toc;
2652 		/*
2653 		 * Join active threads, unregister interfaces and close
2654 		 * file descriptors.
2655 		 */
2656 		if (targs[i].used)
2657 			pthread_join(targs[i].thread, NULL); /* blocking */
2658 		if (g->dev_type == DEV_NETMAP) {
2659 			nm_close(targs[i].nmd);
2660 			targs[i].nmd = NULL;
2661 		} else {
2662 			close(targs[i].fd);
2663 		}
2664 
2665 		if (targs[i].completed == 0)
2666 			D("ouch, thread %d exited with error", i);
2667 
2668 		/*
2669 		 * Collect threads output and extract information about
2670 		 * how long it took to send all the packets.
2671 		 */
2672 		cur.pkts += targs[i].ctr.pkts;
2673 		cur.bytes += targs[i].ctr.bytes;
2674 		cur.events += targs[i].ctr.events;
2675 		/* collect the largest start (tic) and end (toc) times,
2676 		 * XXX maybe we should do the earliest tic, or do a weighted
2677 		 * average ?
2678 		 */
2679 		t_tic = timeval2spec(&tic);
2680 		t_toc = timeval2spec(&toc);
2681 		if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic))
2682 			tic = timespec2val(&targs[i].tic);
2683 		if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc))
2684 			toc = timespec2val(&targs[i].toc);
2685 	}
2686 
2687 	/* print output. */
2688 	timersub(&toc, &tic, &toc);
2689 	delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
2690 	if (g->td_type == TD_TYPE_SENDER)
2691 		tx_output(g, &cur, delta_t, "Sent");
2692 	else if (g->td_type == TD_TYPE_RECEIVER)
2693 		tx_output(g, &cur, delta_t, "Received");
2694 }
2695 
/*
 * Descriptor of one function selectable with the -f option.
 */
struct td_desc {
	int ty;			/* thread type (one of the TD_TYPE_* values) */
	const char *key;	/* name matched against the -f argument; NULL ends a table */
	void *f;		/* thread body, stored into glob_arg.td_body */
	int default_burst;	/* burst size used when -b is not given */
};
2702 
2703 static struct td_desc func[] = {
2704 	{ TD_TYPE_RECEIVER,	"rx",		receiver_body,	512},	/* default */
2705 	{ TD_TYPE_SENDER,	"tx",		sender_body,	512 },
2706 	{ TD_TYPE_OTHER,	"ping",		ping_body,	1 },
2707 	{ TD_TYPE_OTHER,	"pong",		pong_body,	1 },
2708 	{ TD_TYPE_SENDER,	"txseq",	txseq_body,	512 },
2709 	{ TD_TYPE_RECEIVER,	"rxseq",	rxseq_body,	512 },
2710 	{ 0,			NULL,		NULL, 		0 }
2711 };
2712 
2713 static int
2714 tap_alloc(char *dev)
2715 {
2716 	struct ifreq ifr;
2717 	int fd, err;
2718 	char *clonedev = TAP_CLONEDEV;
2719 
2720 	(void)err;
2721 	(void)dev;
2722 	/* Arguments taken by the function:
2723 	 *
2724 	 * char *dev: the name of an interface (or '\0'). MUST have enough
2725 	 *   space to hold the interface name if '\0' is passed
2726 	 * int flags: interface flags (eg, IFF_TUN etc.)
2727 	 */
2728 
2729 #ifdef __FreeBSD__
2730 	if (dev[3]) { /* tapSomething */
2731 		static char buf[128];
2732 		snprintf(buf, sizeof(buf), "/dev/%s", dev);
2733 		clonedev = buf;
2734 	}
2735 #endif
2736 	/* open the device */
2737 	if( (fd = open(clonedev, O_RDWR)) < 0 ) {
2738 		return fd;
2739 	}
2740 	D("%s open successful", clonedev);
2741 
2742 	/* preparation of the struct ifr, of type "struct ifreq" */
2743 	memset(&ifr, 0, sizeof(ifr));
2744 
2745 #ifdef linux
2746 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
2747 
2748 	if (*dev) {
2749 		/* if a device name was specified, put it in the structure; otherwise,
2750 		* the kernel will try to allocate the "next" device of the
2751 		* specified type */
2752 		size_t len = strlen(dev);
2753 		if (len > IFNAMSIZ) {
2754 			D("%s too long", dev);
2755 			return -1;
2756 		}
2757 		memcpy(ifr.ifr_name, dev, len);
2758 	}
2759 
2760 	/* try to create the device */
2761 	if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) {
2762 		D("failed to to a TUNSETIFF: %s", strerror(errno));
2763 		close(fd);
2764 		return err;
2765 	}
2766 
2767 	/* if the operation was successful, write back the name of the
2768 	* interface to the variable "dev", so the caller can know
2769 	* it. Note that the caller MUST reserve space in *dev (see calling
2770 	* code below) */
2771 	strcpy(dev, ifr.ifr_name);
2772 	D("new name is %s", dev);
2773 #endif /* linux */
2774 
2775 	/* this is the special file descriptor that the caller will use to talk
2776 	 * with the virtual interface */
2777 	return fd;
2778 }
2779 
2780 int
2781 main(int arc, char **argv)
2782 {
2783 	int i;
2784 	struct sigaction sa;
2785 	sigset_t ss;
2786 
2787 	struct glob_arg g;
2788 
2789 	int ch;
2790 	int devqueues = 1;	/* how many device queues */
2791 	int wait_link_arg = 0;
2792 
2793 	int pkt_size_done = 0;
2794 
2795 	struct td_desc *fn = func;
2796 
2797 	bzero(&g, sizeof(g));
2798 
2799 	g.main_fd = -1;
2800 	g.td_body = fn->f;
2801 	g.td_type = fn->ty;
2802 	g.report_interval = 1000;	/* report interval */
2803 	g.affinity = -1;
2804 	/* ip addresses can also be a range x.x.x.x-x.x.x.y */
2805 	g.af = AF_INET;		/* default */
2806 	g.src_ip.name = "10.0.0.1";
2807 	g.dst_ip.name = "10.1.0.1";
2808 	g.dst_mac.name = "ff:ff:ff:ff:ff:ff";
2809 	g.src_mac.name = NULL;
2810 	g.pkt_size = 60;
2811 	g.pkt_min_size = 0;
2812 	g.nthreads = 1;
2813 	g.cpus = 1;		/* default */
2814 	g.forever = 1;
2815 	g.tx_rate = 0;
2816 	g.frags = 1;
2817 	g.frag_size = (u_int)-1;	/* use the netmap buffer size by default */
2818 	g.nmr_config = "";
2819 	g.virt_header = 0;
2820 	g.wait_link = 2;	/* wait 2 seconds for physical ports */
2821 
2822 	while ((ch = getopt(arc, argv, "46a:f:F:Nn:i:Il:d:s:D:S:b:c:o:p:"
2823 	    "T:w:WvR:XC:H:rP:zZAhBM:")) != -1) {
2824 
2825 		switch(ch) {
2826 		default:
2827 			D("bad option %c %s", ch, optarg);
2828 			usage(-1);
2829 			break;
2830 
2831 		case 'h':
2832 			usage(0);
2833 			break;
2834 
2835 		case '4':
2836 			g.af = AF_INET;
2837 			break;
2838 
2839 		case '6':
2840 			g.af = AF_INET6;
2841 			break;
2842 
2843 		case 'N':
2844 			normalize = 0;
2845 			break;
2846 
2847 		case 'n':
2848 			g.npackets = strtoull(optarg, NULL, 10);
2849 			break;
2850 
2851 		case 'F':
2852 			i = atoi(optarg);
2853 			if (i < 1 || i > 63) {
2854 				D("invalid frags %d [1..63], ignore", i);
2855 				break;
2856 			}
2857 			g.frags = i;
2858 			break;
2859 
2860 		case 'M':
2861 			g.frag_size = atoi(optarg);
2862 			break;
2863 
2864 		case 'f':
2865 			for (fn = func; fn->key; fn++) {
2866 				if (!strcmp(fn->key, optarg))
2867 					break;
2868 			}
2869 			if (fn->key) {
2870 				g.td_body = fn->f;
2871 				g.td_type = fn->ty;
2872 			} else {
2873 				D("unrecognised function %s", optarg);
2874 			}
2875 			break;
2876 
2877 		case 'o':	/* data generation options */
2878 			g.options |= atoi(optarg);
2879 			break;
2880 
2881 		case 'a':       /* force affinity */
2882 			g.affinity = atoi(optarg);
2883 			break;
2884 
2885 		case 'i':	/* interface */
2886 			/* a prefix of tap: netmap: or pcap: forces the mode.
2887 			 * otherwise we guess
2888 			 */
2889 			D("interface is %s", optarg);
2890 			if (strlen(optarg) > MAX_IFNAMELEN - 8) {
2891 				D("ifname too long %s", optarg);
2892 				break;
2893 			}
2894 			strcpy(g.ifname, optarg);
2895 			if (!strcmp(optarg, "null")) {
2896 				g.dev_type = DEV_NETMAP;
2897 				g.dummy_send = 1;
2898 			} else if (!strncmp(optarg, "tap:", 4)) {
2899 				g.dev_type = DEV_TAP;
2900 				strcpy(g.ifname, optarg + 4);
2901 			} else if (!strncmp(optarg, "pcap:", 5)) {
2902 				g.dev_type = DEV_PCAP;
2903 				strcpy(g.ifname, optarg + 5);
2904 			} else if (!strncmp(optarg, "netmap:", 7) ||
2905 				   !strncmp(optarg, "vale", 4)) {
2906 				g.dev_type = DEV_NETMAP;
2907 			} else if (!strncmp(optarg, "tap", 3)) {
2908 				g.dev_type = DEV_TAP;
2909 			} else { /* prepend netmap: */
2910 				g.dev_type = DEV_NETMAP;
2911 				sprintf(g.ifname, "netmap:%s", optarg);
2912 			}
2913 			break;
2914 
2915 		case 'I':
2916 			g.options |= OPT_INDIRECT;	/* use indirect buffers */
2917 			break;
2918 
2919 		case 'l':	/* pkt_size */
2920 			if (pkt_size_done) {
2921 				g.pkt_min_size = atoi(optarg);
2922 			} else {
2923 				g.pkt_size = atoi(optarg);
2924 				pkt_size_done = 1;
2925 			}
2926 			break;
2927 
2928 		case 'd':
2929 			g.dst_ip.name = optarg;
2930 			break;
2931 
2932 		case 's':
2933 			g.src_ip.name = optarg;
2934 			break;
2935 
2936 		case 'T':	/* report interval */
2937 			g.report_interval = atoi(optarg);
2938 			break;
2939 
2940 		case 'w':
2941 			g.wait_link = atoi(optarg);
2942 			wait_link_arg = 1;
2943 			break;
2944 
2945 		case 'W':
2946 			g.forever = 0; /* exit RX with no traffic */
2947 			break;
2948 
2949 		case 'b':	/* burst */
2950 			g.burst = atoi(optarg);
2951 			break;
2952 		case 'c':
2953 			g.cpus = atoi(optarg);
2954 			break;
2955 		case 'p':
2956 			g.nthreads = atoi(optarg);
2957 			break;
2958 
2959 		case 'D': /* destination mac */
2960 			g.dst_mac.name = optarg;
2961 			break;
2962 
2963 		case 'S': /* source mac */
2964 			g.src_mac.name = optarg;
2965 			break;
2966 		case 'v':
2967 			verbose++;
2968 			break;
2969 		case 'R':
2970 			g.tx_rate = atoi(optarg);
2971 			break;
2972 		case 'X':
2973 			g.options |= OPT_DUMP;
2974 			break;
2975 		case 'C':
2976 			D("WARNING: the 'C' option is deprecated, use the '+conf:' libnetmap option instead");
2977 			g.nmr_config = strdup(optarg);
2978 			break;
2979 		case 'H':
2980 			g.virt_header = atoi(optarg);
2981 			break;
2982 		case 'P':
2983 			g.packet_file = strdup(optarg);
2984 			break;
2985 		case 'r':
2986 			g.options |= OPT_RUBBISH;
2987 			break;
2988 		case 'z':
2989 			g.options |= OPT_RANDOM_SRC;
2990 			break;
2991 		case 'Z':
2992 			g.options |= OPT_RANDOM_DST;
2993 			break;
2994 		case 'A':
2995 			g.options |= OPT_PPS_STATS;
2996 			break;
2997 		case 'B':
2998 			/* raw packets have4 bytes crc + 20 bytes framing */
2999 			// XXX maybe add an option to pass the IFG
3000 			g.framing = 24 * 8;
3001 			break;
3002 		}
3003 	}
3004 
3005 	if (strlen(g.ifname) <=0 ) {
3006 		D("missing ifname");
3007 		usage(-1);
3008 	}
3009 
3010 	if (g.burst == 0) {
3011 		g.burst = fn->default_burst;
3012 		D("using default burst size: %d", g.burst);
3013 	}
3014 
3015 	g.system_cpus = i = system_ncpus();
3016 	if (g.cpus < 0 || g.cpus > i) {
3017 		D("%d cpus is too high, have only %d cpus", g.cpus, i);
3018 		usage(-1);
3019 	}
3020 	D("running on %d cpus (have %d)", g.cpus, i);
3021 	if (g.cpus == 0)
3022 		g.cpus = i;
3023 
3024 	if (!wait_link_arg && !strncmp(g.ifname, "vale", 4)) {
3025 		g.wait_link = 0;
3026 	}
3027 
3028 	if (g.pkt_size < 16 || g.pkt_size > MAX_PKTSIZE) {
3029 		D("bad pktsize %d [16..%d]\n", g.pkt_size, MAX_PKTSIZE);
3030 		usage(-1);
3031 	}
3032 
3033 	if (g.pkt_min_size > 0 && (g.pkt_min_size < 16 || g.pkt_min_size > g.pkt_size)) {
3034 		D("bad pktminsize %d [16..%d]\n", g.pkt_min_size, g.pkt_size);
3035 		usage(-1);
3036 	}
3037 
3038 	if (g.src_mac.name == NULL) {
3039 		static char mybuf[20] = "00:00:00:00:00:00";
3040 		/* retrieve source mac address. */
3041 		if (source_hwaddr(g.ifname, mybuf) == -1) {
3042 			D("Unable to retrieve source mac");
3043 			// continue, fail later
3044 		}
3045 		g.src_mac.name = mybuf;
3046 	}
3047 	/* extract address ranges */
3048 	if (extract_mac_range(&g.src_mac) || extract_mac_range(&g.dst_mac))
3049 		usage(-1);
3050 	g.options |= extract_ip_range(&g.src_ip, g.af);
3051 	g.options |= extract_ip_range(&g.dst_ip, g.af);
3052 
3053 	if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1
3054 			&& g.virt_header != VIRT_HDR_2) {
3055 		D("bad virtio-net-header length");
3056 		usage(-1);
3057 	}
3058 
3059     if (g.dev_type == DEV_TAP) {
3060 	D("want to use tap %s", g.ifname);
3061 	g.main_fd = tap_alloc(g.ifname);
3062 	if (g.main_fd < 0) {
3063 		D("cannot open tap %s", g.ifname);
3064 		usage(-1);
3065 	}
3066 #ifndef NO_PCAP
3067     } else if (g.dev_type == DEV_PCAP) {
3068 	char pcap_errbuf[PCAP_ERRBUF_SIZE];
3069 
3070 	pcap_errbuf[0] = '\0'; // init the buffer
3071 	g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf);
3072 	if (g.p == NULL) {
3073 		D("cannot open pcap on %s", g.ifname);
3074 		usage(-1);
3075 	}
3076 	g.main_fd = pcap_fileno(g.p);
3077 	D("using pcap on %s fileno %d", g.ifname, g.main_fd);
3078 #endif /* !NO_PCAP */
3079     } else if (g.dummy_send) { /* but DEV_NETMAP */
3080 	D("using a dummy send routine");
3081     } else {
3082 	struct nm_desc base_nmd;
3083 	char errmsg[MAXERRMSG];
3084 	u_int flags;
3085 
3086 	bzero(&base_nmd, sizeof(base_nmd));
3087 
3088 	parse_nmr_config(g.nmr_config, &base_nmd.req);
3089 
3090 	base_nmd.req.nr_flags |= NR_ACCEPT_VNET_HDR;
3091 
3092 	if (nm_parse(g.ifname, &base_nmd, errmsg) < 0) {
3093 		D("Invalid name '%s': %s", g.ifname, errmsg);
3094 		goto out;
3095 	}
3096 
3097 	/*
3098 	 * Open the netmap device using nm_open().
3099 	 *
3100 	 * protocol stack and may cause a reset of the card,
3101 	 * which in turn may take some time for the PHY to
3102 	 * reconfigure. We do the open here to have time to reset.
3103 	 */
3104 	flags = NM_OPEN_IFNAME | NM_OPEN_ARG1 | NM_OPEN_ARG2 |
3105 		NM_OPEN_ARG3 | NM_OPEN_RING_CFG;
3106 	if (g.nthreads > 1) {
3107 		base_nmd.req.nr_flags &= ~NR_REG_MASK;
3108 		base_nmd.req.nr_flags |= NR_REG_ONE_NIC;
3109 		base_nmd.req.nr_ringid = 0;
3110 	}
3111 	g.nmd = nm_open(g.ifname, NULL, flags, &base_nmd);
3112 	if (g.nmd == NULL) {
3113 		D("Unable to open %s: %s", g.ifname, strerror(errno));
3114 		goto out;
3115 	}
3116 	g.main_fd = g.nmd->fd;
3117 	D("mapped %luKB at %p", (unsigned long)(g.nmd->req.nr_memsize>>10),
3118 				g.nmd->mem);
3119 
3120 	if (g.virt_header) {
3121 		/* Set the virtio-net header length, since the user asked
3122 		 * for it explicitely. */
3123 		set_vnet_hdr_len(&g);
3124 	} else {
3125 		/* Check whether the netmap port we opened requires us to send
3126 		 * and receive frames with virtio-net header. */
3127 		get_vnet_hdr_len(&g);
3128 	}
3129 
3130 	/* get num of queues in tx or rx */
3131 	if (g.td_type == TD_TYPE_SENDER)
3132 		devqueues = g.nmd->req.nr_tx_rings;
3133 	else
3134 		devqueues = g.nmd->req.nr_rx_rings;
3135 
3136 	/* validate provided nthreads. */
3137 	if (g.nthreads < 1 || g.nthreads > devqueues) {
3138 		D("bad nthreads %d, have %d queues", g.nthreads, devqueues);
3139 		// continue, fail later
3140 	}
3141 
3142 	if (g.td_type == TD_TYPE_SENDER) {
3143 		int mtu = get_if_mtu(&g);
3144 
3145 		if (mtu > 0 && g.pkt_size > mtu) {
3146 			D("pkt_size (%d) must be <= mtu (%d)",
3147 				g.pkt_size, mtu);
3148 			return -1;
3149 		}
3150 	}
3151 
3152 	if (verbose) {
3153 		struct netmap_if *nifp = g.nmd->nifp;
3154 		struct nmreq *req = &g.nmd->req;
3155 
3156 		D("nifp at offset %d, %d tx %d rx region %d",
3157 		    req->nr_offset, req->nr_tx_rings, req->nr_rx_rings,
3158 		    req->nr_arg2);
3159 		for (i = 0; i <= req->nr_tx_rings; i++) {
3160 			struct netmap_ring *ring = NETMAP_TXRING(nifp, i);
3161 			D("   TX%d at 0x%p slots %d", i,
3162 			    (void *)((char *)ring - (char *)nifp), ring->num_slots);
3163 		}
3164 		for (i = 0; i <= req->nr_rx_rings; i++) {
3165 			struct netmap_ring *ring = NETMAP_RXRING(nifp, i);
3166 			D("   RX%d at 0x%p slots %d", i,
3167 			    (void *)((char *)ring - (char *)nifp), ring->num_slots);
3168 		}
3169 	}
3170 
3171 	/* Print some debug information. */
3172 	fprintf(stdout,
3173 		"%s %s: %d queues, %d threads and %d cpus.\n",
3174 		(g.td_type == TD_TYPE_SENDER) ? "Sending on" :
3175 			((g.td_type == TD_TYPE_RECEIVER) ? "Receiving from" :
3176 			"Working on"),
3177 		g.ifname,
3178 		devqueues,
3179 		g.nthreads,
3180 		g.cpus);
3181 	if (g.td_type == TD_TYPE_SENDER) {
3182 		fprintf(stdout, "%s -> %s (%s -> %s)\n",
3183 			g.src_ip.name, g.dst_ip.name,
3184 			g.src_mac.name, g.dst_mac.name);
3185 	}
3186 
3187 out:
3188 	/* Exit if something went wrong. */
3189 	if (g.main_fd < 0) {
3190 		D("aborting");
3191 		usage(-1);
3192 	}
3193     }
3194 
3195 
3196 	if (g.options) {
3197 		D("--- SPECIAL OPTIONS:%s%s%s%s%s%s\n",
3198 			g.options & OPT_PREFETCH ? " prefetch" : "",
3199 			g.options & OPT_ACCESS ? " access" : "",
3200 			g.options & OPT_MEMCPY ? " memcpy" : "",
3201 			g.options & OPT_INDIRECT ? " indirect" : "",
3202 			g.options & OPT_COPY ? " copy" : "",
3203 			g.options & OPT_RUBBISH ? " rubbish " : "");
3204 	}
3205 
3206 	g.tx_period.tv_sec = g.tx_period.tv_nsec = 0;
3207 	if (g.tx_rate > 0) {
3208 		/* try to have at least something every second,
3209 		 * reducing the burst size to some 0.01s worth of data
3210 		 * (but no less than one full set of fragments)
3211 	 	 */
3212 		uint64_t x;
3213 		int lim = (g.tx_rate)/300;
3214 		if (g.burst > lim)
3215 			g.burst = lim;
3216 		if (g.burst == 0)
3217 			g.burst = 1;
3218 		x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate;
3219 		g.tx_period.tv_nsec = x;
3220 		g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000;
3221 		g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000;
3222 	}
3223 	if (g.td_type == TD_TYPE_SENDER)
3224 	    D("Sending %d packets every  %ld.%09ld s",
3225 			g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec);
3226 	/* Install ^C handler. */
3227 	global_nthreads = g.nthreads;
3228 	sigemptyset(&ss);
3229 	sigaddset(&ss, SIGINT);
3230 	/* block SIGINT now, so that all created threads will inherit the mask */
3231 	if (pthread_sigmask(SIG_BLOCK, &ss, NULL) < 0) {
3232 		D("failed to block SIGINT: %s", strerror(errno));
3233 	}
3234 	start_threads(&g);
3235 	/* Install the handler and re-enable SIGINT for the main thread */
3236 	memset(&sa, 0, sizeof(sa));
3237 	sa.sa_handler = sigint_h;
3238 	if (sigaction(SIGINT, &sa, NULL) < 0) {
3239 		D("failed to install ^C handler: %s", strerror(errno));
3240 	}
3241 
3242 	if (pthread_sigmask(SIG_UNBLOCK, &ss, NULL) < 0) {
3243 		D("failed to re-enable SIGINT: %s", strerror(errno));
3244 	}
3245 	main_thread(&g);
3246 	free(targs);
3247 	return 0;
3248 }
3249 
3250 /* end of file */
3251