xref: /freebsd/tools/tools/netmap/pkt-gen.c (revision d6b92ffa)
1 /*
2  * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
3  * Copyright (C) 2013-2015 Universita` di Pisa. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *   1. Redistributions of source code must retain the above copyright
9  *      notice, this list of conditions and the following disclaimer.
10  *   2. Redistributions in binary form must reproduce the above copyright
11  *      notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * $FreeBSD$
29  * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $
30  *
31  * Example program to show how to build a multithreaded packet
32  * source/sink using the netmap device.
33  *
34  * In this example we create a programmable number of threads
35  * to take care of all the queues of the interface used to
36  * send or receive traffic.
37  *
38  */
39 
40 #define _GNU_SOURCE	/* for CPU_SET() */
41 #include <stdio.h>
42 #define NETMAP_WITH_LIBS
43 #include <net/netmap_user.h>
44 
45 
46 #include <ctype.h>	// isprint()
47 #include <unistd.h>	// sysconf()
48 #include <sys/poll.h>
49 #include <arpa/inet.h>	/* ntohs */
50 #ifndef _WIN32
51 #include <sys/sysctl.h>	/* sysctl */
52 #endif
53 #include <ifaddrs.h>	/* getifaddrs */
54 #include <net/ethernet.h>
55 #include <netinet/in.h>
56 #include <netinet/ip.h>
57 #include <netinet/udp.h>
58 #include <assert.h>
59 #include <math.h>
60 
61 #include <pthread.h>
62 
63 #ifndef NO_PCAP
64 #include <pcap/pcap.h>
65 #endif
66 
67 #include "ctrs.h"
68 
69 #ifdef _WIN32
70 #define cpuset_t        DWORD_PTR   //uint64_t
71 static inline void CPU_ZERO(cpuset_t *p)
72 {
73         *p = 0;
74 }
75 
76 static inline void CPU_SET(uint32_t i, cpuset_t *p)
77 {
78         *p |= 1<< (i & 0x3f);
79 }
80 
81 #define pthread_setaffinity_np(a, b, c) !SetThreadAffinityMask(a, *c)    //((void)a, 0)
82 #define TAP_CLONEDEV	"/dev/tap"
83 #define AF_LINK	18	//defined in winsocks.h
84 #define CLOCK_REALTIME_PRECISE CLOCK_REALTIME
85 #include <net/if_dl.h>
86 
87 /*
88  * Convert an ASCII representation of an ethernet address to
89  * binary form.
90  */
91 struct ether_addr *
92 ether_aton(const char *a)
93 {
94 	int i;
95 	static struct ether_addr o;
96 	unsigned int o0, o1, o2, o3, o4, o5;
97 
98 	i = sscanf(a, "%x:%x:%x:%x:%x:%x", &o0, &o1, &o2, &o3, &o4, &o5);
99 
100 	if (i != 6)
101 		return (NULL);
102 
103 	o.octet[0]=o0;
104 	o.octet[1]=o1;
105 	o.octet[2]=o2;
106 	o.octet[3]=o3;
107 	o.octet[4]=o4;
108 	o.octet[5]=o5;
109 
110 	return ((struct ether_addr *)&o);
111 }
112 
113 /*
114  * Convert a binary representation of an ethernet address to
115  * an ASCII string.
116  */
117 char *
118 ether_ntoa(const struct ether_addr *n)
119 {
120 	int i;
121 	static char a[18];
122 
123 	i = sprintf(a, "%02x:%02x:%02x:%02x:%02x:%02x",
124 	    n->octet[0], n->octet[1], n->octet[2],
125 	    n->octet[3], n->octet[4], n->octet[5]);
126 	return (i < 17 ? NULL : (char *)&a);
127 }
128 #endif /* _WIN32 */
129 
130 #ifdef linux
131 
132 #define cpuset_t        cpu_set_t
133 
134 #define ifr_flagshigh  ifr_flags        /* only the low 16 bits here */
135 #define IFF_PPROMISC   IFF_PROMISC      /* IFF_PPROMISC does not exist */
136 #include <linux/ethtool.h>
137 #include <linux/sockios.h>
138 
139 #define CLOCK_REALTIME_PRECISE CLOCK_REALTIME
140 #include <netinet/ether.h>      /* ether_aton */
141 #include <linux/if_packet.h>    /* sockaddr_ll */
142 #endif  /* linux */
143 
144 #ifdef __FreeBSD__
145 #include <sys/endian.h> /* le64toh */
146 #include <machine/param.h>
147 
148 #include <pthread_np.h> /* pthread w/ affinity */
149 #include <sys/cpuset.h> /* cpu_set */
150 #include <net/if_dl.h>  /* LLADDR */
151 #endif  /* __FreeBSD__ */
152 
153 #ifdef __APPLE__
154 
155 #define cpuset_t        uint64_t        // XXX
156 static inline void CPU_ZERO(cpuset_t *p)
157 {
158         *p = 0;
159 }
160 
161 static inline void CPU_SET(uint32_t i, cpuset_t *p)
162 {
163         *p |= 1<< (i & 0x3f);
164 }
165 
166 #define pthread_setaffinity_np(a, b, c) ((void)a, 0)
167 
168 #define ifr_flagshigh  ifr_flags        // XXX
169 #define IFF_PPROMISC   IFF_PROMISC
170 #include <net/if_dl.h>  /* LLADDR */
171 #define clock_gettime(a,b)      \
172         do {struct timespec t0 = {0,0}; *(b) = t0; } while (0)
173 #endif  /* __APPLE__ */
174 
/* payload used when transmitting from our own buffers (the default) */
const char *default_payload="netmap pkt-gen DIRECT payload\n"
	"http://info.iet.unipi.it/~luigi/netmap/ ";

/* payload used with OPT_INDIRECT (NS_INDIRECT tx slots) */
const char *indirect_payload="netmap pkt-gen indirect payload\n"
	"http://info.iet.unipi.it/~luigi/netmap/ ";

int verbose = 0;	/* verbosity level for D() diagnostics */

#define SKIP_PAYLOAD 1 /* do not check payload. XXX unused */


#define VIRT_HDR_1	10	/* length of a base vnet-hdr */
#define VIRT_HDR_2	12	/* length of the extended vnet-hdr */
#define VIRT_HDR_MAX	VIRT_HDR_2
/* placeholder for the virtio-net header some ports prepend to frames */
struct virt_header {
	uint8_t fields[VIRT_HDR_MAX];
};

#define MAX_BODYSIZE	16384

/*
 * Template for the packets we transmit: optional vnet header, then
 * ethernet/IP/UDP headers followed by the payload. Packed so the
 * struct layout matches the wire format exactly.
 */
struct pkt {
	struct virt_header vh;
	struct ether_header eh;
	struct ip ip;
	struct udphdr udp;
	uint8_t body[MAX_BODYSIZE];	// XXX hardwired
} __attribute__((__packed__));

/* an IPv4 address range with an optional port range */
struct ip_range {
	char *name;	/* the string the range was parsed from */
	uint32_t start, end; /* same as struct in_addr; host byte order */
	uint16_t port0, port1;	/* port range, host byte order */
};

struct mac_range {
	char *name;	/* the string the range was parsed from */
	struct ether_addr start, end;
};

/* ifname can be netmap:foo-xxxx */
#define MAX_IFNAMELEN	64	/* our buffer for ifname */
//#define MAX_PKTSIZE	1536
#define MAX_PKTSIZE	MAX_BODYSIZE	/* XXX: + IP_HDR + ETH_HDR */

/* compact timestamp to fit into 60 byte packet. (enough to obtain RTT) */
struct tstamp {
	uint32_t sec;
	uint32_t nsec;
};
224 
/*
 * global arguments for all threads
 */

struct glob_arg {
	struct ip_range src_ip;
	struct ip_range dst_ip;
	struct mac_range dst_mac;
	struct mac_range src_mac;
	int pkt_size;
	int burst;
	int forever;
	uint64_t npackets;	/* total packets to send (0 = no limit) */
	int frags;	/* fragments per packet */
	int nthreads;
	int cpus;	/* cpus used for running */
	int system_cpus;	/* cpus on the system */

	int options;	/* testing; bitmask of the OPT_* flags below */
#define OPT_PREFETCH	1
#define OPT_ACCESS	2
#define OPT_COPY	4
#define OPT_MEMCPY	8
#define OPT_TS		16	/* add a timestamp */
#define OPT_INDIRECT	32	/* use indirect buffers, tx only */
#define OPT_DUMP	64	/* dump rx/tx traffic */
#define OPT_RUBBISH	256	/* send wathever the buffers contain */
#define OPT_RANDOM_SRC  512
#define OPT_RANDOM_DST  1024
#define OPT_PPS_STATS   2048
	int dev_type;	/* one of enum dev_type below */
#ifndef NO_PCAP
	pcap_t *p;	/* pcap handle when using a pcap device */
#endif

	int tx_rate;	/* requested transmit rate (rate limiting) */
	struct timespec tx_period;

	int affinity;
	int main_fd;
	struct nm_desc *nmd;
	int report_interval;		/* milliseconds between prints */
	void *(*td_body)(void *);	/* thread body (sender/receiver/pinger/...) */
	int td_type;
	void *mmap_addr;
	char ifname[MAX_IFNAMELEN];
	char *nmr_config;	/* ring/slot config string, see parse_nmr_config() */
	int dummy_send;
	int virt_header;	/* send also the virt_header */
	int extra_bufs;		/* goes in nr_arg3 */
	int extra_pipes;	/* goes in nr_arg1 */
	char *packet_file;	/* -P option */
#define	STATS_WIN	15
	int win_idx;
	int64_t win[STATS_WIN];
};
enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP };


/*
 * Arguments for a new thread. The same structure is used by
 * the source and the sink
 */
struct targ {
	struct glob_arg *g;	/* shared global arguments */
	int used;		/* nonzero while the thread slot is in use */
	int completed;
	int cancel;		/* set by sigint_h to ask the thread to stop */
	int fd;
	struct nm_desc *nmd;
	/* these ought to be volatile, but they are
	 * only sampled and errors should not accumulate
	 */
	struct my_ctrs ctr;	/* per-thread counters */

	struct timespec tic, toc;	/* start/stop timestamps */
	int me;			/* thread index */
	pthread_t thread;
	int affinity;		/* CPU to pin this thread to (-1 = none) */

	struct pkt pkt;		/* template packet for this thread */
	void *frame;		/* alternative frame (e.g. read from a pcap file) */
};
308 
309 
/*
 * extract the extremes from a range of ipv4 addresses.
 * addr_lo[-addr_hi][:port_lo[-port_hi]]
 */
static void
extract_ip_range(struct ip_range *r)
{
	char *ap, *pp;
	struct in_addr a;

	if (verbose)
		D("extract IP range from %s", r->name);
	r->port0 = r->port1 = 0;
	r->start = r->end = 0;

	/* the first - splits start/end of range */
	ap = index(r->name, '-');	/* do we have ports ? */
	if (ap) {
		*ap++ = '\0';	/* terminate the low half; ap points at the high half */
	}
	/* grab the initial values (mandatory) */
	pp = index(r->name, ':');
	if (pp) {
		*pp++ = '\0';
		r->port0 = r->port1 = strtol(pp, NULL, 0);
	};
	inet_aton(r->name, &a);
	r->start = r->end = ntohl(a.s_addr);	/* kept in host byte order */
	if (ap) {
		/* parse the upper bound: [addr_hi][:port_hi] */
		pp = index(ap, ':');
		if (pp) {
			*pp++ = '\0';
			if (*pp)
				r->port1 = strtol(pp, NULL, 0);
		}
		if (*ap) {
			inet_aton(ap, &a);
			r->end = ntohl(a.s_addr);
		}
	}
	/* normalize so that port0 <= port1 and start <= end */
	if (r->port0 > r->port1) {
		uint16_t tmp = r->port0;
		r->port0 = r->port1;
		r->port1 = tmp;
	}
	if (r->start > r->end) {
		uint32_t tmp = r->start;
		r->start = r->end;
		r->end = tmp;
	}
	{
		struct in_addr a;
		char buf1[16]; // one ip address

		/* inet_ntoa returns a static buffer, so save one end first */
		a.s_addr = htonl(r->end);
		strncpy(buf1, inet_ntoa(a), sizeof(buf1));
		a.s_addr = htonl(r->start);
		if (1)
		    D("range is %s:%d to %s:%d",
			inet_ntoa(a), r->port0, buf1, r->port1);
	}
}
372 
373 static void
374 extract_mac_range(struct mac_range *r)
375 {
376 	if (verbose)
377 	    D("extract MAC range from %s", r->name);
378 	bcopy(ether_aton(r->name), &r->start, 6);
379 	bcopy(ether_aton(r->name), &r->end, 6);
380 #if 0
381 	bcopy(targ->src_mac, eh->ether_shost, 6);
382 	p = index(targ->g->src_mac, '-');
383 	if (p)
384 		targ->src_mac_range = atoi(p+1);
385 
386 	bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6);
387 	bcopy(targ->dst_mac, eh->ether_dhost, 6);
388 	p = index(targ->g->dst_mac, '-');
389 	if (p)
390 		targ->dst_mac_range = atoi(p+1);
391 #endif
392 	if (verbose)
393 		D("%s starts at %s", r->name, ether_ntoa(&r->start));
394 }
395 
396 static struct targ *targs;
397 static int global_nthreads;
398 
399 /* control-C handler */
400 static void
401 sigint_h(int sig)
402 {
403 	int i;
404 
405 	(void)sig;	/* UNUSED */
406 	D("received control-C on thread %p", (void *)pthread_self());
407 	for (i = 0; i < global_nthreads; i++) {
408 		targs[i].cancel = 1;
409 	}
410 }
411 
/*
 * sysctl wrapper to return the number of active CPUs.
 * Falls back to 1 if the platform query fails.
 */
static int
system_ncpus(void)
{
	int ncpus;
#if defined (__FreeBSD__)
	int mib[2] = { CTL_HW, HW_NCPU };
	/* was sizeof(mib) (8 bytes) for a 4-byte int output buffer */
	size_t len = sizeof(ncpus);

	if (sysctl(mib, 2, &ncpus, &len, NULL, 0) != 0)
		ncpus = 1;	/* previously left uninitialized on failure */
#elif defined(linux)
	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(_WIN32)
	{
		SYSTEM_INFO sysinfo;
		GetSystemInfo(&sysinfo);
		ncpus = sysinfo.dwNumberOfProcessors;
	}
#else /* others */
	ncpus = 1;
#endif /* others */
	return (ncpus);
}
434 
435 #ifdef __linux__
436 #define sockaddr_dl    sockaddr_ll
437 #define sdl_family     sll_family
438 #define AF_LINK        AF_PACKET
439 #define LLADDR(s)      s->sll_addr;
440 #include <linux/if_tun.h>
441 #define TAP_CLONEDEV	"/dev/net/tun"
442 #endif /* __linux__ */
443 
444 #ifdef __FreeBSD__
445 #include <net/if_tun.h>
446 #define TAP_CLONEDEV	"/dev/tap"
447 #endif /* __FreeBSD */
448 
449 #ifdef __APPLE__
450 // #warning TAP not supported on apple ?
451 #include <net/if_utun.h>
452 #define TAP_CLONEDEV	"/dev/tap"
453 #endif /* __APPLE__ */
454 
455 
456 /*
457  * parse the vale configuration in conf and put it in nmr.
458  * Return the flag set if necessary.
459  * The configuration may consist of 0 to 4 numbers separated
460  * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings.
461  * Missing numbers or zeroes stand for default values.
462  * As an additional convenience, if exactly one number
463  * is specified, then this is assigned to both #tx-slots and #rx-slots.
464  * If there is no 4th number, then the 3rd is assigned to both #tx-rings
465  * and #rx-rings.
466  */
467 int
468 parse_nmr_config(const char* conf, struct nmreq *nmr)
469 {
470 	char *w, *tok;
471 	int i, v;
472 
473 	nmr->nr_tx_rings = nmr->nr_rx_rings = 0;
474 	nmr->nr_tx_slots = nmr->nr_rx_slots = 0;
475 	if (conf == NULL || ! *conf)
476 		return 0;
477 	w = strdup(conf);
478 	for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) {
479 		v = atoi(tok);
480 		switch (i) {
481 		case 0:
482 			nmr->nr_tx_slots = nmr->nr_rx_slots = v;
483 			break;
484 		case 1:
485 			nmr->nr_rx_slots = v;
486 			break;
487 		case 2:
488 			nmr->nr_tx_rings = nmr->nr_rx_rings = v;
489 			break;
490 		case 3:
491 			nmr->nr_rx_rings = v;
492 			break;
493 		default:
494 			D("ignored config: %s", tok);
495 			break;
496 		}
497 	}
498 	D("txr %d txd %d rxr %d rxd %d",
499 			nmr->nr_tx_rings, nmr->nr_tx_slots,
500 			nmr->nr_rx_rings, nmr->nr_rx_slots);
501 	free(w);
502 	return (nmr->nr_tx_rings || nmr->nr_tx_slots ||
503                         nmr->nr_rx_rings || nmr->nr_rx_slots) ?
504 		NM_OPEN_RING_CFG : 0;
505 }
506 
507 
508 /*
509  * locate the src mac address for our interface, put it
510  * into the user-supplied buffer. return 0 if ok, -1 on error.
511  */
512 static int
513 source_hwaddr(const char *ifname, char *buf)
514 {
515 	struct ifaddrs *ifaphead, *ifap;
516 	int l = sizeof(ifap->ifa_name);
517 
518 	if (getifaddrs(&ifaphead) != 0) {
519 		D("getifaddrs %s failed", ifname);
520 		return (-1);
521 	}
522 
523 	for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) {
524 		struct sockaddr_dl *sdl =
525 			(struct sockaddr_dl *)ifap->ifa_addr;
526 		uint8_t *mac;
527 
528 		if (!sdl || sdl->sdl_family != AF_LINK)
529 			continue;
530 		if (strncmp(ifap->ifa_name, ifname, l) != 0)
531 			continue;
532 		mac = (uint8_t *)LLADDR(sdl);
533 		sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x",
534 			mac[0], mac[1], mac[2],
535 			mac[3], mac[4], mac[5]);
536 		if (verbose)
537 			D("source hwaddr %s", buf);
538 		break;
539 	}
540 	freeifaddrs(ifaphead);
541 	return ifap ? 0 : 1;
542 }
543 
544 
545 /* set the thread affinity. */
546 static int
547 setaffinity(pthread_t me, int i)
548 {
549 	cpuset_t cpumask;
550 
551 	if (i == -1)
552 		return 0;
553 
554 	/* Set thread affinity affinity.*/
555 	CPU_ZERO(&cpumask);
556 	CPU_SET(i, &cpumask);
557 
558 	if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) {
559 		D("Unable to set affinity: %s", strerror(errno));
560 		return 1;
561 	}
562 	return 0;
563 }
564 
/*
 * Compute the Internet checksum (RFC 1071 style, with end-around
 * carry folded at each step) over len bytes, continuing from sum.
 */
static uint16_t
checksum(const void *data, uint16_t len, uint32_t sum)
{
	const uint8_t *addr = data;
	uint32_t i;

	/*
	 * Sum 16-bit words in network (big-endian) order. The bytes
	 * are combined explicitly instead of dereferencing a cast
	 * u_int16_t pointer as before, which was undefined behavior
	 * (misaligned access / strict-aliasing violation) and is
	 * byte-for-byte equivalent on any endianness.
	 */
	for (i = 0; i < (len & ~1U); i += 2) {
		sum += (uint32_t)((addr[i] << 8) | addr[i + 1]);
		if (sum > 0xFFFF)
			sum -= 0xFFFF;	/* end-around carry */
	}
	/*
	 * If there's a single byte left over, checksum it, too.
	 * Network byte order is big-endian, so the remaining byte is
	 * the high byte.
	 */
	if (i < len) {
		sum += addr[i] << 8;
		if (sum > 0xFFFF)
			sum -= 0xFFFF;
	}
	return sum;
}
590 
/* One's-complement the folded checksum and return it in network order. */
static u_int16_t
wrapsum(u_int32_t sum)
{
	u_int16_t folded = ~sum & 0xFFFF;

	return (htons(folded));
}
597 
598 /* Check the payload of the packet for errors (use it for debug).
599  * Look for consecutive ascii representations of the size of the packet.
600  */
601 static void
602 dump_payload(const char *_p, int len, struct netmap_ring *ring, int cur)
603 {
604 	char buf[128];
605 	int i, j, i0;
606 	const unsigned char *p = (const unsigned char *)_p;
607 
608 	/* get the length in ASCII of the length of the packet. */
609 
610 	printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n",
611 		ring, cur, ring->slot[cur].buf_idx,
612 		ring->slot[cur].flags, len);
613 	/* hexdump routine */
614 	for (i = 0; i < len; ) {
615 		memset(buf, sizeof(buf), ' ');
616 		sprintf(buf, "%5d: ", i);
617 		i0 = i;
618 		for (j=0; j < 16 && i < len; i++, j++)
619 			sprintf(buf+7+j*3, "%02x ", (uint8_t)(p[i]));
620 		i = i0;
621 		for (j=0; j < 16 && i < len; i++, j++)
622 			sprintf(buf+7+j + 48, "%c",
623 				isprint(p[i]) ? p[i] : '.');
624 		printf("%s\n", buf);
625 	}
626 }
627 
628 /*
629  * Fill a packet with some payload.
630  * We create a UDP packet so the payload starts at
631  *	14+20+8 = 42 bytes.
632  */
633 #ifdef __linux__
634 #define uh_sport source
635 #define uh_dport dest
636 #define uh_ulen len
637 #define uh_sum check
638 #endif /* linux */
639 
640 /*
641  * increment the addressed in the packet,
642  * starting from the least significant field.
643  *	DST_IP DST_PORT SRC_IP SRC_PORT
644  */
645 static void
646 update_addresses(struct pkt *pkt, struct glob_arg *g)
647 {
648 	uint32_t a;
649 	uint16_t p;
650 	struct ip *ip = &pkt->ip;
651 	struct udphdr *udp = &pkt->udp;
652 
653     do {
654     	/* XXX for now it doesn't handle non-random src, random dst */
655 	if (g->options & OPT_RANDOM_SRC) {
656 		udp->uh_sport = random();
657 		ip->ip_src.s_addr = random();
658 	} else {
659 		p = ntohs(udp->uh_sport);
660 		if (p < g->src_ip.port1) { /* just inc, no wrap */
661 			udp->uh_sport = htons(p + 1);
662 			break;
663 		}
664 		udp->uh_sport = htons(g->src_ip.port0);
665 
666 		a = ntohl(ip->ip_src.s_addr);
667 		if (a < g->src_ip.end) { /* just inc, no wrap */
668 			ip->ip_src.s_addr = htonl(a + 1);
669 			break;
670 		}
671 		ip->ip_src.s_addr = htonl(g->src_ip.start);
672 
673 		udp->uh_sport = htons(g->src_ip.port0);
674 	}
675 
676 	if (g->options & OPT_RANDOM_DST) {
677 		udp->uh_dport = random();
678 		ip->ip_dst.s_addr = random();
679 	} else {
680 		p = ntohs(udp->uh_dport);
681 		if (p < g->dst_ip.port1) { /* just inc, no wrap */
682 			udp->uh_dport = htons(p + 1);
683 			break;
684 		}
685 		udp->uh_dport = htons(g->dst_ip.port0);
686 
687 		a = ntohl(ip->ip_dst.s_addr);
688 		if (a < g->dst_ip.end) { /* just inc, no wrap */
689 			ip->ip_dst.s_addr = htonl(a + 1);
690 			break;
691 		}
692 	}
693 	ip->ip_dst.s_addr = htonl(g->dst_ip.start);
694     } while (0);
695     // update checksum
696 }
697 
/*
 * initialize one packet and prepare for the next one.
 * The copy could be done better instead of repeating it each time.
 * Builds the ethernet/IP/UDP headers and checksums in targ->pkt,
 * or (with -P) loads a raw frame from a pcap file into targ->frame.
 */
static void
initialize_packet(struct targ *targ)
{
	struct pkt *pkt = &targ->pkt;
	struct ether_header *eh;
	struct ip *ip;
	struct udphdr *udp;
	/* paylen covers everything after the IP header: UDP header + data */
	uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip);
	const char *payload = targ->g->options & OPT_INDIRECT ?
		indirect_payload : default_payload;
	int i, l0 = strlen(payload);

#ifndef NO_PCAP
	char errbuf[PCAP_ERRBUF_SIZE];
	pcap_t *file;
	struct pcap_pkthdr *header;
	const unsigned char *packet;

	/* Read a packet from a PCAP file if asked. */
	if (targ->g->packet_file != NULL) {
		if ((file = pcap_open_offline(targ->g->packet_file,
			    errbuf)) == NULL)
			D("failed to open pcap file %s",
			    targ->g->packet_file);
		if (pcap_next_ex(file, &header, &packet) < 0)
			D("failed to read packet from %s",
			    targ->g->packet_file);
		if ((targ->frame = malloc(header->caplen)) == NULL)
			D("out of memory");
		bcopy(packet, (unsigned char *)targ->frame, header->caplen);
		/* the captured frame replaces the template entirely */
		targ->g->pkt_size = header->caplen;
		pcap_close(file);
		return;
	}
#endif

	/* create a nice NUL-terminated string: repeat the payload text
	 * until the body is full, truncating the last copy */
	for (i = 0; i < paylen; i += l0) {
		if (l0 > paylen - i)
			l0 = paylen - i; // last round
		bcopy(payload, pkt->body + i, l0);
	}
	pkt->body[i-1] = '\0';
	ip = &pkt->ip;

	/* prepare the headers */
        ip->ip_v = IPVERSION;
        ip->ip_hl = 5;
        ip->ip_id = 0;
        ip->ip_tos = IPTOS_LOWDELAY;
	/* network byte order (ntohs and htons perform the same swap) */
	ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh));
        ip->ip_id = 0;
        ip->ip_off = htons(IP_DF); /* Don't fragment */
        ip->ip_ttl = IPDEFTTL;
	ip->ip_p = IPPROTO_UDP;
	ip->ip_dst.s_addr = htonl(targ->g->dst_ip.start);
	ip->ip_src.s_addr = htonl(targ->g->src_ip.start);
	ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0));


	udp = &pkt->udp;
        udp->uh_sport = htons(targ->g->src_ip.port0);
        udp->uh_dport = htons(targ->g->dst_ip.port0);
	udp->uh_ulen = htons(paylen);
	/* Magic: taken from sbin/dhclient/packet.c */
	/* UDP checksum: header + payload + IPv4 pseudo-header
	 * (src/dst addresses, protocol, UDP length) */
	udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp),
                    checksum(pkt->body,
                        paylen - sizeof(*udp),
                        checksum(&ip->ip_src, 2 * sizeof(ip->ip_src),
                            IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen)
                        )
                    )
                ));

	eh = &pkt->eh;
	bcopy(&targ->g->src_mac.start, eh->ether_shost, 6);
	bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6);
	eh->ether_type = htons(ETHERTYPE_IP);

	bzero(&pkt->vh, sizeof(pkt->vh));
	// dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0);
}
784 
785 static void
786 get_vnet_hdr_len(struct glob_arg *g)
787 {
788 	struct nmreq req;
789 	int err;
790 
791 	memset(&req, 0, sizeof(req));
792 	bcopy(g->nmd->req.nr_name, req.nr_name, sizeof(req.nr_name));
793 	req.nr_version = NETMAP_API;
794 	req.nr_cmd = NETMAP_VNET_HDR_GET;
795 	err = ioctl(g->main_fd, NIOCREGIF, &req);
796 	if (err) {
797 		D("Unable to get virtio-net header length");
798 		return;
799 	}
800 
801 	g->virt_header = req.nr_arg1;
802 	if (g->virt_header) {
803 		D("Port requires virtio-net header, length = %d",
804 		  g->virt_header);
805 	}
806 }
807 
808 static void
809 set_vnet_hdr_len(struct glob_arg *g)
810 {
811 	int err, l = g->virt_header;
812 	struct nmreq req;
813 
814 	if (l == 0)
815 		return;
816 
817 	memset(&req, 0, sizeof(req));
818 	bcopy(g->nmd->req.nr_name, req.nr_name, sizeof(req.nr_name));
819 	req.nr_version = NETMAP_API;
820 	req.nr_cmd = NETMAP_BDG_VNET_HDR;
821 	req.nr_arg1 = l;
822 	err = ioctl(g->main_fd, NIOCREGIF, &req);
823 	if (err) {
824 		D("Unable to set virtio-net header length %d", l);
825 	}
826 }
827 
828 
/*
 * create and enqueue a batch of packets on a ring.
 * On the last one set NS_REPORT to tell the driver to generate
 * an interrupt when done.
 * Returns the number of slots actually filled (<= count).
 */
static int
send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame,
		int size, struct glob_arg *g, u_int count, int options,
		u_int nfrags)
{
	u_int n, sent, cur = ring->cur;
	u_int fcnt;

	/* never enqueue more than the ring currently has room for */
	n = nm_ring_space(ring);
	if (n < count)
		count = n;
	if (count < nfrags) {
		D("truncating packet, no room for frags %d %d",
				count, nfrags);
	}
#if 0
	if (options & (OPT_COPY | OPT_PREFETCH) ) {
		for (sent = 0; sent < count; sent++) {
			struct netmap_slot *slot = &ring->slot[cur];
			char *p = NETMAP_BUF(ring, slot->buf_idx);

			__builtin_prefetch(p);
			cur = nm_ring_next(ring, cur);
		}
		cur = ring->cur;
	}
#endif
	/* fcnt counts down the fragments of the current packet */
	for (fcnt = nfrags, sent = 0; sent < count; sent++) {
		struct netmap_slot *slot = &ring->slot[cur];
		char *p = NETMAP_BUF(ring, slot->buf_idx);
		int buf_changed = slot->flags & NS_BUF_CHANGED;

		slot->flags = 0;
		if (options & OPT_RUBBISH) {
			/* do nothing: transmit whatever the buffer holds */
		} else if (options & OPT_INDIRECT) {
			/* point the slot at our frame instead of copying */
			slot->flags |= NS_INDIRECT;
			slot->ptr = (uint64_t)((uintptr_t)frame);
		} else if ((options & OPT_COPY) || buf_changed) {
			nm_pkt_copy(frame, p, size);
			/* advance addresses once per packet, not per fragment */
			if (fcnt == nfrags)
				update_addresses(pkt, g);
		} else if (options & OPT_MEMCPY) {
			memcpy(p, frame, size);
			if (fcnt == nfrags)
				update_addresses(pkt, g);
		} else if (options & OPT_PREFETCH) {
			__builtin_prefetch(p);
		}
		if (options & OPT_DUMP)
			dump_payload(p, size, ring, cur);
		slot->len = size;
		/* every fragment except the last carries NS_MOREFRAG */
		if (--fcnt > 0)
			slot->flags |= NS_MOREFRAG;
		else
			fcnt = nfrags;	/* start a new packet */
		if (sent == count - 1) {
			/* last slot: close the packet and request an irq */
			slot->flags &= ~NS_MOREFRAG;
			slot->flags |= NS_REPORT;
		}
		cur = nm_ring_next(ring, cur);
	}
	/* publish the filled slots to the kernel */
	ring->head = ring->cur = cur;

	return (sent);
}
900 
901 /*
902  * Index of the highest bit set
903  */
904 uint32_t
905 msb64(uint64_t x)
906 {
907 	uint64_t m = 1ULL << 63;
908 	int i;
909 
910 	for (i = 63; i >= 0; i--, m >>=1)
911 		if (m & x)
912 			return i;
913 	return 0;
914 }
915 
916 /*
917  * Send a packet, and wait for a response.
918  * The payload (after UDP header, ofs 42) has a 4-byte sequence
919  * followed by a struct timeval (or bintime?)
920  */
921 #define	PAY_OFS	42	/* where in the pkt... */
922 
923 static void *
924 pinger_body(void *data)
925 {
926 	struct targ *targ = (struct targ *) data;
927 	struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
928 	struct netmap_if *nifp = targ->nmd->nifp;
929 	int i, rx = 0;
930 	void *frame;
931 	int size;
932 	struct timespec ts, now, last_print;
933 	uint64_t sent = 0, n = targ->g->npackets;
934 	uint64_t count = 0, t_cur, t_min = ~0, av = 0;
935 	uint64_t buckets[64];	/* bins for delays, ns */
936 
937 	frame = &targ->pkt;
938 	frame += sizeof(targ->pkt.vh) - targ->g->virt_header;
939 	size = targ->g->pkt_size + targ->g->virt_header;
940 
941 
942 	if (targ->g->nthreads > 1) {
943 		D("can only ping with 1 thread");
944 		return NULL;
945 	}
946 
947 	bzero(&buckets, sizeof(buckets));
948 	clock_gettime(CLOCK_REALTIME_PRECISE, &last_print);
949 	now = last_print;
950 	while (!targ->cancel && (n == 0 || sent < n)) {
951 		struct netmap_ring *ring = NETMAP_TXRING(nifp, 0);
952 		struct netmap_slot *slot;
953 		char *p;
954 	    for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? */
955 		slot = &ring->slot[ring->cur];
956 		slot->len = size;
957 		p = NETMAP_BUF(ring, slot->buf_idx);
958 
959 		if (nm_ring_empty(ring)) {
960 			D("-- ouch, cannot send");
961 		} else {
962 			struct tstamp *tp;
963 			nm_pkt_copy(frame, p, size);
964 			clock_gettime(CLOCK_REALTIME_PRECISE, &ts);
965 			bcopy(&sent, p+42, sizeof(sent));
966 			tp = (struct tstamp *)(p+46);
967 			tp->sec = (uint32_t)ts.tv_sec;
968 			tp->nsec = (uint32_t)ts.tv_nsec;
969 			sent++;
970 			ring->head = ring->cur = nm_ring_next(ring, ring->cur);
971 		}
972 	    }
973 		/* should use a parameter to decide how often to send */
974 		if (poll(&pfd, 1, 3000) <= 0) {
975 			D("poll error/timeout on queue %d: %s", targ->me,
976 				strerror(errno));
977 			continue;
978 		}
979 		/* see what we got back */
980 		for (i = targ->nmd->first_tx_ring;
981 			i <= targ->nmd->last_tx_ring; i++) {
982 			ring = NETMAP_RXRING(nifp, i);
983 			while (!nm_ring_empty(ring)) {
984 				uint32_t seq;
985 				struct tstamp *tp;
986 				int pos;
987 
988 				slot = &ring->slot[ring->cur];
989 				p = NETMAP_BUF(ring, slot->buf_idx);
990 
991 				clock_gettime(CLOCK_REALTIME_PRECISE, &now);
992 				bcopy(p+42, &seq, sizeof(seq));
993 				tp = (struct tstamp *)(p+46);
994 				ts.tv_sec = (time_t)tp->sec;
995 				ts.tv_nsec = (long)tp->nsec;
996 				ts.tv_sec = now.tv_sec - ts.tv_sec;
997 				ts.tv_nsec = now.tv_nsec - ts.tv_nsec;
998 				if (ts.tv_nsec < 0) {
999 					ts.tv_nsec += 1000000000;
1000 					ts.tv_sec--;
1001 				}
1002 				if (0) D("seq %d/%lu delta %d.%09d", seq, sent,
1003 					(int)ts.tv_sec, (int)ts.tv_nsec);
1004 				t_cur = ts.tv_sec * 1000000000UL + ts.tv_nsec;
1005 				if (t_cur < t_min)
1006 					t_min = t_cur;
1007 				count ++;
1008 				av += t_cur;
1009 				pos = msb64(t_cur);
1010 				buckets[pos]++;
1011 				/* now store it in a bucket */
1012 				ring->head = ring->cur = nm_ring_next(ring, ring->cur);
1013 				rx++;
1014 			}
1015 		}
1016 		//D("tx %d rx %d", sent, rx);
1017 		//usleep(100000);
1018 		ts.tv_sec = now.tv_sec - last_print.tv_sec;
1019 		ts.tv_nsec = now.tv_nsec - last_print.tv_nsec;
1020 		if (ts.tv_nsec < 0) {
1021 			ts.tv_nsec += 1000000000;
1022 			ts.tv_sec--;
1023 		}
1024 		if (ts.tv_sec >= 1) {
1025 			D("count %d RTT: min %d av %d ns",
1026 				(int)count, (int)t_min, (int)(av/count));
1027 			int k, j, kmin;
1028 			char buf[512];
1029 
1030 			for (kmin = 0; kmin < 64; kmin ++)
1031 				if (buckets[kmin])
1032 					break;
1033 			for (k = 63; k >= kmin; k--)
1034 				if (buckets[k])
1035 					break;
1036 			buf[0] = '\0';
1037 			for (j = kmin; j <= k; j++)
1038 				sprintf(buf, "%s %5d", buf, (int)buckets[j]);
1039 			D("k: %d .. %d\n\t%s", 1<<kmin, 1<<k, buf);
1040 			bzero(&buckets, sizeof(buckets));
1041 			count = 0;
1042 			av = 0;
1043 			t_min = ~0;
1044 			last_print = now;
1045 		}
1046 	}
1047 
1048 	/* reset the ``used`` flag. */
1049 	targ->used = 0;
1050 
1051 	return NULL;
1052 }
1053 
1054 
/*
 * reply to ping requests: bounce each received frame back out on
 * tx ring 0 with the source/destination ethernet addresses swapped.
 */
static void *
ponger_body(void *data)
{
	struct targ *targ = (struct targ *) data;
	struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
	struct netmap_if *nifp = targ->nmd->nifp;
	struct netmap_ring *txring, *rxring;
	int i, rx = 0;
	uint64_t sent = 0, n = targ->g->npackets;	/* n == 0: no limit */

	if (targ->g->nthreads > 1) {
		D("can only reply ping with 1 thread");
		return NULL;
	}
	D("understood ponger %lu but don't know how to do it", n);
	while (!targ->cancel && (n == 0 || sent < n)) {
		uint32_t txcur, txavail;
//#define BUSYWAIT
#ifdef BUSYWAIT
		ioctl(pfd.fd, NIOCRXSYNC, NULL);
#else
		if (poll(&pfd, 1, 1000) <= 0) {
			D("poll error/timeout on queue %d: %s", targ->me,
				strerror(errno));
			continue;
		}
#endif
		txring = NETMAP_TXRING(nifp, 0);
		txcur = txring->cur;
		txavail = nm_ring_space(txring);
		/* see what we got back */
		for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) {
			rxring = NETMAP_RXRING(nifp, i);
			while (!nm_ring_empty(rxring)) {
				uint16_t *spkt, *dpkt;
				uint32_t cur = rxring->cur;
				struct netmap_slot *slot = &rxring->slot[cur];
				char *src, *dst;
				src = NETMAP_BUF(rxring, slot->buf_idx);
				//D("got pkt %p of size %d", src, slot->len);
				rxring->head = rxring->cur = nm_ring_next(rxring, cur);
				rx++;
				if (txavail == 0)
					continue;	/* tx ring full: consume rx, drop reply */
				dst = NETMAP_BUF(txring,
				    txring->slot[txcur].buf_idx);
				/* copy... */
				dpkt = (uint16_t *)dst;
				spkt = (uint16_t *)src;
				nm_pkt_copy(src, dst, slot->len);
				/* swap the 6-byte dst/src ethernet addresses,
				 * handled as three 16-bit words each */
				dpkt[0] = spkt[3];
				dpkt[1] = spkt[4];
				dpkt[2] = spkt[5];
				dpkt[3] = spkt[0];
				dpkt[4] = spkt[1];
				dpkt[5] = spkt[2];
				txring->slot[txcur].len = slot->len;
				/* XXX swap src dst mac */
				txcur = nm_ring_next(txring, txcur);
				txavail--;
				sent++;
			}
		}
		/* publish the queued replies to the kernel */
		txring->head = txring->cur = txcur;
		targ->ctr.pkts = sent;
#ifdef BUSYWAIT
		ioctl(pfd.fd, NIOCTXSYNC, NULL);
#endif
		//D("tx %d rx %d", sent, rx);
	}

	/* reset the ``used`` flag. */
	targ->used = 0;

	return NULL;
}
1134 
1135 
1136 /*
1137  * wait until ts, either busy or sleeping if more than 1ms.
1138  * Return wakeup time.
1139  */
1140 static struct timespec
1141 wait_time(struct timespec ts)
1142 {
1143 	for (;;) {
1144 		struct timespec w, cur;
1145 		clock_gettime(CLOCK_REALTIME_PRECISE, &cur);
1146 		w = timespec_sub(ts, cur);
1147 		if (w.tv_sec < 0)
1148 			return cur;
1149 		else if (w.tv_sec > 0 || w.tv_nsec > 1000000)
1150 			poll(NULL, 0, 1);
1151 	}
1152 }
1153 
/*
 * Body of a sender thread: transmit ``npackets / nthreads`` packets
 * (0 == unlimited) on the device bound to this thread.
 *
 * Three backends are supported: a tap file descriptor (write()),
 * libpcap (pcap_inject()) and netmap rings (send_packets()).  When a
 * tx rate was requested, transmission is paced in bursts using
 * wait_time().  Progress counters are published in targ->ctr for the
 * main thread to report.
 */
static void *
sender_body(void *data)
{
	struct targ *targ = (struct targ *) data;
	struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT };
	struct netmap_if *nifp;
	struct netmap_ring *txring = NULL;
	int i;
	uint64_t n = targ->g->npackets / targ->g->nthreads;	/* per-thread quota */
	uint64_t sent = 0;
	uint64_t event = 0;
	int options = targ->g->options | OPT_COPY;	/* start in copy mode */
	struct timespec nexttime = { 0, 0}; // XXX silence compiler
	int rate_limit = targ->g->tx_rate;
	struct pkt *pkt = &targ->pkt;
	void *frame;
	int size;

	if (targ->frame == NULL) {
		/* use the template packet, skipping the unused part of the
		 * virtio-net header */
		frame = pkt;
		frame += sizeof(pkt->vh) - targ->g->virt_header;
		size = targ->g->pkt_size + targ->g->virt_header;
	} else {
		/* frame preloaded elsewhere (e.g. from a pcap file) */
		frame = targ->frame;
		size = targ->g->pkt_size;
	}

	D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd);
	if (setaffinity(targ->thread, targ->affinity))
		goto quit;

	/* main loop.*/
	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
	if (rate_limit) {
		/* start on a whole-second boundary ~2s from now so that
		 * multiple threads pace against the same timeline */
		targ->tic = timespec_add(targ->tic, (struct timespec){2,0});
		targ->tic.tv_nsec = 0;
		wait_time(targ->tic);
		nexttime = targ->tic;
	}
        if (targ->g->dev_type == DEV_TAP) {
	    D("writing to file desc %d", targ->g->main_fd);

	    for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
		if (write(targ->g->main_fd, frame, size) != -1)
			sent++;
		update_addresses(pkt, targ->g);
		/* refresh the shared counters every ~10000 iterations */
		if (i > 10000) {
			targ->ctr.pkts = sent;
			targ->ctr.bytes = sent*size;
			targ->ctr.events = sent;
			i = 0;
		}
	    }
#ifndef NO_PCAP
    } else if (targ->g->dev_type == DEV_PCAP) {
	    pcap_t *p = targ->g->p;

	    for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
		if (pcap_inject(p, frame, size) != -1)
			sent++;
		update_addresses(pkt, targ->g);
		/* refresh the shared counters every ~10000 iterations */
		if (i > 10000) {
			targ->ctr.pkts = sent;
			targ->ctr.bytes = sent*size;
			targ->ctr.events = sent;
			i = 0;
		}
	    }
#endif /* NO_PCAP */
    } else {
	int tosend = 0;		/* budget left in the current burst */
	int frags = targ->g->frags;

	nifp = targ->nmd->nifp;
	while (!targ->cancel && (n == 0 || sent < n)) {

		if (rate_limit && tosend <= 0) {
			/* burst exhausted: wait for the next pacing slot */
			tosend = targ->g->burst;
			nexttime = timespec_add(nexttime, targ->g->tx_period);
			wait_time(nexttime);
		}

		/*
		 * wait for available room in the send queue(s)
		 */
#ifdef BUSYWAIT
		if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) {
			D("ioctl error on queue %d: %s", targ->me,
					strerror(errno));
			goto quit;
		}
#else /* !BUSYWAIT */
		if (poll(&pfd, 1, 2000) <= 0) {
			if (targ->cancel)
				break;
			D("poll error/timeout on queue %d: %s", targ->me,
				strerror(errno));
			// goto quit;
		}
		if (pfd.revents & POLLERR) {
			D("poll error on %d ring %d-%d", pfd.fd,
				targ->nmd->first_tx_ring, targ->nmd->last_tx_ring);
			goto quit;
		}
#endif /* !BUSYWAIT */
		/*
		 * scan our queues and send on those with room
		 */
		/* after warm-up, stop copying the payload into each slot
		 * unless the user explicitly asked for OPT_COPY */
		if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) {
			D("drop copy");
			options &= ~OPT_COPY;
		}
		for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
			int m;
			uint64_t limit = rate_limit ?  tosend : targ->g->burst;
			if (n > 0 && n - sent < limit)
				limit = n - sent;
			txring = NETMAP_TXRING(nifp, i);
			if (nm_ring_empty(txring))
				continue;
			/* round up so whole multi-slot packets are sent */
			if (frags > 1)
				limit = ((limit + frags - 1) / frags) * frags;

			m = send_packets(txring, pkt, frame, size, targ->g,
					 limit, options, frags);
			ND("limit %d tail %d frags %d m %d",
				limit, txring->tail, frags, m);
			sent += m;
			if (m > 0) //XXX-ste: can m be 0?
				event++;
			targ->ctr.pkts = sent;
			targ->ctr.bytes = sent*size;
			targ->ctr.events = event;
			if (rate_limit) {
				tosend -= m;
				if (tosend <= 0)
					break;
			}
		}
	}
	/* flush any remaining packets */
	/* NOTE(review): txring may still be NULL here if the loop above
	 * never reached the inner for (e.g. immediate cancel) — confirm
	 * the D() below cannot dereference NULL in that case. */
	D("flush tail %d head %d on thread %p",
		txring->tail, txring->head,
		(void *)pthread_self());
	ioctl(pfd.fd, NIOCTXSYNC, NULL);

	/* final part: wait all the TX queues to be empty. */
	for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
		txring = NETMAP_TXRING(nifp, i);
		while (!targ->cancel && nm_tx_pending(txring)) {
			RD(5, "pending tx tail %d head %d on ring %d",
				txring->tail, txring->head, i);
			ioctl(pfd.fd, NIOCTXSYNC, NULL);
			usleep(1); /* wait 1 tick */
		}
	}
    } /* end DEV_NETMAP */

	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
	targ->completed = 1;
	targ->ctr.pkts = sent;
	targ->ctr.bytes = sent*size;
	targ->ctr.events = event;
quit:
	/* reset the ``used`` flag. */
	targ->used = 0;

	return (NULL);
}
1323 
1324 
1325 #ifndef NO_PCAP
1326 static void
1327 receive_pcap(u_char *user, const struct pcap_pkthdr * h,
1328 	const u_char * bytes)
1329 {
1330 	struct my_ctrs *ctr = (struct my_ctrs *)user;
1331 	(void)bytes;	/* UNUSED */
1332 	ctr->bytes += h->len;
1333 	ctr->pkts++;
1334 }
1335 #endif /* !NO_PCAP */
1336 
1337 
1338 static int
1339 receive_packets(struct netmap_ring *ring, u_int limit, int dump, uint64_t *bytes)
1340 {
1341 	u_int cur, rx, n;
1342 	uint64_t b = 0;
1343 
1344 	if (bytes == NULL)
1345 		bytes = &b;
1346 
1347 	cur = ring->cur;
1348 	n = nm_ring_space(ring);
1349 	if (n < limit)
1350 		limit = n;
1351 	for (rx = 0; rx < limit; rx++) {
1352 		struct netmap_slot *slot = &ring->slot[cur];
1353 		char *p = NETMAP_BUF(ring, slot->buf_idx);
1354 
1355 		*bytes += slot->len;
1356 		if (dump)
1357 			dump_payload(p, slot->len, ring, cur);
1358 
1359 		cur = nm_ring_next(ring, cur);
1360 	}
1361 	ring->head = ring->cur = cur;
1362 
1363 	return (rx);
1364 }
1365 
/*
 * Body of a receiver thread: count packets and bytes arriving on the
 * device bound to this thread (tap read(), pcap_dispatch() or netmap
 * rings).  After the first packet, the netmap backend gives up after
 * one second of silence unless ``forever`` is set.  Counters are
 * accumulated locally in ``cur`` and published to targ->ctr.
 */
static void *
receiver_body(void *data)
{
	struct targ *targ = (struct targ *) data;
	struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
	struct netmap_if *nifp;
	struct netmap_ring *rxring;
	int i;
	struct my_ctrs cur;

	cur.pkts = cur.bytes = cur.events = cur.min_space = 0;
	cur.t.tv_usec = cur.t.tv_sec = 0; //  unused, just silence the compiler

	if (setaffinity(targ->thread, targ->affinity))
		goto quit;

	D("reading from %s fd %d main_fd %d",
		targ->g->ifname, targ->fd, targ->g->main_fd);
	/* unbounded wait for the first packet. */
	for (;!targ->cancel;) {
		i = poll(&pfd, 1, 1000);
		if (i > 0 && !(pfd.revents & POLLERR))
			break;
		RD(1, "waiting for initial packets, poll returns %d %d",
			i, pfd.revents);
	}
	/* main loop, exit after 1s silence */
	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
    if (targ->g->dev_type == DEV_TAP) {
	while (!targ->cancel) {
		char buf[MAX_BODYSIZE];
		/* XXX should we poll ? */
		i = read(targ->g->main_fd, buf, sizeof(buf));
		if (i > 0) {
			targ->ctr.pkts++;
			targ->ctr.bytes += i;
			targ->ctr.events++;
		}
	}
#ifndef NO_PCAP
    } else if (targ->g->dev_type == DEV_PCAP) {
	while (!targ->cancel) {
		/* XXX should we poll ? */
		/* receive_pcap() accumulates pkts/bytes into targ->ctr */
		pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap,
			(u_char *)&targ->ctr);
                targ->ctr.events++;
	}
#endif /* !NO_PCAP */
    } else {
	int dump = targ->g->options & OPT_DUMP;

	nifp = targ->nmd->nifp;
	while (!targ->cancel) {
		/* Once we started to receive packets, wait at most 1 seconds
		   before quitting. */
#ifdef BUSYWAIT
		if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) {
			D("ioctl error on queue %d: %s", targ->me,
					strerror(errno));
			goto quit;
		}
#else /* !BUSYWAIT */
		if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) {
			clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
			targ->toc.tv_sec -= 1; /* Subtract timeout time. */
			goto out;
		}

		if (pfd.revents & POLLERR) {
			D("poll err");
			goto quit;
		}
#endif /* !BUSYWAIT */
		uint64_t cur_space = 0;
		for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) {
			int m;

			rxring = NETMAP_RXRING(nifp, i);
			/* compute free space in the ring */
			m = rxring->head + rxring->num_slots - rxring->tail;
			if (m >= (int) rxring->num_slots)
				m -= rxring->num_slots;
			cur_space += m;
			if (nm_ring_empty(rxring))
				continue;

			m = receive_packets(rxring, targ->g->burst, dump, &cur.bytes);
			cur.pkts += m;
			if (m > 0) //XXX-ste: can m be 0?
				cur.events++;
		}
		/* track the minimum free space seen across iterations */
		cur.min_space = targ->ctr.min_space;
		if (cur_space < cur.min_space)
			cur.min_space = cur_space;
		targ->ctr = cur;
	}
    }

	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);

#if !defined(BUSYWAIT)
out:
#endif
	targ->completed = 1;
	targ->ctr = cur;

quit:
	/* reset the ``used`` flag. */
	targ->used = 0;

	return (NULL);
}
1478 
/*
 * Body of the "txseq" thread: like a sender, but embeds a 32-bit
 * big-endian sequence number in the first four payload bytes so the
 * peer (rxseq) can detect losses and reordering.  Only one thread and
 * the first TX ring are used; the -n packet count is ignored.
 */
static void *
txseq_body(void *data)
{
	struct targ *targ = (struct targ *) data;
	struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT };
	struct netmap_ring *ring;
	int64_t sent = 0;
	uint64_t event = 0;
	int options = targ->g->options | OPT_COPY;
	struct timespec nexttime = {0, 0};
	int rate_limit = targ->g->tx_rate;
	struct pkt *pkt = &targ->pkt;
	int frags = targ->g->frags;		/* slots per packet */
	uint32_t sequence = 0;
	int budget = 0;				/* slots left in this burst */
	void *frame;
	int size;

	if (targ->g->nthreads > 1) {
		D("can only txseq ping with 1 thread");
		return NULL;
	}

	if (targ->g->npackets > 0) {
		D("Ignoring -n argument");
	}

	/* template packet, minus the unused part of the virtio-net header */
	frame = pkt;
	frame += sizeof(pkt->vh) - targ->g->virt_header;
	size = targ->g->pkt_size + targ->g->virt_header;

	D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd);
	if (setaffinity(targ->thread, targ->affinity))
		goto quit;

	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
	if (rate_limit) {
		/* start pacing on a whole-second boundary ~2s from now */
		targ->tic = timespec_add(targ->tic, (struct timespec){2,0});
		targ->tic.tv_nsec = 0;
		wait_time(targ->tic);
		nexttime = targ->tic;
	}

	/* Only use the first queue. */
	ring = NETMAP_TXRING(targ->nmd->nifp, targ->nmd->first_tx_ring);

	while (!targ->cancel) {
		int64_t limit;
		unsigned int space;
		unsigned int head;
		int fcnt;

		if (!rate_limit) {
			budget = targ->g->burst;

		} else if (budget <= 0) {
			budget = targ->g->burst;
			nexttime = timespec_add(nexttime, targ->g->tx_period);
			wait_time(nexttime);
		}

		/* wait for available room in the send queue */
		if (poll(&pfd, 1, 2000) <= 0) {
			if (targ->cancel)
				break;
			D("poll error/timeout on queue %d: %s", targ->me,
				strerror(errno));
		}
		if (pfd.revents & POLLERR) {
			D("poll error on %d ring %d-%d", pfd.fd,
				targ->nmd->first_tx_ring, targ->nmd->last_tx_ring);
			goto quit;
		}

		/* If no room poll() again. */
		space = nm_ring_space(ring);
		if (!space) {
			continue;
		}

		limit = budget;

		if (space < limit) {
			limit = space;
		}

		/* Cut off ``limit`` to make sure is multiple of ``frags``. */
		if (frags > 1) {
			limit = (limit / frags) * frags;
		}

		limit = sent + limit; /* Convert to absolute. */

		for (fcnt = frags, head = ring->head;
				sent < limit; sent++, sequence++) {
			struct netmap_slot *slot = &ring->slot[head];
			char *p = NETMAP_BUF(ring, slot->buf_idx);

			slot->flags = 0;
			/* sequence number, big-endian, in the payload */
			pkt->body[0] = sequence >> 24;
			pkt->body[1] = (sequence >> 16) & 0xff;
			pkt->body[2] = (sequence >> 8) & 0xff;
			pkt->body[3] = sequence & 0xff;
			nm_pkt_copy(frame, p, size);
			/* only update addresses on the first fragment */
			if (fcnt == frags) {
				update_addresses(pkt, targ->g);
			}

			if (options & OPT_DUMP) {
				dump_payload(p, size, ring, head);
			}

			slot->len = size;

			/* all fragments but the last carry NS_MOREFRAG */
			if (--fcnt > 0) {
				slot->flags |= NS_MOREFRAG;
			} else {
				fcnt = frags;
			}

			if (sent == limit - 1) {
				/* Make sure we don't push an incomplete
				 * packet. */
				assert(!(slot->flags & NS_MOREFRAG));
				slot->flags |= NS_REPORT;
			}

			head = nm_ring_next(ring, head);
			if (rate_limit) {
				budget--;
			}
		}

		ring->cur = ring->head = head;

		event ++;
		targ->ctr.pkts = sent;
		targ->ctr.bytes = sent * size;
		targ->ctr.events = event;
	}

	/* flush any remaining packets */
	D("flush tail %d head %d on thread %p",
		ring->tail, ring->head,
		(void *)pthread_self());
	ioctl(pfd.fd, NIOCTXSYNC, NULL);

	/* final part: wait the TX queues to become empty. */
	while (!targ->cancel && nm_tx_pending(ring)) {
		RD(5, "pending tx tail %d head %d on ring %d",
				ring->tail, ring->head, targ->nmd->first_tx_ring);
		ioctl(pfd.fd, NIOCTXSYNC, NULL);
		usleep(1); /* wait 1 tick */
	}

	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
	targ->completed = 1;
	targ->ctr.pkts = sent;
	targ->ctr.bytes = sent * size;
	targ->ctr.events = event;
quit:
	/* reset the ``used`` flag. */
	targ->used = 0;

	return (NULL);
}
1645 
1646 
1647 static char *
1648 multi_slot_to_string(struct netmap_ring *ring, unsigned int head,
1649 		     unsigned int nfrags, char *strbuf, size_t strbuflen)
1650 {
1651 	unsigned int f;
1652 	char *ret = strbuf;
1653 
1654 	for (f = 0; f < nfrags; f++) {
1655 		struct netmap_slot *slot = &ring->slot[head];
1656 		int m = snprintf(strbuf, strbuflen, "|%u,%x|", slot->len,
1657 				 slot->flags);
1658 		if (m >= (int)strbuflen) {
1659 			break;
1660 		}
1661 		strbuf += m;
1662 		strbuflen -= m;
1663 
1664 		head = nm_ring_next(ring, head);
1665 	}
1666 
1667 	return ret;
1668 }
1669 
/*
 * Body of the "rxseq" thread: receive on the first RX ring and check
 * the 32-bit big-endian sequence number that txseq embeds in each
 * payload, reporting gaps (losses), out-of-order packets, and
 * unexpected fragment counts.  Exits after 1s of silence unless
 * ``forever`` is set.
 */
static void *
rxseq_body(void *data)
{
	struct targ *targ = (struct targ *) data;
	struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
	int dump = targ->g->options & OPT_DUMP;
	struct netmap_ring *ring;
	unsigned int frags_exp = 1;	/* expected fragments per packet */
	uint32_t seq_exp = 0;		/* next expected sequence number */
	struct my_ctrs cur;
	unsigned int frags = 0;		/* fragments seen in current packet */
	int first_packet = 1;
	int first_slot = 1;
	int i;

	cur.pkts = cur.bytes = cur.events = cur.min_space = 0;
	cur.t.tv_usec = cur.t.tv_sec = 0; //  unused, just silence the compiler

	if (setaffinity(targ->thread, targ->affinity))
		goto quit;

	D("reading from %s fd %d main_fd %d",
		targ->g->ifname, targ->fd, targ->g->main_fd);
	/* unbounded wait for the first packet. */
	for (;!targ->cancel;) {
		i = poll(&pfd, 1, 1000);
		if (i > 0 && !(pfd.revents & POLLERR))
			break;
		RD(1, "waiting for initial packets, poll returns %d %d",
			i, pfd.revents);
	}

	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);

	/* only the first RX ring is checked */
	ring = NETMAP_RXRING(targ->nmd->nifp, targ->nmd->first_rx_ring);

	while (!targ->cancel) {
		unsigned int head;
		uint32_t seq;
		int limit;

		/* Once we started to receive packets, wait at most 1 seconds
		   before quitting. */
		if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) {
			clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
			targ->toc.tv_sec -= 1; /* Subtract timeout time. */
			goto out;
		}

		if (pfd.revents & POLLERR) {
			D("poll err");
			goto quit;
		}

		if (nm_ring_empty(ring))
			continue;

		limit = nm_ring_space(ring);
		if (limit > targ->g->burst)
			limit = targ->g->burst;

#if 0
		/* Enable this if
		 *     1) we remove the early-return optimization from
		 *        the netmap poll implementation, or
		 *     2) pipes get NS_MOREFRAG support.
		 * With the current netmap implementation, an experiment like
		 *    pkt-gen -i vale:1{1 -f txseq -F 9
		 *    pkt-gen -i vale:1}1 -f rxseq
		 * would get stuck as soon as we find nm_ring_space(ring) < 9,
		 * since here limit is rounded to 0 and
		 * pipe rxsync is not called anymore by the poll() of this loop.
		 */
		if (frags_exp > 1) {
			int o = limit;
			/* Cut off to the closest smaller multiple. */
			limit = (limit / frags_exp) * frags_exp;
			RD(2, "LIMIT %d --> %d", o, limit);
		}
#endif

		for (head = ring->head, i = 0; i < limit; i++) {
			struct netmap_slot *slot = &ring->slot[head];
			char *p = NETMAP_BUF(ring, slot->buf_idx);
			int len = slot->len;
			struct pkt *pkt;

			if (dump) {
				dump_payload(p, slot->len, ring, head);
			}

			frags++;
			if (!(slot->flags & NS_MOREFRAG)) {
				/* last fragment: verify the fragment count
				 * against the one learned from the previous
				 * packet */
				if (first_packet) {
					first_packet = 0;
				} else if (frags != frags_exp) {
					char prbuf[512];
					RD(1, "Received packets with %u frags, "
					      "expected %u, '%s'", frags, frags_exp,
					      multi_slot_to_string(ring, head-frags+1, frags,
								   prbuf, sizeof(prbuf)));
				}
				first_packet = 0;
				frags_exp = frags;
				frags = 0;
			}

			/* step back to the start of the template header so
			 * that the payload can be addressed via struct pkt */
			p -= sizeof(pkt->vh) - targ->g->virt_header;
			len += sizeof(pkt->vh) - targ->g->virt_header;
			pkt = (struct pkt *)p;

			if ((char *)pkt + len < ((char *)pkt->body) + sizeof(seq)) {
				RD(1, "%s: packet too small (len=%u)", __func__,
				      slot->len);
			} else {
				seq = (pkt->body[0] << 24) | (pkt->body[1] << 16)
				      | (pkt->body[2] << 8) | pkt->body[3];
				if (first_slot) {
					/* Grab the first one, whatever it
					   is. */
					seq_exp = seq;
					first_slot = 0;
				} else if (seq != seq_exp) {
					uint32_t delta = seq - seq_exp;

					/* unsigned wraparound distinguishes a
					 * forward gap from reordering */
					if (delta < (0xFFFFFFFF >> 1)) {
						RD(2, "Sequence GAP: exp %u found %u",
						      seq_exp, seq);
					} else {
						RD(2, "Sequence OUT OF ORDER: "
						      "exp %u found %u", seq_exp, seq);
					}
					seq_exp = seq;
				}
				seq_exp++;
			}

			cur.bytes += slot->len;
			head = nm_ring_next(ring, head);
			cur.pkts++;
		}

		ring->cur = ring->head = head;

		cur.events++;
		targ->ctr = cur;
	}

	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);

out:
	targ->completed = 1;
	targ->ctr = cur;

quit:
	/* reset the ``used`` flag. */
	targ->used = 0;

	return (NULL);
}
1830 
1831 
1832 static void
1833 tx_output(struct my_ctrs *cur, double delta, const char *msg)
1834 {
1835 	double bw, raw_bw, pps, abs;
1836 	char b1[40], b2[80], b3[80];
1837 	int size;
1838 
1839 	if (cur->pkts == 0) {
1840 		printf("%s nothing.\n", msg);
1841 		return;
1842 	}
1843 
1844 	size = (int)(cur->bytes / cur->pkts);
1845 
1846 	printf("%s %llu packets %llu bytes %llu events %d bytes each in %.2f seconds.\n",
1847 		msg,
1848 		(unsigned long long)cur->pkts,
1849 		(unsigned long long)cur->bytes,
1850 		(unsigned long long)cur->events, size, delta);
1851 	if (delta == 0)
1852 		delta = 1e-6;
1853 	if (size < 60)		/* correct for min packet size */
1854 		size = 60;
1855 	pps = cur->pkts / delta;
1856 	bw = (8.0 * cur->bytes) / delta;
1857 	/* raw packets have4 bytes crc + 20 bytes framing */
1858 	raw_bw = (8.0 * (cur->pkts * 24 + cur->bytes)) / delta;
1859 	abs = cur->pkts / (double)(cur->events);
1860 
1861 	printf("Speed: %spps Bandwidth: %sbps (raw %sbps). Average batch: %.2f pkts\n",
1862 		norm(b1, pps), norm(b2, bw), norm(b3, raw_bw), abs);
1863 }
1864 
/*
 * Print the command-line synopsis and exit.
 *
 * Fix: the help text used to advertise "-t pkts_to_send" and
 * "-r pkts_to_receive", but neither exists in the getopt() option
 * string ("t" is absent and "r" takes no argument); "-r" was also
 * listed a second time with its real meaning ("do not touch the
 * buffers").  The stale entries are removed so the help matches the
 * options actually accepted.
 */
static void
usage(void)
{
	const char *cmd = "pkt-gen";
	fprintf(stderr,
		"Usage:\n"
		"%s arguments\n"
		"\t-i interface		interface name\n"
		"\t-f function		tx rx ping pong txseq rxseq\n"
		"\t-n count		number of iterations (can be 0)\n"
		"\t-l pkt_size		in bytes excluding CRC\n"
		"\t-d dst_ip[:port[-dst_ip:port]]   single or range\n"
		"\t-s src_ip[:port[-src_ip:port]]   single or range\n"
		"\t-D dst-mac\n"
		"\t-S src-mac\n"
		"\t-a cpu_id		use setaffinity\n"
		"\t-b burst size		testing, mostly\n"
		"\t-c cores		cores to use\n"
		"\t-p threads		processes/threads to use\n"
		"\t-T report_ms		milliseconds between reports\n"
		"\t-w wait_for_link_time	in seconds\n"
		"\t-R rate		in packets per second\n"
		"\t-X			dump payload\n"
		"\t-H len		add empty virtio-net-header with size 'len'\n"
		"\t-E pipes		allocate extra space for a number of pipes\n"
		"\t-r			do not touch the buffers (send rubbish)\n"
	        "\t-P file		load packet from pcap file\n"
		"\t-z			use random IPv4 src address/port\n"
		"\t-Z			use random IPv4 dst address/port\n"
		"\t-F num_frags		send multi-slot packets\n"
		"\t-A			activate pps stats on receiver\n"
		"",
		cmd);

	exit(0);
}
1903 
/* Role of a worker thread; selects the final summary in main_thread(). */
enum {
	TD_TYPE_SENDER = 1,	/* tx, txseq */
	TD_TYPE_RECEIVER,	/* rx, rxseq */
	TD_TYPE_OTHER,		/* ping, pong */
};
1909 
1910 static void
1911 start_threads(struct glob_arg *g)
1912 {
1913 	int i;
1914 
1915 	targs = calloc(g->nthreads, sizeof(*targs));
1916 	/*
1917 	 * Now create the desired number of threads, each one
1918 	 * using a single descriptor.
1919  	 */
1920 	for (i = 0; i < g->nthreads; i++) {
1921 		struct targ *t = &targs[i];
1922 
1923 		bzero(t, sizeof(*t));
1924 		t->fd = -1; /* default, with pcap */
1925 		t->g = g;
1926 
1927 	    if (g->dev_type == DEV_NETMAP) {
1928 		struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */
1929 		uint64_t nmd_flags = 0;
1930 		nmd.self = &nmd;
1931 
1932 		if (i > 0) {
1933 			/* the first thread uses the fd opened by the main
1934 			 * thread, the other threads re-open /dev/netmap
1935 			 */
1936 			if (g->nthreads > 1) {
1937 				nmd.req.nr_flags =
1938 					g->nmd->req.nr_flags & ~NR_REG_MASK;
1939 				nmd.req.nr_flags |= NR_REG_ONE_NIC;
1940 				nmd.req.nr_ringid = i;
1941 			}
1942 			/* Only touch one of the rings (rx is already ok) */
1943 			if (g->td_type == TD_TYPE_RECEIVER)
1944 				nmd_flags |= NETMAP_NO_TX_POLL;
1945 
1946 			/* register interface. Override ifname and ringid etc. */
1947 			t->nmd = nm_open(t->g->ifname, NULL, nmd_flags |
1948 				NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, &nmd);
1949 			if (t->nmd == NULL) {
1950 				D("Unable to open %s: %s",
1951 					t->g->ifname, strerror(errno));
1952 				continue;
1953 			}
1954 		} else {
1955 			t->nmd = g->nmd;
1956 		}
1957 		t->fd = t->nmd->fd;
1958 
1959 	    } else {
1960 		targs[i].fd = g->main_fd;
1961 	    }
1962 		t->used = 1;
1963 		t->me = i;
1964 		if (g->affinity >= 0) {
1965 			t->affinity = (g->affinity + i) % g->system_cpus;
1966 		} else {
1967 			t->affinity = -1;
1968 		}
1969 		/* default, init packets */
1970 		initialize_packet(t);
1971 
1972 		if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) {
1973 			D("Unable to create thread %d: %s", i, strerror(errno));
1974 			t->used = 0;
1975 		}
1976 	}
1977 }
1978 
/*
 * Main loop of the parent process: once per report interval, sum the
 * per-thread counters, print the current rates (optionally with a
 * sliding-window pps average/stddev when OPT_PPS_STATS is set) and
 * stop when every worker has cleared its ``used`` flag.  Then join
 * the threads, collect the overall start/end timestamps and print
 * the final summary via tx_output().
 */
static void
main_thread(struct glob_arg *g)
{
	int i;

	struct my_ctrs prev, cur;
	double delta_t;
	struct timeval tic, toc;

	prev.pkts = prev.bytes = prev.events = 0;
	gettimeofday(&prev.t, NULL);
	for (;;) {
		char b1[40], b2[40], b3[40], b4[70];
		uint64_t pps, usec;
		struct my_ctrs x;	/* delta since the previous report */
		double abs;		/* average batch size */
		int done = 0;		/* workers that have finished */

		usec = wait_for_next_report(&prev.t, &cur.t,
				g->report_interval);

		cur.pkts = cur.bytes = cur.events = 0;
		cur.min_space = 0;
		if (usec < 10000) /* too short to be meaningful */
			continue;
		/* accumulate counts for all threads */
		for (i = 0; i < g->nthreads; i++) {
			cur.pkts += targs[i].ctr.pkts;
			cur.bytes += targs[i].ctr.bytes;
			cur.events += targs[i].ctr.events;
			cur.min_space += targs[i].ctr.min_space;
			targs[i].ctr.min_space = 99999;
			if (targs[i].used == 0)
				done++;
		}
		x.pkts = cur.pkts - prev.pkts;
		x.bytes = cur.bytes - prev.bytes;
		x.events = cur.events - prev.events;
		/* +usec/2 rounds to the nearest integer pps */
		pps = (x.pkts*1000000 + usec/2) / usec;
		abs = (x.events > 0) ? (x.pkts / (double) x.events) : 0;

		if (!(g->options & OPT_PPS_STATS)) {
			strcpy(b4, "");
		} else {
			/* Compute some pps stats using a sliding window. */
			double ppsavg = 0.0, ppsdev = 0.0;
			int nsamples = 0;

			g->win[g->win_idx] = pps;
			g->win_idx = (g->win_idx + 1) % STATS_WIN;

			/* zero entries are unfilled window slots and are
			 * excluded from the statistics */
			for (i = 0; i < STATS_WIN; i++) {
				ppsavg += g->win[i];
				if (g->win[i]) {
					nsamples ++;
				}
			}
			ppsavg /= nsamples;

			for (i = 0; i < STATS_WIN; i++) {
				if (g->win[i] == 0) {
					continue;
				}
				ppsdev += (g->win[i] - ppsavg) * (g->win[i] - ppsavg);
			}
			ppsdev /= nsamples;
			ppsdev = sqrt(ppsdev);

			snprintf(b4, sizeof(b4), "[avg/std %s/%s pps]",
				 norm(b1, ppsavg), norm(b2, ppsdev));
		}

		D("%spps %s(%spkts %sbps in %llu usec) %.2f avg_batch %d min_space",
			norm(b1, pps), b4,
			norm(b2, (double)x.pkts),
			norm(b3, (double)x.bytes*8),
			(unsigned long long)usec,
			abs, (int)cur.min_space);
		prev = cur;

		if (done == g->nthreads)
			break;
	}

	timerclear(&tic);
	timerclear(&toc);
	cur.pkts = cur.bytes = cur.events = 0;
	/* final round */
	for (i = 0; i < g->nthreads; i++) {
		struct timespec t_tic, t_toc;
		/*
		 * Join active threads, unregister interfaces and close
		 * file descriptors.
		 */
		if (targs[i].used)
			pthread_join(targs[i].thread, NULL); /* blocking */
		if (g->dev_type == DEV_NETMAP) {
			nm_close(targs[i].nmd);
			targs[i].nmd = NULL;
		} else {
			close(targs[i].fd);
		}

		if (targs[i].completed == 0)
			D("ouch, thread %d exited with error", i);

		/*
		 * Collect threads output and extract information about
		 * how long it took to send all the packets.
		 */
		cur.pkts += targs[i].ctr.pkts;
		cur.bytes += targs[i].ctr.bytes;
		cur.events += targs[i].ctr.events;
		/* collect the largest start (tic) and end (toc) times,
		 * XXX maybe we should do the earliest tic, or do a weighted
		 * average ?
		 */
		t_tic = timeval2spec(&tic);
		t_toc = timeval2spec(&toc);
		if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic))
			tic = timespec2val(&targs[i].tic);
		if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc))
			toc = timespec2val(&targs[i].toc);
	}

	/* print output. */
	timersub(&toc, &tic, &toc);
	delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
	if (g->td_type == TD_TYPE_SENDER)
		tx_output(&cur, delta_t, "Sent");
	else
		tx_output(&cur, delta_t, "Received");
}
2112 
/* Descriptor of a test function, selected with the -f option. */
struct td_desc {
	int ty;		/* TD_TYPE_* constant */
	char *key;	/* name used on the command line */
	void *f;	/* thread body function */
};
2118 
/* Table mapping the -f argument to a thread body; NULL-terminated. */
static struct td_desc func[] = {
	{ TD_TYPE_SENDER,	"tx",		sender_body },
	{ TD_TYPE_RECEIVER,	"rx",		receiver_body },
	{ TD_TYPE_OTHER,	"ping",		pinger_body },
	{ TD_TYPE_OTHER,	"pong",		ponger_body },
	{ TD_TYPE_SENDER,	"txseq",	txseq_body },
	{ TD_TYPE_RECEIVER,	"rxseq",	rxseq_body },
	{ 0,			NULL,	NULL }
};
2128 
2129 static int
2130 tap_alloc(char *dev)
2131 {
2132 	struct ifreq ifr;
2133 	int fd, err;
2134 	char *clonedev = TAP_CLONEDEV;
2135 
2136 	(void)err;
2137 	(void)dev;
2138 	/* Arguments taken by the function:
2139 	 *
2140 	 * char *dev: the name of an interface (or '\0'). MUST have enough
2141 	 *   space to hold the interface name if '\0' is passed
2142 	 * int flags: interface flags (eg, IFF_TUN etc.)
2143 	 */
2144 
2145 #ifdef __FreeBSD__
2146 	if (dev[3]) { /* tapSomething */
2147 		static char buf[128];
2148 		snprintf(buf, sizeof(buf), "/dev/%s", dev);
2149 		clonedev = buf;
2150 	}
2151 #endif
2152 	/* open the device */
2153 	if( (fd = open(clonedev, O_RDWR)) < 0 ) {
2154 		return fd;
2155 	}
2156 	D("%s open successful", clonedev);
2157 
2158 	/* preparation of the struct ifr, of type "struct ifreq" */
2159 	memset(&ifr, 0, sizeof(ifr));
2160 
2161 #ifdef linux
2162 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
2163 
2164 	if (*dev) {
2165 		/* if a device name was specified, put it in the structure; otherwise,
2166 		* the kernel will try to allocate the "next" device of the
2167 		* specified type */
2168 		strncpy(ifr.ifr_name, dev, IFNAMSIZ);
2169 	}
2170 
2171 	/* try to create the device */
2172 	if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) {
2173 		D("failed to to a TUNSETIFF: %s", strerror(errno));
2174 		close(fd);
2175 		return err;
2176 	}
2177 
2178 	/* if the operation was successful, write back the name of the
2179 	* interface to the variable "dev", so the caller can know
2180 	* it. Note that the caller MUST reserve space in *dev (see calling
2181 	* code below) */
2182 	strcpy(dev, ifr.ifr_name);
2183 	D("new name is %s", dev);
2184 #endif /* linux */
2185 
2186         /* this is the special file descriptor that the caller will use to talk
2187          * with the virtual interface */
2188         return fd;
2189 }
2190 
2191 int
2192 main(int arc, char **argv)
2193 {
2194 	int i;
2195 	struct sigaction sa;
2196 	sigset_t ss;
2197 
2198 	struct glob_arg g;
2199 
2200 	int ch;
2201 	int wait_link = 2;
2202 	int devqueues = 1;	/* how many device queues */
2203 
2204 	bzero(&g, sizeof(g));
2205 
2206 	g.main_fd = -1;
2207 	g.td_body = receiver_body;
2208 	g.td_type = TD_TYPE_RECEIVER;
2209 	g.report_interval = 1000;	/* report interval */
2210 	g.affinity = -1;
2211 	/* ip addresses can also be a range x.x.x.x-x.x.x.y */
2212 	g.src_ip.name = "10.0.0.1";
2213 	g.dst_ip.name = "10.1.0.1";
2214 	g.dst_mac.name = "ff:ff:ff:ff:ff:ff";
2215 	g.src_mac.name = NULL;
2216 	g.pkt_size = 60;
2217 	g.burst = 512;		// default
2218 	g.nthreads = 1;
2219 	g.cpus = 1;		// default
2220 	g.forever = 1;
2221 	g.tx_rate = 0;
2222 	g.frags = 1;
2223 	g.nmr_config = "";
2224 	g.virt_header = 0;
2225 
2226 	while ( (ch = getopt(arc, argv,
2227 			"a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:E:m:rP:zZA")) != -1) {
2228 		struct td_desc *fn;
2229 
2230 		switch(ch) {
2231 		default:
2232 			D("bad option %c %s", ch, optarg);
2233 			usage();
2234 			break;
2235 
2236 		case 'n':
2237 			g.npackets = strtoull(optarg, NULL, 10);
2238 			break;
2239 
2240 		case 'F':
2241 			i = atoi(optarg);
2242 			if (i < 1 || i > 63) {
2243 				D("invalid frags %d [1..63], ignore", i);
2244 				break;
2245 			}
2246 			g.frags = i;
2247 			break;
2248 
2249 		case 'f':
2250 			for (fn = func; fn->key; fn++) {
2251 				if (!strcmp(fn->key, optarg))
2252 					break;
2253 			}
2254 			if (fn->key) {
2255 				g.td_body = fn->f;
2256 				g.td_type = fn->ty;
2257 			} else {
2258 				D("unrecognised function %s", optarg);
2259 			}
2260 			break;
2261 
2262 		case 'o':	/* data generation options */
2263 			g.options = atoi(optarg);
2264 			break;
2265 
2266 		case 'a':       /* force affinity */
2267 			g.affinity = atoi(optarg);
2268 			break;
2269 
2270 		case 'i':	/* interface */
2271 			/* a prefix of tap: netmap: or pcap: forces the mode.
2272 			 * otherwise we guess
2273 			 */
2274 			D("interface is %s", optarg);
2275 			if (strlen(optarg) > MAX_IFNAMELEN - 8) {
2276 				D("ifname too long %s", optarg);
2277 				break;
2278 			}
2279 			strcpy(g.ifname, optarg);
2280 			if (!strcmp(optarg, "null")) {
2281 				g.dev_type = DEV_NETMAP;
2282 				g.dummy_send = 1;
2283 			} else if (!strncmp(optarg, "tap:", 4)) {
2284 				g.dev_type = DEV_TAP;
2285 				strcpy(g.ifname, optarg + 4);
2286 			} else if (!strncmp(optarg, "pcap:", 5)) {
2287 				g.dev_type = DEV_PCAP;
2288 				strcpy(g.ifname, optarg + 5);
2289 			} else if (!strncmp(optarg, "netmap:", 7) ||
2290 				   !strncmp(optarg, "vale", 4)) {
2291 				g.dev_type = DEV_NETMAP;
2292 			} else if (!strncmp(optarg, "tap", 3)) {
2293 				g.dev_type = DEV_TAP;
2294 			} else { /* prepend netmap: */
2295 				g.dev_type = DEV_NETMAP;
2296 				sprintf(g.ifname, "netmap:%s", optarg);
2297 			}
2298 			break;
2299 
2300 		case 'I':
2301 			g.options |= OPT_INDIRECT;	/* XXX use indirect buffer */
2302 			break;
2303 
2304 		case 'l':	/* pkt_size */
2305 			g.pkt_size = atoi(optarg);
2306 			break;
2307 
2308 		case 'd':
2309 			g.dst_ip.name = optarg;
2310 			break;
2311 
2312 		case 's':
2313 			g.src_ip.name = optarg;
2314 			break;
2315 
2316 		case 'T':	/* report interval */
2317 			g.report_interval = atoi(optarg);
2318 			break;
2319 
2320 		case 'w':
2321 			wait_link = atoi(optarg);
2322 			break;
2323 
2324 		case 'W': /* XXX changed default */
2325 			g.forever = 0; /* do not exit rx even with no traffic */
2326 			break;
2327 
2328 		case 'b':	/* burst */
2329 			g.burst = atoi(optarg);
2330 			break;
2331 		case 'c':
2332 			g.cpus = atoi(optarg);
2333 			break;
2334 		case 'p':
2335 			g.nthreads = atoi(optarg);
2336 			break;
2337 
2338 		case 'D': /* destination mac */
2339 			g.dst_mac.name = optarg;
2340 			break;
2341 
2342 		case 'S': /* source mac */
2343 			g.src_mac.name = optarg;
2344 			break;
2345 		case 'v':
2346 			verbose++;
2347 			break;
2348 		case 'R':
2349 			g.tx_rate = atoi(optarg);
2350 			break;
2351 		case 'X':
2352 			g.options |= OPT_DUMP;
2353 			break;
2354 		case 'C':
2355 			g.nmr_config = strdup(optarg);
2356 			break;
2357 		case 'H':
2358 			g.virt_header = atoi(optarg);
2359 			break;
2360 		case 'e': /* extra bufs */
2361 			g.extra_bufs = atoi(optarg);
2362 			break;
2363 		case 'E':
2364 			g.extra_pipes = atoi(optarg);
2365 			break;
2366 		case 'P':
2367 			g.packet_file = strdup(optarg);
2368 			break;
2369 		case 'm':
2370 			/* ignored */
2371 			break;
2372 		case 'r':
2373 			g.options |= OPT_RUBBISH;
2374 			break;
2375 		case 'z':
2376 			g.options |= OPT_RANDOM_SRC;
2377 			break;
2378 		case 'Z':
2379 			g.options |= OPT_RANDOM_DST;
2380 			break;
2381 		case 'A':
2382 			g.options |= OPT_PPS_STATS;
2383 			break;
2384 		}
2385 	}
2386 
2387 	if (strlen(g.ifname) <=0 ) {
2388 		D("missing ifname");
2389 		usage();
2390 	}
2391 
2392 	g.system_cpus = i = system_ncpus();
2393 	if (g.cpus < 0 || g.cpus > i) {
2394 		D("%d cpus is too high, have only %d cpus", g.cpus, i);
2395 		usage();
2396 	}
2397 D("running on %d cpus (have %d)", g.cpus, i);
2398 	if (g.cpus == 0)
2399 		g.cpus = i;
2400 
2401 	if (g.pkt_size < 16 || g.pkt_size > MAX_PKTSIZE) {
2402 		D("bad pktsize %d [16..%d]\n", g.pkt_size, MAX_PKTSIZE);
2403 		usage();
2404 	}
2405 
2406 	if (g.src_mac.name == NULL) {
2407 		static char mybuf[20] = "00:00:00:00:00:00";
2408 		/* retrieve source mac address. */
2409 		if (source_hwaddr(g.ifname, mybuf) == -1) {
2410 			D("Unable to retrieve source mac");
2411 			// continue, fail later
2412 		}
2413 		g.src_mac.name = mybuf;
2414 	}
2415 	/* extract address ranges */
2416 	extract_ip_range(&g.src_ip);
2417 	extract_ip_range(&g.dst_ip);
2418 	extract_mac_range(&g.src_mac);
2419 	extract_mac_range(&g.dst_mac);
2420 
2421 	if (g.src_ip.start != g.src_ip.end ||
2422 	    g.src_ip.port0 != g.src_ip.port1 ||
2423 	    g.dst_ip.start != g.dst_ip.end ||
2424 	    g.dst_ip.port0 != g.dst_ip.port1)
2425 		g.options |= OPT_COPY;
2426 
2427 	if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1
2428 			&& g.virt_header != VIRT_HDR_2) {
2429 		D("bad virtio-net-header length");
2430 		usage();
2431 	}
2432 
2433     if (g.dev_type == DEV_TAP) {
2434 	D("want to use tap %s", g.ifname);
2435 	g.main_fd = tap_alloc(g.ifname);
2436 	if (g.main_fd < 0) {
2437 		D("cannot open tap %s", g.ifname);
2438 		usage();
2439 	}
2440 #ifndef NO_PCAP
2441     } else if (g.dev_type == DEV_PCAP) {
2442 	char pcap_errbuf[PCAP_ERRBUF_SIZE];
2443 
2444 	pcap_errbuf[0] = '\0'; // init the buffer
2445 	g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf);
2446 	if (g.p == NULL) {
2447 		D("cannot open pcap on %s", g.ifname);
2448 		usage();
2449 	}
2450 	g.main_fd = pcap_fileno(g.p);
2451 	D("using pcap on %s fileno %d", g.ifname, g.main_fd);
2452 #endif /* !NO_PCAP */
2453     } else if (g.dummy_send) { /* but DEV_NETMAP */
2454 	D("using a dummy send routine");
2455     } else {
2456 	struct nmreq base_nmd;
2457 
2458 	bzero(&base_nmd, sizeof(base_nmd));
2459 
2460 	parse_nmr_config(g.nmr_config, &base_nmd);
2461 	if (g.extra_bufs) {
2462 		base_nmd.nr_arg3 = g.extra_bufs;
2463 	}
2464 	if (g.extra_pipes) {
2465 	    base_nmd.nr_arg1 = g.extra_pipes;
2466 	}
2467 
2468 	base_nmd.nr_flags |= NR_ACCEPT_VNET_HDR;
2469 
2470 	/*
2471 	 * Open the netmap device using nm_open().
2472 	 *
2473 	 * protocol stack and may cause a reset of the card,
2474 	 * which in turn may take some time for the PHY to
2475 	 * reconfigure. We do the open here to have time to reset.
2476 	 */
2477 	g.nmd = nm_open(g.ifname, &base_nmd, 0, NULL);
2478 	if (g.nmd == NULL) {
2479 		D("Unable to open %s: %s", g.ifname, strerror(errno));
2480 		goto out;
2481 	}
2482 
2483 	if (g.nthreads > 1) {
2484 		struct nm_desc saved_desc = *g.nmd;
2485 		saved_desc.self = &saved_desc;
2486 		saved_desc.mem = NULL;
2487 		nm_close(g.nmd);
2488 		saved_desc.req.nr_flags &= ~NR_REG_MASK;
2489 		saved_desc.req.nr_flags |= NR_REG_ONE_NIC;
2490 		saved_desc.req.nr_ringid = 0;
2491 		g.nmd = nm_open(g.ifname, &base_nmd, NM_OPEN_IFNAME, &saved_desc);
2492 		if (g.nmd == NULL) {
2493 			D("Unable to open %s: %s", g.ifname, strerror(errno));
2494 			goto out;
2495 		}
2496 	}
2497 	g.main_fd = g.nmd->fd;
2498 	D("mapped %dKB at %p", g.nmd->req.nr_memsize>>10, g.nmd->mem);
2499 
2500 	if (g.virt_header) {
2501 		/* Set the virtio-net header length, since the user asked
2502 		 * for it explicitely. */
2503 		set_vnet_hdr_len(&g);
2504 	} else {
2505 		/* Check whether the netmap port we opened requires us to send
2506 		 * and receive frames with virtio-net header. */
2507 		get_vnet_hdr_len(&g);
2508 	}
2509 
2510 	/* get num of queues in tx or rx */
2511 	if (g.td_type == TD_TYPE_SENDER)
2512 		devqueues = g.nmd->req.nr_tx_rings;
2513 	else
2514 		devqueues = g.nmd->req.nr_rx_rings;
2515 
2516 	/* validate provided nthreads. */
2517 	if (g.nthreads < 1 || g.nthreads > devqueues) {
2518 		D("bad nthreads %d, have %d queues", g.nthreads, devqueues);
2519 		// continue, fail later
2520 	}
2521 
2522 	if (verbose) {
2523 		struct netmap_if *nifp = g.nmd->nifp;
2524 		struct nmreq *req = &g.nmd->req;
2525 
2526 		D("nifp at offset %d, %d tx %d rx region %d",
2527 		    req->nr_offset, req->nr_tx_rings, req->nr_rx_rings,
2528 		    req->nr_arg2);
2529 		for (i = 0; i <= req->nr_tx_rings; i++) {
2530 			struct netmap_ring *ring = NETMAP_TXRING(nifp, i);
2531 			D("   TX%d at 0x%p slots %d", i,
2532 			    (void *)((char *)ring - (char *)nifp), ring->num_slots);
2533 		}
2534 		for (i = 0; i <= req->nr_rx_rings; i++) {
2535 			struct netmap_ring *ring = NETMAP_RXRING(nifp, i);
2536 			D("   RX%d at 0x%p slots %d", i,
2537 			    (void *)((char *)ring - (char *)nifp), ring->num_slots);
2538 		}
2539 	}
2540 
2541 	/* Print some debug information. */
2542 	fprintf(stdout,
2543 		"%s %s: %d queues, %d threads and %d cpus.\n",
2544 		(g.td_type == TD_TYPE_SENDER) ? "Sending on" :
2545 			((g.td_type == TD_TYPE_RECEIVER) ? "Receiving from" :
2546 			"Working on"),
2547 		g.ifname,
2548 		devqueues,
2549 		g.nthreads,
2550 		g.cpus);
2551 	if (g.td_type == TD_TYPE_SENDER) {
2552 		fprintf(stdout, "%s -> %s (%s -> %s)\n",
2553 			g.src_ip.name, g.dst_ip.name,
2554 			g.src_mac.name, g.dst_mac.name);
2555 	}
2556 
2557 out:
2558 	/* Exit if something went wrong. */
2559 	if (g.main_fd < 0) {
2560 		D("aborting");
2561 		usage();
2562 	}
2563     }
2564 
2565 
2566 	if (g.options) {
2567 		D("--- SPECIAL OPTIONS:%s%s%s%s%s%s\n",
2568 			g.options & OPT_PREFETCH ? " prefetch" : "",
2569 			g.options & OPT_ACCESS ? " access" : "",
2570 			g.options & OPT_MEMCPY ? " memcpy" : "",
2571 			g.options & OPT_INDIRECT ? " indirect" : "",
2572 			g.options & OPT_COPY ? " copy" : "",
2573 			g.options & OPT_RUBBISH ? " rubbish " : "");
2574 	}
2575 
2576 	g.tx_period.tv_sec = g.tx_period.tv_nsec = 0;
2577 	if (g.tx_rate > 0) {
2578 		/* try to have at least something every second,
2579 		 * reducing the burst size to some 0.01s worth of data
2580 		 * (but no less than one full set of fragments)
2581 	 	 */
2582 		uint64_t x;
2583 		int lim = (g.tx_rate)/300;
2584 		if (g.burst > lim)
2585 			g.burst = lim;
2586 		if (g.burst < g.frags)
2587 			g.burst = g.frags;
2588 		x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate;
2589 		g.tx_period.tv_nsec = x;
2590 		g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000;
2591 		g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000;
2592 	}
2593 	if (g.td_type == TD_TYPE_SENDER)
2594 	    D("Sending %d packets every  %ld.%09ld s",
2595 			g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec);
2596 	/* Wait for PHY reset. */
2597 	D("Wait %d secs for phy reset", wait_link);
2598 	sleep(wait_link);
2599 	D("Ready...");
2600 
2601 	/* Install ^C handler. */
2602 	global_nthreads = g.nthreads;
2603 	sigemptyset(&ss);
2604 	sigaddset(&ss, SIGINT);
2605 	/* block SIGINT now, so that all created threads will inherit the mask */
2606 	if (pthread_sigmask(SIG_BLOCK, &ss, NULL) < 0) {
2607 		D("failed to block SIGINT: %s", strerror(errno));
2608 	}
2609 	start_threads(&g);
2610 	/* Install the handler and re-enable SIGINT for the main thread */
2611 	sa.sa_handler = sigint_h;
2612 	if (sigaction(SIGINT, &sa, NULL) < 0) {
2613 		D("failed to install ^C handler: %s", strerror(errno));
2614 	}
2615 
2616 	if (pthread_sigmask(SIG_UNBLOCK, &ss, NULL) < 0) {
2617 		D("failed to re-enable SIGINT: %s", strerror(errno));
2618 	}
2619 	main_thread(&g);
2620 	free(targs);
2621 	return 0;
2622 }
2623 
2624 /* end of file */
2625