1 /* 2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 3 * Copyright (C) 2013-2015 Universita` di Pisa. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 /* 28 * $FreeBSD$ 29 * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $ 30 * 31 * Example program to show how to build a multithreaded packet 32 * source/sink using the netmap device. 33 * 34 * In this example we create a programmable number of threads 35 * to take care of all the queues of the interface used to 36 * send or receive traffic. 37 * 38 */ 39 40 #define _GNU_SOURCE /* for CPU_SET() */ 41 #include <arpa/inet.h> /* ntohs */ 42 #include <assert.h> 43 #include <ctype.h> // isprint() 44 #include <errno.h> 45 #include <fcntl.h> 46 #include <ifaddrs.h> /* getifaddrs */ 47 #include <libnetmap.h> 48 #include <math.h> 49 #include <net/ethernet.h> 50 #include <netinet/in.h> 51 #include <netinet/ip.h> 52 #include <netinet/ip6.h> 53 #include <netinet/udp.h> 54 #ifndef NO_PCAP 55 #include <pcap/pcap.h> 56 #endif 57 #include <pthread.h> 58 #include <signal.h> 59 #include <stdio.h> 60 #include <stdlib.h> 61 #include <string.h> 62 #include <sys/ioctl.h> 63 #include <sys/poll.h> 64 #include <sys/stat.h> 65 #if !defined(_WIN32) && !defined(linux) 66 #include <sys/sysctl.h> /* sysctl */ 67 #endif 68 #include <sys/types.h> 69 #include <unistd.h> // sysconf() 70 #ifdef linux 71 #define IPV6_VERSION 0x60 72 #define IPV6_DEFHLIM 64 73 #endif 74 75 #include "ctrs.h" 76 77 static void usage(int); 78 79 #ifdef _WIN32 80 #define cpuset_t DWORD_PTR //uint64_t 81 static inline void CPU_ZERO(cpuset_t *p) 82 { 83 *p = 0; 84 } 85 86 static inline void CPU_SET(uint32_t i, cpuset_t *p) 87 { 88 *p |= 1<< (i & 0x3f); 89 } 90 91 #define pthread_setaffinity_np(a, b, c) !SetThreadAffinityMask(a, *c) //((void)a, 0) 92 #define TAP_CLONEDEV "/dev/tap" 93 #define AF_LINK 18 //defined in winsocks.h 94 #define CLOCK_REALTIME_PRECISE CLOCK_REALTIME 95 #include <net/if_dl.h> 96 97 /* 98 * Convert an ASCII representation of an ethernet address to 99 * binary form. 100 */ 101 struct ether_addr * 102 ether_aton(const char *a) 103 { 104 int i; 105 static struct ether_addr o; 106 unsigned int o0, o1, o2, o3, o4, o5; 107 108 i = sscanf(a, "%x:%x:%x:%x:%x:%x", &o0, &o1, &o2, &o3, &o4, &o5); 109 110 if (i != 6) 111 return (NULL); 112 113 o.octet[0]=o0; 114 o.octet[1]=o1; 115 o.octet[2]=o2; 116 o.octet[3]=o3; 117 o.octet[4]=o4; 118 o.octet[5]=o5; 119 120 return ((struct ether_addr *)&o); 121 } 122 123 /* 124 * Convert a binary representation of an ethernet address to 125 * an ASCII string. 126 */ 127 char * 128 ether_ntoa(const struct ether_addr *n) 129 { 130 int i; 131 static char a[18]; 132 133 i = sprintf(a, "%02x:%02x:%02x:%02x:%02x:%02x", 134 n->octet[0], n->octet[1], n->octet[2], 135 n->octet[3], n->octet[4], n->octet[5]); 136 return (i < 17 ? NULL : (char *)&a); 137 } 138 #endif /* _WIN32 */ 139 140 #ifdef linux 141 142 #define cpuset_t cpu_set_t 143 144 #define ifr_flagshigh ifr_flags /* only the low 16 bits here */ 145 #define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */ 146 #include <linux/ethtool.h> 147 #include <linux/sockios.h> 148 149 #define CLOCK_REALTIME_PRECISE CLOCK_REALTIME 150 #include <netinet/ether.h> /* ether_aton */ 151 #include <linux/if_packet.h> /* sockaddr_ll */ 152 #endif /* linux */ 153 154 #ifdef __FreeBSD__ 155 #include <sys/endian.h> /* le64toh */ 156 #include <machine/param.h> 157 158 #include <pthread_np.h> /* pthread w/ affinity */ 159 #include <sys/cpuset.h> /* cpu_set */ 160 #include <net/if_dl.h> /* LLADDR */ 161 #endif /* __FreeBSD__ */ 162 163 #ifdef __APPLE__ 164 165 #define cpuset_t uint64_t // XXX 166 static inline void CPU_ZERO(cpuset_t *p) 167 { 168 *p = 0; 169 } 170 171 static inline void CPU_SET(uint32_t i, cpuset_t *p) 172 { 173 *p |= 1<< (i & 0x3f); 174 } 175 176 #define pthread_setaffinity_np(a, b, c) ((void)a, 0) 177 178 #define ifr_flagshigh ifr_flags // XXX 179 #define IFF_PPROMISC IFF_PROMISC 180 #include <net/if_dl.h> /* LLADDR */ 181 #define clock_gettime(a,b) \ 182 do {struct timespec t0 = {0,0}; *(b) = t0; } while (0) 183 #endif /* __APPLE__ */ 184 185 static const char *default_payload = "netmap pkt-gen DIRECT payload\n" 186 "http://info.iet.unipi.it/~luigi/netmap/ "; 187 188 static const char *indirect_payload = "netmap pkt-gen indirect payload\n" 189 "http://info.iet.unipi.it/~luigi/netmap/ "; 190 191 static int verbose = 0; 192 static int normalize = 1; 193 194 #define VIRT_HDR_1 10 /* length of a base vnet-hdr */ 195 #define VIRT_HDR_2 12 /* length of the extenede vnet-hdr */ 196 #define VIRT_HDR_MAX VIRT_HDR_2 197 struct virt_header { 198 uint8_t fields[VIRT_HDR_MAX]; 199 }; 200 201 #define MAX_BODYSIZE 65536 202 203 struct pkt { 204 struct virt_header vh; 205 struct ether_header eh; 206 union { 207 struct { 208 struct ip ip; 209 struct udphdr udp; 210 uint8_t body[MAX_BODYSIZE]; /* hardwired */ 211 } ipv4; 212 struct { 213 struct ip6_hdr ip; 214 struct udphdr udp; 215 uint8_t body[MAX_BODYSIZE]; /* hardwired */ 216 } ipv6; 217 }; 218 } __attribute__((__packed__)); 219 220 #define PKT(p, f, af) \ 221 ((af) == AF_INET ? (p)->ipv4.f: (p)->ipv6.f) 222 223 struct ip_range { 224 const char *name; 225 union { 226 struct { 227 uint32_t start, end; /* same as struct in_addr */ 228 } ipv4; 229 struct { 230 struct in6_addr start, end; 231 uint8_t sgroup, egroup; 232 } ipv6; 233 }; 234 uint16_t port0, port1; 235 }; 236 237 struct mac_range { 238 const char *name; 239 struct ether_addr start, end; 240 }; 241 242 /* ifname can be netmap:foo-xxxx */ 243 #define MAX_IFNAMELEN 512 /* our buffer for ifname */ 244 //#define MAX_PKTSIZE 1536 245 #define MAX_PKTSIZE MAX_BODYSIZE /* XXX: + IP_HDR + ETH_HDR */ 246 247 /* compact timestamp to fit into 60 byte packet. (enough to obtain RTT) */ 248 struct tstamp { 249 uint32_t sec; 250 uint32_t nsec; 251 }; 252 253 /* 254 * global arguments for all threads 255 */ 256 257 struct glob_arg { 258 int af; /* address family AF_INET/AF_INET6 */ 259 struct ip_range src_ip; 260 struct ip_range dst_ip; 261 struct mac_range dst_mac; 262 struct mac_range src_mac; 263 int pkt_size; 264 int pkt_min_size; 265 int burst; 266 int forever; 267 uint64_t npackets; /* total packets to send */ 268 int frags; /* fragments per packet */ 269 u_int frag_size; /* size of each fragment */ 270 int nthreads; 271 int cpus; /* cpus used for running */ 272 int system_cpus; /* cpus on the system */ 273 274 int options; /* testing */ 275 #define OPT_PREFETCH 1 276 #define OPT_ACCESS 2 277 #define OPT_COPY 4 278 #define OPT_MEMCPY 8 279 #define OPT_TS 16 /* add a timestamp */ 280 #define OPT_INDIRECT 32 /* use indirect buffers, tx only */ 281 #define OPT_DUMP 64 /* dump rx/tx traffic */ 282 #define OPT_RUBBISH 256 /* send whatever the buffers contain */ 283 #define OPT_RANDOM_SRC 512 284 #define OPT_RANDOM_DST 1024 285 #define OPT_PPS_STATS 2048 286 int dev_type; 287 #ifndef NO_PCAP 288 pcap_t *p; 289 #endif 290 291 int tx_rate; 292 struct timespec tx_period; 293 294 int affinity; 295 int main_fd; 296 struct nmport_d *nmd; 297 uint32_t orig_mode; 298 int report_interval; /* milliseconds between prints */ 299 void *(*td_body)(void *); 300 int td_type; 301 void *mmap_addr; 302 char ifname[MAX_IFNAMELEN]; 303 const char *nmr_config; 304 int dummy_send; 305 int virt_header; /* send also the virt_header */ 306 char *packet_file; /* -P option */ 307 #define STATS_WIN 15 308 int win_idx; 309 int64_t win[STATS_WIN]; 310 int wait_link; 311 int framing; /* #bits of framing (for bw output) */ 312 }; 313 enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP }; 314 315 enum { 316 TD_TYPE_SENDER = 1, 317 TD_TYPE_RECEIVER, 318 TD_TYPE_OTHER, 319 }; 320 321 /* 322 * Arguments for a new thread. The same structure is used by 323 * the source and the sink 324 */ 325 struct targ { 326 struct glob_arg *g; 327 int used; 328 int completed; 329 int cancel; 330 int fd; 331 struct nmport_d *nmd; 332 /* these ought to be volatile, but they are 333 * only sampled and errors should not accumulate 334 */ 335 struct my_ctrs ctr; 336 337 struct timespec tic, toc; 338 int me; 339 pthread_t thread; 340 int affinity; 341 342 struct pkt pkt; 343 void *frame; 344 uint16_t seed[3]; 345 u_int frags; 346 u_int frag_size; 347 }; 348 349 static __inline uint16_t 350 cksum_add(uint16_t sum, uint16_t a) 351 { 352 uint16_t res; 353 354 res = sum + a; 355 return (res + (res < a)); 356 } 357 358 static void 359 extract_ipv4_addr(char *name, uint32_t *addr, uint16_t *port) 360 { 361 struct in_addr a; 362 char *pp; 363 364 pp = strchr(name, ':'); 365 if (pp != NULL) { /* do we have ports ? */ 366 *pp++ = '\0'; 367 *port = (uint16_t)strtol(pp, NULL, 0); 368 } 369 370 inet_pton(AF_INET, name, &a); 371 *addr = ntohl(a.s_addr); 372 } 373 374 static void 375 extract_ipv6_addr(char *name, struct in6_addr *addr, uint16_t *port, 376 uint8_t *group) 377 { 378 char *pp; 379 380 /* 381 * We accept IPv6 address in the following form: 382 * group@[2001:DB8::1001]:port (w/ brackets and port) 383 * group@[2001:DB8::1] (w/ brackets and w/o port) 384 * group@2001:DB8::1234 (w/o brackets and w/o port) 385 */ 386 pp = strchr(name, '@'); 387 if (pp != NULL) { 388 *pp++ = '\0'; 389 *group = (uint8_t)strtol(name, NULL, 0); 390 if (*group > 7) 391 *group = 7; 392 name = pp; 393 } 394 if (name[0] == '[') 395 name++; 396 pp = strchr(name, ']'); 397 if (pp != NULL) 398 *pp++ = '\0'; 399 if (pp != NULL && *pp != ':') 400 pp = NULL; 401 if (pp != NULL) { /* do we have ports ? */ 402 *pp++ = '\0'; 403 *port = (uint16_t)strtol(pp, NULL, 0); 404 } 405 inet_pton(AF_INET6, name, addr); 406 } 407 /* 408 * extract the extremes from a range of ipv4 addresses. 409 * addr_lo[-addr_hi][:port_lo[-port_hi]] 410 */ 411 static int 412 extract_ip_range(struct ip_range *r, int af) 413 { 414 char *name, *ap, start[INET6_ADDRSTRLEN]; 415 char end[INET6_ADDRSTRLEN]; 416 struct in_addr a; 417 uint32_t tmp; 418 419 if (verbose) 420 D("extract IP range from %s", r->name); 421 422 name = strdup(r->name); 423 if (name == NULL) { 424 D("strdup failed"); 425 usage(-1); 426 } 427 /* the first - splits start/end of range */ 428 ap = strchr(name, '-'); 429 if (ap != NULL) 430 *ap++ = '\0'; 431 r->port0 = 1234; /* default port */ 432 if (af == AF_INET6) { 433 r->ipv6.sgroup = 7; /* default group */ 434 extract_ipv6_addr(name, &r->ipv6.start, &r->port0, 435 &r->ipv6.sgroup); 436 } else 437 extract_ipv4_addr(name, &r->ipv4.start, &r->port0); 438 439 r->port1 = r->port0; 440 if (af == AF_INET6) { 441 if (ap != NULL) { 442 r->ipv6.egroup = r->ipv6.sgroup; 443 extract_ipv6_addr(ap, &r->ipv6.end, &r->port1, 444 &r->ipv6.egroup); 445 } else { 446 r->ipv6.end = r->ipv6.start; 447 r->ipv6.egroup = r->ipv6.sgroup; 448 } 449 } else { 450 if (ap != NULL) { 451 extract_ipv4_addr(ap, &r->ipv4.end, &r->port1); 452 if (r->ipv4.start > r->ipv4.end) { 453 tmp = r->ipv4.end; 454 r->ipv4.end = r->ipv4.start; 455 r->ipv4.start = tmp; 456 } 457 } else 458 r->ipv4.end = r->ipv4.start; 459 } 460 461 if (r->port0 > r->port1) { 462 tmp = r->port0; 463 r->port0 = r->port1; 464 r->port1 = tmp; 465 } 466 if (af == AF_INET) { 467 a.s_addr = htonl(r->ipv4.start); 468 inet_ntop(af, &a, start, sizeof(start)); 469 a.s_addr = htonl(r->ipv4.end); 470 inet_ntop(af, &a, end, sizeof(end)); 471 } else { 472 inet_ntop(af, &r->ipv6.start, start, sizeof(start)); 473 inet_ntop(af, &r->ipv6.end, end, sizeof(end)); 474 } 475 if (af == AF_INET) 476 D("range is %s:%d to %s:%d", start, r->port0, end, r->port1); 477 else 478 D("range is %d@[%s]:%d to %d@[%s]:%d", r->ipv6.sgroup, 479 start, r->port0, r->ipv6.egroup, end, r->port1); 480 481 free(name); 482 if (r->port0 != r->port1 || 483 (af == AF_INET && r->ipv4.start != r->ipv4.end) || 484 (af == AF_INET6 && 485 !IN6_ARE_ADDR_EQUAL(&r->ipv6.start, &r->ipv6.end))) 486 return (OPT_COPY); 487 return (0); 488 } 489 490 static int 491 extract_mac_range(struct mac_range *r) 492 { 493 struct ether_addr *e; 494 if (verbose) 495 D("extract MAC range from %s", r->name); 496 497 e = ether_aton(r->name); 498 if (e == NULL) { 499 D("invalid MAC address '%s'", r->name); 500 return 1; 501 } 502 bcopy(e, &r->start, 6); 503 bcopy(e, &r->end, 6); 504 #if 0 505 bcopy(targ->src_mac, eh->ether_shost, 6); 506 p = index(targ->g->src_mac, '-'); 507 if (p) 508 targ->src_mac_range = atoi(p+1); 509 510 bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6); 511 bcopy(targ->dst_mac, eh->ether_dhost, 6); 512 p = index(targ->g->dst_mac, '-'); 513 if (p) 514 targ->dst_mac_range = atoi(p+1); 515 #endif 516 if (verbose) 517 D("%s starts at %s", r->name, ether_ntoa(&r->start)); 518 return 0; 519 } 520 521 static int 522 get_if_mtu(const struct glob_arg *g) 523 { 524 struct ifreq ifreq; 525 int s, ret; 526 const char *ifname = g->nmd->hdr.nr_name; 527 size_t len; 528 529 if (!strncmp(g->ifname, "netmap:", 7) && !strchr(ifname, '{') 530 && !strchr(ifname, '}')) { 531 532 len = strlen(ifname); 533 534 if (len > IFNAMSIZ) { 535 D("'%s' too long, cannot ask for MTU", ifname); 536 return -1; 537 } 538 539 s = socket(AF_INET, SOCK_DGRAM, 0); 540 if (s < 0) { 541 D("socket() failed: %s", strerror(errno)); 542 return s; 543 } 544 545 memset(&ifreq, 0, sizeof(ifreq)); 546 memcpy(ifreq.ifr_name, ifname, len); 547 548 ret = ioctl(s, SIOCGIFMTU, &ifreq); 549 if (ret) { 550 D("ioctl(SIOCGIFMTU) failed: %s", strerror(errno)); 551 } 552 553 close(s); 554 555 return ifreq.ifr_mtu; 556 } 557 558 /* This is a pipe or a VALE port, where the MTU is very large, 559 * so we use some practical limit. */ 560 return 65536; 561 } 562 563 static struct targ *targs; 564 static int global_nthreads; 565 566 /* control-C handler */ 567 static void 568 sigint_h(int sig) 569 { 570 int i; 571 572 (void)sig; /* UNUSED */ 573 D("received control-C on thread %p", (void *)pthread_self()); 574 for (i = 0; i < global_nthreads; i++) { 575 targs[i].cancel = 1; 576 } 577 } 578 579 /* sysctl wrapper to return the number of active CPUs */ 580 static int 581 system_ncpus(void) 582 { 583 int ncpus; 584 #if defined (__FreeBSD__) 585 int mib[2] = { CTL_HW, HW_NCPU }; 586 size_t len = sizeof(mib); 587 sysctl(mib, 2, &ncpus, &len, NULL, 0); 588 #elif defined(linux) 589 ncpus = sysconf(_SC_NPROCESSORS_ONLN); 590 #elif defined(_WIN32) 591 { 592 SYSTEM_INFO sysinfo; 593 GetSystemInfo(&sysinfo); 594 ncpus = sysinfo.dwNumberOfProcessors; 595 } 596 #else /* others */ 597 ncpus = 1; 598 #endif /* others */ 599 return (ncpus); 600 } 601 602 #ifdef __linux__ 603 #define sockaddr_dl sockaddr_ll 604 #define sdl_family sll_family 605 #define AF_LINK AF_PACKET 606 #define LLADDR(s) s->sll_addr; 607 #include <linux/if_tun.h> 608 #define TAP_CLONEDEV "/dev/net/tun" 609 #endif /* __linux__ */ 610 611 #ifdef __FreeBSD__ 612 #include <net/if_tun.h> 613 #define TAP_CLONEDEV "/dev/tap" 614 #endif /* __FreeBSD */ 615 616 #ifdef __APPLE__ 617 // #warning TAP not supported on apple ? 618 #include <net/if_utun.h> 619 #define TAP_CLONEDEV "/dev/tap" 620 #endif /* __APPLE__ */ 621 622 623 /* 624 * parse the vale configuration in conf and put it in nmr. 625 * Return the flag set if necessary. 626 * The configuration may consist of 1 to 4 numbers separated 627 * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings. 628 * Missing numbers or zeroes stand for default values. 629 * As an additional convenience, if exactly one number 630 * is specified, then this is assigned to both #tx-slots and #rx-slots. 631 * If there is no 4th number, then the 3rd is assigned to both #tx-rings 632 * and #rx-rings. 633 */ 634 static int 635 parse_nmr_config(const char* conf, struct nmreq_register *nmr) 636 { 637 char *w, *tok; 638 int i, v; 639 640 if (conf == NULL || ! *conf) 641 return 0; 642 nmr->nr_tx_rings = nmr->nr_rx_rings = 0; 643 nmr->nr_tx_slots = nmr->nr_rx_slots = 0; 644 w = strdup(conf); 645 for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) { 646 v = atoi(tok); 647 switch (i) { 648 case 0: 649 nmr->nr_tx_slots = nmr->nr_rx_slots = v; 650 break; 651 case 1: 652 nmr->nr_rx_slots = v; 653 break; 654 case 2: 655 nmr->nr_tx_rings = nmr->nr_rx_rings = v; 656 break; 657 case 3: 658 nmr->nr_rx_rings = v; 659 break; 660 default: 661 D("ignored config: %s", tok); 662 break; 663 } 664 } 665 D("txr %d txd %d rxr %d rxd %d", 666 nmr->nr_tx_rings, nmr->nr_tx_slots, 667 nmr->nr_rx_rings, nmr->nr_rx_slots); 668 free(w); 669 return 0; 670 } 671 672 673 /* 674 * locate the src mac address for our interface, put it 675 * into the user-supplied buffer. return 0 if ok, -1 on error. 676 */ 677 static int 678 source_hwaddr(const char *ifname, char *buf) 679 { 680 struct ifaddrs *ifaphead, *ifap; 681 682 if (getifaddrs(&ifaphead) != 0) { 683 D("getifaddrs %s failed", ifname); 684 return (-1); 685 } 686 687 /* remove 'netmap:' prefix before comparing interfaces */ 688 if (!strncmp(ifname, "netmap:", 7)) 689 ifname = &ifname[7]; 690 691 for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) { 692 struct sockaddr_dl *sdl = 693 (struct sockaddr_dl *)ifap->ifa_addr; 694 uint8_t *mac; 695 696 if (!sdl || sdl->sdl_family != AF_LINK) 697 continue; 698 if (strncmp(ifap->ifa_name, ifname, IFNAMSIZ) != 0) 699 continue; 700 mac = (uint8_t *)LLADDR(sdl); 701 sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x", 702 mac[0], mac[1], mac[2], 703 mac[3], mac[4], mac[5]); 704 if (verbose) 705 D("source hwaddr %s", buf); 706 break; 707 } 708 freeifaddrs(ifaphead); 709 return ifap ? 0 : 1; 710 } 711 712 713 /* set the thread affinity. */ 714 static int 715 setaffinity(pthread_t me, int i) 716 { 717 cpuset_t cpumask; 718 719 if (i == -1) 720 return 0; 721 722 /* Set thread affinity affinity.*/ 723 CPU_ZERO(&cpumask); 724 CPU_SET(i, &cpumask); 725 726 if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) { 727 D("Unable to set affinity: %s", strerror(errno)); 728 return 1; 729 } 730 return 0; 731 } 732 733 734 /* Compute the checksum of the given ip header. */ 735 static uint32_t 736 checksum(const void *data, uint16_t len, uint32_t sum) 737 { 738 const uint8_t *addr = data; 739 uint32_t i; 740 741 /* Checksum all the pairs of bytes first... */ 742 for (i = 0; i < (len & ~1U); i += 2) { 743 sum += (uint16_t)ntohs(*((const uint16_t *)(addr + i))); 744 if (sum > 0xFFFF) 745 sum -= 0xFFFF; 746 } 747 /* 748 * If there's a single byte left over, checksum it, too. 749 * Network byte order is big-endian, so the remaining byte is 750 * the high byte. 751 */ 752 if (i < len) { 753 sum += addr[i] << 8; 754 if (sum > 0xFFFF) 755 sum -= 0xFFFF; 756 } 757 return sum; 758 } 759 760 static uint16_t 761 wrapsum(uint32_t sum) 762 { 763 sum = ~sum & 0xFFFF; 764 return (htons(sum)); 765 } 766 767 /* Check the payload of the packet for errors (use it for debug). 768 * Look for consecutive ascii representations of the size of the packet. 769 */ 770 static void 771 dump_payload(const char *_p, int len, struct netmap_ring *ring, int cur) 772 { 773 char buf[128]; 774 int i, j, i0; 775 const unsigned char *p = (const unsigned char *)_p; 776 777 /* get the length in ASCII of the length of the packet. */ 778 779 printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n", 780 ring, cur, ring->slot[cur].buf_idx, 781 ring->slot[cur].flags, len); 782 /* hexdump routine */ 783 for (i = 0; i < len; ) { 784 memset(buf, ' ', sizeof(buf)); 785 sprintf(buf, "%5d: ", i); 786 i0 = i; 787 for (j=0; j < 16 && i < len; i++, j++) 788 sprintf(buf+7+j*3, "%02x ", (uint8_t)(p[i])); 789 i = i0; 790 for (j=0; j < 16 && i < len; i++, j++) 791 sprintf(buf+7+j + 48, "%c", 792 isprint(p[i]) ? p[i] : '.'); 793 printf("%s\n", buf); 794 } 795 } 796 797 /* 798 * Fill a packet with some payload. 799 * We create a UDP packet so the payload starts at 800 * 14+20+8 = 42 bytes. 801 */ 802 #ifdef __linux__ 803 #define uh_sport source 804 #define uh_dport dest 805 #define uh_ulen len 806 #define uh_sum check 807 #endif /* linux */ 808 809 static uint16_t 810 new_ip_sum(uint16_t ip_sum, uint32_t oaddr, uint32_t naddr) 811 { 812 ip_sum = cksum_add(ip_sum, ~oaddr >> 16); 813 ip_sum = cksum_add(ip_sum, ~oaddr & 0xffff); 814 ip_sum = cksum_add(ip_sum, naddr >> 16); 815 ip_sum = cksum_add(ip_sum, naddr & 0xffff); 816 return ip_sum; 817 } 818 819 static uint16_t 820 new_udp_sum(uint16_t udp_sum, uint16_t oport, uint16_t nport) 821 { 822 udp_sum = cksum_add(udp_sum, ~oport); 823 udp_sum = cksum_add(udp_sum, nport); 824 return udp_sum; 825 } 826 827 828 static void 829 update_ip(struct pkt *pkt, struct targ *t) 830 { 831 struct glob_arg *g = t->g; 832 struct ip ip; 833 struct udphdr udp; 834 uint32_t oaddr, naddr; 835 uint16_t oport, nport; 836 uint16_t ip_sum = 0, udp_sum = 0; 837 838 memcpy(&ip, &pkt->ipv4.ip, sizeof(ip)); 839 memcpy(&udp, &pkt->ipv4.udp, sizeof(udp)); 840 do { 841 ip_sum = udp_sum = 0; 842 naddr = oaddr = ntohl(ip.ip_src.s_addr); 843 nport = oport = ntohs(udp.uh_sport); 844 if (g->options & OPT_RANDOM_SRC) { 845 ip.ip_src.s_addr = nrand48(t->seed); 846 udp.uh_sport = nrand48(t->seed); 847 naddr = ntohl(ip.ip_src.s_addr); 848 nport = ntohs(udp.uh_sport); 849 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 850 udp_sum = new_udp_sum(udp_sum, oport, nport); 851 } else { 852 if (oport < g->src_ip.port1) { 853 nport = oport + 1; 854 udp.uh_sport = htons(nport); 855 udp_sum = new_udp_sum(udp_sum, oport, nport); 856 break; 857 } 858 nport = g->src_ip.port0; 859 udp.uh_sport = htons(nport); 860 if (oaddr < g->src_ip.ipv4.end) { 861 naddr = oaddr + 1; 862 ip.ip_src.s_addr = htonl(naddr); 863 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 864 break; 865 } 866 naddr = g->src_ip.ipv4.start; 867 ip.ip_src.s_addr = htonl(naddr); 868 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 869 } 870 871 naddr = oaddr = ntohl(ip.ip_dst.s_addr); 872 nport = oport = ntohs(udp.uh_dport); 873 if (g->options & OPT_RANDOM_DST) { 874 ip.ip_dst.s_addr = nrand48(t->seed); 875 udp.uh_dport = nrand48(t->seed); 876 naddr = ntohl(ip.ip_dst.s_addr); 877 nport = ntohs(udp.uh_dport); 878 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 879 udp_sum = new_udp_sum(udp_sum, oport, nport); 880 } else { 881 if (oport < g->dst_ip.port1) { 882 nport = oport + 1; 883 udp.uh_dport = htons(nport); 884 udp_sum = new_udp_sum(udp_sum, oport, nport); 885 break; 886 } 887 nport = g->dst_ip.port0; 888 udp.uh_dport = htons(nport); 889 if (oaddr < g->dst_ip.ipv4.end) { 890 naddr = oaddr + 1; 891 ip.ip_dst.s_addr = htonl(naddr); 892 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 893 break; 894 } 895 naddr = g->dst_ip.ipv4.start; 896 ip.ip_dst.s_addr = htonl(naddr); 897 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 898 } 899 } while (0); 900 /* update checksums */ 901 if (udp_sum != 0) 902 udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(udp_sum)); 903 if (ip_sum != 0) { 904 ip.ip_sum = ~cksum_add(~ip.ip_sum, htons(ip_sum)); 905 udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(ip_sum)); 906 } 907 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip)); 908 memcpy(&pkt->ipv4.udp, &udp, sizeof(udp)); 909 } 910 911 #ifndef s6_addr16 912 #define s6_addr16 __u6_addr.__u6_addr16 913 #endif 914 static void 915 update_ip6(struct pkt *pkt, struct targ *t) 916 { 917 struct glob_arg *g = t->g; 918 struct ip6_hdr ip6; 919 struct udphdr udp; 920 uint16_t udp_sum; 921 uint16_t oaddr, naddr; 922 uint16_t oport, nport; 923 uint8_t group; 924 925 memcpy(&ip6, &pkt->ipv6.ip, sizeof(ip6)); 926 memcpy(&udp, &pkt->ipv6.udp, sizeof(udp)); 927 do { 928 udp_sum = 0; 929 group = g->src_ip.ipv6.sgroup; 930 naddr = oaddr = ntohs(ip6.ip6_src.s6_addr16[group]); 931 nport = oport = ntohs(udp.uh_sport); 932 if (g->options & OPT_RANDOM_SRC) { 933 ip6.ip6_src.s6_addr16[group] = nrand48(t->seed); 934 udp.uh_sport = nrand48(t->seed); 935 naddr = ntohs(ip6.ip6_src.s6_addr16[group]); 936 nport = ntohs(udp.uh_sport); 937 break; 938 } 939 if (oport < g->src_ip.port1) { 940 nport = oport + 1; 941 udp.uh_sport = htons(nport); 942 break; 943 } 944 nport = g->src_ip.port0; 945 udp.uh_sport = htons(nport); 946 if (oaddr < ntohs(g->src_ip.ipv6.end.s6_addr16[group])) { 947 naddr = oaddr + 1; 948 ip6.ip6_src.s6_addr16[group] = htons(naddr); 949 break; 950 } 951 naddr = ntohs(g->src_ip.ipv6.start.s6_addr16[group]); 952 ip6.ip6_src.s6_addr16[group] = htons(naddr); 953 954 /* update checksums if needed */ 955 if (oaddr != naddr) 956 udp_sum = cksum_add(~oaddr, naddr); 957 if (oport != nport) 958 udp_sum = cksum_add(udp_sum, 959 cksum_add(~oport, nport)); 960 961 group = g->dst_ip.ipv6.egroup; 962 naddr = oaddr = ntohs(ip6.ip6_dst.s6_addr16[group]); 963 nport = oport = ntohs(udp.uh_dport); 964 if (g->options & OPT_RANDOM_DST) { 965 ip6.ip6_dst.s6_addr16[group] = nrand48(t->seed); 966 udp.uh_dport = nrand48(t->seed); 967 naddr = ntohs(ip6.ip6_dst.s6_addr16[group]); 968 nport = ntohs(udp.uh_dport); 969 break; 970 } 971 if (oport < g->dst_ip.port1) { 972 nport = oport + 1; 973 udp.uh_dport = htons(nport); 974 break; 975 } 976 nport = g->dst_ip.port0; 977 udp.uh_dport = htons(nport); 978 if (oaddr < ntohs(g->dst_ip.ipv6.end.s6_addr16[group])) { 979 naddr = oaddr + 1; 980 ip6.ip6_dst.s6_addr16[group] = htons(naddr); 981 break; 982 } 983 naddr = ntohs(g->dst_ip.ipv6.start.s6_addr16[group]); 984 ip6.ip6_dst.s6_addr16[group] = htons(naddr); 985 } while (0); 986 /* update checksums */ 987 if (oaddr != naddr) 988 udp_sum = cksum_add(udp_sum, 989 cksum_add(~oaddr, naddr)); 990 if (oport != nport) 991 udp_sum = cksum_add(udp_sum, 992 cksum_add(~oport, nport)); 993 if (udp_sum != 0) 994 udp.uh_sum = ~cksum_add(~udp.uh_sum, udp_sum); 995 memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6)); 996 memcpy(&pkt->ipv6.udp, &udp, sizeof(udp)); 997 } 998 999 static void 1000 update_addresses(struct pkt *pkt, struct targ *t) 1001 { 1002 1003 if (t->g->af == AF_INET) 1004 update_ip(pkt, t); 1005 else 1006 update_ip6(pkt, t); 1007 } 1008 /* 1009 * initialize one packet and prepare for the next one. 1010 * The copy could be done better instead of repeating it each time. 1011 */ 1012 static void 1013 initialize_packet(struct targ *targ) 1014 { 1015 struct pkt *pkt = &targ->pkt; 1016 struct ether_header *eh; 1017 struct ip6_hdr ip6; 1018 struct ip ip; 1019 struct udphdr udp; 1020 void *udp_ptr; 1021 uint16_t paylen; 1022 uint32_t csum = 0; 1023 const char *payload = targ->g->options & OPT_INDIRECT ? 1024 indirect_payload : default_payload; 1025 int i, l0 = strlen(payload); 1026 1027 #ifndef NO_PCAP 1028 char errbuf[PCAP_ERRBUF_SIZE]; 1029 pcap_t *file; 1030 struct pcap_pkthdr *header; 1031 const unsigned char *packet; 1032 1033 /* Read a packet from a PCAP file if asked. */ 1034 if (targ->g->packet_file != NULL) { 1035 if ((file = pcap_open_offline(targ->g->packet_file, 1036 errbuf)) == NULL) 1037 D("failed to open pcap file %s", 1038 targ->g->packet_file); 1039 if (pcap_next_ex(file, &header, &packet) < 0) 1040 D("failed to read packet from %s", 1041 targ->g->packet_file); 1042 if ((targ->frame = malloc(header->caplen)) == NULL) 1043 D("out of memory"); 1044 bcopy(packet, (unsigned char *)targ->frame, header->caplen); 1045 targ->g->pkt_size = header->caplen; 1046 pcap_close(file); 1047 return; 1048 } 1049 #endif 1050 1051 paylen = targ->g->pkt_size - sizeof(*eh) - 1052 (targ->g->af == AF_INET ? sizeof(ip): sizeof(ip6)); 1053 1054 /* create a nice NUL-terminated string */ 1055 for (i = 0; i < paylen; i += l0) { 1056 if (l0 > paylen - i) 1057 l0 = paylen - i; // last round 1058 bcopy(payload, PKT(pkt, body, targ->g->af) + i, l0); 1059 } 1060 PKT(pkt, body, targ->g->af)[i - 1] = '\0'; 1061 1062 /* prepare the headers */ 1063 eh = &pkt->eh; 1064 bcopy(&targ->g->src_mac.start, eh->ether_shost, 6); 1065 bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6); 1066 1067 if (targ->g->af == AF_INET) { 1068 eh->ether_type = htons(ETHERTYPE_IP); 1069 memcpy(&ip, &pkt->ipv4.ip, sizeof(ip)); 1070 udp_ptr = &pkt->ipv4.udp; 1071 ip.ip_v = IPVERSION; 1072 ip.ip_hl = sizeof(ip) >> 2; 1073 ip.ip_id = 0; 1074 ip.ip_tos = IPTOS_LOWDELAY; 1075 ip.ip_len = htons(targ->g->pkt_size - sizeof(*eh)); 1076 ip.ip_id = 0; 1077 ip.ip_off = htons(IP_DF); /* Don't fragment */ 1078 ip.ip_ttl = IPDEFTTL; 1079 ip.ip_p = IPPROTO_UDP; 1080 ip.ip_dst.s_addr = htonl(targ->g->dst_ip.ipv4.start); 1081 ip.ip_src.s_addr = htonl(targ->g->src_ip.ipv4.start); 1082 ip.ip_sum = wrapsum(checksum(&ip, sizeof(ip), 0)); 1083 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip)); 1084 } else { 1085 eh->ether_type = htons(ETHERTYPE_IPV6); 1086 memcpy(&ip6, &pkt->ipv4.ip, sizeof(ip6)); 1087 udp_ptr = &pkt->ipv6.udp; 1088 ip6.ip6_flow = 0; 1089 ip6.ip6_plen = htons(paylen); 1090 ip6.ip6_vfc = IPV6_VERSION; 1091 ip6.ip6_nxt = IPPROTO_UDP; 1092 ip6.ip6_hlim = IPV6_DEFHLIM; 1093 ip6.ip6_src = targ->g->src_ip.ipv6.start; 1094 ip6.ip6_dst = targ->g->dst_ip.ipv6.start; 1095 } 1096 memcpy(&udp, udp_ptr, sizeof(udp)); 1097 1098 udp.uh_sport = htons(targ->g->src_ip.port0); 1099 udp.uh_dport = htons(targ->g->dst_ip.port0); 1100 udp.uh_ulen = htons(paylen); 1101 if (targ->g->af == AF_INET) { 1102 /* Magic: taken from sbin/dhclient/packet.c */ 1103 udp.uh_sum = wrapsum( 1104 checksum(&udp, sizeof(udp), /* udp header */ 1105 checksum(pkt->ipv4.body, /* udp payload */ 1106 paylen - sizeof(udp), 1107 checksum(&pkt->ipv4.ip.ip_src, /* pseudo header */ 1108 2 * sizeof(pkt->ipv4.ip.ip_src), 1109 IPPROTO_UDP + (u_int32_t)ntohs(udp.uh_ulen))))); 1110 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip)); 1111 } else { 1112 /* Save part of pseudo header checksum into csum */ 1113 csum = IPPROTO_UDP << 24; 1114 csum = checksum(&csum, sizeof(csum), paylen); 1115 udp.uh_sum = wrapsum( 1116 checksum(udp_ptr, sizeof(udp), /* udp header */ 1117 checksum(pkt->ipv6.body, /* udp payload */ 1118 paylen - sizeof(udp), 1119 checksum(&pkt->ipv6.ip.ip6_src, /* pseudo header */ 1120 2 * sizeof(pkt->ipv6.ip.ip6_src), csum)))); 1121 memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6)); 1122 } 1123 memcpy(udp_ptr, &udp, sizeof(udp)); 1124 1125 bzero(&pkt->vh, sizeof(pkt->vh)); 1126 // dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0); 1127 } 1128 1129 static void 1130 get_vnet_hdr_len(struct glob_arg *g) 1131 { 1132 struct nmreq_header hdr; 1133 struct nmreq_port_hdr ph; 1134 int err; 1135 1136 hdr = g->nmd->hdr; /* copy name and version */ 1137 hdr.nr_reqtype = NETMAP_REQ_PORT_HDR_GET; 1138 hdr.nr_options = 0; 1139 memset(&ph, 0, sizeof(ph)); 1140 hdr.nr_body = (uintptr_t)&ph; 1141 err = ioctl(g->main_fd, NIOCCTRL, &hdr); 1142 if (err) { 1143 D("Unable to get virtio-net header length"); 1144 return; 1145 } 1146 1147 g->virt_header = ph.nr_hdr_len; 1148 if (g->virt_header) { 1149 D("Port requires virtio-net header, length = %d", 1150 g->virt_header); 1151 } 1152 } 1153 1154 static void 1155 set_vnet_hdr_len(struct glob_arg *g) 1156 { 1157 int err, l = g->virt_header; 1158 struct nmreq_header hdr; 1159 struct nmreq_port_hdr ph; 1160 1161 if (l == 0) 1162 return; 1163 1164 hdr = g->nmd->hdr; /* copy name and version */ 1165 hdr.nr_reqtype = NETMAP_REQ_PORT_HDR_SET; 1166 hdr.nr_options = 0; 1167 memset(&ph, 0, sizeof(ph)); 1168 hdr.nr_body = (uintptr_t)&ph; 1169 err = ioctl(g->main_fd, NIOCCTRL, &hdr); 1170 if (err) { 1171 D("Unable to set virtio-net header length %d", l); 1172 } 1173 } 1174 1175 /* 1176 * create and enqueue a batch of packets on a ring. 1177 * On the last one set NS_REPORT to tell the driver to generate 1178 * an interrupt when done. 1179 */ 1180 static int 1181 send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame, 1182 int size, struct targ *t, u_int count, int options) 1183 { 1184 u_int n, sent, head = ring->head; 1185 u_int frags = t->frags; 1186 u_int frag_size = t->frag_size; 1187 struct netmap_slot *slot = &ring->slot[head]; 1188 1189 n = nm_ring_space(ring); 1190 #if 0 1191 if (options & (OPT_COPY | OPT_PREFETCH) ) { 1192 for (sent = 0; sent < count; sent++) { 1193 struct netmap_slot *slot = &ring->slot[head]; 1194 char *p = NETMAP_BUF(ring, slot->buf_idx); 1195 1196 __builtin_prefetch(p); 1197 head = nm_ring_next(ring, head); 1198 } 1199 head = ring->head; 1200 } 1201 #endif 1202 for (sent = 0; sent < count && n >= frags; sent++, n--) { 1203 char *p; 1204 int buf_changed; 1205 u_int tosend = size; 1206 1207 slot = &ring->slot[head]; 1208 p = NETMAP_BUF(ring, slot->buf_idx); 1209 buf_changed = slot->flags & NS_BUF_CHANGED; 1210 1211 slot->flags = 0; 1212 if (options & OPT_RUBBISH) { 1213 /* do nothing */ 1214 } else if (options & OPT_INDIRECT) { 1215 slot->flags |= NS_INDIRECT; 1216 slot->ptr = (uint64_t)((uintptr_t)frame); 1217 } else if (frags > 1) { 1218 u_int i; 1219 const char *f = frame; 1220 char *fp = p; 1221 for (i = 0; i < frags - 1; i++) { 1222 memcpy(fp, f, frag_size); 1223 slot->len = frag_size; 1224 slot->flags = NS_MOREFRAG; 1225 if (options & OPT_DUMP) 1226 dump_payload(fp, frag_size, ring, head); 1227 tosend -= frag_size; 1228 f += frag_size; 1229 head = nm_ring_next(ring, head); 1230 slot = &ring->slot[head]; 1231 fp = NETMAP_BUF(ring, slot->buf_idx); 1232 } 1233 n -= (frags - 1); 1234 p = fp; 1235 slot->flags = 0; 1236 memcpy(p, f, tosend); 1237 update_addresses(pkt, t); 1238 } else if ((options & (OPT_COPY | OPT_MEMCPY)) || buf_changed) { 1239 if (options & OPT_COPY) 1240 nm_pkt_copy(frame, p, size); 1241 else 1242 memcpy(p, frame, size); 1243 update_addresses(pkt, t); 1244 } else if (options & OPT_PREFETCH) { 1245 __builtin_prefetch(p); 1246 } 1247 slot->len = tosend; 1248 if (options & OPT_DUMP) 1249 dump_payload(p, tosend, ring, head); 1250 head = nm_ring_next(ring, head); 1251 } 1252 if (sent) { 1253 slot->flags |= NS_REPORT; 1254 ring->head = ring->cur = head; 1255 } 1256 if (sent < count) { 1257 /* tell netmap that we need more slots */ 1258 ring->cur = ring->tail; 1259 } 1260 1261 return (sent); 1262 } 1263 1264 /* 1265 * Index of the highest bit set 1266 */ 1267 static uint32_t 1268 msb64(uint64_t x) 1269 { 1270 uint64_t m = 1ULL << 63; 1271 int i; 1272 1273 for (i = 63; i >= 0; i--, m >>=1) 1274 if (m & x) 1275 return i; 1276 return 0; 1277 } 1278 1279 /* 1280 * wait until ts, either busy or sleeping if more than 1ms. 1281 * Return wakeup time. 1282 */ 1283 static struct timespec 1284 wait_time(struct timespec ts) 1285 { 1286 for (;;) { 1287 struct timespec w, cur; 1288 clock_gettime(CLOCK_REALTIME_PRECISE, &cur); 1289 w = timespec_sub(ts, cur); 1290 if (w.tv_sec < 0) 1291 return cur; 1292 else if (w.tv_sec > 0 || w.tv_nsec > 1000000) 1293 poll(NULL, 0, 1); 1294 } 1295 } 1296 1297 /* 1298 * Send a packet, and wait for a response. 1299 * The payload (after UDP header, ofs 42) has a 4-byte sequence 1300 * followed by a struct timeval (or bintime?) 1301 */ 1302 1303 static void * 1304 ping_body(void *data) 1305 { 1306 struct targ *targ = (struct targ *) data; 1307 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1308 struct netmap_if *nifp = targ->nmd->nifp; 1309 int i, m, rx = 0; 1310 void *frame; 1311 int size; 1312 struct timespec ts, now, last_print; 1313 struct timespec nexttime = {0, 0}; /* silence compiler */ 1314 uint64_t sent = 0, n = targ->g->npackets; 1315 uint64_t count = 0, t_cur, t_min = ~0, av = 0; 1316 uint64_t g_min = ~0, g_av = 0; 1317 uint64_t buckets[64]; /* bins for delays, ns */ 1318 int rate_limit = targ->g->tx_rate, tosend = 0; 1319 1320 frame = (char*)&targ->pkt + sizeof(targ->pkt.vh) - targ->g->virt_header; 1321 size = targ->g->pkt_size + targ->g->virt_header; 1322 1323 1324 if (targ->g->nthreads > 1) { 1325 D("can only ping with 1 thread"); 1326 return NULL; 1327 } 1328 1329 if (targ->g->af == AF_INET6) { 1330 D("Warning: ping-pong with IPv6 not supported"); 1331 } 1332 1333 bzero(&buckets, sizeof(buckets)); 1334 clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); 1335 now = last_print; 1336 if (rate_limit) { 1337 targ->tic = timespec_add(now, (struct timespec){2,0}); 1338 targ->tic.tv_nsec = 0; 1339 wait_time(targ->tic); 1340 nexttime = targ->tic; 1341 } 1342 while (!targ->cancel && (n == 0 || sent < n)) { 1343 struct netmap_ring *ring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring); 1344 struct netmap_slot *slot; 1345 char *p; 1346 int rv; 1347 uint64_t limit, event = 0; 1348 1349 if (rate_limit && tosend <= 0) { 1350 tosend = targ->g->burst; 1351 nexttime = timespec_add(nexttime, targ->g->tx_period); 1352 wait_time(nexttime); 1353 } 1354 1355 limit = rate_limit ? tosend : targ->g->burst; 1356 if (n > 0 && n - sent < limit) 1357 limit = n - sent; 1358 for (m = 0; (unsigned)m < limit; m++) { 1359 slot = &ring->slot[ring->head]; 1360 slot->len = size; 1361 p = NETMAP_BUF(ring, slot->buf_idx); 1362 1363 if (nm_ring_empty(ring)) { 1364 D("-- ouch, cannot send"); 1365 break; 1366 } else { 1367 struct tstamp *tp; 1368 nm_pkt_copy(frame, p, size); 1369 clock_gettime(CLOCK_REALTIME_PRECISE, &ts); 1370 bcopy(&sent, p+42, sizeof(sent)); 1371 tp = (struct tstamp *)(p+46); 1372 tp->sec = (uint32_t)ts.tv_sec; 1373 tp->nsec = (uint32_t)ts.tv_nsec; 1374 sent++; 1375 ring->head = ring->cur = nm_ring_next(ring, ring->head); 1376 } 1377 } 1378 if (m > 0) 1379 event++; 1380 targ->ctr.pkts = sent; 1381 targ->ctr.bytes = sent*size; 1382 targ->ctr.events = event; 1383 if (rate_limit) 1384 tosend -= m; 1385 #ifdef BUSYWAIT 1386 rv = ioctl(pfd.fd, NIOCTXSYNC, NULL); 1387 if (rv < 0) { 1388 D("TXSYNC error on queue %d: %s", targ->me, 1389 strerror(errno)); 1390 } 1391 again: 1392 ioctl(pfd.fd, NIOCRXSYNC, NULL); 1393 #else 1394 /* should use a parameter to decide how often to send */ 1395 if ( (rv = poll(&pfd, 1, 3000)) <= 0) { 1396 D("poll error on queue %d: %s", targ->me, 1397 (rv ? strerror(errno) : "timeout")); 1398 continue; 1399 } 1400 #endif /* BUSYWAIT */ 1401 /* see what we got back */ 1402 rx = 0; 1403 for (i = targ->nmd->first_rx_ring; 1404 i <= targ->nmd->last_rx_ring; i++) { 1405 ring = NETMAP_RXRING(nifp, i); 1406 while (!nm_ring_empty(ring)) { 1407 uint32_t seq; 1408 struct tstamp *tp; 1409 int pos; 1410 1411 slot = &ring->slot[ring->head]; 1412 p = NETMAP_BUF(ring, slot->buf_idx); 1413 1414 clock_gettime(CLOCK_REALTIME_PRECISE, &now); 1415 bcopy(p+42, &seq, sizeof(seq)); 1416 tp = (struct tstamp *)(p+46); 1417 ts.tv_sec = (time_t)tp->sec; 1418 ts.tv_nsec = (long)tp->nsec; 1419 ts.tv_sec = now.tv_sec - ts.tv_sec; 1420 ts.tv_nsec = now.tv_nsec - ts.tv_nsec; 1421 if (ts.tv_nsec < 0) { 1422 ts.tv_nsec += 1000000000; 1423 ts.tv_sec--; 1424 } 1425 if (0) D("seq %d/%llu delta %d.%09d", seq, 1426 (unsigned long long)sent, 1427 (int)ts.tv_sec, (int)ts.tv_nsec); 1428 t_cur = ts.tv_sec * 1000000000UL + ts.tv_nsec; 1429 if (t_cur < t_min) 1430 t_min = t_cur; 1431 count ++; 1432 av += t_cur; 1433 pos = msb64(t_cur); 1434 buckets[pos]++; 1435 /* now store it in a bucket */ 1436 ring->head = ring->cur = nm_ring_next(ring, ring->head); 1437 rx++; 1438 } 1439 } 1440 //D("tx %d rx %d", sent, rx); 1441 //usleep(100000); 1442 ts.tv_sec = now.tv_sec - last_print.tv_sec; 1443 ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; 1444 if (ts.tv_nsec < 0) { 1445 ts.tv_nsec += 1000000000; 1446 ts.tv_sec--; 1447 } 1448 if (ts.tv_sec >= 1) { 1449 D("count %d RTT: min %d av %d ns", 1450 (int)count, (int)t_min, (int)(av/count)); 1451 int k, j, kmin, off; 1452 char buf[512]; 1453 1454 for (kmin = 0; kmin < 64; kmin ++) 1455 if (buckets[kmin]) 1456 break; 1457 for (k = 63; k >= kmin; k--) 1458 if (buckets[k]) 1459 break; 1460 buf[0] = '\0'; 1461 off = 0; 1462 for (j = kmin; j <= k; j++) { 1463 off += sprintf(buf + off, " %5d", (int)buckets[j]); 1464 } 1465 D("k: %d .. %d\n\t%s", 1<<kmin, 1<<k, buf); 1466 bzero(&buckets, sizeof(buckets)); 1467 count = 0; 1468 g_av += av; 1469 av = 0; 1470 if (t_min < g_min) 1471 g_min = t_min; 1472 t_min = ~0; 1473 last_print = now; 1474 } 1475 #ifdef BUSYWAIT 1476 if (rx < m && ts.tv_sec <= 3 && !targ->cancel) 1477 goto again; 1478 #endif /* BUSYWAIT */ 1479 } 1480 1481 if (sent > 0) { 1482 D("RTT over %llu packets: min %d av %d ns", 1483 (long long unsigned)sent, (int)g_min, 1484 (int)((double)g_av/sent)); 1485 } 1486 targ->completed = 1; 1487 1488 /* reset the ``used`` flag. */ 1489 targ->used = 0; 1490 1491 return NULL; 1492 } 1493 1494 1495 /* 1496 * reply to ping requests 1497 */ 1498 static void * 1499 pong_body(void *data) 1500 { 1501 struct targ *targ = (struct targ *) data; 1502 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1503 struct netmap_if *nifp = targ->nmd->nifp; 1504 struct netmap_ring *txring, *rxring; 1505 int i, rx = 0; 1506 uint64_t sent = 0, n = targ->g->npackets; 1507 1508 if (targ->g->nthreads > 1) { 1509 D("can only reply ping with 1 thread"); 1510 return NULL; 1511 } 1512 if (n > 0) 1513 D("understood ponger %llu but don't know how to do it", 1514 (unsigned long long)n); 1515 1516 if (targ->g->af == AF_INET6) { 1517 D("Warning: ping-pong with IPv6 not supported"); 1518 } 1519 1520 while (!targ->cancel && (n == 0 || sent < n)) { 1521 uint32_t txhead, txavail; 1522 //#define BUSYWAIT 1523 #ifdef BUSYWAIT 1524 ioctl(pfd.fd, NIOCRXSYNC, NULL); 1525 #else 1526 int rv; 1527 if ( (rv = poll(&pfd, 1, 1000)) <= 0) { 1528 D("poll error on queue %d: %s", targ->me, 1529 rv ? strerror(errno) : "timeout"); 1530 continue; 1531 } 1532 #endif 1533 txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring); 1534 txhead = txring->head; 1535 txavail = nm_ring_space(txring); 1536 /* see what we got back */ 1537 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 1538 rxring = NETMAP_RXRING(nifp, i); 1539 while (!nm_ring_empty(rxring)) { 1540 uint16_t *spkt, *dpkt; 1541 uint32_t head = rxring->head; 1542 struct netmap_slot *slot = &rxring->slot[head]; 1543 char *src, *dst; 1544 src = NETMAP_BUF(rxring, slot->buf_idx); 1545 //D("got pkt %p of size %d", src, slot->len); 1546 rxring->head = rxring->cur = nm_ring_next(rxring, head); 1547 rx++; 1548 if (txavail == 0) 1549 continue; 1550 dst = NETMAP_BUF(txring, 1551 txring->slot[txhead].buf_idx); 1552 /* copy... */ 1553 dpkt = (uint16_t *)dst; 1554 spkt = (uint16_t *)src; 1555 nm_pkt_copy(src, dst, slot->len); 1556 /* swap source and destination MAC */ 1557 dpkt[0] = spkt[3]; 1558 dpkt[1] = spkt[4]; 1559 dpkt[2] = spkt[5]; 1560 dpkt[3] = spkt[0]; 1561 dpkt[4] = spkt[1]; 1562 dpkt[5] = spkt[2]; 1563 /* swap source and destination IPv4 */ 1564 if (spkt[6] == htons(ETHERTYPE_IP)) { 1565 dpkt[13] = spkt[15]; 1566 dpkt[14] = spkt[16]; 1567 dpkt[15] = spkt[13]; 1568 dpkt[16] = spkt[14]; 1569 } 1570 txring->slot[txhead].len = slot->len; 1571 //dump_payload(dst, slot->len, txring, txhead); 1572 txhead = nm_ring_next(txring, txhead); 1573 txavail--; 1574 sent++; 1575 } 1576 } 1577 txring->head = txring->cur = txhead; 1578 targ->ctr.pkts = sent; 1579 #ifdef BUSYWAIT 1580 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1581 #endif 1582 //D("tx %d rx %d", sent, rx); 1583 } 1584 1585 targ->completed = 1; 1586 1587 /* reset the ``used`` flag. */ 1588 targ->used = 0; 1589 1590 return NULL; 1591 } 1592 1593 1594 static void * 1595 sender_body(void *data) 1596 { 1597 struct targ *targ = (struct targ *) data; 1598 struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; 1599 struct netmap_if *nifp; 1600 struct netmap_ring *txring = NULL; 1601 int i; 1602 uint64_t n = targ->g->npackets / targ->g->nthreads; 1603 uint64_t sent = 0; 1604 uint64_t event = 0; 1605 int options = targ->g->options | OPT_COPY; 1606 struct timespec nexttime = { 0, 0}; // XXX silence compiler 1607 int rate_limit = targ->g->tx_rate; 1608 struct pkt *pkt = &targ->pkt; 1609 void *frame; 1610 int size; 1611 1612 if (targ->frame == NULL) { 1613 frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header; 1614 size = targ->g->pkt_size + targ->g->virt_header; 1615 } else { 1616 frame = targ->frame; 1617 size = targ->g->pkt_size; 1618 } 1619 1620 D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd); 1621 if (setaffinity(targ->thread, targ->affinity)) 1622 goto quit; 1623 1624 /* main loop.*/ 1625 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1626 if (rate_limit) { 1627 targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); 1628 targ->tic.tv_nsec = 0; 1629 wait_time(targ->tic); 1630 nexttime = targ->tic; 1631 } 1632 if (targ->g->dev_type == DEV_TAP) { 1633 D("writing to file desc %d", targ->g->main_fd); 1634 1635 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 1636 if (write(targ->g->main_fd, frame, size) != -1) 1637 sent++; 1638 update_addresses(pkt, targ); 1639 if (i > 10000) { 1640 targ->ctr.pkts = sent; 1641 targ->ctr.bytes = sent*size; 1642 targ->ctr.events = sent; 1643 i = 0; 1644 } 1645 } 1646 #ifndef NO_PCAP 1647 } else if (targ->g->dev_type == DEV_PCAP) { 1648 pcap_t *p = targ->g->p; 1649 1650 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 1651 if (pcap_inject(p, frame, size) != -1) 1652 sent++; 1653 update_addresses(pkt, targ); 1654 if (i > 10000) { 1655 targ->ctr.pkts = sent; 1656 targ->ctr.bytes = sent*size; 1657 targ->ctr.events = sent; 1658 i = 0; 1659 } 1660 } 1661 #endif /* NO_PCAP */ 1662 } else { 1663 int tosend = 0; 1664 u_int bufsz, frag_size = targ->g->frag_size; 1665 1666 nifp = targ->nmd->nifp; 1667 txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring); 1668 bufsz = txring->nr_buf_size; 1669 if (bufsz < frag_size) 1670 frag_size = bufsz; 1671 targ->frag_size = targ->g->pkt_size / targ->frags; 1672 if (targ->frag_size > frag_size) { 1673 targ->frags = targ->g->pkt_size / frag_size; 1674 targ->frag_size = frag_size; 1675 if (targ->g->pkt_size % frag_size != 0) 1676 targ->frags++; 1677 } 1678 D("frags %u frag_size %u", targ->frags, targ->frag_size); 1679 while (!targ->cancel && (n == 0 || sent < n)) { 1680 int rv; 1681 1682 if (rate_limit && tosend <= 0) { 1683 tosend = targ->g->burst; 1684 nexttime = timespec_add(nexttime, targ->g->tx_period); 1685 wait_time(nexttime); 1686 } 1687 1688 /* 1689 * wait for available room in the send queue(s) 1690 */ 1691 #ifdef BUSYWAIT 1692 (void)rv; 1693 if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) { 1694 D("ioctl error on queue %d: %s", targ->me, 1695 strerror(errno)); 1696 goto quit; 1697 } 1698 #else /* !BUSYWAIT */ 1699 if ( (rv = poll(&pfd, 1, 2000)) <= 0) { 1700 if (targ->cancel) 1701 break; 1702 D("poll error on queue %d: %s", targ->me, 1703 rv ? strerror(errno) : "timeout"); 1704 // goto quit; 1705 } 1706 if (pfd.revents & POLLERR) { 1707 D("poll error on %d ring %d-%d", pfd.fd, 1708 targ->nmd->first_tx_ring, targ->nmd->last_tx_ring); 1709 goto quit; 1710 } 1711 #endif /* !BUSYWAIT */ 1712 /* 1713 * scan our queues and send on those with room 1714 */ 1715 if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) { 1716 D("drop copy"); 1717 options &= ~OPT_COPY; 1718 } 1719 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1720 int m; 1721 uint64_t limit = rate_limit ? tosend : targ->g->burst; 1722 1723 if (n > 0 && n == sent) 1724 break; 1725 1726 if (n > 0 && n - sent < limit) 1727 limit = n - sent; 1728 txring = NETMAP_TXRING(nifp, i); 1729 if (nm_ring_empty(txring)) 1730 continue; 1731 1732 if (targ->g->pkt_min_size > 0) { 1733 size = nrand48(targ->seed) % 1734 (targ->g->pkt_size - targ->g->pkt_min_size) + 1735 targ->g->pkt_min_size; 1736 } 1737 m = send_packets(txring, pkt, frame, size, targ, 1738 limit, options); 1739 ND("limit %lu tail %d m %d", 1740 limit, txring->tail, m); 1741 sent += m; 1742 if (m > 0) //XXX-ste: can m be 0? 1743 event++; 1744 targ->ctr.pkts = sent; 1745 targ->ctr.bytes += m*size; 1746 targ->ctr.events = event; 1747 if (rate_limit) { 1748 tosend -= m; 1749 if (tosend <= 0) 1750 break; 1751 } 1752 } 1753 } 1754 /* flush any remaining packets */ 1755 if (txring != NULL) { 1756 D("flush tail %d head %d on thread %p", 1757 txring->tail, txring->head, 1758 (void *)pthread_self()); 1759 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1760 } 1761 1762 /* final part: wait all the TX queues to be empty. */ 1763 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1764 txring = NETMAP_TXRING(nifp, i); 1765 while (!targ->cancel && nm_tx_pending(txring)) { 1766 RD(5, "pending tx tail %d head %d on ring %d", 1767 txring->tail, txring->head, i); 1768 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1769 usleep(1); /* wait 1 tick */ 1770 } 1771 } 1772 } /* end DEV_NETMAP */ 1773 1774 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1775 targ->completed = 1; 1776 targ->ctr.pkts = sent; 1777 targ->ctr.bytes = sent*size; 1778 targ->ctr.events = event; 1779 quit: 1780 /* reset the ``used`` flag. */ 1781 targ->used = 0; 1782 1783 return (NULL); 1784 } 1785 1786 1787 #ifndef NO_PCAP 1788 static void 1789 receive_pcap(u_char *user, const struct pcap_pkthdr * h, 1790 const u_char * bytes) 1791 { 1792 struct my_ctrs *ctr = (struct my_ctrs *)user; 1793 (void)bytes; /* UNUSED */ 1794 ctr->bytes += h->len; 1795 ctr->pkts++; 1796 } 1797 #endif /* !NO_PCAP */ 1798 1799 1800 static int 1801 receive_packets(struct netmap_ring *ring, u_int limit, int dump, uint64_t *bytes) 1802 { 1803 u_int head, rx, n; 1804 uint64_t b = 0; 1805 u_int complete = 0; 1806 1807 if (bytes == NULL) 1808 bytes = &b; 1809 1810 head = ring->head; 1811 n = nm_ring_space(ring); 1812 if (n < limit) 1813 limit = n; 1814 for (rx = 0; rx < limit; rx++) { 1815 struct netmap_slot *slot = &ring->slot[head]; 1816 char *p = NETMAP_BUF(ring, slot->buf_idx); 1817 1818 *bytes += slot->len; 1819 if (dump) 1820 dump_payload(p, slot->len, ring, head); 1821 if (!(slot->flags & NS_MOREFRAG)) 1822 complete++; 1823 1824 head = nm_ring_next(ring, head); 1825 } 1826 ring->head = ring->cur = head; 1827 1828 return (complete); 1829 } 1830 1831 static void * 1832 receiver_body(void *data) 1833 { 1834 struct targ *targ = (struct targ *) data; 1835 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1836 struct netmap_if *nifp; 1837 struct netmap_ring *rxring; 1838 int i; 1839 struct my_ctrs cur; 1840 uint64_t n = targ->g->npackets / targ->g->nthreads; 1841 1842 memset(&cur, 0, sizeof(cur)); 1843 1844 if (setaffinity(targ->thread, targ->affinity)) 1845 goto quit; 1846 1847 D("reading from %s fd %d main_fd %d", 1848 targ->g->ifname, targ->fd, targ->g->main_fd); 1849 /* unbounded wait for the first packet. */ 1850 for (;!targ->cancel;) { 1851 i = poll(&pfd, 1, 1000); 1852 if (i > 0 && !(pfd.revents & POLLERR)) 1853 break; 1854 if (i < 0) { 1855 D("poll() error: %s", strerror(errno)); 1856 goto quit; 1857 } 1858 if (pfd.revents & POLLERR) { 1859 D("fd error"); 1860 goto quit; 1861 } 1862 RD(1, "waiting for initial packets, poll returns %d %d", 1863 i, pfd.revents); 1864 } 1865 /* main loop, exit after 1s silence */ 1866 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1867 if (targ->g->dev_type == DEV_TAP) { 1868 while (!targ->cancel && (n == 0 || targ->ctr.pkts < n)) { 1869 char buf[MAX_BODYSIZE]; 1870 /* XXX should we poll ? */ 1871 i = read(targ->g->main_fd, buf, sizeof(buf)); 1872 if (i > 0) { 1873 targ->ctr.pkts++; 1874 targ->ctr.bytes += i; 1875 targ->ctr.events++; 1876 } 1877 } 1878 #ifndef NO_PCAP 1879 } else if (targ->g->dev_type == DEV_PCAP) { 1880 while (!targ->cancel && (n == 0 || targ->ctr.pkts < n)) { 1881 /* XXX should we poll ? */ 1882 pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, 1883 (u_char *)&targ->ctr); 1884 targ->ctr.events++; 1885 } 1886 #endif /* !NO_PCAP */ 1887 } else { 1888 int dump = targ->g->options & OPT_DUMP; 1889 1890 nifp = targ->nmd->nifp; 1891 while (!targ->cancel && (n == 0 || targ->ctr.pkts < n)) { 1892 /* Once we started to receive packets, wait at most 1 seconds 1893 before quitting. */ 1894 #ifdef BUSYWAIT 1895 if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) { 1896 D("ioctl error on queue %d: %s", targ->me, 1897 strerror(errno)); 1898 goto quit; 1899 } 1900 #else /* !BUSYWAIT */ 1901 if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { 1902 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1903 targ->toc.tv_sec -= 1; /* Subtract timeout time. */ 1904 goto out; 1905 } 1906 1907 if (pfd.revents & POLLERR) { 1908 D("poll err"); 1909 goto quit; 1910 } 1911 #endif /* !BUSYWAIT */ 1912 uint64_t cur_space = 0; 1913 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 1914 int m; 1915 1916 rxring = NETMAP_RXRING(nifp, i); 1917 /* compute free space in the ring */ 1918 m = rxring->head + rxring->num_slots - rxring->tail; 1919 if (m >= (int) rxring->num_slots) 1920 m -= rxring->num_slots; 1921 cur_space += m; 1922 if (nm_ring_empty(rxring)) 1923 continue; 1924 1925 m = receive_packets(rxring, targ->g->burst, dump, &cur.bytes); 1926 cur.pkts += m; 1927 if (m > 0) 1928 cur.events++; 1929 } 1930 cur.min_space = targ->ctr.min_space; 1931 if (cur_space < cur.min_space) 1932 cur.min_space = cur_space; 1933 targ->ctr = cur; 1934 } 1935 } 1936 1937 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1938 1939 #if !defined(BUSYWAIT) 1940 out: 1941 #endif 1942 targ->completed = 1; 1943 targ->ctr = cur; 1944 1945 quit: 1946 /* reset the ``used`` flag. */ 1947 targ->used = 0; 1948 1949 return (NULL); 1950 } 1951 1952 static void * 1953 txseq_body(void *data) 1954 { 1955 struct targ *targ = (struct targ *) data; 1956 struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; 1957 struct netmap_ring *ring; 1958 int64_t sent = 0; 1959 uint64_t event = 0; 1960 int options = targ->g->options | OPT_COPY; 1961 struct timespec nexttime = {0, 0}; 1962 int rate_limit = targ->g->tx_rate; 1963 struct pkt *pkt = &targ->pkt; 1964 int frags = targ->g->frags; 1965 uint32_t sequence = 0; 1966 int budget = 0; 1967 void *frame; 1968 int size; 1969 1970 if (targ->g->nthreads > 1) { 1971 D("can only txseq ping with 1 thread"); 1972 return NULL; 1973 } 1974 1975 if (targ->g->npackets > 0) { 1976 D("Ignoring -n argument"); 1977 } 1978 1979 frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header; 1980 size = targ->g->pkt_size + targ->g->virt_header; 1981 1982 D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd); 1983 if (setaffinity(targ->thread, targ->affinity)) 1984 goto quit; 1985 1986 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1987 if (rate_limit) { 1988 targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); 1989 targ->tic.tv_nsec = 0; 1990 wait_time(targ->tic); 1991 nexttime = targ->tic; 1992 } 1993 1994 /* Only use the first queue. */ 1995 ring = NETMAP_TXRING(targ->nmd->nifp, targ->nmd->first_tx_ring); 1996 1997 while (!targ->cancel) { 1998 int64_t limit; 1999 unsigned int space; 2000 unsigned int head; 2001 int fcnt; 2002 uint16_t sum = 0; 2003 int rv; 2004 2005 if (!rate_limit) { 2006 budget = targ->g->burst; 2007 2008 } else if (budget <= 0) { 2009 budget = targ->g->burst; 2010 nexttime = timespec_add(nexttime, targ->g->tx_period); 2011 wait_time(nexttime); 2012 } 2013 2014 /* wait for available room in the send queue */ 2015 #ifdef BUSYWAIT 2016 (void)rv; 2017 if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) { 2018 D("ioctl error on queue %d: %s", targ->me, 2019 strerror(errno)); 2020 goto quit; 2021 } 2022 #else /* !BUSYWAIT */ 2023 if ( (rv = poll(&pfd, 1, 2000)) <= 0) { 2024 if (targ->cancel) 2025 break; 2026 D("poll error on queue %d: %s", targ->me, 2027 rv ? strerror(errno) : "timeout"); 2028 // goto quit; 2029 } 2030 if (pfd.revents & POLLERR) { 2031 D("poll error on %d ring %d-%d", pfd.fd, 2032 targ->nmd->first_tx_ring, targ->nmd->last_tx_ring); 2033 goto quit; 2034 } 2035 #endif /* !BUSYWAIT */ 2036 2037 /* If no room poll() again. */ 2038 space = nm_ring_space(ring); 2039 if (!space) { 2040 continue; 2041 } 2042 2043 limit = budget; 2044 2045 if (space < limit) { 2046 limit = space; 2047 } 2048 2049 /* Cut off ``limit`` to make sure is multiple of ``frags``. */ 2050 if (frags > 1) { 2051 limit = (limit / frags) * frags; 2052 } 2053 2054 limit = sent + limit; /* Convert to absolute. */ 2055 2056 for (fcnt = frags, head = ring->head; 2057 sent < limit; sent++, sequence++) { 2058 struct netmap_slot *slot = &ring->slot[head]; 2059 char *p = NETMAP_BUF(ring, slot->buf_idx); 2060 uint16_t *w = (uint16_t *)PKT(pkt, body, targ->g->af), t; 2061 2062 memcpy(&sum, targ->g->af == AF_INET ? &pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, sizeof(sum)); 2063 2064 slot->flags = 0; 2065 t = *w; 2066 PKT(pkt, body, targ->g->af)[0] = sequence >> 24; 2067 PKT(pkt, body, targ->g->af)[1] = (sequence >> 16) & 0xff; 2068 sum = ~cksum_add(~sum, cksum_add(~t, *w)); 2069 t = *++w; 2070 PKT(pkt, body, targ->g->af)[2] = (sequence >> 8) & 0xff; 2071 PKT(pkt, body, targ->g->af)[3] = sequence & 0xff; 2072 sum = ~cksum_add(~sum, cksum_add(~t, *w)); 2073 memcpy(targ->g->af == AF_INET ? &pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, &sum, sizeof(sum)); 2074 nm_pkt_copy(frame, p, size); 2075 if (fcnt == frags) { 2076 update_addresses(pkt, targ); 2077 } 2078 2079 if (options & OPT_DUMP) { 2080 dump_payload(p, size, ring, head); 2081 } 2082 2083 slot->len = size; 2084 2085 if (--fcnt > 0) { 2086 slot->flags |= NS_MOREFRAG; 2087 } else { 2088 fcnt = frags; 2089 } 2090 2091 if (sent == limit - 1) { 2092 /* Make sure we don't push an incomplete 2093 * packet. */ 2094 assert(!(slot->flags & NS_MOREFRAG)); 2095 slot->flags |= NS_REPORT; 2096 } 2097 2098 head = nm_ring_next(ring, head); 2099 if (rate_limit) { 2100 budget--; 2101 } 2102 } 2103 2104 ring->cur = ring->head = head; 2105 2106 event ++; 2107 targ->ctr.pkts = sent; 2108 targ->ctr.bytes = sent * size; 2109 targ->ctr.events = event; 2110 } 2111 2112 /* flush any remaining packets */ 2113 D("flush tail %d head %d on thread %p", 2114 ring->tail, ring->head, 2115 (void *)pthread_self()); 2116 ioctl(pfd.fd, NIOCTXSYNC, NULL); 2117 2118 /* final part: wait the TX queues to become empty. */ 2119 while (!targ->cancel && nm_tx_pending(ring)) { 2120 RD(5, "pending tx tail %d head %d on ring %d", 2121 ring->tail, ring->head, targ->nmd->first_tx_ring); 2122 ioctl(pfd.fd, NIOCTXSYNC, NULL); 2123 usleep(1); /* wait 1 tick */ 2124 } 2125 2126 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 2127 targ->completed = 1; 2128 targ->ctr.pkts = sent; 2129 targ->ctr.bytes = sent * size; 2130 targ->ctr.events = event; 2131 quit: 2132 /* reset the ``used`` flag. */ 2133 targ->used = 0; 2134 2135 return (NULL); 2136 } 2137 2138 2139 static char * 2140 multi_slot_to_string(struct netmap_ring *ring, unsigned int head, 2141 unsigned int nfrags, char *strbuf, size_t strbuflen) 2142 { 2143 unsigned int f; 2144 char *ret = strbuf; 2145 2146 for (f = 0; f < nfrags; f++) { 2147 struct netmap_slot *slot = &ring->slot[head]; 2148 int m = snprintf(strbuf, strbuflen, "|%u,%x|", slot->len, 2149 slot->flags); 2150 if (m >= (int)strbuflen) { 2151 break; 2152 } 2153 strbuf += m; 2154 strbuflen -= m; 2155 2156 head = nm_ring_next(ring, head); 2157 } 2158 2159 return ret; 2160 } 2161 2162 static void * 2163 rxseq_body(void *data) 2164 { 2165 struct targ *targ = (struct targ *) data; 2166 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 2167 int dump = targ->g->options & OPT_DUMP; 2168 struct netmap_ring *ring; 2169 unsigned int frags_exp = 1; 2170 struct my_ctrs cur; 2171 unsigned int frags = 0; 2172 int first_packet = 1; 2173 int first_slot = 1; 2174 int i, j, af, nrings; 2175 uint32_t seq, *seq_exp = NULL; 2176 2177 memset(&cur, 0, sizeof(cur)); 2178 2179 if (setaffinity(targ->thread, targ->affinity)) 2180 goto quit; 2181 2182 nrings = targ->nmd->last_rx_ring - targ->nmd->first_rx_ring + 1; 2183 seq_exp = calloc(nrings, sizeof(uint32_t)); 2184 if (seq_exp == NULL) { 2185 D("failed to allocate seq array"); 2186 goto quit; 2187 } 2188 2189 D("reading from %s fd %d main_fd %d", 2190 targ->g->ifname, targ->fd, targ->g->main_fd); 2191 /* unbounded wait for the first packet. */ 2192 for (;!targ->cancel;) { 2193 i = poll(&pfd, 1, 1000); 2194 if (i > 0 && !(pfd.revents & POLLERR)) 2195 break; 2196 RD(1, "waiting for initial packets, poll returns %d %d", 2197 i, pfd.revents); 2198 } 2199 2200 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 2201 2202 2203 while (!targ->cancel) { 2204 unsigned int head; 2205 int limit; 2206 2207 #ifdef BUSYWAIT 2208 if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) { 2209 D("ioctl error on queue %d: %s", targ->me, 2210 strerror(errno)); 2211 goto quit; 2212 } 2213 #else /* !BUSYWAIT */ 2214 if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { 2215 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 2216 targ->toc.tv_sec -= 1; /* Subtract timeout time. */ 2217 goto out; 2218 } 2219 2220 if (pfd.revents & POLLERR) { 2221 D("poll err"); 2222 goto quit; 2223 } 2224 #endif /* !BUSYWAIT */ 2225 2226 for (j = targ->nmd->first_rx_ring; j <= targ->nmd->last_rx_ring; j++) { 2227 ring = NETMAP_RXRING(targ->nmd->nifp, j); 2228 if (nm_ring_empty(ring)) 2229 continue; 2230 2231 limit = nm_ring_space(ring); 2232 if (limit > targ->g->burst) 2233 limit = targ->g->burst; 2234 2235 #if 0 2236 /* Enable this if 2237 * 1) we remove the early-return optimization from 2238 * the netmap poll implementation, or 2239 * 2) pipes get NS_MOREFRAG support. 2240 * With the current netmap implementation, an experiment like 2241 * pkt-gen -i vale:1{1 -f txseq -F 9 2242 * pkt-gen -i vale:1}1 -f rxseq 2243 * would get stuck as soon as we find nm_ring_space(ring) < 9, 2244 * since here limit is rounded to 0 and 2245 * pipe rxsync is not called anymore by the poll() of this loop. 2246 */ 2247 if (frags_exp > 1) { 2248 int o = limit; 2249 /* Cut off to the closest smaller multiple. */ 2250 limit = (limit / frags_exp) * frags_exp; 2251 RD(2, "LIMIT %d --> %d", o, limit); 2252 } 2253 #endif 2254 2255 for (head = ring->head, i = 0; i < limit; i++) { 2256 struct netmap_slot *slot = &ring->slot[head]; 2257 char *p = NETMAP_BUF(ring, slot->buf_idx); 2258 int len = slot->len; 2259 struct pkt *pkt; 2260 2261 if (dump) { 2262 dump_payload(p, slot->len, ring, head); 2263 } 2264 2265 frags++; 2266 if (!(slot->flags & NS_MOREFRAG)) { 2267 if (first_packet) { 2268 first_packet = 0; 2269 } else if (frags != frags_exp) { 2270 char prbuf[512]; 2271 RD(1, "Received packets with %u frags, " 2272 "expected %u, '%s'", frags, frags_exp, 2273 multi_slot_to_string(ring, head-frags+1, 2274 frags, 2275 prbuf, sizeof(prbuf))); 2276 } 2277 first_packet = 0; 2278 frags_exp = frags; 2279 frags = 0; 2280 } 2281 2282 p -= sizeof(pkt->vh) - targ->g->virt_header; 2283 len += sizeof(pkt->vh) - targ->g->virt_header; 2284 pkt = (struct pkt *)p; 2285 if (ntohs(pkt->eh.ether_type) == ETHERTYPE_IP) 2286 af = AF_INET; 2287 else 2288 af = AF_INET6; 2289 2290 if ((char *)pkt + len < ((char *)PKT(pkt, body, af)) + 2291 sizeof(seq)) { 2292 RD(1, "%s: packet too small (len=%u)", __func__, 2293 slot->len); 2294 } else { 2295 seq = (PKT(pkt, body, af)[0] << 24) | 2296 (PKT(pkt, body, af)[1] << 16) | 2297 (PKT(pkt, body, af)[2] << 8) | 2298 PKT(pkt, body, af)[3]; 2299 if (first_slot) { 2300 /* Grab the first one, whatever it 2301 is. */ 2302 seq_exp[j] = seq; 2303 first_slot = 0; 2304 } else if (seq != seq_exp[j]) { 2305 uint32_t delta = seq - seq_exp[j]; 2306 2307 if (delta < (0xFFFFFFFF >> 1)) { 2308 RD(2, "Sequence GAP: exp %u found %u", 2309 seq_exp[j], seq); 2310 } else { 2311 RD(2, "Sequence OUT OF ORDER: " 2312 "exp %u found %u", seq_exp[j], seq); 2313 } 2314 seq_exp[j] = seq; 2315 } 2316 seq_exp[j]++; 2317 } 2318 2319 cur.bytes += slot->len; 2320 head = nm_ring_next(ring, head); 2321 cur.pkts++; 2322 } 2323 2324 ring->cur = ring->head = head; 2325 2326 cur.events++; 2327 targ->ctr = cur; 2328 } 2329 } 2330 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 2331 2332 #ifndef BUSYWAIT 2333 out: 2334 #endif /* !BUSYWAIT */ 2335 targ->completed = 1; 2336 targ->ctr = cur; 2337 2338 quit: 2339 if (seq_exp != NULL) 2340 free(seq_exp); 2341 /* reset the ``used`` flag. */ 2342 targ->used = 0; 2343 2344 return (NULL); 2345 } 2346 2347 2348 static void 2349 tx_output(struct glob_arg *g, struct my_ctrs *cur, double delta, const char *msg) 2350 { 2351 double bw, raw_bw, pps, abs; 2352 char b1[40], b2[80], b3[80]; 2353 int size; 2354 2355 if (cur->pkts == 0) { 2356 printf("%s nothing.\n", msg); 2357 return; 2358 } 2359 2360 size = (int)(cur->bytes / cur->pkts); 2361 2362 printf("%s %llu packets %llu bytes %llu events %d bytes each in %.2f seconds.\n", 2363 msg, 2364 (unsigned long long)cur->pkts, 2365 (unsigned long long)cur->bytes, 2366 (unsigned long long)cur->events, size, delta); 2367 if (delta == 0) 2368 delta = 1e-6; 2369 if (size < 60) /* correct for min packet size */ 2370 size = 60; 2371 pps = cur->pkts / delta; 2372 bw = (8.0 * cur->bytes) / delta; 2373 raw_bw = (8.0 * cur->bytes + cur->pkts * g->framing) / delta; 2374 abs = cur->pkts / (double)(cur->events); 2375 2376 printf("Speed: %spps Bandwidth: %sbps (raw %sbps). Average batch: %.2f pkts\n", 2377 norm(b1, pps, normalize), norm(b2, bw, normalize), norm(b3, raw_bw, normalize), abs); 2378 } 2379 2380 static void 2381 usage(int errcode) 2382 { 2383 /* This usage is generated from the pkt-gen man page: 2384 * $ man pkt-gen > x 2385 * and pasted here adding the string terminators and endlines with simple 2386 * regular expressions. */ 2387 const char *cmd = "pkt-gen"; 2388 fprintf(stderr, 2389 "Usage:\n" 2390 "%s arguments\n" 2391 " -h Show program usage and exit.\n" 2392 "\n" 2393 " -i interface\n" 2394 " Name of the network interface that pkt-gen operates on. It can be a system network interface\n" 2395 " (e.g., em0), the name of a vale(4) port (e.g., valeSSS:PPP), the name of a netmap pipe or\n" 2396 " monitor, or any valid netmap port name accepted by the nm_open library function, as docu-\n" 2397 " mented in netmap(4) (NIOCREGIF section).\n" 2398 "\n" 2399 " -f function\n" 2400 " The function to be executed by pkt-gen. Specify tx for transmission, rx for reception, ping\n" 2401 " for client-side ping-pong operation, and pong for server-side ping-pong operation.\n" 2402 "\n" 2403 " -n count\n" 2404 " Number of iterations of the pkt-gen function (with 0 meaning infinite). In case of tx or rx,\n" 2405 " count is the number of packets to receive or transmit. In case of ping or pong, count is the\n" 2406 " number of ping-pong transactions.\n" 2407 "\n" 2408 " -l pkt_size\n" 2409 " Packet size in bytes excluding CRC. If passed a second time, use random sizes larger or\n" 2410 " equal than the second one and lower than the first one.\n" 2411 "\n" 2412 " -b burst_size\n" 2413 " Transmit or receive up to burst_size packets at a time.\n" 2414 "\n" 2415 " -4 Use IPv4 addresses.\n" 2416 "\n" 2417 " -6 Use IPv6 addresses.\n" 2418 "\n" 2419 " -d dst_ip[:port[-dst_ip:port]]\n" 2420 " Destination IPv4/IPv6 address and port, single or range.\n" 2421 "\n" 2422 " -s src_ip[:port[-src_ip:port]]\n" 2423 " Source IPv4/IPv6 address and port, single or range.\n" 2424 "\n" 2425 " -D dst_mac\n" 2426 " Destination MAC address in colon notation (e.g., aa:bb:cc:dd:ee:00).\n" 2427 "\n" 2428 " -S src_mac\n" 2429 " Source MAC address in colon notation.\n" 2430 "\n" 2431 " -a cpu_id\n" 2432 " Pin the first thread of pkt-gen to a particular CPU using pthread_setaffinity_np(3). If more\n" 2433 " threads are used, they are pinned to the subsequent CPUs, one per thread.\n" 2434 "\n" 2435 " -c cpus\n" 2436 " Maximum number of CPUs to use (0 means to use all the available ones).\n" 2437 "\n" 2438 " -p threads\n" 2439 " Number of threads to use. By default, only a single thread is used to handle all the netmap\n" 2440 " rings. If threads is larger than one, each thread handles a single TX ring (in tx mode), a\n" 2441 " single RX ring (in rx mode), or a TX/RX ring pair. The number of threads must be less than or\n" 2442 " equal to the number of TX (or RX) rings available in the device specified by interface.\n" 2443 "\n" 2444 " -T report_ms\n" 2445 " Number of milliseconds between reports.\n" 2446 "\n" 2447 " -w wait_for_link_time\n" 2448 " Number of seconds to wait before starting the pkt-gen function, useful to make sure that the\n" 2449 " network link is up. A network device driver may take some time to enter netmap mode, or to\n" 2450 " create a new transmit/receive ring pair when netmap(4) requests one.\n" 2451 "\n" 2452 " -R rate\n" 2453 " Packet transmission rate. Not setting the packet transmission rate tells pkt-gen to transmit\n" 2454 " packets as quickly as possible. On servers from 2010 onward netmap(4) is able to com-\n" 2455 " pletely use all of the bandwidth of a 10 or 40Gbps link, so this option should be used unless\n" 2456 " your intention is to saturate the link.\n" 2457 "\n" 2458 " -X Dump payload of each packet transmitted or received.\n" 2459 "\n" 2460 " -H len Add empty virtio-net-header with size 'len'. Valid sizes are 0, 10 and 12. This option is\n" 2461 " only used with Virtual Machine technologies that use virtio as a network interface.\n" 2462 "\n" 2463 " -P file\n" 2464 " Load the packet to be transmitted from a pcap file rather than constructing it within\n" 2465 " pkt-gen.\n" 2466 "\n" 2467 " -z Use random IPv4/IPv6 src address/port.\n" 2468 "\n" 2469 " -Z Use random IPv4/IPv6 dst address/port.\n" 2470 "\n" 2471 " -N Do not normalize units (i.e., use bps, pps instead of Mbps, Kpps, etc.).\n" 2472 "\n" 2473 " -F num_frags\n" 2474 " Send multi-slot packets, each one with num_frags fragments. A multi-slot packet is repre-\n" 2475 " sented by two or more consecutive netmap slots with the NS_MOREFRAG flag set (except for the\n" 2476 " last slot). This is useful to transmit or receive packets larger than the netmap buffer\n" 2477 " size.\n" 2478 "\n" 2479 " -M frag_size\n" 2480 " In multi-slot mode, frag_size specifies the size of each fragment, if smaller than the packet\n" 2481 " length divided by num_frags.\n" 2482 "\n" 2483 " -I Use indirect buffers. It is only valid for transmitting on VALE ports, and it is implemented\n" 2484 " by setting the NS_INDIRECT flag in the netmap slots.\n" 2485 "\n" 2486 " -W Exit immediately if all the RX rings are empty the first time they are examined.\n" 2487 "\n" 2488 " -v Increase the verbosity level.\n" 2489 "\n" 2490 " -r In tx mode, do not initialize packets, but send whatever the content of the uninitialized\n" 2491 " netmap buffers is (rubbish mode).\n" 2492 "\n" 2493 " -A Compute mean and standard deviation (over a sliding window) for the transmit or receive rate.\n" 2494 "\n" 2495 " -B Take Ethernet framing and CRC into account when computing the average bps. This adds 4 bytes\n" 2496 " of CRC and 20 bytes of framing to each packet.\n" 2497 "\n" 2498 " -C tx_slots[,rx_slots[,tx_rings[,rx_rings]]]\n" 2499 " Configuration in terms of number of rings and slots to be used when opening the netmap port.\n" 2500 " Such configuration has an effect on software ports created on the fly, such as VALE ports and\n" 2501 " netmap pipes. The configuration may consist of 1 to 4 numbers separated by commas: tx_slots,\n" 2502 " rx_slots, tx_rings, rx_rings. Missing numbers or zeroes stand for default values. As an\n" 2503 " additional convenience, if exactly one number is specified, then this is assigned to both\n" 2504 " tx_slots and rx_slots. If there is no fourth number, then the third one is assigned to both\n" 2505 " tx_rings and rx_rings.\n" 2506 "\n" 2507 " -o options data generation options (parsed using atoi)\n" 2508 " OPT_PREFETCH 1\n" 2509 " OPT_ACCESS 2\n" 2510 " OPT_COPY 4\n" 2511 " OPT_MEMCPY 8\n" 2512 " OPT_TS 16 (add a timestamp)\n" 2513 " OPT_INDIRECT 32 (use indirect buffers)\n" 2514 " OPT_DUMP 64 (dump rx/tx traffic)\n" 2515 " OPT_RUBBISH 256\n" 2516 " (send whatever the buffers contain)\n" 2517 " OPT_RANDOM_SRC 512\n" 2518 " OPT_RANDOM_DST 1024\n" 2519 " OPT_PPS_STATS 2048\n" 2520 "", 2521 cmd); 2522 exit(errcode); 2523 } 2524 2525 static int 2526 start_threads(struct glob_arg *g) { 2527 int i; 2528 2529 targs = calloc(g->nthreads, sizeof(*targs)); 2530 struct targ *t; 2531 /* 2532 * Now create the desired number of threads, each one 2533 * using a single descriptor. 2534 */ 2535 for (i = 0; i < g->nthreads; i++) { 2536 uint64_t seed = (uint64_t)time(0) | ((uint64_t)time(0) << 32); 2537 t = &targs[i]; 2538 2539 bzero(t, sizeof(*t)); 2540 t->fd = -1; /* default, with pcap */ 2541 t->g = g; 2542 memcpy(t->seed, &seed, sizeof(t->seed)); 2543 2544 if (g->dev_type == DEV_NETMAP) { 2545 int m = -1; 2546 2547 /* 2548 * if the user wants both HW and SW rings, we need to 2549 * know when to switch from NR_REG_ONE_NIC to NR_REG_ONE_SW 2550 */ 2551 if (g->orig_mode == NR_REG_NIC_SW) { 2552 m = (g->td_type == TD_TYPE_RECEIVER ? 2553 g->nmd->reg.nr_rx_rings : 2554 g->nmd->reg.nr_tx_rings); 2555 } 2556 2557 if (i > 0) { 2558 int j; 2559 /* the first thread uses the fd opened by the main 2560 * thread, the other threads re-open /dev/netmap 2561 */ 2562 t->nmd = nmport_clone(g->nmd); 2563 if (t->nmd == NULL) 2564 return -1; 2565 2566 j = i; 2567 if (m > 0 && j >= m) { 2568 /* switch to the software rings */ 2569 t->nmd->reg.nr_mode = NR_REG_ONE_SW; 2570 j -= m; 2571 } 2572 t->nmd->reg.nr_ringid = j & NETMAP_RING_MASK; 2573 /* Only touch one of the rings (rx is already ok) */ 2574 if (g->td_type == TD_TYPE_RECEIVER) 2575 t->nmd->reg.nr_flags |= NETMAP_NO_TX_POLL; 2576 2577 /* register interface. Override ifname and ringid etc. */ 2578 if (nmport_open_desc(t->nmd) < 0) { 2579 nmport_undo_prepare(t->nmd); 2580 t->nmd = NULL; 2581 return -1; 2582 } 2583 } else { 2584 t->nmd = g->nmd; 2585 } 2586 t->fd = t->nmd->fd; 2587 t->frags = g->frags; 2588 } else { 2589 targs[i].fd = g->main_fd; 2590 } 2591 t->used = 1; 2592 t->me = i; 2593 if (g->affinity >= 0) { 2594 t->affinity = (g->affinity + i) % g->cpus; 2595 } else { 2596 t->affinity = -1; 2597 } 2598 /* default, init packets */ 2599 initialize_packet(t); 2600 } 2601 /* Wait for PHY reset. */ 2602 D("Wait %d secs for phy reset", g->wait_link); 2603 sleep(g->wait_link); 2604 D("Ready..."); 2605 2606 for (i = 0; i < g->nthreads; i++) { 2607 t = &targs[i]; 2608 if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) { 2609 D("Unable to create thread %d: %s", i, strerror(errno)); 2610 t->used = 0; 2611 } 2612 } 2613 return 0; 2614 } 2615 2616 static void 2617 main_thread(struct glob_arg *g) 2618 { 2619 int i; 2620 2621 struct my_ctrs prev, cur; 2622 double delta_t; 2623 struct timeval tic, toc; 2624 2625 prev.pkts = prev.bytes = prev.events = 0; 2626 gettimeofday(&prev.t, NULL); 2627 for (;;) { 2628 char b1[40], b2[40], b3[40], b4[100]; 2629 uint64_t pps, usec; 2630 struct my_ctrs x; 2631 double abs; 2632 int done = 0; 2633 2634 usec = wait_for_next_report(&prev.t, &cur.t, 2635 g->report_interval); 2636 2637 cur.pkts = cur.bytes = cur.events = 0; 2638 cur.min_space = 0; 2639 if (usec < 10000) /* too short to be meaningful */ 2640 continue; 2641 /* accumulate counts for all threads */ 2642 for (i = 0; i < g->nthreads; i++) { 2643 cur.pkts += targs[i].ctr.pkts; 2644 cur.bytes += targs[i].ctr.bytes; 2645 cur.events += targs[i].ctr.events; 2646 cur.min_space += targs[i].ctr.min_space; 2647 targs[i].ctr.min_space = 99999; 2648 if (targs[i].used == 0) 2649 done++; 2650 } 2651 x.pkts = cur.pkts - prev.pkts; 2652 x.bytes = cur.bytes - prev.bytes; 2653 x.events = cur.events - prev.events; 2654 pps = (x.pkts*1000000 + usec/2) / usec; 2655 abs = (x.events > 0) ? (x.pkts / (double) x.events) : 0; 2656 2657 if (!(g->options & OPT_PPS_STATS)) { 2658 strcpy(b4, ""); 2659 } else { 2660 /* Compute some pps stats using a sliding window. */ 2661 double ppsavg = 0.0, ppsdev = 0.0; 2662 int nsamples = 0; 2663 2664 g->win[g->win_idx] = pps; 2665 g->win_idx = (g->win_idx + 1) % STATS_WIN; 2666 2667 for (i = 0; i < STATS_WIN; i++) { 2668 ppsavg += g->win[i]; 2669 if (g->win[i]) { 2670 nsamples ++; 2671 } 2672 } 2673 ppsavg /= nsamples; 2674 2675 for (i = 0; i < STATS_WIN; i++) { 2676 if (g->win[i] == 0) { 2677 continue; 2678 } 2679 ppsdev += (g->win[i] - ppsavg) * (g->win[i] - ppsavg); 2680 } 2681 ppsdev /= nsamples; 2682 ppsdev = sqrt(ppsdev); 2683 2684 snprintf(b4, sizeof(b4), "[avg/std %s/%s pps]", 2685 norm(b1, ppsavg, normalize), norm(b2, ppsdev, normalize)); 2686 } 2687 2688 D("%spps %s(%spkts %sbps in %llu usec) %.2f avg_batch %d min_space", 2689 norm(b1, pps, normalize), b4, 2690 norm(b2, (double)x.pkts, normalize), 2691 norm(b3, 1000000*((double)x.bytes*8+(double)x.pkts*g->framing)/usec, normalize), 2692 (unsigned long long)usec, 2693 abs, (int)cur.min_space); 2694 prev = cur; 2695 2696 if (done == g->nthreads) 2697 break; 2698 } 2699 2700 timerclear(&tic); 2701 timerclear(&toc); 2702 cur.pkts = cur.bytes = cur.events = 0; 2703 /* final round */ 2704 for (i = 0; i < g->nthreads; i++) { 2705 struct timespec t_tic, t_toc; 2706 /* 2707 * Join active threads, unregister interfaces and close 2708 * file descriptors. 2709 */ 2710 if (targs[i].used) 2711 pthread_join(targs[i].thread, NULL); /* blocking */ 2712 if (g->dev_type == DEV_NETMAP) { 2713 nmport_close(targs[i].nmd); 2714 targs[i].nmd = NULL; 2715 } else { 2716 close(targs[i].fd); 2717 } 2718 2719 if (targs[i].completed == 0) 2720 D("ouch, thread %d exited with error", i); 2721 2722 /* 2723 * Collect threads output and extract information about 2724 * how long it took to send all the packets. 2725 */ 2726 cur.pkts += targs[i].ctr.pkts; 2727 cur.bytes += targs[i].ctr.bytes; 2728 cur.events += targs[i].ctr.events; 2729 /* collect the largest start (tic) and end (toc) times, 2730 * XXX maybe we should do the earliest tic, or do a weighted 2731 * average ? 2732 */ 2733 t_tic = timeval2spec(&tic); 2734 t_toc = timeval2spec(&toc); 2735 if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic)) 2736 tic = timespec2val(&targs[i].tic); 2737 if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc)) 2738 toc = timespec2val(&targs[i].toc); 2739 } 2740 2741 /* print output. */ 2742 timersub(&toc, &tic, &toc); 2743 delta_t = toc.tv_sec + 1e-6* toc.tv_usec; 2744 if (g->td_type == TD_TYPE_SENDER) 2745 tx_output(g, &cur, delta_t, "Sent"); 2746 else if (g->td_type == TD_TYPE_RECEIVER) 2747 tx_output(g, &cur, delta_t, "Received"); 2748 } 2749 2750 struct td_desc { 2751 int ty; 2752 const char *key; 2753 void *f; 2754 int default_burst; 2755 }; 2756 2757 static struct td_desc func[] = { 2758 { TD_TYPE_RECEIVER, "rx", receiver_body, 512}, /* default */ 2759 { TD_TYPE_SENDER, "tx", sender_body, 512 }, 2760 { TD_TYPE_OTHER, "ping", ping_body, 1 }, 2761 { TD_TYPE_OTHER, "pong", pong_body, 1 }, 2762 { TD_TYPE_SENDER, "txseq", txseq_body, 512 }, 2763 { TD_TYPE_RECEIVER, "rxseq", rxseq_body, 512 }, 2764 { 0, NULL, NULL, 0 } 2765 }; 2766 2767 static int 2768 tap_alloc(char *dev) 2769 { 2770 struct ifreq ifr; 2771 int fd, err; 2772 const char *clonedev = TAP_CLONEDEV; 2773 2774 (void)err; 2775 (void)dev; 2776 /* Arguments taken by the function: 2777 * 2778 * char *dev: the name of an interface (or '\0'). MUST have enough 2779 * space to hold the interface name if '\0' is passed 2780 * int flags: interface flags (eg, IFF_TUN etc.) 2781 */ 2782 2783 #ifdef __FreeBSD__ 2784 if (dev[3]) { /* tapSomething */ 2785 static char buf[128]; 2786 snprintf(buf, sizeof(buf), "/dev/%s", dev); 2787 clonedev = buf; 2788 } 2789 #endif 2790 /* open the device */ 2791 if( (fd = open(clonedev, O_RDWR)) < 0 ) { 2792 return fd; 2793 } 2794 D("%s open successful", clonedev); 2795 2796 /* preparation of the struct ifr, of type "struct ifreq" */ 2797 memset(&ifr, 0, sizeof(ifr)); 2798 2799 #ifdef linux 2800 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 2801 2802 if (*dev) { 2803 /* if a device name was specified, put it in the structure; otherwise, 2804 * the kernel will try to allocate the "next" device of the 2805 * specified type */ 2806 size_t len = strlen(dev); 2807 if (len > IFNAMSIZ) { 2808 D("%s too long", dev); 2809 return -1; 2810 } 2811 memcpy(ifr.ifr_name, dev, len); 2812 } 2813 2814 /* try to create the device */ 2815 if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) { 2816 D("failed to do a TUNSETIFF: %s", strerror(errno)); 2817 close(fd); 2818 return err; 2819 } 2820 2821 /* if the operation was successful, write back the name of the 2822 * interface to the variable "dev", so the caller can know 2823 * it. Note that the caller MUST reserve space in *dev (see calling 2824 * code below) */ 2825 strcpy(dev, ifr.ifr_name); 2826 D("new name is %s", dev); 2827 #endif /* linux */ 2828 2829 /* this is the special file descriptor that the caller will use to talk 2830 * with the virtual interface */ 2831 return fd; 2832 } 2833 2834 int 2835 main(int arc, char **argv) 2836 { 2837 int i; 2838 struct sigaction sa; 2839 sigset_t ss; 2840 2841 struct glob_arg g; 2842 2843 int ch; 2844 int devqueues = 1; /* how many device queues */ 2845 int wait_link_arg = 0; 2846 2847 int pkt_size_done = 0; 2848 2849 struct td_desc *fn = func; 2850 2851 bzero(&g, sizeof(g)); 2852 2853 g.main_fd = -1; 2854 g.td_body = fn->f; 2855 g.td_type = fn->ty; 2856 g.report_interval = 1000; /* report interval */ 2857 g.affinity = -1; 2858 /* ip addresses can also be a range x.x.x.x-x.x.x.y */ 2859 g.af = AF_INET; /* default */ 2860 g.src_ip.name = "10.0.0.1"; 2861 g.dst_ip.name = "10.1.0.1"; 2862 g.dst_mac.name = "ff:ff:ff:ff:ff:ff"; 2863 g.src_mac.name = NULL; 2864 g.pkt_size = 60; 2865 g.pkt_min_size = 0; 2866 g.nthreads = 1; 2867 g.cpus = 1; /* default */ 2868 g.forever = 1; 2869 g.tx_rate = 0; 2870 g.frags = 1; 2871 g.frag_size = (u_int)-1; /* use the netmap buffer size by default */ 2872 g.nmr_config = ""; 2873 g.virt_header = 0; 2874 g.wait_link = 2; /* wait 2 seconds for physical ports */ 2875 2876 while ((ch = getopt(arc, argv, "46a:f:F:Nn:i:Il:d:s:D:S:b:c:o:p:" 2877 "T:w:WvR:XC:H:rP:zZAhBM:")) != -1) { 2878 2879 switch(ch) { 2880 default: 2881 D("bad option %c %s", ch, optarg); 2882 usage(-1); 2883 break; 2884 2885 case 'h': 2886 usage(0); 2887 break; 2888 2889 case '4': 2890 g.af = AF_INET; 2891 break; 2892 2893 case '6': 2894 g.af = AF_INET6; 2895 break; 2896 2897 case 'N': 2898 normalize = 0; 2899 break; 2900 2901 case 'n': 2902 g.npackets = strtoull(optarg, NULL, 10); 2903 break; 2904 2905 case 'F': 2906 i = atoi(optarg); 2907 if (i < 1 || i > 63) { 2908 D("invalid frags %d [1..63], ignore", i); 2909 break; 2910 } 2911 g.frags = i; 2912 break; 2913 2914 case 'M': 2915 g.frag_size = atoi(optarg); 2916 break; 2917 2918 case 'f': 2919 for (fn = func; fn->key; fn++) { 2920 if (!strcmp(fn->key, optarg)) 2921 break; 2922 } 2923 if (fn->key) { 2924 g.td_body = fn->f; 2925 g.td_type = fn->ty; 2926 } else { 2927 D("unrecognised function %s", optarg); 2928 } 2929 break; 2930 2931 case 'o': /* data generation options */ 2932 g.options |= atoi(optarg); 2933 break; 2934 2935 case 'a': /* force affinity */ 2936 g.affinity = atoi(optarg); 2937 break; 2938 2939 case 'i': /* interface */ 2940 /* a prefix of tap: netmap: or pcap: forces the mode. 2941 * otherwise we guess 2942 */ 2943 D("interface is %s", optarg); 2944 if (strlen(optarg) > MAX_IFNAMELEN - 8) { 2945 D("ifname too long %s", optarg); 2946 break; 2947 } 2948 strcpy(g.ifname, optarg); 2949 if (!strcmp(optarg, "null")) { 2950 g.dev_type = DEV_NETMAP; 2951 g.dummy_send = 1; 2952 } else if (!strncmp(optarg, "tap:", 4)) { 2953 g.dev_type = DEV_TAP; 2954 strcpy(g.ifname, optarg + 4); 2955 } else if (!strncmp(optarg, "pcap:", 5)) { 2956 g.dev_type = DEV_PCAP; 2957 strcpy(g.ifname, optarg + 5); 2958 } else if (!strncmp(optarg, "netmap:", 7) || 2959 !strncmp(optarg, "vale", 4)) { 2960 g.dev_type = DEV_NETMAP; 2961 } else if (!strncmp(optarg, "tap", 3)) { 2962 g.dev_type = DEV_TAP; 2963 } else { /* prepend netmap: */ 2964 g.dev_type = DEV_NETMAP; 2965 sprintf(g.ifname, "netmap:%s", optarg); 2966 } 2967 break; 2968 2969 case 'I': 2970 g.options |= OPT_INDIRECT; /* use indirect buffers */ 2971 break; 2972 2973 case 'l': /* pkt_size */ 2974 if (pkt_size_done) { 2975 g.pkt_min_size = atoi(optarg); 2976 } else { 2977 g.pkt_size = atoi(optarg); 2978 pkt_size_done = 1; 2979 } 2980 break; 2981 2982 case 'd': 2983 g.dst_ip.name = optarg; 2984 break; 2985 2986 case 's': 2987 g.src_ip.name = optarg; 2988 break; 2989 2990 case 'T': /* report interval */ 2991 g.report_interval = atoi(optarg); 2992 break; 2993 2994 case 'w': 2995 g.wait_link = atoi(optarg); 2996 wait_link_arg = 1; 2997 break; 2998 2999 case 'W': 3000 g.forever = 0; /* exit RX with no traffic */ 3001 break; 3002 3003 case 'b': /* burst */ 3004 g.burst = atoi(optarg); 3005 break; 3006 case 'c': 3007 g.cpus = atoi(optarg); 3008 break; 3009 case 'p': 3010 g.nthreads = atoi(optarg); 3011 break; 3012 3013 case 'D': /* destination mac */ 3014 g.dst_mac.name = optarg; 3015 break; 3016 3017 case 'S': /* source mac */ 3018 g.src_mac.name = optarg; 3019 break; 3020 case 'v': 3021 verbose++; 3022 break; 3023 case 'R': 3024 g.tx_rate = atoi(optarg); 3025 break; 3026 case 'X': 3027 g.options |= OPT_DUMP; 3028 break; 3029 case 'C': 3030 D("WARNING: the 'C' option is deprecated, use the '+conf:' libnetmap option instead"); 3031 g.nmr_config = strdup(optarg); 3032 break; 3033 case 'H': 3034 g.virt_header = atoi(optarg); 3035 break; 3036 case 'P': 3037 g.packet_file = strdup(optarg); 3038 break; 3039 case 'r': 3040 g.options |= OPT_RUBBISH; 3041 break; 3042 case 'z': 3043 g.options |= OPT_RANDOM_SRC; 3044 break; 3045 case 'Z': 3046 g.options |= OPT_RANDOM_DST; 3047 break; 3048 case 'A': 3049 g.options |= OPT_PPS_STATS; 3050 break; 3051 case 'B': 3052 /* raw packets have4 bytes crc + 20 bytes framing */ 3053 // XXX maybe add an option to pass the IFG 3054 g.framing = 24 * 8; 3055 break; 3056 } 3057 } 3058 3059 if (strlen(g.ifname) <=0 ) { 3060 D("missing ifname"); 3061 usage(-1); 3062 } 3063 3064 if (g.burst == 0) { 3065 g.burst = fn->default_burst; 3066 D("using default burst size: %d", g.burst); 3067 } 3068 3069 g.system_cpus = i = system_ncpus(); 3070 if (g.cpus < 0 || g.cpus > i) { 3071 D("%d cpus is too high, have only %d cpus", g.cpus, i); 3072 usage(-1); 3073 } 3074 D("running on %d cpus (have %d)", g.cpus, i); 3075 if (g.cpus == 0) 3076 g.cpus = i; 3077 3078 if (!wait_link_arg && !strncmp(g.ifname, "vale", 4)) { 3079 g.wait_link = 0; 3080 } 3081 3082 if (g.pkt_size < 16 || g.pkt_size > MAX_PKTSIZE) { 3083 D("bad pktsize %d [16..%d]\n", g.pkt_size, MAX_PKTSIZE); 3084 usage(-1); 3085 } 3086 3087 if (g.pkt_min_size > 0 && (g.pkt_min_size < 16 || g.pkt_min_size > g.pkt_size)) { 3088 D("bad pktminsize %d [16..%d]\n", g.pkt_min_size, g.pkt_size); 3089 usage(-1); 3090 } 3091 3092 if (g.src_mac.name == NULL) { 3093 static char mybuf[20] = "00:00:00:00:00:00"; 3094 /* retrieve source mac address. */ 3095 if (source_hwaddr(g.ifname, mybuf) == -1) { 3096 D("Unable to retrieve source mac"); 3097 // continue, fail later 3098 } 3099 g.src_mac.name = mybuf; 3100 } 3101 /* extract address ranges */ 3102 if (extract_mac_range(&g.src_mac) || extract_mac_range(&g.dst_mac)) 3103 usage(-1); 3104 g.options |= extract_ip_range(&g.src_ip, g.af); 3105 g.options |= extract_ip_range(&g.dst_ip, g.af); 3106 3107 if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1 3108 && g.virt_header != VIRT_HDR_2) { 3109 D("bad virtio-net-header length"); 3110 usage(-1); 3111 } 3112 3113 if (g.dev_type == DEV_TAP) { 3114 D("want to use tap %s", g.ifname); 3115 g.main_fd = tap_alloc(g.ifname); 3116 if (g.main_fd < 0) { 3117 D("cannot open tap %s", g.ifname); 3118 usage(-1); 3119 } 3120 #ifndef NO_PCAP 3121 } else if (g.dev_type == DEV_PCAP) { 3122 char pcap_errbuf[PCAP_ERRBUF_SIZE]; 3123 3124 pcap_errbuf[0] = '\0'; // init the buffer 3125 g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf); 3126 if (g.p == NULL) { 3127 D("cannot open pcap on %s", g.ifname); 3128 usage(-1); 3129 } 3130 g.main_fd = pcap_fileno(g.p); 3131 D("using pcap on %s fileno %d", g.ifname, g.main_fd); 3132 #endif /* !NO_PCAP */ 3133 } else if (g.dummy_send) { /* but DEV_NETMAP */ 3134 D("using a dummy send routine"); 3135 } else { 3136 g.nmd = nmport_prepare(g.ifname); 3137 if (g.nmd == NULL) 3138 goto out; 3139 3140 parse_nmr_config(g.nmr_config, &g.nmd->reg); 3141 3142 g.nmd->reg.nr_flags |= NR_ACCEPT_VNET_HDR; 3143 3144 /* 3145 * Open the netmap device using nm_open(). 3146 * 3147 * protocol stack and may cause a reset of the card, 3148 * which in turn may take some time for the PHY to 3149 * reconfigure. We do the open here to have time to reset. 3150 */ 3151 g.orig_mode = g.nmd->reg.nr_mode; 3152 if (g.nthreads > 1) { 3153 switch (g.orig_mode) { 3154 case NR_REG_ALL_NIC: 3155 case NR_REG_NIC_SW: 3156 g.nmd->reg.nr_mode = NR_REG_ONE_NIC; 3157 break; 3158 case NR_REG_SW: 3159 g.nmd->reg.nr_mode = NR_REG_ONE_SW; 3160 break; 3161 default: 3162 break; 3163 } 3164 g.nmd->reg.nr_ringid = 0; 3165 } 3166 if (nmport_open_desc(g.nmd) < 0) 3167 goto out; 3168 g.main_fd = g.nmd->fd; 3169 ND("mapped %luKB at %p", (unsigned long)(g.nmd->req.nr_memsize>>10), 3170 g.nmd->mem); 3171 3172 if (g.virt_header) { 3173 /* Set the virtio-net header length, since the user asked 3174 * for it explicitly. */ 3175 set_vnet_hdr_len(&g); 3176 } else { 3177 /* Check whether the netmap port we opened requires us to send 3178 * and receive frames with virtio-net header. */ 3179 get_vnet_hdr_len(&g); 3180 } 3181 3182 /* get num of queues in tx or rx */ 3183 if (g.td_type == TD_TYPE_SENDER) 3184 devqueues = g.nmd->reg.nr_tx_rings + g.nmd->reg.nr_host_tx_rings; 3185 else 3186 devqueues = g.nmd->reg.nr_rx_rings + g.nmd->reg.nr_host_rx_rings; 3187 3188 /* validate provided nthreads. */ 3189 if (g.nthreads < 1 || g.nthreads > devqueues) { 3190 D("bad nthreads %d, have %d queues", g.nthreads, devqueues); 3191 // continue, fail later 3192 } 3193 3194 if (g.td_type == TD_TYPE_SENDER) { 3195 int mtu = get_if_mtu(&g); 3196 3197 if (mtu > 0 && g.pkt_size > mtu) { 3198 D("pkt_size (%d) must be <= mtu (%d)", 3199 g.pkt_size, mtu); 3200 return -1; 3201 } 3202 } 3203 3204 if (verbose) { 3205 struct netmap_if *nifp = g.nmd->nifp; 3206 struct nmreq_register *req = &g.nmd->reg; 3207 3208 D("nifp at offset %"PRIu64" ntxqs %d nrxqs %d memid %d", 3209 req->nr_offset, req->nr_tx_rings, req->nr_rx_rings, 3210 req->nr_mem_id); 3211 for (i = 0; i < req->nr_tx_rings + req->nr_host_tx_rings; i++) { 3212 struct netmap_ring *ring = NETMAP_TXRING(nifp, i); 3213 D(" TX%d at offset %p slots %d", i, 3214 (void *)((char *)ring - (char *)nifp), ring->num_slots); 3215 } 3216 for (i = 0; i < req->nr_rx_rings + req->nr_host_rx_rings; i++) { 3217 struct netmap_ring *ring = NETMAP_RXRING(nifp, i); 3218 D(" RX%d at offset %p slots %d", i, 3219 (void *)((char *)ring - (char *)nifp), ring->num_slots); 3220 } 3221 } 3222 3223 /* Print some debug information. */ 3224 fprintf(stdout, 3225 "%s %s: %d queues, %d threads and %d cpus.\n", 3226 (g.td_type == TD_TYPE_SENDER) ? "Sending on" : 3227 ((g.td_type == TD_TYPE_RECEIVER) ? "Receiving from" : 3228 "Working on"), 3229 g.ifname, 3230 devqueues, 3231 g.nthreads, 3232 g.cpus); 3233 if (g.td_type == TD_TYPE_SENDER) { 3234 fprintf(stdout, "%s -> %s (%s -> %s)\n", 3235 g.src_ip.name, g.dst_ip.name, 3236 g.src_mac.name, g.dst_mac.name); 3237 } 3238 3239 out: 3240 /* Exit if something went wrong. */ 3241 if (g.main_fd < 0) { 3242 D("aborting"); 3243 usage(-1); 3244 } 3245 } 3246 3247 3248 if (g.options) { 3249 D("--- SPECIAL OPTIONS:%s%s%s%s%s%s\n", 3250 g.options & OPT_PREFETCH ? " prefetch" : "", 3251 g.options & OPT_ACCESS ? " access" : "", 3252 g.options & OPT_MEMCPY ? " memcpy" : "", 3253 g.options & OPT_INDIRECT ? " indirect" : "", 3254 g.options & OPT_COPY ? " copy" : "", 3255 g.options & OPT_RUBBISH ? " rubbish " : ""); 3256 } 3257 3258 g.tx_period.tv_sec = g.tx_period.tv_nsec = 0; 3259 if (g.tx_rate > 0) { 3260 /* try to have at least something every second, 3261 * reducing the burst size to some 0.01s worth of data 3262 * (but no less than one full set of fragments) 3263 */ 3264 uint64_t x; 3265 int lim = (g.tx_rate)/300; 3266 if (g.burst > lim) 3267 g.burst = lim; 3268 if (g.burst == 0) 3269 g.burst = 1; 3270 x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate; 3271 g.tx_period.tv_nsec = x; 3272 g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000; 3273 g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000; 3274 } 3275 if (g.td_type == TD_TYPE_SENDER) 3276 D("Sending %d packets every %ld.%09ld s", 3277 g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec); 3278 /* Install ^C handler. */ 3279 global_nthreads = g.nthreads; 3280 sigemptyset(&ss); 3281 sigaddset(&ss, SIGINT); 3282 /* block SIGINT now, so that all created threads will inherit the mask */ 3283 if (pthread_sigmask(SIG_BLOCK, &ss, NULL) < 0) { 3284 D("failed to block SIGINT: %s", strerror(errno)); 3285 } 3286 if (start_threads(&g) < 0) 3287 return 1; 3288 /* Install the handler and re-enable SIGINT for the main thread */ 3289 memset(&sa, 0, sizeof(sa)); 3290 sa.sa_handler = sigint_h; 3291 if (sigaction(SIGINT, &sa, NULL) < 0) { 3292 D("failed to install ^C handler: %s", strerror(errno)); 3293 } 3294 3295 if (pthread_sigmask(SIG_UNBLOCK, &ss, NULL) < 0) { 3296 D("failed to re-enable SIGINT: %s", strerror(errno)); 3297 } 3298 main_thread(&g); 3299 free(targs); 3300 return 0; 3301 } 3302 3303 /* end of file */ 3304