1 /* 2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 3 * Copyright (C) 2013-2015 Universita` di Pisa. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 /* 28 * $FreeBSD$ 29 * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $ 30 * 31 * Example program to show how to build a multithreaded packet 32 * source/sink using the netmap device. 33 * 34 * In this example we create a programmable number of threads 35 * to take care of all the queues of the interface used to 36 * send or receive traffic. 
 *
 */

#define _GNU_SOURCE	/* for CPU_SET() */
#include <arpa/inet.h>	/* ntohs */
#include <assert.h>
#include <ctype.h>	// isprint()
#include <errno.h>
#include <fcntl.h>
#include <ifaddrs.h>	/* getifaddrs */
#include <libnetmap.h>
#include <math.h>
#include <net/ethernet.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/udp.h>
#ifndef NO_PCAP
#include <pcap/pcap.h>
#endif
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/stat.h>
#if !defined(_WIN32) && !defined(linux)
#include <sys/sysctl.h>	/* sysctl */
#endif
#include <sys/types.h>
#include <unistd.h>	// sysconf()
#ifdef linux
#define IPV6_VERSION	0x60
#define IPV6_DEFHLIM	64
#endif

#include "ctrs.h"

static void usage(int);

#ifdef _WIN32
/* Minimal emulation of the pthread/cpuset affinity API on Windows. */
#define cpuset_t	DWORD_PTR	//uint64_t
static inline void CPU_ZERO(cpuset_t *p)
{
	*p = 0;
}

static inline void CPU_SET(uint32_t i, cpuset_t *p)
{
	/* NOTE(review): `1` is a 32-bit int, so shifting by (i & 0x3f) is
	 * undefined for i >= 32 even though cpuset_t is 64-bit wide —
	 * confirm whether cores above 31 are ever selected on Windows. */
	*p |= 1<< (i & 0x3f);
}

#define pthread_setaffinity_np(a, b, c) !SetThreadAffinityMask(a, *c)	//((void)a, 0)
#define TAP_CLONEDEV	"/dev/tap"
#define AF_LINK	18	//defined in winsocks.h
#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME
#include <net/if_dl.h>

/*
 * Convert an ASCII representation of an ethernet address to
 * binary form.
 */
struct ether_addr *
ether_aton(const char *a)
{
	int i;
	/* static result buffer: not reentrant / not thread-safe,
	 * same contract as the BSD libc version */
	static struct ether_addr o;
	unsigned int o0, o1, o2, o3, o4, o5;

	i = sscanf(a, "%x:%x:%x:%x:%x:%x", &o0, &o1, &o2, &o3, &o4, &o5);

	/* require all six colon-separated hex octets */
	if (i != 6)
		return (NULL);

	o.octet[0]=o0;
	o.octet[1]=o1;
	o.octet[2]=o2;
	o.octet[3]=o3;
	o.octet[4]=o4;
	o.octet[5]=o5;

	return ((struct ether_addr *)&o);
}

/*
 * Convert a binary representation of an ethernet address to
 * an ASCII string.
 */
char *
ether_ntoa(const struct ether_addr *n)
{
	int i;
	/* static result buffer: not reentrant, overwritten on each call */
	static char a[18];

	i = sprintf(a, "%02x:%02x:%02x:%02x:%02x:%02x",
	    n->octet[0], n->octet[1], n->octet[2],
	    n->octet[3], n->octet[4], n->octet[5]);
	/* a full "xx:xx:xx:xx:xx:xx" string is exactly 17 characters */
	return (i < 17 ? NULL : (char *)&a);
}
#endif /* _WIN32 */

#ifdef linux

#define cpuset_t	cpu_set_t

#define ifr_flagshigh	ifr_flags	/* only the low 16 bits here */
#define IFF_PPROMISC	IFF_PROMISC	/* IFF_PPROMISC does not exist */
#include <linux/ethtool.h>
#include <linux/sockios.h>

#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME
#include <netinet/ether.h>	/* ether_aton */
#include <linux/if_packet.h>	/* sockaddr_ll */
#endif	/* linux */

#ifdef __FreeBSD__
#include <sys/endian.h>	/* le64toh */
#include <machine/param.h>

#include <pthread_np.h>	/* pthread w/ affinity */
#include <sys/cpuset.h>	/* cpu_set */
#include <net/if_dl.h>	/* LLADDR */
#endif	/* __FreeBSD__ */

#ifdef __APPLE__

#define cpuset_t	uint64_t	// XXX
static inline void CPU_ZERO(cpuset_t *p)
{
	*p = 0;
}

static inline void CPU_SET(uint32_t i, cpuset_t *p)
{
	*p |= 1<< (i & 0x3f);
}

/* affinity is not supported on OSX; pretend success */
#define pthread_setaffinity_np(a, b, c)	((void)a, 0)

#define ifr_flagshigh	ifr_flags	// XXX
#define IFF_PPROMISC	IFF_PROMISC
#include <net/if_dl.h>	/* LLADDR */
/* no clock_gettime on OSX: stub that returns a zero timespec */
#define clock_gettime(a,b)	\
	do {struct timespec t0 = {0,0}; *(b) = t0; } while (0)
#endif	/* __APPLE__ */

/* default payloads written into generated packets */
static const char *default_payload = "netmap pkt-gen DIRECT payload\n"
	"http://info.iet.unipi.it/~luigi/netmap/ ";

static const char *indirect_payload = "netmap pkt-gen indirect payload\n"
	"http://info.iet.unipi.it/~luigi/netmap/ ";

static int verbose = 0;
/* presumably selects normalized (bit/s) vs raw rate reporting —
 * confirm against the stats-printing code */
static int normalize = 1;

#define VIRT_HDR_1	10	/* length of a base vnet-hdr */
#define VIRT_HDR_2	12	/* length of the extended vnet-hdr */
#define VIRT_HDR_MAX	VIRT_HDR_2
struct virt_header {
	uint8_t fields[VIRT_HDR_MAX];
};

#define MAX_BODYSIZE	65536

/* Template frame: optional vnet header, ethernet header, then either an
 * IPv4 or an IPv6 header+UDP+body overlaid in a union. Packed so it can
 * be copied byte-for-byte into netmap buffers. */
struct pkt {
	struct virt_header vh;
	struct ether_header eh;
	union {
		struct {
			struct ip ip;
			struct udphdr udp;
			uint8_t body[MAX_BODYSIZE];	/* hardwired */
		} ipv4;
		struct {
			struct ip6_hdr ip;
			struct udphdr udp;
			uint8_t body[MAX_BODYSIZE];	/* hardwired */
		} ipv6;
	};
} __attribute__((__packed__));

/* access field f of the union arm selected by address family af */
#define	PKT(p, f, af)	\
    ((af) == AF_INET ? (p)->ipv4.f: (p)->ipv6.f)

/* a range of addresses and ports, parsed from the command line */
struct ip_range {
	const char *name;
	union {
		struct {
			uint32_t start, end; /* same as struct in_addr */
		} ipv4;
		struct {
			struct in6_addr start, end;
			/* index of the 16-bit group being swept */
			uint8_t sgroup, egroup;
		} ipv6;
	};
	uint16_t port0, port1;
};

struct mac_range {
	const char *name;
	struct ether_addr start, end;
};

/* ifname can be netmap:foo-xxxx */
#define MAX_IFNAMELEN	512	/* our buffer for ifname */
//#define MAX_PKTSIZE	1536
#define MAX_PKTSIZE	MAX_BODYSIZE	/* XXX: + IP_HDR + ETH_HDR */

/* compact timestamp to fit into 60 byte packet.
   (enough to obtain RTT) */
struct tstamp {
	uint32_t sec;
	uint32_t nsec;
};

/*
 * global arguments for all threads
 */

struct glob_arg {
	int af;		/* address family AF_INET/AF_INET6 */
	struct ip_range src_ip;
	struct ip_range dst_ip;
	struct mac_range dst_mac;
	struct mac_range src_mac;
	int pkt_size;
	int pkt_min_size;
	int burst;	/* packets per syscall batch */
	int forever;	/* keep going after npackets */
	uint64_t npackets;	/* total packets to send */
	int frags;	/* fragments per packet */
	u_int frag_size;	/* size of each fragment */
	int nthreads;
	int cpus;	/* cpus used for running */
	int system_cpus;	/* cpus on the system */

	int options;	/* testing */
#define OPT_PREFETCH	1
#define OPT_ACCESS	2
#define OPT_COPY	4
#define OPT_MEMCPY	8
#define OPT_TS		16	/* add a timestamp */
#define OPT_INDIRECT	32	/* use indirect buffers, tx only */
#define OPT_DUMP	64	/* dump rx/tx traffic */
#define OPT_RUBBISH	256	/* send whatever the buffers contain */
#define OPT_RANDOM_SRC	512	/* randomize source addr/port per packet */
#define OPT_RANDOM_DST	1024	/* randomize destination addr/port per packet */
#define OPT_PPS_STATS	2048
	int dev_type;
#ifndef NO_PCAP
	pcap_t *p;	/* pcap handle when dev_type is DEV_PCAP */
#endif

	int tx_rate;	/* target rate (pps); 0 = unlimited */
	struct timespec tx_period;	/* inter-batch period for rate limiting */

	int affinity;
	int main_fd;
	struct nmport_d *nmd;	/* main netmap port descriptor */
	uint32_t orig_mode;
	int report_interval;	/* milliseconds between prints */
	void *(*td_body)(void *);	/* thread body (sender/receiver/...) */
	int td_type;
	void *mmap_addr;
	char ifname[MAX_IFNAMELEN];
	const char *nmr_config;
	int dummy_send;
	int virt_header;	/* send also the virt_header */
	char *packet_file;	/* -P option */
#define	STATS_WIN	15
	int win_idx;
	int64_t win[STATS_WIN];	/* sliding window of rate samples */
	int wait_link;
	int framing;	/* #bits of framing (for bw output) */
};
enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP };

/* role of each worker thread */
enum {
	TD_TYPE_SENDER = 1,
	TD_TYPE_RECEIVER,
	TD_TYPE_OTHER,
};

/*
 * Arguments for a new thread.
The same structure is used by
 * the source and the sink
 */
struct targ {
	struct glob_arg *g;	/* shared global configuration */
	int used;
	int completed;
	int cancel;	/* set by sigint_h to stop the thread */
	int fd;
	struct nmport_d *nmd;
	/* these ought to be volatile, but they are
	 * only sampled and errors should not accumulate
	 */
	struct my_ctrs ctr;	/* per-thread packet/byte counters */

	struct timespec tic, toc;
	int me;	/* thread index */
	pthread_t thread;
	int affinity;

	struct pkt pkt;	/* template packet for this thread */
	void *frame;	/* start of the frame to transmit */
	uint16_t seed[3];	/* nrand48() state for OPT_RANDOM_* */
	u_int frags;
	u_int frag_size;
};

/* one step of a 16-bit one's-complement sum (carry folded back in) */
static __inline uint16_t
cksum_add(uint16_t sum, uint16_t a)
{
	uint16_t res;

	res = sum + a;
	return (res + (res < a));
}

/* Parse "a.b.c.d[:port]" in place (the ':' is overwritten with a NUL).
 * *port is left untouched when no ':' is present.
 * NOTE(review): the inet_pton() return value is not checked, so a bad
 * address silently leaves *addr with ntohl of whatever inet_pton wrote. */
static void
extract_ipv4_addr(char *name, uint32_t *addr, uint16_t *port)
{
	struct in_addr a;
	char *pp;

	pp = strchr(name, ':');
	if (pp != NULL) {	/* do we have ports ? */
		*pp++ = '\0';
		*port = (uint16_t)strtol(pp, NULL, 0);
	}

	inet_pton(AF_INET, name, &a);
	*addr = ntohl(a.s_addr);
}

/* Parse an IPv6 address, modifying name in place.
 * *port and *group keep their previous values when absent. */
static void
extract_ipv6_addr(char *name, struct in6_addr *addr, uint16_t *port,
	uint8_t *group)
{
	char *pp;

	/*
	 * We accept IPv6 address in the following form:
	 *  group@[2001:DB8::1001]:port	(w/ brackets and port)
	 *  group@[2001:DB8::1]		(w/ brackets and w/o port)
	 *  group@2001:DB8::1234	(w/o brackets and w/o port)
	 */
	pp = strchr(name, '@');
	if (pp != NULL) {
		*pp++ = '\0';
		*group = (uint8_t)strtol(name, NULL, 0);
		/* clamp: only 8 16-bit groups in an IPv6 address */
		if (*group > 7)
			*group = 7;
		name = pp;
	}
	if (name[0] == '[')
		name++;
	pp = strchr(name, ']');
	if (pp != NULL)
		*pp++ = '\0';
	/* a port may only follow the closing bracket, introduced by ':' */
	if (pp != NULL && *pp != ':')
		pp = NULL;
	if (pp != NULL) {	/* do we have ports ? */
		*pp++ = '\0';
		*port = (uint16_t)strtol(pp, NULL, 0);
	}
	inet_pton(AF_INET6, name, addr);
}
/*
 * extract the extremes from a range of ipv4 addresses.
 * addr_lo[-addr_hi][:port_lo[-port_hi]]
 * Returns OPT_COPY when the range spans more than one address or port
 * (the caller must then re-copy/update each packet), 0 otherwise.
 */
static int
extract_ip_range(struct ip_range *r, int af)
{
	char *name, *ap, start[INET6_ADDRSTRLEN];
	char end[INET6_ADDRSTRLEN];
	struct in_addr a;
	uint32_t tmp;

	if (verbose)
		D("extract IP range from %s", r->name);

	/* work on a private copy; the parsers below write NULs in place */
	name = strdup(r->name);
	if (name == NULL) {
		D("strdup failed");
		usage(-1);
	}
	/* the first - splits start/end of range */
	ap = strchr(name, '-');
	if (ap != NULL)
		*ap++ = '\0';
	r->port0 = 1234;	/* default port */
	if (af == AF_INET6) {
		r->ipv6.sgroup = 7; /* default group */
		extract_ipv6_addr(name, &r->ipv6.start, &r->port0,
		    &r->ipv6.sgroup);
	} else
		extract_ipv4_addr(name, &r->ipv4.start, &r->port0);

	r->port1 = r->port0;
	if (af == AF_INET6) {
		if (ap != NULL) {
			r->ipv6.egroup = r->ipv6.sgroup;
			extract_ipv6_addr(ap, &r->ipv6.end, &r->port1,
			    &r->ipv6.egroup);
		} else {
			r->ipv6.end = r->ipv6.start;
			r->ipv6.egroup = r->ipv6.sgroup;
		}
	} else {
		if (ap != NULL) {
			extract_ipv4_addr(ap, &r->ipv4.end, &r->port1);
			/* normalize so start <= end; note the IPv6 branch
			 * above does NOT perform this swap */
			if (r->ipv4.start > r->ipv4.end) {
				tmp = r->ipv4.end;
				r->ipv4.end = r->ipv4.start;
				r->ipv4.start = tmp;
			}
		} else
			r->ipv4.end = r->ipv4.start;
	}

	if (r->port0 > r->port1) {
		tmp = r->port0;
		r->port0 = r->port1;
		r->port1 = tmp;
	}
	if (af == AF_INET) {
		a.s_addr = htonl(r->ipv4.start);
		inet_ntop(af, &a, start, sizeof(start));
		a.s_addr = htonl(r->ipv4.end);
		inet_ntop(af, &a, end, sizeof(end));
	} else {
		inet_ntop(af, &r->ipv6.start, start, sizeof(start));
		inet_ntop(af, &r->ipv6.end, end, sizeof(end));
	}
	if (af == AF_INET)
		D("range is %s:%d to %s:%d", start, r->port0, end, r->port1);
	else
		D("range is %d@[%s]:%d to %d@[%s]:%d", r->ipv6.sgroup,
		    start, r->port0, r->ipv6.egroup, end, r->port1);

	free(name);
	if (r->port0 != r->port1 ||
	    (af == AF_INET && r->ipv4.start != r->ipv4.end) ||
	    (af == AF_INET6 &&
		!IN6_ARE_ADDR_EQUAL(&r->ipv6.start, &r->ipv6.end)))
		return (OPT_COPY);
	return (0);
}

/* Parse r->name as a MAC address into r->start and r->end.
 * Returns 0 on success, 1 on a malformed address. */
static int
extract_mac_range(struct mac_range *r)
{
	struct ether_addr *e;
	if (verbose)
	    D("extract MAC range from %s", r->name);

	e = ether_aton(r->name);
	if (e == NULL) {
		D("invalid MAC address '%s'", r->name);
		return 1;
	}
	/* no real range support yet: start == end */
	bcopy(e, &r->start, 6);
	bcopy(e, &r->end, 6);
#if 0
	bcopy(targ->src_mac, eh->ether_shost, 6);
	p = index(targ->g->src_mac, '-');
	if (p)
		targ->src_mac_range = atoi(p+1);

	bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6);
	bcopy(targ->dst_mac, eh->ether_dhost, 6);
	p = index(targ->g->dst_mac, '-');
	if (p)
		targ->dst_mac_range = atoi(p+1);
#endif
	if (verbose)
		D("%s starts at %s", r->name, ether_ntoa(&r->start));
	return 0;
}

/* Ask the kernel for the MTU of the underlying interface via
 * SIOCGIFMTU; for pipes/VALE ports return a practical 64KB limit.
 * Returns a negative value on error. */
static int
get_if_mtu(const struct glob_arg *g)
{
	struct ifreq ifreq;
	int s, ret;
	const char *ifname = g->nmd->hdr.nr_name;
	size_t len;

	if (!strncmp(g->ifname, "netmap:", 7) && !strchr(ifname, '{')
			&& !strchr(ifname, '}')) {

		len = strlen(ifname);

		if (len > IFNAMSIZ) {
			D("'%s' too long, cannot ask for MTU", ifname);
			return -1;
		}

		s = socket(AF_INET, SOCK_DGRAM, 0);
		if (s < 0) {
			D("socket() failed: %s", strerror(errno));
			return s;
		}

		memset(&ifreq, 0, sizeof(ifreq));
		memcpy(ifreq.ifr_name, ifname, len);

		ret = ioctl(s, SIOCGIFMTU, &ifreq);
		if (ret) {
			D("ioctl(SIOCGIFMTU) failed: %s", strerror(errno));
		}

		close(s);

		return ifreq.ifr_mtu;
	}

	/* This is a pipe or a VALE port, where the MTU is very large,
	 * so we use some practical limit. */
	return 65536;
}

static struct targ *targs;
static int global_nthreads;

/* control-C handler: ask every worker thread to stop */
static void
sigint_h(int sig)
{
	int i;

	(void)sig;	/* UNUSED */
	D("received control-C on thread %p", (void *)pthread_self());
	for (i = 0; i < global_nthreads; i++) {
		targs[i].cancel = 1;
	}
}

/* sysctl wrapper to return the number of active CPUs */
static int
system_ncpus(void)
{
	int ncpus;
#if defined (__FreeBSD__)
	int mib[2] = { CTL_HW, HW_NCPU };
	size_t len = sizeof(mib);
	sysctl(mib, 2, &ncpus, &len, NULL, 0);
#elif defined(linux)
	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(_WIN32)
	{
		SYSTEM_INFO sysinfo;
		GetSystemInfo(&sysinfo);
		ncpus = sysinfo.dwNumberOfProcessors;
	}
#else /* others */
	ncpus = 1;
#endif /* others */
	return (ncpus);
}

#ifdef __linux__
#define sockaddr_dl	sockaddr_ll
#define sdl_family	sll_family
#define AF_LINK	AF_PACKET
#define LLADDR(s)	s->sll_addr;
#include <linux/if_tun.h>
#define TAP_CLONEDEV	"/dev/net/tun"
#endif /* __linux__ */

#ifdef __FreeBSD__
#include <net/if_tun.h>
#define TAP_CLONEDEV	"/dev/tap"
#endif /* __FreeBSD */

#ifdef __APPLE__
// #warning TAP not supported on apple ?
#include <net/if_utun.h>
#define TAP_CLONEDEV	"/dev/tap"
#endif /* __APPLE__ */


/*
 * parse the vale configuration in conf and put it in nmr.
 * Return the flag set if necessary.
 * The configuration may consist of 1 to 4 numbers separated
 * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings.
 * Missing numbers or zeroes stand for default values.
 * As an additional convenience, if exactly one number
 * is specified, then this is assigned to both #tx-slots and #rx-slots.
 * If there is no 4th number, then the 3rd is assigned to both #tx-rings
 * and #rx-rings.
 */
static int
parse_nmr_config(const char* conf, struct nmreq_register *nmr)
{
	char *w, *tok;
	int i, v;

	if (conf == NULL || ! *conf)
		return 0;
	nmr->nr_tx_rings = nmr->nr_rx_rings = 0;
	nmr->nr_tx_slots = nmr->nr_rx_slots = 0;
	w = strdup(conf);
	for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) {
		v = atoi(tok);
		switch (i) {
		case 0:	/* first number sets both slot counts */
			nmr->nr_tx_slots = nmr->nr_rx_slots = v;
			break;
		case 1:	/* a second number overrides rx slots */
			nmr->nr_rx_slots = v;
			break;
		case 2:	/* third number sets both ring counts */
			nmr->nr_tx_rings = nmr->nr_rx_rings = v;
			break;
		case 3:	/* a fourth number overrides rx rings */
			nmr->nr_rx_rings = v;
			break;
		default:
			D("ignored config: %s", tok);
			break;
		}
	}
	D("txr %d txd %d rxr %d rxd %d",
	    nmr->nr_tx_rings, nmr->nr_tx_slots,
	    nmr->nr_rx_rings, nmr->nr_rx_slots);
	free(w);
	return 0;
}


/*
 * locate the src mac address for our interface, put it
 * into the user-supplied buffer. return 0 if ok, -1 on error.
 * NOTE(review): the code below actually returns 1 (not -1) when the
 * interface is not found in the getifaddrs() list; -1 is only returned
 * when getifaddrs() itself fails. Verify what callers test for.
 */
static int
source_hwaddr(const char *ifname, char *buf)
{
	struct ifaddrs *ifaphead, *ifap;

	if (getifaddrs(&ifaphead) != 0) {
		D("getifaddrs %s failed", ifname);
		return (-1);
	}

	/* remove 'netmap:' prefix before comparing interfaces */
	if (!strncmp(ifname, "netmap:", 7))
		ifname = &ifname[7];

	for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) {
		struct sockaddr_dl *sdl =
			(struct sockaddr_dl *)ifap->ifa_addr;
		uint8_t *mac;

		/* only look at link-level addresses of the named interface */
		if (!sdl || sdl->sdl_family != AF_LINK)
			continue;
		if (strncmp(ifap->ifa_name, ifname, IFNAMSIZ) != 0)
			continue;
		mac = (uint8_t *)LLADDR(sdl);
		sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x",
		    mac[0], mac[1], mac[2],
		    mac[3], mac[4], mac[5]);
		if (verbose)
			D("source hwaddr %s", buf);
		break;
	}
	freeifaddrs(ifaphead);
	return ifap ? 0 : 1;
}


/* set the thread affinity.
*/ 714 static int 715 setaffinity(pthread_t me, int i) 716 { 717 cpuset_t cpumask; 718 719 if (i == -1) 720 return 0; 721 722 /* Set thread affinity affinity.*/ 723 CPU_ZERO(&cpumask); 724 CPU_SET(i, &cpumask); 725 726 if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) { 727 D("Unable to set affinity: %s", strerror(errno)); 728 return 1; 729 } 730 return 0; 731 } 732 733 734 /* Compute the checksum of the given ip header. */ 735 static uint32_t 736 checksum(const void *data, uint16_t len, uint32_t sum) 737 { 738 const uint8_t *addr = data; 739 uint32_t i; 740 741 /* Checksum all the pairs of bytes first... */ 742 for (i = 0; i < (len & ~1U); i += 2) { 743 sum += (uint16_t)ntohs(*((const uint16_t *)(addr + i))); 744 if (sum > 0xFFFF) 745 sum -= 0xFFFF; 746 } 747 /* 748 * If there's a single byte left over, checksum it, too. 749 * Network byte order is big-endian, so the remaining byte is 750 * the high byte. 751 */ 752 if (i < len) { 753 sum += addr[i] << 8; 754 if (sum > 0xFFFF) 755 sum -= 0xFFFF; 756 } 757 return sum; 758 } 759 760 static uint16_t 761 wrapsum(uint32_t sum) 762 { 763 sum = ~sum & 0xFFFF; 764 return (htons(sum)); 765 } 766 767 /* Check the payload of the packet for errors (use it for debug). 768 * Look for consecutive ascii representations of the size of the packet. 769 */ 770 static void 771 dump_payload(const char *_p, int len, struct netmap_ring *ring, int cur) 772 { 773 char buf[128]; 774 int i, j, i0; 775 const unsigned char *p = (const unsigned char *)_p; 776 777 /* get the length in ASCII of the length of the packet. 
*/ 778 779 printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n", 780 ring, cur, ring->slot[cur].buf_idx, 781 ring->slot[cur].flags, len); 782 /* hexdump routine */ 783 for (i = 0; i < len; ) { 784 memset(buf, ' ', sizeof(buf)); 785 sprintf(buf, "%5d: ", i); 786 i0 = i; 787 for (j=0; j < 16 && i < len; i++, j++) 788 sprintf(buf+7+j*3, "%02x ", (uint8_t)(p[i])); 789 i = i0; 790 for (j=0; j < 16 && i < len; i++, j++) 791 sprintf(buf+7+j + 48, "%c", 792 isprint(p[i]) ? p[i] : '.'); 793 printf("%s\n", buf); 794 } 795 } 796 797 /* 798 * Fill a packet with some payload. 799 * We create a UDP packet so the payload starts at 800 * 14+20+8 = 42 bytes. 801 */ 802 #ifdef __linux__ 803 #define uh_sport source 804 #define uh_dport dest 805 #define uh_ulen len 806 #define uh_sum check 807 #endif /* linux */ 808 809 static uint16_t 810 new_ip_sum(uint16_t ip_sum, uint32_t oaddr, uint32_t naddr) 811 { 812 ip_sum = cksum_add(ip_sum, ~oaddr >> 16); 813 ip_sum = cksum_add(ip_sum, ~oaddr & 0xffff); 814 ip_sum = cksum_add(ip_sum, naddr >> 16); 815 ip_sum = cksum_add(ip_sum, naddr & 0xffff); 816 return ip_sum; 817 } 818 819 static uint16_t 820 new_udp_sum(uint16_t udp_sum, uint16_t oport, uint16_t nport) 821 { 822 udp_sum = cksum_add(udp_sum, ~oport); 823 udp_sum = cksum_add(udp_sum, nport); 824 return udp_sum; 825 } 826 827 828 static void 829 update_ip(struct pkt *pkt, struct targ *t) 830 { 831 struct glob_arg *g = t->g; 832 struct ip ip; 833 struct udphdr udp; 834 uint32_t oaddr, naddr; 835 uint16_t oport, nport; 836 uint16_t ip_sum = 0, udp_sum = 0; 837 838 memcpy(&ip, &pkt->ipv4.ip, sizeof(ip)); 839 memcpy(&udp, &pkt->ipv4.udp, sizeof(udp)); 840 do { 841 ip_sum = udp_sum = 0; 842 naddr = oaddr = ntohl(ip.ip_src.s_addr); 843 nport = oport = ntohs(udp.uh_sport); 844 if (g->options & OPT_RANDOM_SRC) { 845 ip.ip_src.s_addr = nrand48(t->seed); 846 udp.uh_sport = nrand48(t->seed); 847 naddr = ntohl(ip.ip_src.s_addr); 848 nport = ntohs(udp.uh_sport); 849 ip_sum = new_ip_sum(ip_sum, 
oaddr, naddr); 850 udp_sum = new_udp_sum(udp_sum, oport, nport); 851 } else { 852 if (oport < g->src_ip.port1) { 853 nport = oport + 1; 854 udp.uh_sport = htons(nport); 855 udp_sum = new_udp_sum(udp_sum, oport, nport); 856 break; 857 } 858 nport = g->src_ip.port0; 859 udp.uh_sport = htons(nport); 860 if (oaddr < g->src_ip.ipv4.end) { 861 naddr = oaddr + 1; 862 ip.ip_src.s_addr = htonl(naddr); 863 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 864 break; 865 } 866 naddr = g->src_ip.ipv4.start; 867 ip.ip_src.s_addr = htonl(naddr); 868 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 869 } 870 871 naddr = oaddr = ntohl(ip.ip_dst.s_addr); 872 nport = oport = ntohs(udp.uh_dport); 873 if (g->options & OPT_RANDOM_DST) { 874 ip.ip_dst.s_addr = nrand48(t->seed); 875 udp.uh_dport = nrand48(t->seed); 876 naddr = ntohl(ip.ip_dst.s_addr); 877 nport = ntohs(udp.uh_dport); 878 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 879 udp_sum = new_udp_sum(udp_sum, oport, nport); 880 } else { 881 if (oport < g->dst_ip.port1) { 882 nport = oport + 1; 883 udp.uh_dport = htons(nport); 884 udp_sum = new_udp_sum(udp_sum, oport, nport); 885 break; 886 } 887 nport = g->dst_ip.port0; 888 udp.uh_dport = htons(nport); 889 if (oaddr < g->dst_ip.ipv4.end) { 890 naddr = oaddr + 1; 891 ip.ip_dst.s_addr = htonl(naddr); 892 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 893 break; 894 } 895 naddr = g->dst_ip.ipv4.start; 896 ip.ip_dst.s_addr = htonl(naddr); 897 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 898 } 899 } while (0); 900 /* update checksums */ 901 if (udp_sum != 0) 902 udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(udp_sum)); 903 if (ip_sum != 0) { 904 ip.ip_sum = ~cksum_add(~ip.ip_sum, htons(ip_sum)); 905 udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(ip_sum)); 906 } 907 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip)); 908 memcpy(&pkt->ipv4.udp, &udp, sizeof(udp)); 909 } 910 911 #ifndef s6_addr16 912 #define s6_addr16 __u6_addr.__u6_addr16 913 #endif 914 static void 915 update_ip6(struct pkt *pkt, struct targ *t) 916 { 917 
struct glob_arg *g = t->g; 918 struct ip6_hdr ip6; 919 struct udphdr udp; 920 uint16_t udp_sum; 921 uint16_t oaddr, naddr; 922 uint16_t oport, nport; 923 uint8_t group; 924 925 memcpy(&ip6, &pkt->ipv6.ip, sizeof(ip6)); 926 memcpy(&udp, &pkt->ipv6.udp, sizeof(udp)); 927 do { 928 udp_sum = 0; 929 group = g->src_ip.ipv6.sgroup; 930 naddr = oaddr = ntohs(ip6.ip6_src.s6_addr16[group]); 931 nport = oport = ntohs(udp.uh_sport); 932 if (g->options & OPT_RANDOM_SRC) { 933 ip6.ip6_src.s6_addr16[group] = nrand48(t->seed); 934 udp.uh_sport = nrand48(t->seed); 935 naddr = ntohs(ip6.ip6_src.s6_addr16[group]); 936 nport = ntohs(udp.uh_sport); 937 break; 938 } 939 if (oport < g->src_ip.port1) { 940 nport = oport + 1; 941 udp.uh_sport = htons(nport); 942 break; 943 } 944 nport = g->src_ip.port0; 945 udp.uh_sport = htons(nport); 946 if (oaddr < ntohs(g->src_ip.ipv6.end.s6_addr16[group])) { 947 naddr = oaddr + 1; 948 ip6.ip6_src.s6_addr16[group] = htons(naddr); 949 break; 950 } 951 naddr = ntohs(g->src_ip.ipv6.start.s6_addr16[group]); 952 ip6.ip6_src.s6_addr16[group] = htons(naddr); 953 954 /* update checksums if needed */ 955 if (oaddr != naddr) 956 udp_sum = cksum_add(~oaddr, naddr); 957 if (oport != nport) 958 udp_sum = cksum_add(udp_sum, 959 cksum_add(~oport, nport)); 960 961 group = g->dst_ip.ipv6.egroup; 962 naddr = oaddr = ntohs(ip6.ip6_dst.s6_addr16[group]); 963 nport = oport = ntohs(udp.uh_dport); 964 if (g->options & OPT_RANDOM_DST) { 965 ip6.ip6_dst.s6_addr16[group] = nrand48(t->seed); 966 udp.uh_dport = nrand48(t->seed); 967 naddr = ntohs(ip6.ip6_dst.s6_addr16[group]); 968 nport = ntohs(udp.uh_dport); 969 break; 970 } 971 if (oport < g->dst_ip.port1) { 972 nport = oport + 1; 973 udp.uh_dport = htons(nport); 974 break; 975 } 976 nport = g->dst_ip.port0; 977 udp.uh_dport = htons(nport); 978 if (oaddr < ntohs(g->dst_ip.ipv6.end.s6_addr16[group])) { 979 naddr = oaddr + 1; 980 ip6.ip6_dst.s6_addr16[group] = htons(naddr); 981 break; 982 } 983 naddr = 
ntohs(g->dst_ip.ipv6.start.s6_addr16[group]); 984 ip6.ip6_dst.s6_addr16[group] = htons(naddr); 985 } while (0); 986 /* update checksums */ 987 if (oaddr != naddr) 988 udp_sum = cksum_add(udp_sum, 989 cksum_add(~oaddr, naddr)); 990 if (oport != nport) 991 udp_sum = cksum_add(udp_sum, 992 cksum_add(~oport, nport)); 993 if (udp_sum != 0) 994 udp.uh_sum = ~cksum_add(~udp.uh_sum, udp_sum); 995 memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6)); 996 memcpy(&pkt->ipv6.udp, &udp, sizeof(udp)); 997 } 998 999 static void 1000 update_addresses(struct pkt *pkt, struct targ *t) 1001 { 1002 1003 if (t->g->af == AF_INET) 1004 update_ip(pkt, t); 1005 else 1006 update_ip6(pkt, t); 1007 } 1008 /* 1009 * initialize one packet and prepare for the next one. 1010 * The copy could be done better instead of repeating it each time. 1011 */ 1012 static void 1013 initialize_packet(struct targ *targ) 1014 { 1015 struct pkt *pkt = &targ->pkt; 1016 struct ether_header *eh; 1017 struct ip6_hdr ip6; 1018 struct ip ip; 1019 struct udphdr udp; 1020 void *udp_ptr; 1021 uint16_t paylen; 1022 uint32_t csum = 0; 1023 const char *payload = targ->g->options & OPT_INDIRECT ? 1024 indirect_payload : default_payload; 1025 int i, l0 = strlen(payload); 1026 1027 #ifndef NO_PCAP 1028 char errbuf[PCAP_ERRBUF_SIZE]; 1029 pcap_t *file; 1030 struct pcap_pkthdr *header; 1031 const unsigned char *packet; 1032 1033 /* Read a packet from a PCAP file if asked. 
*/ 1034 if (targ->g->packet_file != NULL) { 1035 if ((file = pcap_open_offline(targ->g->packet_file, 1036 errbuf)) == NULL) 1037 D("failed to open pcap file %s", 1038 targ->g->packet_file); 1039 if (pcap_next_ex(file, &header, &packet) < 0) 1040 D("failed to read packet from %s", 1041 targ->g->packet_file); 1042 if ((targ->frame = malloc(header->caplen)) == NULL) 1043 D("out of memory"); 1044 bcopy(packet, (unsigned char *)targ->frame, header->caplen); 1045 targ->g->pkt_size = header->caplen; 1046 pcap_close(file); 1047 return; 1048 } 1049 #endif 1050 1051 paylen = targ->g->pkt_size - sizeof(*eh) - 1052 (targ->g->af == AF_INET ? sizeof(ip): sizeof(ip6)); 1053 1054 /* create a nice NUL-terminated string */ 1055 for (i = 0; i < paylen; i += l0) { 1056 if (l0 > paylen - i) 1057 l0 = paylen - i; // last round 1058 bcopy(payload, PKT(pkt, body, targ->g->af) + i, l0); 1059 } 1060 PKT(pkt, body, targ->g->af)[i - 1] = '\0'; 1061 1062 /* prepare the headers */ 1063 eh = &pkt->eh; 1064 bcopy(&targ->g->src_mac.start, eh->ether_shost, 6); 1065 bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6); 1066 1067 if (targ->g->af == AF_INET) { 1068 eh->ether_type = htons(ETHERTYPE_IP); 1069 memcpy(&ip, &pkt->ipv4.ip, sizeof(ip)); 1070 udp_ptr = &pkt->ipv4.udp; 1071 ip.ip_v = IPVERSION; 1072 ip.ip_hl = sizeof(ip) >> 2; 1073 ip.ip_id = 0; 1074 ip.ip_tos = IPTOS_LOWDELAY; 1075 ip.ip_len = htons(targ->g->pkt_size - sizeof(*eh)); 1076 ip.ip_id = 0; 1077 ip.ip_off = htons(IP_DF); /* Don't fragment */ 1078 ip.ip_ttl = IPDEFTTL; 1079 ip.ip_p = IPPROTO_UDP; 1080 ip.ip_dst.s_addr = htonl(targ->g->dst_ip.ipv4.start); 1081 ip.ip_src.s_addr = htonl(targ->g->src_ip.ipv4.start); 1082 ip.ip_sum = wrapsum(checksum(&ip, sizeof(ip), 0)); 1083 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip)); 1084 } else { 1085 eh->ether_type = htons(ETHERTYPE_IPV6); 1086 memcpy(&ip6, &pkt->ipv4.ip, sizeof(ip6)); 1087 udp_ptr = &pkt->ipv6.udp; 1088 ip6.ip6_flow = 0; 1089 ip6.ip6_plen = htons(paylen); 1090 ip6.ip6_vfc = IPV6_VERSION; 
1091 ip6.ip6_nxt = IPPROTO_UDP; 1092 ip6.ip6_hlim = IPV6_DEFHLIM; 1093 ip6.ip6_src = targ->g->src_ip.ipv6.start; 1094 ip6.ip6_dst = targ->g->dst_ip.ipv6.start; 1095 } 1096 memcpy(&udp, udp_ptr, sizeof(udp)); 1097 1098 udp.uh_sport = htons(targ->g->src_ip.port0); 1099 udp.uh_dport = htons(targ->g->dst_ip.port0); 1100 udp.uh_ulen = htons(paylen); 1101 if (targ->g->af == AF_INET) { 1102 /* Magic: taken from sbin/dhclient/packet.c */ 1103 udp.uh_sum = wrapsum( 1104 checksum(&udp, sizeof(udp), /* udp header */ 1105 checksum(pkt->ipv4.body, /* udp payload */ 1106 paylen - sizeof(udp), 1107 checksum(&pkt->ipv4.ip.ip_src, /* pseudo header */ 1108 2 * sizeof(pkt->ipv4.ip.ip_src), 1109 IPPROTO_UDP + (u_int32_t)ntohs(udp.uh_ulen))))); 1110 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip)); 1111 } else { 1112 /* Save part of pseudo header checksum into csum */ 1113 csum = IPPROTO_UDP << 24; 1114 csum = checksum(&csum, sizeof(csum), paylen); 1115 udp.uh_sum = wrapsum( 1116 checksum(udp_ptr, sizeof(udp), /* udp header */ 1117 checksum(pkt->ipv6.body, /* udp payload */ 1118 paylen - sizeof(udp), 1119 checksum(&pkt->ipv6.ip.ip6_src, /* pseudo header */ 1120 2 * sizeof(pkt->ipv6.ip.ip6_src), csum)))); 1121 memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6)); 1122 } 1123 memcpy(udp_ptr, &udp, sizeof(udp)); 1124 1125 bzero(&pkt->vh, sizeof(pkt->vh)); 1126 // dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0); 1127 } 1128 1129 static void 1130 get_vnet_hdr_len(struct glob_arg *g) 1131 { 1132 struct nmreq_header hdr; 1133 struct nmreq_port_hdr ph; 1134 int err; 1135 1136 hdr = g->nmd->hdr; /* copy name and version */ 1137 hdr.nr_reqtype = NETMAP_REQ_PORT_HDR_GET; 1138 hdr.nr_options = 0; 1139 memset(&ph, 0, sizeof(ph)); 1140 hdr.nr_body = (uintptr_t)&ph; 1141 err = ioctl(g->main_fd, NIOCCTRL, &hdr); 1142 if (err) { 1143 D("Unable to get virtio-net header length"); 1144 return; 1145 } 1146 1147 g->virt_header = ph.nr_hdr_len; 1148 if (g->virt_header) { 1149 D("Port requires virtio-net header, length = 
%d", 1150 g->virt_header); 1151 } 1152 } 1153 1154 static void 1155 set_vnet_hdr_len(struct glob_arg *g) 1156 { 1157 int err, l = g->virt_header; 1158 struct nmreq_header hdr; 1159 struct nmreq_port_hdr ph; 1160 1161 if (l == 0) 1162 return; 1163 1164 hdr = g->nmd->hdr; /* copy name and version */ 1165 hdr.nr_reqtype = NETMAP_REQ_PORT_HDR_SET; 1166 hdr.nr_options = 0; 1167 memset(&ph, 0, sizeof(ph)); 1168 hdr.nr_body = (uintptr_t)&ph; 1169 err = ioctl(g->main_fd, NIOCCTRL, &hdr); 1170 if (err) { 1171 D("Unable to set virtio-net header length %d", l); 1172 } 1173 } 1174 1175 /* 1176 * create and enqueue a batch of packets on a ring. 1177 * On the last one set NS_REPORT to tell the driver to generate 1178 * an interrupt when done. 1179 */ 1180 static int 1181 send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame, 1182 int size, struct targ *t, u_int count, int options) 1183 { 1184 u_int n, sent, head = ring->head; 1185 u_int frags = t->frags; 1186 u_int frag_size = t->frag_size; 1187 struct netmap_slot *slot = &ring->slot[head]; 1188 1189 n = nm_ring_space(ring); 1190 #if 0 1191 if (options & (OPT_COPY | OPT_PREFETCH) ) { 1192 for (sent = 0; sent < count; sent++) { 1193 struct netmap_slot *slot = &ring->slot[head]; 1194 char *p = NETMAP_BUF(ring, slot->buf_idx); 1195 1196 __builtin_prefetch(p); 1197 head = nm_ring_next(ring, head); 1198 } 1199 head = ring->head; 1200 } 1201 #endif 1202 for (sent = 0; sent < count && n >= frags; sent++, n--) { 1203 char *p; 1204 int buf_changed; 1205 u_int tosend = size; 1206 1207 slot = &ring->slot[head]; 1208 p = NETMAP_BUF(ring, slot->buf_idx); 1209 buf_changed = slot->flags & NS_BUF_CHANGED; 1210 1211 slot->flags = 0; 1212 if (options & OPT_RUBBISH) { 1213 /* do nothing */ 1214 } else if (options & OPT_INDIRECT) { 1215 slot->flags |= NS_INDIRECT; 1216 slot->ptr = (uint64_t)((uintptr_t)frame); 1217 } else if (frags > 1) { 1218 u_int i; 1219 const char *f = frame; 1220 char *fp = p; 1221 for (i = 0; i < frags - 1; i++) 
{ 1222 memcpy(fp, f, frag_size); 1223 slot->len = frag_size; 1224 slot->flags = NS_MOREFRAG; 1225 if (options & OPT_DUMP) 1226 dump_payload(fp, frag_size, ring, head); 1227 tosend -= frag_size; 1228 f += frag_size; 1229 head = nm_ring_next(ring, head); 1230 slot = &ring->slot[head]; 1231 fp = NETMAP_BUF(ring, slot->buf_idx); 1232 } 1233 n -= (frags - 1); 1234 p = fp; 1235 slot->flags = 0; 1236 memcpy(p, f, tosend); 1237 update_addresses(pkt, t); 1238 } else if ((options & (OPT_COPY | OPT_MEMCPY)) || buf_changed) { 1239 if (options & OPT_COPY) 1240 nm_pkt_copy(frame, p, size); 1241 else 1242 memcpy(p, frame, size); 1243 update_addresses(pkt, t); 1244 } else if (options & OPT_PREFETCH) { 1245 __builtin_prefetch(p); 1246 } 1247 slot->len = tosend; 1248 if (options & OPT_DUMP) 1249 dump_payload(p, tosend, ring, head); 1250 head = nm_ring_next(ring, head); 1251 } 1252 if (sent) { 1253 slot->flags |= NS_REPORT; 1254 ring->head = ring->cur = head; 1255 } 1256 if (sent < count) { 1257 /* tell netmap that we need more slots */ 1258 ring->cur = ring->tail; 1259 } 1260 1261 return (sent); 1262 } 1263 1264 /* 1265 * Index of the highest bit set 1266 */ 1267 static uint32_t 1268 msb64(uint64_t x) 1269 { 1270 uint64_t m = 1ULL << 63; 1271 int i; 1272 1273 for (i = 63; i >= 0; i--, m >>=1) 1274 if (m & x) 1275 return i; 1276 return 0; 1277 } 1278 1279 /* 1280 * wait until ts, either busy or sleeping if more than 1ms. 1281 * Return wakeup time. 1282 */ 1283 static struct timespec 1284 wait_time(struct timespec ts) 1285 { 1286 for (;;) { 1287 struct timespec w, cur; 1288 clock_gettime(CLOCK_REALTIME_PRECISE, &cur); 1289 w = timespec_sub(ts, cur); 1290 if (w.tv_sec < 0) 1291 return cur; 1292 else if (w.tv_sec > 0 || w.tv_nsec > 1000000) 1293 poll(NULL, 0, 1); 1294 } 1295 } 1296 1297 /* 1298 * Send a packet, and wait for a response. 1299 * The payload (after UDP header, ofs 42) has a 4-byte sequence 1300 * followed by a struct timeval (or bintime?) 
1301 */ 1302
/*
 * ping_body: client half of the ping-pong latency test.
 * Sends batches of packets on the first TX ring, stamping each with a
 * sequence number at payload offset 42 and a sec/nsec timestamp at
 * offset 46, then reads the echoes back from the RX rings and
 * accumulates min/average RTT plus a log2 histogram (buckets[], ns)
 * that is printed roughly once per second. Single-thread only;
 * honors the -R rate limit via wait_time()/tx_period.
 */
1303 static void * 1304 ping_body(void *data) 1305 { 1306 struct targ *targ = (struct targ *) data; 1307 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1308 struct netmap_if *nifp = targ->nmd->nifp; 1309 int i, m; 1310 void *frame; 1311 int size; 1312 struct timespec ts, now, last_print; 1313 struct timespec nexttime = {0, 0}; /* silence compiler */ 1314 uint64_t sent = 0, n = targ->g->npackets; 1315 uint64_t count = 0, t_cur, t_min = ~0, av = 0; 1316 uint64_t g_min = ~0, g_av = 0; 1317 uint64_t buckets[64]; /* bins for delays, ns */ 1318 int rate_limit = targ->g->tx_rate, tosend = 0; 1319 1320 frame = (char*)&targ->pkt + sizeof(targ->pkt.vh) - targ->g->virt_header; 1321 size = targ->g->pkt_size + targ->g->virt_header; 1322 1323 1324 if (targ->g->nthreads > 1) { 1325 D("can only ping with 1 thread"); 1326 return NULL; 1327 } 1328 1329 if (targ->g->af == AF_INET6) { 1330 D("Warning: ping-pong with IPv6 not supported"); 1331 } 1332 1333 bzero(&buckets, sizeof(buckets)); 1334 clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); 1335 now = last_print; 1336 if (rate_limit) { 1337 targ->tic = timespec_add(now, (struct timespec){2,0}); 1338 targ->tic.tv_nsec = 0; 1339 wait_time(targ->tic); 1340 nexttime = targ->tic; 1341 } 1342 while (!targ->cancel && (n == 0 || sent < n)) { 1343 struct netmap_ring *ring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring); 1344 struct netmap_slot *slot; 1345 char *p; 1346 int rv; 1347 uint64_t limit, event = 0; 1348 1349 if (rate_limit && tosend <= 0) { 1350 tosend = targ->g->burst; 1351 nexttime = timespec_add(nexttime, targ->g->tx_period); 1352 wait_time(nexttime); 1353 } 1354 1355 limit = rate_limit ? 
/* per-iteration budget: remaining rate quota, else one burst */
tosend : targ->g->burst; 1356 if (n > 0 && n - sent < limit) 1357 limit = n - sent; 1358 for (m = 0; (unsigned)m < limit; m++) { 1359 slot = &ring->slot[ring->head]; 1360 slot->len = size; 1361 p = NETMAP_BUF(ring, slot->buf_idx); 1362 1363 if (nm_ring_empty(ring)) { 1364 D("-- ouch, cannot send"); 1365 break; 1366 } else { 1367 struct tstamp *tp; 1368 nm_pkt_copy(frame, p, size); 1369 clock_gettime(CLOCK_REALTIME_PRECISE, &ts); 1370 bcopy(&sent, p+42, sizeof(sent)); 1371 tp = (struct tstamp *)(p+46); 1372 tp->sec = (uint32_t)ts.tv_sec; 1373 tp->nsec = (uint32_t)ts.tv_nsec; 1374 sent++; 1375 ring->head = ring->cur = nm_ring_next(ring, ring->head); 1376 } 1377 } 1378 if (m > 0) 1379 event++; 1380 targ->ctr.pkts = sent; 1381 targ->ctr.bytes = sent*size; 1382 targ->ctr.events = event; 1383 if (rate_limit) 1384 tosend -= m; 1385 #ifdef BUSYWAIT 1386 rv = ioctl(pfd.fd, NIOCTXSYNC, NULL); 1387 if (rv < 0) { 1388 D("TXSYNC error on queue %d: %s", targ->me, 1389 strerror(errno)); 1390 } 1391 again: 1392 ioctl(pfd.fd, NIOCRXSYNC, NULL); 1393 #else 1394 /* should use a parameter to decide how often to send */ 1395 if ( (rv = poll(&pfd, 1, 3000)) <= 0) { 1396 D("poll error on queue %d: %s", targ->me, 1397 (rv ? 
strerror(errno) : "timeout")); 1398 continue; 1399 } 1400 #endif /* BUSYWAIT */ 1401 /* see what we got back */ 1402 #ifdef BUSYWAIT 1403 int rx = 0; 1404 #endif 1405 for (i = targ->nmd->first_rx_ring; 1406 i <= targ->nmd->last_rx_ring; i++) { 1407 ring = NETMAP_RXRING(nifp, i); 1408 while (!nm_ring_empty(ring)) { 1409 uint32_t seq; 1410 struct tstamp *tp; 1411 int pos; 1412 1413 slot = &ring->slot[ring->head]; 1414 p = NETMAP_BUF(ring, slot->buf_idx); 1415 1416 clock_gettime(CLOCK_REALTIME_PRECISE, &now); 1417 bcopy(p+42, &seq, sizeof(seq)); 1418 tp = (struct tstamp *)(p+46); 1419 ts.tv_sec = (time_t)tp->sec; 1420 ts.tv_nsec = (long)tp->nsec; 1421 ts.tv_sec = now.tv_sec - ts.tv_sec; 1422 ts.tv_nsec = now.tv_nsec - ts.tv_nsec; 1423 if (ts.tv_nsec < 0) { 1424 ts.tv_nsec += 1000000000; 1425 ts.tv_sec--; 1426 } 1427 if (0) D("seq %d/%llu delta %d.%09d", seq, 1428 (unsigned long long)sent, 1429 (int)ts.tv_sec, (int)ts.tv_nsec); 1430 t_cur = ts.tv_sec * 1000000000UL + ts.tv_nsec; 1431 if (t_cur < t_min) 1432 t_min = t_cur; 1433 count ++; 1434 av += t_cur; 1435 pos = msb64(t_cur); 1436 buckets[pos]++; 1437 /* now store it in a bucket */ 1438 ring->head = ring->cur = nm_ring_next(ring, ring->head); 1439 #ifdef BUSYWAIT 1440 rx++; 1441 #endif 1442 } 1443 } 1444 //D("tx %d rx %d", sent, rx); 1445 //usleep(100000); 1446 ts.tv_sec = now.tv_sec - last_print.tv_sec; 1447 ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; 1448 if (ts.tv_nsec < 0) { 1449 ts.tv_nsec += 1000000000; 1450 ts.tv_sec--; 1451 } 1452 if (ts.tv_sec >= 1) { 1453 D("count %d RTT: min %d av %d ns", 1454 (int)count, (int)t_min, (int)(av/count)); 1455 int k, j, kmin, off; 1456 char buf[512]; 1457 1458 for (kmin = 0; kmin < 64; kmin ++) 1459 if (buckets[kmin]) 1460 break; 1461 for (k = 63; k >= kmin; k--) 1462 if (buckets[k]) 1463 break; 1464 buf[0] = '\0'; 1465 off = 0; 1466 for (j = kmin; j <= k; j++) { 1467 off += sprintf(buf + off, " %5d", (int)buckets[j]); 1468 } 1469 D("k: %d .. 
%d\n\t%s", 1<<kmin, 1<<k, buf); 1470 bzero(&buckets, sizeof(buckets)); 1471 count = 0; 1472 g_av += av; 1473 av = 0; 1474 if (t_min < g_min) 1475 g_min = t_min; 1476 t_min = ~0; 1477 last_print = now; 1478 } 1479 #ifdef BUSYWAIT 1480 if (rx < m && ts.tv_sec <= 3 && !targ->cancel) 1481 goto again; 1482 #endif /* BUSYWAIT */ 1483 } 1484 1485 if (sent > 0) { 1486 D("RTT over %llu packets: min %d av %d ns", 1487 (long long unsigned)sent, (int)g_min, 1488 (int)((double)g_av/sent)); 1489 } 1490 targ->completed = 1; 1491 1492 /* reset the ``used`` flag. */ 1493 targ->used = 0; 1494 1495 return NULL; 1496 } 1497 1498 1499 /* 1500 * reply to ping requests 1501 */ 
/*
 * pong_body: server half of the ping-pong test. For every packet read
 * from the RX rings it copies the frame into the first TX ring, swaps
 * the source/destination MAC addresses (and the IPv4 addresses when
 * the ethertype is IP), and transmits it back otherwise unchanged.
 * Single-thread only; a positive -n count is reported but not honored.
 */
1502 static void * 1503 pong_body(void *data) 1504 { 1505 struct targ *targ = (struct targ *) data; 1506 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1507 struct netmap_if *nifp = targ->nmd->nifp; 1508 struct netmap_ring *txring, *rxring; 1509 int i; 1510 uint64_t sent = 0, n = targ->g->npackets; 1511 1512 if (targ->g->nthreads > 1) { 1513 D("can only reply ping with 1 thread"); 1514 return NULL; 1515 } 1516 if (n > 0) 1517 D("understood ponger %llu but don't know how to do it", 1518 (unsigned long long)n); 1519 1520 if (targ->g->af == AF_INET6) { 1521 D("Warning: ping-pong with IPv6 not supported"); 1522 } 1523 1524 while (!targ->cancel && (n == 0 || sent < n)) { 1525 uint32_t txhead, txavail; 1526 //#define BUSYWAIT 1527 #ifdef BUSYWAIT 1528 ioctl(pfd.fd, NIOCRXSYNC, NULL); 1529 #else 1530 int rv; 1531 if ( (rv = poll(&pfd, 1, 1000)) <= 0) { 1532 D("poll error on queue %d: %s", targ->me, 1533 rv ? 
strerror(errno) : "timeout"); 1534 continue; 1535 } 1536 #endif 1537 txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring); 1538 txhead = txring->head; 1539 txavail = nm_ring_space(txring); 1540 /* see what we got back */ 1541 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 1542 rxring = NETMAP_RXRING(nifp, i); 1543 while (!nm_ring_empty(rxring)) { 1544 uint16_t *spkt, *dpkt; 1545 uint32_t head = rxring->head; 1546 struct netmap_slot *slot = &rxring->slot[head]; 1547 char *src, *dst; 1548 src = NETMAP_BUF(rxring, slot->buf_idx); 1549 //D("got pkt %p of size %d", src, slot->len); 1550 rxring->head = rxring->cur = nm_ring_next(rxring, head); 1551 if (txavail == 0) 1552 continue; 1553 dst = NETMAP_BUF(txring, 1554 txring->slot[txhead].buf_idx); 1555 /* copy... */ 1556 dpkt = (uint16_t *)dst; 1557 spkt = (uint16_t *)src; 1558 nm_pkt_copy(src, dst, slot->len); 1559 /* swap source and destination MAC */ 1560 dpkt[0] = spkt[3]; 1561 dpkt[1] = spkt[4]; 1562 dpkt[2] = spkt[5]; 1563 dpkt[3] = spkt[0]; 1564 dpkt[4] = spkt[1]; 1565 dpkt[5] = spkt[2]; 1566 /* swap source and destination IPv4 */ 1567 if (spkt[6] == htons(ETHERTYPE_IP)) { 1568 dpkt[13] = spkt[15]; 1569 dpkt[14] = spkt[16]; 1570 dpkt[15] = spkt[13]; 1571 dpkt[16] = spkt[14]; 1572 } 1573 txring->slot[txhead].len = slot->len; 1574 //dump_payload(dst, slot->len, txring, txhead); 1575 txhead = nm_ring_next(txring, txhead); 1576 txavail--; 1577 sent++; 1578 } 1579 } 1580 txring->head = txring->cur = txhead; 1581 targ->ctr.pkts = sent; 1582 #ifdef BUSYWAIT 1583 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1584 #endif 1585 } 1586 1587 targ->completed = 1; 1588 1589 /* reset the ``used`` flag. 
*/ 1590 targ->used = 0; 1591 1592 return NULL; 1593 } 1594 1595 
/*
 * sender_body: main transmit loop for "tx" mode. Depending on the
 * device type it writes frames to a tap fd, injects them via pcap, or
 * fills the netmap TX rings with send_packets(), honoring the packet
 * budget (-n, split across threads), the -R rate limit and multi-slot
 * fragments (-F/-M). Counters are published in targ->ctr as it runs.
 */
1596 static void * 1597 sender_body(void *data) 1598 { 1599 struct targ *targ = (struct targ *) data; 1600 struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; 1601 struct netmap_if *nifp; 1602 struct netmap_ring *txring = NULL; 1603 int i; 1604 uint64_t n = targ->g->npackets / targ->g->nthreads; 1605 uint64_t sent = 0; 1606 uint64_t event = 0; 1607 int options = targ->g->options; 1608 struct timespec nexttime = { 0, 0}; // XXX silence compiler 1609 int rate_limit = targ->g->tx_rate; 1610 struct pkt *pkt = &targ->pkt; 1611 void *frame; 1612 int size; 1613 1614 if (targ->frame == NULL) { 1615 frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header; 1616 size = targ->g->pkt_size + targ->g->virt_header; 1617 } else { 1618 frame = targ->frame; 1619 size = targ->g->pkt_size; 1620 } 1621 1622 D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd); 1623 if (setaffinity(targ->thread, targ->affinity)) 1624 goto quit; 1625 1626 /* main loop.*/ 1627 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1628 if (rate_limit) { 1629 targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); 1630 targ->tic.tv_nsec = 0; 1631 wait_time(targ->tic); 1632 nexttime = targ->tic; 1633 } 1634 if (targ->g->dev_type == DEV_TAP) { 1635 D("writing to file desc %d", targ->g->main_fd); 1636 1637 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 1638 if (write(targ->g->main_fd, frame, size) != -1) 1639 sent++; 1640 update_addresses(pkt, targ); 1641 if (i > 10000) { 1642 targ->ctr.pkts = sent; 1643 targ->ctr.bytes = sent*size; 1644 targ->ctr.events = sent; 1645 i = 0; 1646 } 1647 } 1648 #ifndef NO_PCAP 1649 } else if (targ->g->dev_type == DEV_PCAP) { 1650 pcap_t *p = targ->g->p; 1651 1652 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 1653 if (pcap_inject(p, frame, size) != -1) 1654 sent++; 1655 update_addresses(pkt, targ); 1656 if (i > 10000) { 1657 targ->ctr.pkts = sent; 1658 targ->ctr.bytes = 
sent*size; 1659 targ->ctr.events = sent; 1660 i = 0; 1661 } 1662 } 1663 #endif /* NO_PCAP */ 1664 } else { 1665 int tosend = 0; 1666 u_int bufsz, frag_size = targ->g->frag_size; 1667 1668 nifp = targ->nmd->nifp; 1669 txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring); 1670 bufsz = txring->nr_buf_size; 1671 if (bufsz < frag_size) 1672 frag_size = bufsz; 1673 targ->frag_size = targ->g->pkt_size / targ->frags; 1674 if (targ->frag_size > frag_size) { 1675 targ->frags = targ->g->pkt_size / frag_size; 1676 targ->frag_size = frag_size; 1677 if (targ->g->pkt_size % frag_size != 0) 1678 targ->frags++; 1679 } 1680 D("frags %u frag_size %u", targ->frags, targ->frag_size); 1681 1682 /* mark all slots of all rings as changed so initial copy will be done */ 1683 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1684 uint32_t j; 1685 struct netmap_slot *slot; 1686 1687 txring = NETMAP_TXRING(nifp, i); 1688 for (j = 0; j < txring->num_slots; j++) { 1689 slot = &txring->slot[j]; 1690 slot->flags = NS_BUF_CHANGED; 1691 } 1692 } 1693 
/* netmap path: keep filling the TX rings until cancelled or the budget runs out. */
1694 while (!targ->cancel && (n == 0 || sent < n)) { 1695 int rv; 1696 1697 if (rate_limit && tosend <= 0) { 1698 tosend = targ->g->burst; 1699 nexttime = timespec_add(nexttime, targ->g->tx_period); 1700 wait_time(nexttime); 1701 } 1702 1703 /* 1704 * wait for available room in the send queue(s) 1705 */ 1706 #ifdef BUSYWAIT 1707 (void)rv; 1708 if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) { 1709 D("ioctl error on queue %d: %s", targ->me, 1710 strerror(errno)); 1711 goto quit; 1712 } 1713 #else /* !BUSYWAIT */ 1714 if ( (rv = poll(&pfd, 1, 2000)) <= 0) { 1715 if (targ->cancel) 1716 break; 1717 D("poll error on queue %d: %s", targ->me, 1718 rv ? 
strerror(errno) : "timeout"); 1719 // goto quit; 1720 } 1721 if (pfd.revents & POLLERR) { 1722 D("poll error on %d ring %d-%d", pfd.fd, 1723 targ->nmd->first_tx_ring, targ->nmd->last_tx_ring); 1724 goto quit; 1725 } 1726 #endif /* !BUSYWAIT */ 1727 /* 1728 * scan our queues and send on those with room 1729 */ 1730 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1731 int m; 1732 uint64_t limit = rate_limit ? tosend : targ->g->burst; 1733 1734 if (n > 0 && n == sent) 1735 break; 1736 1737 if (n > 0 && n - sent < limit) 1738 limit = n - sent; 1739 txring = NETMAP_TXRING(nifp, i); 1740 if (nm_ring_empty(txring)) 1741 continue; 1742 1743 if (targ->g->pkt_min_size > 0) { 1744 size = nrand48(targ->seed) % 1745 (targ->g->pkt_size - targ->g->pkt_min_size) + 1746 targ->g->pkt_min_size; 1747 } 1748 m = send_packets(txring, pkt, frame, size, targ, 1749 limit, options); 1750 ND("limit %lu tail %d m %d", 1751 limit, txring->tail, m); 1752 sent += m; 1753 if (m > 0) //XXX-ste: can m be 0? 1754 event++; 1755 targ->ctr.pkts = sent; 1756 targ->ctr.bytes += m*size; 1757 targ->ctr.events = event; 1758 if (rate_limit) { 1759 tosend -= m; 1760 if (tosend <= 0) 1761 break; 1762 } 1763 } 1764 } 1765 /* flush any remaining packets */ 1766 if (txring != NULL) { 1767 D("flush tail %d head %d on thread %p", 1768 txring->tail, txring->head, 1769 (void *)pthread_self()); 1770 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1771 } 1772 1773 /* final part: wait all the TX queues to be empty. 
*/ 1774 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1775 txring = NETMAP_TXRING(nifp, i); 1776 while (!targ->cancel && nm_tx_pending(txring)) { 1777 RD(5, "pending tx tail %d head %d on ring %d", 1778 txring->tail, txring->head, i); 1779 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1780 usleep(1); /* wait 1 tick */ 1781 } 1782 } 1783 } /* end DEV_NETMAP */ 1784 1785 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1786 targ->completed = 1; 1787 targ->ctr.pkts = sent; 1788 targ->ctr.bytes = sent*size; 1789 targ->ctr.events = event; 1790 quit: 1791 /* reset the ``used`` flag. */ 1792 targ->used = 0; 1793 1794 return (NULL); 1795 } 1796 1797 
/* receive_pcap: pcap_dispatch() callback; accounts one captured packet. */
1798 #ifndef NO_PCAP 1799 static void 1800 receive_pcap(u_char *user, const struct pcap_pkthdr * h, 1801 const u_char * bytes) 1802 { 1803 struct my_ctrs *ctr = (struct my_ctrs *)user; 1804 (void)bytes; /* UNUSED */ 1805 ctr->bytes += h->len; 1806 ctr->pkts++; 1807 } 1808 #endif /* !NO_PCAP */ 1809 1810 
/*
 * receive_packets: drain up to 'limit' slots from one RX ring,
 * optionally dumping payloads, accumulating byte counts into *bytes
 * (or a local dummy when bytes is NULL), and returning the number of
 * completed packets, i.e. slots without NS_MOREFRAG set.
 */
1811 static int 1812 receive_packets(struct netmap_ring *ring, u_int limit, int dump, uint64_t *bytes) 1813 { 1814 u_int head, rx, n; 1815 uint64_t b = 0; 1816 u_int complete = 0; 1817 1818 if (bytes == NULL) 1819 bytes = &b; 1820 1821 head = ring->head; 1822 n = nm_ring_space(ring); 1823 if (n < limit) 1824 limit = n; 1825 for (rx = 0; rx < limit; rx++) { 1826 struct netmap_slot *slot = &ring->slot[head]; 1827 char *p = NETMAP_BUF(ring, slot->buf_idx); 1828 1829 *bytes += slot->len; 1830 if (dump) 1831 dump_payload(p, slot->len, ring, head); 1832 if (!(slot->flags & NS_MOREFRAG)) 1833 complete++; 1834 1835 head = nm_ring_next(ring, head); 1836 } 1837 ring->head = ring->cur = head; 1838 1839 return (complete); 1840 } 1841 
/*
 * receiver_body: main receive loop for "rx" mode. Waits (unbounded) for
 * the first packet, then drains the tap fd, pcap handle or netmap RX
 * rings, publishing pkts/bytes/events/min_space in targ->ctr until
 * cancelled or the per-thread -n budget is reached; without -F/forever
 * it exits after one second of silence.
 */
1842 static void * 1843 receiver_body(void *data) 1844 { 1845 struct targ *targ = (struct targ *) data; 1846 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1847 struct netmap_if *nifp; 1848 struct netmap_ring *rxring; 1849 int i; 1850 struct my_ctrs cur; 1851 uint64_t n = 
targ->g->npackets / targ->g->nthreads; 1852 1853 memset(&cur, 0, sizeof(cur)); 1854 1855 if (setaffinity(targ->thread, targ->affinity)) 1856 goto quit; 1857 1858 D("reading from %s fd %d main_fd %d", 1859 targ->g->ifname, targ->fd, targ->g->main_fd); 1860 /* unbounded wait for the first packet. */ 1861 for (;!targ->cancel;) { 1862 i = poll(&pfd, 1, 1000); 1863 if (i > 0 && !(pfd.revents & POLLERR)) 1864 break; 1865 if (i < 0) { 1866 D("poll() error: %s", strerror(errno)); 1867 goto quit; 1868 } 1869 if (pfd.revents & POLLERR) { 1870 D("fd error"); 1871 goto quit; 1872 } 1873 RD(1, "waiting for initial packets, poll returns %d %d", 1874 i, pfd.revents); 1875 } 1876 /* main loop, exit after 1s silence */ 1877 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1878 if (targ->g->dev_type == DEV_TAP) { 1879 while (!targ->cancel && (n == 0 || targ->ctr.pkts < n)) { 1880 char buf[MAX_BODYSIZE]; 1881 /* XXX should we poll ? */ 1882 i = read(targ->g->main_fd, buf, sizeof(buf)); 1883 if (i > 0) { 1884 targ->ctr.pkts++; 1885 targ->ctr.bytes += i; 1886 targ->ctr.events++; 1887 } 1888 } 1889 #ifndef NO_PCAP 1890 } else if (targ->g->dev_type == DEV_PCAP) { 1891 while (!targ->cancel && (n == 0 || targ->ctr.pkts < n)) { 1892 /* XXX should we poll ? */ 1893 pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, 1894 (u_char *)&targ->ctr); 1895 targ->ctr.events++; 1896 } 1897 #endif /* !NO_PCAP */ 1898 } else { 1899 int dump = targ->g->options & OPT_DUMP; 1900 1901 nifp = targ->nmd->nifp; 1902 while (!targ->cancel && (n == 0 || targ->ctr.pkts < n)) { 1903 /* Once we started to receive packets, wait at most 1 seconds 1904 before quitting. 
*/ 1905 #ifdef BUSYWAIT 1906 if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) { 1907 D("ioctl error on queue %d: %s", targ->me, 1908 strerror(errno)); 1909 goto quit; 1910 } 1911 #else /* !BUSYWAIT */ 1912 if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { 1913 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1914 targ->toc.tv_sec -= 1; /* Subtract timeout time. */ 1915 goto out; 1916 } 1917 1918 if (pfd.revents & POLLERR) { 1919 D("poll err"); 1920 goto quit; 1921 } 1922 #endif /* !BUSYWAIT */ 1923 uint64_t cur_space = 0; 1924 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 1925 int m; 1926 1927 rxring = NETMAP_RXRING(nifp, i); 1928 /* compute free space in the ring */ 1929 m = rxring->head + rxring->num_slots - rxring->tail; 1930 if (m >= (int) rxring->num_slots) 1931 m -= rxring->num_slots; 1932 cur_space += m; 1933 if (nm_ring_empty(rxring)) 1934 continue; 1935 1936 m = receive_packets(rxring, targ->g->burst, dump, &cur.bytes); 1937 cur.pkts += m; 1938 if (m > 0) 1939 cur.events++; 1940 } 1941 cur.min_space = targ->ctr.min_space; 1942 if (cur_space < cur.min_space) 1943 cur.min_space = cur_space; 1944 targ->ctr = cur; 1945 } 1946 } 1947 1948 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1949 1950 #if !defined(BUSYWAIT) 1951 out: 1952 #endif 1953 targ->completed = 1; 1954 targ->ctr = cur; 1955 1956 quit: 1957 /* reset the ``used`` flag. 
*/ 1958 targ->used = 0; 1959 1960 return (NULL); 1961 } 1962 
/*
 * txseq_body: "txseq" mode: transmit a stream of packets carrying an
 * incrementing 32-bit sequence number in the payload, updating the UDP
 * checksum incrementally for the 4 rewritten bytes. Uses only the
 * first TX queue; single-thread only; -n is ignored; honors -R rate
 * limiting and multi-slot fragments (NS_MOREFRAG chains).
 */
1963 static void * 1964 txseq_body(void *data) 1965 { 1966 struct targ *targ = (struct targ *) data; 1967 struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; 1968 struct netmap_ring *ring; 1969 int64_t sent = 0; 1970 uint64_t event = 0; 1971 int options = targ->g->options | OPT_COPY; 1972 struct timespec nexttime = {0, 0}; 1973 int rate_limit = targ->g->tx_rate; 1974 struct pkt *pkt = &targ->pkt; 1975 int frags = targ->g->frags; 1976 uint32_t sequence = 0; 1977 int budget = 0; 1978 void *frame; 1979 int size; 1980 1981 if (targ->g->nthreads > 1) { 1982 D("can only txseq ping with 1 thread"); 1983 return NULL; 1984 } 1985 1986 if (targ->g->npackets > 0) { 1987 D("Ignoring -n argument"); 1988 } 1989 1990 frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header; 1991 size = targ->g->pkt_size + targ->g->virt_header; 1992 1993 D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd); 1994 if (setaffinity(targ->thread, targ->affinity)) 1995 goto quit; 1996 1997 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1998 if (rate_limit) { 1999 targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); 2000 targ->tic.tv_nsec = 0; 2001 wait_time(targ->tic); 2002 nexttime = targ->tic; 2003 } 2004 2005 /* Only use the first queue. 
*/ 2006 ring = NETMAP_TXRING(targ->nmd->nifp, targ->nmd->first_tx_ring); 2007 2008 while (!targ->cancel) { 2009 int64_t limit; 2010 unsigned int space; 2011 unsigned int head; 2012 int fcnt; 2013 uint16_t sum = 0; 2014 int rv; 2015 2016 if (!rate_limit) { 2017 budget = targ->g->burst; 2018 2019 } else if (budget <= 0) { 2020 budget = targ->g->burst; 2021 nexttime = timespec_add(nexttime, targ->g->tx_period); 2022 wait_time(nexttime); 2023 } 2024 2025 /* wait for available room in the send queue */ 2026 #ifdef BUSYWAIT 2027 (void)rv; 2028 if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) { 2029 D("ioctl error on queue %d: %s", targ->me, 2030 strerror(errno)); 2031 goto quit; 2032 } 2033 #else /* !BUSYWAIT */ 2034 if ( (rv = poll(&pfd, 1, 2000)) <= 0) { 2035 if (targ->cancel) 2036 break; 2037 D("poll error on queue %d: %s", targ->me, 2038 rv ? strerror(errno) : "timeout"); 2039 // goto quit; 2040 } 2041 if (pfd.revents & POLLERR) { 2042 D("poll error on %d ring %d-%d", pfd.fd, 2043 targ->nmd->first_tx_ring, targ->nmd->last_tx_ring); 2044 goto quit; 2045 } 2046 #endif /* !BUSYWAIT */ 2047 2048 /* If no room poll() again. */ 2049 space = nm_ring_space(ring); 2050 if (!space) { 2051 continue; 2052 } 2053 2054 limit = budget; 2055 2056 if (space < limit) { 2057 limit = space; 2058 } 2059 2060 /* Cut off ``limit`` to make sure is multiple of ``frags``. */ 2061 if (frags > 1) { 2062 limit = (limit / frags) * frags; 2063 } 2064 2065 limit = sent + limit; /* Convert to absolute. */ 2066 2067 for (fcnt = frags, head = ring->head; 2068 sent < limit; sent++, sequence++) { 2069 struct netmap_slot *slot = &ring->slot[head]; 2070 char *p = NETMAP_BUF(ring, slot->buf_idx); 2071 uint16_t *w = (uint16_t *)PKT(pkt, body, targ->g->af), t; 2072 2073 memcpy(&sum, targ->g->af == AF_INET ? 
/* Per-packet sequence stamping; the UDP checksum is patched incrementally for the 4 rewritten payload bytes. */
&pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, sizeof(sum)); 2074 2075 slot->flags = 0; 2076 t = *w; 2077 PKT(pkt, body, targ->g->af)[0] = sequence >> 24; 2078 PKT(pkt, body, targ->g->af)[1] = (sequence >> 16) & 0xff; 2079 sum = ~cksum_add(~sum, cksum_add(~t, *w)); 2080 t = *++w; 2081 PKT(pkt, body, targ->g->af)[2] = (sequence >> 8) & 0xff; 2082 PKT(pkt, body, targ->g->af)[3] = sequence & 0xff; 2083 sum = ~cksum_add(~sum, cksum_add(~t, *w)); 2084 memcpy(targ->g->af == AF_INET ? &pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, &sum, sizeof(sum)); 2085 nm_pkt_copy(frame, p, size); 2086 if (fcnt == frags) { 2087 update_addresses(pkt, targ); 2088 } 2089 2090 if (options & OPT_DUMP) { 2091 dump_payload(p, size, ring, head); 2092 } 2093 2094 slot->len = size; 2095 2096 if (--fcnt > 0) { 2097 slot->flags |= NS_MOREFRAG; 2098 } else { 2099 fcnt = frags; 2100 } 2101 2102 if (sent == limit - 1) { 2103 /* Make sure we don't push an incomplete 2104 * packet. */ 2105 assert(!(slot->flags & NS_MOREFRAG)); 2106 slot->flags |= NS_REPORT; 2107 } 2108 2109 head = nm_ring_next(ring, head); 2110 if (rate_limit) { 2111 budget--; 2112 } 2113 } 2114 2115 ring->cur = ring->head = head; 2116 2117 event ++; 2118 targ->ctr.pkts = sent; 2119 targ->ctr.bytes = sent * size; 2120 targ->ctr.events = event; 2121 } 2122 2123 /* flush any remaining packets */ 2124 D("flush tail %d head %d on thread %p", 2125 ring->tail, ring->head, 2126 (void *)pthread_self()); 2127 ioctl(pfd.fd, NIOCTXSYNC, NULL); 2128 2129 /* final part: wait the TX queues to become empty. 
*/ 2130 while (!targ->cancel && nm_tx_pending(ring)) { 2131 RD(5, "pending tx tail %d head %d on ring %d", 2132 ring->tail, ring->head, targ->nmd->first_tx_ring); 2133 ioctl(pfd.fd, NIOCTXSYNC, NULL); 2134 usleep(1); /* wait 1 tick */ 2135 } 2136 2137 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 2138 targ->completed = 1; 2139 targ->ctr.pkts = sent; 2140 targ->ctr.bytes = sent * size; 2141 targ->ctr.events = event; 2142 quit: 2143 /* reset the ``used`` flag. */ 2144 targ->used = 0; 2145 2146 return (NULL); 2147 } 2148 2149 
/*
 * multi_slot_to_string: render the |len,flags| pairs of the 'nfrags'
 * slots starting at 'head' into strbuf (truncating when it would
 * overflow strbuflen), for diagnostic messages. Returns strbuf.
 */
2150 static char * 2151 multi_slot_to_string(struct netmap_ring *ring, unsigned int head, 2152 unsigned int nfrags, char *strbuf, size_t strbuflen) 2153 { 2154 unsigned int f; 2155 char *ret = strbuf; 2156 2157 for (f = 0; f < nfrags; f++) { 2158 struct netmap_slot *slot = &ring->slot[head]; 2159 int m = snprintf(strbuf, strbuflen, "|%u,%x|", slot->len, 2160 slot->flags); 2161 if (m >= (int)strbuflen) { 2162 break; 2163 } 2164 strbuf += m; 2165 strbuflen -= m; 2166 2167 head = nm_ring_next(ring, head); 2168 } 2169 2170 return ret; 2171 } 2172 
/*
 * rxseq_body: "rxseq" mode: receive packets, extract the 32-bit
 * sequence number carried in the payload, and report sequence gaps,
 * out-of-order arrivals, and unexpected fragment counts, keeping one
 * expected-sequence counter per RX ring (seq_exp[]).
 */
2173 static void * 2174 rxseq_body(void *data) 2175 { 2176 struct targ *targ = (struct targ *) data; 2177 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 2178 int dump = targ->g->options & OPT_DUMP; 2179 struct netmap_ring *ring; 2180 unsigned int frags_exp = 1; 2181 struct my_ctrs cur; 2182 unsigned int frags = 0; 2183 int first_packet = 1; 2184 int first_slot = 1; 2185 int i, j, af, nrings; 2186 uint32_t seq, *seq_exp = NULL; 2187 2188 memset(&cur, 0, sizeof(cur)); 2189 2190 if (setaffinity(targ->thread, targ->affinity)) 2191 goto quit; 2192 2193 nrings = targ->nmd->last_rx_ring - targ->nmd->first_rx_ring + 1; 2194 seq_exp = calloc(nrings, sizeof(uint32_t)); 2195 if (seq_exp == NULL) { 2196 D("failed to allocate seq array"); 2197 goto quit; 2198 } 2199 2200 D("reading from %s fd %d main_fd %d", 2201 targ->g->ifname, targ->fd, targ->g->main_fd); 2202 /* unbounded wait for the first packet. 
*/ 2203 for (;!targ->cancel;) { 2204 i = poll(&pfd, 1, 1000); 2205 if (i > 0 && !(pfd.revents & POLLERR)) 2206 break; 2207 RD(1, "waiting for initial packets, poll returns %d %d", 2208 i, pfd.revents); 2209 } 2210 2211 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 2212 2213 2214 while (!targ->cancel) { 2215 unsigned int head; 2216 int limit; 2217 2218 #ifdef BUSYWAIT 2219 if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) { 2220 D("ioctl error on queue %d: %s", targ->me, 2221 strerror(errno)); 2222 goto quit; 2223 } 2224 #else /* !BUSYWAIT */ 2225 if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { 2226 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 2227 targ->toc.tv_sec -= 1; /* Subtract timeout time. */ 2228 goto out; 2229 } 2230 2231 if (pfd.revents & POLLERR) { 2232 D("poll err"); 2233 goto quit; 2234 } 2235 #endif /* !BUSYWAIT */ 2236 2237 for (j = targ->nmd->first_rx_ring; j <= targ->nmd->last_rx_ring; j++) { 2238 ring = NETMAP_RXRING(targ->nmd->nifp, j); 2239 if (nm_ring_empty(ring)) 2240 continue; 2241 2242 limit = nm_ring_space(ring); 2243 if (limit > targ->g->burst) 2244 limit = targ->g->burst; 2245 2246 #if 0 2247 /* Enable this if 2248 * 1) we remove the early-return optimization from 2249 * the netmap poll implementation, or 2250 * 2) pipes get NS_MOREFRAG support. 2251 * With the current netmap implementation, an experiment like 2252 * pkt-gen -i vale:1{1 -f txseq -F 9 2253 * pkt-gen -i vale:1}1 -f rxseq 2254 * would get stuck as soon as we find nm_ring_space(ring) < 9, 2255 * since here limit is rounded to 0 and 2256 * pipe rxsync is not called anymore by the poll() of this loop. 2257 */ 2258 if (frags_exp > 1) { 2259 int o = limit; 2260 /* Cut off to the closest smaller multiple. 
*/ 2261 limit = (limit / frags_exp) * frags_exp; 2262 RD(2, "LIMIT %d --> %d", o, limit); 2263 } 2264 #endif 2265 2266 for (head = ring->head, i = 0; i < limit; i++) { 2267 struct netmap_slot *slot = &ring->slot[head]; 2268 char *p = NETMAP_BUF(ring, slot->buf_idx); 2269 int len = slot->len; 2270 struct pkt *pkt; 2271 2272 if (dump) { 2273 dump_payload(p, slot->len, ring, head); 2274 } 2275 2276 frags++; 2277 if (!(slot->flags & NS_MOREFRAG)) { 2278 if (first_packet) { 2279 first_packet = 0; 2280 } else if (frags != frags_exp) { 2281 char prbuf[512]; 2282 RD(1, "Received packets with %u frags, " 2283 "expected %u, '%s'", frags, frags_exp, 2284 multi_slot_to_string(ring, head-frags+1, 2285 frags, 2286 prbuf, sizeof(prbuf))); 2287 } 2288 first_packet = 0; 2289 frags_exp = frags; 2290 frags = 0; 2291 } 2292 2293 p -= sizeof(pkt->vh) - targ->g->virt_header; 2294 len += sizeof(pkt->vh) - targ->g->virt_header; 2295 pkt = (struct pkt *)p; 2296 if (ntohs(pkt->eh.ether_type) == ETHERTYPE_IP) 2297 af = AF_INET; 2298 else 2299 af = AF_INET6; 2300 2301 if ((char *)pkt + len < ((char *)PKT(pkt, body, af)) + 2302 sizeof(seq)) { 2303 RD(1, "%s: packet too small (len=%u)", __func__, 2304 slot->len); 2305 } else { 2306 seq = (PKT(pkt, body, af)[0] << 24) | 2307 (PKT(pkt, body, af)[1] << 16) | 2308 (PKT(pkt, body, af)[2] << 8) | 2309 PKT(pkt, body, af)[3]; 2310 if (first_slot) { 2311 /* Grab the first one, whatever it 2312 is. 
*/ 2313 seq_exp[j] = seq; 2314 first_slot = 0; 2315 } else if (seq != seq_exp[j]) { 2316 uint32_t delta = seq - seq_exp[j]; 2317 2318 if (delta < (0xFFFFFFFF >> 1)) { 2319 RD(2, "Sequence GAP: exp %u found %u", 2320 seq_exp[j], seq); 2321 } else { 2322 RD(2, "Sequence OUT OF ORDER: " 2323 "exp %u found %u", seq_exp[j], seq); 2324 } 2325 seq_exp[j] = seq; 2326 } 2327 seq_exp[j]++; 2328 } 2329 2330 cur.bytes += slot->len; 2331 head = nm_ring_next(ring, head); 2332 cur.pkts++; 2333 } 2334 2335 ring->cur = ring->head = head; 2336 2337 cur.events++; 2338 targ->ctr = cur; 2339 } 2340 } 2341 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 2342 2343 #ifndef BUSYWAIT 2344 out: 2345 #endif /* !BUSYWAIT */ 2346 targ->completed = 1; 2347 targ->ctr = cur; 2348 2349 quit: 2350 if (seq_exp != NULL) 2351 free(seq_exp); 2352 /* reset the ``used`` flag. */ 2353 targ->used = 0; 2354 2355 return (NULL); 2356 } 2357 2358 
/*
 * tx_output: print the final summary for one direction: packet/byte/
 * event totals, then pps, bandwidth and average batch size (pkts per
 * event). g->framing is added once per packet when computing the raw
 * bandwidth (presumably per-packet framing overhead in bits — see the
 * raw_bw formula; confirm against the definition of glob_arg.framing).
 * NOTE(review): cur->events == 0 with cur->pkts > 0 would divide by
 * zero in the 'abs' computation — verify events is always >= 1 here.
 */
2359 static void 2360 tx_output(struct glob_arg *g, struct my_ctrs *cur, double delta, const char *msg) 2361 { 2362 double bw, raw_bw, pps, abs; 2363 char b1[40], b2[80], b3[80]; 2364 int size; 2365 2366 if (cur->pkts == 0) { 2367 printf("%s nothing.\n", msg); 2368 return; 2369 } 2370 2371 size = (int)(cur->bytes / cur->pkts); 2372 2373 printf("%s %llu packets %llu bytes %llu events %d bytes each in %.2f seconds.\n", 2374 msg, 2375 (unsigned long long)cur->pkts, 2376 (unsigned long long)cur->bytes, 2377 (unsigned long long)cur->events, size, delta); 2378 if (delta == 0) 2379 delta = 1e-6; 2380 if (size < 60) /* correct for min packet size */ 2381 size = 60; 2382 pps = cur->pkts / delta; 2383 bw = (8.0 * cur->bytes) / delta; 2384 raw_bw = (8.0 * cur->bytes + cur->pkts * g->framing) / delta; 2385 abs = cur->pkts / (double)(cur->events); 2386 2387 printf("Speed: %spps Bandwidth: %sbps (raw %sbps). 
Average batch: %.2f pkts\n", 2388 norm(b1, pps, normalize), norm(b2, bw, normalize), norm(b3, raw_bw, normalize), abs); 2389 } 2390 
/* usage: print the command-line help (text generated from the pkt-gen man page). */
2391 static void 2392 usage(int errcode) 2393 { 2394 /* This usage is generated from the pkt-gen man page: 2395 * $ man pkt-gen > x 2396 * and pasted here adding the string terminators and endlines with simple 2397 * regular expressions. */ 2398 const char *cmd = "pkt-gen"; 2399 fprintf(stderr, 2400 "Usage:\n" 2401 "%s arguments\n" 2402 " -h Show program usage and exit.\n" 2403 "\n" 2404 " -i interface\n" 2405 " Name of the network interface that pkt-gen operates on. It can be a system network interface\n" 2406 " (e.g., em0), the name of a vale(4) port (e.g., valeSSS:PPP), the name of a netmap pipe or\n" 2407 " monitor, or any valid netmap port name accepted by the nm_open library function, as docu-\n" 2408 " mented in netmap(4) (NIOCREGIF section).\n" 2409 "\n" 2410 " -f function\n" 2411 " The function to be executed by pkt-gen. Specify tx for transmission, rx for reception, ping\n" 2412 " for client-side ping-pong operation, and pong for server-side ping-pong operation.\n" 2413 "\n" 2414 " -n count\n" 2415 " Number of iterations of the pkt-gen function (with 0 meaning infinite). In case of tx or rx,\n" 2416 " count is the number of packets to receive or transmit. In case of ping or pong, count is the\n" 2417 " number of ping-pong transactions.\n" 2418 "\n" 2419 " -l pkt_size\n" 2420 " Packet size in bytes excluding CRC. 
If passed a second time, use random sizes larger or\n" 2421 " equal than the second one and lower than the first one.\n" 2422 "\n" 2423 " -b burst_size\n" 2424 " Transmit or receive up to burst_size packets at a time.\n" 2425 "\n" 2426 " -4 Use IPv4 addresses.\n" 2427 "\n" 2428 " -6 Use IPv6 addresses.\n" 2429 "\n" 2430 " -d dst_ip[:port[-dst_ip:port]]\n" 2431 " Destination IPv4/IPv6 address and port, single or range.\n" 2432 "\n" 2433 " -s src_ip[:port[-src_ip:port]]\n" 2434 " Source IPv4/IPv6 address and port, single or range.\n" 2435 "\n" 2436 " -D dst_mac\n" 2437 " Destination MAC address in colon notation (e.g., aa:bb:cc:dd:ee:00).\n" 2438 "\n" 2439 " -S src_mac\n" 2440 " Source MAC address in colon notation.\n" 2441 "\n" 2442 " -a cpu_id\n" 2443 " Pin the first thread of pkt-gen to a particular CPU using pthread_setaffinity_np(3). If more\n" 2444 " threads are used, they are pinned to the subsequent CPUs, one per thread.\n" 2445 "\n" 2446 " -c cpus\n" 2447 " Maximum number of CPUs to use (0 means to use all the available ones).\n" 2448 "\n" 2449 " -p threads\n" 2450 " Number of threads to use. By default, only a single thread is used to handle all the netmap\n" 2451 " rings. If threads is larger than one, each thread handles a single TX ring (in tx mode), a\n" 2452 " single RX ring (in rx mode), or a TX/RX ring pair. The number of threads must be less than or\n" 2453 " equal to the number of TX (or RX) rings available in the device specified by interface.\n" 2454 "\n" 2455 " -T report_ms\n" 2456 " Number of milliseconds between reports.\n" 2457 "\n" 2458 " -w wait_for_link_time\n" 2459 " Number of seconds to wait before starting the pkt-gen function, useful to make sure that the\n" 2460 " network link is up. A network device driver may take some time to enter netmap mode, or to\n" 2461 " create a new transmit/receive ring pair when netmap(4) requests one.\n" 2462 "\n" 2463 " -R rate\n" 2464 " Packet transmission rate. 
Not setting the packet transmission rate tells pkt-gen to transmit\n" 2465 " packets as quickly as possible. On servers from 2010 onward netmap(4) is able to com-\n" 2466 " pletely use all of the bandwidth of a 10 or 40Gbps link, so this option should be used unless\n" 2467 " your intention is to saturate the link.\n" 2468 "\n" 2469 " -X Dump payload of each packet transmitted or received.\n" 2470 "\n" 2471 " -H len Add empty virtio-net-header with size 'len'. Valid sizes are 0, 10 and 12. This option is\n" 2472 " only used with Virtual Machine technologies that use virtio as a network interface.\n" 2473 "\n" 2474 " -P file\n" 2475 " Load the packet to be transmitted from a pcap file rather than constructing it within\n" 2476 " pkt-gen.\n" 2477 "\n" 2478 " -z Use random IPv4/IPv6 src address/port.\n" 2479 "\n" 2480 " -Z Use random IPv4/IPv6 dst address/port.\n" 2481 "\n" 2482 " -N Do not normalize units (i.e., use bps, pps instead of Mbps, Kpps, etc.).\n" 2483 "\n" 2484 " -F num_frags\n" 2485 " Send multi-slot packets, each one with num_frags fragments. A multi-slot packet is repre-\n" 2486 " sented by two or more consecutive netmap slots with the NS_MOREFRAG flag set (except for the\n" 2487 " last slot). This is useful to transmit or receive packets larger than the netmap buffer\n" 2488 " size.\n" 2489 "\n" 2490 " -M frag_size\n" 2491 " In multi-slot mode, frag_size specifies the size of each fragment, if smaller than the packet\n" 2492 " length divided by num_frags.\n" 2493 "\n" 2494 " -I Use indirect buffers. 
It is only valid for transmitting on VALE ports, and it is implemented\n" 2495 " by setting the NS_INDIRECT flag in the netmap slots.\n" 2496 "\n" 2497 " -W Exit immediately if all the RX rings are empty the first time they are examined.\n" 2498 "\n" 2499 " -v Increase the verbosity level.\n" 2500 "\n" 2501 " -r In tx mode, do not initialize packets, but send whatever the content of the uninitialized\n" 2502 " netmap buffers is (rubbish mode).\n" 2503 "\n" 2504 " -A Compute mean and standard deviation (over a sliding window) for the transmit or receive rate.\n" 2505 "\n" 2506 " -B Take Ethernet framing and CRC into account when computing the average bps. This adds 4 bytes\n" 2507 " of CRC and 20 bytes of framing to each packet.\n" 2508 "\n" 2509 " -C tx_slots[,rx_slots[,tx_rings[,rx_rings]]]\n" 2510 " Configuration in terms of number of rings and slots to be used when opening the netmap port.\n" 2511 " Such configuration has an effect on software ports created on the fly, such as VALE ports and\n" 2512 " netmap pipes. The configuration may consist of 1 to 4 numbers separated by commas: tx_slots,\n" 2513 " rx_slots, tx_rings, rx_rings. Missing numbers or zeroes stand for default values. As an\n" 2514 " additional convenience, if exactly one number is specified, then this is assigned to both\n" 2515 " tx_slots and rx_slots. 
If there is no fourth number, then the third one is assigned to both\n" 2516 " tx_rings and rx_rings.\n" 2517 "\n" 2518 " -o options data generation options (parsed using atoi)\n" 2519 " OPT_PREFETCH 1\n" 2520 " OPT_ACCESS 2\n" 2521 " OPT_COPY 4\n" 2522 " OPT_MEMCPY 8\n" 2523 " OPT_TS 16 (add a timestamp)\n" 2524 " OPT_INDIRECT 32 (use indirect buffers)\n" 2525 " OPT_DUMP 64 (dump rx/tx traffic)\n" 2526 " OPT_RUBBISH 256\n" 2527 " (send whatever the buffers contain)\n" 2528 " OPT_RANDOM_SRC 512\n" 2529 " OPT_RANDOM_DST 1024\n" 2530 " OPT_PPS_STATS 2048\n" 2531 "", 2532 cmd); 2533 exit(errcode); 2534 } 2535 2536 static int 2537 start_threads(struct glob_arg *g) { 2538 int i; 2539 2540 targs = calloc(g->nthreads, sizeof(*targs)); 2541 struct targ *t; 2542 /* 2543 * Now create the desired number of threads, each one 2544 * using a single descriptor. 2545 */ 2546 for (i = 0; i < g->nthreads; i++) { 2547 uint64_t seed = (uint64_t)time(0) | ((uint64_t)time(0) << 32); 2548 t = &targs[i]; 2549 2550 bzero(t, sizeof(*t)); 2551 t->fd = -1; /* default, with pcap */ 2552 t->g = g; 2553 memcpy(t->seed, &seed, sizeof(t->seed)); 2554 2555 if (g->dev_type == DEV_NETMAP) { 2556 int m = -1; 2557 2558 /* 2559 * if the user wants both HW and SW rings, we need to 2560 * know when to switch from NR_REG_ONE_NIC to NR_REG_ONE_SW 2561 */ 2562 if (g->orig_mode == NR_REG_NIC_SW) { 2563 m = (g->td_type == TD_TYPE_RECEIVER ? 
2564 g->nmd->reg.nr_rx_rings : 2565 g->nmd->reg.nr_tx_rings); 2566 } 2567 2568 if (i > 0) { 2569 int j; 2570 /* the first thread uses the fd opened by the main 2571 * thread, the other threads re-open /dev/netmap 2572 */ 2573 t->nmd = nmport_clone(g->nmd); 2574 if (t->nmd == NULL) 2575 return -1; 2576 2577 j = i; 2578 if (m > 0 && j >= m) { 2579 /* switch to the software rings */ 2580 t->nmd->reg.nr_mode = NR_REG_ONE_SW; 2581 j -= m; 2582 } 2583 t->nmd->reg.nr_ringid = j & NETMAP_RING_MASK; 2584 /* Only touch one of the rings (rx is already ok) */ 2585 if (g->td_type == TD_TYPE_RECEIVER) 2586 t->nmd->reg.nr_flags |= NETMAP_NO_TX_POLL; 2587 2588 /* register interface. Override ifname and ringid etc. */ 2589 if (nmport_open_desc(t->nmd) < 0) { 2590 nmport_undo_prepare(t->nmd); 2591 t->nmd = NULL; 2592 return -1; 2593 } 2594 } else { 2595 t->nmd = g->nmd; 2596 } 2597 t->fd = t->nmd->fd; 2598 t->frags = g->frags; 2599 } else { 2600 targs[i].fd = g->main_fd; 2601 } 2602 t->used = 1; 2603 t->me = i; 2604 if (g->affinity >= 0) { 2605 t->affinity = (g->affinity + i) % g->cpus; 2606 } else { 2607 t->affinity = -1; 2608 } 2609 /* default, init packets */ 2610 initialize_packet(t); 2611 } 2612 /* Wait for PHY reset. 
*/ 2613 D("Wait %d secs for phy reset", g->wait_link); 2614 sleep(g->wait_link); 2615 D("Ready..."); 2616 2617 for (i = 0; i < g->nthreads; i++) { 2618 t = &targs[i]; 2619 if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) { 2620 D("Unable to create thread %d: %s", i, strerror(errno)); 2621 t->used = 0; 2622 } 2623 } 2624 return 0; 2625 } 2626 2627 static void 2628 main_thread(struct glob_arg *g) 2629 { 2630 int i; 2631 2632 struct my_ctrs prev, cur; 2633 double delta_t; 2634 struct timeval tic, toc; 2635 2636 prev.pkts = prev.bytes = prev.events = 0; 2637 gettimeofday(&prev.t, NULL); 2638 for (;;) { 2639 char b1[40], b2[40], b3[40], b4[100]; 2640 uint64_t pps, usec; 2641 struct my_ctrs x; 2642 double abs; 2643 int done = 0; 2644 2645 usec = wait_for_next_report(&prev.t, &cur.t, 2646 g->report_interval); 2647 2648 cur.pkts = cur.bytes = cur.events = 0; 2649 cur.min_space = 0; 2650 if (usec < 10000) /* too short to be meaningful */ 2651 continue; 2652 /* accumulate counts for all threads */ 2653 for (i = 0; i < g->nthreads; i++) { 2654 cur.pkts += targs[i].ctr.pkts; 2655 cur.bytes += targs[i].ctr.bytes; 2656 cur.events += targs[i].ctr.events; 2657 cur.min_space += targs[i].ctr.min_space; 2658 targs[i].ctr.min_space = 99999; 2659 if (targs[i].used == 0) 2660 done++; 2661 } 2662 x.pkts = cur.pkts - prev.pkts; 2663 x.bytes = cur.bytes - prev.bytes; 2664 x.events = cur.events - prev.events; 2665 pps = (x.pkts*1000000 + usec/2) / usec; 2666 abs = (x.events > 0) ? (x.pkts / (double) x.events) : 0; 2667 2668 if (!(g->options & OPT_PPS_STATS)) { 2669 strcpy(b4, ""); 2670 } else { 2671 /* Compute some pps stats using a sliding window. 
*/ 2672 double ppsavg = 0.0, ppsdev = 0.0; 2673 int nsamples = 0; 2674 2675 g->win[g->win_idx] = pps; 2676 g->win_idx = (g->win_idx + 1) % STATS_WIN; 2677 2678 for (i = 0; i < STATS_WIN; i++) { 2679 ppsavg += g->win[i]; 2680 if (g->win[i]) { 2681 nsamples ++; 2682 } 2683 } 2684 ppsavg /= nsamples; 2685 2686 for (i = 0; i < STATS_WIN; i++) { 2687 if (g->win[i] == 0) { 2688 continue; 2689 } 2690 ppsdev += (g->win[i] - ppsavg) * (g->win[i] - ppsavg); 2691 } 2692 ppsdev /= nsamples; 2693 ppsdev = sqrt(ppsdev); 2694 2695 snprintf(b4, sizeof(b4), "[avg/std %s/%s pps]", 2696 norm(b1, ppsavg, normalize), norm(b2, ppsdev, normalize)); 2697 } 2698 2699 D("%spps %s(%spkts %sbps in %llu usec) %.2f avg_batch %d min_space", 2700 norm(b1, pps, normalize), b4, 2701 norm(b2, (double)x.pkts, normalize), 2702 norm(b3, 1000000*((double)x.bytes*8+(double)x.pkts*g->framing)/usec, normalize), 2703 (unsigned long long)usec, 2704 abs, (int)cur.min_space); 2705 prev = cur; 2706 2707 if (done == g->nthreads) 2708 break; 2709 } 2710 2711 timerclear(&tic); 2712 timerclear(&toc); 2713 cur.pkts = cur.bytes = cur.events = 0; 2714 /* final round */ 2715 for (i = 0; i < g->nthreads; i++) { 2716 struct timespec t_tic, t_toc; 2717 /* 2718 * Join active threads, unregister interfaces and close 2719 * file descriptors. 2720 */ 2721 if (targs[i].used) 2722 pthread_join(targs[i].thread, NULL); /* blocking */ 2723 if (g->dev_type == DEV_NETMAP) { 2724 nmport_close(targs[i].nmd); 2725 targs[i].nmd = NULL; 2726 } else { 2727 close(targs[i].fd); 2728 } 2729 2730 if (targs[i].completed == 0) 2731 D("ouch, thread %d exited with error", i); 2732 2733 /* 2734 * Collect threads output and extract information about 2735 * how long it took to send all the packets. 
2736 */ 2737 cur.pkts += targs[i].ctr.pkts; 2738 cur.bytes += targs[i].ctr.bytes; 2739 cur.events += targs[i].ctr.events; 2740 /* collect the largest start (tic) and end (toc) times, 2741 * XXX maybe we should do the earliest tic, or do a weighted 2742 * average ? 2743 */ 2744 t_tic = timeval2spec(&tic); 2745 t_toc = timeval2spec(&toc); 2746 if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic)) 2747 tic = timespec2val(&targs[i].tic); 2748 if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc)) 2749 toc = timespec2val(&targs[i].toc); 2750 } 2751 2752 /* print output. */ 2753 timersub(&toc, &tic, &toc); 2754 delta_t = toc.tv_sec + 1e-6* toc.tv_usec; 2755 if (g->td_type == TD_TYPE_SENDER) 2756 tx_output(g, &cur, delta_t, "Sent"); 2757 else if (g->td_type == TD_TYPE_RECEIVER) 2758 tx_output(g, &cur, delta_t, "Received"); 2759 } 2760 2761 struct td_desc { 2762 int ty; 2763 const char *key; 2764 void *f; 2765 int default_burst; 2766 }; 2767 2768 static struct td_desc func[] = { 2769 { TD_TYPE_RECEIVER, "rx", receiver_body, 512}, /* default */ 2770 { TD_TYPE_SENDER, "tx", sender_body, 512 }, 2771 { TD_TYPE_OTHER, "ping", ping_body, 1 }, 2772 { TD_TYPE_OTHER, "pong", pong_body, 1 }, 2773 { TD_TYPE_SENDER, "txseq", txseq_body, 512 }, 2774 { TD_TYPE_RECEIVER, "rxseq", rxseq_body, 512 }, 2775 { 0, NULL, NULL, 0 } 2776 }; 2777 2778 static int 2779 tap_alloc(char *dev) 2780 { 2781 struct ifreq ifr; 2782 int fd, err; 2783 const char *clonedev = TAP_CLONEDEV; 2784 2785 (void)err; 2786 (void)dev; 2787 /* Arguments taken by the function: 2788 * 2789 * char *dev: the name of an interface (or '\0'). MUST have enough 2790 * space to hold the interface name if '\0' is passed 2791 * int flags: interface flags (eg, IFF_TUN etc.) 
2792 */ 2793 2794 #ifdef __FreeBSD__ 2795 if (dev[3]) { /* tapSomething */ 2796 static char buf[128]; 2797 snprintf(buf, sizeof(buf), "/dev/%s", dev); 2798 clonedev = buf; 2799 } 2800 #endif 2801 /* open the device */ 2802 if( (fd = open(clonedev, O_RDWR)) < 0 ) { 2803 return fd; 2804 } 2805 D("%s open successful", clonedev); 2806 2807 /* preparation of the struct ifr, of type "struct ifreq" */ 2808 memset(&ifr, 0, sizeof(ifr)); 2809 2810 #ifdef linux 2811 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 2812 2813 if (*dev) { 2814 /* if a device name was specified, put it in the structure; otherwise, 2815 * the kernel will try to allocate the "next" device of the 2816 * specified type */ 2817 size_t len = strlen(dev); 2818 if (len > IFNAMSIZ) { 2819 D("%s too long", dev); 2820 return -1; 2821 } 2822 memcpy(ifr.ifr_name, dev, len); 2823 } 2824 2825 /* try to create the device */ 2826 if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) { 2827 D("failed to do a TUNSETIFF: %s", strerror(errno)); 2828 close(fd); 2829 return err; 2830 } 2831 2832 /* if the operation was successful, write back the name of the 2833 * interface to the variable "dev", so the caller can know 2834 * it. 
Note that the caller MUST reserve space in *dev (see calling 2835 * code below) */ 2836 strcpy(dev, ifr.ifr_name); 2837 D("new name is %s", dev); 2838 #endif /* linux */ 2839 2840 /* this is the special file descriptor that the caller will use to talk 2841 * with the virtual interface */ 2842 return fd; 2843 } 2844 2845 int 2846 main(int arc, char **argv) 2847 { 2848 int i; 2849 struct sigaction sa; 2850 sigset_t ss; 2851 2852 struct glob_arg g; 2853 2854 int ch; 2855 int devqueues = 1; /* how many device queues */ 2856 int wait_link_arg = 0; 2857 2858 int pkt_size_done = 0; 2859 2860 struct td_desc *fn = func; 2861 2862 bzero(&g, sizeof(g)); 2863 2864 g.main_fd = -1; 2865 g.td_body = fn->f; 2866 g.td_type = fn->ty; 2867 g.report_interval = 1000; /* report interval */ 2868 g.affinity = -1; 2869 /* ip addresses can also be a range x.x.x.x-x.x.x.y */ 2870 g.af = AF_INET; /* default */ 2871 g.src_ip.name = "10.0.0.1"; 2872 g.dst_ip.name = "10.1.0.1"; 2873 g.dst_mac.name = "ff:ff:ff:ff:ff:ff"; 2874 g.src_mac.name = NULL; 2875 g.pkt_size = 60; 2876 g.pkt_min_size = 0; 2877 g.nthreads = 1; 2878 g.cpus = 1; /* default */ 2879 g.forever = 1; 2880 g.tx_rate = 0; 2881 g.frags = 1; 2882 g.frag_size = (u_int)-1; /* use the netmap buffer size by default */ 2883 g.nmr_config = ""; 2884 g.virt_header = 0; 2885 g.wait_link = 2; /* wait 2 seconds for physical ports */ 2886 2887 while ((ch = getopt(arc, argv, "46a:f:F:Nn:i:Il:d:s:D:S:b:c:o:p:" 2888 "T:w:WvR:XC:H:rP:zZAhBM:")) != -1) { 2889 2890 switch(ch) { 2891 default: 2892 D("bad option %c %s", ch, optarg); 2893 usage(-1); 2894 break; 2895 2896 case 'h': 2897 usage(0); 2898 break; 2899 2900 case '4': 2901 g.af = AF_INET; 2902 break; 2903 2904 case '6': 2905 g.af = AF_INET6; 2906 break; 2907 2908 case 'N': 2909 normalize = 0; 2910 break; 2911 2912 case 'n': 2913 g.npackets = strtoull(optarg, NULL, 10); 2914 break; 2915 2916 case 'F': 2917 i = atoi(optarg); 2918 if (i < 1 || i > 63) { 2919 D("invalid frags %d [1..63], ignore", i); 
2920 break; 2921 } 2922 g.frags = i; 2923 break; 2924 2925 case 'M': 2926 g.frag_size = atoi(optarg); 2927 break; 2928 2929 case 'f': 2930 for (fn = func; fn->key; fn++) { 2931 if (!strcmp(fn->key, optarg)) 2932 break; 2933 } 2934 if (fn->key) { 2935 g.td_body = fn->f; 2936 g.td_type = fn->ty; 2937 } else { 2938 D("unrecognised function %s", optarg); 2939 } 2940 break; 2941 2942 case 'o': /* data generation options */ 2943 g.options |= atoi(optarg); 2944 break; 2945 2946 case 'a': /* force affinity */ 2947 g.affinity = atoi(optarg); 2948 break; 2949 2950 case 'i': /* interface */ 2951 /* a prefix of tap: netmap: or pcap: forces the mode. 2952 * otherwise we guess 2953 */ 2954 D("interface is %s", optarg); 2955 if (strlen(optarg) > MAX_IFNAMELEN - 8) { 2956 D("ifname too long %s", optarg); 2957 break; 2958 } 2959 strcpy(g.ifname, optarg); 2960 if (!strcmp(optarg, "null")) { 2961 g.dev_type = DEV_NETMAP; 2962 g.dummy_send = 1; 2963 } else if (!strncmp(optarg, "tap:", 4)) { 2964 g.dev_type = DEV_TAP; 2965 strcpy(g.ifname, optarg + 4); 2966 } else if (!strncmp(optarg, "pcap:", 5)) { 2967 g.dev_type = DEV_PCAP; 2968 strcpy(g.ifname, optarg + 5); 2969 } else if (!strncmp(optarg, "netmap:", 7) || 2970 !strncmp(optarg, "vale", 4)) { 2971 g.dev_type = DEV_NETMAP; 2972 } else if (!strncmp(optarg, "tap", 3)) { 2973 g.dev_type = DEV_TAP; 2974 } else { /* prepend netmap: */ 2975 g.dev_type = DEV_NETMAP; 2976 sprintf(g.ifname, "netmap:%s", optarg); 2977 } 2978 break; 2979 2980 case 'I': 2981 g.options |= OPT_INDIRECT; /* use indirect buffers */ 2982 break; 2983 2984 case 'l': /* pkt_size */ 2985 if (pkt_size_done) { 2986 g.pkt_min_size = atoi(optarg); 2987 } else { 2988 g.pkt_size = atoi(optarg); 2989 pkt_size_done = 1; 2990 } 2991 break; 2992 2993 case 'd': 2994 g.dst_ip.name = optarg; 2995 break; 2996 2997 case 's': 2998 g.src_ip.name = optarg; 2999 break; 3000 3001 case 'T': /* report interval */ 3002 g.report_interval = atoi(optarg); 3003 break; 3004 3005 case 'w': 3006 
g.wait_link = atoi(optarg); 3007 wait_link_arg = 1; 3008 break; 3009 3010 case 'W': 3011 g.forever = 0; /* exit RX with no traffic */ 3012 break; 3013 3014 case 'b': /* burst */ 3015 g.burst = atoi(optarg); 3016 break; 3017 case 'c': 3018 g.cpus = atoi(optarg); 3019 break; 3020 case 'p': 3021 g.nthreads = atoi(optarg); 3022 break; 3023 3024 case 'D': /* destination mac */ 3025 g.dst_mac.name = optarg; 3026 break; 3027 3028 case 'S': /* source mac */ 3029 g.src_mac.name = optarg; 3030 break; 3031 case 'v': 3032 verbose++; 3033 break; 3034 case 'R': 3035 g.tx_rate = atoi(optarg); 3036 break; 3037 case 'X': 3038 g.options |= OPT_DUMP; 3039 break; 3040 case 'C': 3041 D("WARNING: the 'C' option is deprecated, use the '+conf:' libnetmap option instead"); 3042 g.nmr_config = strdup(optarg); 3043 break; 3044 case 'H': 3045 g.virt_header = atoi(optarg); 3046 break; 3047 case 'P': 3048 g.packet_file = strdup(optarg); 3049 break; 3050 case 'r': 3051 g.options |= OPT_RUBBISH; 3052 break; 3053 case 'z': 3054 g.options |= OPT_RANDOM_SRC; 3055 break; 3056 case 'Z': 3057 g.options |= OPT_RANDOM_DST; 3058 break; 3059 case 'A': 3060 g.options |= OPT_PPS_STATS; 3061 break; 3062 case 'B': 3063 /* raw packets have4 bytes crc + 20 bytes framing */ 3064 // XXX maybe add an option to pass the IFG 3065 g.framing = 24 * 8; 3066 break; 3067 } 3068 } 3069 3070 if (strlen(g.ifname) <=0 ) { 3071 D("missing ifname"); 3072 usage(-1); 3073 } 3074 3075 if (g.burst == 0) { 3076 g.burst = fn->default_burst; 3077 D("using default burst size: %d", g.burst); 3078 } 3079 3080 g.system_cpus = i = system_ncpus(); 3081 if (g.cpus < 0 || g.cpus > i) { 3082 D("%d cpus is too high, have only %d cpus", g.cpus, i); 3083 usage(-1); 3084 } 3085 D("running on %d cpus (have %d)", g.cpus, i); 3086 if (g.cpus == 0) 3087 g.cpus = i; 3088 3089 if (!wait_link_arg && !strncmp(g.ifname, "vale", 4)) { 3090 g.wait_link = 0; 3091 } 3092 3093 if (g.pkt_size < 16 || g.pkt_size > MAX_PKTSIZE) { 3094 D("bad pktsize %d 
[16..%d]\n", g.pkt_size, MAX_PKTSIZE); 3095 usage(-1); 3096 } 3097 3098 if (g.pkt_min_size > 0 && (g.pkt_min_size < 16 || g.pkt_min_size > g.pkt_size)) { 3099 D("bad pktminsize %d [16..%d]\n", g.pkt_min_size, g.pkt_size); 3100 usage(-1); 3101 } 3102 3103 if (g.src_mac.name == NULL) { 3104 static char mybuf[20] = "00:00:00:00:00:00"; 3105 /* retrieve source mac address. */ 3106 if (source_hwaddr(g.ifname, mybuf) == -1) { 3107 D("Unable to retrieve source mac"); 3108 // continue, fail later 3109 } 3110 g.src_mac.name = mybuf; 3111 } 3112 /* extract address ranges */ 3113 if (extract_mac_range(&g.src_mac) || extract_mac_range(&g.dst_mac)) 3114 usage(-1); 3115 g.options |= extract_ip_range(&g.src_ip, g.af); 3116 g.options |= extract_ip_range(&g.dst_ip, g.af); 3117 3118 if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1 3119 && g.virt_header != VIRT_HDR_2) { 3120 D("bad virtio-net-header length"); 3121 usage(-1); 3122 } 3123 3124 if (g.dev_type == DEV_TAP) { 3125 D("want to use tap %s", g.ifname); 3126 g.main_fd = tap_alloc(g.ifname); 3127 if (g.main_fd < 0) { 3128 D("cannot open tap %s", g.ifname); 3129 usage(-1); 3130 } 3131 #ifndef NO_PCAP 3132 } else if (g.dev_type == DEV_PCAP) { 3133 char pcap_errbuf[PCAP_ERRBUF_SIZE]; 3134 3135 pcap_errbuf[0] = '\0'; // init the buffer 3136 g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf); 3137 if (g.p == NULL) { 3138 D("cannot open pcap on %s", g.ifname); 3139 usage(-1); 3140 } 3141 g.main_fd = pcap_fileno(g.p); 3142 D("using pcap on %s fileno %d", g.ifname, g.main_fd); 3143 #endif /* !NO_PCAP */ 3144 } else if (g.dummy_send) { /* but DEV_NETMAP */ 3145 D("using a dummy send routine"); 3146 } else { 3147 g.nmd = nmport_prepare(g.ifname); 3148 if (g.nmd == NULL) 3149 goto out; 3150 3151 parse_nmr_config(g.nmr_config, &g.nmd->reg); 3152 3153 g.nmd->reg.nr_flags |= NR_ACCEPT_VNET_HDR; 3154 3155 /* 3156 * Open the netmap device using nm_open(). 
3157 * 3158 * protocol stack and may cause a reset of the card, 3159 * which in turn may take some time for the PHY to 3160 * reconfigure. We do the open here to have time to reset. 3161 */ 3162 g.orig_mode = g.nmd->reg.nr_mode; 3163 if (g.nthreads > 1) { 3164 switch (g.orig_mode) { 3165 case NR_REG_ALL_NIC: 3166 case NR_REG_NIC_SW: 3167 g.nmd->reg.nr_mode = NR_REG_ONE_NIC; 3168 break; 3169 case NR_REG_SW: 3170 g.nmd->reg.nr_mode = NR_REG_ONE_SW; 3171 break; 3172 default: 3173 break; 3174 } 3175 g.nmd->reg.nr_ringid = 0; 3176 } 3177 if (nmport_open_desc(g.nmd) < 0) 3178 goto out; 3179 g.main_fd = g.nmd->fd; 3180 ND("mapped %luKB at %p", (unsigned long)(g.nmd->req.nr_memsize>>10), 3181 g.nmd->mem); 3182 3183 if (g.virt_header) { 3184 /* Set the virtio-net header length, since the user asked 3185 * for it explicitly. */ 3186 set_vnet_hdr_len(&g); 3187 } else { 3188 /* Check whether the netmap port we opened requires us to send 3189 * and receive frames with virtio-net header. */ 3190 get_vnet_hdr_len(&g); 3191 } 3192 3193 /* get num of queues in tx or rx */ 3194 if (g.td_type == TD_TYPE_SENDER) 3195 devqueues = g.nmd->reg.nr_tx_rings + g.nmd->reg.nr_host_tx_rings; 3196 else 3197 devqueues = g.nmd->reg.nr_rx_rings + g.nmd->reg.nr_host_rx_rings; 3198 3199 /* validate provided nthreads. 
*/ 3200 if (g.nthreads < 1 || g.nthreads > devqueues) { 3201 D("bad nthreads %d, have %d queues", g.nthreads, devqueues); 3202 // continue, fail later 3203 } 3204 3205 if (g.td_type == TD_TYPE_SENDER) { 3206 int mtu = get_if_mtu(&g); 3207 3208 if (mtu > 0 && g.pkt_size > mtu) { 3209 D("pkt_size (%d) must be <= mtu (%d)", 3210 g.pkt_size, mtu); 3211 return -1; 3212 } 3213 } 3214 3215 if (verbose) { 3216 struct netmap_if *nifp = g.nmd->nifp; 3217 struct nmreq_register *req = &g.nmd->reg; 3218 3219 D("nifp at offset %"PRIu64" ntxqs %d nrxqs %d memid %d", 3220 req->nr_offset, req->nr_tx_rings, req->nr_rx_rings, 3221 req->nr_mem_id); 3222 for (i = 0; i < req->nr_tx_rings + req->nr_host_tx_rings; i++) { 3223 struct netmap_ring *ring = NETMAP_TXRING(nifp, i); 3224 D(" TX%d at offset %p slots %d", i, 3225 (void *)((char *)ring - (char *)nifp), ring->num_slots); 3226 } 3227 for (i = 0; i < req->nr_rx_rings + req->nr_host_rx_rings; i++) { 3228 struct netmap_ring *ring = NETMAP_RXRING(nifp, i); 3229 D(" RX%d at offset %p slots %d", i, 3230 (void *)((char *)ring - (char *)nifp), ring->num_slots); 3231 } 3232 } 3233 3234 /* Print some debug information. */ 3235 fprintf(stdout, 3236 "%s %s: %d queues, %d threads and %d cpus.\n", 3237 (g.td_type == TD_TYPE_SENDER) ? "Sending on" : 3238 ((g.td_type == TD_TYPE_RECEIVER) ? "Receiving from" : 3239 "Working on"), 3240 g.ifname, 3241 devqueues, 3242 g.nthreads, 3243 g.cpus); 3244 if (g.td_type == TD_TYPE_SENDER) { 3245 fprintf(stdout, "%s -> %s (%s -> %s)\n", 3246 g.src_ip.name, g.dst_ip.name, 3247 g.src_mac.name, g.dst_mac.name); 3248 } 3249 3250 out: 3251 /* Exit if something went wrong. */ 3252 if (g.main_fd < 0) { 3253 D("aborting"); 3254 usage(-1); 3255 } 3256 } 3257 3258 3259 if (g.options) { 3260 D("--- SPECIAL OPTIONS:%s%s%s%s%s%s\n", 3261 g.options & OPT_PREFETCH ? " prefetch" : "", 3262 g.options & OPT_ACCESS ? " access" : "", 3263 g.options & OPT_MEMCPY ? " memcpy" : "", 3264 g.options & OPT_INDIRECT ? 
" indirect" : "", 3265 g.options & OPT_COPY ? " copy" : "", 3266 g.options & OPT_RUBBISH ? " rubbish " : ""); 3267 } 3268 3269 g.tx_period.tv_sec = g.tx_period.tv_nsec = 0; 3270 if (g.tx_rate > 0) { 3271 /* try to have at least something every second, 3272 * reducing the burst size to some 0.01s worth of data 3273 * (but no less than one full set of fragments) 3274 */ 3275 uint64_t x; 3276 int lim = (g.tx_rate)/300; 3277 if (g.burst > lim) 3278 g.burst = lim; 3279 if (g.burst == 0) 3280 g.burst = 1; 3281 x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate; 3282 g.tx_period.tv_nsec = x; 3283 g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000; 3284 g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000; 3285 } 3286 if (g.td_type == TD_TYPE_SENDER) 3287 D("Sending %d packets every %ld.%09ld s", 3288 g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec); 3289 /* Install ^C handler. */ 3290 global_nthreads = g.nthreads; 3291 sigemptyset(&ss); 3292 sigaddset(&ss, SIGINT); 3293 /* block SIGINT now, so that all created threads will inherit the mask */ 3294 if (pthread_sigmask(SIG_BLOCK, &ss, NULL) < 0) { 3295 D("failed to block SIGINT: %s", strerror(errno)); 3296 } 3297 if (start_threads(&g) < 0) 3298 return 1; 3299 /* Install the handler and re-enable SIGINT for the main thread */ 3300 memset(&sa, 0, sizeof(sa)); 3301 sa.sa_handler = sigint_h; 3302 if (sigaction(SIGINT, &sa, NULL) < 0) { 3303 D("failed to install ^C handler: %s", strerror(errno)); 3304 } 3305 3306 if (pthread_sigmask(SIG_UNBLOCK, &ss, NULL) < 0) { 3307 D("failed to re-enable SIGINT: %s", strerror(errno)); 3308 } 3309 main_thread(&g); 3310 free(targs); 3311 return 0; 3312 } 3313 3314 /* end of file */ 3315