1 /* 2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 3 * Copyright (C) 2013-2015 Universita` di Pisa. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 /* 28 * $FreeBSD$ 29 * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $ 30 * 31 * Example program to show how to build a multithreaded packet 32 * source/sink using the netmap device. 33 * 34 * In this example we create a programmable number of threads 35 * to take care of all the queues of the interface used to 36 * send or receive traffic. 37 * 38 */ 39 40 #define _GNU_SOURCE /* for CPU_SET() */ 41 #include <stdio.h> 42 #define NETMAP_WITH_LIBS 43 #include <net/netmap_user.h> 44 45 46 #include <ctype.h> // isprint() 47 #include <unistd.h> // sysconf() 48 #include <sys/poll.h> 49 #include <arpa/inet.h> /* ntohs */ 50 #ifndef _WIN32 51 #include <sys/sysctl.h> /* sysctl */ 52 #endif 53 #include <ifaddrs.h> /* getifaddrs */ 54 #include <net/ethernet.h> 55 #include <netinet/in.h> 56 #include <netinet/ip.h> 57 #include <netinet/udp.h> 58 #include <netinet/ip6.h> 59 #ifdef linux 60 #define IPV6_VERSION 0x60 61 #define IPV6_DEFHLIM 64 62 #endif 63 #include <assert.h> 64 #include <math.h> 65 66 #include <pthread.h> 67 68 #ifndef NO_PCAP 69 #include <pcap/pcap.h> 70 #endif 71 72 #include "ctrs.h" 73 74 static void usage(int); 75 76 #ifdef _WIN32 77 #define cpuset_t DWORD_PTR //uint64_t 78 static inline void CPU_ZERO(cpuset_t *p) 79 { 80 *p = 0; 81 } 82 83 static inline void CPU_SET(uint32_t i, cpuset_t *p) 84 { 85 *p |= 1<< (i & 0x3f); 86 } 87 88 #define pthread_setaffinity_np(a, b, c) !SetThreadAffinityMask(a, *c) //((void)a, 0) 89 #define TAP_CLONEDEV "/dev/tap" 90 #define AF_LINK 18 //defined in winsocks.h 91 #define CLOCK_REALTIME_PRECISE CLOCK_REALTIME 92 #include <net/if_dl.h> 93 94 /* 95 * Convert an ASCII representation of an ethernet address to 96 * binary form. 97 */ 98 struct ether_addr * 99 ether_aton(const char *a) 100 { 101 int i; 102 static struct ether_addr o; 103 unsigned int o0, o1, o2, o3, o4, o5; 104 105 i = sscanf(a, "%x:%x:%x:%x:%x:%x", &o0, &o1, &o2, &o3, &o4, &o5); 106 107 if (i != 6) 108 return (NULL); 109 110 o.octet[0]=o0; 111 o.octet[1]=o1; 112 o.octet[2]=o2; 113 o.octet[3]=o3; 114 o.octet[4]=o4; 115 o.octet[5]=o5; 116 117 return ((struct ether_addr *)&o); 118 } 119 120 /* 121 * Convert a binary representation of an ethernet address to 122 * an ASCII string. 123 */ 124 char * 125 ether_ntoa(const struct ether_addr *n) 126 { 127 int i; 128 static char a[18]; 129 130 i = sprintf(a, "%02x:%02x:%02x:%02x:%02x:%02x", 131 n->octet[0], n->octet[1], n->octet[2], 132 n->octet[3], n->octet[4], n->octet[5]); 133 return (i < 17 ? NULL : (char *)&a); 134 } 135 #endif /* _WIN32 */ 136 137 #ifdef linux 138 139 #define cpuset_t cpu_set_t 140 141 #define ifr_flagshigh ifr_flags /* only the low 16 bits here */ 142 #define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */ 143 #include <linux/ethtool.h> 144 #include <linux/sockios.h> 145 146 #define CLOCK_REALTIME_PRECISE CLOCK_REALTIME 147 #include <netinet/ether.h> /* ether_aton */ 148 #include <linux/if_packet.h> /* sockaddr_ll */ 149 #endif /* linux */ 150 151 #ifdef __FreeBSD__ 152 #include <sys/endian.h> /* le64toh */ 153 #include <machine/param.h> 154 155 #include <pthread_np.h> /* pthread w/ affinity */ 156 #include <sys/cpuset.h> /* cpu_set */ 157 #include <net/if_dl.h> /* LLADDR */ 158 #endif /* __FreeBSD__ */ 159 160 #ifdef __APPLE__ 161 162 #define cpuset_t uint64_t // XXX 163 static inline void CPU_ZERO(cpuset_t *p) 164 { 165 *p = 0; 166 } 167 168 static inline void CPU_SET(uint32_t i, cpuset_t *p) 169 { 170 *p |= 1<< (i & 0x3f); 171 } 172 173 #define pthread_setaffinity_np(a, b, c) ((void)a, 0) 174 175 #define ifr_flagshigh ifr_flags // XXX 176 #define IFF_PPROMISC IFF_PROMISC 177 #include <net/if_dl.h> /* LLADDR */ 178 #define clock_gettime(a,b) \ 179 do {struct timespec t0 = {0,0}; *(b) = t0; } while (0) 180 #endif /* __APPLE__ */ 181 182 const char *default_payload="netmap pkt-gen DIRECT payload\n" 183 "http://info.iet.unipi.it/~luigi/netmap/ "; 184 185 const char *indirect_payload="netmap pkt-gen indirect payload\n" 186 "http://info.iet.unipi.it/~luigi/netmap/ "; 187 188 int verbose = 0; 189 int normalize = 1; 190 191 #define VIRT_HDR_1 10 /* length of a base vnet-hdr */ 192 #define VIRT_HDR_2 12 /* length of the extenede vnet-hdr */ 193 #define VIRT_HDR_MAX VIRT_HDR_2 194 struct virt_header { 195 uint8_t fields[VIRT_HDR_MAX]; 196 }; 197 198 #define MAX_BODYSIZE 65536 199 200 struct pkt { 201 struct virt_header vh; 202 struct ether_header eh; 203 union { 204 struct { 205 struct ip ip; 206 struct udphdr udp; 207 uint8_t body[MAX_BODYSIZE]; /* hardwired */ 208 } ipv4; 209 struct { 210 struct ip6_hdr ip; 211 struct udphdr udp; 212 uint8_t body[MAX_BODYSIZE]; /* hardwired */ 213 } ipv6; 214 }; 215 } __attribute__((__packed__)); 216 217 #define PKT(p, f, af) \ 218 ((af) == AF_INET ? (p)->ipv4.f: (p)->ipv6.f) 219 220 struct ip_range { 221 char *name; 222 union { 223 struct { 224 uint32_t start, end; /* same as struct in_addr */ 225 } ipv4; 226 struct { 227 struct in6_addr start, end; 228 uint8_t sgroup, egroup; 229 } ipv6; 230 }; 231 uint16_t port0, port1; 232 }; 233 234 struct mac_range { 235 char *name; 236 struct ether_addr start, end; 237 }; 238 239 /* ifname can be netmap:foo-xxxx */ 240 #define MAX_IFNAMELEN 64 /* our buffer for ifname */ 241 #define MAX_PKTSIZE MAX_BODYSIZE /* XXX: + IP_HDR + ETH_HDR */ 242 243 /* compact timestamp to fit into 60 byte packet. (enough to obtain RTT) */ 244 struct tstamp { 245 uint32_t sec; 246 uint32_t nsec; 247 }; 248 249 /* 250 * global arguments for all threads 251 */ 252 253 struct glob_arg { 254 int af; /* address family AF_INET/AF_INET6 */ 255 struct ip_range src_ip; 256 struct ip_range dst_ip; 257 struct mac_range dst_mac; 258 struct mac_range src_mac; 259 int pkt_size; 260 int pkt_min_size; 261 int burst; 262 int forever; 263 uint64_t npackets; /* total packets to send */ 264 int frags; /* fragments per packet */ 265 u_int frag_size; /* size of each fragment */ 266 int nthreads; 267 int cpus; /* cpus used for running */ 268 int system_cpus; /* cpus on the system */ 269 270 int options; /* testing */ 271 #define OPT_PREFETCH 1 272 #define OPT_ACCESS 2 273 #define OPT_COPY 4 274 #define OPT_MEMCPY 8 275 #define OPT_TS 16 /* add a timestamp */ 276 #define OPT_INDIRECT 32 /* use indirect buffers, tx only */ 277 #define OPT_DUMP 64 /* dump rx/tx traffic */ 278 #define OPT_RUBBISH 256 /* send wathever the buffers contain */ 279 #define OPT_RANDOM_SRC 512 280 #define OPT_RANDOM_DST 1024 281 #define OPT_PPS_STATS 2048 282 int dev_type; 283 #ifndef NO_PCAP 284 pcap_t *p; 285 #endif 286 287 int tx_rate; 288 struct timespec tx_period; 289 290 int affinity; 291 int main_fd; 292 struct nm_desc *nmd; 293 int report_interval; /* milliseconds between prints */ 294 void *(*td_body)(void *); 295 int td_type; 296 void *mmap_addr; 297 char ifname[MAX_IFNAMELEN]; 298 char *nmr_config; 299 int dummy_send; 300 int virt_header; /* send also the virt_header */ 301 char *packet_file; /* -P option */ 302 #define STATS_WIN 15 303 int win_idx; 304 int64_t win[STATS_WIN]; 305 int wait_link; 306 int framing; /* #bits of framing (for bw output) */ 307 }; 308 enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP }; 309 310 enum { 311 TD_TYPE_SENDER = 1, 312 TD_TYPE_RECEIVER, 313 TD_TYPE_OTHER, 314 }; 315 316 /* 317 * Arguments for a new thread. The same structure is used by 318 * the source and the sink 319 */ 320 struct targ { 321 struct glob_arg *g; 322 int used; 323 int completed; 324 int cancel; 325 int fd; 326 struct nm_desc *nmd; 327 /* these ought to be volatile, but they are 328 * only sampled and errors should not accumulate 329 */ 330 struct my_ctrs ctr; 331 332 struct timespec tic, toc; 333 int me; 334 pthread_t thread; 335 int affinity; 336 337 struct pkt pkt; 338 void *frame; 339 uint16_t seed[3]; 340 u_int frags; 341 u_int frag_size; 342 }; 343 344 static __inline uint16_t 345 cksum_add(uint16_t sum, uint16_t a) 346 { 347 uint16_t res; 348 349 res = sum + a; 350 return (res + (res < a)); 351 } 352 353 static void 354 extract_ipv4_addr(char *name, uint32_t *addr, uint16_t *port) 355 { 356 struct in_addr a; 357 char *pp; 358 359 pp = strchr(name, ':'); 360 if (pp != NULL) { /* do we have ports ? */ 361 *pp++ = '\0'; 362 *port = (uint16_t)strtol(pp, NULL, 0); 363 } 364 365 inet_pton(AF_INET, name, &a); 366 *addr = ntohl(a.s_addr); 367 } 368 369 static void 370 extract_ipv6_addr(char *name, struct in6_addr *addr, uint16_t *port, 371 uint8_t *group) 372 { 373 char *pp; 374 375 /* 376 * We accept IPv6 address in the following form: 377 * group@[2001:DB8::1001]:port (w/ brackets and port) 378 * group@[2001:DB8::1] (w/ brackets and w/o port) 379 * group@2001:DB8::1234 (w/o brackets and w/o port) 380 */ 381 pp = strchr(name, '@'); 382 if (pp != NULL) { 383 *pp++ = '\0'; 384 *group = (uint8_t)strtol(name, NULL, 0); 385 if (*group > 7) 386 *group = 7; 387 name = pp; 388 } 389 if (name[0] == '[') 390 name++; 391 pp = strchr(name, ']'); 392 if (pp != NULL) 393 *pp++ = '\0'; 394 if (pp != NULL && *pp != ':') 395 pp = NULL; 396 if (pp != NULL) { /* do we have ports ? */ 397 *pp++ = '\0'; 398 *port = (uint16_t)strtol(pp, NULL, 0); 399 } 400 inet_pton(AF_INET6, name, addr); 401 } 402 /* 403 * extract the extremes from a range of ipv4 addresses. 404 * addr_lo[-addr_hi][:port_lo[-port_hi]] 405 */ 406 static int 407 extract_ip_range(struct ip_range *r, int af) 408 { 409 char *name, *ap, start[INET6_ADDRSTRLEN]; 410 char end[INET6_ADDRSTRLEN]; 411 struct in_addr a; 412 uint32_t tmp; 413 414 if (verbose) 415 D("extract IP range from %s", r->name); 416 417 name = strdup(r->name); 418 if (name == NULL) { 419 D("strdup failed"); 420 usage(-1); 421 } 422 /* the first - splits start/end of range */ 423 ap = strchr(name, '-'); 424 if (ap != NULL) 425 *ap++ = '\0'; 426 r->port0 = 1234; /* default port */ 427 if (af == AF_INET6) { 428 r->ipv6.sgroup = 7; /* default group */ 429 extract_ipv6_addr(name, &r->ipv6.start, &r->port0, 430 &r->ipv6.sgroup); 431 } else 432 extract_ipv4_addr(name, &r->ipv4.start, &r->port0); 433 434 r->port1 = r->port0; 435 if (af == AF_INET6) { 436 if (ap != NULL) { 437 r->ipv6.egroup = r->ipv6.sgroup; 438 extract_ipv6_addr(ap, &r->ipv6.end, &r->port1, 439 &r->ipv6.egroup); 440 } else { 441 r->ipv6.end = r->ipv6.start; 442 r->ipv6.egroup = r->ipv6.sgroup; 443 } 444 } else { 445 if (ap != NULL) { 446 extract_ipv4_addr(ap, &r->ipv4.end, &r->port1); 447 if (r->ipv4.start > r->ipv4.end) { 448 tmp = r->ipv4.end; 449 r->ipv4.end = r->ipv4.start; 450 r->ipv4.start = tmp; 451 } 452 } else 453 r->ipv4.end = r->ipv4.start; 454 } 455 456 if (r->port0 > r->port1) { 457 tmp = r->port0; 458 r->port0 = r->port1; 459 r->port1 = tmp; 460 } 461 if (af == AF_INET) { 462 a.s_addr = htonl(r->ipv4.start); 463 inet_ntop(af, &a, start, sizeof(start)); 464 a.s_addr = htonl(r->ipv4.end); 465 inet_ntop(af, &a, end, sizeof(end)); 466 } else { 467 inet_ntop(af, &r->ipv6.start, start, sizeof(start)); 468 inet_ntop(af, &r->ipv6.end, end, sizeof(end)); 469 } 470 if (af == AF_INET) 471 D("range is %s:%d to %s:%d", start, r->port0, end, r->port1); 472 else 473 D("range is %d@[%s]:%d to %d@[%s]:%d", r->ipv6.sgroup, 474 start, r->port0, r->ipv6.egroup, end, r->port1); 475 476 free(name); 477 if (r->port0 != r->port1 || 478 (af == AF_INET && r->ipv4.start != r->ipv4.end) || 479 (af == AF_INET6 && 480 !IN6_ARE_ADDR_EQUAL(&r->ipv6.start, &r->ipv6.end))) 481 return (OPT_COPY); 482 return (0); 483 } 484 485 static int 486 extract_mac_range(struct mac_range *r) 487 { 488 struct ether_addr *e; 489 if (verbose) 490 D("extract MAC range from %s", r->name); 491 492 e = ether_aton(r->name); 493 if (e == NULL) { 494 D("invalid MAC address '%s'", r->name); 495 return 1; 496 } 497 bcopy(e, &r->start, 6); 498 bcopy(e, &r->end, 6); 499 #if 0 500 bcopy(targ->src_mac, eh->ether_shost, 6); 501 p = index(targ->g->src_mac, '-'); 502 if (p) 503 targ->src_mac_range = atoi(p+1); 504 505 bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6); 506 bcopy(targ->dst_mac, eh->ether_dhost, 6); 507 p = index(targ->g->dst_mac, '-'); 508 if (p) 509 targ->dst_mac_range = atoi(p+1); 510 #endif 511 if (verbose) 512 D("%s starts at %s", r->name, ether_ntoa(&r->start)); 513 return 0; 514 } 515 516 static int 517 get_if_mtu(const struct glob_arg *g) 518 { 519 char ifname[IFNAMSIZ]; 520 struct ifreq ifreq; 521 int s, ret; 522 523 if (!strncmp(g->ifname, "netmap:", 7) && !strchr(g->ifname, '{') 524 && !strchr(g->ifname, '}')) { 525 /* Parse the interface name and ask the kernel for the 526 * MTU value. */ 527 strncpy(ifname, g->ifname+7, IFNAMSIZ-1); 528 ifname[strcspn(ifname, "-*^{}/@")] = '\0'; 529 530 s = socket(AF_INET, SOCK_DGRAM, 0); 531 if (s < 0) { 532 D("socket() failed: %s", strerror(errno)); 533 return s; 534 } 535 536 memset(&ifreq, 0, sizeof(ifreq)); 537 strncpy(ifreq.ifr_name, ifname, IFNAMSIZ); 538 539 ret = ioctl(s, SIOCGIFMTU, &ifreq); 540 if (ret) { 541 D("ioctl(SIOCGIFMTU) failed: %s", strerror(errno)); 542 } 543 544 return ifreq.ifr_mtu; 545 } 546 547 /* This is a pipe or a VALE port, where the MTU is very large, 548 * so we use some practical limit. */ 549 return 65536; 550 } 551 552 static struct targ *targs; 553 static int global_nthreads; 554 555 /* control-C handler */ 556 static void 557 sigint_h(int sig) 558 { 559 int i; 560 561 (void)sig; /* UNUSED */ 562 D("received control-C on thread %p", (void *)pthread_self()); 563 for (i = 0; i < global_nthreads; i++) { 564 targs[i].cancel = 1; 565 } 566 } 567 568 /* sysctl wrapper to return the number of active CPUs */ 569 static int 570 system_ncpus(void) 571 { 572 int ncpus; 573 #if defined (__FreeBSD__) 574 int mib[2] = { CTL_HW, HW_NCPU }; 575 size_t len = sizeof(mib); 576 sysctl(mib, 2, &ncpus, &len, NULL, 0); 577 #elif defined(linux) 578 ncpus = sysconf(_SC_NPROCESSORS_ONLN); 579 #elif defined(_WIN32) 580 { 581 SYSTEM_INFO sysinfo; 582 GetSystemInfo(&sysinfo); 583 ncpus = sysinfo.dwNumberOfProcessors; 584 } 585 #else /* others */ 586 ncpus = 1; 587 #endif /* others */ 588 return (ncpus); 589 } 590 591 #ifdef __linux__ 592 #define sockaddr_dl sockaddr_ll 593 #define sdl_family sll_family 594 #define AF_LINK AF_PACKET 595 #define LLADDR(s) s->sll_addr; 596 #include <linux/if_tun.h> 597 #define TAP_CLONEDEV "/dev/net/tun" 598 #endif /* __linux__ */ 599 600 #ifdef __FreeBSD__ 601 #include <net/if_tun.h> 602 #define TAP_CLONEDEV "/dev/tap" 603 #endif /* __FreeBSD */ 604 605 #ifdef __APPLE__ 606 // #warning TAP not supported on apple ? 607 #include <net/if_utun.h> 608 #define TAP_CLONEDEV "/dev/tap" 609 #endif /* __APPLE__ */ 610 611 612 /* 613 * parse the vale configuration in conf and put it in nmr. 614 * Return the flag set if necessary. 615 * The configuration may consist of 1 to 4 numbers separated 616 * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings. 617 * Missing numbers or zeroes stand for default values. 618 * As an additional convenience, if exactly one number 619 * is specified, then this is assigned to both #tx-slots and #rx-slots. 620 * If there is no 4th number, then the 3rd is assigned to both #tx-rings 621 * and #rx-rings. 622 */ 623 int 624 parse_nmr_config(const char* conf, struct nmreq *nmr) 625 { 626 char *w, *tok; 627 int i, v; 628 629 if (conf == NULL || ! *conf) 630 return 0; 631 nmr->nr_tx_rings = nmr->nr_rx_rings = 0; 632 nmr->nr_tx_slots = nmr->nr_rx_slots = 0; 633 w = strdup(conf); 634 for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) { 635 v = atoi(tok); 636 switch (i) { 637 case 0: 638 nmr->nr_tx_slots = nmr->nr_rx_slots = v; 639 break; 640 case 1: 641 nmr->nr_rx_slots = v; 642 break; 643 case 2: 644 nmr->nr_tx_rings = nmr->nr_rx_rings = v; 645 break; 646 case 3: 647 nmr->nr_rx_rings = v; 648 break; 649 default: 650 D("ignored config: %s", tok); 651 break; 652 } 653 } 654 D("txr %d txd %d rxr %d rxd %d", 655 nmr->nr_tx_rings, nmr->nr_tx_slots, 656 nmr->nr_rx_rings, nmr->nr_rx_slots); 657 free(w); 658 return (nmr->nr_tx_rings || nmr->nr_tx_slots || 659 nmr->nr_rx_rings || nmr->nr_rx_slots) ? 660 NM_OPEN_RING_CFG : 0; 661 } 662 663 664 /* 665 * locate the src mac address for our interface, put it 666 * into the user-supplied buffer. return 0 if ok, -1 on error. 667 */ 668 static int 669 source_hwaddr(const char *ifname, char *buf) 670 { 671 struct ifaddrs *ifaphead, *ifap; 672 673 if (getifaddrs(&ifaphead) != 0) { 674 D("getifaddrs %s failed", ifname); 675 return (-1); 676 } 677 678 for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) { 679 struct sockaddr_dl *sdl = 680 (struct sockaddr_dl *)ifap->ifa_addr; 681 uint8_t *mac; 682 683 if (!sdl || sdl->sdl_family != AF_LINK) 684 continue; 685 if (strncmp(ifap->ifa_name, ifname, IFNAMSIZ) != 0) 686 continue; 687 mac = (uint8_t *)LLADDR(sdl); 688 sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x", 689 mac[0], mac[1], mac[2], 690 mac[3], mac[4], mac[5]); 691 if (verbose) 692 D("source hwaddr %s", buf); 693 break; 694 } 695 freeifaddrs(ifaphead); 696 return ifap ? 0 : 1; 697 } 698 699 700 /* set the thread affinity. */ 701 static int 702 setaffinity(pthread_t me, int i) 703 { 704 cpuset_t cpumask; 705 706 if (i == -1) 707 return 0; 708 709 /* Set thread affinity affinity.*/ 710 CPU_ZERO(&cpumask); 711 CPU_SET(i, &cpumask); 712 713 if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) { 714 D("Unable to set affinity: %s", strerror(errno)); 715 return 1; 716 } 717 return 0; 718 } 719 720 721 /* Compute the checksum of the given ip header. */ 722 static uint32_t 723 checksum(const void *data, uint16_t len, uint32_t sum) 724 { 725 const uint8_t *addr = data; 726 uint32_t i; 727 728 /* Checksum all the pairs of bytes first... */ 729 for (i = 0; i < (len & ~1U); i += 2) { 730 sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i))); 731 if (sum > 0xFFFF) 732 sum -= 0xFFFF; 733 } 734 /* 735 * If there's a single byte left over, checksum it, too. 736 * Network byte order is big-endian, so the remaining byte is 737 * the high byte. 738 */ 739 if (i < len) { 740 sum += addr[i] << 8; 741 if (sum > 0xFFFF) 742 sum -= 0xFFFF; 743 } 744 return sum; 745 } 746 747 static uint16_t 748 wrapsum(uint32_t sum) 749 { 750 sum = ~sum & 0xFFFF; 751 return (htons(sum)); 752 } 753 754 /* Check the payload of the packet for errors (use it for debug). 755 * Look for consecutive ascii representations of the size of the packet. 756 */ 757 static void 758 dump_payload(const char *_p, int len, struct netmap_ring *ring, int cur) 759 { 760 char buf[128]; 761 int i, j, i0; 762 const unsigned char *p = (const unsigned char *)_p; 763 764 /* get the length in ASCII of the length of the packet. */ 765 766 printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n", 767 ring, cur, ring->slot[cur].buf_idx, 768 ring->slot[cur].flags, len); 769 /* hexdump routine */ 770 for (i = 0; i < len; ) { 771 memset(buf, ' ', sizeof(buf)); 772 sprintf(buf, "%5d: ", i); 773 i0 = i; 774 for (j=0; j < 16 && i < len; i++, j++) 775 sprintf(buf+7+j*3, "%02x ", (uint8_t)(p[i])); 776 i = i0; 777 for (j=0; j < 16 && i < len; i++, j++) 778 sprintf(buf+7+j + 48, "%c", 779 isprint(p[i]) ? p[i] : '.'); 780 printf("%s\n", buf); 781 } 782 } 783 784 /* 785 * Fill a packet with some payload. 786 * We create a UDP packet so the payload starts at 787 * 14+20+8 = 42 bytes. 788 */ 789 #ifdef __linux__ 790 #define uh_sport source 791 #define uh_dport dest 792 #define uh_ulen len 793 #define uh_sum check 794 #endif /* linux */ 795 796 static void 797 update_ip(struct pkt *pkt, struct targ *t) 798 { 799 struct glob_arg *g = t->g; 800 struct ip ip; 801 struct udphdr udp; 802 uint32_t oaddr, naddr; 803 uint16_t oport, nport; 804 uint16_t ip_sum, udp_sum; 805 806 memcpy(&ip, &pkt->ipv4.ip, sizeof(ip)); 807 memcpy(&udp, &pkt->ipv4.udp, sizeof(udp)); 808 do { 809 ip_sum = udp_sum = 0; 810 naddr = oaddr = ntohl(ip.ip_src.s_addr); 811 nport = oport = ntohs(udp.uh_sport); 812 if (g->options & OPT_RANDOM_SRC) { 813 ip.ip_src.s_addr = nrand48(t->seed); 814 udp.uh_sport = nrand48(t->seed); 815 naddr = ntohl(ip.ip_src.s_addr); 816 nport = ntohs(udp.uh_sport); 817 break; 818 } 819 if (oport < g->src_ip.port1) { 820 nport = oport + 1; 821 udp.uh_sport = htons(nport); 822 break; 823 } 824 nport = g->src_ip.port0; 825 udp.uh_sport = htons(nport); 826 if (oaddr < g->src_ip.ipv4.end) { 827 naddr = oaddr + 1; 828 ip.ip_src.s_addr = htonl(naddr); 829 break; 830 } 831 naddr = g->src_ip.ipv4.start; 832 ip.ip_src.s_addr = htonl(naddr); 833 } while (0); 834 /* update checksums if needed */ 835 if (oaddr != naddr) { 836 ip_sum = cksum_add(ip_sum, ~oaddr >> 16); 837 ip_sum = cksum_add(ip_sum, ~oaddr & 0xffff); 838 ip_sum = cksum_add(ip_sum, naddr >> 16); 839 ip_sum = cksum_add(ip_sum, naddr & 0xffff); 840 } 841 if (oport != nport) { 842 udp_sum = cksum_add(udp_sum, ~oport); 843 udp_sum = cksum_add(udp_sum, nport); 844 } 845 do { 846 naddr = oaddr = ntohl(ip.ip_dst.s_addr); 847 nport = oport = ntohs(udp.uh_dport); 848 if (g->options & OPT_RANDOM_DST) { 849 ip.ip_dst.s_addr = nrand48(t->seed); 850 udp.uh_dport = nrand48(t->seed); 851 naddr = ntohl(ip.ip_dst.s_addr); 852 nport = ntohs(udp.uh_dport); 853 break; 854 } 855 if (oport < g->dst_ip.port1) { 856 nport = oport + 1; 857 udp.uh_dport = htons(nport); 858 break; 859 } 860 nport = g->dst_ip.port0; 861 udp.uh_dport = htons(nport); 862 if (oaddr < g->dst_ip.ipv4.end) { 863 naddr = oaddr + 1; 864 ip.ip_dst.s_addr = htonl(naddr); 865 break; 866 } 867 naddr = g->dst_ip.ipv4.start; 868 ip.ip_dst.s_addr = htonl(naddr); 869 } while (0); 870 /* update checksums */ 871 if (oaddr != naddr) { 872 ip_sum = cksum_add(ip_sum, ~oaddr >> 16); 873 ip_sum = cksum_add(ip_sum, ~oaddr & 0xffff); 874 ip_sum = cksum_add(ip_sum, naddr >> 16); 875 ip_sum = cksum_add(ip_sum, naddr & 0xffff); 876 } 877 if (oport != nport) { 878 udp_sum = cksum_add(udp_sum, ~oport); 879 udp_sum = cksum_add(udp_sum, nport); 880 } 881 if (udp_sum != 0) 882 udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(udp_sum)); 883 if (ip_sum != 0) { 884 ip.ip_sum = ~cksum_add(~ip.ip_sum, htons(ip_sum)); 885 udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(ip_sum)); 886 } 887 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip)); 888 memcpy(&pkt->ipv4.udp, &udp, sizeof(udp)); 889 } 890 891 #ifndef s6_addr16 892 #define s6_addr16 __u6_addr.__u6_addr16 893 #endif 894 static void 895 update_ip6(struct pkt *pkt, struct targ *t) 896 { 897 struct glob_arg *g = t->g; 898 struct ip6_hdr ip6; 899 struct udphdr udp; 900 uint16_t udp_sum; 901 uint16_t oaddr, naddr; 902 uint16_t oport, nport; 903 uint8_t group; 904 905 memcpy(&ip6, &pkt->ipv6.ip, sizeof(ip6)); 906 memcpy(&udp, &pkt->ipv6.udp, sizeof(udp)); 907 do { 908 udp_sum = 0; 909 group = g->src_ip.ipv6.sgroup; 910 naddr = oaddr = ntohs(ip6.ip6_src.s6_addr16[group]); 911 nport = oport = ntohs(udp.uh_sport); 912 if (g->options & OPT_RANDOM_SRC) { 913 ip6.ip6_src.s6_addr16[group] = nrand48(t->seed); 914 udp.uh_sport = nrand48(t->seed); 915 naddr = ntohs(ip6.ip6_src.s6_addr16[group]); 916 nport = ntohs(udp.uh_sport); 917 break; 918 } 919 if (oport < g->src_ip.port1) { 920 nport = oport + 1; 921 udp.uh_sport = htons(nport); 922 break; 923 } 924 nport = g->src_ip.port0; 925 udp.uh_sport = htons(nport); 926 if (oaddr < ntohs(g->src_ip.ipv6.end.s6_addr16[group])) { 927 naddr = oaddr + 1; 928 ip6.ip6_src.s6_addr16[group] = htons(naddr); 929 break; 930 } 931 naddr = ntohs(g->src_ip.ipv6.start.s6_addr16[group]); 932 ip6.ip6_src.s6_addr16[group] = htons(naddr); 933 } while (0); 934 /* update checksums if needed */ 935 if (oaddr != naddr) 936 udp_sum = cksum_add(~oaddr, naddr); 937 if (oport != nport) 938 udp_sum = cksum_add(udp_sum, 939 cksum_add(~oport, nport)); 940 do { 941 group = g->dst_ip.ipv6.egroup; 942 naddr = oaddr = ntohs(ip6.ip6_dst.s6_addr16[group]); 943 nport = oport = ntohs(udp.uh_dport); 944 if (g->options & OPT_RANDOM_DST) { 945 ip6.ip6_dst.s6_addr16[group] = nrand48(t->seed); 946 udp.uh_dport = nrand48(t->seed); 947 naddr = ntohs(ip6.ip6_dst.s6_addr16[group]); 948 nport = ntohs(udp.uh_dport); 949 break; 950 } 951 if (oport < g->dst_ip.port1) { 952 nport = oport + 1; 953 udp.uh_dport = htons(nport); 954 break; 955 } 956 nport = g->dst_ip.port0; 957 udp.uh_dport = htons(nport); 958 if (oaddr < ntohs(g->dst_ip.ipv6.end.s6_addr16[group])) { 959 naddr = oaddr + 1; 960 ip6.ip6_dst.s6_addr16[group] = htons(naddr); 961 break; 962 } 963 naddr = ntohs(g->dst_ip.ipv6.start.s6_addr16[group]); 964 ip6.ip6_dst.s6_addr16[group] = htons(naddr); 965 } while (0); 966 /* update checksums */ 967 if (oaddr != naddr) 968 udp_sum = cksum_add(udp_sum, 969 cksum_add(~oaddr, naddr)); 970 if (oport != nport) 971 udp_sum = cksum_add(udp_sum, 972 cksum_add(~oport, nport)); 973 if (udp_sum != 0) 974 udp.uh_sum = ~cksum_add(~udp.uh_sum, udp_sum); 975 memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6)); 976 memcpy(&pkt->ipv6.udp, &udp, sizeof(udp)); 977 } 978 979 static void 980 update_addresses(struct pkt *pkt, struct targ *t) 981 { 982 983 if (t->g->af == AF_INET) 984 update_ip(pkt, t); 985 else 986 update_ip6(pkt, t); 987 } 988 /* 989 * initialize one packet and prepare for the next one. 990 * The copy could be done better instead of repeating it each time. 991 */ 992 static void 993 initialize_packet(struct targ *targ) 994 { 995 struct pkt *pkt = &targ->pkt; 996 struct ether_header *eh; 997 struct ip6_hdr ip6; 998 struct ip ip; 999 struct udphdr udp; 1000 void *udp_ptr; 1001 uint16_t paylen; 1002 uint32_t csum = 0; 1003 const char *payload = targ->g->options & OPT_INDIRECT ? 1004 indirect_payload : default_payload; 1005 int i, l0 = strlen(payload); 1006 1007 #ifndef NO_PCAP 1008 char errbuf[PCAP_ERRBUF_SIZE]; 1009 pcap_t *file; 1010 struct pcap_pkthdr *header; 1011 const unsigned char *packet; 1012 1013 /* Read a packet from a PCAP file if asked. */ 1014 if (targ->g->packet_file != NULL) { 1015 if ((file = pcap_open_offline(targ->g->packet_file, 1016 errbuf)) == NULL) 1017 D("failed to open pcap file %s", 1018 targ->g->packet_file); 1019 if (pcap_next_ex(file, &header, &packet) < 0) 1020 D("failed to read packet from %s", 1021 targ->g->packet_file); 1022 if ((targ->frame = malloc(header->caplen)) == NULL) 1023 D("out of memory"); 1024 bcopy(packet, (unsigned char *)targ->frame, header->caplen); 1025 targ->g->pkt_size = header->caplen; 1026 pcap_close(file); 1027 return; 1028 } 1029 #endif 1030 1031 paylen = targ->g->pkt_size - sizeof(*eh) - 1032 (targ->g->af == AF_INET ? sizeof(ip): sizeof(ip6)); 1033 1034 /* create a nice NUL-terminated string */ 1035 for (i = 0; i < paylen; i += l0) { 1036 if (l0 > paylen - i) 1037 l0 = paylen - i; // last round 1038 bcopy(payload, PKT(pkt, body, targ->g->af) + i, l0); 1039 } 1040 PKT(pkt, body, targ->g->af)[i - 1] = '\0'; 1041 1042 /* prepare the headers */ 1043 eh = &pkt->eh; 1044 bcopy(&targ->g->src_mac.start, eh->ether_shost, 6); 1045 bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6); 1046 1047 if (targ->g->af == AF_INET) { 1048 eh->ether_type = htons(ETHERTYPE_IP); 1049 memcpy(&ip, &pkt->ipv4.ip, sizeof(ip)); 1050 udp_ptr = &pkt->ipv4.udp; 1051 ip.ip_v = IPVERSION; 1052 ip.ip_hl = sizeof(ip) >> 2; 1053 ip.ip_id = 0; 1054 ip.ip_tos = IPTOS_LOWDELAY; 1055 ip.ip_len = htons(targ->g->pkt_size - sizeof(*eh)); 1056 ip.ip_id = 0; 1057 ip.ip_off = htons(IP_DF); /* Don't fragment */ 1058 ip.ip_ttl = IPDEFTTL; 1059 ip.ip_p = IPPROTO_UDP; 1060 ip.ip_dst.s_addr = htonl(targ->g->dst_ip.ipv4.start); 1061 ip.ip_src.s_addr = htonl(targ->g->src_ip.ipv4.start); 1062 ip.ip_sum = wrapsum(checksum(&ip, sizeof(ip), 0)); 1063 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip)); 1064 } else { 1065 eh->ether_type = htons(ETHERTYPE_IPV6); 1066 memcpy(&ip6, &pkt->ipv4.ip, sizeof(ip6)); 1067 udp_ptr = &pkt->ipv6.udp; 1068 ip6.ip6_flow = 0; 1069 ip6.ip6_plen = htons(paylen); 1070 ip6.ip6_vfc = IPV6_VERSION; 1071 ip6.ip6_nxt = IPPROTO_UDP; 1072 ip6.ip6_hlim = IPV6_DEFHLIM; 1073 ip6.ip6_src = targ->g->src_ip.ipv6.start; 1074 ip6.ip6_dst = targ->g->dst_ip.ipv6.start; 1075 } 1076 memcpy(&udp, udp_ptr, sizeof(udp)); 1077 1078 udp.uh_sport = htons(targ->g->src_ip.port0); 1079 udp.uh_dport = htons(targ->g->dst_ip.port0); 1080 udp.uh_ulen = htons(paylen); 1081 if (targ->g->af == AF_INET) { 1082 /* Magic: taken from sbin/dhclient/packet.c */ 1083 udp.uh_sum = wrapsum( 1084 checksum(&udp, sizeof(udp), /* udp header */ 1085 checksum(pkt->ipv4.body, /* udp payload */ 1086 paylen - sizeof(udp), 1087 checksum(&pkt->ipv4.ip.ip_src, /* pseudo header */ 1088 2 * sizeof(pkt->ipv4.ip.ip_src), 1089 IPPROTO_UDP + (u_int32_t)ntohs(udp.uh_ulen))))); 1090 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip)); 1091 } else { 1092 /* Save part of pseudo header checksum into csum */ 1093 csum = IPPROTO_UDP << 24; 1094 csum = checksum(&csum, sizeof(csum), paylen); 1095 udp.uh_sum = wrapsum( 1096 checksum(udp_ptr, sizeof(udp), /* udp header */ 1097 checksum(pkt->ipv6.body, /* udp payload */ 1098 paylen - sizeof(udp), 1099 checksum(&pkt->ipv6.ip.ip6_src, /* pseudo header */ 1100 2 * sizeof(pkt->ipv6.ip.ip6_src), csum)))); 1101 memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6)); 1102 } 1103 memcpy(udp_ptr, &udp, sizeof(udp)); 1104 1105 bzero(&pkt->vh, sizeof(pkt->vh)); 1106 // dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0); 1107 } 1108 1109 static void 1110 get_vnet_hdr_len(struct glob_arg *g) 1111 { 1112 struct nmreq req; 1113 int err; 1114 1115 memset(&req, 0, sizeof(req)); 1116 bcopy(g->nmd->req.nr_name, req.nr_name, sizeof(req.nr_name)); 1117 req.nr_version = NETMAP_API; 1118 req.nr_cmd = NETMAP_VNET_HDR_GET; 1119 err = ioctl(g->main_fd, NIOCREGIF, &req); 1120 if (err) { 1121 D("Unable to get virtio-net header length"); 1122 return; 1123 } 1124 1125 g->virt_header = req.nr_arg1; 1126 if (g->virt_header) { 1127 D("Port requires virtio-net header, length = %d", 1128 g->virt_header); 1129 } 1130 } 1131 1132 static void 1133 set_vnet_hdr_len(struct glob_arg *g) 1134 { 1135 int err, l = g->virt_header; 1136 struct nmreq req; 1137 1138 if (l == 0) 1139 return; 1140 1141 memset(&req, 0, sizeof(req)); 1142 bcopy(g->nmd->req.nr_name, req.nr_name, sizeof(req.nr_name)); 1143 req.nr_version = NETMAP_API; 1144 req.nr_cmd = NETMAP_BDG_VNET_HDR; 1145 req.nr_arg1 = l; 1146 err = ioctl(g->main_fd, NIOCREGIF, &req); 1147 if (err) { 1148 D("Unable to set virtio-net header length %d", l); 1149 } 1150 } 1151 1152 /* 1153 * create and enqueue a batch of packets on a ring. 1154 * On the last one set NS_REPORT to tell the driver to generate 1155 * an interrupt when done. 1156 */ 1157 static int 1158 send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame, 1159 int size, struct targ *t, u_int count, int options) 1160 { 1161 u_int n, sent, head = ring->head; 1162 u_int frags = t->frags; 1163 u_int frag_size = t->frag_size; 1164 struct netmap_slot *slot = &ring->slot[head]; 1165 1166 n = nm_ring_space(ring); 1167 #if 0 1168 if (options & (OPT_COPY | OPT_PREFETCH) ) { 1169 for (sent = 0; sent < count; sent++) { 1170 struct netmap_slot *slot = &ring->slot[head]; 1171 char *p = NETMAP_BUF(ring, slot->buf_idx); 1172 1173 __builtin_prefetch(p); 1174 head = nm_ring_next(ring, head); 1175 } 1176 head = ring->head; 1177 } 1178 #endif 1179 for (sent = 0; sent < count && n >= frags; sent++, n--) { 1180 char *p; 1181 int buf_changed; 1182 u_int tosend = size; 1183 1184 slot = &ring->slot[head]; 1185 p = NETMAP_BUF(ring, slot->buf_idx); 1186 buf_changed = slot->flags & NS_BUF_CHANGED; 1187 1188 slot->flags = 0; 1189 if (options & OPT_RUBBISH) { 1190 /* do nothing */ 1191 } else if (options & OPT_INDIRECT) { 1192 slot->flags |= NS_INDIRECT; 1193 slot->ptr = (uint64_t)((uintptr_t)frame); 1194 } else if (frags > 1) { 1195 u_int i; 1196 const char *f = frame; 1197 char *fp = p; 1198 for (i = 0; i < frags - 1; i++) { 1199 memcpy(fp, f, frag_size); 1200 slot->len = frag_size; 1201 slot->flags = NS_MOREFRAG; 1202 if (options & OPT_DUMP) 1203 dump_payload(fp, frag_size, ring, head); 1204 tosend -= frag_size; 1205 f += frag_size; 1206 head = nm_ring_next(ring, head); 1207 slot = &ring->slot[head]; 1208 fp = NETMAP_BUF(ring, slot->buf_idx); 1209 } 1210 n -= (frags - 1); 1211 p = fp; 1212 slot->flags = 0; 1213 memcpy(p, f, tosend); 1214 update_addresses(pkt, t); 1215 } else if ((options & (OPT_COPY | OPT_MEMCPY)) || buf_changed) { 1216 if (options & OPT_COPY) 1217 nm_pkt_copy(frame, p, size); 1218 else 1219 memcpy(p, frame, size); 1220 update_addresses(pkt, t); 1221 } else if (options & OPT_PREFETCH) { 1222 __builtin_prefetch(p); 1223 } 1224 slot->len = tosend; 1225 if (options & OPT_DUMP) 1226 dump_payload(p, tosend, ring, head); 1227 head = nm_ring_next(ring, head); 1228 } 1229 if (sent) { 1230 slot->flags |= NS_REPORT; 1231 ring->head = ring->cur = head; 1232 } 1233 if (sent < count) { 1234 /* tell netmap that we need more slots */ 1235 ring->cur = ring->tail; 1236 } 1237 1238 return (sent); 1239 } 1240 1241 /* 1242 * Index of the highest bit set 1243 */ 1244 uint32_t 1245 msb64(uint64_t x) 1246 { 1247 uint64_t m = 1ULL << 63; 1248 int i; 1249 1250 for (i = 63; i >= 0; i--, m >>=1) 1251 if (m & x) 1252 return i; 1253 return 0; 1254 } 1255 1256 /* 1257 * wait until ts, either busy or sleeping if more than 1ms. 1258 * Return wakeup time. 1259 */ 1260 static struct timespec 1261 wait_time(struct timespec ts) 1262 { 1263 for (;;) { 1264 struct timespec w, cur; 1265 clock_gettime(CLOCK_REALTIME_PRECISE, &cur); 1266 w = timespec_sub(ts, cur); 1267 if (w.tv_sec < 0) 1268 return cur; 1269 else if (w.tv_sec > 0 || w.tv_nsec > 1000000) 1270 poll(NULL, 0, 1); 1271 } 1272 } 1273 1274 /* 1275 * Send a packet, and wait for a response. 1276 * The payload (after UDP header, ofs 42) has a 4-byte sequence 1277 * followed by a struct timeval (or bintime?) 1278 */ 1279 1280 static void * 1281 ping_body(void *data) 1282 { 1283 struct targ *targ = (struct targ *) data; 1284 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1285 struct netmap_if *nifp = targ->nmd->nifp; 1286 int i, m, rx = 0; 1287 void *frame; 1288 int size; 1289 struct timespec ts, now, last_print; 1290 struct timespec nexttime = {0, 0}; /* silence compiler */ 1291 uint64_t sent = 0, n = targ->g->npackets; 1292 uint64_t count = 0, t_cur, t_min = ~0, av = 0; 1293 uint64_t g_min = ~0, g_av = 0; 1294 uint64_t buckets[64]; /* bins for delays, ns */ 1295 int rate_limit = targ->g->tx_rate, tosend = 0; 1296 1297 frame = (char*)&targ->pkt + sizeof(targ->pkt.vh) - targ->g->virt_header; 1298 size = targ->g->pkt_size + targ->g->virt_header; 1299 1300 1301 if (targ->g->nthreads > 1) { 1302 D("can only ping with 1 thread"); 1303 return NULL; 1304 } 1305 1306 bzero(&buckets, sizeof(buckets)); 1307 clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); 1308 now = last_print; 1309 if (rate_limit) { 1310 targ->tic = timespec_add(now, (struct timespec){2,0}); 1311 targ->tic.tv_nsec = 0; 1312 wait_time(targ->tic); 1313 nexttime = targ->tic; 1314 } 1315 while (!targ->cancel && (n == 0 || sent < n)) { 1316 struct netmap_ring *ring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring); 1317 struct netmap_slot *slot; 1318 char *p; 1319 int rv; 1320 uint64_t limit, event = 0; 1321 1322 if (rate_limit && tosend <= 0) { 1323 tosend = targ->g->burst; 1324 nexttime = timespec_add(nexttime, targ->g->tx_period); 1325 wait_time(nexttime); 1326 } 1327 1328 limit = rate_limit ? tosend : targ->g->burst; 1329 if (n > 0 && n - sent < limit) 1330 limit = n - sent; 1331 for (m = 0; (unsigned)m < limit; m++) { 1332 slot = &ring->slot[ring->head]; 1333 slot->len = size; 1334 p = NETMAP_BUF(ring, slot->buf_idx); 1335 1336 if (nm_ring_empty(ring)) { 1337 D("-- ouch, cannot send"); 1338 break; 1339 } else { 1340 struct tstamp *tp; 1341 nm_pkt_copy(frame, p, size); 1342 clock_gettime(CLOCK_REALTIME_PRECISE, &ts); 1343 bcopy(&sent, p+42, sizeof(sent)); 1344 tp = (struct tstamp *)(p+46); 1345 tp->sec = (uint32_t)ts.tv_sec; 1346 tp->nsec = (uint32_t)ts.tv_nsec; 1347 sent++; 1348 ring->head = ring->cur = nm_ring_next(ring, ring->head); 1349 } 1350 } 1351 if (m > 0) 1352 event++; 1353 targ->ctr.pkts = sent; 1354 targ->ctr.bytes = sent*size; 1355 targ->ctr.events = event; 1356 if (rate_limit) 1357 tosend -= m; 1358 #ifdef BUSYWAIT 1359 rv = ioctl(pfd.fd, NIOCTXSYNC, NULL); 1360 if (rv < 0) { 1361 D("TXSYNC error on queue %d: %s", targ->me, 1362 strerror(errno)); 1363 } 1364 again: 1365 ioctl(pfd.fd, NIOCRXSYNC, NULL); 1366 #else 1367 /* should use a parameter to decide how often to send */ 1368 if ( (rv = poll(&pfd, 1, 3000)) <= 0) { 1369 D("poll error on queue %d: %s", targ->me, 1370 (rv ? strerror(errno) : "timeout")); 1371 continue; 1372 } 1373 #endif /* BUSYWAIT */ 1374 /* see what we got back */ 1375 rx = 0; 1376 for (i = targ->nmd->first_rx_ring; 1377 i <= targ->nmd->last_rx_ring; i++) { 1378 ring = NETMAP_RXRING(nifp, i); 1379 while (!nm_ring_empty(ring)) { 1380 uint32_t seq; 1381 struct tstamp *tp; 1382 int pos; 1383 1384 slot = &ring->slot[ring->head]; 1385 p = NETMAP_BUF(ring, slot->buf_idx); 1386 1387 clock_gettime(CLOCK_REALTIME_PRECISE, &now); 1388 bcopy(p+42, &seq, sizeof(seq)); 1389 tp = (struct tstamp *)(p+46); 1390 ts.tv_sec = (time_t)tp->sec; 1391 ts.tv_nsec = (long)tp->nsec; 1392 ts.tv_sec = now.tv_sec - ts.tv_sec; 1393 ts.tv_nsec = now.tv_nsec - ts.tv_nsec; 1394 if (ts.tv_nsec < 0) { 1395 ts.tv_nsec += 1000000000; 1396 ts.tv_sec--; 1397 } 1398 if (0) D("seq %d/%llu delta %d.%09d", seq, 1399 (unsigned long long)sent, 1400 (int)ts.tv_sec, (int)ts.tv_nsec); 1401 t_cur = ts.tv_sec * 1000000000UL + ts.tv_nsec; 1402 if (t_cur < t_min) 1403 t_min = t_cur; 1404 count ++; 1405 av += t_cur; 1406 pos = msb64(t_cur); 1407 buckets[pos]++; 1408 /* now store it in a bucket */ 1409 ring->head = ring->cur = nm_ring_next(ring, ring->head); 1410 rx++; 1411 } 1412 } 1413 //D("tx %d rx %d", sent, rx); 1414 //usleep(100000); 1415 ts.tv_sec = now.tv_sec - last_print.tv_sec; 1416 ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; 1417 if (ts.tv_nsec < 0) { 1418 ts.tv_nsec += 1000000000; 1419 ts.tv_sec--; 1420 } 1421 if (ts.tv_sec >= 1) { 1422 D("count %d RTT: min %d av %d ns", 1423 (int)count, (int)t_min, (int)(av/count)); 1424 int k, j, kmin, off; 1425 char buf[512]; 1426 1427 for (kmin = 0; kmin < 64; kmin ++) 1428 if (buckets[kmin]) 1429 break; 1430 for (k = 63; k >= kmin; k--) 1431 if (buckets[k]) 1432 break; 1433 buf[0] = '\0'; 1434 off = 0; 1435 for (j = kmin; j <= k; j++) { 1436 off += sprintf(buf + off, " %5d", (int)buckets[j]); 1437 } 1438 D("k: %d .. %d\n\t%s", 1<<kmin, 1<<k, buf); 1439 bzero(&buckets, sizeof(buckets)); 1440 count = 0; 1441 g_av += av; 1442 av = 0; 1443 if (t_min < g_min) 1444 g_min = t_min; 1445 t_min = ~0; 1446 last_print = now; 1447 } 1448 #ifdef BUSYWAIT 1449 if (rx < m && ts.tv_sec <= 3 && !targ->cancel) 1450 goto again; 1451 #endif /* BUSYWAIT */ 1452 } 1453 1454 if (sent > 0) { 1455 D("RTT over %llu packets: min %d av %d ns", 1456 (long long unsigned)sent, (int)g_min, 1457 (int)((double)g_av/sent)); 1458 } 1459 targ->completed = 1; 1460 1461 /* reset the ``used`` flag. */ 1462 targ->used = 0; 1463 1464 return NULL; 1465 } 1466 1467 1468 /* 1469 * reply to ping requests 1470 */ 1471 static void * 1472 pong_body(void *data) 1473 { 1474 struct targ *targ = (struct targ *) data; 1475 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1476 struct netmap_if *nifp = targ->nmd->nifp; 1477 struct netmap_ring *txring, *rxring; 1478 int i, rx = 0; 1479 uint64_t sent = 0, n = targ->g->npackets; 1480 1481 if (targ->g->nthreads > 1) { 1482 D("can only reply ping with 1 thread"); 1483 return NULL; 1484 } 1485 if (n > 0) 1486 D("understood ponger %llu but don't know how to do it", 1487 (unsigned long long)n); 1488 while (!targ->cancel && (n == 0 || sent < n)) { 1489 uint32_t txhead, txavail; 1490 //#define BUSYWAIT 1491 #ifdef BUSYWAIT 1492 ioctl(pfd.fd, NIOCRXSYNC, NULL); 1493 #else 1494 int rv; 1495 if ( (rv = poll(&pfd, 1, 1000)) <= 0) { 1496 D("poll error on queue %d: %s", targ->me, 1497 rv ? strerror(errno) : "timeout"); 1498 continue; 1499 } 1500 #endif 1501 txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring); 1502 txhead = txring->head; 1503 txavail = nm_ring_space(txring); 1504 /* see what we got back */ 1505 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 1506 rxring = NETMAP_RXRING(nifp, i); 1507 while (!nm_ring_empty(rxring)) { 1508 uint16_t *spkt, *dpkt; 1509 uint32_t head = rxring->head; 1510 struct netmap_slot *slot = &rxring->slot[head]; 1511 char *src, *dst; 1512 src = NETMAP_BUF(rxring, slot->buf_idx); 1513 //D("got pkt %p of size %d", src, slot->len); 1514 rxring->head = rxring->cur = nm_ring_next(rxring, head); 1515 rx++; 1516 if (txavail == 0) 1517 continue; 1518 dst = NETMAP_BUF(txring, 1519 txring->slot[txhead].buf_idx); 1520 /* copy... */ 1521 dpkt = (uint16_t *)dst; 1522 spkt = (uint16_t *)src; 1523 nm_pkt_copy(src, dst, slot->len); 1524 /* swap source and destination MAC */ 1525 dpkt[0] = spkt[3]; 1526 dpkt[1] = spkt[4]; 1527 dpkt[2] = spkt[5]; 1528 dpkt[3] = spkt[0]; 1529 dpkt[4] = spkt[1]; 1530 dpkt[5] = spkt[2]; 1531 txring->slot[txhead].len = slot->len; 1532 txhead = nm_ring_next(txring, txhead); 1533 txavail--; 1534 sent++; 1535 } 1536 } 1537 txring->head = txring->cur = txhead; 1538 targ->ctr.pkts = sent; 1539 #ifdef BUSYWAIT 1540 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1541 #endif 1542 //D("tx %d rx %d", sent, rx); 1543 } 1544 1545 targ->completed = 1; 1546 1547 /* reset the ``used`` flag. */ 1548 targ->used = 0; 1549 1550 return NULL; 1551 } 1552 1553 1554 static void * 1555 sender_body(void *data) 1556 { 1557 struct targ *targ = (struct targ *) data; 1558 struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; 1559 struct netmap_if *nifp; 1560 struct netmap_ring *txring = NULL; 1561 int i; 1562 uint64_t n = targ->g->npackets / targ->g->nthreads; 1563 uint64_t sent = 0; 1564 uint64_t event = 0; 1565 int options = targ->g->options | OPT_COPY; 1566 struct timespec nexttime = { 0, 0}; // XXX silence compiler 1567 int rate_limit = targ->g->tx_rate; 1568 struct pkt *pkt = &targ->pkt; 1569 void *frame; 1570 int size; 1571 1572 if (targ->frame == NULL) { 1573 frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header; 1574 size = targ->g->pkt_size + targ->g->virt_header; 1575 } else { 1576 frame = targ->frame; 1577 size = targ->g->pkt_size; 1578 } 1579 1580 D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd); 1581 if (setaffinity(targ->thread, targ->affinity)) 1582 goto quit; 1583 1584 /* main loop.*/ 1585 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1586 if (rate_limit) { 1587 targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); 1588 targ->tic.tv_nsec = 0; 1589 wait_time(targ->tic); 1590 nexttime = targ->tic; 1591 } 1592 if (targ->g->dev_type == DEV_TAP) { 1593 D("writing to file desc %d", targ->g->main_fd); 1594 1595 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 1596 if (write(targ->g->main_fd, frame, size) != -1) 1597 sent++; 1598 update_addresses(pkt, targ); 1599 if (i > 10000) { 1600 targ->ctr.pkts = sent; 1601 targ->ctr.bytes = sent*size; 1602 targ->ctr.events = sent; 1603 i = 0; 1604 } 1605 } 1606 #ifndef NO_PCAP 1607 } else if (targ->g->dev_type == DEV_PCAP) { 1608 pcap_t *p = targ->g->p; 1609 1610 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 1611 if (pcap_inject(p, frame, size) != -1) 1612 sent++; 1613 update_addresses(pkt, targ); 1614 if (i > 10000) { 1615 targ->ctr.pkts = sent; 1616 targ->ctr.bytes = sent*size; 1617 targ->ctr.events = sent; 1618 i = 0; 1619 } 1620 } 1621 #endif /* NO_PCAP */ 1622 } else { 1623 int tosend = 0; 1624 u_int bufsz, frag_size = targ->g->frag_size; 1625 1626 nifp = targ->nmd->nifp; 1627 txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring); 1628 bufsz = txring->nr_buf_size; 1629 if (bufsz < frag_size) 1630 frag_size = bufsz; 1631 targ->frag_size = targ->g->pkt_size / targ->frags; 1632 if (targ->frag_size > frag_size) { 1633 targ->frags = targ->g->pkt_size / frag_size; 1634 targ->frag_size = frag_size; 1635 if (targ->g->pkt_size % frag_size != 0) 1636 targ->frags++; 1637 } 1638 D("frags %u frag_size %u", targ->frags, targ->frag_size); 1639 while (!targ->cancel && (n == 0 || sent < n)) { 1640 int rv; 1641 1642 if (rate_limit && tosend <= 0) { 1643 tosend = targ->g->burst; 1644 nexttime = timespec_add(nexttime, targ->g->tx_period); 1645 wait_time(nexttime); 1646 } 1647 1648 /* 1649 * wait for available room in the send queue(s) 1650 */ 1651 #ifdef BUSYWAIT 1652 (void)rv; 1653 if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) { 1654 D("ioctl error on queue %d: %s", targ->me, 1655 strerror(errno)); 1656 goto quit; 1657 } 1658 #else /* !BUSYWAIT */ 1659 if ( (rv = poll(&pfd, 1, 2000)) <= 0) { 1660 if (targ->cancel) 1661 break; 1662 D("poll error on queue %d: %s", targ->me, 1663 rv ? strerror(errno) : "timeout"); 1664 // goto quit; 1665 } 1666 if (pfd.revents & POLLERR) { 1667 D("poll error on %d ring %d-%d", pfd.fd, 1668 targ->nmd->first_tx_ring, targ->nmd->last_tx_ring); 1669 goto quit; 1670 } 1671 #endif /* !BUSYWAIT */ 1672 /* 1673 * scan our queues and send on those with room 1674 */ 1675 if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) { 1676 D("drop copy"); 1677 options &= ~OPT_COPY; 1678 } 1679 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1680 int m; 1681 uint64_t limit = rate_limit ? tosend : targ->g->burst; 1682 1683 if (n > 0 && n == sent) 1684 break; 1685 1686 if (n > 0 && n - sent < limit) 1687 limit = n - sent; 1688 txring = NETMAP_TXRING(nifp, i); 1689 if (nm_ring_empty(txring)) 1690 continue; 1691 1692 if (targ->g->pkt_min_size > 0) { 1693 size = nrand48(targ->seed) % 1694 (targ->g->pkt_size - targ->g->pkt_min_size) + 1695 targ->g->pkt_min_size; 1696 } 1697 m = send_packets(txring, pkt, frame, size, targ, 1698 limit, options); 1699 ND("limit %lu tail %d m %d", 1700 limit, txring->tail, m); 1701 sent += m; 1702 if (m > 0) //XXX-ste: can m be 0? 1703 event++; 1704 targ->ctr.pkts = sent; 1705 targ->ctr.bytes += m*size; 1706 targ->ctr.events = event; 1707 if (rate_limit) { 1708 tosend -= m; 1709 if (tosend <= 0) 1710 break; 1711 } 1712 } 1713 } 1714 /* flush any remaining packets */ 1715 if (txring != NULL) { 1716 D("flush tail %d head %d on thread %p", 1717 txring->tail, txring->head, 1718 (void *)pthread_self()); 1719 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1720 } 1721 1722 /* final part: wait all the TX queues to be empty. */ 1723 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1724 txring = NETMAP_TXRING(nifp, i); 1725 while (!targ->cancel && nm_tx_pending(txring)) { 1726 RD(5, "pending tx tail %d head %d on ring %d", 1727 txring->tail, txring->head, i); 1728 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1729 usleep(1); /* wait 1 tick */ 1730 } 1731 } 1732 } /* end DEV_NETMAP */ 1733 1734 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1735 targ->completed = 1; 1736 targ->ctr.pkts = sent; 1737 targ->ctr.bytes = sent*size; 1738 targ->ctr.events = event; 1739 quit: 1740 /* reset the ``used`` flag. */ 1741 targ->used = 0; 1742 1743 return (NULL); 1744 } 1745 1746 1747 #ifndef NO_PCAP 1748 static void 1749 receive_pcap(u_char *user, const struct pcap_pkthdr * h, 1750 const u_char * bytes) 1751 { 1752 struct my_ctrs *ctr = (struct my_ctrs *)user; 1753 (void)bytes; /* UNUSED */ 1754 ctr->bytes += h->len; 1755 ctr->pkts++; 1756 } 1757 #endif /* !NO_PCAP */ 1758 1759 1760 static int 1761 receive_packets(struct netmap_ring *ring, u_int limit, int dump, uint64_t *bytes) 1762 { 1763 u_int head, rx, n; 1764 uint64_t b = 0; 1765 u_int complete = 0; 1766 1767 if (bytes == NULL) 1768 bytes = &b; 1769 1770 head = ring->head; 1771 n = nm_ring_space(ring); 1772 if (n < limit) 1773 limit = n; 1774 for (rx = 0; rx < limit; rx++) { 1775 struct netmap_slot *slot = &ring->slot[head]; 1776 char *p = NETMAP_BUF(ring, slot->buf_idx); 1777 1778 *bytes += slot->len; 1779 if (dump) 1780 dump_payload(p, slot->len, ring, head); 1781 if (!(slot->flags & NS_MOREFRAG)) 1782 complete++; 1783 1784 head = nm_ring_next(ring, head); 1785 } 1786 ring->head = ring->cur = head; 1787 1788 return (complete); 1789 } 1790 1791 static void * 1792 receiver_body(void *data) 1793 { 1794 struct targ *targ = (struct targ *) data; 1795 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1796 struct netmap_if *nifp; 1797 struct netmap_ring *rxring; 1798 int i; 1799 struct my_ctrs cur; 1800 1801 memset(&cur, 0, sizeof(cur)); 1802 1803 if (setaffinity(targ->thread, targ->affinity)) 1804 goto quit; 1805 1806 D("reading from %s fd %d main_fd %d", 1807 targ->g->ifname, targ->fd, targ->g->main_fd); 1808 /* unbounded wait for the first packet. */ 1809 for (;!targ->cancel;) { 1810 i = poll(&pfd, 1, 1000); 1811 if (i > 0 && !(pfd.revents & POLLERR)) 1812 break; 1813 if (i < 0) { 1814 D("poll() error: %s", strerror(errno)); 1815 goto quit; 1816 } 1817 if (pfd.revents & POLLERR) { 1818 D("fd error"); 1819 goto quit; 1820 } 1821 RD(1, "waiting for initial packets, poll returns %d %d", 1822 i, pfd.revents); 1823 } 1824 /* main loop, exit after 1s silence */ 1825 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1826 if (targ->g->dev_type == DEV_TAP) { 1827 while (!targ->cancel) { 1828 char buf[MAX_BODYSIZE]; 1829 /* XXX should we poll ? */ 1830 i = read(targ->g->main_fd, buf, sizeof(buf)); 1831 if (i > 0) { 1832 targ->ctr.pkts++; 1833 targ->ctr.bytes += i; 1834 targ->ctr.events++; 1835 } 1836 } 1837 #ifndef NO_PCAP 1838 } else if (targ->g->dev_type == DEV_PCAP) { 1839 while (!targ->cancel) { 1840 /* XXX should we poll ? */ 1841 pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, 1842 (u_char *)&targ->ctr); 1843 targ->ctr.events++; 1844 } 1845 #endif /* !NO_PCAP */ 1846 } else { 1847 int dump = targ->g->options & OPT_DUMP; 1848 1849 nifp = targ->nmd->nifp; 1850 while (!targ->cancel) { 1851 /* Once we started to receive packets, wait at most 1 seconds 1852 before quitting. */ 1853 #ifdef BUSYWAIT 1854 if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) { 1855 D("ioctl error on queue %d: %s", targ->me, 1856 strerror(errno)); 1857 goto quit; 1858 } 1859 #else /* !BUSYWAIT */ 1860 if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { 1861 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1862 targ->toc.tv_sec -= 1; /* Subtract timeout time. */ 1863 goto out; 1864 } 1865 1866 if (pfd.revents & POLLERR) { 1867 D("poll err"); 1868 goto quit; 1869 } 1870 #endif /* !BUSYWAIT */ 1871 uint64_t cur_space = 0; 1872 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 1873 int m; 1874 1875 rxring = NETMAP_RXRING(nifp, i); 1876 /* compute free space in the ring */ 1877 m = rxring->head + rxring->num_slots - rxring->tail; 1878 if (m >= (int) rxring->num_slots) 1879 m -= rxring->num_slots; 1880 cur_space += m; 1881 if (nm_ring_empty(rxring)) 1882 continue; 1883 1884 m = receive_packets(rxring, targ->g->burst, dump, &cur.bytes); 1885 cur.pkts += m; 1886 if (m > 0) 1887 cur.events++; 1888 } 1889 cur.min_space = targ->ctr.min_space; 1890 if (cur_space < cur.min_space) 1891 cur.min_space = cur_space; 1892 targ->ctr = cur; 1893 } 1894 } 1895 1896 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1897 1898 #if !defined(BUSYWAIT) 1899 out: 1900 #endif 1901 targ->completed = 1; 1902 targ->ctr = cur; 1903 1904 quit: 1905 /* reset the ``used`` flag. */ 1906 targ->used = 0; 1907 1908 return (NULL); 1909 } 1910 1911 static void * 1912 txseq_body(void *data) 1913 { 1914 struct targ *targ = (struct targ *) data; 1915 struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; 1916 struct netmap_ring *ring; 1917 int64_t sent = 0; 1918 uint64_t event = 0; 1919 int options = targ->g->options | OPT_COPY; 1920 struct timespec nexttime = {0, 0}; 1921 int rate_limit = targ->g->tx_rate; 1922 struct pkt *pkt = &targ->pkt; 1923 int frags = targ->g->frags; 1924 uint32_t sequence = 0; 1925 int budget = 0; 1926 void *frame; 1927 int size; 1928 1929 if (targ->g->nthreads > 1) { 1930 D("can only txseq ping with 1 thread"); 1931 return NULL; 1932 } 1933 1934 if (targ->g->npackets > 0) { 1935 D("Ignoring -n argument"); 1936 } 1937 1938 frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header; 1939 size = targ->g->pkt_size + targ->g->virt_header; 1940 1941 D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd); 1942 if (setaffinity(targ->thread, targ->affinity)) 1943 goto quit; 1944 1945 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1946 if (rate_limit) { 1947 targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); 1948 targ->tic.tv_nsec = 0; 1949 wait_time(targ->tic); 1950 nexttime = targ->tic; 1951 } 1952 1953 /* Only use the first queue. */ 1954 ring = NETMAP_TXRING(targ->nmd->nifp, targ->nmd->first_tx_ring); 1955 1956 while (!targ->cancel) { 1957 int64_t limit; 1958 unsigned int space; 1959 unsigned int head; 1960 int fcnt; 1961 uint16_t sum = 0; 1962 int rv; 1963 1964 if (!rate_limit) { 1965 budget = targ->g->burst; 1966 1967 } else if (budget <= 0) { 1968 budget = targ->g->burst; 1969 nexttime = timespec_add(nexttime, targ->g->tx_period); 1970 wait_time(nexttime); 1971 } 1972 1973 /* wait for available room in the send queue */ 1974 #ifdef BUSYWAIT 1975 (void)rv; 1976 if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) { 1977 D("ioctl error on queue %d: %s", targ->me, 1978 strerror(errno)); 1979 goto quit; 1980 } 1981 #else /* !BUSYWAIT */ 1982 if ( (rv = poll(&pfd, 1, 2000)) <= 0) { 1983 if (targ->cancel) 1984 break; 1985 D("poll error on queue %d: %s", targ->me, 1986 rv ? strerror(errno) : "timeout"); 1987 // goto quit; 1988 } 1989 if (pfd.revents & POLLERR) { 1990 D("poll error on %d ring %d-%d", pfd.fd, 1991 targ->nmd->first_tx_ring, targ->nmd->last_tx_ring); 1992 goto quit; 1993 } 1994 #endif /* !BUSYWAIT */ 1995 1996 /* If no room poll() again. */ 1997 space = nm_ring_space(ring); 1998 if (!space) { 1999 continue; 2000 } 2001 2002 limit = budget; 2003 2004 if (space < limit) { 2005 limit = space; 2006 } 2007 2008 /* Cut off ``limit`` to make sure is multiple of ``frags``. */ 2009 if (frags > 1) { 2010 limit = (limit / frags) * frags; 2011 } 2012 2013 limit = sent + limit; /* Convert to absolute. */ 2014 2015 for (fcnt = frags, head = ring->head; 2016 sent < limit; sent++, sequence++) { 2017 struct netmap_slot *slot = &ring->slot[head]; 2018 char *p = NETMAP_BUF(ring, slot->buf_idx); 2019 uint16_t *w = (uint16_t *)PKT(pkt, body, targ->g->af), t; 2020 2021 memcpy(&sum, targ->g->af == AF_INET ? &pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, sizeof(sum)); 2022 2023 slot->flags = 0; 2024 t = *w; 2025 PKT(pkt, body, targ->g->af)[0] = sequence >> 24; 2026 PKT(pkt, body, targ->g->af)[1] = (sequence >> 16) & 0xff; 2027 sum = ~cksum_add(~sum, cksum_add(~t, *w)); 2028 t = *++w; 2029 PKT(pkt, body, targ->g->af)[2] = (sequence >> 8) & 0xff; 2030 PKT(pkt, body, targ->g->af)[3] = sequence & 0xff; 2031 sum = ~cksum_add(~sum, cksum_add(~t, *w)); 2032 memcpy(targ->g->af == AF_INET ? &pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, &sum, sizeof(sum)); 2033 nm_pkt_copy(frame, p, size); 2034 if (fcnt == frags) { 2035 update_addresses(pkt, targ); 2036 } 2037 2038 if (options & OPT_DUMP) { 2039 dump_payload(p, size, ring, head); 2040 } 2041 2042 slot->len = size; 2043 2044 if (--fcnt > 0) { 2045 slot->flags |= NS_MOREFRAG; 2046 } else { 2047 fcnt = frags; 2048 } 2049 2050 if (sent == limit - 1) { 2051 /* Make sure we don't push an incomplete 2052 * packet. */ 2053 assert(!(slot->flags & NS_MOREFRAG)); 2054 slot->flags |= NS_REPORT; 2055 } 2056 2057 head = nm_ring_next(ring, head); 2058 if (rate_limit) { 2059 budget--; 2060 } 2061 } 2062 2063 ring->cur = ring->head = head; 2064 2065 event ++; 2066 targ->ctr.pkts = sent; 2067 targ->ctr.bytes = sent * size; 2068 targ->ctr.events = event; 2069 } 2070 2071 /* flush any remaining packets */ 2072 D("flush tail %d head %d on thread %p", 2073 ring->tail, ring->head, 2074 (void *)pthread_self()); 2075 ioctl(pfd.fd, NIOCTXSYNC, NULL); 2076 2077 /* final part: wait the TX queues to become empty. */ 2078 while (!targ->cancel && nm_tx_pending(ring)) { 2079 RD(5, "pending tx tail %d head %d on ring %d", 2080 ring->tail, ring->head, targ->nmd->first_tx_ring); 2081 ioctl(pfd.fd, NIOCTXSYNC, NULL); 2082 usleep(1); /* wait 1 tick */ 2083 } 2084 2085 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 2086 targ->completed = 1; 2087 targ->ctr.pkts = sent; 2088 targ->ctr.bytes = sent * size; 2089 targ->ctr.events = event; 2090 quit: 2091 /* reset the ``used`` flag. */ 2092 targ->used = 0; 2093 2094 return (NULL); 2095 } 2096 2097 2098 static char * 2099 multi_slot_to_string(struct netmap_ring *ring, unsigned int head, 2100 unsigned int nfrags, char *strbuf, size_t strbuflen) 2101 { 2102 unsigned int f; 2103 char *ret = strbuf; 2104 2105 for (f = 0; f < nfrags; f++) { 2106 struct netmap_slot *slot = &ring->slot[head]; 2107 int m = snprintf(strbuf, strbuflen, "|%u,%x|", slot->len, 2108 slot->flags); 2109 if (m >= (int)strbuflen) { 2110 break; 2111 } 2112 strbuf += m; 2113 strbuflen -= m; 2114 2115 head = nm_ring_next(ring, head); 2116 } 2117 2118 return ret; 2119 } 2120 2121 static void * 2122 rxseq_body(void *data) 2123 { 2124 struct targ *targ = (struct targ *) data; 2125 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 2126 int dump = targ->g->options & OPT_DUMP; 2127 struct netmap_ring *ring; 2128 unsigned int frags_exp = 1; 2129 struct my_ctrs cur; 2130 unsigned int frags = 0; 2131 int first_packet = 1; 2132 int first_slot = 1; 2133 int i, j, af, nrings; 2134 uint32_t seq, *seq_exp = NULL; 2135 2136 memset(&cur, 0, sizeof(cur)); 2137 2138 if (setaffinity(targ->thread, targ->affinity)) 2139 goto quit; 2140 2141 nrings = targ->nmd->last_rx_ring - targ->nmd->first_rx_ring + 1; 2142 seq_exp = calloc(nrings, sizeof(uint32_t)); 2143 if (seq_exp == NULL) { 2144 D("failed to allocate seq array"); 2145 goto quit; 2146 } 2147 2148 D("reading from %s fd %d main_fd %d", 2149 targ->g->ifname, targ->fd, targ->g->main_fd); 2150 /* unbounded wait for the first packet. */ 2151 for (;!targ->cancel;) { 2152 i = poll(&pfd, 1, 1000); 2153 if (i > 0 && !(pfd.revents & POLLERR)) 2154 break; 2155 RD(1, "waiting for initial packets, poll returns %d %d", 2156 i, pfd.revents); 2157 } 2158 2159 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 2160 2161 2162 while (!targ->cancel) { 2163 unsigned int head; 2164 int limit; 2165 2166 #ifdef BUSYWAIT 2167 if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) { 2168 D("ioctl error on queue %d: %s", targ->me, 2169 strerror(errno)); 2170 goto quit; 2171 } 2172 #else /* !BUSYWAIT */ 2173 if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { 2174 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 2175 targ->toc.tv_sec -= 1; /* Subtract timeout time. */ 2176 goto out; 2177 } 2178 2179 if (pfd.revents & POLLERR) { 2180 D("poll err"); 2181 goto quit; 2182 } 2183 #endif /* !BUSYWAIT */ 2184 2185 for (j = targ->nmd->first_rx_ring; j <= targ->nmd->last_rx_ring; j++) { 2186 ring = NETMAP_RXRING(targ->nmd->nifp, j); 2187 if (nm_ring_empty(ring)) 2188 continue; 2189 2190 limit = nm_ring_space(ring); 2191 if (limit > targ->g->burst) 2192 limit = targ->g->burst; 2193 2194 #if 0 2195 /* Enable this if 2196 * 1) we remove the early-return optimization from 2197 * the netmap poll implementation, or 2198 * 2) pipes get NS_MOREFRAG support. 2199 * With the current netmap implementation, an experiment like 2200 * pkt-gen -i vale:1{1 -f txseq -F 9 2201 * pkt-gen -i vale:1}1 -f rxseq 2202 * would get stuck as soon as we find nm_ring_space(ring) < 9, 2203 * since here limit is rounded to 0 and 2204 * pipe rxsync is not called anymore by the poll() of this loop. 2205 */ 2206 if (frags_exp > 1) { 2207 int o = limit; 2208 /* Cut off to the closest smaller multiple. */ 2209 limit = (limit / frags_exp) * frags_exp; 2210 RD(2, "LIMIT %d --> %d", o, limit); 2211 } 2212 #endif 2213 2214 for (head = ring->head, i = 0; i < limit; i++) { 2215 struct netmap_slot *slot = &ring->slot[head]; 2216 char *p = NETMAP_BUF(ring, slot->buf_idx); 2217 int len = slot->len; 2218 struct pkt *pkt; 2219 2220 if (dump) { 2221 dump_payload(p, slot->len, ring, head); 2222 } 2223 2224 frags++; 2225 if (!(slot->flags & NS_MOREFRAG)) { 2226 if (first_packet) { 2227 first_packet = 0; 2228 } else if (frags != frags_exp) { 2229 char prbuf[512]; 2230 RD(1, "Received packets with %u frags, " 2231 "expected %u, '%s'", frags, frags_exp, 2232 multi_slot_to_string(ring, head-frags+1, 2233 frags, 2234 prbuf, sizeof(prbuf))); 2235 } 2236 first_packet = 0; 2237 frags_exp = frags; 2238 frags = 0; 2239 } 2240 2241 p -= sizeof(pkt->vh) - targ->g->virt_header; 2242 len += sizeof(pkt->vh) - targ->g->virt_header; 2243 pkt = (struct pkt *)p; 2244 if (ntohs(pkt->eh.ether_type) == ETHERTYPE_IP) 2245 af = AF_INET; 2246 else 2247 af = AF_INET6; 2248 2249 if ((char *)pkt + len < ((char *)PKT(pkt, body, af)) + 2250 sizeof(seq)) { 2251 RD(1, "%s: packet too small (len=%u)", __func__, 2252 slot->len); 2253 } else { 2254 seq = (PKT(pkt, body, af)[0] << 24) | 2255 (PKT(pkt, body, af)[1] << 16) | 2256 (PKT(pkt, body, af)[2] << 8) | 2257 PKT(pkt, body, af)[3]; 2258 if (first_slot) { 2259 /* Grab the first one, whatever it 2260 is. */ 2261 seq_exp[j] = seq; 2262 first_slot = 0; 2263 } else if (seq != seq_exp[j]) { 2264 uint32_t delta = seq - seq_exp[j]; 2265 2266 if (delta < (0xFFFFFFFF >> 1)) { 2267 RD(2, "Sequence GAP: exp %u found %u", 2268 seq_exp[j], seq); 2269 } else { 2270 RD(2, "Sequence OUT OF ORDER: " 2271 "exp %u found %u", seq_exp[j], seq); 2272 } 2273 seq_exp[j] = seq; 2274 } 2275 seq_exp[j]++; 2276 } 2277 2278 cur.bytes += slot->len; 2279 head = nm_ring_next(ring, head); 2280 cur.pkts++; 2281 } 2282 2283 ring->cur = ring->head = head; 2284 2285 cur.events++; 2286 targ->ctr = cur; 2287 } 2288 } 2289 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 2290 2291 #ifndef BUSYWAIT 2292 out: 2293 #endif /* !BUSYWAIT */ 2294 targ->completed = 1; 2295 targ->ctr = cur; 2296 2297 quit: 2298 if (seq_exp != NULL) 2299 free(seq_exp); 2300 /* reset the ``used`` flag. */ 2301 targ->used = 0; 2302 2303 return (NULL); 2304 } 2305 2306 2307 static void 2308 tx_output(struct glob_arg *g, struct my_ctrs *cur, double delta, const char *msg) 2309 { 2310 double bw, raw_bw, pps, abs; 2311 char b1[40], b2[80], b3[80]; 2312 int size; 2313 2314 if (cur->pkts == 0) { 2315 printf("%s nothing.\n", msg); 2316 return; 2317 } 2318 2319 size = (int)(cur->bytes / cur->pkts); 2320 2321 printf("%s %llu packets %llu bytes %llu events %d bytes each in %.2f seconds.\n", 2322 msg, 2323 (unsigned long long)cur->pkts, 2324 (unsigned long long)cur->bytes, 2325 (unsigned long long)cur->events, size, delta); 2326 if (delta == 0) 2327 delta = 1e-6; 2328 if (size < 60) /* correct for min packet size */ 2329 size = 60; 2330 pps = cur->pkts / delta; 2331 bw = (8.0 * cur->bytes) / delta; 2332 raw_bw = (8.0 * cur->bytes + cur->pkts * g->framing) / delta; 2333 abs = cur->pkts / (double)(cur->events); 2334 2335 printf("Speed: %spps Bandwidth: %sbps (raw %sbps). Average batch: %.2f pkts\n", 2336 norm(b1, pps, normalize), norm(b2, bw, normalize), norm(b3, raw_bw, normalize), abs); 2337 } 2338 2339 static void 2340 usage(int errcode) 2341 { 2342 /* This usage is generated from the pkt-gen man page: 2343 * $ man pkt-gen > x 2344 * and pasted here adding the string terminators and endlines with simple 2345 * regular expressions. */ 2346 const char *cmd = "pkt-gen"; 2347 fprintf(stderr, 2348 "Usage:\n" 2349 "%s arguments\n" 2350 " -h Show program usage and exit.\n" 2351 "\n" 2352 " -i interface\n" 2353 " Name of the network interface that pkt-gen operates on. It can be a system network interface\n" 2354 " (e.g., em0), the name of a vale(4) port (e.g., valeSSS:PPP), the name of a netmap pipe or\n" 2355 " monitor, or any valid netmap port name accepted by the nm_open library function, as docu-\n" 2356 " mented in netmap(4) (NIOCREGIF section).\n" 2357 "\n" 2358 " -f function\n" 2359 " The function to be executed by pkt-gen. Specify tx for transmission, rx for reception, ping\n" 2360 " for client-side ping-pong operation, and pong for server-side ping-pong operation.\n" 2361 "\n" 2362 " -n count\n" 2363 " Number of iterations of the pkt-gen function, with 0 meaning infinite). In case of tx or rx,\n" 2364 " count is the number of packets to receive or transmit. In case of ping or pong, count is the\n" 2365 " number of ping-pong transactions.\n" 2366 "\n" 2367 " -l pkt_size\n" 2368 " Packet size in bytes excluding CRC. If passed a second time, use random sizes larger or\n" 2369 " equal than the second one and lower than the first one.\n" 2370 "\n" 2371 " -b burst_size\n" 2372 " Transmit or receive up to burst_size packets at a time.\n" 2373 "\n" 2374 " -4 Use IPv4 addresses.\n" 2375 "\n" 2376 " -6 Use IPv6 addresses.\n" 2377 "\n" 2378 " -d dst_ip[:port[-dst_ip:port]]\n" 2379 " Destination IPv4/IPv6 address and port, single or range.\n" 2380 "\n" 2381 " -s src_ip[:port[-src_ip:port]]\n" 2382 " Source IPv4/IPv6 address and port, single or range.\n" 2383 "\n" 2384 " -D dst_mac\n" 2385 " Destination MAC address in colon notation (e.g., aa:bb:cc:dd:ee:00).\n" 2386 "\n" 2387 " -S src_mac\n" 2388 " Source MAC address in colon notation.\n" 2389 "\n" 2390 " -a cpu_id\n" 2391 " Pin the first thread of pkt-gen to a particular CPU using pthread_setaffinity_np(3). If more\n" 2392 " threads are used, they are pinned to the subsequent CPUs, one per thread.\n" 2393 "\n" 2394 " -c cpus\n" 2395 " Maximum number of CPUs to use (0 means to use all the available ones).\n" 2396 "\n" 2397 " -p threads\n" 2398 " Number of threads to use. By default, only a single thread is used to handle all the netmap\n" 2399 " rings. If threads is larger than one, each thread handles a single TX ring (in tx mode), a\n" 2400 " single RX ring (in rx mode), or a TX/RX ring couple. The number of threads must be less or\n" 2401 " equal than the number of TX (or RX) ring available in the device specified by interface.\n" 2402 "\n" 2403 " -T report_ms\n" 2404 " Number of milliseconds between reports.\n" 2405 "\n" 2406 " -w wait_for_link_time\n" 2407 " Number of seconds to wait before starting the pkt-gen function, useuful to make sure that the\n" 2408 " network link is up. A network device driver may take some time to enter netmap mode, or to\n" 2409 " create a new transmit/receive ring pair when netmap(4) requests one.\n" 2410 "\n" 2411 " -R rate\n" 2412 " Packet transmission rate. Not setting the packet transmission rate tells pkt-gen to transmit\n" 2413 " packets as quickly as possible. On servers from 2010 on-wards netmap(4) is able to com-\n" 2414 " pletely use all of the bandwidth of a 10 or 40Gbps link, so this option should be used unless\n" 2415 " your intention is to saturate the link.\n" 2416 "\n" 2417 " -X Dump payload of each packet transmitted or received.\n" 2418 "\n" 2419 " -H len Add empty virtio-net-header with size 'len'. Valid sizes are 0, 10 and 12. This option is\n" 2420 " only used with Virtual Machine technologies that use virtio as a network interface.\n" 2421 "\n" 2422 " -P file\n" 2423 " Load the packet to be transmitted from a pcap file rather than constructing it within\n" 2424 " pkt-gen.\n" 2425 "\n" 2426 " -z Use random IPv4/IPv6 src address/port.\n" 2427 "\n" 2428 " -Z Use random IPv4/IPv6 dst address/port.\n" 2429 "\n" 2430 " -N Do not normalize units (i.e., use bps, pps instead of Mbps, Kpps, etc.).\n" 2431 "\n" 2432 " -F num_frags\n" 2433 " Send multi-slot packets, each one with num_frags fragments. A multi-slot packet is repre-\n" 2434 " sented by two or more consecutive netmap slots with the NS_MOREFRAG flag set (except for the\n" 2435 " last slot). This is useful to transmit or receive packets larger than the netmap buffer\n" 2436 " size.\n" 2437 "\n" 2438 " -M frag_size\n" 2439 " In multi-slot mode, frag_size specifies the size of each fragment, if smaller than the packet\n" 2440 " length divided by num_frags.\n" 2441 "\n" 2442 " -I Use indirect buffers. It is only valid for transmitting on VALE ports, and it is implemented\n" 2443 " by setting the NS_INDIRECT flag in the netmap slots.\n" 2444 "\n" 2445 " -W Exit immediately if all the RX rings are empty the first time they are examined.\n" 2446 "\n" 2447 " -v Increase the verbosity level.\n" 2448 "\n" 2449 " -r In tx mode, do not initialize packets, but send whatever the content of the uninitialized\n" 2450 " netmap buffers is (rubbish mode).\n" 2451 "\n" 2452 " -A Compute mean and standard deviation (over a sliding window) for the transmit or receive rate.\n" 2453 "\n" 2454 " -B Take Ethernet framing and CRC into account when computing the average bps. This adds 4 bytes\n" 2455 " of CRC and 20 bytes of framing to each packet.\n" 2456 "\n" 2457 " -C tx_slots[,rx_slots[,tx_rings[,rx_rings]]]\n" 2458 " Configuration in terms of number of rings and slots to be used when opening the netmap port.\n" 2459 " Such configuration has effect on software ports created on the fly, such as VALE ports and\n" 2460 " netmap pipes. The configuration may consist of 1 to 4 numbers separated by commas: tx_slots,\n" 2461 " rx_slots, tx_rings, rx_rings. Missing numbers or zeroes stand for default values. As an\n" 2462 " additional convenience, if exactly one number is specified, then this is assigned to both\n" 2463 " tx_slots and rx_slots. If there is no fourth number, then the third one is assigned to both\n" 2464 " tx_rings and rx_rings.\n" 2465 "\n" 2466 " -o options data generation options (parsed using atoi)\n" 2467 " OPT_PREFETCH 1\n" 2468 " OPT_ACCESS 2\n" 2469 " OPT_COPY 4\n" 2470 " OPT_MEMCPY 8\n" 2471 " OPT_TS 16 (add a timestamp)\n" 2472 " OPT_INDIRECT 32 (use indirect buffers)\n" 2473 " OPT_DUMP 64 (dump rx/tx traffic)\n" 2474 " OPT_RUBBISH 256\n" 2475 " (send wathever the buffers contain)\n" 2476 " OPT_RANDOM_SRC 512\n" 2477 " OPT_RANDOM_DST 1024\n" 2478 " OPT_PPS_STATS 2048\n" 2479 "", 2480 cmd); 2481 exit(errcode); 2482 } 2483 2484 static void 2485 start_threads(struct glob_arg *g) { 2486 int i; 2487 2488 targs = calloc(g->nthreads, sizeof(*targs)); 2489 struct targ *t; 2490 /* 2491 * Now create the desired number of threads, each one 2492 * using a single descriptor. 2493 */ 2494 for (i = 0; i < g->nthreads; i++) { 2495 uint64_t seed = time(0) | (time(0) << 32); 2496 t = &targs[i]; 2497 2498 bzero(t, sizeof(*t)); 2499 t->fd = -1; /* default, with pcap */ 2500 t->g = g; 2501 memcpy(t->seed, &seed, sizeof(t->seed)); 2502 2503 if (g->dev_type == DEV_NETMAP) { 2504 struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */ 2505 uint64_t nmd_flags = 0; 2506 nmd.self = &nmd; 2507 2508 if (i > 0) { 2509 /* the first thread uses the fd opened by the main 2510 * thread, the other threads re-open /dev/netmap 2511 */ 2512 if (g->nthreads > 1) { 2513 nmd.req.nr_flags = 2514 g->nmd->req.nr_flags & ~NR_REG_MASK; 2515 nmd.req.nr_flags |= NR_REG_ONE_NIC; 2516 nmd.req.nr_ringid = i; 2517 } 2518 /* Only touch one of the rings (rx is already ok) */ 2519 if (g->td_type == TD_TYPE_RECEIVER) 2520 nmd_flags |= NETMAP_NO_TX_POLL; 2521 2522 /* register interface. Override ifname and ringid etc. */ 2523 t->nmd = nm_open(t->g->ifname, NULL, nmd_flags | 2524 NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, &nmd); 2525 if (t->nmd == NULL) { 2526 D("Unable to open %s: %s", 2527 t->g->ifname, strerror(errno)); 2528 continue; 2529 } 2530 } else { 2531 t->nmd = g->nmd; 2532 } 2533 t->fd = t->nmd->fd; 2534 t->frags = g->frags; 2535 } else { 2536 targs[i].fd = g->main_fd; 2537 } 2538 t->used = 1; 2539 t->me = i; 2540 if (g->affinity >= 0) { 2541 t->affinity = (g->affinity + i) % g->cpus; 2542 } else { 2543 t->affinity = -1; 2544 } 2545 /* default, init packets */ 2546 initialize_packet(t); 2547 } 2548 /* Wait for PHY reset. */ 2549 D("Wait %d secs for phy reset", g->wait_link); 2550 sleep(g->wait_link); 2551 D("Ready..."); 2552 2553 for (i = 0; i < g->nthreads; i++) { 2554 t = &targs[i]; 2555 if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) { 2556 D("Unable to create thread %d: %s", i, strerror(errno)); 2557 t->used = 0; 2558 } 2559 } 2560 } 2561 2562 static void 2563 main_thread(struct glob_arg *g) 2564 { 2565 int i; 2566 2567 struct my_ctrs prev, cur; 2568 double delta_t; 2569 struct timeval tic, toc; 2570 2571 prev.pkts = prev.bytes = prev.events = 0; 2572 gettimeofday(&prev.t, NULL); 2573 for (;;) { 2574 char b1[40], b2[40], b3[40], b4[100]; 2575 uint64_t pps, usec; 2576 struct my_ctrs x; 2577 double abs; 2578 int done = 0; 2579 2580 usec = wait_for_next_report(&prev.t, &cur.t, 2581 g->report_interval); 2582 2583 cur.pkts = cur.bytes = cur.events = 0; 2584 cur.min_space = 0; 2585 if (usec < 10000) /* too short to be meaningful */ 2586 continue; 2587 /* accumulate counts for all threads */ 2588 for (i = 0; i < g->nthreads; i++) { 2589 cur.pkts += targs[i].ctr.pkts; 2590 cur.bytes += targs[i].ctr.bytes; 2591 cur.events += targs[i].ctr.events; 2592 cur.min_space += targs[i].ctr.min_space; 2593 targs[i].ctr.min_space = 99999; 2594 if (targs[i].used == 0) 2595 done++; 2596 } 2597 x.pkts = cur.pkts - prev.pkts; 2598 x.bytes = cur.bytes - prev.bytes; 2599 x.events = cur.events - prev.events; 2600 pps = (x.pkts*1000000 + usec/2) / usec; 2601 abs = (x.events > 0) ? (x.pkts / (double) x.events) : 0; 2602 2603 if (!(g->options & OPT_PPS_STATS)) { 2604 strcpy(b4, ""); 2605 } else { 2606 /* Compute some pps stats using a sliding window. */ 2607 double ppsavg = 0.0, ppsdev = 0.0; 2608 int nsamples = 0; 2609 2610 g->win[g->win_idx] = pps; 2611 g->win_idx = (g->win_idx + 1) % STATS_WIN; 2612 2613 for (i = 0; i < STATS_WIN; i++) { 2614 ppsavg += g->win[i]; 2615 if (g->win[i]) { 2616 nsamples ++; 2617 } 2618 } 2619 ppsavg /= nsamples; 2620 2621 for (i = 0; i < STATS_WIN; i++) { 2622 if (g->win[i] == 0) { 2623 continue; 2624 } 2625 ppsdev += (g->win[i] - ppsavg) * (g->win[i] - ppsavg); 2626 } 2627 ppsdev /= nsamples; 2628 ppsdev = sqrt(ppsdev); 2629 2630 snprintf(b4, sizeof(b4), "[avg/std %s/%s pps]", 2631 norm(b1, ppsavg, normalize), norm(b2, ppsdev, normalize)); 2632 } 2633 2634 D("%spps %s(%spkts %sbps in %llu usec) %.2f avg_batch %d min_space", 2635 norm(b1, pps, normalize), b4, 2636 norm(b2, (double)x.pkts, normalize), 2637 norm(b3, (double)x.bytes*8+(double)x.pkts*g->framing, normalize), 2638 (unsigned long long)usec, 2639 abs, (int)cur.min_space); 2640 prev = cur; 2641 2642 if (done == g->nthreads) 2643 break; 2644 } 2645 2646 timerclear(&tic); 2647 timerclear(&toc); 2648 cur.pkts = cur.bytes = cur.events = 0; 2649 /* final round */ 2650 for (i = 0; i < g->nthreads; i++) { 2651 struct timespec t_tic, t_toc; 2652 /* 2653 * Join active threads, unregister interfaces and close 2654 * file descriptors. 2655 */ 2656 if (targs[i].used) 2657 pthread_join(targs[i].thread, NULL); /* blocking */ 2658 if (g->dev_type == DEV_NETMAP) { 2659 nm_close(targs[i].nmd); 2660 targs[i].nmd = NULL; 2661 } else { 2662 close(targs[i].fd); 2663 } 2664 2665 if (targs[i].completed == 0) 2666 D("ouch, thread %d exited with error", i); 2667 2668 /* 2669 * Collect threads output and extract information about 2670 * how long it took to send all the packets. 2671 */ 2672 cur.pkts += targs[i].ctr.pkts; 2673 cur.bytes += targs[i].ctr.bytes; 2674 cur.events += targs[i].ctr.events; 2675 /* collect the largest start (tic) and end (toc) times, 2676 * XXX maybe we should do the earliest tic, or do a weighted 2677 * average ? 2678 */ 2679 t_tic = timeval2spec(&tic); 2680 t_toc = timeval2spec(&toc); 2681 if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic)) 2682 tic = timespec2val(&targs[i].tic); 2683 if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc)) 2684 toc = timespec2val(&targs[i].toc); 2685 } 2686 2687 /* print output. */ 2688 timersub(&toc, &tic, &toc); 2689 delta_t = toc.tv_sec + 1e-6* toc.tv_usec; 2690 if (g->td_type == TD_TYPE_SENDER) 2691 tx_output(g, &cur, delta_t, "Sent"); 2692 else if (g->td_type == TD_TYPE_RECEIVER) 2693 tx_output(g, &cur, delta_t, "Received"); 2694 } 2695 2696 struct td_desc { 2697 int ty; 2698 char *key; 2699 void *f; 2700 int default_burst; 2701 }; 2702 2703 static struct td_desc func[] = { 2704 { TD_TYPE_RECEIVER, "rx", receiver_body, 512}, /* default */ 2705 { TD_TYPE_SENDER, "tx", sender_body, 512 }, 2706 { TD_TYPE_OTHER, "ping", ping_body, 1 }, 2707 { TD_TYPE_OTHER, "pong", pong_body, 1 }, 2708 { TD_TYPE_SENDER, "txseq", txseq_body, 512 }, 2709 { TD_TYPE_RECEIVER, "rxseq", rxseq_body, 512 }, 2710 { 0, NULL, NULL, 0 } 2711 }; 2712 2713 static int 2714 tap_alloc(char *dev) 2715 { 2716 struct ifreq ifr; 2717 int fd, err; 2718 char *clonedev = TAP_CLONEDEV; 2719 2720 (void)err; 2721 (void)dev; 2722 /* Arguments taken by the function: 2723 * 2724 * char *dev: the name of an interface (or '\0'). MUST have enough 2725 * space to hold the interface name if '\0' is passed 2726 * int flags: interface flags (eg, IFF_TUN etc.) 2727 */ 2728 2729 #ifdef __FreeBSD__ 2730 if (dev[3]) { /* tapSomething */ 2731 static char buf[128]; 2732 snprintf(buf, sizeof(buf), "/dev/%s", dev); 2733 clonedev = buf; 2734 } 2735 #endif 2736 /* open the device */ 2737 if( (fd = open(clonedev, O_RDWR)) < 0 ) { 2738 return fd; 2739 } 2740 D("%s open successful", clonedev); 2741 2742 /* preparation of the struct ifr, of type "struct ifreq" */ 2743 memset(&ifr, 0, sizeof(ifr)); 2744 2745 #ifdef linux 2746 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 2747 2748 if (*dev) { 2749 /* if a device name was specified, put it in the structure; otherwise, 2750 * the kernel will try to allocate the "next" device of the 2751 * specified type */ 2752 size_t len = strlen(dev); 2753 if (len > IFNAMSIZ) { 2754 D("%s too long", dev); 2755 return -1; 2756 } 2757 memcpy(ifr.ifr_name, dev, len); 2758 } 2759 2760 /* try to create the device */ 2761 if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) { 2762 D("failed to to a TUNSETIFF: %s", strerror(errno)); 2763 close(fd); 2764 return err; 2765 } 2766 2767 /* if the operation was successful, write back the name of the 2768 * interface to the variable "dev", so the caller can know 2769 * it. Note that the caller MUST reserve space in *dev (see calling 2770 * code below) */ 2771 strcpy(dev, ifr.ifr_name); 2772 D("new name is %s", dev); 2773 #endif /* linux */ 2774 2775 /* this is the special file descriptor that the caller will use to talk 2776 * with the virtual interface */ 2777 return fd; 2778 } 2779 2780 int 2781 main(int arc, char **argv) 2782 { 2783 int i; 2784 struct sigaction sa; 2785 sigset_t ss; 2786 2787 struct glob_arg g; 2788 2789 int ch; 2790 int devqueues = 1; /* how many device queues */ 2791 int wait_link_arg = 0; 2792 2793 int pkt_size_done = 0; 2794 2795 struct td_desc *fn = func; 2796 2797 bzero(&g, sizeof(g)); 2798 2799 g.main_fd = -1; 2800 g.td_body = fn->f; 2801 g.td_type = fn->ty; 2802 g.report_interval = 1000; /* report interval */ 2803 g.affinity = -1; 2804 /* ip addresses can also be a range x.x.x.x-x.x.x.y */ 2805 g.af = AF_INET; /* default */ 2806 g.src_ip.name = "10.0.0.1"; 2807 g.dst_ip.name = "10.1.0.1"; 2808 g.dst_mac.name = "ff:ff:ff:ff:ff:ff"; 2809 g.src_mac.name = NULL; 2810 g.pkt_size = 60; 2811 g.pkt_min_size = 0; 2812 g.nthreads = 1; 2813 g.cpus = 1; /* default */ 2814 g.forever = 1; 2815 g.tx_rate = 0; 2816 g.frags = 1; 2817 g.frag_size = (u_int)-1; /* use the netmap buffer size by default */ 2818 g.nmr_config = ""; 2819 g.virt_header = 0; 2820 g.wait_link = 2; /* wait 2 seconds for physical ports */ 2821 2822 while ((ch = getopt(arc, argv, "46a:f:F:Nn:i:Il:d:s:D:S:b:c:o:p:" 2823 "T:w:WvR:XC:H:rP:zZAhBM:")) != -1) { 2824 2825 switch(ch) { 2826 default: 2827 D("bad option %c %s", ch, optarg); 2828 usage(-1); 2829 break; 2830 2831 case 'h': 2832 usage(0); 2833 break; 2834 2835 case '4': 2836 g.af = AF_INET; 2837 break; 2838 2839 case '6': 2840 g.af = AF_INET6; 2841 break; 2842 2843 case 'N': 2844 normalize = 0; 2845 break; 2846 2847 case 'n': 2848 g.npackets = strtoull(optarg, NULL, 10); 2849 break; 2850 2851 case 'F': 2852 i = atoi(optarg); 2853 if (i < 1 || i > 63) { 2854 D("invalid frags %d [1..63], ignore", i); 2855 break; 2856 } 2857 g.frags = i; 2858 break; 2859 2860 case 'M': 2861 g.frag_size = atoi(optarg); 2862 break; 2863 2864 case 'f': 2865 for (fn = func; fn->key; fn++) { 2866 if (!strcmp(fn->key, optarg)) 2867 break; 2868 } 2869 if (fn->key) { 2870 g.td_body = fn->f; 2871 g.td_type = fn->ty; 2872 } else { 2873 D("unrecognised function %s", optarg); 2874 } 2875 break; 2876 2877 case 'o': /* data generation options */ 2878 g.options |= atoi(optarg); 2879 break; 2880 2881 case 'a': /* force affinity */ 2882 g.affinity = atoi(optarg); 2883 break; 2884 2885 case 'i': /* interface */ 2886 /* a prefix of tap: netmap: or pcap: forces the mode. 2887 * otherwise we guess 2888 */ 2889 D("interface is %s", optarg); 2890 if (strlen(optarg) > MAX_IFNAMELEN - 8) { 2891 D("ifname too long %s", optarg); 2892 break; 2893 } 2894 strcpy(g.ifname, optarg); 2895 if (!strcmp(optarg, "null")) { 2896 g.dev_type = DEV_NETMAP; 2897 g.dummy_send = 1; 2898 } else if (!strncmp(optarg, "tap:", 4)) { 2899 g.dev_type = DEV_TAP; 2900 strcpy(g.ifname, optarg + 4); 2901 } else if (!strncmp(optarg, "pcap:", 5)) { 2902 g.dev_type = DEV_PCAP; 2903 strcpy(g.ifname, optarg + 5); 2904 } else if (!strncmp(optarg, "netmap:", 7) || 2905 !strncmp(optarg, "vale", 4)) { 2906 g.dev_type = DEV_NETMAP; 2907 } else if (!strncmp(optarg, "tap", 3)) { 2908 g.dev_type = DEV_TAP; 2909 } else { /* prepend netmap: */ 2910 g.dev_type = DEV_NETMAP; 2911 sprintf(g.ifname, "netmap:%s", optarg); 2912 } 2913 break; 2914 2915 case 'I': 2916 g.options |= OPT_INDIRECT; /* use indirect buffers */ 2917 break; 2918 2919 case 'l': /* pkt_size */ 2920 if (pkt_size_done) { 2921 g.pkt_min_size = atoi(optarg); 2922 } else { 2923 g.pkt_size = atoi(optarg); 2924 pkt_size_done = 1; 2925 } 2926 break; 2927 2928 case 'd': 2929 g.dst_ip.name = optarg; 2930 break; 2931 2932 case 's': 2933 g.src_ip.name = optarg; 2934 break; 2935 2936 case 'T': /* report interval */ 2937 g.report_interval = atoi(optarg); 2938 break; 2939 2940 case 'w': 2941 g.wait_link = atoi(optarg); 2942 wait_link_arg = 1; 2943 break; 2944 2945 case 'W': 2946 g.forever = 0; /* exit RX with no traffic */ 2947 break; 2948 2949 case 'b': /* burst */ 2950 g.burst = atoi(optarg); 2951 break; 2952 case 'c': 2953 g.cpus = atoi(optarg); 2954 break; 2955 case 'p': 2956 g.nthreads = atoi(optarg); 2957 break; 2958 2959 case 'D': /* destination mac */ 2960 g.dst_mac.name = optarg; 2961 break; 2962 2963 case 'S': /* source mac */ 2964 g.src_mac.name = optarg; 2965 break; 2966 case 'v': 2967 verbose++; 2968 break; 2969 case 'R': 2970 g.tx_rate = atoi(optarg); 2971 break; 2972 case 'X': 2973 g.options |= OPT_DUMP; 2974 break; 2975 case 'C': 2976 g.nmr_config = strdup(optarg); 2977 break; 2978 case 'H': 2979 g.virt_header = atoi(optarg); 2980 break; 2981 case 'P': 2982 g.packet_file = strdup(optarg); 2983 break; 2984 case 'r': 2985 g.options |= OPT_RUBBISH; 2986 break; 2987 case 'z': 2988 g.options |= OPT_RANDOM_SRC; 2989 break; 2990 case 'Z': 2991 g.options |= OPT_RANDOM_DST; 2992 break; 2993 case 'A': 2994 g.options |= OPT_PPS_STATS; 2995 break; 2996 case 'B': 2997 /* raw packets have4 bytes crc + 20 bytes framing */ 2998 // XXX maybe add an option to pass the IFG 2999 g.framing = 24 * 8; 3000 break; 3001 } 3002 } 3003 3004 if (strlen(g.ifname) <=0 ) { 3005 D("missing ifname"); 3006 usage(-1); 3007 } 3008 3009 if (g.burst == 0) { 3010 g.burst = fn->default_burst; 3011 D("using default burst size: %d", g.burst); 3012 } 3013 3014 g.system_cpus = i = system_ncpus(); 3015 if (g.cpus < 0 || g.cpus > i) { 3016 D("%d cpus is too high, have only %d cpus", g.cpus, i); 3017 usage(-1); 3018 } 3019 D("running on %d cpus (have %d)", g.cpus, i); 3020 if (g.cpus == 0) 3021 g.cpus = i; 3022 3023 if (!wait_link_arg && !strncmp(g.ifname, "vale", 4)) { 3024 g.wait_link = 0; 3025 } 3026 3027 if (g.pkt_size < 16 || g.pkt_size > MAX_PKTSIZE) { 3028 D("bad pktsize %d [16..%d]\n", g.pkt_size, MAX_PKTSIZE); 3029 usage(-1); 3030 } 3031 3032 if (g.pkt_min_size > 0 && (g.pkt_min_size < 16 || g.pkt_min_size > g.pkt_size)) { 3033 D("bad pktminsize %d [16..%d]\n", g.pkt_min_size, g.pkt_size); 3034 usage(-1); 3035 } 3036 3037 if (g.src_mac.name == NULL) { 3038 static char mybuf[20] = "00:00:00:00:00:00"; 3039 /* retrieve source mac address. */ 3040 if (source_hwaddr(g.ifname, mybuf) == -1) { 3041 D("Unable to retrieve source mac"); 3042 // continue, fail later 3043 } 3044 g.src_mac.name = mybuf; 3045 } 3046 /* extract address ranges */ 3047 if (extract_mac_range(&g.src_mac) || extract_mac_range(&g.dst_mac)) 3048 usage(-1); 3049 g.options |= extract_ip_range(&g.src_ip, g.af); 3050 g.options |= extract_ip_range(&g.dst_ip, g.af); 3051 3052 if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1 3053 && g.virt_header != VIRT_HDR_2) { 3054 D("bad virtio-net-header length"); 3055 usage(-1); 3056 } 3057 3058 if (g.dev_type == DEV_TAP) { 3059 D("want to use tap %s", g.ifname); 3060 g.main_fd = tap_alloc(g.ifname); 3061 if (g.main_fd < 0) { 3062 D("cannot open tap %s", g.ifname); 3063 usage(-1); 3064 } 3065 #ifndef NO_PCAP 3066 } else if (g.dev_type == DEV_PCAP) { 3067 char pcap_errbuf[PCAP_ERRBUF_SIZE]; 3068 3069 pcap_errbuf[0] = '\0'; // init the buffer 3070 g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf); 3071 if (g.p == NULL) { 3072 D("cannot open pcap on %s", g.ifname); 3073 usage(-1); 3074 } 3075 g.main_fd = pcap_fileno(g.p); 3076 D("using pcap on %s fileno %d", g.ifname, g.main_fd); 3077 #endif /* !NO_PCAP */ 3078 } else if (g.dummy_send) { /* but DEV_NETMAP */ 3079 D("using a dummy send routine"); 3080 } else { 3081 struct nm_desc base_nmd; 3082 char errmsg[MAXERRMSG]; 3083 u_int flags; 3084 3085 bzero(&base_nmd, sizeof(base_nmd)); 3086 3087 parse_nmr_config(g.nmr_config, &base_nmd.req); 3088 3089 base_nmd.req.nr_flags |= NR_ACCEPT_VNET_HDR; 3090 3091 if (nm_parse(g.ifname, &base_nmd, errmsg) < 0) { 3092 D("Invalid name '%s': %s", g.ifname, errmsg); 3093 goto out; 3094 } 3095 3096 /* 3097 * Open the netmap device using nm_open(). 3098 * 3099 * protocol stack and may cause a reset of the card, 3100 * which in turn may take some time for the PHY to 3101 * reconfigure. We do the open here to have time to reset. 3102 */ 3103 flags = NM_OPEN_IFNAME | NM_OPEN_ARG1 | NM_OPEN_ARG2 | 3104 NM_OPEN_ARG3 | NM_OPEN_RING_CFG; 3105 if (g.nthreads > 1) { 3106 base_nmd.req.nr_flags &= ~NR_REG_MASK; 3107 base_nmd.req.nr_flags |= NR_REG_ONE_NIC; 3108 base_nmd.req.nr_ringid = 0; 3109 } 3110 g.nmd = nm_open(g.ifname, NULL, flags, &base_nmd); 3111 if (g.nmd == NULL) { 3112 D("Unable to open %s: %s", g.ifname, strerror(errno)); 3113 goto out; 3114 } 3115 g.main_fd = g.nmd->fd; 3116 D("mapped %luKB at %p", (unsigned long)(g.nmd->req.nr_memsize>>10), 3117 g.nmd->mem); 3118 3119 if (g.virt_header) { 3120 /* Set the virtio-net header length, since the user asked 3121 * for it explicitely. */ 3122 set_vnet_hdr_len(&g); 3123 } else { 3124 /* Check whether the netmap port we opened requires us to send 3125 * and receive frames with virtio-net header. */ 3126 get_vnet_hdr_len(&g); 3127 } 3128 3129 /* get num of queues in tx or rx */ 3130 if (g.td_type == TD_TYPE_SENDER) 3131 devqueues = g.nmd->req.nr_tx_rings; 3132 else 3133 devqueues = g.nmd->req.nr_rx_rings; 3134 3135 /* validate provided nthreads. */ 3136 if (g.nthreads < 1 || g.nthreads > devqueues) { 3137 D("bad nthreads %d, have %d queues", g.nthreads, devqueues); 3138 // continue, fail later 3139 } 3140 3141 if (g.td_type == TD_TYPE_SENDER) { 3142 int mtu = get_if_mtu(&g); 3143 3144 if (mtu > 0 && g.pkt_size > mtu) { 3145 D("pkt_size (%d) must be <= mtu (%d)", 3146 g.pkt_size, mtu); 3147 return -1; 3148 } 3149 } 3150 3151 if (verbose) { 3152 struct netmap_if *nifp = g.nmd->nifp; 3153 struct nmreq *req = &g.nmd->req; 3154 3155 D("nifp at offset %d, %d tx %d rx region %d", 3156 req->nr_offset, req->nr_tx_rings, req->nr_rx_rings, 3157 req->nr_arg2); 3158 for (i = 0; i <= req->nr_tx_rings; i++) { 3159 struct netmap_ring *ring = NETMAP_TXRING(nifp, i); 3160 D(" TX%d at 0x%p slots %d", i, 3161 (void *)((char *)ring - (char *)nifp), ring->num_slots); 3162 } 3163 for (i = 0; i <= req->nr_rx_rings; i++) { 3164 struct netmap_ring *ring = NETMAP_RXRING(nifp, i); 3165 D(" RX%d at 0x%p slots %d", i, 3166 (void *)((char *)ring - (char *)nifp), ring->num_slots); 3167 } 3168 } 3169 3170 /* Print some debug information. */ 3171 fprintf(stdout, 3172 "%s %s: %d queues, %d threads and %d cpus.\n", 3173 (g.td_type == TD_TYPE_SENDER) ? "Sending on" : 3174 ((g.td_type == TD_TYPE_RECEIVER) ? "Receiving from" : 3175 "Working on"), 3176 g.ifname, 3177 devqueues, 3178 g.nthreads, 3179 g.cpus); 3180 if (g.td_type == TD_TYPE_SENDER) { 3181 fprintf(stdout, "%s -> %s (%s -> %s)\n", 3182 g.src_ip.name, g.dst_ip.name, 3183 g.src_mac.name, g.dst_mac.name); 3184 } 3185 3186 out: 3187 /* Exit if something went wrong. */ 3188 if (g.main_fd < 0) { 3189 D("aborting"); 3190 usage(-1); 3191 } 3192 } 3193 3194 3195 if (g.options) { 3196 D("--- SPECIAL OPTIONS:%s%s%s%s%s%s\n", 3197 g.options & OPT_PREFETCH ? " prefetch" : "", 3198 g.options & OPT_ACCESS ? " access" : "", 3199 g.options & OPT_MEMCPY ? " memcpy" : "", 3200 g.options & OPT_INDIRECT ? " indirect" : "", 3201 g.options & OPT_COPY ? " copy" : "", 3202 g.options & OPT_RUBBISH ? " rubbish " : ""); 3203 } 3204 3205 g.tx_period.tv_sec = g.tx_period.tv_nsec = 0; 3206 if (g.tx_rate > 0) { 3207 /* try to have at least something every second, 3208 * reducing the burst size to some 0.01s worth of data 3209 * (but no less than one full set of fragments) 3210 */ 3211 uint64_t x; 3212 int lim = (g.tx_rate)/300; 3213 if (g.burst > lim) 3214 g.burst = lim; 3215 if (g.burst == 0) 3216 g.burst = 1; 3217 x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate; 3218 g.tx_period.tv_nsec = x; 3219 g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000; 3220 g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000; 3221 } 3222 if (g.td_type == TD_TYPE_SENDER) 3223 D("Sending %d packets every %ld.%09ld s", 3224 g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec); 3225 /* Install ^C handler. */ 3226 global_nthreads = g.nthreads; 3227 sigemptyset(&ss); 3228 sigaddset(&ss, SIGINT); 3229 /* block SIGINT now, so that all created threads will inherit the mask */ 3230 if (pthread_sigmask(SIG_BLOCK, &ss, NULL) < 0) { 3231 D("failed to block SIGINT: %s", strerror(errno)); 3232 } 3233 start_threads(&g); 3234 /* Install the handler and re-enable SIGINT for the main thread */ 3235 memset(&sa, 0, sizeof(sa)); 3236 sa.sa_handler = sigint_h; 3237 if (sigaction(SIGINT, &sa, NULL) < 0) { 3238 D("failed to install ^C handler: %s", strerror(errno)); 3239 } 3240 3241 if (pthread_sigmask(SIG_UNBLOCK, &ss, NULL) < 0) { 3242 D("failed to re-enable SIGINT: %s", strerror(errno)); 3243 } 3244 main_thread(&g); 3245 free(targs); 3246 return 0; 3247 } 3248 3249 /* end of file */ 3250