1 /* 2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 3 * Copyright (C) 2013-2015 Universita` di Pisa. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 /* 28 * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $ 29 * 30 * Example program to show how to build a multithreaded packet 31 * source/sink using the netmap device. 32 * 33 * In this example we create a programmable number of threads 34 * to take care of all the queues of the interface used to 35 * send or receive traffic. 
36 * 37 */ 38 39 #define _GNU_SOURCE /* for CPU_SET() */ 40 #include <arpa/inet.h> /* ntohs */ 41 #include <assert.h> 42 #include <ctype.h> // isprint() 43 #include <errno.h> 44 #include <fcntl.h> 45 #include <ifaddrs.h> /* getifaddrs */ 46 #include <libnetmap.h> 47 #include <math.h> 48 #include <net/ethernet.h> 49 #include <netinet/in.h> 50 #include <netinet/ip.h> 51 #include <netinet/ip6.h> 52 #include <netinet/udp.h> 53 #ifndef NO_PCAP 54 #include <pcap/pcap.h> 55 #endif 56 #include <pthread.h> 57 #include <signal.h> 58 #include <stdio.h> 59 #include <stdlib.h> 60 #include <string.h> 61 #include <sys/ioctl.h> 62 #include <sys/poll.h> 63 #include <sys/stat.h> 64 #if !defined(_WIN32) && !defined(linux) 65 #include <sys/sysctl.h> /* sysctl */ 66 #endif 67 #include <sys/types.h> 68 #include <unistd.h> // sysconf() 69 #ifdef linux 70 #define IPV6_VERSION 0x60 71 #define IPV6_DEFHLIM 64 72 #endif 73 74 #include "ctrs.h" 75 76 static void usage(int); 77 78 #ifdef _WIN32 79 #define cpuset_t DWORD_PTR //uint64_t 80 static inline void CPU_ZERO(cpuset_t *p) 81 { 82 *p = 0; 83 } 84 85 static inline void CPU_SET(uint32_t i, cpuset_t *p) 86 { 87 *p |= 1<< (i & 0x3f); 88 } 89 90 #define pthread_setaffinity_np(a, b, c) !SetThreadAffinityMask(a, *c) //((void)a, 0) 91 #define TAP_CLONEDEV "/dev/tap" 92 #define AF_LINK 18 //defined in winsocks.h 93 #define CLOCK_REALTIME_PRECISE CLOCK_REALTIME 94 #include <net/if_dl.h> 95 96 /* 97 * Convert an ASCII representation of an ethernet address to 98 * binary form. 
99 */ 100 struct ether_addr * 101 ether_aton(const char *a) 102 { 103 int i; 104 static struct ether_addr o; 105 unsigned int o0, o1, o2, o3, o4, o5; 106 107 i = sscanf(a, "%x:%x:%x:%x:%x:%x", &o0, &o1, &o2, &o3, &o4, &o5); 108 109 if (i != 6) 110 return (NULL); 111 112 o.octet[0]=o0; 113 o.octet[1]=o1; 114 o.octet[2]=o2; 115 o.octet[3]=o3; 116 o.octet[4]=o4; 117 o.octet[5]=o5; 118 119 return ((struct ether_addr *)&o); 120 } 121 122 /* 123 * Convert a binary representation of an ethernet address to 124 * an ASCII string. 125 */ 126 char * 127 ether_ntoa(const struct ether_addr *n) 128 { 129 int i; 130 static char a[18]; 131 132 i = sprintf(a, "%02x:%02x:%02x:%02x:%02x:%02x", 133 n->octet[0], n->octet[1], n->octet[2], 134 n->octet[3], n->octet[4], n->octet[5]); 135 return (i < 17 ? NULL : (char *)&a); 136 } 137 #endif /* _WIN32 */ 138 139 #ifdef linux 140 141 #define cpuset_t cpu_set_t 142 143 #define ifr_flagshigh ifr_flags /* only the low 16 bits here */ 144 #define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */ 145 #include <linux/ethtool.h> 146 #include <linux/sockios.h> 147 148 #define CLOCK_REALTIME_PRECISE CLOCK_REALTIME 149 #include <netinet/ether.h> /* ether_aton */ 150 #include <linux/if_packet.h> /* sockaddr_ll */ 151 #endif /* linux */ 152 153 #ifdef __FreeBSD__ 154 #include <sys/endian.h> /* le64toh */ 155 #include <machine/param.h> 156 157 #include <pthread_np.h> /* pthread w/ affinity */ 158 #include <sys/cpuset.h> /* cpu_set */ 159 #include <net/if_dl.h> /* LLADDR */ 160 #endif /* __FreeBSD__ */ 161 162 #ifdef __APPLE__ 163 164 #define cpuset_t uint64_t // XXX 165 static inline void CPU_ZERO(cpuset_t *p) 166 { 167 *p = 0; 168 } 169 170 static inline void CPU_SET(uint32_t i, cpuset_t *p) 171 { 172 *p |= 1<< (i & 0x3f); 173 } 174 175 #define pthread_setaffinity_np(a, b, c) ((void)a, 0) 176 177 #define ifr_flagshigh ifr_flags // XXX 178 #define IFF_PPROMISC IFF_PROMISC 179 #include <net/if_dl.h> /* LLADDR */ 180 #define clock_gettime(a,b) \ 
181 do {struct timespec t0 = {0,0}; *(b) = t0; } while (0) 182 #endif /* __APPLE__ */ 183 184 static const char *default_payload = "netmap pkt-gen DIRECT payload\n" 185 "http://info.iet.unipi.it/~luigi/netmap/ "; 186 187 static const char *indirect_payload = "netmap pkt-gen indirect payload\n" 188 "http://info.iet.unipi.it/~luigi/netmap/ "; 189 190 static int verbose = 0; 191 static int normalize = 1; 192 193 #define VIRT_HDR_1 10 /* length of a base vnet-hdr */ 194 #define VIRT_HDR_2 12 /* length of the extenede vnet-hdr */ 195 #define VIRT_HDR_MAX VIRT_HDR_2 196 struct virt_header { 197 uint8_t fields[VIRT_HDR_MAX]; 198 }; 199 200 #define MAX_BODYSIZE 65536 201 202 struct pkt { 203 struct virt_header vh; 204 struct ether_header eh; 205 union { 206 struct { 207 struct ip ip; 208 struct udphdr udp; 209 uint8_t body[MAX_BODYSIZE]; /* hardwired */ 210 } ipv4; 211 struct { 212 struct ip6_hdr ip; 213 struct udphdr udp; 214 uint8_t body[MAX_BODYSIZE]; /* hardwired */ 215 } ipv6; 216 }; 217 } __attribute__((__packed__)); 218 219 #define PKT(p, f, af) \ 220 ((af) == AF_INET ? (p)->ipv4.f: (p)->ipv6.f) 221 222 struct ip_range { 223 const char *name; 224 union { 225 struct { 226 uint32_t start, end; /* same as struct in_addr */ 227 } ipv4; 228 struct { 229 struct in6_addr start, end; 230 uint8_t sgroup, egroup; 231 } ipv6; 232 }; 233 uint16_t port0, port1; 234 }; 235 236 struct mac_range { 237 const char *name; 238 struct ether_addr start, end; 239 }; 240 241 /* ifname can be netmap:foo-xxxx */ 242 #define MAX_IFNAMELEN 512 /* our buffer for ifname */ 243 //#define MAX_PKTSIZE 1536 244 #define MAX_PKTSIZE MAX_BODYSIZE /* XXX: + IP_HDR + ETH_HDR */ 245 246 /* compact timestamp to fit into 60 byte packet. 
 * (enough to obtain RTT) */
struct tstamp {
	uint32_t sec;
	uint32_t nsec;
};

/*
 * global arguments for all threads
 */

struct glob_arg {
	int af;		/* address family AF_INET/AF_INET6 */
	struct ip_range src_ip;
	struct ip_range dst_ip;
	struct mac_range dst_mac;
	struct mac_range src_mac;
	/* frame length including the ethernet header: initialize_packet()
	 * derives the IP and UDP lengths by subtracting the header sizes */
	int pkt_size;
	int pkt_min_size;
	int burst;
	int forever;
	uint64_t npackets;	/* total packets to send */
	int frags;		/* fragments per packet */
	u_int frag_size;	/* size of each fragment */
	int nthreads;
	int cpus;	/* cpus used for running */
	int system_cpus;	/* cpus on the system */

	int options;	/* testing */
#define OPT_PREFETCH	1
#define OPT_ACCESS	2
#define OPT_COPY	4	/* also returned by extract_ip_range() for a non-trivial range */
#define OPT_MEMCPY	8
#define OPT_TS		16	/* add a timestamp */
#define OPT_INDIRECT	32	/* use indirect buffers, tx only */
#define OPT_DUMP	64	/* dump rx/tx traffic */
#define OPT_RUBBISH	256	/* send whatever the buffers contain */
#define OPT_RANDOM_SRC  512	/* update_ip()/update_ip6() pick random source address and port */
#define OPT_RANDOM_DST  1024	/* same, for destination address and port */
#define OPT_PPS_STATS   2048
#define OPT_UPDATE_CSUM 4096	/* update_size() recomputes lengths and checksums */
	int dev_type;	/* presumably one of enum dev_type below -- TODO confirm */
#ifndef NO_PCAP
	pcap_t *p;
#endif

	int tx_rate;
	struct timespec tx_period;

	int affinity;
	int main_fd;	/* descriptor used for the NIOCCTRL ioctls */
	struct nmport_d *nmd;	/* its hdr supplies port name and version for control requests */
	uint32_t orig_mode;
	int report_interval;		/* milliseconds between prints */
	void *(*td_body)(void *);
	int td_type;	/* presumably one of TD_TYPE_* below -- TODO confirm */
	void *mmap_addr;
	char ifname[MAX_IFNAMELEN];
	const char *nmr_config;
	int dummy_send;
	int virt_header;	/* send also the virt_header */
	char *packet_file;	/* -P option */
#define	STATS_WIN	15
	int win_idx;
	int64_t win[STATS_WIN];
	int wait_link;
	int framing;	/* #bits of framing (for bw output) */
};
enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP };

enum {
	TD_TYPE_SENDER = 1,
	TD_TYPE_RECEIVER,
	TD_TYPE_OTHER,
};

/*
 * Arguments for a new thread.
The same structure is used by 323 * the source and the sink 324 */ 325 struct targ { 326 struct glob_arg *g; 327 int used; 328 int completed; 329 int cancel; 330 int fd; 331 struct nmport_d *nmd; 332 /* these ought to be volatile, but they are 333 * only sampled and errors should not accumulate 334 */ 335 struct my_ctrs ctr; 336 337 struct timespec tic, toc; 338 int me; 339 pthread_t thread; 340 int affinity; 341 342 struct pkt pkt; 343 void *frame; 344 uint16_t seed[3]; 345 u_int frags; 346 u_int frag_size; 347 }; 348 349 static __inline uint16_t 350 cksum_add(uint16_t sum, uint16_t a) 351 { 352 uint16_t res; 353 354 res = sum + a; 355 return (res + (res < a)); 356 } 357 358 static void 359 extract_ipv4_addr(char *name, uint32_t *addr, uint16_t *port) 360 { 361 struct in_addr a; 362 char *pp; 363 364 pp = strchr(name, ':'); 365 if (pp != NULL) { /* do we have ports ? */ 366 *pp++ = '\0'; 367 *port = (uint16_t)strtol(pp, NULL, 0); 368 } 369 370 inet_pton(AF_INET, name, &a); 371 *addr = ntohl(a.s_addr); 372 } 373 374 static void 375 extract_ipv6_addr(char *name, struct in6_addr *addr, uint16_t *port, 376 uint8_t *group) 377 { 378 char *pp; 379 380 /* 381 * We accept IPv6 address in the following form: 382 * group@[2001:DB8::1001]:port (w/ brackets and port) 383 * group@[2001:DB8::1] (w/ brackets and w/o port) 384 * group@2001:DB8::1234 (w/o brackets and w/o port) 385 */ 386 pp = strchr(name, '@'); 387 if (pp != NULL) { 388 *pp++ = '\0'; 389 *group = (uint8_t)strtol(name, NULL, 0); 390 if (*group > 7) 391 *group = 7; 392 name = pp; 393 } 394 if (name[0] == '[') 395 name++; 396 pp = strchr(name, ']'); 397 if (pp != NULL) 398 *pp++ = '\0'; 399 if (pp != NULL && *pp != ':') 400 pp = NULL; 401 if (pp != NULL) { /* do we have ports ? */ 402 *pp++ = '\0'; 403 *port = (uint16_t)strtol(pp, NULL, 0); 404 } 405 inet_pton(AF_INET6, name, addr); 406 } 407 /* 408 * extract the extremes from a range of ipv4 addresses. 
409 * addr_lo[-addr_hi][:port_lo[-port_hi]] 410 */ 411 static int 412 extract_ip_range(struct ip_range *r, int af) 413 { 414 char *name, *ap, start[INET6_ADDRSTRLEN]; 415 char end[INET6_ADDRSTRLEN]; 416 struct in_addr a; 417 uint32_t tmp; 418 419 if (verbose) 420 D("extract IP range from %s", r->name); 421 422 name = strdup(r->name); 423 if (name == NULL) { 424 D("strdup failed"); 425 usage(-1); 426 } 427 /* the first - splits start/end of range */ 428 ap = strchr(name, '-'); 429 if (ap != NULL) 430 *ap++ = '\0'; 431 r->port0 = 1234; /* default port */ 432 if (af == AF_INET6) { 433 r->ipv6.sgroup = 7; /* default group */ 434 extract_ipv6_addr(name, &r->ipv6.start, &r->port0, 435 &r->ipv6.sgroup); 436 } else 437 extract_ipv4_addr(name, &r->ipv4.start, &r->port0); 438 439 r->port1 = r->port0; 440 if (af == AF_INET6) { 441 if (ap != NULL) { 442 r->ipv6.egroup = r->ipv6.sgroup; 443 extract_ipv6_addr(ap, &r->ipv6.end, &r->port1, 444 &r->ipv6.egroup); 445 } else { 446 r->ipv6.end = r->ipv6.start; 447 r->ipv6.egroup = r->ipv6.sgroup; 448 } 449 } else { 450 if (ap != NULL) { 451 extract_ipv4_addr(ap, &r->ipv4.end, &r->port1); 452 if (r->ipv4.start > r->ipv4.end) { 453 tmp = r->ipv4.end; 454 r->ipv4.end = r->ipv4.start; 455 r->ipv4.start = tmp; 456 } 457 } else 458 r->ipv4.end = r->ipv4.start; 459 } 460 461 if (r->port0 > r->port1) { 462 tmp = r->port0; 463 r->port0 = r->port1; 464 r->port1 = tmp; 465 } 466 if (af == AF_INET) { 467 a.s_addr = htonl(r->ipv4.start); 468 inet_ntop(af, &a, start, sizeof(start)); 469 a.s_addr = htonl(r->ipv4.end); 470 inet_ntop(af, &a, end, sizeof(end)); 471 } else { 472 inet_ntop(af, &r->ipv6.start, start, sizeof(start)); 473 inet_ntop(af, &r->ipv6.end, end, sizeof(end)); 474 } 475 if (af == AF_INET) 476 D("range is %s:%d to %s:%d", start, r->port0, end, r->port1); 477 else 478 D("range is %d@[%s]:%d to %d@[%s]:%d", r->ipv6.sgroup, 479 start, r->port0, r->ipv6.egroup, end, r->port1); 480 481 free(name); 482 if (r->port0 != r->port1 || 483 (af 
== AF_INET && r->ipv4.start != r->ipv4.end) || 484 (af == AF_INET6 && 485 !IN6_ARE_ADDR_EQUAL(&r->ipv6.start, &r->ipv6.end))) 486 return (OPT_COPY); 487 return (0); 488 } 489 490 static int 491 extract_mac_range(struct mac_range *r) 492 { 493 struct ether_addr *e; 494 if (verbose) 495 D("extract MAC range from %s", r->name); 496 497 e = ether_aton(r->name); 498 if (e == NULL) { 499 D("invalid MAC address '%s'", r->name); 500 return 1; 501 } 502 bcopy(e, &r->start, 6); 503 bcopy(e, &r->end, 6); 504 #if 0 505 bcopy(targ->src_mac, eh->ether_shost, 6); 506 p = index(targ->g->src_mac, '-'); 507 if (p) 508 targ->src_mac_range = atoi(p+1); 509 510 bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6); 511 bcopy(targ->dst_mac, eh->ether_dhost, 6); 512 p = index(targ->g->dst_mac, '-'); 513 if (p) 514 targ->dst_mac_range = atoi(p+1); 515 #endif 516 if (verbose) 517 D("%s starts at %s", r->name, ether_ntoa(&r->start)); 518 return 0; 519 } 520 521 static int 522 get_if_mtu(const struct glob_arg *g) 523 { 524 struct ifreq ifreq; 525 int s, ret; 526 const char *ifname = g->nmd->hdr.nr_name; 527 size_t len; 528 529 if (!strncmp(g->ifname, "netmap:", 7) && !strchr(ifname, '{') 530 && !strchr(ifname, '}')) { 531 532 len = strlen(ifname); 533 534 if (len > IFNAMSIZ) { 535 D("'%s' too long, cannot ask for MTU", ifname); 536 return -1; 537 } 538 539 s = socket(AF_INET, SOCK_DGRAM, 0); 540 if (s < 0) { 541 D("socket() failed: %s", strerror(errno)); 542 return s; 543 } 544 545 memset(&ifreq, 0, sizeof(ifreq)); 546 memcpy(ifreq.ifr_name, ifname, len); 547 548 ret = ioctl(s, SIOCGIFMTU, &ifreq); 549 if (ret) { 550 D("ioctl(SIOCGIFMTU) failed: %s", strerror(errno)); 551 } 552 553 close(s); 554 555 return ifreq.ifr_mtu; 556 } 557 558 /* This is a pipe or a VALE port, where the MTU is very large, 559 * so we use some practical limit. 
*/ 560 return 65536; 561 } 562 563 static struct targ *targs; 564 static int global_nthreads; 565 566 /* control-C handler */ 567 static void 568 sigint_h(int sig) 569 { 570 int i; 571 572 (void)sig; /* UNUSED */ 573 D("received control-C on thread %p", (void *)pthread_self()); 574 for (i = 0; i < global_nthreads; i++) { 575 targs[i].cancel = 1; 576 } 577 } 578 579 /* sysctl wrapper to return the number of active CPUs */ 580 static int 581 system_ncpus(void) 582 { 583 int ncpus; 584 #if defined (__FreeBSD__) 585 int mib[2] = { CTL_HW, HW_NCPU }; 586 size_t len = sizeof(mib); 587 sysctl(mib, 2, &ncpus, &len, NULL, 0); 588 #elif defined(linux) 589 ncpus = sysconf(_SC_NPROCESSORS_ONLN); 590 #elif defined(_WIN32) 591 { 592 SYSTEM_INFO sysinfo; 593 GetSystemInfo(&sysinfo); 594 ncpus = sysinfo.dwNumberOfProcessors; 595 } 596 #else /* others */ 597 ncpus = 1; 598 #endif /* others */ 599 return (ncpus); 600 } 601 602 #ifdef __linux__ 603 #define sockaddr_dl sockaddr_ll 604 #define sdl_family sll_family 605 #define AF_LINK AF_PACKET 606 #define LLADDR(s) s->sll_addr; 607 #include <linux/if_tun.h> 608 #define TAP_CLONEDEV "/dev/net/tun" 609 #endif /* __linux__ */ 610 611 #ifdef __FreeBSD__ 612 #include <net/if_tun.h> 613 #define TAP_CLONEDEV "/dev/tap" 614 #endif /* __FreeBSD */ 615 616 #ifdef __APPLE__ 617 // #warning TAP not supported on apple ? 618 #include <net/if_utun.h> 619 #define TAP_CLONEDEV "/dev/tap" 620 #endif /* __APPLE__ */ 621 622 623 /* 624 * parse the vale configuration in conf and put it in nmr. 625 * Return the flag set if necessary. 626 * The configuration may consist of 1 to 4 numbers separated 627 * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings. 628 * Missing numbers or zeroes stand for default values. 629 * As an additional convenience, if exactly one number 630 * is specified, then this is assigned to both #tx-slots and #rx-slots. 631 * If there is no 4th number, then the 3rd is assigned to both #tx-rings 632 * and #rx-rings. 
633 */ 634 static int 635 parse_nmr_config(const char* conf, struct nmreq_register *nmr) 636 { 637 char *w, *tok; 638 int i, v; 639 640 if (conf == NULL || ! *conf) 641 return 0; 642 nmr->nr_tx_rings = nmr->nr_rx_rings = 0; 643 nmr->nr_tx_slots = nmr->nr_rx_slots = 0; 644 w = strdup(conf); 645 for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) { 646 v = atoi(tok); 647 switch (i) { 648 case 0: 649 nmr->nr_tx_slots = nmr->nr_rx_slots = v; 650 break; 651 case 1: 652 nmr->nr_rx_slots = v; 653 break; 654 case 2: 655 nmr->nr_tx_rings = nmr->nr_rx_rings = v; 656 break; 657 case 3: 658 nmr->nr_rx_rings = v; 659 break; 660 default: 661 D("ignored config: %s", tok); 662 break; 663 } 664 } 665 D("txr %d txd %d rxr %d rxd %d", 666 nmr->nr_tx_rings, nmr->nr_tx_slots, 667 nmr->nr_rx_rings, nmr->nr_rx_slots); 668 free(w); 669 return 0; 670 } 671 672 673 /* 674 * locate the src mac address for our interface, put it 675 * into the user-supplied buffer. return 0 if ok, -1 on error. 676 */ 677 static int 678 source_hwaddr(const char *ifname, char *buf) 679 { 680 struct ifaddrs *ifaphead, *ifap; 681 682 if (getifaddrs(&ifaphead) != 0) { 683 D("getifaddrs %s failed", ifname); 684 return (-1); 685 } 686 687 /* remove 'netmap:' prefix before comparing interfaces */ 688 if (!strncmp(ifname, "netmap:", 7)) 689 ifname = &ifname[7]; 690 691 for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) { 692 struct sockaddr_dl *sdl = 693 (struct sockaddr_dl *)ifap->ifa_addr; 694 uint8_t *mac; 695 696 if (!sdl || sdl->sdl_family != AF_LINK) 697 continue; 698 if (strncmp(ifap->ifa_name, ifname, IFNAMSIZ) != 0) 699 continue; 700 mac = (uint8_t *)LLADDR(sdl); 701 sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x", 702 mac[0], mac[1], mac[2], 703 mac[3], mac[4], mac[5]); 704 if (verbose) 705 D("source hwaddr %s", buf); 706 break; 707 } 708 freeifaddrs(ifaphead); 709 return ifap ? 0 : 1; 710 } 711 712 713 /* set the thread affinity. 
*/ 714 static int 715 setaffinity(pthread_t me, int i) 716 { 717 cpuset_t cpumask; 718 719 if (i == -1) 720 return 0; 721 722 /* Set thread affinity affinity.*/ 723 CPU_ZERO(&cpumask); 724 CPU_SET(i, &cpumask); 725 726 if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) { 727 D("Unable to set affinity: %s", strerror(errno)); 728 return 1; 729 } 730 return 0; 731 } 732 733 734 /* Compute the checksum of the given ip header. */ 735 static uint32_t 736 checksum(const void *data, uint16_t len, uint32_t sum) 737 { 738 const uint8_t *addr = data; 739 uint32_t i; 740 741 /* Checksum all the pairs of bytes first... */ 742 for (i = 0; i < (len & ~1U); i += 2) { 743 sum += (uint16_t)ntohs(*((const uint16_t *)(addr + i))); 744 if (sum > 0xFFFF) 745 sum -= 0xFFFF; 746 } 747 /* 748 * If there's a single byte left over, checksum it, too. 749 * Network byte order is big-endian, so the remaining byte is 750 * the high byte. 751 */ 752 if (i < len) { 753 sum += addr[i] << 8; 754 if (sum > 0xFFFF) 755 sum -= 0xFFFF; 756 } 757 return sum; 758 } 759 760 static uint16_t 761 wrapsum(uint32_t sum) 762 { 763 sum = ~sum & 0xFFFF; 764 return (htons(sum)); 765 } 766 767 /* Check the payload of the packet for errors (use it for debug). 768 * Look for consecutive ascii representations of the size of the packet. 769 */ 770 static void 771 dump_payload(const char *_p, int len, struct netmap_ring *ring, int cur) 772 { 773 char buf[128]; 774 int i, j, i0; 775 const unsigned char *p = (const unsigned char *)_p; 776 777 /* get the length in ASCII of the length of the packet. 
*/ 778 779 printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n", 780 ring, cur, ring->slot[cur].buf_idx, 781 ring->slot[cur].flags, len); 782 /* hexdump routine */ 783 for (i = 0; i < len; ) { 784 memset(buf, ' ', sizeof(buf)); 785 sprintf(buf, "%5d: ", i); 786 i0 = i; 787 for (j=0; j < 16 && i < len; i++, j++) 788 sprintf(buf+7+j*3, "%02x ", (uint8_t)(p[i])); 789 i = i0; 790 for (j=0; j < 16 && i < len; i++, j++) 791 sprintf(buf+7+j + 48, "%c", 792 isprint(p[i]) ? p[i] : '.'); 793 printf("%s\n", buf); 794 } 795 } 796 797 /* 798 * Fill a packet with some payload. 799 * We create a UDP packet so the payload starts at 800 * 14+20+8 = 42 bytes. 801 */ 802 #ifdef __linux__ 803 #define uh_sport source 804 #define uh_dport dest 805 #define uh_ulen len 806 #define uh_sum check 807 #endif /* linux */ 808 809 static uint16_t 810 new_ip_sum(uint16_t ip_sum, uint32_t oaddr, uint32_t naddr) 811 { 812 ip_sum = cksum_add(ip_sum, ~oaddr >> 16); 813 ip_sum = cksum_add(ip_sum, ~oaddr & 0xffff); 814 ip_sum = cksum_add(ip_sum, naddr >> 16); 815 ip_sum = cksum_add(ip_sum, naddr & 0xffff); 816 return ip_sum; 817 } 818 819 static uint16_t 820 new_udp_sum(uint16_t udp_sum, uint16_t oport, uint16_t nport) 821 { 822 udp_sum = cksum_add(udp_sum, ~oport); 823 udp_sum = cksum_add(udp_sum, nport); 824 return udp_sum; 825 } 826 827 828 static void 829 update_ip(struct pkt *pkt, struct targ *t) 830 { 831 struct glob_arg *g = t->g; 832 struct ip ip; 833 struct udphdr udp; 834 uint32_t oaddr, naddr; 835 uint16_t oport, nport; 836 uint16_t ip_sum = 0, udp_sum = 0; 837 838 memcpy(&ip, &pkt->ipv4.ip, sizeof(ip)); 839 memcpy(&udp, &pkt->ipv4.udp, sizeof(udp)); 840 do { 841 ip_sum = udp_sum = 0; 842 naddr = oaddr = ntohl(ip.ip_src.s_addr); 843 nport = oport = ntohs(udp.uh_sport); 844 if (g->options & OPT_RANDOM_SRC) { 845 ip.ip_src.s_addr = nrand48(t->seed); 846 udp.uh_sport = nrand48(t->seed); 847 naddr = ntohl(ip.ip_src.s_addr); 848 nport = ntohs(udp.uh_sport); 849 ip_sum = new_ip_sum(ip_sum, 
oaddr, naddr); 850 udp_sum = new_udp_sum(udp_sum, oport, nport); 851 } else { 852 if (oport < g->src_ip.port1) { 853 nport = oport + 1; 854 udp.uh_sport = htons(nport); 855 udp_sum = new_udp_sum(udp_sum, oport, nport); 856 break; 857 } 858 nport = g->src_ip.port0; 859 udp.uh_sport = htons(nport); 860 if (oaddr < g->src_ip.ipv4.end) { 861 naddr = oaddr + 1; 862 ip.ip_src.s_addr = htonl(naddr); 863 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 864 break; 865 } 866 naddr = g->src_ip.ipv4.start; 867 ip.ip_src.s_addr = htonl(naddr); 868 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 869 } 870 871 naddr = oaddr = ntohl(ip.ip_dst.s_addr); 872 nport = oport = ntohs(udp.uh_dport); 873 if (g->options & OPT_RANDOM_DST) { 874 ip.ip_dst.s_addr = nrand48(t->seed); 875 udp.uh_dport = nrand48(t->seed); 876 naddr = ntohl(ip.ip_dst.s_addr); 877 nport = ntohs(udp.uh_dport); 878 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 879 udp_sum = new_udp_sum(udp_sum, oport, nport); 880 } else { 881 if (oport < g->dst_ip.port1) { 882 nport = oport + 1; 883 udp.uh_dport = htons(nport); 884 udp_sum = new_udp_sum(udp_sum, oport, nport); 885 break; 886 } 887 nport = g->dst_ip.port0; 888 udp.uh_dport = htons(nport); 889 if (oaddr < g->dst_ip.ipv4.end) { 890 naddr = oaddr + 1; 891 ip.ip_dst.s_addr = htonl(naddr); 892 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 893 break; 894 } 895 naddr = g->dst_ip.ipv4.start; 896 ip.ip_dst.s_addr = htonl(naddr); 897 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 898 } 899 } while (0); 900 /* update checksums */ 901 if (udp_sum != 0) 902 udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(udp_sum)); 903 if (ip_sum != 0) { 904 ip.ip_sum = ~cksum_add(~ip.ip_sum, htons(ip_sum)); 905 udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(ip_sum)); 906 } 907 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip)); 908 memcpy(&pkt->ipv4.udp, &udp, sizeof(udp)); 909 } 910 911 #ifndef s6_addr16 912 #define s6_addr16 __u6_addr.__u6_addr16 913 #endif 914 static void 915 update_ip6(struct pkt *pkt, struct targ *t) 916 { 917 
struct glob_arg *g = t->g; 918 struct ip6_hdr ip6; 919 struct udphdr udp; 920 uint16_t udp_sum; 921 uint16_t oaddr, naddr; 922 uint16_t oport, nport; 923 uint8_t group; 924 925 memcpy(&ip6, &pkt->ipv6.ip, sizeof(ip6)); 926 memcpy(&udp, &pkt->ipv6.udp, sizeof(udp)); 927 do { 928 udp_sum = 0; 929 group = g->src_ip.ipv6.sgroup; 930 naddr = oaddr = ntohs(ip6.ip6_src.s6_addr16[group]); 931 nport = oport = ntohs(udp.uh_sport); 932 if (g->options & OPT_RANDOM_SRC) { 933 ip6.ip6_src.s6_addr16[group] = nrand48(t->seed); 934 udp.uh_sport = nrand48(t->seed); 935 naddr = ntohs(ip6.ip6_src.s6_addr16[group]); 936 nport = ntohs(udp.uh_sport); 937 break; 938 } 939 if (oport < g->src_ip.port1) { 940 nport = oport + 1; 941 udp.uh_sport = htons(nport); 942 break; 943 } 944 nport = g->src_ip.port0; 945 udp.uh_sport = htons(nport); 946 if (oaddr < ntohs(g->src_ip.ipv6.end.s6_addr16[group])) { 947 naddr = oaddr + 1; 948 ip6.ip6_src.s6_addr16[group] = htons(naddr); 949 break; 950 } 951 naddr = ntohs(g->src_ip.ipv6.start.s6_addr16[group]); 952 ip6.ip6_src.s6_addr16[group] = htons(naddr); 953 954 /* update checksums if needed */ 955 if (oaddr != naddr) 956 udp_sum = cksum_add(~oaddr, naddr); 957 if (oport != nport) 958 udp_sum = cksum_add(udp_sum, 959 cksum_add(~oport, nport)); 960 961 group = g->dst_ip.ipv6.egroup; 962 naddr = oaddr = ntohs(ip6.ip6_dst.s6_addr16[group]); 963 nport = oport = ntohs(udp.uh_dport); 964 if (g->options & OPT_RANDOM_DST) { 965 ip6.ip6_dst.s6_addr16[group] = nrand48(t->seed); 966 udp.uh_dport = nrand48(t->seed); 967 naddr = ntohs(ip6.ip6_dst.s6_addr16[group]); 968 nport = ntohs(udp.uh_dport); 969 break; 970 } 971 if (oport < g->dst_ip.port1) { 972 nport = oport + 1; 973 udp.uh_dport = htons(nport); 974 break; 975 } 976 nport = g->dst_ip.port0; 977 udp.uh_dport = htons(nport); 978 if (oaddr < ntohs(g->dst_ip.ipv6.end.s6_addr16[group])) { 979 naddr = oaddr + 1; 980 ip6.ip6_dst.s6_addr16[group] = htons(naddr); 981 break; 982 } 983 naddr = 
ntohs(g->dst_ip.ipv6.start.s6_addr16[group]); 984 ip6.ip6_dst.s6_addr16[group] = htons(naddr); 985 } while (0); 986 /* update checksums */ 987 if (oaddr != naddr) 988 udp_sum = cksum_add(udp_sum, 989 cksum_add(~oaddr, naddr)); 990 if (oport != nport) 991 udp_sum = cksum_add(udp_sum, 992 cksum_add(~oport, nport)); 993 if (udp_sum != 0) 994 udp.uh_sum = ~cksum_add(~udp.uh_sum, udp_sum); 995 memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6)); 996 memcpy(&pkt->ipv6.udp, &udp, sizeof(udp)); 997 } 998 999 static void 1000 update_addresses(struct pkt *pkt, struct targ *t) 1001 { 1002 1003 if (t->g->af == AF_INET) 1004 update_ip(pkt, t); 1005 else 1006 update_ip6(pkt, t); 1007 } 1008 1009 static void 1010 update_ip_size(struct pkt *pkt, int size) 1011 { 1012 struct ip ip; 1013 struct udphdr udp; 1014 uint16_t oiplen, niplen; 1015 uint16_t nudplen; 1016 uint16_t ip_sum = 0; 1017 1018 memcpy(&ip, &pkt->ipv4.ip, sizeof(ip)); 1019 memcpy(&udp, &pkt->ipv4.udp, sizeof(udp)); 1020 1021 oiplen = ntohs(ip.ip_len); 1022 niplen = size - sizeof(struct ether_header); 1023 ip.ip_len = htons(niplen); 1024 nudplen = niplen - sizeof(struct ip); 1025 udp.uh_ulen = htons(nudplen); 1026 ip_sum = new_udp_sum(ip_sum, oiplen, niplen); 1027 1028 /* update checksums */ 1029 if (ip_sum != 0) 1030 ip.ip_sum = ~cksum_add(~ip.ip_sum, htons(ip_sum)); 1031 1032 udp.uh_sum = 0; 1033 /* Magic: taken from sbin/dhclient/packet.c */ 1034 udp.uh_sum = wrapsum( 1035 checksum(&udp, sizeof(udp), /* udp header */ 1036 checksum(pkt->ipv4.body, /* udp payload */ 1037 nudplen - sizeof(udp), 1038 checksum(&ip.ip_src, /* pseudo header */ 1039 2 * sizeof(ip.ip_src), 1040 IPPROTO_UDP + (u_int32_t)ntohs(udp.uh_ulen))))); 1041 1042 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip)); 1043 memcpy(&pkt->ipv4.udp, &udp, sizeof(udp)); 1044 } 1045 1046 static void 1047 update_ip6_size(struct pkt *pkt, int size) 1048 { 1049 struct ip6_hdr ip6; 1050 struct udphdr udp; 1051 uint16_t niplen, nudplen; 1052 uint32_t csum; 1053 1054 memcpy(&ip6, 
&pkt->ipv6.ip, sizeof(ip6)); 1055 memcpy(&udp, &pkt->ipv6.udp, sizeof(udp)); 1056 1057 nudplen = niplen = size - sizeof(struct ether_header) - sizeof(ip6); 1058 ip6.ip6_plen = htons(niplen); 1059 udp.uh_ulen = htons(nudplen); 1060 1061 /* Save part of pseudo header checksum into csum */ 1062 udp.uh_sum = 0; 1063 csum = IPPROTO_UDP << 24; 1064 csum = checksum(&csum, sizeof(csum), nudplen); 1065 udp.uh_sum = wrapsum( 1066 checksum(&udp, sizeof(udp), /* udp header */ 1067 checksum(pkt->ipv6.body, /* udp payload */ 1068 nudplen - sizeof(udp), 1069 checksum(&pkt->ipv6.ip.ip6_src, /* pseudo header */ 1070 2 * sizeof(pkt->ipv6.ip.ip6_src), csum)))); 1071 1072 memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6)); 1073 memcpy(&pkt->ipv6.udp, &udp, sizeof(udp)); 1074 } 1075 1076 static void 1077 update_size(struct pkt *pkt, struct targ *t, int size) 1078 { 1079 if (t->g->options & OPT_UPDATE_CSUM) { 1080 if (t->g->af == AF_INET) 1081 update_ip_size(pkt, size); 1082 else 1083 update_ip6_size(pkt, size); 1084 } 1085 } 1086 1087 /* 1088 * initialize one packet and prepare for the next one. 1089 * The copy could be done better instead of repeating it each time. 1090 */ 1091 static void 1092 initialize_packet(struct targ *targ) 1093 { 1094 struct pkt *pkt = &targ->pkt; 1095 struct ether_header *eh; 1096 struct ip6_hdr ip6; 1097 struct ip ip; 1098 struct udphdr udp; 1099 void *udp_ptr; 1100 uint16_t paylen; 1101 uint32_t csum = 0; 1102 const char *payload = targ->g->options & OPT_INDIRECT ? 1103 indirect_payload : default_payload; 1104 int i, l0 = strlen(payload); 1105 1106 #ifndef NO_PCAP 1107 char errbuf[PCAP_ERRBUF_SIZE]; 1108 pcap_t *file; 1109 struct pcap_pkthdr *header; 1110 const unsigned char *packet; 1111 1112 /* Read a packet from a PCAP file if asked. 
*/ 1113 if (targ->g->packet_file != NULL) { 1114 if ((file = pcap_open_offline(targ->g->packet_file, 1115 errbuf)) == NULL) 1116 D("failed to open pcap file %s", 1117 targ->g->packet_file); 1118 if (pcap_next_ex(file, &header, &packet) < 0) 1119 D("failed to read packet from %s", 1120 targ->g->packet_file); 1121 if ((targ->frame = malloc(header->caplen)) == NULL) 1122 D("out of memory"); 1123 bcopy(packet, (unsigned char *)targ->frame, header->caplen); 1124 targ->g->pkt_size = header->caplen; 1125 pcap_close(file); 1126 return; 1127 } 1128 #endif 1129 1130 paylen = targ->g->pkt_size - sizeof(*eh) - 1131 (targ->g->af == AF_INET ? sizeof(ip): sizeof(ip6)); 1132 1133 /* create a nice NUL-terminated string */ 1134 for (i = 0; i < paylen; i += l0) { 1135 if (l0 > paylen - i) 1136 l0 = paylen - i; // last round 1137 bcopy(payload, PKT(pkt, body, targ->g->af) + i, l0); 1138 } 1139 PKT(pkt, body, targ->g->af)[i - 1] = '\0'; 1140 1141 /* prepare the headers */ 1142 eh = &pkt->eh; 1143 bcopy(&targ->g->src_mac.start, eh->ether_shost, 6); 1144 bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6); 1145 1146 if (targ->g->af == AF_INET) { 1147 eh->ether_type = htons(ETHERTYPE_IP); 1148 memcpy(&ip, &pkt->ipv4.ip, sizeof(ip)); 1149 udp_ptr = &pkt->ipv4.udp; 1150 ip.ip_v = IPVERSION; 1151 ip.ip_hl = sizeof(ip) >> 2; 1152 ip.ip_id = 0; 1153 ip.ip_tos = IPTOS_LOWDELAY; 1154 ip.ip_len = htons(targ->g->pkt_size - sizeof(*eh)); 1155 ip.ip_id = 0; 1156 ip.ip_off = htons(IP_DF); /* Don't fragment */ 1157 ip.ip_ttl = IPDEFTTL; 1158 ip.ip_p = IPPROTO_UDP; 1159 ip.ip_dst.s_addr = htonl(targ->g->dst_ip.ipv4.start); 1160 ip.ip_src.s_addr = htonl(targ->g->src_ip.ipv4.start); 1161 ip.ip_sum = wrapsum(checksum(&ip, sizeof(ip), 0)); 1162 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip)); 1163 } else { 1164 eh->ether_type = htons(ETHERTYPE_IPV6); 1165 memcpy(&ip6, &pkt->ipv4.ip, sizeof(ip6)); 1166 udp_ptr = &pkt->ipv6.udp; 1167 ip6.ip6_flow = 0; 1168 ip6.ip6_plen = htons(paylen); 1169 ip6.ip6_vfc = IPV6_VERSION; 
1170 ip6.ip6_nxt = IPPROTO_UDP; 1171 ip6.ip6_hlim = IPV6_DEFHLIM; 1172 ip6.ip6_src = targ->g->src_ip.ipv6.start; 1173 ip6.ip6_dst = targ->g->dst_ip.ipv6.start; 1174 } 1175 memcpy(&udp, udp_ptr, sizeof(udp)); 1176 1177 udp.uh_sport = htons(targ->g->src_ip.port0); 1178 udp.uh_dport = htons(targ->g->dst_ip.port0); 1179 udp.uh_ulen = htons(paylen); 1180 if (targ->g->af == AF_INET) { 1181 /* Magic: taken from sbin/dhclient/packet.c */ 1182 udp.uh_sum = wrapsum( 1183 checksum(&udp, sizeof(udp), /* udp header */ 1184 checksum(pkt->ipv4.body, /* udp payload */ 1185 paylen - sizeof(udp), 1186 checksum(&pkt->ipv4.ip.ip_src, /* pseudo header */ 1187 2 * sizeof(pkt->ipv4.ip.ip_src), 1188 IPPROTO_UDP + (u_int32_t)ntohs(udp.uh_ulen))))); 1189 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip)); 1190 } else { 1191 /* Save part of pseudo header checksum into csum */ 1192 csum = IPPROTO_UDP << 24; 1193 csum = checksum(&csum, sizeof(csum), paylen); 1194 udp.uh_sum = wrapsum( 1195 checksum(udp_ptr, sizeof(udp), /* udp header */ 1196 checksum(pkt->ipv6.body, /* udp payload */ 1197 paylen - sizeof(udp), 1198 checksum(&pkt->ipv6.ip.ip6_src, /* pseudo header */ 1199 2 * sizeof(pkt->ipv6.ip.ip6_src), csum)))); 1200 memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6)); 1201 } 1202 memcpy(udp_ptr, &udp, sizeof(udp)); 1203 1204 bzero(&pkt->vh, sizeof(pkt->vh)); 1205 // dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0); 1206 } 1207 1208 static void 1209 get_vnet_hdr_len(struct glob_arg *g) 1210 { 1211 struct nmreq_header hdr; 1212 struct nmreq_port_hdr ph; 1213 int err; 1214 1215 hdr = g->nmd->hdr; /* copy name and version */ 1216 hdr.nr_reqtype = NETMAP_REQ_PORT_HDR_GET; 1217 hdr.nr_options = 0; 1218 memset(&ph, 0, sizeof(ph)); 1219 hdr.nr_body = (uintptr_t)&ph; 1220 err = ioctl(g->main_fd, NIOCCTRL, &hdr); 1221 if (err) { 1222 D("Unable to get virtio-net header length"); 1223 return; 1224 } 1225 1226 g->virt_header = ph.nr_hdr_len; 1227 if (g->virt_header) { 1228 D("Port requires virtio-net header, length = 
%d", 1229 g->virt_header); 1230 } 1231 } 1232 1233 static void 1234 set_vnet_hdr_len(struct glob_arg *g) 1235 { 1236 int err, l = g->virt_header; 1237 struct nmreq_header hdr; 1238 struct nmreq_port_hdr ph; 1239 1240 if (l == 0) 1241 return; 1242 1243 hdr = g->nmd->hdr; /* copy name and version */ 1244 hdr.nr_reqtype = NETMAP_REQ_PORT_HDR_SET; 1245 hdr.nr_options = 0; 1246 memset(&ph, 0, sizeof(ph)); 1247 hdr.nr_body = (uintptr_t)&ph; 1248 err = ioctl(g->main_fd, NIOCCTRL, &hdr); 1249 if (err) { 1250 D("Unable to set virtio-net header length %d", l); 1251 } 1252 } 1253 1254 /* 1255 * create and enqueue a batch of packets on a ring. 1256 * On the last one set NS_REPORT to tell the driver to generate 1257 * an interrupt when done. 1258 */ 1259 static int 1260 send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame, 1261 int size, struct targ *t, u_int count, int options) 1262 { 1263 u_int n, sent, head = ring->head; 1264 u_int frags = t->frags; 1265 u_int frag_size = t->frag_size; 1266 struct netmap_slot *slot = &ring->slot[head]; 1267 1268 n = nm_ring_space(ring); 1269 #if 0 1270 if (options & (OPT_COPY | OPT_PREFETCH) ) { 1271 for (sent = 0; sent < count; sent++) { 1272 struct netmap_slot *slot = &ring->slot[head]; 1273 char *p = NETMAP_BUF(ring, slot->buf_idx); 1274 1275 __builtin_prefetch(p); 1276 head = nm_ring_next(ring, head); 1277 } 1278 head = ring->head; 1279 } 1280 #endif 1281 for (sent = 0; sent < count && n >= frags; sent++, n--) { 1282 char *p; 1283 int buf_changed; 1284 u_int tosend = size; 1285 1286 slot = &ring->slot[head]; 1287 p = NETMAP_BUF(ring, slot->buf_idx); 1288 buf_changed = slot->flags & NS_BUF_CHANGED; 1289 1290 slot->flags = 0; 1291 if (options & OPT_RUBBISH) { 1292 /* do nothing */ 1293 } else if (options & OPT_INDIRECT) { 1294 slot->flags |= NS_INDIRECT; 1295 slot->ptr = (uint64_t)((uintptr_t)frame); 1296 } else if (frags > 1) { 1297 u_int i; 1298 const char *f = frame; 1299 char *fp = p; 1300 for (i = 0; i < frags - 1; i++) 
{ 1301 memcpy(fp, f, frag_size); 1302 slot->len = frag_size; 1303 slot->flags = NS_MOREFRAG; 1304 if (options & OPT_DUMP) 1305 dump_payload(fp, frag_size, ring, head); 1306 tosend -= frag_size; 1307 f += frag_size; 1308 head = nm_ring_next(ring, head); 1309 slot = &ring->slot[head]; 1310 fp = NETMAP_BUF(ring, slot->buf_idx); 1311 } 1312 n -= (frags - 1); 1313 p = fp; 1314 slot->flags = 0; 1315 memcpy(p, f, tosend); 1316 update_addresses(pkt, t); 1317 } else if ((options & (OPT_COPY | OPT_MEMCPY)) || buf_changed) { 1318 if (options & OPT_COPY) 1319 nm_pkt_copy(frame, p, size); 1320 else 1321 memcpy(p, frame, size); 1322 update_addresses(pkt, t); 1323 } else if (options & OPT_PREFETCH) { 1324 __builtin_prefetch(p); 1325 } 1326 slot->len = tosend; 1327 if (options & OPT_DUMP) 1328 dump_payload(p, tosend, ring, head); 1329 head = nm_ring_next(ring, head); 1330 } 1331 if (sent) { 1332 slot->flags |= NS_REPORT; 1333 ring->head = ring->cur = head; 1334 } 1335 if (sent < count) { 1336 /* tell netmap that we need more slots */ 1337 ring->cur = ring->tail; 1338 } 1339 1340 return (sent); 1341 } 1342 1343 /* 1344 * Index of the highest bit set 1345 */ 1346 static uint32_t 1347 msb64(uint64_t x) 1348 { 1349 uint64_t m = 1ULL << 63; 1350 int i; 1351 1352 for (i = 63; i >= 0; i--, m >>=1) 1353 if (m & x) 1354 return i; 1355 return 0; 1356 } 1357 1358 /* 1359 * wait until ts, either busy or sleeping if more than 1ms. 1360 * Return wakeup time. 1361 */ 1362 static struct timespec 1363 wait_time(struct timespec ts) 1364 { 1365 for (;;) { 1366 struct timespec w, cur; 1367 clock_gettime(CLOCK_REALTIME_PRECISE, &cur); 1368 w = timespec_sub(ts, cur); 1369 if (w.tv_sec < 0) 1370 return cur; 1371 else if (w.tv_sec > 0 || w.tv_nsec > 1000000) 1372 poll(NULL, 0, 1); 1373 } 1374 } 1375 1376 /* 1377 * Send a packet, and wait for a response. 1378 * The payload (after UDP header, ofs 42) has a 4-byte sequence 1379 * followed by a struct timeval (or bintime?) 
1380 */ 1381 1382 static void * 1383 ping_body(void *data) 1384 { 1385 struct targ *targ = (struct targ *) data; 1386 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1387 struct netmap_if *nifp = targ->nmd->nifp; 1388 int i, m; 1389 void *frame; 1390 int size; 1391 struct timespec ts, now, last_print; 1392 struct timespec nexttime = {0, 0}; /* silence compiler */ 1393 uint64_t sent = 0, n = targ->g->npackets; 1394 uint64_t count = 0, t_cur, t_min = ~0, av = 0; 1395 uint64_t g_min = ~0, g_av = 0; 1396 uint64_t buckets[64]; /* bins for delays, ns */ 1397 int rate_limit = targ->g->tx_rate, tosend = 0; 1398 1399 frame = (char*)&targ->pkt + sizeof(targ->pkt.vh) - targ->g->virt_header; 1400 size = targ->g->pkt_size + targ->g->virt_header; 1401 1402 1403 if (targ->g->nthreads > 1) { 1404 D("can only ping with 1 thread"); 1405 return NULL; 1406 } 1407 1408 if (targ->g->af == AF_INET6) { 1409 D("Warning: ping-pong with IPv6 not supported"); 1410 } 1411 1412 bzero(&buckets, sizeof(buckets)); 1413 clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); 1414 now = last_print; 1415 if (rate_limit) { 1416 targ->tic = timespec_add(now, (struct timespec){2,0}); 1417 targ->tic.tv_nsec = 0; 1418 wait_time(targ->tic); 1419 nexttime = targ->tic; 1420 } 1421 while (!targ->cancel && (n == 0 || sent < n)) { 1422 struct netmap_ring *ring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring); 1423 struct netmap_slot *slot; 1424 char *p; 1425 int rv; 1426 uint64_t limit, event = 0; 1427 1428 if (rate_limit && tosend <= 0) { 1429 tosend = targ->g->burst; 1430 nexttime = timespec_add(nexttime, targ->g->tx_period); 1431 wait_time(nexttime); 1432 } 1433 1434 limit = rate_limit ? 
tosend : targ->g->burst; 1435 if (n > 0 && n - sent < limit) 1436 limit = n - sent; 1437 for (m = 0; (unsigned)m < limit; m++) { 1438 slot = &ring->slot[ring->head]; 1439 slot->len = size; 1440 p = NETMAP_BUF(ring, slot->buf_idx); 1441 1442 if (nm_ring_empty(ring)) { 1443 D("-- ouch, cannot send"); 1444 break; 1445 } else { 1446 struct tstamp *tp; 1447 nm_pkt_copy(frame, p, size); 1448 clock_gettime(CLOCK_REALTIME_PRECISE, &ts); 1449 bcopy(&sent, p+42, sizeof(sent)); 1450 tp = (struct tstamp *)(p+46); 1451 tp->sec = (uint32_t)ts.tv_sec; 1452 tp->nsec = (uint32_t)ts.tv_nsec; 1453 sent++; 1454 ring->head = ring->cur = nm_ring_next(ring, ring->head); 1455 } 1456 } 1457 if (m > 0) 1458 event++; 1459 targ->ctr.pkts = sent; 1460 targ->ctr.bytes = sent*size; 1461 targ->ctr.events = event; 1462 if (rate_limit) 1463 tosend -= m; 1464 #ifdef BUSYWAIT 1465 rv = ioctl(pfd.fd, NIOCTXSYNC, NULL); 1466 if (rv < 0) { 1467 D("TXSYNC error on queue %d: %s", targ->me, 1468 strerror(errno)); 1469 } 1470 again: 1471 ioctl(pfd.fd, NIOCRXSYNC, NULL); 1472 #else 1473 /* should use a parameter to decide how often to send */ 1474 if ( (rv = poll(&pfd, 1, 3000)) <= 0) { 1475 D("poll error on queue %d: %s", targ->me, 1476 (rv ? 
strerror(errno) : "timeout")); 1477 continue; 1478 } 1479 #endif /* BUSYWAIT */ 1480 /* see what we got back */ 1481 #ifdef BUSYWAIT 1482 int rx = 0; 1483 #endif 1484 for (i = targ->nmd->first_rx_ring; 1485 i <= targ->nmd->last_rx_ring; i++) { 1486 ring = NETMAP_RXRING(nifp, i); 1487 while (!nm_ring_empty(ring)) { 1488 uint32_t seq; 1489 struct tstamp *tp; 1490 int pos; 1491 1492 slot = &ring->slot[ring->head]; 1493 p = NETMAP_BUF(ring, slot->buf_idx); 1494 1495 clock_gettime(CLOCK_REALTIME_PRECISE, &now); 1496 bcopy(p+42, &seq, sizeof(seq)); 1497 tp = (struct tstamp *)(p+46); 1498 ts.tv_sec = (time_t)tp->sec; 1499 ts.tv_nsec = (long)tp->nsec; 1500 ts.tv_sec = now.tv_sec - ts.tv_sec; 1501 ts.tv_nsec = now.tv_nsec - ts.tv_nsec; 1502 if (ts.tv_nsec < 0) { 1503 ts.tv_nsec += 1000000000; 1504 ts.tv_sec--; 1505 } 1506 if (0) D("seq %d/%llu delta %d.%09d", seq, 1507 (unsigned long long)sent, 1508 (int)ts.tv_sec, (int)ts.tv_nsec); 1509 t_cur = ts.tv_sec * 1000000000UL + ts.tv_nsec; 1510 if (t_cur < t_min) 1511 t_min = t_cur; 1512 count ++; 1513 av += t_cur; 1514 pos = msb64(t_cur); 1515 buckets[pos]++; 1516 /* now store it in a bucket */ 1517 ring->head = ring->cur = nm_ring_next(ring, ring->head); 1518 #ifdef BUSYWAIT 1519 rx++; 1520 #endif 1521 } 1522 } 1523 //D("tx %d rx %d", sent, rx); 1524 //usleep(100000); 1525 ts.tv_sec = now.tv_sec - last_print.tv_sec; 1526 ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; 1527 if (ts.tv_nsec < 0) { 1528 ts.tv_nsec += 1000000000; 1529 ts.tv_sec--; 1530 } 1531 if (ts.tv_sec >= 1) { 1532 D("count %d RTT: min %d av %d ns", 1533 (int)count, (int)t_min, (int)(av/count)); 1534 int k, j, kmin, off; 1535 char buf[512]; 1536 1537 for (kmin = 0; kmin < 64; kmin ++) 1538 if (buckets[kmin]) 1539 break; 1540 for (k = 63; k >= kmin; k--) 1541 if (buckets[k]) 1542 break; 1543 buf[0] = '\0'; 1544 off = 0; 1545 for (j = kmin; j <= k; j++) { 1546 off += sprintf(buf + off, " %5d", (int)buckets[j]); 1547 } 1548 D("k: %d .. 
%d\n\t%s", 1<<kmin, 1<<k, buf); 1549 bzero(&buckets, sizeof(buckets)); 1550 count = 0; 1551 g_av += av; 1552 av = 0; 1553 if (t_min < g_min) 1554 g_min = t_min; 1555 t_min = ~0; 1556 last_print = now; 1557 } 1558 #ifdef BUSYWAIT 1559 if (rx < m && ts.tv_sec <= 3 && !targ->cancel) 1560 goto again; 1561 #endif /* BUSYWAIT */ 1562 } 1563 1564 if (sent > 0) { 1565 D("RTT over %llu packets: min %d av %d ns", 1566 (long long unsigned)sent, (int)g_min, 1567 (int)((double)g_av/sent)); 1568 } 1569 targ->completed = 1; 1570 1571 /* reset the ``used`` flag. */ 1572 targ->used = 0; 1573 1574 return NULL; 1575 } 1576 1577 1578 /* 1579 * reply to ping requests 1580 */ 1581 static void * 1582 pong_body(void *data) 1583 { 1584 struct targ *targ = (struct targ *) data; 1585 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1586 struct netmap_if *nifp = targ->nmd->nifp; 1587 struct netmap_ring *txring, *rxring; 1588 int i; 1589 uint64_t sent = 0, n = targ->g->npackets; 1590 1591 if (targ->g->nthreads > 1) { 1592 D("can only reply ping with 1 thread"); 1593 return NULL; 1594 } 1595 if (n > 0) 1596 D("understood ponger %llu but don't know how to do it", 1597 (unsigned long long)n); 1598 1599 if (targ->g->af == AF_INET6) { 1600 D("Warning: ping-pong with IPv6 not supported"); 1601 } 1602 1603 while (!targ->cancel && (n == 0 || sent < n)) { 1604 uint32_t txhead, txavail; 1605 //#define BUSYWAIT 1606 #ifdef BUSYWAIT 1607 ioctl(pfd.fd, NIOCRXSYNC, NULL); 1608 #else 1609 int rv; 1610 if ( (rv = poll(&pfd, 1, 1000)) <= 0) { 1611 D("poll error on queue %d: %s", targ->me, 1612 rv ? 
strerror(errno) : "timeout"); 1613 continue; 1614 } 1615 #endif 1616 txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring); 1617 txhead = txring->head; 1618 txavail = nm_ring_space(txring); 1619 /* see what we got back */ 1620 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 1621 rxring = NETMAP_RXRING(nifp, i); 1622 while (!nm_ring_empty(rxring)) { 1623 uint16_t *spkt, *dpkt; 1624 uint32_t head = rxring->head; 1625 struct netmap_slot *slot = &rxring->slot[head]; 1626 char *src, *dst; 1627 src = NETMAP_BUF(rxring, slot->buf_idx); 1628 //D("got pkt %p of size %d", src, slot->len); 1629 rxring->head = rxring->cur = nm_ring_next(rxring, head); 1630 if (txavail == 0) 1631 continue; 1632 dst = NETMAP_BUF(txring, 1633 txring->slot[txhead].buf_idx); 1634 /* copy... */ 1635 dpkt = (uint16_t *)dst; 1636 spkt = (uint16_t *)src; 1637 nm_pkt_copy(src, dst, slot->len); 1638 /* swap source and destination MAC */ 1639 dpkt[0] = spkt[3]; 1640 dpkt[1] = spkt[4]; 1641 dpkt[2] = spkt[5]; 1642 dpkt[3] = spkt[0]; 1643 dpkt[4] = spkt[1]; 1644 dpkt[5] = spkt[2]; 1645 /* swap source and destination IPv4 */ 1646 if (spkt[6] == htons(ETHERTYPE_IP)) { 1647 dpkt[13] = spkt[15]; 1648 dpkt[14] = spkt[16]; 1649 dpkt[15] = spkt[13]; 1650 dpkt[16] = spkt[14]; 1651 } 1652 txring->slot[txhead].len = slot->len; 1653 //dump_payload(dst, slot->len, txring, txhead); 1654 txhead = nm_ring_next(txring, txhead); 1655 txavail--; 1656 sent++; 1657 } 1658 } 1659 txring->head = txring->cur = txhead; 1660 targ->ctr.pkts = sent; 1661 #ifdef BUSYWAIT 1662 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1663 #endif 1664 } 1665 1666 targ->completed = 1; 1667 1668 /* reset the ``used`` flag. 
*/ 1669 targ->used = 0; 1670 1671 return NULL; 1672 } 1673 1674 1675 static void * 1676 sender_body(void *data) 1677 { 1678 struct targ *targ = (struct targ *) data; 1679 struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; 1680 struct netmap_if *nifp; 1681 struct netmap_ring *txring = NULL; 1682 int i; 1683 uint64_t n = targ->g->npackets / targ->g->nthreads; 1684 uint64_t sent = 0; 1685 uint64_t event = 0; 1686 int options = targ->g->options; 1687 struct timespec nexttime = { 0, 0}; // XXX silence compiler 1688 int rate_limit = targ->g->tx_rate; 1689 struct pkt *pkt = &targ->pkt; 1690 void *frame; 1691 int size; 1692 1693 if (targ->frame == NULL) { 1694 frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header; 1695 size = targ->g->pkt_size + targ->g->virt_header; 1696 } else { 1697 frame = targ->frame; 1698 size = targ->g->pkt_size; 1699 } 1700 1701 D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd); 1702 if (setaffinity(targ->thread, targ->affinity)) 1703 goto quit; 1704 1705 /* main loop.*/ 1706 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1707 if (rate_limit) { 1708 targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); 1709 targ->tic.tv_nsec = 0; 1710 wait_time(targ->tic); 1711 nexttime = targ->tic; 1712 } 1713 if (targ->g->dev_type == DEV_TAP) { 1714 D("writing to file desc %d", targ->g->main_fd); 1715 1716 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 1717 if (write(targ->g->main_fd, frame, size) != -1) 1718 sent++; 1719 update_addresses(pkt, targ); 1720 if (i > 10000) { 1721 targ->ctr.pkts = sent; 1722 targ->ctr.bytes = sent*size; 1723 targ->ctr.events = sent; 1724 i = 0; 1725 } 1726 } 1727 #ifndef NO_PCAP 1728 } else if (targ->g->dev_type == DEV_PCAP) { 1729 pcap_t *p = targ->g->p; 1730 1731 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 1732 if (pcap_inject(p, frame, size) != -1) 1733 sent++; 1734 update_addresses(pkt, targ); 1735 if (i > 10000) { 1736 targ->ctr.pkts = sent; 1737 targ->ctr.bytes = 
sent*size; 1738 targ->ctr.events = sent; 1739 i = 0; 1740 } 1741 } 1742 #endif /* NO_PCAP */ 1743 } else { 1744 int tosend = 0; 1745 u_int bufsz, frag_size = targ->g->frag_size; 1746 1747 nifp = targ->nmd->nifp; 1748 txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring); 1749 bufsz = txring->nr_buf_size; 1750 if (bufsz < frag_size) 1751 frag_size = bufsz; 1752 targ->frag_size = targ->g->pkt_size / targ->frags; 1753 if (targ->frag_size > frag_size) { 1754 targ->frags = targ->g->pkt_size / frag_size; 1755 targ->frag_size = frag_size; 1756 if (targ->g->pkt_size % frag_size != 0) 1757 targ->frags++; 1758 } 1759 D("frags %u frag_size %u", targ->frags, targ->frag_size); 1760 1761 /* mark all slots of all rings as changed so initial copy will be done */ 1762 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1763 uint32_t j; 1764 struct netmap_slot *slot; 1765 1766 txring = NETMAP_TXRING(nifp, i); 1767 for (j = 0; j < txring->num_slots; j++) { 1768 slot = &txring->slot[j]; 1769 slot->flags = NS_BUF_CHANGED; 1770 } 1771 } 1772 1773 while (!targ->cancel && (n == 0 || sent < n)) { 1774 int rv; 1775 1776 if (rate_limit && tosend <= 0) { 1777 tosend = targ->g->burst; 1778 nexttime = timespec_add(nexttime, targ->g->tx_period); 1779 wait_time(nexttime); 1780 } 1781 1782 /* 1783 * wait for available room in the send queue(s) 1784 */ 1785 #ifdef BUSYWAIT 1786 (void)rv; 1787 if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) { 1788 D("ioctl error on queue %d: %s", targ->me, 1789 strerror(errno)); 1790 goto quit; 1791 } 1792 #else /* !BUSYWAIT */ 1793 if ( (rv = poll(&pfd, 1, 2000)) <= 0) { 1794 if (targ->cancel) 1795 break; 1796 D("poll error on queue %d: %s", targ->me, 1797 rv ? 
strerror(errno) : "timeout"); 1798 // goto quit; 1799 } 1800 if (pfd.revents & POLLERR) { 1801 D("poll error on %d ring %d-%d", pfd.fd, 1802 targ->nmd->first_tx_ring, targ->nmd->last_tx_ring); 1803 goto quit; 1804 } 1805 #endif /* !BUSYWAIT */ 1806 /* 1807 * scan our queues and send on those with room 1808 */ 1809 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1810 int m; 1811 uint64_t limit = rate_limit ? tosend : targ->g->burst; 1812 1813 if (n > 0 && n == sent) 1814 break; 1815 1816 if (n > 0 && n - sent < limit) 1817 limit = n - sent; 1818 txring = NETMAP_TXRING(nifp, i); 1819 if (nm_ring_empty(txring)) 1820 continue; 1821 1822 if (targ->g->pkt_min_size > 0) { 1823 size = nrand48(targ->seed) % 1824 (targ->g->pkt_size - targ->g->pkt_min_size) + 1825 targ->g->pkt_min_size; 1826 update_size(pkt, targ, size); 1827 } 1828 m = send_packets(txring, pkt, frame, size, targ, 1829 limit, options); 1830 ND("limit %lu tail %d m %d", 1831 limit, txring->tail, m); 1832 sent += m; 1833 if (m > 0) //XXX-ste: can m be 0? 1834 event++; 1835 targ->ctr.pkts = sent; 1836 targ->ctr.bytes += m*size; 1837 targ->ctr.events = event; 1838 if (rate_limit) { 1839 tosend -= m; 1840 if (tosend <= 0) 1841 break; 1842 } 1843 } 1844 } 1845 /* flush any remaining packets */ 1846 if (txring != NULL) { 1847 D("flush tail %d head %d on thread %p", 1848 txring->tail, txring->head, 1849 (void *)pthread_self()); 1850 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1851 } 1852 1853 /* final part: wait all the TX queues to be empty. 
*/ 1854 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1855 txring = NETMAP_TXRING(nifp, i); 1856 while (!targ->cancel && nm_tx_pending(txring)) { 1857 RD(5, "pending tx tail %d head %d on ring %d", 1858 txring->tail, txring->head, i); 1859 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1860 usleep(1); /* wait 1 tick */ 1861 } 1862 } 1863 } /* end DEV_NETMAP */ 1864 1865 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1866 targ->completed = 1; 1867 targ->ctr.pkts = sent; 1868 targ->ctr.bytes = sent*size; 1869 targ->ctr.events = event; 1870 quit: 1871 /* reset the ``used`` flag. */ 1872 targ->used = 0; 1873 1874 return (NULL); 1875 } 1876 1877 1878 #ifndef NO_PCAP 1879 static void 1880 receive_pcap(u_char *user, const struct pcap_pkthdr * h, 1881 const u_char * bytes) 1882 { 1883 struct my_ctrs *ctr = (struct my_ctrs *)user; 1884 (void)bytes; /* UNUSED */ 1885 ctr->bytes += h->len; 1886 ctr->pkts++; 1887 } 1888 #endif /* !NO_PCAP */ 1889 1890 1891 static int 1892 receive_packets(struct netmap_ring *ring, u_int limit, int dump, uint64_t *bytes) 1893 { 1894 u_int head, rx, n; 1895 uint64_t b = 0; 1896 u_int complete = 0; 1897 1898 if (bytes == NULL) 1899 bytes = &b; 1900 1901 head = ring->head; 1902 n = nm_ring_space(ring); 1903 if (n < limit) 1904 limit = n; 1905 for (rx = 0; rx < limit; rx++) { 1906 struct netmap_slot *slot = &ring->slot[head]; 1907 char *p = NETMAP_BUF(ring, slot->buf_idx); 1908 1909 *bytes += slot->len; 1910 if (dump) 1911 dump_payload(p, slot->len, ring, head); 1912 if (!(slot->flags & NS_MOREFRAG)) 1913 complete++; 1914 1915 head = nm_ring_next(ring, head); 1916 } 1917 ring->head = ring->cur = head; 1918 1919 return (complete); 1920 } 1921 1922 static void * 1923 receiver_body(void *data) 1924 { 1925 struct targ *targ = (struct targ *) data; 1926 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1927 struct netmap_if *nifp; 1928 struct netmap_ring *rxring; 1929 int i; 1930 struct my_ctrs cur; 1931 uint64_t n = 
targ->g->npackets / targ->g->nthreads; 1932 1933 memset(&cur, 0, sizeof(cur)); 1934 1935 if (setaffinity(targ->thread, targ->affinity)) 1936 goto quit; 1937 1938 D("reading from %s fd %d main_fd %d", 1939 targ->g->ifname, targ->fd, targ->g->main_fd); 1940 /* unbounded wait for the first packet. */ 1941 for (;!targ->cancel;) { 1942 i = poll(&pfd, 1, 1000); 1943 if (i > 0 && !(pfd.revents & POLLERR)) 1944 break; 1945 if (i < 0) { 1946 D("poll() error: %s", strerror(errno)); 1947 goto quit; 1948 } 1949 if (pfd.revents & POLLERR) { 1950 D("fd error"); 1951 goto quit; 1952 } 1953 RD(1, "waiting for initial packets, poll returns %d %d", 1954 i, pfd.revents); 1955 } 1956 /* main loop, exit after 1s silence */ 1957 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1958 if (targ->g->dev_type == DEV_TAP) { 1959 while (!targ->cancel && (n == 0 || targ->ctr.pkts < n)) { 1960 char buf[MAX_BODYSIZE]; 1961 /* XXX should we poll ? */ 1962 i = read(targ->g->main_fd, buf, sizeof(buf)); 1963 if (i > 0) { 1964 targ->ctr.pkts++; 1965 targ->ctr.bytes += i; 1966 targ->ctr.events++; 1967 } 1968 } 1969 #ifndef NO_PCAP 1970 } else if (targ->g->dev_type == DEV_PCAP) { 1971 while (!targ->cancel && (n == 0 || targ->ctr.pkts < n)) { 1972 /* XXX should we poll ? */ 1973 pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, 1974 (u_char *)&targ->ctr); 1975 targ->ctr.events++; 1976 } 1977 #endif /* !NO_PCAP */ 1978 } else { 1979 int dump = targ->g->options & OPT_DUMP; 1980 1981 nifp = targ->nmd->nifp; 1982 while (!targ->cancel && (n == 0 || targ->ctr.pkts < n)) { 1983 /* Once we started to receive packets, wait at most 1 seconds 1984 before quitting. 
*/ 1985 #ifdef BUSYWAIT 1986 if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) { 1987 D("ioctl error on queue %d: %s", targ->me, 1988 strerror(errno)); 1989 goto quit; 1990 } 1991 #else /* !BUSYWAIT */ 1992 if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { 1993 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1994 targ->toc.tv_sec -= 1; /* Subtract timeout time. */ 1995 goto out; 1996 } 1997 1998 if (pfd.revents & POLLERR) { 1999 D("poll err"); 2000 goto quit; 2001 } 2002 #endif /* !BUSYWAIT */ 2003 uint64_t cur_space = 0; 2004 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 2005 int m; 2006 2007 rxring = NETMAP_RXRING(nifp, i); 2008 /* compute free space in the ring */ 2009 m = rxring->head + rxring->num_slots - rxring->tail; 2010 if (m >= (int) rxring->num_slots) 2011 m -= rxring->num_slots; 2012 cur_space += m; 2013 if (nm_ring_empty(rxring)) 2014 continue; 2015 2016 m = receive_packets(rxring, targ->g->burst, dump, &cur.bytes); 2017 cur.pkts += m; 2018 if (m > 0) 2019 cur.events++; 2020 } 2021 cur.min_space = targ->ctr.min_space; 2022 if (cur_space < cur.min_space) 2023 cur.min_space = cur_space; 2024 targ->ctr = cur; 2025 } 2026 } 2027 2028 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 2029 2030 #if !defined(BUSYWAIT) 2031 out: 2032 #endif 2033 targ->completed = 1; 2034 targ->ctr = cur; 2035 2036 quit: 2037 /* reset the ``used`` flag. 
*/ 2038 targ->used = 0; 2039 2040 return (NULL); 2041 } 2042 2043 static void * 2044 txseq_body(void *data) 2045 { 2046 struct targ *targ = (struct targ *) data; 2047 struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; 2048 struct netmap_ring *ring; 2049 int64_t sent = 0; 2050 uint64_t event = 0; 2051 int options = targ->g->options | OPT_COPY; 2052 struct timespec nexttime = {0, 0}; 2053 int rate_limit = targ->g->tx_rate; 2054 struct pkt *pkt = &targ->pkt; 2055 int frags = targ->g->frags; 2056 uint32_t sequence = 0; 2057 int budget = 0; 2058 void *frame; 2059 int size; 2060 2061 if (targ->g->nthreads > 1) { 2062 D("can only txseq ping with 1 thread"); 2063 return NULL; 2064 } 2065 2066 if (targ->g->npackets > 0) { 2067 D("Ignoring -n argument"); 2068 } 2069 2070 frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header; 2071 size = targ->g->pkt_size + targ->g->virt_header; 2072 2073 D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd); 2074 if (setaffinity(targ->thread, targ->affinity)) 2075 goto quit; 2076 2077 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 2078 if (rate_limit) { 2079 targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); 2080 targ->tic.tv_nsec = 0; 2081 wait_time(targ->tic); 2082 nexttime = targ->tic; 2083 } 2084 2085 /* Only use the first queue. 
*/ 2086 ring = NETMAP_TXRING(targ->nmd->nifp, targ->nmd->first_tx_ring); 2087 2088 while (!targ->cancel) { 2089 int64_t limit; 2090 unsigned int space; 2091 unsigned int head; 2092 int fcnt; 2093 uint16_t sum = 0; 2094 int rv; 2095 2096 if (!rate_limit) { 2097 budget = targ->g->burst; 2098 2099 } else if (budget <= 0) { 2100 budget = targ->g->burst; 2101 nexttime = timespec_add(nexttime, targ->g->tx_period); 2102 wait_time(nexttime); 2103 } 2104 2105 /* wait for available room in the send queue */ 2106 #ifdef BUSYWAIT 2107 (void)rv; 2108 if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) { 2109 D("ioctl error on queue %d: %s", targ->me, 2110 strerror(errno)); 2111 goto quit; 2112 } 2113 #else /* !BUSYWAIT */ 2114 if ( (rv = poll(&pfd, 1, 2000)) <= 0) { 2115 if (targ->cancel) 2116 break; 2117 D("poll error on queue %d: %s", targ->me, 2118 rv ? strerror(errno) : "timeout"); 2119 // goto quit; 2120 } 2121 if (pfd.revents & POLLERR) { 2122 D("poll error on %d ring %d-%d", pfd.fd, 2123 targ->nmd->first_tx_ring, targ->nmd->last_tx_ring); 2124 goto quit; 2125 } 2126 #endif /* !BUSYWAIT */ 2127 2128 /* If no room poll() again. */ 2129 space = nm_ring_space(ring); 2130 if (!space) { 2131 continue; 2132 } 2133 2134 limit = budget; 2135 2136 if (space < limit) { 2137 limit = space; 2138 } 2139 2140 /* Cut off ``limit`` to make sure is multiple of ``frags``. */ 2141 if (frags > 1) { 2142 limit = (limit / frags) * frags; 2143 } 2144 2145 limit = sent + limit; /* Convert to absolute. */ 2146 2147 for (fcnt = frags, head = ring->head; 2148 sent < limit; sent++, sequence++) { 2149 struct netmap_slot *slot = &ring->slot[head]; 2150 char *p = NETMAP_BUF(ring, slot->buf_idx); 2151 uint16_t *w = (uint16_t *)PKT(pkt, body, targ->g->af), t; 2152 2153 memcpy(&sum, targ->g->af == AF_INET ? 
&pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, sizeof(sum)); 2154 2155 slot->flags = 0; 2156 t = *w; 2157 PKT(pkt, body, targ->g->af)[0] = sequence >> 24; 2158 PKT(pkt, body, targ->g->af)[1] = (sequence >> 16) & 0xff; 2159 sum = ~cksum_add(~sum, cksum_add(~t, *w)); 2160 t = *++w; 2161 PKT(pkt, body, targ->g->af)[2] = (sequence >> 8) & 0xff; 2162 PKT(pkt, body, targ->g->af)[3] = sequence & 0xff; 2163 sum = ~cksum_add(~sum, cksum_add(~t, *w)); 2164 memcpy(targ->g->af == AF_INET ? &pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, &sum, sizeof(sum)); 2165 nm_pkt_copy(frame, p, size); 2166 if (fcnt == frags) { 2167 update_addresses(pkt, targ); 2168 } 2169 2170 if (options & OPT_DUMP) { 2171 dump_payload(p, size, ring, head); 2172 } 2173 2174 slot->len = size; 2175 2176 if (--fcnt > 0) { 2177 slot->flags |= NS_MOREFRAG; 2178 } else { 2179 fcnt = frags; 2180 } 2181 2182 if (sent == limit - 1) { 2183 /* Make sure we don't push an incomplete 2184 * packet. */ 2185 assert(!(slot->flags & NS_MOREFRAG)); 2186 slot->flags |= NS_REPORT; 2187 } 2188 2189 head = nm_ring_next(ring, head); 2190 if (rate_limit) { 2191 budget--; 2192 } 2193 } 2194 2195 ring->cur = ring->head = head; 2196 2197 event ++; 2198 targ->ctr.pkts = sent; 2199 targ->ctr.bytes = sent * size; 2200 targ->ctr.events = event; 2201 } 2202 2203 /* flush any remaining packets */ 2204 D("flush tail %d head %d on thread %p", 2205 ring->tail, ring->head, 2206 (void *)pthread_self()); 2207 ioctl(pfd.fd, NIOCTXSYNC, NULL); 2208 2209 /* final part: wait the TX queues to become empty. 
*/ 2210 while (!targ->cancel && nm_tx_pending(ring)) { 2211 RD(5, "pending tx tail %d head %d on ring %d", 2212 ring->tail, ring->head, targ->nmd->first_tx_ring); 2213 ioctl(pfd.fd, NIOCTXSYNC, NULL); 2214 usleep(1); /* wait 1 tick */ 2215 } 2216 2217 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 2218 targ->completed = 1; 2219 targ->ctr.pkts = sent; 2220 targ->ctr.bytes = sent * size; 2221 targ->ctr.events = event; 2222 quit: 2223 /* reset the ``used`` flag. */ 2224 targ->used = 0; 2225 2226 return (NULL); 2227 } 2228 2229 2230 static char * 2231 multi_slot_to_string(struct netmap_ring *ring, unsigned int head, 2232 unsigned int nfrags, char *strbuf, size_t strbuflen) 2233 { 2234 unsigned int f; 2235 char *ret = strbuf; 2236 2237 for (f = 0; f < nfrags; f++) { 2238 struct netmap_slot *slot = &ring->slot[head]; 2239 int m = snprintf(strbuf, strbuflen, "|%u,%x|", slot->len, 2240 slot->flags); 2241 if (m >= (int)strbuflen) { 2242 break; 2243 } 2244 strbuf += m; 2245 strbuflen -= m; 2246 2247 head = nm_ring_next(ring, head); 2248 } 2249 2250 return ret; 2251 } 2252 2253 static void * 2254 rxseq_body(void *data) 2255 { 2256 struct targ *targ = (struct targ *) data; 2257 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 2258 int dump = targ->g->options & OPT_DUMP; 2259 struct netmap_ring *ring; 2260 unsigned int frags_exp = 1; 2261 struct my_ctrs cur; 2262 unsigned int frags = 0; 2263 int first_packet = 1; 2264 int first_slot = 1; 2265 int i, j, af, nrings; 2266 uint32_t seq, *seq_exp = NULL; 2267 2268 memset(&cur, 0, sizeof(cur)); 2269 2270 if (setaffinity(targ->thread, targ->affinity)) 2271 goto quit; 2272 2273 nrings = targ->nmd->last_rx_ring - targ->nmd->first_rx_ring + 1; 2274 seq_exp = calloc(nrings, sizeof(uint32_t)); 2275 if (seq_exp == NULL) { 2276 D("failed to allocate seq array"); 2277 goto quit; 2278 } 2279 2280 D("reading from %s fd %d main_fd %d", 2281 targ->g->ifname, targ->fd, targ->g->main_fd); 2282 /* unbounded wait for the first packet. 
*/ 2283 for (;!targ->cancel;) { 2284 i = poll(&pfd, 1, 1000); 2285 if (i > 0 && !(pfd.revents & POLLERR)) 2286 break; 2287 RD(1, "waiting for initial packets, poll returns %d %d", 2288 i, pfd.revents); 2289 } 2290 2291 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 2292 2293 2294 while (!targ->cancel) { 2295 unsigned int head; 2296 int limit; 2297 2298 #ifdef BUSYWAIT 2299 if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) { 2300 D("ioctl error on queue %d: %s", targ->me, 2301 strerror(errno)); 2302 goto quit; 2303 } 2304 #else /* !BUSYWAIT */ 2305 if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { 2306 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 2307 targ->toc.tv_sec -= 1; /* Subtract timeout time. */ 2308 goto out; 2309 } 2310 2311 if (pfd.revents & POLLERR) { 2312 D("poll err"); 2313 goto quit; 2314 } 2315 #endif /* !BUSYWAIT */ 2316 2317 for (j = targ->nmd->first_rx_ring; j <= targ->nmd->last_rx_ring; j++) { 2318 ring = NETMAP_RXRING(targ->nmd->nifp, j); 2319 if (nm_ring_empty(ring)) 2320 continue; 2321 2322 limit = nm_ring_space(ring); 2323 if (limit > targ->g->burst) 2324 limit = targ->g->burst; 2325 2326 #if 0 2327 /* Enable this if 2328 * 1) we remove the early-return optimization from 2329 * the netmap poll implementation, or 2330 * 2) pipes get NS_MOREFRAG support. 2331 * With the current netmap implementation, an experiment like 2332 * pkt-gen -i vale:1{1 -f txseq -F 9 2333 * pkt-gen -i vale:1}1 -f rxseq 2334 * would get stuck as soon as we find nm_ring_space(ring) < 9, 2335 * since here limit is rounded to 0 and 2336 * pipe rxsync is not called anymore by the poll() of this loop. 2337 */ 2338 if (frags_exp > 1) { 2339 int o = limit; 2340 /* Cut off to the closest smaller multiple. 
*/ 2341 limit = (limit / frags_exp) * frags_exp; 2342 RD(2, "LIMIT %d --> %d", o, limit); 2343 } 2344 #endif 2345 2346 for (head = ring->head, i = 0; i < limit; i++) { 2347 struct netmap_slot *slot = &ring->slot[head]; 2348 char *p = NETMAP_BUF(ring, slot->buf_idx); 2349 int len = slot->len; 2350 struct pkt *pkt; 2351 2352 if (dump) { 2353 dump_payload(p, slot->len, ring, head); 2354 } 2355 2356 frags++; 2357 if (!(slot->flags & NS_MOREFRAG)) { 2358 if (first_packet) { 2359 first_packet = 0; 2360 } else if (frags != frags_exp) { 2361 char prbuf[512]; 2362 RD(1, "Received packets with %u frags, " 2363 "expected %u, '%s'", frags, frags_exp, 2364 multi_slot_to_string(ring, head-frags+1, 2365 frags, 2366 prbuf, sizeof(prbuf))); 2367 } 2368 first_packet = 0; 2369 frags_exp = frags; 2370 frags = 0; 2371 } 2372 2373 p -= sizeof(pkt->vh) - targ->g->virt_header; 2374 len += sizeof(pkt->vh) - targ->g->virt_header; 2375 pkt = (struct pkt *)p; 2376 if (ntohs(pkt->eh.ether_type) == ETHERTYPE_IP) 2377 af = AF_INET; 2378 else 2379 af = AF_INET6; 2380 2381 if ((char *)pkt + len < ((char *)PKT(pkt, body, af)) + 2382 sizeof(seq)) { 2383 RD(1, "%s: packet too small (len=%u)", __func__, 2384 slot->len); 2385 } else { 2386 seq = (PKT(pkt, body, af)[0] << 24) | 2387 (PKT(pkt, body, af)[1] << 16) | 2388 (PKT(pkt, body, af)[2] << 8) | 2389 PKT(pkt, body, af)[3]; 2390 if (first_slot) { 2391 /* Grab the first one, whatever it 2392 is. 
*/ 2393 seq_exp[j] = seq; 2394 first_slot = 0; 2395 } else if (seq != seq_exp[j]) { 2396 uint32_t delta = seq - seq_exp[j]; 2397 2398 if (delta < (0xFFFFFFFF >> 1)) { 2399 RD(2, "Sequence GAP: exp %u found %u", 2400 seq_exp[j], seq); 2401 } else { 2402 RD(2, "Sequence OUT OF ORDER: " 2403 "exp %u found %u", seq_exp[j], seq); 2404 } 2405 seq_exp[j] = seq; 2406 } 2407 seq_exp[j]++; 2408 } 2409 2410 cur.bytes += slot->len; 2411 head = nm_ring_next(ring, head); 2412 cur.pkts++; 2413 } 2414 2415 ring->cur = ring->head = head; 2416 2417 cur.events++; 2418 targ->ctr = cur; 2419 } 2420 } 2421 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 2422 2423 #ifndef BUSYWAIT 2424 out: 2425 #endif /* !BUSYWAIT */ 2426 targ->completed = 1; 2427 targ->ctr = cur; 2428 2429 quit: 2430 if (seq_exp != NULL) 2431 free(seq_exp); 2432 /* reset the ``used`` flag. */ 2433 targ->used = 0; 2434 2435 return (NULL); 2436 } 2437 2438 2439 static void 2440 tx_output(struct glob_arg *g, struct my_ctrs *cur, double delta, const char *msg) 2441 { 2442 double bw, raw_bw, pps, abs; 2443 char b1[40], b2[80], b3[80]; 2444 int size; 2445 2446 if (cur->pkts == 0) { 2447 printf("%s nothing.\n", msg); 2448 return; 2449 } 2450 2451 size = (int)(cur->bytes / cur->pkts); 2452 2453 printf("%s %llu packets %llu bytes %llu events %d bytes each in %.2f seconds.\n", 2454 msg, 2455 (unsigned long long)cur->pkts, 2456 (unsigned long long)cur->bytes, 2457 (unsigned long long)cur->events, size, delta); 2458 if (delta == 0) 2459 delta = 1e-6; 2460 if (size < 60) /* correct for min packet size */ 2461 size = 60; 2462 pps = cur->pkts / delta; 2463 bw = (8.0 * cur->bytes) / delta; 2464 raw_bw = (8.0 * cur->bytes + cur->pkts * g->framing) / delta; 2465 abs = cur->pkts / (double)(cur->events); 2466 2467 printf("Speed: %spps Bandwidth: %sbps (raw %sbps). 
Average batch: %.2f pkts\n", 2468 norm(b1, pps, normalize), norm(b2, bw, normalize), norm(b3, raw_bw, normalize), abs); 2469 } 2470 2471 static void 2472 usage(int errcode) 2473 { 2474 /* This usage is generated from the pkt-gen man page: 2475 * $ man pkt-gen > x 2476 * and pasted here adding the string terminators and endlines with simple 2477 * regular expressions. */ 2478 const char *cmd = "pkt-gen"; 2479 fprintf(stderr, 2480 "Usage:\n" 2481 "%s arguments\n" 2482 " -h Show program usage and exit.\n" 2483 "\n" 2484 " -i interface\n" 2485 " Name of the network interface that pkt-gen operates on. It can be a system network interface\n" 2486 " (e.g., em0), the name of a vale(4) port (e.g., valeSSS:PPP), the name of a netmap pipe or\n" 2487 " monitor, or any valid netmap port name accepted by the nm_open library function, as docu-\n" 2488 " mented in netmap(4) (NIOCREGIF section).\n" 2489 "\n" 2490 " -f function\n" 2491 " The function to be executed by pkt-gen. Specify tx for transmission, rx for reception, ping\n" 2492 " for client-side ping-pong operation, and pong for server-side ping-pong operation.\n" 2493 "\n" 2494 " -n count\n" 2495 " Number of iterations of the pkt-gen function (with 0 meaning infinite). In case of tx or rx,\n" 2496 " count is the number of packets to receive or transmit. In case of ping or pong, count is the\n" 2497 " number of ping-pong transactions.\n" 2498 "\n" 2499 " -l pkt_size\n" 2500 " Packet size in bytes excluding CRC. 
If passed a second time, use random sizes larger or\n" 2501 " equal than the second one and lower than the first one.\n" 2502 "\n" 2503 " -b burst_size\n" 2504 " Transmit or receive up to burst_size packets at a time.\n" 2505 "\n" 2506 " -4 Use IPv4 addresses.\n" 2507 "\n" 2508 " -6 Use IPv6 addresses.\n" 2509 "\n" 2510 " -d dst_ip[:port[-dst_ip:port]]\n" 2511 " Destination IPv4/IPv6 address and port, single or range.\n" 2512 "\n" 2513 " -s src_ip[:port[-src_ip:port]]\n" 2514 " Source IPv4/IPv6 address and port, single or range.\n" 2515 "\n" 2516 " -D dst_mac\n" 2517 " Destination MAC address in colon notation (e.g., aa:bb:cc:dd:ee:00).\n" 2518 "\n" 2519 " -S src_mac\n" 2520 " Source MAC address in colon notation.\n" 2521 "\n" 2522 " -a cpu_id\n" 2523 " Pin the first thread of pkt-gen to a particular CPU using pthread_setaffinity_np(3). If more\n" 2524 " threads are used, they are pinned to the subsequent CPUs, one per thread.\n" 2525 "\n" 2526 " -c cpus\n" 2527 " Maximum number of CPUs to use (0 means to use all the available ones).\n" 2528 "\n" 2529 " -p threads\n" 2530 " Number of threads to use. By default, only a single thread is used to handle all the netmap\n" 2531 " rings. If threads is larger than one, each thread handles a single TX ring (in tx mode), a\n" 2532 " single RX ring (in rx mode), or a TX/RX ring pair. The number of threads must be less than or\n" 2533 " equal to the number of TX (or RX) rings available in the device specified by interface.\n" 2534 "\n" 2535 " -T report_ms\n" 2536 " Number of milliseconds between reports.\n" 2537 "\n" 2538 " -w wait_for_link_time\n" 2539 " Number of seconds to wait before starting the pkt-gen function, useful to make sure that the\n" 2540 " network link is up. A network device driver may take some time to enter netmap mode, or to\n" 2541 " create a new transmit/receive ring pair when netmap(4) requests one.\n" 2542 "\n" 2543 " -R rate\n" 2544 " Packet transmission rate. 
Not setting the packet transmission rate tells pkt-gen to transmit\n" 2545 " packets as quickly as possible. On servers from 2010 onward netmap(4) is able to com-\n" 2546 " pletely use all of the bandwidth of a 10 or 40Gbps link, so this option should be used unless\n" 2547 " your intention is to saturate the link.\n" 2548 "\n" 2549 " -X Dump payload of each packet transmitted or received.\n" 2550 "\n" 2551 " -H len Add empty virtio-net-header with size 'len'. Valid sizes are 0, 10 and 12. This option is\n" 2552 " only used with Virtual Machine technologies that use virtio as a network interface.\n" 2553 "\n" 2554 " -P file\n" 2555 " Load the packet to be transmitted from a pcap file rather than constructing it within\n" 2556 " pkt-gen.\n" 2557 "\n" 2558 " -z Use random IPv4/IPv6 src address/port.\n" 2559 "\n" 2560 " -Z Use random IPv4/IPv6 dst address/port.\n" 2561 "\n" 2562 " -N Do not normalize units (i.e., use bps, pps instead of Mbps, Kpps, etc.).\n" 2563 "\n" 2564 " -F num_frags\n" 2565 " Send multi-slot packets, each one with num_frags fragments. A multi-slot packet is repre-\n" 2566 " sented by two or more consecutive netmap slots with the NS_MOREFRAG flag set (except for the\n" 2567 " last slot). This is useful to transmit or receive packets larger than the netmap buffer\n" 2568 " size.\n" 2569 "\n" 2570 " -M frag_size\n" 2571 " In multi-slot mode, frag_size specifies the size of each fragment, if smaller than the packet\n" 2572 " length divided by num_frags.\n" 2573 "\n" 2574 " -I Use indirect buffers. 
It is only valid for transmitting on VALE ports, and it is implemented\n" 2575 " by setting the NS_INDIRECT flag in the netmap slots.\n" 2576 "\n" 2577 " -W Exit immediately if all the RX rings are empty the first time they are examined.\n" 2578 "\n" 2579 " -v Increase the verbosity level.\n" 2580 "\n" 2581 " -r In tx mode, do not initialize packets, but send whatever the content of the uninitialized\n" 2582 " netmap buffers is (rubbish mode).\n" 2583 "\n" 2584 " -A Compute mean and standard deviation (over a sliding window) for the transmit or receive rate.\n" 2585 "\n" 2586 " -B Take Ethernet framing and CRC into account when computing the average bps. This adds 4 bytes\n" 2587 " of CRC and 20 bytes of framing to each packet.\n" 2588 "\n" 2589 " -C tx_slots[,rx_slots[,tx_rings[,rx_rings]]]\n" 2590 " Configuration in terms of number of rings and slots to be used when opening the netmap port.\n" 2591 " Such configuration has an effect on software ports created on the fly, such as VALE ports and\n" 2592 " netmap pipes. The configuration may consist of 1 to 4 numbers separated by commas: tx_slots,\n" 2593 " rx_slots, tx_rings, rx_rings. Missing numbers or zeroes stand for default values. As an\n" 2594 " additional convenience, if exactly one number is specified, then this is assigned to both\n" 2595 " tx_slots and rx_slots. 
If there is no fourth number, then the third one is assigned to both\n" 2596 " tx_rings and rx_rings.\n" 2597 "\n" 2598 " -o options data generation options (parsed using atoi)\n" 2599 " OPT_PREFETCH 1\n" 2600 " OPT_ACCESS 2\n" 2601 " OPT_COPY 4\n" 2602 " OPT_MEMCPY 8\n" 2603 " OPT_TS 16 (add a timestamp)\n" 2604 " OPT_INDIRECT 32 (use indirect buffers)\n" 2605 " OPT_DUMP 64 (dump rx/tx traffic)\n" 2606 " OPT_RUBBISH 256\n" 2607 " (send whatever the buffers contain)\n" 2608 " OPT_RANDOM_SRC 512\n" 2609 " OPT_RANDOM_DST 1024\n" 2610 " OPT_PPS_STATS 2048\n" 2611 " OPT_UPDATE_CSUM 4096\n" 2612 "", 2613 cmd); 2614 exit(errcode); 2615 } 2616 2617 static int 2618 start_threads(struct glob_arg *g) { 2619 int i; 2620 2621 targs = calloc(g->nthreads, sizeof(*targs)); 2622 struct targ *t; 2623 /* 2624 * Now create the desired number of threads, each one 2625 * using a single descriptor. 2626 */ 2627 for (i = 0; i < g->nthreads; i++) { 2628 uint64_t seed = (uint64_t)time(0) | ((uint64_t)time(0) << 32); 2629 t = &targs[i]; 2630 2631 bzero(t, sizeof(*t)); 2632 t->fd = -1; /* default, with pcap */ 2633 t->g = g; 2634 memcpy(t->seed, &seed, sizeof(t->seed)); 2635 2636 if (g->dev_type == DEV_NETMAP) { 2637 int m = -1; 2638 2639 /* 2640 * if the user wants both HW and SW rings, we need to 2641 * know when to switch from NR_REG_ONE_NIC to NR_REG_ONE_SW 2642 */ 2643 if (g->orig_mode == NR_REG_NIC_SW) { 2644 m = (g->td_type == TD_TYPE_RECEIVER ? 
2645 g->nmd->reg.nr_rx_rings : 2646 g->nmd->reg.nr_tx_rings); 2647 } 2648 2649 if (i > 0) { 2650 int j; 2651 /* the first thread uses the fd opened by the main 2652 * thread, the other threads re-open /dev/netmap 2653 */ 2654 t->nmd = nmport_clone(g->nmd); 2655 if (t->nmd == NULL) 2656 return -1; 2657 2658 j = i; 2659 if (m > 0 && j >= m) { 2660 /* switch to the software rings */ 2661 t->nmd->reg.nr_mode = NR_REG_ONE_SW; 2662 j -= m; 2663 } 2664 t->nmd->reg.nr_ringid = j & NETMAP_RING_MASK; 2665 /* Only touch one of the rings (rx is already ok) */ 2666 if (g->td_type == TD_TYPE_RECEIVER) 2667 t->nmd->reg.nr_flags |= NETMAP_NO_TX_POLL; 2668 2669 /* register interface. Override ifname and ringid etc. */ 2670 if (nmport_open_desc(t->nmd) < 0) { 2671 nmport_undo_prepare(t->nmd); 2672 t->nmd = NULL; 2673 return -1; 2674 } 2675 } else { 2676 t->nmd = g->nmd; 2677 } 2678 t->fd = t->nmd->fd; 2679 t->frags = g->frags; 2680 } else { 2681 targs[i].fd = g->main_fd; 2682 } 2683 t->used = 1; 2684 t->me = i; 2685 if (g->affinity >= 0) { 2686 t->affinity = (g->affinity + i) % g->cpus; 2687 } else { 2688 t->affinity = -1; 2689 } 2690 /* default, init packets */ 2691 initialize_packet(t); 2692 } 2693 /* Wait for PHY reset. 
*/ 2694 D("Wait %d secs for phy reset", g->wait_link); 2695 sleep(g->wait_link); 2696 D("Ready..."); 2697 2698 for (i = 0; i < g->nthreads; i++) { 2699 t = &targs[i]; 2700 if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) { 2701 D("Unable to create thread %d: %s", i, strerror(errno)); 2702 t->used = 0; 2703 } 2704 } 2705 return 0; 2706 } 2707 2708 static void 2709 main_thread(struct glob_arg *g) 2710 { 2711 int i; 2712 2713 struct my_ctrs prev, cur; 2714 double delta_t; 2715 struct timeval tic, toc; 2716 2717 prev.pkts = prev.bytes = prev.events = 0; 2718 gettimeofday(&prev.t, NULL); 2719 for (;;) { 2720 char b1[40], b2[40], b3[40], b4[100]; 2721 uint64_t pps, usec; 2722 struct my_ctrs x; 2723 double abs; 2724 int done = 0; 2725 2726 usec = wait_for_next_report(&prev.t, &cur.t, 2727 g->report_interval); 2728 2729 cur.pkts = cur.bytes = cur.events = 0; 2730 cur.min_space = 0; 2731 if (usec < 10000) /* too short to be meaningful */ 2732 continue; 2733 /* accumulate counts for all threads */ 2734 for (i = 0; i < g->nthreads; i++) { 2735 cur.pkts += targs[i].ctr.pkts; 2736 cur.bytes += targs[i].ctr.bytes; 2737 cur.events += targs[i].ctr.events; 2738 cur.min_space += targs[i].ctr.min_space; 2739 targs[i].ctr.min_space = 99999; 2740 if (targs[i].used == 0) 2741 done++; 2742 } 2743 x.pkts = cur.pkts - prev.pkts; 2744 x.bytes = cur.bytes - prev.bytes; 2745 x.events = cur.events - prev.events; 2746 pps = (x.pkts*1000000 + usec/2) / usec; 2747 abs = (x.events > 0) ? (x.pkts / (double) x.events) : 0; 2748 2749 if (!(g->options & OPT_PPS_STATS)) { 2750 strcpy(b4, ""); 2751 } else { 2752 /* Compute some pps stats using a sliding window. 
*/ 2753 double ppsavg = 0.0, ppsdev = 0.0; 2754 int nsamples = 0; 2755 2756 g->win[g->win_idx] = pps; 2757 g->win_idx = (g->win_idx + 1) % STATS_WIN; 2758 2759 for (i = 0; i < STATS_WIN; i++) { 2760 ppsavg += g->win[i]; 2761 if (g->win[i]) { 2762 nsamples ++; 2763 } 2764 } 2765 ppsavg /= nsamples; 2766 2767 for (i = 0; i < STATS_WIN; i++) { 2768 if (g->win[i] == 0) { 2769 continue; 2770 } 2771 ppsdev += (g->win[i] - ppsavg) * (g->win[i] - ppsavg); 2772 } 2773 ppsdev /= nsamples; 2774 ppsdev = sqrt(ppsdev); 2775 2776 snprintf(b4, sizeof(b4), "[avg/std %s/%s pps]", 2777 norm(b1, ppsavg, normalize), norm(b2, ppsdev, normalize)); 2778 } 2779 2780 D("%spps %s(%spkts %sbps in %llu usec) %.2f avg_batch %d min_space", 2781 norm(b1, pps, normalize), b4, 2782 norm(b2, (double)x.pkts, normalize), 2783 norm(b3, 1000000*((double)x.bytes*8+(double)x.pkts*g->framing)/usec, normalize), 2784 (unsigned long long)usec, 2785 abs, (int)cur.min_space); 2786 prev = cur; 2787 2788 if (done == g->nthreads) 2789 break; 2790 } 2791 2792 timerclear(&tic); 2793 timerclear(&toc); 2794 cur.pkts = cur.bytes = cur.events = 0; 2795 /* final round */ 2796 for (i = 0; i < g->nthreads; i++) { 2797 struct timespec t_tic, t_toc; 2798 /* 2799 * Join active threads, unregister interfaces and close 2800 * file descriptors. 2801 */ 2802 if (targs[i].used) 2803 pthread_join(targs[i].thread, NULL); /* blocking */ 2804 if (g->dev_type == DEV_NETMAP) { 2805 nmport_close(targs[i].nmd); 2806 targs[i].nmd = NULL; 2807 } else { 2808 close(targs[i].fd); 2809 } 2810 2811 if (targs[i].completed == 0) 2812 D("ouch, thread %d exited with error", i); 2813 2814 /* 2815 * Collect threads output and extract information about 2816 * how long it took to send all the packets. 
2817 */ 2818 cur.pkts += targs[i].ctr.pkts; 2819 cur.bytes += targs[i].ctr.bytes; 2820 cur.events += targs[i].ctr.events; 2821 /* collect the largest start (tic) and end (toc) times, 2822 * XXX maybe we should do the earliest tic, or do a weighted 2823 * average ? 2824 */ 2825 t_tic = timeval2spec(&tic); 2826 t_toc = timeval2spec(&toc); 2827 if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic)) 2828 tic = timespec2val(&targs[i].tic); 2829 if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc)) 2830 toc = timespec2val(&targs[i].toc); 2831 } 2832 2833 /* print output. */ 2834 timersub(&toc, &tic, &toc); 2835 delta_t = toc.tv_sec + 1e-6* toc.tv_usec; 2836 if (g->td_type == TD_TYPE_SENDER) 2837 tx_output(g, &cur, delta_t, "Sent"); 2838 else if (g->td_type == TD_TYPE_RECEIVER) 2839 tx_output(g, &cur, delta_t, "Received"); 2840 } 2841 2842 struct td_desc { 2843 int ty; 2844 const char *key; 2845 void *f; 2846 int default_burst; 2847 }; 2848 2849 static struct td_desc func[] = { 2850 { TD_TYPE_RECEIVER, "rx", receiver_body, 512}, /* default */ 2851 { TD_TYPE_SENDER, "tx", sender_body, 512 }, 2852 { TD_TYPE_OTHER, "ping", ping_body, 1 }, 2853 { TD_TYPE_OTHER, "pong", pong_body, 1 }, 2854 { TD_TYPE_SENDER, "txseq", txseq_body, 512 }, 2855 { TD_TYPE_RECEIVER, "rxseq", rxseq_body, 512 }, 2856 { 0, NULL, NULL, 0 } 2857 }; 2858 2859 static int 2860 tap_alloc(char *dev) 2861 { 2862 struct ifreq ifr; 2863 int fd, err; 2864 const char *clonedev = TAP_CLONEDEV; 2865 2866 (void)err; 2867 (void)dev; 2868 /* Arguments taken by the function: 2869 * 2870 * char *dev: the name of an interface (or '\0'). MUST have enough 2871 * space to hold the interface name if '\0' is passed 2872 * int flags: interface flags (eg, IFF_TUN etc.) 
2873 */ 2874 2875 #ifdef __FreeBSD__ 2876 if (dev[3]) { /* tapSomething */ 2877 static char buf[128]; 2878 snprintf(buf, sizeof(buf), "/dev/%s", dev); 2879 clonedev = buf; 2880 } 2881 #endif 2882 /* open the device */ 2883 if( (fd = open(clonedev, O_RDWR)) < 0 ) { 2884 return fd; 2885 } 2886 D("%s open successful", clonedev); 2887 2888 /* preparation of the struct ifr, of type "struct ifreq" */ 2889 memset(&ifr, 0, sizeof(ifr)); 2890 2891 #ifdef linux 2892 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 2893 2894 if (*dev) { 2895 /* if a device name was specified, put it in the structure; otherwise, 2896 * the kernel will try to allocate the "next" device of the 2897 * specified type */ 2898 size_t len = strlen(dev); 2899 if (len > IFNAMSIZ) { 2900 D("%s too long", dev); 2901 return -1; 2902 } 2903 memcpy(ifr.ifr_name, dev, len); 2904 } 2905 2906 /* try to create the device */ 2907 if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) { 2908 D("failed to do a TUNSETIFF: %s", strerror(errno)); 2909 close(fd); 2910 return err; 2911 } 2912 2913 /* if the operation was successful, write back the name of the 2914 * interface to the variable "dev", so the caller can know 2915 * it. 
Note that the caller MUST reserve space in *dev (see calling 2916 * code below) */ 2917 strcpy(dev, ifr.ifr_name); 2918 D("new name is %s", dev); 2919 #endif /* linux */ 2920 2921 /* this is the special file descriptor that the caller will use to talk 2922 * with the virtual interface */ 2923 return fd; 2924 } 2925 2926 int 2927 main(int arc, char **argv) 2928 { 2929 int i; 2930 struct sigaction sa; 2931 sigset_t ss; 2932 2933 struct glob_arg g; 2934 2935 int ch; 2936 int devqueues = 1; /* how many device queues */ 2937 int wait_link_arg = 0; 2938 2939 int pkt_size_done = 0; 2940 2941 struct td_desc *fn = func; 2942 2943 bzero(&g, sizeof(g)); 2944 2945 g.main_fd = -1; 2946 g.td_body = fn->f; 2947 g.td_type = fn->ty; 2948 g.report_interval = 1000; /* report interval */ 2949 g.affinity = -1; 2950 /* ip addresses can also be a range x.x.x.x-x.x.x.y */ 2951 g.af = AF_INET; /* default */ 2952 g.src_ip.name = "10.0.0.1"; 2953 g.dst_ip.name = "10.1.0.1"; 2954 g.dst_mac.name = "ff:ff:ff:ff:ff:ff"; 2955 g.src_mac.name = NULL; 2956 g.pkt_size = 60; 2957 g.pkt_min_size = 0; 2958 g.nthreads = 1; 2959 g.cpus = 1; /* default */ 2960 g.forever = 1; 2961 g.tx_rate = 0; 2962 g.frags = 1; 2963 g.frag_size = (u_int)-1; /* use the netmap buffer size by default */ 2964 g.nmr_config = ""; 2965 g.virt_header = 0; 2966 g.wait_link = 2; /* wait 2 seconds for physical ports */ 2967 2968 while ((ch = getopt(arc, argv, "46a:f:F:Nn:i:Il:d:s:D:S:b:c:o:p:" 2969 "T:w:WvR:XC:H:rP:zZAhBM:")) != -1) { 2970 2971 switch(ch) { 2972 default: 2973 D("bad option %c %s", ch, optarg); 2974 usage(-1); 2975 break; 2976 2977 case 'h': 2978 usage(0); 2979 break; 2980 2981 case '4': 2982 g.af = AF_INET; 2983 break; 2984 2985 case '6': 2986 g.af = AF_INET6; 2987 break; 2988 2989 case 'N': 2990 normalize = 0; 2991 break; 2992 2993 case 'n': 2994 g.npackets = strtoull(optarg, NULL, 10); 2995 break; 2996 2997 case 'F': 2998 i = atoi(optarg); 2999 if (i < 1 || i > 63) { 3000 D("invalid frags %d [1..63], ignore", i); 
3001 break; 3002 } 3003 g.frags = i; 3004 break; 3005 3006 case 'M': 3007 g.frag_size = atoi(optarg); 3008 break; 3009 3010 case 'f': 3011 for (fn = func; fn->key; fn++) { 3012 if (!strcmp(fn->key, optarg)) 3013 break; 3014 } 3015 if (fn->key) { 3016 g.td_body = fn->f; 3017 g.td_type = fn->ty; 3018 } else { 3019 D("unrecognised function %s", optarg); 3020 } 3021 break; 3022 3023 case 'o': /* data generation options */ 3024 g.options |= atoi(optarg); 3025 break; 3026 3027 case 'a': /* force affinity */ 3028 g.affinity = atoi(optarg); 3029 break; 3030 3031 case 'i': /* interface */ 3032 /* a prefix of tap: netmap: or pcap: forces the mode. 3033 * otherwise we guess 3034 */ 3035 D("interface is %s", optarg); 3036 if (strlen(optarg) > MAX_IFNAMELEN - 8) { 3037 D("ifname too long %s", optarg); 3038 break; 3039 } 3040 strcpy(g.ifname, optarg); 3041 if (!strcmp(optarg, "null")) { 3042 g.dev_type = DEV_NETMAP; 3043 g.dummy_send = 1; 3044 } else if (!strncmp(optarg, "tap:", 4)) { 3045 g.dev_type = DEV_TAP; 3046 strcpy(g.ifname, optarg + 4); 3047 } else if (!strncmp(optarg, "pcap:", 5)) { 3048 g.dev_type = DEV_PCAP; 3049 strcpy(g.ifname, optarg + 5); 3050 } else if (!strncmp(optarg, "netmap:", 7) || 3051 !strncmp(optarg, "vale", 4)) { 3052 g.dev_type = DEV_NETMAP; 3053 } else if (!strncmp(optarg, "tap", 3)) { 3054 g.dev_type = DEV_TAP; 3055 } else { /* prepend netmap: */ 3056 g.dev_type = DEV_NETMAP; 3057 sprintf(g.ifname, "netmap:%s", optarg); 3058 } 3059 break; 3060 3061 case 'I': 3062 g.options |= OPT_INDIRECT; /* use indirect buffers */ 3063 break; 3064 3065 case 'l': /* pkt_size */ 3066 if (pkt_size_done) { 3067 g.pkt_min_size = atoi(optarg); 3068 } else { 3069 g.pkt_size = atoi(optarg); 3070 pkt_size_done = 1; 3071 } 3072 break; 3073 3074 case 'd': 3075 g.dst_ip.name = optarg; 3076 break; 3077 3078 case 's': 3079 g.src_ip.name = optarg; 3080 break; 3081 3082 case 'T': /* report interval */ 3083 g.report_interval = atoi(optarg); 3084 break; 3085 3086 case 'w': 3087 
g.wait_link = atoi(optarg); 3088 wait_link_arg = 1; 3089 break; 3090 3091 case 'W': 3092 g.forever = 0; /* exit RX with no traffic */ 3093 break; 3094 3095 case 'b': /* burst */ 3096 g.burst = atoi(optarg); 3097 break; 3098 case 'c': 3099 g.cpus = atoi(optarg); 3100 break; 3101 case 'p': 3102 g.nthreads = atoi(optarg); 3103 break; 3104 3105 case 'D': /* destination mac */ 3106 g.dst_mac.name = optarg; 3107 break; 3108 3109 case 'S': /* source mac */ 3110 g.src_mac.name = optarg; 3111 break; 3112 case 'v': 3113 verbose++; 3114 break; 3115 case 'R': 3116 g.tx_rate = atoi(optarg); 3117 break; 3118 case 'X': 3119 g.options |= OPT_DUMP; 3120 break; 3121 case 'C': 3122 D("WARNING: the 'C' option is deprecated, use the '+conf:' libnetmap option instead"); 3123 g.nmr_config = strdup(optarg); 3124 break; 3125 case 'H': 3126 g.virt_header = atoi(optarg); 3127 break; 3128 case 'P': 3129 g.packet_file = strdup(optarg); 3130 break; 3131 case 'r': 3132 g.options |= OPT_RUBBISH; 3133 break; 3134 case 'z': 3135 g.options |= OPT_RANDOM_SRC; 3136 break; 3137 case 'Z': 3138 g.options |= OPT_RANDOM_DST; 3139 break; 3140 case 'A': 3141 g.options |= OPT_PPS_STATS; 3142 break; 3143 case 'B': 3144 /* raw packets have4 bytes crc + 20 bytes framing */ 3145 // XXX maybe add an option to pass the IFG 3146 g.framing = 24 * 8; 3147 break; 3148 } 3149 } 3150 3151 if (strlen(g.ifname) <=0 ) { 3152 D("missing ifname"); 3153 usage(-1); 3154 } 3155 3156 if (g.burst == 0) { 3157 g.burst = fn->default_burst; 3158 D("using default burst size: %d", g.burst); 3159 } 3160 3161 g.system_cpus = i = system_ncpus(); 3162 if (g.cpus < 0 || g.cpus > i) { 3163 D("%d cpus is too high, have only %d cpus", g.cpus, i); 3164 usage(-1); 3165 } 3166 D("running on %d cpus (have %d)", g.cpus, i); 3167 if (g.cpus == 0) 3168 g.cpus = i; 3169 3170 if (!wait_link_arg && !strncmp(g.ifname, "vale", 4)) { 3171 g.wait_link = 0; 3172 } 3173 3174 if (g.pkt_size < 16 || g.pkt_size > MAX_PKTSIZE) { 3175 D("bad pktsize %d 
[16..%d]\n", g.pkt_size, MAX_PKTSIZE); 3176 usage(-1); 3177 } 3178 3179 if (g.pkt_min_size > 0 && (g.pkt_min_size < 16 || g.pkt_min_size > g.pkt_size)) { 3180 D("bad pktminsize %d [16..%d]\n", g.pkt_min_size, g.pkt_size); 3181 usage(-1); 3182 } 3183 3184 if (g.src_mac.name == NULL) { 3185 static char mybuf[20] = "00:00:00:00:00:00"; 3186 /* retrieve source mac address. */ 3187 if (source_hwaddr(g.ifname, mybuf) == -1) { 3188 D("Unable to retrieve source mac"); 3189 // continue, fail later 3190 } 3191 g.src_mac.name = mybuf; 3192 } 3193 /* extract address ranges */ 3194 if (extract_mac_range(&g.src_mac) || extract_mac_range(&g.dst_mac)) 3195 usage(-1); 3196 g.options |= extract_ip_range(&g.src_ip, g.af); 3197 g.options |= extract_ip_range(&g.dst_ip, g.af); 3198 3199 if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1 3200 && g.virt_header != VIRT_HDR_2) { 3201 D("bad virtio-net-header length"); 3202 usage(-1); 3203 } 3204 3205 if (g.dev_type == DEV_TAP) { 3206 D("want to use tap %s", g.ifname); 3207 g.main_fd = tap_alloc(g.ifname); 3208 if (g.main_fd < 0) { 3209 D("cannot open tap %s", g.ifname); 3210 usage(-1); 3211 } 3212 #ifndef NO_PCAP 3213 } else if (g.dev_type == DEV_PCAP) { 3214 char pcap_errbuf[PCAP_ERRBUF_SIZE]; 3215 3216 pcap_errbuf[0] = '\0'; // init the buffer 3217 g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf); 3218 if (g.p == NULL) { 3219 D("cannot open pcap on %s", g.ifname); 3220 usage(-1); 3221 } 3222 g.main_fd = pcap_fileno(g.p); 3223 D("using pcap on %s fileno %d", g.ifname, g.main_fd); 3224 #endif /* !NO_PCAP */ 3225 } else if (g.dummy_send) { /* but DEV_NETMAP */ 3226 D("using a dummy send routine"); 3227 } else { 3228 g.nmd = nmport_prepare(g.ifname); 3229 if (g.nmd == NULL) 3230 goto out; 3231 3232 parse_nmr_config(g.nmr_config, &g.nmd->reg); 3233 3234 g.nmd->reg.nr_flags |= NR_ACCEPT_VNET_HDR; 3235 3236 /* 3237 * Open the netmap device using nm_open(). 
3238 * 3239 * protocol stack and may cause a reset of the card, 3240 * which in turn may take some time for the PHY to 3241 * reconfigure. We do the open here to have time to reset. 3242 */ 3243 g.orig_mode = g.nmd->reg.nr_mode; 3244 if (g.nthreads > 1) { 3245 switch (g.orig_mode) { 3246 case NR_REG_ALL_NIC: 3247 case NR_REG_NIC_SW: 3248 g.nmd->reg.nr_mode = NR_REG_ONE_NIC; 3249 break; 3250 case NR_REG_SW: 3251 g.nmd->reg.nr_mode = NR_REG_ONE_SW; 3252 break; 3253 default: 3254 break; 3255 } 3256 g.nmd->reg.nr_ringid = 0; 3257 } 3258 if (nmport_open_desc(g.nmd) < 0) 3259 goto out; 3260 g.main_fd = g.nmd->fd; 3261 ND("mapped %luKB at %p", (unsigned long)(g.nmd->req.nr_memsize>>10), 3262 g.nmd->mem); 3263 3264 if (g.virt_header) { 3265 /* Set the virtio-net header length, since the user asked 3266 * for it explicitly. */ 3267 set_vnet_hdr_len(&g); 3268 } else { 3269 /* Check whether the netmap port we opened requires us to send 3270 * and receive frames with virtio-net header. */ 3271 get_vnet_hdr_len(&g); 3272 } 3273 3274 /* get num of queues in tx or rx */ 3275 if (g.td_type == TD_TYPE_SENDER) 3276 devqueues = g.nmd->reg.nr_tx_rings + g.nmd->reg.nr_host_tx_rings; 3277 else 3278 devqueues = g.nmd->reg.nr_rx_rings + g.nmd->reg.nr_host_rx_rings; 3279 3280 /* validate provided nthreads. 
*/ 3281 if (g.nthreads < 1 || g.nthreads > devqueues) { 3282 D("bad nthreads %d, have %d queues", g.nthreads, devqueues); 3283 // continue, fail later 3284 } 3285 3286 if (g.td_type == TD_TYPE_SENDER) { 3287 int mtu = get_if_mtu(&g); 3288 3289 if (mtu > 0 && g.pkt_size > mtu) { 3290 D("pkt_size (%d) must be <= mtu (%d)", 3291 g.pkt_size, mtu); 3292 return -1; 3293 } 3294 } 3295 3296 if (verbose) { 3297 struct netmap_if *nifp = g.nmd->nifp; 3298 struct nmreq_register *req = &g.nmd->reg; 3299 3300 D("nifp at offset %"PRIu64" ntxqs %d nrxqs %d memid %d", 3301 req->nr_offset, req->nr_tx_rings, req->nr_rx_rings, 3302 req->nr_mem_id); 3303 for (i = 0; i < req->nr_tx_rings + req->nr_host_tx_rings; i++) { 3304 struct netmap_ring *ring = NETMAP_TXRING(nifp, i); 3305 D(" TX%d at offset %p slots %d", i, 3306 (void *)((char *)ring - (char *)nifp), ring->num_slots); 3307 } 3308 for (i = 0; i < req->nr_rx_rings + req->nr_host_rx_rings; i++) { 3309 struct netmap_ring *ring = NETMAP_RXRING(nifp, i); 3310 D(" RX%d at offset %p slots %d", i, 3311 (void *)((char *)ring - (char *)nifp), ring->num_slots); 3312 } 3313 } 3314 3315 /* Print some debug information. */ 3316 fprintf(stdout, 3317 "%s %s: %d queues, %d threads and %d cpus.\n", 3318 (g.td_type == TD_TYPE_SENDER) ? "Sending on" : 3319 ((g.td_type == TD_TYPE_RECEIVER) ? "Receiving from" : 3320 "Working on"), 3321 g.ifname, 3322 devqueues, 3323 g.nthreads, 3324 g.cpus); 3325 if (g.td_type == TD_TYPE_SENDER) { 3326 fprintf(stdout, "%s -> %s (%s -> %s)\n", 3327 g.src_ip.name, g.dst_ip.name, 3328 g.src_mac.name, g.dst_mac.name); 3329 } 3330 3331 out: 3332 /* Exit if something went wrong. */ 3333 if (g.main_fd < 0) { 3334 D("aborting"); 3335 usage(-1); 3336 } 3337 } 3338 3339 3340 if (g.options) { 3341 D("--- SPECIAL OPTIONS:%s%s%s%s%s%s\n", 3342 g.options & OPT_PREFETCH ? " prefetch" : "", 3343 g.options & OPT_ACCESS ? " access" : "", 3344 g.options & OPT_MEMCPY ? " memcpy" : "", 3345 g.options & OPT_INDIRECT ? 
" indirect" : "", 3346 g.options & OPT_COPY ? " copy" : "", 3347 g.options & OPT_RUBBISH ? " rubbish " : ""); 3348 } 3349 3350 g.tx_period.tv_sec = g.tx_period.tv_nsec = 0; 3351 if (g.tx_rate > 0) { 3352 /* try to have at least something every second, 3353 * reducing the burst size to some 0.01s worth of data 3354 * (but no less than one full set of fragments) 3355 */ 3356 uint64_t x; 3357 int lim = (g.tx_rate)/300; 3358 if (g.burst > lim) 3359 g.burst = lim; 3360 if (g.burst == 0) 3361 g.burst = 1; 3362 x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate; 3363 g.tx_period.tv_nsec = x; 3364 g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000; 3365 g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000; 3366 } 3367 if (g.td_type == TD_TYPE_SENDER) 3368 D("Sending %d packets every %jd.%09ld s", 3369 g.burst, (intmax_t)g.tx_period.tv_sec, g.tx_period.tv_nsec); 3370 /* Install ^C handler. */ 3371 global_nthreads = g.nthreads; 3372 sigemptyset(&ss); 3373 sigaddset(&ss, SIGINT); 3374 /* block SIGINT now, so that all created threads will inherit the mask */ 3375 if (pthread_sigmask(SIG_BLOCK, &ss, NULL) < 0) { 3376 D("failed to block SIGINT: %s", strerror(errno)); 3377 } 3378 if (start_threads(&g) < 0) 3379 return 1; 3380 /* Install the handler and re-enable SIGINT for the main thread */ 3381 memset(&sa, 0, sizeof(sa)); 3382 sa.sa_handler = sigint_h; 3383 if (sigaction(SIGINT, &sa, NULL) < 0) { 3384 D("failed to install ^C handler: %s", strerror(errno)); 3385 } 3386 3387 if (pthread_sigmask(SIG_UNBLOCK, &ss, NULL) < 0) { 3388 D("failed to re-enable SIGINT: %s", strerror(errno)); 3389 } 3390 main_thread(&g); 3391 free(targs); 3392 return 0; 3393 } 3394 3395 /* end of file */ 3396