1 /* 2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 3 * Copyright (C) 2013-2015 Universita` di Pisa. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 /* 28 * $FreeBSD$ 29 * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $ 30 * 31 * Example program to show how to build a multithreaded packet 32 * source/sink using the netmap device. 33 * 34 * In this example we create a programmable number of threads 35 * to take care of all the queues of the interface used to 36 * send or receive traffic. 
37 * 38 */ 39 40 #define _GNU_SOURCE /* for CPU_SET() */ 41 #include <arpa/inet.h> /* ntohs */ 42 #include <assert.h> 43 #include <ctype.h> // isprint() 44 #include <errno.h> 45 #include <fcntl.h> 46 #include <ifaddrs.h> /* getifaddrs */ 47 #include <libnetmap.h> 48 #include <math.h> 49 #include <net/ethernet.h> 50 #include <netinet/in.h> 51 #include <netinet/ip.h> 52 #include <netinet/ip6.h> 53 #include <netinet/udp.h> 54 #ifndef NO_PCAP 55 #include <pcap/pcap.h> 56 #endif 57 #include <pthread.h> 58 #include <signal.h> 59 #include <stdio.h> 60 #include <stdlib.h> 61 #include <string.h> 62 #include <sys/ioctl.h> 63 #include <sys/poll.h> 64 #include <sys/stat.h> 65 #if !defined(_WIN32) && !defined(linux) 66 #include <sys/sysctl.h> /* sysctl */ 67 #endif 68 #include <sys/types.h> 69 #include <unistd.h> // sysconf() 70 #ifdef linux 71 #define IPV6_VERSION 0x60 72 #define IPV6_DEFHLIM 64 73 #endif 74 75 #include "ctrs.h" 76 77 static void usage(int); 78 79 #ifdef _WIN32 80 #define cpuset_t DWORD_PTR //uint64_t 81 static inline void CPU_ZERO(cpuset_t *p) 82 { 83 *p = 0; 84 } 85 86 static inline void CPU_SET(uint32_t i, cpuset_t *p) 87 { 88 *p |= 1<< (i & 0x3f); 89 } 90 91 #define pthread_setaffinity_np(a, b, c) !SetThreadAffinityMask(a, *c) //((void)a, 0) 92 #define TAP_CLONEDEV "/dev/tap" 93 #define AF_LINK 18 //defined in winsocks.h 94 #define CLOCK_REALTIME_PRECISE CLOCK_REALTIME 95 #include <net/if_dl.h> 96 97 /* 98 * Convert an ASCII representation of an ethernet address to 99 * binary form. 
100 */ 101 struct ether_addr * 102 ether_aton(const char *a) 103 { 104 int i; 105 static struct ether_addr o; 106 unsigned int o0, o1, o2, o3, o4, o5; 107 108 i = sscanf(a, "%x:%x:%x:%x:%x:%x", &o0, &o1, &o2, &o3, &o4, &o5); 109 110 if (i != 6) 111 return (NULL); 112 113 o.octet[0]=o0; 114 o.octet[1]=o1; 115 o.octet[2]=o2; 116 o.octet[3]=o3; 117 o.octet[4]=o4; 118 o.octet[5]=o5; 119 120 return ((struct ether_addr *)&o); 121 } 122 123 /* 124 * Convert a binary representation of an ethernet address to 125 * an ASCII string. 126 */ 127 char * 128 ether_ntoa(const struct ether_addr *n) 129 { 130 int i; 131 static char a[18]; 132 133 i = sprintf(a, "%02x:%02x:%02x:%02x:%02x:%02x", 134 n->octet[0], n->octet[1], n->octet[2], 135 n->octet[3], n->octet[4], n->octet[5]); 136 return (i < 17 ? NULL : (char *)&a); 137 } 138 #endif /* _WIN32 */ 139 140 #ifdef linux 141 142 #define cpuset_t cpu_set_t 143 144 #define ifr_flagshigh ifr_flags /* only the low 16 bits here */ 145 #define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */ 146 #include <linux/ethtool.h> 147 #include <linux/sockios.h> 148 149 #define CLOCK_REALTIME_PRECISE CLOCK_REALTIME 150 #include <netinet/ether.h> /* ether_aton */ 151 #include <linux/if_packet.h> /* sockaddr_ll */ 152 #endif /* linux */ 153 154 #ifdef __FreeBSD__ 155 #include <sys/endian.h> /* le64toh */ 156 #include <machine/param.h> 157 158 #include <pthread_np.h> /* pthread w/ affinity */ 159 #include <sys/cpuset.h> /* cpu_set */ 160 #include <net/if_dl.h> /* LLADDR */ 161 #endif /* __FreeBSD__ */ 162 163 #ifdef __APPLE__ 164 165 #define cpuset_t uint64_t // XXX 166 static inline void CPU_ZERO(cpuset_t *p) 167 { 168 *p = 0; 169 } 170 171 static inline void CPU_SET(uint32_t i, cpuset_t *p) 172 { 173 *p |= 1<< (i & 0x3f); 174 } 175 176 #define pthread_setaffinity_np(a, b, c) ((void)a, 0) 177 178 #define ifr_flagshigh ifr_flags // XXX 179 #define IFF_PPROMISC IFF_PROMISC 180 #include <net/if_dl.h> /* LLADDR */ 181 #define clock_gettime(a,b) \ 
182 do {struct timespec t0 = {0,0}; *(b) = t0; } while (0) 183 #endif /* __APPLE__ */ 184 185 static const char *default_payload = "netmap pkt-gen DIRECT payload\n" 186 "http://info.iet.unipi.it/~luigi/netmap/ "; 187 188 static const char *indirect_payload = "netmap pkt-gen indirect payload\n" 189 "http://info.iet.unipi.it/~luigi/netmap/ "; 190 191 static int verbose = 0; 192 static int normalize = 1; 193 194 #define VIRT_HDR_1 10 /* length of a base vnet-hdr */ 195 #define VIRT_HDR_2 12 /* length of the extenede vnet-hdr */ 196 #define VIRT_HDR_MAX VIRT_HDR_2 197 struct virt_header { 198 uint8_t fields[VIRT_HDR_MAX]; 199 }; 200 201 #define MAX_BODYSIZE 65536 202 203 struct pkt { 204 struct virt_header vh; 205 struct ether_header eh; 206 union { 207 struct { 208 struct ip ip; 209 struct udphdr udp; 210 uint8_t body[MAX_BODYSIZE]; /* hardwired */ 211 } ipv4; 212 struct { 213 struct ip6_hdr ip; 214 struct udphdr udp; 215 uint8_t body[MAX_BODYSIZE]; /* hardwired */ 216 } ipv6; 217 }; 218 } __attribute__((__packed__)); 219 220 #define PKT(p, f, af) \ 221 ((af) == AF_INET ? (p)->ipv4.f: (p)->ipv6.f) 222 223 struct ip_range { 224 const char *name; 225 union { 226 struct { 227 uint32_t start, end; /* same as struct in_addr */ 228 } ipv4; 229 struct { 230 struct in6_addr start, end; 231 uint8_t sgroup, egroup; 232 } ipv6; 233 }; 234 uint16_t port0, port1; 235 }; 236 237 struct mac_range { 238 const char *name; 239 struct ether_addr start, end; 240 }; 241 242 /* ifname can be netmap:foo-xxxx */ 243 #define MAX_IFNAMELEN 512 /* our buffer for ifname */ 244 //#define MAX_PKTSIZE 1536 245 #define MAX_PKTSIZE MAX_BODYSIZE /* XXX: + IP_HDR + ETH_HDR */ 246 247 /* compact timestamp to fit into 60 byte packet. 
(enough to obtain RTT) */ 248 struct tstamp { 249 uint32_t sec; 250 uint32_t nsec; 251 }; 252 253 /* 254 * global arguments for all threads 255 */ 256 257 struct glob_arg { 258 int af; /* address family AF_INET/AF_INET6 */ 259 struct ip_range src_ip; 260 struct ip_range dst_ip; 261 struct mac_range dst_mac; 262 struct mac_range src_mac; 263 int pkt_size; 264 int pkt_min_size; 265 int burst; 266 int forever; 267 uint64_t npackets; /* total packets to send */ 268 int frags; /* fragments per packet */ 269 u_int frag_size; /* size of each fragment */ 270 int nthreads; 271 int cpus; /* cpus used for running */ 272 int system_cpus; /* cpus on the system */ 273 274 int options; /* testing */ 275 #define OPT_PREFETCH 1 276 #define OPT_ACCESS 2 277 #define OPT_COPY 4 278 #define OPT_MEMCPY 8 279 #define OPT_TS 16 /* add a timestamp */ 280 #define OPT_INDIRECT 32 /* use indirect buffers, tx only */ 281 #define OPT_DUMP 64 /* dump rx/tx traffic */ 282 #define OPT_RUBBISH 256 /* send whatever the buffers contain */ 283 #define OPT_RANDOM_SRC 512 284 #define OPT_RANDOM_DST 1024 285 #define OPT_PPS_STATS 2048 286 #define OPT_UPDATE_CSUM 4096 287 int dev_type; 288 #ifndef NO_PCAP 289 pcap_t *p; 290 #endif 291 292 int tx_rate; 293 struct timespec tx_period; 294 295 int affinity; 296 int main_fd; 297 struct nmport_d *nmd; 298 uint32_t orig_mode; 299 int report_interval; /* milliseconds between prints */ 300 void *(*td_body)(void *); 301 int td_type; 302 void *mmap_addr; 303 char ifname[MAX_IFNAMELEN]; 304 const char *nmr_config; 305 int dummy_send; 306 int virt_header; /* send also the virt_header */ 307 char *packet_file; /* -P option */ 308 #define STATS_WIN 15 309 int win_idx; 310 int64_t win[STATS_WIN]; 311 int wait_link; 312 int framing; /* #bits of framing (for bw output) */ 313 }; 314 enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP }; 315 316 enum { 317 TD_TYPE_SENDER = 1, 318 TD_TYPE_RECEIVER, 319 TD_TYPE_OTHER, 320 }; 321 322 /* 323 * Arguments for a new thread. 
The same structure is used by 324 * the source and the sink 325 */ 326 struct targ { 327 struct glob_arg *g; 328 int used; 329 int completed; 330 int cancel; 331 int fd; 332 struct nmport_d *nmd; 333 /* these ought to be volatile, but they are 334 * only sampled and errors should not accumulate 335 */ 336 struct my_ctrs ctr; 337 338 struct timespec tic, toc; 339 int me; 340 pthread_t thread; 341 int affinity; 342 343 struct pkt pkt; 344 void *frame; 345 uint16_t seed[3]; 346 u_int frags; 347 u_int frag_size; 348 }; 349 350 static __inline uint16_t 351 cksum_add(uint16_t sum, uint16_t a) 352 { 353 uint16_t res; 354 355 res = sum + a; 356 return (res + (res < a)); 357 } 358 359 static void 360 extract_ipv4_addr(char *name, uint32_t *addr, uint16_t *port) 361 { 362 struct in_addr a; 363 char *pp; 364 365 pp = strchr(name, ':'); 366 if (pp != NULL) { /* do we have ports ? */ 367 *pp++ = '\0'; 368 *port = (uint16_t)strtol(pp, NULL, 0); 369 } 370 371 inet_pton(AF_INET, name, &a); 372 *addr = ntohl(a.s_addr); 373 } 374 375 static void 376 extract_ipv6_addr(char *name, struct in6_addr *addr, uint16_t *port, 377 uint8_t *group) 378 { 379 char *pp; 380 381 /* 382 * We accept IPv6 address in the following form: 383 * group@[2001:DB8::1001]:port (w/ brackets and port) 384 * group@[2001:DB8::1] (w/ brackets and w/o port) 385 * group@2001:DB8::1234 (w/o brackets and w/o port) 386 */ 387 pp = strchr(name, '@'); 388 if (pp != NULL) { 389 *pp++ = '\0'; 390 *group = (uint8_t)strtol(name, NULL, 0); 391 if (*group > 7) 392 *group = 7; 393 name = pp; 394 } 395 if (name[0] == '[') 396 name++; 397 pp = strchr(name, ']'); 398 if (pp != NULL) 399 *pp++ = '\0'; 400 if (pp != NULL && *pp != ':') 401 pp = NULL; 402 if (pp != NULL) { /* do we have ports ? */ 403 *pp++ = '\0'; 404 *port = (uint16_t)strtol(pp, NULL, 0); 405 } 406 inet_pton(AF_INET6, name, addr); 407 } 408 /* 409 * extract the extremes from a range of ipv4 addresses. 
410 * addr_lo[-addr_hi][:port_lo[-port_hi]] 411 */ 412 static int 413 extract_ip_range(struct ip_range *r, int af) 414 { 415 char *name, *ap, start[INET6_ADDRSTRLEN]; 416 char end[INET6_ADDRSTRLEN]; 417 struct in_addr a; 418 uint32_t tmp; 419 420 if (verbose) 421 D("extract IP range from %s", r->name); 422 423 name = strdup(r->name); 424 if (name == NULL) { 425 D("strdup failed"); 426 usage(-1); 427 } 428 /* the first - splits start/end of range */ 429 ap = strchr(name, '-'); 430 if (ap != NULL) 431 *ap++ = '\0'; 432 r->port0 = 1234; /* default port */ 433 if (af == AF_INET6) { 434 r->ipv6.sgroup = 7; /* default group */ 435 extract_ipv6_addr(name, &r->ipv6.start, &r->port0, 436 &r->ipv6.sgroup); 437 } else 438 extract_ipv4_addr(name, &r->ipv4.start, &r->port0); 439 440 r->port1 = r->port0; 441 if (af == AF_INET6) { 442 if (ap != NULL) { 443 r->ipv6.egroup = r->ipv6.sgroup; 444 extract_ipv6_addr(ap, &r->ipv6.end, &r->port1, 445 &r->ipv6.egroup); 446 } else { 447 r->ipv6.end = r->ipv6.start; 448 r->ipv6.egroup = r->ipv6.sgroup; 449 } 450 } else { 451 if (ap != NULL) { 452 extract_ipv4_addr(ap, &r->ipv4.end, &r->port1); 453 if (r->ipv4.start > r->ipv4.end) { 454 tmp = r->ipv4.end; 455 r->ipv4.end = r->ipv4.start; 456 r->ipv4.start = tmp; 457 } 458 } else 459 r->ipv4.end = r->ipv4.start; 460 } 461 462 if (r->port0 > r->port1) { 463 tmp = r->port0; 464 r->port0 = r->port1; 465 r->port1 = tmp; 466 } 467 if (af == AF_INET) { 468 a.s_addr = htonl(r->ipv4.start); 469 inet_ntop(af, &a, start, sizeof(start)); 470 a.s_addr = htonl(r->ipv4.end); 471 inet_ntop(af, &a, end, sizeof(end)); 472 } else { 473 inet_ntop(af, &r->ipv6.start, start, sizeof(start)); 474 inet_ntop(af, &r->ipv6.end, end, sizeof(end)); 475 } 476 if (af == AF_INET) 477 D("range is %s:%d to %s:%d", start, r->port0, end, r->port1); 478 else 479 D("range is %d@[%s]:%d to %d@[%s]:%d", r->ipv6.sgroup, 480 start, r->port0, r->ipv6.egroup, end, r->port1); 481 482 free(name); 483 if (r->port0 != r->port1 || 484 (af 
== AF_INET && r->ipv4.start != r->ipv4.end) || 485 (af == AF_INET6 && 486 !IN6_ARE_ADDR_EQUAL(&r->ipv6.start, &r->ipv6.end))) 487 return (OPT_COPY); 488 return (0); 489 } 490 491 static int 492 extract_mac_range(struct mac_range *r) 493 { 494 struct ether_addr *e; 495 if (verbose) 496 D("extract MAC range from %s", r->name); 497 498 e = ether_aton(r->name); 499 if (e == NULL) { 500 D("invalid MAC address '%s'", r->name); 501 return 1; 502 } 503 bcopy(e, &r->start, 6); 504 bcopy(e, &r->end, 6); 505 #if 0 506 bcopy(targ->src_mac, eh->ether_shost, 6); 507 p = index(targ->g->src_mac, '-'); 508 if (p) 509 targ->src_mac_range = atoi(p+1); 510 511 bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6); 512 bcopy(targ->dst_mac, eh->ether_dhost, 6); 513 p = index(targ->g->dst_mac, '-'); 514 if (p) 515 targ->dst_mac_range = atoi(p+1); 516 #endif 517 if (verbose) 518 D("%s starts at %s", r->name, ether_ntoa(&r->start)); 519 return 0; 520 } 521 522 static int 523 get_if_mtu(const struct glob_arg *g) 524 { 525 struct ifreq ifreq; 526 int s, ret; 527 const char *ifname = g->nmd->hdr.nr_name; 528 size_t len; 529 530 if (!strncmp(g->ifname, "netmap:", 7) && !strchr(ifname, '{') 531 && !strchr(ifname, '}')) { 532 533 len = strlen(ifname); 534 535 if (len > IFNAMSIZ) { 536 D("'%s' too long, cannot ask for MTU", ifname); 537 return -1; 538 } 539 540 s = socket(AF_INET, SOCK_DGRAM, 0); 541 if (s < 0) { 542 D("socket() failed: %s", strerror(errno)); 543 return s; 544 } 545 546 memset(&ifreq, 0, sizeof(ifreq)); 547 memcpy(ifreq.ifr_name, ifname, len); 548 549 ret = ioctl(s, SIOCGIFMTU, &ifreq); 550 if (ret) { 551 D("ioctl(SIOCGIFMTU) failed: %s", strerror(errno)); 552 } 553 554 close(s); 555 556 return ifreq.ifr_mtu; 557 } 558 559 /* This is a pipe or a VALE port, where the MTU is very large, 560 * so we use some practical limit. 
*/ 561 return 65536; 562 } 563 564 static struct targ *targs; 565 static int global_nthreads; 566 567 /* control-C handler */ 568 static void 569 sigint_h(int sig) 570 { 571 int i; 572 573 (void)sig; /* UNUSED */ 574 D("received control-C on thread %p", (void *)pthread_self()); 575 for (i = 0; i < global_nthreads; i++) { 576 targs[i].cancel = 1; 577 } 578 } 579 580 /* sysctl wrapper to return the number of active CPUs */ 581 static int 582 system_ncpus(void) 583 { 584 int ncpus; 585 #if defined (__FreeBSD__) 586 int mib[2] = { CTL_HW, HW_NCPU }; 587 size_t len = sizeof(mib); 588 sysctl(mib, 2, &ncpus, &len, NULL, 0); 589 #elif defined(linux) 590 ncpus = sysconf(_SC_NPROCESSORS_ONLN); 591 #elif defined(_WIN32) 592 { 593 SYSTEM_INFO sysinfo; 594 GetSystemInfo(&sysinfo); 595 ncpus = sysinfo.dwNumberOfProcessors; 596 } 597 #else /* others */ 598 ncpus = 1; 599 #endif /* others */ 600 return (ncpus); 601 } 602 603 #ifdef __linux__ 604 #define sockaddr_dl sockaddr_ll 605 #define sdl_family sll_family 606 #define AF_LINK AF_PACKET 607 #define LLADDR(s) s->sll_addr; 608 #include <linux/if_tun.h> 609 #define TAP_CLONEDEV "/dev/net/tun" 610 #endif /* __linux__ */ 611 612 #ifdef __FreeBSD__ 613 #include <net/if_tun.h> 614 #define TAP_CLONEDEV "/dev/tap" 615 #endif /* __FreeBSD */ 616 617 #ifdef __APPLE__ 618 // #warning TAP not supported on apple ? 619 #include <net/if_utun.h> 620 #define TAP_CLONEDEV "/dev/tap" 621 #endif /* __APPLE__ */ 622 623 624 /* 625 * parse the vale configuration in conf and put it in nmr. 626 * Return the flag set if necessary. 627 * The configuration may consist of 1 to 4 numbers separated 628 * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings. 629 * Missing numbers or zeroes stand for default values. 630 * As an additional convenience, if exactly one number 631 * is specified, then this is assigned to both #tx-slots and #rx-slots. 632 * If there is no 4th number, then the 3rd is assigned to both #tx-rings 633 * and #rx-rings. 
634 */ 635 static int 636 parse_nmr_config(const char* conf, struct nmreq_register *nmr) 637 { 638 char *w, *tok; 639 int i, v; 640 641 if (conf == NULL || ! *conf) 642 return 0; 643 nmr->nr_tx_rings = nmr->nr_rx_rings = 0; 644 nmr->nr_tx_slots = nmr->nr_rx_slots = 0; 645 w = strdup(conf); 646 for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) { 647 v = atoi(tok); 648 switch (i) { 649 case 0: 650 nmr->nr_tx_slots = nmr->nr_rx_slots = v; 651 break; 652 case 1: 653 nmr->nr_rx_slots = v; 654 break; 655 case 2: 656 nmr->nr_tx_rings = nmr->nr_rx_rings = v; 657 break; 658 case 3: 659 nmr->nr_rx_rings = v; 660 break; 661 default: 662 D("ignored config: %s", tok); 663 break; 664 } 665 } 666 D("txr %d txd %d rxr %d rxd %d", 667 nmr->nr_tx_rings, nmr->nr_tx_slots, 668 nmr->nr_rx_rings, nmr->nr_rx_slots); 669 free(w); 670 return 0; 671 } 672 673 674 /* 675 * locate the src mac address for our interface, put it 676 * into the user-supplied buffer. return 0 if ok, -1 on error. 677 */ 678 static int 679 source_hwaddr(const char *ifname, char *buf) 680 { 681 struct ifaddrs *ifaphead, *ifap; 682 683 if (getifaddrs(&ifaphead) != 0) { 684 D("getifaddrs %s failed", ifname); 685 return (-1); 686 } 687 688 /* remove 'netmap:' prefix before comparing interfaces */ 689 if (!strncmp(ifname, "netmap:", 7)) 690 ifname = &ifname[7]; 691 692 for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) { 693 struct sockaddr_dl *sdl = 694 (struct sockaddr_dl *)ifap->ifa_addr; 695 uint8_t *mac; 696 697 if (!sdl || sdl->sdl_family != AF_LINK) 698 continue; 699 if (strncmp(ifap->ifa_name, ifname, IFNAMSIZ) != 0) 700 continue; 701 mac = (uint8_t *)LLADDR(sdl); 702 sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x", 703 mac[0], mac[1], mac[2], 704 mac[3], mac[4], mac[5]); 705 if (verbose) 706 D("source hwaddr %s", buf); 707 break; 708 } 709 freeifaddrs(ifaphead); 710 return ifap ? 0 : 1; 711 } 712 713 714 /* set the thread affinity. 
*/ 715 static int 716 setaffinity(pthread_t me, int i) 717 { 718 cpuset_t cpumask; 719 720 if (i == -1) 721 return 0; 722 723 /* Set thread affinity affinity.*/ 724 CPU_ZERO(&cpumask); 725 CPU_SET(i, &cpumask); 726 727 if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) { 728 D("Unable to set affinity: %s", strerror(errno)); 729 return 1; 730 } 731 return 0; 732 } 733 734 735 /* Compute the checksum of the given ip header. */ 736 static uint32_t 737 checksum(const void *data, uint16_t len, uint32_t sum) 738 { 739 const uint8_t *addr = data; 740 uint32_t i; 741 742 /* Checksum all the pairs of bytes first... */ 743 for (i = 0; i < (len & ~1U); i += 2) { 744 sum += (uint16_t)ntohs(*((const uint16_t *)(addr + i))); 745 if (sum > 0xFFFF) 746 sum -= 0xFFFF; 747 } 748 /* 749 * If there's a single byte left over, checksum it, too. 750 * Network byte order is big-endian, so the remaining byte is 751 * the high byte. 752 */ 753 if (i < len) { 754 sum += addr[i] << 8; 755 if (sum > 0xFFFF) 756 sum -= 0xFFFF; 757 } 758 return sum; 759 } 760 761 static uint16_t 762 wrapsum(uint32_t sum) 763 { 764 sum = ~sum & 0xFFFF; 765 return (htons(sum)); 766 } 767 768 /* Check the payload of the packet for errors (use it for debug). 769 * Look for consecutive ascii representations of the size of the packet. 770 */ 771 static void 772 dump_payload(const char *_p, int len, struct netmap_ring *ring, int cur) 773 { 774 char buf[128]; 775 int i, j, i0; 776 const unsigned char *p = (const unsigned char *)_p; 777 778 /* get the length in ASCII of the length of the packet. 
*/ 779 780 printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n", 781 ring, cur, ring->slot[cur].buf_idx, 782 ring->slot[cur].flags, len); 783 /* hexdump routine */ 784 for (i = 0; i < len; ) { 785 memset(buf, ' ', sizeof(buf)); 786 sprintf(buf, "%5d: ", i); 787 i0 = i; 788 for (j=0; j < 16 && i < len; i++, j++) 789 sprintf(buf+7+j*3, "%02x ", (uint8_t)(p[i])); 790 i = i0; 791 for (j=0; j < 16 && i < len; i++, j++) 792 sprintf(buf+7+j + 48, "%c", 793 isprint(p[i]) ? p[i] : '.'); 794 printf("%s\n", buf); 795 } 796 } 797 798 /* 799 * Fill a packet with some payload. 800 * We create a UDP packet so the payload starts at 801 * 14+20+8 = 42 bytes. 802 */ 803 #ifdef __linux__ 804 #define uh_sport source 805 #define uh_dport dest 806 #define uh_ulen len 807 #define uh_sum check 808 #endif /* linux */ 809 810 static uint16_t 811 new_ip_sum(uint16_t ip_sum, uint32_t oaddr, uint32_t naddr) 812 { 813 ip_sum = cksum_add(ip_sum, ~oaddr >> 16); 814 ip_sum = cksum_add(ip_sum, ~oaddr & 0xffff); 815 ip_sum = cksum_add(ip_sum, naddr >> 16); 816 ip_sum = cksum_add(ip_sum, naddr & 0xffff); 817 return ip_sum; 818 } 819 820 static uint16_t 821 new_udp_sum(uint16_t udp_sum, uint16_t oport, uint16_t nport) 822 { 823 udp_sum = cksum_add(udp_sum, ~oport); 824 udp_sum = cksum_add(udp_sum, nport); 825 return udp_sum; 826 } 827 828 829 static void 830 update_ip(struct pkt *pkt, struct targ *t) 831 { 832 struct glob_arg *g = t->g; 833 struct ip ip; 834 struct udphdr udp; 835 uint32_t oaddr, naddr; 836 uint16_t oport, nport; 837 uint16_t ip_sum = 0, udp_sum = 0; 838 839 memcpy(&ip, &pkt->ipv4.ip, sizeof(ip)); 840 memcpy(&udp, &pkt->ipv4.udp, sizeof(udp)); 841 do { 842 ip_sum = udp_sum = 0; 843 naddr = oaddr = ntohl(ip.ip_src.s_addr); 844 nport = oport = ntohs(udp.uh_sport); 845 if (g->options & OPT_RANDOM_SRC) { 846 ip.ip_src.s_addr = nrand48(t->seed); 847 udp.uh_sport = nrand48(t->seed); 848 naddr = ntohl(ip.ip_src.s_addr); 849 nport = ntohs(udp.uh_sport); 850 ip_sum = new_ip_sum(ip_sum, 
oaddr, naddr); 851 udp_sum = new_udp_sum(udp_sum, oport, nport); 852 } else { 853 if (oport < g->src_ip.port1) { 854 nport = oport + 1; 855 udp.uh_sport = htons(nport); 856 udp_sum = new_udp_sum(udp_sum, oport, nport); 857 break; 858 } 859 nport = g->src_ip.port0; 860 udp.uh_sport = htons(nport); 861 if (oaddr < g->src_ip.ipv4.end) { 862 naddr = oaddr + 1; 863 ip.ip_src.s_addr = htonl(naddr); 864 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 865 break; 866 } 867 naddr = g->src_ip.ipv4.start; 868 ip.ip_src.s_addr = htonl(naddr); 869 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 870 } 871 872 naddr = oaddr = ntohl(ip.ip_dst.s_addr); 873 nport = oport = ntohs(udp.uh_dport); 874 if (g->options & OPT_RANDOM_DST) { 875 ip.ip_dst.s_addr = nrand48(t->seed); 876 udp.uh_dport = nrand48(t->seed); 877 naddr = ntohl(ip.ip_dst.s_addr); 878 nport = ntohs(udp.uh_dport); 879 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 880 udp_sum = new_udp_sum(udp_sum, oport, nport); 881 } else { 882 if (oport < g->dst_ip.port1) { 883 nport = oport + 1; 884 udp.uh_dport = htons(nport); 885 udp_sum = new_udp_sum(udp_sum, oport, nport); 886 break; 887 } 888 nport = g->dst_ip.port0; 889 udp.uh_dport = htons(nport); 890 if (oaddr < g->dst_ip.ipv4.end) { 891 naddr = oaddr + 1; 892 ip.ip_dst.s_addr = htonl(naddr); 893 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 894 break; 895 } 896 naddr = g->dst_ip.ipv4.start; 897 ip.ip_dst.s_addr = htonl(naddr); 898 ip_sum = new_ip_sum(ip_sum, oaddr, naddr); 899 } 900 } while (0); 901 /* update checksums */ 902 if (udp_sum != 0) 903 udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(udp_sum)); 904 if (ip_sum != 0) { 905 ip.ip_sum = ~cksum_add(~ip.ip_sum, htons(ip_sum)); 906 udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(ip_sum)); 907 } 908 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip)); 909 memcpy(&pkt->ipv4.udp, &udp, sizeof(udp)); 910 } 911 912 #ifndef s6_addr16 913 #define s6_addr16 __u6_addr.__u6_addr16 914 #endif 915 static void 916 update_ip6(struct pkt *pkt, struct targ *t) 917 { 918 
struct glob_arg *g = t->g; 919 struct ip6_hdr ip6; 920 struct udphdr udp; 921 uint16_t udp_sum; 922 uint16_t oaddr, naddr; 923 uint16_t oport, nport; 924 uint8_t group; 925 926 memcpy(&ip6, &pkt->ipv6.ip, sizeof(ip6)); 927 memcpy(&udp, &pkt->ipv6.udp, sizeof(udp)); 928 do { 929 udp_sum = 0; 930 group = g->src_ip.ipv6.sgroup; 931 naddr = oaddr = ntohs(ip6.ip6_src.s6_addr16[group]); 932 nport = oport = ntohs(udp.uh_sport); 933 if (g->options & OPT_RANDOM_SRC) { 934 ip6.ip6_src.s6_addr16[group] = nrand48(t->seed); 935 udp.uh_sport = nrand48(t->seed); 936 naddr = ntohs(ip6.ip6_src.s6_addr16[group]); 937 nport = ntohs(udp.uh_sport); 938 break; 939 } 940 if (oport < g->src_ip.port1) { 941 nport = oport + 1; 942 udp.uh_sport = htons(nport); 943 break; 944 } 945 nport = g->src_ip.port0; 946 udp.uh_sport = htons(nport); 947 if (oaddr < ntohs(g->src_ip.ipv6.end.s6_addr16[group])) { 948 naddr = oaddr + 1; 949 ip6.ip6_src.s6_addr16[group] = htons(naddr); 950 break; 951 } 952 naddr = ntohs(g->src_ip.ipv6.start.s6_addr16[group]); 953 ip6.ip6_src.s6_addr16[group] = htons(naddr); 954 955 /* update checksums if needed */ 956 if (oaddr != naddr) 957 udp_sum = cksum_add(~oaddr, naddr); 958 if (oport != nport) 959 udp_sum = cksum_add(udp_sum, 960 cksum_add(~oport, nport)); 961 962 group = g->dst_ip.ipv6.egroup; 963 naddr = oaddr = ntohs(ip6.ip6_dst.s6_addr16[group]); 964 nport = oport = ntohs(udp.uh_dport); 965 if (g->options & OPT_RANDOM_DST) { 966 ip6.ip6_dst.s6_addr16[group] = nrand48(t->seed); 967 udp.uh_dport = nrand48(t->seed); 968 naddr = ntohs(ip6.ip6_dst.s6_addr16[group]); 969 nport = ntohs(udp.uh_dport); 970 break; 971 } 972 if (oport < g->dst_ip.port1) { 973 nport = oport + 1; 974 udp.uh_dport = htons(nport); 975 break; 976 } 977 nport = g->dst_ip.port0; 978 udp.uh_dport = htons(nport); 979 if (oaddr < ntohs(g->dst_ip.ipv6.end.s6_addr16[group])) { 980 naddr = oaddr + 1; 981 ip6.ip6_dst.s6_addr16[group] = htons(naddr); 982 break; 983 } 984 naddr = 
ntohs(g->dst_ip.ipv6.start.s6_addr16[group]); 985 ip6.ip6_dst.s6_addr16[group] = htons(naddr); 986 } while (0); 987 /* update checksums */ 988 if (oaddr != naddr) 989 udp_sum = cksum_add(udp_sum, 990 cksum_add(~oaddr, naddr)); 991 if (oport != nport) 992 udp_sum = cksum_add(udp_sum, 993 cksum_add(~oport, nport)); 994 if (udp_sum != 0) 995 udp.uh_sum = ~cksum_add(~udp.uh_sum, udp_sum); 996 memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6)); 997 memcpy(&pkt->ipv6.udp, &udp, sizeof(udp)); 998 } 999 1000 static void 1001 update_addresses(struct pkt *pkt, struct targ *t) 1002 { 1003 1004 if (t->g->af == AF_INET) 1005 update_ip(pkt, t); 1006 else 1007 update_ip6(pkt, t); 1008 } 1009 1010 static void 1011 update_ip_size(struct pkt *pkt, int size) 1012 { 1013 struct ip ip; 1014 struct udphdr udp; 1015 uint16_t oiplen, niplen; 1016 uint16_t nudplen; 1017 uint16_t ip_sum = 0; 1018 1019 memcpy(&ip, &pkt->ipv4.ip, sizeof(ip)); 1020 memcpy(&udp, &pkt->ipv4.udp, sizeof(udp)); 1021 1022 oiplen = ntohs(ip.ip_len); 1023 niplen = size - sizeof(struct ether_header); 1024 ip.ip_len = htons(niplen); 1025 nudplen = niplen - sizeof(struct ip); 1026 udp.uh_ulen = htons(nudplen); 1027 ip_sum = new_udp_sum(ip_sum, oiplen, niplen); 1028 1029 /* update checksums */ 1030 if (ip_sum != 0) 1031 ip.ip_sum = ~cksum_add(~ip.ip_sum, htons(ip_sum)); 1032 1033 udp.uh_sum = 0; 1034 /* Magic: taken from sbin/dhclient/packet.c */ 1035 udp.uh_sum = wrapsum( 1036 checksum(&udp, sizeof(udp), /* udp header */ 1037 checksum(pkt->ipv4.body, /* udp payload */ 1038 nudplen - sizeof(udp), 1039 checksum(&ip.ip_src, /* pseudo header */ 1040 2 * sizeof(ip.ip_src), 1041 IPPROTO_UDP + (u_int32_t)ntohs(udp.uh_ulen))))); 1042 1043 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip)); 1044 memcpy(&pkt->ipv4.udp, &udp, sizeof(udp)); 1045 } 1046 1047 static void 1048 update_ip6_size(struct pkt *pkt, int size) 1049 { 1050 struct ip6_hdr ip6; 1051 struct udphdr udp; 1052 uint16_t niplen, nudplen; 1053 uint32_t csum; 1054 1055 memcpy(&ip6, 
&pkt->ipv6.ip, sizeof(ip6)); 1056 memcpy(&udp, &pkt->ipv6.udp, sizeof(udp)); 1057 1058 nudplen = niplen = size - sizeof(struct ether_header) - sizeof(ip6); 1059 ip6.ip6_plen = htons(niplen); 1060 udp.uh_ulen = htons(nudplen); 1061 1062 /* Save part of pseudo header checksum into csum */ 1063 udp.uh_sum = 0; 1064 csum = IPPROTO_UDP << 24; 1065 csum = checksum(&csum, sizeof(csum), nudplen); 1066 udp.uh_sum = wrapsum( 1067 checksum(&udp, sizeof(udp), /* udp header */ 1068 checksum(pkt->ipv6.body, /* udp payload */ 1069 nudplen - sizeof(udp), 1070 checksum(&pkt->ipv6.ip.ip6_src, /* pseudo header */ 1071 2 * sizeof(pkt->ipv6.ip.ip6_src), csum)))); 1072 1073 memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6)); 1074 memcpy(&pkt->ipv6.udp, &udp, sizeof(udp)); 1075 } 1076 1077 static void 1078 update_size(struct pkt *pkt, struct targ *t, int size) 1079 { 1080 if (t->g->options & OPT_UPDATE_CSUM) { 1081 if (t->g->af == AF_INET) 1082 update_ip_size(pkt, size); 1083 else 1084 update_ip6_size(pkt, size); 1085 } 1086 } 1087 1088 /* 1089 * initialize one packet and prepare for the next one. 1090 * The copy could be done better instead of repeating it each time. 1091 */ 1092 static void 1093 initialize_packet(struct targ *targ) 1094 { 1095 struct pkt *pkt = &targ->pkt; 1096 struct ether_header *eh; 1097 struct ip6_hdr ip6; 1098 struct ip ip; 1099 struct udphdr udp; 1100 void *udp_ptr; 1101 uint16_t paylen; 1102 uint32_t csum = 0; 1103 const char *payload = targ->g->options & OPT_INDIRECT ? 1104 indirect_payload : default_payload; 1105 int i, l0 = strlen(payload); 1106 1107 #ifndef NO_PCAP 1108 char errbuf[PCAP_ERRBUF_SIZE]; 1109 pcap_t *file; 1110 struct pcap_pkthdr *header; 1111 const unsigned char *packet; 1112 1113 /* Read a packet from a PCAP file if asked. 
*/ 1114 if (targ->g->packet_file != NULL) { 1115 if ((file = pcap_open_offline(targ->g->packet_file, 1116 errbuf)) == NULL) 1117 D("failed to open pcap file %s", 1118 targ->g->packet_file); 1119 if (pcap_next_ex(file, &header, &packet) < 0) 1120 D("failed to read packet from %s", 1121 targ->g->packet_file); 1122 if ((targ->frame = malloc(header->caplen)) == NULL) 1123 D("out of memory"); 1124 bcopy(packet, (unsigned char *)targ->frame, header->caplen); 1125 targ->g->pkt_size = header->caplen; 1126 pcap_close(file); 1127 return; 1128 } 1129 #endif 1130 1131 paylen = targ->g->pkt_size - sizeof(*eh) - 1132 (targ->g->af == AF_INET ? sizeof(ip): sizeof(ip6)); 1133 1134 /* create a nice NUL-terminated string */ 1135 for (i = 0; i < paylen; i += l0) { 1136 if (l0 > paylen - i) 1137 l0 = paylen - i; // last round 1138 bcopy(payload, PKT(pkt, body, targ->g->af) + i, l0); 1139 } 1140 PKT(pkt, body, targ->g->af)[i - 1] = '\0'; 1141 1142 /* prepare the headers */ 1143 eh = &pkt->eh; 1144 bcopy(&targ->g->src_mac.start, eh->ether_shost, 6); 1145 bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6); 1146 1147 if (targ->g->af == AF_INET) { 1148 eh->ether_type = htons(ETHERTYPE_IP); 1149 memcpy(&ip, &pkt->ipv4.ip, sizeof(ip)); 1150 udp_ptr = &pkt->ipv4.udp; 1151 ip.ip_v = IPVERSION; 1152 ip.ip_hl = sizeof(ip) >> 2; 1153 ip.ip_id = 0; 1154 ip.ip_tos = IPTOS_LOWDELAY; 1155 ip.ip_len = htons(targ->g->pkt_size - sizeof(*eh)); 1156 ip.ip_id = 0; 1157 ip.ip_off = htons(IP_DF); /* Don't fragment */ 1158 ip.ip_ttl = IPDEFTTL; 1159 ip.ip_p = IPPROTO_UDP; 1160 ip.ip_dst.s_addr = htonl(targ->g->dst_ip.ipv4.start); 1161 ip.ip_src.s_addr = htonl(targ->g->src_ip.ipv4.start); 1162 ip.ip_sum = wrapsum(checksum(&ip, sizeof(ip), 0)); 1163 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip)); 1164 } else { 1165 eh->ether_type = htons(ETHERTYPE_IPV6); 1166 memcpy(&ip6, &pkt->ipv4.ip, sizeof(ip6)); 1167 udp_ptr = &pkt->ipv6.udp; 1168 ip6.ip6_flow = 0; 1169 ip6.ip6_plen = htons(paylen); 1170 ip6.ip6_vfc = IPV6_VERSION; 
1171 ip6.ip6_nxt = IPPROTO_UDP; 1172 ip6.ip6_hlim = IPV6_DEFHLIM; 1173 ip6.ip6_src = targ->g->src_ip.ipv6.start; 1174 ip6.ip6_dst = targ->g->dst_ip.ipv6.start; 1175 } 1176 memcpy(&udp, udp_ptr, sizeof(udp)); 1177 1178 udp.uh_sport = htons(targ->g->src_ip.port0); 1179 udp.uh_dport = htons(targ->g->dst_ip.port0); 1180 udp.uh_ulen = htons(paylen); 1181 if (targ->g->af == AF_INET) { 1182 /* Magic: taken from sbin/dhclient/packet.c */ 1183 udp.uh_sum = wrapsum( 1184 checksum(&udp, sizeof(udp), /* udp header */ 1185 checksum(pkt->ipv4.body, /* udp payload */ 1186 paylen - sizeof(udp), 1187 checksum(&pkt->ipv4.ip.ip_src, /* pseudo header */ 1188 2 * sizeof(pkt->ipv4.ip.ip_src), 1189 IPPROTO_UDP + (u_int32_t)ntohs(udp.uh_ulen))))); 1190 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip)); 1191 } else { 1192 /* Save part of pseudo header checksum into csum */ 1193 csum = IPPROTO_UDP << 24; 1194 csum = checksum(&csum, sizeof(csum), paylen); 1195 udp.uh_sum = wrapsum( 1196 checksum(udp_ptr, sizeof(udp), /* udp header */ 1197 checksum(pkt->ipv6.body, /* udp payload */ 1198 paylen - sizeof(udp), 1199 checksum(&pkt->ipv6.ip.ip6_src, /* pseudo header */ 1200 2 * sizeof(pkt->ipv6.ip.ip6_src), csum)))); 1201 memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6)); 1202 } 1203 memcpy(udp_ptr, &udp, sizeof(udp)); 1204 1205 bzero(&pkt->vh, sizeof(pkt->vh)); 1206 // dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0); 1207 } 1208 1209 static void 1210 get_vnet_hdr_len(struct glob_arg *g) 1211 { 1212 struct nmreq_header hdr; 1213 struct nmreq_port_hdr ph; 1214 int err; 1215 1216 hdr = g->nmd->hdr; /* copy name and version */ 1217 hdr.nr_reqtype = NETMAP_REQ_PORT_HDR_GET; 1218 hdr.nr_options = 0; 1219 memset(&ph, 0, sizeof(ph)); 1220 hdr.nr_body = (uintptr_t)&ph; 1221 err = ioctl(g->main_fd, NIOCCTRL, &hdr); 1222 if (err) { 1223 D("Unable to get virtio-net header length"); 1224 return; 1225 } 1226 1227 g->virt_header = ph.nr_hdr_len; 1228 if (g->virt_header) { 1229 D("Port requires virtio-net header, length = 
%d", 1230 g->virt_header); 1231 } 1232 } 1233 1234 static void 1235 set_vnet_hdr_len(struct glob_arg *g) 1236 { 1237 int err, l = g->virt_header; 1238 struct nmreq_header hdr; 1239 struct nmreq_port_hdr ph; 1240 1241 if (l == 0) 1242 return; 1243 1244 hdr = g->nmd->hdr; /* copy name and version */ 1245 hdr.nr_reqtype = NETMAP_REQ_PORT_HDR_SET; 1246 hdr.nr_options = 0; 1247 memset(&ph, 0, sizeof(ph)); 1248 hdr.nr_body = (uintptr_t)&ph; 1249 err = ioctl(g->main_fd, NIOCCTRL, &hdr); 1250 if (err) { 1251 D("Unable to set virtio-net header length %d", l); 1252 } 1253 } 1254 1255 /* 1256 * create and enqueue a batch of packets on a ring. 1257 * On the last one set NS_REPORT to tell the driver to generate 1258 * an interrupt when done. 1259 */ 1260 static int 1261 send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame, 1262 int size, struct targ *t, u_int count, int options) 1263 { 1264 u_int n, sent, head = ring->head; 1265 u_int frags = t->frags; 1266 u_int frag_size = t->frag_size; 1267 struct netmap_slot *slot = &ring->slot[head]; 1268 1269 n = nm_ring_space(ring); 1270 #if 0 1271 if (options & (OPT_COPY | OPT_PREFETCH) ) { 1272 for (sent = 0; sent < count; sent++) { 1273 struct netmap_slot *slot = &ring->slot[head]; 1274 char *p = NETMAP_BUF(ring, slot->buf_idx); 1275 1276 __builtin_prefetch(p); 1277 head = nm_ring_next(ring, head); 1278 } 1279 head = ring->head; 1280 } 1281 #endif 1282 for (sent = 0; sent < count && n >= frags; sent++, n--) { 1283 char *p; 1284 int buf_changed; 1285 u_int tosend = size; 1286 1287 slot = &ring->slot[head]; 1288 p = NETMAP_BUF(ring, slot->buf_idx); 1289 buf_changed = slot->flags & NS_BUF_CHANGED; 1290 1291 slot->flags = 0; 1292 if (options & OPT_RUBBISH) { 1293 /* do nothing */ 1294 } else if (options & OPT_INDIRECT) { 1295 slot->flags |= NS_INDIRECT; 1296 slot->ptr = (uint64_t)((uintptr_t)frame); 1297 } else if (frags > 1) { 1298 u_int i; 1299 const char *f = frame; 1300 char *fp = p; 1301 for (i = 0; i < frags - 1; i++) 
{ 1302 memcpy(fp, f, frag_size); 1303 slot->len = frag_size; 1304 slot->flags = NS_MOREFRAG; 1305 if (options & OPT_DUMP) 1306 dump_payload(fp, frag_size, ring, head); 1307 tosend -= frag_size; 1308 f += frag_size; 1309 head = nm_ring_next(ring, head); 1310 slot = &ring->slot[head]; 1311 fp = NETMAP_BUF(ring, slot->buf_idx); 1312 } 1313 n -= (frags - 1); 1314 p = fp; 1315 slot->flags = 0; 1316 memcpy(p, f, tosend); 1317 update_addresses(pkt, t); 1318 } else if ((options & (OPT_COPY | OPT_MEMCPY)) || buf_changed) { 1319 if (options & OPT_COPY) 1320 nm_pkt_copy(frame, p, size); 1321 else 1322 memcpy(p, frame, size); 1323 update_addresses(pkt, t); 1324 } else if (options & OPT_PREFETCH) { 1325 __builtin_prefetch(p); 1326 } 1327 slot->len = tosend; 1328 if (options & OPT_DUMP) 1329 dump_payload(p, tosend, ring, head); 1330 head = nm_ring_next(ring, head); 1331 } 1332 if (sent) { 1333 slot->flags |= NS_REPORT; 1334 ring->head = ring->cur = head; 1335 } 1336 if (sent < count) { 1337 /* tell netmap that we need more slots */ 1338 ring->cur = ring->tail; 1339 } 1340 1341 return (sent); 1342 } 1343 1344 /* 1345 * Index of the highest bit set 1346 */ 1347 static uint32_t 1348 msb64(uint64_t x) 1349 { 1350 uint64_t m = 1ULL << 63; 1351 int i; 1352 1353 for (i = 63; i >= 0; i--, m >>=1) 1354 if (m & x) 1355 return i; 1356 return 0; 1357 } 1358 1359 /* 1360 * wait until ts, either busy or sleeping if more than 1ms. 1361 * Return wakeup time. 1362 */ 1363 static struct timespec 1364 wait_time(struct timespec ts) 1365 { 1366 for (;;) { 1367 struct timespec w, cur; 1368 clock_gettime(CLOCK_REALTIME_PRECISE, &cur); 1369 w = timespec_sub(ts, cur); 1370 if (w.tv_sec < 0) 1371 return cur; 1372 else if (w.tv_sec > 0 || w.tv_nsec > 1000000) 1373 poll(NULL, 0, 1); 1374 } 1375 } 1376 1377 /* 1378 * Send a packet, and wait for a response. 1379 * The payload (after UDP header, ofs 42) has a 4-byte sequence 1380 * followed by a struct timeval (or bintime?) 
1381 */ 1382 1383 static void * 1384 ping_body(void *data) 1385 { 1386 struct targ *targ = (struct targ *) data; 1387 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1388 struct netmap_if *nifp = targ->nmd->nifp; 1389 int i, m; 1390 void *frame; 1391 int size; 1392 struct timespec ts, now, last_print; 1393 struct timespec nexttime = {0, 0}; /* silence compiler */ 1394 uint64_t sent = 0, n = targ->g->npackets; 1395 uint64_t count = 0, t_cur, t_min = ~0, av = 0; 1396 uint64_t g_min = ~0, g_av = 0; 1397 uint64_t buckets[64]; /* bins for delays, ns */ 1398 int rate_limit = targ->g->tx_rate, tosend = 0; 1399 1400 frame = (char*)&targ->pkt + sizeof(targ->pkt.vh) - targ->g->virt_header; 1401 size = targ->g->pkt_size + targ->g->virt_header; 1402 1403 1404 if (targ->g->nthreads > 1) { 1405 D("can only ping with 1 thread"); 1406 return NULL; 1407 } 1408 1409 if (targ->g->af == AF_INET6) { 1410 D("Warning: ping-pong with IPv6 not supported"); 1411 } 1412 1413 bzero(&buckets, sizeof(buckets)); 1414 clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); 1415 now = last_print; 1416 if (rate_limit) { 1417 targ->tic = timespec_add(now, (struct timespec){2,0}); 1418 targ->tic.tv_nsec = 0; 1419 wait_time(targ->tic); 1420 nexttime = targ->tic; 1421 } 1422 while (!targ->cancel && (n == 0 || sent < n)) { 1423 struct netmap_ring *ring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring); 1424 struct netmap_slot *slot; 1425 char *p; 1426 int rv; 1427 uint64_t limit, event = 0; 1428 1429 if (rate_limit && tosend <= 0) { 1430 tosend = targ->g->burst; 1431 nexttime = timespec_add(nexttime, targ->g->tx_period); 1432 wait_time(nexttime); 1433 } 1434 1435 limit = rate_limit ? 
tosend : targ->g->burst; 1436 if (n > 0 && n - sent < limit) 1437 limit = n - sent; 1438 for (m = 0; (unsigned)m < limit; m++) { 1439 slot = &ring->slot[ring->head]; 1440 slot->len = size; 1441 p = NETMAP_BUF(ring, slot->buf_idx); 1442 1443 if (nm_ring_empty(ring)) { 1444 D("-- ouch, cannot send"); 1445 break; 1446 } else { 1447 struct tstamp *tp; 1448 nm_pkt_copy(frame, p, size); 1449 clock_gettime(CLOCK_REALTIME_PRECISE, &ts); 1450 bcopy(&sent, p+42, sizeof(sent)); 1451 tp = (struct tstamp *)(p+46); 1452 tp->sec = (uint32_t)ts.tv_sec; 1453 tp->nsec = (uint32_t)ts.tv_nsec; 1454 sent++; 1455 ring->head = ring->cur = nm_ring_next(ring, ring->head); 1456 } 1457 } 1458 if (m > 0) 1459 event++; 1460 targ->ctr.pkts = sent; 1461 targ->ctr.bytes = sent*size; 1462 targ->ctr.events = event; 1463 if (rate_limit) 1464 tosend -= m; 1465 #ifdef BUSYWAIT 1466 rv = ioctl(pfd.fd, NIOCTXSYNC, NULL); 1467 if (rv < 0) { 1468 D("TXSYNC error on queue %d: %s", targ->me, 1469 strerror(errno)); 1470 } 1471 again: 1472 ioctl(pfd.fd, NIOCRXSYNC, NULL); 1473 #else 1474 /* should use a parameter to decide how often to send */ 1475 if ( (rv = poll(&pfd, 1, 3000)) <= 0) { 1476 D("poll error on queue %d: %s", targ->me, 1477 (rv ? 
strerror(errno) : "timeout")); 1478 continue; 1479 } 1480 #endif /* BUSYWAIT */ 1481 /* see what we got back */ 1482 #ifdef BUSYWAIT 1483 int rx = 0; 1484 #endif 1485 for (i = targ->nmd->first_rx_ring; 1486 i <= targ->nmd->last_rx_ring; i++) { 1487 ring = NETMAP_RXRING(nifp, i); 1488 while (!nm_ring_empty(ring)) { 1489 uint32_t seq; 1490 struct tstamp *tp; 1491 int pos; 1492 1493 slot = &ring->slot[ring->head]; 1494 p = NETMAP_BUF(ring, slot->buf_idx); 1495 1496 clock_gettime(CLOCK_REALTIME_PRECISE, &now); 1497 bcopy(p+42, &seq, sizeof(seq)); 1498 tp = (struct tstamp *)(p+46); 1499 ts.tv_sec = (time_t)tp->sec; 1500 ts.tv_nsec = (long)tp->nsec; 1501 ts.tv_sec = now.tv_sec - ts.tv_sec; 1502 ts.tv_nsec = now.tv_nsec - ts.tv_nsec; 1503 if (ts.tv_nsec < 0) { 1504 ts.tv_nsec += 1000000000; 1505 ts.tv_sec--; 1506 } 1507 if (0) D("seq %d/%llu delta %d.%09d", seq, 1508 (unsigned long long)sent, 1509 (int)ts.tv_sec, (int)ts.tv_nsec); 1510 t_cur = ts.tv_sec * 1000000000UL + ts.tv_nsec; 1511 if (t_cur < t_min) 1512 t_min = t_cur; 1513 count ++; 1514 av += t_cur; 1515 pos = msb64(t_cur); 1516 buckets[pos]++; 1517 /* now store it in a bucket */ 1518 ring->head = ring->cur = nm_ring_next(ring, ring->head); 1519 #ifdef BUSYWAIT 1520 rx++; 1521 #endif 1522 } 1523 } 1524 //D("tx %d rx %d", sent, rx); 1525 //usleep(100000); 1526 ts.tv_sec = now.tv_sec - last_print.tv_sec; 1527 ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; 1528 if (ts.tv_nsec < 0) { 1529 ts.tv_nsec += 1000000000; 1530 ts.tv_sec--; 1531 } 1532 if (ts.tv_sec >= 1) { 1533 D("count %d RTT: min %d av %d ns", 1534 (int)count, (int)t_min, (int)(av/count)); 1535 int k, j, kmin, off; 1536 char buf[512]; 1537 1538 for (kmin = 0; kmin < 64; kmin ++) 1539 if (buckets[kmin]) 1540 break; 1541 for (k = 63; k >= kmin; k--) 1542 if (buckets[k]) 1543 break; 1544 buf[0] = '\0'; 1545 off = 0; 1546 for (j = kmin; j <= k; j++) { 1547 off += sprintf(buf + off, " %5d", (int)buckets[j]); 1548 } 1549 D("k: %d .. 
%d\n\t%s", 1<<kmin, 1<<k, buf); 1550 bzero(&buckets, sizeof(buckets)); 1551 count = 0; 1552 g_av += av; 1553 av = 0; 1554 if (t_min < g_min) 1555 g_min = t_min; 1556 t_min = ~0; 1557 last_print = now; 1558 } 1559 #ifdef BUSYWAIT 1560 if (rx < m && ts.tv_sec <= 3 && !targ->cancel) 1561 goto again; 1562 #endif /* BUSYWAIT */ 1563 } 1564 1565 if (sent > 0) { 1566 D("RTT over %llu packets: min %d av %d ns", 1567 (long long unsigned)sent, (int)g_min, 1568 (int)((double)g_av/sent)); 1569 } 1570 targ->completed = 1; 1571 1572 /* reset the ``used`` flag. */ 1573 targ->used = 0; 1574 1575 return NULL; 1576 } 1577 1578 1579 /* 1580 * reply to ping requests 1581 */ 1582 static void * 1583 pong_body(void *data) 1584 { 1585 struct targ *targ = (struct targ *) data; 1586 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1587 struct netmap_if *nifp = targ->nmd->nifp; 1588 struct netmap_ring *txring, *rxring; 1589 int i; 1590 uint64_t sent = 0, n = targ->g->npackets; 1591 1592 if (targ->g->nthreads > 1) { 1593 D("can only reply ping with 1 thread"); 1594 return NULL; 1595 } 1596 if (n > 0) 1597 D("understood ponger %llu but don't know how to do it", 1598 (unsigned long long)n); 1599 1600 if (targ->g->af == AF_INET6) { 1601 D("Warning: ping-pong with IPv6 not supported"); 1602 } 1603 1604 while (!targ->cancel && (n == 0 || sent < n)) { 1605 uint32_t txhead, txavail; 1606 //#define BUSYWAIT 1607 #ifdef BUSYWAIT 1608 ioctl(pfd.fd, NIOCRXSYNC, NULL); 1609 #else 1610 int rv; 1611 if ( (rv = poll(&pfd, 1, 1000)) <= 0) { 1612 D("poll error on queue %d: %s", targ->me, 1613 rv ? 
strerror(errno) : "timeout"); 1614 continue; 1615 } 1616 #endif 1617 txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring); 1618 txhead = txring->head; 1619 txavail = nm_ring_space(txring); 1620 /* see what we got back */ 1621 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 1622 rxring = NETMAP_RXRING(nifp, i); 1623 while (!nm_ring_empty(rxring)) { 1624 uint16_t *spkt, *dpkt; 1625 uint32_t head = rxring->head; 1626 struct netmap_slot *slot = &rxring->slot[head]; 1627 char *src, *dst; 1628 src = NETMAP_BUF(rxring, slot->buf_idx); 1629 //D("got pkt %p of size %d", src, slot->len); 1630 rxring->head = rxring->cur = nm_ring_next(rxring, head); 1631 if (txavail == 0) 1632 continue; 1633 dst = NETMAP_BUF(txring, 1634 txring->slot[txhead].buf_idx); 1635 /* copy... */ 1636 dpkt = (uint16_t *)dst; 1637 spkt = (uint16_t *)src; 1638 nm_pkt_copy(src, dst, slot->len); 1639 /* swap source and destination MAC */ 1640 dpkt[0] = spkt[3]; 1641 dpkt[1] = spkt[4]; 1642 dpkt[2] = spkt[5]; 1643 dpkt[3] = spkt[0]; 1644 dpkt[4] = spkt[1]; 1645 dpkt[5] = spkt[2]; 1646 /* swap source and destination IPv4 */ 1647 if (spkt[6] == htons(ETHERTYPE_IP)) { 1648 dpkt[13] = spkt[15]; 1649 dpkt[14] = spkt[16]; 1650 dpkt[15] = spkt[13]; 1651 dpkt[16] = spkt[14]; 1652 } 1653 txring->slot[txhead].len = slot->len; 1654 //dump_payload(dst, slot->len, txring, txhead); 1655 txhead = nm_ring_next(txring, txhead); 1656 txavail--; 1657 sent++; 1658 } 1659 } 1660 txring->head = txring->cur = txhead; 1661 targ->ctr.pkts = sent; 1662 #ifdef BUSYWAIT 1663 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1664 #endif 1665 } 1666 1667 targ->completed = 1; 1668 1669 /* reset the ``used`` flag. 
*/ 1670 targ->used = 0; 1671 1672 return NULL; 1673 } 1674 1675 1676 static void * 1677 sender_body(void *data) 1678 { 1679 struct targ *targ = (struct targ *) data; 1680 struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; 1681 struct netmap_if *nifp; 1682 struct netmap_ring *txring = NULL; 1683 int i; 1684 uint64_t n = targ->g->npackets / targ->g->nthreads; 1685 uint64_t sent = 0; 1686 uint64_t event = 0; 1687 int options = targ->g->options; 1688 struct timespec nexttime = { 0, 0}; // XXX silence compiler 1689 int rate_limit = targ->g->tx_rate; 1690 struct pkt *pkt = &targ->pkt; 1691 void *frame; 1692 int size; 1693 1694 if (targ->frame == NULL) { 1695 frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header; 1696 size = targ->g->pkt_size + targ->g->virt_header; 1697 } else { 1698 frame = targ->frame; 1699 size = targ->g->pkt_size; 1700 } 1701 1702 D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd); 1703 if (setaffinity(targ->thread, targ->affinity)) 1704 goto quit; 1705 1706 /* main loop.*/ 1707 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1708 if (rate_limit) { 1709 targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); 1710 targ->tic.tv_nsec = 0; 1711 wait_time(targ->tic); 1712 nexttime = targ->tic; 1713 } 1714 if (targ->g->dev_type == DEV_TAP) { 1715 D("writing to file desc %d", targ->g->main_fd); 1716 1717 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 1718 if (write(targ->g->main_fd, frame, size) != -1) 1719 sent++; 1720 update_addresses(pkt, targ); 1721 if (i > 10000) { 1722 targ->ctr.pkts = sent; 1723 targ->ctr.bytes = sent*size; 1724 targ->ctr.events = sent; 1725 i = 0; 1726 } 1727 } 1728 #ifndef NO_PCAP 1729 } else if (targ->g->dev_type == DEV_PCAP) { 1730 pcap_t *p = targ->g->p; 1731 1732 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 1733 if (pcap_inject(p, frame, size) != -1) 1734 sent++; 1735 update_addresses(pkt, targ); 1736 if (i > 10000) { 1737 targ->ctr.pkts = sent; 1738 targ->ctr.bytes = 
sent*size; 1739 targ->ctr.events = sent; 1740 i = 0; 1741 } 1742 } 1743 #endif /* NO_PCAP */ 1744 } else { 1745 int tosend = 0; 1746 u_int bufsz, frag_size = targ->g->frag_size; 1747 1748 nifp = targ->nmd->nifp; 1749 txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring); 1750 bufsz = txring->nr_buf_size; 1751 if (bufsz < frag_size) 1752 frag_size = bufsz; 1753 targ->frag_size = targ->g->pkt_size / targ->frags; 1754 if (targ->frag_size > frag_size) { 1755 targ->frags = targ->g->pkt_size / frag_size; 1756 targ->frag_size = frag_size; 1757 if (targ->g->pkt_size % frag_size != 0) 1758 targ->frags++; 1759 } 1760 D("frags %u frag_size %u", targ->frags, targ->frag_size); 1761 1762 /* mark all slots of all rings as changed so initial copy will be done */ 1763 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1764 uint32_t j; 1765 struct netmap_slot *slot; 1766 1767 txring = NETMAP_TXRING(nifp, i); 1768 for (j = 0; j < txring->num_slots; j++) { 1769 slot = &txring->slot[j]; 1770 slot->flags = NS_BUF_CHANGED; 1771 } 1772 } 1773 1774 while (!targ->cancel && (n == 0 || sent < n)) { 1775 int rv; 1776 1777 if (rate_limit && tosend <= 0) { 1778 tosend = targ->g->burst; 1779 nexttime = timespec_add(nexttime, targ->g->tx_period); 1780 wait_time(nexttime); 1781 } 1782 1783 /* 1784 * wait for available room in the send queue(s) 1785 */ 1786 #ifdef BUSYWAIT 1787 (void)rv; 1788 if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) { 1789 D("ioctl error on queue %d: %s", targ->me, 1790 strerror(errno)); 1791 goto quit; 1792 } 1793 #else /* !BUSYWAIT */ 1794 if ( (rv = poll(&pfd, 1, 2000)) <= 0) { 1795 if (targ->cancel) 1796 break; 1797 D("poll error on queue %d: %s", targ->me, 1798 rv ? 
strerror(errno) : "timeout"); 1799 // goto quit; 1800 } 1801 if (pfd.revents & POLLERR) { 1802 D("poll error on %d ring %d-%d", pfd.fd, 1803 targ->nmd->first_tx_ring, targ->nmd->last_tx_ring); 1804 goto quit; 1805 } 1806 #endif /* !BUSYWAIT */ 1807 /* 1808 * scan our queues and send on those with room 1809 */ 1810 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1811 int m; 1812 uint64_t limit = rate_limit ? tosend : targ->g->burst; 1813 1814 if (n > 0 && n == sent) 1815 break; 1816 1817 if (n > 0 && n - sent < limit) 1818 limit = n - sent; 1819 txring = NETMAP_TXRING(nifp, i); 1820 if (nm_ring_empty(txring)) 1821 continue; 1822 1823 if (targ->g->pkt_min_size > 0) { 1824 size = nrand48(targ->seed) % 1825 (targ->g->pkt_size - targ->g->pkt_min_size) + 1826 targ->g->pkt_min_size; 1827 update_size(pkt, targ, size); 1828 } 1829 m = send_packets(txring, pkt, frame, size, targ, 1830 limit, options); 1831 ND("limit %lu tail %d m %d", 1832 limit, txring->tail, m); 1833 sent += m; 1834 if (m > 0) //XXX-ste: can m be 0? 1835 event++; 1836 targ->ctr.pkts = sent; 1837 targ->ctr.bytes += m*size; 1838 targ->ctr.events = event; 1839 if (rate_limit) { 1840 tosend -= m; 1841 if (tosend <= 0) 1842 break; 1843 } 1844 } 1845 } 1846 /* flush any remaining packets */ 1847 if (txring != NULL) { 1848 D("flush tail %d head %d on thread %p", 1849 txring->tail, txring->head, 1850 (void *)pthread_self()); 1851 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1852 } 1853 1854 /* final part: wait all the TX queues to be empty. 
*/ 1855 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1856 txring = NETMAP_TXRING(nifp, i); 1857 while (!targ->cancel && nm_tx_pending(txring)) { 1858 RD(5, "pending tx tail %d head %d on ring %d", 1859 txring->tail, txring->head, i); 1860 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1861 usleep(1); /* wait 1 tick */ 1862 } 1863 } 1864 } /* end DEV_NETMAP */ 1865 1866 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1867 targ->completed = 1; 1868 targ->ctr.pkts = sent; 1869 targ->ctr.bytes = sent*size; 1870 targ->ctr.events = event; 1871 quit: 1872 /* reset the ``used`` flag. */ 1873 targ->used = 0; 1874 1875 return (NULL); 1876 } 1877 1878 1879 #ifndef NO_PCAP 1880 static void 1881 receive_pcap(u_char *user, const struct pcap_pkthdr * h, 1882 const u_char * bytes) 1883 { 1884 struct my_ctrs *ctr = (struct my_ctrs *)user; 1885 (void)bytes; /* UNUSED */ 1886 ctr->bytes += h->len; 1887 ctr->pkts++; 1888 } 1889 #endif /* !NO_PCAP */ 1890 1891 1892 static int 1893 receive_packets(struct netmap_ring *ring, u_int limit, int dump, uint64_t *bytes) 1894 { 1895 u_int head, rx, n; 1896 uint64_t b = 0; 1897 u_int complete = 0; 1898 1899 if (bytes == NULL) 1900 bytes = &b; 1901 1902 head = ring->head; 1903 n = nm_ring_space(ring); 1904 if (n < limit) 1905 limit = n; 1906 for (rx = 0; rx < limit; rx++) { 1907 struct netmap_slot *slot = &ring->slot[head]; 1908 char *p = NETMAP_BUF(ring, slot->buf_idx); 1909 1910 *bytes += slot->len; 1911 if (dump) 1912 dump_payload(p, slot->len, ring, head); 1913 if (!(slot->flags & NS_MOREFRAG)) 1914 complete++; 1915 1916 head = nm_ring_next(ring, head); 1917 } 1918 ring->head = ring->cur = head; 1919 1920 return (complete); 1921 } 1922 1923 static void * 1924 receiver_body(void *data) 1925 { 1926 struct targ *targ = (struct targ *) data; 1927 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1928 struct netmap_if *nifp; 1929 struct netmap_ring *rxring; 1930 int i; 1931 struct my_ctrs cur; 1932 uint64_t n = 
targ->g->npackets / targ->g->nthreads; 1933 1934 memset(&cur, 0, sizeof(cur)); 1935 1936 if (setaffinity(targ->thread, targ->affinity)) 1937 goto quit; 1938 1939 D("reading from %s fd %d main_fd %d", 1940 targ->g->ifname, targ->fd, targ->g->main_fd); 1941 /* unbounded wait for the first packet. */ 1942 for (;!targ->cancel;) { 1943 i = poll(&pfd, 1, 1000); 1944 if (i > 0 && !(pfd.revents & POLLERR)) 1945 break; 1946 if (i < 0) { 1947 D("poll() error: %s", strerror(errno)); 1948 goto quit; 1949 } 1950 if (pfd.revents & POLLERR) { 1951 D("fd error"); 1952 goto quit; 1953 } 1954 RD(1, "waiting for initial packets, poll returns %d %d", 1955 i, pfd.revents); 1956 } 1957 /* main loop, exit after 1s silence */ 1958 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1959 if (targ->g->dev_type == DEV_TAP) { 1960 while (!targ->cancel && (n == 0 || targ->ctr.pkts < n)) { 1961 char buf[MAX_BODYSIZE]; 1962 /* XXX should we poll ? */ 1963 i = read(targ->g->main_fd, buf, sizeof(buf)); 1964 if (i > 0) { 1965 targ->ctr.pkts++; 1966 targ->ctr.bytes += i; 1967 targ->ctr.events++; 1968 } 1969 } 1970 #ifndef NO_PCAP 1971 } else if (targ->g->dev_type == DEV_PCAP) { 1972 while (!targ->cancel && (n == 0 || targ->ctr.pkts < n)) { 1973 /* XXX should we poll ? */ 1974 pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, 1975 (u_char *)&targ->ctr); 1976 targ->ctr.events++; 1977 } 1978 #endif /* !NO_PCAP */ 1979 } else { 1980 int dump = targ->g->options & OPT_DUMP; 1981 1982 nifp = targ->nmd->nifp; 1983 while (!targ->cancel && (n == 0 || targ->ctr.pkts < n)) { 1984 /* Once we started to receive packets, wait at most 1 seconds 1985 before quitting. 
*/ 1986 #ifdef BUSYWAIT 1987 if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) { 1988 D("ioctl error on queue %d: %s", targ->me, 1989 strerror(errno)); 1990 goto quit; 1991 } 1992 #else /* !BUSYWAIT */ 1993 if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { 1994 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1995 targ->toc.tv_sec -= 1; /* Subtract timeout time. */ 1996 goto out; 1997 } 1998 1999 if (pfd.revents & POLLERR) { 2000 D("poll err"); 2001 goto quit; 2002 } 2003 #endif /* !BUSYWAIT */ 2004 uint64_t cur_space = 0; 2005 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 2006 int m; 2007 2008 rxring = NETMAP_RXRING(nifp, i); 2009 /* compute free space in the ring */ 2010 m = rxring->head + rxring->num_slots - rxring->tail; 2011 if (m >= (int) rxring->num_slots) 2012 m -= rxring->num_slots; 2013 cur_space += m; 2014 if (nm_ring_empty(rxring)) 2015 continue; 2016 2017 m = receive_packets(rxring, targ->g->burst, dump, &cur.bytes); 2018 cur.pkts += m; 2019 if (m > 0) 2020 cur.events++; 2021 } 2022 cur.min_space = targ->ctr.min_space; 2023 if (cur_space < cur.min_space) 2024 cur.min_space = cur_space; 2025 targ->ctr = cur; 2026 } 2027 } 2028 2029 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 2030 2031 #if !defined(BUSYWAIT) 2032 out: 2033 #endif 2034 targ->completed = 1; 2035 targ->ctr = cur; 2036 2037 quit: 2038 /* reset the ``used`` flag. 
*/ 2039 targ->used = 0; 2040 2041 return (NULL); 2042 } 2043 2044 static void * 2045 txseq_body(void *data) 2046 { 2047 struct targ *targ = (struct targ *) data; 2048 struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; 2049 struct netmap_ring *ring; 2050 int64_t sent = 0; 2051 uint64_t event = 0; 2052 int options = targ->g->options | OPT_COPY; 2053 struct timespec nexttime = {0, 0}; 2054 int rate_limit = targ->g->tx_rate; 2055 struct pkt *pkt = &targ->pkt; 2056 int frags = targ->g->frags; 2057 uint32_t sequence = 0; 2058 int budget = 0; 2059 void *frame; 2060 int size; 2061 2062 if (targ->g->nthreads > 1) { 2063 D("can only txseq ping with 1 thread"); 2064 return NULL; 2065 } 2066 2067 if (targ->g->npackets > 0) { 2068 D("Ignoring -n argument"); 2069 } 2070 2071 frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header; 2072 size = targ->g->pkt_size + targ->g->virt_header; 2073 2074 D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd); 2075 if (setaffinity(targ->thread, targ->affinity)) 2076 goto quit; 2077 2078 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 2079 if (rate_limit) { 2080 targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); 2081 targ->tic.tv_nsec = 0; 2082 wait_time(targ->tic); 2083 nexttime = targ->tic; 2084 } 2085 2086 /* Only use the first queue. 
*/ 2087 ring = NETMAP_TXRING(targ->nmd->nifp, targ->nmd->first_tx_ring); 2088 2089 while (!targ->cancel) { 2090 int64_t limit; 2091 unsigned int space; 2092 unsigned int head; 2093 int fcnt; 2094 uint16_t sum = 0; 2095 int rv; 2096 2097 if (!rate_limit) { 2098 budget = targ->g->burst; 2099 2100 } else if (budget <= 0) { 2101 budget = targ->g->burst; 2102 nexttime = timespec_add(nexttime, targ->g->tx_period); 2103 wait_time(nexttime); 2104 } 2105 2106 /* wait for available room in the send queue */ 2107 #ifdef BUSYWAIT 2108 (void)rv; 2109 if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) { 2110 D("ioctl error on queue %d: %s", targ->me, 2111 strerror(errno)); 2112 goto quit; 2113 } 2114 #else /* !BUSYWAIT */ 2115 if ( (rv = poll(&pfd, 1, 2000)) <= 0) { 2116 if (targ->cancel) 2117 break; 2118 D("poll error on queue %d: %s", targ->me, 2119 rv ? strerror(errno) : "timeout"); 2120 // goto quit; 2121 } 2122 if (pfd.revents & POLLERR) { 2123 D("poll error on %d ring %d-%d", pfd.fd, 2124 targ->nmd->first_tx_ring, targ->nmd->last_tx_ring); 2125 goto quit; 2126 } 2127 #endif /* !BUSYWAIT */ 2128 2129 /* If no room poll() again. */ 2130 space = nm_ring_space(ring); 2131 if (!space) { 2132 continue; 2133 } 2134 2135 limit = budget; 2136 2137 if (space < limit) { 2138 limit = space; 2139 } 2140 2141 /* Cut off ``limit`` to make sure is multiple of ``frags``. */ 2142 if (frags > 1) { 2143 limit = (limit / frags) * frags; 2144 } 2145 2146 limit = sent + limit; /* Convert to absolute. */ 2147 2148 for (fcnt = frags, head = ring->head; 2149 sent < limit; sent++, sequence++) { 2150 struct netmap_slot *slot = &ring->slot[head]; 2151 char *p = NETMAP_BUF(ring, slot->buf_idx); 2152 uint16_t *w = (uint16_t *)PKT(pkt, body, targ->g->af), t; 2153 2154 memcpy(&sum, targ->g->af == AF_INET ? 
&pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, sizeof(sum)); 2155 2156 slot->flags = 0; 2157 t = *w; 2158 PKT(pkt, body, targ->g->af)[0] = sequence >> 24; 2159 PKT(pkt, body, targ->g->af)[1] = (sequence >> 16) & 0xff; 2160 sum = ~cksum_add(~sum, cksum_add(~t, *w)); 2161 t = *++w; 2162 PKT(pkt, body, targ->g->af)[2] = (sequence >> 8) & 0xff; 2163 PKT(pkt, body, targ->g->af)[3] = sequence & 0xff; 2164 sum = ~cksum_add(~sum, cksum_add(~t, *w)); 2165 memcpy(targ->g->af == AF_INET ? &pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, &sum, sizeof(sum)); 2166 nm_pkt_copy(frame, p, size); 2167 if (fcnt == frags) { 2168 update_addresses(pkt, targ); 2169 } 2170 2171 if (options & OPT_DUMP) { 2172 dump_payload(p, size, ring, head); 2173 } 2174 2175 slot->len = size; 2176 2177 if (--fcnt > 0) { 2178 slot->flags |= NS_MOREFRAG; 2179 } else { 2180 fcnt = frags; 2181 } 2182 2183 if (sent == limit - 1) { 2184 /* Make sure we don't push an incomplete 2185 * packet. */ 2186 assert(!(slot->flags & NS_MOREFRAG)); 2187 slot->flags |= NS_REPORT; 2188 } 2189 2190 head = nm_ring_next(ring, head); 2191 if (rate_limit) { 2192 budget--; 2193 } 2194 } 2195 2196 ring->cur = ring->head = head; 2197 2198 event ++; 2199 targ->ctr.pkts = sent; 2200 targ->ctr.bytes = sent * size; 2201 targ->ctr.events = event; 2202 } 2203 2204 /* flush any remaining packets */ 2205 D("flush tail %d head %d on thread %p", 2206 ring->tail, ring->head, 2207 (void *)pthread_self()); 2208 ioctl(pfd.fd, NIOCTXSYNC, NULL); 2209 2210 /* final part: wait the TX queues to become empty. 
 */
	/* Drain phase: keep syncing until the kernel has transmitted
	 * everything still pending on the ring, unless cancelled. */
	while (!targ->cancel && nm_tx_pending(ring)) {
		RD(5, "pending tx tail %d head %d on ring %d",
				ring->tail, ring->head, targ->nmd->first_tx_ring);
		ioctl(pfd.fd, NIOCTXSYNC, NULL);
		usleep(1); /* wait 1 tick */
	}

	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
	targ->completed = 1;
	targ->ctr.pkts = sent;
	targ->ctr.bytes = sent * size;
	targ->ctr.events = event;
quit:
	/* reset the ``used`` flag. */
	targ->used = 0;

	return (NULL);
}


/*
 * Render the len/flags of `nfrags` consecutive slots starting at `head`
 * into strbuf, e.g. "|60,0||60,4|", for diagnostic messages.
 * Returns strbuf; output is silently truncated if strbuflen is too small.
 */
static char *
multi_slot_to_string(struct netmap_ring *ring, unsigned int head,
		unsigned int nfrags, char *strbuf, size_t strbuflen)
{
	unsigned int f;
	char *ret = strbuf;

	for (f = 0; f < nfrags; f++) {
		struct netmap_slot *slot = &ring->slot[head];
		int m = snprintf(strbuf, strbuflen, "|%u,%x|", slot->len,
				slot->flags);
		if (m >= (int)strbuflen) {
			break;	/* no room left, stop early */
		}
		strbuf += m;
		strbuflen -= m;

		head = nm_ring_next(ring, head);
	}

	return ret;
}

/*
 * Receiver body that extracts the 32-bit big-endian sequence number
 * embedded in each packet payload (as produced by the txseq sender) and
 * reports gaps and out-of-order arrivals, tracking one expected value
 * per RX ring.
 */
static void *
rxseq_body(void *data)
{
	struct targ *targ = (struct targ *) data;
	struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
	int dump = targ->g->options & OPT_DUMP;
	struct netmap_ring *ring;
	unsigned int frags_exp = 1;	/* frags per packet seen so far */
	struct my_ctrs cur;
	unsigned int frags = 0;		/* frags in the packet being parsed */
	int first_packet = 1;
	int first_slot = 1;
	int i, j, af, nrings;
	uint32_t seq, *seq_exp = NULL;	/* per-ring next expected sequence */

	memset(&cur, 0, sizeof(cur));

	if (setaffinity(targ->thread, targ->affinity))
		goto quit;

	nrings = targ->nmd->last_rx_ring - targ->nmd->first_rx_ring + 1;
	seq_exp = calloc(nrings, sizeof(uint32_t));
	if (seq_exp == NULL) {
		D("failed to allocate seq array");
		goto quit;
	}

	D("reading from %s fd %d main_fd %d",
		targ->g->ifname, targ->fd, targ->g->main_fd);
	/* unbounded wait for the first packet. */
	for (;!targ->cancel;) {
		i = poll(&pfd, 1, 1000);
		if (i > 0 && !(pfd.revents & POLLERR))
			break;
		RD(1, "waiting for initial packets, poll returns %d %d",
			i, pfd.revents);
	}

	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);


	while (!targ->cancel) {
		unsigned int head;
		int limit;

#ifdef BUSYWAIT
		if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) {
			D("ioctl error on queue %d: %s", targ->me,
					strerror(errno));
			goto quit;
		}
#else /* !BUSYWAIT */
		if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) {
			clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
			targ->toc.tv_sec -= 1; /* Subtract timeout time. */
			goto out;
		}

		if (pfd.revents & POLLERR) {
			D("poll err");
			goto quit;
		}
#endif /* !BUSYWAIT */

		for (j = targ->nmd->first_rx_ring; j <= targ->nmd->last_rx_ring; j++) {
			ring = NETMAP_RXRING(targ->nmd->nifp, j);
			if (nm_ring_empty(ring))
				continue;

			/* process at most a burst's worth of slots per ring */
			limit = nm_ring_space(ring);
			if (limit > targ->g->burst)
				limit = targ->g->burst;

#if 0
			/* Enable this if
			 * 1) we remove the early-return optimization from
			 *    the netmap poll implementation, or
			 * 2) pipes get NS_MOREFRAG support.
			 * With the current netmap implementation, an experiment like
			 *    pkt-gen -i vale:1{1 -f txseq -F 9
			 *    pkt-gen -i vale:1}1 -f rxseq
			 * would get stuck as soon as we find nm_ring_space(ring) < 9,
			 * since here limit is rounded to 0 and
			 * pipe rxsync is not called anymore by the poll() of this loop.
			 */
			if (frags_exp > 1) {
				int o = limit;
				/* Cut off to the closest smaller multiple. */
				limit = (limit / frags_exp) * frags_exp;
				RD(2, "LIMIT %d --> %d", o, limit);
			}
#endif

			for (head = ring->head, i = 0; i < limit; i++) {
				struct netmap_slot *slot = &ring->slot[head];
				char *p = NETMAP_BUF(ring, slot->buf_idx);
				int len = slot->len;
				struct pkt *pkt;

				if (dump) {
					dump_payload(p, slot->len, ring, head);
				}

				/* count fragments; a packet ends on the slot
				 * without NS_MOREFRAG */
				frags++;
				if (!(slot->flags & NS_MOREFRAG)) {
					if (first_packet) {
						first_packet = 0;
					} else if (frags != frags_exp) {
						char prbuf[512];
						RD(1, "Received packets with %u frags, "
							"expected %u, '%s'", frags, frags_exp,
							multi_slot_to_string(ring, head-frags+1,
								frags,
								prbuf, sizeof(prbuf)));
					}
					first_packet = 0;
					frags_exp = frags;
					frags = 0;
				}

				/* adjust for the (possibly absent) virtio-net
				 * header in front of the ethernet frame */
				p -= sizeof(pkt->vh) - targ->g->virt_header;
				len += sizeof(pkt->vh) - targ->g->virt_header;
				pkt = (struct pkt *)p;
				if (ntohs(pkt->eh.ether_type) == ETHERTYPE_IP)
					af = AF_INET;
				else
					af = AF_INET6;

				if ((char *)pkt + len < ((char *)PKT(pkt, body, af)) +
					sizeof(seq)) {
					RD(1, "%s: packet too small (len=%u)", __func__,
						slot->len);
				} else {
					/* sequence number is stored big-endian in
					 * the first 4 payload bytes */
					seq = (PKT(pkt, body, af)[0] << 24) |
						(PKT(pkt, body, af)[1] << 16) |
						(PKT(pkt, body, af)[2] << 8) |
						PKT(pkt, body, af)[3];
					if (first_slot) {
						/* Grab the first one, whatever it
						   is. */
						seq_exp[j] = seq;
						first_slot = 0;
					} else if (seq != seq_exp[j]) {
						uint32_t delta = seq - seq_exp[j];

						if (delta < (0xFFFFFFFF >> 1)) {
							RD(2, "Sequence GAP: exp %u found %u",
									seq_exp[j], seq);
						} else {
							RD(2, "Sequence OUT OF ORDER: "
								"exp %u found %u", seq_exp[j], seq);
						}
						seq_exp[j] = seq;
					}
					seq_exp[j]++;
				}

				cur.bytes += slot->len;
				head = nm_ring_next(ring, head);
				cur.pkts++;
			}

			/* release the processed slots to the kernel */
			ring->cur = ring->head = head;

			cur.events++;
			targ->ctr = cur;
		}
	}
	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);

#ifndef BUSYWAIT
out:
#endif /* !BUSYWAIT */
	targ->completed = 1;
	targ->ctr = cur;

quit:
	if (seq_exp != NULL)
		free(seq_exp);
	/* reset the ``used`` flag. */
	targ->used = 0;

	return (NULL);
}


/*
 * Print the final summary (packet/byte/event counts, pps, bandwidth,
 * average batch size) for a completed run.
 */
static void
tx_output(struct glob_arg *g, struct my_ctrs *cur, double delta, const char *msg)
{
	double bw, raw_bw, pps, abs;
	char b1[40], b2[80], b3[80];
	int size;

	if (cur->pkts == 0) {
		printf("%s nothing.\n", msg);
		return;
	}

	size = (int)(cur->bytes / cur->pkts);

	printf("%s %llu packets %llu bytes %llu events %d bytes each in %.2f seconds.\n",
		msg,
		(unsigned long long)cur->pkts,
		(unsigned long long)cur->bytes,
		(unsigned long long)cur->events, size, delta);
	if (delta == 0)
		delta = 1e-6;
	if (size < 60)		/* correct for min packet size */
		size = 60;
	pps = cur->pkts / delta;
	bw = (8.0 * cur->bytes) / delta;
	raw_bw = (8.0 * cur->bytes + cur->pkts * g->framing) / delta;
	abs = cur->pkts / (double)(cur->events);

	printf("Speed: %spps Bandwidth: %sbps (raw %sbps). 
Average batch: %.2f pkts\n",
		norm(b1, pps, normalize), norm(b2, bw, normalize), norm(b3, raw_bw, normalize), abs);
}

/*
 * Print command-line help (copied from the pkt-gen man page) to stderr
 * and exit with the given code.
 */
static void
usage(int errcode)
{
	/* This usage is generated from the pkt-gen man page:
	 *   $ man pkt-gen > x
	 * and pasted here adding the string terminators and endlines with simple
	 * regular expressions. */
	const char *cmd = "pkt-gen";
	fprintf(stderr,
		"Usage:\n"
		"%s arguments\n"
		"    -h      Show program usage and exit.\n"
		"\n"
		"    -i interface\n"
		"            Name of the network interface that pkt-gen operates on.  It can be a system network interface\n"
		"            (e.g., em0), the name of a vale(4) port (e.g., valeSSS:PPP), the name of a netmap pipe or\n"
		"            monitor, or any valid netmap port name accepted by the nm_open library function, as docu-\n"
		"            mented in netmap(4) (NIOCREGIF section).\n"
		"\n"
		"    -f function\n"
		"            The function to be executed by pkt-gen.  Specify tx for transmission, rx for reception, ping\n"
		"            for client-side ping-pong operation, and pong for server-side ping-pong operation.\n"
		"\n"
		"    -n count\n"
		"            Number of iterations of the pkt-gen function (with 0 meaning infinite).  In case of tx or rx,\n"
		"            count is the number of packets to receive or transmit.  In case of ping or pong, count is the\n"
		"            number of ping-pong transactions.\n"
		"\n"
		"    -l pkt_size\n"
		"            Packet size in bytes excluding CRC.  If passed a second time, use random sizes larger or\n"
		"            equal than the second one and lower than the first one.\n"
		"\n"
		"    -b burst_size\n"
		"            Transmit or receive up to burst_size packets at a time.\n"
		"\n"
		"    -4      Use IPv4 addresses.\n"
		"\n"
		"    -6      Use IPv6 addresses.\n"
		"\n"
		"    -d dst_ip[:port[-dst_ip:port]]\n"
		"            Destination IPv4/IPv6 address and port, single or range.\n"
		"\n"
		"    -s src_ip[:port[-src_ip:port]]\n"
		"            Source IPv4/IPv6 address and port, single or range.\n"
		"\n"
		"    -D dst_mac\n"
		"            Destination MAC address in colon notation (e.g., aa:bb:cc:dd:ee:00).\n"
		"\n"
		"    -S src_mac\n"
		"            Source MAC address in colon notation.\n"
		"\n"
		"    -a cpu_id\n"
		"            Pin the first thread of pkt-gen to a particular CPU using pthread_setaffinity_np(3).  If more\n"
		"            threads are used, they are pinned to the subsequent CPUs, one per thread.\n"
		"\n"
		"    -c cpus\n"
		"            Maximum number of CPUs to use (0 means to use all the available ones).\n"
		"\n"
		"    -p threads\n"
		"            Number of threads to use.  By default, only a single thread is used to handle all the netmap\n"
		"            rings.  If threads is larger than one, each thread handles a single TX ring (in tx mode), a\n"
		"            single RX ring (in rx mode), or a TX/RX ring pair.  The number of threads must be less than or\n"
		"            equal to the number of TX (or RX) rings available in the device specified by interface.\n"
		"\n"
		"    -T report_ms\n"
		"            Number of milliseconds between reports.\n"
		"\n"
		"    -w wait_for_link_time\n"
		"            Number of seconds to wait before starting the pkt-gen function, useful to make sure that the\n"
		"            network link is up.  A network device driver may take some time to enter netmap mode, or to\n"
		"            create a new transmit/receive ring pair when netmap(4) requests one.\n"
		"\n"
		"    -R rate\n"
		"            Packet transmission rate.  Not setting the packet transmission rate tells pkt-gen to transmit\n"
		"            packets as quickly as possible.  On servers from 2010 onward netmap(4) is able to com-\n"
		"            pletely use all of the bandwidth of a 10 or 40Gbps link, so this option should be used unless\n"
		"            your intention is to saturate the link.\n"
		"\n"
		"    -X      Dump payload of each packet transmitted or received.\n"
		"\n"
		"    -H len  Add empty virtio-net-header with size 'len'.  Valid sizes are 0, 10 and 12.  This option is\n"
		"            only used with Virtual Machine technologies that use virtio as a network interface.\n"
		"\n"
		"    -P file\n"
		"            Load the packet to be transmitted from a pcap file rather than constructing it within\n"
		"            pkt-gen.\n"
		"\n"
		"    -z      Use random IPv4/IPv6 src address/port.\n"
		"\n"
		"    -Z      Use random IPv4/IPv6 dst address/port.\n"
		"\n"
		"    -N      Do not normalize units (i.e., use bps, pps instead of Mbps, Kpps, etc.).\n"
		"\n"
		"    -F num_frags\n"
		"            Send multi-slot packets, each one with num_frags fragments.  A multi-slot packet is repre-\n"
		"            sented by two or more consecutive netmap slots with the NS_MOREFRAG flag set (except for the\n"
		"            last slot).  This is useful to transmit or receive packets larger than the netmap buffer\n"
		"            size.\n"
		"\n"
		"    -M frag_size\n"
		"            In multi-slot mode, frag_size specifies the size of each fragment, if smaller than the packet\n"
		"            length divided by num_frags.\n"
		"\n"
		"    -I      Use indirect buffers.  It is only valid for transmitting on VALE ports, and it is implemented\n"
		"            by setting the NS_INDIRECT flag in the netmap slots.\n"
		"\n"
		"    -W      Exit immediately if all the RX rings are empty the first time they are examined.\n"
		"\n"
		"    -v      Increase the verbosity level.\n"
		"\n"
		"    -r      In tx mode, do not initialize packets, but send whatever the content of the uninitialized\n"
		"            netmap buffers is (rubbish mode).\n"
		"\n"
		"    -A      Compute mean and standard deviation (over a sliding window) for the transmit or receive rate.\n"
		"\n"
		"    -B      Take Ethernet framing and CRC into account when computing the average bps.  This adds 4 bytes\n"
		"            of CRC and 20 bytes of framing to each packet.\n"
		"\n"
		"    -C tx_slots[,rx_slots[,tx_rings[,rx_rings]]]\n"
		"            Configuration in terms of number of rings and slots to be used when opening the netmap port.\n"
		"            Such configuration has an effect on software ports created on the fly, such as VALE ports and\n"
		"            netmap pipes.  The configuration may consist of 1 to 4 numbers separated by commas: tx_slots,\n"
		"            rx_slots, tx_rings, rx_rings.  Missing numbers or zeroes stand for default values.  As an\n"
		"            additional convenience, if exactly one number is specified, then this is assigned to both\n"
		"            tx_slots and rx_slots.  If there is no fourth number, then the third one is assigned to both\n"
		"            tx_rings and rx_rings.\n"
		"\n"
		"    -o options		data generation options (parsed using atoi)\n"
		"				OPT_PREFETCH	1\n"
		"				OPT_ACCESS	2\n"
		"				OPT_COPY	4\n"
		"				OPT_MEMCPY	8\n"
		"				OPT_TS		16 (add a timestamp)\n"
		"				OPT_INDIRECT	32 (use indirect buffers)\n"
		"				OPT_DUMP	64 (dump rx/tx traffic)\n"
		"				OPT_RUBBISH	256\n"
		"					(send whatever the buffers contain)\n"
		"				OPT_RANDOM_SRC  512\n"
		"				OPT_RANDOM_DST  1024\n"
		"				OPT_PPS_STATS   2048\n"
		"				OPT_UPDATE_CSUM 4096\n"
		"",
		cmd);
	exit(errcode);
}

/*
 * Allocate the global targs array and populate one struct targ per
 * thread, opening a separate netmap descriptor for each thread beyond
 * the first; then spawn the worker threads.
 * Returns 0 on success, -1 on failure.
 */
static int
start_threads(struct glob_arg *g) {
	int i;

	targs = calloc(g->nthreads, sizeof(*targs));
	struct targ *t;
	/*
	 * Now create the desired number of threads, each one
	 * using a single descriptor.
	 */
	for (i = 0; i < g->nthreads; i++) {
		/* per-thread PRNG seed derived from the current time */
		uint64_t seed = (uint64_t)time(0) | ((uint64_t)time(0) << 32);
		t = &targs[i];

		bzero(t, sizeof(*t));
		t->fd = -1; /* default, with pcap */
		t->g = g;
		memcpy(t->seed, &seed, sizeof(t->seed));

		if (g->dev_type == DEV_NETMAP) {
			int m = -1;

			/*
			 * if the user wants both HW and SW rings, we need to
			 * know when to switch from NR_REG_ONE_NIC to NR_REG_ONE_SW
			 */
			if (g->orig_mode == NR_REG_NIC_SW) {
				m = (g->td_type == TD_TYPE_RECEIVER ?
2646 g->nmd->reg.nr_rx_rings : 2647 g->nmd->reg.nr_tx_rings); 2648 } 2649 2650 if (i > 0) { 2651 int j; 2652 /* the first thread uses the fd opened by the main 2653 * thread, the other threads re-open /dev/netmap 2654 */ 2655 t->nmd = nmport_clone(g->nmd); 2656 if (t->nmd == NULL) 2657 return -1; 2658 2659 j = i; 2660 if (m > 0 && j >= m) { 2661 /* switch to the software rings */ 2662 t->nmd->reg.nr_mode = NR_REG_ONE_SW; 2663 j -= m; 2664 } 2665 t->nmd->reg.nr_ringid = j & NETMAP_RING_MASK; 2666 /* Only touch one of the rings (rx is already ok) */ 2667 if (g->td_type == TD_TYPE_RECEIVER) 2668 t->nmd->reg.nr_flags |= NETMAP_NO_TX_POLL; 2669 2670 /* register interface. Override ifname and ringid etc. */ 2671 if (nmport_open_desc(t->nmd) < 0) { 2672 nmport_undo_prepare(t->nmd); 2673 t->nmd = NULL; 2674 return -1; 2675 } 2676 } else { 2677 t->nmd = g->nmd; 2678 } 2679 t->fd = t->nmd->fd; 2680 t->frags = g->frags; 2681 } else { 2682 targs[i].fd = g->main_fd; 2683 } 2684 t->used = 1; 2685 t->me = i; 2686 if (g->affinity >= 0) { 2687 t->affinity = (g->affinity + i) % g->cpus; 2688 } else { 2689 t->affinity = -1; 2690 } 2691 /* default, init packets */ 2692 initialize_packet(t); 2693 } 2694 /* Wait for PHY reset. 
*/ 2695 D("Wait %d secs for phy reset", g->wait_link); 2696 sleep(g->wait_link); 2697 D("Ready..."); 2698 2699 for (i = 0; i < g->nthreads; i++) { 2700 t = &targs[i]; 2701 if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) { 2702 D("Unable to create thread %d: %s", i, strerror(errno)); 2703 t->used = 0; 2704 } 2705 } 2706 return 0; 2707 } 2708 2709 static void 2710 main_thread(struct glob_arg *g) 2711 { 2712 int i; 2713 2714 struct my_ctrs prev, cur; 2715 double delta_t; 2716 struct timeval tic, toc; 2717 2718 prev.pkts = prev.bytes = prev.events = 0; 2719 gettimeofday(&prev.t, NULL); 2720 for (;;) { 2721 char b1[40], b2[40], b3[40], b4[100]; 2722 uint64_t pps, usec; 2723 struct my_ctrs x; 2724 double abs; 2725 int done = 0; 2726 2727 usec = wait_for_next_report(&prev.t, &cur.t, 2728 g->report_interval); 2729 2730 cur.pkts = cur.bytes = cur.events = 0; 2731 cur.min_space = 0; 2732 if (usec < 10000) /* too short to be meaningful */ 2733 continue; 2734 /* accumulate counts for all threads */ 2735 for (i = 0; i < g->nthreads; i++) { 2736 cur.pkts += targs[i].ctr.pkts; 2737 cur.bytes += targs[i].ctr.bytes; 2738 cur.events += targs[i].ctr.events; 2739 cur.min_space += targs[i].ctr.min_space; 2740 targs[i].ctr.min_space = 99999; 2741 if (targs[i].used == 0) 2742 done++; 2743 } 2744 x.pkts = cur.pkts - prev.pkts; 2745 x.bytes = cur.bytes - prev.bytes; 2746 x.events = cur.events - prev.events; 2747 pps = (x.pkts*1000000 + usec/2) / usec; 2748 abs = (x.events > 0) ? (x.pkts / (double) x.events) : 0; 2749 2750 if (!(g->options & OPT_PPS_STATS)) { 2751 strcpy(b4, ""); 2752 } else { 2753 /* Compute some pps stats using a sliding window. 
 */
			double ppsavg = 0.0, ppsdev = 0.0;
			int nsamples = 0;	/* non-zero samples in the window */

			g->win[g->win_idx] = pps;
			g->win_idx = (g->win_idx + 1) % STATS_WIN;

			for (i = 0; i < STATS_WIN; i++) {
				ppsavg += g->win[i];
				if (g->win[i]) {
					nsamples ++;
				}
			}
			/* NOTE(review): if every window sample is 0, nsamples
			 * is 0 and this division yields NaN -- confirm that is
			 * acceptable for the printed stats. */
			ppsavg /= nsamples;

			for (i = 0; i < STATS_WIN; i++) {
				if (g->win[i] == 0) {
					continue;
				}
				ppsdev += (g->win[i] - ppsavg) * (g->win[i] - ppsavg);
			}
			ppsdev /= nsamples;
			ppsdev = sqrt(ppsdev);

			snprintf(b4, sizeof(b4), "[avg/std %s/%s pps]",
				 norm(b1, ppsavg, normalize), norm(b2, ppsdev, normalize));
		}

		D("%spps %s(%spkts %sbps in %llu usec) %.2f avg_batch %d min_space",
			norm(b1, pps, normalize), b4,
			norm(b2, (double)x.pkts, normalize),
			norm(b3, 1000000*((double)x.bytes*8+(double)x.pkts*g->framing)/usec, normalize),
			(unsigned long long)usec,
			abs, (int)cur.min_space);
		prev = cur;

		if (done == g->nthreads)
			break;
	}

	timerclear(&tic);
	timerclear(&toc);
	cur.pkts = cur.bytes = cur.events = 0;
	/* final round */
	for (i = 0; i < g->nthreads; i++) {
		struct timespec t_tic, t_toc;
		/*
		 * Join active threads, unregister interfaces and close
		 * file descriptors.
		 */
		if (targs[i].used)
			pthread_join(targs[i].thread, NULL); /* blocking */
		if (g->dev_type == DEV_NETMAP) {
			nmport_close(targs[i].nmd);
			targs[i].nmd = NULL;
		} else {
			close(targs[i].fd);
		}

		if (targs[i].completed == 0)
			D("ouch, thread %d exited with error", i);

		/*
		 * Collect threads output and extract information about
		 * how long it took to send all the packets.
		 */
		cur.pkts += targs[i].ctr.pkts;
		cur.bytes += targs[i].ctr.bytes;
		cur.events += targs[i].ctr.events;
		/* collect the largest start (tic) and end (toc) times,
		 * XXX maybe we should do the earliest tic, or do a weighted
		 * average ?
		 */
		t_tic = timeval2spec(&tic);
		t_toc = timeval2spec(&toc);
		if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic))
			tic = timespec2val(&targs[i].tic);
		if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc))
			toc = timespec2val(&targs[i].toc);
	}

	/* print output. */
	timersub(&toc, &tic, &toc);
	delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
	if (g->td_type == TD_TYPE_SENDER)
		tx_output(g, &cur, delta_t, "Sent");
	else if (g->td_type == TD_TYPE_RECEIVER)
		tx_output(g, &cur, delta_t, "Received");
}

/* Table entry mapping a -f keyword to a thread body function. */
struct td_desc {
	int ty;			/* TD_TYPE_* of the body */
	const char *key;	/* keyword given to -f */
	void *f;		/* thread body function */
	int default_burst;	/* default -b value for this body */
};

static struct td_desc func[] = {
	{ TD_TYPE_RECEIVER,	"rx",		receiver_body,	512},	/* default */
	{ TD_TYPE_SENDER,	"tx",		sender_body,	512 },
	{ TD_TYPE_OTHER,	"ping",		ping_body,	1 },
	{ TD_TYPE_OTHER,	"pong",		pong_body,	1 },
	{ TD_TYPE_SENDER,	"txseq",	txseq_body,	512 },
	{ TD_TYPE_RECEIVER,	"rxseq",	rxseq_body,	512 },
	{ 0,			NULL,		NULL,		0 }	/* sentinel */
};

/*
 * Open (or create) a tap device and return its file descriptor,
 * or a negative value on error.
 */
static int
tap_alloc(char *dev)
{
	struct ifreq ifr;
	int fd, err;
	const char *clonedev = TAP_CLONEDEV;

	(void)err;
	(void)dev;
	/* Arguments taken by the function:
	 *
	 * char *dev: the name of an interface (or '\0'). MUST have enough
	 *   space to hold the interface name if '\0' is passed
	 * int flags: interface flags (eg, IFF_TUN etc. 
2874 */ 2875 2876 #ifdef __FreeBSD__ 2877 if (dev[3]) { /* tapSomething */ 2878 static char buf[128]; 2879 snprintf(buf, sizeof(buf), "/dev/%s", dev); 2880 clonedev = buf; 2881 } 2882 #endif 2883 /* open the device */ 2884 if( (fd = open(clonedev, O_RDWR)) < 0 ) { 2885 return fd; 2886 } 2887 D("%s open successful", clonedev); 2888 2889 /* preparation of the struct ifr, of type "struct ifreq" */ 2890 memset(&ifr, 0, sizeof(ifr)); 2891 2892 #ifdef linux 2893 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 2894 2895 if (*dev) { 2896 /* if a device name was specified, put it in the structure; otherwise, 2897 * the kernel will try to allocate the "next" device of the 2898 * specified type */ 2899 size_t len = strlen(dev); 2900 if (len > IFNAMSIZ) { 2901 D("%s too long", dev); 2902 return -1; 2903 } 2904 memcpy(ifr.ifr_name, dev, len); 2905 } 2906 2907 /* try to create the device */ 2908 if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) { 2909 D("failed to do a TUNSETIFF: %s", strerror(errno)); 2910 close(fd); 2911 return err; 2912 } 2913 2914 /* if the operation was successful, write back the name of the 2915 * interface to the variable "dev", so the caller can know 2916 * it. 
Note that the caller MUST reserve space in *dev (see calling 2917 * code below) */ 2918 strcpy(dev, ifr.ifr_name); 2919 D("new name is %s", dev); 2920 #endif /* linux */ 2921 2922 /* this is the special file descriptor that the caller will use to talk 2923 * with the virtual interface */ 2924 return fd; 2925 } 2926 2927 int 2928 main(int arc, char **argv) 2929 { 2930 int i; 2931 struct sigaction sa; 2932 sigset_t ss; 2933 2934 struct glob_arg g; 2935 2936 int ch; 2937 int devqueues = 1; /* how many device queues */ 2938 int wait_link_arg = 0; 2939 2940 int pkt_size_done = 0; 2941 2942 struct td_desc *fn = func; 2943 2944 bzero(&g, sizeof(g)); 2945 2946 g.main_fd = -1; 2947 g.td_body = fn->f; 2948 g.td_type = fn->ty; 2949 g.report_interval = 1000; /* report interval */ 2950 g.affinity = -1; 2951 /* ip addresses can also be a range x.x.x.x-x.x.x.y */ 2952 g.af = AF_INET; /* default */ 2953 g.src_ip.name = "10.0.0.1"; 2954 g.dst_ip.name = "10.1.0.1"; 2955 g.dst_mac.name = "ff:ff:ff:ff:ff:ff"; 2956 g.src_mac.name = NULL; 2957 g.pkt_size = 60; 2958 g.pkt_min_size = 0; 2959 g.nthreads = 1; 2960 g.cpus = 1; /* default */ 2961 g.forever = 1; 2962 g.tx_rate = 0; 2963 g.frags = 1; 2964 g.frag_size = (u_int)-1; /* use the netmap buffer size by default */ 2965 g.nmr_config = ""; 2966 g.virt_header = 0; 2967 g.wait_link = 2; /* wait 2 seconds for physical ports */ 2968 2969 while ((ch = getopt(arc, argv, "46a:f:F:Nn:i:Il:d:s:D:S:b:c:o:p:" 2970 "T:w:WvR:XC:H:rP:zZAhBM:")) != -1) { 2971 2972 switch(ch) { 2973 default: 2974 D("bad option %c %s", ch, optarg); 2975 usage(-1); 2976 break; 2977 2978 case 'h': 2979 usage(0); 2980 break; 2981 2982 case '4': 2983 g.af = AF_INET; 2984 break; 2985 2986 case '6': 2987 g.af = AF_INET6; 2988 break; 2989 2990 case 'N': 2991 normalize = 0; 2992 break; 2993 2994 case 'n': 2995 g.npackets = strtoull(optarg, NULL, 10); 2996 break; 2997 2998 case 'F': 2999 i = atoi(optarg); 3000 if (i < 1 || i > 63) { 3001 D("invalid frags %d [1..63], ignore", i); 
				break;
			}
			g.frags = i;
			break;

		case 'M':
			g.frag_size = atoi(optarg);
			break;

		case 'f':
			/* select the thread body by keyword */
			for (fn = func; fn->key; fn++) {
				if (!strcmp(fn->key, optarg))
					break;
			}
			if (fn->key) {
				g.td_body = fn->f;
				g.td_type = fn->ty;
			} else {
				D("unrecognised function %s", optarg);
			}
			break;

		case 'o':	/* data generation options */
			g.options |= atoi(optarg);
			break;

		case 'a':       /* force affinity */
			g.affinity = atoi(optarg);
			break;

		case 'i':	/* interface */
			/* a prefix of tap: netmap: or pcap: forces the mode.
			 * otherwise we guess
			 */
			D("interface is %s", optarg);
			if (strlen(optarg) > MAX_IFNAMELEN - 8) {
				D("ifname too long %s", optarg);
				break;
			}
			strcpy(g.ifname, optarg);
			if (!strcmp(optarg, "null")) {
				g.dev_type = DEV_NETMAP;
				g.dummy_send = 1;
			} else if (!strncmp(optarg, "tap:", 4)) {
				g.dev_type = DEV_TAP;
				strcpy(g.ifname, optarg + 4);
			} else if (!strncmp(optarg, "pcap:", 5)) {
				g.dev_type = DEV_PCAP;
				strcpy(g.ifname, optarg + 5);
			} else if (!strncmp(optarg, "netmap:", 7) ||
				   !strncmp(optarg, "vale", 4)) {
				g.dev_type = DEV_NETMAP;
			} else if (!strncmp(optarg, "tap", 3)) {
				g.dev_type = DEV_TAP;
			} else { /* prepend netmap: */
				g.dev_type = DEV_NETMAP;
				sprintf(g.ifname, "netmap:%s", optarg);
			}
			break;

		case 'I':
			g.options |= OPT_INDIRECT;	/* use indirect buffers */
			break;

		case 'l':	/* pkt_size */
			if (pkt_size_done) {
				g.pkt_min_size = atoi(optarg);
			} else {
				g.pkt_size = atoi(optarg);
				pkt_size_done = 1;
			}
			break;

		case 'd':
			g.dst_ip.name = optarg;
			break;

		case 's':
			g.src_ip.name = optarg;
			break;

		case 'T':	/* report interval */
			g.report_interval = atoi(optarg);
			break;

		case 'w':
			g.wait_link = atoi(optarg);
			wait_link_arg = 1;
			break;

		case 'W':
			g.forever = 0; /* exit RX with no traffic */
			break;

		case 'b':	/* burst */
			g.burst = atoi(optarg);
			break;
		case 'c':
			g.cpus = atoi(optarg);
			break;
		case 'p':
			g.nthreads = atoi(optarg);
			break;

		case 'D': /* destination mac */
			g.dst_mac.name = optarg;
			break;

		case 'S': /* source mac */
			g.src_mac.name = optarg;
			break;
		case 'v':
			verbose++;
			break;
		case 'R':
			g.tx_rate = atoi(optarg);
			break;
		case 'X':
			g.options |= OPT_DUMP;
			break;
		case 'C':
			D("WARNING: the 'C' option is deprecated, use the '+conf:' libnetmap option instead");
			g.nmr_config = strdup(optarg);
			break;
		case 'H':
			g.virt_header = atoi(optarg);
			break;
		case 'P':
			g.packet_file = strdup(optarg);
			break;
		case 'r':
			g.options |= OPT_RUBBISH;
			break;
		case 'z':
			g.options |= OPT_RANDOM_SRC;
			break;
		case 'Z':
			g.options |= OPT_RANDOM_DST;
			break;
		case 'A':
			g.options |= OPT_PPS_STATS;
			break;
		case 'B':
			/* raw packets have4 bytes crc + 20 bytes framing */
			// XXX maybe add an option to pass the IFG
			g.framing = 24 * 8;
			break;
		}
	}

	if (strlen(g.ifname) <=0 ) {
		D("missing ifname");
		usage(-1);
	}

	if (g.burst == 0) {
		g.burst = fn->default_burst;
		D("using default burst size: %d", g.burst);
	}

	g.system_cpus = i = system_ncpus();
	if (g.cpus < 0 || g.cpus > i) {
		D("%d cpus is too high, have only %d cpus", g.cpus, i);
		usage(-1);
	}
	D("running on %d cpus (have %d)", g.cpus, i);
	if (g.cpus == 0)
		g.cpus = i;

	/* VALE ports are purely software, no link to wait for */
	if (!wait_link_arg && !strncmp(g.ifname, "vale", 4)) {
		g.wait_link = 0;
	}

	if (g.pkt_size < 16 || g.pkt_size > MAX_PKTSIZE) {
		D("bad pktsize %d [16..%d]\n", g.pkt_size, MAX_PKTSIZE);
		usage(-1);
	}

	if (g.pkt_min_size > 0 && (g.pkt_min_size < 16 || g.pkt_min_size > g.pkt_size)) {
		D("bad pktminsize %d [16..%d]\n", g.pkt_min_size, g.pkt_size);
		usage(-1);
	}

	if (g.src_mac.name == NULL) {
		static char mybuf[20] = "00:00:00:00:00:00";
		/* retrieve source mac address. */
		if (source_hwaddr(g.ifname, mybuf) == -1) {
			D("Unable to retrieve source mac");
			// continue, fail later
		}
		g.src_mac.name = mybuf;
	}
	/* extract address ranges */
	if (extract_mac_range(&g.src_mac) || extract_mac_range(&g.dst_mac))
		usage(-1);
	g.options |= extract_ip_range(&g.src_ip, g.af);
	g.options |= extract_ip_range(&g.dst_ip, g.af);

	if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1
			&& g.virt_header != VIRT_HDR_2) {
		D("bad virtio-net-header length");
		usage(-1);
	}

	if (g.dev_type == DEV_TAP) {
		D("want to use tap %s", g.ifname);
		g.main_fd = tap_alloc(g.ifname);
		if (g.main_fd < 0) {
			D("cannot open tap %s", g.ifname);
			usage(-1);
		}
#ifndef NO_PCAP
	} else if (g.dev_type == DEV_PCAP) {
		char pcap_errbuf[PCAP_ERRBUF_SIZE];

		pcap_errbuf[0] = '\0'; // init the buffer
		g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf);
		if (g.p == NULL) {
			D("cannot open pcap on %s", g.ifname);
			usage(-1);
		}
		g.main_fd = pcap_fileno(g.p);
		D("using pcap on %s fileno %d", g.ifname, g.main_fd);
#endif /* !NO_PCAP */
	} else if (g.dummy_send) { /* but DEV_NETMAP */
		D("using a dummy send routine");
	} else {
		g.nmd = nmport_prepare(g.ifname);
		if (g.nmd == NULL)
			goto out;

		parse_nmr_config(g.nmr_config, &g.nmd->reg);

		g.nmd->reg.nr_flags |= NR_ACCEPT_VNET_HDR;

		/*
		 * Open the netmap device using nm_open().
		 *
		 * protocol stack and may cause a reset of the card,
		 * which in turn may take some time for the PHY to
		 * reconfigure. We do the open here to have time to reset.
		 */
		g.orig_mode = g.nmd->reg.nr_mode;
		if (g.nthreads > 1) {
			/* with multiple threads, each one registers a
			 * single ring; pick the ONE_* variant of the mode */
			switch (g.orig_mode) {
			case NR_REG_ALL_NIC:
			case NR_REG_NIC_SW:
				g.nmd->reg.nr_mode = NR_REG_ONE_NIC;
				break;
			case NR_REG_SW:
				g.nmd->reg.nr_mode = NR_REG_ONE_SW;
				break;
			default:
				break;
			}
			g.nmd->reg.nr_ringid = 0;
		}
		if (nmport_open_desc(g.nmd) < 0)
			goto out;
		g.main_fd = g.nmd->fd;
		ND("mapped %luKB at %p", (unsigned long)(g.nmd->req.nr_memsize>>10),
				g.nmd->mem);

		if (g.virt_header) {
			/* Set the virtio-net header length, since the user asked
			 * for it explicitly. */
			set_vnet_hdr_len(&g);
		} else {
			/* Check whether the netmap port we opened requires us to send
			 * and receive frames with virtio-net header. */
			get_vnet_hdr_len(&g);
		}

		/* get num of queues in tx or rx */
		if (g.td_type == TD_TYPE_SENDER)
			devqueues = g.nmd->reg.nr_tx_rings + g.nmd->reg.nr_host_tx_rings;
		else
			devqueues = g.nmd->reg.nr_rx_rings + g.nmd->reg.nr_host_rx_rings;

		/* validate provided nthreads. */
		if (g.nthreads < 1 || g.nthreads > devqueues) {
			D("bad nthreads %d, have %d queues", g.nthreads, devqueues);
			// continue, fail later
		}

		if (g.td_type == TD_TYPE_SENDER) {
			int mtu = get_if_mtu(&g);

			if (mtu > 0 && g.pkt_size > mtu) {
				D("pkt_size (%d) must be <= mtu (%d)",
					g.pkt_size, mtu);
				return -1;
			}
		}

		if (verbose) {
			struct netmap_if *nifp = g.nmd->nifp;
			struct nmreq_register *req = &g.nmd->reg;

			D("nifp at offset %"PRIu64" ntxqs %d nrxqs %d memid %d",
					req->nr_offset, req->nr_tx_rings, req->nr_rx_rings,
					req->nr_mem_id);
			for (i = 0; i < req->nr_tx_rings + req->nr_host_tx_rings; i++) {
				struct netmap_ring *ring = NETMAP_TXRING(nifp, i);
				D("   TX%d at offset %p slots %d", i,
						(void *)((char *)ring - (char *)nifp), ring->num_slots);
			}
			for (i = 0; i < req->nr_rx_rings + req->nr_host_rx_rings; i++) {
				struct netmap_ring *ring = NETMAP_RXRING(nifp, i);
				D("   RX%d at offset %p slots %d", i,
						(void *)((char *)ring - (char *)nifp), ring->num_slots);
			}
		}

		/* Print some debug information. */
		fprintf(stdout,
			"%s %s: %d queues, %d threads and %d cpus.\n",
			(g.td_type == TD_TYPE_SENDER) ? "Sending on" :
				((g.td_type == TD_TYPE_RECEIVER) ? "Receiving from" :
				"Working on"),
			g.ifname,
			devqueues,
			g.nthreads,
			g.cpus);
		if (g.td_type == TD_TYPE_SENDER) {
			fprintf(stdout, "%s -> %s (%s -> %s)\n",
				g.src_ip.name, g.dst_ip.name,
				g.src_mac.name, g.dst_mac.name);
		}

	out:
		/* Exit if something went wrong. */
		if (g.main_fd < 0) {
			D("aborting");
			usage(-1);
		}
	}


	if (g.options) {
		D("--- SPECIAL OPTIONS:%s%s%s%s%s%s\n",
			g.options & OPT_PREFETCH ? " prefetch" : "",
			g.options & OPT_ACCESS ? " access" : "",
			g.options & OPT_MEMCPY ? " memcpy" : "",
			g.options & OPT_INDIRECT ? " indirect" : "",
			g.options & OPT_COPY ? " copy" : "",
			g.options & OPT_RUBBISH ? " rubbish " : "");
	}

	g.tx_period.tv_sec = g.tx_period.tv_nsec = 0;
	if (g.tx_rate > 0) {
		/* try to have at least something every second,
		 * reducing the burst size to some 0.01s worth of data
		 * (but no less than one full set of fragments)
		 */
		uint64_t x;
		int lim = (g.tx_rate)/300;
		if (g.burst > lim)
			g.burst = lim;
		if (g.burst == 0)
			g.burst = 1;
		x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate;
		g.tx_period.tv_nsec = x;
		g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000;
		g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000;
	}
	if (g.td_type == TD_TYPE_SENDER)
		D("Sending %d packets every  %jd.%09ld s",
			g.burst, (intmax_t)g.tx_period.tv_sec, g.tx_period.tv_nsec);
	/* Install ^C handler. */
	global_nthreads = g.nthreads;
	sigemptyset(&ss);
	sigaddset(&ss, SIGINT);
	/* block SIGINT now, so that all created threads will inherit the mask */
	if (pthread_sigmask(SIG_BLOCK, &ss, NULL) < 0) {
		D("failed to block SIGINT: %s", strerror(errno));
	}
	if (start_threads(&g) < 0)
		return 1;
	/* Install the handler and re-enable SIGINT for the main thread */
	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = sigint_h;
	if (sigaction(SIGINT, &sa, NULL) < 0) {
		D("failed to install ^C handler: %s", strerror(errno));
	}

	if (pthread_sigmask(SIG_UNBLOCK, &ss, NULL) < 0) {
		D("failed to re-enable SIGINT: %s", strerror(errno));
	}
	main_thread(&g);
	free(targs);
	return 0;
}

/* end of file */