1 /* 2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 3 * Copyright (C) 2013-2015 Universita` di Pisa. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 /* 28 * $FreeBSD$ 29 * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $ 30 * 31 * Example program to show how to build a multithreaded packet 32 * source/sink using the netmap device. 33 * 34 * In this example we create a programmable number of threads 35 * to take care of all the queues of the interface used to 36 * send or receive traffic. 
 *
 */

#define _GNU_SOURCE	/* for CPU_SET() */
#include <stdio.h>
#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>


#include <ctype.h>	// isprint()
#include <unistd.h>	// sysconf()
#include <sys/poll.h>
#include <arpa/inet.h>	/* ntohs */
#ifndef _WIN32
#include <sys/sysctl.h>	/* sysctl */
#endif
#include <ifaddrs.h>	/* getifaddrs */
#include <net/ethernet.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <assert.h>
#include <math.h>

#include <pthread.h>

#ifndef NO_PCAP
#include <pcap/pcap.h>
#endif

#include "ctrs.h"

/*
 * Per-platform compatibility shims: each branch below provides a
 * cpuset_t type, CPU_ZERO/CPU_SET, and (where missing) ether_aton /
 * ether_ntoa and clock / affinity wrappers, so the portable code
 * further down can use one API on all supported systems.
 */
#ifdef _WIN32
#define cpuset_t        DWORD_PTR   //uint64_t
static inline void CPU_ZERO(cpuset_t *p)
{
	*p = 0;
}

static inline void CPU_SET(uint32_t i, cpuset_t *p)
{
	/* single-word mask: only the low 64 CPU ids are representable */
	*p |= 1<< (i & 0x3f);
}

#define pthread_setaffinity_np(a, b, c) !SetThreadAffinityMask(a, *c)    //((void)a, 0)
#define TAP_CLONEDEV	"/dev/tap"
#define AF_LINK	18	//defined in winsocks.h
#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME
#include <net/if_dl.h>

/*
 * Convert an ASCII representation of an ethernet address to
 * binary form.
 */
struct ether_addr *
ether_aton(const char *a)
{
	int i;
	/* NOTE: static result, overwritten by the next call (not thread safe) */
	static struct ether_addr o;
	unsigned int o0, o1, o2, o3, o4, o5;

	i = sscanf(a, "%x:%x:%x:%x:%x:%x", &o0, &o1, &o2, &o3, &o4, &o5);

	if (i != 6)
		return (NULL);

	o.octet[0]=o0;
	o.octet[1]=o1;
	o.octet[2]=o2;
	o.octet[3]=o3;
	o.octet[4]=o4;
	o.octet[5]=o5;

	return ((struct ether_addr *)&o);
}

/*
 * Convert a binary representation of an ethernet address to
 * an ASCII string.
 */
char *
ether_ntoa(const struct ether_addr *n)
{
	int i;
	/* 17 chars "xx:xx:xx:xx:xx:xx" + NUL; static, not thread safe */
	static char a[18];

	i = sprintf(a, "%02x:%02x:%02x:%02x:%02x:%02x",
	    n->octet[0], n->octet[1], n->octet[2],
	    n->octet[3], n->octet[4], n->octet[5]);
	/* sprintf returns 17 on success, so i < 17 signals an error */
	return (i < 17 ? NULL : (char *)&a);
}
#endif /* _WIN32 */

#ifdef linux

#define cpuset_t        cpu_set_t

#define ifr_flagshigh  ifr_flags        /* only the low 16 bits here */
#define IFF_PPROMISC   IFF_PROMISC      /* IFF_PPROMISC does not exist */
#include <linux/ethtool.h>
#include <linux/sockios.h>

#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME
#include <netinet/ether.h>      /* ether_aton */
#include <linux/if_packet.h>    /* sockaddr_ll */
#endif  /* linux */

#ifdef __FreeBSD__
#include <sys/endian.h> /* le64toh */
#include <machine/param.h>

#include <pthread_np.h> /* pthread w/ affinity */
#include <sys/cpuset.h> /* cpu_set */
#include <net/if_dl.h>  /* LLADDR */
#endif  /* __FreeBSD__ */

#ifdef __APPLE__

#define cpuset_t        uint64_t        // XXX
static inline void CPU_ZERO(cpuset_t *p)
{
	*p = 0;
}

static inline void CPU_SET(uint32_t i, cpuset_t *p)
{
	*p |= 1<< (i & 0x3f);
}

/* no thread affinity support on OSX: silently succeed */
#define pthread_setaffinity_np(a, b, c) ((void)a, 0)

#define ifr_flagshigh  ifr_flags        // XXX
#define IFF_PPROMISC   IFF_PROMISC
#include <net/if_dl.h>  /* LLADDR */
/* NOTE(review): this stubs clock_gettime to always return 0.0 on OSX,
 * so all timing/rate code is effectively disabled there -- confirm. */
#define clock_gettime(a,b)      \
	do {struct timespec t0 = {0,0}; *(b) = t0; } while (0)
#endif  /* __APPLE__ */

/* default payloads placed in generated packets (NUL-terminated later) */
const char *default_payload="netmap pkt-gen DIRECT payload\n"
	"http://info.iet.unipi.it/~luigi/netmap/ ";

const char *indirect_payload="netmap pkt-gen indirect payload\n"
	"http://info.iet.unipi.it/~luigi/netmap/ ";

int verbose = 0;

#define SKIP_PAYLOAD 1 /* do not check payload.
XXX unused */ 184 185 186 #define VIRT_HDR_1 10 /* length of a base vnet-hdr */ 187 #define VIRT_HDR_2 12 /* length of the extenede vnet-hdr */ 188 #define VIRT_HDR_MAX VIRT_HDR_2 189 struct virt_header { 190 uint8_t fields[VIRT_HDR_MAX]; 191 }; 192 193 #define MAX_BODYSIZE 16384 194 195 struct pkt { 196 struct virt_header vh; 197 struct ether_header eh; 198 struct ip ip; 199 struct udphdr udp; 200 uint8_t body[MAX_BODYSIZE]; // XXX hardwired 201 } __attribute__((__packed__)); 202 203 struct ip_range { 204 char *name; 205 uint32_t start, end; /* same as struct in_addr */ 206 uint16_t port0, port1; 207 }; 208 209 struct mac_range { 210 char *name; 211 struct ether_addr start, end; 212 }; 213 214 /* ifname can be netmap:foo-xxxx */ 215 #define MAX_IFNAMELEN 64 /* our buffer for ifname */ 216 //#define MAX_PKTSIZE 1536 217 #define MAX_PKTSIZE MAX_BODYSIZE /* XXX: + IP_HDR + ETH_HDR */ 218 219 /* compact timestamp to fit into 60 byte packet. (enough to obtain RTT) */ 220 struct tstamp { 221 uint32_t sec; 222 uint32_t nsec; 223 }; 224 225 /* 226 * global arguments for all threads 227 */ 228 229 struct glob_arg { 230 struct ip_range src_ip; 231 struct ip_range dst_ip; 232 struct mac_range dst_mac; 233 struct mac_range src_mac; 234 int pkt_size; 235 int burst; 236 int forever; 237 uint64_t npackets; /* total packets to send */ 238 int frags; /* fragments per packet */ 239 int nthreads; 240 int cpus; /* cpus used for running */ 241 int system_cpus; /* cpus on the system */ 242 243 int options; /* testing */ 244 #define OPT_PREFETCH 1 245 #define OPT_ACCESS 2 246 #define OPT_COPY 4 247 #define OPT_MEMCPY 8 248 #define OPT_TS 16 /* add a timestamp */ 249 #define OPT_INDIRECT 32 /* use indirect buffers, tx only */ 250 #define OPT_DUMP 64 /* dump rx/tx traffic */ 251 #define OPT_RUBBISH 256 /* send wathever the buffers contain */ 252 #define OPT_RANDOM_SRC 512 253 #define OPT_RANDOM_DST 1024 254 #define OPT_PPS_STATS 2048 255 int dev_type; 256 #ifndef NO_PCAP 257 pcap_t *p; 
258 #endif 259 260 int tx_rate; 261 struct timespec tx_period; 262 263 int affinity; 264 int main_fd; 265 struct nm_desc *nmd; 266 int report_interval; /* milliseconds between prints */ 267 void *(*td_body)(void *); 268 int td_type; 269 void *mmap_addr; 270 char ifname[MAX_IFNAMELEN]; 271 char *nmr_config; 272 int dummy_send; 273 int virt_header; /* send also the virt_header */ 274 int extra_bufs; /* goes in nr_arg3 */ 275 int extra_pipes; /* goes in nr_arg1 */ 276 char *packet_file; /* -P option */ 277 #define STATS_WIN 15 278 int win_idx; 279 int64_t win[STATS_WIN]; 280 }; 281 enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP }; 282 283 284 /* 285 * Arguments for a new thread. The same structure is used by 286 * the source and the sink 287 */ 288 struct targ { 289 struct glob_arg *g; 290 int used; 291 int completed; 292 int cancel; 293 int fd; 294 struct nm_desc *nmd; 295 /* these ought to be volatile, but they are 296 * only sampled and errors should not accumulate 297 */ 298 struct my_ctrs ctr; 299 300 struct timespec tic, toc; 301 int me; 302 pthread_t thread; 303 int affinity; 304 305 struct pkt pkt; 306 void *frame; 307 }; 308 309 310 /* 311 * extract the extremes from a range of ipv4 addresses. 312 * addr_lo[-addr_hi][:port_lo[-port_hi]] 313 */ 314 static void 315 extract_ip_range(struct ip_range *r) 316 { 317 char *ap, *pp; 318 struct in_addr a; 319 320 if (verbose) 321 D("extract IP range from %s", r->name); 322 r->port0 = r->port1 = 0; 323 r->start = r->end = 0; 324 325 /* the first - splits start/end of range */ 326 ap = index(r->name, '-'); /* do we have ports ? 
*/ 327 if (ap) { 328 *ap++ = '\0'; 329 } 330 /* grab the initial values (mandatory) */ 331 pp = index(r->name, ':'); 332 if (pp) { 333 *pp++ = '\0'; 334 r->port0 = r->port1 = strtol(pp, NULL, 0); 335 }; 336 inet_aton(r->name, &a); 337 r->start = r->end = ntohl(a.s_addr); 338 if (ap) { 339 pp = index(ap, ':'); 340 if (pp) { 341 *pp++ = '\0'; 342 if (*pp) 343 r->port1 = strtol(pp, NULL, 0); 344 } 345 if (*ap) { 346 inet_aton(ap, &a); 347 r->end = ntohl(a.s_addr); 348 } 349 } 350 if (r->port0 > r->port1) { 351 uint16_t tmp = r->port0; 352 r->port0 = r->port1; 353 r->port1 = tmp; 354 } 355 if (r->start > r->end) { 356 uint32_t tmp = r->start; 357 r->start = r->end; 358 r->end = tmp; 359 } 360 { 361 struct in_addr a; 362 char buf1[16]; // one ip address 363 364 a.s_addr = htonl(r->end); 365 strncpy(buf1, inet_ntoa(a), sizeof(buf1)); 366 a.s_addr = htonl(r->start); 367 if (1) 368 D("range is %s:%d to %s:%d", 369 inet_ntoa(a), r->port0, buf1, r->port1); 370 } 371 } 372 373 static void 374 extract_mac_range(struct mac_range *r) 375 { 376 if (verbose) 377 D("extract MAC range from %s", r->name); 378 bcopy(ether_aton(r->name), &r->start, 6); 379 bcopy(ether_aton(r->name), &r->end, 6); 380 #if 0 381 bcopy(targ->src_mac, eh->ether_shost, 6); 382 p = index(targ->g->src_mac, '-'); 383 if (p) 384 targ->src_mac_range = atoi(p+1); 385 386 bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6); 387 bcopy(targ->dst_mac, eh->ether_dhost, 6); 388 p = index(targ->g->dst_mac, '-'); 389 if (p) 390 targ->dst_mac_range = atoi(p+1); 391 #endif 392 if (verbose) 393 D("%s starts at %s", r->name, ether_ntoa(&r->start)); 394 } 395 396 static struct targ *targs; 397 static int global_nthreads; 398 399 /* control-C handler */ 400 static void 401 sigint_h(int sig) 402 { 403 int i; 404 405 (void)sig; /* UNUSED */ 406 D("received control-C on thread %p", (void *)pthread_self()); 407 for (i = 0; i < global_nthreads; i++) { 408 targs[i].cancel = 1; 409 } 410 } 411 412 /* sysctl wrapper to return the 
number of active CPUs */ 413 static int 414 system_ncpus(void) 415 { 416 int ncpus; 417 #if defined (__FreeBSD__) 418 int mib[2] = { CTL_HW, HW_NCPU }; 419 size_t len = sizeof(mib); 420 sysctl(mib, 2, &ncpus, &len, NULL, 0); 421 #elif defined(linux) 422 ncpus = sysconf(_SC_NPROCESSORS_ONLN); 423 #elif defined(_WIN32) 424 { 425 SYSTEM_INFO sysinfo; 426 GetSystemInfo(&sysinfo); 427 ncpus = sysinfo.dwNumberOfProcessors; 428 } 429 #else /* others */ 430 ncpus = 1; 431 #endif /* others */ 432 return (ncpus); 433 } 434 435 #ifdef __linux__ 436 #define sockaddr_dl sockaddr_ll 437 #define sdl_family sll_family 438 #define AF_LINK AF_PACKET 439 #define LLADDR(s) s->sll_addr; 440 #include <linux/if_tun.h> 441 #define TAP_CLONEDEV "/dev/net/tun" 442 #endif /* __linux__ */ 443 444 #ifdef __FreeBSD__ 445 #include <net/if_tun.h> 446 #define TAP_CLONEDEV "/dev/tap" 447 #endif /* __FreeBSD */ 448 449 #ifdef __APPLE__ 450 // #warning TAP not supported on apple ? 451 #include <net/if_utun.h> 452 #define TAP_CLONEDEV "/dev/tap" 453 #endif /* __APPLE__ */ 454 455 456 /* 457 * parse the vale configuration in conf and put it in nmr. 458 * Return the flag set if necessary. 459 * The configuration may consist of 0 to 4 numbers separated 460 * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings. 461 * Missing numbers or zeroes stand for default values. 462 * As an additional convenience, if exactly one number 463 * is specified, then this is assigned to both #tx-slots and #rx-slots. 464 * If there is no 4th number, then the 3rd is assigned to both #tx-rings 465 * and #rx-rings. 466 */ 467 int 468 parse_nmr_config(const char* conf, struct nmreq *nmr) 469 { 470 char *w, *tok; 471 int i, v; 472 473 nmr->nr_tx_rings = nmr->nr_rx_rings = 0; 474 nmr->nr_tx_slots = nmr->nr_rx_slots = 0; 475 if (conf == NULL || ! 
*conf) 476 return 0; 477 w = strdup(conf); 478 for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) { 479 v = atoi(tok); 480 switch (i) { 481 case 0: 482 nmr->nr_tx_slots = nmr->nr_rx_slots = v; 483 break; 484 case 1: 485 nmr->nr_rx_slots = v; 486 break; 487 case 2: 488 nmr->nr_tx_rings = nmr->nr_rx_rings = v; 489 break; 490 case 3: 491 nmr->nr_rx_rings = v; 492 break; 493 default: 494 D("ignored config: %s", tok); 495 break; 496 } 497 } 498 D("txr %d txd %d rxr %d rxd %d", 499 nmr->nr_tx_rings, nmr->nr_tx_slots, 500 nmr->nr_rx_rings, nmr->nr_rx_slots); 501 free(w); 502 return (nmr->nr_tx_rings || nmr->nr_tx_slots || 503 nmr->nr_rx_rings || nmr->nr_rx_slots) ? 504 NM_OPEN_RING_CFG : 0; 505 } 506 507 508 /* 509 * locate the src mac address for our interface, put it 510 * into the user-supplied buffer. return 0 if ok, -1 on error. 511 */ 512 static int 513 source_hwaddr(const char *ifname, char *buf) 514 { 515 struct ifaddrs *ifaphead, *ifap; 516 int l = sizeof(ifap->ifa_name); 517 518 if (getifaddrs(&ifaphead) != 0) { 519 D("getifaddrs %s failed", ifname); 520 return (-1); 521 } 522 523 for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) { 524 struct sockaddr_dl *sdl = 525 (struct sockaddr_dl *)ifap->ifa_addr; 526 uint8_t *mac; 527 528 if (!sdl || sdl->sdl_family != AF_LINK) 529 continue; 530 if (strncmp(ifap->ifa_name, ifname, l) != 0) 531 continue; 532 mac = (uint8_t *)LLADDR(sdl); 533 sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x", 534 mac[0], mac[1], mac[2], 535 mac[3], mac[4], mac[5]); 536 if (verbose) 537 D("source hwaddr %s", buf); 538 break; 539 } 540 freeifaddrs(ifaphead); 541 return ifap ? 0 : 1; 542 } 543 544 545 /* set the thread affinity. 
*/ 546 static int 547 setaffinity(pthread_t me, int i) 548 { 549 cpuset_t cpumask; 550 551 if (i == -1) 552 return 0; 553 554 /* Set thread affinity affinity.*/ 555 CPU_ZERO(&cpumask); 556 CPU_SET(i, &cpumask); 557 558 if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) { 559 D("Unable to set affinity: %s", strerror(errno)); 560 return 1; 561 } 562 return 0; 563 } 564 565 /* Compute the checksum of the given ip header. */ 566 static uint16_t 567 checksum(const void *data, uint16_t len, uint32_t sum) 568 { 569 const uint8_t *addr = data; 570 uint32_t i; 571 572 /* Checksum all the pairs of bytes first... */ 573 for (i = 0; i < (len & ~1U); i += 2) { 574 sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i))); 575 if (sum > 0xFFFF) 576 sum -= 0xFFFF; 577 } 578 /* 579 * If there's a single byte left over, checksum it, too. 580 * Network byte order is big-endian, so the remaining byte is 581 * the high byte. 582 */ 583 if (i < len) { 584 sum += addr[i] << 8; 585 if (sum > 0xFFFF) 586 sum -= 0xFFFF; 587 } 588 return sum; 589 } 590 591 static u_int16_t 592 wrapsum(u_int32_t sum) 593 { 594 sum = ~sum & 0xFFFF; 595 return (htons(sum)); 596 } 597 598 /* Check the payload of the packet for errors (use it for debug). 599 * Look for consecutive ascii representations of the size of the packet. 600 */ 601 static void 602 dump_payload(const char *_p, int len, struct netmap_ring *ring, int cur) 603 { 604 char buf[128]; 605 int i, j, i0; 606 const unsigned char *p = (const unsigned char *)_p; 607 608 /* get the length in ASCII of the length of the packet. 
*/ 609 610 printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n", 611 ring, cur, ring->slot[cur].buf_idx, 612 ring->slot[cur].flags, len); 613 /* hexdump routine */ 614 for (i = 0; i < len; ) { 615 memset(buf, sizeof(buf), ' '); 616 sprintf(buf, "%5d: ", i); 617 i0 = i; 618 for (j=0; j < 16 && i < len; i++, j++) 619 sprintf(buf+7+j*3, "%02x ", (uint8_t)(p[i])); 620 i = i0; 621 for (j=0; j < 16 && i < len; i++, j++) 622 sprintf(buf+7+j + 48, "%c", 623 isprint(p[i]) ? p[i] : '.'); 624 printf("%s\n", buf); 625 } 626 } 627 628 /* 629 * Fill a packet with some payload. 630 * We create a UDP packet so the payload starts at 631 * 14+20+8 = 42 bytes. 632 */ 633 #ifdef __linux__ 634 #define uh_sport source 635 #define uh_dport dest 636 #define uh_ulen len 637 #define uh_sum check 638 #endif /* linux */ 639 640 /* 641 * increment the addressed in the packet, 642 * starting from the least significant field. 643 * DST_IP DST_PORT SRC_IP SRC_PORT 644 */ 645 static void 646 update_addresses(struct pkt *pkt, struct glob_arg *g) 647 { 648 uint32_t a; 649 uint16_t p; 650 struct ip *ip = &pkt->ip; 651 struct udphdr *udp = &pkt->udp; 652 653 do { 654 /* XXX for now it doesn't handle non-random src, random dst */ 655 if (g->options & OPT_RANDOM_SRC) { 656 udp->uh_sport = random(); 657 ip->ip_src.s_addr = random(); 658 } else { 659 p = ntohs(udp->uh_sport); 660 if (p < g->src_ip.port1) { /* just inc, no wrap */ 661 udp->uh_sport = htons(p + 1); 662 break; 663 } 664 udp->uh_sport = htons(g->src_ip.port0); 665 666 a = ntohl(ip->ip_src.s_addr); 667 if (a < g->src_ip.end) { /* just inc, no wrap */ 668 ip->ip_src.s_addr = htonl(a + 1); 669 break; 670 } 671 ip->ip_src.s_addr = htonl(g->src_ip.start); 672 673 udp->uh_sport = htons(g->src_ip.port0); 674 } 675 676 if (g->options & OPT_RANDOM_DST) { 677 udp->uh_dport = random(); 678 ip->ip_dst.s_addr = random(); 679 } else { 680 p = ntohs(udp->uh_dport); 681 if (p < g->dst_ip.port1) { /* just inc, no wrap */ 682 udp->uh_dport = htons(p + 1); 
683 break; 684 } 685 udp->uh_dport = htons(g->dst_ip.port0); 686 687 a = ntohl(ip->ip_dst.s_addr); 688 if (a < g->dst_ip.end) { /* just inc, no wrap */ 689 ip->ip_dst.s_addr = htonl(a + 1); 690 break; 691 } 692 } 693 ip->ip_dst.s_addr = htonl(g->dst_ip.start); 694 } while (0); 695 // update checksum 696 } 697 698 /* 699 * initialize one packet and prepare for the next one. 700 * The copy could be done better instead of repeating it each time. 701 */ 702 static void 703 initialize_packet(struct targ *targ) 704 { 705 struct pkt *pkt = &targ->pkt; 706 struct ether_header *eh; 707 struct ip *ip; 708 struct udphdr *udp; 709 uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip); 710 const char *payload = targ->g->options & OPT_INDIRECT ? 711 indirect_payload : default_payload; 712 int i, l0 = strlen(payload); 713 714 #ifndef NO_PCAP 715 char errbuf[PCAP_ERRBUF_SIZE]; 716 pcap_t *file; 717 struct pcap_pkthdr *header; 718 const unsigned char *packet; 719 720 /* Read a packet from a PCAP file if asked. 
*/ 721 if (targ->g->packet_file != NULL) { 722 if ((file = pcap_open_offline(targ->g->packet_file, 723 errbuf)) == NULL) 724 D("failed to open pcap file %s", 725 targ->g->packet_file); 726 if (pcap_next_ex(file, &header, &packet) < 0) 727 D("failed to read packet from %s", 728 targ->g->packet_file); 729 if ((targ->frame = malloc(header->caplen)) == NULL) 730 D("out of memory"); 731 bcopy(packet, (unsigned char *)targ->frame, header->caplen); 732 targ->g->pkt_size = header->caplen; 733 pcap_close(file); 734 return; 735 } 736 #endif 737 738 /* create a nice NUL-terminated string */ 739 for (i = 0; i < paylen; i += l0) { 740 if (l0 > paylen - i) 741 l0 = paylen - i; // last round 742 bcopy(payload, pkt->body + i, l0); 743 } 744 pkt->body[i-1] = '\0'; 745 ip = &pkt->ip; 746 747 /* prepare the headers */ 748 ip->ip_v = IPVERSION; 749 ip->ip_hl = 5; 750 ip->ip_id = 0; 751 ip->ip_tos = IPTOS_LOWDELAY; 752 ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh)); 753 ip->ip_id = 0; 754 ip->ip_off = htons(IP_DF); /* Don't fragment */ 755 ip->ip_ttl = IPDEFTTL; 756 ip->ip_p = IPPROTO_UDP; 757 ip->ip_dst.s_addr = htonl(targ->g->dst_ip.start); 758 ip->ip_src.s_addr = htonl(targ->g->src_ip.start); 759 ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0)); 760 761 762 udp = &pkt->udp; 763 udp->uh_sport = htons(targ->g->src_ip.port0); 764 udp->uh_dport = htons(targ->g->dst_ip.port0); 765 udp->uh_ulen = htons(paylen); 766 /* Magic: taken from sbin/dhclient/packet.c */ 767 udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp), 768 checksum(pkt->body, 769 paylen - sizeof(*udp), 770 checksum(&ip->ip_src, 2 * sizeof(ip->ip_src), 771 IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen) 772 ) 773 ) 774 )); 775 776 eh = &pkt->eh; 777 bcopy(&targ->g->src_mac.start, eh->ether_shost, 6); 778 bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6); 779 eh->ether_type = htons(ETHERTYPE_IP); 780 781 bzero(&pkt->vh, sizeof(pkt->vh)); 782 // dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0); 783 } 784 785 static void 
get_vnet_hdr_len(struct glob_arg *g)
{
	struct nmreq req;
	int err;

	/* query the kernel for the vnet-header length this port requires */
	memset(&req, 0, sizeof(req));
	bcopy(g->nmd->req.nr_name, req.nr_name, sizeof(req.nr_name));
	req.nr_version = NETMAP_API;
	req.nr_cmd = NETMAP_VNET_HDR_GET;
	err = ioctl(g->main_fd, NIOCREGIF, &req);
	if (err) {
		D("Unable to get virtio-net header length");
		return;
	}

	g->virt_header = req.nr_arg1;
	if (g->virt_header) {
		D("Port requires virtio-net header, length = %d",
		    g->virt_header);
	}
}

/* tell the kernel which vnet-header length to use (no-op when 0) */
static void
set_vnet_hdr_len(struct glob_arg *g)
{
	int err, l = g->virt_header;
	struct nmreq req;

	if (l == 0)
		return;

	memset(&req, 0, sizeof(req));
	bcopy(g->nmd->req.nr_name, req.nr_name, sizeof(req.nr_name));
	req.nr_version = NETMAP_API;
	req.nr_cmd = NETMAP_BDG_VNET_HDR;
	req.nr_arg1 = l;
	err = ioctl(g->main_fd, NIOCREGIF, &req);
	if (err) {
		D("Unable to set virtio-net header length %d", l);
	}
}


/*
 * create and enqueue a batch of packets on a ring.
 * On the last one set NS_REPORT to tell the driver to generate
 * an interrupt when done.
 * Returns the number of slots actually queued (may be less than
 * 'count' if the ring has less room).
 */
static int
send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame,
		int size, struct glob_arg *g, u_int count, int options,
		u_int nfrags)
{
	u_int n, sent, cur = ring->cur;
	u_int fcnt;

	n = nm_ring_space(ring);
	if (n < count)
		count = n;
	if (count < nfrags) {
		/* NOTE(review): this only warns; count is not rounded down,
		 * so a partial fragment chain can still be queued */
		D("truncating packet, no room for frags %d %d",
				count, nfrags);
	}
#if 0
	if (options & (OPT_COPY | OPT_PREFETCH) ) {
		for (sent = 0; sent < count; sent++) {
			struct netmap_slot *slot = &ring->slot[cur];
			char *p = NETMAP_BUF(ring, slot->buf_idx);

			__builtin_prefetch(p);
			cur = nm_ring_next(ring, cur);
		}
		cur = ring->cur;
	}
#endif
	/* fcnt counts down the fragments of the current packet; the
	 * template addresses are advanced once per packet (first frag) */
	for (fcnt = nfrags, sent = 0; sent < count; sent++) {
		struct netmap_slot *slot = &ring->slot[cur];
		char *p = NETMAP_BUF(ring, slot->buf_idx);
		int buf_changed = slot->flags & NS_BUF_CHANGED;

		slot->flags = 0;
		if (options & OPT_RUBBISH) {
			/* do nothing */
		} else if (options & OPT_INDIRECT) {
			slot->flags |= NS_INDIRECT;
			slot->ptr = (uint64_t)((uintptr_t)frame);
		} else if ((options & OPT_COPY) || buf_changed) {
			nm_pkt_copy(frame, p, size);
			if (fcnt == nfrags)
				update_addresses(pkt, g);
		} else if (options & OPT_MEMCPY) {
			memcpy(p, frame, size);
			if (fcnt == nfrags)
				update_addresses(pkt, g);
		} else if (options & OPT_PREFETCH) {
			__builtin_prefetch(p);
		}
		if (options & OPT_DUMP)
			dump_payload(p, size, ring, cur);
		slot->len = size;
		if (--fcnt > 0)
			slot->flags |= NS_MOREFRAG;
		else
			fcnt = nfrags;
		if (sent == count - 1) {
			/* last slot of the batch: close any fragment chain
			 * and ask the driver for a completion interrupt */
			slot->flags &= ~NS_MOREFRAG;
			slot->flags |= NS_REPORT;
		}
		cur = nm_ring_next(ring, cur);
	}
	ring->head = ring->cur = cur;

	return (sent);
}

/*
 * Index of the highest bit set
 */
uint32_t
msb64(uint64_t x)
{
	uint64_t m = 1ULL << 63;
	int i;

	for (i = 63; i >= 0; i--, m >>=1)
if (m & x) 912 return i; 913 return 0; 914 } 915 916 /* 917 * Send a packet, and wait for a response. 918 * The payload (after UDP header, ofs 42) has a 4-byte sequence 919 * followed by a struct timeval (or bintime?) 920 */ 921 #define PAY_OFS 42 /* where in the pkt... */ 922 923 static void * 924 pinger_body(void *data) 925 { 926 struct targ *targ = (struct targ *) data; 927 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 928 struct netmap_if *nifp = targ->nmd->nifp; 929 int i, rx = 0; 930 void *frame; 931 int size; 932 struct timespec ts, now, last_print; 933 uint64_t sent = 0, n = targ->g->npackets; 934 uint64_t count = 0, t_cur, t_min = ~0, av = 0; 935 uint64_t buckets[64]; /* bins for delays, ns */ 936 937 frame = &targ->pkt; 938 frame += sizeof(targ->pkt.vh) - targ->g->virt_header; 939 size = targ->g->pkt_size + targ->g->virt_header; 940 941 942 if (targ->g->nthreads > 1) { 943 D("can only ping with 1 thread"); 944 return NULL; 945 } 946 947 bzero(&buckets, sizeof(buckets)); 948 clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); 949 now = last_print; 950 while (!targ->cancel && (n == 0 || sent < n)) { 951 struct netmap_ring *ring = NETMAP_TXRING(nifp, 0); 952 struct netmap_slot *slot; 953 char *p; 954 for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? 
*/ 955 slot = &ring->slot[ring->cur]; 956 slot->len = size; 957 p = NETMAP_BUF(ring, slot->buf_idx); 958 959 if (nm_ring_empty(ring)) { 960 D("-- ouch, cannot send"); 961 } else { 962 struct tstamp *tp; 963 nm_pkt_copy(frame, p, size); 964 clock_gettime(CLOCK_REALTIME_PRECISE, &ts); 965 bcopy(&sent, p+42, sizeof(sent)); 966 tp = (struct tstamp *)(p+46); 967 tp->sec = (uint32_t)ts.tv_sec; 968 tp->nsec = (uint32_t)ts.tv_nsec; 969 sent++; 970 ring->head = ring->cur = nm_ring_next(ring, ring->cur); 971 } 972 } 973 /* should use a parameter to decide how often to send */ 974 if (poll(&pfd, 1, 3000) <= 0) { 975 D("poll error/timeout on queue %d: %s", targ->me, 976 strerror(errno)); 977 continue; 978 } 979 /* see what we got back */ 980 for (i = targ->nmd->first_tx_ring; 981 i <= targ->nmd->last_tx_ring; i++) { 982 ring = NETMAP_RXRING(nifp, i); 983 while (!nm_ring_empty(ring)) { 984 uint32_t seq; 985 struct tstamp *tp; 986 int pos; 987 988 slot = &ring->slot[ring->cur]; 989 p = NETMAP_BUF(ring, slot->buf_idx); 990 991 clock_gettime(CLOCK_REALTIME_PRECISE, &now); 992 bcopy(p+42, &seq, sizeof(seq)); 993 tp = (struct tstamp *)(p+46); 994 ts.tv_sec = (time_t)tp->sec; 995 ts.tv_nsec = (long)tp->nsec; 996 ts.tv_sec = now.tv_sec - ts.tv_sec; 997 ts.tv_nsec = now.tv_nsec - ts.tv_nsec; 998 if (ts.tv_nsec < 0) { 999 ts.tv_nsec += 1000000000; 1000 ts.tv_sec--; 1001 } 1002 if (0) D("seq %d/%lu delta %d.%09d", seq, sent, 1003 (int)ts.tv_sec, (int)ts.tv_nsec); 1004 t_cur = ts.tv_sec * 1000000000UL + ts.tv_nsec; 1005 if (t_cur < t_min) 1006 t_min = t_cur; 1007 count ++; 1008 av += t_cur; 1009 pos = msb64(t_cur); 1010 buckets[pos]++; 1011 /* now store it in a bucket */ 1012 ring->head = ring->cur = nm_ring_next(ring, ring->cur); 1013 rx++; 1014 } 1015 } 1016 //D("tx %d rx %d", sent, rx); 1017 //usleep(100000); 1018 ts.tv_sec = now.tv_sec - last_print.tv_sec; 1019 ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; 1020 if (ts.tv_nsec < 0) { 1021 ts.tv_nsec += 1000000000; 1022 ts.tv_sec--; 
1023 } 1024 if (ts.tv_sec >= 1) { 1025 D("count %d RTT: min %d av %d ns", 1026 (int)count, (int)t_min, (int)(av/count)); 1027 int k, j, kmin; 1028 char buf[512]; 1029 1030 for (kmin = 0; kmin < 64; kmin ++) 1031 if (buckets[kmin]) 1032 break; 1033 for (k = 63; k >= kmin; k--) 1034 if (buckets[k]) 1035 break; 1036 buf[0] = '\0'; 1037 for (j = kmin; j <= k; j++) 1038 sprintf(buf, "%s %5d", buf, (int)buckets[j]); 1039 D("k: %d .. %d\n\t%s", 1<<kmin, 1<<k, buf); 1040 bzero(&buckets, sizeof(buckets)); 1041 count = 0; 1042 av = 0; 1043 t_min = ~0; 1044 last_print = now; 1045 } 1046 } 1047 1048 /* reset the ``used`` flag. */ 1049 targ->used = 0; 1050 1051 return NULL; 1052 } 1053 1054 1055 /* 1056 * reply to ping requests 1057 */ 1058 static void * 1059 ponger_body(void *data) 1060 { 1061 struct targ *targ = (struct targ *) data; 1062 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1063 struct netmap_if *nifp = targ->nmd->nifp; 1064 struct netmap_ring *txring, *rxring; 1065 int i, rx = 0; 1066 uint64_t sent = 0, n = targ->g->npackets; 1067 1068 if (targ->g->nthreads > 1) { 1069 D("can only reply ping with 1 thread"); 1070 return NULL; 1071 } 1072 D("understood ponger %lu but don't know how to do it", n); 1073 while (!targ->cancel && (n == 0 || sent < n)) { 1074 uint32_t txcur, txavail; 1075 //#define BUSYWAIT 1076 #ifdef BUSYWAIT 1077 ioctl(pfd.fd, NIOCRXSYNC, NULL); 1078 #else 1079 if (poll(&pfd, 1, 1000) <= 0) { 1080 D("poll error/timeout on queue %d: %s", targ->me, 1081 strerror(errno)); 1082 continue; 1083 } 1084 #endif 1085 txring = NETMAP_TXRING(nifp, 0); 1086 txcur = txring->cur; 1087 txavail = nm_ring_space(txring); 1088 /* see what we got back */ 1089 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 1090 rxring = NETMAP_RXRING(nifp, i); 1091 while (!nm_ring_empty(rxring)) { 1092 uint16_t *spkt, *dpkt; 1093 uint32_t cur = rxring->cur; 1094 struct netmap_slot *slot = &rxring->slot[cur]; 1095 char *src, *dst; 1096 src = 
NETMAP_BUF(rxring, slot->buf_idx); 1097 //D("got pkt %p of size %d", src, slot->len); 1098 rxring->head = rxring->cur = nm_ring_next(rxring, cur); 1099 rx++; 1100 if (txavail == 0) 1101 continue; 1102 dst = NETMAP_BUF(txring, 1103 txring->slot[txcur].buf_idx); 1104 /* copy... */ 1105 dpkt = (uint16_t *)dst; 1106 spkt = (uint16_t *)src; 1107 nm_pkt_copy(src, dst, slot->len); 1108 dpkt[0] = spkt[3]; 1109 dpkt[1] = spkt[4]; 1110 dpkt[2] = spkt[5]; 1111 dpkt[3] = spkt[0]; 1112 dpkt[4] = spkt[1]; 1113 dpkt[5] = spkt[2]; 1114 txring->slot[txcur].len = slot->len; 1115 /* XXX swap src dst mac */ 1116 txcur = nm_ring_next(txring, txcur); 1117 txavail--; 1118 sent++; 1119 } 1120 } 1121 txring->head = txring->cur = txcur; 1122 targ->ctr.pkts = sent; 1123 #ifdef BUSYWAIT 1124 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1125 #endif 1126 //D("tx %d rx %d", sent, rx); 1127 } 1128 1129 /* reset the ``used`` flag. */ 1130 targ->used = 0; 1131 1132 return NULL; 1133 } 1134 1135 1136 /* 1137 * wait until ts, either busy or sleeping if more than 1ms. 1138 * Return wakeup time. 
 */
static struct timespec
wait_time(struct timespec ts)
{
	for (;;) {
		struct timespec w, cur;
		clock_gettime(CLOCK_REALTIME_PRECISE, &cur);
		w = timespec_sub(ts, cur);
		if (w.tv_sec < 0)	/* deadline passed, report actual wakeup time */
			return cur;
		else if (w.tv_sec > 0 || w.tv_nsec > 1000000)
			poll(NULL, 0, 1);	/* sleep ~1ms; otherwise busy-wait */
	}
}

/*
 * Body of a sender thread. Depending on g->dev_type the packets are
 * pushed out through a tap file descriptor (write()), a pcap handle
 * (pcap_inject()) or netmap TX rings (send_packets() on each ring in
 * [first_tx_ring..last_tx_ring]). Transmission can be paced with -R
 * (rate_limit/tosend budget per tx_period). Progress is published in
 * targ->ctr so the main thread can print periodic reports.
 */
static void *
sender_body(void *data)
{
	struct targ *targ = (struct targ *) data;
	struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT };
	struct netmap_if *nifp;
	struct netmap_ring *txring = NULL;
	int i;
	/* per-thread share of the requested packet count (0 = unlimited) */
	uint64_t n = targ->g->npackets / targ->g->nthreads;
	uint64_t sent = 0;
	uint64_t event = 0;	/* number of batches with at least one packet */
	int options = targ->g->options | OPT_COPY;
	struct timespec nexttime = { 0, 0}; // XXX silence compiler
	int rate_limit = targ->g->tx_rate;
	struct pkt *pkt = &targ->pkt;
	void *frame;
	int size;

	if (targ->frame == NULL) {
		/* use the template packet; skip the unused part of the
		 * virtio-net header area (void* arithmetic, GCC extension) */
		frame = pkt;
		frame += sizeof(pkt->vh) - targ->g->virt_header;
		size = targ->g->pkt_size + targ->g->virt_header;
	} else {
		/* frame loaded from a pcap file (-P) */
		frame = targ->frame;
		size = targ->g->pkt_size;
	}

	D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd);
	if (setaffinity(targ->thread, targ->affinity))
		goto quit;

	/* main loop.*/
	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
	if (rate_limit) {
		/* round the start time up so all threads begin aligned */
		targ->tic = timespec_add(targ->tic, (struct timespec){2,0});
		targ->tic.tv_nsec = 0;
		wait_time(targ->tic);
		nexttime = targ->tic;
	}
	if (targ->g->dev_type == DEV_TAP) {
		D("writing to file desc %d", targ->g->main_fd);

		for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
			if (write(targ->g->main_fd, frame, size) != -1)
				sent++;
			update_addresses(pkt, targ->g);
			if (i > 10000) {
				/* publish counters every ~10k iterations */
				targ->ctr.pkts = sent;
				targ->ctr.bytes = sent*size;
				targ->ctr.events = sent;
				i = 0;
			}
		}
#ifndef NO_PCAP
	} else if (targ->g->dev_type == DEV_PCAP) {
		pcap_t *p = targ->g->p;

		for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
			if (pcap_inject(p, frame, size) != -1)
				sent++;
			update_addresses(pkt, targ->g);
			if (i > 10000) {
				targ->ctr.pkts = sent;
				targ->ctr.bytes = sent*size;
				targ->ctr.events = sent;
				i = 0;
			}
		}
#endif /* NO_PCAP */
	} else {	/* DEV_NETMAP */
		int tosend = 0;	/* remaining budget for this rate-limit period */
		int frags = targ->g->frags;

		nifp = targ->nmd->nifp;
		while (!targ->cancel && (n == 0 || sent < n)) {

			if (rate_limit && tosend <= 0) {
				tosend = targ->g->burst;
				nexttime = timespec_add(nexttime, targ->g->tx_period);
				wait_time(nexttime);
			}

			/*
			 * wait for available room in the send queue(s)
			 */
#ifdef BUSYWAIT
			if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) {
				D("ioctl error on queue %d: %s", targ->me,
					strerror(errno));
				goto quit;
			}
#else /* !BUSYWAIT */
			if (poll(&pfd, 1, 2000) <= 0) {
				if (targ->cancel)
					break;
				D("poll error/timeout on queue %d: %s", targ->me,
					strerror(errno));
				// goto quit;
			}
			if (pfd.revents & POLLERR) {
				D("poll error on %d ring %d-%d", pfd.fd,
					targ->nmd->first_tx_ring, targ->nmd->last_tx_ring);
				goto quit;
			}
#endif /* !BUSYWAIT */
			/*
			 * scan our queues and send on those with room
			 */
			/* after a warm-up of 100k packets, drop the OPT_COPY
			 * flag unless the user explicitly asked for it
			 * (binds tighter than &&, parentheses not needed) */
			if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) {
				D("drop copy");
				options &= ~OPT_COPY;
			}
			for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
				int m;
				uint64_t limit = rate_limit ?  tosend : targ->g->burst;
				if (n > 0 && n - sent < limit)
					limit = n - sent;
				txring = NETMAP_TXRING(nifp, i);
				if (nm_ring_empty(txring))
					continue;
				/* round the batch up to a whole number of frames
				 * when sending multi-slot packets */
				if (frags > 1)
					limit = ((limit + frags - 1) / frags) * frags;

				m = send_packets(txring, pkt, frame, size, targ->g,
						 limit, options, frags);
				ND("limit %d tail %d frags %d m %d",
					limit, txring->tail, frags, m);
				sent += m;
				if (m > 0) //XXX-ste: can m be 0?
					event++;
				targ->ctr.pkts = sent;
				targ->ctr.bytes = sent*size;
				targ->ctr.events = event;
				if (rate_limit) {
					tosend -= m;
					if (tosend <= 0)
						break;
				}
			}
		}
		/* flush any remaining packets */
		D("flush tail %d head %d on thread %p",
			txring->tail, txring->head,
			(void *)pthread_self());
		ioctl(pfd.fd, NIOCTXSYNC, NULL);

		/* final part: wait all the TX queues to be empty. */
		for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
			txring = NETMAP_TXRING(nifp, i);
			while (!targ->cancel && nm_tx_pending(txring)) {
				RD(5, "pending tx tail %d head %d on ring %d",
					txring->tail, txring->head, i);
				ioctl(pfd.fd, NIOCTXSYNC, NULL);
				usleep(1); /* wait 1 tick */
			}
		}
	} /* end DEV_NETMAP */

	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
	targ->completed = 1;
	targ->ctr.pkts = sent;
	targ->ctr.bytes = sent*size;
	targ->ctr.events = event;
quit:
	/* reset the ``used`` flag. */
	targ->used = 0;

	return NULL;
}


#ifndef NO_PCAP
/*
 * pcap_dispatch() callback for the pcap receive path: accumulate the
 * packet count and byte count into the my_ctrs passed via ``user``.
 */
static void
receive_pcap(u_char *user, const struct pcap_pkthdr * h,
	const u_char * bytes)
{
	struct my_ctrs *ctr = (struct my_ctrs *)user;
	(void)bytes;	/* UNUSED */
	ctr->bytes += h->len;
	ctr->pkts++;
}
#endif /* !NO_PCAP */


/*
 * Drain up to ``limit`` slots from an RX ring, optionally dumping each
 * payload (-X). Adds the received byte count to *bytes (if non-NULL)
 * and returns the number of slots consumed. Advances head/cur so the
 * kernel can reuse the slots on the next rxsync.
 */
static int
receive_packets(struct netmap_ring *ring, u_int limit, int dump, uint64_t *bytes)
{
	u_int cur, rx, n;
	uint64_t b = 0;

	if (bytes == NULL)
		bytes = &b;	/* caller not interested, use a scratch counter */

	cur = ring->cur;
	n = nm_ring_space(ring);
	if (n < limit)
		limit = n;
	for (rx = 0; rx < limit; rx++) {
		struct netmap_slot *slot = &ring->slot[cur];
		char *p = NETMAP_BUF(ring, slot->buf_idx);

		*bytes += slot->len;
		if (dump)
			dump_payload(p, slot->len, ring, cur);

		cur = nm_ring_next(ring, cur);
	}
	ring->head = ring->cur = cur;

	return (rx);
}

/*
 * Body of a receiver thread. Waits (unbounded) for the first packet,
 * then counts traffic from a tap fd, a pcap handle or the netmap RX
 * rings, publishing counters in targ->ctr. In the netmap case it also
 * tracks the minimum free ring space seen (min_space). Unless -W was
 * given (g->forever cleared... note the inverted meaning, see getopt),
 * it exits after 1s of silence.
 */
static void *
receiver_body(void *data)
{
	struct targ *targ = (struct targ *) data;
	struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
	struct netmap_if *nifp;
	struct netmap_ring *rxring;
	int i;
	struct my_ctrs cur;

	cur.pkts = cur.bytes = cur.events = cur.min_space = 0;
	cur.t.tv_usec = cur.t.tv_sec = 0; // unused, just silence the compiler

	if (setaffinity(targ->thread, targ->affinity))
		goto quit;

	D("reading from %s fd %d main_fd %d",
		targ->g->ifname, targ->fd, targ->g->main_fd);
	/* unbounded wait for the first packet. */
	for (;!targ->cancel;) {
		i = poll(&pfd, 1, 1000);
		if (i > 0 && !(pfd.revents & POLLERR))
			break;
		RD(1, "waiting for initial packets, poll returns %d %d",
			i, pfd.revents);
	}
	/* main loop, exit after 1s silence */
	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
	if (targ->g->dev_type == DEV_TAP) {
		while (!targ->cancel) {
			char buf[MAX_BODYSIZE];
			/* XXX should we poll ? */
			i = read(targ->g->main_fd, buf, sizeof(buf));
			if (i > 0) {
				targ->ctr.pkts++;
				targ->ctr.bytes += i;
				targ->ctr.events++;
			}
		}
#ifndef NO_PCAP
	} else if (targ->g->dev_type == DEV_PCAP) {
		while (!targ->cancel) {
			/* XXX should we poll ? */
			pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap,
				(u_char *)&targ->ctr);
			targ->ctr.events++;
		}
#endif /* !NO_PCAP */
	} else {	/* DEV_NETMAP */
		int dump = targ->g->options & OPT_DUMP;

		nifp = targ->nmd->nifp;
		while (!targ->cancel) {
			/* Once we started to receive packets, wait at most
			   1 second before quitting. */
#ifdef BUSYWAIT
			if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) {
				D("ioctl error on queue %d: %s", targ->me,
					strerror(errno));
				goto quit;
			}
#else /* !BUSYWAIT */
			if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) {
				clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
				targ->toc.tv_sec -= 1; /* Subtract timeout time. */
				goto out;
			}

			if (pfd.revents & POLLERR) {
				D("poll err");
				goto quit;
			}
#endif /* !BUSYWAIT */
			uint64_t cur_space = 0;
			for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) {
				int m;

				rxring = NETMAP_RXRING(nifp, i);
				/* compute free space in the ring */
				m = rxring->head + rxring->num_slots - rxring->tail;
				if (m >= (int) rxring->num_slots)
					m -= rxring->num_slots;
				cur_space += m;
				if (nm_ring_empty(rxring))
					continue;

				m = receive_packets(rxring, targ->g->burst, dump, &cur.bytes);
				cur.pkts += m;
				if (m > 0) //XXX-ste: can m be 0?
					cur.events++;
			}
			/* min_space is reset to 99999 by the main thread
			 * after each report, so this tracks the per-interval
			 * minimum */
			cur.min_space = targ->ctr.min_space;
			if (cur_space < cur.min_space)
				cur.min_space = cur_space;
			targ->ctr = cur;
		}
	}

	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);

#if !defined(BUSYWAIT)
out:
#endif
	targ->completed = 1;
	targ->ctr = cur;

quit:
	/* reset the ``used`` flag. */
	targ->used = 0;

	return (NULL);
}

/*
 * Body of the txseq sender: like sender_body but single-threaded and
 * restricted to the first TX queue; stamps a 32-bit big-endian
 * sequence number into pkt->body so the rxseq peer can detect gaps
 * and reordering. Supports multi-slot packets (-F) via NS_MOREFRAG.
 */
static void *
txseq_body(void *data)
{
	struct targ *targ = (struct targ *) data;
	struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT };
	struct netmap_ring *ring;
	int64_t sent = 0;
	uint64_t event = 0;
	int options = targ->g->options | OPT_COPY;
	struct timespec nexttime = {0, 0};
	int rate_limit = targ->g->tx_rate;
	struct pkt *pkt = &targ->pkt;
	int frags = targ->g->frags;
	uint32_t sequence = 0;
	int budget = 0;
	void *frame;
	int size;

	if (targ->g->nthreads > 1) {
		D("can only txseq ping with 1 thread");
		return NULL;
	}

	if (targ->g->npackets > 0) {
		D("Ignoring -n argument");
	}

	/* template packet, skipping the unused vnet-header area */
	frame = pkt;
	frame += sizeof(pkt->vh) - targ->g->virt_header;
	size = targ->g->pkt_size + targ->g->virt_header;

	D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd);
	if (setaffinity(targ->thread, targ->affinity))
		goto quit;

	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
	if (rate_limit) {
		targ->tic = timespec_add(targ->tic, (struct timespec){2,0});
		targ->tic.tv_nsec = 0;
		wait_time(targ->tic);
		nexttime = targ->tic;
	}

	/* Only use the first queue.
 */
	ring = NETMAP_TXRING(targ->nmd->nifp, targ->nmd->first_tx_ring);

	while (!targ->cancel) {
		int64_t limit;
		unsigned int space;
		unsigned int head;
		int fcnt;	/* fragments left in the current packet */

		if (!rate_limit) {
			budget = targ->g->burst;

		} else if (budget <= 0) {
			/* budget exhausted: start a new rate-limit period */
			budget = targ->g->burst;
			nexttime = timespec_add(nexttime, targ->g->tx_period);
			wait_time(nexttime);
		}

		/* wait for available room in the send queue */
		if (poll(&pfd, 1, 2000) <= 0) {
			if (targ->cancel)
				break;
			D("poll error/timeout on queue %d: %s", targ->me,
				strerror(errno));
		}
		if (pfd.revents & POLLERR) {
			D("poll error on %d ring %d-%d", pfd.fd,
				targ->nmd->first_tx_ring, targ->nmd->last_tx_ring);
			goto quit;
		}

		/* If no room poll() again. */
		space = nm_ring_space(ring);
		if (!space) {
			continue;
		}

		limit = budget;

		if (space < limit) {
			limit = space;
		}

		/* Cut off ``limit`` to make sure is multiple of ``frags``. */
		if (frags > 1) {
			limit = (limit / frags) * frags;
		}

		limit = sent + limit; /* Convert to absolute. */

		for (fcnt = frags, head = ring->head;
				sent < limit; sent++, sequence++) {
			struct netmap_slot *slot = &ring->slot[head];
			char *p = NETMAP_BUF(ring, slot->buf_idx);

			slot->flags = 0;
			/* stamp the sequence number, big-endian, in the
			 * first 4 payload bytes */
			pkt->body[0] = sequence >> 24;
			pkt->body[1] = (sequence >> 16) & 0xff;
			pkt->body[2] = (sequence >> 8) & 0xff;
			pkt->body[3] = sequence & 0xff;
			nm_pkt_copy(frame, p, size);
			if (fcnt == frags) {
				/* only update addresses on the first
				 * fragment of each packet */
				update_addresses(pkt, targ->g);
			}

			if (options & OPT_DUMP) {
				dump_payload(p, size, ring, head);
			}

			slot->len = size;

			if (--fcnt > 0) {
				slot->flags |= NS_MOREFRAG;
			} else {
				fcnt = frags;
			}

			if (sent == limit - 1) {
				/* Make sure we don't push an incomplete
				 * packet. */
				assert(!(slot->flags & NS_MOREFRAG));
				slot->flags |= NS_REPORT;
			}

			head = nm_ring_next(ring, head);
			if (rate_limit) {
				budget--;
			}
		}

		ring->cur = ring->head = head;

		event ++;
		targ->ctr.pkts = sent;
		targ->ctr.bytes = sent * size;
		targ->ctr.events = event;
	}

	/* flush any remaining packets */
	D("flush tail %d head %d on thread %p",
		ring->tail, ring->head,
		(void *)pthread_self());
	ioctl(pfd.fd, NIOCTXSYNC, NULL);

	/* final part: wait the TX queues to become empty. */
	while (!targ->cancel && nm_tx_pending(ring)) {
		RD(5, "pending tx tail %d head %d on ring %d",
			ring->tail, ring->head, targ->nmd->first_tx_ring);
		ioctl(pfd.fd, NIOCTXSYNC, NULL);
		usleep(1); /* wait 1 tick */
	}

	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
	targ->completed = 1;
	targ->ctr.pkts = sent;
	targ->ctr.bytes = sent * size;
	targ->ctr.events = event;
quit:
	/* reset the ``used`` flag. */
	targ->used = 0;

	return (NULL);
}


/*
 * Render ``nfrags`` consecutive slots starting at ``head`` as a
 * "|len,flags|" list into strbuf (for diagnostics about multi-slot
 * packets). Stops early if strbuf fills up. Returns strbuf.
 */
static char *
multi_slot_to_string(struct netmap_ring *ring, unsigned int head,
	unsigned int nfrags, char *strbuf, size_t strbuflen)
{
	unsigned int f;
	char *ret = strbuf;

	for (f = 0; f < nfrags; f++) {
		struct netmap_slot *slot = &ring->slot[head];
		int m = snprintf(strbuf, strbuflen, "|%u,%x|", slot->len,
			slot->flags);
		if (m >= (int)strbuflen) {
			break;	/* truncated: no room left */
		}
		strbuf += m;
		strbuflen -= m;

		head = nm_ring_next(ring, head);
	}

	return ret;
}

/*
 * Body of the rxseq receiver, peer of txseq_body: reads packets from
 * the first RX ring only, extracts the 32-bit big-endian sequence
 * number from the payload and reports gaps, reordering and unexpected
 * fragment counts. Counters are published in targ->ctr.
 */
static void *
rxseq_body(void *data)
{
	struct targ *targ = (struct targ *) data;
	struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
	int dump = targ->g->options & OPT_DUMP;
	struct netmap_ring *ring;
	unsigned int frags_exp = 1;	/* expected frags per packet */
	uint32_t seq_exp = 0;		/* next expected sequence number */
	struct my_ctrs cur;
	unsigned int frags = 0;		/* frags seen in current packet */
	int first_packet = 1;
	int first_slot = 1;
	int i;

	cur.pkts = cur.bytes = cur.events = cur.min_space = 0;
	cur.t.tv_usec = cur.t.tv_sec = 0; // unused, just silence the compiler

	if (setaffinity(targ->thread, targ->affinity))
		goto quit;

	D("reading from %s fd %d main_fd %d",
		targ->g->ifname, targ->fd, targ->g->main_fd);
	/* unbounded wait for the first packet. */
	for (;!targ->cancel;) {
		i = poll(&pfd, 1, 1000);
		if (i > 0 && !(pfd.revents & POLLERR))
			break;
		RD(1, "waiting for initial packets, poll returns %d %d",
			i, pfd.revents);
	}

	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);

	ring = NETMAP_RXRING(targ->nmd->nifp, targ->nmd->first_rx_ring);

	while (!targ->cancel) {
		unsigned int head;
		uint32_t seq;
		int limit;

		/* Once we started to receive packets, wait at most 1 second
		   before quitting.
 */
		if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) {
			clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
			targ->toc.tv_sec -= 1; /* Subtract timeout time. */
			goto out;
		}

		if (pfd.revents & POLLERR) {
			D("poll err");
			goto quit;
		}

		if (nm_ring_empty(ring))
			continue;

		limit = nm_ring_space(ring);
		if (limit > targ->g->burst)
			limit = targ->g->burst;

#if 0
		/* Enable this if
		 *     1) we remove the early-return optimization from
		 *        the netmap poll implementation, or
		 *     2) pipes get NS_MOREFRAG support.
		 * With the current netmap implementation, an experiment like
		 *     pkt-gen -i vale:1{1 -f txseq -F 9
		 *     pkt-gen -i vale:1}1 -f rxseq
		 * would get stuck as soon as we find nm_ring_space(ring) < 9,
		 * since here limit is rounded to 0 and
		 * pipe rxsync is not called anymore by the poll() of this loop.
		 */
		if (frags_exp > 1) {
			int o = limit;
			/* Cut off to the closest smaller multiple. */
			limit = (limit / frags_exp) * frags_exp;
			RD(2, "LIMIT %d --> %d", o, limit);
		}
#endif

		for (head = ring->head, i = 0; i < limit; i++) {
			struct netmap_slot *slot = &ring->slot[head];
			char *p = NETMAP_BUF(ring, slot->buf_idx);
			int len = slot->len;
			struct pkt *pkt;

			if (dump) {
				dump_payload(p, slot->len, ring, head);
			}

			frags++;
			if (!(slot->flags & NS_MOREFRAG)) {
				/* last fragment: check the frag count
				 * against the previous packet's */
				if (first_packet) {
					first_packet = 0;
				} else if (frags != frags_exp) {
					char prbuf[512];
					RD(1, "Received packets with %u frags, "
						"expected %u, '%s'", frags, frags_exp,
						multi_slot_to_string(ring, head-frags+1, frags,
							prbuf, sizeof(prbuf)));
				}
				first_packet = 0;
				frags_exp = frags;
				frags = 0;
			}

			/* step back over the (absent) vnet-header area so
			 * the buffer overlays struct pkt like on tx */
			p -= sizeof(pkt->vh) - targ->g->virt_header;
			len += sizeof(pkt->vh) - targ->g->virt_header;
			pkt = (struct pkt *)p;

			if ((char *)pkt + len < ((char *)pkt->body) + sizeof(seq)) {
				RD(1, "%s: packet too small (len=%u)", __func__,
					slot->len);
			} else {
				/* sequence number is big-endian in the first
				 * 4 payload bytes (see txseq_body) */
				seq = (pkt->body[0] << 24) | (pkt->body[1] << 16)
					| (pkt->body[2] << 8) | pkt->body[3];
				if (first_slot) {
					/* Grab the first one, whatever it
					   is. */
					seq_exp = seq;
					first_slot = 0;
				} else if (seq != seq_exp) {
					uint32_t delta = seq - seq_exp;

					/* distinguish a forward gap from an
					 * out-of-order (wrapped-back) delta */
					if (delta < (0xFFFFFFFF >> 1)) {
						RD(2, "Sequence GAP: exp %u found %u",
							seq_exp, seq);
					} else {
						RD(2, "Sequence OUT OF ORDER: "
							"exp %u found %u", seq_exp, seq);
					}
					seq_exp = seq;
				}
				seq_exp++;
			}

			cur.bytes += slot->len;
			head = nm_ring_next(ring, head);
			cur.pkts++;
		}

		ring->cur = ring->head = head;

		cur.events++;
		targ->ctr = cur;
	}

	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);

out:
	targ->completed = 1;
	targ->ctr = cur;

quit:
	/* reset the ``used`` flag. */
	targ->used = 0;

	return (NULL);
}


/*
 * Print a summary line for a finished run: totals, then normalized
 * packet rate and bandwidth (norm() formats with K/M/G suffixes).
 * ``msg`` is "Sent" or "Received".
 */
static void
tx_output(struct my_ctrs *cur, double delta, const char *msg)
{
	double bw, raw_bw, pps, abs;
	char b1[40], b2[80], b3[80];
	int size;

	if (cur->pkts == 0) {
		printf("%s nothing.\n", msg);
		return;
	}

	size = (int)(cur->bytes / cur->pkts);	/* average packet size */

	printf("%s %llu packets %llu bytes %llu events %d bytes each in %.2f seconds.\n",
		msg,
		(unsigned long long)cur->pkts,
		(unsigned long long)cur->bytes,
		(unsigned long long)cur->events, size, delta);
	if (delta == 0)
		delta = 1e-6;	/* avoid division by zero */
	if (size < 60) /* correct for min packet size */
		size = 60;
	pps = cur->pkts / delta;
	bw = (8.0 * cur->bytes) / delta;
	/* raw packets have 4 bytes CRC + 20 bytes framing */
	raw_bw = (8.0 * (cur->pkts * 24 + cur->bytes)) / delta;
	abs = cur->pkts / (double)(cur->events);	/* avg batch size */

	printf("Speed: %spps Bandwidth: %sbps (raw %sbps). Average batch: %.2f pkts\n",
		norm(b1, pps), norm(b2, bw), norm(b3, raw_bw), abs);
}

/* Print command-line help and exit. */
static void
usage(void)
{
	const char *cmd = "pkt-gen";
	fprintf(stderr,
		"Usage:\n"
		"%s arguments\n"
		"\t-i interface		interface name\n"
		"\t-f function		tx rx ping pong txseq rxseq\n"
		"\t-n count		number of iterations (can be 0)\n"
		"\t-t pkts_to_send	also forces tx mode\n"
		"\t-r pkts_to_receive	also forces rx mode\n"
		"\t-l pkt_size		in bytes excluding CRC\n"
		"\t-d dst_ip[:port[-dst_ip:port]]	single or range\n"
		"\t-s src_ip[:port[-src_ip:port]]	single or range\n"
		"\t-D dst-mac\n"
		"\t-S src-mac\n"
		"\t-a cpu_id		use setaffinity\n"
		"\t-b burst size		testing, mostly\n"
		"\t-c cores		cores to use\n"
		"\t-p threads		processes/threads to use\n"
		"\t-T report_ms		milliseconds between reports\n"
		"\t-w wait_for_link_time	in seconds\n"
		"\t-R rate		in packets per second\n"
		"\t-X			dump payload\n"
		"\t-H len		add empty virtio-net-header with size 'len'\n"
		"\t-E pipes		allocate extra space for a number of pipes\n"
		"\t-r			do not touch the buffers (send rubbish)\n"
		"\t-P file		load packet from pcap file\n"
		"\t-z			use random IPv4 src address/port\n"
		"\t-Z			use random IPv4 dst address/port\n"
		"\t-F num_frags		send multi-slot packets\n"
		"\t-A			activate pps stats on receiver\n"
		"",
		cmd);

	exit(0);
}

/* Thread-body categories used by the -f dispatch table below. */
enum {
	TD_TYPE_SENDER = 1,
	TD_TYPE_RECEIVER,
	TD_TYPE_OTHER,
};

/*
 * Create g->nthreads worker threads, one per queue. Thread 0 reuses
 * the nm_desc opened by main(); the others re-open the device bound
 * to their own hardware ring (NR_REG_ONE_NIC). Threads are pinned to
 * consecutive CPUs when -a was given.
 */
static void
start_threads(struct glob_arg *g)
{
	int i;

	targs = calloc(g->nthreads, sizeof(*targs));
	/*
	 * Now create the desired number of threads, each one
	 * using a single descriptor.
	 */
	for (i = 0; i < g->nthreads; i++) {
		struct targ *t = &targs[i];

		bzero(t, sizeof(*t));
		t->fd = -1; /* default, with pcap */
		t->g = g;

		if (g->dev_type == DEV_NETMAP) {
			struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */
			uint64_t nmd_flags = 0;
			nmd.self = &nmd;

			if (i > 0) {
				/* the first thread uses the fd opened by the main
				 * thread, the other threads re-open /dev/netmap
				 */
				if (g->nthreads > 1) {
					nmd.req.nr_flags =
						g->nmd->req.nr_flags & ~NR_REG_MASK;
					nmd.req.nr_flags |= NR_REG_ONE_NIC;
					nmd.req.nr_ringid = i;
				}
				/* Only touch one of the rings (rx is already ok) */
				if (g->td_type == TD_TYPE_RECEIVER)
					nmd_flags |= NETMAP_NO_TX_POLL;

				/* register interface. Override ifname and ringid etc. */
				t->nmd = nm_open(t->g->ifname, NULL, nmd_flags |
					NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, &nmd);
				if (t->nmd == NULL) {
					D("Unable to open %s: %s",
						t->g->ifname, strerror(errno));
					continue;
				}
			} else {
				t->nmd = g->nmd;
			}
			t->fd = t->nmd->fd;

		} else {
			targs[i].fd = g->main_fd;
		}
		t->used = 1;
		t->me = i;
		if (g->affinity >= 0) {
			t->affinity = (g->affinity + i) % g->system_cpus;
		} else {
			t->affinity = -1;
		}
		/* default, init packets */
		initialize_packet(t);

		if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) {
			D("Unable to create thread %d: %s", i, strerror(errno));
			t->used = 0;
		}
	}
}

/*
 * Main reporting loop: every report_interval ms aggregate the
 * per-thread counters, print a rate line (optionally with sliding-
 * window pps avg/stddev when -A is given), then, when all workers
 * have finished, join them and print the final summary.
 */
static void
main_thread(struct glob_arg *g)
{
	int i;

	struct my_ctrs prev, cur;
	double delta_t;
	struct timeval tic, toc;

	prev.pkts = prev.bytes = prev.events = 0;
	gettimeofday(&prev.t, NULL);
	for (;;) {
		char b1[40], b2[40], b3[40], b4[70];
		uint64_t pps, usec;
		struct my_ctrs x;
		double abs;
		int done = 0;

		usec = wait_for_next_report(&prev.t, &cur.t,
				g->report_interval);

		cur.pkts = cur.bytes = cur.events = 0;
		cur.min_space = 0;
		if (usec < 10000) /* too short to be meaningful */
			continue;
		/* accumulate counts for all threads */
		for (i = 0; i < g->nthreads; i++) {
			cur.pkts += targs[i].ctr.pkts;
			cur.bytes += targs[i].ctr.bytes;
			cur.events += targs[i].ctr.events;
			cur.min_space += targs[i].ctr.min_space;
			targs[i].ctr.min_space = 99999;
			if (targs[i].used == 0)
				done++;
		}
		x.pkts = cur.pkts - prev.pkts;
		x.bytes = cur.bytes - prev.bytes;
		x.events = cur.events - prev.events;
		pps = (x.pkts*1000000 + usec/2) / usec;	/* rounded pps */
		abs = (x.events > 0) ? (x.pkts / (double) x.events) : 0;

		if (!(g->options & OPT_PPS_STATS)) {
			strcpy(b4, "");
		} else {
			/* Compute some pps stats using a sliding window. */
			double ppsavg = 0.0, ppsdev = 0.0;
			int nsamples = 0;

			g->win[g->win_idx] = pps;
			g->win_idx = (g->win_idx + 1) % STATS_WIN;

			for (i = 0; i < STATS_WIN; i++) {
				ppsavg += g->win[i];
				if (g->win[i]) {
					nsamples ++;
				}
			}
			ppsavg /= nsamples;

			for (i = 0; i < STATS_WIN; i++) {
				if (g->win[i] == 0) {
					continue;	/* skip empty slots */
				}
				ppsdev += (g->win[i] - ppsavg) * (g->win[i] - ppsavg);
			}
			ppsdev /= nsamples;
			ppsdev = sqrt(ppsdev);

			snprintf(b4, sizeof(b4), "[avg/std %s/%s pps]",
				norm(b1, ppsavg), norm(b2, ppsdev));
		}

		D("%spps %s(%spkts %sbps in %llu usec) %.2f avg_batch %d min_space",
			norm(b1, pps), b4,
			norm(b2, (double)x.pkts),
			norm(b3, (double)x.bytes*8),
			(unsigned long long)usec,
			abs, (int)cur.min_space);
		prev = cur;

		if (done == g->nthreads)
			break;
	}

	timerclear(&tic);
	timerclear(&toc);
	cur.pkts = cur.bytes = cur.events = 0;
	/* final round */
	for (i = 0; i < g->nthreads; i++) {
		struct timespec t_tic, t_toc;
		/*
		 * Join active threads, unregister interfaces and close
		 * file descriptors.
		 */
		if (targs[i].used)
			pthread_join(targs[i].thread, NULL); /* blocking */
		if (g->dev_type == DEV_NETMAP) {
			nm_close(targs[i].nmd);
			targs[i].nmd = NULL;
		} else {
			close(targs[i].fd);
		}

		if (targs[i].completed == 0)
			D("ouch, thread %d exited with error", i);

		/*
		 * Collect threads output and extract information about
		 * how long it took to send all the packets.
		 */
		cur.pkts += targs[i].ctr.pkts;
		cur.bytes += targs[i].ctr.bytes;
		cur.events += targs[i].ctr.events;
		/* collect the largest start (tic) and end (toc) times,
		 * XXX maybe we should do the earliest tic, or do a weighted
		 * average ?
		 */
		t_tic = timeval2spec(&tic);
		t_toc = timeval2spec(&toc);
		if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic))
			tic = timespec2val(&targs[i].tic);
		if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc))
			toc = timespec2val(&targs[i].toc);
	}

	/* print output.
 */
	timersub(&toc, &tic, &toc);
	delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
	if (g->td_type == TD_TYPE_SENDER)
		tx_output(&cur, delta_t, "Sent");
	else
		tx_output(&cur, delta_t, "Received");
}

/* Entry of the -f keyword dispatch table. */
struct td_desc {
	int ty;		/* TD_TYPE_* category */
	char *key;	/* keyword accepted by -f */
	void *f;	/* thread body function */
};

/* Map -f keywords to thread bodies; terminated by a NULL key. */
static struct td_desc func[] = {
	{ TD_TYPE_SENDER,	"tx",		sender_body },
	{ TD_TYPE_RECEIVER,	"rx",		receiver_body },
	{ TD_TYPE_OTHER,	"ping",		pinger_body },
	{ TD_TYPE_OTHER,	"pong",		ponger_body },
	{ TD_TYPE_SENDER,	"txseq",	txseq_body },
	{ TD_TYPE_RECEIVER,	"rxseq",	rxseq_body },
	{ 0,			NULL,		NULL }
};

/*
 * Open (and on Linux create via TUNSETIFF) a tap device and return
 * its file descriptor, or a negative value on error. ``dev`` may name
 * the desired interface; on Linux the actual name is written back.
 */
static int
tap_alloc(char *dev)
{
	struct ifreq ifr;
	int fd, err;
	char *clonedev = TAP_CLONEDEV;

	(void)err;	/* only used on Linux */
	(void)dev;	/* only used on Linux/FreeBSD */
	/* Arguments taken by the function:
	 *
	 * char *dev: the name of an interface (or '\0'). MUST have enough
	 *   space to hold the interface name if '\0' is passed
	 * int flags: interface flags (eg, IFF_TUN etc.)
	 */

#ifdef __FreeBSD__
	if (dev[3]) {	/* tapSomething */
		static char buf[128];
		snprintf(buf, sizeof(buf), "/dev/%s", dev);
		clonedev = buf;
	}
#endif
	/* open the device */
	if( (fd = open(clonedev, O_RDWR)) < 0 ) {
		return fd;
	}
	D("%s open successful", clonedev);

	/* preparation of the struct ifr, of type "struct ifreq" */
	memset(&ifr, 0, sizeof(ifr));

#ifdef linux
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;

	if (*dev) {
		/* if a device name was specified, put it in the structure; otherwise,
		 * the kernel will try to allocate the "next" device of the
		 * specified type */
		strncpy(ifr.ifr_name, dev, IFNAMSIZ);
	}

	/* try to create the device */
	if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) {
		D("failed to to a TUNSETIFF: %s", strerror(errno));
		close(fd);
		return err;
	}

	/* if the operation was successful, write back the name of the
	 * interface to the variable "dev", so the caller can know
	 * it.
Note that the caller MUST reserve space in *dev (see calling 2181 * code below) */ 2182 strcpy(dev, ifr.ifr_name); 2183 D("new name is %s", dev); 2184 #endif /* linux */ 2185 2186 /* this is the special file descriptor that the caller will use to talk 2187 * with the virtual interface */ 2188 return fd; 2189 } 2190 2191 int 2192 main(int arc, char **argv) 2193 { 2194 int i; 2195 struct sigaction sa; 2196 sigset_t ss; 2197 2198 struct glob_arg g; 2199 2200 int ch; 2201 int wait_link = 2; 2202 int devqueues = 1; /* how many device queues */ 2203 2204 bzero(&g, sizeof(g)); 2205 2206 g.main_fd = -1; 2207 g.td_body = receiver_body; 2208 g.td_type = TD_TYPE_RECEIVER; 2209 g.report_interval = 1000; /* report interval */ 2210 g.affinity = -1; 2211 /* ip addresses can also be a range x.x.x.x-x.x.x.y */ 2212 g.src_ip.name = "10.0.0.1"; 2213 g.dst_ip.name = "10.1.0.1"; 2214 g.dst_mac.name = "ff:ff:ff:ff:ff:ff"; 2215 g.src_mac.name = NULL; 2216 g.pkt_size = 60; 2217 g.burst = 512; // default 2218 g.nthreads = 1; 2219 g.cpus = 1; // default 2220 g.forever = 1; 2221 g.tx_rate = 0; 2222 g.frags = 1; 2223 g.nmr_config = ""; 2224 g.virt_header = 0; 2225 2226 while ( (ch = getopt(arc, argv, 2227 "a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:E:m:rP:zZA")) != -1) { 2228 struct td_desc *fn; 2229 2230 switch(ch) { 2231 default: 2232 D("bad option %c %s", ch, optarg); 2233 usage(); 2234 break; 2235 2236 case 'n': 2237 g.npackets = strtoull(optarg, NULL, 10); 2238 break; 2239 2240 case 'F': 2241 i = atoi(optarg); 2242 if (i < 1 || i > 63) { 2243 D("invalid frags %d [1..63], ignore", i); 2244 break; 2245 } 2246 g.frags = i; 2247 break; 2248 2249 case 'f': 2250 for (fn = func; fn->key; fn++) { 2251 if (!strcmp(fn->key, optarg)) 2252 break; 2253 } 2254 if (fn->key) { 2255 g.td_body = fn->f; 2256 g.td_type = fn->ty; 2257 } else { 2258 D("unrecognised function %s", optarg); 2259 } 2260 break; 2261 2262 case 'o': /* data generation options */ 2263 g.options = atoi(optarg); 2264 break; 2265 2266 
case 'a': /* force affinity */ 2267 g.affinity = atoi(optarg); 2268 break; 2269 2270 case 'i': /* interface */ 2271 /* a prefix of tap: netmap: or pcap: forces the mode. 2272 * otherwise we guess 2273 */ 2274 D("interface is %s", optarg); 2275 if (strlen(optarg) > MAX_IFNAMELEN - 8) { 2276 D("ifname too long %s", optarg); 2277 break; 2278 } 2279 strcpy(g.ifname, optarg); 2280 if (!strcmp(optarg, "null")) { 2281 g.dev_type = DEV_NETMAP; 2282 g.dummy_send = 1; 2283 } else if (!strncmp(optarg, "tap:", 4)) { 2284 g.dev_type = DEV_TAP; 2285 strcpy(g.ifname, optarg + 4); 2286 } else if (!strncmp(optarg, "pcap:", 5)) { 2287 g.dev_type = DEV_PCAP; 2288 strcpy(g.ifname, optarg + 5); 2289 } else if (!strncmp(optarg, "netmap:", 7) || 2290 !strncmp(optarg, "vale", 4)) { 2291 g.dev_type = DEV_NETMAP; 2292 } else if (!strncmp(optarg, "tap", 3)) { 2293 g.dev_type = DEV_TAP; 2294 } else { /* prepend netmap: */ 2295 g.dev_type = DEV_NETMAP; 2296 sprintf(g.ifname, "netmap:%s", optarg); 2297 } 2298 break; 2299 2300 case 'I': 2301 g.options |= OPT_INDIRECT; /* XXX use indirect buffer */ 2302 break; 2303 2304 case 'l': /* pkt_size */ 2305 g.pkt_size = atoi(optarg); 2306 break; 2307 2308 case 'd': 2309 g.dst_ip.name = optarg; 2310 break; 2311 2312 case 's': 2313 g.src_ip.name = optarg; 2314 break; 2315 2316 case 'T': /* report interval */ 2317 g.report_interval = atoi(optarg); 2318 break; 2319 2320 case 'w': 2321 wait_link = atoi(optarg); 2322 break; 2323 2324 case 'W': /* XXX changed default */ 2325 g.forever = 0; /* do not exit rx even with no traffic */ 2326 break; 2327 2328 case 'b': /* burst */ 2329 g.burst = atoi(optarg); 2330 break; 2331 case 'c': 2332 g.cpus = atoi(optarg); 2333 break; 2334 case 'p': 2335 g.nthreads = atoi(optarg); 2336 break; 2337 2338 case 'D': /* destination mac */ 2339 g.dst_mac.name = optarg; 2340 break; 2341 2342 case 'S': /* source mac */ 2343 g.src_mac.name = optarg; 2344 break; 2345 case 'v': 2346 verbose++; 2347 break; 2348 case 'R': 2349 g.tx_rate = 
atoi(optarg); 2350 break; 2351 case 'X': 2352 g.options |= OPT_DUMP; 2353 break; 2354 case 'C': 2355 g.nmr_config = strdup(optarg); 2356 break; 2357 case 'H': 2358 g.virt_header = atoi(optarg); 2359 break; 2360 case 'e': /* extra bufs */ 2361 g.extra_bufs = atoi(optarg); 2362 break; 2363 case 'E': 2364 g.extra_pipes = atoi(optarg); 2365 break; 2366 case 'P': 2367 g.packet_file = strdup(optarg); 2368 break; 2369 case 'm': 2370 /* ignored */ 2371 break; 2372 case 'r': 2373 g.options |= OPT_RUBBISH; 2374 break; 2375 case 'z': 2376 g.options |= OPT_RANDOM_SRC; 2377 break; 2378 case 'Z': 2379 g.options |= OPT_RANDOM_DST; 2380 break; 2381 case 'A': 2382 g.options |= OPT_PPS_STATS; 2383 break; 2384 } 2385 } 2386 2387 if (strlen(g.ifname) <=0 ) { 2388 D("missing ifname"); 2389 usage(); 2390 } 2391 2392 g.system_cpus = i = system_ncpus(); 2393 if (g.cpus < 0 || g.cpus > i) { 2394 D("%d cpus is too high, have only %d cpus", g.cpus, i); 2395 usage(); 2396 } 2397 D("running on %d cpus (have %d)", g.cpus, i); 2398 if (g.cpus == 0) 2399 g.cpus = i; 2400 2401 if (g.pkt_size < 16 || g.pkt_size > MAX_PKTSIZE) { 2402 D("bad pktsize %d [16..%d]\n", g.pkt_size, MAX_PKTSIZE); 2403 usage(); 2404 } 2405 2406 if (g.src_mac.name == NULL) { 2407 static char mybuf[20] = "00:00:00:00:00:00"; 2408 /* retrieve source mac address. 
*/ 2409 if (source_hwaddr(g.ifname, mybuf) == -1) { 2410 D("Unable to retrieve source mac"); 2411 // continue, fail later 2412 } 2413 g.src_mac.name = mybuf; 2414 } 2415 /* extract address ranges */ 2416 extract_ip_range(&g.src_ip); 2417 extract_ip_range(&g.dst_ip); 2418 extract_mac_range(&g.src_mac); 2419 extract_mac_range(&g.dst_mac); 2420 2421 if (g.src_ip.start != g.src_ip.end || 2422 g.src_ip.port0 != g.src_ip.port1 || 2423 g.dst_ip.start != g.dst_ip.end || 2424 g.dst_ip.port0 != g.dst_ip.port1) 2425 g.options |= OPT_COPY; 2426 2427 if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1 2428 && g.virt_header != VIRT_HDR_2) { 2429 D("bad virtio-net-header length"); 2430 usage(); 2431 } 2432 2433 if (g.dev_type == DEV_TAP) { 2434 D("want to use tap %s", g.ifname); 2435 g.main_fd = tap_alloc(g.ifname); 2436 if (g.main_fd < 0) { 2437 D("cannot open tap %s", g.ifname); 2438 usage(); 2439 } 2440 #ifndef NO_PCAP 2441 } else if (g.dev_type == DEV_PCAP) { 2442 char pcap_errbuf[PCAP_ERRBUF_SIZE]; 2443 2444 pcap_errbuf[0] = '\0'; // init the buffer 2445 g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf); 2446 if (g.p == NULL) { 2447 D("cannot open pcap on %s", g.ifname); 2448 usage(); 2449 } 2450 g.main_fd = pcap_fileno(g.p); 2451 D("using pcap on %s fileno %d", g.ifname, g.main_fd); 2452 #endif /* !NO_PCAP */ 2453 } else if (g.dummy_send) { /* but DEV_NETMAP */ 2454 D("using a dummy send routine"); 2455 } else { 2456 struct nmreq base_nmd; 2457 2458 bzero(&base_nmd, sizeof(base_nmd)); 2459 2460 parse_nmr_config(g.nmr_config, &base_nmd); 2461 if (g.extra_bufs) { 2462 base_nmd.nr_arg3 = g.extra_bufs; 2463 } 2464 if (g.extra_pipes) { 2465 base_nmd.nr_arg1 = g.extra_pipes; 2466 } 2467 2468 base_nmd.nr_flags |= NR_ACCEPT_VNET_HDR; 2469 2470 /* 2471 * Open the netmap device using nm_open(). 2472 * 2473 * protocol stack and may cause a reset of the card, 2474 * which in turn may take some time for the PHY to 2475 * reconfigure. 
We do the open here to have time to reset. 2476 */ 2477 g.nmd = nm_open(g.ifname, &base_nmd, 0, NULL); 2478 if (g.nmd == NULL) { 2479 D("Unable to open %s: %s", g.ifname, strerror(errno)); 2480 goto out; 2481 } 2482 2483 if (g.nthreads > 1) { 2484 struct nm_desc saved_desc = *g.nmd; 2485 saved_desc.self = &saved_desc; 2486 saved_desc.mem = NULL; 2487 nm_close(g.nmd); 2488 saved_desc.req.nr_flags &= ~NR_REG_MASK; 2489 saved_desc.req.nr_flags |= NR_REG_ONE_NIC; 2490 saved_desc.req.nr_ringid = 0; 2491 g.nmd = nm_open(g.ifname, &base_nmd, NM_OPEN_IFNAME, &saved_desc); 2492 if (g.nmd == NULL) { 2493 D("Unable to open %s: %s", g.ifname, strerror(errno)); 2494 goto out; 2495 } 2496 } 2497 g.main_fd = g.nmd->fd; 2498 D("mapped %dKB at %p", g.nmd->req.nr_memsize>>10, g.nmd->mem); 2499 2500 if (g.virt_header) { 2501 /* Set the virtio-net header length, since the user asked 2502 * for it explicitely. */ 2503 set_vnet_hdr_len(&g); 2504 } else { 2505 /* Check whether the netmap port we opened requires us to send 2506 * and receive frames with virtio-net header. */ 2507 get_vnet_hdr_len(&g); 2508 } 2509 2510 /* get num of queues in tx or rx */ 2511 if (g.td_type == TD_TYPE_SENDER) 2512 devqueues = g.nmd->req.nr_tx_rings; 2513 else 2514 devqueues = g.nmd->req.nr_rx_rings; 2515 2516 /* validate provided nthreads. 
*/ 2517 if (g.nthreads < 1 || g.nthreads > devqueues) { 2518 D("bad nthreads %d, have %d queues", g.nthreads, devqueues); 2519 // continue, fail later 2520 } 2521 2522 if (verbose) { 2523 struct netmap_if *nifp = g.nmd->nifp; 2524 struct nmreq *req = &g.nmd->req; 2525 2526 D("nifp at offset %d, %d tx %d rx region %d", 2527 req->nr_offset, req->nr_tx_rings, req->nr_rx_rings, 2528 req->nr_arg2); 2529 for (i = 0; i <= req->nr_tx_rings; i++) { 2530 struct netmap_ring *ring = NETMAP_TXRING(nifp, i); 2531 D(" TX%d at 0x%p slots %d", i, 2532 (void *)((char *)ring - (char *)nifp), ring->num_slots); 2533 } 2534 for (i = 0; i <= req->nr_rx_rings; i++) { 2535 struct netmap_ring *ring = NETMAP_RXRING(nifp, i); 2536 D(" RX%d at 0x%p slots %d", i, 2537 (void *)((char *)ring - (char *)nifp), ring->num_slots); 2538 } 2539 } 2540 2541 /* Print some debug information. */ 2542 fprintf(stdout, 2543 "%s %s: %d queues, %d threads and %d cpus.\n", 2544 (g.td_type == TD_TYPE_SENDER) ? "Sending on" : 2545 ((g.td_type == TD_TYPE_RECEIVER) ? "Receiving from" : 2546 "Working on"), 2547 g.ifname, 2548 devqueues, 2549 g.nthreads, 2550 g.cpus); 2551 if (g.td_type == TD_TYPE_SENDER) { 2552 fprintf(stdout, "%s -> %s (%s -> %s)\n", 2553 g.src_ip.name, g.dst_ip.name, 2554 g.src_mac.name, g.dst_mac.name); 2555 } 2556 2557 out: 2558 /* Exit if something went wrong. */ 2559 if (g.main_fd < 0) { 2560 D("aborting"); 2561 usage(); 2562 } 2563 } 2564 2565 2566 if (g.options) { 2567 D("--- SPECIAL OPTIONS:%s%s%s%s%s%s\n", 2568 g.options & OPT_PREFETCH ? " prefetch" : "", 2569 g.options & OPT_ACCESS ? " access" : "", 2570 g.options & OPT_MEMCPY ? " memcpy" : "", 2571 g.options & OPT_INDIRECT ? " indirect" : "", 2572 g.options & OPT_COPY ? " copy" : "", 2573 g.options & OPT_RUBBISH ? 
" rubbish " : ""); 2574 } 2575 2576 g.tx_period.tv_sec = g.tx_period.tv_nsec = 0; 2577 if (g.tx_rate > 0) { 2578 /* try to have at least something every second, 2579 * reducing the burst size to some 0.01s worth of data 2580 * (but no less than one full set of fragments) 2581 */ 2582 uint64_t x; 2583 int lim = (g.tx_rate)/300; 2584 if (g.burst > lim) 2585 g.burst = lim; 2586 if (g.burst < g.frags) 2587 g.burst = g.frags; 2588 x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate; 2589 g.tx_period.tv_nsec = x; 2590 g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000; 2591 g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000; 2592 } 2593 if (g.td_type == TD_TYPE_SENDER) 2594 D("Sending %d packets every %ld.%09ld s", 2595 g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec); 2596 /* Wait for PHY reset. */ 2597 D("Wait %d secs for phy reset", wait_link); 2598 sleep(wait_link); 2599 D("Ready..."); 2600 2601 /* Install ^C handler. */ 2602 global_nthreads = g.nthreads; 2603 sigemptyset(&ss); 2604 sigaddset(&ss, SIGINT); 2605 /* block SIGINT now, so that all created threads will inherit the mask */ 2606 if (pthread_sigmask(SIG_BLOCK, &ss, NULL) < 0) { 2607 D("failed to block SIGINT: %s", strerror(errno)); 2608 } 2609 start_threads(&g); 2610 /* Install the handler and re-enable SIGINT for the main thread */ 2611 sa.sa_handler = sigint_h; 2612 if (sigaction(SIGINT, &sa, NULL) < 0) { 2613 D("failed to install ^C handler: %s", strerror(errno)); 2614 } 2615 2616 if (pthread_sigmask(SIG_UNBLOCK, &ss, NULL) < 0) { 2617 D("failed to re-enable SIGINT: %s", strerror(errno)); 2618 } 2619 main_thread(&g); 2620 free(targs); 2621 return 0; 2622 } 2623 2624 /* end of file */ 2625