1 /* 2 * Copyright (c) 2008 Damien Miller <djm@mindrot.org> 3 * Copyright (c) 2011 Christiano F. Haesbaert <haesbaert@haesbaert.org> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include <sys/types.h> 19 #include <sys/time.h> 20 #include <sys/socket.h> 21 #include <sys/socketvar.h> 22 #include <sys/resource.h> 23 #include <sys/queue.h> 24 25 #include <net/route.h> 26 27 #include <netinet/in.h> 28 #include <netinet/in_systm.h> 29 #include <netinet/ip.h> 30 #include <netinet/tcp.h> 31 #include <netinet/tcp_timer.h> 32 #include <netinet/tcp_fsm.h> 33 #include <netinet/in_pcb.h> 34 #include <netinet/tcp_var.h> 35 36 #include <arpa/inet.h> 37 38 #include <unistd.h> 39 #include <limits.h> 40 #include <stdlib.h> 41 #include <stdio.h> 42 #include <string.h> 43 #include <errno.h> 44 #include <event.h> 45 #include <netdb.h> 46 #include <signal.h> 47 #include <err.h> 48 #include <fcntl.h> 49 #include <poll.h> 50 51 #include <kvm.h> 52 #include <nlist.h> 53 54 #define DEFAULT_PORT "12345" 55 #define DEFAULT_STATS_INTERVAL 1000 /* ms */ 56 #define DEFAULT_BUF (256 * 1024) 57 #define DEFAULT_UDP_PKT (1500 - 28) /* TODO don't hardcode this */ 58 #define TCP_MODE !ptb->uflag 59 #define UDP_MODE ptb->uflag 60 #define MAX_FD 1024 61 62 /* Our tcpbench globals */ 63 struct { 64 u_int Vflag; /* rtableid */ 65 int Sflag; /* Socket buffer size (tcp mode) */ 66 u_int rflag; /* Report rate (ms) */ 67 int sflag; /* True if server */ 68 int vflag; /* Verbose */ 69 int uflag; /* UDP mode */ 70 kvm_t *kvmh; /* Kvm handler */ 71 char **kvars; /* Kvm enabled vars */ 72 u_long ktcbtab; /* Ktcb */ 73 char *dummybuf; /* IO buffer */ 74 size_t dummybuf_len; /* IO buffer len */ 75 } tcpbench, *ptb; 76 77 /* stats for a single tcp connection, udp uses only one */ 78 struct statctx { 79 TAILQ_ENTRY(statctx) entry; 80 struct timeval t_start, t_last; 81 unsigned long long bytes; 82 int fd; 83 char *buf; 84 size_t buflen; 85 struct event ev; 86 /* TCP only */ 87 u_long tcp_tcbaddr; 88 /* UDP only */ 89 u_long udp_slice_pkts; 90 }; 91 92 static void signal_handler(int, short, void *); 93 static void saddr_ntop(const struct sockaddr *, socklen_t, char *, size_t); 94 static void drop_gid(void); 95 static void set_slice_timer(int); 96 static void print_tcp_header(void); 97 static void kget(u_long, void *, size_t); 98 static u_long kfind_tcb(int); 99 static void kupdate_stats(u_long, struct inpcb *, struct tcpcb *, 100 struct socket *); 101 static void list_kvars(void); 102 static void check_kvar(const char *); 103 static char ** check_prepare_kvars(char *); 104 static void stats_prepare(struct statctx *); 105 static void tcp_stats_display(unsigned long long, long double, float, 106 struct statctx *, struct inpcb *, struct tcpcb *, struct socket *); 107 static void tcp_process_slice(int, short, void *); 108 static void tcp_server_handle_sc(int, short, void *); 109 static void tcp_server_accept(int, short, void *); 110 static void server_init(struct addrinfo *, struct statctx *); 111 static void client_handle_sc(int, short, void *); 112 static void client_init(struct addrinfo *, int, struct statctx *); 113 static int clock_gettime_tv(clockid_t, struct timeval *); 114 static void udp_server_handle_sc(int, short, void *); 115 static void udp_process_slice(int, short, void *); 116 117 /* 118 * We account the mainstats here, that is the stats 119 * for all connections, all variables starting with slice 120 * are used to account information for the timeslice 121 * between each output. Peak variables record the highest 122 * between all slices so far. 123 */ 124 static struct { 125 unsigned long long slice_bytes; /* bytes for last slice */ 126 long double peak_mbps; /* peak mbps so far */ 127 int nconns; /* connected clients */ 128 struct event timer; /* process timer */ 129 } mainstats; 130 131 /* When adding variables, also add to tcp_stats_display() */ 132 static const char *allowed_kvars[] = { 133 "inpcb.inp_flags", 134 "sockb.so_rcv.sb_cc", 135 "sockb.so_rcv.sb_wat", 136 "sockb.so_rcv.sb_hiwat", 137 "sockb.so_snd.sb_cc", 138 "sockb.so_snd.sb_wat", 139 "sockb.so_snd.sb_hiwat", 140 "tcpcb.snd_una", 141 "tcpcb.snd_nxt", 142 "tcpcb.snd_wl1", 143 "tcpcb.snd_wl2", 144 "tcpcb.snd_wnd", 145 "tcpcb.rcv_wnd", 146 "tcpcb.rcv_nxt", 147 "tcpcb.rcv_adv", 148 "tcpcb.snd_max", 149 "tcpcb.snd_cwnd", 150 "tcpcb.snd_ssthresh", 151 "tcpcb.t_rcvtime", 152 "tcpcb.t_rtttime", 153 "tcpcb.t_rtseq", 154 "tcpcb.t_srtt", 155 "tcpcb.t_rttvar", 156 "tcpcb.t_rttmin", 157 "tcpcb.max_sndwnd", 158 "tcpcb.snd_scale", 159 "tcpcb.rcv_scale", 160 "tcpcb.last_ack_sent", 161 "tcpcb.rfbuf_cnt", 162 "tcpcb.rfbuf_ts", 163 "tcpcb.ts_recent_age", 164 "tcpcb.ts_recent", 165 NULL 166 }; 167 168 TAILQ_HEAD(, statctx) sc_queue; 169 170 static void __dead 171 usage(void) 172 { 173 fprintf(stderr, 174 "usage: tcpbench -l\n" 175 " tcpbench [-uv] [-B buf] [-k kvars] [-n connections] [-p port]\n" 176 " [-r interval] [-S space] [-V rtable] hostname\n" 177 " tcpbench -s [-uv] [-B buf] [-k kvars] [-p port]\n" 178 " [-r interval] [-S space] [-V rtable]\n"); 179 exit(1); 180 } 181 182 static void 183 signal_handler(int sig, short event, void *bula) 184 { 185 /* 186 * signal handler rules don't apply, libevent decouples for us 187 */ 188 switch (sig) { 189 case SIGINT: 190 case SIGTERM: 191 case SIGHUP: 192 warnx("Terminated by signal %d", sig); 193 exit(0); 194 break; /* NOTREACHED */ 195 default: 196 errx(1, "unexpected signal %d", sig); 197 break; /* NOTREACHED */ 198 } 199 } 200 201 static void 202 saddr_ntop(const struct sockaddr *addr, socklen_t alen, char *buf, size_t len) 203 { 204 char hbuf[NI_MAXHOST], pbuf[NI_MAXSERV]; 205 int herr; 206 207 if ((herr = getnameinfo(addr, alen, hbuf, sizeof(hbuf), 208 pbuf, sizeof(pbuf), NI_NUMERICHOST|NI_NUMERICSERV)) != 0) { 209 if (herr == EAI_SYSTEM) 210 err(1, "getnameinfo"); 211 else 212 errx(1, "getnameinfo: %s", gai_strerror(herr)); 213 } 214 snprintf(buf, len, "[%s]:%s", hbuf, pbuf); 215 } 216 217 static void 218 drop_gid(void) 219 { 220 gid_t gid; 221 222 gid = getgid(); 223 if (setresgid(gid, gid, gid) == -1) 224 err(1, "setresgid"); 225 } 226 227 static void 228 set_slice_timer(int on) 229 { 230 struct timeval tv; 231 232 if (ptb->rflag == 0) 233 return; 234 235 if (on) { 236 if (evtimer_pending(&mainstats.timer, NULL)) 237 return; 238 timerclear(&tv); 239 /* XXX Is there a better way to do this ? */ 240 tv.tv_sec = ptb->rflag / 1000; 241 tv.tv_usec = (ptb->rflag % 1000) * 1000; 242 243 evtimer_add(&mainstats.timer, &tv); 244 } else if (evtimer_pending(&mainstats.timer, NULL)) 245 evtimer_del(&mainstats.timer); 246 } 247 248 static int 249 clock_gettime_tv(clockid_t clock_id, struct timeval *tv) 250 { 251 struct timespec ts; 252 253 if (clock_gettime(clock_id, &ts) == -1) 254 return (-1); 255 256 TIMESPEC_TO_TIMEVAL(tv, &ts); 257 258 return (0); 259 } 260 261 static void 262 print_tcp_header(void) 263 { 264 char **kv; 265 266 printf("%12s %14s %12s %8s ", "elapsed_ms", "bytes", "mbps", 267 "bwidth"); 268 for (kv = ptb->kvars; ptb->kvars != NULL && *kv != NULL; kv++) 269 printf("%s%s", kv != ptb->kvars ? "," : "", *kv); 270 printf("\n"); 271 } 272 273 static void 274 kget(u_long addr, void *buf, size_t size) 275 { 276 if (kvm_read(ptb->kvmh, addr, buf, size) != (ssize_t)size) 277 errx(1, "kvm_read: %s", kvm_geterr(ptb->kvmh)); 278 } 279 280 static u_long 281 kfind_tcb(int sock) 282 { 283 struct inpcbtable tcbtab; 284 struct inpcb *head, *next, *prev; 285 struct inpcb inpcb; 286 struct tcpcb tcpcb; 287 288 struct sockaddr_storage me, them; 289 socklen_t melen, themlen; 290 struct sockaddr_in *in4; 291 struct sockaddr_in6 *in6; 292 char tmp1[64], tmp2[64]; 293 int nretry; 294 295 nretry = 10; 296 melen = themlen = sizeof(struct sockaddr_storage); 297 if (getsockname(sock, (struct sockaddr *)&me, &melen) == -1) 298 err(1, "getsockname"); 299 if (getpeername(sock, (struct sockaddr *)&them, &themlen) == -1) 300 err(1, "getpeername"); 301 if (me.ss_family != them.ss_family) 302 errx(1, "%s: me.ss_family != them.ss_family", __func__); 303 if (me.ss_family != AF_INET && me.ss_family != AF_INET6) 304 errx(1, "%s: unknown socket family", __func__); 305 if (ptb->vflag >= 2) { 306 saddr_ntop((struct sockaddr *)&me, me.ss_len, 307 tmp1, sizeof(tmp1)); 308 saddr_ntop((struct sockaddr *)&them, them.ss_len, 309 tmp2, sizeof(tmp2)); 310 fprintf(stderr, "Our socket local %s remote %s\n", tmp1, tmp2); 311 } 312 if (ptb->vflag >= 2) 313 fprintf(stderr, "Using PCB table at %lu\n", ptb->ktcbtab); 314 retry: 315 kget(ptb->ktcbtab, &tcbtab, sizeof(tcbtab)); 316 prev = head = (struct inpcb *)&CIRCLEQ_FIRST( 317 &((struct inpcbtable *)ptb->ktcbtab)->inpt_queue); 318 next = CIRCLEQ_FIRST(&tcbtab.inpt_queue); 319 320 if (ptb->vflag >= 2) 321 fprintf(stderr, "PCB head at %p\n", head); 322 while (next != head) { 323 if (ptb->vflag >= 2) 324 fprintf(stderr, "Checking PCB %p\n", next); 325 kget((u_long)next, &inpcb, sizeof(inpcb)); 326 if (CIRCLEQ_PREV(&inpcb, inp_queue) != prev) { 327 if (nretry--) { 328 warnx("pcb prev pointer insane"); 329 goto retry; 330 } 331 else 332 errx(1, "pcb prev pointer insane," 333 " all attempts exausted"); 334 } 335 prev = next; 336 next = CIRCLEQ_NEXT(&inpcb, inp_queue); 337 338 if (me.ss_family == AF_INET) { 339 if ((inpcb.inp_flags & INP_IPV6) != 0) { 340 if (ptb->vflag >= 2) 341 fprintf(stderr, "Skip: INP_IPV6"); 342 continue; 343 } 344 if (ptb->vflag >= 2) { 345 inet_ntop(AF_INET, &inpcb.inp_laddr, 346 tmp1, sizeof(tmp1)); 347 inet_ntop(AF_INET, &inpcb.inp_faddr, 348 tmp2, sizeof(tmp2)); 349 fprintf(stderr, "PCB %p local: [%s]:%d " 350 "remote: [%s]:%d\n", prev, 351 tmp1, inpcb.inp_lport, 352 tmp2, inpcb.inp_fport); 353 } 354 in4 = (struct sockaddr_in *)&me; 355 if (memcmp(&in4->sin_addr, &inpcb.inp_laddr, 356 sizeof(struct in_addr)) != 0 || 357 in4->sin_port != inpcb.inp_lport) 358 continue; 359 in4 = (struct sockaddr_in *)&them; 360 if (memcmp(&in4->sin_addr, &inpcb.inp_faddr, 361 sizeof(struct in_addr)) != 0 || 362 in4->sin_port != inpcb.inp_fport) 363 continue; 364 } else { 365 if ((inpcb.inp_flags & INP_IPV6) == 0) 366 continue; 367 if (ptb->vflag >= 2) { 368 inet_ntop(AF_INET6, &inpcb.inp_laddr6, 369 tmp1, sizeof(tmp1)); 370 inet_ntop(AF_INET6, &inpcb.inp_faddr6, 371 tmp2, sizeof(tmp2)); 372 fprintf(stderr, "PCB %p local: [%s]:%d " 373 "remote: [%s]:%d\n", prev, 374 tmp1, inpcb.inp_lport, 375 tmp2, inpcb.inp_fport); 376 } 377 in6 = (struct sockaddr_in6 *)&me; 378 if (memcmp(&in6->sin6_addr, &inpcb.inp_laddr6, 379 sizeof(struct in6_addr)) != 0 || 380 in6->sin6_port != inpcb.inp_lport) 381 continue; 382 in6 = (struct sockaddr_in6 *)&them; 383 if (memcmp(&in6->sin6_addr, &inpcb.inp_faddr6, 384 sizeof(struct in6_addr)) != 0 || 385 in6->sin6_port != inpcb.inp_fport) 386 continue; 387 } 388 kget((u_long)inpcb.inp_ppcb, &tcpcb, sizeof(tcpcb)); 389 if (tcpcb.t_state != TCPS_ESTABLISHED) { 390 if (ptb->vflag >= 2) 391 fprintf(stderr, "Not established\n"); 392 continue; 393 } 394 if (ptb->vflag >= 2) 395 fprintf(stderr, "Found PCB at %p\n", prev); 396 return ((u_long)prev); 397 } 398 399 errx(1, "No matching PCB found"); 400 } 401 402 static void 403 kupdate_stats(u_long tcbaddr, struct inpcb *inpcb, 404 struct tcpcb *tcpcb, struct socket *sockb) 405 { 406 kget(tcbaddr, inpcb, sizeof(*inpcb)); 407 kget((u_long)inpcb->inp_ppcb, tcpcb, sizeof(*tcpcb)); 408 kget((u_long)inpcb->inp_socket, sockb, sizeof(*sockb)); 409 } 410 411 static void 412 check_kvar(const char *var) 413 { 414 u_int i; 415 416 for (i = 0; allowed_kvars[i] != NULL; i++) 417 if (strcmp(allowed_kvars[i], var) == 0) 418 return; 419 errx(1, "Unrecognised kvar: %s", var); 420 } 421 422 static void 423 list_kvars(void) 424 { 425 u_int i; 426 427 fprintf(stderr, "Supported kernel variables:\n"); 428 for (i = 0; allowed_kvars[i] != NULL; i++) 429 fprintf(stderr, "\t%s\n", allowed_kvars[i]); 430 } 431 432 static char ** 433 check_prepare_kvars(char *list) 434 { 435 char *item, **ret = NULL; 436 u_int n = 0; 437 438 while ((item = strsep(&list, ", \t\n")) != NULL) { 439 check_kvar(item); 440 if ((ret = realloc(ret, sizeof(*ret) * (++n + 1))) == NULL) 441 errx(1, "realloc(kvars)"); 442 if ((ret[n - 1] = strdup(item)) == NULL) 443 errx(1, "strdup"); 444 ret[n] = NULL; 445 } 446 return (ret); 447 } 448 449 static void 450 stats_prepare(struct statctx *sc) 451 { 452 sc->buf = ptb->dummybuf; 453 sc->buflen = ptb->dummybuf_len; 454 455 if (ptb->kvars) 456 sc->tcp_tcbaddr = kfind_tcb(sc->fd); 457 if (clock_gettime_tv(CLOCK_MONOTONIC, &sc->t_start) == -1) 458 err(1, "clock_gettime_tv"); 459 sc->t_last = sc->t_start; 460 461 } 462 463 static void 464 tcp_stats_display(unsigned long long total_elapsed, long double mbps, 465 float bwperc, struct statctx *sc, struct inpcb *inpcb, 466 struct tcpcb *tcpcb, struct socket *sockb) 467 { 468 int j; 469 470 printf("%12llu %14llu %12.3Lf %7.2f%% ", total_elapsed, sc->bytes, 471 mbps, bwperc); 472 473 if (ptb->kvars != NULL) { 474 kupdate_stats(sc->tcp_tcbaddr, inpcb, tcpcb, 475 sockb); 476 477 for (j = 0; ptb->kvars[j] != NULL; j++) { 478 #define S(a) #a 479 #define P(b, v, f) \ 480 if (strcmp(ptb->kvars[j], S(b.v)) == 0) { \ 481 printf("%s"f, j > 0 ? "," : "", b->v); \ 482 continue; \ 483 } 484 P(inpcb, inp_flags, "0x%08x") 485 P(sockb, so_rcv.sb_cc, "%lu") 486 P(sockb, so_rcv.sb_wat, "%lu") 487 P(sockb, so_rcv.sb_hiwat, "%lu") 488 P(sockb, so_snd.sb_cc, "%lu") 489 P(sockb, so_snd.sb_wat, "%lu") 490 P(sockb, so_snd.sb_hiwat, "%lu") 491 P(tcpcb, snd_una, "%u") 492 P(tcpcb, snd_nxt, "%u") 493 P(tcpcb, snd_wl1, "%u") 494 P(tcpcb, snd_wl2, "%u") 495 P(tcpcb, snd_wnd, "%lu") 496 P(tcpcb, rcv_wnd, "%lu") 497 P(tcpcb, rcv_nxt, "%u") 498 P(tcpcb, rcv_adv, "%u") 499 P(tcpcb, snd_max, "%u") 500 P(tcpcb, snd_cwnd, "%lu") 501 P(tcpcb, snd_ssthresh, "%lu") 502 P(tcpcb, t_rcvtime, "%u") 503 P(tcpcb, t_rtttime, "%u") 504 P(tcpcb, t_rtseq, "%u") 505 P(tcpcb, t_srtt, "%hu") 506 P(tcpcb, t_rttvar, "%hu") 507 P(tcpcb, t_rttmin, "%hu") 508 P(tcpcb, max_sndwnd, "%lu") 509 P(tcpcb, snd_scale, "%u") 510 P(tcpcb, rcv_scale, "%u") 511 P(tcpcb, last_ack_sent, "%u") 512 P(tcpcb, rfbuf_cnt, "%u") 513 P(tcpcb, rfbuf_ts, "%u") 514 P(tcpcb, ts_recent_age, "%u") 515 P(tcpcb, ts_recent, "%u") 516 #undef S 517 #undef P 518 } 519 } 520 printf("\n"); 521 } 522 523 static void 524 tcp_process_slice(int fd, short event, void *bula) 525 { 526 unsigned long long total_elapsed, since_last; 527 long double mbps, slice_mbps = 0; 528 float bwperc; 529 struct statctx *sc; 530 struct timeval t_cur, t_diff; 531 struct inpcb inpcb; 532 struct tcpcb tcpcb; 533 struct socket sockb; 534 535 TAILQ_FOREACH(sc, &sc_queue, entry) { 536 if (clock_gettime_tv(CLOCK_MONOTONIC, &t_cur) == -1) 537 err(1, "clock_gettime_tv"); 538 if (ptb->kvars != NULL) /* process kernel stats */ 539 kupdate_stats(sc->tcp_tcbaddr, &inpcb, &tcpcb, 540 &sockb); 541 542 timersub(&t_cur, &sc->t_start, &t_diff); 543 total_elapsed = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000; 544 timersub(&t_cur, &sc->t_last, &t_diff); 545 since_last = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000; 546 bwperc = (sc->bytes * 100.0) / mainstats.slice_bytes; 547 mbps = (sc->bytes * 8) / (since_last * 1000.0); 548 slice_mbps += mbps; 549 550 tcp_stats_display(total_elapsed, mbps, bwperc, sc, 551 &inpcb, &tcpcb, &sockb); 552 553 sc->t_last = t_cur; 554 sc->bytes = 0; 555 } 556 557 /* process stats for this slice */ 558 if (slice_mbps > mainstats.peak_mbps) 559 mainstats.peak_mbps = slice_mbps; 560 printf("Conn: %3d Mbps: %12.3Lf Peak Mbps: %12.3Lf Avg Mbps: %12.3Lf\n", 561 mainstats.nconns, slice_mbps, mainstats.peak_mbps, 562 slice_mbps / mainstats.nconns); 563 mainstats.slice_bytes = 0; 564 565 set_slice_timer(mainstats.nconns > 0); 566 } 567 568 static void 569 udp_process_slice(int fd, short event, void *v_sc) 570 { 571 struct statctx *sc = v_sc; 572 unsigned long long total_elapsed, since_last; 573 long double slice_mbps, pps; 574 struct timeval t_cur, t_diff; 575 576 if (clock_gettime_tv(CLOCK_MONOTONIC, &t_cur) == -1) 577 err(1, "clock_gettime_tv"); 578 /* Calculate pps */ 579 timersub(&t_cur, &sc->t_start, &t_diff); 580 total_elapsed = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000; 581 timersub(&t_cur, &sc->t_last, &t_diff); 582 since_last = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000; 583 slice_mbps = (sc->bytes * 8) / (since_last * 1000.0); 584 pps = (sc->udp_slice_pkts * 1000) / since_last; 585 if (slice_mbps > mainstats.peak_mbps) 586 mainstats.peak_mbps = slice_mbps; 587 printf("Elapsed: %11llu Mbps: %11.3Lf Peak Mbps: %11.3Lf %s PPS: %10.3Lf\n", 588 total_elapsed, slice_mbps, mainstats.peak_mbps, 589 ptb->sflag ? "Rx" : "Tx", pps); 590 591 /* Clean up this slice time */ 592 sc->t_last = t_cur; 593 sc->bytes = 0; 594 sc->udp_slice_pkts = 0; 595 set_slice_timer(1); 596 } 597 598 static void 599 udp_server_handle_sc(int fd, short event, void *v_sc) 600 { 601 ssize_t n; 602 struct statctx *sc = v_sc; 603 604 again: 605 n = read(fd, ptb->dummybuf, ptb->dummybuf_len); 606 if (n == 0) 607 return; 608 else if (n == -1) { 609 if (errno == EINTR) 610 goto again; 611 else if (errno == EWOULDBLOCK) 612 return; 613 warn("fd %d read error", fd); 614 return; 615 } 616 617 if (ptb->vflag >= 3) 618 fprintf(stderr, "read: %zd bytes\n", n); 619 /* If this was our first packet, start slice timer */ 620 if (mainstats.peak_mbps == 0) 621 set_slice_timer(1); 622 /* Account packet */ 623 sc->udp_slice_pkts++; 624 sc->bytes += n; 625 } 626 627 static void 628 tcp_server_handle_sc(int fd, short event, void *v_sc) 629 { 630 struct statctx *sc = v_sc; 631 ssize_t n; 632 633 again: 634 n = read(sc->fd, sc->buf, sc->buflen); 635 if (n == -1) { 636 if (errno == EINTR) 637 goto again; 638 else if (errno == EWOULDBLOCK) 639 return; 640 warn("fd %d read error", sc->fd); 641 return; 642 } else if (n == 0) { 643 if (ptb->vflag) 644 fprintf(stderr, "%8d closed by remote end\n", sc->fd); 645 close(sc->fd); 646 TAILQ_REMOVE(&sc_queue, sc, entry); 647 free(sc); 648 mainstats.nconns--; 649 set_slice_timer(mainstats.nconns > 0); 650 return; 651 } 652 if (ptb->vflag >= 3) 653 fprintf(stderr, "read: %zd bytes\n", n); 654 sc->bytes += n; 655 mainstats.slice_bytes += n; 656 } 657 658 static void 659 tcp_server_accept(int fd, short event, void *bula) 660 { 661 int sock, r; 662 struct statctx *sc; 663 struct sockaddr_storage ss; 664 socklen_t sslen; 665 char tmp[128]; 666 667 sslen = sizeof(ss); 668 again: 669 if ((sock = accept(fd, (struct sockaddr *)&ss, &sslen)) == -1) { 670 if (errno == EINTR) 671 goto again; 672 warn("accept"); 673 return; 674 } 675 saddr_ntop((struct sockaddr *)&ss, sslen, 676 tmp, sizeof(tmp)); 677 if ((r = fcntl(sock, F_GETFL, 0)) == -1) 678 err(1, "fcntl(F_GETFL)"); 679 r |= O_NONBLOCK; 680 if (fcntl(sock, F_SETFL, r) == -1) 681 err(1, "fcntl(F_SETFL, O_NONBLOCK)"); 682 /* Alloc client structure and register reading callback */ 683 if ((sc = calloc(1, sizeof(*sc))) == NULL) 684 err(1, "calloc"); 685 sc->fd = sock; 686 stats_prepare(sc); 687 event_set(&sc->ev, sc->fd, EV_READ | EV_PERSIST, 688 tcp_server_handle_sc, sc); 689 event_add(&sc->ev, NULL); 690 TAILQ_INSERT_TAIL(&sc_queue, sc, entry); 691 mainstats.nconns++; 692 set_slice_timer(mainstats.nconns > 0); 693 if (ptb->vflag) 694 warnx("Accepted connection from %s, fd = %d\n", tmp, sc->fd); 695 } 696 697 static void 698 server_init(struct addrinfo *aitop, struct statctx *udp_sc) 699 { 700 char tmp[128]; 701 int sock, on = 1; 702 struct addrinfo *ai; 703 struct event *ev; 704 nfds_t lnfds; 705 706 if (setpgid(0, 0) == -1) 707 err(1, "setpgid"); 708 709 lnfds = 0; 710 for (ai = aitop; ai != NULL; ai = ai->ai_next) { 711 saddr_ntop(ai->ai_addr, ai->ai_addrlen, tmp, sizeof(tmp)); 712 if (ptb->vflag) 713 fprintf(stderr, "Try to bind to %s\n", tmp); 714 if ((sock = socket(ai->ai_family, ai->ai_socktype, 715 ai->ai_protocol)) == -1) { 716 if (ai->ai_next == NULL) 717 err(1, "socket"); 718 if (ptb->vflag) 719 warn("socket"); 720 continue; 721 } 722 if (ptb->Vflag) { 723 if (setsockopt(sock, SOL_SOCKET, SO_RTABLE, 724 &ptb->Vflag, sizeof(ptb->Vflag)) == -1) { 725 if (errno == ENOPROTOOPT) 726 warn("set rtable"); 727 else 728 err(1, "setsockopt SO_RTABLE"); 729 } 730 } 731 if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, 732 &on, sizeof(on)) == -1) 733 warn("reuse port"); 734 if (bind(sock, ai->ai_addr, ai->ai_addrlen) != 0) { 735 if (ai->ai_next == NULL) 736 err(1, "bind"); 737 if (ptb->vflag) 738 warn("bind"); 739 close(sock); 740 continue; 741 } 742 if (ptb->Sflag) { 743 if (setsockopt(sock, SOL_SOCKET, SO_RCVBUF, 744 &ptb->Sflag, sizeof(ptb->Sflag)) == -1) 745 warn("set receive buffer size"); 746 } 747 if (TCP_MODE) { 748 if (listen(sock, 64) == -1) { 749 if (ai->ai_next == NULL) 750 err(1, "listen"); 751 if (ptb->vflag) 752 warn("listen"); 753 close(sock); 754 continue; 755 } 756 } 757 if ((ev = calloc(1, sizeof(*ev))) == NULL) 758 err(1, "calloc"); 759 if (UDP_MODE) 760 event_set(ev, sock, EV_READ | EV_PERSIST, 761 udp_server_handle_sc, udp_sc); 762 else 763 event_set(ev, sock, EV_READ | EV_PERSIST, 764 tcp_server_accept, NULL); 765 event_add(ev, NULL); 766 if (ptb->vflag >= 3) 767 fprintf(stderr, "bound to fd %d\n", sock); 768 lnfds++; 769 } 770 freeaddrinfo(aitop); 771 if (lnfds == 0) 772 errx(1, "No working listen addresses found"); 773 } 774 775 static void 776 client_handle_sc(int fd, short event, void *v_sc) 777 { 778 struct statctx *sc = v_sc; 779 ssize_t n; 780 781 again: 782 if ((n = write(sc->fd, sc->buf, sc->buflen)) == -1) { 783 if (errno == EINTR || errno == EAGAIN || 784 (UDP_MODE && errno == ENOBUFS)) 785 goto again; 786 err(1, "write"); 787 } 788 if (TCP_MODE && n == 0) { 789 warnx("Remote end closed connection"); 790 exit(1); 791 } 792 if (ptb->vflag >= 3) 793 warnx("write: %zd bytes\n", n); 794 sc->bytes += n; 795 mainstats.slice_bytes += n; 796 if (UDP_MODE) 797 sc->udp_slice_pkts++; 798 } 799 800 static void 801 client_init(struct addrinfo *aitop, int nconn, struct statctx *udp_sc) 802 { 803 struct statctx *sc; 804 struct addrinfo *ai; 805 char tmp[128]; 806 int i, r, sock; 807 808 sc = udp_sc; 809 for (i = 0; i < nconn; i++) { 810 for (sock = -1, ai = aitop; ai != NULL; ai = ai->ai_next) { 811 saddr_ntop(ai->ai_addr, ai->ai_addrlen, tmp, 812 sizeof(tmp)); 813 if (ptb->vflag && i == 0) 814 fprintf(stderr, "Trying %s\n", tmp); 815 if ((sock = socket(ai->ai_family, ai->ai_socktype, 816 ai->ai_protocol)) == -1) { 817 if (ai->ai_next == NULL) 818 err(1, "socket"); 819 if (ptb->vflag) 820 warn("socket"); 821 continue; 822 } 823 if (ptb->Vflag) { 824 if (setsockopt(sock, SOL_SOCKET, SO_RTABLE, 825 &ptb->Vflag, sizeof(ptb->Vflag)) == -1) { 826 if (errno == ENOPROTOOPT) 827 warn("set rtable"); 828 else 829 err(1, "setsockopt SO_RTABLE"); 830 } 831 } 832 if (ptb->Sflag) { 833 if (setsockopt(sock, SOL_SOCKET, SO_SNDBUF, 834 &ptb->Sflag, sizeof(ptb->Sflag)) == -1) 835 warn("set TCP send buffer size"); 836 } 837 if (connect(sock, ai->ai_addr, ai->ai_addrlen) != 0) { 838 if (ai->ai_next == NULL) 839 err(1, "connect"); 840 if (ptb->vflag) 841 warn("connect"); 842 close(sock); 843 sock = -1; 844 continue; 845 } 846 break; 847 } 848 if (sock == -1) 849 errx(1, "No host found"); 850 if ((r = fcntl(sock, F_GETFL, 0)) == -1) 851 err(1, "fcntl(F_GETFL)"); 852 r |= O_NONBLOCK; 853 if (fcntl(sock, F_SETFL, r) == -1) 854 err(1, "fcntl(F_SETFL, O_NONBLOCK)"); 855 /* Alloc and prepare stats */ 856 if (TCP_MODE) { 857 if ((sc = calloc(1, sizeof(*sc))) == NULL) 858 err(1, "calloc"); 859 } 860 sc->fd = sock; 861 stats_prepare(sc); 862 event_set(&sc->ev, sc->fd, EV_WRITE | EV_PERSIST, 863 client_handle_sc, sc); 864 event_add(&sc->ev, NULL); 865 TAILQ_INSERT_TAIL(&sc_queue, sc, entry); 866 mainstats.nconns++; 867 set_slice_timer(mainstats.nconns > 0); 868 if (UDP_MODE) 869 break; 870 } 871 freeaddrinfo(aitop); 872 873 if (ptb->vflag && nconn > 1) 874 fprintf(stderr, "%u connections established\n", nconn); 875 } 876 877 int 878 main(int argc, char **argv) 879 { 880 extern int optind; 881 extern char *optarg; 882 883 char kerr[_POSIX2_LINE_MAX], *tmp; 884 struct addrinfo *aitop, hints; 885 const char *errstr; 886 struct rlimit rl; 887 int ch, herr, nconn; 888 struct nlist nl[] = { { "_tcbtable" }, { "" } }; 889 const char *host = NULL, *port = DEFAULT_PORT; 890 struct event ev_sigint, ev_sigterm, ev_sighup; 891 struct statctx *udp_sc = NULL; 892 893 /* Init world */ 894 ptb = &tcpbench; 895 ptb->dummybuf_len = 0; 896 ptb->Sflag = ptb->sflag = ptb->vflag = ptb->Vflag = 0; 897 ptb->kvmh = NULL; 898 ptb->kvars = NULL; 899 ptb->rflag = DEFAULT_STATS_INTERVAL; 900 nconn = 1; 901 902 while ((ch = getopt(argc, argv, "B:hlk:n:p:r:sS:uvV:")) != -1) { 903 switch (ch) { 904 case 'l': 905 list_kvars(); 906 exit(0); 907 case 'k': 908 if ((tmp = strdup(optarg)) == NULL) 909 errx(1, "strdup"); 910 ptb->kvars = check_prepare_kvars(tmp); 911 free(tmp); 912 break; 913 case 'r': 914 ptb->rflag = strtonum(optarg, 0, 60 * 60 * 24 * 1000, 915 &errstr); 916 if (errstr != NULL) 917 errx(1, "statistics interval is %s: %s", 918 errstr, optarg); 919 break; 920 case 'p': 921 port = optarg; 922 break; 923 case 's': 924 ptb->sflag = 1; 925 break; 926 case 'S': 927 ptb->Sflag = strtonum(optarg, 0, 1024*1024*1024, 928 &errstr); 929 if (errstr != NULL) 930 errx(1, "receive space interval is %s: %s", 931 errstr, optarg); 932 break; 933 case 'B': 934 ptb->dummybuf_len = strtonum(optarg, 0, 1024*1024*1024, 935 &errstr); 936 if (errstr != NULL) 937 errx(1, "read/write buffer size is %s: %s", 938 errstr, optarg); 939 break; 940 case 'v': 941 ptb->vflag++; 942 break; 943 case 'V': 944 ptb->Vflag = (unsigned int)strtonum(optarg, 0, 945 RT_TABLEID_MAX, &errstr); 946 if (errstr) 947 errx(1, "rtable value is %s: %s", 948 errstr, optarg); 949 break; 950 case 'n': 951 nconn = strtonum(optarg, 0, 65535, &errstr); 952 if (errstr != NULL) 953 errx(1, "number of connections is %s: %s", 954 errstr, optarg); 955 break; 956 case 'u': 957 ptb->uflag = 1; 958 break; 959 case 'h': 960 default: 961 usage(); 962 } 963 } 964 965 argv += optind; 966 argc -= optind; 967 if ((argc != (ptb->sflag ? 0 : 1)) || 968 (UDP_MODE && (ptb->kvars || nconn != 1))) 969 usage(); 970 971 if (!ptb->sflag) 972 host = argv[0]; 973 /* 974 * Rationale, 975 * If TCP, use a big buffer with big reads/writes. 976 * If UDP, use a big buffer in server and a buffer the size of a 977 * ethernet packet. 978 */ 979 if (!ptb->dummybuf_len) { 980 if (ptb->sflag || TCP_MODE) 981 ptb->dummybuf_len = DEFAULT_BUF; 982 else 983 ptb->dummybuf_len = DEFAULT_UDP_PKT; 984 } 985 986 bzero(&hints, sizeof(hints)); 987 if (UDP_MODE) 988 hints.ai_socktype = SOCK_DGRAM; 989 else 990 hints.ai_socktype = SOCK_STREAM; 991 if (ptb->sflag) 992 hints.ai_flags = AI_PASSIVE; 993 if ((herr = getaddrinfo(host, port, &hints, &aitop)) != 0) { 994 if (herr == EAI_SYSTEM) 995 err(1, "getaddrinfo"); 996 else 997 errx(1, "getaddrinfo: %s", gai_strerror(herr)); 998 } 999 if (ptb->kvars) { 1000 if ((ptb->kvmh = kvm_openfiles(NULL, NULL, NULL, 1001 O_RDONLY, kerr)) == NULL) 1002 errx(1, "kvm_open: %s", kerr); 1003 drop_gid(); 1004 if (kvm_nlist(ptb->kvmh, nl) < 0 || nl[0].n_type == 0) 1005 errx(1, "kvm: no namelist"); 1006 ptb->ktcbtab = nl[0].n_value; 1007 } else 1008 drop_gid(); 1009 1010 if (getrlimit(RLIMIT_NOFILE, &rl) == -1) 1011 err(1, "getrlimit"); 1012 if (rl.rlim_cur < MAX_FD) 1013 rl.rlim_cur = MAX_FD; 1014 if (setrlimit(RLIMIT_NOFILE, &rl)) 1015 err(1, "setrlimit"); 1016 if (getrlimit(RLIMIT_NOFILE, &rl) == -1) 1017 err(1, "getrlimit"); 1018 1019 /* Init world */ 1020 TAILQ_INIT(&sc_queue); 1021 if ((ptb->dummybuf = malloc(ptb->dummybuf_len)) == NULL) 1022 err(1, "malloc"); 1023 arc4random_buf(ptb->dummybuf, ptb->dummybuf_len); 1024 1025 if (UDP_MODE) { 1026 if ((udp_sc = calloc(1, sizeof(*udp_sc))) == NULL) 1027 err(1, "calloc"); 1028 udp_sc->fd = -1; 1029 stats_prepare(udp_sc); 1030 } 1031 1032 /* Setup libevent and signals */ 1033 event_init(); 1034 signal_set(&ev_sigterm, SIGTERM, signal_handler, NULL); 1035 signal_set(&ev_sighup, SIGHUP, signal_handler, NULL); 1036 signal_set(&ev_sigint, SIGINT, signal_handler, NULL); 1037 signal_add(&ev_sigint, NULL); 1038 signal_add(&ev_sigterm, NULL); 1039 signal_add(&ev_sighup, NULL); 1040 signal(SIGPIPE, SIG_IGN); 1041 1042 if (TCP_MODE) 1043 print_tcp_header(); 1044 1045 if (UDP_MODE) 1046 evtimer_set(&mainstats.timer, udp_process_slice, udp_sc); 1047 else 1048 evtimer_set(&mainstats.timer, tcp_process_slice, NULL); 1049 1050 if (ptb->sflag) { 1051 server_init(aitop, udp_sc); 1052 } else 1053 client_init(aitop, nconn, udp_sc); 1054 1055 /* libevent main loop*/ 1056 event_dispatch(); 1057 1058 return (0); 1059 } 1060