1 /* 2 * Copyright (c) 2008 Damien Miller <djm@mindrot.org> 3 * Copyright (c) 2011 Christiano F. Haesbaert <haesbaert@haesbaert.org> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include <sys/types.h> 19 #include <sys/time.h> 20 #include <sys/socket.h> 21 #include <sys/socketvar.h> 22 #include <sys/resource.h> 23 #include <sys/queue.h> 24 25 #include <net/route.h> 26 27 #include <netinet/in.h> 28 #include <netinet/in_systm.h> 29 #include <netinet/ip.h> 30 #include <netinet/tcp.h> 31 #include <netinet/tcp_timer.h> 32 #include <netinet/tcp_fsm.h> 33 #include <netinet/in_pcb.h> 34 #include <netinet/tcp_var.h> 35 36 #include <arpa/inet.h> 37 38 #include <unistd.h> 39 #include <limits.h> 40 #include <stdlib.h> 41 #include <stdio.h> 42 #include <string.h> 43 #include <errno.h> 44 #include <event.h> 45 #include <netdb.h> 46 #include <signal.h> 47 #include <err.h> 48 #include <fcntl.h> 49 #include <poll.h> 50 51 #include <kvm.h> 52 #include <nlist.h> 53 54 #define DEFAULT_PORT "12345" 55 #define DEFAULT_STATS_INTERVAL 1000 /* ms */ 56 #define DEFAULT_BUF (256 * 1024) 57 #define DEFAULT_UDP_PKT (1500 - 28) /* TODO don't hardcode this */ 58 #define TCP_MODE !ptb->uflag 59 #define UDP_MODE ptb->uflag 60 #define MAX_FD 1024 61 62 /* Our tcpbench globals */ 63 struct { 64 int Sflag; /* Socket buffer size (tcp mode) */ 65 u_int rflag; /* Report rate (ms) */ 66 int sflag; /* True if server */ 67 int Tflag; /* ToS if != -1 */ 68 int vflag; /* Verbose */ 69 int uflag; /* UDP mode */ 70 kvm_t *kvmh; /* Kvm handler */ 71 char **kvars; /* Kvm enabled vars */ 72 u_long ktcbtab; /* Ktcb */ 73 char *dummybuf; /* IO buffer */ 74 size_t dummybuf_len; /* IO buffer len */ 75 } tcpbench, *ptb; 76 77 struct tcpservsock { 78 struct event ev; 79 struct event evt; 80 int fd; 81 }; 82 83 /* stats for a single tcp connection, udp uses only one */ 84 struct statctx { 85 TAILQ_ENTRY(statctx) entry; 86 struct timeval t_start, t_last; 87 unsigned long long bytes; 88 int fd; 89 char *buf; 90 size_t buflen; 91 struct event ev; 92 /* TCP only */ 93 struct tcpservsock *tcp_ts; 94 u_long tcp_tcbaddr; 95 /* UDP only */ 96 u_long udp_slice_pkts; 97 }; 98 99 static void signal_handler(int, short, void *); 100 static void saddr_ntop(const struct sockaddr *, socklen_t, char *, size_t); 101 static void drop_gid(void); 102 static void set_slice_timer(int); 103 static void print_tcp_header(void); 104 static void kget(u_long, void *, size_t); 105 static u_long kfind_tcb(int); 106 static void kupdate_stats(u_long, struct inpcb *, struct tcpcb *, 107 struct socket *); 108 static void list_kvars(void); 109 static void check_kvar(const char *); 110 static char ** check_prepare_kvars(char *); 111 static void stats_prepare(struct statctx *); 112 static void tcp_stats_display(unsigned long long, long double, float, 113 struct statctx *, struct inpcb *, struct tcpcb *, struct socket *); 114 static void tcp_process_slice(int, short, void *); 115 static void tcp_server_handle_sc(int, short, void *); 116 static void tcp_server_accept(int, short, void *); 117 static void server_init(struct addrinfo *, struct statctx *); 118 static void client_handle_sc(int, short, void *); 119 static void client_init(struct addrinfo *, int, struct statctx *, 120 struct addrinfo *); 121 static int clock_gettime_tv(clockid_t, struct timeval *); 122 static void udp_server_handle_sc(int, short, void *); 123 static void udp_process_slice(int, short, void *); 124 static int map_tos(char *, int *); 125 /* 126 * We account the mainstats here, that is the stats 127 * for all connections, all variables starting with slice 128 * are used to account information for the timeslice 129 * between each output. Peak variables record the highest 130 * between all slices so far. 131 */ 132 static struct { 133 unsigned long long slice_bytes; /* bytes for last slice */ 134 long double peak_mbps; /* peak mbps so far */ 135 int nconns; /* connected clients */ 136 struct event timer; /* process timer */ 137 } mainstats; 138 139 /* When adding variables, also add to tcp_stats_display() */ 140 static const char *allowed_kvars[] = { 141 "inpcb.inp_flags", 142 "sockb.so_rcv.sb_cc", 143 "sockb.so_rcv.sb_hiwat", 144 "sockb.so_rcv.sb_wat", 145 "sockb.so_snd.sb_cc", 146 "sockb.so_snd.sb_hiwat", 147 "sockb.so_snd.sb_wat", 148 "tcpcb.last_ack_sent", 149 "tcpcb.max_sndwnd", 150 "tcpcb.rcv_adv", 151 "tcpcb.rcv_nxt", 152 "tcpcb.rcv_scale", 153 "tcpcb.rcv_wnd", 154 "tcpcb.rfbuf_cnt", 155 "tcpcb.rfbuf_ts", 156 "tcpcb.snd_cwnd", 157 "tcpcb.snd_max", 158 "tcpcb.snd_nxt", 159 "tcpcb.snd_scale", 160 "tcpcb.snd_ssthresh", 161 "tcpcb.snd_una", 162 "tcpcb.snd_wl1", 163 "tcpcb.snd_wl2", 164 "tcpcb.snd_wnd", 165 "tcpcb.t_rcvtime", 166 "tcpcb.t_rtseq", 167 "tcpcb.t_rttmin", 168 "tcpcb.t_rtttime", 169 "tcpcb.t_rttvar", 170 "tcpcb.t_srtt", 171 "tcpcb.ts_recent", 172 "tcpcb.ts_recent_age", 173 NULL 174 }; 175 176 TAILQ_HEAD(, statctx) sc_queue; 177 178 static void __dead 179 usage(void) 180 { 181 fprintf(stderr, 182 "usage: tcpbench -l\n" 183 " tcpbench [-uv] [-B buf] [-b addr] [-k kvars] [-n connections]\n" 184 " [-p port] [-r interval] [-S space] [-T toskeyword]\n" 185 " [-t secs] [-V rtable] hostname\n" 186 " tcpbench -s [-uv] [-B buf] [-k kvars] [-p port]\n" 187 " [-r interval] [-S space] [-T toskeyword] [-V rtable]\n"); 188 exit(1); 189 } 190 191 static void 192 signal_handler(int sig, short event, void *bula) 193 { 194 /* 195 * signal handler rules don't apply, libevent decouples for us 196 */ 197 switch (sig) { 198 case SIGINT: 199 case SIGTERM: 200 case SIGHUP: 201 warnx("Terminated by signal %d", sig); 202 exit(0); 203 break; /* NOTREACHED */ 204 default: 205 errx(1, "unexpected signal %d", sig); 206 break; /* NOTREACHED */ 207 } 208 } 209 210 static void 211 saddr_ntop(const struct sockaddr *addr, socklen_t alen, char *buf, size_t len) 212 { 213 char hbuf[NI_MAXHOST], pbuf[NI_MAXSERV]; 214 int herr; 215 216 if ((herr = getnameinfo(addr, alen, hbuf, sizeof(hbuf), 217 pbuf, sizeof(pbuf), NI_NUMERICHOST|NI_NUMERICSERV)) != 0) { 218 if (herr == EAI_SYSTEM) 219 err(1, "getnameinfo"); 220 else 221 errx(1, "getnameinfo: %s", gai_strerror(herr)); 222 } 223 snprintf(buf, len, "[%s]:%s", hbuf, pbuf); 224 } 225 226 static void 227 drop_gid(void) 228 { 229 gid_t gid; 230 231 gid = getgid(); 232 if (setresgid(gid, gid, gid) == -1) 233 err(1, "setresgid"); 234 } 235 236 static void 237 set_slice_timer(int on) 238 { 239 struct timeval tv; 240 241 if (ptb->rflag == 0) 242 return; 243 244 if (on) { 245 if (evtimer_pending(&mainstats.timer, NULL)) 246 return; 247 timerclear(&tv); 248 /* XXX Is there a better way to do this ? */ 249 tv.tv_sec = ptb->rflag / 1000; 250 tv.tv_usec = (ptb->rflag % 1000) * 1000; 251 252 evtimer_add(&mainstats.timer, &tv); 253 } else if (evtimer_pending(&mainstats.timer, NULL)) 254 evtimer_del(&mainstats.timer); 255 } 256 257 static int 258 clock_gettime_tv(clockid_t clock_id, struct timeval *tv) 259 { 260 struct timespec ts; 261 262 if (clock_gettime(clock_id, &ts) == -1) 263 return (-1); 264 265 TIMESPEC_TO_TIMEVAL(tv, &ts); 266 267 return (0); 268 } 269 270 static void 271 print_tcp_header(void) 272 { 273 char **kv; 274 275 printf("%12s %14s %12s %8s ", "elapsed_ms", "bytes", "mbps", 276 "bwidth"); 277 for (kv = ptb->kvars; ptb->kvars != NULL && *kv != NULL; kv++) 278 printf("%s%s", kv != ptb->kvars ? "," : "", *kv); 279 printf("\n"); 280 } 281 282 static void 283 kget(u_long addr, void *buf, size_t size) 284 { 285 if (kvm_read(ptb->kvmh, addr, buf, size) != (ssize_t)size) 286 errx(1, "kvm_read: %s", kvm_geterr(ptb->kvmh)); 287 } 288 289 static u_long 290 kfind_tcb(int sock) 291 { 292 struct inpcbtable tcbtab; 293 struct inpcb *next, *prev; 294 struct inpcb inpcb, prevpcb; 295 struct tcpcb tcpcb; 296 297 struct sockaddr_storage me, them; 298 socklen_t melen, themlen; 299 struct sockaddr_in *in4; 300 struct sockaddr_in6 *in6; 301 char tmp1[64], tmp2[64]; 302 int nretry; 303 304 nretry = 10; 305 melen = themlen = sizeof(struct sockaddr_storage); 306 if (getsockname(sock, (struct sockaddr *)&me, &melen) == -1) 307 err(1, "getsockname"); 308 if (getpeername(sock, (struct sockaddr *)&them, &themlen) == -1) 309 err(1, "getpeername"); 310 if (me.ss_family != them.ss_family) 311 errx(1, "%s: me.ss_family != them.ss_family", __func__); 312 if (me.ss_family != AF_INET && me.ss_family != AF_INET6) 313 errx(1, "%s: unknown socket family", __func__); 314 if (ptb->vflag >= 2) { 315 saddr_ntop((struct sockaddr *)&me, me.ss_len, 316 tmp1, sizeof(tmp1)); 317 saddr_ntop((struct sockaddr *)&them, them.ss_len, 318 tmp2, sizeof(tmp2)); 319 fprintf(stderr, "Our socket local %s remote %s\n", tmp1, tmp2); 320 } 321 if (ptb->vflag >= 2) 322 fprintf(stderr, "Using PCB table at %lu\n", ptb->ktcbtab); 323 retry: 324 kget(ptb->ktcbtab, &tcbtab, sizeof(tcbtab)); 325 prev = NULL; 326 next = TAILQ_FIRST(&tcbtab.inpt_queue); 327 328 if (ptb->vflag >= 2) 329 fprintf(stderr, "PCB start at %p\n", next); 330 while (next != NULL) { 331 if (ptb->vflag >= 2) 332 fprintf(stderr, "Checking PCB %p\n", next); 333 kget((u_long)next, &inpcb, sizeof(inpcb)); 334 if (prev != NULL) { 335 kget((u_long)prev, &prevpcb, sizeof(prevpcb)); 336 if (TAILQ_NEXT(&prevpcb, inp_queue) != next) { 337 if (nretry--) { 338 warnx("PCB prev pointer insane"); 339 goto retry; 340 } else 341 errx(1, "PCB prev pointer insane," 342 " all attempts exhaused"); 343 } 344 } 345 prev = next; 346 next = TAILQ_NEXT(&inpcb, inp_queue); 347 348 if (me.ss_family == AF_INET) { 349 if ((inpcb.inp_flags & INP_IPV6) != 0) { 350 if (ptb->vflag >= 2) 351 fprintf(stderr, "Skip: INP_IPV6"); 352 continue; 353 } 354 if (ptb->vflag >= 2) { 355 inet_ntop(AF_INET, &inpcb.inp_laddr, 356 tmp1, sizeof(tmp1)); 357 inet_ntop(AF_INET, &inpcb.inp_faddr, 358 tmp2, sizeof(tmp2)); 359 fprintf(stderr, "PCB %p local: [%s]:%d " 360 "remote: [%s]:%d\n", prev, 361 tmp1, inpcb.inp_lport, 362 tmp2, inpcb.inp_fport); 363 } 364 in4 = (struct sockaddr_in *)&me; 365 if (memcmp(&in4->sin_addr, &inpcb.inp_laddr, 366 sizeof(struct in_addr)) != 0 || 367 in4->sin_port != inpcb.inp_lport) 368 continue; 369 in4 = (struct sockaddr_in *)&them; 370 if (memcmp(&in4->sin_addr, &inpcb.inp_faddr, 371 sizeof(struct in_addr)) != 0 || 372 in4->sin_port != inpcb.inp_fport) 373 continue; 374 } else { 375 if ((inpcb.inp_flags & INP_IPV6) == 0) 376 continue; 377 if (ptb->vflag >= 2) { 378 inet_ntop(AF_INET6, &inpcb.inp_laddr6, 379 tmp1, sizeof(tmp1)); 380 inet_ntop(AF_INET6, &inpcb.inp_faddr6, 381 tmp2, sizeof(tmp2)); 382 fprintf(stderr, "PCB %p local: [%s]:%d " 383 "remote: [%s]:%d\n", prev, 384 tmp1, inpcb.inp_lport, 385 tmp2, inpcb.inp_fport); 386 } 387 in6 = (struct sockaddr_in6 *)&me; 388 if (memcmp(&in6->sin6_addr, &inpcb.inp_laddr6, 389 sizeof(struct in6_addr)) != 0 || 390 in6->sin6_port != inpcb.inp_lport) 391 continue; 392 in6 = (struct sockaddr_in6 *)&them; 393 if (memcmp(&in6->sin6_addr, &inpcb.inp_faddr6, 394 sizeof(struct in6_addr)) != 0 || 395 in6->sin6_port != inpcb.inp_fport) 396 continue; 397 } 398 kget((u_long)inpcb.inp_ppcb, &tcpcb, sizeof(tcpcb)); 399 if (tcpcb.t_state != TCPS_ESTABLISHED) { 400 if (ptb->vflag >= 2) 401 fprintf(stderr, "Not established\n"); 402 continue; 403 } 404 if (ptb->vflag >= 2) 405 fprintf(stderr, "Found PCB at %p\n", prev); 406 return ((u_long)prev); 407 } 408 409 errx(1, "No matching PCB found"); 410 } 411 412 static void 413 kupdate_stats(u_long tcbaddr, struct inpcb *inpcb, 414 struct tcpcb *tcpcb, struct socket *sockb) 415 { 416 kget(tcbaddr, inpcb, sizeof(*inpcb)); 417 kget((u_long)inpcb->inp_ppcb, tcpcb, sizeof(*tcpcb)); 418 kget((u_long)inpcb->inp_socket, sockb, sizeof(*sockb)); 419 } 420 421 static void 422 check_kvar(const char *var) 423 { 424 u_int i; 425 426 for (i = 0; allowed_kvars[i] != NULL; i++) 427 if (strcmp(allowed_kvars[i], var) == 0) 428 return; 429 errx(1, "Unrecognised kvar: %s", var); 430 } 431 432 static void 433 list_kvars(void) 434 { 435 u_int i; 436 437 printf("Supported kernel variables:\n"); 438 for (i = 0; allowed_kvars[i] != NULL; i++) 439 printf("\t%s\n", allowed_kvars[i]); 440 } 441 442 static char ** 443 check_prepare_kvars(char *list) 444 { 445 char *item, **ret = NULL; 446 u_int n = 0; 447 448 while ((item = strsep(&list, ", \t\n")) != NULL) { 449 check_kvar(item); 450 if ((ret = realloc(ret, sizeof(*ret) * (++n + 1))) == NULL) 451 errx(1, "realloc(kvars)"); 452 if ((ret[n - 1] = strdup(item)) == NULL) 453 errx(1, "strdup"); 454 ret[n] = NULL; 455 } 456 return (ret); 457 } 458 459 static void 460 stats_prepare(struct statctx *sc) 461 { 462 sc->buf = ptb->dummybuf; 463 sc->buflen = ptb->dummybuf_len; 464 465 if (ptb->kvars) 466 sc->tcp_tcbaddr = kfind_tcb(sc->fd); 467 if (clock_gettime_tv(CLOCK_MONOTONIC, &sc->t_start) == -1) 468 err(1, "clock_gettime_tv"); 469 sc->t_last = sc->t_start; 470 471 } 472 473 static void 474 tcp_stats_display(unsigned long long total_elapsed, long double mbps, 475 float bwperc, struct statctx *sc, struct inpcb *inpcb, 476 struct tcpcb *tcpcb, struct socket *sockb) 477 { 478 int j; 479 480 printf("%12llu %14llu %12.3Lf %7.2f%% ", total_elapsed, sc->bytes, 481 mbps, bwperc); 482 483 if (ptb->kvars != NULL) { 484 kupdate_stats(sc->tcp_tcbaddr, inpcb, tcpcb, 485 sockb); 486 487 for (j = 0; ptb->kvars[j] != NULL; j++) { 488 #define S(a) #a 489 #define P(b, v, f) \ 490 if (strcmp(ptb->kvars[j], S(b.v)) == 0) { \ 491 printf("%s"f, j > 0 ? "," : "", b->v); \ 492 continue; \ 493 } 494 P(inpcb, inp_flags, "0x%08x") 495 P(sockb, so_rcv.sb_cc, "%lu") 496 P(sockb, so_rcv.sb_hiwat, "%lu") 497 P(sockb, so_rcv.sb_wat, "%lu") 498 P(sockb, so_snd.sb_cc, "%lu") 499 P(sockb, so_snd.sb_hiwat, "%lu") 500 P(sockb, so_snd.sb_wat, "%lu") 501 P(tcpcb, last_ack_sent, "%u") 502 P(tcpcb, max_sndwnd, "%lu") 503 P(tcpcb, rcv_adv, "%u") 504 P(tcpcb, rcv_nxt, "%u") 505 P(tcpcb, rcv_scale, "%u") 506 P(tcpcb, rcv_wnd, "%lu") 507 P(tcpcb, rfbuf_cnt, "%u") 508 P(tcpcb, rfbuf_ts, "%u") 509 P(tcpcb, snd_cwnd, "%lu") 510 P(tcpcb, snd_max, "%u") 511 P(tcpcb, snd_nxt, "%u") 512 P(tcpcb, snd_scale, "%u") 513 P(tcpcb, snd_ssthresh, "%lu") 514 P(tcpcb, snd_una, "%u") 515 P(tcpcb, snd_wl1, "%u") 516 P(tcpcb, snd_wl2, "%u") 517 P(tcpcb, snd_wnd, "%lu") 518 P(tcpcb, t_rcvtime, "%u") 519 P(tcpcb, t_rtseq, "%u") 520 P(tcpcb, t_rttmin, "%hu") 521 P(tcpcb, t_rtttime, "%u") 522 P(tcpcb, t_rttvar, "%hu") 523 P(tcpcb, t_srtt, "%hu") 524 P(tcpcb, ts_recent, "%u") 525 P(tcpcb, ts_recent_age, "%u") 526 #undef S 527 #undef P 528 } 529 } 530 printf("\n"); 531 } 532 533 static void 534 tcp_process_slice(int fd, short event, void *bula) 535 { 536 unsigned long long total_elapsed, since_last; 537 long double mbps, slice_mbps = 0; 538 float bwperc; 539 struct statctx *sc; 540 struct timeval t_cur, t_diff; 541 struct inpcb inpcb; 542 struct tcpcb tcpcb; 543 struct socket sockb; 544 545 TAILQ_FOREACH(sc, &sc_queue, entry) { 546 if (clock_gettime_tv(CLOCK_MONOTONIC, &t_cur) == -1) 547 err(1, "clock_gettime_tv"); 548 if (ptb->kvars != NULL) /* process kernel stats */ 549 kupdate_stats(sc->tcp_tcbaddr, &inpcb, &tcpcb, 550 &sockb); 551 552 timersub(&t_cur, &sc->t_start, &t_diff); 553 total_elapsed = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000; 554 timersub(&t_cur, &sc->t_last, &t_diff); 555 since_last = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000; 556 bwperc = (sc->bytes * 100.0) / mainstats.slice_bytes; 557 mbps = (sc->bytes * 8) / (since_last * 1000.0); 558 slice_mbps += mbps; 559 560 tcp_stats_display(total_elapsed, mbps, bwperc, sc, 561 &inpcb, &tcpcb, &sockb); 562 563 sc->t_last = t_cur; 564 sc->bytes = 0; 565 } 566 567 /* process stats for this slice */ 568 if (slice_mbps > mainstats.peak_mbps) 569 mainstats.peak_mbps = slice_mbps; 570 printf("Conn: %3d Mbps: %12.3Lf Peak Mbps: %12.3Lf Avg Mbps: %12.3Lf\n", 571 mainstats.nconns, slice_mbps, mainstats.peak_mbps, 572 mainstats.nconns ? slice_mbps / mainstats.nconns : 0); 573 mainstats.slice_bytes = 0; 574 575 set_slice_timer(mainstats.nconns > 0); 576 } 577 578 static void 579 udp_process_slice(int fd, short event, void *v_sc) 580 { 581 struct statctx *sc = v_sc; 582 unsigned long long total_elapsed, since_last, pps; 583 long double slice_mbps; 584 struct timeval t_cur, t_diff; 585 586 if (clock_gettime_tv(CLOCK_MONOTONIC, &t_cur) == -1) 587 err(1, "clock_gettime_tv"); 588 /* Calculate pps */ 589 timersub(&t_cur, &sc->t_start, &t_diff); 590 total_elapsed = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000; 591 timersub(&t_cur, &sc->t_last, &t_diff); 592 since_last = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000; 593 slice_mbps = (sc->bytes * 8) / (since_last * 1000.0); 594 pps = (sc->udp_slice_pkts * 1000) / since_last; 595 if (slice_mbps > mainstats.peak_mbps) 596 mainstats.peak_mbps = slice_mbps; 597 printf("Elapsed: %11llu Mbps: %11.3Lf Peak Mbps: %11.3Lf %s PPS: %7llu\n", 598 total_elapsed, slice_mbps, mainstats.peak_mbps, 599 ptb->sflag ? "Rx" : "Tx", pps); 600 601 /* Clean up this slice time */ 602 sc->t_last = t_cur; 603 sc->bytes = 0; 604 sc->udp_slice_pkts = 0; 605 set_slice_timer(1); 606 } 607 608 static void 609 udp_server_handle_sc(int fd, short event, void *v_sc) 610 { 611 ssize_t n; 612 struct statctx *sc = v_sc; 613 614 n = read(fd, ptb->dummybuf, ptb->dummybuf_len); 615 if (n == 0) 616 return; 617 else if (n == -1) { 618 if (errno != EINTR && errno != EWOULDBLOCK) 619 warn("fd %d read error", fd); 620 return; 621 } 622 623 if (ptb->vflag >= 3) 624 fprintf(stderr, "read: %zd bytes\n", n); 625 /* If this was our first packet, start slice timer */ 626 if (mainstats.peak_mbps == 0) 627 set_slice_timer(1); 628 /* Account packet */ 629 sc->udp_slice_pkts++; 630 sc->bytes += n; 631 } 632 633 static void 634 tcp_server_handle_sc(int fd, short event, void *v_sc) 635 { 636 struct statctx *sc = v_sc; 637 ssize_t n; 638 639 n = read(sc->fd, sc->buf, sc->buflen); 640 if (n == -1) { 641 if (errno != EINTR && errno != EWOULDBLOCK) 642 warn("fd %d read error", sc->fd); 643 return; 644 } else if (n == 0) { 645 if (ptb->vflag) 646 fprintf(stderr, "%8d closed by remote end\n", sc->fd); 647 648 TAILQ_REMOVE(&sc_queue, sc, entry); 649 650 event_del(&sc->ev); 651 close(sc->fd); 652 653 /* Some file descriptors are available again. */ 654 if (evtimer_pending(&sc->tcp_ts->evt, NULL)) { 655 evtimer_del(&sc->tcp_ts->evt); 656 event_add(&sc->tcp_ts->ev, NULL); 657 } 658 659 free(sc); 660 mainstats.nconns--; 661 return; 662 } 663 if (ptb->vflag >= 3) 664 fprintf(stderr, "read: %zd bytes\n", n); 665 sc->bytes += n; 666 mainstats.slice_bytes += n; 667 } 668 669 static void 670 tcp_server_accept(int fd, short event, void *arg) 671 { 672 struct tcpservsock *ts = arg; 673 int sock, r; 674 struct statctx *sc; 675 struct sockaddr_storage ss; 676 socklen_t sslen; 677 char tmp[128]; 678 679 sslen = sizeof(ss); 680 681 event_add(&ts->ev, NULL); 682 if (event & EV_TIMEOUT) 683 return; 684 if ((sock = accept(fd, (struct sockaddr *)&ss, &sslen)) == -1) { 685 /* 686 * Pause accept if we are out of file descriptors, or 687 * libevent will haunt us here too. 688 */ 689 if (errno == ENFILE || errno == EMFILE) { 690 struct timeval evtpause = { 1, 0 }; 691 692 event_del(&ts->ev); 693 evtimer_add(&ts->evt, &evtpause); 694 } else if (errno != EWOULDBLOCK && errno != EINTR && 695 errno != ECONNABORTED) 696 warn("accept"); 697 return; 698 } 699 saddr_ntop((struct sockaddr *)&ss, sslen, 700 tmp, sizeof(tmp)); 701 if ((r = fcntl(sock, F_GETFL, 0)) == -1) 702 err(1, "fcntl(F_GETFL)"); 703 r |= O_NONBLOCK; 704 if (fcntl(sock, F_SETFL, r) == -1) 705 err(1, "fcntl(F_SETFL, O_NONBLOCK)"); 706 if (ptb->Tflag != -1 && ss.ss_family == AF_INET) { 707 if (setsockopt(sock, IPPROTO_IP, IP_TOS, 708 &ptb->Tflag, sizeof(ptb->Tflag))) 709 err(1, "setsockopt IP_TOS"); 710 } 711 if (ptb->Tflag != -1 && ss.ss_family == AF_INET6) { 712 if (setsockopt(sock, IPPROTO_IPV6, IPV6_TCLASS, 713 &ptb->Tflag, sizeof(ptb->Tflag))) 714 err(1, "setsockopt IPV6_TCLASS"); 715 } 716 /* Alloc client structure and register reading callback */ 717 if ((sc = calloc(1, sizeof(*sc))) == NULL) 718 err(1, "calloc"); 719 sc->tcp_ts = ts; 720 sc->fd = sock; 721 stats_prepare(sc); 722 event_set(&sc->ev, sc->fd, EV_READ | EV_PERSIST, 723 tcp_server_handle_sc, sc); 724 event_add(&sc->ev, NULL); 725 TAILQ_INSERT_TAIL(&sc_queue, sc, entry); 726 mainstats.nconns++; 727 if (mainstats.nconns == 1) 728 set_slice_timer(1); 729 if (ptb->vflag) 730 fprintf(stderr, "Accepted connection from %s, fd = %d\n", 731 tmp, sc->fd); 732 } 733 734 static void 735 server_init(struct addrinfo *aitop, struct statctx *udp_sc) 736 { 737 char tmp[128]; 738 int sock, on = 1; 739 struct addrinfo *ai; 740 struct event *ev; 741 struct tcpservsock *ts; 742 nfds_t lnfds; 743 744 lnfds = 0; 745 for (ai = aitop; ai != NULL; ai = ai->ai_next) { 746 saddr_ntop(ai->ai_addr, ai->ai_addrlen, tmp, sizeof(tmp)); 747 if (ptb->vflag) 748 fprintf(stderr, "Try to bind to %s\n", tmp); 749 if ((sock = socket(ai->ai_family, ai->ai_socktype, 750 ai->ai_protocol)) == -1) { 751 if (ai->ai_next == NULL) 752 err(1, "socket"); 753 if (ptb->vflag) 754 warn("socket"); 755 continue; 756 } 757 if (ptb->Tflag != -1 && ai->ai_family == AF_INET) { 758 if (setsockopt(sock, IPPROTO_IP, IP_TOS, 759 &ptb->Tflag, sizeof(ptb->Tflag))) 760 err(1, "setsockopt IP_TOS"); 761 } 762 if (ptb->Tflag != -1 && ai->ai_family == AF_INET6) { 763 if (setsockopt(sock, IPPROTO_IPV6, IPV6_TCLASS, 764 &ptb->Tflag, sizeof(ptb->Tflag))) 765 err(1, "setsockopt IPV6_TCLASS"); 766 } 767 if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, 768 &on, sizeof(on)) == -1) 769 warn("reuse port"); 770 if (bind(sock, ai->ai_addr, ai->ai_addrlen) != 0) { 771 if (ai->ai_next == NULL) 772 err(1, "bind"); 773 if (ptb->vflag) 774 warn("bind"); 775 close(sock); 776 continue; 777 } 778 if (ptb->Sflag) { 779 if (setsockopt(sock, SOL_SOCKET, SO_RCVBUF, 780 &ptb->Sflag, sizeof(ptb->Sflag)) == -1) 781 warn("set receive buffer size"); 782 } 783 if (TCP_MODE) { 784 if (listen(sock, 64) == -1) { 785 if (ai->ai_next == NULL) 786 err(1, "listen"); 787 if (ptb->vflag) 788 warn("listen"); 789 close(sock); 790 continue; 791 } 792 } 793 if (UDP_MODE) { 794 if ((ev = calloc(1, sizeof(*ev))) == NULL) 795 err(1, "calloc"); 796 event_set(ev, sock, EV_READ | EV_PERSIST, 797 udp_server_handle_sc, udp_sc); 798 event_add(ev, NULL); 799 } else { 800 if ((ts = calloc(1, sizeof(*ts))) == NULL) 801 err(1, "calloc"); 802 803 ts->fd = sock; 804 evtimer_set(&ts->evt, tcp_server_accept, ts); 805 event_set(&ts->ev, ts->fd, EV_READ, 806 tcp_server_accept, ts); 807 event_add(&ts->ev, NULL); 808 } 809 if (ptb->vflag >= 3) 810 fprintf(stderr, "bound to fd %d\n", sock); 811 lnfds++; 812 } 813 freeaddrinfo(aitop); 814 if (lnfds == 0) 815 errx(1, "No working listen addresses found"); 816 } 817 818 static void 819 client_handle_sc(int fd, short event, void *v_sc) 820 { 821 struct statctx *sc = v_sc; 822 ssize_t n; 823 824 if ((n = write(sc->fd, sc->buf, sc->buflen)) == -1) { 825 if (errno == EINTR || errno == EWOULDBLOCK || 826 (UDP_MODE && errno == ENOBUFS)) 827 return; 828 err(1, "write"); 829 } 830 if (TCP_MODE && n == 0) { 831 fprintf(stderr, "Remote end closed connection"); 832 exit(1); 833 } 834 if (ptb->vflag >= 3) 835 fprintf(stderr, "write: %zd bytes\n", n); 836 sc->bytes += n; 837 mainstats.slice_bytes += n; 838 if (UDP_MODE) 839 sc->udp_slice_pkts++; 840 } 841 842 static void 843 client_init(struct addrinfo *aitop, int nconn, struct statctx *udp_sc, 844 struct addrinfo *aib) 845 { 846 struct statctx *sc; 847 struct addrinfo *ai; 848 char tmp[128]; 849 int i, r, sock; 850 851 sc = udp_sc; 852 for (i = 0; i < nconn; i++) { 853 for (sock = -1, ai = aitop; ai != NULL; ai = ai->ai_next) { 854 saddr_ntop(ai->ai_addr, ai->ai_addrlen, tmp, 855 sizeof(tmp)); 856 if (ptb->vflag && i == 0) 857 fprintf(stderr, "Trying %s\n", tmp); 858 if ((sock = socket(ai->ai_family, ai->ai_socktype, 859 ai->ai_protocol)) == -1) { 860 if (ai->ai_next == NULL) 861 err(1, "socket"); 862 if (ptb->vflag) 863 warn("socket"); 864 continue; 865 } 866 if (aib != NULL) { 867 saddr_ntop(aib->ai_addr, aib->ai_addrlen, 868 tmp, sizeof(tmp)); 869 if (ptb->vflag) 870 fprintf(stderr, 871 "Try to bind to %s\n", tmp); 872 if (bind(sock, (struct sockaddr *)aib->ai_addr, 873 aib->ai_addrlen) == -1) 874 err(1, "bind"); 875 } 876 if (ptb->Tflag != -1 && ai->ai_family == AF_INET) { 877 if (setsockopt(sock, IPPROTO_IP, IP_TOS, 878 &ptb->Tflag, sizeof(ptb->Tflag))) 879 err(1, "setsockopt IP_TOS"); 880 } 881 if (ptb->Tflag != -1 && ai->ai_family == AF_INET6) { 882 if (setsockopt(sock, IPPROTO_IPV6, IPV6_TCLASS, 883 &ptb->Tflag, sizeof(ptb->Tflag))) 884 err(1, "setsockopt IPV6_TCLASS"); 885 } 886 if (ptb->Sflag) { 887 if (setsockopt(sock, SOL_SOCKET, SO_SNDBUF, 888 &ptb->Sflag, sizeof(ptb->Sflag)) == -1) 889 warn("set TCP send buffer size"); 890 } 891 if (connect(sock, ai->ai_addr, ai->ai_addrlen) != 0) { 892 if (ai->ai_next == NULL) 893 err(1, "connect"); 894 if (ptb->vflag) 895 warn("connect"); 896 close(sock); 897 sock = -1; 898 continue; 899 } 900 break; 901 } 902 if (sock == -1) 903 errx(1, "No host found"); 904 if ((r = fcntl(sock, F_GETFL, 0)) == -1) 905 err(1, "fcntl(F_GETFL)"); 906 r |= O_NONBLOCK; 907 if (fcntl(sock, F_SETFL, r) == -1) 908 err(1, "fcntl(F_SETFL, O_NONBLOCK)"); 909 /* Alloc and prepare stats */ 910 if (TCP_MODE) { 911 if ((sc = calloc(1, sizeof(*sc))) == NULL) 912 err(1, "calloc"); 913 } 914 sc->fd = sock; 915 stats_prepare(sc); 916 event_set(&sc->ev, sc->fd, EV_WRITE | EV_PERSIST, 917 client_handle_sc, sc); 918 event_add(&sc->ev, NULL); 919 TAILQ_INSERT_TAIL(&sc_queue, sc, entry); 920 mainstats.nconns++; 921 if (mainstats.nconns == 1) 922 set_slice_timer(1); 923 } 924 freeaddrinfo(aitop); 925 if (aib != NULL) 926 freeaddrinfo(aib); 927 928 if (ptb->vflag && nconn > 1) 929 fprintf(stderr, "%d connections established\n", 930 mainstats.nconns); 931 } 932 933 static int 934 map_tos(char *s, int *val) 935 { 936 /* DiffServ Codepoints and other TOS mappings */ 937 const struct toskeywords { 938 const char *keyword; 939 int val; 940 } *t, toskeywords[] = { 941 { "af11", IPTOS_DSCP_AF11 }, 942 { "af12", IPTOS_DSCP_AF12 }, 943 { "af13", IPTOS_DSCP_AF13 }, 944 { "af21", IPTOS_DSCP_AF21 }, 945 { "af22", IPTOS_DSCP_AF22 }, 946 { "af23", IPTOS_DSCP_AF23 }, 947 { "af31", IPTOS_DSCP_AF31 }, 948 { "af32", IPTOS_DSCP_AF32 }, 949 { "af33", IPTOS_DSCP_AF33 }, 950 { "af41", IPTOS_DSCP_AF41 }, 951 { "af42", IPTOS_DSCP_AF42 }, 952 { "af43", IPTOS_DSCP_AF43 }, 953 { "critical", IPTOS_PREC_CRITIC_ECP }, 954 { "cs0", IPTOS_DSCP_CS0 }, 955 { "cs1", IPTOS_DSCP_CS1 }, 956 { "cs2", IPTOS_DSCP_CS2 }, 957 { "cs3", IPTOS_DSCP_CS3 }, 958 { "cs4", IPTOS_DSCP_CS4 }, 959 { "cs5", IPTOS_DSCP_CS5 }, 960 { "cs6", IPTOS_DSCP_CS6 }, 961 { "cs7", IPTOS_DSCP_CS7 }, 962 { "ef", IPTOS_DSCP_EF }, 963 { "inetcontrol", IPTOS_PREC_INTERNETCONTROL }, 964 { "lowdelay", IPTOS_LOWDELAY }, 965 { "netcontrol", IPTOS_PREC_NETCONTROL }, 966 { "reliability", IPTOS_RELIABILITY }, 967 { "throughput", IPTOS_THROUGHPUT }, 968 { NULL, -1 }, 969 }; 970 971 for (t = toskeywords; t->keyword != NULL; t++) { 972 if (strcmp(s, t->keyword) == 0) { 973 *val = t->val; 974 return (1); 975 } 976 } 977 978 return (0); 979 } 980 981 static void 982 quit(int sig, short event, void *arg) 983 { 984 exit(0); 985 } 986 987 int 988 main(int argc, char **argv) 989 { 990 extern int optind; 991 extern char *optarg; 992 struct timeval tv; 993 unsigned int secs, rtable; 994 995 char kerr[_POSIX2_LINE_MAX], *tmp; 996 struct addrinfo *aitop, *aib, hints; 997 const char *errstr; 998 struct rlimit rl; 999 int ch, herr, nconn; 1000 struct nlist nl[] = { { "_tcbtable" }, { "" } }; 1001 const char *host = NULL, *port = DEFAULT_PORT, *srcbind = NULL; 1002 struct event ev_sigint, ev_sigterm, ev_sighup, ev_progtimer; 1003 struct statctx *udp_sc = NULL; 1004 1005 /* Init world */ 1006 setlinebuf(stdout); 1007 ptb = &tcpbench; 1008 ptb->dummybuf_len = 0; 1009 ptb->Sflag = ptb->sflag = ptb->vflag = 0; 1010 ptb->kvmh = NULL; 1011 ptb->kvars = NULL; 1012 ptb->rflag = DEFAULT_STATS_INTERVAL; 1013 ptb->Tflag = -1; 1014 nconn = 1; 1015 aib = NULL; 1016 secs = 0; 1017 1018 while ((ch = getopt(argc, argv, "b:B:hlk:n:p:r:sS:t:T:uvV:")) != -1) { 1019 switch (ch) { 1020 case 'b': 1021 srcbind = optarg; 1022 break; 1023 case 'l': 1024 list_kvars(); 1025 exit(0); 1026 case 'k': 1027 if ((tmp = strdup(optarg)) == NULL) 1028 errx(1, "strdup"); 1029 ptb->kvars = check_prepare_kvars(tmp); 1030 free(tmp); 1031 break; 1032 case 'r': 1033 ptb->rflag = strtonum(optarg, 0, 60 * 60 * 24 * 1000, 1034 &errstr); 1035 if (errstr != NULL) 1036 errx(1, "statistics interval is %s: %s", 1037 errstr, optarg); 1038 break; 1039 case 'p': 1040 port = optarg; 1041 break; 1042 case 's': 1043 ptb->sflag = 1; 1044 break; 1045 case 'S': 1046 ptb->Sflag = strtonum(optarg, 0, 1024*1024*1024, 1047 &errstr); 1048 if (errstr != NULL) 1049 errx(1, "receive space interval is %s: %s", 1050 errstr, optarg); 1051 break; 1052 case 'B': 1053 ptb->dummybuf_len = strtonum(optarg, 0, 1024*1024*1024, 1054 &errstr); 1055 if (errstr != NULL) 1056 errx(1, "read/write buffer size is %s: %s", 1057 errstr, optarg); 1058 break; 1059 case 'v': 1060 ptb->vflag++; 1061 break; 1062 case 'V': 1063 rtable = (unsigned int)strtonum(optarg, 0, 1064 RT_TABLEID_MAX, &errstr); 1065 if (errstr) 1066 errx(1, "rtable value is %s: %s", 1067 errstr, optarg); 1068 if (setrtable(rtable) == -1) 1069 err(1, "setrtable"); 1070 break; 1071 case 'n': 1072 nconn = strtonum(optarg, 0, 65535, &errstr); 1073 if (errstr != NULL) 1074 errx(1, "number of connections is %s: %s", 1075 errstr, optarg); 1076 break; 1077 case 'u': 1078 ptb->uflag = 1; 1079 break; 1080 case 'T': 1081 if (map_tos(optarg, &ptb->Tflag)) 1082 break; 1083 errstr = NULL; 1084 if (strlen(optarg) > 1 && optarg[0] == '0' && 1085 optarg[1] == 'x') 1086 ptb->Tflag = (int)strtol(optarg, NULL, 16); 1087 else 1088 ptb->Tflag = (int)strtonum(optarg, 0, 255, 1089 &errstr); 1090 if (ptb->Tflag == -1 || ptb->Tflag > 255 || errstr) 1091 errx(1, "illegal tos value %s", optarg); 1092 break; 1093 case 't': 1094 secs = strtonum(optarg, 1, UINT_MAX, &errstr); 1095 if (errstr != NULL) 1096 errx(1, "secs is %s: %s", 1097 errstr, optarg); 1098 break; 1099 case 'h': 1100 default: 1101 usage(); 1102 } 1103 } 1104 1105 argv += optind; 1106 argc -= optind; 1107 if ((argc != (ptb->sflag ? 0 : 1)) || 1108 (UDP_MODE && (ptb->kvars || nconn != 1))) 1109 usage(); 1110 1111 if (!ptb->sflag) 1112 host = argv[0]; 1113 /* 1114 * Rationale, 1115 * If TCP, use a big buffer with big reads/writes. 1116 * If UDP, use a big buffer in server and a buffer the size of a 1117 * ethernet packet. 1118 */ 1119 if (!ptb->dummybuf_len) { 1120 if (ptb->sflag || TCP_MODE) 1121 ptb->dummybuf_len = DEFAULT_BUF; 1122 else 1123 ptb->dummybuf_len = DEFAULT_UDP_PKT; 1124 } 1125 1126 bzero(&hints, sizeof(hints)); 1127 if (UDP_MODE) { 1128 hints.ai_socktype = SOCK_DGRAM; 1129 hints.ai_protocol = IPPROTO_UDP; 1130 } else { 1131 hints.ai_socktype = SOCK_STREAM; 1132 hints.ai_protocol = IPPROTO_TCP; 1133 } 1134 if (ptb->sflag) 1135 hints.ai_flags = AI_PASSIVE; 1136 if (srcbind != NULL) { 1137 hints.ai_flags |= AI_NUMERICHOST; 1138 herr = getaddrinfo(srcbind, NULL, &hints, &aib); 1139 hints.ai_flags &= ~AI_NUMERICHOST; 1140 if (herr != 0) { 1141 if (herr == EAI_SYSTEM) 1142 err(1, "getaddrinfo"); 1143 else 1144 errx(1, "getaddrinfo: %s", gai_strerror(herr)); 1145 } 1146 } 1147 if ((herr = getaddrinfo(host, port, &hints, &aitop)) != 0) { 1148 if (herr == EAI_SYSTEM) 1149 err(1, "getaddrinfo"); 1150 else 1151 errx(1, "getaddrinfo: %s", gai_strerror(herr)); 1152 } 1153 if (ptb->kvars) { 1154 if ((ptb->kvmh = kvm_openfiles(NULL, NULL, NULL, 1155 O_RDONLY, kerr)) == NULL) 1156 errx(1, "kvm_open: %s", kerr); 1157 drop_gid(); 1158 if (kvm_nlist(ptb->kvmh, nl) < 0 || nl[0].n_type == 0) 1159 errx(1, "kvm: no namelist"); 1160 ptb->ktcbtab = nl[0].n_value; 1161 } else 1162 drop_gid(); 1163 1164 if (getrlimit(RLIMIT_NOFILE, &rl) == -1) 1165 err(1, "getrlimit"); 1166 if (rl.rlim_cur < MAX_FD) 1167 rl.rlim_cur = MAX_FD; 1168 if (setrlimit(RLIMIT_NOFILE, &rl)) 1169 err(1, "setrlimit"); 1170 if (getrlimit(RLIMIT_NOFILE, &rl) == -1) 1171 err(1, "getrlimit"); 1172 1173 /* Init world */ 1174 TAILQ_INIT(&sc_queue); 1175 if ((ptb->dummybuf = malloc(ptb->dummybuf_len)) == NULL) 1176 err(1, "malloc"); 1177 arc4random_buf(ptb->dummybuf, ptb->dummybuf_len); 1178 1179 /* Setup libevent and signals */ 1180 event_init(); 1181 signal_set(&ev_sigterm, SIGTERM, signal_handler, NULL); 1182 signal_set(&ev_sighup, SIGHUP, signal_handler, NULL); 1183 signal_set(&ev_sigint, SIGINT, signal_handler, NULL); 1184 signal_add(&ev_sigint, NULL); 1185 signal_add(&ev_sigterm, NULL); 1186 signal_add(&ev_sighup, NULL); 1187 signal(SIGPIPE, SIG_IGN); 1188 1189 if (UDP_MODE) { 1190 if ((udp_sc = calloc(1, sizeof(*udp_sc))) == NULL) 1191 err(1, "calloc"); 1192 udp_sc->fd = -1; 1193 stats_prepare(udp_sc); 1194 evtimer_set(&mainstats.timer, udp_process_slice, udp_sc); 1195 } else { 1196 print_tcp_header(); 1197 evtimer_set(&mainstats.timer, tcp_process_slice, NULL); 1198 } 1199 1200 if (ptb->sflag) 1201 server_init(aitop, udp_sc); 1202 else { 1203 if (secs > 0) { 1204 timerclear(&tv); 1205 tv.tv_sec = secs + 1; 1206 evtimer_set(&ev_progtimer, quit, NULL); 1207 evtimer_add(&ev_progtimer, &tv); 1208 } 1209 client_init(aitop, nconn, udp_sc, aib); 1210 } 1211 1212 /* libevent main loop*/ 1213 event_dispatch(); 1214 1215 return (0); 1216 } 1217