1 /* 2 * Copyright (c) 2008 Damien Miller <djm@mindrot.org> 3 * Copyright (c) 2011 Christiano F. Haesbaert <haesbaert@haesbaert.org> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include <sys/types.h> 19 #include <sys/time.h> 20 #include <sys/socket.h> 21 #include <sys/socketvar.h> 22 #include <sys/resource.h> 23 #include <sys/queue.h> 24 25 #include <net/route.h> 26 27 #include <netinet/in.h> 28 #include <netinet/in_systm.h> 29 #include <netinet/ip.h> 30 #include <netinet/tcp.h> 31 #include <netinet/tcp_timer.h> 32 #include <netinet/tcp_fsm.h> 33 #include <netinet/in_pcb.h> 34 #include <netinet/tcp_var.h> 35 36 #include <arpa/inet.h> 37 38 #include <unistd.h> 39 #include <limits.h> 40 #include <stdlib.h> 41 #include <stdio.h> 42 #include <string.h> 43 #include <errno.h> 44 #include <event.h> 45 #include <netdb.h> 46 #include <signal.h> 47 #include <err.h> 48 #include <fcntl.h> 49 #include <poll.h> 50 51 #include <kvm.h> 52 #include <nlist.h> 53 54 #define DEFAULT_PORT "12345" 55 #define DEFAULT_STATS_INTERVAL 1000 /* ms */ 56 #define DEFAULT_BUF (256 * 1024) 57 #define DEFAULT_UDP_PKT (1500 - 28) /* TODO don't hardcode this */ 58 #define TCP_MODE !ptb->uflag 59 #define UDP_MODE ptb->uflag 60 #define MAX_FD 1024 61 62 /* Our tcpbench globals */ 63 struct { 64 u_int Vflag; /* rtableid */ 65 int Sflag; /* Socket buffer size (tcp mode) */ 66 u_int rflag; /* Report rate (ms) */ 67 int sflag; /* True if server */ 68 int Tflag; /* ToS if != -1 */ 69 int vflag; /* Verbose */ 70 int uflag; /* UDP mode */ 71 kvm_t *kvmh; /* Kvm handler */ 72 char **kvars; /* Kvm enabled vars */ 73 u_long ktcbtab; /* Ktcb */ 74 char *dummybuf; /* IO buffer */ 75 size_t dummybuf_len; /* IO buffer len */ 76 } tcpbench, *ptb; 77 78 struct tcpservsock { 79 struct event ev; 80 struct event evt; 81 int fd; 82 }; 83 84 /* stats for a single tcp connection, udp uses only one */ 85 struct statctx { 86 TAILQ_ENTRY(statctx) entry; 87 struct timeval t_start, t_last; 88 unsigned long long bytes; 89 int fd; 90 char *buf; 91 size_t buflen; 92 struct event ev; 93 /* TCP only */ 94 struct tcpservsock *tcp_ts; 95 u_long tcp_tcbaddr; 96 /* UDP only */ 97 u_long udp_slice_pkts; 98 }; 99 100 static void signal_handler(int, short, void *); 101 static void saddr_ntop(const struct sockaddr *, socklen_t, char *, size_t); 102 static void drop_gid(void); 103 static void set_slice_timer(int); 104 static void print_tcp_header(void); 105 static void kget(u_long, void *, size_t); 106 static u_long kfind_tcb(int); 107 static void kupdate_stats(u_long, struct inpcb *, struct tcpcb *, 108 struct socket *); 109 static void list_kvars(void); 110 static void check_kvar(const char *); 111 static char ** check_prepare_kvars(char *); 112 static void stats_prepare(struct statctx *); 113 static void tcp_stats_display(unsigned long long, long double, float, 114 struct statctx *, struct inpcb *, struct tcpcb *, struct socket *); 115 static void tcp_process_slice(int, short, void *); 116 static void tcp_server_handle_sc(int, short, void *); 117 static void tcp_server_accept(int, short, void *); 118 static void server_init(struct addrinfo *, struct statctx *); 119 static void client_handle_sc(int, short, void *); 120 static void client_init(struct addrinfo *, int, struct statctx *, 121 struct addrinfo *); 122 static int clock_gettime_tv(clockid_t, struct timeval *); 123 static void udp_server_handle_sc(int, short, void *); 124 static void udp_process_slice(int, short, void *); 125 static int map_tos(char *, int *); 126 /* 127 * We account the mainstats here, that is the stats 128 * for all connections, all variables starting with slice 129 * are used to account information for the timeslice 130 * between each output. Peak variables record the highest 131 * between all slices so far. 132 */ 133 static struct { 134 unsigned long long slice_bytes; /* bytes for last slice */ 135 long double peak_mbps; /* peak mbps so far */ 136 int nconns; /* connected clients */ 137 struct event timer; /* process timer */ 138 } mainstats; 139 140 /* When adding variables, also add to tcp_stats_display() */ 141 static const char *allowed_kvars[] = { 142 "inpcb.inp_flags", 143 "sockb.so_rcv.sb_cc", 144 "sockb.so_rcv.sb_hiwat", 145 "sockb.so_rcv.sb_wat", 146 "sockb.so_snd.sb_cc", 147 "sockb.so_snd.sb_hiwat", 148 "sockb.so_snd.sb_wat", 149 "tcpcb.last_ack_sent", 150 "tcpcb.max_sndwnd", 151 "tcpcb.rcv_adv", 152 "tcpcb.rcv_nxt", 153 "tcpcb.rcv_scale", 154 "tcpcb.rcv_wnd", 155 "tcpcb.rfbuf_cnt", 156 "tcpcb.rfbuf_ts", 157 "tcpcb.snd_cwnd", 158 "tcpcb.snd_max", 159 "tcpcb.snd_nxt", 160 "tcpcb.snd_scale", 161 "tcpcb.snd_ssthresh", 162 "tcpcb.snd_una", 163 "tcpcb.snd_wl1", 164 "tcpcb.snd_wl2", 165 "tcpcb.snd_wnd", 166 "tcpcb.t_rcvtime", 167 "tcpcb.t_rtseq", 168 "tcpcb.t_rttmin", 169 "tcpcb.t_rtttime", 170 "tcpcb.t_rttvar", 171 "tcpcb.t_srtt", 172 "tcpcb.ts_recent", 173 "tcpcb.ts_recent_age", 174 NULL 175 }; 176 177 TAILQ_HEAD(, statctx) sc_queue; 178 179 static void __dead 180 usage(void) 181 { 182 fprintf(stderr, 183 "usage: tcpbench -l\n" 184 " tcpbench [-uv] [-B buf] [-b addr] [-k kvars] [-n connections]\n" 185 " [-p port] [-r interval] [-S space] [-T toskeyword]\n" 186 " [-t secs] [-V rtable] hostname\n" 187 " tcpbench -s [-uv] [-B buf] [-k kvars] [-p port]\n" 188 " [-r interval] [-S space] [-T toskeyword] [-V rtable]\n"); 189 exit(1); 190 } 191 192 static void 193 signal_handler(int sig, short event, void *bula) 194 { 195 /* 196 * signal handler rules don't apply, libevent decouples for us 197 */ 198 switch (sig) { 199 case SIGINT: 200 case SIGTERM: 201 case SIGHUP: 202 warnx("Terminated by signal %d", sig); 203 exit(0); 204 break; /* NOTREACHED */ 205 default: 206 errx(1, "unexpected signal %d", sig); 207 break; /* NOTREACHED */ 208 } 209 } 210 211 static void 212 saddr_ntop(const struct sockaddr *addr, socklen_t alen, char *buf, size_t len) 213 { 214 char hbuf[NI_MAXHOST], pbuf[NI_MAXSERV]; 215 int herr; 216 217 if ((herr = getnameinfo(addr, alen, hbuf, sizeof(hbuf), 218 pbuf, sizeof(pbuf), NI_NUMERICHOST|NI_NUMERICSERV)) != 0) { 219 if (herr == EAI_SYSTEM) 220 err(1, "getnameinfo"); 221 else 222 errx(1, "getnameinfo: %s", gai_strerror(herr)); 223 } 224 snprintf(buf, len, "[%s]:%s", hbuf, pbuf); 225 } 226 227 static void 228 drop_gid(void) 229 { 230 gid_t gid; 231 232 gid = getgid(); 233 if (setresgid(gid, gid, gid) == -1) 234 err(1, "setresgid"); 235 } 236 237 static void 238 set_slice_timer(int on) 239 { 240 struct timeval tv; 241 242 if (ptb->rflag == 0) 243 return; 244 245 if (on) { 246 if (evtimer_pending(&mainstats.timer, NULL)) 247 return; 248 timerclear(&tv); 249 /* XXX Is there a better way to do this ? */ 250 tv.tv_sec = ptb->rflag / 1000; 251 tv.tv_usec = (ptb->rflag % 1000) * 1000; 252 253 evtimer_add(&mainstats.timer, &tv); 254 } else if (evtimer_pending(&mainstats.timer, NULL)) 255 evtimer_del(&mainstats.timer); 256 } 257 258 static int 259 clock_gettime_tv(clockid_t clock_id, struct timeval *tv) 260 { 261 struct timespec ts; 262 263 if (clock_gettime(clock_id, &ts) == -1) 264 return (-1); 265 266 TIMESPEC_TO_TIMEVAL(tv, &ts); 267 268 return (0); 269 } 270 271 static void 272 print_tcp_header(void) 273 { 274 char **kv; 275 276 printf("%12s %14s %12s %8s ", "elapsed_ms", "bytes", "mbps", 277 "bwidth"); 278 for (kv = ptb->kvars; ptb->kvars != NULL && *kv != NULL; kv++) 279 printf("%s%s", kv != ptb->kvars ? "," : "", *kv); 280 printf("\n"); 281 } 282 283 static void 284 kget(u_long addr, void *buf, size_t size) 285 { 286 if (kvm_read(ptb->kvmh, addr, buf, size) != (ssize_t)size) 287 errx(1, "kvm_read: %s", kvm_geterr(ptb->kvmh)); 288 } 289 290 static u_long 291 kfind_tcb(int sock) 292 { 293 struct inpcbtable tcbtab; 294 struct inpcb *head, *next, *prev; 295 struct inpcb inpcb; 296 struct tcpcb tcpcb; 297 298 struct sockaddr_storage me, them; 299 socklen_t melen, themlen; 300 struct sockaddr_in *in4; 301 struct sockaddr_in6 *in6; 302 char tmp1[64], tmp2[64]; 303 int nretry; 304 305 nretry = 10; 306 melen = themlen = sizeof(struct sockaddr_storage); 307 if (getsockname(sock, (struct sockaddr *)&me, &melen) == -1) 308 err(1, "getsockname"); 309 if (getpeername(sock, (struct sockaddr *)&them, &themlen) == -1) 310 err(1, "getpeername"); 311 if (me.ss_family != them.ss_family) 312 errx(1, "%s: me.ss_family != them.ss_family", __func__); 313 if (me.ss_family != AF_INET && me.ss_family != AF_INET6) 314 errx(1, "%s: unknown socket family", __func__); 315 if (ptb->vflag >= 2) { 316 saddr_ntop((struct sockaddr *)&me, me.ss_len, 317 tmp1, sizeof(tmp1)); 318 saddr_ntop((struct sockaddr *)&them, them.ss_len, 319 tmp2, sizeof(tmp2)); 320 fprintf(stderr, "Our socket local %s remote %s\n", tmp1, tmp2); 321 } 322 if (ptb->vflag >= 2) 323 fprintf(stderr, "Using PCB table at %lu\n", ptb->ktcbtab); 324 retry: 325 kget(ptb->ktcbtab, &tcbtab, sizeof(tcbtab)); 326 prev = head = (struct inpcb *)&CIRCLEQ_FIRST( 327 &((struct inpcbtable *)ptb->ktcbtab)->inpt_queue); 328 next = CIRCLEQ_FIRST(&tcbtab.inpt_queue); 329 330 if (ptb->vflag >= 2) 331 fprintf(stderr, "PCB head at %p\n", head); 332 while (next != head) { 333 if (ptb->vflag >= 2) 334 fprintf(stderr, "Checking PCB %p\n", next); 335 kget((u_long)next, &inpcb, sizeof(inpcb)); 336 if (CIRCLEQ_PREV(&inpcb, inp_queue) != prev) { 337 if (nretry--) { 338 warnx("pcb prev pointer insane"); 339 goto retry; 340 } else 341 errx(1, "pcb prev pointer insane," 342 " all attempts exausted"); 343 } 344 prev = next; 345 next = CIRCLEQ_NEXT(&inpcb, inp_queue); 346 347 if (me.ss_family == AF_INET) { 348 if ((inpcb.inp_flags & INP_IPV6) != 0) { 349 if (ptb->vflag >= 2) 350 fprintf(stderr, "Skip: INP_IPV6"); 351 continue; 352 } 353 if (ptb->vflag >= 2) { 354 inet_ntop(AF_INET, &inpcb.inp_laddr, 355 tmp1, sizeof(tmp1)); 356 inet_ntop(AF_INET, &inpcb.inp_faddr, 357 tmp2, sizeof(tmp2)); 358 fprintf(stderr, "PCB %p local: [%s]:%d " 359 "remote: [%s]:%d\n", prev, 360 tmp1, inpcb.inp_lport, 361 tmp2, inpcb.inp_fport); 362 } 363 in4 = (struct sockaddr_in *)&me; 364 if (memcmp(&in4->sin_addr, &inpcb.inp_laddr, 365 sizeof(struct in_addr)) != 0 || 366 in4->sin_port != inpcb.inp_lport) 367 continue; 368 in4 = (struct sockaddr_in *)&them; 369 if (memcmp(&in4->sin_addr, &inpcb.inp_faddr, 370 sizeof(struct in_addr)) != 0 || 371 in4->sin_port != inpcb.inp_fport) 372 continue; 373 } else { 374 if ((inpcb.inp_flags & INP_IPV6) == 0) 375 continue; 376 if (ptb->vflag >= 2) { 377 inet_ntop(AF_INET6, &inpcb.inp_laddr6, 378 tmp1, sizeof(tmp1)); 379 inet_ntop(AF_INET6, &inpcb.inp_faddr6, 380 tmp2, sizeof(tmp2)); 381 fprintf(stderr, "PCB %p local: [%s]:%d " 382 "remote: [%s]:%d\n", prev, 383 tmp1, inpcb.inp_lport, 384 tmp2, inpcb.inp_fport); 385 } 386 in6 = (struct sockaddr_in6 *)&me; 387 if (memcmp(&in6->sin6_addr, &inpcb.inp_laddr6, 388 sizeof(struct in6_addr)) != 0 || 389 in6->sin6_port != inpcb.inp_lport) 390 continue; 391 in6 = (struct sockaddr_in6 *)&them; 392 if (memcmp(&in6->sin6_addr, &inpcb.inp_faddr6, 393 sizeof(struct in6_addr)) != 0 || 394 in6->sin6_port != inpcb.inp_fport) 395 continue; 396 } 397 kget((u_long)inpcb.inp_ppcb, &tcpcb, sizeof(tcpcb)); 398 if (tcpcb.t_state != TCPS_ESTABLISHED) { 399 if (ptb->vflag >= 2) 400 fprintf(stderr, "Not established\n"); 401 continue; 402 } 403 if (ptb->vflag >= 2) 404 fprintf(stderr, "Found PCB at %p\n", prev); 405 return ((u_long)prev); 406 } 407 408 errx(1, "No matching PCB found"); 409 } 410 411 static void 412 kupdate_stats(u_long tcbaddr, struct inpcb *inpcb, 413 struct tcpcb *tcpcb, struct socket *sockb) 414 { 415 kget(tcbaddr, inpcb, sizeof(*inpcb)); 416 kget((u_long)inpcb->inp_ppcb, tcpcb, sizeof(*tcpcb)); 417 kget((u_long)inpcb->inp_socket, sockb, sizeof(*sockb)); 418 } 419 420 static void 421 check_kvar(const char *var) 422 { 423 u_int i; 424 425 for (i = 0; allowed_kvars[i] != NULL; i++) 426 if (strcmp(allowed_kvars[i], var) == 0) 427 return; 428 errx(1, "Unrecognised kvar: %s", var); 429 } 430 431 static void 432 list_kvars(void) 433 { 434 u_int i; 435 436 printf("Supported kernel variables:\n"); 437 for (i = 0; allowed_kvars[i] != NULL; i++) 438 printf("\t%s\n", allowed_kvars[i]); 439 } 440 441 static char ** 442 check_prepare_kvars(char *list) 443 { 444 char *item, **ret = NULL; 445 u_int n = 0; 446 447 while ((item = strsep(&list, ", \t\n")) != NULL) { 448 check_kvar(item); 449 if ((ret = realloc(ret, sizeof(*ret) * (++n + 1))) == NULL) 450 errx(1, "realloc(kvars)"); 451 if ((ret[n - 1] = strdup(item)) == NULL) 452 errx(1, "strdup"); 453 ret[n] = NULL; 454 } 455 return (ret); 456 } 457 458 static void 459 stats_prepare(struct statctx *sc) 460 { 461 sc->buf = ptb->dummybuf; 462 sc->buflen = ptb->dummybuf_len; 463 464 if (ptb->kvars) 465 sc->tcp_tcbaddr = kfind_tcb(sc->fd); 466 if (clock_gettime_tv(CLOCK_MONOTONIC, &sc->t_start) == -1) 467 err(1, "clock_gettime_tv"); 468 sc->t_last = sc->t_start; 469 470 } 471 472 static void 473 tcp_stats_display(unsigned long long total_elapsed, long double mbps, 474 float bwperc, struct statctx *sc, struct inpcb *inpcb, 475 struct tcpcb *tcpcb, struct socket *sockb) 476 { 477 int j; 478 479 printf("%12llu %14llu %12.3Lf %7.2f%% ", total_elapsed, sc->bytes, 480 mbps, bwperc); 481 482 if (ptb->kvars != NULL) { 483 kupdate_stats(sc->tcp_tcbaddr, inpcb, tcpcb, 484 sockb); 485 486 for (j = 0; ptb->kvars[j] != NULL; j++) { 487 #define S(a) #a 488 #define P(b, v, f) \ 489 if (strcmp(ptb->kvars[j], S(b.v)) == 0) { \ 490 printf("%s"f, j > 0 ? "," : "", b->v); \ 491 continue; \ 492 } 493 P(inpcb, inp_flags, "0x%08x") 494 P(sockb, so_rcv.sb_cc, "%lu") 495 P(sockb, so_rcv.sb_hiwat, "%lu") 496 P(sockb, so_rcv.sb_wat, "%lu") 497 P(sockb, so_snd.sb_cc, "%lu") 498 P(sockb, so_snd.sb_hiwat, "%lu") 499 P(sockb, so_snd.sb_wat, "%lu") 500 P(tcpcb, last_ack_sent, "%u") 501 P(tcpcb, max_sndwnd, "%lu") 502 P(tcpcb, rcv_adv, "%u") 503 P(tcpcb, rcv_nxt, "%u") 504 P(tcpcb, rcv_scale, "%u") 505 P(tcpcb, rcv_wnd, "%lu") 506 P(tcpcb, rfbuf_cnt, "%u") 507 P(tcpcb, rfbuf_ts, "%u") 508 P(tcpcb, snd_cwnd, "%lu") 509 P(tcpcb, snd_max, "%u") 510 P(tcpcb, snd_nxt, "%u") 511 P(tcpcb, snd_scale, "%u") 512 P(tcpcb, snd_ssthresh, "%lu") 513 P(tcpcb, snd_una, "%u") 514 P(tcpcb, snd_wl1, "%u") 515 P(tcpcb, snd_wl2, "%u") 516 P(tcpcb, snd_wnd, "%lu") 517 P(tcpcb, t_rcvtime, "%u") 518 P(tcpcb, t_rtseq, "%u") 519 P(tcpcb, t_rttmin, "%hu") 520 P(tcpcb, t_rtttime, "%u") 521 P(tcpcb, t_rttvar, "%hu") 522 P(tcpcb, t_srtt, "%hu") 523 P(tcpcb, ts_recent, "%u") 524 P(tcpcb, ts_recent_age, "%u") 525 #undef S 526 #undef P 527 } 528 } 529 printf("\n"); 530 } 531 532 static void 533 tcp_process_slice(int fd, short event, void *bula) 534 { 535 unsigned long long total_elapsed, since_last; 536 long double mbps, slice_mbps = 0; 537 float bwperc; 538 struct statctx *sc; 539 struct timeval t_cur, t_diff; 540 struct inpcb inpcb; 541 struct tcpcb tcpcb; 542 struct socket sockb; 543 544 TAILQ_FOREACH(sc, &sc_queue, entry) { 545 if (clock_gettime_tv(CLOCK_MONOTONIC, &t_cur) == -1) 546 err(1, "clock_gettime_tv"); 547 if (ptb->kvars != NULL) /* process kernel stats */ 548 kupdate_stats(sc->tcp_tcbaddr, &inpcb, &tcpcb, 549 &sockb); 550 551 timersub(&t_cur, &sc->t_start, &t_diff); 552 total_elapsed = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000; 553 timersub(&t_cur, &sc->t_last, &t_diff); 554 since_last = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000; 555 bwperc = (sc->bytes * 100.0) / mainstats.slice_bytes; 556 mbps = (sc->bytes * 8) / (since_last * 1000.0); 557 slice_mbps += mbps; 558 559 tcp_stats_display(total_elapsed, mbps, bwperc, sc, 560 &inpcb, &tcpcb, &sockb); 561 562 sc->t_last = t_cur; 563 sc->bytes = 0; 564 } 565 566 /* process stats for this slice */ 567 if (slice_mbps > mainstats.peak_mbps) 568 mainstats.peak_mbps = slice_mbps; 569 printf("Conn: %3d Mbps: %12.3Lf Peak Mbps: %12.3Lf Avg Mbps: %12.3Lf\n", 570 mainstats.nconns, slice_mbps, mainstats.peak_mbps, 571 slice_mbps / mainstats.nconns); 572 mainstats.slice_bytes = 0; 573 574 set_slice_timer(mainstats.nconns > 0); 575 } 576 577 static void 578 udp_process_slice(int fd, short event, void *v_sc) 579 { 580 struct statctx *sc = v_sc; 581 unsigned long long total_elapsed, since_last, pps; 582 long double slice_mbps; 583 struct timeval t_cur, t_diff; 584 585 if (clock_gettime_tv(CLOCK_MONOTONIC, &t_cur) == -1) 586 err(1, "clock_gettime_tv"); 587 /* Calculate pps */ 588 timersub(&t_cur, &sc->t_start, &t_diff); 589 total_elapsed = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000; 590 timersub(&t_cur, &sc->t_last, &t_diff); 591 since_last = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000; 592 slice_mbps = (sc->bytes * 8) / (since_last * 1000.0); 593 pps = (sc->udp_slice_pkts * 1000) / since_last; 594 if (slice_mbps > mainstats.peak_mbps) 595 mainstats.peak_mbps = slice_mbps; 596 printf("Elapsed: %11llu Mbps: %11.3Lf Peak Mbps: %11.3Lf %s PPS: %7llu\n", 597 total_elapsed, slice_mbps, mainstats.peak_mbps, 598 ptb->sflag ? "Rx" : "Tx", pps); 599 600 /* Clean up this slice time */ 601 sc->t_last = t_cur; 602 sc->bytes = 0; 603 sc->udp_slice_pkts = 0; 604 set_slice_timer(1); 605 } 606 607 static void 608 udp_server_handle_sc(int fd, short event, void *v_sc) 609 { 610 ssize_t n; 611 struct statctx *sc = v_sc; 612 613 n = read(fd, ptb->dummybuf, ptb->dummybuf_len); 614 if (n == 0) 615 return; 616 else if (n == -1) { 617 if (errno != EINTR && errno != EWOULDBLOCK) 618 warn("fd %d read error", fd); 619 return; 620 } 621 622 if (ptb->vflag >= 3) 623 fprintf(stderr, "read: %zd bytes\n", n); 624 /* If this was our first packet, start slice timer */ 625 if (mainstats.peak_mbps == 0) 626 set_slice_timer(1); 627 /* Account packet */ 628 sc->udp_slice_pkts++; 629 sc->bytes += n; 630 } 631 632 static void 633 tcp_server_handle_sc(int fd, short event, void *v_sc) 634 { 635 struct statctx *sc = v_sc; 636 ssize_t n; 637 638 n = read(sc->fd, sc->buf, sc->buflen); 639 if (n == -1) { 640 if (errno != EINTR && errno != EWOULDBLOCK) 641 warn("fd %d read error", sc->fd); 642 return; 643 } else if (n == 0) { 644 if (ptb->vflag) 645 fprintf(stderr, "%8d closed by remote end\n", sc->fd); 646 647 TAILQ_REMOVE(&sc_queue, sc, entry); 648 649 event_del(&sc->ev); 650 close(sc->fd); 651 652 /* Some file descriptors are available again. */ 653 if (evtimer_pending(&sc->tcp_ts->evt, NULL)) { 654 evtimer_del(&sc->tcp_ts->evt); 655 event_add(&sc->tcp_ts->ev, NULL); 656 } 657 658 free(sc); 659 mainstats.nconns--; 660 set_slice_timer(mainstats.nconns > 0); 661 return; 662 } 663 if (ptb->vflag >= 3) 664 fprintf(stderr, "read: %zd bytes\n", n); 665 sc->bytes += n; 666 mainstats.slice_bytes += n; 667 } 668 669 static void 670 tcp_server_accept(int fd, short event, void *arg) 671 { 672 struct tcpservsock *ts = arg; 673 int sock, r; 674 struct statctx *sc; 675 struct sockaddr_storage ss; 676 socklen_t sslen; 677 char tmp[128]; 678 679 sslen = sizeof(ss); 680 681 event_add(&ts->ev, NULL); 682 if (event & EV_TIMEOUT) 683 return; 684 if ((sock = accept(fd, (struct sockaddr *)&ss, &sslen)) == -1) { 685 /* 686 * Pause accept if we are out of file descriptors, or 687 * libevent will haunt us here too. 688 */ 689 if (errno == ENFILE || errno == EMFILE) { 690 struct timeval evtpause = { 1, 0 }; 691 692 event_del(&ts->ev); 693 evtimer_add(&ts->evt, &evtpause); 694 } else if (errno != EWOULDBLOCK && errno != EINTR) 695 warn("accept"); 696 return; 697 } 698 saddr_ntop((struct sockaddr *)&ss, sslen, 699 tmp, sizeof(tmp)); 700 if ((r = fcntl(sock, F_GETFL, 0)) == -1) 701 err(1, "fcntl(F_GETFL)"); 702 r |= O_NONBLOCK; 703 if (fcntl(sock, F_SETFL, r) == -1) 704 err(1, "fcntl(F_SETFL, O_NONBLOCK)"); 705 if (ptb->Tflag != -1 && ss.ss_family == AF_INET) { 706 if (setsockopt(sock, IPPROTO_IP, IP_TOS, 707 &ptb->Tflag, sizeof(ptb->Tflag))) 708 err(1, "setsockopt IP_TOS"); 709 } 710 if (ptb->Tflag != -1 && ss.ss_family == AF_INET6) { 711 if (setsockopt(sock, IPPROTO_IPV6, IPV6_TCLASS, 712 &ptb->Tflag, sizeof(ptb->Tflag))) 713 err(1, "setsockopt IPV6_TCLASS"); 714 } 715 /* Alloc client structure and register reading callback */ 716 if ((sc = calloc(1, sizeof(*sc))) == NULL) 717 err(1, "calloc"); 718 sc->tcp_ts = ts; 719 sc->fd = sock; 720 stats_prepare(sc); 721 event_set(&sc->ev, sc->fd, EV_READ | EV_PERSIST, 722 tcp_server_handle_sc, sc); 723 event_add(&sc->ev, NULL); 724 TAILQ_INSERT_TAIL(&sc_queue, sc, entry); 725 mainstats.nconns++; 726 set_slice_timer(mainstats.nconns > 0); 727 if (ptb->vflag) 728 fprintf(stderr, "Accepted connection from %s, fd = %d\n", 729 tmp, sc->fd); 730 } 731 732 static void 733 server_init(struct addrinfo *aitop, struct statctx *udp_sc) 734 { 735 char tmp[128]; 736 int sock, on = 1; 737 struct addrinfo *ai; 738 struct event *ev; 739 struct tcpservsock *ts; 740 nfds_t lnfds; 741 742 lnfds = 0; 743 for (ai = aitop; ai != NULL; ai = ai->ai_next) { 744 saddr_ntop(ai->ai_addr, ai->ai_addrlen, tmp, sizeof(tmp)); 745 if (ptb->vflag) 746 fprintf(stderr, "Try to bind to %s\n", tmp); 747 if ((sock = socket(ai->ai_family, ai->ai_socktype, 748 ai->ai_protocol)) == -1) { 749 if (ai->ai_next == NULL) 750 err(1, "socket"); 751 if (ptb->vflag) 752 warn("socket"); 753 continue; 754 } 755 if (ptb->Vflag) { 756 if (setsockopt(sock, SOL_SOCKET, SO_RTABLE, 757 &ptb->Vflag, sizeof(ptb->Vflag)) == -1) { 758 if (errno == ENOPROTOOPT) 759 warn("set rtable"); 760 else 761 err(1, "setsockopt SO_RTABLE"); 762 } 763 } 764 if (ptb->Tflag != -1 && ai->ai_family == AF_INET) { 765 if (setsockopt(sock, IPPROTO_IP, IP_TOS, 766 &ptb->Tflag, sizeof(ptb->Tflag))) 767 err(1, "setsockopt IP_TOS"); 768 } 769 if (ptb->Tflag != -1 && ai->ai_family == AF_INET6) { 770 if (setsockopt(sock, IPPROTO_IPV6, IPV6_TCLASS, 771 &ptb->Tflag, sizeof(ptb->Tflag))) 772 err(1, "setsockopt IPV6_TCLASS"); 773 } 774 if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, 775 &on, sizeof(on)) == -1) 776 warn("reuse port"); 777 if (bind(sock, ai->ai_addr, ai->ai_addrlen) != 0) { 778 if (ai->ai_next == NULL) 779 err(1, "bind"); 780 if (ptb->vflag) 781 warn("bind"); 782 close(sock); 783 continue; 784 } 785 if (ptb->Sflag) { 786 if (setsockopt(sock, SOL_SOCKET, SO_RCVBUF, 787 &ptb->Sflag, sizeof(ptb->Sflag)) == -1) 788 warn("set receive buffer size"); 789 } 790 if (TCP_MODE) { 791 if (listen(sock, 64) == -1) { 792 if (ai->ai_next == NULL) 793 err(1, "listen"); 794 if (ptb->vflag) 795 warn("listen"); 796 close(sock); 797 continue; 798 } 799 } 800 if (UDP_MODE) { 801 if ((ev = calloc(1, sizeof(*ev))) == NULL) 802 err(1, "calloc"); 803 event_set(ev, sock, EV_READ | EV_PERSIST, 804 udp_server_handle_sc, udp_sc); 805 event_add(ev, NULL); 806 } else { 807 if ((ts = calloc(1, sizeof(*ts))) == NULL) 808 err(1, "calloc"); 809 810 ts->fd = sock; 811 evtimer_set(&ts->evt, tcp_server_accept, ts); 812 event_set(&ts->ev, ts->fd, EV_READ, 813 tcp_server_accept, ts); 814 event_add(&ts->ev, NULL); 815 } 816 if (ptb->vflag >= 3) 817 fprintf(stderr, "bound to fd %d\n", sock); 818 lnfds++; 819 } 820 freeaddrinfo(aitop); 821 if (lnfds == 0) 822 errx(1, "No working listen addresses found"); 823 } 824 825 static void 826 client_handle_sc(int fd, short event, void *v_sc) 827 { 828 struct statctx *sc = v_sc; 829 ssize_t n; 830 831 if ((n = write(sc->fd, sc->buf, sc->buflen)) == -1) { 832 if (errno == EINTR || errno == EWOULDBLOCK || 833 (UDP_MODE && errno == ENOBUFS)) 834 return; 835 err(1, "write"); 836 } 837 if (TCP_MODE && n == 0) { 838 fprintf(stderr, "Remote end closed connection"); 839 exit(1); 840 } 841 if (ptb->vflag >= 3) 842 fprintf(stderr, "write: %zd bytes\n", n); 843 sc->bytes += n; 844 mainstats.slice_bytes += n; 845 if (UDP_MODE) 846 sc->udp_slice_pkts++; 847 } 848 849 static void 850 client_init(struct addrinfo *aitop, int nconn, struct statctx *udp_sc, 851 struct addrinfo *aib) 852 { 853 struct statctx *sc; 854 struct addrinfo *ai; 855 char tmp[128]; 856 int i, r, sock; 857 858 sc = udp_sc; 859 for (i = 0; i < nconn; i++) { 860 for (sock = -1, ai = aitop; ai != NULL; ai = ai->ai_next) { 861 saddr_ntop(ai->ai_addr, ai->ai_addrlen, tmp, 862 sizeof(tmp)); 863 if (ptb->vflag && i == 0) 864 fprintf(stderr, "Trying %s\n", tmp); 865 if ((sock = socket(ai->ai_family, ai->ai_socktype, 866 ai->ai_protocol)) == -1) { 867 if (ai->ai_next == NULL) 868 err(1, "socket"); 869 if (ptb->vflag) 870 warn("socket"); 871 continue; 872 } 873 if (aib != NULL) { 874 saddr_ntop(aib->ai_addr, aib->ai_addrlen, 875 tmp, sizeof(tmp)); 876 if (ptb->vflag) 877 fprintf(stderr, 878 "Try to bind to %s\n", tmp); 879 if (bind(sock, (struct sockaddr *)aib->ai_addr, 880 aib->ai_addrlen) == -1) 881 err(1, "bind"); 882 } 883 if (ptb->Tflag != -1 && ai->ai_family == AF_INET) { 884 if (setsockopt(sock, IPPROTO_IP, IP_TOS, 885 &ptb->Tflag, sizeof(ptb->Tflag))) 886 err(1, "setsockopt IP_TOS"); 887 } 888 if (ptb->Tflag != -1 && ai->ai_family == AF_INET6) { 889 if (setsockopt(sock, IPPROTO_IPV6, IPV6_TCLASS, 890 &ptb->Tflag, sizeof(ptb->Tflag))) 891 err(1, "setsockopt IPV6_TCLASS"); 892 } 893 if (ptb->Vflag) { 894 if (setsockopt(sock, SOL_SOCKET, SO_RTABLE, 895 &ptb->Vflag, sizeof(ptb->Vflag)) == -1) { 896 if (errno == ENOPROTOOPT) 897 warn("set rtable"); 898 else 899 err(1, "setsockopt SO_RTABLE"); 900 } 901 } 902 if (ptb->Sflag) { 903 if (setsockopt(sock, SOL_SOCKET, SO_SNDBUF, 904 &ptb->Sflag, sizeof(ptb->Sflag)) == -1) 905 warn("set TCP send buffer size"); 906 } 907 if (connect(sock, ai->ai_addr, ai->ai_addrlen) != 0) { 908 if (ai->ai_next == NULL) 909 err(1, "connect"); 910 if (ptb->vflag) 911 warn("connect"); 912 close(sock); 913 sock = -1; 914 continue; 915 } 916 break; 917 } 918 if (sock == -1) 919 errx(1, "No host found"); 920 if ((r = fcntl(sock, F_GETFL, 0)) == -1) 921 err(1, "fcntl(F_GETFL)"); 922 r |= O_NONBLOCK; 923 if (fcntl(sock, F_SETFL, r) == -1) 924 err(1, "fcntl(F_SETFL, O_NONBLOCK)"); 925 /* Alloc and prepare stats */ 926 if (TCP_MODE) { 927 if ((sc = calloc(1, sizeof(*sc))) == NULL) 928 err(1, "calloc"); 929 } 930 sc->fd = sock; 931 stats_prepare(sc); 932 event_set(&sc->ev, sc->fd, EV_WRITE | EV_PERSIST, 933 client_handle_sc, sc); 934 event_add(&sc->ev, NULL); 935 TAILQ_INSERT_TAIL(&sc_queue, sc, entry); 936 mainstats.nconns++; 937 set_slice_timer(mainstats.nconns > 0); 938 } 939 freeaddrinfo(aitop); 940 if (aib != NULL) 941 freeaddrinfo(aib); 942 943 if (ptb->vflag && nconn > 1) 944 fprintf(stderr, "%d connections established\n", 945 mainstats.nconns); 946 } 947 948 static int 949 map_tos(char *s, int *val) 950 { 951 /* DiffServ Codepoints and other TOS mappings */ 952 const struct toskeywords { 953 const char *keyword; 954 int val; 955 } *t, toskeywords[] = { 956 { "af11", IPTOS_DSCP_AF11 }, 957 { "af12", IPTOS_DSCP_AF12 }, 958 { "af13", IPTOS_DSCP_AF13 }, 959 { "af21", IPTOS_DSCP_AF21 }, 960 { "af22", IPTOS_DSCP_AF22 }, 961 { "af23", IPTOS_DSCP_AF23 }, 962 { "af31", IPTOS_DSCP_AF31 }, 963 { "af32", IPTOS_DSCP_AF32 }, 964 { "af33", IPTOS_DSCP_AF33 }, 965 { "af41", IPTOS_DSCP_AF41 }, 966 { "af42", IPTOS_DSCP_AF42 }, 967 { "af43", IPTOS_DSCP_AF43 }, 968 { "critical", IPTOS_PREC_CRITIC_ECP }, 969 { "cs0", IPTOS_DSCP_CS0 }, 970 { "cs1", IPTOS_DSCP_CS1 }, 971 { "cs2", IPTOS_DSCP_CS2 }, 972 { "cs3", IPTOS_DSCP_CS3 }, 973 { "cs4", IPTOS_DSCP_CS4 }, 974 { "cs5", IPTOS_DSCP_CS5 }, 975 { "cs6", IPTOS_DSCP_CS6 }, 976 { "cs7", IPTOS_DSCP_CS7 }, 977 { "ef", IPTOS_DSCP_EF }, 978 { "inetcontrol", IPTOS_PREC_INTERNETCONTROL }, 979 { "lowdelay", IPTOS_LOWDELAY }, 980 { "netcontrol", IPTOS_PREC_NETCONTROL }, 981 { "reliability", IPTOS_RELIABILITY }, 982 { "throughput", IPTOS_THROUGHPUT }, 983 { NULL, -1 }, 984 }; 985 986 for (t = toskeywords; t->keyword != NULL; t++) { 987 if (strcmp(s, t->keyword) == 0) { 988 *val = t->val; 989 return (1); 990 } 991 } 992 993 return (0); 994 } 995 996 static void 997 quit(int sig, short event, void *arg) 998 { 999 exit(0); 1000 } 1001 1002 int 1003 main(int argc, char **argv) 1004 { 1005 extern int optind; 1006 extern char *optarg; 1007 struct timeval tv; 1008 unsigned int secs; 1009 1010 char kerr[_POSIX2_LINE_MAX], *tmp; 1011 struct addrinfo *aitop, *aib, hints; 1012 const char *errstr; 1013 struct rlimit rl; 1014 int ch, herr, nconn; 1015 struct nlist nl[] = { { "_tcbtable" }, { "" } }; 1016 const char *host = NULL, *port = DEFAULT_PORT, *srcbind = NULL; 1017 struct event ev_sigint, ev_sigterm, ev_sighup, ev_progtimer; 1018 struct statctx *udp_sc = NULL; 1019 1020 /* Init world */ 1021 setlinebuf(stdout); 1022 ptb = &tcpbench; 1023 ptb->dummybuf_len = 0; 1024 ptb->Sflag = ptb->sflag = ptb->vflag = ptb->Vflag = 0; 1025 ptb->kvmh = NULL; 1026 ptb->kvars = NULL; 1027 ptb->rflag = DEFAULT_STATS_INTERVAL; 1028 ptb->Tflag = -1; 1029 nconn = 1; 1030 aib = NULL; 1031 secs = 0; 1032 1033 while ((ch = getopt(argc, argv, "b:B:hlk:n:p:r:sS:t:T:uvV:")) != -1) { 1034 switch (ch) { 1035 case 'b': 1036 srcbind = optarg; 1037 break; 1038 case 'l': 1039 list_kvars(); 1040 exit(0); 1041 case 'k': 1042 if ((tmp = strdup(optarg)) == NULL) 1043 errx(1, "strdup"); 1044 ptb->kvars = check_prepare_kvars(tmp); 1045 free(tmp); 1046 break; 1047 case 'r': 1048 ptb->rflag = strtonum(optarg, 0, 60 * 60 * 24 * 1000, 1049 &errstr); 1050 if (errstr != NULL) 1051 errx(1, "statistics interval is %s: %s", 1052 errstr, optarg); 1053 break; 1054 case 'p': 1055 port = optarg; 1056 break; 1057 case 's': 1058 ptb->sflag = 1; 1059 break; 1060 case 'S': 1061 ptb->Sflag = strtonum(optarg, 0, 1024*1024*1024, 1062 &errstr); 1063 if (errstr != NULL) 1064 errx(1, "receive space interval is %s: %s", 1065 errstr, optarg); 1066 break; 1067 case 'B': 1068 ptb->dummybuf_len = strtonum(optarg, 0, 1024*1024*1024, 1069 &errstr); 1070 if (errstr != NULL) 1071 errx(1, "read/write buffer size is %s: %s", 1072 errstr, optarg); 1073 break; 1074 case 'v': 1075 ptb->vflag++; 1076 break; 1077 case 'V': 1078 ptb->Vflag = (unsigned int)strtonum(optarg, 0, 1079 RT_TABLEID_MAX, &errstr); 1080 if (errstr) 1081 errx(1, "rtable value is %s: %s", 1082 errstr, optarg); 1083 break; 1084 case 'n': 1085 nconn = strtonum(optarg, 0, 65535, &errstr); 1086 if (errstr != NULL) 1087 errx(1, "number of connections is %s: %s", 1088 errstr, optarg); 1089 break; 1090 case 'u': 1091 ptb->uflag = 1; 1092 break; 1093 case 'T': 1094 if (map_tos(optarg, &ptb->Tflag)) 1095 break; 1096 errstr = NULL; 1097 if (strlen(optarg) > 1 && optarg[0] == '0' && 1098 optarg[1] == 'x') 1099 ptb->Tflag = (int)strtol(optarg, NULL, 16); 1100 else 1101 ptb->Tflag = (int)strtonum(optarg, 0, 255, 1102 &errstr); 1103 if (ptb->Tflag == -1 || ptb->Tflag > 255 || errstr) 1104 errx(1, "illegal tos value %s", optarg); 1105 break; 1106 case 't': 1107 secs = strtonum(optarg, 1, UINT_MAX, &errstr); 1108 if (errstr != NULL) 1109 errx(1, "secs is %s: %s", 1110 errstr, optarg); 1111 break; 1112 case 'h': 1113 default: 1114 usage(); 1115 } 1116 } 1117 1118 argv += optind; 1119 argc -= optind; 1120 if ((argc != (ptb->sflag ? 0 : 1)) || 1121 (UDP_MODE && (ptb->kvars || nconn != 1))) 1122 usage(); 1123 1124 if (!ptb->sflag) 1125 host = argv[0]; 1126 /* 1127 * Rationale, 1128 * If TCP, use a big buffer with big reads/writes. 1129 * If UDP, use a big buffer in server and a buffer the size of a 1130 * ethernet packet. 1131 */ 1132 if (!ptb->dummybuf_len) { 1133 if (ptb->sflag || TCP_MODE) 1134 ptb->dummybuf_len = DEFAULT_BUF; 1135 else 1136 ptb->dummybuf_len = DEFAULT_UDP_PKT; 1137 } 1138 1139 bzero(&hints, sizeof(hints)); 1140 if (UDP_MODE) { 1141 hints.ai_socktype = SOCK_DGRAM; 1142 hints.ai_protocol = IPPROTO_UDP; 1143 } else { 1144 hints.ai_socktype = SOCK_STREAM; 1145 hints.ai_protocol = IPPROTO_TCP; 1146 } 1147 if (ptb->sflag) 1148 hints.ai_flags = AI_PASSIVE; 1149 if (srcbind != NULL) { 1150 hints.ai_flags |= AI_NUMERICHOST; 1151 herr = getaddrinfo(srcbind, NULL, &hints, &aib); 1152 hints.ai_flags &= ~AI_NUMERICHOST; 1153 if (herr != 0) { 1154 if (herr == EAI_SYSTEM) 1155 err(1, "getaddrinfo"); 1156 else 1157 errx(1, "getaddrinfo: %s", gai_strerror(herr)); 1158 } 1159 } 1160 if ((herr = getaddrinfo(host, port, &hints, &aitop)) != 0) { 1161 if (herr == EAI_SYSTEM) 1162 err(1, "getaddrinfo"); 1163 else 1164 errx(1, "getaddrinfo: %s", gai_strerror(herr)); 1165 } 1166 if (ptb->kvars) { 1167 if ((ptb->kvmh = kvm_openfiles(NULL, NULL, NULL, 1168 O_RDONLY, kerr)) == NULL) 1169 errx(1, "kvm_open: %s", kerr); 1170 drop_gid(); 1171 if (kvm_nlist(ptb->kvmh, nl) < 0 || nl[0].n_type == 0) 1172 errx(1, "kvm: no namelist"); 1173 ptb->ktcbtab = nl[0].n_value; 1174 } else 1175 drop_gid(); 1176 1177 if (getrlimit(RLIMIT_NOFILE, &rl) == -1) 1178 err(1, "getrlimit"); 1179 if (rl.rlim_cur < MAX_FD) 1180 rl.rlim_cur = MAX_FD; 1181 if (setrlimit(RLIMIT_NOFILE, &rl)) 1182 err(1, "setrlimit"); 1183 if (getrlimit(RLIMIT_NOFILE, &rl) == -1) 1184 err(1, "getrlimit"); 1185 1186 /* Init world */ 1187 TAILQ_INIT(&sc_queue); 1188 if ((ptb->dummybuf = malloc(ptb->dummybuf_len)) == NULL) 1189 err(1, "malloc"); 1190 arc4random_buf(ptb->dummybuf, ptb->dummybuf_len); 1191 1192 /* Setup libevent and signals */ 1193 event_init(); 1194 signal_set(&ev_sigterm, SIGTERM, signal_handler, NULL); 1195 signal_set(&ev_sighup, SIGHUP, signal_handler, NULL); 1196 signal_set(&ev_sigint, SIGINT, signal_handler, NULL); 1197 signal_add(&ev_sigint, NULL); 1198 signal_add(&ev_sigterm, NULL); 1199 signal_add(&ev_sighup, NULL); 1200 signal(SIGPIPE, SIG_IGN); 1201 1202 if (UDP_MODE) { 1203 if ((udp_sc = calloc(1, sizeof(*udp_sc))) == NULL) 1204 err(1, "calloc"); 1205 udp_sc->fd = -1; 1206 stats_prepare(udp_sc); 1207 evtimer_set(&mainstats.timer, udp_process_slice, udp_sc); 1208 } else { 1209 print_tcp_header(); 1210 evtimer_set(&mainstats.timer, tcp_process_slice, NULL); 1211 } 1212 1213 if (ptb->sflag) 1214 server_init(aitop, udp_sc); 1215 else { 1216 if (secs > 0) { 1217 timerclear(&tv); 1218 tv.tv_sec = secs + 1; 1219 evtimer_set(&ev_progtimer, quit, NULL); 1220 evtimer_add(&ev_progtimer, &tv); 1221 } 1222 client_init(aitop, nconn, udp_sc, aib); 1223 } 1224 1225 /* libevent main loop*/ 1226 event_dispatch(); 1227 1228 return (0); 1229 } 1230