xref: /openbsd/usr.bin/tcpbench/tcpbench.c (revision 891d7ab6)
1 /*
2  * Copyright (c) 2008 Damien Miller <djm@mindrot.org>
3  * Copyright (c) 2011 Christiano F. Haesbaert <haesbaert@haesbaert.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include <sys/types.h>
19 #include <sys/time.h>
20 #include <sys/socket.h>
21 #include <sys/socketvar.h>
22 #include <sys/resource.h>
23 #include <sys/queue.h>
24 
25 #include <net/route.h>
26 
27 #include <netinet/in.h>
28 #include <netinet/in_systm.h>
29 #include <netinet/ip.h>
30 #include <netinet/tcp.h>
31 #include <netinet/tcp_timer.h>
32 #include <netinet/tcp_fsm.h>
33 #include <netinet/in_pcb.h>
34 #include <netinet/tcp_var.h>
35 
36 #include <arpa/inet.h>
37 
38 #include <unistd.h>
39 #include <limits.h>
40 #include <stdlib.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <errno.h>
44 #include <event.h>
45 #include <netdb.h>
46 #include <signal.h>
47 #include <err.h>
48 #include <fcntl.h>
49 #include <poll.h>
50 
51 #include <kvm.h>
52 #include <nlist.h>
53 
/* Default service, kept as a string because it is handed to getaddrinfo() */
#define DEFAULT_PORT "12345"
#define DEFAULT_STATS_INTERVAL 1000 /* ms */
/* Default size of the shared I/O buffer, in bytes */
#define DEFAULT_BUF (256 * 1024)
/* presumably Ethernet MTU minus IPv4 + UDP headers */
#define DEFAULT_UDP_PKT (1500 - 28) /* TODO don't hardcode this */
/*
 * Mode predicates; both read the global ptb pointer.  They are fully
 * parenthesized so that they expand safely inside larger expressions.
 */
#define TCP_MODE (!ptb->uflag)
#define UDP_MODE (ptb->uflag)
/* Soft RLIMIT_NOFILE that main() tries to guarantee */
#define MAX_FD 1024
61 
/*
 * Our tcpbench globals: command-line settings plus runtime state shared
 * by the whole program.  Everything is accessed through ptb, which
 * main() points at tcpbench.
 */
struct {
	u_int	  Vflag;	/* rtableid (SO_RTABLE), 0 = not set */
	int	  Sflag;	/* Socket buffer size (SO_RCVBUF/SO_SNDBUF) */
	u_int	  rflag;	/* Report rate (ms), 0 disables the timer */
	int	  sflag;	/* True if server */
	int	  vflag;	/* Verbosity level, one per -v */
	int	  uflag;	/* UDP mode */
	kvm_t	 *kvmh;		/* Kvm handler, NULL unless -k was given */
	char	**kvars;	/* Kvm enabled vars, NULL-terminated */
	u_long	  ktcbtab;	/* Kernel address of the tcb table */
	char	 *dummybuf;	/* IO buffer shared by all connections */
	size_t	  dummybuf_len;	/* IO buffer len */
} tcpbench, *ptb;
76 
/*
 * Stats for a single TCP connection; UDP mode uses exactly one context
 * for the whole run.
 */
struct statctx {
	TAILQ_ENTRY(statctx) entry;	/* linkage in sc_queue */
	struct timeval t_start, t_last;	/* creation time / last report time */
	unsigned long long bytes;	/* bytes moved since t_last */
	int fd;				/* socket, -1 for the UDP server context */
	char *buf;			/* I/O buffer (points at ptb->dummybuf) */
	size_t buflen;			/* length of buf */
	struct event ev;		/* libevent read/write event for fd */
	/* TCP only */
	u_long tcp_tcbaddr;		/* kernel PCB address from kfind_tcb() */
	/* UDP only */
	u_long udp_slice_pkts;		/* packets counted since t_last */
};
91 
/* Forward declarations for every file-local function */
static void	signal_handler(int, short, void *);
static void	saddr_ntop(const struct sockaddr *, socklen_t, char *, size_t);
static void	drop_gid(void);
static void	set_slice_timer(int);
static void 	print_tcp_header(void);
static void	kget(u_long, void *, size_t);
static u_long	kfind_tcb(int);
static void	kupdate_stats(u_long, struct inpcb *, struct tcpcb *,
    struct socket *);
static void	list_kvars(void);
static void	check_kvar(const char *);
static char **	check_prepare_kvars(char *);
static void	stats_prepare(struct statctx *);
static void	tcp_stats_display(unsigned long long, long double, float,
    struct statctx *, struct inpcb *, struct tcpcb *, struct socket *);
static void	tcp_process_slice(int, short, void *);
static void	tcp_server_handle_sc(int, short, void *);
static void	tcp_server_accept(int, short, void *);
static void	server_init(struct addrinfo *, struct statctx *);
static void	client_handle_sc(int, short, void *);
static void	client_init(struct addrinfo *, int, struct statctx *);
static int	clock_gettime_tv(clockid_t, struct timeval *);
static void	udp_server_handle_sc(int, short, void *);
static void	udp_process_slice(int, short, void *);
116 
/*
 * Aggregate statistics covering all connections.  Members whose name
 * starts with "slice" account for the time slice between two
 * consecutive reports; "peak" members record the highest value seen in
 * any slice so far.
 */
static struct {
	unsigned long long slice_bytes; /* bytes for last slice */
	long double peak_mbps;		/* peak mbps so far */
	int nconns; 		        /* connected clients */
	struct event timer;		/* process timer */
} mainstats;
130 
/*
 * Kernel variables that may be requested with -k, NULL-terminated.
 * When adding variables, also add to tcp_stats_display().
 */
static const char *allowed_kvars[] = {
	"inpcb.inp_flags",
	"sockb.so_rcv.sb_cc",
	"sockb.so_rcv.sb_wat",
	"sockb.so_rcv.sb_hiwat",
	"sockb.so_snd.sb_cc",
	"sockb.so_snd.sb_wat",
	"sockb.so_snd.sb_hiwat",
	"tcpcb.snd_una",
	"tcpcb.snd_nxt",
	"tcpcb.snd_wl1",
	"tcpcb.snd_wl2",
	"tcpcb.snd_wnd",
	"tcpcb.rcv_wnd",
	"tcpcb.rcv_nxt",
	"tcpcb.rcv_adv",
	"tcpcb.snd_max",
	"tcpcb.snd_cwnd",
	"tcpcb.snd_ssthresh",
	"tcpcb.t_rcvtime",
	"tcpcb.t_rtttime",
	"tcpcb.t_rtseq",
	"tcpcb.t_srtt",
	"tcpcb.t_rttvar",
	"tcpcb.t_rttmin",
	"tcpcb.max_sndwnd",
	"tcpcb.snd_scale",
	"tcpcb.rcv_scale",
	"tcpcb.last_ack_sent",
	"tcpcb.rfbuf_cnt",
	"tcpcb.rfbuf_ts",
	"tcpcb.ts_recent_age",
	"tcpcb.ts_recent",
	NULL
};
167 
/* Queue of the stat contexts registered by the accept and client paths */
TAILQ_HEAD(, statctx) sc_queue;
169 
170 static void __dead
171 usage(void)
172 {
173 	fprintf(stderr,
174 	    "usage: tcpbench -l\n"
175 	    "       tcpbench [-uv] [-B buf] [-k kvars] [-n connections] [-p port]\n"
176 	    "                [-r interval] [-S space] [-V rtable] hostname\n"
177 	    "       tcpbench -s [-uv] [-B buf] [-k kvars] [-p port]\n"
178 	    "                [-r interval] [-S space] [-V rtable]\n");
179 	exit(1);
180 }
181 
/*
 * libevent callback for SIGINT, SIGTERM and SIGHUP: report the signal
 * and exit with status 0.  Any other signal number is a fatal error.
 *
 * Signal handler rules don't apply here, libevent decouples for us.
 */
static void
signal_handler(int sig, short event, void *bula)
{
	if (sig != SIGINT && sig != SIGTERM && sig != SIGHUP)
		errx(1, "unexpected signal %d", sig);

	warnx("Terminated by signal %d", sig);
	exit(0);
}
200 
/*
 * Format the socket address `addr' (of length `alen') into `buf' as
 * "[host]:port" using numeric host and service.  At most `len' bytes
 * are written, as bounded by snprintf().  Exits the program if
 * getnameinfo() fails.
 */
static void
saddr_ntop(const struct sockaddr *addr, socklen_t alen, char *buf, size_t len)
{
	char host[NI_MAXHOST], serv[NI_MAXSERV];
	int rc;

	rc = getnameinfo(addr, alen, host, sizeof(host), serv, sizeof(serv),
	    NI_NUMERICHOST | NI_NUMERICSERV);
	/* EAI_SYSTEM means the real reason is in errno */
	if (rc == EAI_SYSTEM)
		err(1, "getnameinfo");
	if (rc != 0)
		errx(1, "getnameinfo: %s", gai_strerror(rc));

	snprintf(buf, len, "[%s]:%s", host, serv);
}
216 
/*
 * Set the real, effective and saved group ids to the real group id.
 * Exits the program if setresgid() fails.
 */
static void
drop_gid(void)
{
	const gid_t real_gid = getgid();

	if (setresgid(real_gid, real_gid, real_gid) == -1)
		err(1, "setresgid");
}
226 
227 static void
228 set_slice_timer(int on)
229 {
230 	struct timeval tv;
231 
232 	if (ptb->rflag == 0)
233 		return;
234 
235 	if (on) {
236 		if (evtimer_pending(&mainstats.timer, NULL))
237 			return;
238 		timerclear(&tv);
239 		/* XXX Is there a better way to do this ? */
240 		tv.tv_sec = ptb->rflag / 1000;
241 		tv.tv_usec = (ptb->rflag % 1000) * 1000;
242 
243 		evtimer_add(&mainstats.timer, &tv);
244 	} else if (evtimer_pending(&mainstats.timer, NULL))
245 		evtimer_del(&mainstats.timer);
246 }
247 
/*
 * clock_gettime() variant that stores its result as a struct timeval.
 * Returns 0 on success, or -1 when clock_gettime() fails, in which case
 * *tv is left untouched.
 */
static int
clock_gettime_tv(clockid_t clock_id, struct timeval *tv)
{
	struct timespec now;
	int rc;

	rc = clock_gettime(clock_id, &now);
	if (rc != -1) {
		TIMESPEC_TO_TIMEVAL(tv, &now);
		rc = 0;
	}
	return (rc);
}
260 
261 static void
262 print_tcp_header(void)
263 {
264 	char **kv;
265 
266 	printf("%12s %14s %12s %8s ", "elapsed_ms", "bytes", "mbps",
267 	    "bwidth");
268 	for (kv = ptb->kvars;  ptb->kvars != NULL && *kv != NULL; kv++)
269 		printf("%s%s", kv != ptb->kvars ? "," : "", *kv);
270 	printf("\n");
271 }
272 
273 static void
274 kget(u_long addr, void *buf, size_t size)
275 {
276 	if (kvm_read(ptb->kvmh, addr, buf, size) != (ssize_t)size)
277 		errx(1, "kvm_read: %s", kvm_geterr(ptb->kvmh));
278 }
279 
280 static u_long
281 kfind_tcb(int sock)
282 {
283 	struct inpcbtable tcbtab;
284 	struct inpcb *head, *next, *prev;
285 	struct inpcb inpcb;
286 	struct tcpcb tcpcb;
287 
288 	struct sockaddr_storage me, them;
289 	socklen_t melen, themlen;
290 	struct sockaddr_in *in4;
291 	struct sockaddr_in6 *in6;
292 	char tmp1[64], tmp2[64];
293 	int nretry;
294 
295 	nretry = 10;
296 	melen = themlen = sizeof(struct sockaddr_storage);
297 	if (getsockname(sock, (struct sockaddr *)&me, &melen) == -1)
298 		err(1, "getsockname");
299 	if (getpeername(sock, (struct sockaddr *)&them, &themlen) == -1)
300 		err(1, "getpeername");
301 	if (me.ss_family != them.ss_family)
302 		errx(1, "%s: me.ss_family != them.ss_family", __func__);
303 	if (me.ss_family != AF_INET && me.ss_family != AF_INET6)
304 		errx(1, "%s: unknown socket family", __func__);
305 	if (ptb->vflag >= 2) {
306 		saddr_ntop((struct sockaddr *)&me, me.ss_len,
307 		    tmp1, sizeof(tmp1));
308 		saddr_ntop((struct sockaddr *)&them, them.ss_len,
309 		    tmp2, sizeof(tmp2));
310 		fprintf(stderr, "Our socket local %s remote %s\n", tmp1, tmp2);
311 	}
312 	if (ptb->vflag >= 2)
313 		fprintf(stderr, "Using PCB table at %lu\n", ptb->ktcbtab);
314 retry:
315 	kget(ptb->ktcbtab, &tcbtab, sizeof(tcbtab));
316 	prev = head = (struct inpcb *)&CIRCLEQ_FIRST(
317 	    &((struct inpcbtable *)ptb->ktcbtab)->inpt_queue);
318 	next = CIRCLEQ_FIRST(&tcbtab.inpt_queue);
319 
320 	if (ptb->vflag >= 2)
321 		fprintf(stderr, "PCB head at %p\n", head);
322 	while (next != head) {
323 		if (ptb->vflag >= 2)
324 			fprintf(stderr, "Checking PCB %p\n", next);
325 		kget((u_long)next, &inpcb, sizeof(inpcb));
326 		if (CIRCLEQ_PREV(&inpcb, inp_queue) != prev) {
327 			if (nretry--) {
328 				warnx("pcb prev pointer insane");
329 				goto retry;
330 			}
331 			else
332 				errx(1, "pcb prev pointer insane,"
333 				     " all attempts exausted");
334 		}
335 		prev = next;
336 		next = CIRCLEQ_NEXT(&inpcb, inp_queue);
337 
338 		if (me.ss_family == AF_INET) {
339 			if ((inpcb.inp_flags & INP_IPV6) != 0) {
340 				if (ptb->vflag >= 2)
341 					fprintf(stderr, "Skip: INP_IPV6");
342 				continue;
343 			}
344 			if (ptb->vflag >= 2) {
345 				inet_ntop(AF_INET, &inpcb.inp_laddr,
346 				    tmp1, sizeof(tmp1));
347 				inet_ntop(AF_INET, &inpcb.inp_faddr,
348 				    tmp2, sizeof(tmp2));
349 				fprintf(stderr, "PCB %p local: [%s]:%d "
350 				    "remote: [%s]:%d\n", prev,
351 				    tmp1, inpcb.inp_lport,
352 				    tmp2, inpcb.inp_fport);
353 			}
354 			in4 = (struct sockaddr_in *)&me;
355 			if (memcmp(&in4->sin_addr, &inpcb.inp_laddr,
356 			    sizeof(struct in_addr)) != 0 ||
357 			    in4->sin_port != inpcb.inp_lport)
358 				continue;
359 			in4 = (struct sockaddr_in *)&them;
360 			if (memcmp(&in4->sin_addr, &inpcb.inp_faddr,
361 			    sizeof(struct in_addr)) != 0 ||
362 			    in4->sin_port != inpcb.inp_fport)
363 				continue;
364 		} else {
365 			if ((inpcb.inp_flags & INP_IPV6) == 0)
366 				continue;
367 			if (ptb->vflag >= 2) {
368 				inet_ntop(AF_INET6, &inpcb.inp_laddr6,
369 				    tmp1, sizeof(tmp1));
370 				inet_ntop(AF_INET6, &inpcb.inp_faddr6,
371 				    tmp2, sizeof(tmp2));
372 				fprintf(stderr, "PCB %p local: [%s]:%d "
373 				    "remote: [%s]:%d\n", prev,
374 				    tmp1, inpcb.inp_lport,
375 				    tmp2, inpcb.inp_fport);
376 			}
377 			in6 = (struct sockaddr_in6 *)&me;
378 			if (memcmp(&in6->sin6_addr, &inpcb.inp_laddr6,
379 			    sizeof(struct in6_addr)) != 0 ||
380 			    in6->sin6_port != inpcb.inp_lport)
381 				continue;
382 			in6 = (struct sockaddr_in6 *)&them;
383 			if (memcmp(&in6->sin6_addr, &inpcb.inp_faddr6,
384 			    sizeof(struct in6_addr)) != 0 ||
385 			    in6->sin6_port != inpcb.inp_fport)
386 				continue;
387 		}
388 		kget((u_long)inpcb.inp_ppcb, &tcpcb, sizeof(tcpcb));
389 		if (tcpcb.t_state != TCPS_ESTABLISHED) {
390 			if (ptb->vflag >= 2)
391 				fprintf(stderr, "Not established\n");
392 			continue;
393 		}
394 		if (ptb->vflag >= 2)
395 			fprintf(stderr, "Found PCB at %p\n", prev);
396 		return ((u_long)prev);
397 	}
398 
399 	errx(1, "No matching PCB found");
400 }
401 
402 static void
403 kupdate_stats(u_long tcbaddr, struct inpcb *inpcb,
404     struct tcpcb *tcpcb, struct socket *sockb)
405 {
406 	kget(tcbaddr, inpcb, sizeof(*inpcb));
407 	kget((u_long)inpcb->inp_ppcb, tcpcb, sizeof(*tcpcb));
408 	kget((u_long)inpcb->inp_socket, sockb, sizeof(*sockb));
409 }
410 
411 static void
412 check_kvar(const char *var)
413 {
414 	u_int i;
415 
416 	for (i = 0; allowed_kvars[i] != NULL; i++)
417 		if (strcmp(allowed_kvars[i], var) == 0)
418 			return;
419 	errx(1, "Unrecognised kvar: %s", var);
420 }
421 
422 static void
423 list_kvars(void)
424 {
425 	u_int i;
426 
427 	fprintf(stderr, "Supported kernel variables:\n");
428 	for (i = 0; allowed_kvars[i] != NULL; i++)
429 		fprintf(stderr, "\t%s\n", allowed_kvars[i]);
430 }
431 
432 static char **
433 check_prepare_kvars(char *list)
434 {
435 	char *item, **ret = NULL;
436 	u_int n = 0;
437 
438 	while ((item = strsep(&list, ", \t\n")) != NULL) {
439 		check_kvar(item);
440 		if ((ret = realloc(ret, sizeof(*ret) * (++n + 1))) == NULL)
441 			errx(1, "realloc(kvars)");
442 		if ((ret[n - 1] = strdup(item)) == NULL)
443 			errx(1, "strdup");
444 		ret[n] = NULL;
445 	}
446 	return (ret);
447 }
448 
449 static void
450 stats_prepare(struct statctx *sc)
451 {
452 	sc->buf = ptb->dummybuf;
453 	sc->buflen = ptb->dummybuf_len;
454 
455 	if (ptb->kvars)
456 		sc->tcp_tcbaddr = kfind_tcb(sc->fd);
457 	if (clock_gettime_tv(CLOCK_MONOTONIC, &sc->t_start) == -1)
458 		err(1, "clock_gettime_tv");
459 	sc->t_last = sc->t_start;
460 
461 }
462 
463 static void
464 tcp_stats_display(unsigned long long total_elapsed, long double mbps,
465     float bwperc, struct statctx *sc, struct inpcb *inpcb,
466     struct tcpcb *tcpcb, struct socket *sockb)
467 {
468 	int j;
469 
470 	printf("%12llu %14llu %12.3Lf %7.2f%% ", total_elapsed, sc->bytes,
471 	    mbps, bwperc);
472 
473 	if (ptb->kvars != NULL) {
474 		kupdate_stats(sc->tcp_tcbaddr, inpcb, tcpcb,
475 		    sockb);
476 
477 		for (j = 0; ptb->kvars[j] != NULL; j++) {
478 #define S(a) #a
479 #define P(b, v, f)							\
480 			if (strcmp(ptb->kvars[j], S(b.v)) == 0) {	\
481 				printf("%s"f, j > 0 ? "," : "", b->v);	\
482 				continue;				\
483 			}
484 			P(inpcb, inp_flags, "0x%08x")
485 			P(sockb, so_rcv.sb_cc, "%lu")
486 			P(sockb, so_rcv.sb_wat, "%lu")
487 			P(sockb, so_rcv.sb_hiwat, "%lu")
488 			P(sockb, so_snd.sb_cc, "%lu")
489 			P(sockb, so_snd.sb_wat, "%lu")
490 			P(sockb, so_snd.sb_hiwat, "%lu")
491 			P(tcpcb, snd_una, "%u")
492 			P(tcpcb, snd_nxt, "%u")
493 			P(tcpcb, snd_wl1, "%u")
494 			P(tcpcb, snd_wl2, "%u")
495 			P(tcpcb, snd_wnd, "%lu")
496 			P(tcpcb, rcv_wnd, "%lu")
497 			P(tcpcb, rcv_nxt, "%u")
498 			P(tcpcb, rcv_adv, "%u")
499 			P(tcpcb, snd_max, "%u")
500 			P(tcpcb, snd_cwnd, "%lu")
501 			P(tcpcb, snd_ssthresh, "%lu")
502 			P(tcpcb, t_rcvtime, "%u")
503 			P(tcpcb, t_rtttime, "%u")
504 			P(tcpcb, t_rtseq, "%u")
505 			P(tcpcb, t_srtt, "%hu")
506 			P(tcpcb, t_rttvar, "%hu")
507 			P(tcpcb, t_rttmin, "%hu")
508 			P(tcpcb, max_sndwnd, "%lu")
509 			P(tcpcb, snd_scale, "%u")
510 			P(tcpcb, rcv_scale, "%u")
511 			P(tcpcb, last_ack_sent, "%u")
512 			P(tcpcb, rfbuf_cnt, "%u")
513 			P(tcpcb, rfbuf_ts, "%u")
514 			P(tcpcb, ts_recent_age, "%u")
515 			P(tcpcb, ts_recent, "%u")
516 #undef S
517 #undef P
518 		}
519 	}
520 	printf("\n");
521 }
522 
523 static void
524 tcp_process_slice(int fd, short event, void *bula)
525 {
526 	unsigned long long total_elapsed, since_last;
527 	long double mbps, slice_mbps = 0;
528 	float bwperc;
529 	struct statctx *sc;
530 	struct timeval t_cur, t_diff;
531 	struct inpcb inpcb;
532 	struct tcpcb tcpcb;
533 	struct socket sockb;
534 
535 	TAILQ_FOREACH(sc, &sc_queue, entry) {
536 		if (clock_gettime_tv(CLOCK_MONOTONIC, &t_cur) == -1)
537 			err(1, "clock_gettime_tv");
538 		if (ptb->kvars != NULL) /* process kernel stats */
539 			kupdate_stats(sc->tcp_tcbaddr, &inpcb, &tcpcb,
540 			    &sockb);
541 
542 		timersub(&t_cur, &sc->t_start, &t_diff);
543 		total_elapsed = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000;
544 		timersub(&t_cur, &sc->t_last, &t_diff);
545 		since_last = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000;
546 		bwperc = (sc->bytes * 100.0) / mainstats.slice_bytes;
547 		mbps = (sc->bytes * 8) / (since_last * 1000.0);
548 		slice_mbps += mbps;
549 
550 		tcp_stats_display(total_elapsed, mbps, bwperc, sc,
551 		    &inpcb, &tcpcb, &sockb);
552 
553 		sc->t_last = t_cur;
554 		sc->bytes = 0;
555 	}
556 
557 	/* process stats for this slice */
558 	if (slice_mbps > mainstats.peak_mbps)
559 		mainstats.peak_mbps = slice_mbps;
560 	printf("Conn: %3d Mbps: %12.3Lf Peak Mbps: %12.3Lf Avg Mbps: %12.3Lf\n",
561 	    mainstats.nconns, slice_mbps, mainstats.peak_mbps,
562 	    slice_mbps / mainstats.nconns);
563 	mainstats.slice_bytes = 0;
564 
565 	set_slice_timer(mainstats.nconns > 0);
566 }
567 
568 static void
569 udp_process_slice(int fd, short event, void *v_sc)
570 {
571 	struct statctx *sc = v_sc;
572 	unsigned long long total_elapsed, since_last;
573 	long double slice_mbps, pps;
574 	struct timeval t_cur, t_diff;
575 
576 	if (clock_gettime_tv(CLOCK_MONOTONIC, &t_cur) == -1)
577 		err(1, "clock_gettime_tv");
578 	/* Calculate pps */
579 	timersub(&t_cur, &sc->t_start, &t_diff);
580 	total_elapsed = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000;
581 	timersub(&t_cur, &sc->t_last, &t_diff);
582 	since_last = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000;
583 	slice_mbps = (sc->bytes * 8) / (since_last * 1000.0);
584 	pps = (sc->udp_slice_pkts * 1000) / since_last;
585 	if (slice_mbps > mainstats.peak_mbps)
586 		mainstats.peak_mbps = slice_mbps;
587 	printf("Elapsed: %11llu Mbps: %11.3Lf Peak Mbps: %11.3Lf %s PPS: %10.3Lf\n",
588 	    total_elapsed, slice_mbps, mainstats.peak_mbps,
589 	    ptb->sflag ? "Rx" : "Tx", pps);
590 
591 	/* Clean up this slice time */
592 	sc->t_last = t_cur;
593 	sc->bytes = 0;
594 	sc->udp_slice_pkts = 0;
595 	set_slice_timer(1);
596 }
597 
598 static void
599 udp_server_handle_sc(int fd, short event, void *v_sc)
600 {
601 	ssize_t n;
602 	struct statctx *sc = v_sc;
603 
604 again:
605 	n = read(fd, ptb->dummybuf, ptb->dummybuf_len);
606 	if (n == 0)
607 		return;
608 	else if (n == -1) {
609 		if (errno == EINTR)
610 			goto again;
611 		else if (errno == EWOULDBLOCK)
612 			return;
613 		warn("fd %d read error", fd);
614 		return;
615 	}
616 
617 	if (ptb->vflag >= 3)
618 		fprintf(stderr, "read: %zd bytes\n", n);
619 	/* If this was our first packet, start slice timer */
620 	if (mainstats.peak_mbps == 0)
621 		set_slice_timer(1);
622 	/* Account packet */
623 	sc->udp_slice_pkts++;
624 	sc->bytes += n;
625 }
626 
627 static void
628 tcp_server_handle_sc(int fd, short event, void *v_sc)
629 {
630 	struct statctx *sc = v_sc;
631 	ssize_t n;
632 
633 again:
634 	n = read(sc->fd, sc->buf, sc->buflen);
635 	if (n == -1) {
636 		if (errno == EINTR)
637 			goto again;
638 		else if (errno == EWOULDBLOCK)
639 			return;
640 		warn("fd %d read error", sc->fd);
641 		return;
642 	} else if (n == 0) {
643 		if (ptb->vflag)
644 			fprintf(stderr, "%8d closed by remote end\n", sc->fd);
645 		close(sc->fd);
646 		TAILQ_REMOVE(&sc_queue, sc, entry);
647 		free(sc);
648 		mainstats.nconns--;
649 		set_slice_timer(mainstats.nconns > 0);
650 		return;
651 	}
652 	if (ptb->vflag >= 3)
653 		fprintf(stderr, "read: %zd bytes\n", n);
654 	sc->bytes += n;
655 	mainstats.slice_bytes += n;
656 }
657 
658 static void
659 tcp_server_accept(int fd, short event, void *bula)
660 {
661 	int sock, r;
662 	struct statctx *sc;
663 	struct sockaddr_storage ss;
664 	socklen_t sslen;
665 	char tmp[128];
666 
667 	sslen = sizeof(ss);
668 again:
669 	if ((sock = accept(fd, (struct sockaddr *)&ss, &sslen)) == -1) {
670 		if (errno == EINTR)
671 			goto again;
672 		warn("accept");
673 		return;
674 	}
675 	saddr_ntop((struct sockaddr *)&ss, sslen,
676 	    tmp, sizeof(tmp));
677 	if ((r = fcntl(sock, F_GETFL, 0)) == -1)
678 		err(1, "fcntl(F_GETFL)");
679 	r |= O_NONBLOCK;
680 	if (fcntl(sock, F_SETFL, r) == -1)
681 		err(1, "fcntl(F_SETFL, O_NONBLOCK)");
682 	/* Alloc client structure and register reading callback */
683 	if ((sc = calloc(1, sizeof(*sc))) == NULL)
684 		err(1, "calloc");
685 	sc->fd = sock;
686 	stats_prepare(sc);
687 	event_set(&sc->ev, sc->fd, EV_READ | EV_PERSIST,
688 	    tcp_server_handle_sc, sc);
689 	event_add(&sc->ev, NULL);
690 	TAILQ_INSERT_TAIL(&sc_queue, sc, entry);
691 	mainstats.nconns++;
692 	set_slice_timer(mainstats.nconns > 0);
693 	if (ptb->vflag)
694 		warnx("Accepted connection from %s, fd = %d\n", tmp, sc->fd);
695 }
696 
697 static void
698 server_init(struct addrinfo *aitop, struct statctx *udp_sc)
699 {
700 	char tmp[128];
701 	int sock, on = 1;
702 	struct addrinfo *ai;
703 	struct event *ev;
704 	nfds_t lnfds;
705 
706 	if (setpgid(0, 0) == -1)
707 		err(1, "setpgid");
708 
709 	lnfds = 0;
710 	for (ai = aitop; ai != NULL; ai = ai->ai_next) {
711 		saddr_ntop(ai->ai_addr, ai->ai_addrlen, tmp, sizeof(tmp));
712 		if (ptb->vflag)
713 			fprintf(stderr, "Try to bind to %s\n", tmp);
714 		if ((sock = socket(ai->ai_family, ai->ai_socktype,
715 		    ai->ai_protocol)) == -1) {
716 			if (ai->ai_next == NULL)
717 				err(1, "socket");
718 			if (ptb->vflag)
719 				warn("socket");
720 			continue;
721 		}
722 		if (ptb->Vflag) {
723 			if (setsockopt(sock, SOL_SOCKET, SO_RTABLE,
724 			    &ptb->Vflag, sizeof(ptb->Vflag)) == -1) {
725 				if (errno == ENOPROTOOPT)
726 					warn("set rtable");
727 				else
728 					err(1, "setsockopt SO_RTABLE");
729 			}
730 		}
731 		if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
732 		    &on, sizeof(on)) == -1)
733 			warn("reuse port");
734 		if (bind(sock, ai->ai_addr, ai->ai_addrlen) != 0) {
735 			if (ai->ai_next == NULL)
736 				err(1, "bind");
737 			if (ptb->vflag)
738 				warn("bind");
739 			close(sock);
740 			continue;
741 		}
742 		if (ptb->Sflag) {
743 			if (setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
744 			    &ptb->Sflag, sizeof(ptb->Sflag)) == -1)
745 				warn("set receive buffer size");
746 		}
747 		if (TCP_MODE) {
748 			if (listen(sock, 64) == -1) {
749 				if (ai->ai_next == NULL)
750 					err(1, "listen");
751 				if (ptb->vflag)
752 					warn("listen");
753 				close(sock);
754 				continue;
755 			}
756 		}
757 		if ((ev = calloc(1, sizeof(*ev))) == NULL)
758 			err(1, "calloc");
759 		if (UDP_MODE)
760 			event_set(ev, sock, EV_READ | EV_PERSIST,
761 			    udp_server_handle_sc, udp_sc);
762 		else
763 			event_set(ev, sock, EV_READ | EV_PERSIST,
764 			    tcp_server_accept, NULL);
765 		event_add(ev, NULL);
766 		if (ptb->vflag >= 3)
767 			fprintf(stderr, "bound to fd %d\n", sock);
768 		lnfds++;
769 	}
770 	freeaddrinfo(aitop);
771 	if (lnfds == 0)
772 		errx(1, "No working listen addresses found");
773 }
774 
775 static void
776 client_handle_sc(int fd, short event, void *v_sc)
777 {
778 	struct statctx *sc = v_sc;
779 	ssize_t n;
780 
781 again:
782 	if ((n = write(sc->fd, sc->buf, sc->buflen)) == -1) {
783 		if (errno == EINTR || errno == EAGAIN ||
784 		    (UDP_MODE && errno == ENOBUFS))
785 			goto again;
786 		err(1, "write");
787 	}
788 	if (TCP_MODE && n == 0) {
789 		warnx("Remote end closed connection");
790 		exit(1);
791 	}
792 	if (ptb->vflag >= 3)
793 		warnx("write: %zd bytes\n", n);
794 	sc->bytes += n;
795 	mainstats.slice_bytes += n;
796 	if (UDP_MODE)
797 		sc->udp_slice_pkts++;
798 }
799 
800 static void
801 client_init(struct addrinfo *aitop, int nconn, struct statctx *udp_sc)
802 {
803 	struct statctx *sc;
804 	struct addrinfo *ai;
805 	char tmp[128];
806 	int i, r, sock;
807 
808 	sc = udp_sc;
809 	for (i = 0; i < nconn; i++) {
810 		for (sock = -1, ai = aitop; ai != NULL; ai = ai->ai_next) {
811 			saddr_ntop(ai->ai_addr, ai->ai_addrlen, tmp,
812 			    sizeof(tmp));
813 			if (ptb->vflag && i == 0)
814 				fprintf(stderr, "Trying %s\n", tmp);
815 			if ((sock = socket(ai->ai_family, ai->ai_socktype,
816 			    ai->ai_protocol)) == -1) {
817 				if (ai->ai_next == NULL)
818 					err(1, "socket");
819 				if (ptb->vflag)
820 					warn("socket");
821 				continue;
822 			}
823 			if (ptb->Vflag) {
824 				if (setsockopt(sock, SOL_SOCKET, SO_RTABLE,
825 				    &ptb->Vflag, sizeof(ptb->Vflag)) == -1) {
826 					if (errno == ENOPROTOOPT)
827 						warn("set rtable");
828 					else
829 						err(1, "setsockopt SO_RTABLE");
830 				}
831 			}
832 			if (ptb->Sflag) {
833 				if (setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
834 				    &ptb->Sflag, sizeof(ptb->Sflag)) == -1)
835 					warn("set TCP send buffer size");
836 			}
837 			if (connect(sock, ai->ai_addr, ai->ai_addrlen) != 0) {
838 				if (ai->ai_next == NULL)
839 					err(1, "connect");
840 				if (ptb->vflag)
841 					warn("connect");
842 				close(sock);
843 				sock = -1;
844 				continue;
845 			}
846 			break;
847 		}
848 		if (sock == -1)
849 			errx(1, "No host found");
850 		if ((r = fcntl(sock, F_GETFL, 0)) == -1)
851 			err(1, "fcntl(F_GETFL)");
852 		r |= O_NONBLOCK;
853 		if (fcntl(sock, F_SETFL, r) == -1)
854 			err(1, "fcntl(F_SETFL, O_NONBLOCK)");
855 		/* Alloc and prepare stats */
856 		if (TCP_MODE) {
857 			if ((sc = calloc(1, sizeof(*sc))) == NULL)
858 				err(1, "calloc");
859 		}
860 		sc->fd = sock;
861 		stats_prepare(sc);
862 		event_set(&sc->ev, sc->fd, EV_WRITE | EV_PERSIST,
863 		    client_handle_sc, sc);
864 		event_add(&sc->ev, NULL);
865 		TAILQ_INSERT_TAIL(&sc_queue, sc, entry);
866 		mainstats.nconns++;
867 		set_slice_timer(mainstats.nconns > 0);
868 		if (UDP_MODE)
869 			break;
870 	}
871 	freeaddrinfo(aitop);
872 
873 	if (ptb->vflag && nconn > 1)
874 		fprintf(stderr, "%u connections established\n", nconn);
875 }
876 
/*
 * Program entry point.
 *
 * Parses the command line, resolves the address(es) to use, opens kvm
 * when kernel variables were requested, drops the group id, makes sure
 * enough file descriptors are available, allocates the shared I/O
 * buffer, installs the libevent signal handlers and statistics timer,
 * starts either the server or the client side and finally runs the
 * libevent loop.
 */
int
main(int argc, char **argv)
{
	extern int optind;
	extern char *optarg;

	char kerr[_POSIX2_LINE_MAX], *tmp;
	struct addrinfo *aitop, hints;
	const char *errstr;
	struct rlimit rl;
	int ch, herr, nconn;
	struct nlist nl[] = { { "_tcbtable" }, { "" } };
	const char *host = NULL, *port = DEFAULT_PORT;
	struct event ev_sigint, ev_sigterm, ev_sighup;
	struct statctx *udp_sc = NULL;

	/* Init world */
	ptb = &tcpbench;
	ptb->dummybuf_len = 0;
	ptb->Sflag = ptb->sflag = ptb->vflag = ptb->Vflag = 0;
	ptb->uflag = 0;
	ptb->kvmh  = NULL;
	ptb->kvars = NULL;
	ptb->rflag = DEFAULT_STATS_INTERVAL;
	nconn = 1;

	while ((ch = getopt(argc, argv, "B:hlk:n:p:r:sS:uvV:")) != -1) {
		switch (ch) {
		case 'l':
			list_kvars();
			exit(0);
		case 'k':
			/* The parser modifies its argument: work on a copy */
			if ((tmp = strdup(optarg)) == NULL)
				errx(1, "strdup");
			ptb->kvars = check_prepare_kvars(tmp);
			free(tmp);
			break;
		case 'r':
			ptb->rflag = strtonum(optarg, 0, 60 * 60 * 24 * 1000,
			    &errstr);
			if (errstr != NULL)
				errx(1, "statistics interval is %s: %s",
				    errstr, optarg);
			break;
		case 'p':
			port = optarg;
			break;
		case 's':
			ptb->sflag = 1;
			break;
		case 'S':
			ptb->Sflag = strtonum(optarg, 0, 1024*1024*1024,
			    &errstr);
			if (errstr != NULL)
				errx(1, "socket buffer size is %s: %s",
				    errstr, optarg);
			break;
		case 'B':
			ptb->dummybuf_len = strtonum(optarg, 0, 1024*1024*1024,
			    &errstr);
			if (errstr != NULL)
				errx(1, "read/write buffer size is %s: %s",
				    errstr, optarg);
			break;
		case 'v':
			ptb->vflag++;
			break;
		case 'V':
			ptb->Vflag = (unsigned int)strtonum(optarg, 0,
			    RT_TABLEID_MAX, &errstr);
			if (errstr)
				errx(1, "rtable value is %s: %s",
				    errstr, optarg);
			break;
		case 'n':
			/* At least one connection, 0 would do nothing */
			nconn = strtonum(optarg, 1, 65535, &errstr);
			if (errstr != NULL)
				errx(1, "number of connections is %s: %s",
				    errstr, optarg);
			break;
		case 'u':
			ptb->uflag = 1;
			break;
		case 'h':
		default:
			usage();
		}
	}

	argv += optind;
	argc -= optind;
	/*
	 * The server takes no hostname, the client exactly one.  UDP mode
	 * supports neither kernel variables nor several connections.
	 */
	if ((argc != (ptb->sflag ? 0 : 1)) ||
	    (UDP_MODE && (ptb->kvars || nconn != 1)))
		usage();

	if (!ptb->sflag)
		host = argv[0];
	/*
	 * Rationale,
	 * If TCP, use a big buffer with big reads/writes.
	 * If UDP, use a big buffer in server and a buffer the size of a
	 * ethernet packet.
	 */
	if (!ptb->dummybuf_len) {
		if (ptb->sflag || TCP_MODE)
			ptb->dummybuf_len = DEFAULT_BUF;
		else
			ptb->dummybuf_len = DEFAULT_UDP_PKT;
	}

	memset(&hints, 0, sizeof(hints));
	if (UDP_MODE)
		hints.ai_socktype = SOCK_DGRAM;
	else
		hints.ai_socktype = SOCK_STREAM;
	if (ptb->sflag)
		hints.ai_flags = AI_PASSIVE;
	if ((herr = getaddrinfo(host, port, &hints, &aitop)) != 0) {
		if (herr == EAI_SYSTEM)
			err(1, "getaddrinfo");
		else
			errx(1, "getaddrinfo: %s", gai_strerror(herr));
	}
	if (ptb->kvars) {
		/*
		 * kvm is opened before the group id is dropped; presumably
		 * the extra group privilege is what allows the open.
		 */
		if ((ptb->kvmh = kvm_openfiles(NULL, NULL, NULL,
		    O_RDONLY, kerr)) == NULL)
			errx(1, "kvm_open: %s", kerr);
		drop_gid();
		if (kvm_nlist(ptb->kvmh, nl) < 0 || nl[0].n_type == 0)
			errx(1, "kvm: no namelist");
		ptb->ktcbtab = nl[0].n_value;
	} else
		drop_gid();

	/* Make sure we may open up to MAX_FD descriptors, if permitted */
	if (getrlimit(RLIMIT_NOFILE, &rl) == -1)
		err(1, "getrlimit");
	if (rl.rlim_cur < MAX_FD) {
		/* The soft limit may not be raised above the hard limit */
		rl.rlim_cur = rl.rlim_max < MAX_FD ? rl.rlim_max : MAX_FD;
		if (setrlimit(RLIMIT_NOFILE, &rl) == -1)
			err(1, "setrlimit");
	}

	/* Init world */
	TAILQ_INIT(&sc_queue);
	if ((ptb->dummybuf = malloc(ptb->dummybuf_len)) == NULL)
		err(1, "malloc");
	arc4random_buf(ptb->dummybuf, ptb->dummybuf_len);

	if (UDP_MODE) {
		if ((udp_sc = calloc(1, sizeof(*udp_sc))) == NULL)
			err(1, "calloc");
		udp_sc->fd = -1;
		stats_prepare(udp_sc);
	}

	/* Setup libevent and signals */
	event_init();
	signal_set(&ev_sigterm, SIGTERM, signal_handler, NULL);
	signal_set(&ev_sighup, SIGHUP, signal_handler, NULL);
	signal_set(&ev_sigint, SIGINT, signal_handler, NULL);
	signal_add(&ev_sigint, NULL);
	signal_add(&ev_sigterm, NULL);
	signal_add(&ev_sighup, NULL);
	signal(SIGPIPE, SIG_IGN);

	if (TCP_MODE)
		print_tcp_header();

	if (UDP_MODE)
		evtimer_set(&mainstats.timer, udp_process_slice, udp_sc);
	else
		evtimer_set(&mainstats.timer, tcp_process_slice, NULL);

	if (ptb->sflag) {
		server_init(aitop, udp_sc);
	} else
		client_init(aitop, nconn, udp_sc);

	/* libevent main loop*/
	event_dispatch();

	return (0);
}
1059 }
1060