xref: /freebsd/tools/tools/netrate/juggle/juggle.c (revision b3e76948)
1 /*-
2  * Copyright (c) 2005 Robert N. M. Watson
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/socket.h>
29 #include <sys/stdint.h>
30 #include <sys/time.h>
31 #include <sys/utsname.h>
32 #include <sys/wait.h>
33 
34 #include <netinet/in.h>
35 
36 #include <err.h>
37 #include <errno.h>
38 #include <pthread.h>
39 #include <signal.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <unistd.h>
44 
45 /*
46  * juggle is a simple IPC/context switch performance test, which works on
47  * pairs of file descriptors of various types.  In various runs, it considers
48  * the cost of bouncing a message synchronously across the descriptor pair,
49  * either in the same thread, two different threads, or two different
50  * processes.  Timing measurements for each series of I/O's are reported, but
51  * the first measurement in each series is discarded as "warmup" on the IPC
52  * primitive.  Variations on the test permit for pipelining, or the insertion
53  * of more than one packet into the stream at a time, intended to permit
54  * greater parallelism, hopefully allowing performance numbers to reflect
55  * use of available parallelism, and/or intelligence in context switching to
56  * avoid premature switching when multiple messages are queued.
57  */
58 
59 /*
60  * The UDP test uses UDP over the loopback interface.  Two arbitrary but
61  * fixed port numbers.
62  */
63 #define	UDP_PORT1	2020
64 #define	UDP_PORT2	2021
65 
66 /*
67  * Size of each message.  Must be smaller than the socket buffer or pipe
68  * buffer maximum size, as we want to send it atomically without blocking.
69  * If pipelining is in use, must be able to fit PIPELINE_MAX of these
70  * messages into the send queue.
71  */
72 #define	MESSAGELEN	128
73 
74 /*
75  * Number of message cycles -- into fd1, out of fd2, into fd2, and out of
76  * fd1.  By counting in cycles, we allow the master thread or process to
77  * perform timing without explicitly synchronizing with the secondary thread
78  * or process.
79  */
80 #define	NUMCYCLES	1024
81 
82 /*
83  * Number of times to run each test.
84  */
85 #define	LOOPS		10
86 
87 /*
88  * Number of in-flight messages per cycle.  I adjusting this value, be
89  * careful not to exceed the socket/etc buffer depth, or messages may be lost
90  * or result in blocking.
91  */
92 #define	PIPELINE_MAX	4
93 
94 static int
udp_create(int * fd1p,int * fd2p)95 udp_create(int *fd1p, int *fd2p)
96 {
97 	struct sockaddr_in sin1, sin2;
98 	int sock1, sock2;
99 
100 	sock1 = socket(PF_INET, SOCK_DGRAM, 0);
101 	if (sock1 == -1)
102 		return (-1);
103 
104 	sock2 = socket(PF_INET, SOCK_DGRAM, 0);
105 	if (sock2 == -1) {
106 		close(sock1);
107 		return (-1);
108 	}
109 
110 	bzero(&sin1, sizeof(sin1));
111 	sin1.sin_len = sizeof(sin1);
112 	sin1.sin_family = AF_INET;
113 	sin1.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
114 	sin1.sin_port = htons(UDP_PORT1);
115 
116 	bzero(&sin2, sizeof(sin2));
117 	sin2.sin_len = sizeof(sin2);
118 	sin2.sin_family = AF_INET;
119 	sin2.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
120 	sin2.sin_port = htons(UDP_PORT2);
121 
122 	if (bind(sock1, (struct sockaddr *) &sin1, sizeof(sin1)) < 0) {
123 		close(sock1);
124 		close(sock2);
125 		return (-1);
126 	}
127 
128 	if (bind(sock2, (struct sockaddr *) &sin2, sizeof(sin2)) < 0) {
129 		close(sock1);
130 		close(sock2);
131 		return (-1);
132 	}
133 
134 	if (connect(sock1, (struct sockaddr *) &sin2, sizeof(sin2)) < 0) {
135 		close(sock1);
136 		close(sock2);
137 		return (-1);
138 	}
139 
140 	if (connect(sock2, (struct sockaddr *) &sin1, sizeof(sin1)) < 0) {
141 		close(sock1);
142 		close(sock2);
143 		return (-1);
144 	}
145 
146 	*fd1p = sock1;
147 	*fd2p = sock2;
148 
149 	return (0);
150 }
151 
/*
 * Create a pipe and hand back its two ends via *fd1p (read side) and
 * *fd2p (write side).  Returns 0 on success, -1 (with errno set by
 * pipe(2)) on failure.
 */
static int
pipe_create(int *fd1p, int *fd2p)
{
	int ends[2];

	if (pipe(ends) == -1)
		return (-1);

	*fd1p = ends[0];
	*fd2p = ends[1];
	return (0);
}
165 
/*
 * Create a connected pair of local-domain datagram sockets and return
 * them via *fd1p and *fd2p.  Returns 0 on success, -1 (with errno set
 * by socketpair(2)) on failure.
 */
static int
socketpairdgram_create(int *fd1p, int *fd2p)
{
	int sv[2];

	if (socketpair(PF_LOCAL, SOCK_DGRAM, 0, sv) == -1)
		return (-1);

	*fd1p = sv[0];
	*fd2p = sv[1];
	return (0);
}
179 
/*
 * Create a connected pair of local-domain stream sockets and return
 * them via *fd1p and *fd2p.  Returns 0 on success, -1 (with errno set
 * by socketpair(2)) on failure.
 */
static int
socketpairstream_create(int *fd1p, int *fd2p)
{
	int sv[2];

	if (socketpair(PF_LOCAL, SOCK_STREAM, 0, sv) == -1)
		return (-1);

	*fd1p = sv[0];
	*fd2p = sv[1];
	return (0);
}
193 
194 static int
message_send(int s)195 message_send(int s)
196 {
197 	u_char buffer[MESSAGELEN];
198 	ssize_t len;
199 
200 	bzero(buffer, sizeof(buffer));
201 
202 	len = write(s, buffer, sizeof(buffer));
203 	if (len == -1)
204 		return (-1);
205 	if (len != sizeof(buffer)) {
206 		errno = EMSGSIZE;
207 		return (-1);
208 	}
209 	return (0);
210 }
211 
212 static int
message_recv(int s)213 message_recv(int s)
214 {
215 	u_char buffer[MESSAGELEN];
216 	ssize_t len;
217 
218 	len = read(s, buffer, sizeof(buffer));
219 	if (len == -1)
220 		return (-1);
221 	if (len != sizeof(buffer)) {
222 		errno = EMSGSIZE;
223 		return (-1);
224 	}
225 	return (0);
226 }
227 
228 /*
229  * Juggle messages between two file descriptors in a single thread/process,
230  * so simply a measure of IPC performance.
231  */
232 static struct timespec
juggle(int fd1,int fd2,int pipeline)233 juggle(int fd1, int fd2, int pipeline)
234 {
235 	struct timespec tstart, tfinish;
236 	int i, j;
237 
238 	if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
239 		err(-1, "juggle: clock_gettime");
240 
241 	for (i = 0; i < NUMCYCLES; i++) {
242 
243 		for (j = 0; j < pipeline; j++) {
244 			if (message_send(fd1) < 0)
245 				err(-1, "message_send fd1");
246 		}
247 
248 		for (j = 0; j < pipeline; j++) {
249 			if (message_recv(fd2) < 0)
250 				err(-1, "message_recv fd2");
251 
252 			if (message_send(fd2) < 0)
253 				err(-1, "message_send fd2");
254 		}
255 
256 		for (j = 0; j < pipeline; j++) {
257 			if (message_recv(fd1) < 0)
258 				err(-1, "message_recv fd1");
259 		}
260 	}
261 
262 	if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
263 		err(-1, "juggle: clock_gettime");
264 
265 	timespecsub(&tfinish, &tstart, &tfinish);
266 
267 	return (tfinish);
268 }
269 
270 /*
271  * Juggle messages between two file descriptors in two threads, so measure
272  * the cost of IPC and the cost of a thread context switch.
273  *
274  * In order to avoid measuring thread creation time, we make use of a
275  * condition variable to decide when both threads are ready to begin
276  * juggling.
277  */
/* Set to 1 by the child thread once running; guarded by threaded_mtx. */
static int threaded_child_ready;
/* Protects threaded_child_ready; paired with threaded_cond. */
static pthread_mutex_t threaded_mtx;
/* Signalled by the child thread when it sets threaded_child_ready. */
static pthread_cond_t threaded_cond;
/* Pipeline depth for the current run; written by the parent before the
 * child thread is created, read by juggling_thread(). */
static int threaded_pipeline;
282 
283 static void *
juggling_thread(void * arg)284 juggling_thread(void *arg)
285 {
286 	int fd2, i, j;
287 
288 	fd2 = *(int *)arg;
289 
290 	if (pthread_mutex_lock(&threaded_mtx) != 0)
291 		err(-1, "juggling_thread: pthread_mutex_lock");
292 
293 	threaded_child_ready = 1;
294 
295 	if (pthread_cond_signal(&threaded_cond) != 0)
296 		err(-1, "juggling_thread: pthread_cond_signal");
297 
298 	if (pthread_mutex_unlock(&threaded_mtx) != 0)
299 		err(-1, "juggling_thread: pthread_mutex_unlock");
300 
301 	for (i = 0; i < NUMCYCLES; i++) {
302 		for (j = 0; j < threaded_pipeline; j++) {
303 			if (message_recv(fd2) < 0)
304 				err(-1, "message_recv fd2");
305 
306 			if (message_send(fd2) < 0)
307 				err(-1, "message_send fd2");
308 		}
309 	}
310 
311 	return (NULL);
312 }
313 
314 static struct timespec
thread_juggle(int fd1,int fd2,int pipeline)315 thread_juggle(int fd1, int fd2, int pipeline)
316 {
317 	struct timespec tstart, tfinish;
318 	pthread_t thread;
319 	int i, j;
320 
321 	threaded_pipeline = pipeline;
322 
323 	if (pthread_mutex_init(&threaded_mtx, NULL) != 0)
324 		err(-1, "thread_juggle: pthread_mutex_init");
325 
326 	if (pthread_create(&thread, NULL, juggling_thread, &fd2) != 0)
327 		err(-1, "thread_juggle: pthread_create");
328 
329 	if (pthread_mutex_lock(&threaded_mtx) != 0)
330 		err(-1, "thread_juggle: pthread_mutex_lock");
331 
332 	while (!threaded_child_ready) {
333 		if (pthread_cond_wait(&threaded_cond, &threaded_mtx) != 0)
334 			err(-1, "thread_juggle: pthread_cond_wait");
335 	}
336 
337 	if (pthread_mutex_unlock(&threaded_mtx) != 0)
338 		err(-1, "thread_juggle: pthread_mutex_unlock");
339 
340 	if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
341 		err(-1, "thread_juggle: clock_gettime");
342 
343 	for (i = 0; i < NUMCYCLES; i++) {
344 		for (j = 0; j < pipeline; j++) {
345 			if (message_send(fd1) < 0)
346 				err(-1, "message_send fd1");
347 		}
348 
349 		for (j = 0; j < pipeline; j++) {
350 			if (message_recv(fd1) < 0)
351 				err(-1, "message_recv fd1");
352 		}
353 	}
354 
355 	if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
356 		err(-1, "thread_juggle: clock_gettime");
357 
358 	if (pthread_join(thread, NULL) != 0)
359 		err(-1, "thread_juggle: pthread_join");
360 
361 	timespecsub(&tfinish, &tstart, &tfinish);
362 
363 	return (tfinish);
364 }
365 
366 /*
367  * Juggle messages between two file descriptors in two processes, so measure
368  * the cost of IPC and the cost of a process context switch.
369  *
370  * Since we can't use a mutex between the processes, we simply do an extra
371  * write on the child to let the parent know that it's ready to start.
372  */
static struct timespec
process_juggle(int fd1, int fd2, int pipeline)
{
	struct timespec tstart, tfinish;
	pid_t pid, ppid, wpid;
	int error, i, j;

	ppid = getpid();

	pid = fork();
	if (pid < 0)
		err(-1, "process_juggle: fork");

	if (pid == 0) {
		/*
		 * Child: announce readiness with one extra message, then
		 * send/receive 'pipeline' messages per cycle on fd2.
		 */
		if (message_send(fd2) < 0) {
			/*
			 * Save errno across kill(), which may clobber it
			 * before err() formats the message; signal the
			 * parent so it doesn't block forever.
			 */
			error = errno;
			kill(ppid, SIGTERM);
			errno = error;
			err(-1, "process_juggle: child: message_send");
		}

		for (i = 0; i < NUMCYCLES; i++) {
			for (j = 0; j < pipeline; j++) {
				if (message_send(fd2) < 0)
					err(-1, "message_send fd2");

				if (message_recv(fd2) < 0)
					err(-1, "message_recv fd2");
			}
		}

		exit(0);
	} else {
		/* Parent: wait for the child's "ready" message. */
		if (message_recv(fd1) < 0) {
			error = errno;
			kill(pid, SIGTERM);
			errno = error;
			err(-1, "process_juggle: parent: message_recv");
		}

		/* Start timing only after the child has reported ready. */
		if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
			err(-1, "process_juggle: clock_gettime");

		for (i = 0; i < NUMCYCLES; i++) {
			for (j = 0; j < pipeline; j++) {
				if (message_send(fd1) < 0) {
					/* Terminate the child on error. */
					error = errno;
					kill(pid, SIGTERM);
					errno = error;
					err(-1, "message_send fd1");
				}
			}

			for (j = 0; j < pipeline; j++) {
				if (message_recv(fd1) < 0) {
					error = errno;
					kill(pid, SIGTERM);
					errno = error;
					err(-1, "message_recv fd1");
				}
			}
		}

		if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
			err(-1, "process_juggle: clock_gettime");
	}

	/* Reap the child before computing the elapsed time. */
	wpid = waitpid(pid, NULL, 0);
	if (wpid < 0)
		err(-1, "process_juggle: waitpid");
	if (wpid != pid)
		errx(-1, "process_juggle: waitpid: pid != wpid");

	timespecsub(&tfinish, &tstart, &tfinish);

	return (tfinish);
}
450 
451 /*
452  * When we print out results for larger pipeline sizes, we scale back by the
453  * depth of the pipeline.  This generally means dividing by the pipeline
454  * depth.  Except when it means dividing by zero.
455  */
/*
 * Divide *ts by the pipeline depth p in place, so per-message results
 * can be compared across pipeline depths.  p == 0 leaves *ts untouched.
 *
 * Bug fix: the previous version divided tv_sec and tv_nsec
 * independently, discarding the fractional seconds (1.0s / 2 yielded
 * 0.0s rather than 0.5s).  Compute in total nanoseconds instead.
 */
static void
scale_timespec(struct timespec *ts, int p)
{
	int64_t total;

	if (p == 0)
		return;

	total = (int64_t)ts->tv_sec * 1000000000 + ts->tv_nsec;
	total /= p;
	ts->tv_sec = total / 1000000000;
	ts->tv_nsec = total % 1000000000;
}
466 
/*
 * Table of IPC mechanisms to benchmark: each entry pairs a constructor
 * returning two connected file descriptors with the label used in the
 * CSV output.
 */
static const struct ipctype {
	int		(*it_create)(int *fd1p, int *fd2p);
	const char	*it_name;
} ipctypes[] = {
	{ pipe_create, "pipe" },
	{ udp_create, "udp" },
	{ socketpairdgram_create, "socketpairdgram" },
	{ socketpairstream_create, "socketpairstream" },
};
/* Number of entries in ipctypes[]. */
static const int ipctypes_len = (sizeof(ipctypes) / sizeof(struct ipctype));
477 
478 int
main(int argc,char * argv[])479 main(int argc, char *argv[])
480 {
481 	struct timespec juggle_results[LOOPS], process_results[LOOPS];
482 	struct timespec thread_results[LOOPS];
483 	int fd1, fd2, i, j, p;
484 	struct utsname uts;
485 
486 	printf("version, juggle.c %s\n", "$FreeBSD$");
487 
488 	if (uname(&uts) < 0)
489 		err(-1, "utsname");
490 	printf("sysname, %s\n", uts.sysname);
491 	printf("nodename, %s\n", uts.nodename);
492 	printf("release, %s\n", uts.release);
493 	printf("version, %s\n", uts.version);
494 	printf("machine, %s\n", uts.machine);
495 	printf("\n");
496 
497 	printf("MESSAGELEN, %d\n", MESSAGELEN);
498 	printf("NUMCYCLES, %d\n", NUMCYCLES);
499 	printf("LOOPS, %d\n", LOOPS);
500 	printf("PIPELINE_MAX, %d\n", PIPELINE_MAX);
501 	printf("\n\n");
502 
503 	printf("ipctype, test, pipeline_depth");
504 	for (j = 0; j < LOOPS; j++)
505 		printf(", data%d", j);
506 	printf("\n");
507 	fflush(stdout);
508 	for (p = 0; p < PIPELINE_MAX + 1; p++) {
509 		for (i = 0; i < ipctypes_len; i++) {
510 			if (ipctypes[i].it_create(&fd1, &fd2) < 0)
511 				err(-1, "main: %s", ipctypes[i].it_name);
512 
513 			/*
514 			 * For each test, do one uncounted warmup, then LOOPS
515 			 * runs of the actual test.
516 			 */
517 			juggle(fd1, fd2, p);
518 			for (j = 0; j < LOOPS; j++)
519 				juggle_results[j] = juggle(fd1, fd2, p);
520 			process_juggle(fd1, fd2, p);
521 			for (j = 0; j < LOOPS; j++)
522 				process_results[j] = process_juggle(fd1, fd2,
523 				    p);
524 			thread_juggle(fd1, fd2, p);
525 			for (j = 0; j < LOOPS; j++)
526 				thread_results[j] = thread_juggle(fd1, fd2,
527 				    p);
528 			for (j = 0; j < LOOPS; j++) {
529 				thread_results[j].tv_sec = 0;
530 				thread_results[j].tv_nsec = 0;
531 			}
532 			close(fd1);
533 			close(fd2);
534 		}
535 		/*
536 		 * When printing results for the round, normalize the results
537 		 * with respect to the pipeline depth.  We're doing p times
538 		 * as much work, and are we taking p times as long?
539 		 */
540 		for (i = 0; i < ipctypes_len; i++) {
541 			printf("%s, juggle, %d, ", ipctypes[i].it_name, p);
542 			for (j = 0; j < LOOPS; j++) {
543 				if (j != 0)
544 					printf(", ");
545 				scale_timespec(&juggle_results[j], p);
546 				printf("%jd.%09lu",
547 				    (intmax_t)juggle_results[j].tv_sec,
548 				    juggle_results[j].tv_nsec);
549 			}
550 			printf("\n");
551 			printf("%s, process_juggle, %d, ",
552 			    ipctypes[i].it_name, p);
553 			for (j = 0; j < LOOPS; j++) {
554 				if (j != 0)
555 					printf(", ");
556 				scale_timespec(&process_results[j], p);
557 				printf("%jd.%09lu",
558                                     (intmax_t)process_results[j].tv_sec,
559 				    process_results[j].tv_nsec);
560 			}
561 			printf("\n");
562 			printf("%s, thread_juggle, %d, ",
563 			    ipctypes[i].it_name, p);
564 			for (j = 0; j < LOOPS; j++) {
565 				if (j != 0)
566 					printf(", ");
567 				scale_timespec(&thread_results[j], p);
568 				printf("%jd.%09lu",
569 				    (intmax_t)thread_results[j].tv_sec,
570 				    thread_results[j].tv_nsec);
571 			}
572 			printf("\n");
573 		}
574 		fflush(stdout);
575 	}
576 	return (0);
577 }
578