xref: /freebsd/tools/tools/netrate/juggle/juggle.c (revision 85732ac8)
1 /*-
2  * Copyright (c) 2005 Robert N. M. Watson
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/types.h>
30 #include <sys/socket.h>
31 #include <sys/stdint.h>
32 #include <sys/time.h>
33 #include <sys/utsname.h>
34 #include <sys/wait.h>
35 
36 #include <netinet/in.h>
37 
38 #include <err.h>
39 #include <errno.h>
40 #include <pthread.h>
41 #include <signal.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46 
47 /*
48  * juggle is a simple IPC/context switch performance test, which works on
49  * pairs of file descriptors of various types.  In various runs, it considers
50  * the cost of bouncing a message synchronously across the descriptor pair,
51  * either in the same thread, two different threads, or two different
52  * processes.  Timing measurements for each series of I/O's are reported, but
53  * the first measurement in each series discarded as "warmup" on the IPC
54  * primitive.  Variations on the test permit for pipelining, or the insertion
55  * of more than one packet into the stream at a time, intended to permit
56  * greater parallelism, hopefully allowing performance numbers to reflect
57  * use of available parallelism, and/or intelligence in context switching to
58  * avoid premature switching when multiple messages are queued.
59  */
60 
61 /*
62  * The UDP test uses UDP over the loopback interface.  Two arbitrary but
63  * fixed port numbers.
64  */
65 #define	UDP_PORT1	2020
66 #define	UDP_PORT2	2021
67 
68 /*
69  * Size of each message.  Must be smaller than the socket buffer or pipe
70  * buffer maximum size, as we want to send it atomically without blocking.
71  * If pipelining is in use, must be able to fit PIPELINE_MAX of these
72  * messages into the send queue.
73  */
74 #define	MESSAGELEN	128
75 
76 /*
77  * Number of message cycles -- into fd1, out of fd2, into fd2, and out of
78  * fd1.  By counting in cycles, we allow the master thread or process to
79  * perform timing without explicitly synchronizing with the secondary thread
80  * or process.
81  */
82 #define	NUMCYCLES	1024
83 
84 /*
85  * Number of times to run each test.
86  */
87 #define	LOOPS		10
88 
89 /*
 * Number of in-flight messages per cycle.  In adjusting this value, be
91  * careful not to exceed the socket/etc buffer depth, or messages may be lost
92  * or result in blocking.
93  */
94 #define	PIPELINE_MAX	4
95 
96 static int
97 udp_create(int *fd1p, int *fd2p)
98 {
99 	struct sockaddr_in sin1, sin2;
100 	int sock1, sock2;
101 
102 	sock1 = socket(PF_INET, SOCK_DGRAM, 0);
103 	if (sock1 == -1)
104 		return (-1);
105 
106 	sock2 = socket(PF_INET, SOCK_DGRAM, 0);
107 	if (sock2 == -1) {
108 		close(sock1);
109 		return (-1);
110 	}
111 
112 	bzero(&sin1, sizeof(sin1));
113 	sin1.sin_len = sizeof(sin1);
114 	sin1.sin_family = AF_INET;
115 	sin1.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
116 	sin1.sin_port = htons(UDP_PORT1);
117 
118 	bzero(&sin2, sizeof(sin2));
119 	sin2.sin_len = sizeof(sin2);
120 	sin2.sin_family = AF_INET;
121 	sin2.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
122 	sin2.sin_port = htons(UDP_PORT2);
123 
124 	if (bind(sock1, (struct sockaddr *) &sin1, sizeof(sin1)) < 0) {
125 		close(sock1);
126 		close(sock2);
127 		return (-1);
128 	}
129 
130 	if (bind(sock2, (struct sockaddr *) &sin2, sizeof(sin2)) < 0) {
131 		close(sock1);
132 		close(sock2);
133 		return (-1);
134 	}
135 
136 	if (connect(sock1, (struct sockaddr *) &sin2, sizeof(sin2)) < 0) {
137 		close(sock1);
138 		close(sock2);
139 		return (-1);
140 	}
141 
142 	if (connect(sock2, (struct sockaddr *) &sin1, sizeof(sin1)) < 0) {
143 		close(sock1);
144 		close(sock2);
145 		return (-1);
146 	}
147 
148 	*fd1p = sock1;
149 	*fd2p = sock2;
150 
151 	return (0);
152 }
153 
/*
 * Create a pipe; fd1p receives the first (read) descriptor and fd2p the
 * second (write) descriptor.  Returns 0 on success, or -1 with errno set.
 */
static int
pipe_create(int *fd1p, int *fd2p)
{
	int pfd[2];

	if (pipe(pfd) == -1)
		return (-1);

	*fd1p = pfd[0];
	*fd2p = pfd[1];

	return (0);
}
167 
/*
 * Create a connected pair of local-domain datagram sockets, returned via
 * fd1p and fd2p.  Returns 0 on success, or -1 with errno set.
 */
static int
socketpairdgram_create(int *fd1p, int *fd2p)
{
	int sv[2];

	if (socketpair(PF_LOCAL, SOCK_DGRAM, 0, sv) == -1)
		return (-1);

	*fd1p = sv[0];
	*fd2p = sv[1];

	return (0);
}
181 
/*
 * Create a connected pair of local-domain stream sockets, returned via
 * fd1p and fd2p.  Returns 0 on success, or -1 with errno set.
 */
static int
socketpairstream_create(int *fd1p, int *fd2p)
{
	int sv[2];

	if (socketpair(PF_LOCAL, SOCK_STREAM, 0, sv) == -1)
		return (-1);

	*fd1p = sv[0];
	*fd2p = sv[1];

	return (0);
}
195 
196 static int
197 message_send(int s)
198 {
199 	u_char buffer[MESSAGELEN];
200 	ssize_t len;
201 
202 	bzero(buffer, sizeof(buffer));
203 
204 	len = write(s, buffer, sizeof(buffer));
205 	if (len == -1)
206 		return (-1);
207 	if (len != sizeof(buffer)) {
208 		errno = EMSGSIZE;
209 		return (-1);
210 	}
211 	return (0);
212 }
213 
214 static int
215 message_recv(int s)
216 {
217 	u_char buffer[MESSAGELEN];
218 	ssize_t len;
219 
220 	len = read(s, buffer, sizeof(buffer));
221 	if (len == -1)
222 		return (-1);
223 	if (len != sizeof(buffer)) {
224 		errno = EMSGSIZE;
225 		return (-1);
226 	}
227 	return (0);
228 }
229 
230 /*
231  * Juggle messages between two file descriptors in a single thread/process,
232  * so simply a measure of IPC performance.
233  */
234 static struct timespec
235 juggle(int fd1, int fd2, int pipeline)
236 {
237 	struct timespec tstart, tfinish;
238 	int i, j;
239 
240 	if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
241 		err(-1, "juggle: clock_gettime");
242 
243 	for (i = 0; i < NUMCYCLES; i++) {
244 
245 		for (j = 0; j < pipeline; j++) {
246 			if (message_send(fd1) < 0)
247 				err(-1, "message_send fd1");
248 		}
249 
250 		for (j = 0; j < pipeline; j++) {
251 			if (message_recv(fd2) < 0)
252 				err(-1, "message_recv fd2");
253 
254 			if (message_send(fd2) < 0)
255 				err(-1, "message_send fd2");
256 		}
257 
258 		for (j = 0; j < pipeline; j++) {
259 			if (message_recv(fd1) < 0)
260 				err(-1, "message_recv fd1");
261 		}
262 	}
263 
264 	if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
265 		err(-1, "juggle: clock_gettime");
266 
267 	timespecsub(&tfinish, &tstart, &tfinish);
268 
269 	return (tfinish);
270 }
271 
272 /*
273  * Juggle messages between two file descriptors in two threads, so measure
274  * the cost of IPC and the cost of a thread context switch.
275  *
276  * In order to avoid measuring thread creation time, we make use of a
277  * condition variable to decide when both threads are ready to begin
278  * juggling.
279  */
static int threaded_child_ready;	/* Set to 1 by the child thread once running. */
static pthread_mutex_t threaded_mtx;	/* Protects threaded_child_ready. */
static pthread_cond_t threaded_cond;	/* Signalled when threaded_child_ready is set. */
static int threaded_pipeline;		/* Pipeline depth for the current threaded run. */
284 
285 static void *
286 juggling_thread(void *arg)
287 {
288 	int fd2, i, j;
289 
290 	fd2 = *(int *)arg;
291 
292 	if (pthread_mutex_lock(&threaded_mtx) != 0)
293 		err(-1, "juggling_thread: pthread_mutex_lock");
294 
295 	threaded_child_ready = 1;
296 
297 	if (pthread_cond_signal(&threaded_cond) != 0)
298 		err(-1, "juggling_thread: pthread_cond_signal");
299 
300 	if (pthread_mutex_unlock(&threaded_mtx) != 0)
301 		err(-1, "juggling_thread: pthread_mutex_unlock");
302 
303 	for (i = 0; i < NUMCYCLES; i++) {
304 		for (j = 0; j < threaded_pipeline; j++) {
305 			if (message_recv(fd2) < 0)
306 				err(-1, "message_recv fd2");
307 
308 			if (message_send(fd2) < 0)
309 				err(-1, "message_send fd2");
310 		}
311 	}
312 
313 	return (NULL);
314 }
315 
/*
 * Parent side of the two-thread juggle: spawn the echo thread, wait for it
 * to report readiness, then time NUMCYCLES cycles of sending a pipeline's
 * worth of messages on fd1 and collecting the echoes.  Returns the elapsed
 * time as a timespec.
 *
 * NOTE(review): threaded_child_ready is never reset to 0, so on calls after
 * the first the wait below falls through immediately and timing may begin
 * before the new thread is running -- confirm this is acceptable.
 */
static struct timespec
thread_juggle(int fd1, int fd2, int pipeline)
{
	struct timespec tstart, tfinish;
	pthread_t thread;
	int i, j;

	/* Pipeline depth is passed to the thread via a global. */
	threaded_pipeline = pipeline;

	if (pthread_mutex_init(&threaded_mtx, NULL) != 0)
		err(-1, "thread_juggle: pthread_mutex_init");

	if (pthread_create(&thread, NULL, juggling_thread, &fd2) != 0)
		err(-1, "thread_juggle: pthread_create");

	/*
	 * Wait for the child thread to announce readiness so that thread
	 * creation time is excluded from the measurement.
	 */
	if (pthread_mutex_lock(&threaded_mtx) != 0)
		err(-1, "thread_juggle: pthread_mutex_lock");

	while (!threaded_child_ready) {
		if (pthread_cond_wait(&threaded_cond, &threaded_mtx) != 0)
			err(-1, "thread_juggle: pthread_cond_wait");
	}

	if (pthread_mutex_unlock(&threaded_mtx) != 0)
		err(-1, "thread_juggle: pthread_mutex_unlock");

	if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
		err(-1, "thread_juggle: clock_gettime");

	/* Inject a pipeline's worth of messages, then reap the echoes. */
	for (i = 0; i < NUMCYCLES; i++) {
		for (j = 0; j < pipeline; j++) {
			if (message_send(fd1) < 0)
				err(-1, "message_send fd1");
		}

		for (j = 0; j < pipeline; j++) {
			if (message_recv(fd1) < 0)
				err(-1, "message_recv fd1");
		}
	}

	if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
		err(-1, "thread_juggle: clock_gettime");

	if (pthread_join(thread, NULL) != 0)
		err(-1, "thread_juggle: pthread_join");

	timespecsub(&tfinish, &tstart, &tfinish);

	return (tfinish);
}
367 
368 /*
369  * Juggle messages between two file descriptors in two processes, so measure
370  * the cost of IPC and the cost of a process context switch.
371  *
372  * Since we can't use a mutex between the processes, we simply do an extra
373  * write on the child to let the parent know that it's ready to start.
374  */
375 static struct timespec
376 process_juggle(int fd1, int fd2, int pipeline)
377 {
378 	struct timespec tstart, tfinish;
379 	pid_t pid, ppid, wpid;
380 	int error, i, j;
381 
382 	ppid = getpid();
383 
384 	pid = fork();
385 	if (pid < 0)
386 		err(-1, "process_juggle: fork");
387 
388 	if (pid == 0) {
389 		if (message_send(fd2) < 0) {
390 			error = errno;
391 			kill(ppid, SIGTERM);
392 			errno = error;
393 			err(-1, "process_juggle: child: message_send");
394 		}
395 
396 		for (i = 0; i < NUMCYCLES; i++) {
397 			for (j = 0; j < pipeline; j++) {
398 				if (message_send(fd2) < 0)
399 					err(-1, "message_send fd2");
400 
401 				if (message_recv(fd2) < 0)
402 					err(-1, "message_recv fd2");
403 			}
404 		}
405 
406 		exit(0);
407 	} else {
408 		if (message_recv(fd1) < 0) {
409 			error = errno;
410 			kill(pid, SIGTERM);
411 			errno = error;
412 			err(-1, "process_juggle: parent: message_recv");
413 		}
414 
415 		if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
416 			err(-1, "process_juggle: clock_gettime");
417 
418 		for (i = 0; i < NUMCYCLES; i++) {
419 			for (j = 0; j < pipeline; j++) {
420 				if (message_send(fd1) < 0) {
421 					error = errno;
422 					kill(pid, SIGTERM);
423 					errno = error;
424 					err(-1, "message_send fd1");
425 				}
426 			}
427 
428 			for (j = 0; j < pipeline; j++) {
429 				if (message_recv(fd1) < 0) {
430 					error = errno;
431 					kill(pid, SIGTERM);
432 					errno = error;
433 					err(-1, "message_recv fd1");
434 				}
435 			}
436 		}
437 
438 		if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
439 			err(-1, "process_juggle: clock_gettime");
440 	}
441 
442 	wpid = waitpid(pid, NULL, 0);
443 	if (wpid < 0)
444 		err(-1, "process_juggle: waitpid");
445 	if (wpid != pid)
446 		errx(-1, "process_juggle: waitpid: pid != wpid");
447 
448 	timespecsub(&tfinish, &tstart, &tfinish);
449 
450 	return (tfinish);
451 }
452 
/*
 * When we print out results for larger pipeline sizes, we scale back by
 * the depth of the pipeline so that runs with different depths are
 * comparable.  A depth of zero (the warmup round that performs no I/O)
 * leaves the value untouched.  The interval is converted to total
 * nanoseconds before dividing so that the remainder of tv_sec is carried
 * into tv_nsec rather than discarded (dividing the two fields
 * independently would lose up to p-1 whole seconds).
 */
static void
scale_timespec(struct timespec *ts, int p)
{
	int64_t total;

	if (p == 0)
		return;

	total = (int64_t)ts->tv_sec * 1000000000LL + ts->tv_nsec;
	total /= p;
	ts->tv_sec = total / 1000000000LL;
	ts->tv_nsec = total % 1000000000LL;
}
468 
/*
 * Table of IPC primitives to benchmark: each entry pairs a constructor
 * that yields two connected descriptors with the label used in the
 * CSV-style output.
 */
static const struct ipctype {
	int		(*it_create)(int *fd1p, int *fd2p);	/* Build descriptor pair. */
	const char	*it_name;	/* Label printed in results. */
} ipctypes[] = {
	{ pipe_create, "pipe" },
	{ udp_create, "udp" },
	{ socketpairdgram_create, "socketpairdgram" },
	{ socketpairstream_create, "socketpairstream" },
};
static const int ipctypes_len = (sizeof(ipctypes) / sizeof(struct ipctype));
479 
480 int
481 main(int argc, char *argv[])
482 {
483 	struct timespec juggle_results[LOOPS], process_results[LOOPS];
484 	struct timespec thread_results[LOOPS];
485 	int fd1, fd2, i, j, p;
486 	struct utsname uts;
487 
488 	printf("version, juggle.c %s\n", "$FreeBSD$");
489 
490 	if (uname(&uts) < 0)
491 		err(-1, "utsname");
492 	printf("sysname, %s\n", uts.sysname);
493 	printf("nodename, %s\n", uts.nodename);
494 	printf("release, %s\n", uts.release);
495 	printf("version, %s\n", uts.version);
496 	printf("machine, %s\n", uts.machine);
497 	printf("\n");
498 
499 	printf("MESSAGELEN, %d\n", MESSAGELEN);
500 	printf("NUMCYCLES, %d\n", NUMCYCLES);
501 	printf("LOOPS, %d\n", LOOPS);
502 	printf("PIPELINE_MAX, %d\n", PIPELINE_MAX);
503 	printf("\n\n");
504 
505 	printf("ipctype, test, pipeline_depth");
506 	for (j = 0; j < LOOPS; j++)
507 		printf(", data%d", j);
508 	printf("\n");
509 	fflush(stdout);
510 	for (p = 0; p < PIPELINE_MAX + 1; p++) {
511 		for (i = 0; i < ipctypes_len; i++) {
512 			if (ipctypes[i].it_create(&fd1, &fd2) < 0)
513 				err(-1, "main: %s", ipctypes[i].it_name);
514 
515 			/*
516 			 * For each test, do one uncounted warmup, then LOOPS
517 			 * runs of the actual test.
518 			 */
519 			juggle(fd1, fd2, p);
520 			for (j = 0; j < LOOPS; j++)
521 				juggle_results[j] = juggle(fd1, fd2, p);
522 			process_juggle(fd1, fd2, p);
523 			for (j = 0; j < LOOPS; j++)
524 				process_results[j] = process_juggle(fd1, fd2,
525 				    p);
526 			thread_juggle(fd1, fd2, p);
527 			for (j = 0; j < LOOPS; j++)
528 				thread_results[j] = thread_juggle(fd1, fd2,
529 				    p);
530 			for (j = 0; j < LOOPS; j++) {
531 				thread_results[j].tv_sec = 0;
532 				thread_results[j].tv_nsec = 0;
533 			}
534 			close(fd1);
535 			close(fd2);
536 		}
537 		/*
538 		 * When printing results for the round, normalize the results
539 		 * with respect to the pipeline depth.  We're doing p times
540 		 * as much work, and are we taking p times as long?
541 		 */
542 		for (i = 0; i < ipctypes_len; i++) {
543 			printf("%s, juggle, %d, ", ipctypes[i].it_name, p);
544 			for (j = 0; j < LOOPS; j++) {
545 				if (j != 0)
546 					printf(", ");
547 				scale_timespec(&juggle_results[j], p);
548 				printf("%jd.%09lu",
549 				    (intmax_t)juggle_results[j].tv_sec,
550 				    juggle_results[j].tv_nsec);
551 			}
552 			printf("\n");
553 			printf("%s, process_juggle, %d, ",
554 			    ipctypes[i].it_name, p);
555 			for (j = 0; j < LOOPS; j++) {
556 				if (j != 0)
557 					printf(", ");
558 				scale_timespec(&process_results[j], p);
559 				printf("%jd.%09lu",
560                                     (intmax_t)process_results[j].tv_sec,
561 				    process_results[j].tv_nsec);
562 			}
563 			printf("\n");
564 			printf("%s, thread_juggle, %d, ",
565 			    ipctypes[i].it_name, p);
566 			for (j = 0; j < LOOPS; j++) {
567 				if (j != 0)
568 					printf(", ");
569 				scale_timespec(&thread_results[j], p);
570 				printf("%jd.%09lu",
571 				    (intmax_t)thread_results[j].tv_sec,
572 				    thread_results[j].tv_nsec);
573 			}
574 			printf("\n");
575 		}
576 		fflush(stdout);
577 	}
578 	return (0);
579 }
580