1 /*
2  * Copyright (c) 2011-2012 Intel Corporation.  All rights reserved.
3  * Copyright (c) 2014-2015 Mellanox Technologies LTD. All rights reserved.
4  *
5  * This software is available to you under the OpenIB.org BSD license
6  * below:
7  *
8  *     Redistribution and use in source and binary forms, with or
9  *     without modification, are permitted provided that the following
10  *     conditions are met:
11  *
12  *      - Redistributions of source code must retain the above
13  *        copyright notice, this list of conditions and the following
14  *        disclaimer.
15  *
16  *      - Redistributions in binary form must reproduce the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer in the documentation and/or other materials
19  *        provided with the distribution.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
25  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
26  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
27  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28  * SOFTWARE.
29  */
30 
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <strings.h>
35 #include <errno.h>
36 #include <getopt.h>
37 #include <sys/types.h>
38 #include <sys/socket.h>
39 #include <sys/time.h>
40 #include <sys/wait.h>
41 #include <netdb.h>
42 #include <fcntl.h>
43 #include <unistd.h>
44 #include <netinet/tcp.h>
45 
46 #include <rdma/rdma_cma.h>
47 #include <rdma/rsocket.h>
48 #include <util/compiler.h>
49 #include "common.h"
50 
/*
 * One entry of the default test matrix: a message size and the lowest
 * size_option setting at which that size is exercised.
 */
struct test_size_param {
	int size;	/* message size in bytes */
	int option;	/* entry is skipped while option > size_option */
};

/* Shorthands used only by the table below; undefined again right after it. */
#define TS_POW2(n)	(1 << (n))
#define TS_POW2_MID(n)	(TS_POW2(n) + TS_POW2((n) - 1))

/*
 * Sizes run from 64 bytes to 6 MB: every power of two followed by the
 * midpoint to the next power of two.  Only 64, 4k, 64k and 1m carry
 * option 0; every other entry carries option 1.
 */
static struct test_size_param test_size[] = {
	{ TS_POW2(6),  0 },
	{ TS_POW2(7),  1 }, { TS_POW2_MID(7),  1 },
	{ TS_POW2(8),  1 }, { TS_POW2_MID(8),  1 },
	{ TS_POW2(9),  1 }, { TS_POW2_MID(9),  1 },
	{ TS_POW2(10), 1 }, { TS_POW2_MID(10), 1 },
	{ TS_POW2(11), 1 }, { TS_POW2_MID(11), 1 },
	{ TS_POW2(12), 0 }, { TS_POW2_MID(12), 1 },
	{ TS_POW2(13), 1 }, { TS_POW2_MID(13), 1 },
	{ TS_POW2(14), 1 }, { TS_POW2_MID(14), 1 },
	{ TS_POW2(15), 1 }, { TS_POW2_MID(15), 1 },
	{ TS_POW2(16), 0 }, { TS_POW2_MID(16), 1 },
	{ TS_POW2(17), 1 }, { TS_POW2_MID(17), 1 },
	{ TS_POW2(18), 1 }, { TS_POW2_MID(18), 1 },
	{ TS_POW2(19), 1 }, { TS_POW2_MID(19), 1 },
	{ TS_POW2(20), 0 }, { TS_POW2_MID(20), 1 },
	{ TS_POW2(21), 1 }, { TS_POW2_MID(21), 1 },
	{ TS_POW2(22), 1 }, { TS_POW2_MID(22), 1 },
};

#undef TS_POW2_MID
#undef TS_POW2

/* Number of entries in test_size[]. */
#define TEST_CNT (sizeof(test_size) / sizeof(test_size[0]))
76 
77 static int rs, lrs;
78 static int use_async;
79 static int use_rgai;
80 static int verify;
81 static int flags = MSG_DONTWAIT;
82 static int poll_timeout = 0;
83 static int custom;
84 static int use_fork;
85 static pid_t fork_pid;
86 static enum rs_optimization optimization;
87 static int size_option;
88 static int iterations = 1;
89 static int transfer_size = 1000;
90 static int transfer_count = 1000;
91 static int buffer_size, inline_size = 64;
92 static char test_name[10] = "custom";
93 static const char *port = "7471";
94 static int keepalive;
95 static char *dst_addr;
96 static char *src_addr;
97 static struct timeval start, end;
98 static void *buf;
99 static struct rdma_addrinfo rai_hints;
100 static struct addrinfo ai_hints;
101 
102 static void show_perf(void)
103 {
104 	char str[32];
105 	float usec;
106 	long long bytes;
107 
108 	usec = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec);
109 	bytes = (long long) iterations * transfer_count * transfer_size * 2;
110 
111 	/* name size transfers iterations bytes seconds Gb/sec usec/xfer */
112 	printf("%-10s", test_name);
113 	size_str(str, sizeof str, transfer_size);
114 	printf("%-8s", str);
115 	cnt_str(str, sizeof str, transfer_count);
116 	printf("%-8s", str);
117 	cnt_str(str, sizeof str, iterations);
118 	printf("%-8s", str);
119 	size_str(str, sizeof str, bytes);
120 	printf("%-8s", str);
121 	printf("%8.2fs%10.2f%11.2f\n",
122 		usec / 1000000., (bytes * 8) / (1000. * usec),
123 		(usec / iterations) / (transfer_count * 2));
124 }
125 
126 static void init_latency_test(int size)
127 {
128 	char sstr[5];
129 
130 	size_str(sstr, sizeof sstr, size);
131 	snprintf(test_name, sizeof test_name, "%s_lat", sstr);
132 	transfer_count = 1;
133 	transfer_size = size;
134 	iterations = size_to_count(transfer_size);
135 }
136 
137 static void init_bandwidth_test(int size)
138 {
139 	char sstr[5];
140 
141 	size_str(sstr, sizeof sstr, size);
142 	snprintf(test_name, sizeof test_name, "%s_bw", sstr);
143 	iterations = 1;
144 	transfer_size = size;
145 	transfer_count = size_to_count(transfer_size);
146 }
147 
148 static int send_xfer(int size)
149 {
150 	struct pollfd fds;
151 	int offset, ret;
152 
153 	if (verify)
154 		format_buf(buf, size);
155 
156 	if (use_async) {
157 		fds.fd = rs;
158 		fds.events = POLLOUT;
159 	}
160 
161 	for (offset = 0; offset < size; ) {
162 		if (use_async) {
163 			ret = do_poll(&fds, poll_timeout);
164 			if (ret)
165 				return ret;
166 		}
167 
168 		ret = rs_send(rs, buf + offset, size - offset, flags);
169 		if (ret > 0) {
170 			offset += ret;
171 		} else if (errno != EWOULDBLOCK && errno != EAGAIN) {
172 			perror("rsend");
173 			return ret;
174 		}
175 	}
176 
177 	return 0;
178 }
179 
180 static int recv_xfer(int size)
181 {
182 	struct pollfd fds;
183 	int offset, ret;
184 
185 	if (use_async) {
186 		fds.fd = rs;
187 		fds.events = POLLIN;
188 	}
189 
190 	for (offset = 0; offset < size; ) {
191 		if (use_async) {
192 			ret = do_poll(&fds, poll_timeout);
193 			if (ret)
194 				return ret;
195 		}
196 
197 		ret = rs_recv(rs, buf + offset, size - offset, flags);
198 		if (ret > 0) {
199 			offset += ret;
200 		} else if (errno != EWOULDBLOCK && errno != EAGAIN) {
201 			perror("rrecv");
202 			return ret;
203 		}
204 	}
205 
206 	if (verify) {
207 		ret = verify_buf(buf, size);
208 		if (ret)
209 			return ret;
210 	}
211 
212 	return 0;
213 }
214 
215 static int sync_test(void)
216 {
217 	int ret;
218 
219 	ret = dst_addr ? send_xfer(16) : recv_xfer(16);
220 	if (ret)
221 		return ret;
222 
223 	return dst_addr ? recv_xfer(16) : send_xfer(16);
224 }
225 
226 static int run_test(void)
227 {
228 	int ret, i, t;
229 
230 	ret = sync_test();
231 	if (ret)
232 		goto out;
233 
234 	gettimeofday(&start, NULL);
235 	for (i = 0; i < iterations; i++) {
236 		for (t = 0; t < transfer_count; t++) {
237 			ret = dst_addr ? send_xfer(transfer_size) :
238 					 recv_xfer(transfer_size);
239 			if (ret)
240 				goto out;
241 		}
242 
243 		for (t = 0; t < transfer_count; t++) {
244 			ret = dst_addr ? recv_xfer(transfer_size) :
245 					 send_xfer(transfer_size);
246 			if (ret)
247 				goto out;
248 		}
249 	}
250 	gettimeofday(&end, NULL);
251 	show_perf();
252 	ret = 0;
253 
254 out:
255 	return ret;
256 }
257 
258 static void set_keepalive(int fd)
259 {
260 	int optval;
261 	socklen_t optlen = sizeof(optlen);
262 
263 	optval = 1;
264 	if (rs_setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &optval, optlen)) {
265 		perror("rsetsockopt SO_KEEPALIVE");
266 		return;
267 	}
268 
269 	optval = keepalive;
270 	if (rs_setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &optval, optlen))
271 		perror("rsetsockopt TCP_KEEPIDLE");
272 
273 	if (!(rs_getsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &optval, &optlen)))
274 		printf("Keepalive: %s\n", (optval ? "ON" : "OFF"));
275 
276 	if (!(rs_getsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &optval, &optlen)))
277 		printf("  time: %i\n", optval);
278 }
279 
280 static void set_options(int fd)
281 {
282 	int val;
283 
284 	if (buffer_size) {
285 		rs_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, (void *) &buffer_size,
286 			      sizeof buffer_size);
287 		rs_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, (void *) &buffer_size,
288 			      sizeof buffer_size);
289 	} else {
290 		val = 1 << 19;
291 		rs_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, (void *) &val, sizeof val);
292 		rs_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, (void *) &val, sizeof val);
293 	}
294 
295 	val = 1;
296 	rs_setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (void *) &val, sizeof(val));
297 
298 	if (flags & MSG_DONTWAIT)
299 		rs_fcntl(fd, F_SETFL, O_NONBLOCK);
300 
301 	if (use_rs) {
302 		/* Inline size based on experimental data */
303 		if (optimization == opt_latency) {
304 			rs_setsockopt(fd, SOL_RDMA, RDMA_INLINE, &inline_size,
305 				      sizeof inline_size);
306 		} else if (optimization == opt_bandwidth) {
307 			val = 0;
308 			rs_setsockopt(fd, SOL_RDMA, RDMA_INLINE, &val, sizeof val);
309 		}
310 	}
311 
312 	if (keepalive)
313 		set_keepalive(fd);
314 }
315 
/*
 * Create the listening socket lrs: resolve src_addr/port as a passive
 * address, create the socket, set SO_REUSEADDR, bind and listen with a
 * backlog of 1.
 *
 * The lookup uses rdma_getaddrinfo() when use_rgai is set and getaddrinfo()
 * otherwise; the passive flag is added to the corresponding global hints.
 *
 * Returns 0 on success.  On failure the socket (if created) is closed and a
 * non-zero value is returned.  The lookup result is always released.
 */
static int server_listen(void)
{
	struct rdma_addrinfo *rai = NULL;
	struct addrinfo *ai;
	struct sockaddr *bind_addr;
	socklen_t bind_len;
	int family, ret;
	int reuse = 1;

	if (!use_rgai) {
		ai_hints.ai_flags |= AI_PASSIVE;
		ret = getaddrinfo(src_addr, port, &ai_hints, &ai);
	} else {
		rai_hints.ai_flags |= RAI_PASSIVE;
		ret = rdma_getaddrinfo(src_addr, port, &rai_hints, &rai);
	}
	if (ret) {
		printf("getaddrinfo: %s\n", gai_strerror(ret));
		return ret;
	}

	/* Pick family and local address from whichever lookup was used. */
	if (rai) {
		family = rai->ai_family;
		bind_addr = rai->ai_src_addr;
		bind_len = rai->ai_src_len;
	} else {
		family = ai->ai_family;
		bind_addr = ai->ai_addr;
		bind_len = ai->ai_addrlen;
	}

	lrs = rs_socket(family, SOCK_STREAM, 0);
	if (lrs < 0) {
		perror("rsocket");
		ret = lrs;
		goto out_free;
	}

	ret = rs_setsockopt(lrs, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof reuse);
	if (ret) {
		perror("rsetsockopt SO_REUSEADDR");
		goto out_close;
	}

	ret = rs_bind(lrs, bind_addr, bind_len);
	if (ret) {
		perror("rbind");
		goto out_close;
	}

	ret = rs_listen(lrs, 1);
	if (ret)
		perror("rlisten");

out_close:
	if (ret)
		rs_close(lrs);
out_free:
	if (rai)
		rdma_freeaddrinfo(rai);
	else
		freeaddrinfo(ai);
	return ret;
}
370 
371 static int server_connect(void)
372 {
373 	struct pollfd fds;
374 	int ret = 0;
375 
376 	set_options(lrs);
377 	do {
378 		if (use_async) {
379 			fds.fd = lrs;
380 			fds.events = POLLIN;
381 
382 			ret = do_poll(&fds, poll_timeout);
383 			if (ret) {
384 				perror("rpoll");
385 				return ret;
386 			}
387 		}
388 
389 		rs = rs_accept(lrs, NULL, NULL);
390 	} while (rs < 0 && (errno == EAGAIN || errno == EWOULDBLOCK));
391 	if (rs < 0) {
392 		perror("raccept");
393 		return rs;
394 	}
395 
396 	if (use_fork)
397 		fork_pid = fork();
398 	if (!fork_pid)
399 		set_options(rs);
400 	return ret;
401 }
402 
/*
 * Create rs and connect it to dst_addr/port.
 *
 * Addresses are resolved with rdma_getaddrinfo() when use_rgai is set and
 * with getaddrinfo() otherwise.  If src_addr is given it is resolved as a
 * passive address and the socket is bound to it before connecting.  When the
 * rdma lookup supplies a route it is installed with RDMA_ROUTE.
 *
 * A connect that reports EINPROGRESS is completed by polling for POLLOUT and
 * reading SO_ERROR.
 *
 * Returns 0 on success.  On failure the socket (if created) is closed and a
 * non-zero value is returned.  All lookup results are released on every
 * path.
 */
static int client_connect(void)
{
	struct rdma_addrinfo *rai = NULL, *rai_src = NULL;
	struct addrinfo *ai = NULL, *ai_src = NULL;
	struct pollfd fds;
	int ret, err;
	socklen_t len;

	ret = use_rgai ? rdma_getaddrinfo(dst_addr, port, &rai_hints, &rai) :
			 getaddrinfo(dst_addr, port, &ai_hints, &ai);

	if (ret) {
		printf("getaddrinfo: %s\n", gai_strerror(ret));
		return ret;
	}

	if (src_addr) {
		/*
		 * The passive flag goes on a private copy of the hints.  This
		 * function is called once per test phase, and setting the
		 * flag on the shared hints would make the next destination
		 * lookup passive as well.
		 */
		if (use_rgai) {
			struct rdma_addrinfo src_hints = rai_hints;

			src_hints.ai_flags |= RAI_PASSIVE;
			ret = rdma_getaddrinfo(src_addr, port, &src_hints, &rai_src);
		} else {
			struct addrinfo src_hints = ai_hints;

			src_hints.ai_flags |= AI_PASSIVE;
			ret = getaddrinfo(src_addr, port, &src_hints, &ai_src);
		}
		if (ret) {
			printf("getaddrinfo src_addr: %s\n", gai_strerror(ret));
			goto free;
		}
	}

	rs = rai ? rs_socket(rai->ai_family, SOCK_STREAM, 0) :
		   rs_socket(ai->ai_family, SOCK_STREAM, 0);
	if (rs < 0) {
		perror("rsocket");
		ret = rs;
		goto free;
	}

	set_options(rs);

	if (src_addr) {
		ret = rai ? rs_bind(rs, rai_src->ai_src_addr, rai_src->ai_src_len) :
			    rs_bind(rs, ai_src->ai_addr, ai_src->ai_addrlen);
		if (ret) {
			perror("rbind");
			goto close;
		}
	}

	if (rai && rai->ai_route) {
		ret = rs_setsockopt(rs, SOL_RDMA, RDMA_ROUTE, rai->ai_route,
				    rai->ai_route_len);
		if (ret) {
			perror("rsetsockopt RDMA_ROUTE");
			goto close;
		}
	}

	ret = rai ? rs_connect(rs, rai->ai_dst_addr, rai->ai_dst_len) :
		    rs_connect(rs, ai->ai_addr, ai->ai_addrlen);
	if (ret && (errno != EINPROGRESS)) {
		perror("rconnect");
		goto close;
	}

	if (ret && (errno == EINPROGRESS)) {
		/* Non-blocking connect: wait for it and fetch its outcome. */
		fds.fd = rs;
		fds.events = POLLOUT;
		ret = do_poll(&fds, poll_timeout);
		if (ret) {
			perror("rpoll");
			goto close;
		}

		len = sizeof err;
		ret = rs_getsockopt(rs, SOL_SOCKET, SO_ERROR, &err, &len);
		if (ret)
			goto close;
		if (err) {
			ret = -1;
			errno = err;
			perror("async rconnect");
		}
	}

close:
	if (ret)
		rs_close(rs);
free:
	if (rai_src)
		rdma_freeaddrinfo(rai_src);
	if (ai_src)
		freeaddrinfo(ai_src);
	if (rai)
		rdma_freeaddrinfo(rai);
	else if (ai)
		freeaddrinfo(ai);
	return ret;
}
498 
499 static int run(void)
500 {
501 	int i, ret = 0;
502 
503 	buf = malloc(!custom ? test_size[TEST_CNT - 1].size : transfer_size);
504 	if (!buf) {
505 		perror("malloc");
506 		return -1;
507 	}
508 
509 	if (!dst_addr) {
510 		ret = server_listen();
511 		if (ret)
512 			goto free;
513 	}
514 
515 	printf("%-10s%-8s%-8s%-8s%-8s%8s %10s%13s\n",
516 	       "name", "bytes", "xfers", "iters", "total", "time", "Gb/sec", "usec/xfer");
517 	if (!custom) {
518 		optimization = opt_latency;
519 		ret = dst_addr ? client_connect() : server_connect();
520 		if (ret)
521 			goto free;
522 
523 		for (i = 0; i < TEST_CNT && !fork_pid; i++) {
524 			if (test_size[i].option > size_option)
525 				continue;
526 			init_latency_test(test_size[i].size);
527 			run_test();
528 		}
529 		if (fork_pid)
530 			waitpid(fork_pid, NULL, 0);
531 		else
532 			rs_shutdown(rs, SHUT_RDWR);
533 		rs_close(rs);
534 
535 		if (!dst_addr && use_fork && !fork_pid)
536 			goto free;
537 
538 		optimization = opt_bandwidth;
539 		ret = dst_addr ? client_connect() : server_connect();
540 		if (ret)
541 			goto free;
542 		for (i = 0; i < TEST_CNT && !fork_pid; i++) {
543 			if (test_size[i].option > size_option)
544 				continue;
545 			init_bandwidth_test(test_size[i].size);
546 			run_test();
547 		}
548 	} else {
549 		ret = dst_addr ? client_connect() : server_connect();
550 		if (ret)
551 			goto free;
552 
553 		if (!fork_pid)
554 			ret = run_test();
555 	}
556 
557 	if (fork_pid)
558 		waitpid(fork_pid, NULL, 0);
559 	else
560 		rs_shutdown(rs, SHUT_RDWR);
561 	rs_close(rs);
562 free:
563 	free(buf);
564 	return ret;
565 }
566 
567 static int set_test_opt(const char *arg)
568 {
569 	if (strlen(arg) == 1) {
570 		switch (arg[0]) {
571 		case 's':
572 			use_rs = 0;
573 			break;
574 		case 'a':
575 			use_async = 1;
576 			break;
577 		case 'b':
578 			flags = (flags & ~MSG_DONTWAIT) | MSG_WAITALL;
579 			break;
580 		case 'f':
581 			use_fork = 1;
582 			use_rs = 0;
583 			break;
584 		case 'n':
585 			flags |= MSG_DONTWAIT;
586 			break;
587 		case 'r':
588 			use_rgai = 1;
589 			break;
590 		case 'v':
591 			verify = 1;
592 			break;
593 		default:
594 			return -1;
595 		}
596 	} else {
597 		if (!strncasecmp("socket", arg, 6)) {
598 			use_rs = 0;
599 		} else if (!strncasecmp("async", arg, 5)) {
600 			use_async = 1;
601 		} else if (!strncasecmp("block", arg, 5)) {
602 			flags = (flags & ~MSG_DONTWAIT) | MSG_WAITALL;
603 		} else if (!strncasecmp("nonblock", arg, 8)) {
604 			flags |= MSG_DONTWAIT;
605 		} else if (!strncasecmp("resolve", arg, 7)) {
606 			use_rgai = 1;
607 		} else if (!strncasecmp("verify", arg, 6)) {
608 			verify = 1;
609 		} else if (!strncasecmp("fork", arg, 4)) {
610 			use_fork = 1;
611 			use_rs = 0;
612 		} else {
613 			return -1;
614 		}
615 	}
616 	return 0;
617 }
618 
619 int main(int argc, char **argv)
620 {
621 	int op, ret;
622 
623 	ai_hints.ai_socktype = SOCK_STREAM;
624 	rai_hints.ai_port_space = RDMA_PS_TCP;
625 	while ((op = getopt(argc, argv, "s:b:f:B:i:I:C:S:p:k:T:")) != -1) {
626 		switch (op) {
627 		case 's':
628 			dst_addr = optarg;
629 			break;
630 		case 'b':
631 			src_addr = optarg;
632 			break;
633 		case 'f':
634 			if (!strncasecmp("ip", optarg, 2)) {
635 				ai_hints.ai_flags = AI_NUMERICHOST;
636 			} else if (!strncasecmp("gid", optarg, 3)) {
637 				rai_hints.ai_flags = RAI_NUMERICHOST | RAI_FAMILY;
638 				rai_hints.ai_family = AF_IB;
639 				use_rgai = 1;
640 			} else {
641 				fprintf(stderr, "Warning: unknown address format\n");
642 			}
643 			break;
644 		case 'B':
645 			buffer_size = atoi(optarg);
646 			break;
647 		case 'i':
648 			inline_size = atoi(optarg);
649 			break;
650 		case 'I':
651 			custom = 1;
652 			iterations = atoi(optarg);
653 			break;
654 		case 'C':
655 			custom = 1;
656 			transfer_count = atoi(optarg);
657 			break;
658 		case 'S':
659 			if (!strncasecmp("all", optarg, 3)) {
660 				size_option = 1;
661 			} else {
662 				custom = 1;
663 				transfer_size = atoi(optarg);
664 			}
665 			break;
666 		case 'p':
667 			port = optarg;
668 			break;
669 		case 'k':
670 			keepalive = atoi(optarg);
671 			break;
672 		case 'T':
673 			if (!set_test_opt(optarg))
674 				break;
675 			/* invalid option - fall through */
676 			SWITCH_FALLTHROUGH;
677 		default:
678 			printf("usage: %s\n", argv[0]);
679 			printf("\t[-s server_address]\n");
680 			printf("\t[-b bind_address]\n");
681 			printf("\t[-f address_format]\n");
682 			printf("\t    name, ip, ipv6, or gid\n");
683 			printf("\t[-B buffer_size]\n");
684 			printf("\t[-i inline_size]\n");
685 			printf("\t[-I iterations]\n");
686 			printf("\t[-C transfer_count]\n");
687 			printf("\t[-S transfer_size or all]\n");
688 			printf("\t[-p port_number]\n");
689 			printf("\t[-k keepalive_time]\n");
690 			printf("\t[-T test_option]\n");
691 			printf("\t    s|sockets - use standard tcp/ip sockets\n");
692 			printf("\t    a|async - asynchronous operation (use poll)\n");
693 			printf("\t    b|blocking - use blocking calls\n");
694 			printf("\t    f|fork - fork server processing\n");
695 			printf("\t    n|nonblocking - use nonblocking calls\n");
696 			printf("\t    r|resolve - use rdma cm to resolve address\n");
697 			printf("\t    v|verify - verify data\n");
698 			exit(1);
699 		}
700 	}
701 
702 	if (!(flags & MSG_DONTWAIT))
703 		poll_timeout = -1;
704 
705 	ret = run();
706 	return ret;
707 }
708