1 /*
2  * Copyright (c) 2011-2012 Intel Corporation.  All rights reserved.
3  * Copyright (c) 2014 Mellanox Technologies LTD. All rights reserved.
4  *
5  * This software is available to you under the OpenIB.org BSD license
6  * below:
7  *
8  *     Redistribution and use in source and binary forms, with or
9  *     without modification, are permitted provided that the following
10  *     conditions are met:
11  *
12  *      - Redistributions of source code must retain the above
13  *        copyright notice, this list of conditions and the following
14  *        disclaimer.
15  *
16  *      - Redistributions in binary form must reproduce the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer in the documentation and/or other materials
19  *        provided with the distribution.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV
24  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
25  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
26  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
27  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28  * SOFTWARE.
29  */
30 
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <strings.h>
35 #include <errno.h>
36 #include <getopt.h>
37 #include <sys/types.h>
38 #include <sys/socket.h>
39 #include <sys/time.h>
40 #include <sys/wait.h>
41 #include <netdb.h>
42 #include <fcntl.h>
43 #include <unistd.h>
44 #include <netinet/tcp.h>
45 
46 #include <rdma/rdma_cma.h>
47 #include <rdma/rsocket.h>
48 #include <util/compiler.h>
49 #include "common.h"
50 
51 struct test_size_param {
52 	int size;
53 	int option;
54 };
55 
56 static struct test_size_param test_size[] = {
57 	{ 1 <<  6, 0 },
58 	{ 1 <<  7, 1 }, { (1 <<  7) + (1 <<  6), 1},
59 	{ 1 <<  8, 1 }, { (1 <<  8) + (1 <<  7), 1},
60 	{ 1 <<  9, 1 }, { (1 <<  9) + (1 <<  8), 1},
61 	{ 1 << 10, 1 }, { (1 << 10) + (1 <<  9), 1},
62 	{ 1 << 11, 1 }, { (1 << 11) + (1 << 10), 1},
63 	{ 1 << 12, 0 }, { (1 << 12) + (1 << 11), 1},
64 	{ 1 << 13, 1 }, { (1 << 13) + (1 << 12), 1},
65 	{ 1 << 14, 1 }, { (1 << 14) + (1 << 13), 1},
66 	{ 1 << 15, 1 }, { (1 << 15) + (1 << 14), 1},
67 	{ 1 << 16, 0 }, { (1 << 16) + (1 << 15), 1},
68 	{ 1 << 17, 1 }, { (1 << 17) + (1 << 16), 1},
69 	{ 1 << 18, 1 }, { (1 << 18) + (1 << 17), 1},
70 	{ 1 << 19, 1 }, { (1 << 19) + (1 << 18), 1},
71 	{ 1 << 20, 0 }, { (1 << 20) + (1 << 19), 1},
72 	{ 1 << 21, 1 }, { (1 << 21) + (1 << 20), 1},
73 	{ 1 << 22, 1 }, { (1 << 22) + (1 << 21), 1},
74 };
75 #define TEST_CNT (sizeof test_size / sizeof test_size[0])
76 
77 static int rs, lrs;
78 static int use_async;
79 static int use_rgai;
80 static int verify;
81 static int flags = MSG_DONTWAIT;
82 static int poll_timeout = 0;
83 static int custom;
84 static enum rs_optimization optimization;
85 static int size_option;
86 static int iterations = 1;
87 static int transfer_size = 1000;
88 static int transfer_count = 1000;
89 static int buffer_size, inline_size = 64;
90 static char test_name[10] = "custom";
91 static const char *port = "7471";
92 static char *dst_addr;
93 static char *src_addr;
94 static struct timeval start, end;
95 static void *buf;
96 static volatile uint8_t *poll_byte;
97 static struct rdma_addrinfo rai_hints;
98 static struct addrinfo ai_hints;
99 
100 static void show_perf(void)
101 {
102 	char str[32];
103 	float usec;
104 	long long bytes;
105 
106 	usec = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec);
107 	bytes = (long long) iterations * transfer_count * transfer_size * 2;
108 
109 	/* name size transfers iterations bytes seconds Gb/sec usec/xfer */
110 	printf("%-10s", test_name);
111 	size_str(str, sizeof str, transfer_size);
112 	printf("%-8s", str);
113 	cnt_str(str, sizeof str, transfer_count);
114 	printf("%-8s", str);
115 	cnt_str(str, sizeof str, iterations);
116 	printf("%-8s", str);
117 	size_str(str, sizeof str, bytes);
118 	printf("%-8s", str);
119 	printf("%8.2fs%10.2f%11.2f\n",
120 		usec / 1000000., (bytes * 8) / (1000. * usec),
121 		(usec / iterations) / (transfer_count * 2));
122 }
123 
124 static void init_latency_test(int size)
125 {
126 	char sstr[5];
127 
128 	size_str(sstr, sizeof sstr, size);
129 	snprintf(test_name, sizeof test_name, "%s_lat", sstr);
130 	transfer_count = 1;
131 	transfer_size = size;
132 	iterations = size_to_count(transfer_size);
133 }
134 
135 static void init_bandwidth_test(int size)
136 {
137 	char sstr[5];
138 
139 	size_str(sstr, sizeof sstr, size);
140 	snprintf(test_name, sizeof test_name, "%s_bw", sstr);
141 	iterations = 1;
142 	transfer_size = size;
143 	transfer_count = size_to_count(transfer_size);
144 }
145 
146 static int send_msg(int size)
147 {
148 	struct pollfd fds;
149 	int offset, ret;
150 
151 	if (use_async) {
152 		fds.fd = rs;
153 		fds.events = POLLOUT;
154 	}
155 
156 	for (offset = 0; offset < size; ) {
157 		if (use_async) {
158 			ret = do_poll(&fds, poll_timeout);
159 			if (ret)
160 				return ret;
161 		}
162 
163 		ret = rsend(rs, buf + offset, size - offset, flags);
164 		if (ret > 0) {
165 			offset += ret;
166 		} else if (errno != EWOULDBLOCK && errno != EAGAIN) {
167 			perror("rsend");
168 			return ret;
169 		}
170 	}
171 
172 	return 0;
173 }
174 
175 static int send_xfer(int size)
176 {
177 	struct pollfd fds;
178 	int offset, ret;
179 
180 	if (use_async) {
181 		fds.fd = rs;
182 		fds.events = POLLOUT;
183 	}
184 
185 	for (offset = 0; offset < size; ) {
186 		if (use_async) {
187 			ret = do_poll(&fds, poll_timeout);
188 			if (ret)
189 				return ret;
190 		}
191 
192 		ret = riowrite(rs, buf + offset, size - offset, offset, flags);
193 		if (ret > 0) {
194 			offset += ret;
195 		} else if (errno != EWOULDBLOCK && errno != EAGAIN) {
196 			perror("riowrite");
197 			return ret;
198 		}
199 	}
200 
201 	return 0;
202 }
203 
204 static int recv_msg(int size)
205 {
206 	struct pollfd fds;
207 	int offset, ret;
208 
209 	if (use_async) {
210 		fds.fd = rs;
211 		fds.events = POLLIN;
212 	}
213 
214 	for (offset = 0; offset < size; ) {
215 		if (use_async) {
216 			ret = do_poll(&fds, poll_timeout);
217 			if (ret)
218 				return ret;
219 		}
220 
221 		ret = rrecv(rs, buf + offset, size - offset, flags);
222 		if (ret > 0) {
223 			offset += ret;
224 		} else if (errno != EWOULDBLOCK && errno != EAGAIN) {
225 			perror("rrecv");
226 			return ret;
227 		}
228 	}
229 
230 	return 0;
231 }
232 
233 static int recv_xfer(int size, uint8_t marker)
234 {
235 	int ret;
236 
237 	while (*poll_byte != marker)
238 		;
239 
240 	if (verify) {
241 		ret = verify_buf(buf, size - 1);
242 		if (ret)
243 			return ret;
244 	}
245 
246 	return 0;
247 }
248 
249 static int sync_test(void)
250 {
251 	int ret;
252 
253 	ret = dst_addr ? send_msg(16) : recv_msg(16);
254 	if (ret)
255 		return ret;
256 
257 	return dst_addr ? recv_msg(16) : send_msg(16);
258 }
259 
260 static int run_test(void)
261 {
262 	int ret, i, t;
263 	off_t offset;
264 	uint8_t marker = 0;
265 
266 	poll_byte = buf + transfer_size - 1;
267 	*poll_byte = -1;
268 	offset = riomap(rs, buf, transfer_size, PROT_WRITE, 0, 0);
269 	if (offset ==  -1) {
270 		perror("riomap");
271 		ret = -1;
272 		goto out;
273 	}
274 	ret = sync_test();
275 	if (ret)
276 		goto out;
277 
278 	gettimeofday(&start, NULL);
279 	for (i = 0; i < iterations; i++) {
280 		if (dst_addr) {
281 			for (t = 0; t < transfer_count - 1; t++) {
282 				ret = send_xfer(transfer_size);
283 				if (ret)
284 					goto out;
285 			}
286 			*poll_byte = (uint8_t) marker++;
287 			if (verify)
288 				format_buf(buf, transfer_size - 1);
289 			ret = send_xfer(transfer_size);
290 			if (ret)
291 				goto out;
292 
293 			ret = recv_xfer(transfer_size, marker++);
294 		} else {
295 			ret = recv_xfer(transfer_size, marker++);
296 			if (ret)
297 				goto out;
298 
299 			for (t = 0; t < transfer_count - 1; t++) {
300 				ret = send_xfer(transfer_size);
301 				if (ret)
302 					goto out;
303 			}
304 			*poll_byte = (uint8_t) marker++;
305 			if (verify)
306 				format_buf(buf, transfer_size - 1);
307 			ret = send_xfer(transfer_size);
308 		}
309 		if (ret)
310 			goto out;
311 	}
312 	gettimeofday(&end, NULL);
313 	show_perf();
314 	ret = riounmap(rs, buf, transfer_size);
315 
316 out:
317 	return ret;
318 }
319 
320 static void set_options(int fd)
321 {
322 	int val;
323 
324 	if (buffer_size) {
325 		rsetsockopt(fd, SOL_SOCKET, SO_SNDBUF, (void *) &buffer_size,
326 			    sizeof buffer_size);
327 		rsetsockopt(fd, SOL_SOCKET, SO_RCVBUF, (void *) &buffer_size,
328 			    sizeof buffer_size);
329 	} else {
330 		val = 1 << 19;
331 		rsetsockopt(fd, SOL_SOCKET, SO_SNDBUF, (void *) &val, sizeof val);
332 		rsetsockopt(fd, SOL_SOCKET, SO_RCVBUF, (void *) &val, sizeof val);
333 	}
334 
335 	val = 1;
336 	rsetsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (void *) &val, sizeof(val));
337 	rsetsockopt(fd, SOL_RDMA, RDMA_IOMAPSIZE, (void *) &val, sizeof val);
338 
339 	if (flags & MSG_DONTWAIT)
340 		rfcntl(fd, F_SETFL, O_NONBLOCK);
341 
342 	/* Inline size based on experimental data */
343 	if (optimization == opt_latency) {
344 		rsetsockopt(fd, SOL_RDMA, RDMA_INLINE, &inline_size,
345 			    sizeof inline_size);
346 	} else if (optimization == opt_bandwidth) {
347 		val = 0;
348 		rsetsockopt(fd, SOL_RDMA, RDMA_INLINE, &val, sizeof val);
349 	}
350 }
351 
352 static int server_listen(void)
353 {
354 	struct rdma_addrinfo *rai = NULL;
355 	struct addrinfo *ai;
356 	int val, ret;
357 
358 	if (use_rgai) {
359 		rai_hints.ai_flags |= RAI_PASSIVE;
360 		ret = rdma_getaddrinfo(src_addr, port, &rai_hints, &rai);
361 	} else {
362 		ai_hints.ai_flags |= AI_PASSIVE;
363 		ret = getaddrinfo(src_addr, port, &ai_hints, &ai);
364 	}
365 	if (ret) {
366 		printf("getaddrinfo: %s\n", gai_strerror(ret));
367 		return ret;
368 	}
369 
370 	lrs = rai ? rsocket(rai->ai_family, SOCK_STREAM, 0) :
371 		    rsocket(ai->ai_family, SOCK_STREAM, 0);
372 	if (lrs < 0) {
373 		perror("rsocket");
374 		ret = lrs;
375 		goto free;
376 	}
377 
378 	val = 1;
379 	ret = rsetsockopt(lrs, SOL_SOCKET, SO_REUSEADDR, &val, sizeof val);
380 	if (ret) {
381 		perror("rsetsockopt SO_REUSEADDR");
382 		goto close;
383 	}
384 
385 	ret = rai ? rbind(lrs, rai->ai_src_addr, rai->ai_src_len) :
386 		    rbind(lrs, ai->ai_addr, ai->ai_addrlen);
387 	if (ret) {
388 		perror("rbind");
389 		goto close;
390 	}
391 
392 	ret = rlisten(lrs, 1);
393 	if (ret)
394 		perror("rlisten");
395 
396 close:
397 	if (ret)
398 		rclose(lrs);
399 free:
400 	if (rai)
401 		rdma_freeaddrinfo(rai);
402 	else
403 		freeaddrinfo(ai);
404 	return ret;
405 }
406 
407 static int server_connect(void)
408 {
409 	struct pollfd fds;
410 	int ret = 0;
411 
412 	set_options(lrs);
413 	do {
414 		if (use_async) {
415 			fds.fd = lrs;
416 			fds.events = POLLIN;
417 
418 			ret = do_poll(&fds, poll_timeout);
419 			if (ret) {
420 				perror("rpoll");
421 				return ret;
422 			}
423 		}
424 
425 		rs = raccept(lrs, NULL, NULL);
426 	} while (rs < 0 && (errno == EAGAIN || errno == EWOULDBLOCK));
427 	if (rs < 0) {
428 		perror("raccept");
429 		return rs;
430 	}
431 
432 	set_options(rs);
433 	return ret;
434 }
435 
436 static int client_connect(void)
437 {
438 	struct rdma_addrinfo *rai = NULL;
439 	struct addrinfo *ai;
440 	struct pollfd fds;
441 	int ret, err;
442 	socklen_t len;
443 
444 	ret = use_rgai ? rdma_getaddrinfo(dst_addr, port, &rai_hints, &rai) :
445 			 getaddrinfo(dst_addr, port, &ai_hints, &ai);
446 	if (ret) {
447 		printf("getaddrinfo: %s\n", gai_strerror(ret));
448 		return ret;
449 	}
450 
451 	rs = rai ? rsocket(rai->ai_family, SOCK_STREAM, 0) :
452 		   rsocket(ai->ai_family, SOCK_STREAM, 0);
453 	if (rs < 0) {
454 		perror("rsocket");
455 		ret = rs;
456 		goto free;
457 	}
458 
459 	set_options(rs);
460 	/* TODO: bind client to src_addr */
461 
462 	ret = rai ? rconnect(rs, rai->ai_dst_addr, rai->ai_dst_len) :
463 		    rconnect(rs, ai->ai_addr, ai->ai_addrlen);
464 	if (ret && (errno != EINPROGRESS)) {
465 		perror("rconnect");
466 		goto close;
467 	}
468 
469 	if (ret && (errno == EINPROGRESS)) {
470 		fds.fd = rs;
471 		fds.events = POLLOUT;
472 		ret = do_poll(&fds, poll_timeout);
473 		if (ret) {
474 			perror("rpoll");
475 			goto close;
476 		}
477 
478 		len = sizeof err;
479 		ret = rgetsockopt(rs, SOL_SOCKET, SO_ERROR, &err, &len);
480 		if (ret)
481 			goto close;
482 		if (err) {
483 			ret = -1;
484 			errno = err;
485 			perror("async rconnect");
486 		}
487 	}
488 
489 close:
490 	if (ret)
491 		rclose(rs);
492 free:
493 	if (rai)
494 		rdma_freeaddrinfo(rai);
495 	else
496 		freeaddrinfo(ai);
497 	return ret;
498 }
499 
500 static int run(void)
501 {
502 	int i, ret = 0;
503 
504 	buf = malloc(!custom ? test_size[TEST_CNT - 1].size : transfer_size);
505 	if (!buf) {
506 		perror("malloc");
507 		return -1;
508 	}
509 
510 	if (!dst_addr) {
511 		ret = server_listen();
512 		if (ret)
513 			goto free;
514 	}
515 
516 	printf("%-10s%-8s%-8s%-8s%-8s%8s %10s%13s\n",
517 	       "name", "bytes", "xfers", "iters", "total", "time", "Gb/sec", "usec/xfer");
518 	if (!custom) {
519 		optimization = opt_latency;
520 		ret = dst_addr ? client_connect() : server_connect();
521 		if (ret)
522 			goto free;
523 
524 		for (i = 0; i < TEST_CNT; i++) {
525 			if (test_size[i].option > size_option)
526 				continue;
527 			init_latency_test(test_size[i].size);
528 			run_test();
529 		}
530 		rshutdown(rs, SHUT_RDWR);
531 		rclose(rs);
532 
533 		optimization = opt_bandwidth;
534 		ret = dst_addr ? client_connect() : server_connect();
535 		if (ret)
536 			goto free;
537 		for (i = 0; i < TEST_CNT; i++) {
538 			if (test_size[i].option > size_option)
539 				continue;
540 			init_bandwidth_test(test_size[i].size);
541 			run_test();
542 		}
543 	} else {
544 		ret = dst_addr ? client_connect() : server_connect();
545 		if (ret)
546 			goto free;
547 
548 		ret = run_test();
549 	}
550 
551 	rshutdown(rs, SHUT_RDWR);
552 	rclose(rs);
553 free:
554 	free(buf);
555 	return ret;
556 }
557 
558 static int set_test_opt(const char *arg)
559 {
560 	if (strlen(arg) == 1) {
561 		switch (arg[0]) {
562 		case 'a':
563 			use_async = 1;
564 			break;
565 		case 'b':
566 			flags = (flags & ~MSG_DONTWAIT) | MSG_WAITALL;
567 			break;
568 		case 'n':
569 			flags |= MSG_DONTWAIT;
570 			break;
571 		case 'v':
572 			verify = 1;
573 			break;
574 		default:
575 			return -1;
576 		}
577 	} else {
578 		if (!strncasecmp("async", arg, 5)) {
579 			use_async = 1;
580 		} else if (!strncasecmp("block", arg, 5)) {
581 			flags = (flags & ~MSG_DONTWAIT) | MSG_WAITALL;
582 		} else if (!strncasecmp("nonblock", arg, 8)) {
583 			flags |= MSG_DONTWAIT;
584 		} else if (!strncasecmp("verify", arg, 6)) {
585 			verify = 1;
586 		} else {
587 			return -1;
588 		}
589 	}
590 	return 0;
591 }
592 
593 int main(int argc, char **argv)
594 {
595 	int op, ret;
596 
597 	ai_hints.ai_socktype = SOCK_STREAM;
598 	rai_hints.ai_port_space = RDMA_PS_TCP;
599 	while ((op = getopt(argc, argv, "s:b:f:B:i:I:C:S:p:T:")) != -1) {
600 		switch (op) {
601 		case 's':
602 			dst_addr = optarg;
603 			break;
604 		case 'b':
605 			src_addr = optarg;
606 			break;
607 		case 'f':
608 			if (!strncasecmp("ip", optarg, 2)) {
609 				ai_hints.ai_flags = AI_NUMERICHOST;
610 			} else if (!strncasecmp("gid", optarg, 3)) {
611 				rai_hints.ai_flags = RAI_NUMERICHOST | RAI_FAMILY;
612 				rai_hints.ai_family = AF_IB;
613 				use_rgai = 1;
614 			} else {
615 				fprintf(stderr, "Warning: unknown address format\n");
616 			}
617 			break;
618 		case 'B':
619 			buffer_size = atoi(optarg);
620 			break;
621 		case 'i':
622 			inline_size = atoi(optarg);
623 			break;
624 		case 'I':
625 			custom = 1;
626 			iterations = atoi(optarg);
627 			break;
628 		case 'C':
629 			custom = 1;
630 			transfer_count = atoi(optarg);
631 			break;
632 		case 'S':
633 			if (!strncasecmp("all", optarg, 3)) {
634 				size_option = 1;
635 			} else {
636 				custom = 1;
637 				transfer_size = atoi(optarg);
638 			}
639 			break;
640 		case 'p':
641 			port = optarg;
642 			break;
643 		case 'T':
644 			if (!set_test_opt(optarg))
645 				break;
646 			/* invalid option - fall through */
647 			SWITCH_FALLTHROUGH;
648 		default:
649 			printf("usage: %s\n", argv[0]);
650 			printf("\t[-s server_address]\n");
651 			printf("\t[-b bind_address]\n");
652 			printf("\t[-f address_format]\n");
653 			printf("\t    name, ip, ipv6, or gid\n");
654 			printf("\t[-B buffer_size]\n");
655 			printf("\t[-i inline_size]\n");
656 			printf("\t[-I iterations]\n");
657 			printf("\t[-C transfer_count]\n");
658 			printf("\t[-S transfer_size or all]\n");
659 			printf("\t[-p port_number]\n");
660 			printf("\t[-T test_option]\n");
661 			printf("\t    a|async - asynchronous operation (use poll)\n");
662 			printf("\t    b|blocking - use blocking calls\n");
663 			printf("\t    n|nonblocking - use nonblocking calls\n");
664 			printf("\t    v|verify - verify data\n");
665 			exit(1);
666 		}
667 	}
668 
669 	if (!(flags & MSG_DONTWAIT))
670 		poll_timeout = -1;
671 
672 	ret = run();
673 	return ret;
674 }
675