1 /*
2  * Copyright (c) 2013 Intel Corporation.  All rights reserved.
3  *
4  * This software is available to you under the OpenIB.org BSD license
5  * below:
6  *
7  *     Redistribution and use in source and binary forms, with or
8  *     without modification, are permitted provided that the following
9  *     conditions are met:
10  *
11  *      - Redistributions of source code must retain the above
12  *        copyright notice, this list of conditions and the following
13  *        disclaimer.
14  *
15  *      - Redistributions in binary form must reproduce the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer in the documentation and/or other materials
18  *        provided with the distribution.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
24  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27  * SOFTWARE.
28  */
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <errno.h>
35 #include <getopt.h>
36 #include <sys/types.h>
37 #include <sys/socket.h>
38 #include <sys/time.h>
39 #include <sys/wait.h>
40 #include <netdb.h>
41 #include <fcntl.h>
42 #include <unistd.h>
43 #include <netinet/tcp.h>
44 
45 #include <rdma/rdma_cma.h>
46 #include "common.h"
47 
/* Lookup hints passed to get_rdma_addr() and the address info it returns. */
static struct rdma_addrinfo hints, *rai;
/* Event channel used for every rdma_cm_id created in this file. */
static struct rdma_event_channel *channel;
/* Port string handed to get_rdma_addr(); overridden by -p. */
static const char *port = "7471";
/* Server address (-s); when set, main() runs the client path. */
static char *dst_addr;
/* Optional local bind address (-b). */
static char *src_addr;
/* Timeout passed to rdma_resolve_addr()/rdma_resolve_route(); overridden by -t. */
static int timeout = 2000;
/* Retry budget per resolve step and conn_param.retry_count; overridden by -r. */
static int retries = 2;
55 
/*
 * Phases that are timed.  The values index step_str[], the global times[]
 * table and each node's times[] table; STEP_CNT is the number of phases.
 */
enum step {
	STEP_CREATE_ID,
	STEP_BIND,
	STEP_RESOLVE_ADDR,
	STEP_RESOLVE_ROUTE,
	STEP_CREATE_QP,
	STEP_CONNECT,
	STEP_DISCONNECT,
	STEP_DESTROY,
	STEP_CNT
};
67 
68 static const char *step_str[] = {
69 	"create id",
70 	"bind addr",
71 	"resolve addr",
72 	"resolve route",
73 	"create qp",
74 	"connect",
75 	"disconnect",
76 	"destroy"
77 };
78 
/* Per-connection client state; one entry per requested connection. */
struct node {
	struct rdma_cm_id *id;			/* id created in alloc_nodes() */
	struct timeval times[STEP_CNT][2];	/* [step][0] = start, [step][1] = end */
	int error;				/* non-zero once any step failed; later steps skip the node */
	int retries;				/* retries left for the resolve step in progress */
};
85 
/*
 * Link of a circular doubly linked list.  The same type serves as the list
 * sentinel (embedded in struct work_list) and as a queued work item, in which
 * case id names the rdma_cm_id to operate on.
 */
struct list_head {
	struct list_head	*prev;
	struct list_head	*next;
	struct rdma_cm_id	*id;
};
91 
/* Work queue handed from the event handler to a worker thread. */
struct work_list {
	pthread_mutex_t		lock;	/* taken around every access to list */
	pthread_cond_t		cond;	/* signalled by list_add_tail() when the list was empty */
	struct list_head	list;	/* sentinel; list is empty when it points at itself */
};
97 
/* Point both links of x at x itself, i.e. make it an empty/unlinked entry. */
#define INIT_LIST(x) ((x)->prev = (x)->next = (x))
99 
/* Server side: connect requests waiting for req_handler_thread(). */
static struct work_list req_work;
/* Server side: disconnected ids waiting for disc_handler_thread(). */
static struct work_list disc_work;
/* Client side: array of `connections` nodes allocated by alloc_nodes(). */
static struct node *nodes;
/* Wall-clock start ([0]) and end ([1]) of each step across all connections. */
static struct timeval times[STEP_CNT][2];
/* Number of connections to time; overridden by -c. */
static int connections = 100;
/* Operations issued per step; incremented by run_client(). */
static volatile int started[STEP_CNT];
/* Operations finished per step; incremented by the event handlers. */
static volatile int completed[STEP_CNT];
/* QP attributes used for every rdma_create_qp() call; filled in by main(). */
static struct ibv_qp_init_attr init_qp_attr;
/* Connection parameters passed to rdma_connect(); filled in by run_client(). */
static struct rdma_conn_param conn_param;
109 
/* Record the start / end timestamp of step s for node n. */
#define start_perf(n, s)	gettimeofday(&((n)->times[s][0]), NULL)
#define end_perf(n, s)		gettimeofday(&((n)->times[s][1]), NULL)
/* Record the start / end timestamp of step s in the global times[] table. */
#define start_time(s)		gettimeofday(&times[s][0], NULL)
#define end_time(s)		gettimeofday(&times[s][1], NULL)
114 
115 static inline void __list_delete(struct list_head *list)
116 {
117 	struct list_head *prev, *next;
118 	prev = list->prev;
119 	next = list->next;
120 	prev->next = next;
121 	next->prev = prev;
122 	INIT_LIST(list);
123 }
124 
125 static inline int __list_empty(struct work_list *list)
126 {
127 	return list->list.next == &list->list;
128 }
129 
130 static inline struct list_head *__list_remove_head(struct work_list *work_list)
131 {
132 	struct list_head *list_item;
133 
134 	list_item = work_list->list.next;
135 	__list_delete(list_item);
136 	return list_item;
137 }
138 
139 static inline void list_add_tail(struct work_list *work_list, struct list_head *req)
140 {
141 	int empty;
142 	pthread_mutex_lock(&work_list->lock);
143 	empty = __list_empty(work_list);
144 	req->prev = work_list->list.prev;
145 	req->next = &work_list->list;
146 	req->prev->next = work_list->list.prev = req;
147 	pthread_mutex_unlock(&work_list->lock);
148 	if (empty)
149 		pthread_cond_signal(&work_list->cond);
150 }
151 
/* Return 1 when both fields of *t are zero (timestamp never recorded), else 0. */
static int zero_time(struct timeval *t)
{
	return t->tv_sec == 0 && t->tv_usec == 0;
}
156 
/* Return end - start in microseconds (computed in double, returned as float). */
static float diff_us(struct timeval *end, struct timeval *start)
{
	double whole_secs_us = (end->tv_sec - start->tv_sec) * 1000000.;
	double frac_us = end->tv_usec - start->tv_usec;

	return whole_secs_us + frac_us;
}
161 
162 static void show_perf(void)
163 {
164 	int c, i;
165 	float us, max[STEP_CNT], min[STEP_CNT];
166 
167 	for (i = 0; i < STEP_CNT; i++) {
168 		max[i] = 0;
169 		min[i] = 999999999.;
170 		for (c = 0; c < connections; c++) {
171 			if (!zero_time(&nodes[c].times[i][0]) &&
172 			    !zero_time(&nodes[c].times[i][1])) {
173 				us = diff_us(&nodes[c].times[i][1], &nodes[c].times[i][0]);
174 				if (us > max[i])
175 					max[i] = us;
176 				if (us < min[i])
177 					min[i] = us;
178 			}
179 		}
180 	}
181 
182 	printf("step              total ms     max ms     min us  us / conn\n");
183 	for (i = 0; i < STEP_CNT; i++) {
184 		if (i == STEP_BIND && !src_addr)
185 			continue;
186 
187 		us = diff_us(&times[i][1], &times[i][0]);
188 		printf("%-13s: %11.2f%11.2f%11.2f%11.2f\n", step_str[i], us / 1000.,
189 			max[i] / 1000., min[i], us / connections);
190 	}
191 }
192 
193 static void addr_handler(struct node *n)
194 {
195 	end_perf(n, STEP_RESOLVE_ADDR);
196 	completed[STEP_RESOLVE_ADDR]++;
197 }
198 
199 static void route_handler(struct node *n)
200 {
201 	end_perf(n, STEP_RESOLVE_ROUTE);
202 	completed[STEP_RESOLVE_ROUTE]++;
203 }
204 
205 static void conn_handler(struct node *n)
206 {
207 	end_perf(n, STEP_CONNECT);
208 	completed[STEP_CONNECT]++;
209 }
210 
211 static void disc_handler(struct node *n)
212 {
213 	end_perf(n, STEP_DISCONNECT);
214 	completed[STEP_DISCONNECT]++;
215 }
216 
217 static void __req_handler(struct rdma_cm_id *id)
218 {
219 	int ret;
220 
221 	ret = rdma_create_qp(id, NULL, &init_qp_attr);
222 	if (ret) {
223 		perror("failure creating qp");
224 		goto err;
225 	}
226 
227 	ret = rdma_accept(id, NULL);
228 	if (ret) {
229 		perror("failure accepting");
230 		goto err;
231 	}
232 	return;
233 
234 err:
235 	printf("failing connection request\n");
236 	rdma_reject(id, NULL, 0);
237 	rdma_destroy_id(id);
238 	return;
239 }
240 
241 static void *req_handler_thread(void *arg)
242 {
243 	struct list_head *work;
244 	do {
245 		pthread_mutex_lock(&req_work.lock);
246 		if (__list_empty(&req_work))
247 			pthread_cond_wait(&req_work.cond, &req_work.lock);
248 		work = __list_remove_head(&req_work);
249 		pthread_mutex_unlock(&req_work.lock);
250 		__req_handler(work->id);
251 		free(work);
252 	} while (1);
253 	return NULL;
254 }
255 
256 static void *disc_handler_thread(void *arg)
257 {
258 	struct list_head *work;
259 	do {
260 		pthread_mutex_lock(&disc_work.lock);
261 		if (__list_empty(&disc_work))
262 			pthread_cond_wait(&disc_work.cond, &disc_work.lock);
263 		work = __list_remove_head(&disc_work);
264 		pthread_mutex_unlock(&disc_work.lock);
265 		rdma_disconnect(work->id);
266 		rdma_destroy_id(work->id);
267 		free(work);
268 	} while (1);
269 	return NULL;
270 }
271 
272 static void cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
273 {
274 	struct node *n = id->context;
275 	struct list_head *request;
276 
277 	switch (event->event) {
278 	case RDMA_CM_EVENT_ADDR_RESOLVED:
279 		addr_handler(n);
280 		break;
281 	case RDMA_CM_EVENT_ROUTE_RESOLVED:
282 		route_handler(n);
283 		break;
284 	case RDMA_CM_EVENT_CONNECT_REQUEST:
285 		request = malloc(sizeof *request);
286 		if (!request) {
287 			perror("out of memory accepting connect request");
288 			rdma_reject(id, NULL, 0);
289 			rdma_destroy_id(id);
290 		} else {
291 			INIT_LIST(request);
292 			request->id = id;
293 			list_add_tail(&req_work, request);
294 		}
295 		break;
296 	case RDMA_CM_EVENT_ESTABLISHED:
297 		if (n)
298 			conn_handler(n);
299 		break;
300 	case RDMA_CM_EVENT_ADDR_ERROR:
301 		if (n->retries--) {
302 			if (!rdma_resolve_addr(n->id, rai->ai_src_addr,
303 					       rai->ai_dst_addr, timeout))
304 				break;
305 		}
306 		printf("RDMA_CM_EVENT_ADDR_ERROR, error: %d\n", event->status);
307 		addr_handler(n);
308 		n->error = 1;
309 		break;
310 	case RDMA_CM_EVENT_ROUTE_ERROR:
311 		if (n->retries--) {
312 			if (!rdma_resolve_route(n->id, timeout))
313 				break;
314 		}
315 		printf("RDMA_CM_EVENT_ROUTE_ERROR, error: %d\n", event->status);
316 		route_handler(n);
317 		n->error = 1;
318 		break;
319 	case RDMA_CM_EVENT_CONNECT_ERROR:
320 	case RDMA_CM_EVENT_UNREACHABLE:
321 	case RDMA_CM_EVENT_REJECTED:
322 		printf("event: %s, error: %d\n",
323 		       rdma_event_str(event->event), event->status);
324 		conn_handler(n);
325 		n->error = 1;
326 		break;
327 	case RDMA_CM_EVENT_DISCONNECTED:
328 		if (!n) {
329 			request = malloc(sizeof *request);
330 			if (!request) {
331 				perror("out of memory queueing disconnect request, handling synchronously");
332 				rdma_disconnect(id);
333 				rdma_destroy_id(id);
334 			} else {
335 				INIT_LIST(request);
336 				request->id = id;
337 				list_add_tail(&disc_work, request);
338 			}
339 		} else
340 			disc_handler(n);
341 		break;
342 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
343 		/* Cleanup will occur after test completes. */
344 		break;
345 	default:
346 		break;
347 	}
348 	rdma_ack_cm_event(event);
349 }
350 
351 static int alloc_nodes(void)
352 {
353 	int ret, i;
354 
355 	nodes = calloc(sizeof *nodes, connections);
356 	if (!nodes)
357 		return -ENOMEM;
358 
359 	printf("creating id\n");
360 	start_time(STEP_CREATE_ID);
361 	for (i = 0; i < connections; i++) {
362 		start_perf(&nodes[i], STEP_CREATE_ID);
363 		if (dst_addr) {
364 			ret = rdma_create_id(channel, &nodes[i].id, &nodes[i],
365 					     hints.ai_port_space);
366 			if (ret)
367 				goto err;
368 		}
369 		end_perf(&nodes[i], STEP_CREATE_ID);
370 	}
371 	end_time(STEP_CREATE_ID);
372 	return 0;
373 
374 err:
375 	while (--i >= 0)
376 		rdma_destroy_id(nodes[i].id);
377 	free(nodes);
378 	return ret;
379 }
380 
381 static void cleanup_nodes(void)
382 {
383 	int i;
384 
385 	printf("destroying id\n");
386 	start_time(STEP_DESTROY);
387 	for (i = 0; i < connections; i++) {
388 		start_perf(&nodes[i], STEP_DESTROY);
389 		if (nodes[i].id)
390 			rdma_destroy_id(nodes[i].id);
391 		end_perf(&nodes[i], STEP_DESTROY);
392 	}
393 	end_time(STEP_DESTROY);
394 }
395 
396 static void *process_events(void *arg)
397 {
398 	struct rdma_cm_event *event;
399 	int ret = 0;
400 
401 	while (!ret) {
402 		ret = rdma_get_cm_event(channel, &event);
403 		if (!ret) {
404 			cma_handler(event->id, event);
405 		} else {
406 			perror("failure in rdma_get_cm_event in process_server_events");
407 			ret = errno;
408 		}
409 	}
410 	return NULL;
411 }
412 
413 static int run_server(void)
414 {
415 	pthread_t req_thread, disc_thread;
416 	struct rdma_cm_id *listen_id;
417 	int ret;
418 
419 	INIT_LIST(&req_work.list);
420 	INIT_LIST(&disc_work.list);
421 	ret = pthread_mutex_init(&req_work.lock, NULL);
422 	if (ret) {
423 		perror("initializing mutex for req work");
424 		return ret;
425 	}
426 
427 	ret = pthread_mutex_init(&disc_work.lock, NULL);
428 	if (ret) {
429 		perror("initializing mutex for disc work");
430 		return ret;
431 	}
432 
433 	ret = pthread_cond_init(&req_work.cond, NULL);
434 	if (ret) {
435 		perror("initializing cond for req work");
436 		return ret;
437 	}
438 
439 	ret = pthread_cond_init(&disc_work.cond, NULL);
440 	if (ret) {
441 		perror("initializing cond for disc work");
442 		return ret;
443 	}
444 
445 	ret = pthread_create(&req_thread, NULL, req_handler_thread, NULL);
446 	if (ret) {
447 		perror("failed to create req handler thread");
448 		return ret;
449 	}
450 
451 	ret = pthread_create(&disc_thread, NULL, disc_handler_thread, NULL);
452 	if (ret) {
453 		perror("failed to create disconnect handler thread");
454 		return ret;
455 	}
456 
457 	ret = rdma_create_id(channel, &listen_id, NULL, hints.ai_port_space);
458 	if (ret) {
459 		perror("listen request failed");
460 		return ret;
461 	}
462 
463 	ret = get_rdma_addr(src_addr, dst_addr, port, &hints, &rai);
464 	if (ret) {
465 		printf("getrdmaaddr error: %s\n", gai_strerror(ret));
466 		goto out;
467 	}
468 
469 	ret = rdma_bind_addr(listen_id, rai->ai_src_addr);
470 	if (ret) {
471 		perror("bind address failed");
472 		goto out;
473 	}
474 
475 	ret = rdma_listen(listen_id, 0);
476 	if (ret) {
477 		perror("failure trying to listen");
478 		goto out;
479 	}
480 
481 	process_events(NULL);
482  out:
483 	rdma_destroy_id(listen_id);
484 	return ret;
485 }
486 
/*
 * Client entry point: drive every node through bind (optional), address
 * resolution, route resolution, QP creation, connect and disconnect, timing
 * each phase as a whole and per node.
 *
 * Each phase is issued for all nodes first; for the asynchronous phases the
 * function then spins until the event thread has reported as many
 * completions as operations were started.  A node that fails any phase is
 * marked with error = 1 and skipped by all later phases.
 *
 * Returns -ENOMEM when the nodes array is missing, the failing call's result
 * when address lookup or event-thread creation fails, and otherwise the
 * result of the last rdma_* call whose return value was stored in ret.
 */
static int run_client(void)
{
	pthread_t event_thread;
	int i, ret;

	/* Every loop below indexes nodes; refuse to run without it. */
	if (!nodes)
		return -ENOMEM;

	ret = get_rdma_addr(src_addr, dst_addr, port, &hints, &rai);
	if (ret) {
		printf("getaddrinfo error: %s\n", gai_strerror(ret));
		return ret;
	}

	conn_param.responder_resources = 1;
	conn_param.initiator_depth = 1;
	conn_param.retry_count = retries;
	conn_param.private_data = rai->ai_connect;
	conn_param.private_data_len = rai->ai_connect_len;

	ret = pthread_create(&event_thread, NULL, process_events, NULL);
	if (ret) {
		/* pthread_create() returns the error number and does not set errno. */
		errno = ret;
		perror("failure creating event thread");
		return ret;
	}

	if (src_addr) {
		printf("binding source address\n");
		start_time(STEP_BIND);
		for (i = 0; i < connections; i++) {
			start_perf(&nodes[i], STEP_BIND);
			ret = rdma_bind_addr(nodes[i].id, rai->ai_src_addr);
			if (ret) {
				perror("failure bind addr");
				nodes[i].error = 1;
				continue;
			}
			end_perf(&nodes[i], STEP_BIND);
		}
		end_time(STEP_BIND);
	}

	printf("resolving address\n");
	start_time(STEP_RESOLVE_ADDR);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		nodes[i].retries = retries;
		start_perf(&nodes[i], STEP_RESOLVE_ADDR);
		ret = rdma_resolve_addr(nodes[i].id, rai->ai_src_addr,
					rai->ai_dst_addr, timeout);
		if (ret) {
			perror("failure getting addr");
			nodes[i].error = 1;
			continue;
		}
		started[STEP_RESOLVE_ADDR]++;
	}
	/* Completions are counted by the event thread via addr_handler(). */
	while (started[STEP_RESOLVE_ADDR] != completed[STEP_RESOLVE_ADDR]) sched_yield();
	end_time(STEP_RESOLVE_ADDR);

	printf("resolving route\n");
	start_time(STEP_RESOLVE_ROUTE);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		nodes[i].retries = retries;
		start_perf(&nodes[i], STEP_RESOLVE_ROUTE);
		ret = rdma_resolve_route(nodes[i].id, timeout);
		if (ret) {
			perror("failure resolving route");
			nodes[i].error = 1;
			continue;
		}
		started[STEP_RESOLVE_ROUTE]++;
	}
	while (started[STEP_RESOLVE_ROUTE] != completed[STEP_RESOLVE_ROUTE]) sched_yield();
	end_time(STEP_RESOLVE_ROUTE);

	printf("creating qp\n");
	start_time(STEP_CREATE_QP);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		start_perf(&nodes[i], STEP_CREATE_QP);
		ret = rdma_create_qp(nodes[i].id, NULL, &init_qp_attr);
		if (ret) {
			perror("failure creating qp");
			nodes[i].error = 1;
			continue;
		}
		end_perf(&nodes[i], STEP_CREATE_QP);
	}
	end_time(STEP_CREATE_QP);

	printf("connecting\n");
	start_time(STEP_CONNECT);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		start_perf(&nodes[i], STEP_CONNECT);
		ret = rdma_connect(nodes[i].id, &conn_param);
		if (ret) {
			perror("failure rconnecting");
			nodes[i].error = 1;
			continue;
		}
		started[STEP_CONNECT]++;
	}
	while (started[STEP_CONNECT] != completed[STEP_CONNECT]) sched_yield();
	end_time(STEP_CONNECT);

	printf("disconnecting\n");
	start_time(STEP_DISCONNECT);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		start_perf(&nodes[i], STEP_DISCONNECT);
		/*
		 * Only count disconnects that were actually issued; a failed
		 * call would otherwise leave the wait loop below waiting for
		 * a completion that may never be reported.
		 */
		if (rdma_disconnect(nodes[i].id)) {
			perror("failure disconnecting");
			nodes[i].error = 1;
			continue;
		}
		started[STEP_DISCONNECT]++;
	}
	while (started[STEP_DISCONNECT] != completed[STEP_DISCONNECT]) sched_yield();
	end_time(STEP_DISCONNECT);

	return ret;
}
610 
611 int main(int argc, char **argv)
612 {
613 	int op, ret;
614 
615 	hints.ai_port_space = RDMA_PS_TCP;
616 	hints.ai_qp_type = IBV_QPT_RC;
617 	while ((op = getopt(argc, argv, "s:b:c:p:r:t:")) != -1) {
618 		switch (op) {
619 		case 's':
620 			dst_addr = optarg;
621 			break;
622 		case 'b':
623 			src_addr = optarg;
624 			break;
625 		case 'c':
626 			connections = atoi(optarg);
627 			break;
628 		case 'p':
629 			port = optarg;
630 			break;
631 		case 'r':
632 			retries = atoi(optarg);
633 			break;
634 		case 't':
635 			timeout = atoi(optarg);
636 			break;
637 		default:
638 			printf("usage: %s\n", argv[0]);
639 			printf("\t[-s server_address]\n");
640 			printf("\t[-b bind_address]\n");
641 			printf("\t[-c connections]\n");
642 			printf("\t[-p port_number]\n");
643 			printf("\t[-r retries]\n");
644 			printf("\t[-t timeout_ms]\n");
645 			exit(1);
646 		}
647 	}
648 
649 	init_qp_attr.cap.max_send_wr = 1;
650 	init_qp_attr.cap.max_recv_wr = 1;
651 	init_qp_attr.cap.max_send_sge = 1;
652 	init_qp_attr.cap.max_recv_sge = 1;
653 	init_qp_attr.qp_type = IBV_QPT_RC;
654 
655 	channel = rdma_create_event_channel();
656 	if (!channel) {
657 		printf("failed to create event channel\n");
658 		exit(1);
659 	}
660 
661 	if (dst_addr) {
662 		alloc_nodes();
663 		ret = run_client();
664 	} else {
665 		hints.ai_flags |= RAI_PASSIVE;
666 		ret = run_server();
667 	}
668 
669 	cleanup_nodes();
670 	rdma_destroy_event_channel(channel);
671 	if (rai)
672 		rdma_freeaddrinfo(rai);
673 
674 	show_perf();
675 	free(nodes);
676 	return ret;
677 }
678