1 /*
2  * Copyright (c) 2005-2007 Intel Corporation.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  *
32  * $Id$
33  */
34 
35 #include <stdlib.h>
36 #include <string.h>
37 #include <stdio.h>
38 #include <errno.h>
39 #include <sys/types.h>
40 #include <arpa/inet.h>
41 #include <sys/socket.h>
42 #include <netdb.h>
43 #include <unistd.h>
44 #include <getopt.h>
45 
46 #include <rdma/rdma_cma.h>
47 #include <infiniband/ib.h>
48 
/*
 * Per-connection test state: one RDMA CM identifier together with the
 * verbs resources created for it.  The array of nodes is zero-filled
 * when allocated, so unset pointers read as NULL during teardown.
 */
struct cmatest_node {
	/* Index of this node within test.nodes. */
	int			id;
	/* CM identifier; its context pointer refers back to this node. */
	struct rdma_cm_id	*cma_id;
	/* Set to 1 once the join event arrived and the AH was created. */
	int			connected;
	struct ibv_pd		*pd;
	/* Single CQ used as both send and receive CQ of the QP. */
	struct ibv_cq		*cq;
	/* Registration covering the buffer pointed to by mem. */
	struct ibv_mr		*mr;
	/* Address handle built from the join event's ah_attr. */
	struct ibv_ah		*ah;
	/* QP number and QKey copied from the join event, used when sending. */
	uint32_t		remote_qpn;
	uint32_t		remote_qkey;
	/* Message buffer: message_size + sizeof(struct ibv_grh) bytes. */
	void			*mem;
};
61 
/*
 * Global state shared by the whole test run.
 */
struct cmatest {
	/* Event channel all CM identifiers are created on. */
	struct rdma_event_channel *channel;
	/* Thread started by run() to drain events after the joins are done. */
	pthread_t 		cmathread;
	/* Array of `connections` nodes, allocated by alloc_nodes(). */
	struct cmatest_node	*nodes;
	/* Not referenced by the code in this file. */
	int			conn_index;
	/* Joins still outstanding; decremented on join success or any error. */
	int			connects_left;

	/* Storage for the destination address; dst_addr points at it. */
	struct sockaddr_storage	dst_in;
	struct sockaddr		*dst_addr;
	/* Storage for the bind address; src_addr stays NULL unless -b is given. */
	struct sockaddr_storage	src_in;
	struct sockaddr		*src_addr;
};
74 
/* Shared test state and the command-line tunables filled in by main(). */
static struct cmatest test;
/* Number of nodes to create (-c). */
static int connections = 1;
/* Payload bytes per message (-S). */
static int message_size = 100;
/* Messages per node (-C); forced to 0 when message_size is 0. */
static int message_count = 10;
/* Non-zero when -s was given: post sends instead of receives. */
static int is_sender;
/* Non-zero when the destination was given with -M instead of -m. */
static int unmapped_addr;
/* Destination address text from -m or -M. */
static char *dst_addr;
/* Bind address text from -b, or NULL. */
static char *src_addr;
/* Port space passed to rdma_create_id() (-p). */
static enum rdma_port_space port_space = RDMA_PS_UDP;
84 
85 static int create_message(struct cmatest_node *node)
86 {
87 	if (!message_size)
88 		message_count = 0;
89 
90 	if (!message_count)
91 		return 0;
92 
93 	node->mem = malloc(message_size + sizeof(struct ibv_grh));
94 	if (!node->mem) {
95 		printf("failed message allocation\n");
96 		return -1;
97 	}
98 	node->mr = ibv_reg_mr(node->pd, node->mem,
99 			      message_size + sizeof(struct ibv_grh),
100 			      IBV_ACCESS_LOCAL_WRITE);
101 	if (!node->mr) {
102 		printf("failed to reg MR\n");
103 		goto err;
104 	}
105 	return 0;
106 err:
107 	free(node->mem);
108 	return -1;
109 }
110 
111 static int verify_test_params(struct cmatest_node *node)
112 {
113 	struct ibv_port_attr port_attr;
114 	int ret;
115 
116 	ret = ibv_query_port(node->cma_id->verbs, node->cma_id->port_num,
117 			     &port_attr);
118 	if (ret)
119 		return ret;
120 
121 	if (message_count && message_size > (1 << (port_attr.active_mtu + 7))) {
122 		printf("mckey: message_size %d is larger than active mtu %d\n",
123 		       message_size, 1 << (port_attr.active_mtu + 7));
124 		return -EINVAL;
125 	}
126 
127 	return 0;
128 }
129 
130 static int init_node(struct cmatest_node *node)
131 {
132 	struct ibv_qp_init_attr init_qp_attr;
133 	int cqe, ret;
134 
135 	node->pd = ibv_alloc_pd(node->cma_id->verbs);
136 	if (!node->pd) {
137 		ret = -ENOMEM;
138 		printf("mckey: unable to allocate PD\n");
139 		goto out;
140 	}
141 
142 	cqe = message_count ? message_count * 2 : 2;
143 	node->cq = ibv_create_cq(node->cma_id->verbs, cqe, node, NULL, 0);
144 	if (!node->cq) {
145 		ret = -ENOMEM;
146 		printf("mckey: unable to create CQ\n");
147 		goto out;
148 	}
149 
150 	memset(&init_qp_attr, 0, sizeof init_qp_attr);
151 	init_qp_attr.cap.max_send_wr = message_count ? message_count : 1;
152 	init_qp_attr.cap.max_recv_wr = message_count ? message_count : 1;
153 	init_qp_attr.cap.max_send_sge = 1;
154 	init_qp_attr.cap.max_recv_sge = 1;
155 	init_qp_attr.qp_context = node;
156 	init_qp_attr.sq_sig_all = 0;
157 	init_qp_attr.qp_type = IBV_QPT_UD;
158 	init_qp_attr.send_cq = node->cq;
159 	init_qp_attr.recv_cq = node->cq;
160 	ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
161 	if (ret) {
162 		perror("mckey: unable to create QP");
163 		goto out;
164 	}
165 
166 	ret = create_message(node);
167 	if (ret) {
168 		printf("mckey: failed to create messages: %d\n", ret);
169 		goto out;
170 	}
171 out:
172 	return ret;
173 }
174 
175 static int post_recvs(struct cmatest_node *node)
176 {
177 	struct ibv_recv_wr recv_wr, *recv_failure;
178 	struct ibv_sge sge;
179 	int i, ret = 0;
180 
181 	if (!message_count)
182 		return 0;
183 
184 	recv_wr.next = NULL;
185 	recv_wr.sg_list = &sge;
186 	recv_wr.num_sge = 1;
187 	recv_wr.wr_id = (uintptr_t) node;
188 
189 	sge.length = message_size + sizeof(struct ibv_grh);
190 	sge.lkey = node->mr->lkey;
191 	sge.addr = (uintptr_t) node->mem;
192 
193 	for (i = 0; i < message_count && !ret; i++ ) {
194 		ret = ibv_post_recv(node->cma_id->qp, &recv_wr, &recv_failure);
195 		if (ret) {
196 			printf("failed to post receives: %d\n", ret);
197 			break;
198 		}
199 	}
200 	return ret;
201 }
202 
203 static int post_sends(struct cmatest_node *node, int signal_flag)
204 {
205 	struct ibv_send_wr send_wr, *bad_send_wr;
206 	struct ibv_sge sge;
207 	int i, ret = 0;
208 
209 	if (!node->connected || !message_count)
210 		return 0;
211 
212 	send_wr.next = NULL;
213 	send_wr.sg_list = &sge;
214 	send_wr.num_sge = 1;
215 	send_wr.opcode = IBV_WR_SEND_WITH_IMM;
216 	send_wr.send_flags = signal_flag;
217 	send_wr.wr_id = (unsigned long)node;
218 	send_wr.imm_data = htobe32(node->cma_id->qp->qp_num);
219 
220 	send_wr.wr.ud.ah = node->ah;
221 	send_wr.wr.ud.remote_qpn = node->remote_qpn;
222 	send_wr.wr.ud.remote_qkey = node->remote_qkey;
223 
224 	sge.length = message_size;
225 	sge.lkey = node->mr->lkey;
226 	sge.addr = (uintptr_t) node->mem;
227 
228 	for (i = 0; i < message_count && !ret; i++) {
229 		ret = ibv_post_send(node->cma_id->qp, &send_wr, &bad_send_wr);
230 		if (ret)
231 			printf("failed to post sends: %d\n", ret);
232 	}
233 	return ret;
234 }
235 
236 static void connect_error(void)
237 {
238 	test.connects_left--;
239 }
240 
241 static int addr_handler(struct cmatest_node *node)
242 {
243 	int ret;
244 
245 	ret = verify_test_params(node);
246 	if (ret)
247 		goto err;
248 
249 	ret = init_node(node);
250 	if (ret)
251 		goto err;
252 
253 	if (!is_sender) {
254 		ret = post_recvs(node);
255 		if (ret)
256 			goto err;
257 	}
258 
259 	ret = rdma_join_multicast(node->cma_id, test.dst_addr, node);
260 	if (ret) {
261 		perror("mckey: failure joining");
262 		goto err;
263 	}
264 	return 0;
265 err:
266 	connect_error();
267 	return ret;
268 }
269 
270 static int join_handler(struct cmatest_node *node,
271 			struct rdma_ud_param *param)
272 {
273 	char buf[40];
274 
275 	inet_ntop(AF_INET6, param->ah_attr.grh.dgid.raw, buf, 40);
276 	printf("mckey: joined dgid: %s mlid 0x%x sl %d\n", buf,
277 		param->ah_attr.dlid, param->ah_attr.sl);
278 
279 	node->remote_qpn = param->qp_num;
280 	node->remote_qkey = param->qkey;
281 	node->ah = ibv_create_ah(node->pd, &param->ah_attr);
282 	if (!node->ah) {
283 		printf("mckey: failure creating address handle\n");
284 		goto err;
285 	}
286 
287 	node->connected = 1;
288 	test.connects_left--;
289 	return 0;
290 err:
291 	connect_error();
292 	return -1;
293 }
294 
295 static int cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
296 {
297 	int ret = 0;
298 
299 	switch (event->event) {
300 	case RDMA_CM_EVENT_ADDR_RESOLVED:
301 		ret = addr_handler(cma_id->context);
302 		break;
303 	case RDMA_CM_EVENT_MULTICAST_JOIN:
304 		ret = join_handler(cma_id->context, &event->param.ud);
305 		break;
306 	case RDMA_CM_EVENT_ADDR_ERROR:
307 	case RDMA_CM_EVENT_ROUTE_ERROR:
308 	case RDMA_CM_EVENT_MULTICAST_ERROR:
309 		printf("mckey: event: %s, error: %d\n",
310 		       rdma_event_str(event->event), event->status);
311 		connect_error();
312 		ret = event->status;
313 		break;
314 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
315 		/* Cleanup will occur after test completes. */
316 		break;
317 	default:
318 		break;
319 	}
320 	return ret;
321 }
322 
323 static void *cma_thread(void *arg)
324 {
325 	struct rdma_cm_event *event;
326 	int ret;
327 
328 	while (1) {
329 		ret = rdma_get_cm_event(test.channel, &event);
330 		if (ret) {
331 			perror("rdma_get_cm_event");
332 			break;
333 		}
334 
335 		switch (event->event) {
336 		case RDMA_CM_EVENT_MULTICAST_ERROR:
337 		case RDMA_CM_EVENT_ADDR_CHANGE:
338 			printf("mckey: event: %s, status: %d\n",
339 			       rdma_event_str(event->event), event->status);
340 			break;
341 		default:
342 			break;
343 		}
344 
345 		rdma_ack_cm_event(event);
346 	}
347 	return NULL;
348 }
349 
350 static void destroy_node(struct cmatest_node *node)
351 {
352 	if (!node->cma_id)
353 		return;
354 
355 	if (node->ah)
356 		ibv_destroy_ah(node->ah);
357 
358 	if (node->cma_id->qp)
359 		rdma_destroy_qp(node->cma_id);
360 
361 	if (node->cq)
362 		ibv_destroy_cq(node->cq);
363 
364 	if (node->mem) {
365 		ibv_dereg_mr(node->mr);
366 		free(node->mem);
367 	}
368 
369 	if (node->pd)
370 		ibv_dealloc_pd(node->pd);
371 
372 	/* Destroy the RDMA ID after all device resources */
373 	rdma_destroy_id(node->cma_id);
374 }
375 
376 static int alloc_nodes(void)
377 {
378 	int ret, i;
379 
380 	test.nodes = malloc(sizeof *test.nodes * connections);
381 	if (!test.nodes) {
382 		printf("mckey: unable to allocate memory for test nodes\n");
383 		return -ENOMEM;
384 	}
385 	memset(test.nodes, 0, sizeof *test.nodes * connections);
386 
387 	for (i = 0; i < connections; i++) {
388 		test.nodes[i].id = i;
389 		ret = rdma_create_id(test.channel, &test.nodes[i].cma_id,
390 				     &test.nodes[i], port_space);
391 		if (ret)
392 			goto err;
393 	}
394 	return 0;
395 err:
396 	while (--i >= 0)
397 		rdma_destroy_id(test.nodes[i].cma_id);
398 	free(test.nodes);
399 	return ret;
400 }
401 
402 static void destroy_nodes(void)
403 {
404 	int i;
405 
406 	for (i = 0; i < connections; i++)
407 		destroy_node(&test.nodes[i]);
408 	free(test.nodes);
409 }
410 
411 static int poll_cqs(void)
412 {
413 	struct ibv_wc wc[8];
414 	int done, i, ret;
415 
416 	for (i = 0; i < connections; i++) {
417 		if (!test.nodes[i].connected)
418 			continue;
419 
420 		for (done = 0; done < message_count; done += ret) {
421 			ret = ibv_poll_cq(test.nodes[i].cq, 8, wc);
422 			if (ret < 0) {
423 				printf("mckey: failed polling CQ: %d\n", ret);
424 				return ret;
425 			}
426 		}
427 	}
428 	return 0;
429 }
430 
431 static int connect_events(void)
432 {
433 	struct rdma_cm_event *event;
434 	int ret = 0;
435 
436 	while (test.connects_left && !ret) {
437 		ret = rdma_get_cm_event(test.channel, &event);
438 		if (!ret) {
439 			ret = cma_handler(event->id, event);
440 			rdma_ack_cm_event(event);
441 		}
442 	}
443 	return ret;
444 }
445 
/*
 * Resolve a host name or address string with getaddrinfo() and copy the
 * first result into *addr.  The caller must provide storage large
 * enough for the resolved address.
 *
 * Returns 0 on success, otherwise the getaddrinfo() error code after
 * printing a diagnostic.
 */
static int get_addr(char *dst, struct sockaddr *addr)
{
	struct addrinfo *info;
	int rc = getaddrinfo(dst, NULL, NULL, &info);

	if (rc != 0) {
		printf("getaddrinfo failed (%s) - invalid hostname or IP address\n", gai_strerror(rc));
		return rc;
	}

	memcpy(addr, info->ai_addr, info->ai_addrlen);
	freeaddrinfo(info);
	return 0;
}
461 
462 static int get_dst_addr(char *dst, struct sockaddr *addr)
463 {
464 	struct sockaddr_ib *sib;
465 
466 	if (!unmapped_addr)
467 		return get_addr(dst, addr);
468 
469 	sib = (struct sockaddr_ib *) addr;
470 	memset(sib, 0, sizeof *sib);
471 	sib->sib_family = AF_IB;
472 	inet_pton(AF_INET6, dst, &sib->sib_addr);
473 	return 0;
474 }
475 
476 static int run(void)
477 {
478 	int i, ret, err;
479 
480 	printf("mckey: starting %s\n", is_sender ? "client" : "server");
481 	if (src_addr) {
482 		ret = get_addr(src_addr, (struct sockaddr *) &test.src_in);
483 		if (ret)
484 			return ret;
485 	}
486 
487 	ret = get_dst_addr(dst_addr, (struct sockaddr *) &test.dst_in);
488 	if (ret)
489 		return ret;
490 
491 	printf("mckey: joining\n");
492 	for (i = 0; i < connections; i++) {
493 		if (src_addr) {
494 			ret = rdma_bind_addr(test.nodes[i].cma_id,
495 					     test.src_addr);
496 			if (ret) {
497 				perror("mckey: addr bind failure");
498 				connect_error();
499 				return ret;
500 			}
501 		}
502 
503 		if (unmapped_addr)
504 			ret = addr_handler(&test.nodes[i]);
505 		else
506 			ret = rdma_resolve_addr(test.nodes[i].cma_id,
507 						test.src_addr, test.dst_addr,
508 						2000);
509 		if (ret) {
510 			perror("mckey: resolve addr failure");
511 			connect_error();
512 			return ret;
513 		}
514 	}
515 
516 	ret = connect_events();
517 	if (ret)
518 		goto out;
519 
520 	pthread_create(&test.cmathread, NULL, cma_thread, NULL);
521 
522 	/*
523 	 * Pause to give SM chance to configure switches.  We don't want to
524 	 * handle reliability issue in this simple test program.
525 	 */
526 	sleep(3);
527 
528 	if (message_count) {
529 		if (is_sender) {
530 			printf("initiating data transfers\n");
531 			for (i = 0; i < connections; i++) {
532 				ret = post_sends(&test.nodes[i], 0);
533 				if (ret)
534 					goto out;
535 			}
536 		} else {
537 			printf("receiving data transfers\n");
538 			ret = poll_cqs();
539 			if (ret)
540 				goto out;
541 		}
542 		printf("data transfers complete\n");
543 	}
544 out:
545 	for (i = 0; i < connections; i++) {
546 		err = rdma_leave_multicast(test.nodes[i].cma_id,
547 					   test.dst_addr);
548 		if (err) {
549 			perror("mckey: failure leaving");
550 			ret = err;
551 		}
552 	}
553 	return ret;
554 }
555 
556 int main(int argc, char **argv)
557 {
558 	int op, ret;
559 
560 
561 	while ((op = getopt(argc, argv, "m:M:sb:c:C:S:p:")) != -1) {
562 		switch (op) {
563 		case 'm':
564 			dst_addr = optarg;
565 			break;
566 		case 'M':
567 			unmapped_addr = 1;
568 			dst_addr = optarg;
569 			break;
570 		case 's':
571 			is_sender = 1;
572 			break;
573 		case 'b':
574 			src_addr = optarg;
575 			test.src_addr = (struct sockaddr *) &test.src_in;
576 			break;
577 		case 'c':
578 			connections = atoi(optarg);
579 			break;
580 		case 'C':
581 			message_count = atoi(optarg);
582 			break;
583 		case 'S':
584 			message_size = atoi(optarg);
585 			break;
586 		case 'p':
587 			port_space = strtol(optarg, NULL, 0);
588 			break;
589 		default:
590 			printf("usage: %s\n", argv[0]);
591 			printf("\t-m multicast_address\n");
592 			printf("\t[-M unmapped_multicast_address]\n"
593 			       "\t replaces -m and requires -b\n");
594 			printf("\t[-s(ender)]\n");
595 			printf("\t[-b bind_address]\n");
596 			printf("\t[-c connections]\n");
597 			printf("\t[-C message_count]\n");
598 			printf("\t[-S message_size]\n");
599 			printf("\t[-p port_space - %#x for UDP (default), "
600 			       "%#x for IPOIB]\n", RDMA_PS_UDP, RDMA_PS_IPOIB);
601 			exit(1);
602 		}
603 	}
604 
605 	if (unmapped_addr && !src_addr) {
606 		printf("unmapped multicast address requires binding "
607 			"to source address\n");
608 		exit(1);
609 	}
610 
611 	test.dst_addr = (struct sockaddr *) &test.dst_in;
612 	test.connects_left = connections;
613 
614 	test.channel = rdma_create_event_channel();
615 	if (!test.channel) {
616 		perror("failed to create event channel");
617 		exit(1);
618 	}
619 
620 	if (alloc_nodes())
621 		exit(1);
622 
623 	ret = run();
624 
625 	printf("test complete\n");
626 	destroy_nodes();
627 	rdma_destroy_event_channel(test.channel);
628 
629 	printf("return status %d\n", ret);
630 	return ret;
631 }
632