1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #if HAVE_CONFIG_H
34 #  include <config.h>
35 #endif /* HAVE_CONFIG_H */
36 
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <unistd.h>
40 #include <string.h>
41 #include <sys/types.h>
42 #include <sys/socket.h>
43 #include <sys/time.h>
44 #include <netdb.h>
45 #include <getopt.h>
46 #include <arpa/inet.h>
47 #include <time.h>
48 
49 #include "pingpong.h"
50 
/*
 * Work-request IDs attached to every posted receive and send.
 *
 * The two values are distinct single bits: main() ORs them together in
 * pingpong_context.pending[] and clears one bit per completion.
 */
enum {
	PINGPONG_RECV_WRID = 1,
	PINGPONG_SEND_WRID = 2,
};

/*
 * Capacity of the per-QP arrays and number of address records that the
 * client and server exchange over the TCP side channel.
 */
enum {
	MAX_QP = 256,
};

/* System page size; main() fills this in from sysconf(_SC_PAGESIZE). */
static int page_size;
59 
60 struct pingpong_context {
61 	struct ibv_context	*context;
62 	struct ibv_comp_channel *channel;
63 	struct ibv_pd		*pd;
64 	struct ibv_mr		*mr;
65 	struct ibv_cq		*cq;
66 	struct ibv_srq		*srq;
67 	struct ibv_qp		*qp[MAX_QP];
68 	void			*buf;
69 	int			 size;
70 	int			 num_qp;
71 	int			 rx_depth;
72 	int			 pending[MAX_QP];
73 	struct ibv_port_attr	 portinfo;
74 };
75 
76 struct pingpong_dest {
77 	int lid;
78 	int qpn;
79 	int psn;
80 	union ibv_gid gid;
81 };
82 
83 static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu mtu,
84 			  int sl, const struct pingpong_dest *my_dest,
85 			  const struct pingpong_dest *dest, int sgid_idx)
86 {
87 	int i;
88 
89 	for (i = 0; i < ctx->num_qp; ++i) {
90 		struct ibv_qp_attr attr = {
91 			.qp_state		= IBV_QPS_RTR,
92 			.path_mtu		= mtu,
93 			.dest_qp_num		= dest[i].qpn,
94 			.rq_psn			= dest[i].psn,
95 			.max_dest_rd_atomic	= 1,
96 			.min_rnr_timer		= 12,
97 			.ah_attr		= {
98 				.is_global	= 0,
99 				.dlid		= dest[i].lid,
100 				.sl		= sl,
101 				.src_path_bits	= 0,
102 				.port_num	= port
103 			}
104 		};
105 
106 		if (dest->gid.global.interface_id) {
107 			attr.ah_attr.is_global = 1;
108 			attr.ah_attr.grh.hop_limit = 1;
109 			attr.ah_attr.grh.dgid = dest->gid;
110 			attr.ah_attr.grh.sgid_index = sgid_idx;
111 		}
112 		if (ibv_modify_qp(ctx->qp[i], &attr,
113 				  IBV_QP_STATE              |
114 				  IBV_QP_AV                 |
115 				  IBV_QP_PATH_MTU           |
116 				  IBV_QP_DEST_QPN           |
117 				  IBV_QP_RQ_PSN             |
118 				  IBV_QP_MAX_DEST_RD_ATOMIC |
119 				  IBV_QP_MIN_RNR_TIMER)) {
120 			fprintf(stderr, "Failed to modify QP[%d] to RTR\n", i);
121 			return 1;
122 		}
123 
124 		attr.qp_state	    = IBV_QPS_RTS;
125 		attr.timeout	    = 14;
126 		attr.retry_cnt	    = 7;
127 		attr.rnr_retry	    = 7;
128 		attr.sq_psn	    = my_dest[i].psn;
129 		attr.max_rd_atomic  = 1;
130 		if (ibv_modify_qp(ctx->qp[i], &attr,
131 				  IBV_QP_STATE              |
132 				  IBV_QP_TIMEOUT            |
133 				  IBV_QP_RETRY_CNT          |
134 				  IBV_QP_RNR_RETRY          |
135 				  IBV_QP_SQ_PSN             |
136 				  IBV_QP_MAX_QP_RD_ATOMIC)) {
137 			fprintf(stderr, "Failed to modify QP[%d] to RTS\n", i);
138 			return 1;
139 		}
140 	}
141 
142 	return 0;
143 }
144 
/*
 * Client side of the TCP address exchange.
 *
 * Connects to servername:port, sends the MAX_QP local addresses in
 * my_dest[] (one fixed-size "lid:qpn:psn:gid" hex record per entry),
 * reads MAX_QP records back from the server and finally sends "done".
 *
 * Returns a malloc()ed array of MAX_QP remote addresses that the caller
 * must free(), or NULL on any failure.
 */
static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
						 const struct pingpong_dest *my_dest)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_family   = AF_INET,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int r;
	int i;
	int sockfd = -1;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(servername, service, &hints, &res);

	/* getaddrinfo() reports failure with any non-zero value, not only negative ones. */
	if (n) {
		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
		free(service);
		return NULL;
	}

	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
		return NULL;
	}

	for (i = 0; i < MAX_QP; ++i) {
		gid_to_wire_gid(&my_dest[i].gid, gid);
		n = snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s",
			     my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn, gid);
		/* Refuse values that do not fit the fixed-width record. */
		if (n < 0 || n >= (int) sizeof msg) {
			fprintf(stderr, "Local address [%d] doesn't fit the wire format\n", i);
			goto out;
		}
		if (write(sockfd, msg, sizeof msg) != sizeof msg) {
			fprintf(stderr, "Couldn't send local address\n");
			goto out;
		}
	}

	rem_dest = malloc(MAX_QP * sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	for (i = 0; i < MAX_QP; ++i) {
		unsigned int lid, qpn, psn;

		n = 0;
		while (n < (int) sizeof msg) {
			r = read(sockfd, msg + n, sizeof msg - n);
			/* r == 0 is EOF: the peer went away, so stop instead of spinning. */
			if (r <= 0) {
				if (r < 0)
					perror("client read");
				fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
					n, (int) sizeof msg, i);
				free(rem_dest);
				rem_dest = NULL;
				goto out;
			}
			n += r;
		}

		/* The record comes from the network: terminate and bound it before parsing. */
		msg[sizeof msg - 1] = '\0';
		if (sscanf(msg, "%x:%x:%x:%32s", &lid, &qpn, &psn, gid) != 4 ||
		    strlen(gid) != 32) {
			fprintf(stderr, "Couldn't parse remote address [%d]\n", i);
			free(rem_dest);
			rem_dest = NULL;
			goto out;
		}

		rem_dest[i].lid = lid;
		rem_dest[i].qpn = qpn;
		rem_dest[i].psn = psn;
		wire_gid_to_gid(gid, &rem_dest[i].gid);
	}

	/* Final acknowledgement is best effort: the addresses are already in hand. */
	if (write(sockfd, "done", sizeof "done") != sizeof "done")
		fprintf(stderr, "Warning: couldn't send \"done\" to server\n");

out:
	close(sockfd);
	return rem_dest;
}
228 
/*
 * Server side of the TCP address exchange.
 *
 * Listens on the given TCP port, accepts one connection, reads MAX_QP
 * "lid:qpn:psn:gid" records from the client, connects the local QPs to
 * them with pp_connect_ctx(), sends the MAX_QP local addresses in
 * my_dest[] back and then waits for the client's final message.
 *
 * ib_port, mtu, sl and sgid_idx are passed through to pp_connect_ctx().
 *
 * Returns a malloc()ed array of MAX_QP remote addresses that the caller
 * must free(), or NULL on any failure.
 */
static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
						 int ib_port, enum ibv_mtu mtu,
						 int port, int sl,
						 const struct pingpong_dest *my_dest,
						 int sgid_idx)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_flags    = AI_PASSIVE,
		.ai_family   = AF_INET,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int r;
	int i;
	int sockfd = -1, connfd;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(NULL, service, &hints, &res);

	/* getaddrinfo() reports failure with any non-zero value, not only negative ones. */
	if (n) {
		fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
		free(service);
		return NULL;
	}

	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			n = 1;

			/* Best effort: lets the server be restarted quickly on the same port. */
			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);

			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		return NULL;
	}

	if (listen(sockfd, 1) < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		close(sockfd);
		return NULL;
	}

	connfd = accept(sockfd, NULL, NULL);
	close(sockfd);
	if (connfd < 0) {
		fprintf(stderr, "accept() failed\n");
		return NULL;
	}

	rem_dest = malloc(MAX_QP * sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	for (i = 0; i < MAX_QP; ++i) {
		unsigned int lid, qpn, psn;

		n = 0;
		while (n < (int) sizeof msg) {
			r = read(connfd, msg + n, sizeof msg - n);
			/* r == 0 is EOF: the peer went away, so stop instead of spinning. */
			if (r <= 0) {
				if (r < 0)
					perror("server read");
				fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
					n, (int) sizeof msg, i);
				free(rem_dest);
				rem_dest = NULL;
				goto out;
			}
			n += r;
		}

		/* The record comes from the network: terminate and bound it before parsing. */
		msg[sizeof msg - 1] = '\0';
		if (sscanf(msg, "%x:%x:%x:%32s", &lid, &qpn, &psn, gid) != 4 ||
		    strlen(gid) != 32) {
			fprintf(stderr, "Couldn't parse remote address [%d]\n", i);
			free(rem_dest);
			rem_dest = NULL;
			goto out;
		}

		rem_dest[i].lid = lid;
		rem_dest[i].qpn = qpn;
		rem_dest[i].psn = psn;
		wire_gid_to_gid(gid, &rem_dest[i].gid);
	}

	if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest, sgid_idx)) {
		fprintf(stderr, "Couldn't connect to remote QP\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}

	for (i = 0; i < MAX_QP; ++i) {
		gid_to_wire_gid(&my_dest[i].gid, gid);
		n = snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s",
			     my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn, gid);
		/* Refuse values that do not fit the fixed-width record. */
		if (n < 0 || n >= (int) sizeof msg) {
			fprintf(stderr, "Local address [%d] doesn't fit the wire format\n", i);
			free(rem_dest);
			rem_dest = NULL;
			goto out;
		}
		if (write(connfd, msg, sizeof msg) != sizeof msg) {
			fprintf(stderr, "Couldn't send local address\n");
			free(rem_dest);
			rem_dest = NULL;
			goto out;
		}
	}

	/*
	 * Wait for the client's closing message.  The content is not
	 * inspected and a failure here is not fatal: the exchange itself
	 * has already completed.
	 */
	if (read(connfd, msg, sizeof msg) < 0)
		perror("server read");

out:
	close(connfd);
	return rem_dest;
}
337 
338 static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
339 					    int num_qp, int rx_depth, int port,
340 					    int use_event)
341 {
342 	struct pingpong_context *ctx;
343 	int i;
344 
345 	ctx = calloc(1, sizeof *ctx);
346 	if (!ctx)
347 		return NULL;
348 
349 	ctx->size     = size;
350 	ctx->num_qp   = num_qp;
351 	ctx->rx_depth = rx_depth;
352 
353 	ctx->buf = malloc(roundup(size, page_size));
354 	if (!ctx->buf) {
355 		fprintf(stderr, "Couldn't allocate work buf.\n");
356 		return NULL;
357 	}
358 
359 	memset(ctx->buf, 0, size);
360 
361 	ctx->context = ibv_open_device(ib_dev);
362 	if (!ctx->context) {
363 		fprintf(stderr, "Couldn't get context for %s\n",
364 			ibv_get_device_name(ib_dev));
365 		return NULL;
366 	}
367 
368 	if (use_event) {
369 		ctx->channel = ibv_create_comp_channel(ctx->context);
370 		if (!ctx->channel) {
371 			fprintf(stderr, "Couldn't create completion channel\n");
372 			return NULL;
373 		}
374 	} else
375 		ctx->channel = NULL;
376 
377 	ctx->pd = ibv_alloc_pd(ctx->context);
378 	if (!ctx->pd) {
379 		fprintf(stderr, "Couldn't allocate PD\n");
380 		return NULL;
381 	}
382 
383 	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE);
384 	if (!ctx->mr) {
385 		fprintf(stderr, "Couldn't register MR\n");
386 		return NULL;
387 	}
388 
389 	ctx->cq = ibv_create_cq(ctx->context, rx_depth + num_qp, NULL,
390 				ctx->channel, 0);
391 	if (!ctx->cq) {
392 		fprintf(stderr, "Couldn't create CQ\n");
393 		return NULL;
394 	}
395 
396 	{
397 		struct ibv_srq_init_attr attr = {
398 			.attr = {
399 				.max_wr  = rx_depth,
400 				.max_sge = 1
401 			}
402 		};
403 
404 		ctx->srq = ibv_create_srq(ctx->pd, &attr);
405 		if (!ctx->srq)  {
406 			fprintf(stderr, "Couldn't create SRQ\n");
407 			return NULL;
408 		}
409 	}
410 
411 	for (i = 0; i < num_qp; ++i) {
412 		struct ibv_qp_init_attr attr = {
413 			.send_cq = ctx->cq,
414 			.recv_cq = ctx->cq,
415 			.srq     = ctx->srq,
416 			.cap     = {
417 				.max_send_wr  = 1,
418 				.max_send_sge = 1,
419 			},
420 			.qp_type = IBV_QPT_RC
421 		};
422 
423 		ctx->qp[i] = ibv_create_qp(ctx->pd, &attr);
424 		if (!ctx->qp[i])  {
425 			fprintf(stderr, "Couldn't create QP[%d]\n", i);
426 			return NULL;
427 		}
428 	}
429 
430 	for (i = 0; i < num_qp; ++i) {
431 		struct ibv_qp_attr attr = {
432 			.qp_state        = IBV_QPS_INIT,
433 			.pkey_index      = 0,
434 			.port_num        = port,
435 			.qp_access_flags = 0
436 		};
437 
438 		if (ibv_modify_qp(ctx->qp[i], &attr,
439 				  IBV_QP_STATE              |
440 				  IBV_QP_PKEY_INDEX         |
441 				  IBV_QP_PORT               |
442 				  IBV_QP_ACCESS_FLAGS)) {
443 			fprintf(stderr, "Failed to modify QP[%d] to INIT\n", i);
444 			return NULL;
445 		}
446 	}
447 
448 	return ctx;
449 }
450 
451 int pp_close_ctx(struct pingpong_context *ctx, int num_qp)
452 {
453 	int i;
454 
455 	for (i = 0; i < num_qp; ++i) {
456 		if (ibv_destroy_qp(ctx->qp[i])) {
457 			fprintf(stderr, "Couldn't destroy QP[%d]\n", i);
458 			return 1;
459 		}
460 	}
461 
462 	if (ibv_destroy_srq(ctx->srq)) {
463 		fprintf(stderr, "Couldn't destroy SRQ\n");
464 		return 1;
465 	}
466 
467 	if (ibv_destroy_cq(ctx->cq)) {
468 		fprintf(stderr, "Couldn't destroy CQ\n");
469 		return 1;
470 	}
471 
472 	if (ibv_dereg_mr(ctx->mr)) {
473 		fprintf(stderr, "Couldn't deregister MR\n");
474 		return 1;
475 	}
476 
477 	if (ibv_dealloc_pd(ctx->pd)) {
478 		fprintf(stderr, "Couldn't deallocate PD\n");
479 		return 1;
480 	}
481 
482 	if (ctx->channel) {
483 		if (ibv_destroy_comp_channel(ctx->channel)) {
484 			fprintf(stderr, "Couldn't destroy completion channel\n");
485 			return 1;
486 		}
487 	}
488 
489 	if (ibv_close_device(ctx->context)) {
490 		fprintf(stderr, "Couldn't release context\n");
491 		return 1;
492 	}
493 
494 	free(ctx->buf);
495 	free(ctx);
496 
497 	return 0;
498 }
499 
500 static int pp_post_recv(struct pingpong_context *ctx, int n)
501 {
502 	struct ibv_sge list = {
503 		.addr	= (uintptr_t) ctx->buf,
504 		.length = ctx->size,
505 		.lkey	= ctx->mr->lkey
506 	};
507 	struct ibv_recv_wr wr = {
508 		.wr_id	    = PINGPONG_RECV_WRID,
509 		.sg_list    = &list,
510 		.num_sge    = 1,
511 	};
512 	struct ibv_recv_wr *bad_wr;
513 	int i;
514 
515 	for (i = 0; i < n; ++i)
516 		if (ibv_post_srq_recv(ctx->srq, &wr, &bad_wr))
517 			break;
518 
519 	return i;
520 }
521 
522 static int pp_post_send(struct pingpong_context *ctx, int qp_index)
523 {
524 	struct ibv_sge list = {
525 		.addr	= (uintptr_t) ctx->buf,
526 		.length = ctx->size,
527 		.lkey	= ctx->mr->lkey
528 	};
529 	struct ibv_send_wr wr = {
530 		.wr_id	    = PINGPONG_SEND_WRID,
531 		.sg_list    = &list,
532 		.num_sge    = 1,
533 		.opcode     = IBV_WR_SEND,
534 		.send_flags = IBV_SEND_SIGNALED,
535 	};
536 	struct ibv_send_wr *bad_wr;
537 
538 	return ibv_post_send(ctx->qp[qp_index], &wr, &bad_wr);
539 }
540 
541 static int find_qp(int qpn, struct pingpong_context *ctx, int num_qp)
542 {
543 	int i;
544 
545 	for (i = 0; i < num_qp; ++i)
546 		if (ctx->qp[i]->qp_num == qpn)
547 			return i;
548 
549 	return -1;
550 }
551 
/*
 * Print the command-line synopsis and the list of options to stdout.
 * argv0 is the program name shown in the synopsis lines.
 */
static void usage(const char *argv0)
{
	static const char *const option_help[] = {
		"  -p, --port=<port>      listen on/connect to port <port> (default 18515)\n",
		"  -d, --ib-dev=<dev>     use IB device <dev> (default first device found)\n",
		"  -i, --ib-port=<port>   use port <port> of IB device (default 1)\n",
		"  -s, --size=<size>      size of message to exchange (default 4096)\n",
		"  -m, --mtu=<size>       path MTU (default 1024)\n",
		"  -q, --num-qp=<num>     number of QPs to use (default 16)\n",
		"  -r, --rx-depth=<dep>   number of receives to post at a time (default 500)\n",
		"  -n, --iters=<iters>    number of exchanges per QP(default 1000)\n",
		"  -l, --sl=<sl>          service level value\n",
		"  -e, --events           sleep on CQ events (default poll)\n",
		"  -g, --gid-idx=<gid index> local port gid index\n",
	};
	size_t k;

	printf("Usage:\n"
	       "  %s            start a server and wait for connection\n"
	       "  %s <host>     connect to server at <host>\n"
	       "\n"
	       "Options:\n",
	       argv0, argv0);

	for (k = 0; k < sizeof option_help / sizeof option_help[0]; ++k)
		fputs(option_help[k], stdout);
}
571 
572 int main(int argc, char *argv[])
573 {
574 	struct ibv_device      **dev_list;
575 	struct ibv_device	*ib_dev;
576 	struct ibv_wc		*wc;
577 	struct pingpong_context *ctx;
578 	struct pingpong_dest     my_dest[MAX_QP];
579 	struct pingpong_dest    *rem_dest;
580 	struct timeval           start, end;
581 	char                    *ib_devname = NULL;
582 	char                    *servername = NULL;
583 	int                      port = 18515;
584 	int                      ib_port = 1;
585 	int                      size = 4096;
586 	enum ibv_mtu		 mtu = IBV_MTU_1024;
587 	int                      num_qp = 16;
588 	int                      rx_depth = 500;
589 	int                      iters = 1000;
590 	int                      use_event = 0;
591 	int                      routs;
592 	int                      rcnt, scnt;
593 	int			 num_wc;
594 	int                      i;
595 	int                      num_cq_events = 0;
596 	int                      sl = 0;
597 	int			 gidx = -1;
598 	char			 gid[33];
599 
600 	srand48(getpid() * time(NULL));
601 
602 	while (1) {
603 		int c;
604 
605 		static struct option long_options[] = {
606 			{ .name = "port",     .has_arg = 1, .val = 'p' },
607 			{ .name = "ib-dev",   .has_arg = 1, .val = 'd' },
608 			{ .name = "ib-port",  .has_arg = 1, .val = 'i' },
609 			{ .name = "size",     .has_arg = 1, .val = 's' },
610 			{ .name = "mtu",      .has_arg = 1, .val = 'm' },
611 			{ .name = "num-qp",   .has_arg = 1, .val = 'q' },
612 			{ .name = "rx-depth", .has_arg = 1, .val = 'r' },
613 			{ .name = "iters",    .has_arg = 1, .val = 'n' },
614 			{ .name = "sl",       .has_arg = 1, .val = 'l' },
615 			{ .name = "events",   .has_arg = 0, .val = 'e' },
616 			{ .name = "gid-idx",  .has_arg = 1, .val = 'g' },
617 			{ 0 }
618 		};
619 
620 		c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:eg:", long_options, NULL);
621 		if (c == -1)
622 			break;
623 
624 		switch (c) {
625 		case 'p':
626 			port = strtol(optarg, NULL, 0);
627 			if (port < 0 || port > 65535) {
628 				usage(argv[0]);
629 				return 1;
630 			}
631 			break;
632 
633 		case 'd':
634 			ib_devname = strdup(optarg);
635 			break;
636 
637 		case 'i':
638 			ib_port = strtol(optarg, NULL, 0);
639 			if (ib_port < 0) {
640 				usage(argv[0]);
641 				return 1;
642 			}
643 			break;
644 
645 		case 's':
646 			size = strtol(optarg, NULL, 0);
647 			break;
648 
649 		case 'm':
650 			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
651 			if (mtu < 0) {
652 				usage(argv[0]);
653 				return 1;
654 			}
655 			break;
656 
657 		case 'q':
658 			num_qp = strtol(optarg, NULL, 0);
659 			break;
660 
661 		case 'r':
662 			rx_depth = strtol(optarg, NULL, 0);
663 			break;
664 
665 		case 'n':
666 			iters = strtol(optarg, NULL, 0);
667 			break;
668 
669 		case 'l':
670 			sl = strtol(optarg, NULL, 0);
671 			break;
672 
673 		case 'e':
674 			++use_event;
675 			break;
676 
677 		case 'g':
678 			gidx = strtol(optarg, NULL, 0);
679 			break;
680 
681 		default:
682 			usage(argv[0]);
683 			return 1;
684 		}
685 	}
686 
687 	if (optind == argc - 1)
688 		servername = strdup(argv[optind]);
689 	else if (optind < argc) {
690 		usage(argv[0]);
691 		return 1;
692 	}
693 
694 	if (num_qp > rx_depth) {
695 		fprintf(stderr, "rx_depth %d is too small for %d QPs -- "
696 			"must have at least one receive per QP.\n",
697 			rx_depth, num_qp);
698 		return 1;
699 	}
700 
701 	num_wc = num_qp + rx_depth;
702 	wc     = alloca(num_wc * sizeof *wc);
703 
704 	page_size = sysconf(_SC_PAGESIZE);
705 
706 	dev_list = ibv_get_device_list(NULL);
707 	if (!dev_list) {
708 		perror("Failed to get IB devices list");
709 		return 1;
710 	}
711 
712 	if (!ib_devname) {
713 		ib_dev = *dev_list;
714 		if (!ib_dev) {
715 			fprintf(stderr, "No IB devices found\n");
716 			return 1;
717 		}
718 	} else {
719 		int i;
720 		for (i = 0; dev_list[i]; ++i)
721 			if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
722 				break;
723 		ib_dev = dev_list[i];
724 		if (!ib_dev) {
725 			fprintf(stderr, "IB device %s not found\n", ib_devname);
726 			return 1;
727 		}
728 	}
729 
730 	ctx = pp_init_ctx(ib_dev, size, num_qp, rx_depth, ib_port, use_event);
731 	if (!ctx)
732 		return 1;
733 
734 	routs = pp_post_recv(ctx, ctx->rx_depth);
735 	if (routs < ctx->rx_depth) {
736 		fprintf(stderr, "Couldn't post receive (%d)\n", routs);
737 		return 1;
738 	}
739 
740 	if (use_event)
741 		if (ibv_req_notify_cq(ctx->cq, 0)) {
742 			fprintf(stderr, "Couldn't request CQ notification\n");
743 			return 1;
744 		}
745 
746 	memset(my_dest, 0, sizeof my_dest);
747 
748 	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
749 		fprintf(stderr, "Couldn't get port info\n");
750 		return 1;
751 	}
752 	for (i = 0; i < num_qp; ++i) {
753 		my_dest[i].qpn = ctx->qp[i]->qp_num;
754 		my_dest[i].psn = lrand48() & 0xffffff;
755 		my_dest[i].lid = ctx->portinfo.lid;
756 		if (ctx->portinfo.link_layer == IBV_LINK_LAYER_INFINIBAND && !my_dest[i].lid) {
757 			fprintf(stderr, "Couldn't get local LID\n");
758 			return 1;
759 		}
760 
761 		if (gidx >= 0) {
762 			if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest[i].gid)) {
763 				fprintf(stderr, "Could not get local gid for gid index %d\n", gidx);
764 				return 1;
765 			}
766 		} else
767 			memset(&my_dest[i].gid, 0, sizeof my_dest[i].gid);
768 
769 		inet_ntop(AF_INET6, &my_dest[i].gid, gid, sizeof gid);
770 		printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n",
771 		       my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn, gid);
772 	}
773 
774 	if (servername)
775 		rem_dest = pp_client_exch_dest(servername, port, my_dest);
776 	else
777 		rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl, my_dest, gidx);
778 
779 	if (!rem_dest)
780 		return 1;
781 
782 	inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid);
783 
784 	for (i = 0; i < num_qp; ++i) {
785 		inet_ntop(AF_INET6, &rem_dest[i].gid, gid, sizeof gid);
786 		printf("  remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n",
787 		       rem_dest[i].lid, rem_dest[i].qpn, rem_dest[i].psn, gid);
788 	}
789 
790 	if (servername)
791 		if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest, gidx))
792 			return 1;
793 
794 	if (servername)
795 		for (i = 0; i < num_qp; ++i) {
796 			if (pp_post_send(ctx, i)) {
797 				fprintf(stderr, "Couldn't post send\n");
798 				return 1;
799 			}
800 			ctx->pending[i] = PINGPONG_SEND_WRID | PINGPONG_RECV_WRID;
801 		}
802 	else
803 		for (i = 0; i < num_qp; ++i)
804 			ctx->pending[i] = PINGPONG_RECV_WRID;
805 
806 	if (gettimeofday(&start, NULL)) {
807 		perror("gettimeofday");
808 		return 1;
809 	}
810 
811 	rcnt = scnt = 0;
812 	while (rcnt < iters || scnt < iters) {
813 		if (use_event) {
814 			struct ibv_cq *ev_cq;
815 			void          *ev_ctx;
816 
817 			if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
818 				fprintf(stderr, "Failed to get cq_event\n");
819 				return 1;
820 			}
821 
822 			++num_cq_events;
823 
824 			if (ev_cq != ctx->cq) {
825 				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
826 				return 1;
827 			}
828 
829 			if (ibv_req_notify_cq(ctx->cq, 0)) {
830 				fprintf(stderr, "Couldn't request CQ notification\n");
831 				return 1;
832 			}
833 		}
834 
835 		{
836 			int ne, qp_ind;
837 
838 			do {
839 				ne = ibv_poll_cq(ctx->cq, num_wc, wc);
840 				if (ne < 0) {
841 					fprintf(stderr, "poll CQ failed %d\n", ne);
842 					return 1;
843 				}
844 			} while (!use_event && ne < 1);
845 
846 			for (i = 0; i < ne; ++i) {
847 				if (wc[i].status != IBV_WC_SUCCESS) {
848 					fprintf(stderr, "Failed status %s (%d) for wr_id %d\n",
849 						ibv_wc_status_str(wc[i].status),
850 						wc[i].status, (int) wc[i].wr_id);
851 					return 1;
852 				}
853 
854 				qp_ind = find_qp(wc[i].qp_num, ctx, num_qp);
855 				if (qp_ind < 0) {
856 					fprintf(stderr, "Couldn't find QPN %06x\n",
857 						wc[i].qp_num);
858 					return 1;
859 				}
860 
861 				switch ((int) wc[i].wr_id) {
862 				case PINGPONG_SEND_WRID:
863 					++scnt;
864 					break;
865 
866 				case PINGPONG_RECV_WRID:
867 					if (--routs <= num_qp) {
868 						routs += pp_post_recv(ctx, ctx->rx_depth - routs);
869 						if (routs < ctx->rx_depth) {
870 							fprintf(stderr,
871 								"Couldn't post receive (%d)\n",
872 								routs);
873 							return 1;
874 						}
875 					}
876 
877 					++rcnt;
878 					break;
879 
880 				default:
881 					fprintf(stderr, "Completion for unknown wr_id %d\n",
882 						(int) wc[i].wr_id);
883 					return 1;
884 				}
885 
886 				ctx->pending[qp_ind] &= ~(int) wc[i].wr_id;
887 				if (scnt < iters && !ctx->pending[qp_ind]) {
888 					if (pp_post_send(ctx, qp_ind)) {
889 						fprintf(stderr, "Couldn't post send\n");
890 						return 1;
891 					}
892 					ctx->pending[qp_ind] = PINGPONG_RECV_WRID |
893 							       PINGPONG_SEND_WRID;
894 				}
895 
896 			}
897 		}
898 	}
899 
900 	if (gettimeofday(&end, NULL)) {
901 		perror("gettimeofday");
902 		return 1;
903 	}
904 
905 	{
906 		float usec = (end.tv_sec - start.tv_sec) * 1000000 +
907 			(end.tv_usec - start.tv_usec);
908 		long long bytes = (long long) size * iters * 2;
909 
910 		printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
911 		       bytes, usec / 1000000., bytes * 8. / usec);
912 		printf("%d iters in %.2f seconds = %.2f usec/iter\n",
913 		       iters, usec / 1000000., usec / iters);
914 	}
915 
916 	ibv_ack_cq_events(ctx->cq, num_cq_events);
917 
918 	if (pp_close_ctx(ctx, num_qp))
919 		return 1;
920 
921 	ibv_free_device_list(dev_list);
922 	free(rem_dest);
923 
924 	return 0;
925 }
926