1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #define _GNU_SOURCE
33 #include <config.h>
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <unistd.h>
38 #include <string.h>
39 #include <sys/types.h>
40 #include <sys/socket.h>
41 #include <sys/time.h>
42 #include <netdb.h>
43 #include <stdlib.h>
44 #include <getopt.h>
45 #include <arpa/inet.h>
46 #include <time.h>
47 #include <inttypes.h>
48 
49 #include "pingpong.h"
50 
51 #include <sys/param.h>
52 
/*
 * Work request IDs attached to the receives and sends posted by this
 * program.  The two values occupy distinct bits because they are also
 * OR-ed into, and masked out of, pingpong_context.pending.
 */
enum {
	PINGPONG_RECV_WRID = 1,
	PINGPONG_SEND_WRID = 2,
};
57 
/* Set in main() from sysconf(_SC_PAGESIZE); alignment passed to memalign(). */
static int page_size;
/* Set by -o/--odp: the MR is registered with IBV_ACCESS_ON_DEMAND. */
static int use_odp;
/* Set by -t/--ts: an extended CQ with completion timestamps is created. */
static int use_ts;
61 
/*
 * All verbs resources owned by one side of the ping-pong.
 *
 * Filled in by pp_init_ctx() and released by pp_close_ctx().  Exactly one
 * member of cq_s is valid: cq_ex when use_ts is set, cq otherwise; use
 * pp_cq() to obtain a plain struct ibv_cq pointer in either case.
 */
struct pingpong_context {
	struct ibv_context	*context;
	struct ibv_comp_channel *channel;	/* NULL unless CQ events are used */
	struct ibv_pd		*pd;
	struct ibv_mr		*mr;		/* registration of buf */
	union {
		struct ibv_cq		*cq;
		struct ibv_cq_ex	*cq_ex;
	} cq_s;
	struct ibv_qp		*qp;
	void			*buf;		/* work buffer of 'size' bytes */
	int			 size;
	int			 send_flags;
	int			 rx_depth;	/* number of receives kept posted */
	int			 pending;	/* bitmask of PINGPONG_*_WRID still outstanding */
	struct ibv_port_attr     portinfo;
	uint64_t		 completion_timestamp_mask;	/* copied from the device attributes when use_ts is set */
};
80 
81 static struct ibv_cq *pp_cq(struct pingpong_context *ctx)
82 {
83 	return use_ts ? ibv_cq_ex_to_cq(ctx->cq_s.cq_ex) :
84 		ctx->cq_s.cq;
85 }
86 
/*
 * Addressing information for one endpoint.  The two sides exchange it over
 * a TCP socket as the text "lid:qpn:psn:gid" (hex fields) before the QP is
 * connected.
 */
struct pingpong_dest {
	int lid;
	int qpn;
	int psn;
	union ibv_gid gid;
};
93 
94 static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
95 			  enum ibv_mtu mtu, int sl,
96 			  struct pingpong_dest *dest, int sgid_idx)
97 {
98 	struct ibv_qp_attr attr = {
99 		.qp_state		= IBV_QPS_RTR,
100 		.path_mtu		= mtu,
101 		.dest_qp_num		= dest->qpn,
102 		.rq_psn			= dest->psn,
103 		.max_dest_rd_atomic	= 1,
104 		.min_rnr_timer		= 12,
105 		.ah_attr		= {
106 			.is_global	= 0,
107 			.dlid		= dest->lid,
108 			.sl		= sl,
109 			.src_path_bits	= 0,
110 			.port_num	= port
111 		}
112 	};
113 
114 	if (dest->gid.global.interface_id) {
115 		attr.ah_attr.is_global = 1;
116 		attr.ah_attr.grh.hop_limit = 1;
117 		attr.ah_attr.grh.dgid = dest->gid;
118 		attr.ah_attr.grh.sgid_index = sgid_idx;
119 	}
120 	if (ibv_modify_qp(ctx->qp, &attr,
121 			  IBV_QP_STATE              |
122 			  IBV_QP_AV                 |
123 			  IBV_QP_PATH_MTU           |
124 			  IBV_QP_DEST_QPN           |
125 			  IBV_QP_RQ_PSN             |
126 			  IBV_QP_MAX_DEST_RD_ATOMIC |
127 			  IBV_QP_MIN_RNR_TIMER)) {
128 		fprintf(stderr, "Failed to modify QP to RTR\n");
129 		return 1;
130 	}
131 
132 	attr.qp_state	    = IBV_QPS_RTS;
133 	attr.timeout	    = 14;
134 	attr.retry_cnt	    = 7;
135 	attr.rnr_retry	    = 7;
136 	attr.sq_psn	    = my_psn;
137 	attr.max_rd_atomic  = 1;
138 	if (ibv_modify_qp(ctx->qp, &attr,
139 			  IBV_QP_STATE              |
140 			  IBV_QP_TIMEOUT            |
141 			  IBV_QP_RETRY_CNT          |
142 			  IBV_QP_RNR_RETRY          |
143 			  IBV_QP_SQ_PSN             |
144 			  IBV_QP_MAX_QP_RD_ATOMIC)) {
145 		fprintf(stderr, "Failed to modify QP to RTS\n");
146 		return 1;
147 	}
148 
149 	return 0;
150 }
151 
/*
 * Client side of the out-of-band address exchange.
 *
 * Connects to @servername:@port over TCP, sends @my_dest as the text
 * "lid:qpn:psn:gid", reads the server's address in the same format and
 * acknowledges it with "done".
 *
 * Returns a malloc()ed pingpong_dest describing the server (caller frees),
 * or NULL on any failure.
 */
static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
						 const struct pingpong_dest *my_dest)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_family   = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int sockfd = -1;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(servername, service, &hints, &res);

	/* Any non-zero return is an error and leaves res unset. */
	if (n != 0) {
		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
		free(service);
		return NULL;
	}

	/* Try each resolved address until one connects. */
	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo_null(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
		return NULL;
	}

	gid_to_wire_gid(&my_dest->gid, gid);
	/* Bounded so that an out-of-range field cannot overrun msg. */
	snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest->lid,
		 my_dest->qpn, my_dest->psn, gid);
	if (write(sockfd, msg, sizeof msg) != sizeof msg) {
		fprintf(stderr, "Couldn't send local address\n");
		goto out;
	}

	if (read(sockfd, msg, sizeof msg) != sizeof msg ||
	    write(sockfd, "done", sizeof "done") != sizeof "done") {
		perror("client read/write");
		fprintf(stderr, "Couldn't read/write remote address\n");
		goto out;
	}

	rem_dest = malloc(sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	/*
	 * The peer's bytes are untrusted: force termination and bound the
	 * GID field to the 32 hex digits that gid[] can hold.
	 */
	msg[sizeof msg - 1] = '\0';
	if (sscanf(msg, "%x:%x:%x:%32s", &rem_dest->lid, &rem_dest->qpn,
		   &rem_dest->psn, gid) != 4) {
		fprintf(stderr, "Couldn't parse remote address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}
	wire_gid_to_gid(gid, &rem_dest->gid);

out:
	close(sockfd);
	return rem_dest;
}
223 
/*
 * Server side of the out-of-band address exchange.
 *
 * Listens on TCP @port, accepts one connection, reads the client's
 * "lid:qpn:psn:gid" message, connects the local QP to it with
 * pp_connect_ctx(), then replies with @my_dest and waits for the client's
 * "done" acknowledgement.
 *
 * Returns a malloc()ed pingpong_dest describing the client (caller frees),
 * or NULL on any failure.
 */
static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
						 int ib_port, enum ibv_mtu mtu,
						 int port, int sl,
						 const struct pingpong_dest *my_dest,
						 int sgid_idx)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_flags    = AI_PASSIVE,
		.ai_family   = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int sockfd = -1, connfd;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(NULL, service, &hints, &res);

	/* Any non-zero return is an error and leaves res unset. */
	if (n != 0) {
		fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
		free(service);
		return NULL;
	}

	/* Bind to the first usable local address. */
	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			n = 1;

			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);

			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo_null(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		return NULL;
	}

	if (listen(sockfd, 1) < 0) {
		perror("listen() failed");
		close(sockfd);
		return NULL;
	}
	connfd = accept(sockfd, NULL, NULL);
	/* Only one peer is served, so the listening socket is no longer needed. */
	close(sockfd);
	if (connfd < 0) {
		fprintf(stderr, "accept() failed\n");
		return NULL;
	}

	n = read(connfd, msg, sizeof msg);
	if (n != sizeof msg) {
		perror("server read");
		fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
		goto out;
	}

	rem_dest = malloc(sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	/*
	 * The peer's bytes are untrusted: force termination and bound the
	 * GID field to the 32 hex digits that gid[] can hold.
	 */
	msg[sizeof msg - 1] = '\0';
	if (sscanf(msg, "%x:%x:%x:%32s", &rem_dest->lid, &rem_dest->qpn,
		   &rem_dest->psn, gid) != 4) {
		fprintf(stderr, "Couldn't parse remote address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}
	wire_gid_to_gid(gid, &rem_dest->gid);

	if (pp_connect_ctx(ctx, ib_port, my_dest->psn, mtu, sl, rem_dest,
								sgid_idx)) {
		fprintf(stderr, "Couldn't connect to remote QP\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}

	gid_to_wire_gid(&my_dest->gid, gid);
	/* Bounded so that an out-of-range field cannot overrun msg. */
	snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest->lid,
		 my_dest->qpn, my_dest->psn, gid);
	if (write(connfd, msg, sizeof msg) != sizeof msg ||
	    read(connfd, msg, sizeof msg) != sizeof "done") {
		fprintf(stderr, "Couldn't send/recv local address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}

out:
	close(connfd);
	return rem_dest;
}
328 
329 static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
330 					    int rx_depth, int port,
331 					    int use_event)
332 {
333 	struct pingpong_context *ctx;
334 	int access_flags = IBV_ACCESS_LOCAL_WRITE;
335 
336 	ctx = calloc(1, sizeof *ctx);
337 	if (!ctx)
338 		return NULL;
339 
340 	ctx->size       = size;
341 	ctx->send_flags = IBV_SEND_SIGNALED;
342 	ctx->rx_depth   = rx_depth;
343 
344 	ctx->buf = memalign(page_size, size);
345 	if (!ctx->buf) {
346 		fprintf(stderr, "Couldn't allocate work buf.\n");
347 		goto clean_ctx;
348 	}
349 
350 	/* FIXME memset(ctx->buf, 0, size); */
351 	memset(ctx->buf, 0x7b, size);
352 
353 	ctx->context = ibv_open_device(ib_dev);
354 	if (!ctx->context) {
355 		fprintf(stderr, "Couldn't get context for %s\n",
356 			ibv_get_device_name(ib_dev));
357 		goto clean_buffer;
358 	}
359 
360 	if (use_event) {
361 		ctx->channel = ibv_create_comp_channel(ctx->context);
362 		if (!ctx->channel) {
363 			fprintf(stderr, "Couldn't create completion channel\n");
364 			goto clean_device;
365 		}
366 	} else
367 		ctx->channel = NULL;
368 
369 	ctx->pd = ibv_alloc_pd(ctx->context);
370 	if (!ctx->pd) {
371 		fprintf(stderr, "Couldn't allocate PD\n");
372 		goto clean_comp_channel;
373 	}
374 
375 	if (use_odp || use_ts) {
376 		const uint32_t rc_caps_mask = IBV_ODP_SUPPORT_SEND |
377 					      IBV_ODP_SUPPORT_RECV;
378 		struct ibv_device_attr_ex attrx;
379 
380 		if (ibv_query_device_ex(ctx->context, NULL, &attrx)) {
381 			fprintf(stderr, "Couldn't query device for its features\n");
382 			goto clean_comp_channel;
383 		}
384 
385 		if (use_odp) {
386 			if (!(attrx.odp_caps.general_caps & IBV_ODP_SUPPORT) ||
387 			    (attrx.odp_caps.per_transport_caps.rc_odp_caps & rc_caps_mask) != rc_caps_mask) {
388 				fprintf(stderr, "The device isn't ODP capable or does not support RC send and receive with ODP\n");
389 				goto clean_comp_channel;
390 			}
391 			access_flags |= IBV_ACCESS_ON_DEMAND;
392 		}
393 
394 		if (use_ts) {
395 			if (!attrx.completion_timestamp_mask) {
396 				fprintf(stderr, "The device isn't completion timestamp capable\n");
397 				goto clean_comp_channel;
398 			}
399 			ctx->completion_timestamp_mask = attrx.completion_timestamp_mask;
400 		}
401 	}
402 	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, access_flags);
403 
404 	if (!ctx->mr) {
405 		fprintf(stderr, "Couldn't register MR\n");
406 		goto clean_pd;
407 	}
408 
409 	if (use_ts) {
410 		struct ibv_cq_init_attr_ex attr_ex = {
411 			.cqe = rx_depth + 1,
412 			.cq_context = NULL,
413 			.channel = ctx->channel,
414 			.comp_vector = 0,
415 			.wc_flags = IBV_WC_EX_WITH_COMPLETION_TIMESTAMP
416 		};
417 
418 		ctx->cq_s.cq_ex = ibv_create_cq_ex(ctx->context, &attr_ex);
419 	} else {
420 		ctx->cq_s.cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL,
421 					     ctx->channel, 0);
422 	}
423 
424 	if (!pp_cq(ctx)) {
425 		fprintf(stderr, "Couldn't create CQ\n");
426 		goto clean_mr;
427 	}
428 
429 	{
430 		struct ibv_qp_attr attr;
431 		struct ibv_qp_init_attr init_attr = {
432 			.send_cq = pp_cq(ctx),
433 			.recv_cq = pp_cq(ctx),
434 			.cap     = {
435 				.max_send_wr  = 1,
436 				.max_recv_wr  = rx_depth,
437 				.max_send_sge = 1,
438 				.max_recv_sge = 1
439 			},
440 			.qp_type = IBV_QPT_RC
441 		};
442 
443 		ctx->qp = ibv_create_qp(ctx->pd, &init_attr);
444 		if (!ctx->qp)  {
445 			fprintf(stderr, "Couldn't create QP\n");
446 			goto clean_cq;
447 		}
448 
449 		ibv_query_qp(ctx->qp, &attr, IBV_QP_CAP, &init_attr);
450 		if (init_attr.cap.max_inline_data >= size) {
451 			ctx->send_flags |= IBV_SEND_INLINE;
452 		}
453 	}
454 
455 	{
456 		struct ibv_qp_attr attr = {
457 			.qp_state        = IBV_QPS_INIT,
458 			.pkey_index      = 0,
459 			.port_num        = port,
460 			.qp_access_flags = 0
461 		};
462 
463 		if (ibv_modify_qp(ctx->qp, &attr,
464 				  IBV_QP_STATE              |
465 				  IBV_QP_PKEY_INDEX         |
466 				  IBV_QP_PORT               |
467 				  IBV_QP_ACCESS_FLAGS)) {
468 			fprintf(stderr, "Failed to modify QP to INIT\n");
469 			goto clean_qp;
470 		}
471 	}
472 
473 	return ctx;
474 
475 clean_qp:
476 	ibv_destroy_qp(ctx->qp);
477 
478 clean_cq:
479 	ibv_destroy_cq(pp_cq(ctx));
480 
481 clean_mr:
482 	ibv_dereg_mr(ctx->mr);
483 
484 clean_pd:
485 	ibv_dealloc_pd(ctx->pd);
486 
487 clean_comp_channel:
488 	if (ctx->channel)
489 		ibv_destroy_comp_channel(ctx->channel);
490 
491 clean_device:
492 	ibv_close_device(ctx->context);
493 
494 clean_buffer:
495 	free(ctx->buf);
496 
497 clean_ctx:
498 	free(ctx);
499 
500 	return NULL;
501 }
502 
503 static int pp_close_ctx(struct pingpong_context *ctx)
504 {
505 	if (ibv_destroy_qp(ctx->qp)) {
506 		fprintf(stderr, "Couldn't destroy QP\n");
507 		return 1;
508 	}
509 
510 	if (ibv_destroy_cq(pp_cq(ctx))) {
511 		fprintf(stderr, "Couldn't destroy CQ\n");
512 		return 1;
513 	}
514 
515 	if (ibv_dereg_mr(ctx->mr)) {
516 		fprintf(stderr, "Couldn't deregister MR\n");
517 		return 1;
518 	}
519 
520 	if (ibv_dealloc_pd(ctx->pd)) {
521 		fprintf(stderr, "Couldn't deallocate PD\n");
522 		return 1;
523 	}
524 
525 	if (ctx->channel) {
526 		if (ibv_destroy_comp_channel(ctx->channel)) {
527 			fprintf(stderr, "Couldn't destroy completion channel\n");
528 			return 1;
529 		}
530 	}
531 
532 	if (ibv_close_device(ctx->context)) {
533 		fprintf(stderr, "Couldn't release context\n");
534 		return 1;
535 	}
536 
537 	free(ctx->buf);
538 	free(ctx);
539 
540 	return 0;
541 }
542 
543 static int pp_post_recv(struct pingpong_context *ctx, int n)
544 {
545 	struct ibv_sge list = {
546 		.addr	= (uintptr_t) ctx->buf,
547 		.length = ctx->size,
548 		.lkey	= ctx->mr->lkey
549 	};
550 	struct ibv_recv_wr wr = {
551 		.wr_id	    = PINGPONG_RECV_WRID,
552 		.sg_list    = &list,
553 		.num_sge    = 1,
554 	};
555 	struct ibv_recv_wr *bad_wr;
556 	int i;
557 
558 	for (i = 0; i < n; ++i)
559 		if (ibv_post_recv(ctx->qp, &wr, &bad_wr))
560 			break;
561 
562 	return i;
563 }
564 
565 static int pp_post_send(struct pingpong_context *ctx)
566 {
567 	struct ibv_sge list = {
568 		.addr	= (uintptr_t) ctx->buf,
569 		.length = ctx->size,
570 		.lkey	= ctx->mr->lkey
571 	};
572 	struct ibv_send_wr wr = {
573 		.wr_id	    = PINGPONG_SEND_WRID,
574 		.sg_list    = &list,
575 		.num_sge    = 1,
576 		.opcode     = IBV_WR_SEND,
577 		.send_flags = ctx->send_flags,
578 	};
579 	struct ibv_send_wr *bad_wr;
580 
581 	return ibv_post_send(ctx->qp, &wr, &bad_wr);
582 }
583 
/*
 * Running statistics over the timestamp difference between consecutive
 * receive completions; updated by parse_single_wc() when use_ts is set and
 * printed at the end of main().
 */
struct ts_params {
	uint64_t		 comp_recv_max_time_delta;	/* largest delta seen */
	uint64_t		 comp_recv_min_time_delta;	/* smallest delta seen */
	uint64_t		 comp_recv_total_time_delta;	/* sum of all deltas */
	uint64_t		 comp_recv_prev_time;		/* timestamp of the previous receive completion */
	int			 last_comp_with_ts;		/* non-zero once comp_recv_prev_time is valid */
	unsigned int		 comp_with_time_iters;		/* number of deltas accumulated */
};
592 
593 static inline int parse_single_wc(struct pingpong_context *ctx, int *scnt,
594 				  int *rcnt, int *routs, int iters,
595 				  uint64_t wr_id, enum ibv_wc_status status,
596 				  uint64_t completion_timestamp,
597 				  struct ts_params *ts)
598 {
599 	if (status != IBV_WC_SUCCESS) {
600 		fprintf(stderr, "Failed status %s (%d) for wr_id %d\n",
601 			ibv_wc_status_str(status),
602 			status, (int)wr_id);
603 		return 1;
604 	}
605 
606 	switch ((int)wr_id) {
607 	case PINGPONG_SEND_WRID:
608 		++(*scnt);
609 		break;
610 
611 	case PINGPONG_RECV_WRID:
612 		if (--(*routs) <= 1) {
613 			*routs += pp_post_recv(ctx, ctx->rx_depth - *routs);
614 			if (*routs < ctx->rx_depth) {
615 				fprintf(stderr,
616 					"Couldn't post receive (%d)\n",
617 					*routs);
618 				return 1;
619 			}
620 		}
621 
622 		++(*rcnt);
623 		if (use_ts) {
624 			if (ts->last_comp_with_ts) {
625 				uint64_t delta;
626 
627 				/* checking whether the clock was wrapped around */
628 				if (completion_timestamp >= ts->comp_recv_prev_time)
629 					delta = completion_timestamp - ts->comp_recv_prev_time;
630 				else
631 					delta = ctx->completion_timestamp_mask - ts->comp_recv_prev_time +
632 						completion_timestamp + 1;
633 
634 				ts->comp_recv_max_time_delta = MAX(ts->comp_recv_max_time_delta, delta);
635 				ts->comp_recv_min_time_delta = MIN(ts->comp_recv_min_time_delta, delta);
636 				ts->comp_recv_total_time_delta += delta;
637 				ts->comp_with_time_iters++;
638 			}
639 
640 			ts->comp_recv_prev_time = completion_timestamp;
641 			ts->last_comp_with_ts = 1;
642 		} else {
643 			ts->last_comp_with_ts = 0;
644 		}
645 
646 		break;
647 
648 	default:
649 		fprintf(stderr, "Completion for unknown wr_id %d\n",
650 			(int)wr_id);
651 		return 1;
652 	}
653 
654 	ctx->pending &= ~(int)wr_id;
655 	if (*scnt < iters && !ctx->pending) {
656 		if (pp_post_send(ctx)) {
657 			fprintf(stderr, "Couldn't post send\n");
658 			return 1;
659 		}
660 		ctx->pending = PINGPONG_RECV_WRID |
661 			PINGPONG_SEND_WRID;
662 	}
663 
664 	return 0;
665 }
666 
/*
 * Print the command-line synopsis and option summary to stdout.
 * @argv0 is the program name shown in the synopsis lines.
 */
static void usage(const char *argv0)
{
	static const char *const option_lines[] = {
		"  -p, --port=<port>      listen on/connect to port <port> (default 18515)\n",
		"  -d, --ib-dev=<dev>     use IB device <dev> (default first device found)\n",
		"  -i, --ib-port=<port>   use port <port> of IB device (default 1)\n",
		"  -s, --size=<size>      size of message to exchange (default 4096)\n",
		"  -m, --mtu=<size>       path MTU (default 1024)\n",
		"  -r, --rx-depth=<dep>   number of receives to post at a time (default 500)\n",
		"  -n, --iters=<iters>    number of exchanges (default 1000)\n",
		"  -l, --sl=<sl>          service level value\n",
		"  -e, --events           sleep on CQ events (default poll)\n",
		"  -g, --gid-idx=<gid index> local port gid index\n",
		"  -o, --odp		    use on demand paging\n",
		"  -t, --ts	            get CQE with timestamp\n",
	};
	size_t idx;

	printf("Usage:\n");
	printf("  %s            start a server and wait for connection\n", argv0);
	printf("  %s <host>     connect to server at <host>\n", argv0);
	printf("\n");
	printf("Options:\n");

	/* The option lines contain no conversions, so emit them verbatim. */
	for (idx = 0; idx < sizeof option_lines / sizeof option_lines[0]; ++idx)
		fputs(option_lines[idx], stdout);
}
687 
688 int main(int argc, char *argv[])
689 {
690 	struct ibv_device      **dev_list;
691 	struct ibv_device	*ib_dev;
692 	struct pingpong_context *ctx;
693 	struct pingpong_dest     my_dest;
694 	struct pingpong_dest    *rem_dest;
695 	struct timeval           start, end;
696 	char                    *ib_devname = NULL;
697 	char                    *servername = NULL;
698 	unsigned int             port = 18515;
699 	int                      ib_port = 1;
700 	unsigned int             size = 4096;
701 	enum ibv_mtu		 mtu = IBV_MTU_1024;
702 	unsigned int             rx_depth = 500;
703 	unsigned int             iters = 1000;
704 	int                      use_event = 0;
705 	int                      routs;
706 	int                      rcnt, scnt;
707 	int                      num_cq_events = 0;
708 	int                      sl = 0;
709 	int			 gidx = -1;
710 	char			 gid[33];
711 	struct ts_params	 ts;
712 
713 	srand48(getpid() * time(NULL));
714 
715 	while (1) {
716 		int c;
717 
718 		static struct option long_options[] = {
719 			{ .name = "port",     .has_arg = 1, .val = 'p' },
720 			{ .name = "ib-dev",   .has_arg = 1, .val = 'd' },
721 			{ .name = "ib-port",  .has_arg = 1, .val = 'i' },
722 			{ .name = "size",     .has_arg = 1, .val = 's' },
723 			{ .name = "mtu",      .has_arg = 1, .val = 'm' },
724 			{ .name = "rx-depth", .has_arg = 1, .val = 'r' },
725 			{ .name = "iters",    .has_arg = 1, .val = 'n' },
726 			{ .name = "sl",       .has_arg = 1, .val = 'l' },
727 			{ .name = "events",   .has_arg = 0, .val = 'e' },
728 			{ .name = "gid-idx",  .has_arg = 1, .val = 'g' },
729 			{ .name = "odp",      .has_arg = 0, .val = 'o' },
730 			{ .name = "ts",       .has_arg = 0, .val = 't' },
731 			{}
732 		};
733 
734 		c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:ot",
735 				long_options, NULL);
736 
737 		if (c == -1)
738 			break;
739 
740 		switch (c) {
741 		case 'p':
742 			port = strtoul(optarg, NULL, 0);
743 			if (port > 65535) {
744 				usage(argv[0]);
745 				return 1;
746 			}
747 			break;
748 
749 		case 'd':
750 			ib_devname = strdupa(optarg);
751 			break;
752 
753 		case 'i':
754 			ib_port = strtol(optarg, NULL, 0);
755 			if (ib_port < 1) {
756 				usage(argv[0]);
757 				return 1;
758 			}
759 			break;
760 
761 		case 's':
762 			size = strtoul(optarg, NULL, 0);
763 			break;
764 
765 		case 'm':
766 			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
767 			if (mtu == 0) {
768 				usage(argv[0]);
769 				return 1;
770 			}
771 			break;
772 
773 		case 'r':
774 			rx_depth = strtoul(optarg, NULL, 0);
775 			break;
776 
777 		case 'n':
778 			iters = strtoul(optarg, NULL, 0);
779 			break;
780 
781 		case 'l':
782 			sl = strtol(optarg, NULL, 0);
783 			break;
784 
785 		case 'e':
786 			++use_event;
787 			break;
788 
789 		case 'g':
790 			gidx = strtol(optarg, NULL, 0);
791 			break;
792 
793 		case 'o':
794 			use_odp = 1;
795 			break;
796 		case 't':
797 			use_ts = 1;
798 			break;
799 
800 		default:
801 			usage(argv[0]);
802 			return 1;
803 		}
804 	}
805 
806 	if (optind == argc - 1)
807 		servername = strdupa(argv[optind]);
808 	else if (optind < argc) {
809 		usage(argv[0]);
810 		return 1;
811 	}
812 
813 	if (use_ts) {
814 		ts.comp_recv_max_time_delta = 0;
815 		ts.comp_recv_min_time_delta = 0xffffffff;
816 		ts.comp_recv_total_time_delta = 0;
817 		ts.comp_recv_prev_time = 0;
818 		ts.last_comp_with_ts = 0;
819 		ts.comp_with_time_iters = 0;
820 	}
821 
822 	page_size = sysconf(_SC_PAGESIZE);
823 
824 	dev_list = ibv_get_device_list(NULL);
825 	if (!dev_list) {
826 		perror("Failed to get IB devices list");
827 		return 1;
828 	}
829 
830 	if (!ib_devname) {
831 		ib_dev = *dev_list;
832 		if (!ib_dev) {
833 			fprintf(stderr, "No IB devices found\n");
834 			return 1;
835 		}
836 	} else {
837 		int i;
838 		for (i = 0; dev_list[i]; ++i)
839 			if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
840 				break;
841 		ib_dev = dev_list[i];
842 		if (!ib_dev) {
843 			fprintf(stderr, "IB device %s not found\n", ib_devname);
844 			return 1;
845 		}
846 	}
847 
848 	ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event);
849 	if (!ctx)
850 		return 1;
851 
852 	routs = pp_post_recv(ctx, ctx->rx_depth);
853 	if (routs < ctx->rx_depth) {
854 		fprintf(stderr, "Couldn't post receive (%d)\n", routs);
855 		return 1;
856 	}
857 
858 	if (use_event)
859 		if (ibv_req_notify_cq(pp_cq(ctx), 0)) {
860 			fprintf(stderr, "Couldn't request CQ notification\n");
861 			return 1;
862 		}
863 
864 
865 	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
866 		fprintf(stderr, "Couldn't get port info\n");
867 		return 1;
868 	}
869 
870 	my_dest.lid = ctx->portinfo.lid;
871 	if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET &&
872 							!my_dest.lid) {
873 		fprintf(stderr, "Couldn't get local LID\n");
874 		return 1;
875 	}
876 
877 	if (gidx >= 0) {
878 		if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest.gid)) {
879 			fprintf(stderr, "can't read sgid of index %d\n", gidx);
880 			return 1;
881 		}
882 	} else
883 		memset(&my_dest.gid, 0, sizeof my_dest.gid);
884 
885 	my_dest.qpn = ctx->qp->qp_num;
886 	my_dest.psn = lrand48() & 0xffffff;
887 	inet_ntop(AF_INET6, &my_dest.gid, gid, sizeof gid);
888 	printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n",
889 	       my_dest.lid, my_dest.qpn, my_dest.psn, gid);
890 
891 
892 	if (servername)
893 		rem_dest = pp_client_exch_dest(servername, port, &my_dest);
894 	else
895 		rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl,
896 								&my_dest, gidx);
897 
898 	if (!rem_dest)
899 		return 1;
900 
901 	inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid);
902 	printf("  remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n",
903 	       rem_dest->lid, rem_dest->qpn, rem_dest->psn, gid);
904 
905 	if (servername)
906 		if (pp_connect_ctx(ctx, ib_port, my_dest.psn, mtu, sl, rem_dest,
907 					gidx))
908 			return 1;
909 
910 	ctx->pending = PINGPONG_RECV_WRID;
911 
912 	if (servername) {
913 		if (pp_post_send(ctx)) {
914 			fprintf(stderr, "Couldn't post send\n");
915 			return 1;
916 		}
917 		ctx->pending |= PINGPONG_SEND_WRID;
918 	}
919 
920 	if (gettimeofday(&start, NULL)) {
921 		perror("gettimeofday");
922 		return 1;
923 	}
924 
925 	rcnt = scnt = 0;
926 	while (rcnt < iters || scnt < iters) {
927 		int ret;
928 
929 		if (use_event) {
930 			struct ibv_cq *ev_cq;
931 			void          *ev_ctx;
932 
933 			if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
934 				fprintf(stderr, "Failed to get cq_event\n");
935 				return 1;
936 			}
937 
938 			++num_cq_events;
939 
940 			if (ev_cq != pp_cq(ctx)) {
941 				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
942 				return 1;
943 			}
944 
945 			if (ibv_req_notify_cq(pp_cq(ctx), 0)) {
946 				fprintf(stderr, "Couldn't request CQ notification\n");
947 				return 1;
948 			}
949 		}
950 
951 		if (use_ts) {
952 			struct ibv_poll_cq_attr attr = {};
953 
954 			do {
955 				ret = ibv_start_poll(ctx->cq_s.cq_ex, &attr);
956 			} while (!use_event && ret == ENOENT);
957 
958 			if (ret) {
959 				fprintf(stderr, "poll CQ failed %d\n", ret);
960 				return ret;
961 			}
962 			ret = parse_single_wc(ctx, &scnt, &rcnt, &routs,
963 					      iters,
964 					      ctx->cq_s.cq_ex->wr_id,
965 					      ctx->cq_s.cq_ex->status,
966 					      ibv_wc_read_completion_ts(ctx->cq_s.cq_ex),
967 					      &ts);
968 			if (ret) {
969 				ibv_end_poll(ctx->cq_s.cq_ex);
970 				return ret;
971 			}
972 			ret = ibv_next_poll(ctx->cq_s.cq_ex);
973 			if (!ret)
974 				ret = parse_single_wc(ctx, &scnt, &rcnt, &routs,
975 						      iters,
976 						      ctx->cq_s.cq_ex->wr_id,
977 						      ctx->cq_s.cq_ex->status,
978 						      ibv_wc_read_completion_ts(ctx->cq_s.cq_ex),
979 						      &ts);
980 			ibv_end_poll(ctx->cq_s.cq_ex);
981 			if (ret && ret != ENOENT) {
982 				fprintf(stderr, "poll CQ failed %d\n", ret);
983 				return ret;
984 			}
985 		} else {
986 			int ne, i;
987 			struct ibv_wc wc[2];
988 
989 			do {
990 				ne = ibv_poll_cq(pp_cq(ctx), 2, wc);
991 				if (ne < 0) {
992 					fprintf(stderr, "poll CQ failed %d\n", ne);
993 					return 1;
994 				}
995 			} while (!use_event && ne < 1);
996 
997 			for (i = 0; i < ne; ++i) {
998 				ret = parse_single_wc(ctx, &scnt, &rcnt, &routs,
999 						      iters,
1000 						      wc[i].wr_id,
1001 						      wc[i].status,
1002 						      0, &ts);
1003 				if (ret) {
1004 					fprintf(stderr, "parse WC failed %d\n", ne);
1005 					return 1;
1006 				}
1007 			}
1008 		}
1009 	}
1010 
1011 	if (gettimeofday(&end, NULL)) {
1012 		perror("gettimeofday");
1013 		return 1;
1014 	}
1015 
1016 	{
1017 		float usec = (end.tv_sec - start.tv_sec) * 1000000 +
1018 			(end.tv_usec - start.tv_usec);
1019 		long long bytes = (long long) size * iters * 2;
1020 
1021 		printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
1022 		       bytes, usec / 1000000., bytes * 8. / usec);
1023 		printf("%d iters in %.2f seconds = %.2f usec/iter\n",
1024 		       iters, usec / 1000000., usec / iters);
1025 
1026 		if (use_ts && ts.comp_with_time_iters) {
1027 			printf("Max receive completion clock cycles = %" PRIu64 "\n",
1028 			       ts.comp_recv_max_time_delta);
1029 			printf("Min receive completion clock cycles = %" PRIu64 "\n",
1030 			       ts.comp_recv_min_time_delta);
1031 			printf("Average receive completion clock cycles = %f\n",
1032 			       (double)ts.comp_recv_total_time_delta / ts.comp_with_time_iters);
1033 		}
1034 	}
1035 
1036 	ibv_ack_cq_events(pp_cq(ctx), num_cq_events);
1037 
1038 	if (pp_close_ctx(ctx))
1039 		return 1;
1040 
1041 	ibv_free_device_list(dev_list);
1042 	free(rem_dest);
1043 
1044 	return 0;
1045 }
1046