1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #define _GNU_SOURCE
33 #include <config.h>
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <unistd.h>
38 #include <string.h>
39 #include <sys/types.h>
40 #include <sys/socket.h>
41 #include <sys/time.h>
42 #include <netdb.h>
43 #include <stdlib.h>
44 #include <getopt.h>
45 #include <arpa/inet.h>
46 #include <time.h>
47 #include <inttypes.h>
48 
49 #include "pingpong.h"
50 
51 #include <sys/param.h>
52 
/*
 * Work-request IDs.  Each ID occupies its own bit so that the set of
 * outstanding requests can be tracked as a mask (see the "pending"
 * member of struct pingpong_context).
 */
enum {
	PINGPONG_RECV_WRID = 1 << 0,
	PINGPONG_SEND_WRID = 1 << 1,
};
57 
/* Alignment used for the work buffer; main() sets it from sysconf(_SC_PAGESIZE). */
static int page_size;
/* Set by -o/--odp: register the buffer with IBV_ACCESS_ON_DEMAND. */
static int use_odp;
/* Set by -t/--ts: create an extended CQ that reports completion timestamps. */
static int use_ts;
61 
/*
 * All verbs resources and bookkeeping for one ping-pong endpoint.
 * Created by pp_init_ctx() and released by pp_close_ctx().
 */
struct pingpong_context {
	struct ibv_context	*context;
	/* Completion channel; NULL when the CQ is polled instead of event driven. */
	struct ibv_comp_channel *channel;
	struct ibv_pd		*pd;
	/* Registration covering buf. */
	struct ibv_mr		*mr;
	/*
	 * cq_ex is the valid member when use_ts is set, cq otherwise;
	 * pp_cq() returns the right one as a plain struct ibv_cq pointer.
	 */
	union {
		struct ibv_cq		*cq;
		struct ibv_cq_ex	*cq_ex;
	} cq_s;
	struct ibv_qp		*qp;
	/* Message buffer of "size" bytes, used for both sends and receives. */
	void			*buf;
	int			 size;
	int			 send_flags;
	/* Number of receive work requests kept posted. */
	int			 rx_depth;
	/* Mask of PINGPONG_*_WRID values whose completion is still awaited. */
	int			 pending;
	struct ibv_port_attr     portinfo;
	/* Copied from the device attributes; used to handle timestamp wrap-around. */
	uint64_t		 completion_timestamp_mask;
};
80 
81 static struct ibv_cq *pp_cq(struct pingpong_context *ctx)
82 {
83 	return use_ts ? ibv_cq_ex_to_cq(ctx->cq_s.cq_ex) :
84 		ctx->cq_s.cq;
85 }
86 
/*
 * Addressing information for one side of the connection.  The two peers
 * swap these over the TCP side channel (see pp_client_exch_dest() and
 * pp_server_exch_dest()).
 */
struct pingpong_dest {
	int lid;		/* port LID */
	int qpn;		/* queue pair number */
	int psn;		/* initial packet sequence number */
	union ibv_gid gid;	/* port GID; all zero when no GID index was given */
};
93 
94 static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
95 			  enum ibv_mtu mtu, int sl,
96 			  struct pingpong_dest *dest, int sgid_idx)
97 {
98 	struct ibv_qp_attr attr = {
99 		.qp_state		= IBV_QPS_RTR,
100 		.path_mtu		= mtu,
101 		.dest_qp_num		= dest->qpn,
102 		.rq_psn			= dest->psn,
103 		.max_dest_rd_atomic	= 1,
104 		.min_rnr_timer		= 12,
105 		.ah_attr		= {
106 			.is_global	= 0,
107 			.dlid		= dest->lid,
108 			.sl		= sl,
109 			.src_path_bits	= 0,
110 			.port_num	= port
111 		}
112 	};
113 
114 	if (dest->gid.global.interface_id) {
115 		attr.ah_attr.is_global = 1;
116 		attr.ah_attr.grh.hop_limit = 1;
117 		attr.ah_attr.grh.dgid = dest->gid;
118 		attr.ah_attr.grh.sgid_index = sgid_idx;
119 	}
120 	if (ibv_modify_qp(ctx->qp, &attr,
121 			  IBV_QP_STATE              |
122 			  IBV_QP_AV                 |
123 			  IBV_QP_PATH_MTU           |
124 			  IBV_QP_DEST_QPN           |
125 			  IBV_QP_RQ_PSN             |
126 			  IBV_QP_MAX_DEST_RD_ATOMIC |
127 			  IBV_QP_MIN_RNR_TIMER)) {
128 		fprintf(stderr, "Failed to modify QP to RTR\n");
129 		return 1;
130 	}
131 
132 	attr.qp_state	    = IBV_QPS_RTS;
133 	attr.timeout	    = 14;
134 	attr.retry_cnt	    = 7;
135 	attr.rnr_retry	    = 7;
136 	attr.sq_psn	    = my_psn;
137 	attr.max_rd_atomic  = 1;
138 	if (ibv_modify_qp(ctx->qp, &attr,
139 			  IBV_QP_STATE              |
140 			  IBV_QP_TIMEOUT            |
141 			  IBV_QP_RETRY_CNT          |
142 			  IBV_QP_RNR_RETRY          |
143 			  IBV_QP_SQ_PSN             |
144 			  IBV_QP_MAX_QP_RD_ATOMIC)) {
145 		fprintf(stderr, "Failed to modify QP to RTS\n");
146 		return 1;
147 	}
148 
149 	return 0;
150 }
151 
152 static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
153 						 const struct pingpong_dest *my_dest)
154 {
155 	struct addrinfo *res, *t;
156 	struct addrinfo hints = {
157 		.ai_family   = AF_UNSPEC,
158 		.ai_socktype = SOCK_STREAM
159 	};
160 	char *service;
161 	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
162 	int n;
163 	int sockfd = -1;
164 	struct pingpong_dest *rem_dest = NULL;
165 	char gid[33];
166 
167 	if (asprintf(&service, "%d", port) < 0)
168 		return NULL;
169 
170 	n = getaddrinfo(servername, service, &hints, &res);
171 
172 	if (n < 0) {
173 		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
174 		free(service);
175 		return NULL;
176 	}
177 
178 	for (t = res; t; t = t->ai_next) {
179 		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
180 		if (sockfd >= 0) {
181 			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
182 				break;
183 			close(sockfd);
184 			sockfd = -1;
185 		}
186 	}
187 
188 	freeaddrinfo_null(res);
189 	free(service);
190 
191 	if (sockfd < 0) {
192 		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
193 		return NULL;
194 	}
195 
196 	gid_to_wire_gid(&my_dest->gid, gid);
197 	sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn,
198 							my_dest->psn, gid);
199 	if (write(sockfd, msg, sizeof msg) != sizeof msg) {
200 		fprintf(stderr, "Couldn't send local address\n");
201 		goto out;
202 	}
203 
204 	if (read(sockfd, msg, sizeof msg) != sizeof msg ||
205 	    write(sockfd, "done", sizeof "done") != sizeof "done") {
206 		perror("client read/write");
207 		fprintf(stderr, "Couldn't read/write remote address\n");
208 		goto out;
209 	}
210 
211 	rem_dest = malloc(sizeof *rem_dest);
212 	if (!rem_dest)
213 		goto out;
214 
215 	sscanf(msg, "%x:%x:%x:%s", &rem_dest->lid, &rem_dest->qpn,
216 						&rem_dest->psn, gid);
217 	wire_gid_to_gid(gid, &rem_dest->gid);
218 
219 out:
220 	close(sockfd);
221 	return rem_dest;
222 }
223 
224 static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
225 						 int ib_port, enum ibv_mtu mtu,
226 						 int port, int sl,
227 						 const struct pingpong_dest *my_dest,
228 						 int sgid_idx)
229 {
230 	struct addrinfo *res, *t;
231 	struct addrinfo hints = {
232 		.ai_flags    = AI_PASSIVE,
233 		.ai_family   = AF_INET,
234 		.ai_socktype = SOCK_STREAM
235 	};
236 	char *service;
237 	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
238 	int n;
239 	int sockfd = -1, connfd;
240 	struct pingpong_dest *rem_dest = NULL;
241 	char gid[33];
242 
243 	if (asprintf(&service, "%d", port) < 0)
244 		return NULL;
245 
246 	n = getaddrinfo(NULL, service, &hints, &res);
247 
248 	if (n < 0) {
249 		fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
250 		free(service);
251 		return NULL;
252 	}
253 
254 	for (t = res; t; t = t->ai_next) {
255 		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
256 		if (sockfd >= 0) {
257 			n = 1;
258 
259 			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
260 
261 			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
262 				break;
263 			close(sockfd);
264 			sockfd = -1;
265 		}
266 	}
267 
268 	freeaddrinfo_null(res);
269 	free(service);
270 
271 	if (sockfd < 0) {
272 		fprintf(stderr, "Couldn't listen to port %d\n", port);
273 		return NULL;
274 	}
275 
276 	listen(sockfd, 1);
277 	connfd = accept(sockfd, NULL, NULL);
278 	close(sockfd);
279 	if (connfd < 0) {
280 		fprintf(stderr, "accept() failed\n");
281 		return NULL;
282 	}
283 
284 	n = read(connfd, msg, sizeof msg);
285 	if (n != sizeof msg) {
286 		perror("server read");
287 		fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
288 		goto out;
289 	}
290 
291 	rem_dest = malloc(sizeof *rem_dest);
292 	if (!rem_dest)
293 		goto out;
294 
295 	sscanf(msg, "%x:%x:%x:%s", &rem_dest->lid, &rem_dest->qpn,
296 							&rem_dest->psn, gid);
297 	wire_gid_to_gid(gid, &rem_dest->gid);
298 
299 	if (pp_connect_ctx(ctx, ib_port, my_dest->psn, mtu, sl, rem_dest,
300 								sgid_idx)) {
301 		fprintf(stderr, "Couldn't connect to remote QP\n");
302 		free(rem_dest);
303 		rem_dest = NULL;
304 		goto out;
305 	}
306 
307 
308 	gid_to_wire_gid(&my_dest->gid, gid);
309 	sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn,
310 							my_dest->psn, gid);
311 	if (write(connfd, msg, sizeof msg) != sizeof msg ||
312 	    read(connfd, msg, sizeof msg) != sizeof "done") {
313 		fprintf(stderr, "Couldn't send/recv local address\n");
314 		free(rem_dest);
315 		rem_dest = NULL;
316 		goto out;
317 	}
318 
319 
320 out:
321 	close(connfd);
322 	return rem_dest;
323 }
324 
325 static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
326 					    int rx_depth, int port,
327 					    int use_event)
328 {
329 	struct pingpong_context *ctx;
330 	int access_flags = IBV_ACCESS_LOCAL_WRITE;
331 
332 	ctx = calloc(1, sizeof *ctx);
333 	if (!ctx)
334 		return NULL;
335 
336 	ctx->size       = size;
337 	ctx->send_flags = IBV_SEND_SIGNALED;
338 	ctx->rx_depth   = rx_depth;
339 
340 	ctx->buf = memalign(page_size, size);
341 	if (!ctx->buf) {
342 		fprintf(stderr, "Couldn't allocate work buf.\n");
343 		goto clean_ctx;
344 	}
345 
346 	/* FIXME memset(ctx->buf, 0, size); */
347 	memset(ctx->buf, 0x7b, size);
348 
349 	ctx->context = ibv_open_device(ib_dev);
350 	if (!ctx->context) {
351 		fprintf(stderr, "Couldn't get context for %s\n",
352 			ibv_get_device_name(ib_dev));
353 		goto clean_buffer;
354 	}
355 
356 	if (use_event) {
357 		ctx->channel = ibv_create_comp_channel(ctx->context);
358 		if (!ctx->channel) {
359 			fprintf(stderr, "Couldn't create completion channel\n");
360 			goto clean_device;
361 		}
362 	} else
363 		ctx->channel = NULL;
364 
365 	ctx->pd = ibv_alloc_pd(ctx->context);
366 	if (!ctx->pd) {
367 		fprintf(stderr, "Couldn't allocate PD\n");
368 		goto clean_comp_channel;
369 	}
370 
371 	if (use_odp || use_ts) {
372 		const uint32_t rc_caps_mask = IBV_ODP_SUPPORT_SEND |
373 					      IBV_ODP_SUPPORT_RECV;
374 		struct ibv_device_attr_ex attrx;
375 
376 		if (ibv_query_device_ex(ctx->context, NULL, &attrx)) {
377 			fprintf(stderr, "Couldn't query device for its features\n");
378 			goto clean_comp_channel;
379 		}
380 
381 		if (use_odp) {
382 			if (!(attrx.odp_caps.general_caps & IBV_ODP_SUPPORT) ||
383 			    (attrx.odp_caps.per_transport_caps.rc_odp_caps & rc_caps_mask) != rc_caps_mask) {
384 				fprintf(stderr, "The device isn't ODP capable or does not support RC send and receive with ODP\n");
385 				goto clean_comp_channel;
386 			}
387 			access_flags |= IBV_ACCESS_ON_DEMAND;
388 		}
389 
390 		if (use_ts) {
391 			if (!attrx.completion_timestamp_mask) {
392 				fprintf(stderr, "The device isn't completion timestamp capable\n");
393 				goto clean_comp_channel;
394 			}
395 			ctx->completion_timestamp_mask = attrx.completion_timestamp_mask;
396 		}
397 	}
398 	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, access_flags);
399 
400 	if (!ctx->mr) {
401 		fprintf(stderr, "Couldn't register MR\n");
402 		goto clean_pd;
403 	}
404 
405 	if (use_ts) {
406 		struct ibv_cq_init_attr_ex attr_ex = {
407 			.cqe = rx_depth + 1,
408 			.cq_context = NULL,
409 			.channel = ctx->channel,
410 			.comp_vector = 0,
411 			.wc_flags = IBV_WC_EX_WITH_COMPLETION_TIMESTAMP
412 		};
413 
414 		ctx->cq_s.cq_ex = ibv_create_cq_ex(ctx->context, &attr_ex);
415 	} else {
416 		ctx->cq_s.cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL,
417 					     ctx->channel, 0);
418 	}
419 
420 	if (!pp_cq(ctx)) {
421 		fprintf(stderr, "Couldn't create CQ\n");
422 		goto clean_mr;
423 	}
424 
425 	{
426 		struct ibv_qp_attr attr;
427 		struct ibv_qp_init_attr init_attr = {
428 			.send_cq = pp_cq(ctx),
429 			.recv_cq = pp_cq(ctx),
430 			.cap     = {
431 				.max_send_wr  = 1,
432 				.max_recv_wr  = rx_depth,
433 				.max_send_sge = 1,
434 				.max_recv_sge = 1
435 			},
436 			.qp_type = IBV_QPT_RC
437 		};
438 
439 		ctx->qp = ibv_create_qp(ctx->pd, &init_attr);
440 		if (!ctx->qp)  {
441 			fprintf(stderr, "Couldn't create QP\n");
442 			goto clean_cq;
443 		}
444 
445 		ibv_query_qp(ctx->qp, &attr, IBV_QP_CAP, &init_attr);
446 		if (init_attr.cap.max_inline_data >= size) {
447 			ctx->send_flags |= IBV_SEND_INLINE;
448 		}
449 	}
450 
451 	{
452 		struct ibv_qp_attr attr = {
453 			.qp_state        = IBV_QPS_INIT,
454 			.pkey_index      = 0,
455 			.port_num        = port,
456 			.qp_access_flags = 0
457 		};
458 
459 		if (ibv_modify_qp(ctx->qp, &attr,
460 				  IBV_QP_STATE              |
461 				  IBV_QP_PKEY_INDEX         |
462 				  IBV_QP_PORT               |
463 				  IBV_QP_ACCESS_FLAGS)) {
464 			fprintf(stderr, "Failed to modify QP to INIT\n");
465 			goto clean_qp;
466 		}
467 	}
468 
469 	return ctx;
470 
471 clean_qp:
472 	ibv_destroy_qp(ctx->qp);
473 
474 clean_cq:
475 	ibv_destroy_cq(pp_cq(ctx));
476 
477 clean_mr:
478 	ibv_dereg_mr(ctx->mr);
479 
480 clean_pd:
481 	ibv_dealloc_pd(ctx->pd);
482 
483 clean_comp_channel:
484 	if (ctx->channel)
485 		ibv_destroy_comp_channel(ctx->channel);
486 
487 clean_device:
488 	ibv_close_device(ctx->context);
489 
490 clean_buffer:
491 	free(ctx->buf);
492 
493 clean_ctx:
494 	free(ctx);
495 
496 	return NULL;
497 }
498 
499 static int pp_close_ctx(struct pingpong_context *ctx)
500 {
501 	if (ibv_destroy_qp(ctx->qp)) {
502 		fprintf(stderr, "Couldn't destroy QP\n");
503 		return 1;
504 	}
505 
506 	if (ibv_destroy_cq(pp_cq(ctx))) {
507 		fprintf(stderr, "Couldn't destroy CQ\n");
508 		return 1;
509 	}
510 
511 	if (ibv_dereg_mr(ctx->mr)) {
512 		fprintf(stderr, "Couldn't deregister MR\n");
513 		return 1;
514 	}
515 
516 	if (ibv_dealloc_pd(ctx->pd)) {
517 		fprintf(stderr, "Couldn't deallocate PD\n");
518 		return 1;
519 	}
520 
521 	if (ctx->channel) {
522 		if (ibv_destroy_comp_channel(ctx->channel)) {
523 			fprintf(stderr, "Couldn't destroy completion channel\n");
524 			return 1;
525 		}
526 	}
527 
528 	if (ibv_close_device(ctx->context)) {
529 		fprintf(stderr, "Couldn't release context\n");
530 		return 1;
531 	}
532 
533 	free(ctx->buf);
534 	free(ctx);
535 
536 	return 0;
537 }
538 
539 static int pp_post_recv(struct pingpong_context *ctx, int n)
540 {
541 	struct ibv_sge list = {
542 		.addr	= (uintptr_t) ctx->buf,
543 		.length = ctx->size,
544 		.lkey	= ctx->mr->lkey
545 	};
546 	struct ibv_recv_wr wr = {
547 		.wr_id	    = PINGPONG_RECV_WRID,
548 		.sg_list    = &list,
549 		.num_sge    = 1,
550 	};
551 	struct ibv_recv_wr *bad_wr;
552 	int i;
553 
554 	for (i = 0; i < n; ++i)
555 		if (ibv_post_recv(ctx->qp, &wr, &bad_wr))
556 			break;
557 
558 	return i;
559 }
560 
561 static int pp_post_send(struct pingpong_context *ctx)
562 {
563 	struct ibv_sge list = {
564 		.addr	= (uintptr_t) ctx->buf,
565 		.length = ctx->size,
566 		.lkey	= ctx->mr->lkey
567 	};
568 	struct ibv_send_wr wr = {
569 		.wr_id	    = PINGPONG_SEND_WRID,
570 		.sg_list    = &list,
571 		.num_sge    = 1,
572 		.opcode     = IBV_WR_SEND,
573 		.send_flags = ctx->send_flags,
574 	};
575 	struct ibv_send_wr *bad_wr;
576 
577 	return ibv_post_send(ctx->qp, &wr, &bad_wr);
578 }
579 
/*
 * Running statistics on the time between consecutive receive
 * completions, in units of the completion timestamp reported by the CQ.
 * Updated by parse_single_wc() and printed at the end of main().
 */
struct ts_params {
	uint64_t		 comp_recv_max_time_delta;	/* largest delta seen */
	uint64_t		 comp_recv_min_time_delta;	/* smallest delta seen */
	uint64_t		 comp_recv_total_time_delta;	/* sum of all deltas */
	uint64_t		 comp_recv_prev_time;		/* timestamp of the previous receive completion */
	int			 last_comp_with_ts;		/* non-zero once comp_recv_prev_time is valid */
	unsigned int		 comp_with_time_iters;		/* number of deltas accumulated */
};
588 
589 static inline int parse_single_wc(struct pingpong_context *ctx, int *scnt,
590 				  int *rcnt, int *routs, int iters,
591 				  uint64_t wr_id, enum ibv_wc_status status,
592 				  uint64_t completion_timestamp,
593 				  struct ts_params *ts)
594 {
595 	if (status != IBV_WC_SUCCESS) {
596 		fprintf(stderr, "Failed status %s (%d) for wr_id %d\n",
597 			ibv_wc_status_str(status),
598 			status, (int)wr_id);
599 		return 1;
600 	}
601 
602 	switch ((int)wr_id) {
603 	case PINGPONG_SEND_WRID:
604 		++(*scnt);
605 		break;
606 
607 	case PINGPONG_RECV_WRID:
608 		if (--(*routs) <= 1) {
609 			*routs += pp_post_recv(ctx, ctx->rx_depth - *routs);
610 			if (*routs < ctx->rx_depth) {
611 				fprintf(stderr,
612 					"Couldn't post receive (%d)\n",
613 					*routs);
614 				return 1;
615 			}
616 		}
617 
618 		++(*rcnt);
619 		if (use_ts) {
620 			if (ts->last_comp_with_ts) {
621 				uint64_t delta;
622 
623 				/* checking whether the clock was wrapped around */
624 				if (completion_timestamp >= ts->comp_recv_prev_time)
625 					delta = completion_timestamp - ts->comp_recv_prev_time;
626 				else
627 					delta = ctx->completion_timestamp_mask - ts->comp_recv_prev_time +
628 						completion_timestamp + 1;
629 
630 				ts->comp_recv_max_time_delta = MAX(ts->comp_recv_max_time_delta, delta);
631 				ts->comp_recv_min_time_delta = MIN(ts->comp_recv_min_time_delta, delta);
632 				ts->comp_recv_total_time_delta += delta;
633 				ts->comp_with_time_iters++;
634 			}
635 
636 			ts->comp_recv_prev_time = completion_timestamp;
637 			ts->last_comp_with_ts = 1;
638 		} else {
639 			ts->last_comp_with_ts = 0;
640 		}
641 
642 		break;
643 
644 	default:
645 		fprintf(stderr, "Completion for unknown wr_id %d\n",
646 			(int)wr_id);
647 		return 1;
648 	}
649 
650 	ctx->pending &= ~(int)wr_id;
651 	if (*scnt < iters && !ctx->pending) {
652 		if (pp_post_send(ctx)) {
653 			fprintf(stderr, "Couldn't post send\n");
654 			return 1;
655 		}
656 		ctx->pending = PINGPONG_RECV_WRID |
657 			PINGPONG_SEND_WRID;
658 	}
659 
660 	return 0;
661 }
662 
/* Print the command-line synopsis and option list to stdout. */
static void usage(const char *argv0)
{
	static const char option_help[] =
		"\n"
		"Options:\n"
		"  -p, --port=<port>      listen on/connect to port <port> (default 18515)\n"
		"  -d, --ib-dev=<dev>     use IB device <dev> (default first device found)\n"
		"  -i, --ib-port=<port>   use port <port> of IB device (default 1)\n"
		"  -s, --size=<size>      size of message to exchange (default 4096)\n"
		"  -m, --mtu=<size>       path MTU (default 1024)\n"
		"  -r, --rx-depth=<dep>   number of receives to post at a time (default 500)\n"
		"  -n, --iters=<iters>    number of exchanges (default 1000)\n"
		"  -l, --sl=<sl>          service level value\n"
		"  -e, --events           sleep on CQ events (default poll)\n"
		"  -g, --gid-idx=<gid index> local port gid index\n"
		"  -o, --odp		    use on demand paging\n"
		"  -t, --ts	            get CQE with timestamp\n";

	printf("Usage:\n"
	       "  %s            start a server and wait for connection\n"
	       "  %s <host>     connect to server at <host>\n",
	       argv0, argv0);
	fputs(option_help, stdout);
}
683 
684 int main(int argc, char *argv[])
685 {
686 	struct ibv_device      **dev_list;
687 	struct ibv_device	*ib_dev;
688 	struct pingpong_context *ctx;
689 	struct pingpong_dest     my_dest;
690 	struct pingpong_dest    *rem_dest;
691 	struct timeval           start, end;
692 	char                    *ib_devname = NULL;
693 	char                    *servername = NULL;
694 	unsigned int             port = 18515;
695 	int                      ib_port = 1;
696 	unsigned int             size = 4096;
697 	enum ibv_mtu		 mtu = IBV_MTU_1024;
698 	unsigned int             rx_depth = 500;
699 	unsigned int             iters = 1000;
700 	int                      use_event = 0;
701 	int                      routs;
702 	int                      rcnt, scnt;
703 	int                      num_cq_events = 0;
704 	int                      sl = 0;
705 	int			 gidx = -1;
706 	char			 gid[33];
707 	struct ts_params	 ts;
708 
709 	srand48(getpid() * time(NULL));
710 
711 	while (1) {
712 		int c;
713 
714 		static struct option long_options[] = {
715 			{ .name = "port",     .has_arg = 1, .val = 'p' },
716 			{ .name = "ib-dev",   .has_arg = 1, .val = 'd' },
717 			{ .name = "ib-port",  .has_arg = 1, .val = 'i' },
718 			{ .name = "size",     .has_arg = 1, .val = 's' },
719 			{ .name = "mtu",      .has_arg = 1, .val = 'm' },
720 			{ .name = "rx-depth", .has_arg = 1, .val = 'r' },
721 			{ .name = "iters",    .has_arg = 1, .val = 'n' },
722 			{ .name = "sl",       .has_arg = 1, .val = 'l' },
723 			{ .name = "events",   .has_arg = 0, .val = 'e' },
724 			{ .name = "gid-idx",  .has_arg = 1, .val = 'g' },
725 			{ .name = "odp",      .has_arg = 0, .val = 'o' },
726 			{ .name = "ts",       .has_arg = 0, .val = 't' },
727 			{}
728 		};
729 
730 		c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:ot",
731 				long_options, NULL);
732 
733 		if (c == -1)
734 			break;
735 
736 		switch (c) {
737 		case 'p':
738 			port = strtoul(optarg, NULL, 0);
739 			if (port > 65535) {
740 				usage(argv[0]);
741 				return 1;
742 			}
743 			break;
744 
745 		case 'd':
746 			ib_devname = strdupa(optarg);
747 			break;
748 
749 		case 'i':
750 			ib_port = strtol(optarg, NULL, 0);
751 			if (ib_port < 1) {
752 				usage(argv[0]);
753 				return 1;
754 			}
755 			break;
756 
757 		case 's':
758 			size = strtoul(optarg, NULL, 0);
759 			break;
760 
761 		case 'm':
762 			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
763 			if (mtu == 0) {
764 				usage(argv[0]);
765 				return 1;
766 			}
767 			break;
768 
769 		case 'r':
770 			rx_depth = strtoul(optarg, NULL, 0);
771 			break;
772 
773 		case 'n':
774 			iters = strtoul(optarg, NULL, 0);
775 			break;
776 
777 		case 'l':
778 			sl = strtol(optarg, NULL, 0);
779 			break;
780 
781 		case 'e':
782 			++use_event;
783 			break;
784 
785 		case 'g':
786 			gidx = strtol(optarg, NULL, 0);
787 			break;
788 
789 		case 'o':
790 			use_odp = 1;
791 			break;
792 		case 't':
793 			use_ts = 1;
794 			break;
795 
796 		default:
797 			usage(argv[0]);
798 			return 1;
799 		}
800 	}
801 
802 	if (optind == argc - 1)
803 		servername = strdupa(argv[optind]);
804 	else if (optind < argc) {
805 		usage(argv[0]);
806 		return 1;
807 	}
808 
809 	if (use_ts) {
810 		ts.comp_recv_max_time_delta = 0;
811 		ts.comp_recv_min_time_delta = 0xffffffff;
812 		ts.comp_recv_total_time_delta = 0;
813 		ts.comp_recv_prev_time = 0;
814 		ts.last_comp_with_ts = 0;
815 		ts.comp_with_time_iters = 0;
816 	}
817 
818 	page_size = sysconf(_SC_PAGESIZE);
819 
820 	dev_list = ibv_get_device_list(NULL);
821 	if (!dev_list) {
822 		perror("Failed to get IB devices list");
823 		return 1;
824 	}
825 
826 	if (!ib_devname) {
827 		ib_dev = *dev_list;
828 		if (!ib_dev) {
829 			fprintf(stderr, "No IB devices found\n");
830 			return 1;
831 		}
832 	} else {
833 		int i;
834 		for (i = 0; dev_list[i]; ++i)
835 			if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
836 				break;
837 		ib_dev = dev_list[i];
838 		if (!ib_dev) {
839 			fprintf(stderr, "IB device %s not found\n", ib_devname);
840 			return 1;
841 		}
842 	}
843 
844 	ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event);
845 	if (!ctx)
846 		return 1;
847 
848 	routs = pp_post_recv(ctx, ctx->rx_depth);
849 	if (routs < ctx->rx_depth) {
850 		fprintf(stderr, "Couldn't post receive (%d)\n", routs);
851 		return 1;
852 	}
853 
854 	if (use_event)
855 		if (ibv_req_notify_cq(pp_cq(ctx), 0)) {
856 			fprintf(stderr, "Couldn't request CQ notification\n");
857 			return 1;
858 		}
859 
860 
861 	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
862 		fprintf(stderr, "Couldn't get port info\n");
863 		return 1;
864 	}
865 
866 	my_dest.lid = ctx->portinfo.lid;
867 	if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET &&
868 							!my_dest.lid) {
869 		fprintf(stderr, "Couldn't get local LID\n");
870 		return 1;
871 	}
872 
873 	if (gidx >= 0) {
874 		if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest.gid)) {
875 			fprintf(stderr, "can't read sgid of index %d\n", gidx);
876 			return 1;
877 		}
878 	} else
879 		memset(&my_dest.gid, 0, sizeof my_dest.gid);
880 
881 	my_dest.qpn = ctx->qp->qp_num;
882 	my_dest.psn = lrand48() & 0xffffff;
883 	inet_ntop(AF_INET6, &my_dest.gid, gid, sizeof gid);
884 	printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n",
885 	       my_dest.lid, my_dest.qpn, my_dest.psn, gid);
886 
887 
888 	if (servername)
889 		rem_dest = pp_client_exch_dest(servername, port, &my_dest);
890 	else
891 		rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl,
892 								&my_dest, gidx);
893 
894 	if (!rem_dest)
895 		return 1;
896 
897 	inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid);
898 	printf("  remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n",
899 	       rem_dest->lid, rem_dest->qpn, rem_dest->psn, gid);
900 
901 	if (servername)
902 		if (pp_connect_ctx(ctx, ib_port, my_dest.psn, mtu, sl, rem_dest,
903 					gidx))
904 			return 1;
905 
906 	ctx->pending = PINGPONG_RECV_WRID;
907 
908 	if (servername) {
909 		if (pp_post_send(ctx)) {
910 			fprintf(stderr, "Couldn't post send\n");
911 			return 1;
912 		}
913 		ctx->pending |= PINGPONG_SEND_WRID;
914 	}
915 
916 	if (gettimeofday(&start, NULL)) {
917 		perror("gettimeofday");
918 		return 1;
919 	}
920 
921 	rcnt = scnt = 0;
922 	while (rcnt < iters || scnt < iters) {
923 		int ret;
924 
925 		if (use_event) {
926 			struct ibv_cq *ev_cq;
927 			void          *ev_ctx;
928 
929 			if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
930 				fprintf(stderr, "Failed to get cq_event\n");
931 				return 1;
932 			}
933 
934 			++num_cq_events;
935 
936 			if (ev_cq != pp_cq(ctx)) {
937 				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
938 				return 1;
939 			}
940 
941 			if (ibv_req_notify_cq(pp_cq(ctx), 0)) {
942 				fprintf(stderr, "Couldn't request CQ notification\n");
943 				return 1;
944 			}
945 		}
946 
947 		if (use_ts) {
948 			struct ibv_poll_cq_attr attr = {};
949 
950 			do {
951 				ret = ibv_start_poll(ctx->cq_s.cq_ex, &attr);
952 			} while (!use_event && ret == ENOENT);
953 
954 			if (ret) {
955 				fprintf(stderr, "poll CQ failed %d\n", ret);
956 				return ret;
957 			}
958 			ret = parse_single_wc(ctx, &scnt, &rcnt, &routs,
959 					      iters,
960 					      ctx->cq_s.cq_ex->wr_id,
961 					      ctx->cq_s.cq_ex->status,
962 					      ibv_wc_read_completion_ts(ctx->cq_s.cq_ex),
963 					      &ts);
964 			if (ret) {
965 				ibv_end_poll(ctx->cq_s.cq_ex);
966 				return ret;
967 			}
968 			ret = ibv_next_poll(ctx->cq_s.cq_ex);
969 			if (!ret)
970 				ret = parse_single_wc(ctx, &scnt, &rcnt, &routs,
971 						      iters,
972 						      ctx->cq_s.cq_ex->wr_id,
973 						      ctx->cq_s.cq_ex->status,
974 						      ibv_wc_read_completion_ts(ctx->cq_s.cq_ex),
975 						      &ts);
976 			ibv_end_poll(ctx->cq_s.cq_ex);
977 			if (ret && ret != ENOENT) {
978 				fprintf(stderr, "poll CQ failed %d\n", ret);
979 				return ret;
980 			}
981 		} else {
982 			int ne, i;
983 			struct ibv_wc wc[2];
984 
985 			do {
986 				ne = ibv_poll_cq(pp_cq(ctx), 2, wc);
987 				if (ne < 0) {
988 					fprintf(stderr, "poll CQ failed %d\n", ne);
989 					return 1;
990 				}
991 			} while (!use_event && ne < 1);
992 
993 			for (i = 0; i < ne; ++i) {
994 				ret = parse_single_wc(ctx, &scnt, &rcnt, &routs,
995 						      iters,
996 						      wc[i].wr_id,
997 						      wc[i].status,
998 						      0, &ts);
999 				if (ret) {
1000 					fprintf(stderr, "parse WC failed %d\n", ne);
1001 					return 1;
1002 				}
1003 			}
1004 		}
1005 	}
1006 
1007 	if (gettimeofday(&end, NULL)) {
1008 		perror("gettimeofday");
1009 		return 1;
1010 	}
1011 
1012 	{
1013 		float usec = (end.tv_sec - start.tv_sec) * 1000000 +
1014 			(end.tv_usec - start.tv_usec);
1015 		long long bytes = (long long) size * iters * 2;
1016 
1017 		printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
1018 		       bytes, usec / 1000000., bytes * 8. / usec);
1019 		printf("%d iters in %.2f seconds = %.2f usec/iter\n",
1020 		       iters, usec / 1000000., usec / iters);
1021 
1022 		if (use_ts && ts.comp_with_time_iters) {
1023 			printf("Max receive completion clock cycles = %" PRIu64 "\n",
1024 			       ts.comp_recv_max_time_delta);
1025 			printf("Min receive completion clock cycles = %" PRIu64 "\n",
1026 			       ts.comp_recv_min_time_delta);
1027 			printf("Average receive completion clock cycles = %f\n",
1028 			       (double)ts.comp_recv_total_time_delta / ts.comp_with_time_iters);
1029 		}
1030 	}
1031 
1032 	ibv_ack_cq_events(pp_cq(ctx), num_cq_events);
1033 
1034 	if (pp_close_ctx(ctx))
1035 		return 1;
1036 
1037 	ibv_free_device_list(dev_list);
1038 	free(rem_dest);
1039 
1040 	return 0;
1041 }
1042