1 /*
2  * Copyright (c) 2015-2017 Los Alamos National Security, LLC.
3  *                         All rights reserved.
4  * Copyright (c) 2015-2018 Cray Inc. All rights reserved.
5  * Copyright (c) 2019 Triad National Security, LLC. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35 
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <errno.h>
39 #include <getopt.h>
40 #include <poll.h>
41 #include <time.h>
42 #include <string.h>
43 #include <pthread.h>
44 
45 
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <inttypes.h>
49 
50 #include "gnix_vc.h"
51 #include "gnix_cm_nic.h"
52 #include "gnix_hashtable.h"
53 #include "gnix_rma.h"
54 #include "gnix_util.h"
55 
56 #include <criterion/criterion.h>
57 #include "gnix_rdma_headers.h"
58 #include "common.h"
59 
60 #if 1
61 #define dbg_printf(...)
62 #else
63 #define dbg_printf(...)				\
64 	do {					\
65 		printf(__VA_ARGS__);		\
66 		fflush(stdout);			\
67 	} while (0)
68 #endif
69 
70 /* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */
71 static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY;
72 static struct fid_fabric *fab;
73 static struct fid_domain *dom[2];
74 struct fi_gni_ops_domain *gni_domain_ops[2];
75 static struct fid_ep *ep[2];
76 static struct fid_av *av[2];
77 static struct fi_info *hints;
78 static struct fi_info *fi;
79 void *ep_name[2];
80 size_t gni_addr[2];
81 static struct fid_cq *send_cq[2];
82 static struct fid_cq *recv_cq[2];
83 static struct fi_cq_attr cq_attr[2];
84 static struct fid_stx *stx_ctx[2];
85 static struct fid_stx *stx_ctx_too_late;
86 
87 #define BUF_SZ (64*1024)
88 char *target, *target_base;
89 char *source, *source_base;
90 char *uc_source;
91 struct fid_mr *rem_mr[2], *loc_mr[2];
92 uint64_t mr_key[2];
93 
94 static struct fid_cntr *write_cntr[2], *read_cntr[2];
95 static struct fid_cntr *rwrite_cntr;
96 static struct fid_cntr *rread_cntr;
97 static struct fi_cntr_attr cntr_attr = {
98 	.events = FI_CNTR_EVENTS_COMP,
99 	.flags = 0
100 };
101 static uint64_t writes[2] = {0}, reads[2] = {0}, write_errs[2] = {0},
102 	read_errs[2] = {0};
103 #define MLOOPS 1000
104 static int dgm_fail;
105 
common_setup_stx(uint32_t version,int mr_mode)106 static void common_setup_stx(uint32_t version, int mr_mode)
107 {
108 	int ret = 0;
109 	struct fi_av_attr attr;
110 	size_t addrlen = 0;
111 	int requested_key[2][2] = {{0, 0}, {0, 0} };
112 	int i, j;
113 	dgm_fail = 0;
114 
115 	hints->domain_attr->mr_mode = mr_mode;
116 	hints->domain_attr->cq_data_size = 4;
117 	hints->ep_attr->tx_ctx_cnt = FI_SHARED_CONTEXT;
118 	hints->mode = mode_bits;
119 	hints->caps |= FI_RMA | FI_READ | FI_REMOTE_READ |
120 		       FI_WRITE | FI_REMOTE_WRITE;
121 
122 	hints->fabric_attr->prov_name = strdup("gni");
123 
124 	ret = fi_getinfo(version, NULL, 0, 0, hints, &fi);
125 	cr_assert(!ret, "fi_getinfo");
126 
127 	ret = fi_fabric(fi->fabric_attr, &fab, NULL);
128 	cr_assert(!ret, "fi_fabric");
129 
130 	ret = fi_domain(fab, fi, dom, NULL);
131 	cr_assert(!ret, "fi_domain");
132 
133 	ret = fi_open_ops(&dom[0]->fid, FI_GNI_DOMAIN_OPS_1,
134 			  0, (void **) gni_domain_ops, NULL);
135 
136 	memset(&attr, 0, sizeof(attr));
137 	attr.type = FI_AV_MAP;
138 	attr.count = 2;
139 
140 	ret = fi_av_open(dom[0], &attr, av, NULL);
141 	cr_assert(!ret, "fi_av_open");
142 
143 	ret = fi_endpoint(dom[0], fi, &ep[0], NULL);
144 	cr_assert(!ret, "fi_endpoint");
145 
146 	cq_attr[0].format = FI_CQ_FORMAT_TAGGED;
147 	cq_attr[0].size = 1024;
148 	cq_attr[0].wait_obj = 0;
149 
150 	ret = fi_cq_open(dom[0], cq_attr, send_cq, 0);
151 	cr_assert(!ret, "fi_cq_open");
152 
153 	ret = fi_cq_open(dom[0], cq_attr, recv_cq, 0);
154 	cr_assert(!ret, "fi_cq_open");
155 
156 	ret = fi_stx_context(dom[0], NULL, &stx_ctx[0], 0);
157 	cr_assert(!ret, "fi_stx_context");
158 
159 	ret = fi_stx_context(dom[0], NULL, &stx_ctx_too_late, 0);
160 	cr_assert(!ret, "fi_stx_context");
161 
162 	ret = fi_domain(fab, fi, dom + 1, NULL);
163 	cr_assert(!ret, "fi_domain");
164 
165 	ret = fi_open_ops(&dom[1]->fid, FI_GNI_DOMAIN_OPS_1,
166 			  0, (void **) gni_domain_ops + 1, NULL);
167 
168 	ret = fi_av_open(dom[1], &attr, av + 1, NULL);
169 	cr_assert(!ret, "fi_av_open");
170 
171 	ret = fi_endpoint(dom[1], fi, &ep[1], NULL);
172 	cr_assert(!ret, "fi_endpoint");
173 
174 	ret = fi_stx_context(dom[1], NULL, &stx_ctx[1], 0);
175 	cr_assert(!ret, "fi_stx_context");
176 
177 	cq_attr[1].format = FI_CQ_FORMAT_TAGGED;
178 	cq_attr[1].size = 1024;
179 	cq_attr[1].wait_obj = 0;
180 
181 	ret = fi_cq_open(dom[1], cq_attr + 1, send_cq + 1, 0);
182 	cr_assert(!ret, "fi_cq_open");
183 
184 	ret = fi_cq_open(dom[1], cq_attr + 1, recv_cq + 1, 0);
185 	cr_assert(!ret, "fi_cq_open");
186 
187 	/*
188 	 * imitate shmem, etc. use FI_WRITE for bind
189 	 * flag
190 	 */
191 	ret = fi_ep_bind(ep[0], &send_cq[0]->fid, FI_TRANSMIT);
192 	cr_assert(!ret, "fi_ep_bind");
193 
194 	ret = fi_ep_bind(ep[0], &recv_cq[0]->fid, FI_RECV);
195 	cr_assert(!ret, "fi_ep_bind");
196 
197 	ret = fi_ep_bind(ep[0], &stx_ctx[0]->fid, 0);
198 	cr_assert(!ret, "fi_ep_bind stx");
199 
200 	/*
201 	 * this shouldn't work, wrong domain
202 	 */
203 
204 	ret = fi_ep_bind(ep[0], &stx_ctx[1]->fid, 0);
205 	cr_assert_eq(ret, -FI_EINVAL);
206 
207 	ret = fi_getname(&ep[0]->fid, NULL, &addrlen);
208 	cr_assert(addrlen > 0);
209 
210 	ep_name[0] = malloc(addrlen);
211 	cr_assert(ep_name[0] != NULL);
212 
213 	ret = fi_getname(&ep[0]->fid, ep_name[0], &addrlen);
214 	cr_assert(ret == FI_SUCCESS);
215 
216 	ret = fi_ep_bind(ep[1], &send_cq[1]->fid, FI_TRANSMIT);
217 	cr_assert(!ret, "fi_ep_bind");
218 
219 	ret = fi_ep_bind(ep[1], &recv_cq[1]->fid, FI_RECV);
220 	cr_assert(!ret, "fi_ep_bind");
221 
222 	ret = fi_ep_bind(ep[1], &stx_ctx[1]->fid, 0);
223 	cr_assert(!ret, "fi_ep_bind stx");
224 
225 	ret = fi_getname(&ep[1]->fid, NULL, &addrlen);
226 	cr_assert(addrlen > 0);
227 
228 	ep_name[1] = malloc(addrlen);
229 	cr_assert(ep_name[1] != NULL);
230 
231 	ret = fi_getname(&ep[1]->fid, ep_name[1], &addrlen);
232 	cr_assert(ret == FI_SUCCESS);
233 
234 	ret = fi_av_insert(av[0], ep_name[0], 1, &gni_addr[0], 0,
235 			   NULL);
236 	cr_assert(ret == 1);
237 	ret = fi_av_insert(av[0], ep_name[1], 1, &gni_addr[1], 0,
238 			   NULL);
239 	cr_assert(ret == 1);
240 
241 	ret = fi_av_insert(av[1], ep_name[0], 1, &gni_addr[0], 0,
242 			   NULL);
243 	cr_assert(ret == 1);
244 	ret = fi_av_insert(av[1], ep_name[1], 1, &gni_addr[1], 0,
245 			   NULL);
246 	cr_assert(ret == 1);
247 
248 	ret = fi_ep_bind(ep[0], &av[0]->fid, 0);
249 	cr_assert(!ret, "fi_ep_bind");
250 
251 	ret = fi_ep_bind(ep[1], &av[1]->fid, 0);
252 	cr_assert(!ret, "fi_ep_bind");
253 
254 	target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ));
255 	assert(target_base);
256 	target = GNIT_ALIGN_BUFFER(char *, target_base);
257 
258 	source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ));
259 	assert(source_base);
260 	source = GNIT_ALIGN_BUFFER(char *, source_base);
261 
262 	if (USING_SCALABLE(fi)) {
263 		for (i = 0; i < 2; i++)
264 			for (j = 0; j < 2; j++)
265 				requested_key[i][j] = (i * 2) + j;
266 	}
267 
268 	ret = fi_mr_reg(dom[0],
269 			  target,
270 			  BUF_SZ,
271 			  FI_REMOTE_WRITE,
272 			  0,
273 			  requested_key[0][0],
274 			  0,
275 			  &rem_mr[0],
276 			  &target);
277 	cr_assert_eq(ret, 0);
278 	ret = fi_mr_reg(dom[1],
279 			  target,
280 			  BUF_SZ,
281 			  FI_REMOTE_WRITE,
282 			  0,
283 			  requested_key[1][0],
284 			  0,
285 			  &rem_mr[1],
286 			  &target);
287 	cr_assert_eq(ret, 0);
288 
289 	ret = fi_mr_reg(dom[0],
290 			  source,
291 			  BUF_SZ,
292 			  FI_REMOTE_WRITE,
293 			  0,
294 			  requested_key[0][1],
295 			  0,
296 			  &loc_mr[0],
297 			  &source);
298 	cr_assert_eq(ret, 0);
299 	ret = fi_mr_reg(dom[1],
300 			  source,
301 			  BUF_SZ,
302 			  FI_REMOTE_WRITE,
303 			  0,
304 			  requested_key[1][1],
305 			  0,
306 			  &loc_mr[1],
307 			  &source);
308 	cr_assert_eq(ret, 0);
309 
310 	if (USING_SCALABLE(fi)) {
311 		for (i = 0; i < 2; i++) {
312 			MR_ENABLE(rem_mr[i], target, BUF_SZ);
313 			MR_ENABLE(loc_mr[i], source, BUF_SZ);
314 		}
315 	}
316 
317 	uc_source = malloc(BUF_SZ);
318 	assert(uc_source);
319 
320 	mr_key[0] = fi_mr_key(rem_mr[0]);
321 	mr_key[1] = fi_mr_key(rem_mr[1]);
322 
323 	ret = fi_cntr_open(dom[0], &cntr_attr, write_cntr, 0);
324 	cr_assert(!ret, "fi_cntr_open");
325 
326 	ret = fi_ep_bind(ep[0], &write_cntr[0]->fid, FI_WRITE);
327 	cr_assert(!ret, "fi_ep_bind");
328 
329 	ret = fi_cntr_open(dom[0], &cntr_attr, read_cntr, 0);
330 	cr_assert(!ret, "fi_cntr_open");
331 
332 	ret = fi_ep_bind(ep[0], &read_cntr[0]->fid, FI_READ);
333 	cr_assert(!ret, "fi_ep_bind");
334 
335 	ret = fi_cntr_open(dom[1], &cntr_attr, write_cntr + 1, 0);
336 	cr_assert(!ret, "fi_cntr_open");
337 
338 	ret = fi_ep_bind(ep[1], &write_cntr[1]->fid, FI_WRITE);
339 	cr_assert(!ret, "fi_ep_bind");
340 
341 	ret = fi_cntr_open(dom[1], &cntr_attr, read_cntr + 1, 0);
342 	cr_assert(!ret, "fi_cntr_open");
343 
344 	ret = fi_ep_bind(ep[1], &read_cntr[1]->fid, FI_READ);
345 	cr_assert(!ret, "fi_ep_bind");
346 
347 	if (hints->caps & FI_RMA_EVENT) {
348 		ret = fi_cntr_open(dom[1], &cntr_attr, &rwrite_cntr, 0);
349 		cr_assert(!ret, "fi_cntr_open");
350 
351 		ret = fi_ep_bind(ep[1], &rwrite_cntr->fid, FI_REMOTE_WRITE);
352 		cr_assert(!ret, "fi_ep_bind");
353 
354 		ret = fi_cntr_open(dom[1], &cntr_attr, &rread_cntr, 0);
355 		cr_assert(!ret, "fi_cntr_open");
356 
357 		ret = fi_ep_bind(ep[1], &rread_cntr->fid, FI_REMOTE_READ);
358 		cr_assert(!ret, "fi_ep_bind");
359 	}
360 
361 	ret = fi_enable(ep[0]);
362 	cr_assert(!ret, "fi_ep_enable");
363 
364 	/*
365 	 * this should not work - don't allow binding of STX
366 	 * after the EP is enabled
367 	 */
368 	ret = fi_ep_bind(ep[0], &stx_ctx_too_late->fid, 0);
369 	cr_assert_eq(ret, -FI_EOPBADSTATE, "fi_ep_bind stx");
370 
371 	ret = fi_close(&stx_ctx_too_late->fid);
372 	cr_assert(!ret, "failure in closing stx_ctx_too_late");
373 
374 	ret = fi_enable(ep[1]);
375 	cr_assert(!ret, "fi_ep_enable");
376 
377 }
378 
common_setup_stx_1dom(uint32_t version,int mr_mode)379 static void common_setup_stx_1dom(uint32_t version, int mr_mode)
380 {
381 	int ret = 0;
382 	struct fi_av_attr attr;
383 	size_t addrlen = 0;
384 
385 	dgm_fail = 0;
386 
387 	hints->domain_attr->mr_mode = mr_mode;
388 	hints->domain_attr->cq_data_size = 4;
389 	hints->ep_attr->tx_ctx_cnt = FI_SHARED_CONTEXT;
390 	hints->mode = mode_bits;
391 	hints->caps |= FI_RMA | FI_READ | FI_REMOTE_READ |
392 		       FI_WRITE | FI_REMOTE_WRITE;
393 
394 	hints->fabric_attr->prov_name = strdup("gni");
395 
396 	ret = fi_getinfo(version, NULL, 0, 0, hints, &fi);
397 	cr_assert(!ret, "fi_getinfo");
398 
399 	ret = fi_fabric(fi->fabric_attr, &fab, NULL);
400 	cr_assert(!ret, "fi_fabric");
401 
402 	ret = fi_domain(fab, fi, dom, NULL);
403 	cr_assert(!ret, "fi_domain");
404 
405 	ret = fi_open_ops(&dom[0]->fid, FI_GNI_DOMAIN_OPS_1,
406 			  0, (void **) gni_domain_ops, NULL);
407 
408 	memset(&attr, 0, sizeof(attr));
409 	attr.type = FI_AV_MAP;
410 	attr.count = 2;
411 
412 	ret = fi_av_open(dom[0], &attr, av, NULL);
413 	cr_assert(!ret, "fi_av_open");
414 
415 	ret = fi_endpoint(dom[0], fi, &ep[0], NULL);
416 	cr_assert(!ret, "fi_endpoint");
417 
418 	cq_attr[0].format = FI_CQ_FORMAT_TAGGED;
419 	cq_attr[0].size = 1024;
420 	cq_attr[0].wait_obj = 0;
421 
422 	ret = fi_cq_open(dom[0], cq_attr, send_cq, 0);
423 	cr_assert(!ret, "fi_cq_open");
424 
425 	ret = fi_cq_open(dom[0], cq_attr, recv_cq, 0);
426 	cr_assert(!ret, "fi_cq_open");
427 
428 	ret = fi_stx_context(dom[0], NULL, &stx_ctx[0], 0);
429 	cr_assert(!ret, "fi_stx_context");
430 
431 	ret = fi_stx_context(dom[0], NULL, &stx_ctx_too_late, 0);
432 	cr_assert(!ret, "fi_stx_context");
433 
434 	ret = fi_endpoint(dom[0], fi, &ep[1], NULL);
435 	cr_assert(!ret, "fi_endpoint");
436 
437 	cq_attr[1].format = FI_CQ_FORMAT_TAGGED;
438 	cq_attr[1].size = 1024;
439 	cq_attr[1].wait_obj = 0;
440 
441 	ret = fi_cq_open(dom[0], cq_attr + 1, send_cq + 1, 0);
442 	cr_assert(!ret, "fi_cq_open");
443 
444 	ret = fi_cq_open(dom[0], cq_attr + 1, recv_cq + 1, 0);
445 	cr_assert(!ret, "fi_cq_open");
446 
447 	/*
448 	 * imitate shmem, etc. use FI_WRITE for bind
449 	 * flag
450 	 */
451 	ret = fi_ep_bind(ep[0], &send_cq[0]->fid, FI_TRANSMIT);
452 	cr_assert(!ret, "fi_ep_bind");
453 
454 	ret = fi_ep_bind(ep[0], &recv_cq[0]->fid, FI_RECV);
455 	cr_assert(!ret, "fi_ep_bind");
456 
457 	ret = fi_ep_bind(ep[0], &stx_ctx[0]->fid, 0);
458 	cr_assert(!ret, "fi_ep_bind stx");
459 
460 	ret = fi_getname(&ep[0]->fid, NULL, &addrlen);
461 	cr_assert(addrlen > 0);
462 
463 	ep_name[0] = malloc(addrlen);
464 	cr_assert(ep_name[0] != NULL);
465 
466 	ret = fi_getname(&ep[0]->fid, ep_name[0], &addrlen);
467 	cr_assert(ret == FI_SUCCESS);
468 
469 	ret = fi_ep_bind(ep[1], &send_cq[1]->fid, FI_TRANSMIT);
470 	cr_assert(!ret, "fi_ep_bind");
471 
472 	ret = fi_ep_bind(ep[1], &recv_cq[1]->fid, FI_RECV);
473 	cr_assert(!ret, "fi_ep_bind");
474 
475 	ret = fi_ep_bind(ep[1], &stx_ctx[0]->fid, 0);
476 	cr_assert(!ret, "fi_ep_bind stx");
477 
478 	ret = fi_getname(&ep[1]->fid, NULL, &addrlen);
479 	cr_assert(addrlen > 0);
480 
481 	ep_name[1] = malloc(addrlen);
482 	cr_assert(ep_name[1] != NULL);
483 
484 	ret = fi_getname(&ep[1]->fid, ep_name[1], &addrlen);
485 	cr_assert(ret == FI_SUCCESS);
486 
487 	ret = fi_av_insert(av[0], ep_name[0], 1, &gni_addr[0], 0,
488 			   NULL);
489 	cr_assert(ret == 1);
490 	ret = fi_av_insert(av[0], ep_name[1], 1, &gni_addr[1], 0,
491 			   NULL);
492 	cr_assert(ret == 1);
493 
494 	ret = fi_ep_bind(ep[0], &av[0]->fid, 0);
495 	cr_assert(!ret, "fi_ep_bind");
496 
497 	ret = fi_ep_bind(ep[1], &av[0]->fid, 0);
498 	cr_assert(!ret, "fi_ep_bind");
499 
500 	target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ));
501 	assert(target_base);
502 	target = GNIT_ALIGN_BUFFER(char *, target_base);
503 
504 	source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ));
505 	assert(source_base);
506 	source = GNIT_ALIGN_BUFFER(char *, source_base);
507 
508 	ret = fi_mr_reg(dom[0],
509 			  target,
510 			  BUF_SZ,
511 			  FI_REMOTE_WRITE,
512 			  0,
513 			  (USING_SCALABLE(fi) ? 1 : 0),
514 			  0,
515 			  &rem_mr[0],
516 			  &target);
517 	cr_assert_eq(ret, 0);
518 
519 	ret = fi_mr_reg(dom[0],
520 			  source,
521 			  BUF_SZ,
522 			  FI_REMOTE_WRITE,
523 			  0,
524 			  (USING_SCALABLE(fi) ? 2 : 0),
525 			  0,
526 			  &loc_mr[0],
527 			  &source);
528 	cr_assert_eq(ret, 0);
529 
530 	if (USING_SCALABLE(fi)) {
531 		MR_ENABLE(rem_mr[0], target, BUF_SZ);
532 		MR_ENABLE(loc_mr[0], source, BUF_SZ);
533 	}
534 
535 	uc_source = malloc(BUF_SZ);
536 	assert(uc_source);
537 
538 	mr_key[0] = fi_mr_key(rem_mr[0]);
539 	mr_key[1] = mr_key[0];
540 
541 	ret = fi_cntr_open(dom[0], &cntr_attr, write_cntr, 0);
542 	cr_assert(!ret, "fi_cntr_open");
543 
544 	ret = fi_ep_bind(ep[0], &write_cntr[0]->fid, FI_WRITE);
545 	cr_assert(!ret, "fi_ep_bind");
546 
547 	ret = fi_cntr_open(dom[0], &cntr_attr, read_cntr, 0);
548 	cr_assert(!ret, "fi_cntr_open");
549 
550 	ret = fi_ep_bind(ep[0], &read_cntr[0]->fid, FI_READ);
551 	cr_assert(!ret, "fi_ep_bind");
552 
553 	ret = fi_cntr_open(dom[0], &cntr_attr, write_cntr + 1, 0);
554 	cr_assert(!ret, "fi_cntr_open");
555 
556 	ret = fi_ep_bind(ep[1], &write_cntr[1]->fid, FI_WRITE);
557 	cr_assert(!ret, "fi_ep_bind");
558 
559 	ret = fi_cntr_open(dom[0], &cntr_attr, read_cntr + 1, 0);
560 	cr_assert(!ret, "fi_cntr_open");
561 
562 	ret = fi_ep_bind(ep[1], &read_cntr[1]->fid, FI_READ);
563 	cr_assert(!ret, "fi_ep_bind");
564 
565 	if (hints->caps & FI_RMA_EVENT) {
566 		ret = fi_cntr_open(dom[0], &cntr_attr, &rwrite_cntr, 0);
567 		cr_assert(!ret, "fi_cntr_open");
568 
569 		ret = fi_ep_bind(ep[1], &rwrite_cntr->fid, FI_REMOTE_WRITE);
570 		cr_assert(!ret, "fi_ep_bind");
571 
572 		ret = fi_cntr_open(dom[0], &cntr_attr, &rread_cntr, 0);
573 		cr_assert(!ret, "fi_cntr_open");
574 
575 		ret = fi_ep_bind(ep[1], &rread_cntr->fid, FI_REMOTE_READ);
576 		cr_assert(!ret, "fi_ep_bind");
577 	}
578 
579 	ret = fi_enable(ep[0]);
580 	cr_assert(!ret, "fi_ep_enable");
581 
582 	ret = fi_enable(ep[1]);
583 	cr_assert(!ret, "fi_ep_enable");
584 
585 }
586 
rdm_rma_basic_setup(void)587 static void rdm_rma_basic_setup(void)
588 {
589 	hints = fi_allocinfo();
590 	cr_assert(hints, "fi_allocinfo");
591 	hints->ep_attr->type = FI_EP_RDM;
592 	hints->caps = FI_RMA_EVENT;
593 	common_setup_stx(fi_version(), GNIX_MR_BASIC);
594 }
595 
dgram_basic_setup(void)596 static void dgram_basic_setup(void)
597 {
598 	hints = fi_allocinfo();
599 	cr_assert(hints, "fi_allocinfo");
600 	hints->ep_attr->type = FI_EP_DGRAM;
601 	hints->caps = FI_RMA_EVENT;
602 	common_setup_stx(fi_version(), GNIX_MR_BASIC);
603 }
604 
dgram_basic_setup_1dom(void)605 static void dgram_basic_setup_1dom(void)
606 {
607 	hints = fi_allocinfo();
608 	cr_assert(hints, "fi_allocinfo");
609 	hints->ep_attr->type = FI_EP_DGRAM;
610 	hints->caps = FI_RMA_EVENT;
611 	common_setup_stx_1dom(fi_version(), GNIX_MR_BASIC);
612 }
613 
rdm_rma_scalable_setup(void)614 static void rdm_rma_scalable_setup(void)
615 {
616 	hints = fi_allocinfo();
617 	cr_assert(hints, "fi_allocinfo");
618 	hints->ep_attr->type = FI_EP_RDM;
619 	hints->caps = FI_RMA_EVENT;
620 	common_setup_stx(fi_version(), GNIX_MR_SCALABLE);
621 }
622 
dgram_scalable_setup(void)623 static void dgram_scalable_setup(void)
624 {
625 	hints = fi_allocinfo();
626 	cr_assert(hints, "fi_allocinfo");
627 	hints->ep_attr->type = FI_EP_DGRAM;
628 	hints->caps = FI_RMA_EVENT;
629 	common_setup_stx(fi_version(), GNIX_MR_SCALABLE);
630 }
631 
dgram_scalable_setup_1dom(void)632 static void dgram_scalable_setup_1dom(void)
633 {
634 	hints = fi_allocinfo();
635 	cr_assert(hints, "fi_allocinfo");
636 	hints->ep_attr->type = FI_EP_DGRAM;
637 	hints->caps = FI_RMA_EVENT;
638 	common_setup_stx_1dom(fi_version(), GNIX_MR_SCALABLE);
639 }
640 
/*
 * Common teardown for every suite: close counters, MRs, STX contexts,
 * EPs, CQs, AVs, domains, and finally the fabric, then free the info
 * structs and buffers.  Close order matters in libfabric — child
 * objects must be closed before their parent domain/fabric.  The [1]
 * objects are NULL-guarded because the single-domain fixtures never
 * create them.
 */
static void rdm_rma_stx_teardown(void)
{
	int ret = 0;

	/* Remote counters exist only when FI_RMA_EVENT was requested. */
	if (hints->caps & FI_RMA_EVENT) {
		ret = fi_close(&rwrite_cntr->fid);
		cr_assert(!ret, "failure in closing dom[1] rwrite counter.");

		ret = fi_close(&rread_cntr->fid);
		cr_assert(!ret, "failure in closing dom[1] rread counter.");
	}

	ret = fi_close(&read_cntr[0]->fid);
	cr_assert(!ret, "failure in closing dom[0] read counter.");

	ret = fi_close(&read_cntr[1]->fid);
	cr_assert(!ret, "failure in closing dom[1] read counter.");

	ret = fi_close(&write_cntr[0]->fid);
	cr_assert(!ret, "failure in closing dom[0] write counter.");

	ret = fi_close(&write_cntr[1]->fid);
	cr_assert(!ret, "failure in closing dom[1] write counter.");

	free(uc_source);

	ret = fi_close(&loc_mr[0]->fid);
	cr_assert(!ret, "failure in closing dom[0] local mr.");

	/* Only the two-domain fixtures register a second MR pair. */
	if (loc_mr[1] != NULL) {
		ret = fi_close(&loc_mr[1]->fid);
		cr_assert(!ret, "failure in closing dom[1] local mr.");
	}

	ret = fi_close(&rem_mr[0]->fid);
	cr_assert(!ret, "failure in closing dom[0] remote mr.");

	if (rem_mr[1] != NULL) {
		ret = fi_close(&rem_mr[1]->fid);
		cr_assert(!ret, "failure in closing dom[1] remote mr.");
	}

	ret = fi_close(&stx_ctx[0]->fid);
	cr_assert(!ret, "failure in closing dom[0] stx_ctx.");

	if (stx_ctx[1] != NULL) {
		ret = fi_close(&stx_ctx[1]->fid);
		cr_assert(!ret, "failure in closing dom[1] stx_ctx.");
	}

	free(target_base);
	free(source_base);

	ret = fi_close(&ep[0]->fid);
	cr_assert(!ret, "failure in closing ep[0].");

	ret = fi_close(&ep[1]->fid);
	cr_assert(!ret, "failure in closing ep[1].");

	ret = fi_close(&recv_cq[0]->fid);
	cr_assert(!ret, "failure in dom[0] recv cq.");

	ret = fi_close(&recv_cq[1]->fid);
	cr_assert(!ret, "failure in dom[1] recv cq.");

	ret = fi_close(&send_cq[0]->fid);
	cr_assert(!ret, "failure in dom[0] send cq.");

	ret = fi_close(&send_cq[1]->fid);
	cr_assert(!ret, "failure in dom[1] send cq.");

	ret = fi_close(&av[0]->fid);
	cr_assert(!ret, "failure in closing dom[0] av.");

	if (av[1] != NULL) {
		ret = fi_close(&av[1]->fid);
		cr_assert(!ret, "failure in closing dom[1] av.");
	}

	/* Domains close after all of their children, fabric last. */
	ret = fi_close(&dom[0]->fid);
	cr_assert(!ret, "failure in closing domain dom[0].");

	if (dom[1] != NULL) {
		ret = fi_close(&dom[1]->fid);
		cr_assert(!ret,
			"failure in closing domain dom[1].");
	}

	ret = fi_close(&fab->fid);
	cr_assert(!ret, "failure in closing fabric.");

	fi_freeinfo(fi);
	fi_freeinfo(hints);
	hints = NULL;
	dgm_fail = 0;
	free(ep_name[0]);
	free(ep_name[1]);
}
739 
/*
 * Fill buf with a deterministic pattern: byte i holds seed + i
 * (with ordinary char wraparound), so two buffers seeded alike compare
 * equal and a seeded buffer differs from a zeroed one.
 */
static void init_data(char *buf, int len, char seed)
{
	char *p = buf;
	char *end = buf + len;

	while (p < end)
		*p++ = seed++;
}
748 
/*
 * Compare two buffers byte-by-byte, reporting the first mismatch on
 * stdout.  Returns 1 when the first len bytes match, 0 otherwise.
 */
static int check_data(char *buf1, char *buf2, int len)
{
	int idx;

	for (idx = 0; idx < len; idx++) {
		if (buf1[idx] == buf2[idx])
			continue;
		printf("data mismatch, elem: %d, b1: 0x%hhx,"
			" b2: 0x%hhx, len: %d\n",
		       idx, buf1[idx], buf2[idx], len);
		return 0;
	}

	return 1;
}
764 
rdm_rma_check_tcqe(struct fi_cq_tagged_entry * tcqe,void * ctx,uint64_t flags,uint64_t data,struct fid_ep * fid_ep)765 static void rdm_rma_check_tcqe(struct fi_cq_tagged_entry *tcqe, void *ctx,
766 				uint64_t flags, uint64_t data,
767 			       struct fid_ep *fid_ep)
768 {
769 	struct gnix_fid_ep *gnix_ep = get_gnix_ep(fid_ep);
770 
771 	cr_assert(tcqe->op_context == ctx, "CQE Context mismatch");
772 	cr_assert(tcqe->flags == flags, "CQE flags mismatch");
773 
774 	/* TODO: Remove GNIX_ALLOW_FI_REMOTE_CQ_DATA and only check flags for FI_RMA_EVENT */
775 	if (GNIX_ALLOW_FI_REMOTE_CQ_DATA(flags, gnix_ep->caps)) {
776 		cr_assert(tcqe->data == data, "CQE data invalid");
777 	} else {
778 		cr_assert(tcqe->data == 0, "CQE data invalid");
779 	}
780 
781 	cr_assert(tcqe->len == 0, "CQE length mismatch");
782 	cr_assert(tcqe->buf == 0, "CQE address mismatch");
783 	cr_assert(tcqe->tag == 0, "CQE tag invalid");
784 }
785 
rdm_rma_check_cntrs(uint64_t w[2],uint64_t r[2],uint64_t w_e[2],uint64_t r_e[2])786 static void rdm_rma_check_cntrs(uint64_t w[2], uint64_t r[2], uint64_t w_e[2],
787 			 uint64_t r_e[2])
788 {
789 	/* Domain 0 */
790 	writes[0] += w[0];
791 	reads[0] += r[0];
792 	write_errs[0] += w_e[0];
793 	read_errs[0] += r_e[0];
794 	/*dbg_printf("%ld, %ld\n", fi_cntr_read(write_cntr[0]), writes[0]);*/
795 	cr_assert(fi_cntr_read(write_cntr[0]) == writes[0], "Bad write count");
796 	cr_assert(fi_cntr_read(read_cntr[0]) == reads[0], "Bad read count");
797 	cr_assert(fi_cntr_readerr(write_cntr[0]) == write_errs[0],
798 		  "Bad write err count");
799 	cr_assert(fi_cntr_readerr(read_cntr[0]) == read_errs[0],
800 		  "Bad read err count");
801 
802 	/* Domain 1 */
803 	writes[1] += w[1];
804 	reads[1] += r[1];
805 	write_errs[1] += w_e[1];
806 	read_errs[1] += r_e[1];
807 	cr_assert(fi_cntr_read(write_cntr[1]) == writes[1], "Bad write count");
808 	cr_assert(fi_cntr_read(read_cntr[1]) == reads[1], "Bad read count");
809 	cr_assert(fi_cntr_readerr(write_cntr[1]) == write_errs[1],
810 		  "Bad write err count");
811 	cr_assert(fi_cntr_readerr(read_cntr[1]) == read_errs[1],
812 		  "Bad read err count");
813 
814 	if (hints->caps & FI_RMA_EVENT) {
815 		cr_assert(fi_cntr_read(rwrite_cntr) == writes[0],
816 			  "Bad rwrite count");
817 		cr_assert(fi_cntr_read(rread_cntr) == reads[0],
818 			  "Bad rread count");
819 		cr_assert(fi_cntr_readerr(rwrite_cntr) == 0,
820 			  "Bad rwrite err count");
821 		cr_assert(fi_cntr_readerr(rread_cntr) == 0,
822 			  "Bad rread err count");
823 	}
824 }
825 
/*
 * Run the given transfer once for every power-of-two length starting
 * at slen and not exceeding elen.
 */
static void xfer_for_each_size(void (*xfer)(int len), int slen, int elen)
{
	int len = slen;

	while (len <= elen) {
		xfer(len);
		len *= 2;
	}
}
834 
err_inject_enable(void)835 static void err_inject_enable(void)
836 {
837 	int ret, err_count_val = 1;
838 
839 	ret = gni_domain_ops[0]->set_val(&dom[0]->fid, GNI_ERR_INJECT_COUNT,
840 					 &err_count_val);
841 	cr_assert(!ret, "setval(GNI_ERR_INJECT_COUNT)");
842 
843 	if (gni_domain_ops[1] != NULL) {
844 		ret = gni_domain_ops[1]->set_val(&dom[1]->fid,
845 						 GNI_ERR_INJECT_COUNT,
846 						 &err_count_val);
847 		cr_assert(!ret, "setval(GNI_ERR_INJECT_COUNT)");
848 	}
849 }
850 
851 /*******************************************************************************
852  * Test RMA functions
853  ******************************************************************************/
/*
 * Suite registration: each suite pairs one setup fixture (endpoint
 * type + MR mode + domain count) with the shared teardown.
 */
TestSuite(dgram_rma_stx_basic,
	  .init = dgram_basic_setup,
	  .fini = rdm_rma_stx_teardown,
	  .disabled = false);

TestSuite(rdm_rma_stx_basic,
	  .init = rdm_rma_basic_setup,
	  .fini = rdm_rma_stx_teardown,
	  .disabled = false);

TestSuite(dgram_rma_1dom_stx_basic,
	  .init = dgram_basic_setup_1dom,
	  .fini = rdm_rma_stx_teardown,
	  .disabled = false);

TestSuite(dgram_rma_stx_scalable,
	  .init = dgram_scalable_setup,
	  .fini = rdm_rma_stx_teardown,
	  .disabled = false);

TestSuite(rdm_rma_stx_scalable,
	  .init = rdm_rma_scalable_setup,
	  .fini = rdm_rma_stx_teardown,
	  .disabled = false);

TestSuite(dgram_rma_1dom_stx_scalable,
	  .init = dgram_scalable_setup_1dom,
	  .fini = rdm_rma_stx_teardown,
	  .disabled = false);
883 
do_write(int len)884 static void do_write(int len)
885 {
886 	int ret;
887 	ssize_t sz;
888 	struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
889 					  (void *) -1, UINT_MAX, UINT_MAX };
890 	uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
891 
892 	init_data(source, len, 0xab);
893 	init_data(target, len, 0);
894 
895 	sz = fi_write(ep[0], source, len,
896 			  loc_mr[0], gni_addr[1],
897 			  _REM_ADDR(fi, target, target), mr_key[1],
898 			  target);
899 	cr_assert_eq(sz, 0);
900 
901 	while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
902 		pthread_yield();
903 	}
904 
905 	if (dgm_fail) {
906 		cr_assert_eq(ret, -FI_EAVAIL);
907 		return;
908 	}
909 	cr_assert_eq(ret, 1);
910 	rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
911 
912 	w[0] = 1;
913 	rdm_rma_check_cntrs(w, r, w_e, r_e);
914 
915 	dbg_printf("got write context event!\n");
916 
917 	cr_assert(check_data(source, target, len), "Data mismatch");
918 }
919 
/* fi_write over each power-of-two size from 8 bytes to BUF_SZ. */
Test(rdm_rma_stx_basic, write)
{
	xfer_for_each_size(do_write, 8, BUF_SZ);
}

/* Same, with one injected GNI error to force the retransmit path. */
Test(rdm_rma_stx_basic, write_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_write, 8, BUF_SZ);
}

Test(dgram_rma_stx_basic, write)
{
	xfer_for_each_size(do_write, 8, BUF_SZ);
}

/* Datagram EPs cannot retransmit: expect -FI_EAVAIL (dgm_fail). */
Test(dgram_rma_stx_basic, write_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_write, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_basic, write)
{
	xfer_for_each_size(do_write, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_basic, write_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_write, 8, BUF_SZ);
}
954 
/* scalable */

/* Same write matrix as above, under scalable MR mode. */
Test(rdm_rma_stx_scalable, write)
{
	xfer_for_each_size(do_write, 8, BUF_SZ);
}

Test(rdm_rma_stx_scalable, write_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_write, 8, BUF_SZ);
}

Test(dgram_rma_stx_scalable, write)
{
	xfer_for_each_size(do_write, 8, BUF_SZ);
}

/* Datagram EPs cannot retransmit: expect -FI_EAVAIL (dgm_fail). */
Test(dgram_rma_stx_scalable, write_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_write, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_scalable, write)
{
	xfer_for_each_size(do_write, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_scalable, write_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_write, 8, BUF_SZ);
}
991 
do_writev(int len)992 static void do_writev(int len)
993 {
994 	int ret;
995 	ssize_t sz;
996 	struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
997 					  (void *) -1, UINT_MAX, UINT_MAX };
998 	struct iovec iov;
999 	uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
1000 
1001 	iov.iov_base = source;
1002 	iov.iov_len = len;
1003 
1004 	init_data(source, len, 0x25);
1005 	init_data(target, len, 0);
1006 
1007 	sz = fi_writev(ep[0], &iov, (void **)loc_mr, 1,
1008 			   gni_addr[1],
1009 			   _REM_ADDR(fi, target, target), mr_key[1],
1010 			   target);
1011 	cr_assert_eq(sz, 0);
1012 
1013 	while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
1014 		pthread_yield();
1015 	}
1016 
1017 	if (dgm_fail) {
1018 		cr_assert_eq(ret, -FI_EAVAIL);
1019 		return;
1020 	}
1021 
1022 	cr_assert_eq(ret, 1);
1023 	rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
1024 
1025 	w[0] = 1;
1026 	rdm_rma_check_cntrs(w, r, w_e, r_e);
1027 
1028 	dbg_printf("got write context event!\n");
1029 
1030 	cr_assert(check_data(source, target, len), "Data mismatch");
1031 }
1032 
/* fi_writev over each power-of-two size from 8 bytes to BUF_SZ. */
Test(rdm_rma_stx_basic, writev)
{
	xfer_for_each_size(do_writev, 8, BUF_SZ);
}

/* Same, with one injected GNI error to force the retransmit path. */
Test(rdm_rma_stx_basic, writev_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_writev, 8, BUF_SZ);
}

Test(dgram_rma_stx_basic, writev)
{
	xfer_for_each_size(do_writev, 8, BUF_SZ);
}

/* Datagram EPs cannot retransmit: expect -FI_EAVAIL (dgm_fail). */
Test(dgram_rma_stx_basic, writev_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_writev, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_basic, writev)
{
	xfer_for_each_size(do_writev, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_basic, writev_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_writev, 8, BUF_SZ);
}
1067 
1068 /* scalable */
1069 
Test(rdm_rma_stx_scalable,writev)1070 Test(rdm_rma_stx_scalable, writev)
1071 {
1072 	xfer_for_each_size(do_writev, 8, BUF_SZ);
1073 }
1074 
Test(rdm_rma_stx_scalable,writev_retrans)1075 Test(rdm_rma_stx_scalable, writev_retrans)
1076 {
1077 	err_inject_enable();
1078 	xfer_for_each_size(do_writev, 8, BUF_SZ);
1079 }
1080 
Test(dgram_rma_stx_scalable,writev)1081 Test(dgram_rma_stx_scalable, writev)
1082 {
1083 	xfer_for_each_size(do_writev, 8, BUF_SZ);
1084 }
1085 
Test(dgram_rma_stx_scalable,writev_retrans)1086 Test(dgram_rma_stx_scalable, writev_retrans)
1087 {
1088 	dgm_fail = 1;
1089 	err_inject_enable();
1090 	xfer_for_each_size(do_writev, 8, BUF_SZ);
1091 }
1092 
Test(dgram_rma_1dom_stx_scalable,writev)1093 Test(dgram_rma_1dom_stx_scalable, writev)
1094 {
1095 	xfer_for_each_size(do_writev, 8, BUF_SZ);
1096 }
1097 
Test(dgram_rma_1dom_stx_scalable,writev_retrans)1098 Test(dgram_rma_1dom_stx_scalable, writev_retrans)
1099 {
1100 	dgm_fail = 1;
1101 	err_inject_enable();
1102 	xfer_for_each_size(do_writev, 8, BUF_SZ);
1103 }
1104 
do_writemsg(int len)1105 static void do_writemsg(int len)
1106 {
1107 	int ret;
1108 	ssize_t sz;
1109 	struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
1110 					  (void *) -1, UINT_MAX, UINT_MAX };
1111 	struct iovec iov;
1112 	struct fi_msg_rma msg;
1113 	struct fi_rma_iov rma_iov;
1114 	uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
1115 
1116 	iov.iov_base = source;
1117 	iov.iov_len = len;
1118 
1119 	rma_iov.addr = _REM_ADDR(fi, target, target);
1120 	rma_iov.len = len;
1121 	rma_iov.key = mr_key[1];
1122 
1123 	msg.msg_iov = &iov;
1124 	msg.desc = (void **)loc_mr;
1125 	msg.iov_count = 1;
1126 	msg.addr = gni_addr[1];
1127 	msg.rma_iov = &rma_iov;
1128 	msg.rma_iov_count = 1;
1129 	msg.context = target;
1130 	msg.data = (uint64_t)target;
1131 
1132 	init_data(source, len, 0xef);
1133 	init_data(target, len, 0);
1134 	sz = fi_writemsg(ep[0], &msg, 0);
1135 	cr_assert_eq(sz, 0);
1136 
1137 	while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
1138 		pthread_yield();
1139 	}
1140 
1141 	if (dgm_fail) {
1142 		cr_assert_eq(ret, -FI_EAVAIL);
1143 		return;
1144 	}
1145 	cr_assert_eq(ret, 1);
1146 	rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
1147 
1148 	w[0] = 1;
1149 	rdm_rma_check_cntrs(w, r, w_e, r_e);
1150 
1151 	dbg_printf("got write context event!\n");
1152 
1153 	cr_assert(check_data(source, target, len), "Data mismatch");
1154 }
1155 
/*
 * Criterion registrations: run do_writemsg over sizes from 8 up to
 * BUF_SZ for each endpoint/STX suite.  "_retrans" variants enable error
 * injection; the datagram retrans variants also set dgm_fail, so
 * do_writemsg expects -FI_EAVAIL.
 */
Test(rdm_rma_stx_basic, writemsg)
{
	xfer_for_each_size(do_writemsg, 8, BUF_SZ);
}

Test(rdm_rma_stx_basic, writemsg_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_writemsg, 8, BUF_SZ);
}

Test(dgram_rma_stx_basic, writemsg)
{
	xfer_for_each_size(do_writemsg, 8, BUF_SZ);
}

Test(dgram_rma_stx_basic, writemsg_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_writemsg, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_basic, writemsg)
{
	xfer_for_each_size(do_writemsg, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_basic, writemsg_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_writemsg, 8, BUF_SZ);
}

/* scalable */

Test(rdm_rma_stx_scalable, writemsg)
{
	xfer_for_each_size(do_writemsg, 8, BUF_SZ);
}

Test(rdm_rma_stx_scalable, writemsg_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_writemsg, 8, BUF_SZ);
}

Test(dgram_rma_stx_scalable, writemsg)
{
	xfer_for_each_size(do_writemsg, 8, BUF_SZ);
}

Test(dgram_rma_stx_scalable, writemsg_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_writemsg, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_scalable, writemsg)
{
	xfer_for_each_size(do_writemsg, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_scalable, writemsg_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_writemsg, 8, BUF_SZ);
}
1227 
1228 /*
1229  * write_fence should be validated by inspecting debug.
1230  *
1231  * The following sequence of events should be seen:
1232  *
1233  * TX request processed: A
1234  * TX request queue stalled on FI_FENCE request: B
1235  * Added event: A
1236  * TX request processed: B
1237  *
1238  */
1239 
do_write_fence(int len)1240 static void do_write_fence(int len)
1241 {
1242 	int ret;
1243 	ssize_t sz;
1244 	struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
1245 					  (void *) -1, UINT_MAX, UINT_MAX };
1246 	struct iovec iov;
1247 	struct fi_msg_rma msg;
1248 	struct fi_rma_iov rma_iov;
1249 	uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
1250 
1251 	iov.iov_base = source;
1252 	iov.iov_len = len;
1253 
1254 	rma_iov.addr = _REM_ADDR(fi, target, target);
1255 	rma_iov.len = sizeof(target);
1256 	rma_iov.key = mr_key[1];
1257 
1258 	msg.msg_iov = &iov;
1259 	msg.desc = (void **)loc_mr;
1260 	msg.iov_count = 1;
1261 	msg.addr = gni_addr[1];
1262 	msg.rma_iov = &rma_iov;
1263 	msg.rma_iov_count = 1;
1264 	msg.context = target;
1265 	msg.data = (uint64_t)target;
1266 
1267 	init_data(source, len, 0xef);
1268 	init_data(target, len, 0);
1269 
1270 	/* write A */
1271 	sz = fi_writemsg(ep[0], &msg, 0);
1272 	cr_assert_eq(sz, 0);
1273 
1274 	/* write B */
1275 	sz = fi_writemsg(ep[0], &msg, FI_FENCE);
1276 	cr_assert_eq(sz, 0);
1277 
1278 	/* event A */
1279 	while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
1280 		pthread_yield();
1281 	}
1282 
1283 	if (dgm_fail) {
1284 		cr_assert_eq(ret, -FI_EAVAIL);
1285 		return;
1286 	}
1287 
1288 	cr_assert_eq(ret, 1);
1289 	rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
1290 
1291 	/* reset cqe */
1292 	cqe.op_context = cqe.buf = (void *) -1;
1293 	cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX;
1294 
1295 	/* event B */
1296 	while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
1297 		pthread_yield();
1298 	}
1299 
1300 	cr_assert_eq(ret, 1);
1301 	rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
1302 
1303 	w[0] = 2;
1304 	rdm_rma_check_cntrs(w, r, w_e, r_e);
1305 
1306 	dbg_printf("got write context event!\n");
1307 
1308 	cr_assert(check_data(source, target, len), "Data mismatch");
1309 }
1310 
/*
 * Criterion registrations: run do_write_fence over sizes from 8 up to
 * BUF_SZ for each endpoint/STX suite.  "_retrans" variants enable error
 * injection; the datagram retrans variants also set dgm_fail, so
 * do_write_fence expects -FI_EAVAIL.
 */
Test(rdm_rma_stx_basic, write_fence)
{
	xfer_for_each_size(do_write_fence, 8, BUF_SZ);
}

Test(rdm_rma_stx_basic, write_fence_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_write_fence, 8, BUF_SZ);
}

Test(dgram_rma_stx_basic, write_fence)
{
	xfer_for_each_size(do_write_fence, 8, BUF_SZ);
}

Test(dgram_rma_stx_basic, write_fence_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_write_fence, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_basic, write_fence)
{
	xfer_for_each_size(do_write_fence, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_basic, write_fence_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_write_fence, 8, BUF_SZ);
}

/* scalable */

Test(rdm_rma_stx_scalable, write_fence)
{
	xfer_for_each_size(do_write_fence, 8, BUF_SZ);
}

Test(rdm_rma_stx_scalable, write_fence_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_write_fence, 8, BUF_SZ);
}

Test(dgram_rma_stx_scalable, write_fence)
{
	xfer_for_each_size(do_write_fence, 8, BUF_SZ);
}

Test(dgram_rma_stx_scalable, write_fence_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_write_fence, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_scalable, write_fence)
{
	xfer_for_each_size(do_write_fence, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_scalable, write_fence_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_write_fence, 8, BUF_SZ);
}
1382 
1383 #define INJECT_SIZE 64
do_inject_write(int len)1384 static void do_inject_write(int len)
1385 {
1386 	ssize_t sz;
1387 	int ret, i, loops = 0;
1388 	struct fi_cq_tagged_entry cqe;
1389 
1390 	init_data(source, len, 0x23);
1391 	init_data(target, len, 0);
1392 	sz = fi_inject_write(ep[0], source, len,
1393 				 gni_addr[1],
1394 				 _REM_ADDR(fi, target, target), mr_key[1]);
1395 	cr_assert_eq(sz, 0);
1396 
1397 	for (i = 0; i < len; i++) {
1398 		loops = 0;
1399 		while (source[i] != target[i]) {
1400 			/* for progress */
1401 			ret = fi_cq_read(send_cq[0], &cqe, 1);
1402 			cr_assert(ret == -FI_EAGAIN || ret == -FI_EAVAIL,
1403 				  "Received unexpected event\n");
1404 
1405 			pthread_yield();
1406 			cr_assert(++loops < MLOOPS || dgm_fail,
1407 				  "Data mismatch");
1408 			if (dgm_fail && loops > MLOOPS)
1409 				break;
1410 		}
1411 	}
1412 	cr_assert(!dgm_fail || (dgm_fail && loops >= MLOOPS), "Should fail");
1413 }
1414 
/*
 * Criterion registrations: run do_inject_write over sizes from 8 up to
 * INJECT_SIZE.  Several variants are currently skipped due to known
 * intermittent failures.  Datagram "_retrans" variants set dgm_fail and
 * expect the data never to arrive.
 */
Test(rdm_rma_stx_basic, inject_write)
{
	/* FIXME intermittent test failures */
	cr_skip_test("intermittent test failures");
	xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
}

Test(rdm_rma_stx_basic, inject_write_retrans)
{
	/* FIXME intermittent test failures */
	cr_skip_test("intermittent test failures");
	err_inject_enable();
	xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
}

Test(dgram_rma_stx_basic, inject_write)
{
	/* FIXME intermittent test failures */
	cr_skip_test("intermittent test failures");
	xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
}

Test(dgram_rma_stx_basic, inject_write_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
}

Test(dgram_rma_1dom_stx_basic, inject_write)
{
	xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
}

Test(dgram_rma_1dom_stx_basic, inject_write_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
}

/* scalable */

Test(rdm_rma_stx_scalable, inject_write)
{
	/* FIXME intermittent test failures */
	cr_skip_test("intermittent test failures");
	xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
}

Test(rdm_rma_stx_scalable, inject_write_retrans)
{
	/* FIXME intermittent test failures */
	cr_skip_test("intermittent test failures");
	err_inject_enable();
	xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
}

Test(dgram_rma_stx_scalable, inject_write)
{
	/* FIXME intermittent test failures */
	cr_skip_test("intermittent test failures");
	xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
}

Test(dgram_rma_stx_scalable, inject_write_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
}

Test(dgram_rma_1dom_stx_scalable, inject_write)
{
	xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
}

Test(dgram_rma_1dom_stx_scalable, inject_write_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
}
1498 
do_writedata(int len)1499 static void do_writedata(int len)
1500 {
1501 	int ret;
1502 	ssize_t sz;
1503 	struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
1504 					  (void *) -1, UINT_MAX, UINT_MAX };
1505 	struct fi_cq_tagged_entry dcqe = { (void *) -1, UINT_MAX, UINT_MAX,
1506 					  (void *) -1, UINT_MAX, UINT_MAX };
1507 	uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
1508 
1509 
1510 #define WRITE_DATA 0x5123da1a145
1511 	init_data(source, len, 0x23);
1512 	init_data(target, len, 0);
1513 	sz = fi_writedata(ep[0], source, len, loc_mr[0], WRITE_DATA,
1514 			  gni_addr[1],
1515 			  _REM_ADDR(fi, target, target), mr_key[1],
1516 			  target);
1517 	cr_assert_eq(sz, 0);
1518 
1519 	while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
1520 		pthread_yield();
1521 	}
1522 
1523 	if (dgm_fail) {
1524 		cr_assert_eq(ret, -FI_EAVAIL);
1525 		return;
1526 	}
1527 
1528 	cr_assert_eq(ret, 1);
1529 	rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
1530 
1531 	w[0] = 1;
1532 	rdm_rma_check_cntrs(w, r, w_e, r_e);
1533 
1534 	dbg_printf("got write context event!\n");
1535 
1536 	cr_assert(check_data(source, target, len), "Data mismatch");
1537 
1538 	while ((ret = fi_cq_read(recv_cq[1], &dcqe, 1)) == -FI_EAGAIN) {
1539 		pthread_yield();
1540 	}
1541 	cr_assert(ret != FI_SUCCESS, "Missing remote data");
1542 
1543 	rdm_rma_check_tcqe(&dcqe, NULL,
1544 			   (FI_RMA | FI_REMOTE_WRITE | FI_REMOTE_CQ_DATA),
1545 			   WRITE_DATA, ep[1]);
1546 }
1547 
/*
 * Criterion registrations: run do_writedata over sizes from 8 up to
 * BUF_SZ for each endpoint/STX suite.  "_retrans" variants enable error
 * injection; the datagram retrans variants also set dgm_fail, so
 * do_writedata expects -FI_EAVAIL.
 */
Test(rdm_rma_stx_basic, writedata)
{
	xfer_for_each_size(do_writedata, 8, BUF_SZ);
}

Test(rdm_rma_stx_basic, writedata_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_writedata, 8, BUF_SZ);
}

Test(dgram_rma_stx_basic, writedata)
{
	xfer_for_each_size(do_writedata, 8, BUF_SZ);
}

Test(dgram_rma_stx_basic, writedata_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_writedata, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_basic, writedata)
{
	xfer_for_each_size(do_writedata, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_basic, writedata_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_writedata, 8, BUF_SZ);
}

/* scalable */

Test(rdm_rma_stx_scalable, writedata)
{
	xfer_for_each_size(do_writedata, 8, BUF_SZ);
}

Test(rdm_rma_stx_scalable, writedata_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_writedata, 8, BUF_SZ);
}

Test(dgram_rma_stx_scalable, writedata)
{
	xfer_for_each_size(do_writedata, 8, BUF_SZ);
}

Test(dgram_rma_stx_scalable, writedata_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_writedata, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_scalable, writedata)
{
	xfer_for_each_size(do_writedata, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_scalable, writedata_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_writedata, 8, BUF_SZ);
}
1619 
1620 #define INJECTWRITE_DATA 0xdededadadeadbeaf
do_inject_writedata(int len)1621 static void do_inject_writedata(int len)
1622 {
1623 	ssize_t sz;
1624 	int ret, i, loops = 0;
1625 	struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
1626 					  (void *) -1, UINT_MAX, UINT_MAX };
1627 	struct fi_cq_tagged_entry dcqe = { (void *) -1, UINT_MAX, UINT_MAX,
1628 					   (void *) -1, UINT_MAX, UINT_MAX };
1629 
1630 	init_data(source, len, 0x23);
1631 	init_data(target, len, 0);
1632 	sz = fi_inject_writedata(ep[0], source, len, INJECTWRITE_DATA,
1633 				 gni_addr[1],
1634 				 _REM_ADDR(fi, target, target), mr_key[1]);
1635 	cr_assert_eq(sz, 0);
1636 
1637 	for (i = 0; i < len; i++) {
1638 		loops = 0;
1639 		while (source[i] != target[i]) {
1640 			/* for progress */
1641 			ret = fi_cq_read(send_cq[0], &cqe, 1);
1642 			cr_assert(ret == -FI_EAGAIN || ret == -FI_EAVAIL,
1643 				  "Received unexpected event\n");
1644 
1645 			pthread_yield();
1646 			cr_assert(++loops < MLOOPS || dgm_fail,
1647 				  "Data mismatch");
1648 			if (dgm_fail && loops > MLOOPS)
1649 				break;
1650 		}
1651 	}
1652 	cr_assert(!dgm_fail || (dgm_fail && loops >= MLOOPS), "Should fail");
1653 	if (dgm_fail && loops >= MLOOPS)
1654 		return;
1655 
1656 	while ((ret = fi_cq_read(recv_cq[1], &dcqe, 1)) == -FI_EAGAIN) {
1657 		ret = fi_cq_read(send_cq[0], &cqe, 1); /* for progress */
1658 		pthread_yield();
1659 	}
1660 	cr_assert(ret != FI_SUCCESS, "Missing remote data");
1661 
1662 	rdm_rma_check_tcqe(&dcqe, NULL,
1663 			   (FI_RMA | FI_REMOTE_WRITE | FI_REMOTE_CQ_DATA),
1664 			   INJECTWRITE_DATA, ep[1]);
1665 }
1666 
/*
 * Criterion registrations: run do_inject_writedata over sizes from 8 up
 * to INJECT_SIZE.  Several variants are currently skipped due to known
 * intermittent failures.  Datagram "_retrans" variants set dgm_fail and
 * expect the data never to arrive.
 */
Test(rdm_rma_stx_basic, inject_writedata)
{
	/* FIXME intermittent test failures */
	cr_skip_test("intermittent test failures");
	xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
}

Test(rdm_rma_stx_basic, inject_writedata_retrans)
{
	/* FIXME intermittent test failures */
	cr_skip_test("intermittent test failures");
	err_inject_enable();
	xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
}

Test(dgram_rma_stx_basic, inject_writedata)
{
	/* FIXME intermittent test failures */
	cr_skip_test("intermittent test failures");
	xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
}

Test(dgram_rma_stx_basic, inject_writedata_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
}

Test(dgram_rma_1dom_stx_basic, inject_writedata)
{
	xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
}

Test(dgram_rma_1dom_stx_basic, inject_writedata_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
}

/* scalable */

Test(rdm_rma_stx_scalable, inject_writedata)
{
	/* FIXME intermittent test failures */
	cr_skip_test("intermittent test failures");
	xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
}

Test(rdm_rma_stx_scalable, inject_writedata_retrans)
{
	/* FIXME intermittent test failures */
	cr_skip_test("intermittent test failures");
	err_inject_enable();
	xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
}

Test(dgram_rma_stx_scalable, inject_writedata)
{
	/* FIXME intermittent test failures */
	cr_skip_test("intermittent test failures");
	xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
}

Test(dgram_rma_stx_scalable, inject_writedata_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
}

Test(dgram_rma_1dom_stx_scalable, inject_writedata)
{
	xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
}

Test(dgram_rma_1dom_stx_scalable, inject_writedata_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
}
1750 
do_read(int len)1751 static void do_read(int len)
1752 {
1753 	int ret;
1754 	ssize_t sz;
1755 	struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
1756 					  (void *) -1, UINT_MAX, UINT_MAX };
1757 	uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
1758 
1759 #define READ_CTX 0x4e3dda1aULL
1760 	init_data(source, len, 0);
1761 	init_data(target, len, 0xad);
1762 
1763 	/* domain 0 from domain 1 */
1764 	sz = fi_read(ep[0], source, len,
1765 			 loc_mr[0], gni_addr[1],
1766 			 _REM_ADDR(fi, target, target), mr_key[1],
1767 			 (void *)READ_CTX);
1768 	cr_assert_eq(sz, 0);
1769 
1770 	while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
1771 		pthread_yield();
1772 	}
1773 
1774 	cr_assert_eq(ret, 1);
1775 	rdm_rma_check_tcqe(&cqe, (void *)READ_CTX, FI_RMA | FI_READ, 0, ep[0]);
1776 
1777 	r[0] = 1;
1778 	rdm_rma_check_cntrs(w, r, w_e, r_e);
1779 
1780 	dbg_printf("got read context event!\n");
1781 
1782 	cr_assert(check_data(source, target, len), "Data mismatch");
1783 }
1784 
/*
 * Criterion registrations: run do_read over sizes from 8 up to BUF_SZ.
 * Only the rdm suites have a "_retrans" (error-injection) variant; the
 * datagram read tests run without failure injection.
 */
Test(rdm_rma_stx_basic, read)
{
	xfer_for_each_size(do_read, 8, BUF_SZ);
}

Test(rdm_rma_stx_basic, read_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_read, 8, BUF_SZ);
}

Test(dgram_rma_stx_basic, read)
{
	xfer_for_each_size(do_read, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_basic, read)
{
	xfer_for_each_size(do_read, 8, BUF_SZ);
}

/* scalable */

Test(rdm_rma_stx_scalable, read)
{
	xfer_for_each_size(do_read, 8, BUF_SZ);
}

Test(rdm_rma_stx_scalable, read_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_read, 8, BUF_SZ);
}

Test(dgram_rma_stx_scalable, read)
{
	xfer_for_each_size(do_read, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_scalable, read)
{
	xfer_for_each_size(do_read, 8, BUF_SZ);
}
1828 
do_readv(int len)1829 static void do_readv(int len)
1830 {
1831 	int ret;
1832 	ssize_t sz;
1833 	struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
1834 					  (void *) -1, UINT_MAX, UINT_MAX };
1835 	struct iovec iov;
1836 	uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
1837 
1838 	iov.iov_base = source;
1839 	iov.iov_len = len;
1840 
1841 	init_data(target, len, 0x25);
1842 	init_data(source, len, 0);
1843 	sz = fi_readv(ep[0], &iov, (void **)loc_mr, 1,
1844 			  gni_addr[1],
1845 			  _REM_ADDR(fi, target, target), mr_key[1],
1846 			  target);
1847 	cr_assert_eq(sz, 0);
1848 
1849 	while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
1850 		pthread_yield();
1851 	}
1852 
1853 	cr_assert_eq(ret, 1);
1854 	rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_READ, 0, ep[0]);
1855 
1856 	r[0] = 1;
1857 	rdm_rma_check_cntrs(w, r, w_e, r_e);
1858 
1859 	dbg_printf("got write context event!\n");
1860 
1861 	cr_assert(check_data(source, target, len), "Data mismatch");
1862 }
1863 
/*
 * Criterion registrations: run do_readv over sizes from 8 up to BUF_SZ.
 * Only the rdm suites have a "_retrans" (error-injection) variant.
 */
Test(rdm_rma_stx_basic, readv)
{
	xfer_for_each_size(do_readv, 8, BUF_SZ);
}

Test(rdm_rma_stx_basic, readv_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_readv, 8, BUF_SZ);
}

Test(dgram_rma_stx_basic, readv)
{
	xfer_for_each_size(do_readv, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_basic, readv)
{
	xfer_for_each_size(do_readv, 8, BUF_SZ);
}

/* scalable */

Test(rdm_rma_stx_scalable, readv)
{
	xfer_for_each_size(do_readv, 8, BUF_SZ);
}

Test(rdm_rma_stx_scalable, readv_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_readv, 8, BUF_SZ);
}

Test(dgram_rma_stx_scalable, readv)
{
	xfer_for_each_size(do_readv, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_scalable, readv)
{
	xfer_for_each_size(do_readv, 8, BUF_SZ);
}
1907 
do_readmsg(int len)1908 static void do_readmsg(int len)
1909 {
1910 	int ret;
1911 	ssize_t sz;
1912 	struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
1913 					  (void *) -1, UINT_MAX, UINT_MAX };
1914 	struct iovec iov;
1915 	struct fi_msg_rma msg;
1916 	struct fi_rma_iov rma_iov;
1917 	uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
1918 
1919 	iov.iov_base = source;
1920 	iov.iov_len = len;
1921 
1922 	rma_iov.addr = _REM_ADDR(fi, target, target);
1923 	rma_iov.len = len;
1924 	rma_iov.key = mr_key[1];
1925 
1926 	msg.msg_iov = &iov;
1927 	msg.desc = (void **)loc_mr;
1928 	msg.iov_count = 1;
1929 	msg.addr = gni_addr[1];
1930 	msg.rma_iov = &rma_iov;
1931 	msg.rma_iov_count = 1;
1932 	msg.context = target;
1933 	msg.data = (uint64_t)target;
1934 
1935 	init_data(target, len, 0xef);
1936 	init_data(source, len, 0);
1937 	sz = fi_readmsg(ep[0], &msg, 0);
1938 	cr_assert_eq(sz, 0);
1939 
1940 	while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
1941 		pthread_yield();
1942 	}
1943 
1944 	cr_assert_eq(ret, 1);
1945 	rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_READ, 0, ep[0]);
1946 
1947 	r[0] = 1;
1948 	rdm_rma_check_cntrs(w, r, w_e, r_e);
1949 
1950 	dbg_printf("got write context event!\n");
1951 
1952 	cr_assert(check_data(source, target, len), "Data mismatch");
1953 
1954 	iov.iov_base = source;
1955 	iov.iov_len = len;
1956 
1957 	rma_iov.addr = (uint64_t)target;
1958 	rma_iov.len = len;
1959 	rma_iov.key = mr_key[0];
1960 
1961 	msg.msg_iov = &iov;
1962 	msg.desc = (void **)(loc_mr + 1);
1963 	msg.iov_count = 1;
1964 	msg.addr = gni_addr[0];
1965 	msg.rma_iov = &rma_iov;
1966 	msg.rma_iov_count = 1;
1967 	msg.context = target;
1968 	msg.data = (uint64_t)target;
1969 }
1970 
/*
 * Criterion registrations: run do_readmsg over sizes from 8 up to
 * BUF_SZ.  Only the rdm suites have a "_retrans" (error-injection)
 * variant.
 */
Test(rdm_rma_stx_basic, readmsg)
{
	xfer_for_each_size(do_readmsg, 8, BUF_SZ);
}

Test(rdm_rma_stx_basic, readmsg_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_readmsg, 8, BUF_SZ);
}

Test(dgram_rma_stx_basic, readmsg)
{
	xfer_for_each_size(do_readmsg, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_basic, readmsg)
{
	xfer_for_each_size(do_readmsg, 8, BUF_SZ);
}

/* scalable */

Test(rdm_rma_stx_scalable, readmsg)
{
	xfer_for_each_size(do_readmsg, 8, BUF_SZ);
}

Test(rdm_rma_stx_scalable, readmsg_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_readmsg, 8, BUF_SZ);
}

Test(dgram_rma_stx_scalable, readmsg)
{
	xfer_for_each_size(do_readmsg, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_scalable, readmsg)
{
	xfer_for_each_size(do_readmsg, 8, BUF_SZ);
}
2014 
inject_common(void)2015 static void inject_common(void)
2016 {
2017 	int ret;
2018 	ssize_t sz;
2019 	struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
2020 					  (void *) -1, UINT_MAX, UINT_MAX };
2021 	struct iovec iov;
2022 	struct fi_msg_rma msg;
2023 	struct fi_rma_iov rma_iov;
2024 	uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
2025 
2026 	iov.iov_base = source;
2027 	iov.iov_len = GNIX_INJECT_SIZE;
2028 
2029 	rma_iov.addr = _REM_ADDR(fi, target, target);
2030 	rma_iov.len = GNIX_INJECT_SIZE;
2031 	rma_iov.key = mr_key[1];
2032 
2033 	msg.msg_iov = &iov;
2034 	msg.desc = (void **)loc_mr;
2035 	msg.iov_count = 1;
2036 	msg.addr = gni_addr[1];
2037 	msg.rma_iov = &rma_iov;
2038 	msg.rma_iov_count = 1;
2039 	msg.context = target;
2040 	msg.data = (uint64_t)target;
2041 
2042 	init_data(source, GNIX_INJECT_SIZE, 0xef);
2043 	init_data(target, GNIX_INJECT_SIZE, 0);
2044 
2045 	sz = fi_writemsg(ep[0], &msg, FI_INJECT);
2046 	cr_assert_eq(sz, 0);
2047 
2048 	iov.iov_len = GNIX_INJECT_SIZE+1;
2049 	sz = fi_writemsg(ep[0], &msg, FI_INJECT);
2050 	cr_assert_eq(sz, -FI_EINVAL);
2051 
2052 	while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
2053 		pthread_yield();
2054 	}
2055 
2056 	cr_assert_eq(ret, 1);
2057 	rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
2058 
2059 	w[0] = 1;
2060 	rdm_rma_check_cntrs(w, r, w_e, r_e);
2061 
2062 	dbg_printf("got write context event!\n");
2063 
2064 	cr_assert(check_data(source, target, GNIX_INJECT_SIZE),
2065 		  "Data mismatch");
2066 }
2067 
/*
 * Criterion registrations: inject_common() runs once per suite at a
 * fixed size (GNIX_INJECT_SIZE); no retrans variants exist here.
 */
Test(rdm_rma_stx_basic, inject)
{
	inject_common();
}

Test(dgram_rma_stx_basic, inject)
{
	inject_common();
}

Test(dgram_rma_1dom_stx_basic, inject)
{
	inject_common();
}

/* scalable */

Test(rdm_rma_stx_scalable, inject)
{
	inject_common();
}

Test(dgram_rma_stx_scalable, inject)
{
	inject_common();
}

Test(dgram_rma_1dom_stx_scalable, inject)
{
	inject_common();
}
2099 
do_write_autoreg(int len)2100 static void do_write_autoreg(int len)
2101 {
2102 	int ret;
2103 	ssize_t sz;
2104 	struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
2105 					  (void *) -1, UINT_MAX, UINT_MAX };
2106 	uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
2107 
2108 	init_data(source, len, 0xab);
2109 	init_data(target, len, 0);
2110 	sz = fi_write(ep[0], source, len,
2111 			  NULL, gni_addr[1],
2112 			  _REM_ADDR(fi, target, target), mr_key[1],
2113 			  target);
2114 	cr_assert_eq(sz, 0);
2115 
2116 	while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
2117 		pthread_yield();
2118 	}
2119 
2120 	cr_assert_eq(ret, 1);
2121 	rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
2122 
2123 	w[0] = 1;
2124 	rdm_rma_check_cntrs(w, r, w_e, r_e);
2125 
2126 	dbg_printf("got write context event!\n");
2127 
2128 	cr_assert(check_data(source, target, len), "Data mismatch");
2129 }
2130 
/* Auto-registration write tests (do_write_autoreg) over sizes 8..BUF_SZ,
 * basic MR mode. */
Test(rdm_rma_stx_basic, write_autoreg)
{
	xfer_for_each_size(do_write_autoreg, 8, BUF_SZ);
}

Test(dgram_rma_stx_basic, write_autoreg)
{
	xfer_for_each_size(do_write_autoreg, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_basic, write_autoreg)
{
	xfer_for_each_size(do_write_autoreg, 8, BUF_SZ);
}

/* scalable */

Test(rdm_rma_stx_scalable, write_autoreg)
{
	xfer_for_each_size(do_write_autoreg, 8, BUF_SZ);
}

Test(dgram_rma_stx_scalable, write_autoreg)
{
	xfer_for_each_size(do_write_autoreg, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_scalable, write_autoreg)
{
	xfer_for_each_size(do_write_autoreg, 8, BUF_SZ);
}
2162 
do_write_autoreg_uncached(int len)2163 static void do_write_autoreg_uncached(int len)
2164 {
2165 	int ret;
2166 	ssize_t sz;
2167 	struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
2168 					  (void *) -1, UINT_MAX, UINT_MAX };
2169 	uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
2170 
2171 	init_data(uc_source, len, 0xab);
2172 	init_data(target, len, 0);
2173 	sz = fi_write(ep[0], uc_source, len,
2174 			  NULL, gni_addr[1],
2175 			  _REM_ADDR(fi, target, target), mr_key[1],
2176 			  target);
2177 	cr_assert_eq(sz, 0);
2178 
2179 	while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
2180 		pthread_yield();
2181 	}
2182 
2183 	cr_assert_eq(ret, 1);
2184 	rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
2185 
2186 	w[0] = 1;
2187 	rdm_rma_check_cntrs(w, r, w_e, r_e);
2188 
2189 	dbg_printf("got write context event!\n");
2190 
2191 	cr_assert(check_data(uc_source, target, len), "Data mismatch");
2192 }
2193 
/* Uncached auto-registration write tests (do_write_autoreg_uncached)
 * over sizes 8..BUF_SZ. */
Test(rdm_rma_stx_basic, write_autoreg_uncached)
{
	xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ);
}

Test(dgram_rma_stx_basic, write_autoreg_uncached)
{
	xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_basic, write_autoreg_uncached)
{
	xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ);
}

/* scalable */

Test(rdm_rma_stx_scalable, write_autoreg_uncached)
{
	xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ);
}

Test(dgram_rma_stx_scalable, write_autoreg_uncached)
{
	xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ);
}

Test(dgram_rma_1dom_stx_scalable, write_autoreg_uncached)
{
	xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ);
}
2225 
do_write_error(int len)2226 static void do_write_error(int len)
2227 {
2228 	int ret;
2229 	ssize_t sz;
2230 	struct fi_cq_tagged_entry cqe;
2231 	struct fi_cq_err_entry err_cqe = {0};
2232 
2233 	err_cqe.err_data_size = 0;
2234 	uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
2235 
2236 	init_data(source, len, 0xab);
2237 	init_data(target, len, 0);
2238 	sz = fi_write(ep[0], source, len,
2239 			  loc_mr[0], gni_addr[1],
2240 			  _REM_ADDR(fi, target, target), mr_key[1],
2241 			  target);
2242 	cr_assert_eq(sz, 0);
2243 
2244 	while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
2245 		pthread_yield();
2246 	}
2247 
2248 	cr_assert_eq(ret, -FI_EAVAIL);
2249 
2250 	ret = fi_cq_readerr(send_cq[0], &err_cqe, 0);
2251 	cr_assert_eq(ret, 1);
2252 
2253 	cr_assert((uint64_t)err_cqe.op_context == (uint64_t)target,
2254 		  "Bad error context");
2255 	cr_assert(err_cqe.flags == (FI_RMA | FI_WRITE));
2256 	cr_assert(err_cqe.len == 0, "Bad error len");
2257 	cr_assert(err_cqe.buf == 0, "Bad error buf");
2258 	cr_assert(err_cqe.data == 0, "Bad error data");
2259 	cr_assert(err_cqe.tag == 0, "Bad error tag");
2260 	cr_assert(err_cqe.olen == 0, "Bad error olen");
2261 	cr_assert(err_cqe.err == FI_ECANCELED, "Bad error errno");
2262 	cr_assert(err_cqe.prov_errno == gnixu_to_fi_errno(GNI_RC_TRANSACTION_ERROR),
2263 		  "Bad prov errno");
2264 	cr_assert(err_cqe.err_data == NULL, "Bad error provider data");
2265 
2266 	w_e[0] = 1;
2267 	rdm_rma_check_cntrs(w, r, w_e, r_e);
2268 }
2269 
__write_error(void)2270 static inline void __write_error(void)
2271 {
2272 	int ret, max_retrans_val = 1;
2273 
2274 	ret = gni_domain_ops[0]->set_val(&dom[0]->fid, GNI_MAX_RETRANSMITS,
2275 					 &max_retrans_val);
2276 	cr_assert(!ret, "setval(GNI_MAX_RETRANSMITS)");
2277 
2278 	ret = gni_domain_ops[1]->set_val(&dom[1]->fid, GNI_MAX_RETRANSMITS,
2279 					 &max_retrans_val);
2280 	cr_assert(!ret, "setval(GNI_MAX_RETRANSMITS)");
2281 	err_inject_enable();
2282 
2283 	xfer_for_each_size(do_write_error, 8, BUF_SZ);
2284 }
2285 
/* Failing-write tests (__write_error); dgram 1-dom variants omitted
 * since error injection spans two domains. */
Test(rdm_rma_stx_basic, write_error)
{
	__write_error();
}

Test(rdm_rma_stx_scalable, write_error)
{
	__write_error();
}

Test(dgram_rma_stx_basic, write_error)
{
	__write_error();
}

Test(dgram_rma_stx_scalable, write_error)
{
	__write_error();
}
2305 
do_read_error(int len)2306 static void do_read_error(int len)
2307 {
2308 	int ret;
2309 	ssize_t sz;
2310 	struct fi_cq_tagged_entry cqe;
2311 	struct fi_cq_err_entry err_cqe = {0};
2312 
2313 	err_cqe.err_data_size = 0;
2314 	uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
2315 
2316 	init_data(source, len, 0);
2317 	init_data(target, len, 0xad);
2318 	sz = fi_read(ep[0], source, len,
2319 			 loc_mr[0], gni_addr[1],
2320 			 _REM_ADDR(fi, target, target), mr_key[1],
2321 			 (void *)READ_CTX);
2322 	cr_assert_eq(sz, 0);
2323 
2324 	while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
2325 		pthread_yield();
2326 	}
2327 
2328 	cr_assert_eq(ret, -FI_EAVAIL);
2329 
2330 	ret = fi_cq_readerr(send_cq[0], &err_cqe, 0);
2331 	cr_assert_eq(ret, 1);
2332 
2333 	cr_assert((uint64_t)err_cqe.op_context == (uint64_t)READ_CTX,
2334 		  "Bad error context");
2335 	cr_assert(err_cqe.flags == (FI_RMA | FI_READ));
2336 	cr_assert(err_cqe.len == 0, "Bad error len");
2337 	cr_assert(err_cqe.buf == 0, "Bad error buf");
2338 	cr_assert(err_cqe.data == 0, "Bad error data");
2339 	cr_assert(err_cqe.tag == 0, "Bad error tag");
2340 	cr_assert(err_cqe.olen == 0, "Bad error olen");
2341 	cr_assert(err_cqe.err == FI_ECANCELED, "Bad error errno");
2342 	cr_assert(err_cqe.prov_errno == gnixu_to_fi_errno(GNI_RC_TRANSACTION_ERROR),
2343 		  "Bad prov errno");
2344 	cr_assert(err_cqe.err_data == NULL, "Bad error provider data");
2345 
2346 	r_e[0] = 1;
2347 	rdm_rma_check_cntrs(w, r, w_e, r_e);
2348 }
2349 
__read_error(void)2350 static inline void __read_error(void)
2351 {
2352 	int ret, max_retrans_val = 1;
2353 
2354 	ret = gni_domain_ops[0]->set_val(&dom[0]->fid, GNI_MAX_RETRANSMITS,
2355 					 &max_retrans_val);
2356 	cr_assert(!ret, "setval(GNI_MAX_RETRANSMITS)");
2357 
2358 	ret = gni_domain_ops[1]->set_val(&dom[1]->fid, GNI_MAX_RETRANSMITS,
2359 					 &max_retrans_val);
2360 	cr_assert(!ret, "setval(GNI_MAX_RETRANSMITS)");
2361 	err_inject_enable();
2362 
2363 	xfer_for_each_size(do_read_error, 8, BUF_SZ);
2364 }
2365 
/* Failing-read tests (__read_error), rdm endpoints only. */
Test(rdm_rma_stx_basic, read_error)
{
	__read_error();
}

Test(rdm_rma_stx_scalable, read_error)
{
	__read_error();
}
2375 
do_read_buf(void * s,void * t,int len)2376 static void do_read_buf(void *s, void *t, int len)
2377 {
2378 	int ret;
2379 	ssize_t sz;
2380 	struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
2381 					  (void *) -1, UINT_MAX, UINT_MAX };
2382 	uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
2383 
2384 #define READ_CTX 0x4e3dda1aULL
2385 	init_data(s, len, 0);
2386 	init_data(t, len, 0xad);
2387 	sz = fi_read(ep[0], s, len, NULL, gni_addr[1],
2388 			 _REM_ADDR(fi, target, t), mr_key[1],
2389 			 (void *)READ_CTX);
2390 	cr_assert_eq(sz, 0);
2391 
2392 	while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
2393 		pthread_yield();
2394 	}
2395 
2396 	cr_assert_eq(ret, 1);
2397 	rdm_rma_check_tcqe(&cqe, (void *)READ_CTX, FI_RMA | FI_READ, 0, ep[0]);
2398 
2399 	r[0] = 1;
2400 	rdm_rma_check_cntrs(w, r, w_e, r_e);
2401 
2402 	dbg_printf("got read context event!\n");
2403 
2404 	cr_assert(check_data(s, t, len), "Data mismatch");
2405 }
2406 
do_read_alignment(int len)2407 static void do_read_alignment(int len)
2408 {
2409 	int s_off, t_off, l_off;
2410 
2411 	for (s_off = 0; s_off < 7; s_off++) {
2412 		for (t_off = 0; t_off < 7; t_off++) {
2413 			for (l_off = 0; l_off < 7; l_off++) {
2414 				do_read_buf(source + s_off,
2415 						target + t_off,
2416 						len + l_off);
2417 			}
2418 		}
2419 	}
2420 }
2421 
/* Unaligned-read tests (do_read_alignment); the retrans variants also
 * enable error injection to exercise the retransmit path. */
Test(rdm_rma_stx_basic, read_alignment)
{
	xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1));
}

Test(rdm_rma_stx_basic, read_alignment_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1));
}

Test(dgram_rma_stx_basic, read_alignment)
{
	xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1));
}

Test(dgram_rma_1dom_stx_basic, read_alignment)
{
	xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1));
}

/* scalable */

Test(rdm_rma_stx_scalable, read_alignment)
{
	xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1));
}

Test(rdm_rma_stx_scalable, read_alignment_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1));
}

Test(dgram_rma_stx_scalable, read_alignment)
{
	xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1));
}

Test(dgram_rma_1dom_stx_scalable, read_alignment)
{
	xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1));
}
2465 
/*
 * Write 'len' bytes from local buffer s to remote buffer t (NULL desc,
 * so the source is auto-registered) and verify completion and payload.
 *
 * When the file-scope dgm_fail flag is set, the datagram transfer is
 * expected to fail instead: the loop drains exactly one error
 * completion from the CQ and the function returns without checking
 * data or counters.
 */
static void do_write_buf(void *s, void *t, int len)
{
	int ret;
	ssize_t sz;
	struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
					  (void *) -1, UINT_MAX, UINT_MAX };
	struct fi_cq_err_entry cq_err;
	/* one injected error expected per transfer in dgm_fail mode */
	int errors_to_read = (dgm_fail) ? 1 : 0;
	uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};

	init_data(s, len, 0xab);
	init_data(t, len, 0);
	sz = fi_write(ep[0], s, len, NULL, gni_addr[1],
			  _REM_ADDR(fi, target, t), mr_key[1], t);
	cr_assert_eq(sz, 0);

	/* body runs at least once so the success path (errors_to_read == 0)
	 * still reaps its single completion */
	do {
		while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
			pthread_yield();
		}

		if (dgm_fail) {
			cr_assert_eq(ret, -FI_EAVAIL);

			ret = fi_cq_readerr(send_cq[0], &cq_err, 0);
			cr_assert_eq(ret, 1);

			errors_to_read--;
		}
	} while (errors_to_read > 0);

	/* failure mode: nothing was delivered, so skip data checks */
	if (dgm_fail)
		return;

	cr_assert_eq(ret, 1);
	rdm_rma_check_tcqe(&cqe, t, FI_RMA | FI_WRITE, 0, ep[0]);

	w[0] = 1;
	rdm_rma_check_cntrs(w, r, w_e, r_e);

	dbg_printf("got write context event!\n");

	cr_assert(check_data(s, t, len), "Data mismatch");
}
2510 
do_write_alignment(int len)2511 static void do_write_alignment(int len)
2512 {
2513 	int s_off, t_off, l_off;
2514 
2515 	for (s_off = 0; s_off < 7; s_off++) {
2516 		for (t_off = 0; t_off < 7; t_off++) {
2517 			for (l_off = 0; l_off < 7; l_off++) {
2518 				do_write_buf(source + s_off,
2519 						 target + t_off,
2520 						 len + l_off);
2521 			}
2522 		}
2523 	}
2524 }
2525 
/* Unaligned-write tests (do_write_alignment); retrans variants enable
 * error injection, and dgram retrans variants additionally set dgm_fail
 * so do_write_buf expects error completions. */
Test(rdm_rma_stx_basic, write_alignment)
{
	xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
}

Test(rdm_rma_stx_basic, write_alignment_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
}

Test(dgram_rma_stx_basic, write_alignment)
{
	xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
}

Test(dgram_rma_stx_basic, write_alignment_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
}

Test(dgram_rma_1dom_stx_basic, write_alignment)
{
	xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
}

Test(dgram_rma_1dom_stx_basic, write_alignment_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
}

/* scalable */

Test(rdm_rma_stx_scalable, write_alignment)
{
	xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
}

Test(rdm_rma_stx_scalable, write_alignment_retrans)
{
	err_inject_enable();
	xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
}

Test(dgram_rma_stx_scalable, write_alignment)
{
	xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
}

Test(dgram_rma_stx_scalable, write_alignment_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
}

Test(dgram_rma_1dom_stx_scalable, write_alignment)
{
	xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
}

Test(dgram_rma_1dom_stx_scalable, write_alignment_retrans)
{
	dgm_fail = 1;
	err_inject_enable();
	xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
}
2597 
/*
 * Post four identical triggered RMA writes whose thresholds are chosen
 * so they fire out of posting order, then verify each completes against
 * the expected context ordering.
 *
 * Thresholds: t_ctx[3]=0 fires first, t_ctx[0]=1 and t_ctx[2]=1 tie
 * next, t_ctx[1]=2 fires last — hence ctxs[] lists the contexts in the
 * expected completion order {3, 0, 2, 1}.
 */
static void do_trigger(int len)
{
	int ret, i;
	ssize_t sz;
	struct fi_cq_tagged_entry cqe;
	struct fi_msg_rma msg[4];
	struct iovec iov;
	struct fi_rma_iov rma_iov;
	struct fi_triggered_context t_ctx[4];
	void *ctxs[4];

	iov.iov_base = source;
	iov.iov_len = len;

	rma_iov.addr = _REM_ADDR(fi, target, target);
	rma_iov.len = len;
	rma_iov.key = mr_key[1];

	/* all four requests share the same buffers; only the triggered
	 * context (threshold) differs */
	msg[0].msg_iov = &iov;
	msg[0].desc = (void **)loc_mr;
	msg[0].iov_count = 1;
	msg[0].addr = gni_addr[1];
	msg[0].rma_iov = &rma_iov;
	msg[0].rma_iov_count = 1;
	msg[0].data = (uint64_t)target;
	msg[1] = msg[2] = msg[3] = msg[0];

	/* XXX: Req 0 is guaranteed to be sent before req 2, but req 2 will
	 * race req 0 through the network.  Fix race if needed. */
	t_ctx[0].trigger.threshold.threshold = 1;
	t_ctx[1].trigger.threshold.threshold = 2;
	t_ctx[2].trigger.threshold.threshold = 1;
	t_ctx[3].trigger.threshold.threshold = 0;
	/* expected completion order, by ascending threshold (ties in
	 * posting order) */
	ctxs[0] = &t_ctx[3];
	ctxs[1] = &t_ctx[0];
	ctxs[2] = &t_ctx[2];
	ctxs[3] = &t_ctx[1];

	for (i = 0; i < 4; i++) {
		t_ctx[i].event_type = FI_TRIGGER_THRESHOLD;
		/* all triggers key off the write counter of ep[0] */
		t_ctx[i].trigger.threshold.cntr = write_cntr[0];
		msg[i].context = &t_ctx[i];

		sz = fi_writemsg(ep[0], &msg[i], FI_TRIGGER);
		cr_assert_eq(sz, 0);
	}

	for (i = 0; i < 4; i++) {
		/* reset cqe */
		cqe.op_context = cqe.buf = (void *) -1;
		cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX;
		while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
			pthread_yield();
		}

		cr_assert_eq(ret, 1);

		rdm_rma_check_tcqe(&cqe, ctxs[i], FI_RMA | FI_WRITE, 0, ep[0]);
	}

	/* reset the trigger counter for the next size iteration */
	sz = fi_cntr_set(write_cntr[0], 0);
	cr_assert_eq(sz, 0);
}
2661 
2662 /*
2663  * TODO: fix this test.  fails sporadically
2664  */
/* Triggered-write tests; both disabled pending a fix for the
 * intermittent failures noted above. */
Test(rdm_rma_stx_basic, trigger, .disabled = true)
{
	xfer_for_each_size(do_trigger, 8, BUF_SZ);
}

Test(rdm_rma_stx_scalable, trigger, .disabled = true)
{
	/* FIXME intermittent test failures */
	cr_skip_test("intermittent test failures");
	xfer_for_each_size(do_trigger, 8, BUF_SZ);
}
2676