1 /*
2 * Copyright (c) 2015-2017 Los Alamos National Security, LLC.
3 * All rights reserved.
4 * Copyright (c) 2015-2018 Cray Inc. All rights reserved.
5 * Copyright (c) 2019 Triad National Security, LLC. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <errno.h>
39 #include <getopt.h>
40 #include <poll.h>
41 #include <time.h>
42 #include <string.h>
43 #include <pthread.h>
44
45
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <inttypes.h>
49
50 #include "gnix_vc.h"
51 #include "gnix_cm_nic.h"
52 #include "gnix_hashtable.h"
53 #include "gnix_rma.h"
54 #include "gnix_util.h"
55
56 #include <criterion/criterion.h>
57 #include "gnix_rdma_headers.h"
58 #include "common.h"
59
60 #if 1
61 #define dbg_printf(...)
62 #else
63 #define dbg_printf(...) \
64 do { \
65 printf(__VA_ARGS__); \
66 fflush(stdout); \
67 } while (0)
68 #endif
69
70 /* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */
71 static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY;
72 static struct fid_fabric *fab;
73 static struct fid_domain *dom[2];
74 struct fi_gni_ops_domain *gni_domain_ops[2];
75 static struct fid_ep *ep[2];
76 static struct fid_av *av[2];
77 static struct fi_info *hints;
78 static struct fi_info *fi;
79 void *ep_name[2];
80 size_t gni_addr[2];
81 static struct fid_cq *send_cq[2];
82 static struct fid_cq *recv_cq[2];
83 static struct fi_cq_attr cq_attr[2];
84 static struct fid_stx *stx_ctx[2];
85 static struct fid_stx *stx_ctx_too_late;
86
87 #define BUF_SZ (64*1024)
88 char *target, *target_base;
89 char *source, *source_base;
90 char *uc_source;
91 struct fid_mr *rem_mr[2], *loc_mr[2];
92 uint64_t mr_key[2];
93
94 static struct fid_cntr *write_cntr[2], *read_cntr[2];
95 static struct fid_cntr *rwrite_cntr;
96 static struct fid_cntr *rread_cntr;
97 static struct fi_cntr_attr cntr_attr = {
98 .events = FI_CNTR_EVENTS_COMP,
99 .flags = 0
100 };
101 static uint64_t writes[2] = {0}, reads[2] = {0}, write_errs[2] = {0},
102 read_errs[2] = {0};
103 #define MLOOPS 1000
104 static int dgm_fail;
105
common_setup_stx(uint32_t version,int mr_mode)106 static void common_setup_stx(uint32_t version, int mr_mode)
107 {
108 int ret = 0;
109 struct fi_av_attr attr;
110 size_t addrlen = 0;
111 int requested_key[2][2] = {{0, 0}, {0, 0} };
112 int i, j;
113 dgm_fail = 0;
114
115 hints->domain_attr->mr_mode = mr_mode;
116 hints->domain_attr->cq_data_size = 4;
117 hints->ep_attr->tx_ctx_cnt = FI_SHARED_CONTEXT;
118 hints->mode = mode_bits;
119 hints->caps |= FI_RMA | FI_READ | FI_REMOTE_READ |
120 FI_WRITE | FI_REMOTE_WRITE;
121
122 hints->fabric_attr->prov_name = strdup("gni");
123
124 ret = fi_getinfo(version, NULL, 0, 0, hints, &fi);
125 cr_assert(!ret, "fi_getinfo");
126
127 ret = fi_fabric(fi->fabric_attr, &fab, NULL);
128 cr_assert(!ret, "fi_fabric");
129
130 ret = fi_domain(fab, fi, dom, NULL);
131 cr_assert(!ret, "fi_domain");
132
133 ret = fi_open_ops(&dom[0]->fid, FI_GNI_DOMAIN_OPS_1,
134 0, (void **) gni_domain_ops, NULL);
135
136 memset(&attr, 0, sizeof(attr));
137 attr.type = FI_AV_MAP;
138 attr.count = 2;
139
140 ret = fi_av_open(dom[0], &attr, av, NULL);
141 cr_assert(!ret, "fi_av_open");
142
143 ret = fi_endpoint(dom[0], fi, &ep[0], NULL);
144 cr_assert(!ret, "fi_endpoint");
145
146 cq_attr[0].format = FI_CQ_FORMAT_TAGGED;
147 cq_attr[0].size = 1024;
148 cq_attr[0].wait_obj = 0;
149
150 ret = fi_cq_open(dom[0], cq_attr, send_cq, 0);
151 cr_assert(!ret, "fi_cq_open");
152
153 ret = fi_cq_open(dom[0], cq_attr, recv_cq, 0);
154 cr_assert(!ret, "fi_cq_open");
155
156 ret = fi_stx_context(dom[0], NULL, &stx_ctx[0], 0);
157 cr_assert(!ret, "fi_stx_context");
158
159 ret = fi_stx_context(dom[0], NULL, &stx_ctx_too_late, 0);
160 cr_assert(!ret, "fi_stx_context");
161
162 ret = fi_domain(fab, fi, dom + 1, NULL);
163 cr_assert(!ret, "fi_domain");
164
165 ret = fi_open_ops(&dom[1]->fid, FI_GNI_DOMAIN_OPS_1,
166 0, (void **) gni_domain_ops + 1, NULL);
167
168 ret = fi_av_open(dom[1], &attr, av + 1, NULL);
169 cr_assert(!ret, "fi_av_open");
170
171 ret = fi_endpoint(dom[1], fi, &ep[1], NULL);
172 cr_assert(!ret, "fi_endpoint");
173
174 ret = fi_stx_context(dom[1], NULL, &stx_ctx[1], 0);
175 cr_assert(!ret, "fi_stx_context");
176
177 cq_attr[1].format = FI_CQ_FORMAT_TAGGED;
178 cq_attr[1].size = 1024;
179 cq_attr[1].wait_obj = 0;
180
181 ret = fi_cq_open(dom[1], cq_attr + 1, send_cq + 1, 0);
182 cr_assert(!ret, "fi_cq_open");
183
184 ret = fi_cq_open(dom[1], cq_attr + 1, recv_cq + 1, 0);
185 cr_assert(!ret, "fi_cq_open");
186
187 /*
188 * imitate shmem, etc. use FI_WRITE for bind
189 * flag
190 */
191 ret = fi_ep_bind(ep[0], &send_cq[0]->fid, FI_TRANSMIT);
192 cr_assert(!ret, "fi_ep_bind");
193
194 ret = fi_ep_bind(ep[0], &recv_cq[0]->fid, FI_RECV);
195 cr_assert(!ret, "fi_ep_bind");
196
197 ret = fi_ep_bind(ep[0], &stx_ctx[0]->fid, 0);
198 cr_assert(!ret, "fi_ep_bind stx");
199
200 /*
201 * this shouldn't work, wrong domain
202 */
203
204 ret = fi_ep_bind(ep[0], &stx_ctx[1]->fid, 0);
205 cr_assert_eq(ret, -FI_EINVAL);
206
207 ret = fi_getname(&ep[0]->fid, NULL, &addrlen);
208 cr_assert(addrlen > 0);
209
210 ep_name[0] = malloc(addrlen);
211 cr_assert(ep_name[0] != NULL);
212
213 ret = fi_getname(&ep[0]->fid, ep_name[0], &addrlen);
214 cr_assert(ret == FI_SUCCESS);
215
216 ret = fi_ep_bind(ep[1], &send_cq[1]->fid, FI_TRANSMIT);
217 cr_assert(!ret, "fi_ep_bind");
218
219 ret = fi_ep_bind(ep[1], &recv_cq[1]->fid, FI_RECV);
220 cr_assert(!ret, "fi_ep_bind");
221
222 ret = fi_ep_bind(ep[1], &stx_ctx[1]->fid, 0);
223 cr_assert(!ret, "fi_ep_bind stx");
224
225 ret = fi_getname(&ep[1]->fid, NULL, &addrlen);
226 cr_assert(addrlen > 0);
227
228 ep_name[1] = malloc(addrlen);
229 cr_assert(ep_name[1] != NULL);
230
231 ret = fi_getname(&ep[1]->fid, ep_name[1], &addrlen);
232 cr_assert(ret == FI_SUCCESS);
233
234 ret = fi_av_insert(av[0], ep_name[0], 1, &gni_addr[0], 0,
235 NULL);
236 cr_assert(ret == 1);
237 ret = fi_av_insert(av[0], ep_name[1], 1, &gni_addr[1], 0,
238 NULL);
239 cr_assert(ret == 1);
240
241 ret = fi_av_insert(av[1], ep_name[0], 1, &gni_addr[0], 0,
242 NULL);
243 cr_assert(ret == 1);
244 ret = fi_av_insert(av[1], ep_name[1], 1, &gni_addr[1], 0,
245 NULL);
246 cr_assert(ret == 1);
247
248 ret = fi_ep_bind(ep[0], &av[0]->fid, 0);
249 cr_assert(!ret, "fi_ep_bind");
250
251 ret = fi_ep_bind(ep[1], &av[1]->fid, 0);
252 cr_assert(!ret, "fi_ep_bind");
253
254 target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ));
255 assert(target_base);
256 target = GNIT_ALIGN_BUFFER(char *, target_base);
257
258 source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ));
259 assert(source_base);
260 source = GNIT_ALIGN_BUFFER(char *, source_base);
261
262 if (USING_SCALABLE(fi)) {
263 for (i = 0; i < 2; i++)
264 for (j = 0; j < 2; j++)
265 requested_key[i][j] = (i * 2) + j;
266 }
267
268 ret = fi_mr_reg(dom[0],
269 target,
270 BUF_SZ,
271 FI_REMOTE_WRITE,
272 0,
273 requested_key[0][0],
274 0,
275 &rem_mr[0],
276 &target);
277 cr_assert_eq(ret, 0);
278 ret = fi_mr_reg(dom[1],
279 target,
280 BUF_SZ,
281 FI_REMOTE_WRITE,
282 0,
283 requested_key[1][0],
284 0,
285 &rem_mr[1],
286 &target);
287 cr_assert_eq(ret, 0);
288
289 ret = fi_mr_reg(dom[0],
290 source,
291 BUF_SZ,
292 FI_REMOTE_WRITE,
293 0,
294 requested_key[0][1],
295 0,
296 &loc_mr[0],
297 &source);
298 cr_assert_eq(ret, 0);
299 ret = fi_mr_reg(dom[1],
300 source,
301 BUF_SZ,
302 FI_REMOTE_WRITE,
303 0,
304 requested_key[1][1],
305 0,
306 &loc_mr[1],
307 &source);
308 cr_assert_eq(ret, 0);
309
310 if (USING_SCALABLE(fi)) {
311 for (i = 0; i < 2; i++) {
312 MR_ENABLE(rem_mr[i], target, BUF_SZ);
313 MR_ENABLE(loc_mr[i], source, BUF_SZ);
314 }
315 }
316
317 uc_source = malloc(BUF_SZ);
318 assert(uc_source);
319
320 mr_key[0] = fi_mr_key(rem_mr[0]);
321 mr_key[1] = fi_mr_key(rem_mr[1]);
322
323 ret = fi_cntr_open(dom[0], &cntr_attr, write_cntr, 0);
324 cr_assert(!ret, "fi_cntr_open");
325
326 ret = fi_ep_bind(ep[0], &write_cntr[0]->fid, FI_WRITE);
327 cr_assert(!ret, "fi_ep_bind");
328
329 ret = fi_cntr_open(dom[0], &cntr_attr, read_cntr, 0);
330 cr_assert(!ret, "fi_cntr_open");
331
332 ret = fi_ep_bind(ep[0], &read_cntr[0]->fid, FI_READ);
333 cr_assert(!ret, "fi_ep_bind");
334
335 ret = fi_cntr_open(dom[1], &cntr_attr, write_cntr + 1, 0);
336 cr_assert(!ret, "fi_cntr_open");
337
338 ret = fi_ep_bind(ep[1], &write_cntr[1]->fid, FI_WRITE);
339 cr_assert(!ret, "fi_ep_bind");
340
341 ret = fi_cntr_open(dom[1], &cntr_attr, read_cntr + 1, 0);
342 cr_assert(!ret, "fi_cntr_open");
343
344 ret = fi_ep_bind(ep[1], &read_cntr[1]->fid, FI_READ);
345 cr_assert(!ret, "fi_ep_bind");
346
347 if (hints->caps & FI_RMA_EVENT) {
348 ret = fi_cntr_open(dom[1], &cntr_attr, &rwrite_cntr, 0);
349 cr_assert(!ret, "fi_cntr_open");
350
351 ret = fi_ep_bind(ep[1], &rwrite_cntr->fid, FI_REMOTE_WRITE);
352 cr_assert(!ret, "fi_ep_bind");
353
354 ret = fi_cntr_open(dom[1], &cntr_attr, &rread_cntr, 0);
355 cr_assert(!ret, "fi_cntr_open");
356
357 ret = fi_ep_bind(ep[1], &rread_cntr->fid, FI_REMOTE_READ);
358 cr_assert(!ret, "fi_ep_bind");
359 }
360
361 ret = fi_enable(ep[0]);
362 cr_assert(!ret, "fi_ep_enable");
363
364 /*
365 * this should not work - don't allow binding of STX
366 * after the EP is enabled
367 */
368 ret = fi_ep_bind(ep[0], &stx_ctx_too_late->fid, 0);
369 cr_assert_eq(ret, -FI_EOPBADSTATE, "fi_ep_bind stx");
370
371 ret = fi_close(&stx_ctx_too_late->fid);
372 cr_assert(!ret, "failure in closing stx_ctx_too_late");
373
374 ret = fi_enable(ep[1]);
375 cr_assert(!ret, "fi_ep_enable");
376
377 }
378
common_setup_stx_1dom(uint32_t version,int mr_mode)379 static void common_setup_stx_1dom(uint32_t version, int mr_mode)
380 {
381 int ret = 0;
382 struct fi_av_attr attr;
383 size_t addrlen = 0;
384
385 dgm_fail = 0;
386
387 hints->domain_attr->mr_mode = mr_mode;
388 hints->domain_attr->cq_data_size = 4;
389 hints->ep_attr->tx_ctx_cnt = FI_SHARED_CONTEXT;
390 hints->mode = mode_bits;
391 hints->caps |= FI_RMA | FI_READ | FI_REMOTE_READ |
392 FI_WRITE | FI_REMOTE_WRITE;
393
394 hints->fabric_attr->prov_name = strdup("gni");
395
396 ret = fi_getinfo(version, NULL, 0, 0, hints, &fi);
397 cr_assert(!ret, "fi_getinfo");
398
399 ret = fi_fabric(fi->fabric_attr, &fab, NULL);
400 cr_assert(!ret, "fi_fabric");
401
402 ret = fi_domain(fab, fi, dom, NULL);
403 cr_assert(!ret, "fi_domain");
404
405 ret = fi_open_ops(&dom[0]->fid, FI_GNI_DOMAIN_OPS_1,
406 0, (void **) gni_domain_ops, NULL);
407
408 memset(&attr, 0, sizeof(attr));
409 attr.type = FI_AV_MAP;
410 attr.count = 2;
411
412 ret = fi_av_open(dom[0], &attr, av, NULL);
413 cr_assert(!ret, "fi_av_open");
414
415 ret = fi_endpoint(dom[0], fi, &ep[0], NULL);
416 cr_assert(!ret, "fi_endpoint");
417
418 cq_attr[0].format = FI_CQ_FORMAT_TAGGED;
419 cq_attr[0].size = 1024;
420 cq_attr[0].wait_obj = 0;
421
422 ret = fi_cq_open(dom[0], cq_attr, send_cq, 0);
423 cr_assert(!ret, "fi_cq_open");
424
425 ret = fi_cq_open(dom[0], cq_attr, recv_cq, 0);
426 cr_assert(!ret, "fi_cq_open");
427
428 ret = fi_stx_context(dom[0], NULL, &stx_ctx[0], 0);
429 cr_assert(!ret, "fi_stx_context");
430
431 ret = fi_stx_context(dom[0], NULL, &stx_ctx_too_late, 0);
432 cr_assert(!ret, "fi_stx_context");
433
434 ret = fi_endpoint(dom[0], fi, &ep[1], NULL);
435 cr_assert(!ret, "fi_endpoint");
436
437 cq_attr[1].format = FI_CQ_FORMAT_TAGGED;
438 cq_attr[1].size = 1024;
439 cq_attr[1].wait_obj = 0;
440
441 ret = fi_cq_open(dom[0], cq_attr + 1, send_cq + 1, 0);
442 cr_assert(!ret, "fi_cq_open");
443
444 ret = fi_cq_open(dom[0], cq_attr + 1, recv_cq + 1, 0);
445 cr_assert(!ret, "fi_cq_open");
446
447 /*
448 * imitate shmem, etc. use FI_WRITE for bind
449 * flag
450 */
451 ret = fi_ep_bind(ep[0], &send_cq[0]->fid, FI_TRANSMIT);
452 cr_assert(!ret, "fi_ep_bind");
453
454 ret = fi_ep_bind(ep[0], &recv_cq[0]->fid, FI_RECV);
455 cr_assert(!ret, "fi_ep_bind");
456
457 ret = fi_ep_bind(ep[0], &stx_ctx[0]->fid, 0);
458 cr_assert(!ret, "fi_ep_bind stx");
459
460 ret = fi_getname(&ep[0]->fid, NULL, &addrlen);
461 cr_assert(addrlen > 0);
462
463 ep_name[0] = malloc(addrlen);
464 cr_assert(ep_name[0] != NULL);
465
466 ret = fi_getname(&ep[0]->fid, ep_name[0], &addrlen);
467 cr_assert(ret == FI_SUCCESS);
468
469 ret = fi_ep_bind(ep[1], &send_cq[1]->fid, FI_TRANSMIT);
470 cr_assert(!ret, "fi_ep_bind");
471
472 ret = fi_ep_bind(ep[1], &recv_cq[1]->fid, FI_RECV);
473 cr_assert(!ret, "fi_ep_bind");
474
475 ret = fi_ep_bind(ep[1], &stx_ctx[0]->fid, 0);
476 cr_assert(!ret, "fi_ep_bind stx");
477
478 ret = fi_getname(&ep[1]->fid, NULL, &addrlen);
479 cr_assert(addrlen > 0);
480
481 ep_name[1] = malloc(addrlen);
482 cr_assert(ep_name[1] != NULL);
483
484 ret = fi_getname(&ep[1]->fid, ep_name[1], &addrlen);
485 cr_assert(ret == FI_SUCCESS);
486
487 ret = fi_av_insert(av[0], ep_name[0], 1, &gni_addr[0], 0,
488 NULL);
489 cr_assert(ret == 1);
490 ret = fi_av_insert(av[0], ep_name[1], 1, &gni_addr[1], 0,
491 NULL);
492 cr_assert(ret == 1);
493
494 ret = fi_ep_bind(ep[0], &av[0]->fid, 0);
495 cr_assert(!ret, "fi_ep_bind");
496
497 ret = fi_ep_bind(ep[1], &av[0]->fid, 0);
498 cr_assert(!ret, "fi_ep_bind");
499
500 target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ));
501 assert(target_base);
502 target = GNIT_ALIGN_BUFFER(char *, target_base);
503
504 source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ));
505 assert(source_base);
506 source = GNIT_ALIGN_BUFFER(char *, source_base);
507
508 ret = fi_mr_reg(dom[0],
509 target,
510 BUF_SZ,
511 FI_REMOTE_WRITE,
512 0,
513 (USING_SCALABLE(fi) ? 1 : 0),
514 0,
515 &rem_mr[0],
516 &target);
517 cr_assert_eq(ret, 0);
518
519 ret = fi_mr_reg(dom[0],
520 source,
521 BUF_SZ,
522 FI_REMOTE_WRITE,
523 0,
524 (USING_SCALABLE(fi) ? 2 : 0),
525 0,
526 &loc_mr[0],
527 &source);
528 cr_assert_eq(ret, 0);
529
530 if (USING_SCALABLE(fi)) {
531 MR_ENABLE(rem_mr[0], target, BUF_SZ);
532 MR_ENABLE(loc_mr[0], source, BUF_SZ);
533 }
534
535 uc_source = malloc(BUF_SZ);
536 assert(uc_source);
537
538 mr_key[0] = fi_mr_key(rem_mr[0]);
539 mr_key[1] = mr_key[0];
540
541 ret = fi_cntr_open(dom[0], &cntr_attr, write_cntr, 0);
542 cr_assert(!ret, "fi_cntr_open");
543
544 ret = fi_ep_bind(ep[0], &write_cntr[0]->fid, FI_WRITE);
545 cr_assert(!ret, "fi_ep_bind");
546
547 ret = fi_cntr_open(dom[0], &cntr_attr, read_cntr, 0);
548 cr_assert(!ret, "fi_cntr_open");
549
550 ret = fi_ep_bind(ep[0], &read_cntr[0]->fid, FI_READ);
551 cr_assert(!ret, "fi_ep_bind");
552
553 ret = fi_cntr_open(dom[0], &cntr_attr, write_cntr + 1, 0);
554 cr_assert(!ret, "fi_cntr_open");
555
556 ret = fi_ep_bind(ep[1], &write_cntr[1]->fid, FI_WRITE);
557 cr_assert(!ret, "fi_ep_bind");
558
559 ret = fi_cntr_open(dom[0], &cntr_attr, read_cntr + 1, 0);
560 cr_assert(!ret, "fi_cntr_open");
561
562 ret = fi_ep_bind(ep[1], &read_cntr[1]->fid, FI_READ);
563 cr_assert(!ret, "fi_ep_bind");
564
565 if (hints->caps & FI_RMA_EVENT) {
566 ret = fi_cntr_open(dom[0], &cntr_attr, &rwrite_cntr, 0);
567 cr_assert(!ret, "fi_cntr_open");
568
569 ret = fi_ep_bind(ep[1], &rwrite_cntr->fid, FI_REMOTE_WRITE);
570 cr_assert(!ret, "fi_ep_bind");
571
572 ret = fi_cntr_open(dom[0], &cntr_attr, &rread_cntr, 0);
573 cr_assert(!ret, "fi_cntr_open");
574
575 ret = fi_ep_bind(ep[1], &rread_cntr->fid, FI_REMOTE_READ);
576 cr_assert(!ret, "fi_ep_bind");
577 }
578
579 ret = fi_enable(ep[0]);
580 cr_assert(!ret, "fi_ep_enable");
581
582 ret = fi_enable(ep[1]);
583 cr_assert(!ret, "fi_ep_enable");
584
585 }
586
rdm_rma_basic_setup(void)587 static void rdm_rma_basic_setup(void)
588 {
589 hints = fi_allocinfo();
590 cr_assert(hints, "fi_allocinfo");
591 hints->ep_attr->type = FI_EP_RDM;
592 hints->caps = FI_RMA_EVENT;
593 common_setup_stx(fi_version(), GNIX_MR_BASIC);
594 }
595
dgram_basic_setup(void)596 static void dgram_basic_setup(void)
597 {
598 hints = fi_allocinfo();
599 cr_assert(hints, "fi_allocinfo");
600 hints->ep_attr->type = FI_EP_DGRAM;
601 hints->caps = FI_RMA_EVENT;
602 common_setup_stx(fi_version(), GNIX_MR_BASIC);
603 }
604
dgram_basic_setup_1dom(void)605 static void dgram_basic_setup_1dom(void)
606 {
607 hints = fi_allocinfo();
608 cr_assert(hints, "fi_allocinfo");
609 hints->ep_attr->type = FI_EP_DGRAM;
610 hints->caps = FI_RMA_EVENT;
611 common_setup_stx_1dom(fi_version(), GNIX_MR_BASIC);
612 }
613
rdm_rma_scalable_setup(void)614 static void rdm_rma_scalable_setup(void)
615 {
616 hints = fi_allocinfo();
617 cr_assert(hints, "fi_allocinfo");
618 hints->ep_attr->type = FI_EP_RDM;
619 hints->caps = FI_RMA_EVENT;
620 common_setup_stx(fi_version(), GNIX_MR_SCALABLE);
621 }
622
dgram_scalable_setup(void)623 static void dgram_scalable_setup(void)
624 {
625 hints = fi_allocinfo();
626 cr_assert(hints, "fi_allocinfo");
627 hints->ep_attr->type = FI_EP_DGRAM;
628 hints->caps = FI_RMA_EVENT;
629 common_setup_stx(fi_version(), GNIX_MR_SCALABLE);
630 }
631
dgram_scalable_setup_1dom(void)632 static void dgram_scalable_setup_1dom(void)
633 {
634 hints = fi_allocinfo();
635 cr_assert(hints, "fi_allocinfo");
636 hints->ep_attr->type = FI_EP_DGRAM;
637 hints->caps = FI_RMA_EVENT;
638 common_setup_stx_1dom(fi_version(), GNIX_MR_SCALABLE);
639 }
640
rdm_rma_stx_teardown(void)641 static void rdm_rma_stx_teardown(void)
642 {
643 int ret = 0;
644
645 if (hints->caps & FI_RMA_EVENT) {
646 ret = fi_close(&rwrite_cntr->fid);
647 cr_assert(!ret, "failure in closing dom[1] rwrite counter.");
648
649 ret = fi_close(&rread_cntr->fid);
650 cr_assert(!ret, "failure in closing dom[1] rread counter.");
651 }
652
653 ret = fi_close(&read_cntr[0]->fid);
654 cr_assert(!ret, "failure in closing dom[0] read counter.");
655
656 ret = fi_close(&read_cntr[1]->fid);
657 cr_assert(!ret, "failure in closing dom[1] read counter.");
658
659 ret = fi_close(&write_cntr[0]->fid);
660 cr_assert(!ret, "failure in closing dom[0] write counter.");
661
662 ret = fi_close(&write_cntr[1]->fid);
663 cr_assert(!ret, "failure in closing dom[1] write counter.");
664
665 free(uc_source);
666
667 ret = fi_close(&loc_mr[0]->fid);
668 cr_assert(!ret, "failure in closing dom[0] local mr.");
669
670 if (loc_mr[1] != NULL) {
671 ret = fi_close(&loc_mr[1]->fid);
672 cr_assert(!ret, "failure in closing dom[1] local mr.");
673 }
674
675 ret = fi_close(&rem_mr[0]->fid);
676 cr_assert(!ret, "failure in closing dom[0] remote mr.");
677
678 if (rem_mr[1] != NULL) {
679 ret = fi_close(&rem_mr[1]->fid);
680 cr_assert(!ret, "failure in closing dom[1] remote mr.");
681 }
682
683 ret = fi_close(&stx_ctx[0]->fid);
684 cr_assert(!ret, "failure in closing dom[0] stx_ctx.");
685
686 if (stx_ctx[1] != NULL) {
687 ret = fi_close(&stx_ctx[1]->fid);
688 cr_assert(!ret, "failure in closing dom[1] stx_ctx.");
689 }
690
691 free(target_base);
692 free(source_base);
693
694 ret = fi_close(&ep[0]->fid);
695 cr_assert(!ret, "failure in closing ep[0].");
696
697 ret = fi_close(&ep[1]->fid);
698 cr_assert(!ret, "failure in closing ep[1].");
699
700 ret = fi_close(&recv_cq[0]->fid);
701 cr_assert(!ret, "failure in dom[0] recv cq.");
702
703 ret = fi_close(&recv_cq[1]->fid);
704 cr_assert(!ret, "failure in dom[1] recv cq.");
705
706 ret = fi_close(&send_cq[0]->fid);
707 cr_assert(!ret, "failure in dom[0] send cq.");
708
709 ret = fi_close(&send_cq[1]->fid);
710 cr_assert(!ret, "failure in dom[1] send cq.");
711
712 ret = fi_close(&av[0]->fid);
713 cr_assert(!ret, "failure in closing dom[0] av.");
714
715 if (av[1] != NULL) {
716 ret = fi_close(&av[1]->fid);
717 cr_assert(!ret, "failure in closing dom[1] av.");
718 }
719
720 ret = fi_close(&dom[0]->fid);
721 cr_assert(!ret, "failure in closing domain dom[0].");
722
723 if (dom[1] != NULL) {
724 ret = fi_close(&dom[1]->fid);
725 cr_assert(!ret,
726 "failure in closing domain dom[1].");
727 }
728
729 ret = fi_close(&fab->fid);
730 cr_assert(!ret, "failure in closing fabric.");
731
732 fi_freeinfo(fi);
733 fi_freeinfo(hints);
734 hints = NULL;
735 dgm_fail = 0;
736 free(ep_name[0]);
737 free(ep_name[1]);
738 }
739
init_data(char * buf,int len,char seed)740 static void init_data(char *buf, int len, char seed)
741 {
742 int i;
743
744 for (i = 0; i < len; i++) {
745 buf[i] = seed++;
746 }
747 }
748
check_data(char * buf1,char * buf2,int len)749 static int check_data(char *buf1, char *buf2, int len)
750 {
751 int i;
752
753 for (i = 0; i < len; i++) {
754 if (buf1[i] != buf2[i]) {
755 printf("data mismatch, elem: %d, b1: 0x%hhx,"
756 " b2: 0x%hhx, len: %d\n",
757 i, buf1[i], buf2[i], len);
758 return 0;
759 }
760 }
761
762 return 1;
763 }
764
rdm_rma_check_tcqe(struct fi_cq_tagged_entry * tcqe,void * ctx,uint64_t flags,uint64_t data,struct fid_ep * fid_ep)765 static void rdm_rma_check_tcqe(struct fi_cq_tagged_entry *tcqe, void *ctx,
766 uint64_t flags, uint64_t data,
767 struct fid_ep *fid_ep)
768 {
769 struct gnix_fid_ep *gnix_ep = get_gnix_ep(fid_ep);
770
771 cr_assert(tcqe->op_context == ctx, "CQE Context mismatch");
772 cr_assert(tcqe->flags == flags, "CQE flags mismatch");
773
774 /* TODO: Remove GNIX_ALLOW_FI_REMOTE_CQ_DATA and only check flags for FI_RMA_EVENT */
775 if (GNIX_ALLOW_FI_REMOTE_CQ_DATA(flags, gnix_ep->caps)) {
776 cr_assert(tcqe->data == data, "CQE data invalid");
777 } else {
778 cr_assert(tcqe->data == 0, "CQE data invalid");
779 }
780
781 cr_assert(tcqe->len == 0, "CQE length mismatch");
782 cr_assert(tcqe->buf == 0, "CQE address mismatch");
783 cr_assert(tcqe->tag == 0, "CQE tag invalid");
784 }
785
rdm_rma_check_cntrs(uint64_t w[2],uint64_t r[2],uint64_t w_e[2],uint64_t r_e[2])786 static void rdm_rma_check_cntrs(uint64_t w[2], uint64_t r[2], uint64_t w_e[2],
787 uint64_t r_e[2])
788 {
789 /* Domain 0 */
790 writes[0] += w[0];
791 reads[0] += r[0];
792 write_errs[0] += w_e[0];
793 read_errs[0] += r_e[0];
794 /*dbg_printf("%ld, %ld\n", fi_cntr_read(write_cntr[0]), writes[0]);*/
795 cr_assert(fi_cntr_read(write_cntr[0]) == writes[0], "Bad write count");
796 cr_assert(fi_cntr_read(read_cntr[0]) == reads[0], "Bad read count");
797 cr_assert(fi_cntr_readerr(write_cntr[0]) == write_errs[0],
798 "Bad write err count");
799 cr_assert(fi_cntr_readerr(read_cntr[0]) == read_errs[0],
800 "Bad read err count");
801
802 /* Domain 1 */
803 writes[1] += w[1];
804 reads[1] += r[1];
805 write_errs[1] += w_e[1];
806 read_errs[1] += r_e[1];
807 cr_assert(fi_cntr_read(write_cntr[1]) == writes[1], "Bad write count");
808 cr_assert(fi_cntr_read(read_cntr[1]) == reads[1], "Bad read count");
809 cr_assert(fi_cntr_readerr(write_cntr[1]) == write_errs[1],
810 "Bad write err count");
811 cr_assert(fi_cntr_readerr(read_cntr[1]) == read_errs[1],
812 "Bad read err count");
813
814 if (hints->caps & FI_RMA_EVENT) {
815 cr_assert(fi_cntr_read(rwrite_cntr) == writes[0],
816 "Bad rwrite count");
817 cr_assert(fi_cntr_read(rread_cntr) == reads[0],
818 "Bad rread count");
819 cr_assert(fi_cntr_readerr(rwrite_cntr) == 0,
820 "Bad rwrite err count");
821 cr_assert(fi_cntr_readerr(rread_cntr) == 0,
822 "Bad rread err count");
823 }
824 }
825
xfer_for_each_size(void (* xfer)(int len),int slen,int elen)826 static void xfer_for_each_size(void (*xfer)(int len), int slen, int elen)
827 {
828 int i;
829
830 for (i = slen; i <= elen; i *= 2) {
831 xfer(i);
832 }
833 }
834
err_inject_enable(void)835 static void err_inject_enable(void)
836 {
837 int ret, err_count_val = 1;
838
839 ret = gni_domain_ops[0]->set_val(&dom[0]->fid, GNI_ERR_INJECT_COUNT,
840 &err_count_val);
841 cr_assert(!ret, "setval(GNI_ERR_INJECT_COUNT)");
842
843 if (gni_domain_ops[1] != NULL) {
844 ret = gni_domain_ops[1]->set_val(&dom[1]->fid,
845 GNI_ERR_INJECT_COUNT,
846 &err_count_val);
847 cr_assert(!ret, "setval(GNI_ERR_INJECT_COUNT)");
848 }
849 }
850
851 /*******************************************************************************
852 * Test RMA functions
853 ******************************************************************************/
854 TestSuite(dgram_rma_stx_basic,
855 .init = dgram_basic_setup,
856 .fini = rdm_rma_stx_teardown,
857 .disabled = false);
858
859 TestSuite(rdm_rma_stx_basic,
860 .init = rdm_rma_basic_setup,
861 .fini = rdm_rma_stx_teardown,
862 .disabled = false);
863
864 TestSuite(dgram_rma_1dom_stx_basic,
865 .init = dgram_basic_setup_1dom,
866 .fini = rdm_rma_stx_teardown,
867 .disabled = false);
868
869 TestSuite(dgram_rma_stx_scalable,
870 .init = dgram_scalable_setup,
871 .fini = rdm_rma_stx_teardown,
872 .disabled = false);
873
874 TestSuite(rdm_rma_stx_scalable,
875 .init = rdm_rma_scalable_setup,
876 .fini = rdm_rma_stx_teardown,
877 .disabled = false);
878
879 TestSuite(dgram_rma_1dom_stx_scalable,
880 .init = dgram_scalable_setup_1dom,
881 .fini = rdm_rma_stx_teardown,
882 .disabled = false);
883
do_write(int len)884 static void do_write(int len)
885 {
886 int ret;
887 ssize_t sz;
888 struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
889 (void *) -1, UINT_MAX, UINT_MAX };
890 uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
891
892 init_data(source, len, 0xab);
893 init_data(target, len, 0);
894
895 sz = fi_write(ep[0], source, len,
896 loc_mr[0], gni_addr[1],
897 _REM_ADDR(fi, target, target), mr_key[1],
898 target);
899 cr_assert_eq(sz, 0);
900
901 while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
902 pthread_yield();
903 }
904
905 if (dgm_fail) {
906 cr_assert_eq(ret, -FI_EAVAIL);
907 return;
908 }
909 cr_assert_eq(ret, 1);
910 rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
911
912 w[0] = 1;
913 rdm_rma_check_cntrs(w, r, w_e, r_e);
914
915 dbg_printf("got write context event!\n");
916
917 cr_assert(check_data(source, target, len), "Data mismatch");
918 }
919
Test(rdm_rma_stx_basic,write)920 Test(rdm_rma_stx_basic, write)
921 {
922 xfer_for_each_size(do_write, 8, BUF_SZ);
923 }
924
Test(rdm_rma_stx_basic,write_retrans)925 Test(rdm_rma_stx_basic, write_retrans)
926 {
927 err_inject_enable();
928 xfer_for_each_size(do_write, 8, BUF_SZ);
929 }
930
Test(dgram_rma_stx_basic,write)931 Test(dgram_rma_stx_basic, write)
932 {
933 xfer_for_each_size(do_write, 8, BUF_SZ);
934 }
935
Test(dgram_rma_stx_basic,write_retrans)936 Test(dgram_rma_stx_basic, write_retrans)
937 {
938 dgm_fail = 1;
939 err_inject_enable();
940 xfer_for_each_size(do_write, 8, BUF_SZ);
941 }
942
Test(dgram_rma_1dom_stx_basic,write)943 Test(dgram_rma_1dom_stx_basic, write)
944 {
945 xfer_for_each_size(do_write, 8, BUF_SZ);
946 }
947
Test(dgram_rma_1dom_stx_basic,write_retrans)948 Test(dgram_rma_1dom_stx_basic, write_retrans)
949 {
950 dgm_fail = 1;
951 err_inject_enable();
952 xfer_for_each_size(do_write, 8, BUF_SZ);
953 }
954
955 /* scalable */
956
Test(rdm_rma_stx_scalable,write)957 Test(rdm_rma_stx_scalable, write)
958 {
959 xfer_for_each_size(do_write, 8, BUF_SZ);
960 }
961
Test(rdm_rma_stx_scalable,write_retrans)962 Test(rdm_rma_stx_scalable, write_retrans)
963 {
964 err_inject_enable();
965 xfer_for_each_size(do_write, 8, BUF_SZ);
966 }
967
Test(dgram_rma_stx_scalable,write)968 Test(dgram_rma_stx_scalable, write)
969 {
970 xfer_for_each_size(do_write, 8, BUF_SZ);
971 }
972
Test(dgram_rma_stx_scalable,write_retrans)973 Test(dgram_rma_stx_scalable, write_retrans)
974 {
975 dgm_fail = 1;
976 err_inject_enable();
977 xfer_for_each_size(do_write, 8, BUF_SZ);
978 }
979
Test(dgram_rma_1dom_stx_scalable,write)980 Test(dgram_rma_1dom_stx_scalable, write)
981 {
982 xfer_for_each_size(do_write, 8, BUF_SZ);
983 }
984
Test(dgram_rma_1dom_stx_scalable,write_retrans)985 Test(dgram_rma_1dom_stx_scalable, write_retrans)
986 {
987 dgm_fail = 1;
988 err_inject_enable();
989 xfer_for_each_size(do_write, 8, BUF_SZ);
990 }
991
do_writev(int len)992 static void do_writev(int len)
993 {
994 int ret;
995 ssize_t sz;
996 struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
997 (void *) -1, UINT_MAX, UINT_MAX };
998 struct iovec iov;
999 uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
1000
1001 iov.iov_base = source;
1002 iov.iov_len = len;
1003
1004 init_data(source, len, 0x25);
1005 init_data(target, len, 0);
1006
1007 sz = fi_writev(ep[0], &iov, (void **)loc_mr, 1,
1008 gni_addr[1],
1009 _REM_ADDR(fi, target, target), mr_key[1],
1010 target);
1011 cr_assert_eq(sz, 0);
1012
1013 while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
1014 pthread_yield();
1015 }
1016
1017 if (dgm_fail) {
1018 cr_assert_eq(ret, -FI_EAVAIL);
1019 return;
1020 }
1021
1022 cr_assert_eq(ret, 1);
1023 rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
1024
1025 w[0] = 1;
1026 rdm_rma_check_cntrs(w, r, w_e, r_e);
1027
1028 dbg_printf("got write context event!\n");
1029
1030 cr_assert(check_data(source, target, len), "Data mismatch");
1031 }
1032
Test(rdm_rma_stx_basic,writev)1033 Test(rdm_rma_stx_basic, writev)
1034 {
1035 xfer_for_each_size(do_writev, 8, BUF_SZ);
1036 }
1037
Test(rdm_rma_stx_basic,writev_retrans)1038 Test(rdm_rma_stx_basic, writev_retrans)
1039 {
1040 err_inject_enable();
1041 xfer_for_each_size(do_writev, 8, BUF_SZ);
1042 }
1043
Test(dgram_rma_stx_basic,writev)1044 Test(dgram_rma_stx_basic, writev)
1045 {
1046 xfer_for_each_size(do_writev, 8, BUF_SZ);
1047 }
1048
Test(dgram_rma_stx_basic,writev_retrans)1049 Test(dgram_rma_stx_basic, writev_retrans)
1050 {
1051 dgm_fail = 1;
1052 err_inject_enable();
1053 xfer_for_each_size(do_writev, 8, BUF_SZ);
1054 }
1055
Test(dgram_rma_1dom_stx_basic,writev)1056 Test(dgram_rma_1dom_stx_basic, writev)
1057 {
1058 xfer_for_each_size(do_writev, 8, BUF_SZ);
1059 }
1060
Test(dgram_rma_1dom_stx_basic,writev_retrans)1061 Test(dgram_rma_1dom_stx_basic, writev_retrans)
1062 {
1063 dgm_fail = 1;
1064 err_inject_enable();
1065 xfer_for_each_size(do_writev, 8, BUF_SZ);
1066 }
1067
1068 /* scalable */
1069
Test(rdm_rma_stx_scalable,writev)1070 Test(rdm_rma_stx_scalable, writev)
1071 {
1072 xfer_for_each_size(do_writev, 8, BUF_SZ);
1073 }
1074
Test(rdm_rma_stx_scalable,writev_retrans)1075 Test(rdm_rma_stx_scalable, writev_retrans)
1076 {
1077 err_inject_enable();
1078 xfer_for_each_size(do_writev, 8, BUF_SZ);
1079 }
1080
Test(dgram_rma_stx_scalable,writev)1081 Test(dgram_rma_stx_scalable, writev)
1082 {
1083 xfer_for_each_size(do_writev, 8, BUF_SZ);
1084 }
1085
Test(dgram_rma_stx_scalable,writev_retrans)1086 Test(dgram_rma_stx_scalable, writev_retrans)
1087 {
1088 dgm_fail = 1;
1089 err_inject_enable();
1090 xfer_for_each_size(do_writev, 8, BUF_SZ);
1091 }
1092
Test(dgram_rma_1dom_stx_scalable,writev)1093 Test(dgram_rma_1dom_stx_scalable, writev)
1094 {
1095 xfer_for_each_size(do_writev, 8, BUF_SZ);
1096 }
1097
Test(dgram_rma_1dom_stx_scalable,writev_retrans)1098 Test(dgram_rma_1dom_stx_scalable, writev_retrans)
1099 {
1100 dgm_fail = 1;
1101 err_inject_enable();
1102 xfer_for_each_size(do_writev, 8, BUF_SZ);
1103 }
1104
do_writemsg(int len)1105 static void do_writemsg(int len)
1106 {
1107 int ret;
1108 ssize_t sz;
1109 struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
1110 (void *) -1, UINT_MAX, UINT_MAX };
1111 struct iovec iov;
1112 struct fi_msg_rma msg;
1113 struct fi_rma_iov rma_iov;
1114 uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
1115
1116 iov.iov_base = source;
1117 iov.iov_len = len;
1118
1119 rma_iov.addr = _REM_ADDR(fi, target, target);
1120 rma_iov.len = len;
1121 rma_iov.key = mr_key[1];
1122
1123 msg.msg_iov = &iov;
1124 msg.desc = (void **)loc_mr;
1125 msg.iov_count = 1;
1126 msg.addr = gni_addr[1];
1127 msg.rma_iov = &rma_iov;
1128 msg.rma_iov_count = 1;
1129 msg.context = target;
1130 msg.data = (uint64_t)target;
1131
1132 init_data(source, len, 0xef);
1133 init_data(target, len, 0);
1134 sz = fi_writemsg(ep[0], &msg, 0);
1135 cr_assert_eq(sz, 0);
1136
1137 while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
1138 pthread_yield();
1139 }
1140
1141 if (dgm_fail) {
1142 cr_assert_eq(ret, -FI_EAVAIL);
1143 return;
1144 }
1145 cr_assert_eq(ret, 1);
1146 rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
1147
1148 w[0] = 1;
1149 rdm_rma_check_cntrs(w, r, w_e, r_e);
1150
1151 dbg_printf("got write context event!\n");
1152
1153 cr_assert(check_data(source, target, len), "Data mismatch");
1154 }
1155
Test(rdm_rma_stx_basic,writemsg)1156 Test(rdm_rma_stx_basic, writemsg)
1157 {
1158 xfer_for_each_size(do_writemsg, 8, BUF_SZ);
1159 }
1160
Test(rdm_rma_stx_basic,writemsg_retrans)1161 Test(rdm_rma_stx_basic, writemsg_retrans)
1162 {
1163 err_inject_enable();
1164 xfer_for_each_size(do_writemsg, 8, BUF_SZ);
1165 }
1166
Test(dgram_rma_stx_basic,writemsg)1167 Test(dgram_rma_stx_basic, writemsg)
1168 {
1169 xfer_for_each_size(do_writemsg, 8, BUF_SZ);
1170 }
1171
Test(dgram_rma_stx_basic,writemsg_retrans)1172 Test(dgram_rma_stx_basic, writemsg_retrans)
1173 {
1174 dgm_fail = 1;
1175 err_inject_enable();
1176 xfer_for_each_size(do_writemsg, 8, BUF_SZ);
1177 }
1178
Test(dgram_rma_1dom_stx_basic,writemsg)1179 Test(dgram_rma_1dom_stx_basic, writemsg)
1180 {
1181 xfer_for_each_size(do_writemsg, 8, BUF_SZ);
1182 }
1183
Test(dgram_rma_1dom_stx_basic,writemsg_retrans)1184 Test(dgram_rma_1dom_stx_basic, writemsg_retrans)
1185 {
1186 dgm_fail = 1;
1187 err_inject_enable();
1188 xfer_for_each_size(do_writemsg, 8, BUF_SZ);
1189 }
1190
1191 /* scalable */
1192
Test(rdm_rma_stx_scalable,writemsg)1193 Test(rdm_rma_stx_scalable, writemsg)
1194 {
1195 xfer_for_each_size(do_writemsg, 8, BUF_SZ);
1196 }
1197
Test(rdm_rma_stx_scalable,writemsg_retrans)1198 Test(rdm_rma_stx_scalable, writemsg_retrans)
1199 {
1200 err_inject_enable();
1201 xfer_for_each_size(do_writemsg, 8, BUF_SZ);
1202 }
1203
Test(dgram_rma_stx_scalable,writemsg)1204 Test(dgram_rma_stx_scalable, writemsg)
1205 {
1206 xfer_for_each_size(do_writemsg, 8, BUF_SZ);
1207 }
1208
Test(dgram_rma_stx_scalable,writemsg_retrans)1209 Test(dgram_rma_stx_scalable, writemsg_retrans)
1210 {
1211 dgm_fail = 1;
1212 err_inject_enable();
1213 xfer_for_each_size(do_writemsg, 8, BUF_SZ);
1214 }
1215
Test(dgram_rma_1dom_stx_scalable,writemsg)1216 Test(dgram_rma_1dom_stx_scalable, writemsg)
1217 {
1218 xfer_for_each_size(do_writemsg, 8, BUF_SZ);
1219 }
1220
Test(dgram_rma_1dom_stx_scalable,writemsg_retrans)1221 Test(dgram_rma_1dom_stx_scalable, writemsg_retrans)
1222 {
1223 dgm_fail = 1;
1224 err_inject_enable();
1225 xfer_for_each_size(do_writemsg, 8, BUF_SZ);
1226 }
1227
1228 /*
1229 * write_fence should be validated by inspecting debug.
1230 *
1231 * The following sequence of events should be seen:
1232 *
1233 * TX request processed: A
1234 * TX request queue stalled on FI_FENCE request: B
1235 * Added event: A
1236 * TX request processed: B
1237 *
1238 */
1239
do_write_fence(int len)1240 static void do_write_fence(int len)
1241 {
1242 int ret;
1243 ssize_t sz;
1244 struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
1245 (void *) -1, UINT_MAX, UINT_MAX };
1246 struct iovec iov;
1247 struct fi_msg_rma msg;
1248 struct fi_rma_iov rma_iov;
1249 uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
1250
1251 iov.iov_base = source;
1252 iov.iov_len = len;
1253
1254 rma_iov.addr = _REM_ADDR(fi, target, target);
1255 rma_iov.len = sizeof(target);
1256 rma_iov.key = mr_key[1];
1257
1258 msg.msg_iov = &iov;
1259 msg.desc = (void **)loc_mr;
1260 msg.iov_count = 1;
1261 msg.addr = gni_addr[1];
1262 msg.rma_iov = &rma_iov;
1263 msg.rma_iov_count = 1;
1264 msg.context = target;
1265 msg.data = (uint64_t)target;
1266
1267 init_data(source, len, 0xef);
1268 init_data(target, len, 0);
1269
1270 /* write A */
1271 sz = fi_writemsg(ep[0], &msg, 0);
1272 cr_assert_eq(sz, 0);
1273
1274 /* write B */
1275 sz = fi_writemsg(ep[0], &msg, FI_FENCE);
1276 cr_assert_eq(sz, 0);
1277
1278 /* event A */
1279 while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
1280 pthread_yield();
1281 }
1282
1283 if (dgm_fail) {
1284 cr_assert_eq(ret, -FI_EAVAIL);
1285 return;
1286 }
1287
1288 cr_assert_eq(ret, 1);
1289 rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
1290
1291 /* reset cqe */
1292 cqe.op_context = cqe.buf = (void *) -1;
1293 cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX;
1294
1295 /* event B */
1296 while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
1297 pthread_yield();
1298 }
1299
1300 cr_assert_eq(ret, 1);
1301 rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
1302
1303 w[0] = 2;
1304 rdm_rma_check_cntrs(w, r, w_e, r_e);
1305
1306 dbg_printf("got write context event!\n");
1307
1308 cr_assert(check_data(source, target, len), "Data mismatch");
1309 }
1310
Test(rdm_rma_stx_basic,write_fence)1311 Test(rdm_rma_stx_basic, write_fence)
1312 {
1313 xfer_for_each_size(do_write_fence, 8, BUF_SZ);
1314 }
1315
Test(rdm_rma_stx_basic,write_fence_retrans)1316 Test(rdm_rma_stx_basic, write_fence_retrans)
1317 {
1318 err_inject_enable();
1319 xfer_for_each_size(do_write_fence, 8, BUF_SZ);
1320 }
1321
Test(dgram_rma_stx_basic,write_fence)1322 Test(dgram_rma_stx_basic, write_fence)
1323 {
1324 xfer_for_each_size(do_write_fence, 8, BUF_SZ);
1325 }
1326
Test(dgram_rma_stx_basic,write_fence_retrans)1327 Test(dgram_rma_stx_basic, write_fence_retrans)
1328 {
1329 dgm_fail = 1;
1330 err_inject_enable();
1331 xfer_for_each_size(do_write_fence, 8, BUF_SZ);
1332 }
1333
Test(dgram_rma_1dom_stx_basic,write_fence)1334 Test(dgram_rma_1dom_stx_basic, write_fence)
1335 {
1336 xfer_for_each_size(do_write_fence, 8, BUF_SZ);
1337 }
1338
Test(dgram_rma_1dom_stx_basic,write_fence_retrans)1339 Test(dgram_rma_1dom_stx_basic, write_fence_retrans)
1340 {
1341 dgm_fail = 1;
1342 err_inject_enable();
1343 xfer_for_each_size(do_write_fence, 8, BUF_SZ);
1344 }
1345
1346 /* scalable */
1347
Test(rdm_rma_stx_scalable,write_fence)1348 Test(rdm_rma_stx_scalable, write_fence)
1349 {
1350 xfer_for_each_size(do_write_fence, 8, BUF_SZ);
1351 }
1352
Test(rdm_rma_stx_scalable,write_fence_retrans)1353 Test(rdm_rma_stx_scalable, write_fence_retrans)
1354 {
1355 err_inject_enable();
1356 xfer_for_each_size(do_write_fence, 8, BUF_SZ);
1357 }
1358
Test(dgram_rma_stx_scalable,write_fence)1359 Test(dgram_rma_stx_scalable, write_fence)
1360 {
1361 xfer_for_each_size(do_write_fence, 8, BUF_SZ);
1362 }
1363
Test(dgram_rma_stx_scalable,write_fence_retrans)1364 Test(dgram_rma_stx_scalable, write_fence_retrans)
1365 {
1366 dgm_fail = 1;
1367 err_inject_enable();
1368 xfer_for_each_size(do_write_fence, 8, BUF_SZ);
1369 }
1370
Test(dgram_rma_1dom_stx_scalable,write_fence)1371 Test(dgram_rma_1dom_stx_scalable, write_fence)
1372 {
1373 xfer_for_each_size(do_write_fence, 8, BUF_SZ);
1374 }
1375
Test(dgram_rma_1dom_stx_scalable,write_fence_retrans)1376 Test(dgram_rma_1dom_stx_scalable, write_fence_retrans)
1377 {
1378 dgm_fail = 1;
1379 err_inject_enable();
1380 xfer_for_each_size(do_write_fence, 8, BUF_SZ);
1381 }
1382
1383 #define INJECT_SIZE 64
do_inject_write(int len)1384 static void do_inject_write(int len)
1385 {
1386 ssize_t sz;
1387 int ret, i, loops = 0;
1388 struct fi_cq_tagged_entry cqe;
1389
1390 init_data(source, len, 0x23);
1391 init_data(target, len, 0);
1392 sz = fi_inject_write(ep[0], source, len,
1393 gni_addr[1],
1394 _REM_ADDR(fi, target, target), mr_key[1]);
1395 cr_assert_eq(sz, 0);
1396
1397 for (i = 0; i < len; i++) {
1398 loops = 0;
1399 while (source[i] != target[i]) {
1400 /* for progress */
1401 ret = fi_cq_read(send_cq[0], &cqe, 1);
1402 cr_assert(ret == -FI_EAGAIN || ret == -FI_EAVAIL,
1403 "Received unexpected event\n");
1404
1405 pthread_yield();
1406 cr_assert(++loops < MLOOPS || dgm_fail,
1407 "Data mismatch");
1408 if (dgm_fail && loops > MLOOPS)
1409 break;
1410 }
1411 }
1412 cr_assert(!dgm_fail || (dgm_fail && loops >= MLOOPS), "Should fail");
1413 }
1414
Test(rdm_rma_stx_basic,inject_write)1415 Test(rdm_rma_stx_basic, inject_write)
1416 {
1417 /* FIXME intermittent test failures */
1418 cr_skip_test("intermittent test failures");
1419 xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
1420 }
1421
Test(rdm_rma_stx_basic,inject_write_retrans)1422 Test(rdm_rma_stx_basic, inject_write_retrans)
1423 {
1424 /* FIXME intermittent test failures */
1425 cr_skip_test("intermittent test failures");
1426 err_inject_enable();
1427 xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
1428 }
1429
Test(dgram_rma_stx_basic,inject_write)1430 Test(dgram_rma_stx_basic, inject_write)
1431 {
1432 /* FIXME intermittent test failures */
1433 cr_skip_test("intermittent test failures");
1434 xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
1435 }
1436
Test(dgram_rma_stx_basic,inject_write_retrans)1437 Test(dgram_rma_stx_basic, inject_write_retrans)
1438 {
1439 dgm_fail = 1;
1440 err_inject_enable();
1441 xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
1442 }
1443
Test(dgram_rma_1dom_stx_basic,inject_write)1444 Test(dgram_rma_1dom_stx_basic, inject_write)
1445 {
1446 xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
1447 }
1448
Test(dgram_rma_1dom_stx_basic,inject_write_retrans)1449 Test(dgram_rma_1dom_stx_basic, inject_write_retrans)
1450 {
1451 dgm_fail = 1;
1452 err_inject_enable();
1453 xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
1454 }
1455
1456 /* scalable */
1457
Test(rdm_rma_stx_scalable,inject_write)1458 Test(rdm_rma_stx_scalable, inject_write)
1459 {
1460 /* FIXME intermittent test failures */
1461 cr_skip_test("intermittent test failures");
1462 xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
1463 }
1464
Test(rdm_rma_stx_scalable,inject_write_retrans)1465 Test(rdm_rma_stx_scalable, inject_write_retrans)
1466 {
1467 /* FIXME intermittent test failures */
1468 cr_skip_test("intermittent test failures");
1469 err_inject_enable();
1470 xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
1471 }
1472
Test(dgram_rma_stx_scalable,inject_write)1473 Test(dgram_rma_stx_scalable, inject_write)
1474 {
1475 /* FIXME intermittent test failures */
1476 cr_skip_test("intermittent test failures");
1477 xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
1478 }
1479
Test(dgram_rma_stx_scalable,inject_write_retrans)1480 Test(dgram_rma_stx_scalable, inject_write_retrans)
1481 {
1482 dgm_fail = 1;
1483 err_inject_enable();
1484 xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
1485 }
1486
Test(dgram_rma_1dom_stx_scalable,inject_write)1487 Test(dgram_rma_1dom_stx_scalable, inject_write)
1488 {
1489 xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
1490 }
1491
Test(dgram_rma_1dom_stx_scalable,inject_write_retrans)1492 Test(dgram_rma_1dom_stx_scalable, inject_write_retrans)
1493 {
1494 dgm_fail = 1;
1495 err_inject_enable();
1496 xfer_for_each_size(do_inject_write, 8, INJECT_SIZE);
1497 }
1498
do_writedata(int len)1499 static void do_writedata(int len)
1500 {
1501 int ret;
1502 ssize_t sz;
1503 struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
1504 (void *) -1, UINT_MAX, UINT_MAX };
1505 struct fi_cq_tagged_entry dcqe = { (void *) -1, UINT_MAX, UINT_MAX,
1506 (void *) -1, UINT_MAX, UINT_MAX };
1507 uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
1508
1509
1510 #define WRITE_DATA 0x5123da1a145
1511 init_data(source, len, 0x23);
1512 init_data(target, len, 0);
1513 sz = fi_writedata(ep[0], source, len, loc_mr[0], WRITE_DATA,
1514 gni_addr[1],
1515 _REM_ADDR(fi, target, target), mr_key[1],
1516 target);
1517 cr_assert_eq(sz, 0);
1518
1519 while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
1520 pthread_yield();
1521 }
1522
1523 if (dgm_fail) {
1524 cr_assert_eq(ret, -FI_EAVAIL);
1525 return;
1526 }
1527
1528 cr_assert_eq(ret, 1);
1529 rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
1530
1531 w[0] = 1;
1532 rdm_rma_check_cntrs(w, r, w_e, r_e);
1533
1534 dbg_printf("got write context event!\n");
1535
1536 cr_assert(check_data(source, target, len), "Data mismatch");
1537
1538 while ((ret = fi_cq_read(recv_cq[1], &dcqe, 1)) == -FI_EAGAIN) {
1539 pthread_yield();
1540 }
1541 cr_assert(ret != FI_SUCCESS, "Missing remote data");
1542
1543 rdm_rma_check_tcqe(&dcqe, NULL,
1544 (FI_RMA | FI_REMOTE_WRITE | FI_REMOTE_CQ_DATA),
1545 WRITE_DATA, ep[1]);
1546 }
1547
Test(rdm_rma_stx_basic,writedata)1548 Test(rdm_rma_stx_basic, writedata)
1549 {
1550 xfer_for_each_size(do_writedata, 8, BUF_SZ);
1551 }
1552
Test(rdm_rma_stx_basic,writedata_retrans)1553 Test(rdm_rma_stx_basic, writedata_retrans)
1554 {
1555 err_inject_enable();
1556 xfer_for_each_size(do_writedata, 8, BUF_SZ);
1557 }
1558
Test(dgram_rma_stx_basic,writedata)1559 Test(dgram_rma_stx_basic, writedata)
1560 {
1561 xfer_for_each_size(do_writedata, 8, BUF_SZ);
1562 }
1563
Test(dgram_rma_stx_basic,writedata_retrans)1564 Test(dgram_rma_stx_basic, writedata_retrans)
1565 {
1566 dgm_fail = 1;
1567 err_inject_enable();
1568 xfer_for_each_size(do_writedata, 8, BUF_SZ);
1569 }
1570
Test(dgram_rma_1dom_stx_basic,writedata)1571 Test(dgram_rma_1dom_stx_basic, writedata)
1572 {
1573 xfer_for_each_size(do_writedata, 8, BUF_SZ);
1574 }
1575
Test(dgram_rma_1dom_stx_basic,writedata_retrans)1576 Test(dgram_rma_1dom_stx_basic, writedata_retrans)
1577 {
1578 dgm_fail = 1;
1579 err_inject_enable();
1580 xfer_for_each_size(do_writedata, 8, BUF_SZ);
1581 }
1582
1583 /* scalable */
1584
Test(rdm_rma_stx_scalable,writedata)1585 Test(rdm_rma_stx_scalable, writedata)
1586 {
1587 xfer_for_each_size(do_writedata, 8, BUF_SZ);
1588 }
1589
Test(rdm_rma_stx_scalable,writedata_retrans)1590 Test(rdm_rma_stx_scalable, writedata_retrans)
1591 {
1592 err_inject_enable();
1593 xfer_for_each_size(do_writedata, 8, BUF_SZ);
1594 }
1595
Test(dgram_rma_stx_scalable,writedata)1596 Test(dgram_rma_stx_scalable, writedata)
1597 {
1598 xfer_for_each_size(do_writedata, 8, BUF_SZ);
1599 }
1600
Test(dgram_rma_stx_scalable,writedata_retrans)1601 Test(dgram_rma_stx_scalable, writedata_retrans)
1602 {
1603 dgm_fail = 1;
1604 err_inject_enable();
1605 xfer_for_each_size(do_writedata, 8, BUF_SZ);
1606 }
1607
Test(dgram_rma_1dom_stx_scalable,writedata)1608 Test(dgram_rma_1dom_stx_scalable, writedata)
1609 {
1610 xfer_for_each_size(do_writedata, 8, BUF_SZ);
1611 }
1612
Test(dgram_rma_1dom_stx_scalable,writedata_retrans)1613 Test(dgram_rma_1dom_stx_scalable, writedata_retrans)
1614 {
1615 dgm_fail = 1;
1616 err_inject_enable();
1617 xfer_for_each_size(do_writedata, 8, BUF_SZ);
1618 }
1619
1620 #define INJECTWRITE_DATA 0xdededadadeadbeaf
do_inject_writedata(int len)1621 static void do_inject_writedata(int len)
1622 {
1623 ssize_t sz;
1624 int ret, i, loops = 0;
1625 struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
1626 (void *) -1, UINT_MAX, UINT_MAX };
1627 struct fi_cq_tagged_entry dcqe = { (void *) -1, UINT_MAX, UINT_MAX,
1628 (void *) -1, UINT_MAX, UINT_MAX };
1629
1630 init_data(source, len, 0x23);
1631 init_data(target, len, 0);
1632 sz = fi_inject_writedata(ep[0], source, len, INJECTWRITE_DATA,
1633 gni_addr[1],
1634 _REM_ADDR(fi, target, target), mr_key[1]);
1635 cr_assert_eq(sz, 0);
1636
1637 for (i = 0; i < len; i++) {
1638 loops = 0;
1639 while (source[i] != target[i]) {
1640 /* for progress */
1641 ret = fi_cq_read(send_cq[0], &cqe, 1);
1642 cr_assert(ret == -FI_EAGAIN || ret == -FI_EAVAIL,
1643 "Received unexpected event\n");
1644
1645 pthread_yield();
1646 cr_assert(++loops < MLOOPS || dgm_fail,
1647 "Data mismatch");
1648 if (dgm_fail && loops > MLOOPS)
1649 break;
1650 }
1651 }
1652 cr_assert(!dgm_fail || (dgm_fail && loops >= MLOOPS), "Should fail");
1653 if (dgm_fail && loops >= MLOOPS)
1654 return;
1655
1656 while ((ret = fi_cq_read(recv_cq[1], &dcqe, 1)) == -FI_EAGAIN) {
1657 ret = fi_cq_read(send_cq[0], &cqe, 1); /* for progress */
1658 pthread_yield();
1659 }
1660 cr_assert(ret != FI_SUCCESS, "Missing remote data");
1661
1662 rdm_rma_check_tcqe(&dcqe, NULL,
1663 (FI_RMA | FI_REMOTE_WRITE | FI_REMOTE_CQ_DATA),
1664 INJECTWRITE_DATA, ep[1]);
1665 }
1666
Test(rdm_rma_stx_basic,inject_writedata)1667 Test(rdm_rma_stx_basic, inject_writedata)
1668 {
1669 /* FIXME intermittent test failures */
1670 cr_skip_test("intermittent test failures");
1671 xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
1672 }
1673
Test(rdm_rma_stx_basic,inject_writedata_retrans)1674 Test(rdm_rma_stx_basic, inject_writedata_retrans)
1675 {
1676 /* FIXME intermittent test failures */
1677 cr_skip_test("intermittent test failures");
1678 err_inject_enable();
1679 xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
1680 }
1681
Test(dgram_rma_stx_basic,inject_writedata)1682 Test(dgram_rma_stx_basic, inject_writedata)
1683 {
1684 /* FIXME intermittent test failures */
1685 cr_skip_test("intermittent test failures");
1686 xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
1687 }
1688
Test(dgram_rma_stx_basic,inject_writedata_retrans)1689 Test(dgram_rma_stx_basic, inject_writedata_retrans)
1690 {
1691 dgm_fail = 1;
1692 err_inject_enable();
1693 xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
1694 }
1695
Test(dgram_rma_1dom_stx_basic,inject_writedata)1696 Test(dgram_rma_1dom_stx_basic, inject_writedata)
1697 {
1698 xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
1699 }
1700
Test(dgram_rma_1dom_stx_basic,inject_writedata_retrans)1701 Test(dgram_rma_1dom_stx_basic, inject_writedata_retrans)
1702 {
1703 dgm_fail = 1;
1704 err_inject_enable();
1705 xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
1706 }
1707
1708 /* scalable */
1709
Test(rdm_rma_stx_scalable,inject_writedata)1710 Test(rdm_rma_stx_scalable, inject_writedata)
1711 {
1712 /* FIXME intermittent test failures */
1713 cr_skip_test("intermittent test failures");
1714 xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
1715 }
1716
Test(rdm_rma_stx_scalable,inject_writedata_retrans)1717 Test(rdm_rma_stx_scalable, inject_writedata_retrans)
1718 {
1719 /* FIXME intermittent test failures */
1720 cr_skip_test("intermittent test failures");
1721 err_inject_enable();
1722 xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
1723 }
1724
Test(dgram_rma_stx_scalable,inject_writedata)1725 Test(dgram_rma_stx_scalable, inject_writedata)
1726 {
1727 /* FIXME intermittent test failures */
1728 cr_skip_test("intermittent test failures");
1729 xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
1730 }
1731
Test(dgram_rma_stx_scalable,inject_writedata_retrans)1732 Test(dgram_rma_stx_scalable, inject_writedata_retrans)
1733 {
1734 dgm_fail = 1;
1735 err_inject_enable();
1736 xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
1737 }
1738
Test(dgram_rma_1dom_stx_scalable,inject_writedata)1739 Test(dgram_rma_1dom_stx_scalable, inject_writedata)
1740 {
1741 xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
1742 }
1743
Test(dgram_rma_1dom_stx_scalable,inject_writedata_retrans)1744 Test(dgram_rma_1dom_stx_scalable, inject_writedata_retrans)
1745 {
1746 dgm_fail = 1;
1747 err_inject_enable();
1748 xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE);
1749 }
1750
do_read(int len)1751 static void do_read(int len)
1752 {
1753 int ret;
1754 ssize_t sz;
1755 struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
1756 (void *) -1, UINT_MAX, UINT_MAX };
1757 uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
1758
1759 #define READ_CTX 0x4e3dda1aULL
1760 init_data(source, len, 0);
1761 init_data(target, len, 0xad);
1762
1763 /* domain 0 from domain 1 */
1764 sz = fi_read(ep[0], source, len,
1765 loc_mr[0], gni_addr[1],
1766 _REM_ADDR(fi, target, target), mr_key[1],
1767 (void *)READ_CTX);
1768 cr_assert_eq(sz, 0);
1769
1770 while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
1771 pthread_yield();
1772 }
1773
1774 cr_assert_eq(ret, 1);
1775 rdm_rma_check_tcqe(&cqe, (void *)READ_CTX, FI_RMA | FI_READ, 0, ep[0]);
1776
1777 r[0] = 1;
1778 rdm_rma_check_cntrs(w, r, w_e, r_e);
1779
1780 dbg_printf("got read context event!\n");
1781
1782 cr_assert(check_data(source, target, len), "Data mismatch");
1783 }
1784
Test(rdm_rma_stx_basic,read)1785 Test(rdm_rma_stx_basic, read)
1786 {
1787 xfer_for_each_size(do_read, 8, BUF_SZ);
1788 }
1789
Test(rdm_rma_stx_basic,read_retrans)1790 Test(rdm_rma_stx_basic, read_retrans)
1791 {
1792 err_inject_enable();
1793 xfer_for_each_size(do_read, 8, BUF_SZ);
1794 }
1795
Test(dgram_rma_stx_basic,read)1796 Test(dgram_rma_stx_basic, read)
1797 {
1798 xfer_for_each_size(do_read, 8, BUF_SZ);
1799 }
1800
Test(dgram_rma_1dom_stx_basic,read)1801 Test(dgram_rma_1dom_stx_basic, read)
1802 {
1803 xfer_for_each_size(do_read, 8, BUF_SZ);
1804 }
1805
1806 /* scalable */
1807
Test(rdm_rma_stx_scalable,read)1808 Test(rdm_rma_stx_scalable, read)
1809 {
1810 xfer_for_each_size(do_read, 8, BUF_SZ);
1811 }
1812
Test(rdm_rma_stx_scalable,read_retrans)1813 Test(rdm_rma_stx_scalable, read_retrans)
1814 {
1815 err_inject_enable();
1816 xfer_for_each_size(do_read, 8, BUF_SZ);
1817 }
1818
Test(dgram_rma_stx_scalable,read)1819 Test(dgram_rma_stx_scalable, read)
1820 {
1821 xfer_for_each_size(do_read, 8, BUF_SZ);
1822 }
1823
Test(dgram_rma_1dom_stx_scalable,read)1824 Test(dgram_rma_1dom_stx_scalable, read)
1825 {
1826 xfer_for_each_size(do_read, 8, BUF_SZ);
1827 }
1828
do_readv(int len)1829 static void do_readv(int len)
1830 {
1831 int ret;
1832 ssize_t sz;
1833 struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
1834 (void *) -1, UINT_MAX, UINT_MAX };
1835 struct iovec iov;
1836 uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
1837
1838 iov.iov_base = source;
1839 iov.iov_len = len;
1840
1841 init_data(target, len, 0x25);
1842 init_data(source, len, 0);
1843 sz = fi_readv(ep[0], &iov, (void **)loc_mr, 1,
1844 gni_addr[1],
1845 _REM_ADDR(fi, target, target), mr_key[1],
1846 target);
1847 cr_assert_eq(sz, 0);
1848
1849 while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
1850 pthread_yield();
1851 }
1852
1853 cr_assert_eq(ret, 1);
1854 rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_READ, 0, ep[0]);
1855
1856 r[0] = 1;
1857 rdm_rma_check_cntrs(w, r, w_e, r_e);
1858
1859 dbg_printf("got write context event!\n");
1860
1861 cr_assert(check_data(source, target, len), "Data mismatch");
1862 }
1863
Test(rdm_rma_stx_basic,readv)1864 Test(rdm_rma_stx_basic, readv)
1865 {
1866 xfer_for_each_size(do_readv, 8, BUF_SZ);
1867 }
1868
Test(rdm_rma_stx_basic,readv_retrans)1869 Test(rdm_rma_stx_basic, readv_retrans)
1870 {
1871 err_inject_enable();
1872 xfer_for_each_size(do_readv, 8, BUF_SZ);
1873 }
1874
Test(dgram_rma_stx_basic,readv)1875 Test(dgram_rma_stx_basic, readv)
1876 {
1877 xfer_for_each_size(do_readv, 8, BUF_SZ);
1878 }
1879
Test(dgram_rma_1dom_stx_basic,readv)1880 Test(dgram_rma_1dom_stx_basic, readv)
1881 {
1882 xfer_for_each_size(do_readv, 8, BUF_SZ);
1883 }
1884
1885 /* scalable */
1886
Test(rdm_rma_stx_scalable,readv)1887 Test(rdm_rma_stx_scalable, readv)
1888 {
1889 xfer_for_each_size(do_readv, 8, BUF_SZ);
1890 }
1891
Test(rdm_rma_stx_scalable,readv_retrans)1892 Test(rdm_rma_stx_scalable, readv_retrans)
1893 {
1894 err_inject_enable();
1895 xfer_for_each_size(do_readv, 8, BUF_SZ);
1896 }
1897
Test(dgram_rma_stx_scalable,readv)1898 Test(dgram_rma_stx_scalable, readv)
1899 {
1900 xfer_for_each_size(do_readv, 8, BUF_SZ);
1901 }
1902
Test(dgram_rma_1dom_stx_scalable,readv)1903 Test(dgram_rma_1dom_stx_scalable, readv)
1904 {
1905 xfer_for_each_size(do_readv, 8, BUF_SZ);
1906 }
1907
do_readmsg(int len)1908 static void do_readmsg(int len)
1909 {
1910 int ret;
1911 ssize_t sz;
1912 struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
1913 (void *) -1, UINT_MAX, UINT_MAX };
1914 struct iovec iov;
1915 struct fi_msg_rma msg;
1916 struct fi_rma_iov rma_iov;
1917 uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
1918
1919 iov.iov_base = source;
1920 iov.iov_len = len;
1921
1922 rma_iov.addr = _REM_ADDR(fi, target, target);
1923 rma_iov.len = len;
1924 rma_iov.key = mr_key[1];
1925
1926 msg.msg_iov = &iov;
1927 msg.desc = (void **)loc_mr;
1928 msg.iov_count = 1;
1929 msg.addr = gni_addr[1];
1930 msg.rma_iov = &rma_iov;
1931 msg.rma_iov_count = 1;
1932 msg.context = target;
1933 msg.data = (uint64_t)target;
1934
1935 init_data(target, len, 0xef);
1936 init_data(source, len, 0);
1937 sz = fi_readmsg(ep[0], &msg, 0);
1938 cr_assert_eq(sz, 0);
1939
1940 while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
1941 pthread_yield();
1942 }
1943
1944 cr_assert_eq(ret, 1);
1945 rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_READ, 0, ep[0]);
1946
1947 r[0] = 1;
1948 rdm_rma_check_cntrs(w, r, w_e, r_e);
1949
1950 dbg_printf("got write context event!\n");
1951
1952 cr_assert(check_data(source, target, len), "Data mismatch");
1953
1954 iov.iov_base = source;
1955 iov.iov_len = len;
1956
1957 rma_iov.addr = (uint64_t)target;
1958 rma_iov.len = len;
1959 rma_iov.key = mr_key[0];
1960
1961 msg.msg_iov = &iov;
1962 msg.desc = (void **)(loc_mr + 1);
1963 msg.iov_count = 1;
1964 msg.addr = gni_addr[0];
1965 msg.rma_iov = &rma_iov;
1966 msg.rma_iov_count = 1;
1967 msg.context = target;
1968 msg.data = (uint64_t)target;
1969 }
1970
Test(rdm_rma_stx_basic,readmsg)1971 Test(rdm_rma_stx_basic, readmsg)
1972 {
1973 xfer_for_each_size(do_readmsg, 8, BUF_SZ);
1974 }
1975
Test(rdm_rma_stx_basic,readmsg_retrans)1976 Test(rdm_rma_stx_basic, readmsg_retrans)
1977 {
1978 err_inject_enable();
1979 xfer_for_each_size(do_readmsg, 8, BUF_SZ);
1980 }
1981
Test(dgram_rma_stx_basic,readmsg)1982 Test(dgram_rma_stx_basic, readmsg)
1983 {
1984 xfer_for_each_size(do_readmsg, 8, BUF_SZ);
1985 }
1986
Test(dgram_rma_1dom_stx_basic,readmsg)1987 Test(dgram_rma_1dom_stx_basic, readmsg)
1988 {
1989 xfer_for_each_size(do_readmsg, 8, BUF_SZ);
1990 }
1991
1992 /* scalable */
1993
Test(rdm_rma_stx_scalable,readmsg)1994 Test(rdm_rma_stx_scalable, readmsg)
1995 {
1996 xfer_for_each_size(do_readmsg, 8, BUF_SZ);
1997 }
1998
Test(rdm_rma_stx_scalable,readmsg_retrans)1999 Test(rdm_rma_stx_scalable, readmsg_retrans)
2000 {
2001 err_inject_enable();
2002 xfer_for_each_size(do_readmsg, 8, BUF_SZ);
2003 }
2004
Test(dgram_rma_stx_scalable,readmsg)2005 Test(dgram_rma_stx_scalable, readmsg)
2006 {
2007 xfer_for_each_size(do_readmsg, 8, BUF_SZ);
2008 }
2009
Test(dgram_rma_1dom_stx_scalable,readmsg)2010 Test(dgram_rma_1dom_stx_scalable, readmsg)
2011 {
2012 xfer_for_each_size(do_readmsg, 8, BUF_SZ);
2013 }
2014
inject_common(void)2015 static void inject_common(void)
2016 {
2017 int ret;
2018 ssize_t sz;
2019 struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
2020 (void *) -1, UINT_MAX, UINT_MAX };
2021 struct iovec iov;
2022 struct fi_msg_rma msg;
2023 struct fi_rma_iov rma_iov;
2024 uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
2025
2026 iov.iov_base = source;
2027 iov.iov_len = GNIX_INJECT_SIZE;
2028
2029 rma_iov.addr = _REM_ADDR(fi, target, target);
2030 rma_iov.len = GNIX_INJECT_SIZE;
2031 rma_iov.key = mr_key[1];
2032
2033 msg.msg_iov = &iov;
2034 msg.desc = (void **)loc_mr;
2035 msg.iov_count = 1;
2036 msg.addr = gni_addr[1];
2037 msg.rma_iov = &rma_iov;
2038 msg.rma_iov_count = 1;
2039 msg.context = target;
2040 msg.data = (uint64_t)target;
2041
2042 init_data(source, GNIX_INJECT_SIZE, 0xef);
2043 init_data(target, GNIX_INJECT_SIZE, 0);
2044
2045 sz = fi_writemsg(ep[0], &msg, FI_INJECT);
2046 cr_assert_eq(sz, 0);
2047
2048 iov.iov_len = GNIX_INJECT_SIZE+1;
2049 sz = fi_writemsg(ep[0], &msg, FI_INJECT);
2050 cr_assert_eq(sz, -FI_EINVAL);
2051
2052 while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
2053 pthread_yield();
2054 }
2055
2056 cr_assert_eq(ret, 1);
2057 rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
2058
2059 w[0] = 1;
2060 rdm_rma_check_cntrs(w, r, w_e, r_e);
2061
2062 dbg_printf("got write context event!\n");
2063
2064 cr_assert(check_data(source, target, GNIX_INJECT_SIZE),
2065 "Data mismatch");
2066 }
2067
Test(rdm_rma_stx_basic,inject)2068 Test(rdm_rma_stx_basic, inject)
2069 {
2070 inject_common();
2071 }
2072
Test(dgram_rma_stx_basic,inject)2073 Test(dgram_rma_stx_basic, inject)
2074 {
2075 inject_common();
2076 }
2077
Test(dgram_rma_1dom_stx_basic,inject)2078 Test(dgram_rma_1dom_stx_basic, inject)
2079 {
2080 inject_common();
2081 }
2082
2083 /* scalable */
2084
Test(rdm_rma_stx_scalable,inject)2085 Test(rdm_rma_stx_scalable, inject)
2086 {
2087 inject_common();
2088 }
2089
Test(dgram_rma_stx_scalable,inject)2090 Test(dgram_rma_stx_scalable, inject)
2091 {
2092 inject_common();
2093 }
2094
Test(dgram_rma_1dom_stx_scalable,inject)2095 Test(dgram_rma_1dom_stx_scalable, inject)
2096 {
2097 inject_common();
2098 }
2099
do_write_autoreg(int len)2100 static void do_write_autoreg(int len)
2101 {
2102 int ret;
2103 ssize_t sz;
2104 struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
2105 (void *) -1, UINT_MAX, UINT_MAX };
2106 uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
2107
2108 init_data(source, len, 0xab);
2109 init_data(target, len, 0);
2110 sz = fi_write(ep[0], source, len,
2111 NULL, gni_addr[1],
2112 _REM_ADDR(fi, target, target), mr_key[1],
2113 target);
2114 cr_assert_eq(sz, 0);
2115
2116 while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
2117 pthread_yield();
2118 }
2119
2120 cr_assert_eq(ret, 1);
2121 rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
2122
2123 w[0] = 1;
2124 rdm_rma_check_cntrs(w, r, w_e, r_e);
2125
2126 dbg_printf("got write context event!\n");
2127
2128 cr_assert(check_data(source, target, len), "Data mismatch");
2129 }
2130
Test(rdm_rma_stx_basic,write_autoreg)2131 Test(rdm_rma_stx_basic, write_autoreg)
2132 {
2133 xfer_for_each_size(do_write_autoreg, 8, BUF_SZ);
2134 }
2135
Test(dgram_rma_stx_basic,write_autoreg)2136 Test(dgram_rma_stx_basic, write_autoreg)
2137 {
2138 xfer_for_each_size(do_write_autoreg, 8, BUF_SZ);
2139 }
2140
Test(dgram_rma_1dom_stx_basic,write_autoreg)2141 Test(dgram_rma_1dom_stx_basic, write_autoreg)
2142 {
2143 xfer_for_each_size(do_write_autoreg, 8, BUF_SZ);
2144 }
2145
2146 /* scalable */
2147
Test(rdm_rma_stx_scalable,write_autoreg)2148 Test(rdm_rma_stx_scalable, write_autoreg)
2149 {
2150 xfer_for_each_size(do_write_autoreg, 8, BUF_SZ);
2151 }
2152
Test(dgram_rma_stx_scalable,write_autoreg)2153 Test(dgram_rma_stx_scalable, write_autoreg)
2154 {
2155 xfer_for_each_size(do_write_autoreg, 8, BUF_SZ);
2156 }
2157
Test(dgram_rma_1dom_stx_scalable,write_autoreg)2158 Test(dgram_rma_1dom_stx_scalable, write_autoreg)
2159 {
2160 xfer_for_each_size(do_write_autoreg, 8, BUF_SZ);
2161 }
2162
do_write_autoreg_uncached(int len)2163 static void do_write_autoreg_uncached(int len)
2164 {
2165 int ret;
2166 ssize_t sz;
2167 struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
2168 (void *) -1, UINT_MAX, UINT_MAX };
2169 uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
2170
2171 init_data(uc_source, len, 0xab);
2172 init_data(target, len, 0);
2173 sz = fi_write(ep[0], uc_source, len,
2174 NULL, gni_addr[1],
2175 _REM_ADDR(fi, target, target), mr_key[1],
2176 target);
2177 cr_assert_eq(sz, 0);
2178
2179 while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
2180 pthread_yield();
2181 }
2182
2183 cr_assert_eq(ret, 1);
2184 rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]);
2185
2186 w[0] = 1;
2187 rdm_rma_check_cntrs(w, r, w_e, r_e);
2188
2189 dbg_printf("got write context event!\n");
2190
2191 cr_assert(check_data(uc_source, target, len), "Data mismatch");
2192 }
2193
Test(rdm_rma_stx_basic,write_autoreg_uncached)2194 Test(rdm_rma_stx_basic, write_autoreg_uncached)
2195 {
2196 xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ);
2197 }
2198
Test(dgram_rma_stx_basic,write_autoreg_uncached)2199 Test(dgram_rma_stx_basic, write_autoreg_uncached)
2200 {
2201 xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ);
2202 }
2203
Test(dgram_rma_1dom_stx_basic,write_autoreg_uncached)2204 Test(dgram_rma_1dom_stx_basic, write_autoreg_uncached)
2205 {
2206 xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ);
2207 }
2208
2209 /* scalable */
2210
Test(rdm_rma_stx_scalable,write_autoreg_uncached)2211 Test(rdm_rma_stx_scalable, write_autoreg_uncached)
2212 {
2213 xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ);
2214 }
2215
Test(dgram_rma_stx_scalable,write_autoreg_uncached)2216 Test(dgram_rma_stx_scalable, write_autoreg_uncached)
2217 {
2218 xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ);
2219 }
2220
Test(dgram_rma_1dom_stx_scalable,write_autoreg_uncached)2221 Test(dgram_rma_1dom_stx_scalable, write_autoreg_uncached)
2222 {
2223 xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ);
2224 }
2225
do_write_error(int len)2226 static void do_write_error(int len)
2227 {
2228 int ret;
2229 ssize_t sz;
2230 struct fi_cq_tagged_entry cqe;
2231 struct fi_cq_err_entry err_cqe = {0};
2232
2233 err_cqe.err_data_size = 0;
2234 uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
2235
2236 init_data(source, len, 0xab);
2237 init_data(target, len, 0);
2238 sz = fi_write(ep[0], source, len,
2239 loc_mr[0], gni_addr[1],
2240 _REM_ADDR(fi, target, target), mr_key[1],
2241 target);
2242 cr_assert_eq(sz, 0);
2243
2244 while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
2245 pthread_yield();
2246 }
2247
2248 cr_assert_eq(ret, -FI_EAVAIL);
2249
2250 ret = fi_cq_readerr(send_cq[0], &err_cqe, 0);
2251 cr_assert_eq(ret, 1);
2252
2253 cr_assert((uint64_t)err_cqe.op_context == (uint64_t)target,
2254 "Bad error context");
2255 cr_assert(err_cqe.flags == (FI_RMA | FI_WRITE));
2256 cr_assert(err_cqe.len == 0, "Bad error len");
2257 cr_assert(err_cqe.buf == 0, "Bad error buf");
2258 cr_assert(err_cqe.data == 0, "Bad error data");
2259 cr_assert(err_cqe.tag == 0, "Bad error tag");
2260 cr_assert(err_cqe.olen == 0, "Bad error olen");
2261 cr_assert(err_cqe.err == FI_ECANCELED, "Bad error errno");
2262 cr_assert(err_cqe.prov_errno == gnixu_to_fi_errno(GNI_RC_TRANSACTION_ERROR),
2263 "Bad prov errno");
2264 cr_assert(err_cqe.err_data == NULL, "Bad error provider data");
2265
2266 w_e[0] = 1;
2267 rdm_rma_check_cntrs(w, r, w_e, r_e);
2268 }
2269
__write_error(void)2270 static inline void __write_error(void)
2271 {
2272 int ret, max_retrans_val = 1;
2273
2274 ret = gni_domain_ops[0]->set_val(&dom[0]->fid, GNI_MAX_RETRANSMITS,
2275 &max_retrans_val);
2276 cr_assert(!ret, "setval(GNI_MAX_RETRANSMITS)");
2277
2278 ret = gni_domain_ops[1]->set_val(&dom[1]->fid, GNI_MAX_RETRANSMITS,
2279 &max_retrans_val);
2280 cr_assert(!ret, "setval(GNI_MAX_RETRANSMITS)");
2281 err_inject_enable();
2282
2283 xfer_for_each_size(do_write_error, 8, BUF_SZ);
2284 }
2285
Test(rdm_rma_stx_basic,write_error)2286 Test(rdm_rma_stx_basic, write_error)
2287 {
2288 __write_error();
2289 }
2290
Test(rdm_rma_stx_scalable,write_error)2291 Test(rdm_rma_stx_scalable, write_error)
2292 {
2293 __write_error();
2294 }
2295
Test(dgram_rma_stx_basic,write_error)2296 Test(dgram_rma_stx_basic, write_error)
2297 {
2298 __write_error();
2299 }
2300
Test(dgram_rma_stx_scalable,write_error)2301 Test(dgram_rma_stx_scalable, write_error)
2302 {
2303 __write_error();
2304 }
2305
do_read_error(int len)2306 static void do_read_error(int len)
2307 {
2308 int ret;
2309 ssize_t sz;
2310 struct fi_cq_tagged_entry cqe;
2311 struct fi_cq_err_entry err_cqe = {0};
2312
2313 err_cqe.err_data_size = 0;
2314 uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
2315
2316 init_data(source, len, 0);
2317 init_data(target, len, 0xad);
2318 sz = fi_read(ep[0], source, len,
2319 loc_mr[0], gni_addr[1],
2320 _REM_ADDR(fi, target, target), mr_key[1],
2321 (void *)READ_CTX);
2322 cr_assert_eq(sz, 0);
2323
2324 while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
2325 pthread_yield();
2326 }
2327
2328 cr_assert_eq(ret, -FI_EAVAIL);
2329
2330 ret = fi_cq_readerr(send_cq[0], &err_cqe, 0);
2331 cr_assert_eq(ret, 1);
2332
2333 cr_assert((uint64_t)err_cqe.op_context == (uint64_t)READ_CTX,
2334 "Bad error context");
2335 cr_assert(err_cqe.flags == (FI_RMA | FI_READ));
2336 cr_assert(err_cqe.len == 0, "Bad error len");
2337 cr_assert(err_cqe.buf == 0, "Bad error buf");
2338 cr_assert(err_cqe.data == 0, "Bad error data");
2339 cr_assert(err_cqe.tag == 0, "Bad error tag");
2340 cr_assert(err_cqe.olen == 0, "Bad error olen");
2341 cr_assert(err_cqe.err == FI_ECANCELED, "Bad error errno");
2342 cr_assert(err_cqe.prov_errno == gnixu_to_fi_errno(GNI_RC_TRANSACTION_ERROR),
2343 "Bad prov errno");
2344 cr_assert(err_cqe.err_data == NULL, "Bad error provider data");
2345
2346 r_e[0] = 1;
2347 rdm_rma_check_cntrs(w, r, w_e, r_e);
2348 }
2349
__read_error(void)2350 static inline void __read_error(void)
2351 {
2352 int ret, max_retrans_val = 1;
2353
2354 ret = gni_domain_ops[0]->set_val(&dom[0]->fid, GNI_MAX_RETRANSMITS,
2355 &max_retrans_val);
2356 cr_assert(!ret, "setval(GNI_MAX_RETRANSMITS)");
2357
2358 ret = gni_domain_ops[1]->set_val(&dom[1]->fid, GNI_MAX_RETRANSMITS,
2359 &max_retrans_val);
2360 cr_assert(!ret, "setval(GNI_MAX_RETRANSMITS)");
2361 err_inject_enable();
2362
2363 xfer_for_each_size(do_read_error, 8, BUF_SZ);
2364 }
2365
Test(rdm_rma_stx_basic,read_error)2366 Test(rdm_rma_stx_basic, read_error)
2367 {
2368 __read_error();
2369 }
2370
Test(rdm_rma_stx_scalable,read_error)2371 Test(rdm_rma_stx_scalable, read_error)
2372 {
2373 __read_error();
2374 }
2375
do_read_buf(void * s,void * t,int len)2376 static void do_read_buf(void *s, void *t, int len)
2377 {
2378 int ret;
2379 ssize_t sz;
2380 struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
2381 (void *) -1, UINT_MAX, UINT_MAX };
2382 uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
2383
2384 #define READ_CTX 0x4e3dda1aULL
2385 init_data(s, len, 0);
2386 init_data(t, len, 0xad);
2387 sz = fi_read(ep[0], s, len, NULL, gni_addr[1],
2388 _REM_ADDR(fi, target, t), mr_key[1],
2389 (void *)READ_CTX);
2390 cr_assert_eq(sz, 0);
2391
2392 while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
2393 pthread_yield();
2394 }
2395
2396 cr_assert_eq(ret, 1);
2397 rdm_rma_check_tcqe(&cqe, (void *)READ_CTX, FI_RMA | FI_READ, 0, ep[0]);
2398
2399 r[0] = 1;
2400 rdm_rma_check_cntrs(w, r, w_e, r_e);
2401
2402 dbg_printf("got read context event!\n");
2403
2404 cr_assert(check_data(s, t, len), "Data mismatch");
2405 }
2406
do_read_alignment(int len)2407 static void do_read_alignment(int len)
2408 {
2409 int s_off, t_off, l_off;
2410
2411 for (s_off = 0; s_off < 7; s_off++) {
2412 for (t_off = 0; t_off < 7; t_off++) {
2413 for (l_off = 0; l_off < 7; l_off++) {
2414 do_read_buf(source + s_off,
2415 target + t_off,
2416 len + l_off);
2417 }
2418 }
2419 }
2420 }
2421
Test(rdm_rma_stx_basic,read_alignment)2422 Test(rdm_rma_stx_basic, read_alignment)
2423 {
2424 xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1));
2425 }
2426
Test(rdm_rma_stx_basic,read_alignment_retrans)2427 Test(rdm_rma_stx_basic, read_alignment_retrans)
2428 {
2429 err_inject_enable();
2430 xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1));
2431 }
2432
Test(dgram_rma_stx_basic,read_alignment)2433 Test(dgram_rma_stx_basic, read_alignment)
2434 {
2435 xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1));
2436 }
2437
Test(dgram_rma_1dom_stx_basic,read_alignment)2438 Test(dgram_rma_1dom_stx_basic, read_alignment)
2439 {
2440 xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1));
2441 }
2442
2443 /* scalable */
2444
Test(rdm_rma_stx_scalable,read_alignment)2445 Test(rdm_rma_stx_scalable, read_alignment)
2446 {
2447 xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1));
2448 }
2449
Test(rdm_rma_stx_scalable,read_alignment_retrans)2450 Test(rdm_rma_stx_scalable, read_alignment_retrans)
2451 {
2452 err_inject_enable();
2453 xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1));
2454 }
2455
Test(dgram_rma_stx_scalable,read_alignment)2456 Test(dgram_rma_stx_scalable, read_alignment)
2457 {
2458 xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1));
2459 }
2460
Test(dgram_rma_1dom_stx_scalable,read_alignment)2461 Test(dgram_rma_1dom_stx_scalable, read_alignment)
2462 {
2463 xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1));
2464 }
2465
do_write_buf(void * s,void * t,int len)2466 static void do_write_buf(void *s, void *t, int len)
2467 {
2468 int ret;
2469 ssize_t sz;
2470 struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX,
2471 (void *) -1, UINT_MAX, UINT_MAX };
2472 struct fi_cq_err_entry cq_err;
2473 int errors_to_read = (dgm_fail) ? 1 : 0;
2474 uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0};
2475
2476 init_data(s, len, 0xab);
2477 init_data(t, len, 0);
2478 sz = fi_write(ep[0], s, len, NULL, gni_addr[1],
2479 _REM_ADDR(fi, target, t), mr_key[1], t);
2480 cr_assert_eq(sz, 0);
2481
2482 do {
2483 while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
2484 pthread_yield();
2485 }
2486
2487 if (dgm_fail) {
2488 cr_assert_eq(ret, -FI_EAVAIL);
2489
2490 ret = fi_cq_readerr(send_cq[0], &cq_err, 0);
2491 cr_assert_eq(ret, 1);
2492
2493 errors_to_read--;
2494 }
2495 } while (errors_to_read > 0);
2496
2497 if (dgm_fail)
2498 return;
2499
2500 cr_assert_eq(ret, 1);
2501 rdm_rma_check_tcqe(&cqe, t, FI_RMA | FI_WRITE, 0, ep[0]);
2502
2503 w[0] = 1;
2504 rdm_rma_check_cntrs(w, r, w_e, r_e);
2505
2506 dbg_printf("got write context event!\n");
2507
2508 cr_assert(check_data(s, t, len), "Data mismatch");
2509 }
2510
do_write_alignment(int len)2511 static void do_write_alignment(int len)
2512 {
2513 int s_off, t_off, l_off;
2514
2515 for (s_off = 0; s_off < 7; s_off++) {
2516 for (t_off = 0; t_off < 7; t_off++) {
2517 for (l_off = 0; l_off < 7; l_off++) {
2518 do_write_buf(source + s_off,
2519 target + t_off,
2520 len + l_off);
2521 }
2522 }
2523 }
2524 }
2525
Test(rdm_rma_stx_basic,write_alignment)2526 Test(rdm_rma_stx_basic, write_alignment)
2527 {
2528 xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
2529 }
2530
Test(rdm_rma_stx_basic,write_alignment_retrans)2531 Test(rdm_rma_stx_basic, write_alignment_retrans)
2532 {
2533 err_inject_enable();
2534 xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
2535 }
2536
Test(dgram_rma_stx_basic,write_alignment)2537 Test(dgram_rma_stx_basic, write_alignment)
2538 {
2539 xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
2540 }
2541
Test(dgram_rma_stx_basic,write_alignment_retrans)2542 Test(dgram_rma_stx_basic, write_alignment_retrans)
2543 {
2544 dgm_fail = 1;
2545 err_inject_enable();
2546 xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
2547 }
2548
Test(dgram_rma_1dom_stx_basic,write_alignment)2549 Test(dgram_rma_1dom_stx_basic, write_alignment)
2550 {
2551 xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
2552 }
2553
Test(dgram_rma_1dom_stx_basic,write_alignment_retrans)2554 Test(dgram_rma_1dom_stx_basic, write_alignment_retrans)
2555 {
2556 dgm_fail = 1;
2557 err_inject_enable();
2558 xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
2559 }
2560
2561 /* scalable */
2562
Test(rdm_rma_stx_scalable,write_alignment)2563 Test(rdm_rma_stx_scalable, write_alignment)
2564 {
2565 xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
2566 }
2567
Test(rdm_rma_stx_scalable,write_alignment_retrans)2568 Test(rdm_rma_stx_scalable, write_alignment_retrans)
2569 {
2570 err_inject_enable();
2571 xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
2572 }
2573
Test(dgram_rma_stx_scalable,write_alignment)2574 Test(dgram_rma_stx_scalable, write_alignment)
2575 {
2576 xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
2577 }
2578
Test(dgram_rma_stx_scalable,write_alignment_retrans)2579 Test(dgram_rma_stx_scalable, write_alignment_retrans)
2580 {
2581 dgm_fail = 1;
2582 err_inject_enable();
2583 xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
2584 }
2585
Test(dgram_rma_1dom_stx_scalable,write_alignment)2586 Test(dgram_rma_1dom_stx_scalable, write_alignment)
2587 {
2588 xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
2589 }
2590
Test(dgram_rma_1dom_stx_scalable,write_alignment_retrans)2591 Test(dgram_rma_1dom_stx_scalable, write_alignment_retrans)
2592 {
2593 dgm_fail = 1;
2594 err_inject_enable();
2595 xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1));
2596 }
2597
do_trigger(int len)2598 static void do_trigger(int len)
2599 {
2600 int ret, i;
2601 ssize_t sz;
2602 struct fi_cq_tagged_entry cqe;
2603 struct fi_msg_rma msg[4];
2604 struct iovec iov;
2605 struct fi_rma_iov rma_iov;
2606 struct fi_triggered_context t_ctx[4];
2607 void *ctxs[4];
2608
2609 iov.iov_base = source;
2610 iov.iov_len = len;
2611
2612 rma_iov.addr = _REM_ADDR(fi, target, target);
2613 rma_iov.len = len;
2614 rma_iov.key = mr_key[1];
2615
2616 msg[0].msg_iov = &iov;
2617 msg[0].desc = (void **)loc_mr;
2618 msg[0].iov_count = 1;
2619 msg[0].addr = gni_addr[1];
2620 msg[0].rma_iov = &rma_iov;
2621 msg[0].rma_iov_count = 1;
2622 msg[0].data = (uint64_t)target;
2623 msg[1] = msg[2] = msg[3] = msg[0];
2624
2625 /* XXX: Req 0 is guaranteed to be sent before req 2, but req 2 will
2626 * race req 0 through the network. Fix race if needed. */
2627 t_ctx[0].trigger.threshold.threshold = 1;
2628 t_ctx[1].trigger.threshold.threshold = 2;
2629 t_ctx[2].trigger.threshold.threshold = 1;
2630 t_ctx[3].trigger.threshold.threshold = 0;
2631 ctxs[0] = &t_ctx[3];
2632 ctxs[1] = &t_ctx[0];
2633 ctxs[2] = &t_ctx[2];
2634 ctxs[3] = &t_ctx[1];
2635
2636 for (i = 0; i < 4; i++) {
2637 t_ctx[i].event_type = FI_TRIGGER_THRESHOLD;
2638 t_ctx[i].trigger.threshold.cntr = write_cntr[0];
2639 msg[i].context = &t_ctx[i];
2640
2641 sz = fi_writemsg(ep[0], &msg[i], FI_TRIGGER);
2642 cr_assert_eq(sz, 0);
2643 }
2644
2645 for (i = 0; i < 4; i++) {
2646 /* reset cqe */
2647 cqe.op_context = cqe.buf = (void *) -1;
2648 cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX;
2649 while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) {
2650 pthread_yield();
2651 }
2652
2653 cr_assert_eq(ret, 1);
2654
2655 rdm_rma_check_tcqe(&cqe, ctxs[i], FI_RMA | FI_WRITE, 0, ep[0]);
2656 }
2657
2658 sz = fi_cntr_set(write_cntr[0], 0);
2659 cr_assert_eq(sz, 0);
2660 }
2661
2662 /*
2663 * TODO: fix this test. fails sporadically
2664 */
2665 Test(rdm_rma_stx_basic, trigger, .disabled = true)
2666 {
2667 xfer_for_each_size(do_trigger, 8, BUF_SZ);
2668 }
2669
2670 Test(rdm_rma_stx_scalable, trigger, .disabled = true)
2671 {
2672 /* FIXME intermittent test failures */
2673 cr_skip_test("intermittent test failures");
2674 xfer_for_each_size(do_trigger, 8, BUF_SZ);
2675 }
2676