/*
 * Copyright (C) 2016 by Argonne National Laboratory.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
32 #ifndef _FI_BGQ_DIRECT_ATOMIC_H_
33 #define _FI_BGQ_DIRECT_ATOMIC_H_
34
35 #define FABRIC_DIRECT_ATOMIC 1
36
37 #include "rdma/bgq/fi_bgq_compiler.h"
38 #include "rdma/bgq/fi_bgq_spi.h"
39
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43
44 #include <complex.h>
/* NOTE(review): the region below is compiled out with '#if 0'.  It appears to
 * be an abandoned alternate implementation of the datatype-size lookup that
 * was superseded by sizeofdt() further down; retained unchanged. */
#if 0

#define FI_BGQ_DATATYPES \
	sizeof(int8_t), \
	sizeof(uint8_t), \
	sizeof(int16_t), \
	sizeof(uint16_t), \
	sizeof(int32_t), \
	sizeof(uint32_t), \
	sizeof(int64_t), \
	sizeof(uint64_t), \
	sizeof(float), \
	sizeof(double), \
	sizeof(float complex), \
	sizeof(double complex), \
	sizeof(long double), \
	sizeof(long double complex),

#ifdef __cplusplus
struct __fi_bgq_datatype{
	static const size_t size(int index){
		static size_t __fi_bgq_datatype_size[] =
		{
			FI_BGQ_DATATYPES
		};
		return __fi_bgq_datatype_size[index];
	}
};
#else
static size_t __fi_bgq_datatype_size[] =
{
	FI_BGQ_DATATYPES
};
#endif

/*
 * Warning: bogus datatype will result in out of bounds array access.
 * Use with caution.
 */
static inline size_t fi_bgq_datatype_size_unsafe(enum fi_datatype dt)
{
#ifdef __cplusplus
	return __fi_bgq_datatype::size(dt);
#else
	return __fi_bgq_datatype_size[dt];
#endif
}

static inline size_t fi_bgq_datatype_size(enum fi_datatype dt)
{
	return
		(((int)dt) < 0 || dt >= FI_DATATYPE_LAST)
			? 0
			: fi_bgq_datatype_size_unsafe(dt);
}
#endif


/**
 * Validate the parameters of an atomic operation.
 *
 * All checks are compiled in only when DEBUG is defined; in optimized builds
 * this function unconditionally returns 0 (success).
 *
 * @param bgq_ep   endpoint to operate on; must be non-NULL and enabled
 * @param av_type  address-vector type; must match the endpoint's av_type
 *                 and must not be FI_AV_UNSPEC
 * @param dt       atomic datatype; must be a valid enum fi_datatype value
 * @param op       atomic operation; must be one of the supported fi_op values
 * @param count    number of datatype elements; must be non-zero
 *
 * @return 0 on success, -FI_EINVAL on any validation failure
 */
static inline int fi_bgq_check_atomic(struct fi_bgq_ep *bgq_ep,
		enum fi_av_type av_type, enum fi_datatype dt, enum fi_op op,
		size_t count)
{
#ifdef DEBUG
	switch((int)op) {
	case FI_MIN:
	case FI_MAX:
	case FI_SUM:
	case FI_PROD:
	case FI_LOR:
	case FI_LAND:
	case FI_BOR:
	case FI_BAND:
	case FI_LXOR:
	case FI_ATOMIC_READ:
	case FI_ATOMIC_WRITE:
	case FI_CSWAP:
	case FI_CSWAP_NE:
	case FI_CSWAP_LE:
	case FI_CSWAP_LT:
	case FI_CSWAP_GE:
	case FI_CSWAP_GT:
	case FI_MSWAP:
		break;
	default:
		return -FI_EINVAL;
	}
	if (((int) dt >= FI_DATATYPE_LAST) || ((int) dt < 0))
		return -FI_EINVAL;

	if (!bgq_ep)
		return -FI_EINVAL;
	if (bgq_ep->state != FI_BGQ_EP_ENABLED)
		return -FI_EINVAL;

	if (count == 0)
		return -FI_EINVAL;

	if (av_type == FI_AV_UNSPEC)
		return -FI_EINVAL;
	if (av_type == FI_AV_MAP && bgq_ep->av_type != FI_AV_MAP)
		return -FI_EINVAL;
	if (av_type == FI_AV_TABLE && bgq_ep->av_type != FI_AV_TABLE)
		return -FI_EINVAL;
#endif
	return 0;
}

sizeofdt(const enum fi_datatype datatype)153 static inline size_t sizeofdt(const enum fi_datatype datatype) {
154
155 static const size_t sizeofdt[FI_DATATYPE_LAST] = {
156 sizeof(int8_t), /* FI_INT8 */
157 sizeof(uint8_t), /* FI_UINT8 */
158 sizeof(int16_t), /* FI_INT16 */
159 sizeof(uint16_t), /* FI_UINT16 */
160 sizeof(int32_t), /* FI_INT32 */
161 sizeof(uint32_t), /* FI_UINT32 */
162 sizeof(int64_t), /* FI_INT64 */
163 sizeof(uint64_t), /* FI_UINT64 */
164 sizeof(float), /* FI_FLOAT */
165 sizeof(double), /* FI_DOUBLE */
166 sizeof(complex float), /* FI_FLOAT_COMPLEX */
167 sizeof(complex double), /* FI_DOUBLE_COMPLEX */
168 sizeof(long double), /* FI_LONG_DOUBLE */
169 sizeof(complex long double) /* FI_LONG_DOUBLE_COMPLEX */
170 };
171
172 return sizeofdt[datatype];
173 }
174
maxcount(const enum fi_datatype datatype,const unsigned is_compare,const unsigned is_fetch)175 static inline size_t maxcount (const enum fi_datatype datatype,
176 const unsigned is_compare,
177 const unsigned is_fetch) {
178
179 #define INIT_MAXCOUNT_ARRAY(maxbytes) \
180 maxbytes / sizeof(int8_t), /* FI_INT8 */ \
181 maxbytes / sizeof(uint8_t), /* FI_UINT8 */ \
182 maxbytes / sizeof(int16_t), /* FI_INT16 */ \
183 maxbytes / sizeof(uint16_t), /* FI_UINT16 */ \
184 maxbytes / sizeof(int32_t), /* FI_INT32 */ \
185 maxbytes / sizeof(uint32_t), /* FI_UINT32 */ \
186 maxbytes / sizeof(int64_t), /* FI_INT64 */ \
187 maxbytes / sizeof(uint64_t), /* FI_UINT64 */ \
188 maxbytes / sizeof(float), /* FI_FLOAT */ \
189 maxbytes / sizeof(double), /* FI_DOUBLE */ \
190 maxbytes / sizeof(complex float), /* FI_FLOAT_COMPLEX */ \
191 maxbytes / sizeof(complex double), /* FI_DOUBLE_COMPLEX */ \
192 maxbytes / sizeof(long double), /* FI_LONG_DOUBLE */ \
193 maxbytes / sizeof(complex long double) /* FI_LONG_DOUBLE_COMPLEX */
194
195 static const size_t maxcount[2][2][FI_DATATYPE_LAST] = {
196 {
197 { /* !compare, !fetch */
198 INIT_MAXCOUNT_ARRAY(512)
199 },
200 { /* !compare, fetch */
201 INIT_MAXCOUNT_ARRAY((512-sizeof(struct fi_bgq_mu_fetch_metadata)))
202 }
203 },
204 {
205 { /* compare, !fetch */
206 INIT_MAXCOUNT_ARRAY(256)
207 },
208 { /* compare, fetch */
209 INIT_MAXCOUNT_ARRAY((256-sizeof(struct fi_bgq_mu_fetch_metadata)))
210 }
211 }
212 };
213
214 #undef INIT_MAXCOUNT_ARRAY
215
216 return maxcount[is_compare][is_fetch][datatype];
217 }
218
fi_bgq_atomic_fence(struct fi_bgq_ep * bgq_ep,const uint64_t tx_op_flags,const union fi_bgq_addr * bgq_dst_addr,union fi_bgq_context * bgq_context,const int lock_required)219 static inline void fi_bgq_atomic_fence (struct fi_bgq_ep * bgq_ep,
220 const uint64_t tx_op_flags,
221 const union fi_bgq_addr * bgq_dst_addr,
222 union fi_bgq_context * bgq_context,
223 const int lock_required)
224 {
225 const uint64_t do_cq = ((tx_op_flags & FI_COMPLETION) == FI_COMPLETION);
226
227 struct fi_bgq_cntr * write_cntr = bgq_ep->write_cntr;
228 const uint64_t do_cntr = (write_cntr != 0);
229
230 assert(do_cq || do_cntr);
231
232 MUHWI_Descriptor_t * model = &bgq_ep->tx.atomic.emulation.fence.mfifo_model;
233
234 MUHWI_Descriptor_t * desc =
235 fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo);
236
237 qpx_memcpy64((void*)desc, (const void*)model);
238
239 /* set the destination torus address and fifo map */
240 desc->PacketHeader.NetworkHeader.pt2pt.Destination = fi_bgq_uid_get_destination(bgq_dst_addr->uid.fi);
241
242 const uint64_t fifo_map = (uint64_t) fi_bgq_addr_get_fifo_map(bgq_dst_addr->fi);
243 desc->Torus_FIFO_Map = fifo_map;
244
245 desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id =
246 fi_bgq_addr_rec_fifo_id(bgq_dst_addr->fi);
247
248 /* locate the payload lookaside slot */
249 void * payload =
250 fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo,
251 desc, &desc->Pa_Payload);
252
253 if (do_cntr && !do_cq) { /* likely */
254
255 /* increment the origin fi_cntr value */
256
257 /* copy the 'fi_atomic' counter completion descriptor
258 * model into the payload lookaside slot */
259 model = &bgq_ep->tx.atomic.emulation.fence.cntr_model;
260 MUHWI_Descriptor_t * cntr_desc = (MUHWI_Descriptor_t *) payload;
261 qpx_memcpy64((void*)cntr_desc, (const void*)model);
262
263 cntr_desc->Torus_FIFO_Map = fifo_map;
264
265 MUSPI_SetRecPayloadBaseAddressInfo(cntr_desc, write_cntr->std.batid,
266 MUSPI_GetAtomicAddress(0, MUHWI_ATOMIC_OPCODE_STORE_ADD)); /* TODO - init */
267
268 } else if (do_cq) {
269
270 /* add the cq byte counter decrement direct-put
271 * descriptor to the tail of the rget/mfifo payload */
272
273 /* initialize the completion entry */
274 assert(bgq_context);
275 assert(((uintptr_t)bgq_context & 0x07ull) == 0); /* must be 8 byte aligned */
276 bgq_context->flags = FI_RMA | FI_READ;
277 bgq_context->len = 0;
278 bgq_context->buf = NULL;
279 bgq_context->byte_counter = 1;
280 bgq_context->tag = 0;
281
282 uint64_t byte_counter_paddr = 0;
283 uint32_t cnk_rc __attribute__ ((unused));
284 cnk_rc = fi_bgq_cnk_vaddr2paddr((void*)&bgq_context->byte_counter,
285 sizeof(uint64_t), &byte_counter_paddr);
286 assert(cnk_rc == 0);
287
288 /* copy the 'fi_atomic' cq completion descriptor
289 * model into the payload lookaside slot */
290 model = &bgq_ep->tx.atomic.emulation.fence.cq_model;
291 MUHWI_Descriptor_t * cq_desc = (MUHWI_Descriptor_t *) payload;
292 qpx_memcpy64((void*)cq_desc, (const void*)model);
293
294 cq_desc->Torus_FIFO_Map = fifo_map;
295
296 MUSPI_SetRecPayloadBaseAddressInfo(cq_desc,
297 FI_BGQ_MU_BAT_ID_GLOBAL, byte_counter_paddr);
298
299 fi_bgq_cq_enqueue_pending(bgq_ep->send_cq, bgq_context, lock_required);
300
301 if (do_cntr) {
302
303 /* increment the origin fi_cntr value */
304
305 /* copy the 'fi_atomic' counter completion descriptor
306 * model into the payload lookaside slot */
307 model = &bgq_ep->tx.atomic.emulation.fence.cntr_model;
308 MUHWI_Descriptor_t * cntr_desc = &(((MUHWI_Descriptor_t *) payload)[1]);
309 qpx_memcpy64((void*)cntr_desc, (const void*)model);
310
311 cntr_desc->Torus_FIFO_Map = fifo_map;
312
313 MUSPI_SetRecPayloadBaseAddressInfo(cntr_desc, write_cntr->std.batid,
314 MUSPI_GetAtomicAddress(0, MUHWI_ATOMIC_OPCODE_STORE_ADD)); /* TODO - init */
315
316 desc->Message_Length += sizeof(MUHWI_Descriptor_t);
317 union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader;
318 hdr->rma.ndesc += 1;
319 }
320
321 } else { /* !do_cntr && !do_cq */
322
323 assert(0);
324
325 }
326
327 MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);
328 }
329
fi_bgq_atomic_internal(struct fi_bgq_ep * bgq_ep,const void * buf,size_t count,union fi_bgq_addr * bgq_dst_addr,uint64_t addr,uint64_t key,enum fi_datatype datatype,enum fi_op op,void * context,const unsigned is_fetch,const void * fetch_vaddr,const unsigned is_compare,const void * compare_vaddr,const uint64_t tx_op_flags,const int lock_required,const uint64_t enable_cntr,const uint64_t enable_cq,const unsigned is_inject)330 static inline size_t fi_bgq_atomic_internal(struct fi_bgq_ep *bgq_ep,
331 const void *buf, size_t count, union fi_bgq_addr *bgq_dst_addr,
332 uint64_t addr, uint64_t key, enum fi_datatype datatype,
333 enum fi_op op, void *context,
334 const unsigned is_fetch, const void * fetch_vaddr,
335 const unsigned is_compare, const void * compare_vaddr,
336 const uint64_t tx_op_flags, const int lock_required,
337 const uint64_t enable_cntr, const uint64_t enable_cq,
338 const unsigned is_inject)
339 {
340 assert((is_fetch==0)||(is_fetch==1));
341 assert((is_compare==0)||(is_compare==1));
342
343 const uint64_t do_cq = enable_cq && ((tx_op_flags & FI_COMPLETION) == FI_COMPLETION);
344 struct fi_bgq_cntr * write_cntr = bgq_ep->tx.write_cntr;
345 const uint64_t do_cntr = enable_cntr && (write_cntr != 0);
346
347 MUHWI_Descriptor_t * desc =
348 fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo);
349
350 qpx_memcpy64((void*)desc, (const void*)&bgq_ep->tx.atomic.emulation.mfifo_model);
351
352 /* set the destination torus address and fifo map */
353 desc->PacketHeader.NetworkHeader.pt2pt.Destination = fi_bgq_uid_get_destination(bgq_dst_addr->uid.fi);
354 const uint64_t fifo_map = (uint64_t) fi_bgq_addr_get_fifo_map(bgq_dst_addr->fi);
355 desc->Torus_FIFO_Map = fifo_map;
356
357 desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id =
358 fi_bgq_addr_rec_fifo_id(bgq_dst_addr->fi);
359
360 const size_t max_count = maxcount(datatype, is_compare, is_fetch);
361 const size_t xfer_count = MIN(max_count,count);
362 const uint32_t nbytes = (uint32_t)(sizeofdt(datatype) * xfer_count);
363
364 union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader;
365 hdr->atomic.dt = datatype;
366 hdr->atomic.op = op;
367 hdr->atomic.do_cntr = do_cntr;
368 hdr->atomic.cntr_bat_id = do_cntr ? write_cntr->std.batid : -1;
369 hdr->atomic.nbytes_minus_1 = nbytes - 1;
370 hdr->atomic.key = (uint16_t)key;
371 hdr->atomic.offset = addr;
372 hdr->atomic.is_local = fi_bgq_addr_is_local(bgq_dst_addr->fi);
373
374 hdr->atomic.is_fetch = is_fetch;
375
376
377 if (is_inject) { /* const expression with cause branch to compile out */
378
379 /* locate the payload lookaside slot */
380 void * payload =
381 fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo,
382 desc, &desc->Pa_Payload);
383
384 desc->Message_Length = nbytes;
385
386 if (buf) memcpy((void *)payload, (const void *)buf, nbytes);
387
388 } else if (!is_fetch && !is_compare) { /* const expression with cause branch to compile out */
389
390 desc->Message_Length = nbytes;
391 fi_bgq_cnk_vaddr2paddr(buf, nbytes, &desc->Pa_Payload);
392
393 assert(!do_cq);
394
395 } else {
396
397 /* locate the payload lookaside slot */
398 union fi_bgq_mu_packet_payload * payload =
399 (union fi_bgq_mu_packet_payload *)fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo,
400 desc, &desc->Pa_Payload);
401
402 /* initialize the atomic operation metadata in the packet payload */
403 payload->atomic_fetch.metadata.fifo_map = fifo_map;
404 payload->atomic_fetch.metadata.cq_paddr = 0;
405
406 if (is_fetch) {
407 fi_bgq_cnk_vaddr2paddr(fetch_vaddr, nbytes,
408 &payload->atomic_fetch.metadata.dst_paddr);
409
410 /* copy the origin (source) data into the injection lookaside buffer */
411 if (buf) memcpy((void*)&payload->atomic_fetch.data[0], (const void*) buf, nbytes);
412 desc->Message_Length = sizeof(struct fi_bgq_mu_fetch_metadata) +
413 nbytes + nbytes * is_compare;
414
415 if (is_compare) {
416 /* copy the origin (compare) data into the injection lookaside buffer */
417 memcpy((void*)&payload->atomic_fetch.data[nbytes], compare_vaddr, nbytes);
418 }
419
420 if (do_cq) {
421
422 /* initialize the completion entry */
423 assert(context);
424 assert(((uintptr_t)context & 0x07ull) == 0); /* must be 8 byte aligned */
425 union fi_bgq_context * bgq_context = (union fi_bgq_context *)context;
426 bgq_context->flags = 0; /* TODO */
427 bgq_context->len = nbytes;
428 bgq_context->buf = NULL;
429 bgq_context->byte_counter = nbytes;
430 bgq_context->tag = 0;
431
432 fi_bgq_cnk_vaddr2paddr((const void*)&bgq_context->byte_counter,
433 sizeof(uint64_t), &payload->atomic_fetch.metadata.cq_paddr);
434
435 fi_bgq_cq_enqueue_pending(bgq_ep->tx.send_cq, bgq_context, lock_required);
436 }
437
438 } else {
439 assert(0); /* !fetch, compare */
440 }
441 }
442
443 MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);
444
445 return xfer_count;
446 }
447
448
fi_bgq_atomic_generic(struct fid_ep * ep,const void * buf,size_t count,fi_addr_t dst_addr,uint64_t addr,uint64_t key,enum fi_datatype datatype,enum fi_op op,void * context,const int lock_required)449 static inline ssize_t fi_bgq_atomic_generic(struct fid_ep *ep,
450 const void *buf, size_t count,
451 fi_addr_t dst_addr, uint64_t addr,
452 uint64_t key, enum fi_datatype datatype,
453 enum fi_op op, void* context,
454 const int lock_required)
455 {
456 int ret;
457 struct fi_bgq_ep *bgq_ep;
458
459 bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);
460
461 /* TODO - if this is a FI_CLASS_STX_CTX, then the lock is required */
462 ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
463 if (ret) return ret;
464
465 size_t xfer __attribute__ ((unused));
466 xfer = fi_bgq_atomic_internal(bgq_ep, buf, count,
467 (union fi_bgq_addr *)&dst_addr, addr, key, datatype, op,
468 context, 0, NULL, 0, NULL,
469 bgq_ep->tx.op_flags, lock_required, 0, 0, 0);
470 assert(xfer == count);
471
472 /* TODO - if this is a FI_CLASS_STX_CTX, then the lock is required */
473 ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
474 if (ret) return ret;
475
476 return 0;
477 }
478
fi_bgq_atomic_writemsg_generic(struct fid_ep * ep,const struct fi_msg_atomic * msg,const uint64_t flags,const int lock_required)479 static inline ssize_t fi_bgq_atomic_writemsg_generic(struct fid_ep *ep,
480 const struct fi_msg_atomic *msg, const uint64_t flags,
481 const int lock_required)
482 {
483 int ret;
484 struct fi_bgq_ep *bgq_ep;
485
486 bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);
487
488 const enum fi_datatype datatype = msg->datatype;
489 const enum fi_op op = msg->op;
490
491 ret = fi_bgq_check_atomic(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV, datatype, op, 1);
492 if (ret) return ret;
493
494 ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
495 if (ret) return ret;
496
497 union fi_bgq_addr * bgq_dst_addr = (union fi_bgq_addr *)&msg->addr;
498
499 const size_t dtsize = sizeofdt(datatype);
500
501 size_t rma_iov_index = 0;
502 const size_t rma_iov_count = msg->rma_iov_count;
503 uint64_t rma_iov_dtcount = msg->rma_iov[rma_iov_index].count;
504 uint64_t rma_iov_addr = msg->rma_iov[rma_iov_index].addr;
505 uint64_t rma_iov_key = msg->rma_iov[rma_iov_index].key;
506
507 size_t msg_iov_index = 0;
508 const size_t msg_iov_count = msg->iov_count;
509 uint64_t msg_iov_dtcount = msg->msg_iov[msg_iov_index].count;
510 uintptr_t msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr;
511
512 while (msg_iov_dtcount != 0 && rma_iov_dtcount != 0) {
513
514 const size_t count_requested = MIN(msg_iov_dtcount,rma_iov_dtcount);
515
516 const size_t count_transfered =
517 fi_bgq_atomic_internal(bgq_ep, (void*)msg_iov_vaddr,
518 count_requested, bgq_dst_addr, rma_iov_addr,
519 rma_iov_key, datatype, op, NULL,
520 0, NULL, 0, NULL, flags, lock_required, 0, 0, 0);
521
522 const size_t bytes_transfered = dtsize * count_transfered;
523
524 msg_iov_dtcount -= count_transfered;
525 msg_iov_vaddr += bytes_transfered;
526
527 if ((msg_iov_dtcount == 0) && ((msg_iov_index+1) < msg_iov_count)) {
528 ++msg_iov_index;
529 msg_iov_dtcount = msg->msg_iov[msg_iov_index].count;
530 msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr;
531 }
532
533 rma_iov_dtcount -= count_transfered;
534 rma_iov_addr += bytes_transfered;
535
536 if ((rma_iov_dtcount == 0) && ((rma_iov_index+1) < rma_iov_count)) {
537 ++rma_iov_index;
538 rma_iov_dtcount = msg->rma_iov[rma_iov_index].count;
539 rma_iov_addr = msg->rma_iov[rma_iov_index].addr;
540 rma_iov_key = msg->rma_iov[rma_iov_index].key;
541 }
542 }
543
544 fi_bgq_atomic_fence(bgq_ep, flags, bgq_dst_addr,
545 (union fi_bgq_context *)msg->context,
546 lock_required);
547
548 ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
549 if (ret) return ret;
550
551 return 0;
552 }
553
554
555
fi_bgq_atomic_readwritemsg_generic(struct fid_ep * ep,const struct fi_msg_atomic * msg,struct fi_ioc * resultv,const size_t result_count,const uint64_t flags,const int lock_required)556 static inline ssize_t fi_bgq_atomic_readwritemsg_generic (struct fid_ep *ep,
557 const struct fi_msg_atomic *msg,
558 struct fi_ioc *resultv,
559 const size_t result_count,
560 const uint64_t flags,
561 const int lock_required)
562 {
563 int ret;
564 struct fi_bgq_ep *bgq_ep;
565
566 bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);
567
568 const enum fi_datatype datatype = msg->datatype;
569 const enum fi_op op = msg->op;
570
571 ret = fi_bgq_check_atomic(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV, datatype, op, 1);
572 if (ret) return ret;
573
574 ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
575 if (ret) return ret;
576
577 union fi_bgq_addr * bgq_dst_addr = (union fi_bgq_addr *)&msg->addr;
578
579 const size_t dtsize = sizeofdt(datatype);
580
581 size_t rma_iov_index = 0;
582 const size_t rma_iov_count = msg->rma_iov_count;
583 uint64_t rma_iov_dtcount = msg->rma_iov[rma_iov_index].count;
584 uint64_t rma_iov_addr = msg->rma_iov[rma_iov_index].addr;
585 uint64_t rma_iov_key = msg->rma_iov[rma_iov_index].key;
586
587 size_t rst_iov_index = 0;
588 const size_t rst_iov_count = result_count;
589 uint64_t rst_iov_dtcount = resultv[rst_iov_index].count;
590 uintptr_t rst_iov_vaddr = (uintptr_t)resultv[rst_iov_index].addr;
591
592 if (op != FI_ATOMIC_READ) { /* likely */
593
594 size_t msg_iov_index = 0;
595 const size_t msg_iov_count = msg->iov_count;
596 uint64_t msg_iov_dtcount = msg->msg_iov[msg_iov_index].count;
597 uintptr_t msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr;
598
599 size_t count_requested = MIN3(msg_iov_dtcount, rma_iov_dtcount, rst_iov_dtcount);
600
601 while (count_requested > 0) {
602
603 const size_t count_transfered =
604 fi_bgq_atomic_internal(bgq_ep, (void*)msg_iov_vaddr,
605 count_requested, bgq_dst_addr, rma_iov_addr,
606 rma_iov_key, datatype, op, NULL,
607 1, (const void *)rst_iov_vaddr, 0, NULL,
608 flags, lock_required, 0, 0, 0);
609
610 const size_t bytes_transfered = dtsize * count_transfered;
611
612 msg_iov_dtcount -= count_transfered;
613 msg_iov_vaddr += bytes_transfered;
614
615 if ((msg_iov_dtcount == 0) && ((msg_iov_index+1) < msg_iov_count)) {
616 ++msg_iov_index;
617 msg_iov_dtcount = msg->msg_iov[msg_iov_index].count;
618 msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr;
619 }
620
621 rma_iov_dtcount -= count_transfered;
622 rma_iov_addr += bytes_transfered;
623
624 if ((rma_iov_dtcount == 0) && ((rma_iov_index+1) < rma_iov_count)) {
625 ++rma_iov_index;
626 rma_iov_dtcount = msg->rma_iov[rma_iov_index].count;
627 rma_iov_addr = msg->rma_iov[rma_iov_index].addr;
628 rma_iov_key = msg->rma_iov[rma_iov_index].key;
629 }
630
631 rst_iov_dtcount -= count_transfered;
632 rst_iov_vaddr += bytes_transfered;
633
634 if ((rst_iov_dtcount == 0) && ((rst_iov_index+1) < rst_iov_count)) {
635 ++rst_iov_index;
636 rst_iov_dtcount = resultv[rst_iov_index].count;
637 rst_iov_vaddr = (uintptr_t)resultv[rst_iov_index].addr;
638 }
639
640 count_requested = MIN3(msg_iov_dtcount, rma_iov_dtcount, rst_iov_dtcount);
641 }
642
643 } else {
644
645 size_t count_requested = MIN(rma_iov_dtcount, rst_iov_dtcount);
646
647 while (rma_iov_dtcount != 0 && rst_iov_dtcount != 0) {
648
649 const size_t count_transfered =
650 fi_bgq_atomic_internal(bgq_ep, NULL,
651 count_requested, bgq_dst_addr, rma_iov_addr,
652 rma_iov_key, datatype, op, NULL,
653 1, (const void *)rst_iov_vaddr, 0, NULL,
654 flags, lock_required, 0, 0, 0);
655
656 const size_t bytes_transfered = dtsize * count_transfered;
657
658 rma_iov_dtcount -= count_transfered;
659 rma_iov_addr += bytes_transfered;
660
661 if ((rma_iov_dtcount == 0) && ((rma_iov_index+1) < rma_iov_count)) {
662 ++rma_iov_index;
663 rma_iov_dtcount = msg->rma_iov[rma_iov_index].count;
664 rma_iov_addr = msg->rma_iov[rma_iov_index].addr;
665 rma_iov_key = msg->rma_iov[rma_iov_index].key;
666 }
667
668 rst_iov_dtcount -= count_transfered;
669 rst_iov_vaddr += bytes_transfered;
670
671 if ((rst_iov_dtcount == 0) && ((rst_iov_index+1) < rst_iov_count)) {
672 ++rst_iov_index;
673 rst_iov_dtcount = resultv[rst_iov_index].count;
674 rst_iov_vaddr = (uintptr_t)resultv[rst_iov_index].addr;
675 }
676
677 count_requested = MIN(rma_iov_dtcount, rst_iov_dtcount);
678 }
679 }
680
681 fi_bgq_atomic_fence(bgq_ep, flags, bgq_dst_addr,
682 (union fi_bgq_context *)msg->context,
683 lock_required);
684
685 ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
686 if (ret) return ret;
687
688 return 0;
689 }
690
fi_bgq_atomic_compwritemsg_generic(struct fid_ep * ep,const struct fi_msg_atomic * msg,const struct fi_ioc * comparev,size_t compare_count,struct fi_ioc * resultv,size_t result_count,uint64_t flags,const int lock_required)691 static inline ssize_t fi_bgq_atomic_compwritemsg_generic (struct fid_ep *ep,
692 const struct fi_msg_atomic *msg,
693 const struct fi_ioc *comparev,
694 size_t compare_count,
695 struct fi_ioc *resultv,
696 size_t result_count,
697 uint64_t flags,
698 const int lock_required)
699 {
700 int ret;
701 struct fi_bgq_ep *bgq_ep;
702
703 bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);
704
705 const enum fi_datatype datatype = msg->datatype;
706 const enum fi_op op = msg->op;
707
708 ret = fi_bgq_check_atomic(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV, datatype, op, 1);
709 if (ret) return ret;
710
711 ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
712 if (ret) return ret;
713
714 union fi_bgq_addr * bgq_dst_addr = (union fi_bgq_addr *)&msg->addr;
715
716 const size_t dtsize = sizeofdt(datatype);
717
718 size_t rma_iov_index = 0;
719 const size_t rma_iov_count = msg->rma_iov_count;
720 uint64_t rma_iov_dtcount = msg->rma_iov[rma_iov_index].count;
721 uint64_t rma_iov_addr = msg->rma_iov[rma_iov_index].addr;
722 uint64_t rma_iov_key = msg->rma_iov[rma_iov_index].key;
723
724 size_t msg_iov_index = 0;
725 const size_t msg_iov_count = msg->iov_count;
726 uint64_t msg_iov_dtcount = msg->msg_iov[msg_iov_index].count;
727 uintptr_t msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr;
728
729 size_t rst_iov_index = 0;
730 const size_t rst_iov_count = result_count;
731 uint64_t rst_iov_dtcount = resultv[rst_iov_index].count;
732 uintptr_t rst_iov_vaddr = (uintptr_t)resultv[rst_iov_index].addr;
733
734 size_t cmp_iov_index = 0;
735 const size_t cmp_iov_count = compare_count;
736 uint64_t cmp_iov_dtcount = comparev[cmp_iov_index].count;
737 uintptr_t cmp_iov_vaddr = (uintptr_t)comparev[cmp_iov_index].addr;
738
739 while (msg_iov_dtcount != 0 && rma_iov_dtcount != 0 && rst_iov_dtcount != 0 && cmp_iov_dtcount != 0) {
740
741 const size_t count_requested =
742 MIN4(msg_iov_dtcount,rma_iov_dtcount,rst_iov_dtcount,cmp_iov_dtcount);
743
744 const size_t count_transfered =
745 fi_bgq_atomic_internal(bgq_ep, (void*)msg_iov_vaddr,
746 count_requested, bgq_dst_addr, rma_iov_addr,
747 rma_iov_key, datatype, op, NULL,
748 1, (const void *)rst_iov_vaddr, 1, (const void *)cmp_iov_vaddr,
749 flags, lock_required, 0, 0, 0);
750
751 const size_t bytes_transfered = dtsize * count_transfered;
752
753 msg_iov_dtcount -= count_transfered;
754 msg_iov_vaddr += bytes_transfered;
755
756 if ((msg_iov_dtcount == 0) && ((msg_iov_index+1) < msg_iov_count)) {
757 ++msg_iov_index;
758 msg_iov_dtcount = msg->msg_iov[msg_iov_index].count;
759 msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr;
760 }
761
762 rma_iov_dtcount -= count_transfered;
763 rma_iov_addr += bytes_transfered;
764
765 if ((rma_iov_dtcount == 0) && ((rma_iov_index+1) < rma_iov_count)) {
766 ++rma_iov_index;
767 rma_iov_dtcount = msg->rma_iov[rma_iov_index].count;
768 rma_iov_addr = msg->rma_iov[rma_iov_index].addr;
769 rma_iov_key = msg->rma_iov[rma_iov_index].key;
770 }
771
772 rst_iov_dtcount -= count_transfered;
773 rst_iov_vaddr += bytes_transfered;
774
775 if ((rst_iov_dtcount == 0) && ((rst_iov_index+1) < rst_iov_count)) {
776 ++rst_iov_index;
777 rst_iov_dtcount = resultv[rst_iov_index].count;
778 rst_iov_vaddr = (uintptr_t)resultv[rst_iov_index].addr;
779 }
780
781 cmp_iov_dtcount -= count_transfered;
782 cmp_iov_vaddr += bytes_transfered;
783
784 if ((cmp_iov_dtcount == 0) && ((cmp_iov_index+1) < cmp_iov_count)) {
785 ++cmp_iov_index;
786 cmp_iov_dtcount = comparev[cmp_iov_index].count;
787 cmp_iov_vaddr = (uintptr_t)comparev[cmp_iov_index].addr;
788 }
789 }
790
791 fi_bgq_atomic_fence(bgq_ep, flags, bgq_dst_addr,
792 (union fi_bgq_context *)msg->context,
793 lock_required);
794
795 ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
796 if (ret) return ret;
797
798 return 0;
799 }
800
801 /*
802 * Generic function to handle both fetching (1 operand) and compare
803 * (2 operand) atomics.
804 */
fi_bgq_fetch_compare_atomic_generic(struct fid_ep * ep,const void * buf,size_t count,void * desc,const void * compare,void * compare_desc,void * result,void * result_desc,fi_addr_t dest_addr,uint64_t addr,uint64_t key,enum fi_datatype datatype,enum fi_op op,void * context,int lock_required)805 static inline ssize_t fi_bgq_fetch_compare_atomic_generic(struct fid_ep *ep,
806 const void *buf, size_t count,
807 void *desc,
808 const void *compare, void *compare_desc,
809 void *result, void *result_desc,
810 fi_addr_t dest_addr, uint64_t addr,
811 uint64_t key, enum fi_datatype datatype,
812 enum fi_op op, void *context,
813 int lock_required)
814 {
815 int ret;
816 struct fi_bgq_ep *bgq_ep;
817 /* MPICH does NOT call fi_fetch_atomic or fi_compare_atomic so these functions
818 * have not been properly tested - for now just assert 0 and come back later
819 * and implement if an application on BGQ needs this.
820 */
821 assert(0);
822 bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);
823
824 ret = fi_bgq_check_atomic(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV, datatype, op, count);
825 if (ret)
826 return ret;
827
828 ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
829 if (ret)
830 return ret;
831
832 ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
833 if (ret)
834 return ret;
835
836 return 0;
837
838 }
839
fi_bgq_fetch_atomic_generic(struct fid_ep * ep,const void * buf,size_t count,void * desc,void * result,void * result_desc,fi_addr_t dest_addr,uint64_t addr,uint64_t key,enum fi_datatype datatype,enum fi_op op,void * context,int lock_required)840 static inline ssize_t fi_bgq_fetch_atomic_generic(struct fid_ep *ep,
841 const void *buf, size_t count,
842 void *desc,
843 void *result, void *result_desc,
844 fi_addr_t dest_addr, uint64_t addr,
845 uint64_t key, enum fi_datatype datatype,
846 enum fi_op op, void *context,
847 int lock_required)
848 {
849
850
851
852 return fi_bgq_fetch_compare_atomic_generic(ep,
853 buf, count, desc, NULL, NULL,
854 result, result_desc, dest_addr, addr,
855 key, datatype, op, context,
856 lock_required);
857 }
858
fi_bgq_compare_atomic_generic(struct fid_ep * ep,const void * buf,size_t count,void * desc,const void * compare,void * compare_desc,void * result,void * result_desc,fi_addr_t dest_addr,uint64_t addr,uint64_t key,enum fi_datatype datatype,enum fi_op op,void * context,int lock_required)859 static inline ssize_t fi_bgq_compare_atomic_generic(struct fid_ep *ep,
860 const void *buf, size_t count, void *desc,
861 const void *compare, void *compare_desc,
862 void *result, void *result_desc,
863 fi_addr_t dest_addr, uint64_t addr,
864 uint64_t key, enum fi_datatype datatype,
865 enum fi_op op, void *context,
866 int lock_required)
867 {
868 return fi_bgq_fetch_compare_atomic_generic(ep,
869 buf, count, desc, compare, compare_desc,
870 result, result_desc, dest_addr, addr,
871 key, datatype, op, context,
872 lock_required);
873 }
874
fi_bgq_inject_atomic_generic(struct fid_ep * ep,const void * buf,size_t count,fi_addr_t dest_addr,uint64_t addr,uint64_t key,enum fi_datatype datatype,enum fi_op op,int lock_required)875 static inline ssize_t fi_bgq_inject_atomic_generic(struct fid_ep *ep,
876 const void *buf, size_t count,
877 fi_addr_t dest_addr, uint64_t addr, uint64_t key,
878 enum fi_datatype datatype, enum fi_op op,
879 int lock_required)
880 {
881 int ret = 0;
882 struct fi_bgq_ep *bgq_ep;
883
884 bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);
885 ret = fi_bgq_check_atomic(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV, datatype, op, count);
886 if (ret)
887 return ret;
888
889 ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
890 if (ret)
891 return ret;
892
893 fi_bgq_atomic_internal(bgq_ep, buf, count,
894 (union fi_bgq_addr *)&dest_addr, addr, key, datatype, op,
895 NULL, 0, NULL, 0, NULL,
896 bgq_ep->tx.op_flags, lock_required, 1, 0, 1);
897
898 ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
899 if (ret)
900 return ret;
901
902 return 0;
903 }
904
905 /* Declare specialized functions that qualify for FABRIC_DIRECT.
906 * - No locks
907 */
908
909 #define FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK 0
910
FI_BGQ_ATOMIC_SPECIALIZED_FUNC(FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK)911 FI_BGQ_ATOMIC_SPECIALIZED_FUNC(FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK)
912
913 #ifdef FABRIC_DIRECT
914 #define fi_atomic(ep, buf, count, desc, dest_addr, \
915 addr, key, datatype, op, context) \
916 (FI_BGQ_ATOMIC_SPECIALIZED_FUNC_NAME(atomic, \
917 FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK) \
918 (ep, buf, count, desc, dest_addr, addr, key, \
919 datatype, op, context))
920
921 #define fi_inject_atomic(ep, buf, count, dest_addr, addr, key, \
922 datatype, op) \
923 (FI_BGQ_ATOMIC_SPECIALIZED_FUNC_NAME(inject_atomic, \
924 FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK) \
925 (ep, buf, count, dest_addr, addr, key, datatype, op))
926
927 #define fi_fetch_atomic(ep, buf, count, desc, result, result_desc, \
928 dest_addr, addr, key, datatype, op, context) \
929 (FI_BGQ_ATOMIC_SPECIALIZED_FUNC_NAME(fetch_atomic, \
930 FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK) \
931 (ep, buf, count, desc, result, result_desc, \
932 dest_addr, addr, key, datatype, op, context))
933
934 #define fi_compare_atomic(ep, buf, count, desc, compare, compare_desc, \
935 result, result_desc, dest_addr, addr, key, datatype, \
936 op, context) \
937 (FI_BGQ_ATOMIC_SPECIALIZED_FUNC_NAME(compare_atomic, \
938 FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK) \
939 (ep, buf, count, desc, compare, compare_desc, \
940 result, result_desc, dest_addr, addr, key, \
941 datatype, op, context))
942
943 static inline int
944 fi_atomicvalid(struct fid_ep *ep,
945 enum fi_datatype datatype, enum fi_op op, size_t *count)
946 {
947 return ep->atomic->writevalid(ep, datatype, op, count);
948 }
949
950 static inline int
fi_fetch_atomicvalid(struct fid_ep * ep,enum fi_datatype datatype,enum fi_op op,size_t * count)951 fi_fetch_atomicvalid(struct fid_ep *ep,
952 enum fi_datatype datatype, enum fi_op op, size_t *count)
953 {
954 return ep->atomic->readwritevalid(ep, datatype, op, count);
955 }
956
957 static inline int
fi_compare_atomicvalid(struct fid_ep * ep,enum fi_datatype datatype,enum fi_op op,size_t * count)958 fi_compare_atomicvalid(struct fid_ep *ep,
959 enum fi_datatype datatype, enum fi_op op, size_t *count)
960 {
961 return ep->atomic->compwritevalid(ep, datatype, op, count);
962 }
963
964 static inline ssize_t
fi_atomicmsg(struct fid_ep * ep,const struct fi_msg_atomic * msg,uint64_t flags)965 fi_atomicmsg(struct fid_ep *ep,
966 const struct fi_msg_atomic *msg, uint64_t flags)
967 {
968 return ep->atomic->writemsg(ep, msg, flags);
969 }
970
971 static inline ssize_t
fi_fetch_atomicmsg(struct fid_ep * ep,const struct fi_msg_atomic * msg,struct fi_ioc * resultv,void ** result_desc,size_t result_count,uint64_t flags)972 fi_fetch_atomicmsg(struct fid_ep *ep,
973 const struct fi_msg_atomic *msg,
974 struct fi_ioc *resultv, void **result_desc, size_t result_count,
975 uint64_t flags)
976 {
977 return ep->atomic->readwritemsg(ep, msg, resultv, result_desc,
978 result_count, flags);
979 }
980
981 static inline ssize_t
fi_compare_atomicmsg(struct fid_ep * ep,const struct fi_msg_atomic * msg,const struct fi_ioc * comparev,void ** compare_desc,size_t compare_count,struct fi_ioc * resultv,void ** result_desc,size_t result_count,uint64_t flags)982 fi_compare_atomicmsg(struct fid_ep *ep, const struct fi_msg_atomic *msg,
983 const struct fi_ioc *comparev, void **compare_desc,
984 size_t compare_count, struct fi_ioc *resultv,
985 void **result_desc, size_t result_count, uint64_t flags)
986 {
987 return ep->atomic->compwritemsg(ep, msg, comparev, compare_desc,
988 compare_count, resultv, result_desc, result_count, flags);
989 }
990
991 #endif
992
993 #ifdef __cplusplus
994 }
995 #endif
996
997 #endif /* _FI_BGQ_DIRECT_ATOMIC_H_ */
998