/*
 * Copyright (C) 2016 by Argonne National Laboratory.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef _FI_BGQ_DIRECT_ATOMIC_H_
#define _FI_BGQ_DIRECT_ATOMIC_H_

#define FABRIC_DIRECT_ATOMIC 1

#include "rdma/bgq/fi_bgq_compiler.h"
#include "rdma/bgq/fi_bgq_spi.h"

#ifdef __cplusplus
extern "C" {
#endif

#include <complex.h>
#if 0

#define FI_BGQ_DATATYPES		\
	sizeof(int8_t),			\
	sizeof(uint8_t),		\
	sizeof(int16_t),		\
	sizeof(uint16_t),		\
	sizeof(int32_t),		\
	sizeof(uint32_t),		\
	sizeof(int64_t),		\
	sizeof(uint64_t),		\
	sizeof(float),			\
	sizeof(double),			\
	sizeof(float complex),		\
	sizeof(double complex),		\
	sizeof(long double),		\
	sizeof(long double complex),

#ifdef __cplusplus
struct __fi_bgq_datatype{
	static const size_t size(int index){
		static size_t __fi_bgq_datatype_size[] =
		{
			FI_BGQ_DATATYPES
		};
		return __fi_bgq_datatype_size[index];
	}
};
#else
static size_t __fi_bgq_datatype_size[] =
{
	FI_BGQ_DATATYPES
};
#endif

/*
 * Warning: a bogus datatype will result in an out-of-bounds array access.
 * Use with caution.
 */
static inline size_t fi_bgq_datatype_size_unsafe(enum fi_datatype dt)
{
#ifdef __cplusplus
	return __fi_bgq_datatype::size(dt);
#else
	return __fi_bgq_datatype_size[dt];
#endif
}

static inline size_t fi_bgq_datatype_size(enum fi_datatype dt)
{
	return
		(((int)dt) < 0 || dt >= FI_DATATYPE_LAST)
			? 0
			: fi_bgq_datatype_size_unsafe(dt);
}
#endif


static inline int fi_bgq_check_atomic(struct fi_bgq_ep *bgq_ep,
		enum fi_av_type av_type, enum fi_datatype dt, enum fi_op op,
		size_t count)
{
#ifdef DEBUG
	switch((int)op) {
	case FI_MIN:
	case FI_MAX:
	case FI_SUM:
	case FI_PROD:
	case FI_LOR:
	case FI_LAND:
	case FI_BOR:
	case FI_BAND:
	case FI_LXOR:
	case FI_ATOMIC_READ:
	case FI_ATOMIC_WRITE:
	case FI_CSWAP:
	case FI_CSWAP_NE:
	case FI_CSWAP_LE:
	case FI_CSWAP_LT:
	case FI_CSWAP_GE:
	case FI_CSWAP_GT:
	case FI_MSWAP:
		break;
	default:
		return -FI_EINVAL;
	}
	if (((int) dt >= FI_DATATYPE_LAST) || ((int) dt < 0))
		return -FI_EINVAL;

	if (!bgq_ep)
		return -FI_EINVAL;
	if (bgq_ep->state != FI_BGQ_EP_ENABLED)
		return -FI_EINVAL;

	if (count == 0)
		return -FI_EINVAL;

	if (av_type == FI_AV_UNSPEC)
		return -FI_EINVAL;
	if (av_type == FI_AV_MAP && bgq_ep->av_type != FI_AV_MAP)
		return -FI_EINVAL;
	if (av_type == FI_AV_TABLE && bgq_ep->av_type != FI_AV_TABLE)
		return -FI_EINVAL;
#endif
	return 0;
}
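
/*
 * Illustrative usage (not part of the API): callers in this file validate
 * parameters before posting an operation, e.g.
 *
 *	int rc = fi_bgq_check_atomic(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV,
 *			FI_UINT64, FI_SUM, 1);
 *	if (rc) return rc;
 *
 * Note that all of the checks compile away unless DEBUG is defined.
 */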

static inline size_t sizeofdt(const enum fi_datatype datatype) {

	static const size_t sizeofdt[FI_DATATYPE_LAST] = {
		sizeof(int8_t),			/* FI_INT8 */
		sizeof(uint8_t),		/* FI_UINT8 */
		sizeof(int16_t),		/* FI_INT16 */
		sizeof(uint16_t),		/* FI_UINT16 */
		sizeof(int32_t),		/* FI_INT32 */
		sizeof(uint32_t),		/* FI_UINT32 */
		sizeof(int64_t),		/* FI_INT64 */
		sizeof(uint64_t),		/* FI_UINT64 */
		sizeof(float),			/* FI_FLOAT */
		sizeof(double),			/* FI_DOUBLE */
		sizeof(complex float),		/* FI_FLOAT_COMPLEX */
		sizeof(complex double),		/* FI_DOUBLE_COMPLEX */
		sizeof(long double),		/* FI_LONG_DOUBLE */
		sizeof(complex long double)	/* FI_LONG_DOUBLE_COMPLEX */
	};

	return sizeofdt[datatype];
}
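
/*
 * Example: sizeofdt(FI_UINT64) evaluates to sizeof(uint64_t), i.e. 8 bytes.
 * The lookup is unchecked; a datatype outside [0, FI_DATATYPE_LAST) reads
 * out of bounds, so callers must validate first (see fi_bgq_check_atomic
 * above).
 */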

static inline size_t maxcount (const enum fi_datatype datatype,
		const unsigned is_compare,
		const unsigned is_fetch) {

#define INIT_MAXCOUNT_ARRAY(maxbytes)			\
	maxbytes / sizeof(int8_t),		/* FI_INT8 */		\
	maxbytes / sizeof(uint8_t),		/* FI_UINT8 */		\
	maxbytes / sizeof(int16_t),		/* FI_INT16 */		\
	maxbytes / sizeof(uint16_t),		/* FI_UINT16 */		\
	maxbytes / sizeof(int32_t),		/* FI_INT32 */		\
	maxbytes / sizeof(uint32_t),		/* FI_UINT32 */		\
	maxbytes / sizeof(int64_t),		/* FI_INT64 */		\
	maxbytes / sizeof(uint64_t),		/* FI_UINT64 */		\
	maxbytes / sizeof(float),		/* FI_FLOAT */		\
	maxbytes / sizeof(double),		/* FI_DOUBLE */		\
	maxbytes / sizeof(complex float),	/* FI_FLOAT_COMPLEX */	\
	maxbytes / sizeof(complex double),	/* FI_DOUBLE_COMPLEX */	\
	maxbytes / sizeof(long double),		/* FI_LONG_DOUBLE */	\
	maxbytes / sizeof(complex long double)	/* FI_LONG_DOUBLE_COMPLEX */

	static const size_t maxcount[2][2][FI_DATATYPE_LAST] = {
		{
			{	/* !compare, !fetch */
				INIT_MAXCOUNT_ARRAY(512)
			},
			{	/* !compare, fetch */
				INIT_MAXCOUNT_ARRAY((512-sizeof(struct fi_bgq_mu_fetch_metadata)))
			}
		},
		{
			{	/* compare, !fetch */
				INIT_MAXCOUNT_ARRAY(256)
			},
			{	/* compare, fetch */
				INIT_MAXCOUNT_ARRAY((256-sizeof(struct fi_bgq_mu_fetch_metadata)))
			}
		}
	};

#undef INIT_MAXCOUNT_ARRAY

	return maxcount[is_compare][is_fetch][datatype];
}
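
/*
 * Example: a fetching, non-compare FI_UINT64 atomic can carry at most
 * (512 - sizeof(struct fi_bgq_mu_fetch_metadata)) / sizeof(uint64_t)
 * elements per packet; a compare operation halves the payload budget from
 * 512 to 256 bytes before any fetch metadata is subtracted.
 */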

static inline void fi_bgq_atomic_fence (struct fi_bgq_ep * bgq_ep,
		const uint64_t tx_op_flags,
		const union fi_bgq_addr * bgq_dst_addr,
		union fi_bgq_context * bgq_context,
		const int lock_required)
{
	const uint64_t do_cq = ((tx_op_flags & FI_COMPLETION) == FI_COMPLETION);

	struct fi_bgq_cntr * write_cntr = bgq_ep->write_cntr;
	const uint64_t do_cntr = (write_cntr != 0);

	assert(do_cq || do_cntr);

	MUHWI_Descriptor_t * model = &bgq_ep->tx.atomic.emulation.fence.mfifo_model;

	MUHWI_Descriptor_t * desc =
		fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo);

	qpx_memcpy64((void*)desc, (const void*)model);

	/* set the destination torus address and fifo map */
	desc->PacketHeader.NetworkHeader.pt2pt.Destination = fi_bgq_uid_get_destination(bgq_dst_addr->uid.fi);

	const uint64_t fifo_map = (uint64_t) fi_bgq_addr_get_fifo_map(bgq_dst_addr->fi);
	desc->Torus_FIFO_Map = fifo_map;

	desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id =
		fi_bgq_addr_rec_fifo_id(bgq_dst_addr->fi);

	/* locate the payload lookaside slot */
	void * payload =
		fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo,
			desc, &desc->Pa_Payload);

	if (do_cntr && !do_cq) {	/* likely */

		/* increment the origin fi_cntr value */

		/* copy the 'fi_atomic' counter completion descriptor
		 * model into the payload lookaside slot */
		model = &bgq_ep->tx.atomic.emulation.fence.cntr_model;
		MUHWI_Descriptor_t * cntr_desc = (MUHWI_Descriptor_t *) payload;
		qpx_memcpy64((void*)cntr_desc, (const void*)model);

		cntr_desc->Torus_FIFO_Map = fifo_map;

		MUSPI_SetRecPayloadBaseAddressInfo(cntr_desc, write_cntr->std.batid,
			MUSPI_GetAtomicAddress(0, MUHWI_ATOMIC_OPCODE_STORE_ADD));	/* TODO - init */

	} else if (do_cq) {

		/* add the cq byte counter decrement direct-put
		 * descriptor to the tail of the rget/mfifo payload */

		/* initialize the completion entry */
		assert(bgq_context);
		assert(((uintptr_t)bgq_context & 0x07ull) == 0);	/* must be 8 byte aligned */
		bgq_context->flags = FI_RMA | FI_READ;
		bgq_context->len = 0;
		bgq_context->buf = NULL;
		bgq_context->byte_counter = 1;
		bgq_context->tag = 0;

		uint64_t byte_counter_paddr = 0;
		uint32_t cnk_rc __attribute__ ((unused));
		cnk_rc = fi_bgq_cnk_vaddr2paddr((void*)&bgq_context->byte_counter,
				sizeof(uint64_t), &byte_counter_paddr);
		assert(cnk_rc == 0);

		/* copy the 'fi_atomic' cq completion descriptor
		 * model into the payload lookaside slot */
		model = &bgq_ep->tx.atomic.emulation.fence.cq_model;
		MUHWI_Descriptor_t * cq_desc = (MUHWI_Descriptor_t *) payload;
		qpx_memcpy64((void*)cq_desc, (const void*)model);

		cq_desc->Torus_FIFO_Map = fifo_map;

		MUSPI_SetRecPayloadBaseAddressInfo(cq_desc,
			FI_BGQ_MU_BAT_ID_GLOBAL, byte_counter_paddr);

		fi_bgq_cq_enqueue_pending(bgq_ep->send_cq, bgq_context, lock_required);

		if (do_cntr) {

			/* increment the origin fi_cntr value */

			/* copy the 'fi_atomic' counter completion descriptor
			 * model into the payload lookaside slot */
			model = &bgq_ep->tx.atomic.emulation.fence.cntr_model;
			MUHWI_Descriptor_t * cntr_desc = &(((MUHWI_Descriptor_t *) payload)[1]);
			qpx_memcpy64((void*)cntr_desc, (const void*)model);

			cntr_desc->Torus_FIFO_Map = fifo_map;

			MUSPI_SetRecPayloadBaseAddressInfo(cntr_desc, write_cntr->std.batid,
				MUSPI_GetAtomicAddress(0, MUHWI_ATOMIC_OPCODE_STORE_ADD));	/* TODO - init */

			desc->Message_Length += sizeof(MUHWI_Descriptor_t);
			union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader;
			hdr->rma.ndesc += 1;
		}

	} else {	/* !do_cntr && !do_cq */

		assert(0);

	}

	MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);
}
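
/*
 * The fence above injects a single memory-FIFO descriptor whose immediate
 * payload carries one or two additional descriptors: an atomic store-add
 * that bumps the origin write counter and/or a direct-put that decrements
 * the CQ byte counter. When both are present, Message_Length and
 * hdr->rma.ndesc are widened to cover the second payload descriptor.
 */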

static inline size_t fi_bgq_atomic_internal(struct fi_bgq_ep *bgq_ep,
		const void *buf, size_t count, union fi_bgq_addr *bgq_dst_addr,
		uint64_t addr, uint64_t key, enum fi_datatype datatype,
		enum fi_op op, void *context,
		const unsigned is_fetch, const void * fetch_vaddr,
		const unsigned is_compare, const void * compare_vaddr,
		const uint64_t tx_op_flags, const int lock_required,
		const uint64_t enable_cntr, const uint64_t enable_cq,
		const unsigned is_inject)
{
	assert((is_fetch==0)||(is_fetch==1));
	assert((is_compare==0)||(is_compare==1));

	const uint64_t do_cq = enable_cq && ((tx_op_flags & FI_COMPLETION) == FI_COMPLETION);
	struct fi_bgq_cntr * write_cntr = bgq_ep->tx.write_cntr;
	const uint64_t do_cntr = enable_cntr && (write_cntr != 0);

	MUHWI_Descriptor_t * desc =
		fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo);

	qpx_memcpy64((void*)desc, (const void*)&bgq_ep->tx.atomic.emulation.mfifo_model);

	/* set the destination torus address and fifo map */
	desc->PacketHeader.NetworkHeader.pt2pt.Destination = fi_bgq_uid_get_destination(bgq_dst_addr->uid.fi);
	const uint64_t fifo_map = (uint64_t) fi_bgq_addr_get_fifo_map(bgq_dst_addr->fi);
	desc->Torus_FIFO_Map = fifo_map;

	desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id =
		fi_bgq_addr_rec_fifo_id(bgq_dst_addr->fi);

	const size_t max_count = maxcount(datatype, is_compare, is_fetch);
	const size_t xfer_count = MIN(max_count,count);
	const uint32_t nbytes = (uint32_t)(sizeofdt(datatype) * xfer_count);

	union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader;
	hdr->atomic.dt = datatype;
	hdr->atomic.op = op;
	hdr->atomic.do_cntr = do_cntr;
	hdr->atomic.cntr_bat_id = do_cntr ? write_cntr->std.batid : -1;
	hdr->atomic.nbytes_minus_1 = nbytes - 1;
	hdr->atomic.key = (uint16_t)key;
	hdr->atomic.offset = addr;
	hdr->atomic.is_local = fi_bgq_addr_is_local(bgq_dst_addr->fi);

	hdr->atomic.is_fetch = is_fetch;

	if (is_inject) {	/* const expression will cause branch to compile out */

		/* locate the payload lookaside slot */
		void * payload =
			fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo,
				desc, &desc->Pa_Payload);

		desc->Message_Length = nbytes;

		if (buf) memcpy((void *)payload, (const void *)buf, nbytes);

	} else if (!is_fetch && !is_compare) {	/* const expression will cause branch to compile out */

		desc->Message_Length = nbytes;
		fi_bgq_cnk_vaddr2paddr(buf, nbytes, &desc->Pa_Payload);

		assert(!do_cq);

	} else {

		/* locate the payload lookaside slot */
		union fi_bgq_mu_packet_payload * payload =
			(union fi_bgq_mu_packet_payload *)fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo,
				desc, &desc->Pa_Payload);

		/* initialize the atomic operation metadata in the packet payload */
		payload->atomic_fetch.metadata.fifo_map = fifo_map;
		payload->atomic_fetch.metadata.cq_paddr = 0;

		if (is_fetch) {
			fi_bgq_cnk_vaddr2paddr(fetch_vaddr, nbytes,
				&payload->atomic_fetch.metadata.dst_paddr);

			/* copy the origin (source) data into the injection lookaside buffer */
			if (buf) memcpy((void*)&payload->atomic_fetch.data[0], (const void*) buf, nbytes);
			desc->Message_Length = sizeof(struct fi_bgq_mu_fetch_metadata) +
				nbytes + nbytes * is_compare;

			if (is_compare) {
				/* copy the origin (compare) data into the injection lookaside buffer */
				memcpy((void*)&payload->atomic_fetch.data[nbytes], compare_vaddr, nbytes);
			}

			if (do_cq) {

				/* initialize the completion entry */
				assert(context);
				assert(((uintptr_t)context & 0x07ull) == 0);	/* must be 8 byte aligned */
				union fi_bgq_context * bgq_context = (union fi_bgq_context *)context;
				bgq_context->flags = 0;		/* TODO */
				bgq_context->len = nbytes;
				bgq_context->buf = NULL;
				bgq_context->byte_counter = nbytes;
				bgq_context->tag = 0;

				fi_bgq_cnk_vaddr2paddr((const void*)&bgq_context->byte_counter,
					sizeof(uint64_t), &payload->atomic_fetch.metadata.cq_paddr);

				fi_bgq_cq_enqueue_pending(bgq_ep->tx.send_cq, bgq_context, lock_required);
			}

		} else {
			assert(0);	/* !fetch, compare */
		}
	}

	MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);

	return xfer_count;
}
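
/*
 * Note that fi_bgq_atomic_internal() returns the number of elements
 * actually transferred, which may be less than 'count' when the request
 * exceeds the per-packet maximum (see maxcount() above); the *msg_generic()
 * functions below loop until each iovec is drained.
 */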

static inline ssize_t fi_bgq_atomic_generic(struct fid_ep *ep,
		const void *buf, size_t count,
		fi_addr_t dst_addr, uint64_t addr,
		uint64_t key, enum fi_datatype datatype,
		enum fi_op op, void *context,
		const int lock_required)
{
	int			ret;
	struct fi_bgq_ep	*bgq_ep;

	bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);

	/* TODO - if this is a FI_CLASS_STX_CTX, then the lock is required */
	ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
	if (ret) return ret;

	size_t xfer __attribute__ ((unused));
	xfer = fi_bgq_atomic_internal(bgq_ep, buf, count,
		(union fi_bgq_addr *)&dst_addr, addr, key, datatype, op,
		context, 0, NULL, 0, NULL,
		bgq_ep->tx.op_flags, lock_required, 0, 0, 0);
	assert(xfer == count);

	/* TODO - if this is a FI_CLASS_STX_CTX, then the lock is required */
	ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
	if (ret) return ret;

	return 0;
}

static inline ssize_t fi_bgq_atomic_writemsg_generic(struct fid_ep *ep,
		const struct fi_msg_atomic *msg, const uint64_t flags,
		const int lock_required)
{
	int			ret;
	struct fi_bgq_ep	*bgq_ep;

	bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);

	const enum fi_datatype datatype = msg->datatype;
	const enum fi_op op = msg->op;

	ret = fi_bgq_check_atomic(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV, datatype, op, 1);
	if (ret) return ret;

	ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
	if (ret) return ret;

	union fi_bgq_addr * bgq_dst_addr = (union fi_bgq_addr *)&msg->addr;

	const size_t dtsize = sizeofdt(datatype);

	size_t rma_iov_index = 0;
	const size_t rma_iov_count = msg->rma_iov_count;
	uint64_t rma_iov_dtcount = msg->rma_iov[rma_iov_index].count;
	uint64_t rma_iov_addr = msg->rma_iov[rma_iov_index].addr;
	uint64_t rma_iov_key = msg->rma_iov[rma_iov_index].key;

	size_t msg_iov_index = 0;
	const size_t msg_iov_count = msg->iov_count;
	uint64_t msg_iov_dtcount = msg->msg_iov[msg_iov_index].count;
	uintptr_t msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr;

	while (msg_iov_dtcount != 0 && rma_iov_dtcount != 0) {

		const size_t count_requested = MIN(msg_iov_dtcount,rma_iov_dtcount);

		const size_t count_transferred =
			fi_bgq_atomic_internal(bgq_ep, (void*)msg_iov_vaddr,
				count_requested, bgq_dst_addr, rma_iov_addr,
				rma_iov_key, datatype, op, NULL,
				0, NULL, 0, NULL, flags, lock_required, 0, 0, 0);

		const size_t bytes_transferred = dtsize * count_transferred;

		msg_iov_dtcount -= count_transferred;
		msg_iov_vaddr += bytes_transferred;

		if ((msg_iov_dtcount == 0) && ((msg_iov_index+1) < msg_iov_count)) {
			++msg_iov_index;
			msg_iov_dtcount = msg->msg_iov[msg_iov_index].count;
			msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr;
		}

		rma_iov_dtcount -= count_transferred;
		rma_iov_addr += bytes_transferred;

		if ((rma_iov_dtcount == 0) && ((rma_iov_index+1) < rma_iov_count)) {
			++rma_iov_index;
			rma_iov_dtcount = msg->rma_iov[rma_iov_index].count;
			rma_iov_addr = msg->rma_iov[rma_iov_index].addr;
			rma_iov_key = msg->rma_iov[rma_iov_index].key;
		}
	}

	fi_bgq_atomic_fence(bgq_ep, flags, bgq_dst_addr,
		(union fi_bgq_context *)msg->context,
		lock_required);

	ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
	if (ret) return ret;

	return 0;
}



static inline ssize_t fi_bgq_atomic_readwritemsg_generic (struct fid_ep *ep,
		const struct fi_msg_atomic *msg,
		struct fi_ioc *resultv,
		const size_t result_count,
		const uint64_t flags,
		const int lock_required)
{
	int			ret;
	struct fi_bgq_ep	*bgq_ep;

	bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);

	const enum fi_datatype datatype = msg->datatype;
	const enum fi_op op = msg->op;

	ret = fi_bgq_check_atomic(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV, datatype, op, 1);
	if (ret) return ret;

	ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
	if (ret) return ret;

	union fi_bgq_addr * bgq_dst_addr = (union fi_bgq_addr *)&msg->addr;

	const size_t dtsize = sizeofdt(datatype);

	size_t rma_iov_index = 0;
	const size_t rma_iov_count = msg->rma_iov_count;
	uint64_t rma_iov_dtcount = msg->rma_iov[rma_iov_index].count;
	uint64_t rma_iov_addr = msg->rma_iov[rma_iov_index].addr;
	uint64_t rma_iov_key = msg->rma_iov[rma_iov_index].key;

	size_t rst_iov_index = 0;
	const size_t rst_iov_count = result_count;
	uint64_t rst_iov_dtcount = resultv[rst_iov_index].count;
	uintptr_t rst_iov_vaddr = (uintptr_t)resultv[rst_iov_index].addr;

	if (op != FI_ATOMIC_READ) {	/* likely */

		size_t msg_iov_index = 0;
		const size_t msg_iov_count = msg->iov_count;
		uint64_t msg_iov_dtcount = msg->msg_iov[msg_iov_index].count;
		uintptr_t msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr;

		size_t count_requested = MIN3(msg_iov_dtcount, rma_iov_dtcount, rst_iov_dtcount);

		while (count_requested > 0) {

			const size_t count_transferred =
				fi_bgq_atomic_internal(bgq_ep, (void*)msg_iov_vaddr,
					count_requested, bgq_dst_addr, rma_iov_addr,
					rma_iov_key, datatype, op, NULL,
					1, (const void *)rst_iov_vaddr, 0, NULL,
					flags, lock_required, 0, 0, 0);

			const size_t bytes_transferred = dtsize * count_transferred;

			msg_iov_dtcount -= count_transferred;
			msg_iov_vaddr += bytes_transferred;

			if ((msg_iov_dtcount == 0) && ((msg_iov_index+1) < msg_iov_count)) {
				++msg_iov_index;
				msg_iov_dtcount = msg->msg_iov[msg_iov_index].count;
				msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr;
			}

			rma_iov_dtcount -= count_transferred;
			rma_iov_addr += bytes_transferred;

			if ((rma_iov_dtcount == 0) && ((rma_iov_index+1) < rma_iov_count)) {
				++rma_iov_index;
				rma_iov_dtcount = msg->rma_iov[rma_iov_index].count;
				rma_iov_addr = msg->rma_iov[rma_iov_index].addr;
				rma_iov_key = msg->rma_iov[rma_iov_index].key;
			}

			rst_iov_dtcount -= count_transferred;
			rst_iov_vaddr += bytes_transferred;

			if ((rst_iov_dtcount == 0) && ((rst_iov_index+1) < rst_iov_count)) {
				++rst_iov_index;
				rst_iov_dtcount = resultv[rst_iov_index].count;
				rst_iov_vaddr = (uintptr_t)resultv[rst_iov_index].addr;
			}

			count_requested = MIN3(msg_iov_dtcount, rma_iov_dtcount, rst_iov_dtcount);
		}

	} else {

		size_t count_requested = MIN(rma_iov_dtcount, rst_iov_dtcount);

		while (rma_iov_dtcount != 0 && rst_iov_dtcount != 0) {

			const size_t count_transferred =
				fi_bgq_atomic_internal(bgq_ep, NULL,
					count_requested, bgq_dst_addr, rma_iov_addr,
					rma_iov_key, datatype, op, NULL,
					1, (const void *)rst_iov_vaddr, 0, NULL,
					flags, lock_required, 0, 0, 0);

			const size_t bytes_transferred = dtsize * count_transferred;

			rma_iov_dtcount -= count_transferred;
			rma_iov_addr += bytes_transferred;

			if ((rma_iov_dtcount == 0) && ((rma_iov_index+1) < rma_iov_count)) {
				++rma_iov_index;
				rma_iov_dtcount = msg->rma_iov[rma_iov_index].count;
				rma_iov_addr = msg->rma_iov[rma_iov_index].addr;
				rma_iov_key = msg->rma_iov[rma_iov_index].key;
			}

			rst_iov_dtcount -= count_transferred;
			rst_iov_vaddr += bytes_transferred;

			if ((rst_iov_dtcount == 0) && ((rst_iov_index+1) < rst_iov_count)) {
				++rst_iov_index;
				rst_iov_dtcount = resultv[rst_iov_index].count;
				rst_iov_vaddr = (uintptr_t)resultv[rst_iov_index].addr;
			}

			count_requested = MIN(rma_iov_dtcount, rst_iov_dtcount);
		}
	}

	fi_bgq_atomic_fence(bgq_ep, flags, bgq_dst_addr,
		(union fi_bgq_context *)msg->context,
		lock_required);

	ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
	if (ret) return ret;

	return 0;
}

static inline ssize_t fi_bgq_atomic_compwritemsg_generic (struct fid_ep *ep,
		const struct fi_msg_atomic *msg,
		const struct fi_ioc *comparev,
		size_t compare_count,
		struct fi_ioc *resultv,
		size_t result_count,
		uint64_t flags,
		const int lock_required)
{
	int			ret;
	struct fi_bgq_ep	*bgq_ep;

	bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);

	const enum fi_datatype datatype = msg->datatype;
	const enum fi_op op = msg->op;

	ret = fi_bgq_check_atomic(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV, datatype, op, 1);
	if (ret) return ret;

	ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
	if (ret) return ret;

	union fi_bgq_addr * bgq_dst_addr = (union fi_bgq_addr *)&msg->addr;

	const size_t dtsize = sizeofdt(datatype);

	size_t rma_iov_index = 0;
	const size_t rma_iov_count = msg->rma_iov_count;
	uint64_t rma_iov_dtcount = msg->rma_iov[rma_iov_index].count;
	uint64_t rma_iov_addr = msg->rma_iov[rma_iov_index].addr;
	uint64_t rma_iov_key = msg->rma_iov[rma_iov_index].key;

	size_t msg_iov_index = 0;
	const size_t msg_iov_count = msg->iov_count;
	uint64_t msg_iov_dtcount = msg->msg_iov[msg_iov_index].count;
	uintptr_t msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr;

	size_t rst_iov_index = 0;
	const size_t rst_iov_count = result_count;
	uint64_t rst_iov_dtcount = resultv[rst_iov_index].count;
	uintptr_t rst_iov_vaddr = (uintptr_t)resultv[rst_iov_index].addr;

	size_t cmp_iov_index = 0;
	const size_t cmp_iov_count = compare_count;
	uint64_t cmp_iov_dtcount = comparev[cmp_iov_index].count;
	uintptr_t cmp_iov_vaddr = (uintptr_t)comparev[cmp_iov_index].addr;

	while (msg_iov_dtcount != 0 && rma_iov_dtcount != 0 && rst_iov_dtcount != 0 && cmp_iov_dtcount != 0) {

		const size_t count_requested =
			MIN4(msg_iov_dtcount,rma_iov_dtcount,rst_iov_dtcount,cmp_iov_dtcount);

		const size_t count_transferred =
			fi_bgq_atomic_internal(bgq_ep, (void*)msg_iov_vaddr,
				count_requested, bgq_dst_addr, rma_iov_addr,
				rma_iov_key, datatype, op, NULL,
				1, (const void *)rst_iov_vaddr, 1, (const void *)cmp_iov_vaddr,
				flags, lock_required, 0, 0, 0);

		const size_t bytes_transferred = dtsize * count_transferred;

		msg_iov_dtcount -= count_transferred;
		msg_iov_vaddr += bytes_transferred;

		if ((msg_iov_dtcount == 0) && ((msg_iov_index+1) < msg_iov_count)) {
			++msg_iov_index;
			msg_iov_dtcount = msg->msg_iov[msg_iov_index].count;
			msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr;
		}

		rma_iov_dtcount -= count_transferred;
		rma_iov_addr += bytes_transferred;

		if ((rma_iov_dtcount == 0) && ((rma_iov_index+1) < rma_iov_count)) {
			++rma_iov_index;
			rma_iov_dtcount = msg->rma_iov[rma_iov_index].count;
			rma_iov_addr = msg->rma_iov[rma_iov_index].addr;
			rma_iov_key = msg->rma_iov[rma_iov_index].key;
		}

		rst_iov_dtcount -= count_transferred;
		rst_iov_vaddr += bytes_transferred;

		if ((rst_iov_dtcount == 0) && ((rst_iov_index+1) < rst_iov_count)) {
			++rst_iov_index;
			rst_iov_dtcount = resultv[rst_iov_index].count;
			rst_iov_vaddr = (uintptr_t)resultv[rst_iov_index].addr;
		}

		cmp_iov_dtcount -= count_transferred;
		cmp_iov_vaddr += bytes_transferred;

		if ((cmp_iov_dtcount == 0) && ((cmp_iov_index+1) < cmp_iov_count)) {
			++cmp_iov_index;
			cmp_iov_dtcount = comparev[cmp_iov_index].count;
			cmp_iov_vaddr = (uintptr_t)comparev[cmp_iov_index].addr;
		}
	}

	fi_bgq_atomic_fence(bgq_ep, flags, bgq_dst_addr,
		(union fi_bgq_context *)msg->context,
		lock_required);

	ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
	if (ret) return ret;

	return 0;
}

/*
 * Generic function to handle both fetching (1 operand) and compare
 * (2 operand) atomics.
 */
static inline ssize_t fi_bgq_fetch_compare_atomic_generic(struct fid_ep *ep,
		const void *buf, size_t count,
		void *desc,
		const void *compare, void *compare_desc,
		void *result, void *result_desc,
		fi_addr_t dest_addr, uint64_t addr,
		uint64_t key, enum fi_datatype datatype,
		enum fi_op op, void *context,
		int lock_required)
{
	int			ret;
	struct fi_bgq_ep	*bgq_ep;

	/* MPICH does NOT call fi_fetch_atomic or fi_compare_atomic, so these
	 * functions have not been properly tested; for now just assert 0 and
	 * come back later and implement if an application on BGQ needs this.
	 */
	assert(0);
	bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);

	ret = fi_bgq_check_atomic(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV, datatype, op, count);
	if (ret)
		return ret;

	ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
	if (ret)
		return ret;

	ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
	if (ret)
		return ret;

	return 0;
}

static inline ssize_t fi_bgq_fetch_atomic_generic(struct fid_ep *ep,
		const void *buf, size_t count,
		void *desc,
		void *result, void *result_desc,
		fi_addr_t dest_addr, uint64_t addr,
		uint64_t key, enum fi_datatype datatype,
		enum fi_op op, void *context,
		int lock_required)
{
	return fi_bgq_fetch_compare_atomic_generic(ep,
			buf, count, desc, NULL, NULL,
			result, result_desc, dest_addr, addr,
			key, datatype, op, context,
			lock_required);
}

static inline ssize_t fi_bgq_compare_atomic_generic(struct fid_ep *ep,
		const void *buf, size_t count, void *desc,
		const void *compare, void *compare_desc,
		void *result, void *result_desc,
		fi_addr_t dest_addr, uint64_t addr,
		uint64_t key, enum fi_datatype datatype,
		enum fi_op op, void *context,
		int lock_required)
{
	return fi_bgq_fetch_compare_atomic_generic(ep,
			buf, count, desc, compare, compare_desc,
			result, result_desc, dest_addr, addr,
			key, datatype, op, context,
			lock_required);
}

static inline ssize_t fi_bgq_inject_atomic_generic(struct fid_ep *ep,
		const void *buf, size_t count,
		fi_addr_t dest_addr, uint64_t addr, uint64_t key,
		enum fi_datatype datatype, enum fi_op op,
		int lock_required)
{
	int			ret = 0;
	struct fi_bgq_ep	*bgq_ep;

	bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);
	ret = fi_bgq_check_atomic(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV, datatype, op, count);
	if (ret)
		return ret;

	ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
	if (ret)
		return ret;

	fi_bgq_atomic_internal(bgq_ep, buf, count,
		(union fi_bgq_addr *)&dest_addr, addr, key, datatype, op,
		NULL, 0, NULL, 0, NULL,
		bgq_ep->tx.op_flags, lock_required, 1, 0, 1);

	ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
	if (ret)
		return ret;

	return 0;
}

/* Declare specialized functions that qualify for FABRIC_DIRECT.
 * - No locks
 */

#define FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK	0

FI_BGQ_ATOMIC_SPECIALIZED_FUNC(FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK)

#ifdef FABRIC_DIRECT
#define fi_atomic(ep, buf, count, desc, dest_addr,			\
		addr, key, datatype, op, context)			\
	(FI_BGQ_ATOMIC_SPECIALIZED_FUNC_NAME(atomic,			\
			FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK)		\
	(ep, buf, count, desc, dest_addr, addr, key,			\
		datatype, op, context))

#define fi_inject_atomic(ep, buf, count, dest_addr, addr, key,		\
		datatype, op)						\
	(FI_BGQ_ATOMIC_SPECIALIZED_FUNC_NAME(inject_atomic,		\
			FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK)		\
	(ep, buf, count, dest_addr, addr, key, datatype, op))

#define fi_fetch_atomic(ep, buf, count, desc, result, result_desc,	\
		dest_addr, addr, key, datatype, op, context)		\
	(FI_BGQ_ATOMIC_SPECIALIZED_FUNC_NAME(fetch_atomic,		\
			FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK)		\
	 (ep, buf, count, desc, result, result_desc,			\
		dest_addr, addr, key, datatype, op, context))

#define fi_compare_atomic(ep, buf, count, desc, compare, compare_desc,	\
		result, result_desc, dest_addr, addr, key, datatype,	\
		op, context)						\
	(FI_BGQ_ATOMIC_SPECIALIZED_FUNC_NAME(compare_atomic,		\
			FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK)		\
	 (ep, buf, count, desc, compare, compare_desc,			\
		result, result_desc, dest_addr, addr, key,		\
		datatype, op, context))
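
/*
 * Illustrative expansion (assuming FABRIC_DIRECT is defined): a call such
 * as
 *
 *	fi_inject_atomic(ep, &value, 1, dest_addr, addr, key,
 *			FI_UINT64, FI_SUM);
 *
 * resolves at compile time to the specialized, lock-free function
 * generated by FI_BGQ_ATOMIC_SPECIALIZED_FUNC above, rather than an
 * indirect call through ep->atomic.
 */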

static inline int
fi_atomicvalid(struct fid_ep *ep,
	       enum fi_datatype datatype, enum fi_op op, size_t *count)
{
	return ep->atomic->writevalid(ep, datatype, op, count);
}

static inline int
fi_fetch_atomicvalid(struct fid_ep *ep,
		     enum fi_datatype datatype, enum fi_op op, size_t *count)
{
	return ep->atomic->readwritevalid(ep, datatype, op, count);
}

static inline int
fi_compare_atomicvalid(struct fid_ep *ep,
		       enum fi_datatype datatype, enum fi_op op, size_t *count)
{
	return ep->atomic->compwritevalid(ep, datatype, op, count);
}

static inline ssize_t
fi_atomicmsg(struct fid_ep *ep,
		const struct fi_msg_atomic *msg, uint64_t flags)
{
	return ep->atomic->writemsg(ep, msg, flags);
}

static inline ssize_t
fi_fetch_atomicmsg(struct fid_ep *ep,
		const struct fi_msg_atomic *msg,
		struct fi_ioc *resultv, void **result_desc, size_t result_count,
		uint64_t flags)
{
	return ep->atomic->readwritemsg(ep, msg, resultv, result_desc,
			result_count, flags);
}

static inline ssize_t
fi_compare_atomicmsg(struct fid_ep *ep, const struct fi_msg_atomic *msg,
		const struct fi_ioc *comparev, void **compare_desc,
		size_t compare_count, struct fi_ioc *resultv,
		void **result_desc, size_t result_count, uint64_t flags)
{
	return ep->atomic->compwritemsg(ep, msg, comparev, compare_desc,
		compare_count, resultv, result_desc, result_count, flags);
}

#endif

#ifdef __cplusplus
}
#endif

#endif /* _FI_BGQ_DIRECT_ATOMIC_H_ */