1 /*
2  * Copyright (C) 2016 by Argonne National Laboratory.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #ifndef _FI_BGQ_DIRECT_RMA_H_
33 #define _FI_BGQ_DIRECT_RMA_H_
34 
35 #define FABRIC_DIRECT_RMA 1
36 
37 #include <pthread.h>
38 
39 #include "rdma/bgq/fi_bgq_compiler.h"
40 #include "rdma/bgq/fi_bgq_spi.h"
41 
42 #ifdef __cplusplus
43 extern "C" {
44 #endif
45 
/*
 * Validate an endpoint before an rma operation.
 *
 * All checks compile out unless DEBUG is defined; release builds always
 * return 0 (success).  In debug builds the endpoint must be non-NULL,
 * enabled, and its address-vector type must match the requested av_type.
 *
 * \return 0 on success, -FI_EINVAL on any validation failure
 */
static inline int fi_bgq_check_rma(struct fi_bgq_ep *bgq_ep,
		enum fi_av_type av_type)
{
#ifdef DEBUG
	if (!bgq_ep || bgq_ep->state != FI_BGQ_EP_ENABLED)
		return -FI_EINVAL;

	/* the av type must be resolved and must agree with the endpoint */
	switch (av_type) {
	case FI_AV_UNSPEC:
		return -FI_EINVAL;
	case FI_AV_MAP:
		if (bgq_ep->av_type != FI_AV_MAP)
			return -FI_EINVAL;
		break;
	case FI_AV_TABLE:
		if (bgq_ep->av_type != FI_AV_TABLE)
			return -FI_EINVAL;
		break;
	default:
		break;
	}
#endif
	return 0;
}
64 
65 
/*
 * Emulated rma read ("rget").
 *
 * Injects a single memory-fifo descriptor whose immediate payload is a list
 * of up to 8 direct-put ("dput") descriptors, one per iovec entry; the
 * packet header's rma.ndesc field tells the target how many descriptors the
 * payload carries.  Completion descriptors - a counter store-add and/or a cq
 * byte-counter decrement - are appended to the same payload when slots
 * remain, otherwise they are emitted via a recursive call with niov == 0
 * (which doubles as a fence, since its data-movement loop compiles out).
 *
 * Parameters:
 *   bgq_ep          - endpoint supplying the injection fifo and descriptor models
 *   iov             - local destination iovec array (NULL allowed when niov is 0)
 *   niov            - number of iovec entries; must be <= 8
 *   bgq_target_addr - encoded target (torus destination, fifo map, rec fifo id)
 *   addr            - remote source addresses, one entry per iovec
 *   key             - remote memory keys, one per iovec (only low 48 bits used)
 *   bgq_context     - completion context; must be non-NULL and 8-byte aligned
 *                     when a cq completion is generated
 *   tx_op_flags     - FI_COMPLETION here gates cq completion
 *   enable_cq       - non-zero to permit cq completion processing
 *   enable_cntr     - non-zero to permit write-counter completion processing
 *   lock_required   - passed through to the cq pending-queue enqueue
 */
static inline void fi_bgq_readv_internal (struct fi_bgq_ep * bgq_ep,
		const struct iovec * iov,
		const size_t niov,
		const union fi_bgq_addr * bgq_target_addr,
		const uint64_t * addr,
		const uint64_t * key,
		union fi_bgq_context * bgq_context,
		const uint64_t tx_op_flags,
		const uint64_t enable_cq,
		const uint64_t enable_cntr,
		const int lock_required)
{
#ifdef FI_BGQ_TRACE
fprintf(stderr,"fi_bgq_readv_internal starting - niov is %ld do_cntr is %d\n",niov,(enable_cntr && ( bgq_ep->write_cntr != 0)));
fflush(stderr);
#endif
	/* the mfifo immediate payload holds at most 8 dput descriptors */
	assert(niov <= 8);

	/* cq completion requires both the enable flag and FI_COMPLETION */
	const uint64_t do_cq = enable_cq && (tx_op_flags & FI_COMPLETION);

	struct fi_bgq_cntr * write_cntr = bgq_ep->write_cntr;
	const uint64_t do_cntr = enable_cntr && (write_cntr != 0);

	MUHWI_Descriptor_t * model = &bgq_ep->tx.read.emulation.mfifo_model;

	const uint64_t fifo_map = fi_bgq_addr_get_fifo_map(bgq_target_addr->fi);

	/* busy-wait until a fifo slot is available .. */
	MUHWI_Descriptor_t * desc =
		fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo);

	/* copy the descriptor model into the injection fifo */
	qpx_memcpy64((void*)desc, (const void *)model);

	/* set the target torus address and fifo map */
	desc->PacketHeader.NetworkHeader.pt2pt.Destination = fi_bgq_uid_get_destination(bgq_target_addr->uid.fi);
	desc->Torus_FIFO_Map = fifo_map;

	/* locate the payload lookaside slot; the payload of this mfifo packet
	 * is itself an array of MU descriptors */
	MUHWI_Descriptor_t * dput_desc =
		(MUHWI_Descriptor_t *)fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo,
			desc, &desc->Pa_Payload);
	/* message length is niov descriptors' worth of bytes */
	desc->Message_Length = (niov << BGQ_MU_DESCRIPTOR_SIZE_IN_POWER_OF_2);


	desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id =
	fi_bgq_addr_rec_fifo_id(bgq_target_addr->fi);

	union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader;
	hdr->rma.ndesc = niov;

	/* TODO - how to specify multiple remote injection fifos? */

	/* alias of the payload descriptors for access to the rma key field */
	union fi_bgq_mu_descriptor * fi_dput_desc = (union fi_bgq_mu_descriptor *) dput_desc;

	unsigned i;
	for (i = 0; i < niov; ++i) {	/* on fence this loop will compile out (niov is 0) */

		/* each payload slot starts from the dput model descriptor */
		qpx_memcpy64((void*)&dput_desc[i],
			(const void*)&bgq_ep->tx.read.emulation.dput_model);

		dput_desc[i].Torus_FIFO_Map = fifo_map;
		dput_desc[i].Message_Length = iov[i].iov_len;
		dput_desc[i].Pa_Payload = addr[i];

		/* determine the physical address of the destination data location */
		uint64_t iov_base_paddr = 0;
		uint32_t cnk_rc __attribute__ ((unused));
		cnk_rc = fi_bgq_cnk_vaddr2paddr(iov[i].iov_base, iov[i].iov_len, &iov_base_paddr);
		assert(cnk_rc==0);
		MUSPI_SetRecPayloadBaseAddressInfo(&dput_desc[i], FI_BGQ_MU_BAT_ID_GLOBAL, iov_base_paddr);

		assert((key[i] & 0xFFFF000000000000ul) == 0);	/* TODO - change this when key size > 48b */
		fi_dput_desc[i].rma.key_lsb = key[i];
	}

	if (do_cntr && niov < 8) {	/* likely */
#ifdef FI_BGQ_TRACE
fprintf(stderr,"fi_bgq_readv_internal do_cntr && niov %ld < 8\n",niov);
fflush(stderr);
#endif
		/* add the counter update direct-put descriptor to the
		 * tail of the rget/mfifo payload */

		qpx_memcpy64((void*)&dput_desc[niov],
			(const void*)&bgq_ep->tx.read.cntr_model);

		dput_desc[niov].Torus_FIFO_Map = fifo_map;
		MUSPI_SetRecPayloadBaseAddressInfo(&dput_desc[niov],
			FI_BGQ_MU_BAT_ID_GLOBAL,
			MUSPI_GetAtomicAddress(write_cntr->std.paddr, MUHWI_ATOMIC_OPCODE_STORE_ADD));

		/* account for the extra descriptor in length and ndesc */
		desc->Message_Length += sizeof(MUHWI_Descriptor_t);
		union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader;
		hdr->rma.ndesc += 1;

		if (!do_cq) {	/* likely */

#ifdef FI_BGQ_TRACE
fprintf(stderr,"fi_bgq_readv_internal do_cntr && niov < 8 AND (!do_cq)\n");
fflush(stderr);
#endif
			/* no cq completion needed - inject and done */
			MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);

		} else if (niov < 7) {

			/* add the cq update direct-put descriptor to the
			 * tail of the rget/mfifo payload (after the cntr update) */

			/* initialize the completion entry */
			assert(bgq_context);
			assert(((uintptr_t)bgq_context & 0x07ull) == 0);	/* must be 8 byte aligned */
			bgq_context->flags = FI_RMA | FI_READ;
			bgq_context->len = 0;
			bgq_context->buf = NULL;
			bgq_context->byte_counter = 1;
			bgq_context->tag = 0;

			/* the remote dput will decrement this byte counter to
			 * signal completion; find its physical address */
			uint64_t byte_counter_paddr = 0;
			uint32_t cnk_rc __attribute__ ((unused));
			cnk_rc = fi_bgq_cnk_vaddr2paddr((void*)&bgq_context->byte_counter,
						sizeof(uint64_t), &byte_counter_paddr);
			assert(cnk_rc == 0);

			/* slot niov holds the cntr descriptor; cq goes after it */
			MUHWI_Descriptor_t * cq_desc = &dput_desc[niov+1];

			qpx_memcpy64((void*)cq_desc,
				(const void*)&bgq_ep->tx.read.cq_model);

			cq_desc->Torus_FIFO_Map = fifo_map;
			MUSPI_SetRecPayloadBaseAddressInfo(cq_desc,
				FI_BGQ_MU_BAT_ID_GLOBAL, byte_counter_paddr);

			desc->Message_Length += sizeof(MUHWI_Descriptor_t);
			union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader;
			hdr->rma.ndesc += 1;

			MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);

			fi_bgq_cq_enqueue_pending(bgq_ep->send_cq, bgq_context, lock_required);

		} else {

			/* the rget/mfifo payload is full - inject the data
			 * movement descriptors, then inject the counter
			 * completion descriptor */
			MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);

			/* be lazy and do a single recursive call */
			fi_bgq_readv_internal(bgq_ep,
				NULL, 0,		/* no iovec array */
				bgq_target_addr,
				NULL, NULL,		/* no addr array, no key array */
				bgq_context, tx_op_flags,
				1,			/* enable cq */
				0,			/* disable cntr */
				lock_required);
		}

	} else if (do_cntr) {	/* unlikely */

		/* the rget/mfifo payload is full - inject the data
		 * movement descriptors, then inject any counter or cq
		 * completion descriptor(s) via a recursive call */
		MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);

		fi_bgq_readv_internal(bgq_ep,
			NULL, 0,		/* no iovec array */
			bgq_target_addr,
			NULL, NULL,		/* no addr array, no key array */
			bgq_context, tx_op_flags,
			do_cq,
			1,			/* enable cntr */
			lock_required);

	} else if (do_cq && niov < 8) {

		/* no cntr completion
		 *
		 * add the cq byte counter decrement direct-put
		 * descriptor to the tail of the rget/mfifo payload */

		/* initialize the completion entry */
		assert(bgq_context);
		assert(((uintptr_t)bgq_context & 0x07ull) == 0);	/* must be 8 byte aligned */
		bgq_context->flags = FI_RMA | FI_READ;
		bgq_context->len = 0;
		bgq_context->buf = NULL;
		bgq_context->byte_counter = 1;
		bgq_context->tag = 0;

		uint64_t byte_counter_paddr = 0;
		uint32_t cnk_rc __attribute__ ((unused));
		cnk_rc = fi_bgq_cnk_vaddr2paddr((void*)&bgq_context->byte_counter,
				sizeof(uint64_t), &byte_counter_paddr);
		assert(cnk_rc == 0);

		MUHWI_Descriptor_t * cq_desc = &dput_desc[niov];

		qpx_memcpy64((void*)cq_desc,
			(const void*)&bgq_ep->tx.read.cq_model);

		cq_desc->Torus_FIFO_Map = fifo_map;
		MUSPI_SetRecPayloadBaseAddressInfo(cq_desc,
			FI_BGQ_MU_BAT_ID_GLOBAL, byte_counter_paddr);

		desc->Message_Length += sizeof(MUHWI_Descriptor_t);
		union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader;
		hdr->rma.ndesc += 1;

		MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);

		fi_bgq_cq_enqueue_pending(bgq_ep->send_cq, bgq_context, lock_required);

	} else if (do_cq) {

		/* the rget/mfifo payload is full - inject the data
		 * movement descriptors, then inject the cq completion
		 * descriptor via a recursive call */
		MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);

		fi_bgq_readv_internal(bgq_ep,
			NULL, 0,		/* no iovec array */
			bgq_target_addr,
			NULL, NULL,		/* no addr array, no key array */
			bgq_context, tx_op_flags,
			1,	/* enable cq */
			0,	/* disable cntr */
			lock_required);

	} else {
		/* no cntr and no cq? very unlikely, if not invalid */

		/* if there are no completion operations then there *must* be
		 * at least one data movement operations */
		assert(niov > 0);

		MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);
	}
}
306 
/*
 * Implements fi_inject_write(): the source data is copied into the
 * injection fifo's immediate payload, so 'buf' is reusable as soon as this
 * function returns.  No cq completion is generated; only the endpoint's
 * bound write counter (if any) is incremented.
 *
 * 'len' must fit in a single packet payload (asserted in debug builds).
 *
 * Returns 0 on success, otherwise the error from the rma check or the
 * lock acquisition/release.
 */
static inline ssize_t fi_bgq_inject_write_generic(struct fid_ep *ep,
		const void *buf, size_t len, fi_addr_t dst_addr,
		uint64_t addr, uint64_t key,
		int lock_required)
{
#ifdef FI_BGQ_TRACE
        fprintf(stderr,"fi_bgq_inject_write_generic starting\n");
#endif
	int			ret;
	struct fi_bgq_ep	*bgq_ep;

	bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);

	ret = fi_bgq_check_rma(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV);
	if (ret) return ret;

//	if (av_type == FI_AV_TABLE)
//		dst_addr = bgq_ep->av->table[(size_t)dst_addr];

	ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
	if (ret) return ret;

	/* FI_MR_BASIC writes with a direct-put descriptor; FI_MR_SCALABLE
	 * emulates the write with a memory-fifo packet decoded at the target */
	MUHWI_Descriptor_t * model =
		(FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) ?
			&bgq_ep->tx.write.direct.dput_model :
			&bgq_ep->tx.write.emulation.mfifo_model;

	/*
	 * busy-wait until a fifo slot is available ..
	 */
	MUHWI_Descriptor_t * desc =
		fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo);

	/* copy the descriptor model into the injection fifo */
	qpx_memcpy64((void*)desc, (const void *)model);

	/* set the destination torus address and fifo map */
	union fi_bgq_addr * bgq_dst_addr = (union fi_bgq_addr *)&dst_addr;
	desc->PacketHeader.NetworkHeader.pt2pt.Destination = fi_bgq_uid_get_destination(bgq_dst_addr->uid.fi);
	desc->Torus_FIFO_Map = fi_bgq_addr_get_fifo_map(bgq_dst_addr->fi);
	desc->Message_Length = len;

	/* locate the payload lookaside slot and copy the source data into it;
	 * inject data must fit in one immediate payload */
	void * payload =
		fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo,
			desc, &desc->Pa_Payload);
	assert(len <= sizeof(union fi_bgq_mu_packet_payload));
	memcpy(payload, buf, len);

	if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) {		/* branch will compile out */
#ifdef FI_BGQ_TRACE
        fprintf(stderr,"fi_bgq_inject_write_generic - virtual addr is 0x%016lx physical addr is 0x%016lx key is %lu  \n",addr,(addr-key),key);
#endif

		/* the 'key' is the paddr of the remote memory region */
		MUSPI_SetRecPayloadBaseAddressInfo(desc, FI_BGQ_MU_BAT_ID_GLOBAL, addr-key);

	} else if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) {	/* branch will compile out */

		desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id =
			fi_bgq_addr_rec_fifo_id(bgq_dst_addr->fi);

		/* the 'key' is used to index into the remote base address table */
		union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader;
		hdr->rma.key = key;
		hdr->rma.offset = addr;
		hdr->rma.nbytes = len;
		hdr->rma.ndesc = 0;

	} else {
		assert(0);
	}

	/* the src buffer is available for reuse - increment the endpoint counter */
	struct fi_bgq_cntr * write_cntr = bgq_ep->write_cntr;
	if (write_cntr) L2_AtomicStoreAdd(write_cntr->std.l2_vaddr, 1);

	MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);

	ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
	if (ret) return ret;

	return 0;
}
391 
fi_bgq_write_fence(struct fi_bgq_ep * bgq_ep,const uint64_t tx_op_flags,const union fi_bgq_addr * bgq_dst_addr,union fi_bgq_context * bgq_context,const int lock_required)392 static inline void fi_bgq_write_fence (struct fi_bgq_ep * bgq_ep,
393 		const uint64_t tx_op_flags,
394 		const union fi_bgq_addr * bgq_dst_addr,
395 		union fi_bgq_context * bgq_context,
396 		const int lock_required)
397 {
398 	fi_bgq_readv_internal(bgq_ep,
399 		NULL, 0,		/* no iovec array */
400 		bgq_dst_addr,
401 		NULL, NULL,		/* no addr array, key array */
402 		bgq_context, tx_op_flags,
403 		1,
404 		1,
405 		lock_required);
406 }
407 
/*
 * Implements the data movement for fi_write*().
 *
 * Three paths:
 *  - FI_INJECT: the data is copied into the injection fifo's immediate
 *    payload (single packet); because the source buffer is reusable on
 *    return, any cq completion is reported as already completed and the
 *    write counter is bumped locally.
 *  - data fits one packet payload: a single descriptor is injected that
 *    points at the physical address of the caller's source buffer.
 *  - larger transfers: the first descriptor is saved as a model and
 *    re-injected one packet payload at a time, advancing the source
 *    paddr and the remote offset until 'len' is consumed.
 * For the non-inject paths, cq/cntr completion is implemented by a
 * zero-iovec fi_bgq_readv_internal() fence behind the data descriptors.
 *
 * FI_MR_BASIC vs FI_MR_SCALABLE selection compiles out; in BASIC mode
 * 'key' is the paddr of the remote region, in SCALABLE mode 'key'
 * indexes the remote base address table and 'addr' is an offset.
 */
static inline void fi_bgq_write_internal (struct fi_bgq_ep * bgq_ep,
		const void * buf,
		size_t len,
		const union fi_bgq_addr * bgq_dst_addr,
		uint64_t addr,
		const uint64_t key,
		union fi_bgq_context * bgq_context,
		const uint64_t tx_op_flags,
		const uint64_t enable_cq,
		const uint64_t enable_cntr,
		const int lock_required)
{

#ifdef FI_BGQ_TRACE
        fprintf(stderr,"fi_bgq_write_internal starting\n");
#endif
	/* cq completion requires both the enable flag and FI_COMPLETION */
	const uint64_t do_cq = enable_cq && ((tx_op_flags & FI_COMPLETION) == FI_COMPLETION);

	struct fi_bgq_cntr * write_cntr = bgq_ep->write_cntr;
	const uint64_t do_cntr = enable_cntr && (write_cntr != 0);

	MUHWI_Descriptor_t * model =
		(FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) ?
			&bgq_ep->tx.write.direct.dput_model :
			&bgq_ep->tx.write.emulation.mfifo_model;

	/* busy-wait until a fifo slot is available .. */
	MUHWI_Descriptor_t * desc =
		fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo);

	/* copy the descriptor model into the injection fifo */
	qpx_memcpy64((void*)desc, (const void *)model);

	/* set the destination torus address and fifo map */
	desc->PacketHeader.NetworkHeader.pt2pt.Destination = fi_bgq_uid_get_destination(bgq_dst_addr->uid.fi);
	desc->Torus_FIFO_Map = fi_bgq_addr_get_fifo_map(bgq_dst_addr->fi);

	if (tx_op_flags & FI_INJECT) {	/* unlikely */

		assert(len <= sizeof(union fi_bgq_mu_packet_payload));

		/* locate the payload lookaside slot */
		void * payload =
			fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo,
				desc, &desc->Pa_Payload);

		memcpy(payload, buf, len);
		desc->Message_Length = len;

		if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) {		/* branch will compile out */

#ifdef FI_BGQ_TRACE
        fprintf(stderr,"fi_bgq_write_internal tx_op_flags & FI_INJECT - virtual addr is 0x%016lx physical addr is 0x%016lx key is %lu  \n",addr,(addr-key),key);
#endif
			/* the 'key' is the paddr of the remote memory region */
			MUSPI_SetRecPayloadBaseAddressInfo(desc, FI_BGQ_MU_BAT_ID_GLOBAL, addr-key);

		} else if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) {	/* branch will compile out */

			desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id =
				fi_bgq_addr_rec_fifo_id(bgq_dst_addr->fi);

			/* the 'key' is used to index into the remote base address table */
			union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader;
			hdr->rma.key = key;
			hdr->rma.offset = addr;
			hdr->rma.nbytes = len;
			hdr->rma.ndesc = 0;

		} else {
			assert(0);
		}

		MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);

		/* FI_TRANSMIT_COMPLETE and FI_DELIVERY_COMPLETE are not supported */
		assert((tx_op_flags & (FI_COMPLETION | FI_TRANSMIT_COMPLETE)) != (FI_COMPLETION | FI_TRANSMIT_COMPLETE));
		assert((tx_op_flags & (FI_COMPLETION | FI_DELIVERY_COMPLETE)) != (FI_COMPLETION | FI_DELIVERY_COMPLETE));

		if (do_cq) {

			/* inject data was buffered, so report the operation as
			 * already complete (byte_counter starts at zero) */
			assert(bgq_context);
			assert(((uintptr_t)bgq_context & 0x07ull) == 0);	/* must be 8 byte aligned */
			bgq_context->flags = FI_RMA | FI_WRITE;
			bgq_context->len = 0;
			bgq_context->buf = NULL;
			bgq_context->byte_counter = 0;
			bgq_context->tag = 0;

			fi_bgq_cq_enqueue_completed(bgq_ep->send_cq, bgq_context, lock_required);
		}

		/* the src buffer is available for reuse - increment the endpoint counter */
		if (do_cntr) L2_AtomicStoreAdd(write_cntr->std.l2_vaddr, 1);

	} else {
		/* non-inject path: first packet carries up to one payload's
		 * worth of data directly from the source buffer */
		size_t xfer_bytes = MIN(len, sizeof(union fi_bgq_mu_packet_payload));

		if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) {		/* branch will compile out */

#ifdef FI_BGQ_TRACE
        fprintf(stderr,"fi_bgq_write_internal - NOT tx_op_flags & FI_INJECT - virtual addr is 0x%016lx physical addr is 0x%016lx key is %lu  \n",addr,(addr-key),key);
#endif
			/* the 'key' is the paddr of the remote memory region */
			MUSPI_SetRecPayloadBaseAddressInfo(desc, FI_BGQ_MU_BAT_ID_GLOBAL, addr-key);

		} else if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) {	/* branch will compile out */

			desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id =
				fi_bgq_addr_rec_fifo_id(bgq_dst_addr->fi);

			/* the 'key' is used to index into the remote base address table */
			union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader;
			hdr->rma.key = key;
			hdr->rma.offset = addr;
			hdr->rma.nbytes = xfer_bytes;
			hdr->rma.ndesc = 0;

		} else {
			assert(0);
		}

		/* determine the physical address of the source data */
		uint64_t src_paddr = 0;
		uint32_t cnk_rc __attribute__ ((unused));
		cnk_rc = fi_bgq_cnk_vaddr2paddr(buf, len, &src_paddr);
		assert(cnk_rc==0);

		desc->Message_Length = xfer_bytes;
		desc->Pa_Payload = src_paddr;

		if (len <= sizeof(union fi_bgq_mu_packet_payload)) {	/* likely */

			/* single packet covers the whole transfer */
			MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);

		} else {

			/* save a stack copy of the first descriptor to use as
			 * the model for the remaining packets */
			MUHWI_Descriptor_t model = *desc;
			MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);

			src_paddr += xfer_bytes;
			len -= xfer_bytes;
			addr += xfer_bytes;

			/* one packet payload at a time until 'len' is consumed */
			while (len > 0) {
				desc = fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo);

				qpx_memcpy64((void*)desc, (const void*)&model);

				xfer_bytes = MIN(len, sizeof(union fi_bgq_mu_packet_payload));
				desc->Message_Length = xfer_bytes;
				desc->Pa_Payload = src_paddr;

				union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader;
				if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) {
#ifdef FI_BGQ_TRACE
        fprintf(stderr,"fi_bgq_write_internal for multiple packets - NOT tx_op_flags & FI_INJECT - virtual addr is 0x%016lx physical addr is 0x%016lx key is %lu  \n",addr,(addr-key),key);
#endif
					/* the 'key' is the paddr of the remote memory region */
					MUSPI_SetRecPayloadBaseAddressInfo(desc, FI_BGQ_MU_BAT_ID_GLOBAL, addr-key);

				}
				else if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) {
					hdr->rma.offset = addr;
					hdr->rma.nbytes = xfer_bytes;
				}
				else {
					assert(0);
				}


				MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);

				src_paddr += xfer_bytes;
				len -= xfer_bytes;
				addr += xfer_bytes;
			}
		}

		/* completion is ordered behind the data via a fence read */
		if (do_cq || do_cntr)
			fi_bgq_readv_internal(bgq_ep, NULL, 0, bgq_dst_addr,
				NULL, NULL, bgq_context,
				tx_op_flags, do_cq, do_cntr, lock_required);
	}
}
593 
594 
595 
596 
597 
fi_bgq_write_generic(struct fid_ep * ep,const void * buf,size_t len,void * desc,fi_addr_t dst_addr,uint64_t addr,uint64_t key,void * context,int lock_required)598 static inline ssize_t fi_bgq_write_generic(struct fid_ep *ep,
599 		const void *buf, size_t len, void *desc, fi_addr_t dst_addr,
600 		uint64_t addr, uint64_t key, void *context,
601 		int lock_required)
602 {
603 	int			ret;
604 	struct fi_bgq_ep	*bgq_ep;
605 
606 	bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);
607 
608 	ret = fi_bgq_check_rma(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV);
609 	if (ret) return ret;
610 
611 	ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
612 	if (ret) return ret;
613 
614 	fi_bgq_write_internal(bgq_ep, buf, len, (union fi_bgq_addr *)&dst_addr,
615 		addr, key, (union fi_bgq_context *)context,
616 		bgq_ep->tx.op_flags, 1, 1, lock_required);
617 
618 	ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
619 	if (ret) {
620 		return ret;
621 	}
622 
623 	return 0;
624 }
625 
fi_bgq_writev_generic(struct fid_ep * ep,const struct iovec * iov,void ** desc,size_t count,fi_addr_t dst_addr,uint64_t addr,uint64_t key,void * context,int lock_required)626 static inline ssize_t fi_bgq_writev_generic(struct fid_ep *ep,
627 		const struct iovec *iov, void **desc, size_t count,
628 		fi_addr_t dst_addr, uint64_t addr, uint64_t key, void *context,
629 		int lock_required)
630 {
631 	int			ret;
632 	struct fi_bgq_ep	*bgq_ep;
633 
634 	bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);
635 
636 	ret = fi_bgq_check_rma(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV);
637 	if (ret) return ret;
638 
639 	ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
640 	if (ret) return ret;
641 
642 	const union fi_bgq_addr bgq_dst_addr = *((union fi_bgq_addr *)&dst_addr);
643 
644 	size_t index = 0;
645 	for (index = 0; index < count; ++index) {
646 
647 		size_t len = iov[index].iov_len;
648 		void * buf = iov[index].iov_base;
649 
650 		fi_bgq_write_internal(bgq_ep, buf, len, &bgq_dst_addr,
651 			addr, key, (union fi_bgq_context *)context,
652 			0, 0, 0, lock_required);
653 
654 		addr += len;
655 	}
656 
657 	fi_bgq_write_fence(bgq_ep, bgq_ep->tx.op_flags, &bgq_dst_addr, (union fi_bgq_context *)context,
658 		lock_required);
659 
660 	ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
661 	if (ret) return ret;
662 
663 	return 0;
664 }
665 
666 
/*
 * Implements fi_writemsg(): walks the local (msg_iov) and remote (rma_iov)
 * iovec arrays in parallel with two independent cursors, issuing one
 * fi_bgq_write_internal() per overlapping span (the min of the bytes
 * remaining on each side) with completion disabled.  When either cursor
 * exhausts its current entry, it advances to the next.  A final fence,
 * using the caller-supplied 'flags', generates the cq/cntr completion
 * for the whole message.
 *
 * \return 0 on success, otherwise the error from validation or locking
 */
static inline ssize_t fi_bgq_writemsg_generic(struct fid_ep *ep,
		const struct fi_msg_rma *msg, uint64_t flags,
		int lock_required)
{
	int			ret;
	struct fi_bgq_ep	*bgq_ep;

	bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);

	ret = fi_bgq_check_rma(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV);
	if (ret) return ret;

	ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
	if (ret) return ret;

	union fi_bgq_addr * bgq_dst_addr = (union fi_bgq_addr *)&msg->addr;


	/* remote-side cursor: current rma iovec, bytes/addr/key remaining */
	size_t rma_iov_index = 0;
	const size_t rma_iov_count = msg->rma_iov_count;
	uint64_t rma_iov_bytes = msg->rma_iov[rma_iov_index].len;
	uint64_t rma_iov_addr = msg->rma_iov[rma_iov_index].addr;
	uint64_t rma_iov_key = msg->rma_iov[rma_iov_index].key;

	/* local-side cursor: current msg iovec, bytes/vaddr remaining */
	size_t msg_iov_index = 0;
	const size_t msg_iov_count = msg->iov_count;
	uint64_t msg_iov_bytes = msg->msg_iov[msg_iov_index].iov_len;
	uintptr_t msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].iov_base;

#ifdef FI_BGQ_TRACE
fprintf(stderr,"fi_bgq_writemsg_generic msg_iov_bytes is %lu rma_iov_bytes is %lu base vadder is 0x%016lx lock_required is %d\n",msg_iov_bytes,rma_iov_bytes,msg_iov_vaddr,lock_required);
fflush(stderr);
#endif
	while (msg_iov_bytes != 0 && rma_iov_bytes != 0) {

		/* transfer the overlap of the two current iovec entries */
		size_t len = (msg_iov_bytes <= rma_iov_bytes) ? msg_iov_bytes : rma_iov_bytes;

#ifdef FI_BGQ_TRACE
fprintf(stderr,"fi_bgq_writemsg_generic calling fi_bgq_write_internal with msg_iov_vaddr 0x%016lx and len %lu\n",msg_iov_vaddr,len);
fflush(stderr);
#endif
		/* completion disabled here; the fence below reports it */
		fi_bgq_write_internal(bgq_ep, (void*)msg_iov_vaddr, len, bgq_dst_addr,
			rma_iov_addr, rma_iov_key, NULL, 0, 0, 0, lock_required);

		msg_iov_bytes -= len;
		msg_iov_vaddr += len;

		/* local entry exhausted - advance to the next msg iovec */
		if ((msg_iov_bytes == 0) && ((msg_iov_index+1) < msg_iov_count)) {
			++msg_iov_index;
			msg_iov_bytes = msg->msg_iov[msg_iov_index].iov_len;
			msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].iov_base;
		}

		rma_iov_bytes -= len;
		rma_iov_addr  += len;

		/* remote entry exhausted - advance to the next rma iovec */
		if ((rma_iov_bytes == 0) && ((rma_iov_index+1) < rma_iov_count)) {
			++rma_iov_index;
			rma_iov_bytes = msg->rma_iov[rma_iov_index].len;
			rma_iov_addr = msg->rma_iov[rma_iov_index].addr;
			rma_iov_key = msg->rma_iov[rma_iov_index].key;
		}
	}

#ifdef FI_BGQ_TRACE
fprintf(stderr,"fi_bgq_writemsg_generic calling fi_bgq_write_fence\n");
fflush(stderr);
#endif
	/* single fence reports completion for the entire message */
	fi_bgq_write_fence(bgq_ep, flags, bgq_dst_addr,
		(union fi_bgq_context *)msg->context,
		lock_required);

	ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
	if (ret) return ret;

	return 0;
}
744 
745 
/*
 * Implements fi_read(): a single contiguous rma read.  The buffer is
 * wrapped in a one-entry iovec and handed to the emulated readv path with
 * cq and cntr completion processing enabled.
 *
 * \return 0 on success, otherwise the error from validation or locking
 */
static inline ssize_t fi_bgq_read_generic(struct fid_ep *ep,
		void *buf, size_t len, void *desc, fi_addr_t src_addr,
		uint64_t addr, uint64_t key, void *context,
		int lock_required)
{
	struct fi_bgq_ep * bgq_ep =
		container_of(ep, struct fi_bgq_ep, ep_fid);

	int rc = fi_bgq_check_rma(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV);
	if (rc) return rc;

	rc = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
	if (rc) return rc;

	/* single-entry iovec describing the local destination */
	struct iovec iov = {
		.iov_base = buf,
		.iov_len = len
	};

	fi_bgq_readv_internal(bgq_ep, &iov, 1, (union fi_bgq_addr *)&src_addr,
		&addr, &key, (union fi_bgq_context *)context,
		bgq_ep->tx.op_flags, 1, 1, lock_required);

	rc = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
	if (rc) return rc;

	return 0;
}
776 
/*
 * Implements fi_readv(): issues the local iovec array to the emulated
 * readv path in batches of 8 (the per-injection descriptor limit), with
 * completion processing only on the final (possibly empty) batch.  If the
 * final batch is empty (count is a multiple of 8) the trailing call acts
 * as a fence that carries only the completion descriptors.
 *
 * \return 0 on success, otherwise the error from validation or locking
 */
static inline ssize_t fi_bgq_readv_generic (struct fid_ep *ep,
		const struct iovec *iov, void **desc, size_t count,
		fi_addr_t src_addr, uint64_t addr, uint64_t key, void *context,
		int lock_required)
{

#ifdef FI_BGQ_TRACE
fprintf(stderr,"fi_bgq_readv_generic count is %lu addr is 0x%016lx key is 0x%016lx\n",count,addr,key);
fflush(stderr);
#endif

	int			ret;
	struct fi_bgq_ep	*bgq_ep;

	bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);

	ret = fi_bgq_check_rma(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV);
	if (ret) return ret;

	ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
	if (ret) return ret;

	union fi_bgq_addr * bgq_addr = (union fi_bgq_addr *)&src_addr;
	union fi_bgq_context * bgq_context = (union fi_bgq_context *)context;
	const uint64_t tx_op_flags = bgq_ep->tx.op_flags;

	/* NOTE(review): every iovec entry reads from the same remote
	 * (addr, key) - the remote offset never advances across entries.
	 * Confirm against the fi_readv() contract, which describes a single
	 * remote region scattered into the local iovecs. */
	uint64_t addr_v[8] = { addr, addr, addr, addr, addr, addr, addr, addr };
	uint64_t key_v[8] = { key, key, key, key, key, key, key, key };

	/* max 8 descriptors (iovecs) per readv_internal
	 *
	 * BUG FIX: the loop bound was 'count >> 3' (the number of full
	 * batches) while 'index' advances by 8 iovec ENTRIES per batch, so
	 * any count >= 16 silently skipped every batch after the first.
	 * The bound must be the number of entries covered by full batches:
	 * count & ~7. */
	size_t index = 0;
	const size_t full_count = count & ~((size_t)0x07ull);
	for (index = 0; index < full_count; index += 8) {

		fi_bgq_readv_internal(bgq_ep, &iov[index], 8, bgq_addr,
			addr_v, key_v, NULL, 0, 0, 0,
			lock_required);
	}

	/* if 'partial_ndesc' is zero, the fi_bgq_readv_internal() will fence */
	const size_t partial_ndesc = count & 0x07ull;
	fi_bgq_readv_internal(bgq_ep, &iov[index], partial_ndesc, bgq_addr,
		addr_v, key_v, bgq_context, tx_op_flags, 1, 1,
		lock_required);

	ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
	if (ret)
		return ret;

	return 0;
}
828 
829 
fi_bgq_readmsg_generic(struct fid_ep * ep,const struct fi_msg_rma * msg,uint64_t flags,int lock_required)830 static inline ssize_t fi_bgq_readmsg_generic(struct fid_ep *ep,
831 		const struct fi_msg_rma *msg, uint64_t flags,
832 		int lock_required)
833 {
834 #ifdef FI_BGQ_TRACE
835 fprintf(stderr,"fi_bgq_readmsg_generic starting\n");
836 fflush(stderr);
837 #endif
838 	int			ret;
839 	struct fi_bgq_ep	*bgq_ep;
840 
841 	bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);
842 
843 	ret = fi_bgq_check_rma(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV);
844 	if (ret) return ret;
845 
846 	ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required);
847 	if (ret) return ret;
848 
849 	struct fi_bgq_cq * cq = bgq_ep->send_cq;
850 	const uint64_t enable_cq =
851 		(cq == NULL) || ((cq != NULL) && ((cq->bflags & FI_SELECTIVE_COMPLETION) && (flags & FI_COMPLETION) == 0)) ? 0 : 1;
852 
853 	union fi_bgq_context * bgq_context = (union fi_bgq_context *) msg->context;
854 	union fi_bgq_addr * bgq_src_addr = (union fi_bgq_addr *)&msg->addr;
855 
856 	/* for fi_read*(), the 'src' is the remote data */
857 	size_t src_iov_index = 0;
858 	const size_t src_iov_count = msg->rma_iov_count;
859 	uint64_t src_iov_bytes = msg->rma_iov[0].len;
860 	uint64_t src_iov_addr = msg->rma_iov[0].addr;
861 	uint64_t src_iov_key = msg->rma_iov[0].key;
862 
863 	/* for fi_read*(), the 'dst' is the local data */
864 	size_t dst_iov_index = 0;
865 	const size_t dst_iov_count = msg->iov_count;
866 	uint64_t dst_iov_bytes = msg->msg_iov[0].iov_len;
867 	void * dst_iov_vaddr = msg->msg_iov[0].iov_base;
868 
869 	size_t niov;
870 	struct iovec iov[8];
871 	uint64_t addr[8];
872 	uint64_t key[8];
873 
874 	while (src_iov_index < src_iov_count) {
875 
876 		for (niov = 0; niov < 8; ++niov) {
877 			const size_t len = (dst_iov_bytes <= src_iov_bytes) ? dst_iov_bytes : src_iov_bytes;
878 			iov[niov].iov_len = len;
879 			iov[niov].iov_base = dst_iov_vaddr;
880 			addr[niov] = src_iov_addr;
881 			key[niov] = src_iov_key;
882 
883 			dst_iov_bytes -= len;
884 			src_iov_bytes -= len;
885 
886 			if (src_iov_bytes == 0) {
887 
888 				/* all done with this src rma iovec */
889 
890 				if (src_iov_index == (src_iov_count-1)) {
891 
892 					/* this is the last src rma iovec .. perform
893 					 * read with completion processing and return
894 					 *
895 					 * the 'dst_iov_bytes' must be zero and it must
896 					 * be the last dst iovec as well */
897 					assert(dst_iov_bytes==0);
898 					assert(dst_iov_index == (dst_iov_count-1));
899 
900 					fi_bgq_readv_internal(bgq_ep, iov, niov+1,
901 						bgq_src_addr, addr, key,
902 						bgq_context,
903 						flags,
904 						enable_cq, 1,				/* enable_cq, enable_cntr */
905 						lock_required);
906 
907 					ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
908 					if (ret) return ret;
909 
910 					return 0;
911 
912 				} else {
913 
914 					/* advance to next src rma iovec */
915 					++src_iov_index;
916 					src_iov_bytes = msg->rma_iov[src_iov_index].len;
917 					src_iov_addr = msg->rma_iov[src_iov_index].addr;
918 					src_iov_key = msg->rma_iov[src_iov_index].key;
919 				}
920 			} else {
921 				src_iov_addr += len;
922 			}
923 
924 
925 			if (dst_iov_bytes == 0) {
926 
927 				/* all done with this dst iovec */
928 
929 				if (dst_iov_index == (dst_iov_count-1)) {
930 					/* this is the last dst iovec .. do nothing since
931 					 * the 'src_iov_bytes' must be zero and it must
932 					 * be the last src rma iovec as well */
933 					assert(src_iov_bytes==0);
934 					assert(src_iov_index == (src_iov_count-1));
935 
936 					/* in fact, it should be impossible to get here */
937 					assert(0);
938 				} else {
939 
940 					/* advance to next dst iovec */
941 					++dst_iov_index;
942 					dst_iov_bytes = msg->msg_iov[dst_iov_index].iov_len;
943 					dst_iov_vaddr = msg->msg_iov[dst_iov_index].iov_base;
944 				}
945 			} else {
946 				dst_iov_vaddr = (void*)((uintptr_t)dst_iov_vaddr + len);
947 			}
948 
949 
950 		}	/* end for */
951 
952 		fi_bgq_readv_internal(bgq_ep, iov, 8, bgq_src_addr, addr, key,
953 			NULL, 0,
954 			0, 0,	/* disable_cq, disable_cntr */
955 			lock_required);
956 
957 	}	/* end while */
958 
959 	/* should never get here */
960 	assert(0);
961 
962 	ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required);
963 	if (ret) return ret;
964 
965 	return 0;
966 }
967 
968 
969 /* Declare specialized functions that qualify for FABRIC_DIRECT.
970  * - No locks
971  */
972 
973 #define FI_BGQ_RMA_FABRIC_DIRECT_LOCK	0
974 
FI_BGQ_RMA_SPECIALIZED_FUNC(FI_BGQ_RMA_FABRIC_DIRECT_LOCK)975 FI_BGQ_RMA_SPECIALIZED_FUNC(FI_BGQ_RMA_FABRIC_DIRECT_LOCK)
976 
977 #ifdef FABRIC_DIRECT
978 
979 #define fi_write(ep, buf, len, desc, dst_addr, addr, key, context)	\
980 	(FI_BGQ_RMA_SPECIALIZED_FUNC_NAME(write,			\
981 			FI_BGQ_RMA_FABRIC_DIRECT_LOCK)			\
982 	(ep, buf, len, desc, dst_addr, addr, key, context))
983 
984 #define fi_inject_write(ep, buf, len, dst_addr, addr, key)		\
985 	(FI_BGQ_RMA_SPECIALIZED_FUNC_NAME(inject_write,			\
986 			FI_BGQ_RMA_FABRIC_DIRECT_LOCK)			\
987 			(ep, buf, len, dst_addr, addr, key))
988 
989 #define fi_read(ep, buf, len, desc, src_addr, addr, key, context)	\
990 	(FI_BGQ_RMA_SPECIALIZED_FUNC_NAME(read,				\
991 			FI_BGQ_RMA_FABRIC_DIRECT_LOCK)			\
992 			(ep, buf, len, desc, src_addr, addr, key, context))
993 
994 #define fi_readmsg(ep, msg, flags)					\
995 	(FI_BGQ_RMA_SPECIALIZED_FUNC_NAME(readmsg,			\
996 			FI_BGQ_RMA_FABRIC_DIRECT_LOCK)			\
997 			(ep, msg, flags))
998 
999 static inline ssize_t
1000 fi_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg, uint64_t flags)
1001 {
1002 	return ep->rma->writemsg(ep, msg, flags);
1003 }
1004 static inline ssize_t
fi_writev(struct fid_ep * ep,const struct iovec * iov,void ** desc,size_t count,fi_addr_t dest_addr,uint64_t addr,uint64_t key,void * context)1005 fi_writev(struct fid_ep *ep, const struct iovec *iov, void **desc, size_t count,
1006 		fi_addr_t dest_addr, uint64_t addr, uint64_t key, void *context)
1007 {
1008 	return ep->rma->writev(ep, iov, desc, count, dest_addr, addr, key, context);
1009 }
1010 
1011 #endif
1012 
1013 #ifdef __cplusplus
1014 }
1015 #endif
1016 
1017 #endif /* _FI_BGQ_DIRECT_RMA_H_ */
1018