1 /*
2 * Copyright (C) by Argonne National Laboratory
3 * See COPYRIGHT in top-level directory
4 */
5
6 #ifndef OFI_IMPL_H_INCLUDED
7 #define OFI_IMPL_H_INCLUDED
8
9 #include <mpidimpl.h>
10 #include "ofi_types.h"
11 #include "mpidch4r.h"
12 #include "mpidig_am.h"
13 #include "ch4_impl.h"
14
/* Netmod-internal status codes. */
#define MPIDI_OFI_ENAVAIL   -1  /* OFI resource not available */
#define MPIDI_OFI_EPERROR   -2  /* OFI endpoint error */

/* Accessors for the OFI-specific area embedded in a datatype, op,
 * communicator or window object (argument is a pointer to the object). */
#define MPIDI_OFI_DT(dt)     ((dt)->dev.netmod.ofi)
#define MPIDI_OFI_OP(op)     ((op)->dev.netmod.ofi)
#define MPIDI_OFI_COMM(comm) ((comm)->dev.ch4.netmod.ofi)
#define MPIDI_OFI_WIN(win)   ((win)->dev.netmod.ofi)

/* Forwards (comm, rank) to MPIDIU_comm_rank_to_pid with both optional
 * trailing arguments passed as NULL. */
#define MPIDI_OFI_COMM_TO_INDEX(comm,rank) \
    MPIDIU_comm_rank_to_pid(comm, rank, NULL, NULL)

/* Yields the dest[0][0] entry of the address-vector slot selected by
 * (avtid, lpid). */
#define MPIDI_OFI_TO_PHYS(avtid, lpid) \
    MPIDI_OFI_AV(&MPIDIU_get_av((avtid), (lpid))).dest[0][0]

/* Progress entry point for a given vci; defined outside this header. */
int MPIDI_OFI_progress(int vci, int blocking);
29
30 /* vni mapping */
31 /* NOTE: concerned by the modulo? If we restrict num_vnis to power of 2,
32 * we may get away with bit mask */
MPIDI_OFI_get_vni(int flag,MPIR_Comm * comm_ptr,int src_rank,int dst_rank,int tag)33 MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_get_vni(int flag, MPIR_Comm * comm_ptr,
34 int src_rank, int dst_rank, int tag)
35 {
36 return MPIDI_get_vci(flag, comm_ptr, src_rank, dst_rank, tag) % MPIDI_OFI_global.num_vnis;
37 }
38
/*
 * Helper routines and macros for request completion
 */

/*
 * Run one MPIDI_OFI_progress() pass on `vni` with the `blocking` argument
 * set to 0, check the result with MPIR_ERR_CHECK, then yield the global
 * critical section.
 *
 * Requires an `mpi_errno` variable in the expansion scope.
 * NOTE(review): MPIR_ERR_CHECK presumably jumps to a `fn_fail` label on
 * error -- confirm against its definition.
 */
#define MPIDI_OFI_PROGRESS(vni)                                           \
    do {                                                                  \
        mpi_errno = MPIDI_OFI_progress(vni, 0);                           \
        MPIR_ERR_CHECK(mpi_errno);                                        \
        MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);   \
    } while (0)

/* Repeat MPIDI_OFI_PROGRESS(vni) for as long as `cond` evaluates true.
 * `cond` is re-evaluated before every pass. */
#define MPIDI_OFI_PROGRESS_WHILE(cond, vni) \
    while (cond) MPIDI_OFI_PROGRESS(vni)
51
/* Error-reporting primitive used by the call wrappers in this header. */
#define MPIDI_OFI_ERR  MPIR_ERR_CHKANDJUMP4

/*
 * Evaluate FUNC once and treat a negative result as an error.
 *
 * On error the result is reported through MPIDI_OFI_ERR with class
 * MPI_ERR_OTHER, message key "**ofid_<STR>", and the file, line, function
 * name and fi_strerror() text of the negated return value.
 *
 * Requires an `mpi_errno` variable in the expansion scope.
 */
#define MPIDI_OFI_CALL(FUNC,STR)                        \
    do {                                                \
        ssize_t _ret = FUNC;                            \
        MPIDI_OFI_ERR(_ret<0,                           \
                      mpi_errno,                        \
                      MPI_ERR_OTHER,                    \
                      "**ofid_"#STR,                    \
                      "**ofid_"#STR" %s %d %s %s",      \
                      __SHORT_FILE__,                   \
                      __LINE__,                         \
                      __func__,                         \
                      fi_strerror(-_ret));              \
    } while (0)
66
/*
 * Evaluate FUNC, retrying while it returns -FI_EAGAIN.
 *
 *   FUNC     expression to evaluate; a result of 0 ends the loop.
 *   vci_     index of the VCI whose lock is released around the retry
 *            progress call (the lock is exited and then re-entered, so the
 *            caller is expected to hold it on entry).
 *   STR      suffix used to build the "**ofid_<STR>" error message key.
 *   eagain_  when true, give up with MPIX_ERR_EAGAIN ("**eagain") once the
 *            retry budget (MPIR_CVAR_CH4_OFI_MAX_EAGAIN_RETRY) has counted
 *            down to 0; when false, keep retrying.
 *
 * Any non-zero result other than -FI_EAGAIN is reported through
 * MPIDI_OFI_ERR as MPI_ERR_OTHER.  Requires `mpi_errno` in the expansion
 * scope.
 *
 * FIXME: by fixing the recursive locking interface to account for
 * recursive locking in more than one lock (currently limited to one due to
 * scalar TLS counter), the lock yielding operation below can be avoided
 * since we are inside a finite loop.
 *
 * The flag argument is parenthesized in the expansion so that compound
 * expressions such as `a || b` combine correctly with the retry test.
 */
#define MPIDI_OFI_CALL_RETRY(FUNC,vci_,STR,eagain_)         \
    do {                                                    \
        ssize_t _ret;                                       \
        int _retry = MPIR_CVAR_CH4_OFI_MAX_EAGAIN_RETRY;    \
        do {                                                \
            _ret = FUNC;                                    \
            if (likely(_ret==0)) break;                     \
            MPIDI_OFI_ERR(_ret!=-FI_EAGAIN,                 \
                          mpi_errno,                        \
                          MPI_ERR_OTHER,                    \
                          "**ofid_"#STR,                    \
                          "**ofid_"#STR" %s %d %s %s",      \
                          __SHORT_FILE__,                   \
                          __LINE__,                         \
                          __func__,                         \
                          fi_strerror(-_ret));              \
            MPIR_ERR_CHKANDJUMP(_retry == 0 && (eagain_),   \
                                mpi_errno,                  \
                                MPIX_ERR_EAGAIN,            \
                                "**eagain");                \
            MPID_THREAD_CS_EXIT(VCI, MPIDI_VCI(vci_).lock); \
            mpi_errno = MPIDI_OFI_retry_progress();         \
            MPID_THREAD_CS_ENTER(VCI, MPIDI_VCI(vci_).lock); \
            MPIR_ERR_CHECK(mpi_errno);                      \
            _retry--;                                       \
        } while (_ret == -FI_EAGAIN);                       \
    } while (0)
98
/* per-vci macros - we'll transition into these macros once the locks are
 * moved down to ofi-layer */

/*
 * Run one MPIDI_OFI_progress() pass (blocking argument 0) on `vci_` while
 * holding that VCI's lock, then check the result and yield the global
 * critical section.  The VCI lock is released before the error check.
 *
 * Requires an `mpi_errno` variable in the expansion scope.
 */
#define MPIDI_OFI_VCI_PROGRESS(vci_)                                      \
    do {                                                                  \
        MPID_THREAD_CS_ENTER(VCI, MPIDI_VCI(vci_).lock);                  \
        mpi_errno = MPIDI_OFI_progress(vci_, 0);                          \
        MPID_THREAD_CS_EXIT(VCI, MPIDI_VCI(vci_).lock);                   \
        MPIR_ERR_CHECK(mpi_errno);                                        \
        MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);   \
    } while (0)
109
/*
 * Keep calling MPIDI_OFI_progress() (blocking argument 0) on `vci_` for as
 * long as `cond` holds.  The VCI lock is taken once around the whole loop,
 * so `cond` is evaluated with the lock held; the global critical section
 * is yielded after every pass.
 *
 * On a progress error the VCI lock is released before MPIR_ERR_POP is
 * invoked, so the error path does not leave the lock held.
 *
 * Requires an `mpi_errno` variable in the expansion scope.
 */
#define MPIDI_OFI_VCI_PROGRESS_WHILE(vci_, cond)                              \
    do {                                                                      \
        MPID_THREAD_CS_ENTER(VCI, MPIDI_VCI(vci_).lock);                      \
        while (cond) {                                                        \
            mpi_errno = MPIDI_OFI_progress(vci_, 0);                          \
            if (mpi_errno) {                                                  \
                MPID_THREAD_CS_EXIT(VCI, MPIDI_VCI(vci_).lock);               \
                MPIR_ERR_POP(mpi_errno);                                      \
            }                                                                 \
            MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);   \
        }                                                                     \
        MPID_THREAD_CS_EXIT(VCI, MPIDI_VCI(vci_).lock);                       \
    } while (0)
123
/*
 * Per-vci variant of MPIDI_OFI_CALL: FUNC is evaluated once with the lock
 * of `vci_` held; the lock is released before the result is checked.  A
 * negative result is reported through MPIDI_OFI_ERR as MPI_ERR_OTHER with
 * message key "**ofid_<STR>".
 *
 * Requires an `mpi_errno` variable in the expansion scope.
 */
#define MPIDI_OFI_VCI_CALL(FUNC,vci_,STR)                   \
    do {                                                    \
        MPID_THREAD_CS_ENTER(VCI, MPIDI_VCI(vci_).lock);    \
        ssize_t _ret = FUNC;                                \
        MPID_THREAD_CS_EXIT(VCI, MPIDI_VCI(vci_).lock);     \
        MPIDI_OFI_ERR(_ret<0,                               \
                      mpi_errno,                            \
                      MPI_ERR_OTHER,                        \
                      "**ofid_"#STR,                        \
                      "**ofid_"#STR" %s %d %s %s",          \
                      __SHORT_FILE__,                       \
                      __LINE__,                             \
                      __func__,                             \
                      fi_strerror(-_ret));                  \
    } while (0)
139
/*
 * Per-vci variant of the retrying call wrapper.
 *
 *   FUNC     expression to evaluate; each attempt runs with the lock of
 *            `vci_` held, and a result of 0 ends the loop.
 *   vci_     index of the VCI whose lock guards each attempt.
 *   STR      suffix used to build the "**ofid_<STR>" error message key.
 *   eagain_  when true, give up with MPIX_ERR_EAGAIN ("**eagain") once the
 *            retry budget (MPIR_CVAR_CH4_OFI_MAX_EAGAIN_RETRY) has counted
 *            down to 0; when false, keep retrying.
 *
 * Between attempts (lock not held) MPID_Progress_test(NULL) is called.
 * Any non-zero result other than -FI_EAGAIN is reported through
 * MPIDI_OFI_ERR as MPI_ERR_OTHER.  Requires `mpi_errno` in the expansion
 * scope.
 *
 * The flag argument is parenthesized in the expansion so that compound
 * expressions such as `a || b` combine correctly with the retry test.
 */
#define MPIDI_OFI_VCI_CALL_RETRY(FUNC,vci_,STR,eagain_)         \
    do {                                                        \
        ssize_t _ret;                                           \
        int _retry = MPIR_CVAR_CH4_OFI_MAX_EAGAIN_RETRY;        \
        do {                                                    \
            MPID_THREAD_CS_ENTER(VCI, MPIDI_VCI(vci_).lock);    \
            _ret = FUNC;                                        \
            MPID_THREAD_CS_EXIT(VCI, MPIDI_VCI(vci_).lock);     \
            if (likely(_ret==0)) break;                         \
            MPIDI_OFI_ERR(_ret!=-FI_EAGAIN,                     \
                          mpi_errno,                            \
                          MPI_ERR_OTHER,                        \
                          "**ofid_"#STR,                        \
                          "**ofid_"#STR" %s %d %s %s",          \
                          __SHORT_FILE__,                       \
                          __LINE__,                             \
                          __func__,                             \
                          fi_strerror(-_ret));                  \
            MPIR_ERR_CHKANDJUMP(_retry == 0 && (eagain_),       \
                                mpi_errno,                      \
                                MPIX_ERR_EAGAIN,                \
                                "**eagain");                    \
            mpi_errno = MPID_Progress_test(NULL);               \
            MPIR_ERR_CHECK(mpi_errno);                          \
            _retry--;                                           \
        } while (_ret == -FI_EAGAIN);                           \
    } while (0)
167
/* Evaluate FUNC and store its result in the lvalue `_ret`; no error
 * checking is performed, so the caller inspects the value itself. */
#define MPIDI_OFI_CALL_RETURN(FUNC, _ret)   \
    do {                                    \
        (_ret) = FUNC;                      \
    } while (0)
172
/*
 * Evaluate FUNC into the caller-provided `str_errno` variable and report
 * anything other than MPL_SUCCESS through MPIDI_OFI_ERR as MPI_ERR_OTHER
 * with message key "**<STR>".  The stringified STR is also passed as the
 * last format argument.
 *
 * Requires `str_errno` and `mpi_errno` variables in the expansion scope.
 */
#define MPIDI_OFI_STR_CALL(FUNC,STR)                    \
    do {                                                \
        str_errno = FUNC;                               \
        MPIDI_OFI_ERR(str_errno!=MPL_SUCCESS,           \
                      mpi_errno,                        \
                      MPI_ERR_OTHER,                    \
                      "**"#STR,                         \
                      "**"#STR" %s %d %s %s",           \
                      __SHORT_FILE__,                   \
                      __LINE__,                         \
                      __func__,                         \
                      #STR);                            \
    } while (0)
187
/*
 * Obtain a request of the given `kind` from the pool associated with `vni`
 * and store it in `req`.  If the pool returns NULL, the error is recorded
 * as MPIX_ERR_NOREQ ("**nomemreq") and control jumps to `fn_fail`.  On
 * success one additional reference is taken on the request.
 *
 * Requires `mpi_errno` and a `fn_fail` label in the expansion scope.
 */
#define MPIDI_OFI_REQUEST_CREATE(req, kind, vni)                        \
    do {                                                                \
        (req) = MPIR_Request_create_from_pool(kind, vni);               \
        MPIR_ERR_CHKANDSTMT((req) == NULL, mpi_errno, MPIX_ERR_NOREQ, goto fn_fail, "**nomemreq"); \
        MPIR_Request_add_ref((req));                                    \
    } while (0)
194
MPIDI_OFI_winfo_base(MPIR_Win * w,int rank)195 MPL_STATIC_INLINE_PREFIX uintptr_t MPIDI_OFI_winfo_base(MPIR_Win * w, int rank)
196 {
197 if (!MPIDI_OFI_ENABLE_MR_VIRT_ADDRESS)
198 return 0;
199 else
200 return MPIDI_OFI_WIN(w).winfo[rank].base;
201 }
202
MPIDI_OFI_winfo_mr_key(MPIR_Win * w,int rank)203 MPL_STATIC_INLINE_PREFIX uint64_t MPIDI_OFI_winfo_mr_key(MPIR_Win * w, int rank)
204 {
205 if (!MPIDI_OFI_ENABLE_MR_PROV_KEY)
206 return MPIDI_OFI_WIN(w).mr_key;
207 else
208 return MPIDI_OFI_WIN(w).winfo[rank].mr_key;
209 }
210
MPIDI_OFI_win_cntr_incr(MPIR_Win * win)211 MPL_STATIC_INLINE_PREFIX void MPIDI_OFI_win_cntr_incr(MPIR_Win * win)
212 {
213 (*MPIDI_OFI_WIN(win).issued_cntr)++;
214 }
215
MPIDI_OFI_cntr_incr()216 MPL_STATIC_INLINE_PREFIX void MPIDI_OFI_cntr_incr()
217 {
218 MPIDI_OFI_global.rma_issued_cntr++;
219 }
220
221 /* Externs: see util.c for definition */
222 #define MPIDI_OFI_LOCAL_MR_KEY 0
223 #define MPIDI_OFI_COLL_MR_KEY 1
224 #define MPIDI_OFI_INVALID_MR_KEY 0xFFFFFFFFFFFFFFFFULL
225 int MPIDI_OFI_handle_cq_error_util(int ep_idx, ssize_t ret);
226 int MPIDI_OFI_retry_progress(void);
227 int MPIDI_OFI_control_handler(int handler_id, void *am_hdr, void *data, MPI_Aint data_sz,
228 int is_local, int is_async, MPIR_Request ** req);
229 int MPIDI_OFI_control_dispatch(void *buf);
230 void MPIDI_OFI_index_datatypes(void);
231 int MPIDI_OFI_mr_key_allocator_init(void);
232 uint64_t MPIDI_OFI_mr_key_alloc(int key_type, uint64_t requested_key);
233 void MPIDI_OFI_mr_key_free(int key_type, uint64_t index);
234 void MPIDI_OFI_mr_key_allocator_destroy(void);
235
/* RMA */
/*
 * Prepare the completion context for one chunk of an RMA operation on
 * `win`.
 *
 * When `sigreq` (a pointer to a request pointer) is non-NULL, a chunk
 * request is allocated, tagged MPIDI_OFI_EVENT_CHUNK_DONE, linked to
 * *sigreq as its parent and installed as `msg.context`; the counter behind
 * (*sigreq)->cc_ptr is incremented via MPIR_cc_incr.  The window's issued
 * counter is incremented in every case that does not fail.
 *
 * The allocation is performed and checked BEFORE the counter increment, so
 * an allocation failure (MPI_ERR_NO_MEM, "**nomem", jump to `fn_fail`)
 * leaves the parent request's counter untouched instead of bumped with no
 * chunk to balance it.
 *
 * Requires `msg`, `mpi_errno` and a `fn_fail` label in the expansion scope.
 */
#define MPIDI_OFI_INIT_CHUNK_CONTEXT(win,sigreq)                        \
    do {                                                                \
        if (sigreq) {                                                   \
            int tmp;                                                    \
            MPIDI_OFI_chunk_request *creq;                              \
            creq = (MPIDI_OFI_chunk_request *) MPL_malloc(sizeof(*creq), MPL_MEM_BUFFER); \
            MPIR_ERR_CHKANDSTMT(creq == NULL, mpi_errno, MPI_ERR_NO_MEM, goto fn_fail, "**nomem"); \
            MPIR_cc_incr((*(sigreq))->cc_ptr, &tmp);                    \
            creq->event_id = MPIDI_OFI_EVENT_CHUNK_DONE;                \
            creq->parent = *(sigreq);                                   \
            msg.context = &creq->context;                               \
        }                                                               \
        MPIDI_OFI_win_cntr_incr(win);                                   \
    } while (0)
251
MPIDI_OFI_winfo_disp_unit(MPIR_Win * win,int rank)252 MPL_STATIC_INLINE_PREFIX uint32_t MPIDI_OFI_winfo_disp_unit(MPIR_Win * win, int rank)
253 {
254 uint32_t ret;
255
256 MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_OFI_WINFO_DISP_UNIT);
257 MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_OFI_WINFO_DISP_UNIT);
258
259 if (MPIDI_OFI_ENABLE_MR_PROV_KEY || MPIDI_OFI_ENABLE_MR_VIRT_ADDRESS) {
260 /* Always use winfo[rank].disp_unit if any of PROV_KEY and VIRT_ADDRESS is on.
261 * Compiler can eliminate the branch in such a case. */
262 ret = MPIDI_OFI_WIN(win).winfo[rank].disp_unit;
263 } else if (MPIDI_OFI_WIN(win).winfo) {
264 ret = MPIDI_OFI_WIN(win).winfo[rank].disp_unit;
265 } else {
266 ret = win->disp_unit;
267 }
268
269 MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_OFI_WINFO_DISP_UNIT);
270 return ret;
271 }
272
MPIDI_OFI_sigreq_complete(MPIR_Request ** sigreq)273 MPL_STATIC_INLINE_PREFIX void MPIDI_OFI_sigreq_complete(MPIR_Request ** sigreq)
274 {
275 if (sigreq) {
276 /* If sigreq is not NULL, *sigreq should be a valid object now. */
277 MPIR_Assert(*sigreq != NULL);
278 MPID_Request_complete(*sigreq);
279 }
280 }
281
MPIDI_OFI_load_iov(const void * buffer,int count,MPI_Datatype datatype,MPI_Aint max_len,MPI_Aint * loaded_iov_offset,struct iovec * iov)282 MPL_STATIC_INLINE_PREFIX void MPIDI_OFI_load_iov(const void *buffer, int count,
283 MPI_Datatype datatype, MPI_Aint max_len,
284 MPI_Aint * loaded_iov_offset, struct iovec *iov)
285 {
286 MPI_Aint outlen;
287 MPIR_Typerep_to_iov_offset(buffer, count, datatype, *loaded_iov_offset, iov, max_len, &outlen);
288 *loaded_iov_offset += outlen;
289 }
290
291 int MPIDI_OFI_issue_deferred_rma(MPIR_Win * win);
292 void MPIDI_OFI_complete_chunks(MPIDI_OFI_win_request_t * winreq);
293 int MPIDI_OFI_nopack_putget(const void *origin_addr, int origin_count,
294 MPI_Datatype origin_datatype, int target_rank,
295 int target_count, MPI_Datatype target_datatype,
296 MPIDI_OFI_target_mr_t target_mr, MPIR_Win * win,
297 MPIDI_av_entry_t * addr, int rma_type, MPIR_Request ** sigreq);
298 int MPIDI_OFI_pack_put(const void *origin_addr, int origin_count,
299 MPI_Datatype origin_datatype, int target_rank,
300 int target_count, MPI_Datatype target_datatype,
301 MPIDI_OFI_target_mr_t target_mr, MPIR_Win * win,
302 MPIDI_av_entry_t * addr, MPIR_Request ** sigreq);
303 int MPIDI_OFI_pack_get(void *origin_addr, int origin_count,
304 MPI_Datatype origin_datatype, int target_rank,
305 int target_count, MPI_Datatype target_datatype,
306 MPIDI_OFI_target_mr_t target_mr, MPIR_Win * win,
307 MPIDI_av_entry_t * addr, MPIR_Request ** sigreq);
308
309 /* Common Utility functions used by the
310 * C and C++ components
311 */
312 /* Set max size based on OFI acc ordering limit. */
MPIDI_OFI_check_acc_order_size(MPIR_Win * win,MPI_Aint max_size)313 MPL_STATIC_INLINE_PREFIX MPI_Aint MPIDI_OFI_check_acc_order_size(MPIR_Win * win, MPI_Aint max_size)
314 {
315 /* Check ordering limit, a value of -1 guarantees ordering for any data size. */
316 if ((MPIDIG_WIN(win, info_args).accumulate_ordering & MPIDIG_ACCU_ORDER_WAR)
317 && MPIDI_OFI_global.max_order_war != -1) {
318 /* An order size value of 0 indicates that ordering is not guaranteed. */
319 MPIR_Assert(MPIDI_OFI_global.max_order_war != 0);
320 max_size = MPL_MIN(max_size, MPIDI_OFI_global.max_order_war);
321 }
322 if ((MPIDIG_WIN(win, info_args).accumulate_ordering & MPIDIG_ACCU_ORDER_WAW)
323 && MPIDI_OFI_global.max_order_waw != -1) {
324 MPIR_Assert(MPIDI_OFI_global.max_order_waw != 0);
325 max_size = MPL_MIN(max_size, MPIDI_OFI_global.max_order_waw);
326 }
327 if ((MPIDIG_WIN(win, info_args).accumulate_ordering & MPIDIG_ACCU_ORDER_RAW)
328 && MPIDI_OFI_global.max_order_raw != -1) {
329 MPIR_Assert(MPIDI_OFI_global.max_order_raw != 0);
330 max_size = MPL_MIN(max_size, MPIDI_OFI_global.max_order_raw);
331 }
332 return max_size;
333 }
334
MPIDI_OFI_win_request_create(void)335 MPL_STATIC_INLINE_PREFIX MPIDI_OFI_win_request_t *MPIDI_OFI_win_request_create(void)
336 {
337 MPIDI_OFI_win_request_t *winreq;
338 winreq = MPL_malloc(sizeof(*winreq), MPL_MEM_OTHER);
339 return winreq;
340 }
341
MPIDI_OFI_win_request_complete(MPIDI_OFI_win_request_t * winreq)342 MPL_STATIC_INLINE_PREFIX void MPIDI_OFI_win_request_complete(MPIDI_OFI_win_request_t * winreq)
343 {
344 MPIDI_OFI_complete_chunks(winreq);
345 if (winreq->rma_type == MPIDI_OFI_PUT &&
346 winreq->noncontig.put.origin.datatype != MPI_DATATYPE_NULL &&
347 winreq->noncontig.put.target.datatype != MPI_DATATYPE_NULL) {
348 MPIR_Datatype_release_if_not_builtin(winreq->noncontig.put.origin.datatype);
349 MPIR_Datatype_release_if_not_builtin(winreq->noncontig.put.target.datatype);
350 } else if (winreq->rma_type == MPIDI_OFI_GET &&
351 winreq->noncontig.get.origin.datatype != MPI_DATATYPE_NULL &&
352 winreq->noncontig.get.target.datatype != MPI_DATATYPE_NULL) {
353 MPIR_Datatype_release_if_not_builtin(winreq->noncontig.get.origin.datatype);
354 MPIR_Datatype_release_if_not_builtin(winreq->noncontig.get.target.datatype);
355 }
356 MPL_free(winreq);
357 }
358
359 /* This function implements netmod vci to vni(context) mapping.
360 * Currently, we only support one-to-one mapping.
361 */
MPIDI_OFI_vci_to_vni(int vci)362 MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_vci_to_vni(int vci)
363 {
364 return vci;
365 }
366
MPIDI_OFI_vci_to_vni_assert(int vci)367 MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_vci_to_vni_assert(int vci)
368 {
369 int vni = MPIDI_OFI_vci_to_vni(vci);
370 MPIR_Assert(vni < MPIDI_OFI_global.num_vnis);
371 return vni;
372 }
373
MPIDI_OFI_av_to_phys(MPIDI_av_entry_t * av,int vni_local,int vni_remote)374 MPL_STATIC_INLINE_PREFIX fi_addr_t MPIDI_OFI_av_to_phys(MPIDI_av_entry_t * av,
375 int vni_local, int vni_remote)
376 {
377 #ifdef MPIDI_OFI_VNI_USE_DOMAIN
378 if (MPIDI_OFI_ENABLE_SCALABLE_ENDPOINTS) {
379 return fi_rx_addr(MPIDI_OFI_AV(av).dest[vni_local][vni_remote], 0,
380 MPIDI_OFI_MAX_ENDPOINTS_BITS);
381 } else {
382 return MPIDI_OFI_AV(av).dest[vni_local][vni_remote];
383 }
384 #else /* MPIDI_OFI_VNI_USE_SEPCTX */
385 if (MPIDI_OFI_ENABLE_SCALABLE_ENDPOINTS) {
386 return fi_rx_addr(MPIDI_OFI_AV(av).dest[0][0], vni_remote, MPIDI_OFI_MAX_ENDPOINTS_BITS);
387 } else {
388 MPIR_Assert(vni_remote == 0);
389 return MPIDI_OFI_AV(av).dest[0][0];
390 }
391 #endif
392 }
393
MPIDI_OFI_comm_to_phys(MPIR_Comm * comm,int rank,int vni_local,int vni_remote)394 MPL_STATIC_INLINE_PREFIX fi_addr_t MPIDI_OFI_comm_to_phys(MPIR_Comm * comm, int rank,
395 int vni_local, int vni_remote)
396 {
397 MPIDI_av_entry_t *av = MPIDIU_comm_rank_to_av(comm, rank);
398 return MPIDI_OFI_av_to_phys(av, vni_local, vni_remote);
399 }
400
MPIDI_OFI_is_tag_sync(uint64_t match_bits)401 MPL_STATIC_INLINE_PREFIX bool MPIDI_OFI_is_tag_sync(uint64_t match_bits)
402 {
403 return (0 != (MPIDI_OFI_SYNC_SEND & match_bits));
404 }
405
MPIDI_OFI_init_sendtag(MPIR_Context_id_t contextid,int tag,uint64_t type)406 MPL_STATIC_INLINE_PREFIX uint64_t MPIDI_OFI_init_sendtag(MPIR_Context_id_t contextid,
407 int tag, uint64_t type)
408 {
409 uint64_t match_bits;
410 match_bits = contextid;
411
412 match_bits = (match_bits << MPIDI_OFI_TAG_BITS);
413 match_bits |= (MPIDI_OFI_TAG_MASK & tag) | type;
414 return match_bits;
415 }
416
417 /* receive posting */
MPIDI_OFI_init_recvtag(uint64_t * mask_bits,MPIR_Context_id_t contextid,int tag)418 MPL_STATIC_INLINE_PREFIX uint64_t MPIDI_OFI_init_recvtag(uint64_t * mask_bits,
419 MPIR_Context_id_t contextid, int tag)
420 {
421 uint64_t match_bits = 0;
422 *mask_bits = MPIDI_OFI_PROTOCOL_MASK;
423 match_bits = contextid;
424
425 match_bits = (match_bits << MPIDI_OFI_TAG_BITS);
426
427 if (MPI_ANY_TAG == tag)
428 *mask_bits |= MPIDI_OFI_TAG_MASK;
429 else
430 match_bits |= (MPIDI_OFI_TAG_MASK & tag);
431
432 return match_bits;
433 }
434
MPIDI_OFI_init_get_tag(uint64_t match_bits)435 MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_init_get_tag(uint64_t match_bits)
436 {
437 return ((int) (match_bits & MPIDI_OFI_TAG_MASK));
438 }
439
MPIDI_OFI_context_to_request(void * context)440 MPL_STATIC_INLINE_PREFIX MPIR_Request *MPIDI_OFI_context_to_request(void *context)
441 {
442 char *base = (char *) context;
443 return (MPIR_Request *) MPL_container_of(base, MPIR_Request, dev.ch4.netmod);
444 }
445
446 struct MPIDI_OFI_contig_blocks_params {
447 size_t max_pipe;
448 MPI_Aint count;
449 MPI_Aint last_loc;
450 MPI_Aint start_loc;
451 size_t last_chunk;
452 };
453
MPIDI_OFI_count_iov(int dt_count,MPI_Datatype dt_datatype,size_t total_bytes,size_t max_pipe)454 MPL_STATIC_INLINE_PREFIX size_t MPIDI_OFI_count_iov(int dt_count, /* number of data elements in dt_datatype */
455 MPI_Datatype dt_datatype, size_t total_bytes, /* total byte size, passed in here for reusing */
456 size_t max_pipe)
457 {
458 ssize_t rem_size = total_bytes;
459 MPI_Aint num_iov, total_iov = 0;
460
461 MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_OFI_COUNT_IOV);
462 MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_OFI_COUNT_IOV);
463
464 if (dt_datatype == MPI_DATATYPE_NULL)
465 goto fn_exit;
466
467 do {
468 MPI_Aint tmp_size = (rem_size > max_pipe) ? max_pipe : rem_size;
469
470 MPIR_Typerep_iov_len(dt_count, dt_datatype, tmp_size, &num_iov);
471 total_iov += num_iov;
472
473 rem_size -= tmp_size;
474 } while (rem_size);
475
476 fn_exit:
477 MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_OFI_COUNT_IOV);
478 return total_iov;
479 }
480
481 #endif /* OFI_IMPL_H_INCLUDED */
482