1 /*
2  * Copyright (c) 2011-2017 Sandia National Laboratories.  All rights reserved.
3  * Copyright (c) 2014      The University of Tennessee and The University
4  *                         of Tennessee Research Foundation.  All rights
5  *                         reserved.
6  * Copyright (c) 2015-2017 Research Organization for Information Science
7  *                         and Technology (RIST). All rights reserved.
8  * $COPYRIGHT$
9  *
10  * Additional copyrights may follow
11  *
12  * $HEADER$
13  */
14 
15 #include "ompi_config.h"
16 
17 #include "ompi/mca/osc/osc.h"
18 #include "ompi/mca/osc/base/base.h"
19 #include "ompi/mca/osc/base/osc_base_obj_convert.h"
20 
21 #include "osc_portals4.h"
22 #include "osc_portals4_request.h"
23 
24 
25 static int
ompi_osc_portals4_get_op(struct ompi_op_t * op,ptl_op_t * ptl_op)26 ompi_osc_portals4_get_op(struct ompi_op_t *op, ptl_op_t *ptl_op)
27 {
28     if (MPI_MAX == op) {
29         *ptl_op = PTL_MAX;
30     } else if (MPI_MIN == op) {
31         *ptl_op = PTL_MIN;
32     } else if (MPI_SUM == op) {
33         *ptl_op = PTL_SUM;
34     } else if (MPI_PROD == op) {
35         *ptl_op = PTL_PROD;
36     } else if (MPI_LAND == op) {
37         *ptl_op = PTL_LAND;
38     } else if (MPI_BAND == op) {
39         *ptl_op = PTL_BAND;
40     } else if (MPI_LOR == op) {
41         *ptl_op = PTL_LOR;
42     } else if (MPI_BOR == op) {
43         *ptl_op = PTL_BOR;
44     } else if (MPI_LXOR == op) {
45         *ptl_op = PTL_LXOR;
46     } else if (MPI_BXOR == op) {
47         *ptl_op = PTL_BXOR;
48     } else {
49         return OMPI_ERROR;
50     }
51 
52     return OMPI_SUCCESS;
53 }
54 
55 
56 static int
get_sized_type(bool sign,size_t size,ptl_datatype_t * ptl_dt)57 get_sized_type(bool sign, size_t size, ptl_datatype_t *ptl_dt)
58 {
59     if (sign) {
60         switch (size) {
61         case 1:
62             *ptl_dt = PTL_INT8_T;
63             break;
64         case 2:
65             *ptl_dt = PTL_INT16_T;
66             break;
67         case 4:
68             *ptl_dt = PTL_INT32_T;
69             break;
70         case 8:
71             *ptl_dt = PTL_INT64_T;
72             break;
73         default:
74             return OMPI_ERROR;
75         }
76     } else {
77         switch (size) {
78         case 1:
79             *ptl_dt = PTL_UINT8_T;
80             break;
81         case 2:
82             *ptl_dt = PTL_UINT16_T;
83             break;
84         case 4:
85             *ptl_dt = PTL_UINT32_T;
86             break;
87         case 8:
88             *ptl_dt = PTL_UINT64_T;
89             break;
90         default:
91             return OMPI_ERROR;
92         }
93     }
94 
95     return OMPI_SUCCESS;
96 }
97 
98 
99 static int
ompi_osc_portals4_get_dt(struct ompi_datatype_t * dt,ptl_datatype_t * ptl_dt)100 ompi_osc_portals4_get_dt(struct ompi_datatype_t *dt, ptl_datatype_t *ptl_dt)
101 {
102     ompi_datatype_t *base_dt = ompi_datatype_get_single_predefined_type_from_args(dt);
103 
104     if (MPI_BYTE == base_dt) {
105         *ptl_dt = PTL_INT8_T;
106     } else if (MPI_CHAR == base_dt) {
107         *ptl_dt = PTL_INT8_T;
108     } else if (MPI_SHORT == base_dt) {
109         return get_sized_type(true, sizeof(short), ptl_dt);
110     } else if (MPI_INT == base_dt) {
111         return get_sized_type(true, sizeof(int), ptl_dt);
112     } else if (MPI_LONG == base_dt) {
113         return get_sized_type(true, sizeof(long), ptl_dt);
114     } else if (MPI_FLOAT == base_dt) {
115         *ptl_dt = PTL_FLOAT;
116     } else if (MPI_DOUBLE == base_dt) {
117         *ptl_dt = PTL_DOUBLE;
118     } else if (MPI_LONG_DOUBLE == base_dt) {
119         *ptl_dt = PTL_LONG_DOUBLE;
120     } else if (MPI_UNSIGNED_CHAR == base_dt) {
121         *ptl_dt = PTL_UINT8_T;
122     } else if (MPI_SIGNED_CHAR == base_dt) {
123         *ptl_dt = PTL_UINT8_T;
124     } else if (MPI_UNSIGNED_SHORT == base_dt) {
125         return get_sized_type(false, sizeof(short), ptl_dt);
126     } else if (MPI_UNSIGNED_LONG == base_dt) {
127         return get_sized_type(false, sizeof(long), ptl_dt);
128     } else if (MPI_UNSIGNED == base_dt) {
129         return get_sized_type(false, sizeof(int), ptl_dt);
130 #if OPAL_HAVE_LONG_LONG
131     } else if (MPI_LONG_LONG_INT == base_dt) {
132         return get_sized_type(true, sizeof(long long int), ptl_dt);
133     } else if (MPI_LONG_LONG == base_dt) {
134         return get_sized_type(true, sizeof(long long), ptl_dt);
135 #endif
136     } else if (MPI_INT8_T == base_dt) {
137         *ptl_dt = PTL_INT8_T;
138     } else if (MPI_UINT8_T == base_dt) {
139         *ptl_dt = PTL_UINT8_T;
140     } else if (MPI_INT16_T == base_dt) {
141         *ptl_dt = PTL_INT16_T;
142     } else if (MPI_UINT16_T == base_dt) {
143         *ptl_dt = PTL_UINT16_T;
144     } else if (MPI_INT32_T == base_dt) {
145         *ptl_dt = PTL_INT32_T;
146     } else if (MPI_UINT32_T == base_dt) {
147         *ptl_dt = PTL_UINT32_T;
148     } else if (MPI_INT64_T == base_dt) {
149         *ptl_dt = PTL_INT64_T;
150     } else if (MPI_UINT64_T == base_dt) {
151         *ptl_dt = PTL_UINT64_T;
152 #if HAVE_FLOAT__COMPLEX
153     } else if (MPI_C_COMPLEX == base_dt) {
154         *ptl_dt = PTL_DOUBLE_COMPLEX;
155     } else if (MPI_C_FLOAT_COMPLEX == base_dt) {
156         *ptl_dt = PTL_FLOAT_COMPLEX;
157 #endif
158 #if HAVE_DOUBLE__COMPLEX
159     } else if (MPI_C_DOUBLE_COMPLEX == base_dt) {
160         *ptl_dt = PTL_DOUBLE_COMPLEX;
161 #endif
162 #if HAVE_LONG_DOUBLE__COMPLEX
163     } else if (MPI_C_LONG_DOUBLE_COMPLEX == base_dt) {
164         *ptl_dt = PTL_LONG_DOUBLE_COMPLEX;
165 #endif
166     } else if (MPI_AINT == base_dt) {
167         if (sizeof(MPI_Aint) == 2) {
168             *ptl_dt = PTL_UINT16_T;
169         } else if (sizeof(MPI_Aint) == 4) {
170             *ptl_dt = PTL_UINT32_T;
171         } else if (sizeof(MPI_Aint) == 8) {
172             *ptl_dt = PTL_UINT64_T;
173         }
174     } else {
175         return OMPI_ERROR;
176     }
177 
178     return 0;
179 }
180 
181 static  ptl_size_t
number_of_fragments(ptl_size_t length,ptl_size_t maxlength)182 number_of_fragments(ptl_size_t length, ptl_size_t maxlength)
183 {
184     ptl_size_t nb_frag = length == 0 ? 1 : (length - 1) / maxlength + 1;
185     OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
186                          "%s,%d : %ld fragment(s)", __FUNCTION__, __LINE__, nb_frag));
187     return nb_frag;
188 }
189 
190 /* put in segments no larger than segment_length */
191 static int
segmentedPut(int64_t * opcount,ptl_handle_md_t md_h,ptl_size_t origin_offset,ptl_size_t put_length,ptl_size_t segment_length,ptl_ack_req_t ack_req,ptl_process_t target_id,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,ptl_size_t target_offset,void * user_ptr,ptl_hdr_data_t hdr_data)192 segmentedPut(int64_t *opcount,
193              ptl_handle_md_t md_h,
194              ptl_size_t origin_offset,
195              ptl_size_t put_length,
196              ptl_size_t segment_length,
197              ptl_ack_req_t ack_req,
198              ptl_process_t target_id,
199              ptl_pt_index_t pt_index,
200              ptl_match_bits_t match_bits,
201              ptl_size_t target_offset,
202              void *user_ptr,
203              ptl_hdr_data_t hdr_data)
204 {
205     int ret;
206     ptl_size_t bytes_put = 0;
207 
208     do {
209         opal_atomic_add_64(opcount, 1);
210 
211         ptl_size_t frag_length = MIN(put_length, segment_length);
212         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
213                              "Put size : %lu/%lu, offset:%lu", frag_length, put_length, bytes_put));
214         ret = PtlPut(md_h,
215                      origin_offset + bytes_put,
216                      frag_length,
217                      ack_req,
218                      target_id,
219                      pt_index,
220                      match_bits,
221                      target_offset + bytes_put,
222                      user_ptr,
223                      hdr_data);
224         if (PTL_OK != ret) {
225             opal_atomic_add_64(opcount, -1);
226             opal_output_verbose(1, ompi_osc_base_framework.framework_output,
227                                  "%s:%d PtlPut failed with return value %d",
228                                  __FUNCTION__, __LINE__, ret);
229             return ret;
230         }
231         put_length -= frag_length;
232         bytes_put += frag_length;
233     } while (put_length);
234     return PTL_OK;
235 }
236 
237 /* get in segments no larger than segment_length */
238 static int
segmentedGet(int64_t * opcount,ptl_handle_md_t md_h,ptl_size_t origin_offset,ptl_size_t get_length,ptl_size_t segment_length,ptl_process_t target_id,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,ptl_size_t target_offset,void * user_ptr)239 segmentedGet(int64_t *opcount,
240              ptl_handle_md_t md_h,
241              ptl_size_t origin_offset,
242              ptl_size_t get_length,
243              ptl_size_t segment_length,
244              ptl_process_t target_id,
245              ptl_pt_index_t pt_index,
246              ptl_match_bits_t match_bits,
247              ptl_size_t target_offset,
248              void *user_ptr)
249 {
250     int ret;
251     ptl_size_t bytes_gotten = 0;
252 
253     do {
254         opal_atomic_add_64(opcount, 1);
255 
256         ptl_size_t frag_length = MIN(get_length, segment_length);
257         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
258                              "Get size : %lu/%lu, offset:%lu", frag_length, get_length, bytes_gotten));
259 
260         ret = PtlGet(md_h,
261                      (ptl_size_t) origin_offset + bytes_gotten,
262                      frag_length,
263                      target_id,
264                      pt_index,
265                      match_bits,
266                      target_offset + bytes_gotten,
267                      user_ptr);
268         if (PTL_OK != ret) {
269             opal_atomic_add_64(opcount, -1);
270             opal_output_verbose(1, ompi_osc_base_framework.framework_output,
271                                  "%s:%d PtlGet failed with return value %d",
272                                  __FUNCTION__, __LINE__, ret);
273             return ret;
274         }
275         get_length -= frag_length;
276         bytes_gotten += frag_length;
277     } while (get_length);
278     return PTL_OK;
279 }
280 
281 /* atomic op in segments no larger than segment_length */
282 static int
segmentedAtomic(int64_t * opcount,ptl_handle_md_t md_h,ptl_size_t origin_offset,ptl_size_t length,ptl_size_t segment_length,ptl_process_t target_id,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,ptl_size_t target_offset,void * user_ptr,ptl_op_t ptl_op,ptl_datatype_t ptl_dt)283 segmentedAtomic(int64_t *opcount,
284                 ptl_handle_md_t md_h,
285                 ptl_size_t origin_offset,
286                 ptl_size_t length,
287                 ptl_size_t segment_length,
288                 ptl_process_t target_id,
289                 ptl_pt_index_t pt_index,
290                 ptl_match_bits_t match_bits,
291                 ptl_size_t target_offset,
292                 void *user_ptr,
293                 ptl_op_t ptl_op,
294                 ptl_datatype_t ptl_dt)
295 {
296     int ret;
297     ptl_size_t sent = 0;
298 
299     do {
300         opal_atomic_add_64(opcount, 1);
301 
302         ptl_size_t frag_length = MIN(length, segment_length);
303         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
304                              "Atomic size : %lu/%lu, offset:%lu", frag_length, length, sent));
305         ret = PtlAtomic(md_h,
306                         (ptl_size_t) origin_offset + sent,
307                         frag_length,
308                         PTL_ACK_REQ,
309                         target_id,
310                         pt_index,
311                         match_bits,
312                         target_offset + sent,
313                         user_ptr,
314                         0,
315                         ptl_op,
316                         ptl_dt);
317         if (PTL_OK != ret) {
318             opal_atomic_add_64(opcount, -1);
319             opal_output_verbose(1, ompi_osc_base_framework.framework_output,
320                                  "%s:%d PtlAtomic failed with return value %d",
321                                  __FUNCTION__, __LINE__, ret);
322             return ret;
323         }
324         length -= frag_length;
325         sent += frag_length;
326     } while (length);
327     return PTL_OK;
328 }
329 
330 /* atomic op in segments no larger than segment_length */
331 static int
segmentedFetchAtomic(int64_t * opcount,ptl_handle_md_t result_md_h,ptl_size_t result_offset,ptl_handle_md_t origin_md_h,ptl_size_t origin_offset,ptl_size_t length,ptl_size_t segment_length,ptl_process_t target_id,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,ptl_size_t target_offset,void * user_ptr,ptl_op_t ptl_op,ptl_datatype_t ptl_dt)332 segmentedFetchAtomic(int64_t *opcount,
333                      ptl_handle_md_t result_md_h,
334                      ptl_size_t result_offset,
335                      ptl_handle_md_t origin_md_h,
336                      ptl_size_t origin_offset,
337                      ptl_size_t length,
338                      ptl_size_t segment_length,
339                      ptl_process_t target_id,
340                      ptl_pt_index_t pt_index,
341                      ptl_match_bits_t match_bits,
342                      ptl_size_t target_offset,
343                      void *user_ptr,
344                      ptl_op_t ptl_op,
345                      ptl_datatype_t ptl_dt)
346 {
347     int ret;
348     ptl_size_t sent = 0;
349 
350     do {
351         opal_atomic_add_64(opcount, 1);
352 
353         ptl_size_t frag_length = MIN(length, segment_length);
354         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
355                              "Atomic size : %lu/%lu, offset:%lu", frag_length, length, sent));
356         ret = PtlFetchAtomic(result_md_h,
357                              result_offset + sent,
358                              origin_md_h,
359                              origin_offset + sent,
360                              frag_length,
361                              target_id,
362                              pt_index,
363                              match_bits,
364                              target_offset + sent,
365                              user_ptr,
366                              0,
367                              ptl_op,
368                              ptl_dt);
369         if (PTL_OK != ret) {
370             opal_atomic_add_64(opcount, -1);
371             opal_output_verbose(1, ompi_osc_base_framework.framework_output,
372                                  "%s:%d PtlFetchAtomic failed with return value %d",
373                                  __FUNCTION__, __LINE__, ret);
374             return ret;
375         }
376         length -= frag_length;
377         sent += frag_length;
378     } while (length);
379     return PTL_OK;
380 }
381 
382 /* swap in segments no larger than segment_length */
383 static int
segmentedSwap(int64_t * opcount,ptl_handle_md_t result_md_h,ptl_size_t result_offset,ptl_handle_md_t origin_md_h,ptl_size_t origin_offset,ptl_size_t length,ptl_size_t segment_length,ptl_process_t target_id,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,ptl_size_t target_offset,void * user_ptr,ptl_datatype_t ptl_dt)384 segmentedSwap(int64_t *opcount,
385               ptl_handle_md_t result_md_h,
386               ptl_size_t result_offset,
387               ptl_handle_md_t origin_md_h,
388               ptl_size_t origin_offset,
389               ptl_size_t length,
390               ptl_size_t segment_length,
391               ptl_process_t target_id,
392               ptl_pt_index_t pt_index,
393               ptl_match_bits_t match_bits,
394               ptl_size_t target_offset,
395               void *user_ptr,
396               ptl_datatype_t ptl_dt)
397 {
398     int ret;
399     ptl_size_t sent = 0;
400 
401     do {
402         opal_atomic_add_64(opcount, 1);
403 
404         ptl_size_t frag_length = MIN(length, segment_length);
405         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
406                              "Swap size : %lu/%lu, offset:%lu", frag_length, length, sent));
407         ret = PtlSwap(result_md_h,
408                       result_offset + sent,
409                       origin_md_h,
410                       (ptl_size_t) origin_offset + sent,
411                       frag_length,
412                       target_id,
413                       pt_index,
414                       match_bits,
415                       target_offset + sent,
416                       user_ptr,
417                       0,
418                       NULL,
419                       PTL_SWAP,
420                       ptl_dt);
421         if (PTL_OK != ret) {
422             opal_atomic_add_64(opcount, -1);
423             opal_output_verbose(1, ompi_osc_base_framework.framework_output,
424                                  "%s:%d PtlSwap failed with return value %d",
425                                  __FUNCTION__, __LINE__, ret);
426             return ret;
427         }
428         length -= frag_length;
429         sent += frag_length;
430     } while (length);
431     return PTL_OK;
432 }
433 
434 static int
create_iov_list(const void * address,int count,ompi_datatype_t * datatype,ptl_iovec_t ** ptl_iovec,ptl_size_t * ptl_iovec_count)435 create_iov_list(const void       *address,
436                 int               count,
437                 ompi_datatype_t  *datatype,
438                 ptl_iovec_t     **ptl_iovec,
439                 ptl_size_t       *ptl_iovec_count)
440 {
441     struct iovec iov[OSC_PORTALS4_IOVEC_MAX];
442     opal_convertor_t convertor;
443     uint32_t iov_count;
444     uint32_t iov_index, ptl_iovec_index;
445     /* needed for opal_convertor_raw but not used */
446     size_t size;
447     int ret;
448     bool done;
449 
450     OBJ_CONSTRUCT(&convertor, opal_convertor_t);
451     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &datatype->super, count,
452                                                     address, 0, &convertor);
453     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
454         return ret;
455     }
456 
457 
458     *ptl_iovec_count = 0;
459     ptl_iovec_index = 0;
460     do {
461         /* decode segments of the data */
462         iov_count = OSC_PORTALS4_IOVEC_MAX;
463         iov_index = 0;
464 
465         /* opal_convertor_raw returns done when it has reached the end of the data */
466         done = opal_convertor_raw (&convertor, iov, &iov_count, &size);
467 
468         *ptl_iovec_count += iov_count;
469         *ptl_iovec = (ptl_iovec_t *)realloc(*ptl_iovec, *ptl_iovec_count * sizeof(ptl_iovec_t));
470 
471         while (iov_index != iov_count) {
472             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
473                                  "adding iov[%d].[%p,%lu] to ptl_iovec", iov_index, iov[iov_index].iov_base, iov[iov_index].iov_len));
474             (*ptl_iovec)[ptl_iovec_index].iov_base = iov[iov_index].iov_base;
475             (*ptl_iovec)[ptl_iovec_index].iov_len  = iov[iov_index].iov_len;
476 
477             ptl_iovec_index++;
478             iov_index++;
479         }
480 
481         assert(*ptl_iovec_count == ptl_iovec_index);
482     } while (!done);
483 
484     return OMPI_SUCCESS;
485 
486 }
487 
488 /* get from a contiguous remote to an iovec local */
489 static int
get_to_iovec(ompi_osc_portals4_module_t * module,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)490 get_to_iovec(ompi_osc_portals4_module_t *module,
491              const void       *origin_address,
492              int               origin_count,
493              ompi_datatype_t  *origin_datatype,
494              ptl_process_t     peer,
495              int               target_count,
496              ompi_datatype_t  *target_datatype,
497              size_t            offset,
498              ptl_pt_index_t    pt_index,
499              ptl_match_bits_t  match_bits,
500              void             *user_ptr)
501 {
502     int ret;
503     size_t size;
504     ptrdiff_t length, origin_lb, target_lb, extent;
505     ptl_md_t md;
506 
507     if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
508         PtlMDRelease(module->origin_iovec_md_h);
509         free(module->origin_iovec_list);
510         module->origin_iovec_md_h = PTL_INVALID_HANDLE;
511         module->origin_iovec_list = NULL;
512     }
513 
514     ptl_size_t iovec_count=0;
515     create_iov_list(
516         origin_address,
517         origin_count,
518         origin_datatype,
519         &module->origin_iovec_list,
520         &iovec_count);
521 
522     ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
523     if (OMPI_SUCCESS != ret) {
524         return ret;
525     }
526     ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
527     if (OMPI_SUCCESS != ret) {
528         return ret;
529     }
530     ompi_datatype_type_size(origin_datatype, &size);
531     length = size * origin_count;
532 
533     md.start = module->origin_iovec_list;
534     md.length = iovec_count;
535     if (user_ptr) {
536         md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
537     } else {
538         md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
539     }
540     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
541     md.ct_handle = module->ct_h;
542     ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
543     if (PTL_OK != ret) {
544         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
545                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
546                             __FILE__, __LINE__, ret);
547         return ret;
548     }
549 
550     opal_atomic_add_64(&module->opcount, 1);
551 
552     OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
553                  "%s,%d Get(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
554                  __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount));
555     ret = PtlGet(module->origin_iovec_md_h,
556                  (ptl_size_t) origin_lb,
557                  length,
558                  peer,
559                  module->pt_idx,
560                  module->match_bits,
561                  offset + target_lb,
562                  user_ptr);
563     if (PTL_OK != ret) {
564         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
565                      "%s,%d PtlGet() failed: ret = %d",
566                      __FUNCTION__, __LINE__, ret));
567         opal_atomic_add_64(&module->opcount, -1);
568         return ret;
569     }
570 
571     return OMPI_SUCCESS;
572 }
573 
574 /* get to an iovec MD from a contiguous target using fragments no larger
575  * than max_fetch_atomic_size to guarantee atomic writes at the origin */
576 static int
atomic_get_to_iovec(ompi_osc_portals4_module_t * module,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)577 atomic_get_to_iovec(ompi_osc_portals4_module_t *module,
578                     const void       *origin_address,
579                     int               origin_count,
580                     ompi_datatype_t  *origin_datatype,
581                     ptl_process_t     peer,
582                     int               target_count,
583                     ompi_datatype_t  *target_datatype,
584                     size_t            offset,
585                     ptl_pt_index_t    pt_index,
586                     ptl_match_bits_t  match_bits,
587                     void             *user_ptr)
588 {
589     int ret;
590     size_t size;
591     ptrdiff_t length, origin_lb, target_lb, extent;
592     ptl_md_t md;
593 
594     if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
595         PtlMDRelease(module->origin_iovec_md_h);
596         free(module->origin_iovec_list);
597         module->origin_iovec_md_h = PTL_INVALID_HANDLE;
598         module->origin_iovec_list = NULL;
599     }
600 
601     ptl_size_t iovec_count=0;
602     create_iov_list(
603         origin_address,
604         origin_count,
605         origin_datatype,
606         &module->origin_iovec_list,
607         &iovec_count);
608 
609     ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
610     if (OMPI_SUCCESS != ret) {
611         return ret;
612     }
613     ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
614     if (OMPI_SUCCESS != ret) {
615         return ret;
616     }
617     ompi_datatype_type_size(origin_datatype, &size);
618     length = size * origin_count;
619 
620     md.start = module->origin_iovec_list;
621     md.length = iovec_count;
622     if (user_ptr) {
623         md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
624     } else {
625         md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
626     }
627     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
628     md.ct_handle = module->ct_h;
629     ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
630     if (PTL_OK != ret) {
631         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
632                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
633                             __FILE__, __LINE__, ret);
634         return ret;
635     }
636 
637     OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
638                  "%s,%d Get(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
639                  __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount));
640     ret = segmentedGet(&module->opcount,
641                        module->origin_iovec_md_h,
642                        (ptl_size_t) origin_lb,
643                        length,
644                        module->fetch_atomic_max,
645                        peer,
646                        module->pt_idx,
647                        module->match_bits,
648                        offset + target_lb,
649                        user_ptr);
650     if (PTL_OK != ret) {
651         return ret;
652     }
653 
654     return OMPI_SUCCESS;
655 }
656 
657 /* put from an iovec MD into a contiguous target */
658 static int
put_from_iovec(ompi_osc_portals4_module_t * module,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)659 put_from_iovec(ompi_osc_portals4_module_t *module,
660                const void       *origin_address,
661                int               origin_count,
662                ompi_datatype_t  *origin_datatype,
663                ptl_process_t     peer,
664                int               target_count,
665                ompi_datatype_t  *target_datatype,
666                size_t            offset,
667                ptl_pt_index_t    pt_index,
668                ptl_match_bits_t  match_bits,
669                void             *user_ptr)
670 {
671     int ret;
672     size_t size;
673     ptrdiff_t length, origin_lb, target_lb, extent;
674     ptl_md_t md;
675 
676     if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
677         PtlMDRelease(module->origin_iovec_md_h);
678         free(module->origin_iovec_list);
679         module->origin_iovec_md_h = PTL_INVALID_HANDLE;
680         module->origin_iovec_list = NULL;
681     }
682 
683     ptl_size_t iovec_count=0;
684     create_iov_list(
685         origin_address,
686         origin_count,
687         origin_datatype,
688         &module->origin_iovec_list,
689         &iovec_count);
690 
691     ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
692     if (OMPI_SUCCESS != ret) {
693         return ret;
694     }
695     ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
696     if (OMPI_SUCCESS != ret) {
697         return ret;
698     }
699     ompi_datatype_type_size(origin_datatype, &size);
700     length = size * origin_count;
701 
702     md.start = module->origin_iovec_list;
703     md.length = iovec_count;
704     if (user_ptr) {
705         md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
706     } else {
707         md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
708     }
709     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
710     md.ct_handle = module->ct_h;
711     ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
712     if (PTL_OK != ret) {
713         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
714                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
715                             __FILE__, __LINE__, ret);
716         return ret;
717     }
718 
719     opal_atomic_add_64(&module->opcount, 1);
720 
721     OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
722                  "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
723                  __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount));
724     ret = PtlPut(module->origin_iovec_md_h,
725                  (ptl_size_t) origin_lb,
726                  length,
727                  PTL_ACK_REQ,
728                  peer,
729                  module->pt_idx,
730                  module->match_bits,
731                  offset + target_lb,
732                  user_ptr,
733                  0);
734     if (PTL_OK != ret) {
735         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
736                      "%s,%d PtlPut() failed: ret = %d",
737                      __FUNCTION__, __LINE__, ret));
738         opal_atomic_add_64(&module->opcount, -1);
739         return ret;
740     }
741 
742     return OMPI_SUCCESS;
743 }
744 
745 /* put from an iovec MD into a contiguous target using fragments no larger
746  * than max_atomic_size to guarantee atomic writes at the target */
747 static int
atomic_put_from_iovec(ompi_osc_portals4_module_t * module,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)748 atomic_put_from_iovec(ompi_osc_portals4_module_t *module,
749                       const void       *origin_address,
750                       int               origin_count,
751                       ompi_datatype_t  *origin_datatype,
752                       ptl_process_t     peer,
753                       int               target_count,
754                       ompi_datatype_t  *target_datatype,
755                       size_t            offset,
756                       ptl_pt_index_t    pt_index,
757                       ptl_match_bits_t  match_bits,
758                       void             *user_ptr)
759 {
760     int ret;
761     size_t size;
762     ptrdiff_t length, origin_lb, target_lb, extent;
763     ptl_md_t md;
764 
765     if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
766         PtlMDRelease(module->origin_iovec_md_h);
767         free(module->origin_iovec_list);
768         module->origin_iovec_md_h = PTL_INVALID_HANDLE;
769         module->origin_iovec_list = NULL;
770     }
771 
772     ptl_size_t iovec_count=0;
773     create_iov_list(
774         origin_address,
775         origin_count,
776         origin_datatype,
777         &module->origin_iovec_list,
778         &iovec_count);
779 
780     ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
781     if (OMPI_SUCCESS != ret) {
782         return ret;
783     }
784     ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
785     if (OMPI_SUCCESS != ret) {
786         return ret;
787     }
788     ompi_datatype_type_size(origin_datatype, &size);
789     length = size * origin_count;
790 
791     md.start = module->origin_iovec_list;
792     md.length = iovec_count;
793     if (user_ptr) {
794         md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
795     } else {
796         md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
797     }
798     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
799     md.ct_handle = module->ct_h;
800     ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
801     if (PTL_OK != ret) {
802         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
803                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
804                             __FILE__, __LINE__, ret);
805         return ret;
806     }
807 
808     OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
809                  "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)",
810                  __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount));
811     ret = segmentedPut(&module->opcount,
812                        module->origin_iovec_md_h,
813                        (ptl_size_t) origin_lb,
814                        length,
815                        module->atomic_max,
816                        PTL_ACK_REQ,
817                        peer,
818                        module->pt_idx,
819                        module->match_bits,
820                        offset + target_lb,
821                        NULL,
822                        0);
823     if (OMPI_SUCCESS != ret) {
824         return ret;
825     }
826 
827     return OMPI_SUCCESS;
828 }
829 
830 /* perform atomic operation on iovec local and contiguous remote */
831 static int
atomic_from_iovec(ompi_osc_portals4_module_t * module,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,struct ompi_op_t * op,void * user_ptr)832 atomic_from_iovec(ompi_osc_portals4_module_t *module,
833                   const void       *origin_address,
834                   int               origin_count,
835                   ompi_datatype_t  *origin_datatype,
836                   ptl_process_t     peer,
837                   int               target_count,
838                   ompi_datatype_t  *target_datatype,
839                   size_t            offset,
840                   ptl_pt_index_t    pt_index,
841                   ptl_match_bits_t  match_bits,
842                   struct ompi_op_t *op,
843                   void             *user_ptr)
844 {
845     int ret;
846     size_t size;
847     ptrdiff_t length, origin_lb, target_lb, extent;
848     ptl_md_t md;
849     ptl_op_t ptl_op;
850     ptl_datatype_t ptl_dt;
851 
852     if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
853         PtlMDRelease(module->origin_iovec_md_h);
854         free(module->origin_iovec_list);
855         module->origin_iovec_md_h = PTL_INVALID_HANDLE;
856         module->origin_iovec_list = NULL;
857     }
858 
859     ptl_size_t iovec_count=0;
860     create_iov_list(
861         origin_address,
862         origin_count,
863         origin_datatype,
864         &module->origin_iovec_list,
865         &iovec_count);
866 
867     ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
868     if (OMPI_SUCCESS != ret) {
869         opal_output(ompi_osc_base_framework.framework_output,
870                 "datatype is not currently supported");
871         return OMPI_ERR_NOT_SUPPORTED;
872     }
873     ret = ompi_osc_portals4_get_op(op, &ptl_op);
874     if (OMPI_SUCCESS != ret) {
875         opal_output(ompi_osc_base_framework.framework_output,
876                 "operation is not currently supported");
877         return OMPI_ERR_NOT_SUPPORTED;
878     }
879 
880     ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
881     if (OMPI_SUCCESS != ret) {
882         return ret;
883     }
884     ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
885     if (OMPI_SUCCESS != ret) {
886         return ret;
887     }
888     ompi_datatype_type_size(origin_datatype, &size);
889     length = size * origin_count;
890 
891     md.start = module->origin_iovec_list;
892     md.length = iovec_count;
893     if (user_ptr) {
894         md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
895     } else {
896         md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
897     }
898     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
899     md.ct_handle = module->ct_h;
900     ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
901     if (PTL_OK != ret) {
902         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
903                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
904                             __FILE__, __LINE__, ret);
905         return ret;
906     }
907 
908     ret = segmentedAtomic(&module->opcount,
909                           module->origin_iovec_md_h,
910                           (ptl_size_t) origin_lb,
911                           length,
912                           module->atomic_max,
913                           peer,
914                           module->pt_idx,
915                           module->match_bits,
916                           offset + target_lb,
917                           user_ptr,
918                           ptl_op,
919                           ptl_dt);
920     if (OMPI_SUCCESS != ret) {
921         return ret;
922     }
923 
924     return OMPI_SUCCESS;
925 }
926 
927 /* perform atomic operation on iovec local and contiguous remote */
928 static int
swap_to_iovec(ompi_osc_portals4_module_t * module,const void * result_address,int result_count,ompi_datatype_t * result_datatype,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)929 swap_to_iovec(ompi_osc_portals4_module_t *module,
930               const void       *result_address,
931               int               result_count,
932               ompi_datatype_t  *result_datatype,
933               const void       *origin_address,
934               int               origin_count,
935               ompi_datatype_t  *origin_datatype,
936               ptl_process_t     peer,
937               int               target_count,
938               ompi_datatype_t  *target_datatype,
939               size_t            offset,
940               ptl_pt_index_t    pt_index,
941               ptl_match_bits_t  match_bits,
942               void             *user_ptr)
943 {
944     int ret;
945     size_t size;
946     ptl_size_t iovec_count=0;
947     ptrdiff_t length, result_lb, origin_lb, target_lb, extent;
948     ptl_md_t md;
949     ptl_datatype_t ptl_dt;
950 
951     if (module->result_iovec_md_h != PTL_INVALID_HANDLE) {
952         PtlMDRelease(module->result_iovec_md_h);
953         free(module->result_iovec_list);
954         module->result_iovec_md_h = PTL_INVALID_HANDLE;
955         module->result_iovec_list = NULL;
956     }
957 
958     create_iov_list(
959         result_address,
960         result_count,
961         result_datatype,
962         &module->result_iovec_list,
963         &iovec_count);
964 
965     md.start = module->result_iovec_list;
966     md.length = iovec_count;
967     if (user_ptr) {
968         md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
969     } else {
970         md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
971     }
972     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
973     md.ct_handle = module->ct_h;
974     ret = PtlMDBind(module->ni_h, &md, &module->result_iovec_md_h);
975     if (PTL_OK != ret) {
976         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
977                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
978                             __FILE__, __LINE__, ret);
979         return ret;
980     }
981 
982     if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
983         PtlMDRelease(module->origin_iovec_md_h);
984         free(module->origin_iovec_list);
985         module->origin_iovec_md_h = PTL_INVALID_HANDLE;
986         module->origin_iovec_list = NULL;
987     }
988 
989     create_iov_list(
990         origin_address,
991         origin_count,
992         origin_datatype,
993         &module->origin_iovec_list,
994         &iovec_count);
995 
996     md.start = module->origin_iovec_list;
997     md.length = iovec_count;
998     md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
999     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
1000     md.ct_handle = module->ct_h;
1001     ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
1002     if (PTL_OK != ret) {
1003         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1004                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
1005                             __FILE__, __LINE__, ret);
1006         return ret;
1007     }
1008 
1009     ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1010     if (OMPI_SUCCESS != ret) {
1011         opal_output(ompi_osc_base_framework.framework_output,
1012                 "datatype is not currently supported");
1013         return OMPI_ERR_NOT_SUPPORTED;
1014     }
1015 
1016     ret = ompi_datatype_get_true_extent(result_datatype, &result_lb, &extent);
1017     if (OMPI_SUCCESS != ret) {
1018         return ret;
1019     }
1020     ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
1021     if (OMPI_SUCCESS != ret) {
1022         return ret;
1023     }
1024     ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
1025     if (OMPI_SUCCESS != ret) {
1026         return ret;
1027     }
1028     ompi_datatype_type_size(origin_datatype, &size);
1029     length = size * origin_count;
1030 
1031     ret = segmentedSwap(&module->opcount,
1032                         module->result_iovec_md_h,
1033                         (ptl_size_t) result_lb,
1034                         module->origin_iovec_md_h,
1035                         (ptl_size_t) origin_lb,
1036                         length,
1037                         module->fetch_atomic_max,
1038                         peer,
1039                         module->pt_idx,
1040                         module->match_bits,
1041                         offset + target_lb,
1042                         user_ptr,
1043                         ptl_dt);
1044     if (OMPI_SUCCESS != ret) {
1045         return ret;
1046     }
1047 
1048     return OMPI_SUCCESS;
1049 }
1050 
1051 /* perform fetch atomic operation on iovec local and contiguous remote */
1052 static int
fetch_atomic_to_iovec(ompi_osc_portals4_module_t * module,const void * result_address,int result_count,ompi_datatype_t * result_datatype,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,struct ompi_op_t * op,void * user_ptr)1053 fetch_atomic_to_iovec(ompi_osc_portals4_module_t *module,
1054                       const void       *result_address,
1055                       int               result_count,
1056                       ompi_datatype_t  *result_datatype,
1057                       const void       *origin_address,
1058                       int               origin_count,
1059                       ompi_datatype_t  *origin_datatype,
1060                       ptl_process_t     peer,
1061                       int               target_count,
1062                       ompi_datatype_t  *target_datatype,
1063                       size_t            offset,
1064                       ptl_pt_index_t    pt_index,
1065                       ptl_match_bits_t  match_bits,
1066                       struct ompi_op_t *op,
1067                       void             *user_ptr)
1068 {
1069     int ret;
1070     size_t size;
1071     ptl_size_t iovec_count=0;
1072     ptrdiff_t length, result_lb, origin_lb, target_lb, extent;
1073     ptl_md_t md;
1074     ptl_op_t ptl_op;
1075     ptl_datatype_t ptl_dt;
1076 
1077     if (module->result_iovec_md_h != PTL_INVALID_HANDLE) {
1078         PtlMDRelease(module->result_iovec_md_h);
1079         free(module->result_iovec_list);
1080         module->result_iovec_md_h = PTL_INVALID_HANDLE;
1081         module->result_iovec_list = NULL;
1082     }
1083 
1084     create_iov_list(
1085         result_address,
1086         result_count,
1087         result_datatype,
1088         &module->result_iovec_list,
1089         &iovec_count);
1090 
1091     md.start = module->result_iovec_list;
1092     md.length = iovec_count;
1093     if (user_ptr) {
1094         md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
1095     } else {
1096         md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
1097     }
1098     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
1099     md.ct_handle = module->ct_h;
1100     ret = PtlMDBind(module->ni_h, &md, &module->result_iovec_md_h);
1101     if (PTL_OK != ret) {
1102         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1103                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
1104                             __FILE__, __LINE__, ret);
1105         return ret;
1106     }
1107 
1108     if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
1109         PtlMDRelease(module->origin_iovec_md_h);
1110         free(module->origin_iovec_list);
1111         module->origin_iovec_md_h = PTL_INVALID_HANDLE;
1112         module->origin_iovec_list = NULL;
1113     }
1114 
1115     create_iov_list(
1116         origin_address,
1117         origin_count,
1118         origin_datatype,
1119         &module->origin_iovec_list,
1120         &iovec_count);
1121 
1122     md.start = module->origin_iovec_list;
1123     md.length = iovec_count;
1124     md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
1125     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
1126     md.ct_handle = module->ct_h;
1127     ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
1128     if (PTL_OK != ret) {
1129         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1130                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
1131                             __FILE__, __LINE__, ret);
1132         return ret;
1133     }
1134 
1135     ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1136     if (OMPI_SUCCESS != ret) {
1137         opal_output(ompi_osc_base_framework.framework_output,
1138                 "datatype is not currently supported");
1139         return OMPI_ERR_NOT_SUPPORTED;
1140     }
1141     ret = ompi_osc_portals4_get_op(op, &ptl_op);
1142     if (OMPI_SUCCESS != ret) {
1143         opal_output(ompi_osc_base_framework.framework_output,
1144                 "operation is not currently supported");
1145         return OMPI_ERR_NOT_SUPPORTED;
1146     }
1147 
1148     ret = ompi_datatype_get_true_extent(result_datatype, &result_lb, &extent);
1149     if (OMPI_SUCCESS != ret) {
1150         return ret;
1151     }
1152     ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
1153     if (OMPI_SUCCESS != ret) {
1154         return ret;
1155     }
1156     ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
1157     if (OMPI_SUCCESS != ret) {
1158         return ret;
1159     }
1160     ompi_datatype_type_size(origin_datatype, &size);
1161     length = size * origin_count;
1162 
1163     ret = segmentedFetchAtomic(&module->opcount,
1164                                module->result_iovec_md_h,
1165                                (ptl_size_t) result_lb,
1166                                module->origin_iovec_md_h,
1167                                (ptl_size_t) origin_lb,
1168                                length,
1169                                module->fetch_atomic_max,
1170                                peer,
1171                                module->pt_idx,
1172                                module->match_bits,
1173                                offset + target_lb,
1174                                user_ptr,
1175                                ptl_op,
1176                                ptl_dt);
1177     if (OMPI_SUCCESS != ret) {
1178         return ret;
1179     }
1180 
1181     return OMPI_SUCCESS;
1182 }
1183 
1184 /*
1185  * Derived from ompi_osc_rdma_master_noncontig()
1186  */
1187 
1188 /* put in the largest chunks possible given the noncontiguous restriction */
1189 static int
put_to_noncontig(int64_t * opcount,ptl_handle_md_t md_h,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)1190 put_to_noncontig(int64_t          *opcount,
1191                  ptl_handle_md_t   md_h,
1192                  const void       *origin_address,
1193                  int               origin_count,
1194                  ompi_datatype_t  *origin_datatype,
1195                  ptl_process_t     peer,
1196                  int               target_count,
1197                  ompi_datatype_t  *target_datatype,
1198                  size_t            offset,
1199                  ptl_pt_index_t    pt_index,
1200                  ptl_match_bits_t  match_bits,
1201                  void             *user_ptr)
1202 {
1203     struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1204     opal_convertor_t origin_convertor, target_convertor;
1205     uint32_t origin_iov_count, target_iov_count;
1206     uint32_t origin_iov_index, target_iov_index;
1207     /* needed for opal_convertor_raw but not used */
1208     size_t origin_size, target_size, rdma_len;
1209     size_t max_rdma_len = mca_osc_portals4_component.ptl_max_msg_size;
1210     int ret;
1211     bool done;
1212 
1213     /* prepare convertors for the source and target. these convertors will be used to determine the
1214      * contiguous segments within the source and target. */
1215     OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1216     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1217                                                     (void*)origin_address, 0, &origin_convertor);
1218     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1219         return ret;
1220     }
1221 
1222     OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1223     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1224                                                     (void *)NULL, 0, &target_convertor);
1225     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1226         return ret;
1227     }
1228 
1229     origin_iov_index = 0;
1230     origin_iov_count = 0;
1231 
1232     do {
1233         /* decode segments of the remote data */
1234         target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1235         target_iov_index = 0;
1236 
1237         /* opal_convertor_raw returns done when it has reached the end of the data */
1238         done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1239 
1240         /* loop on the target segments until we have exhaused the decoded source data */
1241         while (target_iov_index != target_iov_count) {
1242             if (origin_iov_index == origin_iov_count) {
1243                 /* decode segments of the target buffer */
1244                 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1245                 origin_iov_index = 0;
1246                 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1247             }
1248 
1249             /* we already checked that the target was large enough. this should be impossible */
1250             assert (0 != origin_iov_count);
1251 
1252             /* determine how much to transfer in this operation */
1253             rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1254 
1255             opal_atomic_add_64(opcount, 1);
1256 
1257             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1258                              "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1259                              origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1260                              (unsigned long) target_iovec[target_iov_index].iov_len));
1261 
1262             ret = PtlPut(md_h,
1263                          (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1264                          rdma_len,
1265                          PTL_ACK_REQ,
1266                          peer,
1267                          pt_index,
1268                          match_bits,
1269                          offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1270                          user_ptr,
1271                          0);
1272             if (OPAL_UNLIKELY(PTL_OK != ret)) {
1273                 opal_atomic_add_64(opcount, -1);
1274                 return ret;
1275             }
1276 
1277             /* adjust io vectors */
1278             origin_iovec[origin_iov_index].iov_len -= rdma_len;
1279             target_iovec[target_iov_index].iov_len -= rdma_len;
1280             origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1281             target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1282 
1283             origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1284             target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1285         }
1286     } while (!done);
1287 
1288     /* clean up convertors */
1289     opal_convertor_cleanup (&origin_convertor);
1290     OBJ_DESTRUCT(&origin_convertor);
1291     opal_convertor_cleanup (&target_convertor);
1292     OBJ_DESTRUCT(&target_convertor);
1293 
1294     return OMPI_SUCCESS;
1295 }
1296 
1297 /* put in fragments no larger than max_atomic_size to guarantee atomic writes at the target */
1298 static int
atomic_put_to_noncontig(ompi_osc_portals4_module_t * module,ptl_handle_md_t md_h,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)1299 atomic_put_to_noncontig(ompi_osc_portals4_module_t *module,
1300                         ptl_handle_md_t   md_h,
1301                         const void       *origin_address,
1302                         int               origin_count,
1303                         ompi_datatype_t  *origin_datatype,
1304                         ptl_process_t     peer,
1305                         int               target_count,
1306                         ompi_datatype_t  *target_datatype,
1307                         size_t            offset,
1308                         ptl_pt_index_t    pt_index,
1309                         ptl_match_bits_t  match_bits,
1310                         void             *user_ptr)
1311 {
1312     struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1313     opal_convertor_t origin_convertor, target_convertor;
1314     uint32_t origin_iov_count, target_iov_count;
1315     uint32_t origin_iov_index, target_iov_index;
1316     /* needed for opal_convertor_raw but not used */
1317     size_t origin_size, target_size, rdma_len;
1318     size_t max_rdma_len = module->atomic_max;
1319     int ret;
1320     bool done;
1321 
1322     /* prepare convertors for the source and target. these convertors will be used to determine the
1323      * contiguous segments within the source and target. */
1324     OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1325     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1326                                                     (void*)origin_address, 0, &origin_convertor);
1327     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1328         return ret;
1329     }
1330 
1331     OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1332     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1333                                                     (void *)NULL, 0, &target_convertor);
1334     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1335         return ret;
1336     }
1337 
1338     origin_iov_index = 0;
1339     origin_iov_count = 0;
1340 
1341     do {
1342         /* decode segments of the remote data */
1343         target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1344         target_iov_index = 0;
1345 
1346         /* opal_convertor_raw returns done when it has reached the end of the data */
1347         done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1348 
1349         /* loop on the target segments until we have exhaused the decoded source data */
1350         while (target_iov_index != target_iov_count) {
1351             if (origin_iov_index == origin_iov_count) {
1352                 /* decode segments of the target buffer */
1353                 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1354                 origin_iov_index = 0;
1355                 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1356             }
1357 
1358             /* we already checked that the target was large enough. this should be impossible */
1359             assert (0 != origin_iov_count);
1360 
1361             /* determine how much to transfer in this operation */
1362             rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1363 
1364             opal_atomic_add_64(&module->opcount, 1);
1365 
1366             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1367                              "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1368                              origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1369                              (unsigned long) target_iovec[target_iov_index].iov_len));
1370 
1371             ret = PtlPut(md_h,
1372                          (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1373                          rdma_len,
1374                          PTL_ACK_REQ,
1375                          peer,
1376                          pt_index,
1377                          match_bits,
1378                          offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1379                          user_ptr,
1380                          0);
1381             if (OPAL_UNLIKELY(PTL_OK != ret)) {
1382                 opal_atomic_add_64(&module->opcount, -1);
1383                 return ret;
1384             }
1385 
1386             /* adjust io vectors */
1387             origin_iovec[origin_iov_index].iov_len -= rdma_len;
1388             target_iovec[target_iov_index].iov_len -= rdma_len;
1389             origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1390             target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1391 
1392             origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1393             target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1394         }
1395     } while (!done);
1396 
1397     return OMPI_SUCCESS;
1398 }
1399 
1400 /* perform atomic operation on (non)contiguous local and noncontiguous remote */
1401 static int
atomic_to_noncontig(ompi_osc_portals4_module_t * module,ptl_handle_md_t md_h,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,struct ompi_op_t * op,void * user_ptr)1402 atomic_to_noncontig(ompi_osc_portals4_module_t *module,
1403                     ptl_handle_md_t   md_h,
1404                     const void       *origin_address,
1405                     int               origin_count,
1406                     ompi_datatype_t  *origin_datatype,
1407                     ptl_process_t     peer,
1408                     int               target_count,
1409                     ompi_datatype_t  *target_datatype,
1410                     size_t            offset,
1411                     ptl_pt_index_t    pt_index,
1412                     ptl_match_bits_t  match_bits,
1413                     struct ompi_op_t *op,
1414                     void             *user_ptr)
1415 {
1416     struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1417     opal_convertor_t origin_convertor, target_convertor;
1418     uint32_t origin_iov_count, target_iov_count;
1419     uint32_t origin_iov_index, target_iov_index;
1420     ptl_op_t ptl_op;
1421     ptl_datatype_t ptl_dt;
1422     /* needed for opal_convertor_raw but not used */
1423     size_t origin_size, target_size, atomic_len;
1424     int ret;
1425     bool done;
1426 
1427     /* prepare convertors for the source and target. these convertors will be used to determine the
1428      * contiguous segments within the source and target. */
1429     OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1430     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1431                                                     (void*)origin_address, 0, &origin_convertor);
1432     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1433         return ret;
1434     }
1435 
1436     OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1437     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1438                                                     (void *)NULL, 0, &target_convertor);
1439     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1440         return ret;
1441     }
1442 
1443     ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1444     if (OMPI_SUCCESS != ret) {
1445         opal_output(ompi_osc_base_framework.framework_output,
1446                 "datatype is not currently supported");
1447         return OMPI_ERR_NOT_SUPPORTED;
1448     }
1449     ret = ompi_osc_portals4_get_op(op, &ptl_op);
1450     if (OMPI_SUCCESS != ret) {
1451         opal_output(ompi_osc_base_framework.framework_output,
1452                 "operation is not currently supported");
1453         return OMPI_ERR_NOT_SUPPORTED;
1454     }
1455 
1456     origin_iov_index = 0;
1457     origin_iov_count = 0;
1458 
1459     do {
1460         /* decode segments of the remote data */
1461         target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1462         target_iov_index = 0;
1463 
1464         /* opal_convertor_raw returns done when it has reached the end of the data */
1465         done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1466 
1467         /* loop on the target segments until we have exhaused the decoded source data */
1468         while (target_iov_index != target_iov_count) {
1469             if (origin_iov_index == origin_iov_count) {
1470                 /* decode segments of the target buffer */
1471                 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1472                 origin_iov_index = 0;
1473                 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1474             }
1475 
1476             /* we already checked that the target was large enough. this should be impossible */
1477             assert (0 != origin_iov_count);
1478 
1479             /* determine how much to transfer in this operation */
1480             atomic_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), module->atomic_max);
1481 
1482             opal_atomic_add_64(&module->opcount, 1);
1483 
1484             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1485                              "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1486                              origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1487                              (unsigned long) target_iovec[target_iov_index].iov_len));
1488 
1489             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1490                          "%s,%d Atomic", __FUNCTION__, __LINE__));
1491             ret = PtlAtomic(md_h,
1492                             (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1493                             atomic_len,
1494                             PTL_ACK_REQ,
1495                             peer,
1496                             pt_index,
1497                             match_bits,
1498                             offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1499                             user_ptr,
1500                             0,
1501                             ptl_op,
1502                             ptl_dt);
1503             if (OPAL_UNLIKELY(PTL_OK != ret)) {
1504                 opal_atomic_add_64(&module->opcount, -1);
1505                 return ret;
1506             }
1507 
1508             /* adjust io vectors */
1509             origin_iovec[origin_iov_index].iov_len -= atomic_len;
1510             target_iovec[target_iov_index].iov_len -= atomic_len;
1511             origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + atomic_len);
1512             target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + atomic_len);
1513 
1514             origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1515             target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1516         }
1517     } while (!done);
1518 
1519     return OMPI_SUCCESS;
1520 }
1521 
1522 /* get from a noncontiguous remote to an (non)contiguous local */
1523 static int
get_from_noncontig(int64_t * opcount,ptl_handle_md_t md_h,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)1524 get_from_noncontig(int64_t          *opcount,
1525                    ptl_handle_md_t   md_h,
1526                    const void       *origin_address,
1527                    int               origin_count,
1528                    ompi_datatype_t  *origin_datatype,
1529                    ptl_process_t     peer,
1530                    int               target_count,
1531                    ompi_datatype_t  *target_datatype,
1532                    size_t            offset,
1533                    ptl_pt_index_t    pt_index,
1534                    ptl_match_bits_t  match_bits,
1535                    void             *user_ptr)
1536 {
1537     struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1538     opal_convertor_t origin_convertor, target_convertor;
1539     uint32_t origin_iov_count, target_iov_count;
1540     uint32_t origin_iov_index, target_iov_index;
1541     /* needed for opal_convertor_raw but not used */
1542     size_t origin_size, target_size, rdma_len;
1543     size_t max_rdma_len = mca_osc_portals4_component.ptl_max_msg_size;
1544     int ret;
1545     bool done;
1546 
1547     /* prepare convertors for the source and target. these convertors will be used to determine the
1548      * contiguous segments within the source and target. */
1549     OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1550     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1551                                                     (void*)origin_address, 0, &origin_convertor);
1552     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1553         return ret;
1554     }
1555 
1556     OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1557     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1558                                                     (void *)NULL, 0, &target_convertor);
1559     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1560         return ret;
1561     }
1562 
1563     origin_iov_index = 0;
1564     origin_iov_count = 0;
1565 
1566     do {
1567         /* decode segments of the remote data */
1568         target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1569         target_iov_index = 0;
1570 
1571         /* opal_convertor_raw returns done when it has reached the end of the data */
1572         done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1573 
1574         /* loop on the target segments until we have exhaused the decoded source data */
1575         while (target_iov_index != target_iov_count) {
1576             if (origin_iov_index == origin_iov_count) {
1577                 /* decode segments of the target buffer */
1578                 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1579                 origin_iov_index = 0;
1580                 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1581             }
1582 
1583             /* we already checked that the target was large enough. this should be impossible */
1584             assert (0 != origin_iov_count);
1585 
1586             /* determine how much to transfer in this operation */
1587             rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1588 
1589             opal_atomic_add_64(opcount, 1);
1590 
1591             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1592                              "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1593                              origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1594                              (unsigned long) target_iovec[target_iov_index].iov_len));
1595 
1596             ret = PtlGet(md_h,
1597                          (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1598                          rdma_len,
1599                          peer,
1600                          pt_index,
1601                          match_bits,
1602                          offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1603                          user_ptr);
1604             if (OPAL_UNLIKELY(PTL_OK != ret)) {
1605                 opal_atomic_add_64(opcount, -1);
1606                 return ret;
1607             }
1608 
1609             /* adjust io vectors */
1610             origin_iovec[origin_iov_index].iov_len -= rdma_len;
1611             target_iovec[target_iov_index].iov_len -= rdma_len;
1612             origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1613             target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1614 
1615             origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1616             target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1617         }
1618     } while (!done);
1619 
1620     return OMPI_SUCCESS;
1621 }
1622 
1623 /* get from a noncontiguous remote to an (non)contiguous local */
1624 static int
atomic_get_from_noncontig(ompi_osc_portals4_module_t * module,ptl_handle_md_t md_h,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)1625 atomic_get_from_noncontig(ompi_osc_portals4_module_t *module,
1626                           ptl_handle_md_t   md_h,
1627                           const void       *origin_address,
1628                           int               origin_count,
1629                           ompi_datatype_t  *origin_datatype,
1630                           ptl_process_t     peer,
1631                           int               target_count,
1632                           ompi_datatype_t  *target_datatype,
1633                           size_t            offset,
1634                           ptl_pt_index_t    pt_index,
1635                           ptl_match_bits_t  match_bits,
1636                           void             *user_ptr)
1637 {
1638     struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1639     opal_convertor_t origin_convertor, target_convertor;
1640     uint32_t origin_iov_count, target_iov_count;
1641     uint32_t origin_iov_index, target_iov_index;
1642     /* needed for opal_convertor_raw but not used */
1643     size_t origin_size, target_size, rdma_len;
1644     size_t max_rdma_len = module->fetch_atomic_max;
1645     int ret;
1646     bool done;
1647 
1648     /* prepare convertors for the source and target. these convertors will be used to determine the
1649      * contiguous segments within the source and target. */
1650     OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1651     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1652                                                     (void*)origin_address, 0, &origin_convertor);
1653     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1654         return ret;
1655     }
1656 
1657     OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1658     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1659                                                     (void *)NULL, 0, &target_convertor);
1660     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1661         return ret;
1662     }
1663 
1664     origin_iov_index = 0;
1665     origin_iov_count = 0;
1666 
1667     do {
1668         /* decode segments of the remote data */
1669         target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1670         target_iov_index = 0;
1671 
1672         /* opal_convertor_raw returns done when it has reached the end of the data */
1673         done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1674 
1675         /* loop on the target segments until we have exhaused the decoded source data */
1676         while (target_iov_index != target_iov_count) {
1677             if (origin_iov_index == origin_iov_count) {
1678                 /* decode segments of the target buffer */
1679                 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1680                 origin_iov_index = 0;
1681                 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1682             }
1683 
1684             /* we already checked that the target was large enough. this should be impossible */
1685             assert (0 != origin_iov_count);
1686 
1687             /* determine how much to transfer in this operation */
1688             rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1689 
1690             opal_atomic_add_64(&module->opcount, 1);
1691 
1692             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1693                              "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1694                              origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1695                              (unsigned long) target_iovec[target_iov_index].iov_len));
1696 
1697             ret = PtlGet(md_h,
1698                          (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1699                          rdma_len,
1700                          peer,
1701                          pt_index,
1702                          match_bits,
1703                          offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1704                          user_ptr);
1705             if (OPAL_UNLIKELY(PTL_OK != ret)) {
1706                 opal_atomic_add_64(&module->opcount, -1);
1707                 return ret;
1708             }
1709 
1710             /* adjust io vectors */
1711             origin_iovec[origin_iov_index].iov_len -= rdma_len;
1712             target_iovec[target_iov_index].iov_len -= rdma_len;
1713             origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1714             target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1715 
1716             origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1717             target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1718         }
1719     } while (!done);
1720 
1721     return OMPI_SUCCESS;
1722 }
1723 
1724 /* swap from a noncontiguous remote to an (non)contiguous local */
1725 static int
swap_from_noncontig(ompi_osc_portals4_module_t * module,ptl_handle_md_t result_md_h,const void * result_address,int result_count,ompi_datatype_t * result_datatype,ptl_handle_md_t origin_md_h,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)1726 swap_from_noncontig(ompi_osc_portals4_module_t *module,
1727                     ptl_handle_md_t   result_md_h,
1728                     const void       *result_address,
1729                     int               result_count,
1730                     ompi_datatype_t  *result_datatype,
1731                     ptl_handle_md_t   origin_md_h,
1732                     const void       *origin_address,
1733                     int               origin_count,
1734                     ompi_datatype_t  *origin_datatype,
1735                     ptl_process_t     peer,
1736                     int               target_count,
1737                     ompi_datatype_t  *target_datatype,
1738                     size_t            offset,
1739                     ptl_pt_index_t    pt_index,
1740                     ptl_match_bits_t  match_bits,
1741                     void             *user_ptr)
1742 {
1743     struct iovec result_iovec[OSC_PORTALS4_IOVEC_MAX], origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1744     opal_convertor_t result_convertor, origin_convertor, target_convertor;
1745     uint32_t result_iov_count, origin_iov_count, target_iov_count;
1746     uint32_t result_iov_index, origin_iov_index, target_iov_index;
1747     /* needed for opal_convertor_raw but not used */
1748     size_t result_size, origin_size, target_size, rdma_len;
1749     size_t max_rdma_len = module->fetch_atomic_max;
1750     ptl_datatype_t ptl_dt;
1751 
1752     int ret;
1753     bool done;
1754 
1755     /* prepare convertors for the result, source and target. these convertors will be used to determine the
1756      * contiguous segments within the source and target. */
1757     OBJ_CONSTRUCT(&result_convertor, opal_convertor_t);
1758     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &result_datatype->super, result_count,
1759                                                     (void*)result_address, 0, &result_convertor);
1760     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1761         return ret;
1762     }
1763 
1764     OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1765     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1766                                                     (void*)origin_address, 0, &origin_convertor);
1767     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1768         return ret;
1769     }
1770 
1771     OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1772     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1773                                                     (void *)NULL, 0, &target_convertor);
1774     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1775         return ret;
1776     }
1777 
1778     ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1779     if (OMPI_SUCCESS != ret) {
1780         opal_output(ompi_osc_base_framework.framework_output,
1781                 "datatype is not currently supported");
1782         return OMPI_ERR_NOT_SUPPORTED;
1783     }
1784 
1785     result_iov_index = 0;
1786     result_iov_count = 0;
1787     origin_iov_index = 0;
1788     origin_iov_count = 0;
1789 
1790     do {
1791         /* decode segments of the remote data */
1792         target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1793         target_iov_index = 0;
1794 
1795         /* opal_convertor_raw returns done when it has reached the end of the data */
1796         done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1797 
1798         /* loop on the target segments until we have exhaused the decoded source data */
1799         while (target_iov_index != target_iov_count) {
1800             if (result_iov_index == result_iov_count) {
1801                 /* decode segments of the target buffer */
1802                 result_iov_count = OSC_PORTALS4_IOVEC_MAX;
1803                 result_iov_index = 0;
1804                 (void) opal_convertor_raw (&result_convertor, result_iovec, &result_iov_count, &result_size);
1805             }
1806             if (origin_iov_index == origin_iov_count) {
1807                 /* decode segments of the target buffer */
1808                 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1809                 origin_iov_index = 0;
1810                 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1811             }
1812 
1813             /* we already checked that the target was large enough. this should be impossible */
1814             assert (0 != result_iov_count);
1815             assert (0 != origin_iov_count);
1816 
1817             /* determine how much to transfer in this operation */
1818             rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1819 
1820             opal_atomic_add_64(&module->opcount, 1);
1821 
1822             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1823                              "performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu",
1824                              result_iovec[result_iov_index].iov_base,
1825                              origin_iovec[origin_iov_index].iov_base,
1826                              target_iovec[target_iov_index].iov_base,
1827                              (unsigned long) target_iovec[target_iov_index].iov_len));
1828 
1829             ret = PtlSwap(result_md_h,
1830                           (ptl_size_t)result_iovec[result_iov_index].iov_base,
1831                           origin_md_h,
1832                           (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1833                           rdma_len,
1834                           peer,
1835                           pt_index,
1836                           match_bits,
1837                           offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1838                           user_ptr,
1839                           0,
1840                           NULL,
1841                           PTL_SWAP,
1842                           ptl_dt);
1843             if (PTL_OK != ret) {
1844                 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1845                                      "%s:%d PtlSwap failed with return value %d",
1846                                      __FUNCTION__, __LINE__, ret);
1847                 opal_atomic_add_64(&module->opcount, -1);
1848                 return ret;
1849             }
1850 
1851             /* adjust io vectors */
1852             result_iovec[result_iov_index].iov_len -= rdma_len;
1853             origin_iovec[origin_iov_index].iov_len -= rdma_len;
1854             target_iovec[target_iov_index].iov_len -= rdma_len;
1855             result_iovec[result_iov_index].iov_base = (void *)((intptr_t) result_iovec[result_iov_index].iov_base + rdma_len);
1856             origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1857             target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1858 
1859             result_iov_index += (0 == result_iovec[result_iov_index].iov_len);
1860             origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1861             target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1862         }
1863     } while (!done);
1864 
1865     return OMPI_SUCCESS;
1866 }
1867 
1868 /* swap from a noncontiguous remote to an (non)contiguous local */
1869 static int
fetch_atomic_from_noncontig(ompi_osc_portals4_module_t * module,ptl_handle_md_t result_md_h,const void * result_address,int result_count,ompi_datatype_t * result_datatype,ptl_handle_md_t origin_md_h,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,struct ompi_op_t * op,void * user_ptr)1870 fetch_atomic_from_noncontig(ompi_osc_portals4_module_t *module,
1871                             ptl_handle_md_t   result_md_h,
1872                             const void       *result_address,
1873                             int               result_count,
1874                             ompi_datatype_t  *result_datatype,
1875                             ptl_handle_md_t   origin_md_h,
1876                             const void       *origin_address,
1877                             int               origin_count,
1878                             ompi_datatype_t  *origin_datatype,
1879                             ptl_process_t     peer,
1880                             int               target_count,
1881                             ompi_datatype_t  *target_datatype,
1882                             size_t            offset,
1883                             ptl_pt_index_t    pt_index,
1884                             ptl_match_bits_t  match_bits,
1885                             struct ompi_op_t *op,
1886                             void             *user_ptr)
1887 {
1888     struct iovec result_iovec[OSC_PORTALS4_IOVEC_MAX], origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1889     opal_convertor_t result_convertor, origin_convertor, target_convertor;
1890     uint32_t result_iov_count, origin_iov_count, target_iov_count;
1891     uint32_t result_iov_index, origin_iov_index, target_iov_index;
1892     /* needed for opal_convertor_raw but not used */
1893     size_t result_size, origin_size, target_size, rdma_len;
1894     size_t max_rdma_len = module->fetch_atomic_max;
1895     ptl_op_t ptl_op;
1896     ptl_datatype_t ptl_dt;
1897 
1898     int ret;
1899     bool done;
1900 
1901     /* prepare convertors for the result, source and target. these convertors will be used to determine the
1902      * contiguous segments within the source and target. */
1903     OBJ_CONSTRUCT(&result_convertor, opal_convertor_t);
1904     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &result_datatype->super, result_count,
1905                                                     (void*)result_address, 0, &result_convertor);
1906     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1907         return ret;
1908     }
1909 
1910     OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1911     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1912                                                     (void*)origin_address, 0, &origin_convertor);
1913     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1914         return ret;
1915     }
1916 
1917     OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1918     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1919                                                     (void *)NULL, 0, &target_convertor);
1920     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1921         return ret;
1922     }
1923 
1924     ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1925     if (OMPI_SUCCESS != ret) {
1926         opal_output(ompi_osc_base_framework.framework_output,
1927                 "datatype is not currently supported");
1928         return OMPI_ERR_NOT_SUPPORTED;
1929     }
1930     ret = ompi_osc_portals4_get_op(op, &ptl_op);
1931     if (OMPI_SUCCESS != ret) {
1932         opal_output(ompi_osc_base_framework.framework_output,
1933                 "operation is not currently supported");
1934         return OMPI_ERR_NOT_SUPPORTED;
1935     }
1936 
1937     result_iov_index = 0;
1938     result_iov_count = 0;
1939     origin_iov_index = 0;
1940     origin_iov_count = 0;
1941 
1942     do {
1943         /* decode segments of the remote data */
1944         target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1945         target_iov_index = 0;
1946 
1947         /* opal_convertor_raw returns done when it has reached the end of the data */
1948         done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1949 
1950         /* loop on the target segments until we have exhaused the decoded source data */
1951         while (target_iov_index != target_iov_count) {
1952             if (result_iov_index == result_iov_count) {
1953                 /* decode segments of the target buffer */
1954                 result_iov_count = OSC_PORTALS4_IOVEC_MAX;
1955                 result_iov_index = 0;
1956                 (void) opal_convertor_raw (&result_convertor, result_iovec, &result_iov_count, &result_size);
1957             }
1958             if (origin_iov_index == origin_iov_count) {
1959                 /* decode segments of the target buffer */
1960                 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1961                 origin_iov_index = 0;
1962                 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1963             }
1964 
1965             /* we already checked that the target was large enough. this should be impossible */
1966             assert (0 != result_iov_count);
1967             assert (0 != origin_iov_count);
1968 
1969             /* determine how much to transfer in this operation */
1970             rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1971 
1972             opal_atomic_add_64(&module->opcount, 1);
1973 
1974             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1975                              "performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu",
1976                              result_iovec[result_iov_index].iov_base,
1977                              origin_iovec[origin_iov_index].iov_base,
1978                              target_iovec[target_iov_index].iov_base,
1979                              (unsigned long) target_iovec[target_iov_index].iov_len));
1980 
1981             ret = PtlFetchAtomic(result_md_h,
1982                                  (ptl_size_t)result_iovec[result_iov_index].iov_base,
1983                                  origin_md_h,
1984                                  (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1985                                  rdma_len,
1986                                  peer,
1987                                  pt_index,
1988                                  match_bits,
1989                                  offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1990                                  user_ptr,
1991                                  0,
1992                                  ptl_op,
1993                                  ptl_dt);
1994             if (PTL_OK != ret) {
1995                 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1996                                      "%s:%d PtlFetchAtomic failed with return value %d",
1997                                      __FUNCTION__, __LINE__, ret);
1998                 opal_atomic_add_64(&module->opcount, -1);
1999                 return ret;
2000             }
2001 
2002             /* adjust io vectors */
2003             result_iovec[result_iov_index].iov_len -= rdma_len;
2004             origin_iovec[origin_iov_index].iov_len -= rdma_len;
2005             target_iovec[target_iov_index].iov_len -= rdma_len;
2006             result_iovec[result_iov_index].iov_base = (void *)((intptr_t) result_iovec[result_iov_index].iov_base + rdma_len);
2007             origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
2008             target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
2009 
2010             result_iov_index += (0 == result_iovec[result_iov_index].iov_len);
2011             origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
2012             target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
2013         }
2014     } while (!done);
2015 
2016     return OMPI_SUCCESS;
2017 }
2018 
2019 int
ompi_osc_portals4_rput(const void * origin_addr,int origin_count,struct ompi_datatype_t * origin_dt,int target,ptrdiff_t target_disp,int target_count,struct ompi_datatype_t * target_dt,struct ompi_win_t * win,struct ompi_request_t ** ompi_req)2020 ompi_osc_portals4_rput(const void *origin_addr,
2021                        int origin_count,
2022                        struct ompi_datatype_t *origin_dt,
2023                        int target,
2024                        ptrdiff_t target_disp,
2025                        int target_count,
2026                        struct ompi_datatype_t *target_dt,
2027                        struct ompi_win_t *win,
2028                        struct ompi_request_t **ompi_req)
2029 {
2030     int ret;
2031     ompi_osc_portals4_request_t *request;
2032     ompi_osc_portals4_module_t *module =
2033         (ompi_osc_portals4_module_t*) win->w_osc_module;
2034     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2035     size_t size, offset;
2036     ptrdiff_t length, origin_lb, target_lb, extent;
2037 
2038     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2039                          "rput: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
2040                          (unsigned long) origin_addr, origin_count,
2041                          origin_dt->name, target, (unsigned long) target_disp,
2042                          target_count, target_dt->name,
2043                          (unsigned long) win));
2044 
2045     OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
2046     if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
2047     *ompi_req = &request->super;
2048 
2049     offset = get_displacement(module, target) * target_disp;
2050 
2051     if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2052         ret = put_to_noncontig(&module->opcount,
2053                                module->req_md_h,
2054                                origin_addr,
2055                                origin_count,
2056                                origin_dt,
2057                                peer,
2058                                target_count,
2059                                target_dt,
2060                                offset,
2061                                module->pt_idx,
2062                                module->match_bits,
2063                                request);
2064         if (PTL_OK != ret) {
2065             OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2066             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2067                          "%s,%d put_to_noncontig() failed: ret = %d",
2068                          __FUNCTION__, __LINE__, ret));
2069             return ret;
2070         }
2071     } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2072         ret = put_from_iovec(module,
2073                              origin_addr,
2074                              origin_count,
2075                              origin_dt,
2076                              peer,
2077                              target_count,
2078                              target_dt,
2079                              offset,
2080                              module->pt_idx,
2081                              module->match_bits,
2082                              request);
2083         if (PTL_OK != ret) {
2084             OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2085             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2086                          "%s,%d put_from_iovec() failed: ret = %d",
2087                          __FUNCTION__, __LINE__, ret));
2088             return ret;
2089         }
2090     } else {
2091         ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2092         if (OMPI_SUCCESS != ret) {
2093             OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2094             return ret;
2095         }
2096         ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2097         if (OMPI_SUCCESS != ret) {
2098             OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2099             return ret;
2100         }
2101         ompi_datatype_type_size(origin_dt, &size);
2102         length = size * origin_count;
2103 
2104         request->ops_expected += number_of_fragments(length, mca_osc_portals4_component.ptl_max_msg_size);
2105 
2106         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2107                      "%s,%d RPut(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)",
2108                      __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount));
2109         ret = segmentedPut(&module->opcount,
2110                            module->req_md_h,
2111                            (ptl_size_t) origin_addr + origin_lb,
2112                            length,
2113                            mca_osc_portals4_component.ptl_max_msg_size,
2114                            PTL_ACK_REQ,
2115                            peer,
2116                            module->pt_idx,
2117                            module->match_bits,
2118                            offset + target_lb,
2119                            request,
2120                            0);
2121         if (OMPI_SUCCESS != ret) {
2122             OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2123             return ret;
2124         }
2125     }
2126 
2127     return OMPI_SUCCESS;
2128 }
2129 
2130 
2131 int
ompi_osc_portals4_rget(void * origin_addr,int origin_count,struct ompi_datatype_t * origin_dt,int target,ptrdiff_t target_disp,int target_count,struct ompi_datatype_t * target_dt,struct ompi_win_t * win,struct ompi_request_t ** ompi_req)2132 ompi_osc_portals4_rget(void *origin_addr,
2133                        int origin_count,
2134                        struct ompi_datatype_t *origin_dt,
2135                        int target,
2136                        ptrdiff_t target_disp,
2137                        int target_count,
2138                        struct ompi_datatype_t *target_dt,
2139                        struct ompi_win_t *win,
2140                        struct ompi_request_t **ompi_req)
2141 {
2142     int ret;
2143     ompi_osc_portals4_request_t *request;
2144     ompi_osc_portals4_module_t *module =
2145         (ompi_osc_portals4_module_t*) win->w_osc_module;
2146     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2147     size_t offset, size;
2148     ptrdiff_t length, origin_lb, target_lb, extent;
2149 
2150     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2151                          "rget: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
2152                          (unsigned long) origin_addr, origin_count,
2153                          origin_dt->name, target, (unsigned long) target_disp,
2154                          target_count, target_dt->name,
2155                          (unsigned long) win));
2156 
2157     OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
2158     if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
2159     *ompi_req = &request->super;
2160 
2161     offset = get_displacement(module, target) * target_disp;
2162 
2163     if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2164         ret = get_from_noncontig(&module->opcount,
2165                                  module->req_md_h,
2166                                  origin_addr,
2167                                  origin_count,
2168                                  origin_dt,
2169                                  peer,
2170                                  target_count,
2171                                  target_dt,
2172                                  offset,
2173                                  module->pt_idx,
2174                                  module->match_bits,
2175                                  request);
2176         if (PTL_OK != ret) {
2177             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2178                          "%s,%d get_from_noncontig() failed: ret = %d",
2179                          __FUNCTION__, __LINE__, ret));
2180             return ret;
2181         }
2182     } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2183         ret = get_to_iovec(module,
2184                            origin_addr,
2185                            origin_count,
2186                            origin_dt,
2187                            peer,
2188                            target_count,
2189                            target_dt,
2190                            offset,
2191                            module->pt_idx,
2192                            module->match_bits,
2193                            request);
2194         if (PTL_OK != ret) {
2195             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2196                          "%s,%d get_to_iovec() failed: ret = %d",
2197                          __FUNCTION__, __LINE__, ret));
2198             return ret;
2199         }
2200     } else {
2201         ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2202         if (OMPI_SUCCESS != ret) {
2203             return ret;
2204         }
2205         ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2206         if (OMPI_SUCCESS != ret) {
2207             return ret;
2208         }
2209         ompi_datatype_type_size(origin_dt, &size);
2210         length = size * origin_count;
2211 
2212         request->ops_expected += number_of_fragments(length, mca_osc_portals4_component.ptl_max_msg_size);
2213 
2214         OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
2215                               "%s,%d RGet", __FUNCTION__, __LINE__));
2216         ret = segmentedGet(&module->opcount,
2217                            module->req_md_h,
2218                            (ptl_size_t) origin_addr + origin_lb,
2219                            length,
2220                            mca_osc_portals4_component.ptl_max_msg_size,
2221                            peer,
2222                            module->pt_idx,
2223                            module->match_bits,
2224                            offset + target_lb,
2225                            request);
2226         if (OMPI_SUCCESS != ret) {
2227             OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2228             return ret;
2229         }
2230     }
2231 
2232     return OMPI_SUCCESS;
2233 }
2234 
2235 
2236 int
ompi_osc_portals4_raccumulate(const void * origin_addr,int origin_count,struct ompi_datatype_t * origin_dt,int target,ptrdiff_t target_disp,int target_count,struct ompi_datatype_t * target_dt,struct ompi_op_t * op,struct ompi_win_t * win,struct ompi_request_t ** ompi_req)2237 ompi_osc_portals4_raccumulate(const void *origin_addr,
2238                               int origin_count,
2239                               struct ompi_datatype_t *origin_dt,
2240                               int target,
2241                               ptrdiff_t target_disp,
2242                               int target_count,
2243                               struct ompi_datatype_t *target_dt,
2244                               struct ompi_op_t *op,
2245                               struct ompi_win_t *win,
2246                               struct ompi_request_t **ompi_req)
2247 {
2248     int ret;
2249     ompi_osc_portals4_request_t *request;
2250     ompi_osc_portals4_module_t *module =
2251         (ompi_osc_portals4_module_t*) win->w_osc_module;
2252     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2253     size_t offset, size;
2254     ptl_op_t ptl_op;
2255     ptl_datatype_t ptl_dt;
2256     ptrdiff_t sent, length, origin_lb, target_lb, extent;
2257 
2258     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2259                          "raccumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s 0x%lx",
2260                          (unsigned long) origin_addr, origin_count,
2261                          origin_dt->name, target, (unsigned long) target_disp,
2262                          target_count, target_dt->name,
2263                          op->o_name,
2264                          (unsigned long) win));
2265 
2266     OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
2267     if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
2268     *ompi_req = &request->super;
2269 
2270     offset = get_displacement(module, target) * target_disp;
2271 
2272     if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2273         if (MPI_REPLACE == op) {
2274             ret = atomic_put_to_noncontig(module,
2275                                           module->req_md_h,
2276                                           origin_addr,
2277                                           origin_count,
2278                                           origin_dt,
2279                                           peer,
2280                                           target_count,
2281                                           target_dt,
2282                                           offset,
2283                                           module->pt_idx,
2284                                           module->match_bits,
2285                                           request);
2286             if (PTL_OK != ret) {
2287                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2288                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2289                     "%s,%d atomic_put_to_noncontig() failed: ret = %d",
2290                     __FUNCTION__, __LINE__, ret));
2291                 return ret;
2292             }
2293         } else {
2294             ret = atomic_to_noncontig(module,
2295                                       module->req_md_h,
2296                                       origin_addr,
2297                                       origin_count,
2298                                       origin_dt,
2299                                       peer,
2300                                       target_count,
2301                                       target_dt,
2302                                       offset,
2303                                       module->pt_idx,
2304                                       module->match_bits,
2305                                       op,
2306                                       request);
2307             if (PTL_OK != ret) {
2308                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2309                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2310                     "%s,%d atomic_to_noncontig() failed: ret = %d",
2311                     __FUNCTION__, __LINE__, ret));
2312                 return ret;
2313             }
2314         }
2315     } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2316         if (MPI_REPLACE == op) {
2317             ret = atomic_put_from_iovec(module,
2318                                         origin_addr,
2319                                         origin_count,
2320                                         origin_dt,
2321                                         peer,
2322                                         target_count,
2323                                         target_dt,
2324                                         offset,
2325                                         module->pt_idx,
2326                                         module->match_bits,
2327                                         request);
2328             if (PTL_OK != ret) {
2329                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2330                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2331                     "%s,%d atomic_put_from_iovec() failed: ret = %d",
2332                     __FUNCTION__, __LINE__, ret));
2333                 return ret;
2334             }
2335         } else {
2336             ret = atomic_from_iovec(module,
2337                                     origin_addr,
2338                                     origin_count,
2339                                     origin_dt,
2340                                     peer,
2341                                     target_count,
2342                                     target_dt,
2343                                     offset,
2344                                     module->pt_idx,
2345                                     module->match_bits,
2346                                     op,
2347                                     request);
2348             if (PTL_OK != ret) {
2349                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2350                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2351                     "%s,%d atomic_from_iovec() failed: ret = %d",
2352                     __FUNCTION__, __LINE__, ret));
2353                 return ret;
2354             }
2355         }
2356     } else {
2357         ptl_size_t md_offset;
2358 
2359         ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2360         if (OMPI_SUCCESS != ret) {
2361             OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2362             return ret;
2363         }
2364         ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2365         if (OMPI_SUCCESS != ret) {
2366             OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2367             return ret;
2368         }
2369         ompi_datatype_type_size(origin_dt, &size);
2370         length = size * origin_count;
2371         sent = 0;
2372 
2373         md_offset = (ptl_size_t) origin_addr;
2374 
2375         request->ops_expected += number_of_fragments(length, module->atomic_max);
2376 
2377         if (MPI_REPLACE == op) {
2378             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2379                                  "%s,%d Put", __FUNCTION__, __LINE__));
2380             ret = segmentedPut(&module->opcount,
2381                                module->req_md_h,
2382                                md_offset + origin_lb,
2383                                length,
2384                                module->atomic_max,
2385                                PTL_ACK_REQ,
2386                                peer,
2387                                module->pt_idx,
2388                                module->match_bits,
2389                                offset + target_lb,
2390                                request,
2391                                0);
2392             if (OMPI_SUCCESS != ret) {
2393                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2394                 return ret;
2395             }
2396         } else {
2397             ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
2398             if (OMPI_SUCCESS != ret) {
2399                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2400                 opal_output(ompi_osc_base_framework.framework_output,
2401                         "datatype is not currently supported");
2402                 return OMPI_ERR_NOT_SUPPORTED;
2403             }
2404             ret = ompi_osc_portals4_get_op(op, &ptl_op);
2405             if (OMPI_SUCCESS != ret) {
2406                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2407                 opal_output(ompi_osc_base_framework.framework_output,
2408                         "operation is not currently supported");
2409                 return OMPI_ERR_NOT_SUPPORTED;
2410             }
2411             do {
2412                 size_t msg_length = MIN(module->atomic_max, length - sent);
2413 
2414                 (void)opal_atomic_add_64(&module->opcount, 1);
2415 
2416                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2417                              "%s,%d Atomic", __FUNCTION__, __LINE__));
2418                 ret = PtlAtomic(module->req_md_h,
2419                                 md_offset + sent + origin_lb,
2420                                 msg_length,
2421                                 PTL_ACK_REQ,
2422                                 peer,
2423                                 module->pt_idx,
2424                                 module->match_bits,
2425                                 offset + sent + target_lb,
2426                                 request,
2427                                 0,
2428                                 ptl_op,
2429                                 ptl_dt);
2430                 if (OMPI_SUCCESS != ret) {
2431                     (void)opal_atomic_add_64(&module->opcount, -1);
2432                     OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2433                     return ret;
2434                 }
2435                 sent += msg_length;
2436             } while (sent < length);
2437         }
2438     }
2439 
2440     return OMPI_SUCCESS;
2441 }
2442 
2443 
2444 int
ompi_osc_portals4_rget_accumulate(const void * origin_addr,int origin_count,struct ompi_datatype_t * origin_dt,void * result_addr,int result_count,struct ompi_datatype_t * result_dt,int target,ptrdiff_t target_disp,int target_count,struct ompi_datatype_t * target_dt,struct ompi_op_t * op,struct ompi_win_t * win,struct ompi_request_t ** ompi_req)2445 ompi_osc_portals4_rget_accumulate(const void *origin_addr,
2446                                   int origin_count,
2447                                   struct ompi_datatype_t *origin_dt,
2448                                   void *result_addr,
2449                                   int result_count,
2450                                   struct ompi_datatype_t *result_dt,
2451                                   int target,
2452                                   ptrdiff_t target_disp,
2453                                   int target_count,
2454                                   struct ompi_datatype_t *target_dt,
2455                                   struct ompi_op_t *op,
2456                                   struct ompi_win_t *win,
2457                                   struct ompi_request_t **ompi_req)
2458 {
2459     int ret;
2460     ompi_osc_portals4_request_t *request;
2461     ompi_osc_portals4_module_t *module =
2462         (ompi_osc_portals4_module_t*) win->w_osc_module;
2463     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2464     size_t target_offset, size;
2465     ptl_op_t ptl_op;
2466     ptl_datatype_t ptl_dt;
2467     ptrdiff_t length, origin_lb, target_lb, result_lb, extent;
2468 
2469     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2470                          "rget_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx",
2471                          (unsigned long) origin_addr, origin_count,
2472                          origin_dt->name, (unsigned long) result_addr,
2473                          result_count, result_dt->name,
2474                          target, (unsigned long) target_disp,
2475                          target_count, target_dt->name,
2476                          op->o_name,
2477                          (unsigned long) win));
2478 
2479     OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
2480     if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
2481     *ompi_req = &request->super;
2482 
2483     target_offset = get_displacement(module, target) * target_disp;
2484 
2485     if (target_count > 0 && !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2486         if (MPI_REPLACE == op) {
2487             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2488                                  "rget_accumulate: MPI_REPLACE  non-contiguous target"));
2489             ret = swap_from_noncontig(module,
2490                                       module->req_md_h,
2491                                       result_addr,
2492                                       result_count,
2493                                       result_dt,
2494                                       module->md_h,
2495                                       origin_addr,
2496                                       origin_count,
2497                                       origin_dt,
2498                                       peer,
2499                                       target_count,
2500                                       target_dt,
2501                                       target_offset,
2502                                       module->pt_idx,
2503                                       module->match_bits,
2504                                       request);
2505             if (PTL_OK != ret) {
2506                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2507                              "%s,%d swap_from_noncontig() failed: ret = %d",
2508                              __FUNCTION__, __LINE__, ret));
2509                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2510                 return ret;
2511             }
2512         } else if (MPI_NO_OP == op) {
2513             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2514                                  "rget_accumulate: MPI_NO_OP  non-contiguous target"));
2515             ret = atomic_get_from_noncontig(module,
2516                                             module->req_md_h,
2517                                             result_addr,
2518                                             result_count,
2519                                             result_dt,
2520                                             peer,
2521                                             target_count,
2522                                             target_dt,
2523                                             target_offset,
2524                                             module->pt_idx,
2525                                             module->match_bits,
2526                                             request);
2527             if (PTL_OK != ret) {
2528                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2529                              "%s,%d atomic_get_from_noncontig() failed: ret = %d",
2530                              __FUNCTION__, __LINE__, ret));
2531                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2532                 return ret;
2533             }
2534         } else {
2535             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2536                                  "rget_accumulate: other-op  non-contiguous target"));
2537             ret = fetch_atomic_from_noncontig(module,
2538                                               module->req_md_h,
2539                                               result_addr,
2540                                               result_count,
2541                                               result_dt,
2542                                               module->md_h,
2543                                               origin_addr,
2544                                               origin_count,
2545                                               origin_dt,
2546                                               peer,
2547                                               target_count,
2548                                               target_dt,
2549                                               target_offset,
2550                                               module->pt_idx,
2551                                               module->match_bits,
2552                                               op,
2553                                               request);
2554             if (PTL_OK != ret) {
2555                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2556                     "%s,%d fetch_atomic_from_noncontig() failed: ret = %d",
2557                     __FUNCTION__, __LINE__, ret));
2558                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2559                 return ret;
2560             }
2561         }
2562     } else if ((origin_count > 0 && !ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) ||
2563                (result_count > 0 && !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count))) {
2564         if (MPI_REPLACE == op) {
2565             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2566                                  "rget_accumulate: MPI_REPLACE  non-contiguous origin/result"));
2567             ret = swap_to_iovec(module,
2568                                 result_addr,
2569                                 result_count,
2570                                 result_dt,
2571                                 origin_addr,
2572                                 origin_count,
2573                                 origin_dt,
2574                                 peer,
2575                                 target_count,
2576                                 target_dt,
2577                                 target_offset,
2578                                 module->pt_idx,
2579                                 module->match_bits,
2580                                 request);
2581             if (PTL_OK != ret) {
2582                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2583                              "%s,%d swap_to_iovec() failed: ret = %d",
2584                              __FUNCTION__, __LINE__, ret));
2585                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2586                 return ret;
2587             }
2588         } else if (MPI_NO_OP == op) {
2589             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2590                                  "rget_accumulate: MPI_NO_OP  non-contiguous origin/result"));
2591             ret = atomic_get_to_iovec(module,
2592                                       result_addr,
2593                                       result_count,
2594                                       result_dt,
2595                                       peer,
2596                                       target_count,
2597                                       target_dt,
2598                                       target_offset,
2599                                       module->pt_idx,
2600                                       module->match_bits,
2601                                       request);
2602             if (PTL_OK != ret) {
2603                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2604                              "%s,%d atomic_get_to_iovec() failed: ret = %d",
2605                              __FUNCTION__, __LINE__, ret));
2606                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2607                 return ret;
2608             }
2609         } else {
2610             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2611                                  "rget_accumulate: other-op  non-contiguous origin/result"));
2612             ret = fetch_atomic_to_iovec(module,
2613                                         result_addr,
2614                                         result_count,
2615                                         result_dt,
2616                                         origin_addr,
2617                                         origin_count,
2618                                         origin_dt,
2619                                         peer,
2620                                         target_count,
2621                                         target_dt,
2622                                         target_offset,
2623                                         module->pt_idx,
2624                                         module->match_bits,
2625                                         op,
2626                                         request);
2627             if (PTL_OK != ret) {
2628                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2629                              "%s,%d fetch_atomic_to_iovec() failed: ret = %d",
2630                              __FUNCTION__, __LINE__, ret));
2631                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2632                 return ret;
2633             }
2634         }
2635     } else {
2636         if (MPI_REPLACE == op) {
2637             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2638                                  "rget_accumulate: MPI_REPLACE  contiguous"));
2639             ptl_size_t result_md_offset, origin_md_offset;
2640 
2641             ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2642             if (OMPI_SUCCESS != ret) {
2643                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2644                 return ret;
2645             }
2646             ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2647             if (OMPI_SUCCESS != ret) {
2648                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2649                 return ret;
2650             }
2651             ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
2652             if (OMPI_SUCCESS != ret) {
2653                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2654                 return ret;
2655             }
2656             ompi_datatype_type_size(origin_dt, &size);
2657             length = size * origin_count;
2658 
2659             ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
2660             if (OMPI_SUCCESS != ret) {
2661                 opal_output(ompi_osc_base_framework.framework_output,
2662                         "datatype is not currently supported");
2663                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2664                 return OMPI_ERR_NOT_SUPPORTED;
2665             }
2666 
2667             result_md_offset = (ptl_size_t) result_addr;
2668             origin_md_offset = (ptl_size_t) origin_addr;
2669 
2670             request->ops_expected += number_of_fragments(length, module->fetch_atomic_max);
2671 
2672             ret = segmentedSwap(&module->opcount,
2673                                 module->req_md_h,
2674                                 result_md_offset + result_lb,
2675                                 module->md_h,
2676                                 origin_md_offset + origin_lb,
2677                                 length,
2678                                 module->fetch_atomic_max,
2679                                 peer,
2680                                 module->pt_idx,
2681                                 module->match_bits,
2682                                 target_offset + target_lb,
2683                                 request,
2684                                 ptl_dt);
2685             if (OMPI_SUCCESS != ret) {
2686                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2687                 return ret;
2688             }
2689         } else if (MPI_NO_OP == op) {
2690             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2691                                  "rget_accumulate: MPI_NO_OP  contiguous"));
2692             ptl_size_t md_offset;
2693 
2694             ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2695             if (OMPI_SUCCESS != ret) {
2696                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2697                 return ret;
2698             }
2699             ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
2700             if (OMPI_SUCCESS != ret) {
2701                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2702                 return ret;
2703             }
2704             ompi_datatype_type_size(target_dt, &size);
2705             length = size * target_count;
2706 
2707             md_offset = (ptl_size_t) result_addr;
2708 
2709             request->ops_expected += number_of_fragments(length, module->fetch_atomic_max);
2710 
2711             OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
2712                                   "%s,%d MPI_Get_accumulate", __FUNCTION__, __LINE__));
2713             ret = segmentedGet(&module->opcount,
2714                                module->req_md_h,
2715                                (ptl_size_t) md_offset + result_lb,
2716                                length,
2717                                module->fetch_atomic_max,
2718                                peer,
2719                                module->pt_idx,
2720                                module->match_bits,
2721                                target_offset + target_lb,
2722                                request);
2723             if (OMPI_SUCCESS != ret) {
2724                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2725                 return ret;
2726             }
2727         } else {
2728             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2729                                  "rget_accumulate: other-op  contiguous"));
2730             ptl_size_t result_md_offset, origin_md_offset;
2731 
2732             ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2733             if (OMPI_SUCCESS != ret) {
2734                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2735                 return ret;
2736             }
2737             ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2738             if (OMPI_SUCCESS != ret) {
2739                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2740                 return ret;
2741             }
2742             ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
2743             if (OMPI_SUCCESS != ret) {
2744                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2745                 return ret;
2746             }
2747             ompi_datatype_type_size(origin_dt, &size);
2748             length = size * origin_count;
2749 
2750             result_md_offset = (ptl_size_t) result_addr;
2751             origin_md_offset = (ptl_size_t) origin_addr;
2752 
2753             ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
2754             if (OMPI_SUCCESS != ret) {
2755                 opal_output(ompi_osc_base_framework.framework_output,
2756                         "datatype is not currently supported");
2757                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2758                 return OMPI_ERR_NOT_SUPPORTED;
2759             }
2760 
2761             ret = ompi_osc_portals4_get_op(op, &ptl_op);
2762             if (OMPI_SUCCESS != ret) {
2763                 opal_output(ompi_osc_base_framework.framework_output,
2764                         "operation is not currently supported");
2765                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2766                 return OMPI_ERR_NOT_SUPPORTED;
2767             }
2768 
2769             request->ops_expected += number_of_fragments(length, module->fetch_atomic_max);
2770 
2771             ret = segmentedFetchAtomic(&module->opcount,
2772                                        module->req_md_h,
2773                                        result_md_offset + result_lb,
2774                                        module->md_h,
2775                                        origin_md_offset + origin_lb,
2776                                        length,
2777                                        module->fetch_atomic_max,
2778                                        peer,
2779                                        module->pt_idx,
2780                                        module->match_bits,
2781                                        target_offset + target_lb,
2782                                        request,
2783                                        ptl_op,
2784                                        ptl_dt);
2785             if (OMPI_SUCCESS != ret) {
2786                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2787                 return ret;
2788             }
2789         }
2790     }
2791 
2792     return OMPI_SUCCESS;
2793 }
2794 
2795 
2796 int
ompi_osc_portals4_put(const void * origin_addr,int origin_count,struct ompi_datatype_t * origin_dt,int target,ptrdiff_t target_disp,int target_count,struct ompi_datatype_t * target_dt,struct ompi_win_t * win)2797 ompi_osc_portals4_put(const void *origin_addr,
2798                       int origin_count,
2799                       struct ompi_datatype_t *origin_dt,
2800                       int target,
2801                       ptrdiff_t target_disp,
2802                       int target_count,
2803                       struct ompi_datatype_t *target_dt,
2804                       struct ompi_win_t *win)
2805 {
2806     int ret;
2807     ompi_osc_portals4_module_t *module =
2808         (ompi_osc_portals4_module_t*) win->w_osc_module;
2809     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2810     size_t offset, size;
2811     ptrdiff_t length, origin_lb, target_lb, extent;
2812 
2813     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2814                          "put: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
2815                          (unsigned long) origin_addr, origin_count,
2816                          origin_dt->name, target, (unsigned long) target_disp,
2817                          target_count, target_dt->name,
2818                          (unsigned long) win));
2819 
2820     offset = get_displacement(module, target) * target_disp;
2821 
2822     if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2823         ret = put_to_noncontig(&module->opcount,
2824                                module->md_h,
2825                                origin_addr,
2826                                origin_count,
2827                                origin_dt,
2828                                peer,
2829                                target_count,
2830                                target_dt,
2831                                offset,
2832                                module->pt_idx,
2833                                module->match_bits,
2834                                NULL);
2835         if (PTL_OK != ret) {
2836             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2837                          "%s,%d put_to_noncontig() failed: ret = %d",
2838                          __FUNCTION__, __LINE__, ret));
2839             return ret;
2840         }
2841     } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2842         ret = put_from_iovec(module,
2843                              origin_addr,
2844                              origin_count,
2845                              origin_dt,
2846                              peer,
2847                              target_count,
2848                              target_dt,
2849                              offset,
2850                              module->pt_idx,
2851                              module->match_bits,
2852                              NULL);
2853         if (PTL_OK != ret) {
2854             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2855                          "%s,%d put_from_iovec() failed: ret = %d",
2856                          __FUNCTION__, __LINE__, ret));
2857             return ret;
2858         }
2859     } else {
2860         ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2861         if (OMPI_SUCCESS != ret) {
2862             return ret;
2863         }
2864         ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2865         if (OMPI_SUCCESS != ret) {
2866             return ret;
2867         }
2868         ompi_datatype_type_size(origin_dt, &size);
2869         length = size * origin_count;
2870 
2871         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2872                      "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)",
2873                      __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount));
2874         ret = segmentedPut(&module->opcount,
2875                            module->md_h,
2876                            (ptl_size_t) origin_addr + origin_lb,
2877                            length,
2878                            mca_osc_portals4_component.ptl_max_msg_size,
2879                            PTL_ACK_REQ,
2880                            peer,
2881                            module->pt_idx,
2882                            module->match_bits,
2883                            offset + target_lb,
2884                            NULL,
2885                            0);
2886         if (OMPI_SUCCESS != ret) {
2887             return ret;
2888         }
2889     }
2890 
2891     return OMPI_SUCCESS;
2892 }
2893 
2894 
2895 int
ompi_osc_portals4_get(void * origin_addr,int origin_count,struct ompi_datatype_t * origin_dt,int target,ptrdiff_t target_disp,int target_count,struct ompi_datatype_t * target_dt,struct ompi_win_t * win)2896 ompi_osc_portals4_get(void *origin_addr,
2897                       int origin_count,
2898                       struct ompi_datatype_t *origin_dt,
2899                       int target,
2900                       ptrdiff_t target_disp,
2901                       int target_count,
2902                       struct ompi_datatype_t *target_dt,
2903                       struct ompi_win_t *win)
2904 {
2905     int ret;
2906     ompi_osc_portals4_module_t *module =
2907         (ompi_osc_portals4_module_t*) win->w_osc_module;
2908     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2909     size_t offset, size;
2910     ptrdiff_t length, origin_lb, target_lb, extent;
2911 
2912     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2913                          "get: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
2914                          (unsigned long) origin_addr, origin_count,
2915                          origin_dt->name, target, (unsigned long) target_disp,
2916                          target_count, target_dt->name,
2917                          (unsigned long) win));
2918 
2919     offset = get_displacement(module, target) * target_disp;
2920 
2921     if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2922         ret = get_from_noncontig(&module->opcount,
2923                                  module->md_h,
2924                                  origin_addr,
2925                                  origin_count,
2926                                  origin_dt,
2927                                  peer,
2928                                  target_count,
2929                                  target_dt,
2930                                  offset,
2931                                  module->pt_idx,
2932                                  module->match_bits,
2933                                  NULL);
2934         if (PTL_OK != ret) {
2935             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2936                          "%s,%d get_from_noncontig() failed: ret = %d",
2937                          __FUNCTION__, __LINE__, ret));
2938             return ret;
2939         }
2940     } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2941         ret = get_to_iovec(module,
2942                            origin_addr,
2943                            origin_count,
2944                            origin_dt,
2945                            peer,
2946                            target_count,
2947                            target_dt,
2948                            offset,
2949                            module->pt_idx,
2950                            module->match_bits,
2951                            NULL);
2952         if (PTL_OK != ret) {
2953             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2954                          "%s,%d get_to_iovec() failed: ret = %d",
2955                          __FUNCTION__, __LINE__, ret));
2956             return ret;
2957         }
2958     } else {
2959         ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2960         if (OMPI_SUCCESS != ret) {
2961             return ret;
2962         }
2963         ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2964         if (OMPI_SUCCESS != ret) {
2965             return ret;
2966         }
2967         ompi_datatype_type_size(origin_dt, &size);
2968         length = size * origin_count;
2969 
2970         OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
2971                               "%s,%d Get", __FUNCTION__, __LINE__));
2972         ret = segmentedGet(&module->opcount,
2973                            module->md_h,
2974                            (ptl_size_t) origin_addr + origin_lb,
2975                            length,
2976                            mca_osc_portals4_component.ptl_max_msg_size,
2977                            peer,
2978                            module->pt_idx,
2979                            module->match_bits,
2980                            offset + target_lb,
2981                            NULL);
2982         if (OMPI_SUCCESS != ret) {
2983             return ret;
2984         }
2985     }
2986 
2987     return OMPI_SUCCESS;
2988 }
2989 
2990 
2991 int
ompi_osc_portals4_accumulate(const void * origin_addr,int origin_count,struct ompi_datatype_t * origin_dt,int target,ptrdiff_t target_disp,int target_count,struct ompi_datatype_t * target_dt,struct ompi_op_t * op,struct ompi_win_t * win)2992 ompi_osc_portals4_accumulate(const void *origin_addr,
2993                              int origin_count,
2994                              struct ompi_datatype_t *origin_dt,
2995                              int target,
2996                              ptrdiff_t target_disp,
2997                              int target_count,
2998                              struct ompi_datatype_t *target_dt,
2999                              struct ompi_op_t *op,
3000                              struct ompi_win_t *win)
3001 {
3002     int ret;
3003     ompi_osc_portals4_module_t *module =
3004         (ompi_osc_portals4_module_t*) win->w_osc_module;
3005     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
3006     size_t offset, size;
3007     ptl_op_t ptl_op;
3008     ptl_datatype_t ptl_dt;
3009     ptrdiff_t sent, length, origin_lb, target_lb, extent;
3010 
3011     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3012                          "accumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx",
3013                          (unsigned long) origin_addr, origin_count,
3014                          origin_dt->name, target, (unsigned long) target_disp,
3015                          target_count, target_dt->name,
3016                          op->o_name,
3017                          (unsigned long) win));
3018 
3019     offset = get_displacement(module, target) * target_disp;
3020 
3021     if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
3022         if (MPI_REPLACE == op) {
3023             ret = atomic_put_to_noncontig(module,
3024                                           module->md_h,
3025                                           origin_addr,
3026                                           origin_count,
3027                                           origin_dt,
3028                                           peer,
3029                                           target_count,
3030                                           target_dt,
3031                                           offset,
3032                                           module->pt_idx,
3033                                           module->match_bits,
3034                                           NULL);
3035             if (PTL_OK != ret) {
3036                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3037                     "%s,%d atomic_put_to_noncontig() failed: ret = %d",
3038                     __FUNCTION__, __LINE__, ret));
3039                 return ret;
3040             }
3041         } else {
3042             ret = atomic_to_noncontig(module,
3043                                       module->md_h,
3044                                       origin_addr,
3045                                       origin_count,
3046                                       origin_dt,
3047                                       peer,
3048                                       target_count,
3049                                       target_dt,
3050                                       offset,
3051                                       module->pt_idx,
3052                                       module->match_bits,
3053                                       op,
3054                                       NULL);
3055             if (PTL_OK != ret) {
3056                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3057                     "%s,%d atomic_to_noncontig() failed: ret = %d",
3058                     __FUNCTION__, __LINE__, ret));
3059                 return ret;
3060             }
3061         }
3062     } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
3063         if (MPI_REPLACE == op) {
3064             ret = atomic_put_from_iovec(module,
3065                                         origin_addr,
3066                                         origin_count,
3067                                         origin_dt,
3068                                         peer,
3069                                         target_count,
3070                                         target_dt,
3071                                         offset,
3072                                         module->pt_idx,
3073                                         module->match_bits,
3074                                         NULL);
3075             if (PTL_OK != ret) {
3076                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3077                     "%s,%d atomic_put_from_iovec() failed: ret = %d",
3078                     __FUNCTION__, __LINE__, ret));
3079                 return ret;
3080             }
3081         } else {
3082             ret = atomic_from_iovec(module,
3083                                     origin_addr,
3084                                     origin_count,
3085                                     origin_dt,
3086                                     peer,
3087                                     target_count,
3088                                     target_dt,
3089                                     offset,
3090                                     module->pt_idx,
3091                                     module->match_bits,
3092                                     op,
3093                                     NULL);
3094             if (PTL_OK != ret) {
3095                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3096                     "%s,%d atomic_from_iovec() failed: ret = %d",
3097                     __FUNCTION__, __LINE__, ret));
3098                 return ret;
3099             }
3100         }
3101     } else {
3102         ptl_size_t md_offset;
3103 
3104         ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
3105         if (OMPI_SUCCESS != ret) {
3106             return ret;
3107         }
3108         ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
3109         if (OMPI_SUCCESS != ret) {
3110             return ret;
3111         }
3112         ompi_datatype_type_size(origin_dt, &size);
3113         length = size * origin_count;
3114         sent = 0;
3115 
3116         md_offset = (ptl_size_t) origin_addr;
3117 
3118         if (MPI_REPLACE == op) {
3119             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3120                                  "%s,%d Put", __FUNCTION__, __LINE__));
3121             ret = segmentedPut(&module->opcount,
3122                                module->md_h,
3123                                md_offset + origin_lb,
3124                                length,
3125                                module->atomic_max,
3126                                PTL_ACK_REQ,
3127                                peer,
3128                                module->pt_idx,
3129                                module->match_bits,
3130                                offset + target_lb,
3131                                NULL,
3132                                0);
3133             if (OMPI_SUCCESS != ret) {
3134                 return ret;
3135             }
3136         } else {
3137             ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
3138             if (OMPI_SUCCESS != ret) {
3139                 opal_output(ompi_osc_base_framework.framework_output,
3140                         "datatype is not currently supported");
3141                 return OMPI_ERR_NOT_SUPPORTED;
3142             }
3143             ret = ompi_osc_portals4_get_op(op, &ptl_op);
3144             if (OMPI_SUCCESS != ret) {
3145                 opal_output(ompi_osc_base_framework.framework_output,
3146                         "operation is not currently supported");
3147                 return OMPI_ERR_NOT_SUPPORTED;
3148             }
3149             do {
3150                 size_t msg_length = MIN(module->atomic_max, length - sent);
3151 
3152                 (void)opal_atomic_add_64(&module->opcount, 1);
3153 
3154                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3155                              "%s,%d Atomic", __FUNCTION__, __LINE__));
3156                 ret = PtlAtomic(module->md_h,
3157                                 md_offset + sent + origin_lb,
3158                                 msg_length,
3159                                 PTL_ACK_REQ,
3160                                 peer,
3161                                 module->pt_idx,
3162                                 module->match_bits,
3163                                 offset + sent + target_lb,
3164                                 NULL,
3165                                 0,
3166                                 ptl_op,
3167                                 ptl_dt);
3168                 if (OMPI_SUCCESS != ret) {
3169                     (void)opal_atomic_add_64(&module->opcount, -1);
3170                     return ret;
3171                 }
3172                 sent += msg_length;
3173             } while (sent < length);
3174         }
3175     }
3176 
3177     return OMPI_SUCCESS;
3178 }
3179 
3180 
3181 int
ompi_osc_portals4_get_accumulate(const void * origin_addr,int origin_count,struct ompi_datatype_t * origin_dt,void * result_addr,int result_count,struct ompi_datatype_t * result_dt,int target,ptrdiff_t target_disp,int target_count,struct ompi_datatype_t * target_dt,struct ompi_op_t * op,struct ompi_win_t * win)3182 ompi_osc_portals4_get_accumulate(const void *origin_addr,
3183                                  int origin_count,
3184                                  struct ompi_datatype_t *origin_dt,
3185                                  void *result_addr,
3186                                  int result_count,
3187                                  struct ompi_datatype_t *result_dt,
3188                                  int target,
3189                                  ptrdiff_t target_disp,
3190                                  int target_count,
3191                                  struct ompi_datatype_t *target_dt,
3192                                  struct ompi_op_t *op,
3193                                  struct ompi_win_t *win)
3194 {
3195     int ret;
3196     ompi_osc_portals4_module_t *module =
3197         (ompi_osc_portals4_module_t*) win->w_osc_module;
3198     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
3199     size_t target_offset, size;
3200     ptl_op_t ptl_op;
3201     ptl_datatype_t ptl_dt;
3202     ptrdiff_t length, origin_lb, target_lb, result_lb, extent;
3203 
3204     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3205                          "get_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx",
3206                          (unsigned long) origin_addr, origin_count,
3207                          origin_dt->name, (unsigned long) result_addr,
3208                          result_count, result_dt->name,
3209                          target, (unsigned long) target_disp,
3210                          target_count, target_dt->name,
3211                          op->o_name,
3212                          (unsigned long) win));
3213 
3214     target_offset = get_displacement(module, target) * target_disp;
3215 
3216     if (target_count > 0 && !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
3217         if (MPI_REPLACE == op) {
3218             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3219                                  "get_accumulate: MPI_REPLACE  non-contiguous target"));
3220             ret = swap_from_noncontig(module,
3221                                       module->md_h,
3222                                       result_addr,
3223                                       result_count,
3224                                       result_dt,
3225                                       module->md_h,
3226                                       origin_addr,
3227                                       origin_count,
3228                                       origin_dt,
3229                                       peer,
3230                                       target_count,
3231                                       target_dt,
3232                                       target_offset,
3233                                       module->pt_idx,
3234                                       module->match_bits,
3235                                       NULL);
3236             if (PTL_OK != ret) {
3237                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3238                              "%s,%d swap_from_noncontig() failed: ret = %d",
3239                              __FUNCTION__, __LINE__, ret));
3240                 return ret;
3241             }
3242         } else if (MPI_NO_OP == op) {
3243             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3244                                  "get_accumulate: MPI_NO_OP  non-contiguous target"));
3245             ret = atomic_get_from_noncontig(module,
3246                                             module->md_h,
3247                                             result_addr,
3248                                             result_count,
3249                                             result_dt,
3250                                             peer,
3251                                             target_count,
3252                                             target_dt,
3253                                             target_offset,
3254                                             module->pt_idx,
3255                                             module->match_bits,
3256                                             NULL);
3257             if (PTL_OK != ret) {
3258                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3259                              "%s,%d atomic_get_from_noncontig() failed: ret = %d",
3260                              __FUNCTION__, __LINE__, ret));
3261                 return ret;
3262             }
3263         } else {
3264             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3265                                  "get_accumulate: other-op  non-contiguous target"));
3266             ret = fetch_atomic_from_noncontig(module,
3267                                               module->md_h,
3268                                               result_addr,
3269                                               result_count,
3270                                               result_dt,
3271                                               module->md_h,
3272                                               origin_addr,
3273                                               origin_count,
3274                                               origin_dt,
3275                                               peer,
3276                                               target_count,
3277                                               target_dt,
3278                                               target_offset,
3279                                               module->pt_idx,
3280                                               module->match_bits,
3281                                               op,
3282                                               NULL);
3283             if (PTL_OK != ret) {
3284                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3285                     "%s,%d fetch_atomic_from_noncontig() failed: ret = %d",
3286                     __FUNCTION__, __LINE__, ret));
3287                 return ret;
3288             }
3289         }
3290     } else if ((origin_count > 0 && !ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) ||
3291                (result_count > 0 && !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count))) {
3292         if (MPI_REPLACE == op) {
3293             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3294                                  "get_accumulate: MPI_REPLACE  non-contiguous origin/result"));
3295             ret = swap_to_iovec(module,
3296                                 result_addr,
3297                                 result_count,
3298                                 result_dt,
3299                                 origin_addr,
3300                                 origin_count,
3301                                 origin_dt,
3302                                 peer,
3303                                 target_count,
3304                                 target_dt,
3305                                 target_offset,
3306                                 module->pt_idx,
3307                                 module->match_bits,
3308                                 NULL);
3309             if (PTL_OK != ret) {
3310                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3311                              "%s,%d swap_to_iovec() failed: ret = %d",
3312                              __FUNCTION__, __LINE__, ret));
3313                 return ret;
3314             }
3315         } else if (MPI_NO_OP == op) {
3316             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3317                                  "get_accumulate: MPI_NO_OP  non-contiguous origin/result"));
3318             ret = atomic_get_to_iovec(module,
3319                                       result_addr,
3320                                       result_count,
3321                                       result_dt,
3322                                       peer,
3323                                       target_count,
3324                                       target_dt,
3325                                       target_offset,
3326                                       module->pt_idx,
3327                                       module->match_bits,
3328                                       NULL);
3329             if (PTL_OK != ret) {
3330                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3331                              "%s,%d atomic_get_to_iovec() failed: ret = %d",
3332                              __FUNCTION__, __LINE__, ret));
3333                 return ret;
3334             }
3335         } else {
3336             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3337                                  "get_accumulate: other-op  non-contiguous origin/result"));
3338             ret = fetch_atomic_to_iovec(module,
3339                                         result_addr,
3340                                         result_count,
3341                                         result_dt,
3342                                         origin_addr,
3343                                         origin_count,
3344                                         origin_dt,
3345                                         peer,
3346                                         target_count,
3347                                         target_dt,
3348                                         target_offset,
3349                                         module->pt_idx,
3350                                         module->match_bits,
3351                                         op,
3352                                         NULL);
3353             if (PTL_OK != ret) {
3354                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3355                              "%s,%d fetch_atomic_to_iovec() failed: ret = %d",
3356                              __FUNCTION__, __LINE__, ret));
3357                 return ret;
3358             }
3359         }
3360     } else {
3361         if (MPI_REPLACE == op) {
3362             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3363                                  "get_accumulate: MPI_REPLACE  contiguous"));
3364             ptl_size_t result_md_offset, origin_md_offset;
3365 
3366             ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
3367             if (OMPI_SUCCESS != ret) {
3368                 return ret;
3369             }
3370             ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
3371             if (OMPI_SUCCESS != ret) {
3372                 return ret;
3373             }
3374             ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
3375             if (OMPI_SUCCESS != ret) {
3376                 return ret;
3377             }
3378             ompi_datatype_type_size(origin_dt, &size);
3379             length = size * origin_count;
3380 
3381             ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
3382             if (OMPI_SUCCESS != ret) {
3383                 opal_output(ompi_osc_base_framework.framework_output,
3384                         "MPI_Get_accumulate: datatype is not currently supported");
3385                 return OMPI_ERR_NOT_SUPPORTED;
3386             }
3387 
3388             result_md_offset = (ptl_size_t) result_addr;
3389             origin_md_offset = (ptl_size_t) origin_addr;
3390 
3391             ret = segmentedSwap(&module->opcount,
3392                                 module->md_h,
3393                                 result_md_offset + result_lb,
3394                                 module->md_h,
3395                                 origin_md_offset + origin_lb,
3396                                 length,
3397                                 module->fetch_atomic_max,
3398                                 peer,
3399                                 module->pt_idx,
3400                                 module->match_bits,
3401                                 target_offset + target_lb,
3402                                 NULL,
3403                                 ptl_dt);
3404             if (OMPI_SUCCESS != ret) {
3405                 return ret;
3406             }
3407         } else if (MPI_NO_OP == op) {
3408             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3409                                  "get_accumulate: MPI_NO_OP  contiguous"));
3410             ptl_size_t md_offset;
3411 
3412             ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
3413             if (OMPI_SUCCESS != ret) {
3414                 return ret;
3415             }
3416             ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
3417             if (OMPI_SUCCESS != ret) {
3418                 return ret;
3419             }
3420             ompi_datatype_type_size(target_dt, &size);
3421             length = size * target_count;
3422 
3423             md_offset = (ptl_size_t) result_addr;
3424 
3425             OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
3426                                   "%s,%d MPI_Get_accumulate", __FUNCTION__, __LINE__));
3427             ret = segmentedGet(&module->opcount,
3428                                module->md_h,
3429                                (ptl_size_t) md_offset + result_lb,
3430                                length,
3431                                module->fetch_atomic_max,
3432                                peer,
3433                                module->pt_idx,
3434                                module->match_bits,
3435                                target_offset + target_lb,
3436                                NULL);
3437             if (OMPI_SUCCESS != ret) {
3438                 return ret;
3439             }
3440         } else {
3441             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3442                                  "get_accumulate: other-op  contiguous"));
3443             ptl_size_t result_md_offset, origin_md_offset;
3444 
3445             ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
3446             if (OMPI_SUCCESS != ret) {
3447                 return ret;
3448             }
3449             ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
3450             if (OMPI_SUCCESS != ret) {
3451                 return ret;
3452             }
3453             ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
3454             if (OMPI_SUCCESS != ret) {
3455                 return ret;
3456             }
3457             ompi_datatype_type_size(origin_dt, &size);
3458             length = size * origin_count;
3459 
3460             result_md_offset = (ptl_size_t) result_addr;
3461             origin_md_offset = (ptl_size_t) origin_addr;
3462 
3463             ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
3464             if (OMPI_SUCCESS != ret) {
3465                 opal_output(ompi_osc_base_framework.framework_output,
3466                         "MPI_Get_accumulate: datatype is not currently supported");
3467                 return OMPI_ERR_NOT_SUPPORTED;
3468             }
3469 
3470             ret = ompi_osc_portals4_get_op(op, &ptl_op);
3471             if (OMPI_SUCCESS != ret) {
3472                 opal_output(ompi_osc_base_framework.framework_output,
3473                         "MPI_Get_accumulate: operation is not currently supported");
3474                 return OMPI_ERR_NOT_SUPPORTED;
3475             }
3476 
3477             ret = segmentedFetchAtomic(&module->opcount,
3478                                        module->md_h,
3479                                        result_md_offset + result_lb,
3480                                        module->md_h,
3481                                        origin_md_offset + origin_lb,
3482                                        length,
3483                                        module->fetch_atomic_max,
3484                                        peer,
3485                                        module->pt_idx,
3486                                        module->match_bits,
3487                                        target_offset + target_lb,
3488                                        NULL,
3489                                        ptl_op,
3490                                        ptl_dt);
3491             if (OMPI_SUCCESS != ret) {
3492                 return ret;
3493             }
3494         }
3495     }
3496 
3497     return OMPI_SUCCESS;
3498 }
3499 
3500 
3501 int
ompi_osc_portals4_compare_and_swap(const void * origin_addr,const void * compare_addr,void * result_addr,struct ompi_datatype_t * dt,int target,ptrdiff_t target_disp,struct ompi_win_t * win)3502 ompi_osc_portals4_compare_and_swap(const void *origin_addr,
3503                                    const void *compare_addr,
3504                                    void *result_addr,
3505                                    struct ompi_datatype_t *dt,
3506                                    int target,
3507                                    ptrdiff_t target_disp,
3508                                    struct ompi_win_t *win)
3509 {
3510     int ret;
3511     ompi_osc_portals4_module_t *module =
3512         (ompi_osc_portals4_module_t*) win->w_osc_module;
3513     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
3514     size_t length;
3515     size_t offset;
3516     ptl_datatype_t ptl_dt;
3517     ptl_size_t result_md_offset, origin_md_offset;
3518 
3519     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3520                          "compare_and_swap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %lu, 0x%lx",
3521                          (unsigned long) origin_addr,
3522                          (unsigned long) compare_addr,
3523                          (unsigned long) result_addr,
3524                          dt->name, target, (unsigned long) target_disp,
3525                          (unsigned long) win));
3526 
3527     ret = ompi_osc_portals4_get_dt(dt, &ptl_dt);
3528     if (OMPI_SUCCESS != ret) {
3529         opal_output(ompi_osc_base_framework.framework_output,
3530                 "MPI_Compare_and_swap: datatype is not currently supported");
3531         return OMPI_ERR_NOT_SUPPORTED;
3532     }
3533 
3534     offset = get_displacement(module, target) * target_disp;
3535 
3536     ret = ompi_datatype_type_size(dt, &length);
3537     if (OMPI_SUCCESS != ret) return ret;
3538 
3539     assert(length <= module->fetch_atomic_max);
3540 
3541     result_md_offset = (ptl_size_t) result_addr;
3542     origin_md_offset = (ptl_size_t) origin_addr;
3543 
3544     (void)opal_atomic_add_64(&module->opcount, 1);
3545 
3546     OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
3547                          "%s,%d Swap", __FUNCTION__, __LINE__));
3548     ret = PtlSwap(module->md_h,
3549                   result_md_offset,
3550                   module->md_h,
3551                   origin_md_offset,
3552                   length,
3553                   peer,
3554                   module->pt_idx,
3555                   module->match_bits,
3556                   offset,
3557                   NULL,
3558                   0,
3559                   compare_addr,
3560                   PTL_CSWAP,
3561                   ptl_dt);
3562     if (OMPI_SUCCESS != ret) {
3563         return ret;
3564     }
3565 
3566     return OMPI_SUCCESS;
3567 }
3568 
3569 
3570 int
ompi_osc_portals4_fetch_and_op(const void * origin_addr,void * result_addr,struct ompi_datatype_t * dt,int target,ptrdiff_t target_disp,struct ompi_op_t * op,struct ompi_win_t * win)3571 ompi_osc_portals4_fetch_and_op(const void *origin_addr,
3572                                void *result_addr,
3573                                struct ompi_datatype_t *dt,
3574                                int target,
3575                                ptrdiff_t target_disp,
3576                                struct ompi_op_t *op,
3577                                struct ompi_win_t *win)
3578 {
3579     int ret;
3580     ompi_osc_portals4_module_t *module =
3581         (ompi_osc_portals4_module_t*) win->w_osc_module;
3582     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
3583     size_t length;
3584     size_t offset;
3585     ptl_op_t ptl_op;
3586     ptl_datatype_t ptl_dt;
3587 
3588     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3589                          "fetch_and_op: 0x%lx, 0x%lx, %s, %d, %lu, %s, 0x%lx",
3590                          (unsigned long) origin_addr,
3591                          (unsigned long) result_addr,
3592                          dt->name, target, (unsigned long) target_disp,
3593                          op->o_name,
3594                          (unsigned long) win));
3595 
3596     ret = ompi_osc_portals4_get_dt(dt, &ptl_dt);
3597     if (OMPI_SUCCESS != ret) {
3598         opal_output(ompi_osc_base_framework.framework_output,
3599                 "MPI_Fetch_and_op: datatype is not currently supported");
3600         return OMPI_ERR_NOT_SUPPORTED;
3601     }
3602 
3603     offset = get_displacement(module, target) * target_disp;
3604 
3605     ret = ompi_datatype_type_size(dt, &length);
3606     if (OMPI_SUCCESS != ret) return ret;
3607 
3608     assert(length <= module->fetch_atomic_max);
3609 
3610     if (MPI_REPLACE == op) {
3611         ptl_size_t result_md_offset, origin_md_offset;
3612 
3613         result_md_offset = (ptl_size_t) result_addr;
3614         origin_md_offset = (ptl_size_t) origin_addr;
3615 
3616         (void)opal_atomic_add_64(&module->opcount, 1);
3617         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3618                              "%s,%d Swap", __FUNCTION__, __LINE__));
3619         ret = PtlSwap(module->md_h,
3620                       result_md_offset,
3621                       module->md_h,
3622                       origin_md_offset,
3623                       length,
3624                       peer,
3625                       module->pt_idx,
3626                       module->match_bits,
3627                       offset,
3628                       NULL,
3629                       0,
3630                       NULL,
3631                       PTL_SWAP,
3632                       ptl_dt);
3633     } else if (MPI_NO_OP == op) {
3634         ptl_size_t md_offset;
3635 
3636         md_offset = (ptl_size_t) result_addr;
3637 
3638         (void)opal_atomic_add_64(&module->opcount, 1);
3639         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3640                              "%s,%d Get", __FUNCTION__, __LINE__));
3641         ret = PtlGet(module->md_h,
3642                      md_offset,
3643                      length,
3644                      peer,
3645                      module->pt_idx,
3646                      module->match_bits,
3647                      offset,
3648                      NULL);
3649     } else {
3650         ptl_size_t result_md_offset, origin_md_offset;
3651         (void)opal_atomic_add_64(&module->opcount, 1);
3652 
3653         ret = ompi_osc_portals4_get_op(op, &ptl_op);
3654         if (OMPI_SUCCESS != ret) {
3655             opal_output(ompi_osc_base_framework.framework_output,
3656                     "MPI_Fetch_and_op: operation is not currently supported");
3657             return OMPI_ERR_NOT_SUPPORTED;
3658         }
3659 
3660         result_md_offset = (ptl_size_t) result_addr;
3661         origin_md_offset = (ptl_size_t) origin_addr;
3662 
3663         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3664                              "%s,%d FetchAtomic", __FUNCTION__, __LINE__));
3665         ret = PtlFetchAtomic(module->md_h,
3666                              result_md_offset,
3667                              module->md_h,
3668                              origin_md_offset,
3669                              length,
3670                              peer,
3671                              module->pt_idx,
3672                              module->match_bits,
3673                              offset,
3674                              NULL,
3675                              0,
3676                              ptl_op,
3677                              ptl_dt);
3678     }
3679     if (OMPI_SUCCESS != ret) {
3680         return ret;
3681     }
3682 
3683     return OMPI_SUCCESS;
3684 }
3685