1 /*
2 * Copyright (c) 2011-2017 Sandia National Laboratories. All rights reserved.
3 * Copyright (c) 2014 The University of Tennessee and The University
4 * of Tennessee Research Foundation. All rights
5 * reserved.
6 * Copyright (c) 2015-2017 Research Organization for Information Science
7 * and Technology (RIST). All rights reserved.
8 * $COPYRIGHT$
9 *
10 * Additional copyrights may follow
11 *
12 * $HEADER$
13 */
14
15 #include "ompi_config.h"
16
17 #include "ompi/mca/osc/osc.h"
18 #include "ompi/mca/osc/base/base.h"
19 #include "ompi/mca/osc/base/osc_base_obj_convert.h"
20
21 #include "osc_portals4.h"
22 #include "osc_portals4_request.h"
23
24
25 static int
ompi_osc_portals4_get_op(struct ompi_op_t * op,ptl_op_t * ptl_op)26 ompi_osc_portals4_get_op(struct ompi_op_t *op, ptl_op_t *ptl_op)
27 {
28 if (MPI_MAX == op) {
29 *ptl_op = PTL_MAX;
30 } else if (MPI_MIN == op) {
31 *ptl_op = PTL_MIN;
32 } else if (MPI_SUM == op) {
33 *ptl_op = PTL_SUM;
34 } else if (MPI_PROD == op) {
35 *ptl_op = PTL_PROD;
36 } else if (MPI_LAND == op) {
37 *ptl_op = PTL_LAND;
38 } else if (MPI_BAND == op) {
39 *ptl_op = PTL_BAND;
40 } else if (MPI_LOR == op) {
41 *ptl_op = PTL_LOR;
42 } else if (MPI_BOR == op) {
43 *ptl_op = PTL_BOR;
44 } else if (MPI_LXOR == op) {
45 *ptl_op = PTL_LXOR;
46 } else if (MPI_BXOR == op) {
47 *ptl_op = PTL_BXOR;
48 } else {
49 return OMPI_ERROR;
50 }
51
52 return OMPI_SUCCESS;
53 }
54
55
56 static int
get_sized_type(bool sign,size_t size,ptl_datatype_t * ptl_dt)57 get_sized_type(bool sign, size_t size, ptl_datatype_t *ptl_dt)
58 {
59 if (sign) {
60 switch (size) {
61 case 1:
62 *ptl_dt = PTL_INT8_T;
63 break;
64 case 2:
65 *ptl_dt = PTL_INT16_T;
66 break;
67 case 4:
68 *ptl_dt = PTL_INT32_T;
69 break;
70 case 8:
71 *ptl_dt = PTL_INT64_T;
72 break;
73 default:
74 return OMPI_ERROR;
75 }
76 } else {
77 switch (size) {
78 case 1:
79 *ptl_dt = PTL_UINT8_T;
80 break;
81 case 2:
82 *ptl_dt = PTL_UINT16_T;
83 break;
84 case 4:
85 *ptl_dt = PTL_UINT32_T;
86 break;
87 case 8:
88 *ptl_dt = PTL_UINT64_T;
89 break;
90 default:
91 return OMPI_ERROR;
92 }
93 }
94
95 return OMPI_SUCCESS;
96 }
97
98
99 static int
ompi_osc_portals4_get_dt(struct ompi_datatype_t * dt,ptl_datatype_t * ptl_dt)100 ompi_osc_portals4_get_dt(struct ompi_datatype_t *dt, ptl_datatype_t *ptl_dt)
101 {
102 ompi_datatype_t *base_dt = ompi_datatype_get_single_predefined_type_from_args(dt);
103
104 if (MPI_BYTE == base_dt) {
105 *ptl_dt = PTL_INT8_T;
106 } else if (MPI_CHAR == base_dt) {
107 *ptl_dt = PTL_INT8_T;
108 } else if (MPI_SHORT == base_dt) {
109 return get_sized_type(true, sizeof(short), ptl_dt);
110 } else if (MPI_INT == base_dt) {
111 return get_sized_type(true, sizeof(int), ptl_dt);
112 } else if (MPI_LONG == base_dt) {
113 return get_sized_type(true, sizeof(long), ptl_dt);
114 } else if (MPI_FLOAT == base_dt) {
115 *ptl_dt = PTL_FLOAT;
116 } else if (MPI_DOUBLE == base_dt) {
117 *ptl_dt = PTL_DOUBLE;
118 } else if (MPI_LONG_DOUBLE == base_dt) {
119 *ptl_dt = PTL_LONG_DOUBLE;
120 } else if (MPI_UNSIGNED_CHAR == base_dt) {
121 *ptl_dt = PTL_UINT8_T;
122 } else if (MPI_SIGNED_CHAR == base_dt) {
123 *ptl_dt = PTL_UINT8_T;
124 } else if (MPI_UNSIGNED_SHORT == base_dt) {
125 return get_sized_type(false, sizeof(short), ptl_dt);
126 } else if (MPI_UNSIGNED_LONG == base_dt) {
127 return get_sized_type(false, sizeof(long), ptl_dt);
128 } else if (MPI_UNSIGNED == base_dt) {
129 return get_sized_type(false, sizeof(int), ptl_dt);
130 #if OPAL_HAVE_LONG_LONG
131 } else if (MPI_LONG_LONG_INT == base_dt) {
132 return get_sized_type(true, sizeof(long long int), ptl_dt);
133 } else if (MPI_LONG_LONG == base_dt) {
134 return get_sized_type(true, sizeof(long long), ptl_dt);
135 #endif
136 } else if (MPI_INT8_T == base_dt) {
137 *ptl_dt = PTL_INT8_T;
138 } else if (MPI_UINT8_T == base_dt) {
139 *ptl_dt = PTL_UINT8_T;
140 } else if (MPI_INT16_T == base_dt) {
141 *ptl_dt = PTL_INT16_T;
142 } else if (MPI_UINT16_T == base_dt) {
143 *ptl_dt = PTL_UINT16_T;
144 } else if (MPI_INT32_T == base_dt) {
145 *ptl_dt = PTL_INT32_T;
146 } else if (MPI_UINT32_T == base_dt) {
147 *ptl_dt = PTL_UINT32_T;
148 } else if (MPI_INT64_T == base_dt) {
149 *ptl_dt = PTL_INT64_T;
150 } else if (MPI_UINT64_T == base_dt) {
151 *ptl_dt = PTL_UINT64_T;
152 #if HAVE_FLOAT__COMPLEX
153 } else if (MPI_C_COMPLEX == base_dt) {
154 *ptl_dt = PTL_DOUBLE_COMPLEX;
155 } else if (MPI_C_FLOAT_COMPLEX == base_dt) {
156 *ptl_dt = PTL_FLOAT_COMPLEX;
157 #endif
158 #if HAVE_DOUBLE__COMPLEX
159 } else if (MPI_C_DOUBLE_COMPLEX == base_dt) {
160 *ptl_dt = PTL_DOUBLE_COMPLEX;
161 #endif
162 #if HAVE_LONG_DOUBLE__COMPLEX
163 } else if (MPI_C_LONG_DOUBLE_COMPLEX == base_dt) {
164 *ptl_dt = PTL_LONG_DOUBLE_COMPLEX;
165 #endif
166 } else if (MPI_AINT == base_dt) {
167 if (sizeof(MPI_Aint) == 2) {
168 *ptl_dt = PTL_UINT16_T;
169 } else if (sizeof(MPI_Aint) == 4) {
170 *ptl_dt = PTL_UINT32_T;
171 } else if (sizeof(MPI_Aint) == 8) {
172 *ptl_dt = PTL_UINT64_T;
173 }
174 } else {
175 return OMPI_ERROR;
176 }
177
178 return 0;
179 }
180
181 static ptl_size_t
number_of_fragments(ptl_size_t length,ptl_size_t maxlength)182 number_of_fragments(ptl_size_t length, ptl_size_t maxlength)
183 {
184 ptl_size_t nb_frag = length == 0 ? 1 : (length - 1) / maxlength + 1;
185 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
186 "%s,%d : %ld fragment(s)", __FUNCTION__, __LINE__, nb_frag));
187 return nb_frag;
188 }
189
190 /* put in segments no larger than segment_length */
191 static int
segmentedPut(int64_t * opcount,ptl_handle_md_t md_h,ptl_size_t origin_offset,ptl_size_t put_length,ptl_size_t segment_length,ptl_ack_req_t ack_req,ptl_process_t target_id,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,ptl_size_t target_offset,void * user_ptr,ptl_hdr_data_t hdr_data)192 segmentedPut(int64_t *opcount,
193 ptl_handle_md_t md_h,
194 ptl_size_t origin_offset,
195 ptl_size_t put_length,
196 ptl_size_t segment_length,
197 ptl_ack_req_t ack_req,
198 ptl_process_t target_id,
199 ptl_pt_index_t pt_index,
200 ptl_match_bits_t match_bits,
201 ptl_size_t target_offset,
202 void *user_ptr,
203 ptl_hdr_data_t hdr_data)
204 {
205 int ret;
206 ptl_size_t bytes_put = 0;
207
208 do {
209 opal_atomic_add_64(opcount, 1);
210
211 ptl_size_t frag_length = MIN(put_length, segment_length);
212 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
213 "Put size : %lu/%lu, offset:%lu", frag_length, put_length, bytes_put));
214 ret = PtlPut(md_h,
215 origin_offset + bytes_put,
216 frag_length,
217 ack_req,
218 target_id,
219 pt_index,
220 match_bits,
221 target_offset + bytes_put,
222 user_ptr,
223 hdr_data);
224 if (PTL_OK != ret) {
225 opal_atomic_add_64(opcount, -1);
226 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
227 "%s:%d PtlPut failed with return value %d",
228 __FUNCTION__, __LINE__, ret);
229 return ret;
230 }
231 put_length -= frag_length;
232 bytes_put += frag_length;
233 } while (put_length);
234 return PTL_OK;
235 }
236
237 /* get in segments no larger than segment_length */
238 static int
segmentedGet(int64_t * opcount,ptl_handle_md_t md_h,ptl_size_t origin_offset,ptl_size_t get_length,ptl_size_t segment_length,ptl_process_t target_id,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,ptl_size_t target_offset,void * user_ptr)239 segmentedGet(int64_t *opcount,
240 ptl_handle_md_t md_h,
241 ptl_size_t origin_offset,
242 ptl_size_t get_length,
243 ptl_size_t segment_length,
244 ptl_process_t target_id,
245 ptl_pt_index_t pt_index,
246 ptl_match_bits_t match_bits,
247 ptl_size_t target_offset,
248 void *user_ptr)
249 {
250 int ret;
251 ptl_size_t bytes_gotten = 0;
252
253 do {
254 opal_atomic_add_64(opcount, 1);
255
256 ptl_size_t frag_length = MIN(get_length, segment_length);
257 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
258 "Get size : %lu/%lu, offset:%lu", frag_length, get_length, bytes_gotten));
259
260 ret = PtlGet(md_h,
261 (ptl_size_t) origin_offset + bytes_gotten,
262 frag_length,
263 target_id,
264 pt_index,
265 match_bits,
266 target_offset + bytes_gotten,
267 user_ptr);
268 if (PTL_OK != ret) {
269 opal_atomic_add_64(opcount, -1);
270 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
271 "%s:%d PtlGet failed with return value %d",
272 __FUNCTION__, __LINE__, ret);
273 return ret;
274 }
275 get_length -= frag_length;
276 bytes_gotten += frag_length;
277 } while (get_length);
278 return PTL_OK;
279 }
280
281 /* atomic op in segments no larger than segment_length */
282 static int
segmentedAtomic(int64_t * opcount,ptl_handle_md_t md_h,ptl_size_t origin_offset,ptl_size_t length,ptl_size_t segment_length,ptl_process_t target_id,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,ptl_size_t target_offset,void * user_ptr,ptl_op_t ptl_op,ptl_datatype_t ptl_dt)283 segmentedAtomic(int64_t *opcount,
284 ptl_handle_md_t md_h,
285 ptl_size_t origin_offset,
286 ptl_size_t length,
287 ptl_size_t segment_length,
288 ptl_process_t target_id,
289 ptl_pt_index_t pt_index,
290 ptl_match_bits_t match_bits,
291 ptl_size_t target_offset,
292 void *user_ptr,
293 ptl_op_t ptl_op,
294 ptl_datatype_t ptl_dt)
295 {
296 int ret;
297 ptl_size_t sent = 0;
298
299 do {
300 opal_atomic_add_64(opcount, 1);
301
302 ptl_size_t frag_length = MIN(length, segment_length);
303 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
304 "Atomic size : %lu/%lu, offset:%lu", frag_length, length, sent));
305 ret = PtlAtomic(md_h,
306 (ptl_size_t) origin_offset + sent,
307 frag_length,
308 PTL_ACK_REQ,
309 target_id,
310 pt_index,
311 match_bits,
312 target_offset + sent,
313 user_ptr,
314 0,
315 ptl_op,
316 ptl_dt);
317 if (PTL_OK != ret) {
318 opal_atomic_add_64(opcount, -1);
319 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
320 "%s:%d PtlAtomic failed with return value %d",
321 __FUNCTION__, __LINE__, ret);
322 return ret;
323 }
324 length -= frag_length;
325 sent += frag_length;
326 } while (length);
327 return PTL_OK;
328 }
329
330 /* atomic op in segments no larger than segment_length */
331 static int
segmentedFetchAtomic(int64_t * opcount,ptl_handle_md_t result_md_h,ptl_size_t result_offset,ptl_handle_md_t origin_md_h,ptl_size_t origin_offset,ptl_size_t length,ptl_size_t segment_length,ptl_process_t target_id,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,ptl_size_t target_offset,void * user_ptr,ptl_op_t ptl_op,ptl_datatype_t ptl_dt)332 segmentedFetchAtomic(int64_t *opcount,
333 ptl_handle_md_t result_md_h,
334 ptl_size_t result_offset,
335 ptl_handle_md_t origin_md_h,
336 ptl_size_t origin_offset,
337 ptl_size_t length,
338 ptl_size_t segment_length,
339 ptl_process_t target_id,
340 ptl_pt_index_t pt_index,
341 ptl_match_bits_t match_bits,
342 ptl_size_t target_offset,
343 void *user_ptr,
344 ptl_op_t ptl_op,
345 ptl_datatype_t ptl_dt)
346 {
347 int ret;
348 ptl_size_t sent = 0;
349
350 do {
351 opal_atomic_add_64(opcount, 1);
352
353 ptl_size_t frag_length = MIN(length, segment_length);
354 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
355 "Atomic size : %lu/%lu, offset:%lu", frag_length, length, sent));
356 ret = PtlFetchAtomic(result_md_h,
357 result_offset + sent,
358 origin_md_h,
359 origin_offset + sent,
360 frag_length,
361 target_id,
362 pt_index,
363 match_bits,
364 target_offset + sent,
365 user_ptr,
366 0,
367 ptl_op,
368 ptl_dt);
369 if (PTL_OK != ret) {
370 opal_atomic_add_64(opcount, -1);
371 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
372 "%s:%d PtlFetchAtomic failed with return value %d",
373 __FUNCTION__, __LINE__, ret);
374 return ret;
375 }
376 length -= frag_length;
377 sent += frag_length;
378 } while (length);
379 return PTL_OK;
380 }
381
382 /* swap in segments no larger than segment_length */
383 static int
segmentedSwap(int64_t * opcount,ptl_handle_md_t result_md_h,ptl_size_t result_offset,ptl_handle_md_t origin_md_h,ptl_size_t origin_offset,ptl_size_t length,ptl_size_t segment_length,ptl_process_t target_id,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,ptl_size_t target_offset,void * user_ptr,ptl_datatype_t ptl_dt)384 segmentedSwap(int64_t *opcount,
385 ptl_handle_md_t result_md_h,
386 ptl_size_t result_offset,
387 ptl_handle_md_t origin_md_h,
388 ptl_size_t origin_offset,
389 ptl_size_t length,
390 ptl_size_t segment_length,
391 ptl_process_t target_id,
392 ptl_pt_index_t pt_index,
393 ptl_match_bits_t match_bits,
394 ptl_size_t target_offset,
395 void *user_ptr,
396 ptl_datatype_t ptl_dt)
397 {
398 int ret;
399 ptl_size_t sent = 0;
400
401 do {
402 opal_atomic_add_64(opcount, 1);
403
404 ptl_size_t frag_length = MIN(length, segment_length);
405 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
406 "Swap size : %lu/%lu, offset:%lu", frag_length, length, sent));
407 ret = PtlSwap(result_md_h,
408 result_offset + sent,
409 origin_md_h,
410 (ptl_size_t) origin_offset + sent,
411 frag_length,
412 target_id,
413 pt_index,
414 match_bits,
415 target_offset + sent,
416 user_ptr,
417 0,
418 NULL,
419 PTL_SWAP,
420 ptl_dt);
421 if (PTL_OK != ret) {
422 opal_atomic_add_64(opcount, -1);
423 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
424 "%s:%d PtlSwap failed with return value %d",
425 __FUNCTION__, __LINE__, ret);
426 return ret;
427 }
428 length -= frag_length;
429 sent += frag_length;
430 } while (length);
431 return PTL_OK;
432 }
433
434 static int
create_iov_list(const void * address,int count,ompi_datatype_t * datatype,ptl_iovec_t ** ptl_iovec,ptl_size_t * ptl_iovec_count)435 create_iov_list(const void *address,
436 int count,
437 ompi_datatype_t *datatype,
438 ptl_iovec_t **ptl_iovec,
439 ptl_size_t *ptl_iovec_count)
440 {
441 struct iovec iov[OSC_PORTALS4_IOVEC_MAX];
442 opal_convertor_t convertor;
443 uint32_t iov_count;
444 uint32_t iov_index, ptl_iovec_index;
445 /* needed for opal_convertor_raw but not used */
446 size_t size;
447 int ret;
448 bool done;
449
450 OBJ_CONSTRUCT(&convertor, opal_convertor_t);
451 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &datatype->super, count,
452 address, 0, &convertor);
453 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
454 return ret;
455 }
456
457
458 *ptl_iovec_count = 0;
459 ptl_iovec_index = 0;
460 do {
461 /* decode segments of the data */
462 iov_count = OSC_PORTALS4_IOVEC_MAX;
463 iov_index = 0;
464
465 /* opal_convertor_raw returns done when it has reached the end of the data */
466 done = opal_convertor_raw (&convertor, iov, &iov_count, &size);
467
468 *ptl_iovec_count += iov_count;
469 *ptl_iovec = (ptl_iovec_t *)realloc(*ptl_iovec, *ptl_iovec_count * sizeof(ptl_iovec_t));
470
471 while (iov_index != iov_count) {
472 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
473 "adding iov[%d].[%p,%lu] to ptl_iovec", iov_index, iov[iov_index].iov_base, iov[iov_index].iov_len));
474 (*ptl_iovec)[ptl_iovec_index].iov_base = iov[iov_index].iov_base;
475 (*ptl_iovec)[ptl_iovec_index].iov_len = iov[iov_index].iov_len;
476
477 ptl_iovec_index++;
478 iov_index++;
479 }
480
481 assert(*ptl_iovec_count == ptl_iovec_index);
482 } while (!done);
483
484 return OMPI_SUCCESS;
485
486 }
487
488 /* get from a contiguous remote to an iovec local */
489 static int
get_to_iovec(ompi_osc_portals4_module_t * module,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)490 get_to_iovec(ompi_osc_portals4_module_t *module,
491 const void *origin_address,
492 int origin_count,
493 ompi_datatype_t *origin_datatype,
494 ptl_process_t peer,
495 int target_count,
496 ompi_datatype_t *target_datatype,
497 size_t offset,
498 ptl_pt_index_t pt_index,
499 ptl_match_bits_t match_bits,
500 void *user_ptr)
501 {
502 int ret;
503 size_t size;
504 ptrdiff_t length, origin_lb, target_lb, extent;
505 ptl_md_t md;
506
507 if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
508 PtlMDRelease(module->origin_iovec_md_h);
509 free(module->origin_iovec_list);
510 module->origin_iovec_md_h = PTL_INVALID_HANDLE;
511 module->origin_iovec_list = NULL;
512 }
513
514 ptl_size_t iovec_count=0;
515 create_iov_list(
516 origin_address,
517 origin_count,
518 origin_datatype,
519 &module->origin_iovec_list,
520 &iovec_count);
521
522 ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
523 if (OMPI_SUCCESS != ret) {
524 return ret;
525 }
526 ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
527 if (OMPI_SUCCESS != ret) {
528 return ret;
529 }
530 ompi_datatype_type_size(origin_datatype, &size);
531 length = size * origin_count;
532
533 md.start = module->origin_iovec_list;
534 md.length = iovec_count;
535 if (user_ptr) {
536 md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
537 } else {
538 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
539 }
540 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
541 md.ct_handle = module->ct_h;
542 ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
543 if (PTL_OK != ret) {
544 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
545 "%s:%d: PtlMDBind(iovec) failed: %d\n",
546 __FILE__, __LINE__, ret);
547 return ret;
548 }
549
550 opal_atomic_add_64(&module->opcount, 1);
551
552 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
553 "%s,%d Get(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
554 __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount));
555 ret = PtlGet(module->origin_iovec_md_h,
556 (ptl_size_t) origin_lb,
557 length,
558 peer,
559 module->pt_idx,
560 module->match_bits,
561 offset + target_lb,
562 user_ptr);
563 if (PTL_OK != ret) {
564 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
565 "%s,%d PtlGet() failed: ret = %d",
566 __FUNCTION__, __LINE__, ret));
567 opal_atomic_add_64(&module->opcount, -1);
568 return ret;
569 }
570
571 return OMPI_SUCCESS;
572 }
573
574 /* get to an iovec MD from a contiguous target using fragments no larger
575 * than max_fetch_atomic_size to guarantee atomic writes at the origin */
576 static int
atomic_get_to_iovec(ompi_osc_portals4_module_t * module,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)577 atomic_get_to_iovec(ompi_osc_portals4_module_t *module,
578 const void *origin_address,
579 int origin_count,
580 ompi_datatype_t *origin_datatype,
581 ptl_process_t peer,
582 int target_count,
583 ompi_datatype_t *target_datatype,
584 size_t offset,
585 ptl_pt_index_t pt_index,
586 ptl_match_bits_t match_bits,
587 void *user_ptr)
588 {
589 int ret;
590 size_t size;
591 ptrdiff_t length, origin_lb, target_lb, extent;
592 ptl_md_t md;
593
594 if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
595 PtlMDRelease(module->origin_iovec_md_h);
596 free(module->origin_iovec_list);
597 module->origin_iovec_md_h = PTL_INVALID_HANDLE;
598 module->origin_iovec_list = NULL;
599 }
600
601 ptl_size_t iovec_count=0;
602 create_iov_list(
603 origin_address,
604 origin_count,
605 origin_datatype,
606 &module->origin_iovec_list,
607 &iovec_count);
608
609 ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
610 if (OMPI_SUCCESS != ret) {
611 return ret;
612 }
613 ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
614 if (OMPI_SUCCESS != ret) {
615 return ret;
616 }
617 ompi_datatype_type_size(origin_datatype, &size);
618 length = size * origin_count;
619
620 md.start = module->origin_iovec_list;
621 md.length = iovec_count;
622 if (user_ptr) {
623 md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
624 } else {
625 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
626 }
627 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
628 md.ct_handle = module->ct_h;
629 ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
630 if (PTL_OK != ret) {
631 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
632 "%s:%d: PtlMDBind(iovec) failed: %d\n",
633 __FILE__, __LINE__, ret);
634 return ret;
635 }
636
637 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
638 "%s,%d Get(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
639 __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount));
640 ret = segmentedGet(&module->opcount,
641 module->origin_iovec_md_h,
642 (ptl_size_t) origin_lb,
643 length,
644 module->fetch_atomic_max,
645 peer,
646 module->pt_idx,
647 module->match_bits,
648 offset + target_lb,
649 user_ptr);
650 if (PTL_OK != ret) {
651 return ret;
652 }
653
654 return OMPI_SUCCESS;
655 }
656
657 /* put from an iovec MD into a contiguous target */
658 static int
put_from_iovec(ompi_osc_portals4_module_t * module,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)659 put_from_iovec(ompi_osc_portals4_module_t *module,
660 const void *origin_address,
661 int origin_count,
662 ompi_datatype_t *origin_datatype,
663 ptl_process_t peer,
664 int target_count,
665 ompi_datatype_t *target_datatype,
666 size_t offset,
667 ptl_pt_index_t pt_index,
668 ptl_match_bits_t match_bits,
669 void *user_ptr)
670 {
671 int ret;
672 size_t size;
673 ptrdiff_t length, origin_lb, target_lb, extent;
674 ptl_md_t md;
675
676 if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
677 PtlMDRelease(module->origin_iovec_md_h);
678 free(module->origin_iovec_list);
679 module->origin_iovec_md_h = PTL_INVALID_HANDLE;
680 module->origin_iovec_list = NULL;
681 }
682
683 ptl_size_t iovec_count=0;
684 create_iov_list(
685 origin_address,
686 origin_count,
687 origin_datatype,
688 &module->origin_iovec_list,
689 &iovec_count);
690
691 ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
692 if (OMPI_SUCCESS != ret) {
693 return ret;
694 }
695 ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
696 if (OMPI_SUCCESS != ret) {
697 return ret;
698 }
699 ompi_datatype_type_size(origin_datatype, &size);
700 length = size * origin_count;
701
702 md.start = module->origin_iovec_list;
703 md.length = iovec_count;
704 if (user_ptr) {
705 md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
706 } else {
707 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
708 }
709 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
710 md.ct_handle = module->ct_h;
711 ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
712 if (PTL_OK != ret) {
713 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
714 "%s:%d: PtlMDBind(iovec) failed: %d\n",
715 __FILE__, __LINE__, ret);
716 return ret;
717 }
718
719 opal_atomic_add_64(&module->opcount, 1);
720
721 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
722 "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
723 __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount));
724 ret = PtlPut(module->origin_iovec_md_h,
725 (ptl_size_t) origin_lb,
726 length,
727 PTL_ACK_REQ,
728 peer,
729 module->pt_idx,
730 module->match_bits,
731 offset + target_lb,
732 user_ptr,
733 0);
734 if (PTL_OK != ret) {
735 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
736 "%s,%d PtlPut() failed: ret = %d",
737 __FUNCTION__, __LINE__, ret));
738 opal_atomic_add_64(&module->opcount, -1);
739 return ret;
740 }
741
742 return OMPI_SUCCESS;
743 }
744
745 /* put from an iovec MD into a contiguous target using fragments no larger
746 * than max_atomic_size to guarantee atomic writes at the target */
747 static int
atomic_put_from_iovec(ompi_osc_portals4_module_t * module,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)748 atomic_put_from_iovec(ompi_osc_portals4_module_t *module,
749 const void *origin_address,
750 int origin_count,
751 ompi_datatype_t *origin_datatype,
752 ptl_process_t peer,
753 int target_count,
754 ompi_datatype_t *target_datatype,
755 size_t offset,
756 ptl_pt_index_t pt_index,
757 ptl_match_bits_t match_bits,
758 void *user_ptr)
759 {
760 int ret;
761 size_t size;
762 ptrdiff_t length, origin_lb, target_lb, extent;
763 ptl_md_t md;
764
765 if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
766 PtlMDRelease(module->origin_iovec_md_h);
767 free(module->origin_iovec_list);
768 module->origin_iovec_md_h = PTL_INVALID_HANDLE;
769 module->origin_iovec_list = NULL;
770 }
771
772 ptl_size_t iovec_count=0;
773 create_iov_list(
774 origin_address,
775 origin_count,
776 origin_datatype,
777 &module->origin_iovec_list,
778 &iovec_count);
779
780 ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
781 if (OMPI_SUCCESS != ret) {
782 return ret;
783 }
784 ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
785 if (OMPI_SUCCESS != ret) {
786 return ret;
787 }
788 ompi_datatype_type_size(origin_datatype, &size);
789 length = size * origin_count;
790
791 md.start = module->origin_iovec_list;
792 md.length = iovec_count;
793 if (user_ptr) {
794 md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
795 } else {
796 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
797 }
798 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
799 md.ct_handle = module->ct_h;
800 ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
801 if (PTL_OK != ret) {
802 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
803 "%s:%d: PtlMDBind(iovec) failed: %d\n",
804 __FILE__, __LINE__, ret);
805 return ret;
806 }
807
808 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
809 "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)",
810 __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount));
811 ret = segmentedPut(&module->opcount,
812 module->origin_iovec_md_h,
813 (ptl_size_t) origin_lb,
814 length,
815 module->atomic_max,
816 PTL_ACK_REQ,
817 peer,
818 module->pt_idx,
819 module->match_bits,
820 offset + target_lb,
821 NULL,
822 0);
823 if (OMPI_SUCCESS != ret) {
824 return ret;
825 }
826
827 return OMPI_SUCCESS;
828 }
829
830 /* perform atomic operation on iovec local and contiguous remote */
831 static int
atomic_from_iovec(ompi_osc_portals4_module_t * module,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,struct ompi_op_t * op,void * user_ptr)832 atomic_from_iovec(ompi_osc_portals4_module_t *module,
833 const void *origin_address,
834 int origin_count,
835 ompi_datatype_t *origin_datatype,
836 ptl_process_t peer,
837 int target_count,
838 ompi_datatype_t *target_datatype,
839 size_t offset,
840 ptl_pt_index_t pt_index,
841 ptl_match_bits_t match_bits,
842 struct ompi_op_t *op,
843 void *user_ptr)
844 {
845 int ret;
846 size_t size;
847 ptrdiff_t length, origin_lb, target_lb, extent;
848 ptl_md_t md;
849 ptl_op_t ptl_op;
850 ptl_datatype_t ptl_dt;
851
852 if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
853 PtlMDRelease(module->origin_iovec_md_h);
854 free(module->origin_iovec_list);
855 module->origin_iovec_md_h = PTL_INVALID_HANDLE;
856 module->origin_iovec_list = NULL;
857 }
858
859 ptl_size_t iovec_count=0;
860 create_iov_list(
861 origin_address,
862 origin_count,
863 origin_datatype,
864 &module->origin_iovec_list,
865 &iovec_count);
866
867 ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
868 if (OMPI_SUCCESS != ret) {
869 opal_output(ompi_osc_base_framework.framework_output,
870 "datatype is not currently supported");
871 return OMPI_ERR_NOT_SUPPORTED;
872 }
873 ret = ompi_osc_portals4_get_op(op, &ptl_op);
874 if (OMPI_SUCCESS != ret) {
875 opal_output(ompi_osc_base_framework.framework_output,
876 "operation is not currently supported");
877 return OMPI_ERR_NOT_SUPPORTED;
878 }
879
880 ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
881 if (OMPI_SUCCESS != ret) {
882 return ret;
883 }
884 ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
885 if (OMPI_SUCCESS != ret) {
886 return ret;
887 }
888 ompi_datatype_type_size(origin_datatype, &size);
889 length = size * origin_count;
890
891 md.start = module->origin_iovec_list;
892 md.length = iovec_count;
893 if (user_ptr) {
894 md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
895 } else {
896 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
897 }
898 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
899 md.ct_handle = module->ct_h;
900 ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
901 if (PTL_OK != ret) {
902 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
903 "%s:%d: PtlMDBind(iovec) failed: %d\n",
904 __FILE__, __LINE__, ret);
905 return ret;
906 }
907
908 ret = segmentedAtomic(&module->opcount,
909 module->origin_iovec_md_h,
910 (ptl_size_t) origin_lb,
911 length,
912 module->atomic_max,
913 peer,
914 module->pt_idx,
915 module->match_bits,
916 offset + target_lb,
917 user_ptr,
918 ptl_op,
919 ptl_dt);
920 if (OMPI_SUCCESS != ret) {
921 return ret;
922 }
923
924 return OMPI_SUCCESS;
925 }
926
927 /* perform atomic operation on iovec local and contiguous remote */
928 static int
swap_to_iovec(ompi_osc_portals4_module_t * module,const void * result_address,int result_count,ompi_datatype_t * result_datatype,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)929 swap_to_iovec(ompi_osc_portals4_module_t *module,
930 const void *result_address,
931 int result_count,
932 ompi_datatype_t *result_datatype,
933 const void *origin_address,
934 int origin_count,
935 ompi_datatype_t *origin_datatype,
936 ptl_process_t peer,
937 int target_count,
938 ompi_datatype_t *target_datatype,
939 size_t offset,
940 ptl_pt_index_t pt_index,
941 ptl_match_bits_t match_bits,
942 void *user_ptr)
943 {
944 int ret;
945 size_t size;
946 ptl_size_t iovec_count=0;
947 ptrdiff_t length, result_lb, origin_lb, target_lb, extent;
948 ptl_md_t md;
949 ptl_datatype_t ptl_dt;
950
951 if (module->result_iovec_md_h != PTL_INVALID_HANDLE) {
952 PtlMDRelease(module->result_iovec_md_h);
953 free(module->result_iovec_list);
954 module->result_iovec_md_h = PTL_INVALID_HANDLE;
955 module->result_iovec_list = NULL;
956 }
957
958 create_iov_list(
959 result_address,
960 result_count,
961 result_datatype,
962 &module->result_iovec_list,
963 &iovec_count);
964
965 md.start = module->result_iovec_list;
966 md.length = iovec_count;
967 if (user_ptr) {
968 md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
969 } else {
970 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
971 }
972 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
973 md.ct_handle = module->ct_h;
974 ret = PtlMDBind(module->ni_h, &md, &module->result_iovec_md_h);
975 if (PTL_OK != ret) {
976 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
977 "%s:%d: PtlMDBind(iovec) failed: %d\n",
978 __FILE__, __LINE__, ret);
979 return ret;
980 }
981
982 if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
983 PtlMDRelease(module->origin_iovec_md_h);
984 free(module->origin_iovec_list);
985 module->origin_iovec_md_h = PTL_INVALID_HANDLE;
986 module->origin_iovec_list = NULL;
987 }
988
989 create_iov_list(
990 origin_address,
991 origin_count,
992 origin_datatype,
993 &module->origin_iovec_list,
994 &iovec_count);
995
996 md.start = module->origin_iovec_list;
997 md.length = iovec_count;
998 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
999 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
1000 md.ct_handle = module->ct_h;
1001 ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
1002 if (PTL_OK != ret) {
1003 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1004 "%s:%d: PtlMDBind(iovec) failed: %d\n",
1005 __FILE__, __LINE__, ret);
1006 return ret;
1007 }
1008
1009 ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1010 if (OMPI_SUCCESS != ret) {
1011 opal_output(ompi_osc_base_framework.framework_output,
1012 "datatype is not currently supported");
1013 return OMPI_ERR_NOT_SUPPORTED;
1014 }
1015
1016 ret = ompi_datatype_get_true_extent(result_datatype, &result_lb, &extent);
1017 if (OMPI_SUCCESS != ret) {
1018 return ret;
1019 }
1020 ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
1021 if (OMPI_SUCCESS != ret) {
1022 return ret;
1023 }
1024 ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
1025 if (OMPI_SUCCESS != ret) {
1026 return ret;
1027 }
1028 ompi_datatype_type_size(origin_datatype, &size);
1029 length = size * origin_count;
1030
1031 ret = segmentedSwap(&module->opcount,
1032 module->result_iovec_md_h,
1033 (ptl_size_t) result_lb,
1034 module->origin_iovec_md_h,
1035 (ptl_size_t) origin_lb,
1036 length,
1037 module->fetch_atomic_max,
1038 peer,
1039 module->pt_idx,
1040 module->match_bits,
1041 offset + target_lb,
1042 user_ptr,
1043 ptl_dt);
1044 if (OMPI_SUCCESS != ret) {
1045 return ret;
1046 }
1047
1048 return OMPI_SUCCESS;
1049 }
1050
1051 /* perform fetch atomic operation on iovec local and contiguous remote */
1052 static int
fetch_atomic_to_iovec(ompi_osc_portals4_module_t * module,const void * result_address,int result_count,ompi_datatype_t * result_datatype,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,struct ompi_op_t * op,void * user_ptr)1053 fetch_atomic_to_iovec(ompi_osc_portals4_module_t *module,
1054 const void *result_address,
1055 int result_count,
1056 ompi_datatype_t *result_datatype,
1057 const void *origin_address,
1058 int origin_count,
1059 ompi_datatype_t *origin_datatype,
1060 ptl_process_t peer,
1061 int target_count,
1062 ompi_datatype_t *target_datatype,
1063 size_t offset,
1064 ptl_pt_index_t pt_index,
1065 ptl_match_bits_t match_bits,
1066 struct ompi_op_t *op,
1067 void *user_ptr)
1068 {
1069 int ret;
1070 size_t size;
1071 ptl_size_t iovec_count=0;
1072 ptrdiff_t length, result_lb, origin_lb, target_lb, extent;
1073 ptl_md_t md;
1074 ptl_op_t ptl_op;
1075 ptl_datatype_t ptl_dt;
1076
1077 if (module->result_iovec_md_h != PTL_INVALID_HANDLE) {
1078 PtlMDRelease(module->result_iovec_md_h);
1079 free(module->result_iovec_list);
1080 module->result_iovec_md_h = PTL_INVALID_HANDLE;
1081 module->result_iovec_list = NULL;
1082 }
1083
1084 create_iov_list(
1085 result_address,
1086 result_count,
1087 result_datatype,
1088 &module->result_iovec_list,
1089 &iovec_count);
1090
1091 md.start = module->result_iovec_list;
1092 md.length = iovec_count;
1093 if (user_ptr) {
1094 md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
1095 } else {
1096 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
1097 }
1098 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
1099 md.ct_handle = module->ct_h;
1100 ret = PtlMDBind(module->ni_h, &md, &module->result_iovec_md_h);
1101 if (PTL_OK != ret) {
1102 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1103 "%s:%d: PtlMDBind(iovec) failed: %d\n",
1104 __FILE__, __LINE__, ret);
1105 return ret;
1106 }
1107
1108 if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
1109 PtlMDRelease(module->origin_iovec_md_h);
1110 free(module->origin_iovec_list);
1111 module->origin_iovec_md_h = PTL_INVALID_HANDLE;
1112 module->origin_iovec_list = NULL;
1113 }
1114
1115 create_iov_list(
1116 origin_address,
1117 origin_count,
1118 origin_datatype,
1119 &module->origin_iovec_list,
1120 &iovec_count);
1121
1122 md.start = module->origin_iovec_list;
1123 md.length = iovec_count;
1124 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
1125 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
1126 md.ct_handle = module->ct_h;
1127 ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
1128 if (PTL_OK != ret) {
1129 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1130 "%s:%d: PtlMDBind(iovec) failed: %d\n",
1131 __FILE__, __LINE__, ret);
1132 return ret;
1133 }
1134
1135 ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1136 if (OMPI_SUCCESS != ret) {
1137 opal_output(ompi_osc_base_framework.framework_output,
1138 "datatype is not currently supported");
1139 return OMPI_ERR_NOT_SUPPORTED;
1140 }
1141 ret = ompi_osc_portals4_get_op(op, &ptl_op);
1142 if (OMPI_SUCCESS != ret) {
1143 opal_output(ompi_osc_base_framework.framework_output,
1144 "operation is not currently supported");
1145 return OMPI_ERR_NOT_SUPPORTED;
1146 }
1147
1148 ret = ompi_datatype_get_true_extent(result_datatype, &result_lb, &extent);
1149 if (OMPI_SUCCESS != ret) {
1150 return ret;
1151 }
1152 ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
1153 if (OMPI_SUCCESS != ret) {
1154 return ret;
1155 }
1156 ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
1157 if (OMPI_SUCCESS != ret) {
1158 return ret;
1159 }
1160 ompi_datatype_type_size(origin_datatype, &size);
1161 length = size * origin_count;
1162
1163 ret = segmentedFetchAtomic(&module->opcount,
1164 module->result_iovec_md_h,
1165 (ptl_size_t) result_lb,
1166 module->origin_iovec_md_h,
1167 (ptl_size_t) origin_lb,
1168 length,
1169 module->fetch_atomic_max,
1170 peer,
1171 module->pt_idx,
1172 module->match_bits,
1173 offset + target_lb,
1174 user_ptr,
1175 ptl_op,
1176 ptl_dt);
1177 if (OMPI_SUCCESS != ret) {
1178 return ret;
1179 }
1180
1181 return OMPI_SUCCESS;
1182 }
1183
1184 /*
1185 * Derived from ompi_osc_rdma_master_noncontig()
1186 */
1187
1188 /* put in the largest chunks possible given the noncontiguous restriction */
1189 static int
put_to_noncontig(int64_t * opcount,ptl_handle_md_t md_h,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)1190 put_to_noncontig(int64_t *opcount,
1191 ptl_handle_md_t md_h,
1192 const void *origin_address,
1193 int origin_count,
1194 ompi_datatype_t *origin_datatype,
1195 ptl_process_t peer,
1196 int target_count,
1197 ompi_datatype_t *target_datatype,
1198 size_t offset,
1199 ptl_pt_index_t pt_index,
1200 ptl_match_bits_t match_bits,
1201 void *user_ptr)
1202 {
1203 struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1204 opal_convertor_t origin_convertor, target_convertor;
1205 uint32_t origin_iov_count, target_iov_count;
1206 uint32_t origin_iov_index, target_iov_index;
1207 /* needed for opal_convertor_raw but not used */
1208 size_t origin_size, target_size, rdma_len;
1209 size_t max_rdma_len = mca_osc_portals4_component.ptl_max_msg_size;
1210 int ret;
1211 bool done;
1212
1213 /* prepare convertors for the source and target. these convertors will be used to determine the
1214 * contiguous segments within the source and target. */
1215 OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1216 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1217 (void*)origin_address, 0, &origin_convertor);
1218 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1219 return ret;
1220 }
1221
1222 OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1223 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1224 (void *)NULL, 0, &target_convertor);
1225 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1226 return ret;
1227 }
1228
1229 origin_iov_index = 0;
1230 origin_iov_count = 0;
1231
1232 do {
1233 /* decode segments of the remote data */
1234 target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1235 target_iov_index = 0;
1236
1237 /* opal_convertor_raw returns done when it has reached the end of the data */
1238 done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1239
1240 /* loop on the target segments until we have exhaused the decoded source data */
1241 while (target_iov_index != target_iov_count) {
1242 if (origin_iov_index == origin_iov_count) {
1243 /* decode segments of the target buffer */
1244 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1245 origin_iov_index = 0;
1246 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1247 }
1248
1249 /* we already checked that the target was large enough. this should be impossible */
1250 assert (0 != origin_iov_count);
1251
1252 /* determine how much to transfer in this operation */
1253 rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1254
1255 opal_atomic_add_64(opcount, 1);
1256
1257 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1258 "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1259 origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1260 (unsigned long) target_iovec[target_iov_index].iov_len));
1261
1262 ret = PtlPut(md_h,
1263 (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1264 rdma_len,
1265 PTL_ACK_REQ,
1266 peer,
1267 pt_index,
1268 match_bits,
1269 offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1270 user_ptr,
1271 0);
1272 if (OPAL_UNLIKELY(PTL_OK != ret)) {
1273 opal_atomic_add_64(opcount, -1);
1274 return ret;
1275 }
1276
1277 /* adjust io vectors */
1278 origin_iovec[origin_iov_index].iov_len -= rdma_len;
1279 target_iovec[target_iov_index].iov_len -= rdma_len;
1280 origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1281 target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1282
1283 origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1284 target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1285 }
1286 } while (!done);
1287
1288 /* clean up convertors */
1289 opal_convertor_cleanup (&origin_convertor);
1290 OBJ_DESTRUCT(&origin_convertor);
1291 opal_convertor_cleanup (&target_convertor);
1292 OBJ_DESTRUCT(&target_convertor);
1293
1294 return OMPI_SUCCESS;
1295 }
1296
1297 /* put in fragments no larger than max_atomic_size to guarantee atomic writes at the target */
1298 static int
atomic_put_to_noncontig(ompi_osc_portals4_module_t * module,ptl_handle_md_t md_h,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)1299 atomic_put_to_noncontig(ompi_osc_portals4_module_t *module,
1300 ptl_handle_md_t md_h,
1301 const void *origin_address,
1302 int origin_count,
1303 ompi_datatype_t *origin_datatype,
1304 ptl_process_t peer,
1305 int target_count,
1306 ompi_datatype_t *target_datatype,
1307 size_t offset,
1308 ptl_pt_index_t pt_index,
1309 ptl_match_bits_t match_bits,
1310 void *user_ptr)
1311 {
1312 struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1313 opal_convertor_t origin_convertor, target_convertor;
1314 uint32_t origin_iov_count, target_iov_count;
1315 uint32_t origin_iov_index, target_iov_index;
1316 /* needed for opal_convertor_raw but not used */
1317 size_t origin_size, target_size, rdma_len;
1318 size_t max_rdma_len = module->atomic_max;
1319 int ret;
1320 bool done;
1321
1322 /* prepare convertors for the source and target. these convertors will be used to determine the
1323 * contiguous segments within the source and target. */
1324 OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1325 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1326 (void*)origin_address, 0, &origin_convertor);
1327 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1328 return ret;
1329 }
1330
1331 OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1332 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1333 (void *)NULL, 0, &target_convertor);
1334 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1335 return ret;
1336 }
1337
1338 origin_iov_index = 0;
1339 origin_iov_count = 0;
1340
1341 do {
1342 /* decode segments of the remote data */
1343 target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1344 target_iov_index = 0;
1345
1346 /* opal_convertor_raw returns done when it has reached the end of the data */
1347 done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1348
1349 /* loop on the target segments until we have exhaused the decoded source data */
1350 while (target_iov_index != target_iov_count) {
1351 if (origin_iov_index == origin_iov_count) {
1352 /* decode segments of the target buffer */
1353 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1354 origin_iov_index = 0;
1355 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1356 }
1357
1358 /* we already checked that the target was large enough. this should be impossible */
1359 assert (0 != origin_iov_count);
1360
1361 /* determine how much to transfer in this operation */
1362 rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1363
1364 opal_atomic_add_64(&module->opcount, 1);
1365
1366 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1367 "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1368 origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1369 (unsigned long) target_iovec[target_iov_index].iov_len));
1370
1371 ret = PtlPut(md_h,
1372 (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1373 rdma_len,
1374 PTL_ACK_REQ,
1375 peer,
1376 pt_index,
1377 match_bits,
1378 offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1379 user_ptr,
1380 0);
1381 if (OPAL_UNLIKELY(PTL_OK != ret)) {
1382 opal_atomic_add_64(&module->opcount, -1);
1383 return ret;
1384 }
1385
1386 /* adjust io vectors */
1387 origin_iovec[origin_iov_index].iov_len -= rdma_len;
1388 target_iovec[target_iov_index].iov_len -= rdma_len;
1389 origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1390 target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1391
1392 origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1393 target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1394 }
1395 } while (!done);
1396
1397 return OMPI_SUCCESS;
1398 }
1399
1400 /* perform atomic operation on (non)contiguous local and noncontiguous remote */
1401 static int
atomic_to_noncontig(ompi_osc_portals4_module_t * module,ptl_handle_md_t md_h,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,struct ompi_op_t * op,void * user_ptr)1402 atomic_to_noncontig(ompi_osc_portals4_module_t *module,
1403 ptl_handle_md_t md_h,
1404 const void *origin_address,
1405 int origin_count,
1406 ompi_datatype_t *origin_datatype,
1407 ptl_process_t peer,
1408 int target_count,
1409 ompi_datatype_t *target_datatype,
1410 size_t offset,
1411 ptl_pt_index_t pt_index,
1412 ptl_match_bits_t match_bits,
1413 struct ompi_op_t *op,
1414 void *user_ptr)
1415 {
1416 struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1417 opal_convertor_t origin_convertor, target_convertor;
1418 uint32_t origin_iov_count, target_iov_count;
1419 uint32_t origin_iov_index, target_iov_index;
1420 ptl_op_t ptl_op;
1421 ptl_datatype_t ptl_dt;
1422 /* needed for opal_convertor_raw but not used */
1423 size_t origin_size, target_size, atomic_len;
1424 int ret;
1425 bool done;
1426
1427 /* prepare convertors for the source and target. these convertors will be used to determine the
1428 * contiguous segments within the source and target. */
1429 OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1430 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1431 (void*)origin_address, 0, &origin_convertor);
1432 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1433 return ret;
1434 }
1435
1436 OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1437 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1438 (void *)NULL, 0, &target_convertor);
1439 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1440 return ret;
1441 }
1442
1443 ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1444 if (OMPI_SUCCESS != ret) {
1445 opal_output(ompi_osc_base_framework.framework_output,
1446 "datatype is not currently supported");
1447 return OMPI_ERR_NOT_SUPPORTED;
1448 }
1449 ret = ompi_osc_portals4_get_op(op, &ptl_op);
1450 if (OMPI_SUCCESS != ret) {
1451 opal_output(ompi_osc_base_framework.framework_output,
1452 "operation is not currently supported");
1453 return OMPI_ERR_NOT_SUPPORTED;
1454 }
1455
1456 origin_iov_index = 0;
1457 origin_iov_count = 0;
1458
1459 do {
1460 /* decode segments of the remote data */
1461 target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1462 target_iov_index = 0;
1463
1464 /* opal_convertor_raw returns done when it has reached the end of the data */
1465 done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1466
1467 /* loop on the target segments until we have exhaused the decoded source data */
1468 while (target_iov_index != target_iov_count) {
1469 if (origin_iov_index == origin_iov_count) {
1470 /* decode segments of the target buffer */
1471 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1472 origin_iov_index = 0;
1473 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1474 }
1475
1476 /* we already checked that the target was large enough. this should be impossible */
1477 assert (0 != origin_iov_count);
1478
1479 /* determine how much to transfer in this operation */
1480 atomic_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), module->atomic_max);
1481
1482 opal_atomic_add_64(&module->opcount, 1);
1483
1484 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1485 "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1486 origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1487 (unsigned long) target_iovec[target_iov_index].iov_len));
1488
1489 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1490 "%s,%d Atomic", __FUNCTION__, __LINE__));
1491 ret = PtlAtomic(md_h,
1492 (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1493 atomic_len,
1494 PTL_ACK_REQ,
1495 peer,
1496 pt_index,
1497 match_bits,
1498 offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1499 user_ptr,
1500 0,
1501 ptl_op,
1502 ptl_dt);
1503 if (OPAL_UNLIKELY(PTL_OK != ret)) {
1504 opal_atomic_add_64(&module->opcount, -1);
1505 return ret;
1506 }
1507
1508 /* adjust io vectors */
1509 origin_iovec[origin_iov_index].iov_len -= atomic_len;
1510 target_iovec[target_iov_index].iov_len -= atomic_len;
1511 origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + atomic_len);
1512 target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + atomic_len);
1513
1514 origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1515 target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1516 }
1517 } while (!done);
1518
1519 return OMPI_SUCCESS;
1520 }
1521
1522 /* get from a noncontiguous remote to an (non)contiguous local */
1523 static int
get_from_noncontig(int64_t * opcount,ptl_handle_md_t md_h,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)1524 get_from_noncontig(int64_t *opcount,
1525 ptl_handle_md_t md_h,
1526 const void *origin_address,
1527 int origin_count,
1528 ompi_datatype_t *origin_datatype,
1529 ptl_process_t peer,
1530 int target_count,
1531 ompi_datatype_t *target_datatype,
1532 size_t offset,
1533 ptl_pt_index_t pt_index,
1534 ptl_match_bits_t match_bits,
1535 void *user_ptr)
1536 {
1537 struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1538 opal_convertor_t origin_convertor, target_convertor;
1539 uint32_t origin_iov_count, target_iov_count;
1540 uint32_t origin_iov_index, target_iov_index;
1541 /* needed for opal_convertor_raw but not used */
1542 size_t origin_size, target_size, rdma_len;
1543 size_t max_rdma_len = mca_osc_portals4_component.ptl_max_msg_size;
1544 int ret;
1545 bool done;
1546
1547 /* prepare convertors for the source and target. these convertors will be used to determine the
1548 * contiguous segments within the source and target. */
1549 OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1550 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1551 (void*)origin_address, 0, &origin_convertor);
1552 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1553 return ret;
1554 }
1555
1556 OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1557 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1558 (void *)NULL, 0, &target_convertor);
1559 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1560 return ret;
1561 }
1562
1563 origin_iov_index = 0;
1564 origin_iov_count = 0;
1565
1566 do {
1567 /* decode segments of the remote data */
1568 target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1569 target_iov_index = 0;
1570
1571 /* opal_convertor_raw returns done when it has reached the end of the data */
1572 done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1573
1574 /* loop on the target segments until we have exhaused the decoded source data */
1575 while (target_iov_index != target_iov_count) {
1576 if (origin_iov_index == origin_iov_count) {
1577 /* decode segments of the target buffer */
1578 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1579 origin_iov_index = 0;
1580 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1581 }
1582
1583 /* we already checked that the target was large enough. this should be impossible */
1584 assert (0 != origin_iov_count);
1585
1586 /* determine how much to transfer in this operation */
1587 rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1588
1589 opal_atomic_add_64(opcount, 1);
1590
1591 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1592 "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1593 origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1594 (unsigned long) target_iovec[target_iov_index].iov_len));
1595
1596 ret = PtlGet(md_h,
1597 (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1598 rdma_len,
1599 peer,
1600 pt_index,
1601 match_bits,
1602 offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1603 user_ptr);
1604 if (OPAL_UNLIKELY(PTL_OK != ret)) {
1605 opal_atomic_add_64(opcount, -1);
1606 return ret;
1607 }
1608
1609 /* adjust io vectors */
1610 origin_iovec[origin_iov_index].iov_len -= rdma_len;
1611 target_iovec[target_iov_index].iov_len -= rdma_len;
1612 origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1613 target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1614
1615 origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1616 target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1617 }
1618 } while (!done);
1619
1620 return OMPI_SUCCESS;
1621 }
1622
1623 /* get from a noncontiguous remote to an (non)contiguous local */
1624 static int
atomic_get_from_noncontig(ompi_osc_portals4_module_t * module,ptl_handle_md_t md_h,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)1625 atomic_get_from_noncontig(ompi_osc_portals4_module_t *module,
1626 ptl_handle_md_t md_h,
1627 const void *origin_address,
1628 int origin_count,
1629 ompi_datatype_t *origin_datatype,
1630 ptl_process_t peer,
1631 int target_count,
1632 ompi_datatype_t *target_datatype,
1633 size_t offset,
1634 ptl_pt_index_t pt_index,
1635 ptl_match_bits_t match_bits,
1636 void *user_ptr)
1637 {
1638 struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1639 opal_convertor_t origin_convertor, target_convertor;
1640 uint32_t origin_iov_count, target_iov_count;
1641 uint32_t origin_iov_index, target_iov_index;
1642 /* needed for opal_convertor_raw but not used */
1643 size_t origin_size, target_size, rdma_len;
1644 size_t max_rdma_len = module->fetch_atomic_max;
1645 int ret;
1646 bool done;
1647
1648 /* prepare convertors for the source and target. these convertors will be used to determine the
1649 * contiguous segments within the source and target. */
1650 OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1651 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1652 (void*)origin_address, 0, &origin_convertor);
1653 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1654 return ret;
1655 }
1656
1657 OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1658 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1659 (void *)NULL, 0, &target_convertor);
1660 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1661 return ret;
1662 }
1663
1664 origin_iov_index = 0;
1665 origin_iov_count = 0;
1666
1667 do {
1668 /* decode segments of the remote data */
1669 target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1670 target_iov_index = 0;
1671
1672 /* opal_convertor_raw returns done when it has reached the end of the data */
1673 done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1674
1675 /* loop on the target segments until we have exhaused the decoded source data */
1676 while (target_iov_index != target_iov_count) {
1677 if (origin_iov_index == origin_iov_count) {
1678 /* decode segments of the target buffer */
1679 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1680 origin_iov_index = 0;
1681 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1682 }
1683
1684 /* we already checked that the target was large enough. this should be impossible */
1685 assert (0 != origin_iov_count);
1686
1687 /* determine how much to transfer in this operation */
1688 rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1689
1690 opal_atomic_add_64(&module->opcount, 1);
1691
1692 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1693 "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1694 origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1695 (unsigned long) target_iovec[target_iov_index].iov_len));
1696
1697 ret = PtlGet(md_h,
1698 (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1699 rdma_len,
1700 peer,
1701 pt_index,
1702 match_bits,
1703 offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1704 user_ptr);
1705 if (OPAL_UNLIKELY(PTL_OK != ret)) {
1706 opal_atomic_add_64(&module->opcount, -1);
1707 return ret;
1708 }
1709
1710 /* adjust io vectors */
1711 origin_iovec[origin_iov_index].iov_len -= rdma_len;
1712 target_iovec[target_iov_index].iov_len -= rdma_len;
1713 origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1714 target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1715
1716 origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1717 target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1718 }
1719 } while (!done);
1720
1721 return OMPI_SUCCESS;
1722 }
1723
1724 /* swap from a noncontiguous remote to an (non)contiguous local */
1725 static int
swap_from_noncontig(ompi_osc_portals4_module_t * module,ptl_handle_md_t result_md_h,const void * result_address,int result_count,ompi_datatype_t * result_datatype,ptl_handle_md_t origin_md_h,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,void * user_ptr)1726 swap_from_noncontig(ompi_osc_portals4_module_t *module,
1727 ptl_handle_md_t result_md_h,
1728 const void *result_address,
1729 int result_count,
1730 ompi_datatype_t *result_datatype,
1731 ptl_handle_md_t origin_md_h,
1732 const void *origin_address,
1733 int origin_count,
1734 ompi_datatype_t *origin_datatype,
1735 ptl_process_t peer,
1736 int target_count,
1737 ompi_datatype_t *target_datatype,
1738 size_t offset,
1739 ptl_pt_index_t pt_index,
1740 ptl_match_bits_t match_bits,
1741 void *user_ptr)
1742 {
1743 struct iovec result_iovec[OSC_PORTALS4_IOVEC_MAX], origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1744 opal_convertor_t result_convertor, origin_convertor, target_convertor;
1745 uint32_t result_iov_count, origin_iov_count, target_iov_count;
1746 uint32_t result_iov_index, origin_iov_index, target_iov_index;
1747 /* needed for opal_convertor_raw but not used */
1748 size_t result_size, origin_size, target_size, rdma_len;
1749 size_t max_rdma_len = module->fetch_atomic_max;
1750 ptl_datatype_t ptl_dt;
1751
1752 int ret;
1753 bool done;
1754
1755 /* prepare convertors for the result, source and target. these convertors will be used to determine the
1756 * contiguous segments within the source and target. */
1757 OBJ_CONSTRUCT(&result_convertor, opal_convertor_t);
1758 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &result_datatype->super, result_count,
1759 (void*)result_address, 0, &result_convertor);
1760 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1761 return ret;
1762 }
1763
1764 OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1765 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1766 (void*)origin_address, 0, &origin_convertor);
1767 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1768 return ret;
1769 }
1770
1771 OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1772 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1773 (void *)NULL, 0, &target_convertor);
1774 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1775 return ret;
1776 }
1777
1778 ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1779 if (OMPI_SUCCESS != ret) {
1780 opal_output(ompi_osc_base_framework.framework_output,
1781 "datatype is not currently supported");
1782 return OMPI_ERR_NOT_SUPPORTED;
1783 }
1784
1785 result_iov_index = 0;
1786 result_iov_count = 0;
1787 origin_iov_index = 0;
1788 origin_iov_count = 0;
1789
1790 do {
1791 /* decode segments of the remote data */
1792 target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1793 target_iov_index = 0;
1794
1795 /* opal_convertor_raw returns done when it has reached the end of the data */
1796 done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1797
1798 /* loop on the target segments until we have exhaused the decoded source data */
1799 while (target_iov_index != target_iov_count) {
1800 if (result_iov_index == result_iov_count) {
1801 /* decode segments of the target buffer */
1802 result_iov_count = OSC_PORTALS4_IOVEC_MAX;
1803 result_iov_index = 0;
1804 (void) opal_convertor_raw (&result_convertor, result_iovec, &result_iov_count, &result_size);
1805 }
1806 if (origin_iov_index == origin_iov_count) {
1807 /* decode segments of the target buffer */
1808 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1809 origin_iov_index = 0;
1810 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1811 }
1812
1813 /* we already checked that the target was large enough. this should be impossible */
1814 assert (0 != result_iov_count);
1815 assert (0 != origin_iov_count);
1816
1817 /* determine how much to transfer in this operation */
1818 rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1819
1820 opal_atomic_add_64(&module->opcount, 1);
1821
1822 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1823 "performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu",
1824 result_iovec[result_iov_index].iov_base,
1825 origin_iovec[origin_iov_index].iov_base,
1826 target_iovec[target_iov_index].iov_base,
1827 (unsigned long) target_iovec[target_iov_index].iov_len));
1828
1829 ret = PtlSwap(result_md_h,
1830 (ptl_size_t)result_iovec[result_iov_index].iov_base,
1831 origin_md_h,
1832 (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1833 rdma_len,
1834 peer,
1835 pt_index,
1836 match_bits,
1837 offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1838 user_ptr,
1839 0,
1840 NULL,
1841 PTL_SWAP,
1842 ptl_dt);
1843 if (PTL_OK != ret) {
1844 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1845 "%s:%d PtlSwap failed with return value %d",
1846 __FUNCTION__, __LINE__, ret);
1847 opal_atomic_add_64(&module->opcount, -1);
1848 return ret;
1849 }
1850
1851 /* adjust io vectors */
1852 result_iovec[result_iov_index].iov_len -= rdma_len;
1853 origin_iovec[origin_iov_index].iov_len -= rdma_len;
1854 target_iovec[target_iov_index].iov_len -= rdma_len;
1855 result_iovec[result_iov_index].iov_base = (void *)((intptr_t) result_iovec[result_iov_index].iov_base + rdma_len);
1856 origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1857 target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1858
1859 result_iov_index += (0 == result_iovec[result_iov_index].iov_len);
1860 origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1861 target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1862 }
1863 } while (!done);
1864
1865 return OMPI_SUCCESS;
1866 }
1867
1868 /* swap from a noncontiguous remote to an (non)contiguous local */
1869 static int
fetch_atomic_from_noncontig(ompi_osc_portals4_module_t * module,ptl_handle_md_t result_md_h,const void * result_address,int result_count,ompi_datatype_t * result_datatype,ptl_handle_md_t origin_md_h,const void * origin_address,int origin_count,ompi_datatype_t * origin_datatype,ptl_process_t peer,int target_count,ompi_datatype_t * target_datatype,size_t offset,ptl_pt_index_t pt_index,ptl_match_bits_t match_bits,struct ompi_op_t * op,void * user_ptr)1870 fetch_atomic_from_noncontig(ompi_osc_portals4_module_t *module,
1871 ptl_handle_md_t result_md_h,
1872 const void *result_address,
1873 int result_count,
1874 ompi_datatype_t *result_datatype,
1875 ptl_handle_md_t origin_md_h,
1876 const void *origin_address,
1877 int origin_count,
1878 ompi_datatype_t *origin_datatype,
1879 ptl_process_t peer,
1880 int target_count,
1881 ompi_datatype_t *target_datatype,
1882 size_t offset,
1883 ptl_pt_index_t pt_index,
1884 ptl_match_bits_t match_bits,
1885 struct ompi_op_t *op,
1886 void *user_ptr)
1887 {
1888 struct iovec result_iovec[OSC_PORTALS4_IOVEC_MAX], origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1889 opal_convertor_t result_convertor, origin_convertor, target_convertor;
1890 uint32_t result_iov_count, origin_iov_count, target_iov_count;
1891 uint32_t result_iov_index, origin_iov_index, target_iov_index;
1892 /* needed for opal_convertor_raw but not used */
1893 size_t result_size, origin_size, target_size, rdma_len;
1894 size_t max_rdma_len = module->fetch_atomic_max;
1895 ptl_op_t ptl_op;
1896 ptl_datatype_t ptl_dt;
1897
1898 int ret;
1899 bool done;
1900
1901 /* prepare convertors for the result, source and target. these convertors will be used to determine the
1902 * contiguous segments within the source and target. */
1903 OBJ_CONSTRUCT(&result_convertor, opal_convertor_t);
1904 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &result_datatype->super, result_count,
1905 (void*)result_address, 0, &result_convertor);
1906 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1907 return ret;
1908 }
1909
1910 OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1911 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1912 (void*)origin_address, 0, &origin_convertor);
1913 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1914 return ret;
1915 }
1916
1917 OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1918 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1919 (void *)NULL, 0, &target_convertor);
1920 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1921 return ret;
1922 }
1923
1924 ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1925 if (OMPI_SUCCESS != ret) {
1926 opal_output(ompi_osc_base_framework.framework_output,
1927 "datatype is not currently supported");
1928 return OMPI_ERR_NOT_SUPPORTED;
1929 }
1930 ret = ompi_osc_portals4_get_op(op, &ptl_op);
1931 if (OMPI_SUCCESS != ret) {
1932 opal_output(ompi_osc_base_framework.framework_output,
1933 "operation is not currently supported");
1934 return OMPI_ERR_NOT_SUPPORTED;
1935 }
1936
1937 result_iov_index = 0;
1938 result_iov_count = 0;
1939 origin_iov_index = 0;
1940 origin_iov_count = 0;
1941
1942 do {
1943 /* decode segments of the remote data */
1944 target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1945 target_iov_index = 0;
1946
1947 /* opal_convertor_raw returns done when it has reached the end of the data */
1948 done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1949
1950 /* loop on the target segments until we have exhaused the decoded source data */
1951 while (target_iov_index != target_iov_count) {
1952 if (result_iov_index == result_iov_count) {
1953 /* decode segments of the target buffer */
1954 result_iov_count = OSC_PORTALS4_IOVEC_MAX;
1955 result_iov_index = 0;
1956 (void) opal_convertor_raw (&result_convertor, result_iovec, &result_iov_count, &result_size);
1957 }
1958 if (origin_iov_index == origin_iov_count) {
1959 /* decode segments of the target buffer */
1960 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1961 origin_iov_index = 0;
1962 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1963 }
1964
1965 /* we already checked that the target was large enough. this should be impossible */
1966 assert (0 != result_iov_count);
1967 assert (0 != origin_iov_count);
1968
1969 /* determine how much to transfer in this operation */
1970 rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1971
1972 opal_atomic_add_64(&module->opcount, 1);
1973
1974 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1975 "performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu",
1976 result_iovec[result_iov_index].iov_base,
1977 origin_iovec[origin_iov_index].iov_base,
1978 target_iovec[target_iov_index].iov_base,
1979 (unsigned long) target_iovec[target_iov_index].iov_len));
1980
1981 ret = PtlFetchAtomic(result_md_h,
1982 (ptl_size_t)result_iovec[result_iov_index].iov_base,
1983 origin_md_h,
1984 (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1985 rdma_len,
1986 peer,
1987 pt_index,
1988 match_bits,
1989 offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1990 user_ptr,
1991 0,
1992 ptl_op,
1993 ptl_dt);
1994 if (PTL_OK != ret) {
1995 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1996 "%s:%d PtlFetchAtomic failed with return value %d",
1997 __FUNCTION__, __LINE__, ret);
1998 opal_atomic_add_64(&module->opcount, -1);
1999 return ret;
2000 }
2001
2002 /* adjust io vectors */
2003 result_iovec[result_iov_index].iov_len -= rdma_len;
2004 origin_iovec[origin_iov_index].iov_len -= rdma_len;
2005 target_iovec[target_iov_index].iov_len -= rdma_len;
2006 result_iovec[result_iov_index].iov_base = (void *)((intptr_t) result_iovec[result_iov_index].iov_base + rdma_len);
2007 origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
2008 target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
2009
2010 result_iov_index += (0 == result_iovec[result_iov_index].iov_len);
2011 origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
2012 target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
2013 }
2014 } while (!done);
2015
2016 return OMPI_SUCCESS;
2017 }
2018
2019 int
ompi_osc_portals4_rput(const void * origin_addr,int origin_count,struct ompi_datatype_t * origin_dt,int target,ptrdiff_t target_disp,int target_count,struct ompi_datatype_t * target_dt,struct ompi_win_t * win,struct ompi_request_t ** ompi_req)2020 ompi_osc_portals4_rput(const void *origin_addr,
2021 int origin_count,
2022 struct ompi_datatype_t *origin_dt,
2023 int target,
2024 ptrdiff_t target_disp,
2025 int target_count,
2026 struct ompi_datatype_t *target_dt,
2027 struct ompi_win_t *win,
2028 struct ompi_request_t **ompi_req)
2029 {
2030 int ret;
2031 ompi_osc_portals4_request_t *request;
2032 ompi_osc_portals4_module_t *module =
2033 (ompi_osc_portals4_module_t*) win->w_osc_module;
2034 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2035 size_t size, offset;
2036 ptrdiff_t length, origin_lb, target_lb, extent;
2037
2038 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2039 "rput: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
2040 (unsigned long) origin_addr, origin_count,
2041 origin_dt->name, target, (unsigned long) target_disp,
2042 target_count, target_dt->name,
2043 (unsigned long) win));
2044
2045 OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
2046 if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
2047 *ompi_req = &request->super;
2048
2049 offset = get_displacement(module, target) * target_disp;
2050
2051 if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2052 ret = put_to_noncontig(&module->opcount,
2053 module->req_md_h,
2054 origin_addr,
2055 origin_count,
2056 origin_dt,
2057 peer,
2058 target_count,
2059 target_dt,
2060 offset,
2061 module->pt_idx,
2062 module->match_bits,
2063 request);
2064 if (PTL_OK != ret) {
2065 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2066 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2067 "%s,%d put_to_noncontig() failed: ret = %d",
2068 __FUNCTION__, __LINE__, ret));
2069 return ret;
2070 }
2071 } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2072 ret = put_from_iovec(module,
2073 origin_addr,
2074 origin_count,
2075 origin_dt,
2076 peer,
2077 target_count,
2078 target_dt,
2079 offset,
2080 module->pt_idx,
2081 module->match_bits,
2082 request);
2083 if (PTL_OK != ret) {
2084 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2085 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2086 "%s,%d put_from_iovec() failed: ret = %d",
2087 __FUNCTION__, __LINE__, ret));
2088 return ret;
2089 }
2090 } else {
2091 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2092 if (OMPI_SUCCESS != ret) {
2093 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2094 return ret;
2095 }
2096 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2097 if (OMPI_SUCCESS != ret) {
2098 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2099 return ret;
2100 }
2101 ompi_datatype_type_size(origin_dt, &size);
2102 length = size * origin_count;
2103
2104 request->ops_expected += number_of_fragments(length, mca_osc_portals4_component.ptl_max_msg_size);
2105
2106 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2107 "%s,%d RPut(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)",
2108 __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount));
2109 ret = segmentedPut(&module->opcount,
2110 module->req_md_h,
2111 (ptl_size_t) origin_addr + origin_lb,
2112 length,
2113 mca_osc_portals4_component.ptl_max_msg_size,
2114 PTL_ACK_REQ,
2115 peer,
2116 module->pt_idx,
2117 module->match_bits,
2118 offset + target_lb,
2119 request,
2120 0);
2121 if (OMPI_SUCCESS != ret) {
2122 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2123 return ret;
2124 }
2125 }
2126
2127 return OMPI_SUCCESS;
2128 }
2129
2130
2131 int
ompi_osc_portals4_rget(void * origin_addr,int origin_count,struct ompi_datatype_t * origin_dt,int target,ptrdiff_t target_disp,int target_count,struct ompi_datatype_t * target_dt,struct ompi_win_t * win,struct ompi_request_t ** ompi_req)2132 ompi_osc_portals4_rget(void *origin_addr,
2133 int origin_count,
2134 struct ompi_datatype_t *origin_dt,
2135 int target,
2136 ptrdiff_t target_disp,
2137 int target_count,
2138 struct ompi_datatype_t *target_dt,
2139 struct ompi_win_t *win,
2140 struct ompi_request_t **ompi_req)
2141 {
2142 int ret;
2143 ompi_osc_portals4_request_t *request;
2144 ompi_osc_portals4_module_t *module =
2145 (ompi_osc_portals4_module_t*) win->w_osc_module;
2146 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2147 size_t offset, size;
2148 ptrdiff_t length, origin_lb, target_lb, extent;
2149
2150 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2151 "rget: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
2152 (unsigned long) origin_addr, origin_count,
2153 origin_dt->name, target, (unsigned long) target_disp,
2154 target_count, target_dt->name,
2155 (unsigned long) win));
2156
2157 OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
2158 if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
2159 *ompi_req = &request->super;
2160
2161 offset = get_displacement(module, target) * target_disp;
2162
2163 if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2164 ret = get_from_noncontig(&module->opcount,
2165 module->req_md_h,
2166 origin_addr,
2167 origin_count,
2168 origin_dt,
2169 peer,
2170 target_count,
2171 target_dt,
2172 offset,
2173 module->pt_idx,
2174 module->match_bits,
2175 request);
2176 if (PTL_OK != ret) {
2177 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2178 "%s,%d get_from_noncontig() failed: ret = %d",
2179 __FUNCTION__, __LINE__, ret));
2180 return ret;
2181 }
2182 } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2183 ret = get_to_iovec(module,
2184 origin_addr,
2185 origin_count,
2186 origin_dt,
2187 peer,
2188 target_count,
2189 target_dt,
2190 offset,
2191 module->pt_idx,
2192 module->match_bits,
2193 request);
2194 if (PTL_OK != ret) {
2195 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2196 "%s,%d get_to_iovec() failed: ret = %d",
2197 __FUNCTION__, __LINE__, ret));
2198 return ret;
2199 }
2200 } else {
2201 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2202 if (OMPI_SUCCESS != ret) {
2203 return ret;
2204 }
2205 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2206 if (OMPI_SUCCESS != ret) {
2207 return ret;
2208 }
2209 ompi_datatype_type_size(origin_dt, &size);
2210 length = size * origin_count;
2211
2212 request->ops_expected += number_of_fragments(length, mca_osc_portals4_component.ptl_max_msg_size);
2213
2214 OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
2215 "%s,%d RGet", __FUNCTION__, __LINE__));
2216 ret = segmentedGet(&module->opcount,
2217 module->req_md_h,
2218 (ptl_size_t) origin_addr + origin_lb,
2219 length,
2220 mca_osc_portals4_component.ptl_max_msg_size,
2221 peer,
2222 module->pt_idx,
2223 module->match_bits,
2224 offset + target_lb,
2225 request);
2226 if (OMPI_SUCCESS != ret) {
2227 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2228 return ret;
2229 }
2230 }
2231
2232 return OMPI_SUCCESS;
2233 }
2234
2235
2236 int
ompi_osc_portals4_raccumulate(const void * origin_addr,int origin_count,struct ompi_datatype_t * origin_dt,int target,ptrdiff_t target_disp,int target_count,struct ompi_datatype_t * target_dt,struct ompi_op_t * op,struct ompi_win_t * win,struct ompi_request_t ** ompi_req)2237 ompi_osc_portals4_raccumulate(const void *origin_addr,
2238 int origin_count,
2239 struct ompi_datatype_t *origin_dt,
2240 int target,
2241 ptrdiff_t target_disp,
2242 int target_count,
2243 struct ompi_datatype_t *target_dt,
2244 struct ompi_op_t *op,
2245 struct ompi_win_t *win,
2246 struct ompi_request_t **ompi_req)
2247 {
2248 int ret;
2249 ompi_osc_portals4_request_t *request;
2250 ompi_osc_portals4_module_t *module =
2251 (ompi_osc_portals4_module_t*) win->w_osc_module;
2252 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2253 size_t offset, size;
2254 ptl_op_t ptl_op;
2255 ptl_datatype_t ptl_dt;
2256 ptrdiff_t sent, length, origin_lb, target_lb, extent;
2257
2258 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2259 "raccumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s 0x%lx",
2260 (unsigned long) origin_addr, origin_count,
2261 origin_dt->name, target, (unsigned long) target_disp,
2262 target_count, target_dt->name,
2263 op->o_name,
2264 (unsigned long) win));
2265
2266 OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
2267 if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
2268 *ompi_req = &request->super;
2269
2270 offset = get_displacement(module, target) * target_disp;
2271
2272 if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2273 if (MPI_REPLACE == op) {
2274 ret = atomic_put_to_noncontig(module,
2275 module->req_md_h,
2276 origin_addr,
2277 origin_count,
2278 origin_dt,
2279 peer,
2280 target_count,
2281 target_dt,
2282 offset,
2283 module->pt_idx,
2284 module->match_bits,
2285 request);
2286 if (PTL_OK != ret) {
2287 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2288 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2289 "%s,%d atomic_put_to_noncontig() failed: ret = %d",
2290 __FUNCTION__, __LINE__, ret));
2291 return ret;
2292 }
2293 } else {
2294 ret = atomic_to_noncontig(module,
2295 module->req_md_h,
2296 origin_addr,
2297 origin_count,
2298 origin_dt,
2299 peer,
2300 target_count,
2301 target_dt,
2302 offset,
2303 module->pt_idx,
2304 module->match_bits,
2305 op,
2306 request);
2307 if (PTL_OK != ret) {
2308 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2309 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2310 "%s,%d atomic_to_noncontig() failed: ret = %d",
2311 __FUNCTION__, __LINE__, ret));
2312 return ret;
2313 }
2314 }
2315 } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2316 if (MPI_REPLACE == op) {
2317 ret = atomic_put_from_iovec(module,
2318 origin_addr,
2319 origin_count,
2320 origin_dt,
2321 peer,
2322 target_count,
2323 target_dt,
2324 offset,
2325 module->pt_idx,
2326 module->match_bits,
2327 request);
2328 if (PTL_OK != ret) {
2329 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2330 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2331 "%s,%d atomic_put_from_iovec() failed: ret = %d",
2332 __FUNCTION__, __LINE__, ret));
2333 return ret;
2334 }
2335 } else {
2336 ret = atomic_from_iovec(module,
2337 origin_addr,
2338 origin_count,
2339 origin_dt,
2340 peer,
2341 target_count,
2342 target_dt,
2343 offset,
2344 module->pt_idx,
2345 module->match_bits,
2346 op,
2347 request);
2348 if (PTL_OK != ret) {
2349 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2350 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2351 "%s,%d atomic_from_iovec() failed: ret = %d",
2352 __FUNCTION__, __LINE__, ret));
2353 return ret;
2354 }
2355 }
2356 } else {
2357 ptl_size_t md_offset;
2358
2359 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2360 if (OMPI_SUCCESS != ret) {
2361 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2362 return ret;
2363 }
2364 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2365 if (OMPI_SUCCESS != ret) {
2366 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2367 return ret;
2368 }
2369 ompi_datatype_type_size(origin_dt, &size);
2370 length = size * origin_count;
2371 sent = 0;
2372
2373 md_offset = (ptl_size_t) origin_addr;
2374
2375 request->ops_expected += number_of_fragments(length, module->atomic_max);
2376
2377 if (MPI_REPLACE == op) {
2378 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2379 "%s,%d Put", __FUNCTION__, __LINE__));
2380 ret = segmentedPut(&module->opcount,
2381 module->req_md_h,
2382 md_offset + origin_lb,
2383 length,
2384 module->atomic_max,
2385 PTL_ACK_REQ,
2386 peer,
2387 module->pt_idx,
2388 module->match_bits,
2389 offset + target_lb,
2390 request,
2391 0);
2392 if (OMPI_SUCCESS != ret) {
2393 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2394 return ret;
2395 }
2396 } else {
2397 ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
2398 if (OMPI_SUCCESS != ret) {
2399 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2400 opal_output(ompi_osc_base_framework.framework_output,
2401 "datatype is not currently supported");
2402 return OMPI_ERR_NOT_SUPPORTED;
2403 }
2404 ret = ompi_osc_portals4_get_op(op, &ptl_op);
2405 if (OMPI_SUCCESS != ret) {
2406 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2407 opal_output(ompi_osc_base_framework.framework_output,
2408 "operation is not currently supported");
2409 return OMPI_ERR_NOT_SUPPORTED;
2410 }
2411 do {
2412 size_t msg_length = MIN(module->atomic_max, length - sent);
2413
2414 (void)opal_atomic_add_64(&module->opcount, 1);
2415
2416 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2417 "%s,%d Atomic", __FUNCTION__, __LINE__));
2418 ret = PtlAtomic(module->req_md_h,
2419 md_offset + sent + origin_lb,
2420 msg_length,
2421 PTL_ACK_REQ,
2422 peer,
2423 module->pt_idx,
2424 module->match_bits,
2425 offset + sent + target_lb,
2426 request,
2427 0,
2428 ptl_op,
2429 ptl_dt);
2430 if (OMPI_SUCCESS != ret) {
2431 (void)opal_atomic_add_64(&module->opcount, -1);
2432 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2433 return ret;
2434 }
2435 sent += msg_length;
2436 } while (sent < length);
2437 }
2438 }
2439
2440 return OMPI_SUCCESS;
2441 }
2442
2443
2444 int
ompi_osc_portals4_rget_accumulate(const void * origin_addr,int origin_count,struct ompi_datatype_t * origin_dt,void * result_addr,int result_count,struct ompi_datatype_t * result_dt,int target,ptrdiff_t target_disp,int target_count,struct ompi_datatype_t * target_dt,struct ompi_op_t * op,struct ompi_win_t * win,struct ompi_request_t ** ompi_req)2445 ompi_osc_portals4_rget_accumulate(const void *origin_addr,
2446 int origin_count,
2447 struct ompi_datatype_t *origin_dt,
2448 void *result_addr,
2449 int result_count,
2450 struct ompi_datatype_t *result_dt,
2451 int target,
2452 ptrdiff_t target_disp,
2453 int target_count,
2454 struct ompi_datatype_t *target_dt,
2455 struct ompi_op_t *op,
2456 struct ompi_win_t *win,
2457 struct ompi_request_t **ompi_req)
2458 {
2459 int ret;
2460 ompi_osc_portals4_request_t *request;
2461 ompi_osc_portals4_module_t *module =
2462 (ompi_osc_portals4_module_t*) win->w_osc_module;
2463 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2464 size_t target_offset, size;
2465 ptl_op_t ptl_op;
2466 ptl_datatype_t ptl_dt;
2467 ptrdiff_t length, origin_lb, target_lb, result_lb, extent;
2468
2469 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2470 "rget_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx",
2471 (unsigned long) origin_addr, origin_count,
2472 origin_dt->name, (unsigned long) result_addr,
2473 result_count, result_dt->name,
2474 target, (unsigned long) target_disp,
2475 target_count, target_dt->name,
2476 op->o_name,
2477 (unsigned long) win));
2478
2479 OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
2480 if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
2481 *ompi_req = &request->super;
2482
2483 target_offset = get_displacement(module, target) * target_disp;
2484
2485 if (target_count > 0 && !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2486 if (MPI_REPLACE == op) {
2487 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2488 "rget_accumulate: MPI_REPLACE non-contiguous target"));
2489 ret = swap_from_noncontig(module,
2490 module->req_md_h,
2491 result_addr,
2492 result_count,
2493 result_dt,
2494 module->md_h,
2495 origin_addr,
2496 origin_count,
2497 origin_dt,
2498 peer,
2499 target_count,
2500 target_dt,
2501 target_offset,
2502 module->pt_idx,
2503 module->match_bits,
2504 request);
2505 if (PTL_OK != ret) {
2506 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2507 "%s,%d swap_from_noncontig() failed: ret = %d",
2508 __FUNCTION__, __LINE__, ret));
2509 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2510 return ret;
2511 }
2512 } else if (MPI_NO_OP == op) {
2513 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2514 "rget_accumulate: MPI_NO_OP non-contiguous target"));
2515 ret = atomic_get_from_noncontig(module,
2516 module->req_md_h,
2517 result_addr,
2518 result_count,
2519 result_dt,
2520 peer,
2521 target_count,
2522 target_dt,
2523 target_offset,
2524 module->pt_idx,
2525 module->match_bits,
2526 request);
2527 if (PTL_OK != ret) {
2528 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2529 "%s,%d atomic_get_from_noncontig() failed: ret = %d",
2530 __FUNCTION__, __LINE__, ret));
2531 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2532 return ret;
2533 }
2534 } else {
2535 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2536 "rget_accumulate: other-op non-contiguous target"));
2537 ret = fetch_atomic_from_noncontig(module,
2538 module->req_md_h,
2539 result_addr,
2540 result_count,
2541 result_dt,
2542 module->md_h,
2543 origin_addr,
2544 origin_count,
2545 origin_dt,
2546 peer,
2547 target_count,
2548 target_dt,
2549 target_offset,
2550 module->pt_idx,
2551 module->match_bits,
2552 op,
2553 request);
2554 if (PTL_OK != ret) {
2555 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2556 "%s,%d fetch_atomic_from_noncontig() failed: ret = %d",
2557 __FUNCTION__, __LINE__, ret));
2558 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2559 return ret;
2560 }
2561 }
2562 } else if ((origin_count > 0 && !ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) ||
2563 (result_count > 0 && !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count))) {
2564 if (MPI_REPLACE == op) {
2565 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2566 "rget_accumulate: MPI_REPLACE non-contiguous origin/result"));
2567 ret = swap_to_iovec(module,
2568 result_addr,
2569 result_count,
2570 result_dt,
2571 origin_addr,
2572 origin_count,
2573 origin_dt,
2574 peer,
2575 target_count,
2576 target_dt,
2577 target_offset,
2578 module->pt_idx,
2579 module->match_bits,
2580 request);
2581 if (PTL_OK != ret) {
2582 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2583 "%s,%d swap_to_iovec() failed: ret = %d",
2584 __FUNCTION__, __LINE__, ret));
2585 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2586 return ret;
2587 }
2588 } else if (MPI_NO_OP == op) {
2589 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2590 "rget_accumulate: MPI_NO_OP non-contiguous origin/result"));
2591 ret = atomic_get_to_iovec(module,
2592 result_addr,
2593 result_count,
2594 result_dt,
2595 peer,
2596 target_count,
2597 target_dt,
2598 target_offset,
2599 module->pt_idx,
2600 module->match_bits,
2601 request);
2602 if (PTL_OK != ret) {
2603 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2604 "%s,%d atomic_get_to_iovec() failed: ret = %d",
2605 __FUNCTION__, __LINE__, ret));
2606 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2607 return ret;
2608 }
2609 } else {
2610 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2611 "rget_accumulate: other-op non-contiguous origin/result"));
2612 ret = fetch_atomic_to_iovec(module,
2613 result_addr,
2614 result_count,
2615 result_dt,
2616 origin_addr,
2617 origin_count,
2618 origin_dt,
2619 peer,
2620 target_count,
2621 target_dt,
2622 target_offset,
2623 module->pt_idx,
2624 module->match_bits,
2625 op,
2626 request);
2627 if (PTL_OK != ret) {
2628 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2629 "%s,%d fetch_atomic_to_iovec() failed: ret = %d",
2630 __FUNCTION__, __LINE__, ret));
2631 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2632 return ret;
2633 }
2634 }
2635 } else {
2636 if (MPI_REPLACE == op) {
2637 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2638 "rget_accumulate: MPI_REPLACE contiguous"));
2639 ptl_size_t result_md_offset, origin_md_offset;
2640
2641 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2642 if (OMPI_SUCCESS != ret) {
2643 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2644 return ret;
2645 }
2646 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2647 if (OMPI_SUCCESS != ret) {
2648 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2649 return ret;
2650 }
2651 ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
2652 if (OMPI_SUCCESS != ret) {
2653 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2654 return ret;
2655 }
2656 ompi_datatype_type_size(origin_dt, &size);
2657 length = size * origin_count;
2658
2659 ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
2660 if (OMPI_SUCCESS != ret) {
2661 opal_output(ompi_osc_base_framework.framework_output,
2662 "datatype is not currently supported");
2663 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2664 return OMPI_ERR_NOT_SUPPORTED;
2665 }
2666
2667 result_md_offset = (ptl_size_t) result_addr;
2668 origin_md_offset = (ptl_size_t) origin_addr;
2669
2670 request->ops_expected += number_of_fragments(length, module->fetch_atomic_max);
2671
2672 ret = segmentedSwap(&module->opcount,
2673 module->req_md_h,
2674 result_md_offset + result_lb,
2675 module->md_h,
2676 origin_md_offset + origin_lb,
2677 length,
2678 module->fetch_atomic_max,
2679 peer,
2680 module->pt_idx,
2681 module->match_bits,
2682 target_offset + target_lb,
2683 request,
2684 ptl_dt);
2685 if (OMPI_SUCCESS != ret) {
2686 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2687 return ret;
2688 }
2689 } else if (MPI_NO_OP == op) {
2690 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2691 "rget_accumulate: MPI_NO_OP contiguous"));
2692 ptl_size_t md_offset;
2693
2694 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2695 if (OMPI_SUCCESS != ret) {
2696 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2697 return ret;
2698 }
2699 ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
2700 if (OMPI_SUCCESS != ret) {
2701 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2702 return ret;
2703 }
2704 ompi_datatype_type_size(target_dt, &size);
2705 length = size * target_count;
2706
2707 md_offset = (ptl_size_t) result_addr;
2708
2709 request->ops_expected += number_of_fragments(length, module->fetch_atomic_max);
2710
2711 OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
2712 "%s,%d MPI_Get_accumulate", __FUNCTION__, __LINE__));
2713 ret = segmentedGet(&module->opcount,
2714 module->req_md_h,
2715 (ptl_size_t) md_offset + result_lb,
2716 length,
2717 module->fetch_atomic_max,
2718 peer,
2719 module->pt_idx,
2720 module->match_bits,
2721 target_offset + target_lb,
2722 request);
2723 if (OMPI_SUCCESS != ret) {
2724 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2725 return ret;
2726 }
2727 } else {
2728 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2729 "rget_accumulate: other-op contiguous"));
2730 ptl_size_t result_md_offset, origin_md_offset;
2731
2732 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2733 if (OMPI_SUCCESS != ret) {
2734 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2735 return ret;
2736 }
2737 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2738 if (OMPI_SUCCESS != ret) {
2739 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2740 return ret;
2741 }
2742 ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
2743 if (OMPI_SUCCESS != ret) {
2744 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2745 return ret;
2746 }
2747 ompi_datatype_type_size(origin_dt, &size);
2748 length = size * origin_count;
2749
2750 result_md_offset = (ptl_size_t) result_addr;
2751 origin_md_offset = (ptl_size_t) origin_addr;
2752
2753 ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
2754 if (OMPI_SUCCESS != ret) {
2755 opal_output(ompi_osc_base_framework.framework_output,
2756 "datatype is not currently supported");
2757 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2758 return OMPI_ERR_NOT_SUPPORTED;
2759 }
2760
2761 ret = ompi_osc_portals4_get_op(op, &ptl_op);
2762 if (OMPI_SUCCESS != ret) {
2763 opal_output(ompi_osc_base_framework.framework_output,
2764 "operation is not currently supported");
2765 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2766 return OMPI_ERR_NOT_SUPPORTED;
2767 }
2768
2769 request->ops_expected += number_of_fragments(length, module->fetch_atomic_max);
2770
2771 ret = segmentedFetchAtomic(&module->opcount,
2772 module->req_md_h,
2773 result_md_offset + result_lb,
2774 module->md_h,
2775 origin_md_offset + origin_lb,
2776 length,
2777 module->fetch_atomic_max,
2778 peer,
2779 module->pt_idx,
2780 module->match_bits,
2781 target_offset + target_lb,
2782 request,
2783 ptl_op,
2784 ptl_dt);
2785 if (OMPI_SUCCESS != ret) {
2786 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2787 return ret;
2788 }
2789 }
2790 }
2791
2792 return OMPI_SUCCESS;
2793 }
2794
2795
2796 int
ompi_osc_portals4_put(const void * origin_addr,int origin_count,struct ompi_datatype_t * origin_dt,int target,ptrdiff_t target_disp,int target_count,struct ompi_datatype_t * target_dt,struct ompi_win_t * win)2797 ompi_osc_portals4_put(const void *origin_addr,
2798 int origin_count,
2799 struct ompi_datatype_t *origin_dt,
2800 int target,
2801 ptrdiff_t target_disp,
2802 int target_count,
2803 struct ompi_datatype_t *target_dt,
2804 struct ompi_win_t *win)
2805 {
2806 int ret;
2807 ompi_osc_portals4_module_t *module =
2808 (ompi_osc_portals4_module_t*) win->w_osc_module;
2809 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2810 size_t offset, size;
2811 ptrdiff_t length, origin_lb, target_lb, extent;
2812
2813 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2814 "put: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
2815 (unsigned long) origin_addr, origin_count,
2816 origin_dt->name, target, (unsigned long) target_disp,
2817 target_count, target_dt->name,
2818 (unsigned long) win));
2819
2820 offset = get_displacement(module, target) * target_disp;
2821
2822 if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2823 ret = put_to_noncontig(&module->opcount,
2824 module->md_h,
2825 origin_addr,
2826 origin_count,
2827 origin_dt,
2828 peer,
2829 target_count,
2830 target_dt,
2831 offset,
2832 module->pt_idx,
2833 module->match_bits,
2834 NULL);
2835 if (PTL_OK != ret) {
2836 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2837 "%s,%d put_to_noncontig() failed: ret = %d",
2838 __FUNCTION__, __LINE__, ret));
2839 return ret;
2840 }
2841 } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2842 ret = put_from_iovec(module,
2843 origin_addr,
2844 origin_count,
2845 origin_dt,
2846 peer,
2847 target_count,
2848 target_dt,
2849 offset,
2850 module->pt_idx,
2851 module->match_bits,
2852 NULL);
2853 if (PTL_OK != ret) {
2854 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2855 "%s,%d put_from_iovec() failed: ret = %d",
2856 __FUNCTION__, __LINE__, ret));
2857 return ret;
2858 }
2859 } else {
2860 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2861 if (OMPI_SUCCESS != ret) {
2862 return ret;
2863 }
2864 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2865 if (OMPI_SUCCESS != ret) {
2866 return ret;
2867 }
2868 ompi_datatype_type_size(origin_dt, &size);
2869 length = size * origin_count;
2870
2871 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2872 "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)",
2873 __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount));
2874 ret = segmentedPut(&module->opcount,
2875 module->md_h,
2876 (ptl_size_t) origin_addr + origin_lb,
2877 length,
2878 mca_osc_portals4_component.ptl_max_msg_size,
2879 PTL_ACK_REQ,
2880 peer,
2881 module->pt_idx,
2882 module->match_bits,
2883 offset + target_lb,
2884 NULL,
2885 0);
2886 if (OMPI_SUCCESS != ret) {
2887 return ret;
2888 }
2889 }
2890
2891 return OMPI_SUCCESS;
2892 }
2893
2894
2895 int
ompi_osc_portals4_get(void * origin_addr,int origin_count,struct ompi_datatype_t * origin_dt,int target,ptrdiff_t target_disp,int target_count,struct ompi_datatype_t * target_dt,struct ompi_win_t * win)2896 ompi_osc_portals4_get(void *origin_addr,
2897 int origin_count,
2898 struct ompi_datatype_t *origin_dt,
2899 int target,
2900 ptrdiff_t target_disp,
2901 int target_count,
2902 struct ompi_datatype_t *target_dt,
2903 struct ompi_win_t *win)
2904 {
2905 int ret;
2906 ompi_osc_portals4_module_t *module =
2907 (ompi_osc_portals4_module_t*) win->w_osc_module;
2908 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2909 size_t offset, size;
2910 ptrdiff_t length, origin_lb, target_lb, extent;
2911
2912 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2913 "get: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
2914 (unsigned long) origin_addr, origin_count,
2915 origin_dt->name, target, (unsigned long) target_disp,
2916 target_count, target_dt->name,
2917 (unsigned long) win));
2918
2919 offset = get_displacement(module, target) * target_disp;
2920
2921 if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2922 ret = get_from_noncontig(&module->opcount,
2923 module->md_h,
2924 origin_addr,
2925 origin_count,
2926 origin_dt,
2927 peer,
2928 target_count,
2929 target_dt,
2930 offset,
2931 module->pt_idx,
2932 module->match_bits,
2933 NULL);
2934 if (PTL_OK != ret) {
2935 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2936 "%s,%d get_from_noncontig() failed: ret = %d",
2937 __FUNCTION__, __LINE__, ret));
2938 return ret;
2939 }
2940 } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2941 ret = get_to_iovec(module,
2942 origin_addr,
2943 origin_count,
2944 origin_dt,
2945 peer,
2946 target_count,
2947 target_dt,
2948 offset,
2949 module->pt_idx,
2950 module->match_bits,
2951 NULL);
2952 if (PTL_OK != ret) {
2953 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2954 "%s,%d get_to_iovec() failed: ret = %d",
2955 __FUNCTION__, __LINE__, ret));
2956 return ret;
2957 }
2958 } else {
2959 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2960 if (OMPI_SUCCESS != ret) {
2961 return ret;
2962 }
2963 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2964 if (OMPI_SUCCESS != ret) {
2965 return ret;
2966 }
2967 ompi_datatype_type_size(origin_dt, &size);
2968 length = size * origin_count;
2969
2970 OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
2971 "%s,%d Get", __FUNCTION__, __LINE__));
2972 ret = segmentedGet(&module->opcount,
2973 module->md_h,
2974 (ptl_size_t) origin_addr + origin_lb,
2975 length,
2976 mca_osc_portals4_component.ptl_max_msg_size,
2977 peer,
2978 module->pt_idx,
2979 module->match_bits,
2980 offset + target_lb,
2981 NULL);
2982 if (OMPI_SUCCESS != ret) {
2983 return ret;
2984 }
2985 }
2986
2987 return OMPI_SUCCESS;
2988 }
2989
2990
2991 int
ompi_osc_portals4_accumulate(const void * origin_addr,int origin_count,struct ompi_datatype_t * origin_dt,int target,ptrdiff_t target_disp,int target_count,struct ompi_datatype_t * target_dt,struct ompi_op_t * op,struct ompi_win_t * win)2992 ompi_osc_portals4_accumulate(const void *origin_addr,
2993 int origin_count,
2994 struct ompi_datatype_t *origin_dt,
2995 int target,
2996 ptrdiff_t target_disp,
2997 int target_count,
2998 struct ompi_datatype_t *target_dt,
2999 struct ompi_op_t *op,
3000 struct ompi_win_t *win)
3001 {
3002 int ret;
3003 ompi_osc_portals4_module_t *module =
3004 (ompi_osc_portals4_module_t*) win->w_osc_module;
3005 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
3006 size_t offset, size;
3007 ptl_op_t ptl_op;
3008 ptl_datatype_t ptl_dt;
3009 ptrdiff_t sent, length, origin_lb, target_lb, extent;
3010
3011 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3012 "accumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx",
3013 (unsigned long) origin_addr, origin_count,
3014 origin_dt->name, target, (unsigned long) target_disp,
3015 target_count, target_dt->name,
3016 op->o_name,
3017 (unsigned long) win));
3018
3019 offset = get_displacement(module, target) * target_disp;
3020
3021 if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
3022 if (MPI_REPLACE == op) {
3023 ret = atomic_put_to_noncontig(module,
3024 module->md_h,
3025 origin_addr,
3026 origin_count,
3027 origin_dt,
3028 peer,
3029 target_count,
3030 target_dt,
3031 offset,
3032 module->pt_idx,
3033 module->match_bits,
3034 NULL);
3035 if (PTL_OK != ret) {
3036 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3037 "%s,%d atomic_put_to_noncontig() failed: ret = %d",
3038 __FUNCTION__, __LINE__, ret));
3039 return ret;
3040 }
3041 } else {
3042 ret = atomic_to_noncontig(module,
3043 module->md_h,
3044 origin_addr,
3045 origin_count,
3046 origin_dt,
3047 peer,
3048 target_count,
3049 target_dt,
3050 offset,
3051 module->pt_idx,
3052 module->match_bits,
3053 op,
3054 NULL);
3055 if (PTL_OK != ret) {
3056 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3057 "%s,%d atomic_to_noncontig() failed: ret = %d",
3058 __FUNCTION__, __LINE__, ret));
3059 return ret;
3060 }
3061 }
3062 } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
3063 if (MPI_REPLACE == op) {
3064 ret = atomic_put_from_iovec(module,
3065 origin_addr,
3066 origin_count,
3067 origin_dt,
3068 peer,
3069 target_count,
3070 target_dt,
3071 offset,
3072 module->pt_idx,
3073 module->match_bits,
3074 NULL);
3075 if (PTL_OK != ret) {
3076 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3077 "%s,%d atomic_put_from_iovec() failed: ret = %d",
3078 __FUNCTION__, __LINE__, ret));
3079 return ret;
3080 }
3081 } else {
3082 ret = atomic_from_iovec(module,
3083 origin_addr,
3084 origin_count,
3085 origin_dt,
3086 peer,
3087 target_count,
3088 target_dt,
3089 offset,
3090 module->pt_idx,
3091 module->match_bits,
3092 op,
3093 NULL);
3094 if (PTL_OK != ret) {
3095 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3096 "%s,%d atomic_from_iovec() failed: ret = %d",
3097 __FUNCTION__, __LINE__, ret));
3098 return ret;
3099 }
3100 }
3101 } else {
3102 ptl_size_t md_offset;
3103
3104 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
3105 if (OMPI_SUCCESS != ret) {
3106 return ret;
3107 }
3108 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
3109 if (OMPI_SUCCESS != ret) {
3110 return ret;
3111 }
3112 ompi_datatype_type_size(origin_dt, &size);
3113 length = size * origin_count;
3114 sent = 0;
3115
3116 md_offset = (ptl_size_t) origin_addr;
3117
3118 if (MPI_REPLACE == op) {
3119 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3120 "%s,%d Put", __FUNCTION__, __LINE__));
3121 ret = segmentedPut(&module->opcount,
3122 module->md_h,
3123 md_offset + origin_lb,
3124 length,
3125 module->atomic_max,
3126 PTL_ACK_REQ,
3127 peer,
3128 module->pt_idx,
3129 module->match_bits,
3130 offset + target_lb,
3131 NULL,
3132 0);
3133 if (OMPI_SUCCESS != ret) {
3134 return ret;
3135 }
3136 } else {
3137 ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
3138 if (OMPI_SUCCESS != ret) {
3139 opal_output(ompi_osc_base_framework.framework_output,
3140 "datatype is not currently supported");
3141 return OMPI_ERR_NOT_SUPPORTED;
3142 }
3143 ret = ompi_osc_portals4_get_op(op, &ptl_op);
3144 if (OMPI_SUCCESS != ret) {
3145 opal_output(ompi_osc_base_framework.framework_output,
3146 "operation is not currently supported");
3147 return OMPI_ERR_NOT_SUPPORTED;
3148 }
3149 do {
3150 size_t msg_length = MIN(module->atomic_max, length - sent);
3151
3152 (void)opal_atomic_add_64(&module->opcount, 1);
3153
3154 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3155 "%s,%d Atomic", __FUNCTION__, __LINE__));
3156 ret = PtlAtomic(module->md_h,
3157 md_offset + sent + origin_lb,
3158 msg_length,
3159 PTL_ACK_REQ,
3160 peer,
3161 module->pt_idx,
3162 module->match_bits,
3163 offset + sent + target_lb,
3164 NULL,
3165 0,
3166 ptl_op,
3167 ptl_dt);
3168 if (OMPI_SUCCESS != ret) {
3169 (void)opal_atomic_add_64(&module->opcount, -1);
3170 return ret;
3171 }
3172 sent += msg_length;
3173 } while (sent < length);
3174 }
3175 }
3176
3177 return OMPI_SUCCESS;
3178 }
3179
3180
3181 int
ompi_osc_portals4_get_accumulate(const void * origin_addr,int origin_count,struct ompi_datatype_t * origin_dt,void * result_addr,int result_count,struct ompi_datatype_t * result_dt,int target,ptrdiff_t target_disp,int target_count,struct ompi_datatype_t * target_dt,struct ompi_op_t * op,struct ompi_win_t * win)3182 ompi_osc_portals4_get_accumulate(const void *origin_addr,
3183 int origin_count,
3184 struct ompi_datatype_t *origin_dt,
3185 void *result_addr,
3186 int result_count,
3187 struct ompi_datatype_t *result_dt,
3188 int target,
3189 ptrdiff_t target_disp,
3190 int target_count,
3191 struct ompi_datatype_t *target_dt,
3192 struct ompi_op_t *op,
3193 struct ompi_win_t *win)
3194 {
3195 int ret;
3196 ompi_osc_portals4_module_t *module =
3197 (ompi_osc_portals4_module_t*) win->w_osc_module;
3198 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
3199 size_t target_offset, size;
3200 ptl_op_t ptl_op;
3201 ptl_datatype_t ptl_dt;
3202 ptrdiff_t length, origin_lb, target_lb, result_lb, extent;
3203
3204 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3205 "get_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx",
3206 (unsigned long) origin_addr, origin_count,
3207 origin_dt->name, (unsigned long) result_addr,
3208 result_count, result_dt->name,
3209 target, (unsigned long) target_disp,
3210 target_count, target_dt->name,
3211 op->o_name,
3212 (unsigned long) win));
3213
3214 target_offset = get_displacement(module, target) * target_disp;
3215
3216 if (target_count > 0 && !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
3217 if (MPI_REPLACE == op) {
3218 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3219 "get_accumulate: MPI_REPLACE non-contiguous target"));
3220 ret = swap_from_noncontig(module,
3221 module->md_h,
3222 result_addr,
3223 result_count,
3224 result_dt,
3225 module->md_h,
3226 origin_addr,
3227 origin_count,
3228 origin_dt,
3229 peer,
3230 target_count,
3231 target_dt,
3232 target_offset,
3233 module->pt_idx,
3234 module->match_bits,
3235 NULL);
3236 if (PTL_OK != ret) {
3237 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3238 "%s,%d swap_from_noncontig() failed: ret = %d",
3239 __FUNCTION__, __LINE__, ret));
3240 return ret;
3241 }
3242 } else if (MPI_NO_OP == op) {
3243 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3244 "get_accumulate: MPI_NO_OP non-contiguous target"));
3245 ret = atomic_get_from_noncontig(module,
3246 module->md_h,
3247 result_addr,
3248 result_count,
3249 result_dt,
3250 peer,
3251 target_count,
3252 target_dt,
3253 target_offset,
3254 module->pt_idx,
3255 module->match_bits,
3256 NULL);
3257 if (PTL_OK != ret) {
3258 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3259 "%s,%d atomic_get_from_noncontig() failed: ret = %d",
3260 __FUNCTION__, __LINE__, ret));
3261 return ret;
3262 }
3263 } else {
3264 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3265 "get_accumulate: other-op non-contiguous target"));
3266 ret = fetch_atomic_from_noncontig(module,
3267 module->md_h,
3268 result_addr,
3269 result_count,
3270 result_dt,
3271 module->md_h,
3272 origin_addr,
3273 origin_count,
3274 origin_dt,
3275 peer,
3276 target_count,
3277 target_dt,
3278 target_offset,
3279 module->pt_idx,
3280 module->match_bits,
3281 op,
3282 NULL);
3283 if (PTL_OK != ret) {
3284 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3285 "%s,%d fetch_atomic_from_noncontig() failed: ret = %d",
3286 __FUNCTION__, __LINE__, ret));
3287 return ret;
3288 }
3289 }
3290 } else if ((origin_count > 0 && !ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) ||
3291 (result_count > 0 && !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count))) {
3292 if (MPI_REPLACE == op) {
3293 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3294 "get_accumulate: MPI_REPLACE non-contiguous origin/result"));
3295 ret = swap_to_iovec(module,
3296 result_addr,
3297 result_count,
3298 result_dt,
3299 origin_addr,
3300 origin_count,
3301 origin_dt,
3302 peer,
3303 target_count,
3304 target_dt,
3305 target_offset,
3306 module->pt_idx,
3307 module->match_bits,
3308 NULL);
3309 if (PTL_OK != ret) {
3310 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3311 "%s,%d swap_to_iovec() failed: ret = %d",
3312 __FUNCTION__, __LINE__, ret));
3313 return ret;
3314 }
3315 } else if (MPI_NO_OP == op) {
3316 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3317 "get_accumulate: MPI_NO_OP non-contiguous origin/result"));
3318 ret = atomic_get_to_iovec(module,
3319 result_addr,
3320 result_count,
3321 result_dt,
3322 peer,
3323 target_count,
3324 target_dt,
3325 target_offset,
3326 module->pt_idx,
3327 module->match_bits,
3328 NULL);
3329 if (PTL_OK != ret) {
3330 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3331 "%s,%d atomic_get_to_iovec() failed: ret = %d",
3332 __FUNCTION__, __LINE__, ret));
3333 return ret;
3334 }
3335 } else {
3336 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3337 "get_accumulate: other-op non-contiguous origin/result"));
3338 ret = fetch_atomic_to_iovec(module,
3339 result_addr,
3340 result_count,
3341 result_dt,
3342 origin_addr,
3343 origin_count,
3344 origin_dt,
3345 peer,
3346 target_count,
3347 target_dt,
3348 target_offset,
3349 module->pt_idx,
3350 module->match_bits,
3351 op,
3352 NULL);
3353 if (PTL_OK != ret) {
3354 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3355 "%s,%d fetch_atomic_to_iovec() failed: ret = %d",
3356 __FUNCTION__, __LINE__, ret));
3357 return ret;
3358 }
3359 }
3360 } else {
3361 if (MPI_REPLACE == op) {
3362 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3363 "get_accumulate: MPI_REPLACE contiguous"));
3364 ptl_size_t result_md_offset, origin_md_offset;
3365
3366 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
3367 if (OMPI_SUCCESS != ret) {
3368 return ret;
3369 }
3370 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
3371 if (OMPI_SUCCESS != ret) {
3372 return ret;
3373 }
3374 ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
3375 if (OMPI_SUCCESS != ret) {
3376 return ret;
3377 }
3378 ompi_datatype_type_size(origin_dt, &size);
3379 length = size * origin_count;
3380
3381 ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
3382 if (OMPI_SUCCESS != ret) {
3383 opal_output(ompi_osc_base_framework.framework_output,
3384 "MPI_Get_accumulate: datatype is not currently supported");
3385 return OMPI_ERR_NOT_SUPPORTED;
3386 }
3387
3388 result_md_offset = (ptl_size_t) result_addr;
3389 origin_md_offset = (ptl_size_t) origin_addr;
3390
3391 ret = segmentedSwap(&module->opcount,
3392 module->md_h,
3393 result_md_offset + result_lb,
3394 module->md_h,
3395 origin_md_offset + origin_lb,
3396 length,
3397 module->fetch_atomic_max,
3398 peer,
3399 module->pt_idx,
3400 module->match_bits,
3401 target_offset + target_lb,
3402 NULL,
3403 ptl_dt);
3404 if (OMPI_SUCCESS != ret) {
3405 return ret;
3406 }
3407 } else if (MPI_NO_OP == op) {
3408 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3409 "get_accumulate: MPI_NO_OP contiguous"));
3410 ptl_size_t md_offset;
3411
3412 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
3413 if (OMPI_SUCCESS != ret) {
3414 return ret;
3415 }
3416 ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
3417 if (OMPI_SUCCESS != ret) {
3418 return ret;
3419 }
3420 ompi_datatype_type_size(target_dt, &size);
3421 length = size * target_count;
3422
3423 md_offset = (ptl_size_t) result_addr;
3424
3425 OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
3426 "%s,%d MPI_Get_accumulate", __FUNCTION__, __LINE__));
3427 ret = segmentedGet(&module->opcount,
3428 module->md_h,
3429 (ptl_size_t) md_offset + result_lb,
3430 length,
3431 module->fetch_atomic_max,
3432 peer,
3433 module->pt_idx,
3434 module->match_bits,
3435 target_offset + target_lb,
3436 NULL);
3437 if (OMPI_SUCCESS != ret) {
3438 return ret;
3439 }
3440 } else {
3441 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3442 "get_accumulate: other-op contiguous"));
3443 ptl_size_t result_md_offset, origin_md_offset;
3444
3445 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
3446 if (OMPI_SUCCESS != ret) {
3447 return ret;
3448 }
3449 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
3450 if (OMPI_SUCCESS != ret) {
3451 return ret;
3452 }
3453 ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
3454 if (OMPI_SUCCESS != ret) {
3455 return ret;
3456 }
3457 ompi_datatype_type_size(origin_dt, &size);
3458 length = size * origin_count;
3459
3460 result_md_offset = (ptl_size_t) result_addr;
3461 origin_md_offset = (ptl_size_t) origin_addr;
3462
3463 ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
3464 if (OMPI_SUCCESS != ret) {
3465 opal_output(ompi_osc_base_framework.framework_output,
3466 "MPI_Get_accumulate: datatype is not currently supported");
3467 return OMPI_ERR_NOT_SUPPORTED;
3468 }
3469
3470 ret = ompi_osc_portals4_get_op(op, &ptl_op);
3471 if (OMPI_SUCCESS != ret) {
3472 opal_output(ompi_osc_base_framework.framework_output,
3473 "MPI_Get_accumulate: operation is not currently supported");
3474 return OMPI_ERR_NOT_SUPPORTED;
3475 }
3476
3477 ret = segmentedFetchAtomic(&module->opcount,
3478 module->md_h,
3479 result_md_offset + result_lb,
3480 module->md_h,
3481 origin_md_offset + origin_lb,
3482 length,
3483 module->fetch_atomic_max,
3484 peer,
3485 module->pt_idx,
3486 module->match_bits,
3487 target_offset + target_lb,
3488 NULL,
3489 ptl_op,
3490 ptl_dt);
3491 if (OMPI_SUCCESS != ret) {
3492 return ret;
3493 }
3494 }
3495 }
3496
3497 return OMPI_SUCCESS;
3498 }
3499
3500
3501 int
ompi_osc_portals4_compare_and_swap(const void * origin_addr,const void * compare_addr,void * result_addr,struct ompi_datatype_t * dt,int target,ptrdiff_t target_disp,struct ompi_win_t * win)3502 ompi_osc_portals4_compare_and_swap(const void *origin_addr,
3503 const void *compare_addr,
3504 void *result_addr,
3505 struct ompi_datatype_t *dt,
3506 int target,
3507 ptrdiff_t target_disp,
3508 struct ompi_win_t *win)
3509 {
3510 int ret;
3511 ompi_osc_portals4_module_t *module =
3512 (ompi_osc_portals4_module_t*) win->w_osc_module;
3513 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
3514 size_t length;
3515 size_t offset;
3516 ptl_datatype_t ptl_dt;
3517 ptl_size_t result_md_offset, origin_md_offset;
3518
3519 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3520 "compare_and_swap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %lu, 0x%lx",
3521 (unsigned long) origin_addr,
3522 (unsigned long) compare_addr,
3523 (unsigned long) result_addr,
3524 dt->name, target, (unsigned long) target_disp,
3525 (unsigned long) win));
3526
3527 ret = ompi_osc_portals4_get_dt(dt, &ptl_dt);
3528 if (OMPI_SUCCESS != ret) {
3529 opal_output(ompi_osc_base_framework.framework_output,
3530 "MPI_Compare_and_swap: datatype is not currently supported");
3531 return OMPI_ERR_NOT_SUPPORTED;
3532 }
3533
3534 offset = get_displacement(module, target) * target_disp;
3535
3536 ret = ompi_datatype_type_size(dt, &length);
3537 if (OMPI_SUCCESS != ret) return ret;
3538
3539 assert(length <= module->fetch_atomic_max);
3540
3541 result_md_offset = (ptl_size_t) result_addr;
3542 origin_md_offset = (ptl_size_t) origin_addr;
3543
3544 (void)opal_atomic_add_64(&module->opcount, 1);
3545
3546 OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
3547 "%s,%d Swap", __FUNCTION__, __LINE__));
3548 ret = PtlSwap(module->md_h,
3549 result_md_offset,
3550 module->md_h,
3551 origin_md_offset,
3552 length,
3553 peer,
3554 module->pt_idx,
3555 module->match_bits,
3556 offset,
3557 NULL,
3558 0,
3559 compare_addr,
3560 PTL_CSWAP,
3561 ptl_dt);
3562 if (OMPI_SUCCESS != ret) {
3563 return ret;
3564 }
3565
3566 return OMPI_SUCCESS;
3567 }
3568
3569
3570 int
ompi_osc_portals4_fetch_and_op(const void * origin_addr,void * result_addr,struct ompi_datatype_t * dt,int target,ptrdiff_t target_disp,struct ompi_op_t * op,struct ompi_win_t * win)3571 ompi_osc_portals4_fetch_and_op(const void *origin_addr,
3572 void *result_addr,
3573 struct ompi_datatype_t *dt,
3574 int target,
3575 ptrdiff_t target_disp,
3576 struct ompi_op_t *op,
3577 struct ompi_win_t *win)
3578 {
3579 int ret;
3580 ompi_osc_portals4_module_t *module =
3581 (ompi_osc_portals4_module_t*) win->w_osc_module;
3582 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
3583 size_t length;
3584 size_t offset;
3585 ptl_op_t ptl_op;
3586 ptl_datatype_t ptl_dt;
3587
3588 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3589 "fetch_and_op: 0x%lx, 0x%lx, %s, %d, %lu, %s, 0x%lx",
3590 (unsigned long) origin_addr,
3591 (unsigned long) result_addr,
3592 dt->name, target, (unsigned long) target_disp,
3593 op->o_name,
3594 (unsigned long) win));
3595
3596 ret = ompi_osc_portals4_get_dt(dt, &ptl_dt);
3597 if (OMPI_SUCCESS != ret) {
3598 opal_output(ompi_osc_base_framework.framework_output,
3599 "MPI_Fetch_and_op: datatype is not currently supported");
3600 return OMPI_ERR_NOT_SUPPORTED;
3601 }
3602
3603 offset = get_displacement(module, target) * target_disp;
3604
3605 ret = ompi_datatype_type_size(dt, &length);
3606 if (OMPI_SUCCESS != ret) return ret;
3607
3608 assert(length <= module->fetch_atomic_max);
3609
3610 if (MPI_REPLACE == op) {
3611 ptl_size_t result_md_offset, origin_md_offset;
3612
3613 result_md_offset = (ptl_size_t) result_addr;
3614 origin_md_offset = (ptl_size_t) origin_addr;
3615
3616 (void)opal_atomic_add_64(&module->opcount, 1);
3617 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3618 "%s,%d Swap", __FUNCTION__, __LINE__));
3619 ret = PtlSwap(module->md_h,
3620 result_md_offset,
3621 module->md_h,
3622 origin_md_offset,
3623 length,
3624 peer,
3625 module->pt_idx,
3626 module->match_bits,
3627 offset,
3628 NULL,
3629 0,
3630 NULL,
3631 PTL_SWAP,
3632 ptl_dt);
3633 } else if (MPI_NO_OP == op) {
3634 ptl_size_t md_offset;
3635
3636 md_offset = (ptl_size_t) result_addr;
3637
3638 (void)opal_atomic_add_64(&module->opcount, 1);
3639 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3640 "%s,%d Get", __FUNCTION__, __LINE__));
3641 ret = PtlGet(module->md_h,
3642 md_offset,
3643 length,
3644 peer,
3645 module->pt_idx,
3646 module->match_bits,
3647 offset,
3648 NULL);
3649 } else {
3650 ptl_size_t result_md_offset, origin_md_offset;
3651 (void)opal_atomic_add_64(&module->opcount, 1);
3652
3653 ret = ompi_osc_portals4_get_op(op, &ptl_op);
3654 if (OMPI_SUCCESS != ret) {
3655 opal_output(ompi_osc_base_framework.framework_output,
3656 "MPI_Fetch_and_op: operation is not currently supported");
3657 return OMPI_ERR_NOT_SUPPORTED;
3658 }
3659
3660 result_md_offset = (ptl_size_t) result_addr;
3661 origin_md_offset = (ptl_size_t) origin_addr;
3662
3663 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3664 "%s,%d FetchAtomic", __FUNCTION__, __LINE__));
3665 ret = PtlFetchAtomic(module->md_h,
3666 result_md_offset,
3667 module->md_h,
3668 origin_md_offset,
3669 length,
3670 peer,
3671 module->pt_idx,
3672 module->match_bits,
3673 offset,
3674 NULL,
3675 0,
3676 ptl_op,
3677 ptl_dt);
3678 }
3679 if (OMPI_SUCCESS != ret) {
3680 return ret;
3681 }
3682
3683 return OMPI_SUCCESS;
3684 }
3685