1 /*
2 * Copyright (C) by Argonne National Laboratory
3 * See COPYRIGHT in top-level directory
4 */
5
6 #ifndef MPID_RMA_SHM_H_INCLUDED
7 #define MPID_RMA_SHM_H_INCLUDED
8
9 #include "utlist.h"
10 #include "mpid_rma_types.h"
11
/*
 * Forward declaration of the accumulate kernel called by the accumulate and
 * get-accumulate paths in this header.  The definition is not part of this
 * header.
 *
 * As used below: source_buf/source_count/source_dtp describe the operand,
 * target_buf/target_count/target_dtp describe the window memory that is
 * updated, and acc_op is the MPI reduction operation.  Callers in this file
 * pass stream_offset == 0 together with MPIDI_RMA_ACC_SRCBUF_DEFAULT when
 * handing over the caller's origin buffer directly, and a byte offset
 * together with MPIDI_RMA_ACC_SRCBUF_PACKED when handing over one packed
 * chunk of a larger origin buffer.
 *
 * The return value is treated by the callers as an MPI error code.
 */
static inline int do_accumulate_op(void *source_buf, int source_count, MPI_Datatype source_dtp,
                                   void *target_buf, int target_count, MPI_Datatype target_dtp,
                                   MPI_Aint stream_offset, MPI_Op acc_op,
                                   MPIDI_RMA_Acc_srcbuf_kind_t srckind);
16
/*
 * ASSIGN_COPY - copy `count` elements of C type `type` from `src` to `dest`
 * using a plain assignment loop, then jump to the `fn_exit` label of the
 * enclosing function.
 *
 * The enclosing function must therefore define an `fn_exit` label.  The
 * macro is a single statement and must be followed by a semicolon.
 *
 * Every argument is parenthesized so that expression arguments (for example
 * `buf + off` or `cond ? a : b`) bind as the caller intended, and the locals
 * carry a trailing-underscore prefix so they cannot capture identifiers used
 * in the arguments.  `count` is evaluated once per loop iteration, so it
 * should be free of side effects.
 */
#define ASSIGN_COPY(src, dest, count, type)                                 \
    do {                                                                    \
        const type *assign_src_ = (const type *) (src);                     \
        type *assign_dest_ = (type *) (dest);                               \
        int assign_idx_;                                                    \
        for (assign_idx_ = 0; assign_idx_ < (count); assign_idx_++)         \
            assign_dest_[assign_idx_] = assign_src_[assign_idx_];           \
        goto fn_exit;                                                       \
    } while (0)
26
/*
 * shm_copy - copy `scount` elements of `stype` from `src` into `dest`, which
 * is described by `dcount` elements of `dtype`.
 *
 * Short transfers of a selected set of predefined C datatypes are copied with
 * an element-wise assignment loop (ASSIGN_COPY, which jumps straight to
 * fn_exit when it is done).  Every other case is delegated to
 * MPIR_Localcopy().
 *
 * Returns MPI_SUCCESS after a fast-path copy, otherwise the value returned by
 * MPIR_Localcopy().
 */
static inline int shm_copy(const void *src, int scount, MPI_Datatype stype,
                           void *dest, int dcount, MPI_Datatype dtype)
{
    int mpi_errno = MPI_SUCCESS;

    /* We use a threshold of operations under which a for loop of assignments is
     * used.  Even though this happens at smaller block lengths, making it
     * potentially inefficient, it can take advantage of some vectorization
     * available on most modern processors. */
#define SHM_OPS_THRESHOLD (16)

    /* The assignment loops below look only at stype and scount.  They are
     * therefore only safe when the destination is described by the same
     * datatype and has room for at least scount elements; otherwise the loop
     * could write past the end of dest.  Any other combination is left to
     * MPIR_Localcopy(), which receives both descriptions.  (stype == dtype
     * also implies that dtype is predefined.) */
    if (MPIR_DATATYPE_IS_PREDEFINED(stype) && stype == dtype &&
        scount <= dcount && scount <= SHM_OPS_THRESHOLD) {

        /* FIXME: We currently only optimize a few predefined datatypes, which
         * have a direct C datatype mapping. */

        /* The below list of datatypes is based on those specified in the MPI-3
         * standard on page 665. */
        switch (stype) {
            case MPI_CHAR:
                ASSIGN_COPY(src, dest, scount, char);

            case MPI_SHORT:
                ASSIGN_COPY(src, dest, scount, signed short int);

            case MPI_INT:
                ASSIGN_COPY(src, dest, scount, signed int);

            case MPI_LONG:
                ASSIGN_COPY(src, dest, scount, signed long int);

            case MPI_LONG_LONG_INT:    /* covers MPI_LONG_LONG too */
                ASSIGN_COPY(src, dest, scount, signed long long int);

            case MPI_SIGNED_CHAR:
                ASSIGN_COPY(src, dest, scount, signed char);

            case MPI_UNSIGNED_CHAR:
                ASSIGN_COPY(src, dest, scount, unsigned char);

            case MPI_UNSIGNED_SHORT:
                ASSIGN_COPY(src, dest, scount, unsigned short int);

            case MPI_UNSIGNED:
                ASSIGN_COPY(src, dest, scount, unsigned int);

            case MPI_UNSIGNED_LONG:
                ASSIGN_COPY(src, dest, scount, unsigned long int);

            case MPI_UNSIGNED_LONG_LONG:
                ASSIGN_COPY(src, dest, scount, unsigned long long int);

            case MPI_FLOAT:
                ASSIGN_COPY(src, dest, scount, float);

            case MPI_DOUBLE:
                ASSIGN_COPY(src, dest, scount, double);

            case MPI_LONG_DOUBLE:
                ASSIGN_COPY(src, dest, scount, long double);

#if 0
                /* FIXME: we need a configure check to define HAVE_WCHAR_T before
                 * this can be enabled */
            case MPI_WCHAR:
                ASSIGN_COPY(src, dest, scount, wchar_t);
#endif

#if 0
                /* FIXME: we need a configure check to define HAVE_C_BOOL before
                 * this can be enabled */
            case MPI_C_BOOL:
                ASSIGN_COPY(src, dest, scount, _Bool);
#endif

#if HAVE_INT8_T
            case MPI_INT8_T:
                ASSIGN_COPY(src, dest, scount, int8_t);
#endif /* HAVE_INT8_T */

#if HAVE_INT16_T
            case MPI_INT16_T:
                ASSIGN_COPY(src, dest, scount, int16_t);
#endif /* HAVE_INT16_T */

#if HAVE_INT32_T
            case MPI_INT32_T:
                ASSIGN_COPY(src, dest, scount, int32_t);
#endif /* HAVE_INT32_T */

#if HAVE_INT64_T
            case MPI_INT64_T:
                ASSIGN_COPY(src, dest, scount, int64_t);
#endif /* HAVE_INT64_T */

#if HAVE_UINT8_T
            case MPI_UINT8_T:
                ASSIGN_COPY(src, dest, scount, uint8_t);
#endif /* HAVE_UINT8_T */

#if HAVE_UINT16_T
            case MPI_UINT16_T:
                ASSIGN_COPY(src, dest, scount, uint16_t);
#endif /* HAVE_UINT16_T */

#if HAVE_UINT32_T
            case MPI_UINT32_T:
                ASSIGN_COPY(src, dest, scount, uint32_t);
#endif /* HAVE_UINT32_T */

#if HAVE_UINT64_T
            case MPI_UINT64_T:
                ASSIGN_COPY(src, dest, scount, uint64_t);
#endif /* HAVE_UINT64_T */

            case MPI_AINT:
                ASSIGN_COPY(src, dest, scount, MPI_Aint);

            case MPI_COUNT:
                ASSIGN_COPY(src, dest, scount, MPI_Count);

            case MPI_OFFSET:
                ASSIGN_COPY(src, dest, scount, MPI_Offset);

#if 0
                /* FIXME: we need a configure check to define HAVE_C_COMPLEX before
                 * this can be enabled */
            case MPI_C_COMPLEX:        /* covers MPI_C_FLOAT_COMPLEX as well */
                ASSIGN_COPY(src, dest, scount, float _Complex);
#endif

#if 0
                /* FIXME: we need a configure check to define HAVE_C_DOUBLE_COMPLEX
                 * before this can be enabled */
            case MPI_C_DOUBLE_COMPLEX:
                ASSIGN_COPY(src, dest, scount, double _Complex);
#endif

#if 0
                /* FIXME: we need a configure check to define
                 * HAVE_C_LONG_DOUBLE_COMPLEX before this can be enabled */
            case MPI_C_LONG_DOUBLE_COMPLEX:
                ASSIGN_COPY(src, dest, scount, long double _Complex);
#endif

#if 0
                /* Types that don't have a direct equivalent */
            case MPI_BYTE:
            case MPI_PACKED:
#endif

#if 0   /* Fortran types */
            case MPI_INTEGER:
            case MPI_REAL:
            case MPI_DOUBLE_PRECISION:
            case MPI_COMPLEX:
            case MPI_LOGICAL:
            case MPI_CHARACTER:
#endif

#if 0   /* C++ types */
            case MPI_CXX_BOOL:
            case MPI_CXX_FLOAT_COMPLEX:
            case MPI_CXX_DOUBLE_COMPLEX:
            case MPI_CXX_LONG_DOUBLE_COMPLEX:
#endif

#if 0   /* Optional Fortran types */
            case MPI_DOUBLE_COMPLEX:
            case MPI_INTEGER1:
            case MPI_INTEGER2:
            case MPI_INTEGER4:
            case MPI_INTEGER8:
            case MPI_INTEGER16:
            case MPI_REAL2:
            case MPI_REAL4:
            case MPI_REAL8:
            case MPI_REAL16:
            case MPI_COMPLEX4:
            case MPI_COMPLEX8:
            case MPI_COMPLEX16:
            case MPI_COMPLEX32:
#endif

#if 0   /* C datatypes for reduction functions */
            case MPI_FLOAT_INT:
            case MPI_DOUBLE_INT:
            case MPI_LONG_INT:
            case MPI_2INT:
            case MPI_LONG_DOUBLE_INT:
#endif

#if 0   /* Fortran datatypes for reduction functions */
            case MPI_2REAL:
            case MPI_2DOUBLE_PRECISION:
            case MPI_2INTEGER:
#endif

            default:
                /* Just to make sure the switch statement is not empty.  Types
                 * without a fast path fall through to the generic copy. */
                ;
        }
    }

    /* Generic path: anything the fast path above did not handle. */
    mpi_errno = MPIR_Localcopy(src, scount, stype, dest, dcount, dtype);
    MPIR_ERR_CHECK(mpi_errno);

  fn_exit:
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
242
/*
 * MPIDI_CH3I_Shm_put_op - perform a put by copying the origin buffer directly
 * into the target's window memory.
 *
 * The target address is the window base plus disp_unit * target_disp.  For a
 * window with shm_allocated set, base and disp_unit are looked up per target
 * (shm_base_addrs via the communicator's intranode_table, and
 * basic_info_table); otherwise the window's own base and disp_unit are used.
 *
 * Returns the error code produced by shm_copy().
 */
static inline int MPIDI_CH3I_Shm_put_op(const void *origin_addr, int origin_count, MPI_Datatype
                                        origin_datatype, int target_rank, MPI_Aint target_disp,
                                        int target_count, MPI_Datatype target_datatype,
                                        MPIR_Win * win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    int unit;
    void *win_base = NULL;
    char *target_addr;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_PUT_OP);

    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_PUT_OP);

    if (win_ptr->shm_allocated != TRUE) {
        /* Plain window: use this window's own base and displacement unit. */
        win_base = win_ptr->base;
        unit = win_ptr->disp_unit;
    } else {
        /* Shared-memory window: translate the target rank to its node-local
         * index to find the mapped base address. */
        int local_target_rank = win_ptr->comm_ptr->intranode_table[target_rank];
        MPIR_Assert(local_target_rank >= 0);
        win_base = win_ptr->shm_base_addrs[local_target_rank];
        unit = win_ptr->basic_info_table[target_rank].disp_unit;
    }

    target_addr = (char *) win_base + unit * target_disp;

    mpi_errno = shm_copy(origin_addr, origin_count, origin_datatype,
                         target_addr, target_count, target_datatype);
    MPIR_ERR_CHECK(mpi_errno);

  fn_exit:
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_PUT_OP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
278
279
/*
 * MPIDI_CH3I_Shm_acc_op - apply an accumulate operation directly to the
 * target's window memory.
 *
 * The target address is the window base plus disp_unit * target_disp.  For a
 * window with shm_allocated set, base and disp_unit are looked up per target
 * and every call to do_accumulate_op() is bracketed by
 * MPIDI_CH3I_SHM_MUTEX_LOCK / MPIDI_CH3I_SHM_MUTEX_UNLOCK; otherwise the
 * window's own base and disp_unit are used and no mutex is taken.
 *
 * A predefined origin datatype is handed to do_accumulate_op() in one call.
 * Any other origin datatype is packed and applied in chunks of at most
 * MPIDI_CH3U_Acc_stream_size / extent(basic_type) elements, each chunk going
 * through a temporary buffer.
 *
 * Returns MPI_SUCCESS or the first error returned by do_accumulate_op().
 */
static inline int MPIDI_CH3I_Shm_acc_op(const void *origin_addr, int origin_count, MPI_Datatype
                                        origin_datatype, int target_rank, MPI_Aint target_disp,
                                        int target_count, MPI_Datatype target_datatype, MPI_Op op,
                                        MPIR_Win * win_ptr)
{
    void *base = NULL;
    int disp_unit, shm_op = 0;
    int mpi_errno = MPI_SUCCESS;
    MPI_Aint i;                 /* same width as stream_unit_count */
    MPI_Datatype basic_type;
    MPI_Aint stream_elem_count, stream_unit_count;
    MPI_Aint predefined_dtp_size, predefined_dtp_extent, predefined_dtp_count;
    MPI_Aint total_len, rest_len;
    MPI_Aint origin_dtp_size;
    MPIR_Datatype *origin_dtp_ptr = NULL;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_ACC_OP);

    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_ACC_OP);

    if (win_ptr->shm_allocated == TRUE) {
        int local_target_rank = win_ptr->comm_ptr->intranode_table[target_rank];
        MPIR_Assert(local_target_rank >= 0);
        shm_op = 1;
        base = win_ptr->shm_base_addrs[local_target_rank];
        disp_unit = win_ptr->basic_info_table[target_rank].disp_unit;
    }
    else {
        base = win_ptr->base;
        disp_unit = win_ptr->disp_unit;
    }

    if (MPIR_DATATYPE_IS_PREDEFINED(origin_datatype)) {
        /* Predefined origin type: no packing needed, apply in one step. */
        if (shm_op) {
            MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
        }
        mpi_errno = do_accumulate_op((void *) origin_addr, origin_count, origin_datatype,
                                     (void *) ((char *) base + disp_unit * target_disp),
                                     target_count, target_datatype, 0, op,
                                     MPIDI_RMA_ACC_SRCBUF_DEFAULT);
        if (shm_op) {
            MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
        }

        /* The mutex is already released here, so fn_fail has nothing to undo. */
        MPIR_ERR_CHECK(mpi_errno);

        goto fn_exit;
    }

    /* Get total length of origin data */
    MPIR_Datatype_get_size_macro(origin_datatype, origin_dtp_size);
    total_len = origin_dtp_size * origin_count;

    MPIR_Datatype_get_ptr(origin_datatype, origin_dtp_ptr);
    MPIR_Assert(origin_dtp_ptr != NULL && origin_dtp_ptr->basic_type != MPI_DATATYPE_NULL);
    basic_type = origin_dtp_ptr->basic_type;
    MPIR_Datatype_get_size_macro(basic_type, predefined_dtp_size);
    predefined_dtp_count = total_len / predefined_dtp_size;
    MPIR_Datatype_get_extent_macro(basic_type, predefined_dtp_extent);
    MPIR_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 && predefined_dtp_extent > 0);

    /* Number of basic elements per chunk, and number of chunks (rounded up). */
    stream_elem_count = MPIDI_CH3U_Acc_stream_size / predefined_dtp_extent;
    stream_unit_count = (predefined_dtp_count - 1) / stream_elem_count + 1;
    MPIR_Assert(stream_elem_count > 0 && stream_unit_count > 0);

    rest_len = total_len;
    for (i = 0; i < stream_unit_count; i++) {
        void *packed_buf = NULL;
        MPI_Aint stream_offset, stream_size, stream_count;
        MPI_Aint actual_pack_bytes;

        /* Byte offset and byte length of this chunk within the packed origin
         * data; the last chunk may be shorter. */
        stream_offset = i * stream_elem_count * predefined_dtp_size;
        stream_size = MPL_MIN(stream_elem_count * predefined_dtp_size, rest_len);
        stream_count = stream_size / predefined_dtp_size;
        rest_len -= stream_size;

        /* NOTE(review): the allocation result is not checked before it is
         * passed to MPIR_Typerep_pack -- consider reporting an out-of-memory
         * error here. */
        packed_buf = MPL_malloc(stream_size, MPL_MEM_BUFFER);

        MPIR_Typerep_pack(origin_addr, origin_count, origin_datatype,
                          stream_offset, packed_buf, stream_size, &actual_pack_bytes);
        MPIR_Assert(actual_pack_bytes == stream_size);

        if (shm_op) {
            MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
        }

        MPIR_Assert(stream_count == (int) stream_count);
        mpi_errno = do_accumulate_op((void *) packed_buf, (int) stream_count, basic_type,
                                     (void *) ((char *) base + disp_unit * target_disp),
                                     target_count, target_datatype, stream_offset, op,
                                     MPIDI_RMA_ACC_SRCBUF_PACKED);

        if (shm_op) {
            MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
        }

        /* Release the staging buffer before the error check: MPIR_ERR_CHECK
         * jumps to fn_fail on failure and packed_buf is not visible there, so
         * freeing afterwards would leak it on the error path. */
        MPL_free(packed_buf);

        MPIR_ERR_CHECK(mpi_errno);
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_ACC_OP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
388
389
/*
 * MPIDI_CH3I_Shm_get_acc_op - fetch the current target contents into the
 * result buffer, then apply an accumulate operation to the target, working
 * directly on the target's window memory.
 *
 * The target address is the window base plus disp_unit * target_disp.  For a
 * window with shm_allocated set, base and disp_unit are looked up per target
 * and MPIDI_CH3I_SHM_MUTEX_LOCK is held across both the fetch and the
 * accumulate; otherwise the window's own base and disp_unit are used and no
 * mutex is taken.
 *
 * When op is MPI_NO_OP or the origin datatype is predefined, the accumulate
 * is a single do_accumulate_op() call on the caller's origin buffer.
 * Otherwise the origin data is packed and applied in chunks of at most
 * MPIDI_CH3U_Acc_stream_size / extent(basic_type) elements.
 *
 * shm_locked tracks whether the mutex is currently held so that fn_fail
 * releases it exactly once.
 *
 * Returns MPI_SUCCESS or the first error from shm_copy()/do_accumulate_op().
 */
static inline int MPIDI_CH3I_Shm_get_acc_op(const void *origin_addr, int origin_count, MPI_Datatype
                                            origin_datatype, void *result_addr, int result_count,
                                            MPI_Datatype result_datatype, int target_rank, MPI_Aint
                                            target_disp, int target_count,
                                            MPI_Datatype target_datatype, MPI_Op op,
                                            MPIR_Win * win_ptr)
{
    int disp_unit, shm_locked = 0;
    void *base = NULL;
    MPI_Aint i;                 /* same width as stream_unit_count */
    MPI_Datatype basic_type;
    MPI_Aint stream_elem_count, stream_unit_count;
    MPI_Aint predefined_dtp_size, predefined_dtp_extent, predefined_dtp_count;
    MPI_Aint total_len, rest_len;
    MPI_Aint origin_dtp_size;
    MPIR_Datatype *origin_dtp_ptr = NULL;
    int is_empty_origin = FALSE;
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_GET_ACC_OP);

    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_GET_ACC_OP);

    /* Judge if origin buffer is empty */
    if (op == MPI_NO_OP)
        is_empty_origin = TRUE;

    if (win_ptr->shm_allocated == TRUE) {
        int local_target_rank = win_ptr->comm_ptr->intranode_table[target_rank];
        MPIR_Assert(local_target_rank >= 0);
        base = win_ptr->shm_base_addrs[local_target_rank];
        disp_unit = win_ptr->basic_info_table[target_rank].disp_unit;
        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
        shm_locked = 1;
    }
    else {
        base = win_ptr->base;
        disp_unit = win_ptr->disp_unit;
    }

    /* Perform the local get first, then the accumulate */
    mpi_errno = shm_copy((char *) base + disp_unit * target_disp, target_count, target_datatype,
                         result_addr, result_count, result_datatype);
    MPIR_ERR_CHECK(mpi_errno);

    if (is_empty_origin == TRUE || MPIR_DATATYPE_IS_PREDEFINED(origin_datatype)) {

        mpi_errno = do_accumulate_op((void *) origin_addr, origin_count, origin_datatype,
                                     (void *) ((char *) base + disp_unit * target_disp),
                                     target_count, target_datatype, 0, op,
                                     MPIDI_RMA_ACC_SRCBUF_DEFAULT);
        if (shm_locked) {
            MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
            /* Record the release: if the check below fails, fn_fail must not
             * unlock the mutex a second time. */
            shm_locked = 0;
        }

        MPIR_ERR_CHECK(mpi_errno);

        goto fn_exit;
    }

    /* Get total length of origin data */
    MPIR_Datatype_get_size_macro(origin_datatype, origin_dtp_size);
    total_len = origin_dtp_size * origin_count;

    MPIR_Datatype_get_ptr(origin_datatype, origin_dtp_ptr);
    MPIR_Assert(origin_dtp_ptr != NULL && origin_dtp_ptr->basic_type != MPI_DATATYPE_NULL);
    basic_type = origin_dtp_ptr->basic_type;
    MPIR_Datatype_get_size_macro(basic_type, predefined_dtp_size);
    predefined_dtp_count = total_len / predefined_dtp_size;
    MPIR_Datatype_get_extent_macro(basic_type, predefined_dtp_extent);
    MPIR_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 && predefined_dtp_extent > 0);

    /* Number of basic elements per chunk, and number of chunks (rounded up). */
    stream_elem_count = MPIDI_CH3U_Acc_stream_size / predefined_dtp_extent;
    stream_unit_count = (predefined_dtp_count - 1) / stream_elem_count + 1;
    MPIR_Assert(stream_elem_count > 0 && stream_unit_count > 0);

    rest_len = total_len;
    for (i = 0; i < stream_unit_count; i++) {
        void *packed_buf = NULL;
        MPI_Aint stream_offset, stream_size, stream_count;
        MPI_Aint actual_pack_bytes;

        /* Byte offset and byte length of this chunk within the packed origin
         * data; the last chunk may be shorter. */
        stream_offset = i * stream_elem_count * predefined_dtp_size;
        stream_size = MPL_MIN(stream_elem_count * predefined_dtp_size, rest_len);
        stream_count = stream_size / predefined_dtp_size;
        rest_len -= stream_size;

        /* NOTE(review): the allocation result is not checked before it is
         * passed to MPIR_Typerep_pack -- consider reporting an out-of-memory
         * error here. */
        packed_buf = MPL_malloc(stream_size, MPL_MEM_BUFFER);

        MPIR_Typerep_pack(origin_addr, origin_count, origin_datatype,
                          stream_offset, packed_buf, stream_size, &actual_pack_bytes);
        MPIR_Assert(actual_pack_bytes == stream_size);

        MPIR_Assert(stream_count == (int) stream_count);
        mpi_errno = do_accumulate_op((void *) packed_buf, (int) stream_count, basic_type,
                                     (void *) ((char *) base + disp_unit * target_disp),
                                     target_count, target_datatype, stream_offset, op,
                                     MPIDI_RMA_ACC_SRCBUF_PACKED);

        /* Release the staging buffer before the error check: MPIR_ERR_CHECK
         * jumps to fn_fail on failure and packed_buf is not visible there, so
         * freeing afterwards would leak it on the error path. */
        MPL_free(packed_buf);

        MPIR_ERR_CHECK(mpi_errno);
    }

    if (shm_locked) {
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
        shm_locked = 0;
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_GET_ACC_OP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    if (shm_locked) {
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
    }
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
509
510
/*
 * MPIDI_CH3I_Shm_get_op - perform a get by copying from the target's window
 * memory directly into the origin buffer.
 *
 * The source address is the window base plus disp_unit * target_disp.  For a
 * window with shm_allocated set, base and disp_unit are looked up per target
 * (shm_base_addrs via the communicator's intranode_table, and
 * basic_info_table); otherwise the window's own base and disp_unit are used.
 *
 * Returns the error code produced by shm_copy().
 */
static inline int MPIDI_CH3I_Shm_get_op(void *origin_addr, int origin_count,
                                        MPI_Datatype origin_datatype, int target_rank,
                                        MPI_Aint target_disp, int target_count,
                                        MPI_Datatype target_datatype, MPIR_Win * win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    int unit;
    void *win_base = NULL;
    const char *target_addr;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_GET_OP);

    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_GET_OP);

    if (win_ptr->shm_allocated != TRUE) {
        /* Plain window: use this window's own base and displacement unit. */
        win_base = win_ptr->base;
        unit = win_ptr->disp_unit;
    } else {
        /* Shared-memory window: translate the target rank to its node-local
         * index to find the mapped base address. */
        int local_target_rank = win_ptr->comm_ptr->intranode_table[target_rank];
        MPIR_Assert(local_target_rank >= 0);
        win_base = win_ptr->shm_base_addrs[local_target_rank];
        unit = win_ptr->basic_info_table[target_rank].disp_unit;
    }

    target_addr = (char *) win_base + unit * target_disp;

    /* The target is the source of the copy; the origin buffer receives it. */
    mpi_errno = shm_copy(target_addr, target_count, target_datatype,
                         origin_addr, origin_count, origin_datatype);
    MPIR_ERR_CHECK(mpi_errno);

  fn_exit:
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_GET_OP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
546
547
/*
 * MPIDI_CH3I_Shm_cas_op - compare-and-swap on one element of `datatype` in
 * the target's window memory.
 *
 * The current target value is always copied to result_addr.  If
 * MPIR_Compare_equal() reports that the value at compare_addr equals the
 * target value, the target is then overwritten with the value at origin_addr.
 *
 * The target address is the window base plus disp_unit * target_disp.  For a
 * window with shm_allocated set, base and disp_unit are looked up per target
 * and the whole read/compare/write sequence runs between
 * MPIDI_CH3I_SHM_MUTEX_LOCK and MPIDI_CH3I_SHM_MUTEX_UNLOCK; otherwise the
 * window's own base and disp_unit are used and no mutex is taken.
 *
 * Always returns MPI_SUCCESS; nothing in the body sets mpi_errno.
 */
static inline int MPIDI_CH3I_Shm_cas_op(const void *origin_addr, const void *compare_addr,
                                        void *result_addr, MPI_Datatype datatype, int target_rank,
                                        MPI_Aint target_disp, MPIR_Win * win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    int shm_locked = 0;
    int unit;
    MPI_Aint len;
    void *win_base = NULL;
    void *target = NULL;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_CAS_OP);

    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_CAS_OP);

    if (win_ptr->shm_allocated != TRUE) {
        win_base = win_ptr->base;
        unit = win_ptr->disp_unit;
    } else {
        int local_target_rank = win_ptr->comm_ptr->intranode_table[target_rank];
        MPIR_Assert(local_target_rank >= 0);
        win_base = win_ptr->shm_base_addrs[local_target_rank];
        unit = win_ptr->basic_info_table[target_rank].disp_unit;

        /* Hold the window mutex across the read/compare/write below. */
        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
        shm_locked = 1;
    }

    target = (char *) win_base + unit * target_disp;
    MPIR_Datatype_get_size_macro(datatype, len);

    /* Hand back the old target value first ... */
    MPIR_Memcpy(result_addr, target, len);

    /* ... then swap in the origin value only when the comparison matches. */
    if (MPIR_Compare_equal(compare_addr, target, datatype)) {
        MPIR_Memcpy(target, origin_addr, len);
    }

    if (shm_locked) {
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
        shm_locked = 0;
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_CAS_OP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    if (shm_locked) {
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
    }
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
600
601
/*
 * MPIDI_CH3I_Shm_fop_op - fetch-and-op on one element of `datatype` in the
 * target's window memory.
 *
 * The datatype is first passed to the check function obtained from
 * MPIR_OP_HDL_TO_DTYPE_FN(op); if that does not return MPI_SUCCESS the
 * function exits immediately, still returning MPI_SUCCESS and leaving both
 * result_addr and the target untouched.
 *
 * Otherwise the current target value is copied to result_addr and the
 * function obtained from MPIR_OP_HDL_TO_FN(op) is invoked with the origin
 * value as input and the target as in/out operand, for a count of one.
 *
 * The target address is the window base plus disp_unit * target_disp.  For a
 * window with shm_allocated set, base and disp_unit are looked up per target
 * and the fetch and the operation run between MPIDI_CH3I_SHM_MUTEX_LOCK and
 * MPIDI_CH3I_SHM_MUTEX_UNLOCK; otherwise the window's own base and disp_unit
 * are used and no mutex is taken.
 *
 * Always returns MPI_SUCCESS; nothing in the body sets mpi_errno.
 */
static inline int MPIDI_CH3I_Shm_fop_op(const void *origin_addr, void *result_addr,
                                        MPI_Datatype datatype, int target_rank,
                                        MPI_Aint target_disp, MPI_Op op, MPIR_Win * win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    int shm_locked = 0;
    int elem_count = 1;         /* the operation is applied to a single element */
    int unit;
    MPI_Aint len;
    void *win_base = NULL;
    void *target = NULL;
    MPI_User_function *op_fn = NULL;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_FOP_OP);

    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_FOP_OP);

    /* Bail out (without an error) before taking any lock when the op's
     * datatype check rejects this datatype. */
    if ((*MPIR_OP_HDL_TO_DTYPE_FN(op)) (datatype) != MPI_SUCCESS)
        goto fn_exit;

    if (win_ptr->shm_allocated != TRUE) {
        win_base = win_ptr->base;
        unit = win_ptr->disp_unit;
    } else {
        int local_target_rank = win_ptr->comm_ptr->intranode_table[target_rank];
        MPIR_Assert(local_target_rank >= 0);
        win_base = win_ptr->shm_base_addrs[local_target_rank];
        unit = win_ptr->basic_info_table[target_rank].disp_unit;

        /* Hold the window mutex across the fetch and the update below. */
        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
        shm_locked = 1;
    }

    target = (char *) win_base + unit * target_disp;
    MPIR_Datatype_get_size_macro(datatype, len);

    /* Fetch: hand back the old target value. */
    MPIR_Memcpy(result_addr, target, len);

    /* Op: combine the origin value into the target in place. */
    op_fn = MPIR_OP_HDL_TO_FN(op);
    (*op_fn) ((void *) origin_addr, target, &elem_count, &datatype);

    if (shm_locked) {
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
        shm_locked = 0;
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_FOP_OP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    if (shm_locked) {
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
    }
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
659
660
661 #endif /* MPID_RMA_SHM_H_INCLUDED */
662