1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 #ifndef MPID_RMA_SHM_H_INCLUDED
7 #define MPID_RMA_SHM_H_INCLUDED
8 
9 #include "utlist.h"
10 #include "mpid_rma_types.h"
11 
12 static inline int do_accumulate_op(void *source_buf, int source_count, MPI_Datatype source_dtp,
13                                    void *target_buf, int target_count, MPI_Datatype target_dtp,
14                                    MPI_Aint stream_offset, MPI_Op acc_op,
15                                    MPIDI_RMA_Acc_srcbuf_kind_t srckind);
16 
/*
 * ASSIGN_COPY - element-wise copy followed by a jump to the caller's exit.
 *
 * Copies `count` elements of C type `type` from `src` to `dest` using a plain
 * assignment loop, then executes `goto fn_exit`.  The enclosing function must
 * therefore define a `fn_exit` label.
 *
 *   src   - pointer to the first source element (read only)
 *   dest  - pointer to the first destination element
 *   count - number of elements to copy; evaluated on every loop iteration
 *   type  - C element type used for both buffers
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement, every argument is parenthesized, and the source pointer
 * keeps its const qualification.
 */
#define ASSIGN_COPY(src, dest, count, type)             \
    do {                                                \
        const type *src_ = (const type *) (src);        \
        type *dest_ = (type *) (dest);                  \
        int i_;                                         \
        for (i_ = 0; i_ < (count); i_++)                \
            dest_[i_] = src_[i_];                       \
        goto fn_exit;                                   \
    } while (0)
26 
/*
 * shm_copy - copy typed data between two local buffers.
 *
 * For a small number of elements of a predefined datatype that maps directly
 * onto a C type, the copy is done with an inline assignment loop
 * (ASSIGN_COPY).  Every other case is delegated to MPIR_Localcopy().
 *
 *   src, scount, stype  - source buffer, element count and datatype
 *   dest, dcount, dtype - destination buffer, element count and datatype
 *
 * Returns MPI_SUCCESS when the inline path is taken, otherwise the value
 * returned by MPIR_Localcopy().
 *
 * The inline path is only taken when source and destination use the same
 * datatype and the destination has room for at least `scount` elements.  The
 * assignment loop looks at neither `dtype` nor `dcount`, so without that guard
 * a short destination would be overrun instead of being handled by
 * MPIR_Localcopy().
 */
static inline int shm_copy(const void *src, int scount, MPI_Datatype stype,
                           void *dest, int dcount, MPI_Datatype dtype)
{
    int mpi_errno = MPI_SUCCESS;

    /* We use a threshold of operations under which a for loop of assignments is
     * used.  Even though this happens at smaller block lengths, making it
     * potentially inefficient, it can take advantage of some vectorization
     * available on most modern processors. */
#define SHM_OPS_THRESHOLD  (16)

    if (MPIR_DATATYPE_IS_PREDEFINED(stype) && MPIR_DATATYPE_IS_PREDEFINED(dtype) &&
        stype == dtype && scount <= dcount && scount <= SHM_OPS_THRESHOLD) {

        /* FIXME: We currently only optimize a few predefined datatypes, which
         * have a direct C datatype mapping. */

        /* The below list of datatypes is based on those specified in the MPI-3
         * standard on page 665. */

        /* Each ASSIGN_COPY jumps to fn_exit after copying, so the cases below
         * never fall through into one another. */
        switch (stype) {
        case MPI_CHAR:
            ASSIGN_COPY(src, dest, scount, char);

        case MPI_SHORT:
            ASSIGN_COPY(src, dest, scount, signed short int);

        case MPI_INT:
            ASSIGN_COPY(src, dest, scount, signed int);

        case MPI_LONG:
            ASSIGN_COPY(src, dest, scount, signed long int);

        case MPI_LONG_LONG_INT:        /* covers MPI_LONG_LONG too */
            ASSIGN_COPY(src, dest, scount, signed long long int);

        case MPI_SIGNED_CHAR:
            ASSIGN_COPY(src, dest, scount, signed char);

        case MPI_UNSIGNED_CHAR:
            ASSIGN_COPY(src, dest, scount, unsigned char);

        case MPI_UNSIGNED_SHORT:
            ASSIGN_COPY(src, dest, scount, unsigned short int);

        case MPI_UNSIGNED:
            ASSIGN_COPY(src, dest, scount, unsigned int);

        case MPI_UNSIGNED_LONG:
            ASSIGN_COPY(src, dest, scount, unsigned long int);

        case MPI_UNSIGNED_LONG_LONG:
            ASSIGN_COPY(src, dest, scount, unsigned long long int);

        case MPI_FLOAT:
            ASSIGN_COPY(src, dest, scount, float);

        case MPI_DOUBLE:
            ASSIGN_COPY(src, dest, scount, double);

        case MPI_LONG_DOUBLE:
            ASSIGN_COPY(src, dest, scount, long double);

#if 0
            /* FIXME: we need a configure check to define HAVE_WCHAR_T before
             * this can be enabled */
        case MPI_WCHAR:
            ASSIGN_COPY(src, dest, scount, wchar_t);
#endif

#if 0
            /* FIXME: we need a configure check to define HAVE_C_BOOL before
             * this can be enabled */
        case MPI_C_BOOL:
            ASSIGN_COPY(src, dest, scount, _Bool);
#endif

#if HAVE_INT8_T
        case MPI_INT8_T:
            ASSIGN_COPY(src, dest, scount, int8_t);
#endif /* HAVE_INT8_T */

#if HAVE_INT16_T
        case MPI_INT16_T:
            ASSIGN_COPY(src, dest, scount, int16_t);
#endif /* HAVE_INT16_T */

#if HAVE_INT32_T
        case MPI_INT32_T:
            ASSIGN_COPY(src, dest, scount, int32_t);
#endif /* HAVE_INT32_T */

#if HAVE_INT64_T
        case MPI_INT64_T:
            ASSIGN_COPY(src, dest, scount, int64_t);
#endif /* HAVE_INT64_T */

#if HAVE_UINT8_T
        case MPI_UINT8_T:
            ASSIGN_COPY(src, dest, scount, uint8_t);
#endif /* HAVE_UINT8_T */

#if HAVE_UINT16_T
        case MPI_UINT16_T:
            ASSIGN_COPY(src, dest, scount, uint16_t);
#endif /* HAVE_UINT16_T */

#if HAVE_UINT32_T
        case MPI_UINT32_T:
            ASSIGN_COPY(src, dest, scount, uint32_t);
#endif /* HAVE_UINT32_T */

#if HAVE_UINT64_T
        case MPI_UINT64_T:
            ASSIGN_COPY(src, dest, scount, uint64_t);
#endif /* HAVE_UINT64_T */

        case MPI_AINT:
            ASSIGN_COPY(src, dest, scount, MPI_Aint);

        case MPI_COUNT:
            ASSIGN_COPY(src, dest, scount, MPI_Count);

        case MPI_OFFSET:
            ASSIGN_COPY(src, dest, scount, MPI_Offset);

#if 0
            /* FIXME: we need a configure check to define HAVE_C_COMPLEX before
             * this can be enabled */
        case MPI_C_COMPLEX:    /* covers MPI_C_FLOAT_COMPLEX as well */
            ASSIGN_COPY(src, dest, scount, float _Complex);
#endif

#if 0
            /* FIXME: we need a configure check to define HAVE_C_DOUBLE_COMPLEX
             * before this can be enabled */
        case MPI_C_DOUBLE_COMPLEX:
            ASSIGN_COPY(src, dest, scount, double _Complex);
#endif

#if 0
            /* FIXME: we need a configure check to define
             * HAVE_C_LONG_DOUBLE_COMPLEX before this can be enabled */
        case MPI_C_LONG_DOUBLE_COMPLEX:
            ASSIGN_COPY(src, dest, scount, long double _Complex);
#endif

#if 0
            /* Types that don't have a direct equivalent */
        case MPI_BYTE:
        case MPI_PACKED:
#endif

#if 0   /* Fortran types */
        case MPI_INTEGER:
        case MPI_REAL:
        case MPI_DOUBLE_PRECISION:
        case MPI_COMPLEX:
        case MPI_LOGICAL:
        case MPI_CHARACTER:
#endif

#if 0   /* C++ types */
        case MPI_CXX_BOOL:
        case MPI_CXX_FLOAT_COMPLEX:
        case MPI_CXX_DOUBLE_COMPLEX:
        case MPI_CXX_LONG_DOUBLE_COMPLEX:
#endif

#if 0   /* Optional Fortran types */
        case MPI_DOUBLE_COMPLEX:
        case MPI_INTEGER1:
        case MPI_INTEGER2:
        case MPI_INTEGER4:
        case MPI_INTEGER8:
        case MPI_INTEGER16:
        case MPI_REAL2:
        case MPI_REAL4:
        case MPI_REAL8:
        case MPI_REAL16:
        case MPI_COMPLEX4:
        case MPI_COMPLEX8:
        case MPI_COMPLEX16:
        case MPI_COMPLEX32:
#endif

#if 0   /* C datatypes for reduction functions */
        case MPI_FLOAT_INT:
        case MPI_DOUBLE_INT:
        case MPI_LONG_INT:
        case MPI_2INT:
        case MPI_LONG_DOUBLE_INT:
#endif

#if 0   /* Fortran datatypes for reduction functions */
        case MPI_2REAL:
        case MPI_2DOUBLE_PRECISION:
        case MPI_2INTEGER:
#endif

        default:
            /* Just to make sure the switch statement is not empty */
            ;
        }
    }

    /* General path: anything the inline loop did not handle. */
    mpi_errno = MPIR_Localcopy(src, scount, stype, dest, dcount, dtype);
    MPIR_ERR_CHECK(mpi_errno);

  fn_exit:
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
242 
/*
 * MPIDI_CH3I_Shm_put_op - perform a put by copying directly into the target's
 * window memory.
 *
 * The destination address is (window base) + disp_unit * target_disp.  When
 * win_ptr->shm_allocated is TRUE, the base is taken from shm_base_addrs[]
 * (indexed by the target's intranode rank) and the displacement unit from
 * basic_info_table[target_rank]; otherwise the window's own base and
 * disp_unit are used.  The copy itself is done by shm_copy().
 *
 * Returns MPI_SUCCESS or the error code produced by shm_copy().
 */
static inline int MPIDI_CH3I_Shm_put_op(const void *origin_addr, int origin_count, MPI_Datatype
                                        origin_datatype, int target_rank, MPI_Aint target_disp,
                                        int target_count, MPI_Datatype target_datatype,
                                        MPIR_Win * win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    void *win_base = NULL;
    int unit;
    char *target_addr;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_PUT_OP);

    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_PUT_OP);

    if (win_ptr->shm_allocated != TRUE) {
        /* Not a shared-memory-allocated window: use this window's own fields. */
        win_base = win_ptr->base;
        unit = win_ptr->disp_unit;
    } else {
        /* Translate the communicator rank into the intranode index. */
        int node_rank = win_ptr->comm_ptr->intranode_table[target_rank];
        MPIR_Assert(node_rank >= 0);
        win_base = win_ptr->shm_base_addrs[node_rank];
        unit = win_ptr->basic_info_table[target_rank].disp_unit;
    }

    target_addr = (char *) win_base + unit * target_disp;

    mpi_errno = shm_copy(origin_addr, origin_count, origin_datatype,
                         target_addr, target_count, target_datatype);
    MPIR_ERR_CHECK(mpi_errno);

  fn_exit:
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_PUT_OP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
278 
279 
/*
 * MPIDI_CH3I_Shm_acc_op - perform an accumulate directly on the target's
 * window memory.
 *
 * The target address is (window base) + disp_unit * target_disp, with base
 * and disp_unit selected as in the other operations in this file depending on
 * win_ptr->shm_allocated.
 *
 * If origin_datatype is predefined, do_accumulate_op() is called once on the
 * origin buffer.  Otherwise the origin data is packed piece by piece into a
 * temporary buffer of at most MPIDI_CH3U_Acc_stream_size bytes worth of
 * basic-type elements, and do_accumulate_op() is called once per piece with
 * the matching stream offset.
 *
 * For shared-memory-allocated windows each do_accumulate_op() call is wrapped
 * in MPIDI_CH3I_SHM_MUTEX_LOCK/UNLOCK.
 *
 * Returns MPI_SUCCESS or the first error reported by do_accumulate_op().
 */
static inline int MPIDI_CH3I_Shm_acc_op(const void *origin_addr, int origin_count, MPI_Datatype
                                        origin_datatype, int target_rank, MPI_Aint target_disp,
                                        int target_count, MPI_Datatype target_datatype, MPI_Op op,
                                        MPIR_Win * win_ptr)
{
    void *base = NULL;
    int disp_unit, shm_op = 0;
    int mpi_errno = MPI_SUCCESS;
    MPI_Aint i;                 /* same width as stream_unit_count */
    MPI_Datatype basic_type;
    MPI_Aint stream_elem_count, stream_unit_count;
    MPI_Aint predefined_dtp_size, predefined_dtp_extent, predefined_dtp_count;
    MPI_Aint total_len, rest_len;
    MPI_Aint origin_dtp_size;
    MPIR_Datatype *origin_dtp_ptr = NULL;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_ACC_OP);

    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_ACC_OP);

    if (win_ptr->shm_allocated == TRUE) {
        int local_target_rank = win_ptr->comm_ptr->intranode_table[target_rank];
        MPIR_Assert(local_target_rank >= 0);
        shm_op = 1;
        base = win_ptr->shm_base_addrs[local_target_rank];
        disp_unit = win_ptr->basic_info_table[target_rank].disp_unit;
    }
    else {
        base = win_ptr->base;
        disp_unit = win_ptr->disp_unit;
    }

    if (MPIR_DATATYPE_IS_PREDEFINED(origin_datatype)) {
        /* Predefined origin type: no packing needed, accumulate in one shot. */
        if (shm_op) {
            MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
        }
        mpi_errno = do_accumulate_op((void *) origin_addr, origin_count, origin_datatype,
                                     (void *) ((char *) base + disp_unit * target_disp),
                                     target_count, target_datatype, 0, op,
                                     MPIDI_RMA_ACC_SRCBUF_DEFAULT);
        if (shm_op) {
            MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
        }

        MPIR_ERR_CHECK(mpi_errno);

        goto fn_exit;
    }

    /* Get total length of origin data */
    MPIR_Datatype_get_size_macro(origin_datatype, origin_dtp_size);
    total_len = origin_dtp_size * origin_count;

    MPIR_Datatype_get_ptr(origin_datatype, origin_dtp_ptr);
    MPIR_Assert(origin_dtp_ptr != NULL && origin_dtp_ptr->basic_type != MPI_DATATYPE_NULL);
    basic_type = origin_dtp_ptr->basic_type;
    MPIR_Datatype_get_size_macro(basic_type, predefined_dtp_size);
    predefined_dtp_count = total_len / predefined_dtp_size;
    MPIR_Datatype_get_extent_macro(basic_type, predefined_dtp_extent);
    MPIR_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 && predefined_dtp_extent > 0);

    /* Number of basic elements per stream unit, and number of units needed
     * (rounded up) to cover all basic elements. */
    stream_elem_count = MPIDI_CH3U_Acc_stream_size / predefined_dtp_extent;
    stream_unit_count = (predefined_dtp_count - 1) / stream_elem_count + 1;
    MPIR_Assert(stream_elem_count > 0 && stream_unit_count > 0);

    rest_len = total_len;
    for (i = 0; i < stream_unit_count; i++) {
        void *packed_buf = NULL;
        MPI_Aint stream_offset, stream_size, stream_count;

        stream_offset = i * stream_elem_count * predefined_dtp_size;
        stream_size = MPL_MIN(stream_elem_count * predefined_dtp_size, rest_len);
        stream_count = stream_size / predefined_dtp_size;
        rest_len -= stream_size;

        /* NOTE(review): the allocation result is not checked and the return
         * value of MPIR_Typerep_pack() is ignored, as in the original code. */
        packed_buf = MPL_malloc(stream_size, MPL_MEM_BUFFER);

        MPI_Aint actual_pack_bytes;
        MPIR_Typerep_pack(origin_addr, origin_count, origin_datatype,
                          stream_offset, packed_buf, stream_size, &actual_pack_bytes);
        MPIR_Assert(actual_pack_bytes == stream_size);

        if (shm_op) {
            MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
        }

        MPIR_Assert(stream_count == (int) stream_count);
        mpi_errno = do_accumulate_op((void *) packed_buf, (int) stream_count, basic_type,
                                     (void *) ((char *) base + disp_unit * target_disp),
                                     target_count, target_datatype, stream_offset, op,
                                     MPIDI_RMA_ACC_SRCBUF_PACKED);

        if (shm_op) {
            MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
        }

        /* Release the staging buffer before the error check: MPIR_ERR_CHECK
         * jumps to fn_fail on error, which would otherwise leak it. */
        MPL_free(packed_buf);

        MPIR_ERR_CHECK(mpi_errno);
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_ACC_OP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
388 
389 
/*
 * MPIDI_CH3I_Shm_get_acc_op - perform a get-accumulate directly on the
 * target's window memory.
 *
 * The target address is (window base) + disp_unit * target_disp, with base
 * and disp_unit selected depending on win_ptr->shm_allocated.  For
 * shared-memory-allocated windows the window mutex is taken before the get
 * and held until the accumulate has finished.
 *
 * Steps:
 *   1. Copy the current target contents into the result buffer (shm_copy).
 *   2. Apply `op` to the target with do_accumulate_op().  When op is
 *      MPI_NO_OP or origin_datatype is predefined this is a single call on
 *      the origin buffer; otherwise the origin data is packed piece by piece
 *      (at most MPIDI_CH3U_Acc_stream_size bytes worth of basic-type
 *      elements each) and accumulated per piece.
 *
 * `shm_locked` always reflects whether the mutex is currently held, so the
 * error path releases it exactly once.
 *
 * Returns MPI_SUCCESS or the first error reported by shm_copy() or
 * do_accumulate_op().
 */
static inline int MPIDI_CH3I_Shm_get_acc_op(const void *origin_addr, int origin_count, MPI_Datatype
                                            origin_datatype, void *result_addr, int result_count,
                                            MPI_Datatype result_datatype, int target_rank, MPI_Aint
                                            target_disp, int target_count,
                                            MPI_Datatype target_datatype, MPI_Op op,
                                            MPIR_Win * win_ptr)
{
    int disp_unit, shm_locked = 0;
    void *base = NULL;
    MPI_Aint i;                 /* same width as stream_unit_count */
    MPI_Datatype basic_type;
    MPI_Aint stream_elem_count, stream_unit_count;
    MPI_Aint predefined_dtp_size, predefined_dtp_extent, predefined_dtp_count;
    MPI_Aint total_len, rest_len;
    MPI_Aint origin_dtp_size;
    MPIR_Datatype *origin_dtp_ptr = NULL;
    int is_empty_origin = FALSE;
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_GET_ACC_OP);

    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_GET_ACC_OP);

    /* Judge if origin buffer is empty */
    if (op == MPI_NO_OP)
        is_empty_origin = TRUE;

    if (win_ptr->shm_allocated == TRUE) {
        int local_target_rank = win_ptr->comm_ptr->intranode_table[target_rank];
        MPIR_Assert(local_target_rank >= 0);
        base = win_ptr->shm_base_addrs[local_target_rank];
        disp_unit = win_ptr->basic_info_table[target_rank].disp_unit;
        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
        shm_locked = 1;
    }
    else {
        base = win_ptr->base;
        disp_unit = win_ptr->disp_unit;
    }

    /* Perform the local get first, then the accumulate */
    mpi_errno = shm_copy((char *) base + disp_unit * target_disp, target_count, target_datatype,
                         result_addr, result_count, result_datatype);
    MPIR_ERR_CHECK(mpi_errno);

    if (is_empty_origin == TRUE || MPIR_DATATYPE_IS_PREDEFINED(origin_datatype)) {

        mpi_errno = do_accumulate_op((void *) origin_addr, origin_count, origin_datatype,
                                     (void *) ((char *) base + disp_unit * target_disp),
                                     target_count, target_datatype, 0, op,
                                     MPIDI_RMA_ACC_SRCBUF_DEFAULT);
        if (shm_locked) {
            MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
            /* Record the release so that fn_fail does not unlock a second
             * time if the error check below jumps there. */
            shm_locked = 0;
        }

        MPIR_ERR_CHECK(mpi_errno);

        goto fn_exit;
    }

    /* Get total length of origin data */
    MPIR_Datatype_get_size_macro(origin_datatype, origin_dtp_size);
    total_len = origin_dtp_size * origin_count;

    MPIR_Datatype_get_ptr(origin_datatype, origin_dtp_ptr);
    MPIR_Assert(origin_dtp_ptr != NULL && origin_dtp_ptr->basic_type != MPI_DATATYPE_NULL);
    basic_type = origin_dtp_ptr->basic_type;
    MPIR_Datatype_get_size_macro(basic_type, predefined_dtp_size);
    predefined_dtp_count = total_len / predefined_dtp_size;
    MPIR_Datatype_get_extent_macro(basic_type, predefined_dtp_extent);
    MPIR_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 && predefined_dtp_extent > 0);

    /* Number of basic elements per stream unit, and number of units needed
     * (rounded up) to cover all basic elements. */
    stream_elem_count = MPIDI_CH3U_Acc_stream_size / predefined_dtp_extent;
    stream_unit_count = (predefined_dtp_count - 1) / stream_elem_count + 1;
    MPIR_Assert(stream_elem_count > 0 && stream_unit_count > 0);

    rest_len = total_len;
    for (i = 0; i < stream_unit_count; i++) {
        void *packed_buf = NULL;
        MPI_Aint stream_offset, stream_size, stream_count;

        stream_offset = i * stream_elem_count * predefined_dtp_size;
        stream_size = MPL_MIN(stream_elem_count * predefined_dtp_size, rest_len);
        stream_count = stream_size / predefined_dtp_size;
        rest_len -= stream_size;

        /* NOTE(review): the allocation result is not checked and the return
         * value of MPIR_Typerep_pack() is ignored, as in the original code. */
        packed_buf = MPL_malloc(stream_size, MPL_MEM_BUFFER);

        MPI_Aint actual_pack_bytes;
        MPIR_Typerep_pack(origin_addr, origin_count, origin_datatype,
                          stream_offset, packed_buf, stream_size, &actual_pack_bytes);
        MPIR_Assert(actual_pack_bytes == stream_size);

        MPIR_Assert(stream_count == (int) stream_count);
        mpi_errno = do_accumulate_op((void *) packed_buf, (int) stream_count, basic_type,
                                     (void *) ((char *) base + disp_unit * target_disp),
                                     target_count, target_datatype, stream_offset, op,
                                     MPIDI_RMA_ACC_SRCBUF_PACKED);

        /* Release the staging buffer before the error check: MPIR_ERR_CHECK
         * jumps to fn_fail on error, which would otherwise leak it. */
        MPL_free(packed_buf);

        MPIR_ERR_CHECK(mpi_errno);
    }

    if (shm_locked) {
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
        shm_locked = 0;
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_GET_ACC_OP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    if (shm_locked) {
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
    }
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
509 
510 
/*
 * MPIDI_CH3I_Shm_get_op - perform a get by copying directly out of the
 * target's window memory.
 *
 * The source address is (window base) + disp_unit * target_disp.  When
 * win_ptr->shm_allocated is TRUE, the base is taken from shm_base_addrs[]
 * (indexed by the target's intranode rank) and the displacement unit from
 * basic_info_table[target_rank]; otherwise the window's own base and
 * disp_unit are used.  The copy into origin_addr is done by shm_copy().
 *
 * Returns MPI_SUCCESS or the error code produced by shm_copy().
 */
static inline int MPIDI_CH3I_Shm_get_op(void *origin_addr, int origin_count,
                                        MPI_Datatype origin_datatype, int target_rank,
                                        MPI_Aint target_disp, int target_count,
                                        MPI_Datatype target_datatype, MPIR_Win * win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    void *win_base = NULL;
    int unit;
    const char *target_addr;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_GET_OP);

    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_GET_OP);

    if (win_ptr->shm_allocated != TRUE) {
        /* Not a shared-memory-allocated window: use this window's own fields. */
        win_base = win_ptr->base;
        unit = win_ptr->disp_unit;
    } else {
        /* Translate the communicator rank into the intranode index. */
        int node_rank = win_ptr->comm_ptr->intranode_table[target_rank];
        MPIR_Assert(node_rank >= 0);
        win_base = win_ptr->shm_base_addrs[node_rank];
        unit = win_ptr->basic_info_table[target_rank].disp_unit;
    }

    target_addr = (char *) win_base + unit * target_disp;

    mpi_errno = shm_copy(target_addr, target_count, target_datatype,
                         origin_addr, origin_count, origin_datatype);
    MPIR_ERR_CHECK(mpi_errno);

  fn_exit:
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_GET_OP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
546 
547 
/*
 * MPIDI_CH3I_Shm_cas_op - perform a compare-and-swap directly on the target's
 * window memory.
 *
 * The target address is (window base) + disp_unit * target_disp, with base
 * and disp_unit selected depending on win_ptr->shm_allocated.  For
 * shared-memory-allocated windows the window mutex is held for the whole
 * read/compare/write sequence.
 *
 * The current target value (one element of `datatype`) is first copied to
 * result_addr.  If MPIR_Compare_equal() reports that the target equals the
 * value at compare_addr, the value at origin_addr is then written to the
 * target.
 *
 * Returns mpi_errno, which this function initializes to MPI_SUCCESS.
 */
static inline int MPIDI_CH3I_Shm_cas_op(const void *origin_addr, const void *compare_addr,
                                        void *result_addr, MPI_Datatype datatype, int target_rank,
                                        MPI_Aint target_disp, MPIR_Win * win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    int shm_locked = 0;
    int unit;
    MPI_Aint elem_bytes;
    void *win_base = NULL;
    void *target_addr = NULL;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_CAS_OP);

    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_CAS_OP);

    if (win_ptr->shm_allocated != TRUE) {
        /* Not a shared-memory-allocated window: no mutex is taken. */
        win_base = win_ptr->base;
        unit = win_ptr->disp_unit;
    } else {
        int node_rank = win_ptr->comm_ptr->intranode_table[target_rank];
        MPIR_Assert(node_rank >= 0);
        win_base = win_ptr->shm_base_addrs[node_rank];
        unit = win_ptr->basic_info_table[target_rank].disp_unit;

        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
        shm_locked = 1;
    }

    target_addr = (char *) win_base + unit * target_disp;

    /* Fetch the old value first; it is returned regardless of the compare. */
    MPIR_Datatype_get_size_macro(datatype, elem_bytes);
    MPIR_Memcpy(result_addr, target_addr, elem_bytes);

    /* Swap only when the target currently matches the compare value. */
    if (MPIR_Compare_equal(compare_addr, target_addr, datatype)) {
        MPIR_Memcpy(target_addr, origin_addr, elem_bytes);
    }

    if (shm_locked) {
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
        shm_locked = 0;
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_CAS_OP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    if (shm_locked) {
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
    }
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
600 
601 
/*
 * MPIDI_CH3I_Shm_fop_op - perform a fetch-and-op directly on the target's
 * window memory.
 *
 * The target address is (window base) + disp_unit * target_disp, with base
 * and disp_unit selected depending on win_ptr->shm_allocated.  For
 * shared-memory-allocated windows the window mutex is held for the whole
 * fetch/update sequence.
 *
 * The current target value (one element of `datatype`) is copied to
 * result_addr, then the function obtained from MPIR_OP_HDL_TO_FN(op) is
 * invoked with origin_addr as input, the target as in/out buffer, and a
 * count of one.
 *
 * NOTE(review): when the op/datatype check at the top fails, the function
 * returns MPI_SUCCESS without touching result_addr or the target -- confirm
 * that silently skipping the operation is intended.
 *
 * Returns mpi_errno, which this function initializes to MPI_SUCCESS.
 */
static inline int MPIDI_CH3I_Shm_fop_op(const void *origin_addr, void *result_addr,
                                        MPI_Datatype datatype, int target_rank,
                                        MPI_Aint target_disp, MPI_Op op, MPIR_Win * win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    int shm_locked = 0;
    int unit;
    int elem_count = 1;         /* the op is applied to exactly one element */
    MPI_Aint elem_bytes;
    void *win_base = NULL;
    void *target_addr = NULL;
    MPI_User_function *op_fn = NULL;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_FOP_OP);

    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_FOP_OP);

    /* Skip the operation when the op's datatype check rejects `datatype`. */
    if ((*MPIR_OP_HDL_TO_DTYPE_FN(op)) (datatype) != MPI_SUCCESS)
        goto fn_exit;

    if (win_ptr->shm_allocated != TRUE) {
        /* Not a shared-memory-allocated window: no mutex is taken. */
        win_base = win_ptr->base;
        unit = win_ptr->disp_unit;
    } else {
        int node_rank = win_ptr->comm_ptr->intranode_table[target_rank];
        MPIR_Assert(node_rank >= 0);
        win_base = win_ptr->shm_base_addrs[node_rank];
        unit = win_ptr->basic_info_table[target_rank].disp_unit;

        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
        shm_locked = 1;
    }

    target_addr = (char *) win_base + unit * target_disp;

    /* Fetch the old value before the target is updated in place. */
    MPIR_Datatype_get_size_macro(datatype, elem_bytes);
    MPIR_Memcpy(result_addr, target_addr, elem_bytes);

    op_fn = MPIR_OP_HDL_TO_FN(op);

    (*op_fn) ((void *) origin_addr, target_addr, &elem_count, &datatype);

    if (shm_locked) {
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
        shm_locked = 0;
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_FOP_OP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    if (shm_locked) {
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
    }
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
659 
660 
661 #endif /* MPID_RMA_SHM_H_INCLUDED */
662