1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 #ifndef CH4R_RMA_H_INCLUDED
7 #define CH4R_RMA_H_INCLUDED
8 
9 #include "ch4_impl.h"
10 
11 extern MPIR_T_pvar_timer_t PVAR_TIMER_rma_amhdr_set ATTRIBUTE((unused));
12 
/* Hand back an already-completed RMA request.  Request-based operations
 * (e.g. RPUT) use this when the operation finishes on the spot, i.e. without
 * any active message being issued.
 *
 * The expanding function must provide an `mpi_errno` variable and an
 * `fn_fail` label: if no request can be obtained, MPIX_ERR_NOREQ is set in
 * `mpi_errno` and control jumps to `fn_fail`. */
#define MPIDI_RMA_REQUEST_CREATE_COMPLETE(req_)                         \
    do {                                                                \
        (req_) = MPIR_Request_create_complete(MPIR_REQUEST_KIND__RMA);  \
        MPIR_ERR_CHKANDSTMT((req_) == NULL, mpi_errno, MPIX_ERR_NOREQ,  \
                            goto fn_fail, "**nomemreq");                \
    } while (0)
22 
MPIDIG_do_put(const void * origin_addr,int origin_count,MPI_Datatype origin_datatype,int target_rank,MPI_Aint target_disp,int target_count,MPI_Datatype target_datatype,MPIR_Win * win,MPIR_Request ** sreq_ptr)23 MPL_STATIC_INLINE_PREFIX int MPIDIG_do_put(const void *origin_addr, int origin_count,
24                                            MPI_Datatype origin_datatype, int target_rank,
25                                            MPI_Aint target_disp, int target_count,
26                                            MPI_Datatype target_datatype, MPIR_Win * win,
27                                            MPIR_Request ** sreq_ptr)
28 {
29     int mpi_errno = MPI_SUCCESS, c;
30     MPIR_Request *sreq = NULL;
31     MPIDIG_put_msg_t am_hdr;
32     uint64_t offset;
33     size_t data_sz;
34     struct iovec am_iov[2];
35     size_t am_hdr_max_size;
36 #ifndef MPIDI_CH4_DIRECT_NETMOD
37     int is_local;
38 #endif
39 
40     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_DO_PUT);
41     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_DO_PUT);
42 
43 #ifndef MPIDI_CH4_DIRECT_NETMOD
44     is_local = MPIDI_rank_is_local(target_rank, win->comm_ptr);
45 #endif
46 
47     MPIDIG_RMA_OP_CHECK_SYNC(target_rank, win);
48 
49     MPIDI_Datatype_check_size(origin_datatype, origin_count, data_sz);
50     if (data_sz == 0)
51         goto immed_cmpl;
52 
53     if (target_rank == win->comm_ptr->rank) {
54         offset = win->disp_unit * target_disp;
55         mpi_errno = MPIR_Localcopy(origin_addr,
56                                    origin_count,
57                                    origin_datatype,
58                                    (char *) win->base + offset, target_count, target_datatype);
59         MPIR_ERR_CHECK(mpi_errno);
60         goto immed_cmpl;
61     }
62 
63     /* Only create request when issuing is not completed.
64      * We initialize two ref_count for progress engine and request-based OP,
65      * then put needs to free the second ref_count.*/
66     sreq = MPIDIG_request_create(MPIR_REQUEST_KIND__RMA, 2);
67     MPIR_ERR_CHKANDSTMT(sreq == NULL, mpi_errno, MPIX_ERR_NOREQ, goto fn_fail, "**nomemreq");
68     MPIDIG_REQUEST(sreq, req->preq.win_ptr) = win;
69     MPIDIG_REQUEST(sreq, req->preq.target_datatype) = target_datatype;
70     MPIR_Datatype_add_ref_if_not_builtin(target_datatype);
71 
72     MPIR_cc_incr(sreq->cc_ptr, &c);
73     MPIR_T_PVAR_TIMER_START(RMA, rma_amhdr_set);
74     am_hdr.src_rank = win->comm_ptr->rank;
75     am_hdr.target_disp = target_disp;
76     if (MPIR_DATATYPE_IS_PREDEFINED(target_datatype)) {
77         am_hdr.target_count = target_count;
78         am_hdr.target_datatype = target_datatype;
79     } else {
80         am_hdr.target_count = data_sz;
81         am_hdr.target_datatype = MPI_BYTE;
82     }
83     am_hdr.preq_ptr = sreq;
84     am_hdr.win_id = MPIDIG_WIN(win, win_id);
85 
86     /* Increase local and remote completion counters and set the local completion
87      * counter in request, thus it can be decreased at request completion. */
88     MPIDIG_win_cmpl_cnts_incr(win, target_rank, &sreq->completion_notification);
89     MPIDIG_REQUEST(sreq, rank) = target_rank;
90 
91     int is_contig;
92     MPIR_Datatype_is_contig(target_datatype, &is_contig);
93     if (MPIR_DATATYPE_IS_PREDEFINED(target_datatype) || is_contig) {
94         am_hdr.flattened_sz = 0;
95         MPIR_Datatype_get_true_lb(target_datatype, &am_hdr.target_true_lb);
96         MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
97 
98 #ifndef MPIDI_CH4_DIRECT_NETMOD
99         if (is_local)
100             mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr, MPIDIG_PUT_REQ,
101                                            &am_hdr, sizeof(am_hdr), origin_addr,
102                                            origin_count, origin_datatype, sreq);
103         else
104 #endif
105         {
106             mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr, MPIDIG_PUT_REQ,
107                                           &am_hdr, sizeof(am_hdr), origin_addr,
108                                           origin_count, origin_datatype, sreq);
109         }
110 
111         MPIR_ERR_CHECK(mpi_errno);
112         goto fn_exit;
113     }
114 
115     int flattened_sz;
116     void *flattened_dt;
117     MPIR_Datatype_get_flattened(target_datatype, &flattened_dt, &flattened_sz);
118     am_hdr.flattened_sz = flattened_sz;
119 
120     am_iov[0].iov_base = &am_hdr;
121     am_iov[0].iov_len = sizeof(am_hdr);
122     am_iov[1].iov_base = flattened_dt;
123     am_iov[1].iov_len = flattened_sz;
124     MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
125 
126 #ifndef MPIDI_CH4_DIRECT_NETMOD
127     am_hdr_max_size = is_local ? MPIDI_SHM_am_hdr_max_sz() : MPIDI_NM_am_hdr_max_sz();
128 #else
129     am_hdr_max_size = MPIDI_NM_am_hdr_max_sz();
130 #endif
131 
132     if ((am_iov[0].iov_len + am_iov[1].iov_len) <= am_hdr_max_size) {
133 #ifndef MPIDI_CH4_DIRECT_NETMOD
134         if (is_local)
135             mpi_errno = MPIDI_SHM_am_isendv(target_rank, win->comm_ptr, MPIDIG_PUT_REQ,
136                                             am_iov, 2, origin_addr, origin_count,
137                                             origin_datatype, sreq);
138         else
139 #endif
140         {
141             mpi_errno = MPIDI_NM_am_isendv(target_rank, win->comm_ptr, MPIDIG_PUT_REQ,
142                                            am_iov, 2, origin_addr, origin_count,
143                                            origin_datatype, sreq);
144         }
145     } else {
146         MPIDIG_REQUEST(sreq, req->preq.origin_addr) = (void *) origin_addr;
147         MPIDIG_REQUEST(sreq, req->preq.origin_count) = origin_count;
148         MPIDIG_REQUEST(sreq, req->preq.origin_datatype) = origin_datatype;
149         MPIR_Datatype_add_ref_if_not_builtin(origin_datatype);
150 
151 #ifndef MPIDI_CH4_DIRECT_NETMOD
152         if (is_local)
153             mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr, MPIDIG_PUT_DT_REQ,
154                                            &am_hdr, sizeof(am_hdr), am_iov[1].iov_base,
155                                            am_iov[1].iov_len, MPI_BYTE, sreq);
156         else
157 #endif
158         {
159             mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr, MPIDIG_PUT_DT_REQ,
160                                           &am_hdr, sizeof(am_hdr), am_iov[1].iov_base,
161                                           am_iov[1].iov_len, MPI_BYTE, sreq);
162         }
163     }
164     MPIR_ERR_CHECK(mpi_errno);
165 
166   fn_exit:
167     if (sreq_ptr)
168         *sreq_ptr = sreq;
169     else if (sreq != NULL)
170         MPIR_Request_free_unsafe(sreq);
171 
172     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_DO_PUT);
173     return mpi_errno;
174 
175   immed_cmpl:
176     if (sreq_ptr)
177         MPIDI_RMA_REQUEST_CREATE_COMPLETE(sreq);
178     goto fn_exit;
179 
180   fn_fail:
181     goto fn_exit;
182 }
183 
MPIDIG_do_get(void * origin_addr,int origin_count,MPI_Datatype origin_datatype,int target_rank,MPI_Aint target_disp,int target_count,MPI_Datatype target_datatype,MPIR_Win * win,MPIR_Request ** sreq_ptr)184 MPL_STATIC_INLINE_PREFIX int MPIDIG_do_get(void *origin_addr, int origin_count,
185                                            MPI_Datatype origin_datatype, int target_rank,
186                                            MPI_Aint target_disp, int target_count,
187                                            MPI_Datatype target_datatype, MPIR_Win * win,
188                                            MPIR_Request ** sreq_ptr)
189 {
190     int mpi_errno = MPI_SUCCESS, c;
191     size_t offset;
192     MPIR_Request *sreq = NULL;
193     MPIDIG_get_msg_t am_hdr;
194     size_t data_sz;
195 #ifndef MPIDI_CH4_DIRECT_NETMOD
196     int is_local;
197 #endif
198 
199     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_DO_GET);
200     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_DO_GET);
201 
202 #ifndef MPIDI_CH4_DIRECT_NETMOD
203     is_local = MPIDI_rank_is_local(target_rank, win->comm_ptr);
204 #endif
205 
206     MPIDIG_RMA_OP_CHECK_SYNC(target_rank, win);
207 
208     MPIDI_Datatype_check_size(origin_datatype, origin_count, data_sz);
209     if (data_sz == 0)
210         goto immed_cmpl;
211 
212     if (target_rank == win->comm_ptr->rank) {
213         offset = win->disp_unit * target_disp;
214         mpi_errno = MPIR_Localcopy((char *) win->base + offset,
215                                    target_count,
216                                    target_datatype, origin_addr, origin_count, origin_datatype);
217         MPIR_ERR_CHECK(mpi_errno);
218         goto immed_cmpl;
219     }
220 
221     /* Only create request when issuing is not completed.
222      * We initialize two ref_count for progress engine and request-based OP,
223      * then get needs to free the second ref_count.*/
224     sreq = MPIDIG_request_create(MPIR_REQUEST_KIND__RMA, 2);
225     MPIR_ERR_CHKANDSTMT(sreq == NULL, mpi_errno, MPIX_ERR_NOREQ, goto fn_fail, "**nomemreq");
226 
227     MPIDIG_REQUEST(sreq, req->greq.win_ptr) = win;
228     MPIDIG_REQUEST(sreq, req->greq.addr) = origin_addr;
229     MPIDIG_REQUEST(sreq, req->greq.count) = origin_count;
230     MPIDIG_REQUEST(sreq, req->greq.datatype) = origin_datatype;
231     MPIDIG_REQUEST(sreq, req->greq.target_datatype) = target_datatype;
232     MPIDIG_REQUEST(sreq, rank) = target_rank;
233     MPIR_Datatype_add_ref_if_not_builtin(origin_datatype);
234     MPIR_Datatype_add_ref_if_not_builtin(target_datatype);
235 
236     MPIR_cc_incr(sreq->cc_ptr, &c);
237     MPIR_T_PVAR_TIMER_START(RMA, rma_amhdr_set);
238     am_hdr.target_disp = target_disp;
239     if (MPIR_DATATYPE_IS_PREDEFINED(target_datatype)) {
240         am_hdr.target_count = target_count;
241         am_hdr.target_datatype = target_datatype;
242     } else {
243         am_hdr.target_count = data_sz;
244         am_hdr.target_datatype = MPI_BYTE;
245     }
246     am_hdr.greq_ptr = sreq;
247     am_hdr.win_id = MPIDIG_WIN(win, win_id);
248     am_hdr.src_rank = win->comm_ptr->rank;
249 
250     /* Increase local and remote completion counters and set the local completion
251      * counter in request, thus it can be decreased at request completion. */
252     MPIDIG_win_cmpl_cnts_incr(win, target_rank, &sreq->completion_notification);
253 
254     int is_contig;
255     MPIR_Datatype_is_contig(target_datatype, &is_contig);
256     if (MPIR_DATATYPE_IS_PREDEFINED(target_datatype) || is_contig) {
257         am_hdr.flattened_sz = 0;
258         MPIR_Datatype_get_true_lb(target_datatype, &am_hdr.target_true_lb);
259         MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
260 
261 #ifndef MPIDI_CH4_DIRECT_NETMOD
262         if (is_local)
263             mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr,
264                                            MPIDIG_GET_REQ, &am_hdr, sizeof(am_hdr),
265                                            NULL, 0, MPI_DATATYPE_NULL, sreq);
266         else
267 #endif
268         {
269             mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr,
270                                           MPIDIG_GET_REQ, &am_hdr, sizeof(am_hdr),
271                                           NULL, 0, MPI_DATATYPE_NULL, sreq);
272         }
273 
274         MPIR_ERR_CHECK(mpi_errno);
275         goto fn_exit;
276     }
277 
278     int flattened_sz;
279     void *flattened_dt;
280     MPIR_Datatype_get_flattened(target_datatype, &flattened_dt, &flattened_sz);
281     am_hdr.flattened_sz = flattened_sz;
282     MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
283 
284 #ifndef MPIDI_CH4_DIRECT_NETMOD
285     if (is_local)
286         mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr, MPIDIG_GET_REQ,
287                                        &am_hdr, sizeof(am_hdr), flattened_dt,
288                                        flattened_sz, MPI_BYTE, sreq);
289     else
290 #endif
291     {
292         mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr, MPIDIG_GET_REQ,
293                                       &am_hdr, sizeof(am_hdr), flattened_dt,
294                                       flattened_sz, MPI_BYTE, sreq);
295     }
296 
297     MPIR_ERR_CHECK(mpi_errno);
298 
299   fn_exit:
300     if (sreq_ptr)
301         *sreq_ptr = sreq;
302     else if (sreq != NULL)
303         MPIR_Request_free_unsafe(sreq);
304 
305     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_DO_GET);
306     return mpi_errno;
307 
308   immed_cmpl:
309     if (sreq_ptr)
310         MPIDI_RMA_REQUEST_CREATE_COMPLETE(sreq);
311     goto fn_exit;
312 
313   fn_fail:
314     goto fn_exit;
315 }
316 
317 
MPIDIG_do_accumulate(const void * origin_addr,int origin_count,MPI_Datatype origin_datatype,int target_rank,MPI_Aint target_disp,int target_count,MPI_Datatype target_datatype,MPI_Op op,MPIR_Win * win,MPIR_Request ** sreq_ptr)318 MPL_STATIC_INLINE_PREFIX int MPIDIG_do_accumulate(const void *origin_addr, int origin_count,
319                                                   MPI_Datatype origin_datatype, int target_rank,
320                                                   MPI_Aint target_disp, int target_count,
321                                                   MPI_Datatype target_datatype,
322                                                   MPI_Op op, MPIR_Win * win,
323                                                   MPIR_Request ** sreq_ptr)
324 {
325     int mpi_errno = MPI_SUCCESS, c;
326     MPIR_Request *sreq = NULL;
327     size_t basic_type_size;
328     MPIDIG_acc_req_msg_t am_hdr;
329     uint64_t data_sz, target_data_sz;
330     struct iovec am_iov[2];
331     MPIR_Datatype *dt_ptr;
332     int am_hdr_max_sz;
333 #ifndef MPIDI_CH4_DIRECT_NETMOD
334     int is_local;
335 #endif
336 
337     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_DO_ACCUMULATE);
338     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_DO_ACCUMULATE);
339 
340 #ifndef MPIDI_CH4_DIRECT_NETMOD
341     is_local = MPIDI_rank_is_local(target_rank, win->comm_ptr);
342 #endif
343 
344     MPIDIG_RMA_OP_CHECK_SYNC(target_rank, win);
345 
346     MPIDI_Datatype_get_size_dt_ptr(origin_count, origin_datatype, data_sz, dt_ptr);
347     MPIDI_Datatype_check_size(target_datatype, target_count, target_data_sz);
348     if (data_sz == 0 || target_data_sz == 0) {
349         goto immed_cmpl;
350     }
351 
352     /* Only create request when issuing is not completed.
353      * We initialize two ref_count for progress engine and request-based OP,
354      * then acc needs to free the second ref_count.*/
355     sreq = MPIDIG_request_create(MPIR_REQUEST_KIND__RMA, 2);
356     MPIR_ERR_CHKANDSTMT(sreq == NULL, mpi_errno, MPIX_ERR_NOREQ, goto fn_fail, "**nomemreq");
357     MPIDIG_REQUEST(sreq, req->areq.win_ptr) = win;
358     MPIDIG_REQUEST(sreq, req->areq.target_datatype) = target_datatype;
359     MPIR_Datatype_add_ref_if_not_builtin(target_datatype);
360 
361     MPIR_cc_incr(sreq->cc_ptr, &c);
362 
363     MPIR_T_PVAR_TIMER_START(RMA, rma_amhdr_set);
364     am_hdr.req_ptr = sreq;
365     am_hdr.origin_count = origin_count;
366 
367     if (HANDLE_IS_BUILTIN(origin_datatype)) {
368         am_hdr.origin_datatype = origin_datatype;
369     } else {
370         am_hdr.origin_datatype = (dt_ptr) ? dt_ptr->basic_type : MPI_DATATYPE_NULL;
371         MPIR_Datatype_get_size_macro(am_hdr.origin_datatype, basic_type_size);
372         am_hdr.origin_count = (basic_type_size > 0) ? data_sz / basic_type_size : 0;
373     }
374 
375     am_hdr.target_count = target_count;
376     am_hdr.target_datatype = target_datatype;
377     am_hdr.target_disp = target_disp;
378     am_hdr.op = op;
379     am_hdr.win_id = MPIDIG_WIN(win, win_id);
380     am_hdr.src_rank = win->comm_ptr->rank;
381 
382     /* Increase local and remote completion counters and set the local completion
383      * counter in request, thus it can be decreased at request completion. */
384     MPIDIG_win_cmpl_cnts_incr(win, target_rank, &sreq->completion_notification);
385     /* Increase remote completion counter for acc. */
386     MPIDIG_win_remote_acc_cmpl_cnt_incr(win, target_rank);
387 
388     MPIDIG_REQUEST(sreq, rank) = target_rank;
389     MPIDIG_REQUEST(sreq, req->areq.data_sz) = data_sz;
390     if (MPIR_DATATYPE_IS_PREDEFINED(target_datatype)) {
391         am_hdr.flattened_sz = 0;
392         MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
393 
394 #ifndef MPIDI_CH4_DIRECT_NETMOD
395         if (is_local)
396             mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr, MPIDIG_ACC_REQ,
397                                            &am_hdr, sizeof(am_hdr), origin_addr,
398                                            (op == MPI_NO_OP) ? 0 : origin_count, origin_datatype,
399                                            sreq);
400         else
401 #endif
402         {
403             mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr, MPIDIG_ACC_REQ,
404                                           &am_hdr, sizeof(am_hdr), origin_addr,
405                                           (op == MPI_NO_OP) ? 0 : origin_count, origin_datatype,
406                                           sreq);
407         }
408 
409         MPIR_ERR_CHECK(mpi_errno);
410         goto fn_exit;
411     }
412 
413     int flattened_sz;
414     void *flattened_dt;
415     MPIR_Datatype_get_flattened(target_datatype, &flattened_dt, &flattened_sz);
416     am_hdr.flattened_sz = flattened_sz;
417 
418     am_iov[0].iov_base = &am_hdr;
419     am_iov[0].iov_len = sizeof(am_hdr);
420     am_iov[1].iov_base = flattened_dt;
421     am_iov[1].iov_len = flattened_sz;
422     MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
423 
424 #ifndef MPIDI_CH4_DIRECT_NETMOD
425     am_hdr_max_sz = is_local ? MPIDI_SHM_am_hdr_max_sz() : MPIDI_NM_am_hdr_max_sz();
426 #else
427     am_hdr_max_sz = MPIDI_NM_am_hdr_max_sz();
428 #endif
429 
430     if ((am_iov[0].iov_len + am_iov[1].iov_len) <= am_hdr_max_sz) {
431 #ifndef MPIDI_CH4_DIRECT_NETMOD
432         if (is_local)
433             mpi_errno = MPIDI_SHM_am_isendv(target_rank, win->comm_ptr, MPIDIG_ACC_REQ,
434                                             am_iov, 2, origin_addr,
435                                             (op == MPI_NO_OP) ? 0 : origin_count, origin_datatype,
436                                             sreq);
437         else
438 #endif
439         {
440             mpi_errno = MPIDI_NM_am_isendv(target_rank, win->comm_ptr, MPIDIG_ACC_REQ,
441                                            am_iov, 2, origin_addr,
442                                            (op == MPI_NO_OP) ? 0 : origin_count, origin_datatype,
443                                            sreq);
444         }
445     } else {
446         MPIDIG_REQUEST(sreq, req->areq.origin_addr) = (void *) origin_addr;
447         MPIDIG_REQUEST(sreq, req->areq.origin_count) = origin_count;
448         MPIDIG_REQUEST(sreq, req->areq.origin_datatype) = origin_datatype;
449         MPIR_Datatype_add_ref_if_not_builtin(origin_datatype);
450 
451 #ifndef MPIDI_CH4_DIRECT_NETMOD
452         if (is_local)
453             mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr, MPIDIG_ACC_DT_REQ,
454                                            &am_hdr, sizeof(am_hdr), am_iov[1].iov_base,
455                                            am_iov[1].iov_len, MPI_BYTE, sreq);
456         else
457 #endif
458         {
459             mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr, MPIDIG_ACC_DT_REQ,
460                                           &am_hdr, sizeof(am_hdr), am_iov[1].iov_base,
461                                           am_iov[1].iov_len, MPI_BYTE, sreq);
462         }
463     }
464     MPIR_ERR_CHECK(mpi_errno);
465 
466   fn_exit:
467     if (sreq_ptr)
468         *sreq_ptr = sreq;
469     else if (sreq != NULL)
470         MPIR_Request_free_unsafe(sreq);
471 
472     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_DO_ACCUMULATE);
473     return mpi_errno;
474 
475   immed_cmpl:
476     if (sreq_ptr)
477         MPIDI_RMA_REQUEST_CREATE_COMPLETE(sreq);
478     goto fn_exit;
479 
480   fn_fail:
481     goto fn_exit;
482 }
483 
MPIDIG_do_get_accumulate(const void * origin_addr,int origin_count_,MPI_Datatype origin_datatype_,void * result_addr,int result_count,MPI_Datatype result_datatype,int target_rank,MPI_Aint target_disp,int target_count,MPI_Datatype target_datatype,MPI_Op op,MPIR_Win * win,MPIR_Request ** sreq_ptr)484 MPL_STATIC_INLINE_PREFIX int MPIDIG_do_get_accumulate(const void *origin_addr,
485                                                       int origin_count_,
486                                                       MPI_Datatype origin_datatype_,
487                                                       void *result_addr,
488                                                       int result_count,
489                                                       MPI_Datatype result_datatype,
490                                                       int target_rank,
491                                                       MPI_Aint target_disp,
492                                                       int target_count,
493                                                       MPI_Datatype target_datatype,
494                                                       MPI_Op op, MPIR_Win * win,
495                                                       MPIR_Request ** sreq_ptr)
496 {
497     int mpi_errno = MPI_SUCCESS, c;
498     MPIR_Request *sreq = NULL;
499     size_t basic_type_size;
500     MPIDIG_get_acc_req_msg_t am_hdr;
501     uint64_t data_sz, result_data_sz, target_data_sz;
502     struct iovec am_iov[2];
503     MPIR_Datatype *dt_ptr;
504     int am_hdr_max_sz;
505     int origin_count = origin_count_;
506     MPI_Datatype origin_datatype = origin_datatype_;
507 #ifndef MPIDI_CH4_DIRECT_NETMOD
508     int is_local;
509 #endif
510 
511     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_DO_GET_ACCUMULATE);
512     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_DO_GET_ACCUMULATE);
513 
514 #ifndef MPIDI_CH4_DIRECT_NETMOD
515     is_local = MPIDI_rank_is_local(target_rank, win->comm_ptr);
516 #endif
517 
518     MPIDIG_RMA_OP_CHECK_SYNC(target_rank, win);
519 
520     if (op == MPI_NO_OP) {
521         origin_count = 0;
522         origin_datatype = MPI_DATATYPE_NULL;
523         data_sz = 0;
524         dt_ptr = NULL;
525     } else {
526         MPIDI_Datatype_get_size_dt_ptr(origin_count, origin_datatype, data_sz, dt_ptr);
527     }
528     MPIDI_Datatype_check_size(target_datatype, target_count, target_data_sz);
529     MPIDI_Datatype_check_size(result_datatype, result_count, result_data_sz);
530 
531     if (target_data_sz == 0 || (data_sz == 0 && result_data_sz == 0)) {
532         goto immed_cmpl;
533     }
534 
535     /* Only create request when issuing is not completed.
536      * We initialize two ref_count for progress engine and request-based OP,
537      * then get_acc needs to free the second ref_count.*/
538     sreq = MPIDIG_request_create(MPIR_REQUEST_KIND__RMA, 2);
539     MPIR_ERR_CHKANDSTMT(sreq == NULL, mpi_errno, MPIX_ERR_NOREQ, goto fn_fail, "**nomemreq");
540 
541     MPIDIG_REQUEST(sreq, req->areq.win_ptr) = win;
542     MPIDIG_REQUEST(sreq, req->areq.result_addr) = result_addr;
543     MPIDIG_REQUEST(sreq, req->areq.result_count) = result_count;
544     MPIDIG_REQUEST(sreq, req->areq.result_datatype) = result_datatype;
545     MPIR_Datatype_add_ref_if_not_builtin(result_datatype);
546     MPIDIG_REQUEST(sreq, req->areq.target_datatype) = target_datatype;
547     MPIR_Datatype_add_ref_if_not_builtin(target_datatype);
548     MPIR_cc_incr(sreq->cc_ptr, &c);
549 
550     /* TODO: have common routine for accumulate/get_accumulate */
551     MPIR_T_PVAR_TIMER_START(RMA, rma_amhdr_set);
552     am_hdr.req_ptr = sreq;
553     am_hdr.origin_count = origin_count;
554 
555     if (HANDLE_IS_BUILTIN(origin_datatype)) {
556         am_hdr.origin_datatype = origin_datatype;
557     } else {
558         am_hdr.origin_datatype = (dt_ptr) ? dt_ptr->basic_type : MPI_DATATYPE_NULL;
559         MPIR_Datatype_get_size_macro(am_hdr.origin_datatype, basic_type_size);
560         am_hdr.origin_count = (basic_type_size > 0) ? data_sz / basic_type_size : 0;
561     }
562 
563     am_hdr.target_count = target_count;
564     am_hdr.target_datatype = target_datatype;
565     am_hdr.target_disp = target_disp;
566     am_hdr.op = op;
567     am_hdr.win_id = MPIDIG_WIN(win, win_id);
568     am_hdr.src_rank = win->comm_ptr->rank;
569 
570     am_hdr.result_data_sz = result_data_sz;
571 
572     /* Increase local and remote completion counters and set the local completion
573      * counter in request, thus it can be decreased at request completion. */
574     MPIDIG_win_cmpl_cnts_incr(win, target_rank, &sreq->completion_notification);
575     /* Increase remote completion counter for acc. */
576     MPIDIG_win_remote_acc_cmpl_cnt_incr(win, target_rank);
577 
578     MPIDIG_REQUEST(sreq, rank) = target_rank;
579     MPIDIG_REQUEST(sreq, req->areq.data_sz) = data_sz;
580     if (MPIR_DATATYPE_IS_PREDEFINED(target_datatype)) {
581         am_hdr.flattened_sz = 0;
582         MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
583 
584 #ifndef MPIDI_CH4_DIRECT_NETMOD
585         if (is_local)
586             mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr, MPIDIG_GET_ACC_REQ,
587                                            &am_hdr, sizeof(am_hdr), origin_addr,
588                                            (op == MPI_NO_OP) ? 0 : origin_count, origin_datatype,
589                                            sreq);
590         else
591 #endif
592         {
593             mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr, MPIDIG_GET_ACC_REQ,
594                                           &am_hdr, sizeof(am_hdr), origin_addr,
595                                           (op == MPI_NO_OP) ? 0 : origin_count, origin_datatype,
596                                           sreq);
597         }
598 
599         MPIR_ERR_CHECK(mpi_errno);
600         goto fn_exit;
601     }
602 
603     int flattened_sz;
604     void *flattened_dt;
605     MPIR_Datatype_get_flattened(target_datatype, &flattened_dt, &flattened_sz);
606     am_hdr.flattened_sz = flattened_sz;
607 
608     am_iov[0].iov_base = &am_hdr;
609     am_iov[0].iov_len = sizeof(am_hdr);
610     am_iov[1].iov_base = flattened_dt;
611     am_iov[1].iov_len = flattened_sz;
612     MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
613 
614 #ifndef MPIDI_CH4_DIRECT_NETMOD
615     am_hdr_max_sz = is_local ? MPIDI_SHM_am_hdr_max_sz() : MPIDI_NM_am_hdr_max_sz();
616 #else
617     am_hdr_max_sz = MPIDI_NM_am_hdr_max_sz();
618 #endif
619 
620     if ((am_iov[0].iov_len + am_iov[1].iov_len) <= am_hdr_max_sz) {
621 #ifndef MPIDI_CH4_DIRECT_NETMOD
622         if (is_local)
623             mpi_errno = MPIDI_SHM_am_isendv(target_rank, win->comm_ptr, MPIDIG_GET_ACC_REQ,
624                                             am_iov, 2, origin_addr,
625                                             (op == MPI_NO_OP) ? 0 : origin_count, origin_datatype,
626                                             sreq);
627         else
628 #endif
629         {
630             mpi_errno = MPIDI_NM_am_isendv(target_rank, win->comm_ptr, MPIDIG_GET_ACC_REQ,
631                                            am_iov, 2, origin_addr,
632                                            (op == MPI_NO_OP) ? 0 : origin_count, origin_datatype,
633                                            sreq);
634         }
635     } else {
636         MPIDIG_REQUEST(sreq, req->areq.origin_addr) = (void *) origin_addr;
637         MPIDIG_REQUEST(sreq, req->areq.origin_count) = origin_count;
638         MPIDIG_REQUEST(sreq, req->areq.origin_datatype) = origin_datatype;
639         MPIR_Datatype_add_ref_if_not_builtin(origin_datatype);
640 
641 #ifndef MPIDI_CH4_DIRECT_NETMOD
642         if (is_local)
643             mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr, MPIDIG_GET_ACC_DT_REQ,
644                                            &am_hdr, sizeof(am_hdr), am_iov[1].iov_base,
645                                            am_iov[1].iov_len, MPI_BYTE, sreq);
646         else
647 #endif
648         {
649             mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr, MPIDIG_GET_ACC_DT_REQ,
650                                           &am_hdr, sizeof(am_hdr), am_iov[1].iov_base,
651                                           am_iov[1].iov_len, MPI_BYTE, sreq);
652         }
653     }
654     MPIR_ERR_CHECK(mpi_errno);
655 
656   fn_exit:
657     if (sreq_ptr)
658         *sreq_ptr = sreq;
659     else if (sreq != NULL)
660         MPIR_Request_free_unsafe(sreq);
661 
662     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_DO_GET_ACCUMULATE);
663     return mpi_errno;
664 
665   immed_cmpl:
666     if (sreq_ptr)
667         MPIDI_RMA_REQUEST_CREATE_COMPLETE(sreq);
668     goto fn_exit;
669 
670   fn_fail:
671     goto fn_exit;
672 }
673 
MPIDIG_mpi_put(const void * origin_addr,int origin_count,MPI_Datatype origin_datatype,int target_rank,MPI_Aint target_disp,int target_count,MPI_Datatype target_datatype,MPIR_Win * win)674 MPL_STATIC_INLINE_PREFIX int MPIDIG_mpi_put(const void *origin_addr, int origin_count,
675                                             MPI_Datatype origin_datatype, int target_rank,
676                                             MPI_Aint target_disp, int target_count,
677                                             MPI_Datatype target_datatype, MPIR_Win * win)
678 {
679     int mpi_errno = MPI_SUCCESS;
680     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_PUT);
681     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_PUT);
682 
683     mpi_errno = MPIDIG_do_put(origin_addr, origin_count, origin_datatype,
684                               target_rank, target_disp, target_count, target_datatype, win, NULL);
685     MPIR_ERR_CHECK(mpi_errno);
686 
687   fn_exit:
688     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_PUT);
689     return mpi_errno;
690   fn_fail:
691     goto fn_exit;
692 }
693 
MPIDIG_mpi_rput(const void * origin_addr,int origin_count,MPI_Datatype origin_datatype,int target_rank,MPI_Aint target_disp,int target_count,MPI_Datatype target_datatype,MPIR_Win * win,MPIR_Request ** request)694 MPL_STATIC_INLINE_PREFIX int MPIDIG_mpi_rput(const void *origin_addr, int origin_count,
695                                              MPI_Datatype origin_datatype, int target_rank,
696                                              MPI_Aint target_disp, int target_count,
697                                              MPI_Datatype target_datatype, MPIR_Win * win,
698                                              MPIR_Request ** request)
699 {
700     int mpi_errno = MPI_SUCCESS;
701     MPIR_Request *sreq = NULL;
702     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_RPUT);
703     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_RPUT);
704 
705     mpi_errno = MPIDIG_do_put(origin_addr, origin_count, origin_datatype, target_rank, target_disp,
706                               target_count, target_datatype, win, &sreq);
707     MPIR_ERR_CHECK(mpi_errno);
708 
709   fn_exit:
710     *request = sreq;
711     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_RPUT);
712     return mpi_errno;
713   fn_fail:
714     goto fn_exit;
715 }
716 
717 
MPIDIG_mpi_get(void * origin_addr,int origin_count,MPI_Datatype origin_datatype,int target_rank,MPI_Aint target_disp,int target_count,MPI_Datatype target_datatype,MPIR_Win * win)718 MPL_STATIC_INLINE_PREFIX int MPIDIG_mpi_get(void *origin_addr, int origin_count,
719                                             MPI_Datatype origin_datatype, int target_rank,
720                                             MPI_Aint target_disp, int target_count,
721                                             MPI_Datatype target_datatype, MPIR_Win * win)
722 {
723     int mpi_errno = MPI_SUCCESS;
724     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_GET);
725     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_GET);
726     mpi_errno = MPIDIG_do_get(origin_addr, origin_count, origin_datatype,
727                               target_rank, target_disp, target_count, target_datatype, win, NULL);
728     MPIR_ERR_CHECK(mpi_errno);
729 
730   fn_exit:
731     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_GET);
732     return mpi_errno;
733   fn_fail:
734     goto fn_exit;
735 }
736 
MPIDIG_mpi_rget(void * origin_addr,int origin_count,MPI_Datatype origin_datatype,int target_rank,MPI_Aint target_disp,int target_count,MPI_Datatype target_datatype,MPIR_Win * win,MPIR_Request ** request)737 MPL_STATIC_INLINE_PREFIX int MPIDIG_mpi_rget(void *origin_addr, int origin_count,
738                                              MPI_Datatype origin_datatype, int target_rank,
739                                              MPI_Aint target_disp, int target_count,
740                                              MPI_Datatype target_datatype, MPIR_Win * win,
741                                              MPIR_Request ** request)
742 {
743     int mpi_errno = MPI_SUCCESS;
744     MPIR_Request *sreq = NULL;
745     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_RGET);
746     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_RGET);
747 
748     mpi_errno = MPIDIG_do_get(origin_addr, origin_count, origin_datatype, target_rank, target_disp,
749                               target_count, target_datatype, win, &sreq);
750     MPIR_ERR_CHECK(mpi_errno);
751 
752   fn_exit:
753     *request = sreq;
754     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_RGET);
755     return mpi_errno;
756   fn_fail:
757     goto fn_exit;
758 }
759 
760 
MPIDIG_mpi_raccumulate(const void * origin_addr,int origin_count,MPI_Datatype origin_datatype,int target_rank,MPI_Aint target_disp,int target_count,MPI_Datatype target_datatype,MPI_Op op,MPIR_Win * win,MPIR_Request ** request)761 MPL_STATIC_INLINE_PREFIX int MPIDIG_mpi_raccumulate(const void *origin_addr, int origin_count,
762                                                     MPI_Datatype origin_datatype, int target_rank,
763                                                     MPI_Aint target_disp, int target_count,
764                                                     MPI_Datatype target_datatype, MPI_Op op,
765                                                     MPIR_Win * win, MPIR_Request ** request)
766 {
767     int mpi_errno = MPI_SUCCESS;
768     MPIR_Request *sreq = NULL;
769     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_RACCUMULATE);
770     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_RACCUMULATE);
771 
772     mpi_errno = MPIDIG_do_accumulate(origin_addr, origin_count, origin_datatype, target_rank,
773                                      target_disp, target_count, target_datatype, op, win, &sreq);
774     MPIR_ERR_CHECK(mpi_errno);
775 
776   fn_exit:
777     *request = sreq;
778     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_RACCUMULATE);
779     return mpi_errno;
780   fn_fail:
781     goto fn_exit;
782 }
783 
MPIDIG_mpi_accumulate(const void * origin_addr,int origin_count,MPI_Datatype origin_datatype,int target_rank,MPI_Aint target_disp,int target_count,MPI_Datatype target_datatype,MPI_Op op,MPIR_Win * win)784 MPL_STATIC_INLINE_PREFIX int MPIDIG_mpi_accumulate(const void *origin_addr, int origin_count,
785                                                    MPI_Datatype origin_datatype, int target_rank,
786                                                    MPI_Aint target_disp, int target_count,
787                                                    MPI_Datatype target_datatype, MPI_Op op,
788                                                    MPIR_Win * win)
789 {
790     int mpi_errno = MPI_SUCCESS;
791     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_ACCUMULATE);
792     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_ACCUMULATE);
793 
794     mpi_errno = MPIDIG_do_accumulate(origin_addr, origin_count, origin_datatype,
795                                      target_rank, target_disp, target_count, target_datatype, op,
796                                      win, NULL);
797     MPIR_ERR_CHECK(mpi_errno);
798 
799   fn_exit:
800     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_ACCUMULATE);
801     return mpi_errno;
802   fn_fail:
803     goto fn_exit;
804 }
805 
806 
MPIDIG_mpi_rget_accumulate(const void * origin_addr,int origin_count,MPI_Datatype origin_datatype,void * result_addr,int result_count,MPI_Datatype result_datatype,int target_rank,MPI_Aint target_disp,int target_count,MPI_Datatype target_datatype,MPI_Op op,MPIR_Win * win,MPIR_Request ** request)807 MPL_STATIC_INLINE_PREFIX int MPIDIG_mpi_rget_accumulate(const void *origin_addr,
808                                                         int origin_count,
809                                                         MPI_Datatype origin_datatype,
810                                                         void *result_addr, int result_count,
811                                                         MPI_Datatype result_datatype,
812                                                         int target_rank, MPI_Aint target_disp,
813                                                         int target_count,
814                                                         MPI_Datatype target_datatype, MPI_Op op,
815                                                         MPIR_Win * win, MPIR_Request ** request)
816 {
817     int mpi_errno = MPI_SUCCESS;
818     MPIR_Request *sreq = NULL;
819     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_RGET_ACCUMULATE);
820     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_RGET_ACCUMULATE);
821 
822     mpi_errno = MPIDIG_do_get_accumulate(origin_addr, origin_count, origin_datatype, result_addr,
823                                          result_count, result_datatype, target_rank, target_disp,
824                                          target_count, target_datatype, op, win, &sreq);
825     MPIR_ERR_CHECK(mpi_errno);
826 
827   fn_exit:
828     *request = sreq;
829     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_RGET_ACCUMULATE);
830     return mpi_errno;
831   fn_fail:
832     goto fn_exit;
833 }
834 
MPIDIG_mpi_get_accumulate(const void * origin_addr,int origin_count,MPI_Datatype origin_datatype,void * result_addr,int result_count,MPI_Datatype result_datatype,int target_rank,MPI_Aint target_disp,int target_count,MPI_Datatype target_datatype,MPI_Op op,MPIR_Win * win)835 MPL_STATIC_INLINE_PREFIX int MPIDIG_mpi_get_accumulate(const void *origin_addr,
836                                                        int origin_count,
837                                                        MPI_Datatype origin_datatype,
838                                                        void *result_addr, int result_count,
839                                                        MPI_Datatype result_datatype,
840                                                        int target_rank, MPI_Aint target_disp,
841                                                        int target_count,
842                                                        MPI_Datatype target_datatype,
843                                                        MPI_Op op, MPIR_Win * win)
844 {
845     int mpi_errno = MPI_SUCCESS;
846     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_GET_ACCUMULATE);
847     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_GET_ACCUMULATE);
848 
849     mpi_errno = MPIDIG_do_get_accumulate(origin_addr, origin_count, origin_datatype,
850                                          result_addr, result_count, result_datatype,
851                                          target_rank, target_disp, target_count, target_datatype,
852                                          op, win, NULL);
853     MPIR_ERR_CHECK(mpi_errno);
854 
855   fn_exit:
856     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_GET_ACCUMULATE);
857     return mpi_errno;
858   fn_fail:
859     goto fn_exit;
860 }
861 
MPIDIG_mpi_compare_and_swap(const void * origin_addr,const void * compare_addr,void * result_addr,MPI_Datatype datatype,int target_rank,MPI_Aint target_disp,MPIR_Win * win)862 MPL_STATIC_INLINE_PREFIX int MPIDIG_mpi_compare_and_swap(const void *origin_addr,
863                                                          const void *compare_addr,
864                                                          void *result_addr, MPI_Datatype datatype,
865                                                          int target_rank, MPI_Aint target_disp,
866                                                          MPIR_Win * win)
867 {
868     int mpi_errno = MPI_SUCCESS, c;
869     MPIR_Request *sreq = NULL;
870     MPIDIG_cswap_req_msg_t am_hdr;
871     size_t data_sz;
872     void *p_data;
873 
874     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_COMPARE_AND_SWAP);
875     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_COMPARE_AND_SWAP);
876 
877     MPIDIG_RMA_OP_CHECK_SYNC(target_rank, win);
878 
879     MPIDI_Datatype_check_size(datatype, 1, data_sz);
880     if (data_sz == 0)
881         goto fn_exit;
882 
883     p_data = MPL_malloc(data_sz * 2, MPL_MEM_BUFFER);
884     MPIR_Assert(p_data);
885     MPIR_Typerep_copy(p_data, (char *) origin_addr, data_sz);
886     MPIR_Typerep_copy((char *) p_data + data_sz, (char *) compare_addr, data_sz);
887 
888     sreq = MPIDIG_request_create(MPIR_REQUEST_KIND__RMA, 1);
889     MPIR_ERR_CHKANDSTMT(sreq == NULL, mpi_errno, MPIX_ERR_NOREQ, goto fn_fail, "**nomemreq");
890 
891     MPIDIG_REQUEST(sreq, req->creq.win_ptr) = win;
892     MPIDIG_REQUEST(sreq, req->creq.addr) = result_addr;
893     MPIDIG_REQUEST(sreq, req->creq.datatype) = datatype;
894     MPIDIG_REQUEST(sreq, req->creq.result_addr) = result_addr;
895     MPIDIG_REQUEST(sreq, req->creq.data) = p_data;
896     MPIDIG_REQUEST(sreq, rank) = target_rank;
897     MPIR_cc_incr(sreq->cc_ptr, &c);
898 
899     MPIR_T_PVAR_TIMER_START(RMA, rma_amhdr_set);
900     am_hdr.target_disp = target_disp;
901     am_hdr.datatype = datatype;
902     am_hdr.req_ptr = sreq;
903     am_hdr.win_id = MPIDIG_WIN(win, win_id);
904     am_hdr.src_rank = win->comm_ptr->rank;
905     MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
906 
907     MPIDIG_win_cmpl_cnts_incr(win, target_rank, &sreq->completion_notification);
908     /* Increase remote completion counter for acc. */
909     MPIDIG_win_remote_acc_cmpl_cnt_incr(win, target_rank);
910 
911 #ifndef MPIDI_CH4_DIRECT_NETMOD
912     if (MPIDI_rank_is_local(target_rank, win->comm_ptr))
913         mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr, MPIDIG_CSWAP_REQ,
914                                        &am_hdr, sizeof(am_hdr), (char *) p_data, 2, datatype, sreq);
915     else
916 #endif
917     {
918         mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr, MPIDIG_CSWAP_REQ,
919                                       &am_hdr, sizeof(am_hdr), (char *) p_data, 2, datatype, sreq);
920     }
921     MPIR_ERR_CHECK(mpi_errno);
922   fn_exit:
923     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_COMPARE_AND_SWAP);
924     return mpi_errno;
925   fn_fail:
926     goto fn_exit;
927 }
928 
MPIDIG_mpi_fetch_and_op(const void * origin_addr,void * result_addr,MPI_Datatype datatype,int target_rank,MPI_Aint target_disp,MPI_Op op,MPIR_Win * win)929 MPL_STATIC_INLINE_PREFIX int MPIDIG_mpi_fetch_and_op(const void *origin_addr, void *result_addr,
930                                                      MPI_Datatype datatype, int target_rank,
931                                                      MPI_Aint target_disp, MPI_Op op,
932                                                      MPIR_Win * win)
933 {
934     int mpi_errno = MPI_SUCCESS;
935     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_FETCH_AND_OP);
936     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_FETCH_AND_OP);
937 
938     mpi_errno = MPIDIG_mpi_get_accumulate(origin_addr, 1, datatype, result_addr, 1, datatype,
939                                           target_rank, target_disp, 1, datatype, op, win);
940     MPIR_ERR_CHECK(mpi_errno);
941   fn_exit:
942     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_FETCH_AND_OP);
943     return mpi_errno;
944   fn_fail:
945     goto fn_exit;
946 }
947 
948 #endif /* CH4R_RMA_H_INCLUDED */
949