1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 #include "mpidimpl.h"
7 #include "mpidch4r.h"
8 #include "ch4r_win.h"
9 
/* Values for the shm_option argument of win_shm_alloc_impl(). */
enum {
    /* Shared memory is best effort: when the segment cannot be mapped the
     * window memory is obtained from the heap instead. */
    SHM_WIN_OPTIONAL = 0,
    /* Shared memory is mandatory: a failed segment mapping is reported as
     * an error. */
    SHM_WIN_REQUIRED = 1,
};
14 
15 static void parse_info_accu_ops_str(const char *str, uint32_t * ops_ptr);
16 static void get_info_accu_ops_str(uint32_t val, char *buf, size_t maxlen);
17 static int win_set_info(MPIR_Win * win, MPIR_Info * info, bool is_init);
18 static int win_init(MPI_Aint length, int disp_unit, MPIR_Win ** win_ptr, MPIR_Info * info,
19                     MPIR_Comm * comm_ptr, int create_flavor, int model);
20 static int win_finalize(MPIR_Win ** win_ptr);
21 static int win_shm_alloc_impl(MPI_Aint size, int disp_unit, MPIR_Comm * comm_ptr, void **base_ptr,
22                               MPIR_Win ** win_ptr, int shm_option);
23 
parse_info_accu_ops_str(const char * str,uint32_t * ops_ptr)24 static void parse_info_accu_ops_str(const char *str, uint32_t * ops_ptr)
25 {
26     uint32_t ops = 0;
27     char *value, *token, *savePtr = NULL;
28 
29     value = (char *) str;
30     /* str can never be NULL. */
31     MPIR_Assert(value);
32 
33     /* handle special value */
34     if (!strncmp(value, "none", strlen("none"))) {
35         *ops_ptr = 0;
36         return;
37     } else if (!strncmp(value, "any_op", strlen("any_op"))) {
38         /* add all ops */
39         int op_index;
40         for (op_index = 0; op_index < MPIDIG_ACCU_NUM_OP; op_index++)
41             ops |= (1 << op_index);
42         *ops_ptr = ops;
43         return;
44     }
45 
46     token = (char *) strtok_r(value, ",", &savePtr);
47     while (token != NULL) {
48         /* Use OP_NULL for special cswap */
49         if (!strncmp(token, "cswap", strlen("cswap")) ||
50             !strncmp(token, "compare_and_swap", strlen("compare_and_swap"))) {
51             ops |= (1 << MPIDIU_win_acc_op_get_index(MPI_OP_NULL));
52         } else {
53             /* search other reduce op by short name */
54             MPI_Op op = MPIR_Op_builtin_search_by_shortname(token);
55             if (op != MPI_OP_NULL) {
56                 ops |= (1 << MPIDIU_win_acc_op_get_index(op));
57             }
58         }
59 
60         token = (char *) strtok_r(NULL, ",", &savePtr);
61     }
62 
63     /* update info only when any valid value is set */
64     if (ops)
65         *ops_ptr = ops;
66 }
67 
get_info_accu_ops_str(uint32_t val,char * buf,size_t maxlen)68 static void get_info_accu_ops_str(uint32_t val, char *buf, size_t maxlen)
69 {
70     int c = 0, op_index;
71     for (op_index = 0; op_index < MPIDIG_ACCU_NUM_OP; op_index++) {
72         if (val & (1 << op_index)) {
73             MPI_Op op = MPIDIU_win_acc_get_op(op_index);
74 
75             MPIR_Assert(c < maxlen);
76             /* use OP_NULL as special cswap */
77             if (op == MPI_OP_NULL) {
78                 c += snprintf(buf + c, maxlen - c, "%scswap", (c > 0) ? "," : "");
79             } else {
80                 const char *short_name = MPIR_Op_builtin_get_shortname(op);
81                 c += snprintf(buf + c, maxlen - c, "%s%s", (c > 0) ? "," : "", short_name);
82             }
83         }
84     }
85 
86     if (c == 0)
87         strncpy(buf, "none", maxlen);
88 }
89 
/* Mirror two of the window's info hints into its winattr bit set:
 *  - disable_shm_accumulate                    -> MPIDI_WINATTR_ACCU_NO_SHM
 *  - accumulate_ops == MPIDIG_ACCU_SAME_OP_NO_OP -> MPIDI_WINATTR_ACCU_SAME_OP_NO_OP
 * Each bit is set when its condition holds and cleared otherwise; all other
 * winattr bits are left alone. */
static void update_winattr_after_set_info(MPIR_Win * win)
{
    const bool shm_accu_disabled = MPIDIG_WIN(win, info_args).disable_shm_accumulate;
    const bool same_op_no_op =
        (MPIDIG_WIN(win, info_args).accumulate_ops == MPIDIG_ACCU_SAME_OP_NO_OP);

    if (!shm_accu_disabled) {
        MPIDI_WIN(win, winattr) &= ~((unsigned) MPIDI_WINATTR_ACCU_NO_SHM);
    } else {
        MPIDI_WIN(win, winattr) |= MPIDI_WINATTR_ACCU_NO_SHM;
    }

    if (!same_op_no_op) {
        MPIDI_WIN(win, winattr) &= ~((unsigned) MPIDI_WINATTR_ACCU_SAME_OP_NO_OP);
    } else {
        MPIDI_WIN(win, winattr) |= MPIDI_WINATTR_ACCU_SAME_OP_NO_OP;
    }
}
102 
win_set_info(MPIR_Win * win,MPIR_Info * info,bool is_init)103 static int win_set_info(MPIR_Win * win, MPIR_Info * info, bool is_init)
104 {
105     int mpi_errno = MPI_SUCCESS;
106     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_WIN_SET_INFO);
107     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_WIN_SET_INFO);
108 
109     MPIR_Info *curr_ptr;
110     char *value, *token, *savePtr = NULL;
111     int save_ordering;
112 
113     curr_ptr = info->next;
114 
115     while (curr_ptr) {
116         if (!strcmp(curr_ptr->key, "no_locks")) {
117             if (!strcmp(curr_ptr->value, "true"))
118                 MPIDIG_WIN(win, info_args).no_locks = 1;
119             else if (!strcmp(curr_ptr->value, "false"))
120                 MPIDIG_WIN(win, info_args).no_locks = 0;
121         } else if (!strcmp(curr_ptr->key, "accumulate_ordering")) {
122             save_ordering = MPIDIG_WIN(win, info_args).accumulate_ordering;
123             MPIDIG_WIN(win, info_args).accumulate_ordering = 0;
124             if (!strcmp(curr_ptr->value, "none")) {
125                 /* For MPI-3, "none" means no ordering and is not default. */
126                 goto next;
127             }
128 
129             /* value can never be NULL. */
130             MPIR_Assert(curr_ptr->value);
131 
132             value = curr_ptr->value;
133             token = (char *) strtok_r(value, ",", &savePtr);
134 
135             while (token) {
136                 if (!memcmp(token, "rar", 3))
137                     MPIDIG_WIN(win, info_args).accumulate_ordering =
138                         (MPIDIG_WIN(win, info_args).accumulate_ordering | MPIDIG_ACCU_ORDER_RAR);
139                 else if (!memcmp(token, "raw", 3))
140                     MPIDIG_WIN(win, info_args).accumulate_ordering =
141                         (MPIDIG_WIN(win, info_args).accumulate_ordering | MPIDIG_ACCU_ORDER_RAW);
142                 else if (!memcmp(token, "war", 3))
143                     MPIDIG_WIN(win, info_args).accumulate_ordering =
144                         (MPIDIG_WIN(win, info_args).accumulate_ordering | MPIDIG_ACCU_ORDER_WAR);
145                 else if (!memcmp(token, "waw", 3))
146                     MPIDIG_WIN(win, info_args).accumulate_ordering =
147                         (MPIDIG_WIN(win, info_args).accumulate_ordering | MPIDIG_ACCU_ORDER_WAW);
148                 else
149                     MPIR_ERR_SETANDSTMT(mpi_errno, MPI_ERR_ARG, goto fn_fail, "**info");
150 
151                 token = (char *) strtok_r(NULL, ",", &savePtr);
152             }
153 
154             if (MPIDIG_WIN(win, info_args).accumulate_ordering == 0)
155                 MPIDIG_WIN(win, info_args).accumulate_ordering = save_ordering;
156         } else if (!strcmp(curr_ptr->key, "accumulate_ops")) {
157             if (!strcmp(curr_ptr->value, "same_op"))
158                 MPIDIG_WIN(win, info_args).accumulate_ops = MPIDIG_ACCU_SAME_OP;
159             else if (!strcmp(curr_ptr->value, "same_op_no_op"))
160                 MPIDIG_WIN(win, info_args).accumulate_ops = MPIDIG_ACCU_SAME_OP_NO_OP;
161         } else if (!strcmp(curr_ptr->key, "same_disp_unit")) {
162             if (!strcmp(curr_ptr->value, "true"))
163                 MPIDIG_WIN(win, info_args).same_disp_unit = 1;
164             else if (!strcmp(curr_ptr->value, "false"))
165                 MPIDIG_WIN(win, info_args).same_disp_unit = 0;
166         } else if (!strcmp(curr_ptr->key, "same_size")) {
167             if (!strcmp(curr_ptr->value, "true"))
168                 MPIDIG_WIN(win, info_args).same_size = 1;
169             else if (!strcmp(curr_ptr->value, "false"))
170                 MPIDIG_WIN(win, info_args).same_size = 0;
171         } else if (!strcmp(curr_ptr->key, "alloc_shared_noncontig")) {
172             if (!strcmp(curr_ptr->value, "true"))
173                 MPIDIG_WIN(win, info_args).alloc_shared_noncontig = 1;
174             else if (!strcmp(curr_ptr->value, "false"))
175                 MPIDIG_WIN(win, info_args).alloc_shared_noncontig = 0;
176         } else if (!strcmp(curr_ptr->key, "alloc_shm")) {
177             if (!strcmp(curr_ptr->value, "true"))
178                 MPIDIG_WIN(win, info_args).alloc_shm = 1;
179             else if (!strcmp(curr_ptr->value, "false"))
180                 MPIDIG_WIN(win, info_args).alloc_shm = 0;
181         }
182         /* We allow the user to set the following atomics hint only at window init time,
183          * all future updates by win_set_info are ignored. This is because we do not
184          * have a good way to ensure all outstanding atomic ops have been completed
185          * on all processes especially in passive-target epochs. */
186         else if (is_init && !strcmp(curr_ptr->key, "which_accumulate_ops")) {
187             parse_info_accu_ops_str(curr_ptr->value,
188                                     &MPIDIG_WIN(win, info_args).which_accumulate_ops);
189         } else if (is_init && !strcmp(curr_ptr->key, "accumulate_noncontig_dtype")) {
190             if (!strcmp(curr_ptr->value, "true"))
191                 MPIDIG_WIN(win, info_args).accumulate_noncontig_dtype = true;
192             else if (!strcmp(curr_ptr->value, "false"))
193                 MPIDIG_WIN(win, info_args).accumulate_noncontig_dtype = false;
194         } else if (is_init && !strcmp(curr_ptr->key, "accumulate_max_bytes")) {
195             if (!strcmp(curr_ptr->value, "unlimited") || !strcmp(curr_ptr->value, "-1"))
196                 MPIDIG_WIN(win, info_args).accumulate_max_bytes = -1;
197             else {
198                 long max_bytes = atol(curr_ptr->value);
199                 if (max_bytes >= 0)
200                     MPIDIG_WIN(win, info_args).accumulate_max_bytes = max_bytes;
201             }
202         } else if (is_init && !strcmp(curr_ptr->key, "disable_shm_accumulate")) {
203             if (!strcmp(curr_ptr->value, "true"))
204                 MPIDIG_WIN(win, info_args).disable_shm_accumulate = true;
205             else
206                 MPIDIG_WIN(win, info_args).disable_shm_accumulate = false;
207         } else if (is_init && !strcmp(curr_ptr->key, "coll_attach")) {
208             if (!strcmp(curr_ptr->value, "true"))
209                 MPIDIG_WIN(win, info_args).coll_attach = true;
210             else
211                 MPIDIG_WIN(win, info_args).coll_attach = false;
212         }
213       next:
214         curr_ptr = curr_ptr->next;
215     }
216 
217   fn_exit:
218     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_WIN_SET_INFO);
219     return mpi_errno;
220   fn_fail:
221     goto fn_exit;
222 }
223 
/* Allocate a window object and initialize its generic (MPIDIG) state.
 *
 * length        - window size, stored in win->size.
 * disp_unit     - displacement unit, stored in win->disp_unit.
 * win_ptr       - receives the new window as soon as its handle is allocated,
 *                 i.e. also when a later step fails.
 * info          - optional hints (may be NULL or MPI_INFO_NULL); applied on
 *                 top of the defaults with is_init set.
 * comm_ptr      - communicator the window is created over; the window keeps
 *                 its own duplicate in win->comm_ptr.
 * create_flavor - MPI_WIN_FLAVOR_* value; ALLOCATE and SHARED default the
 *                 alloc_shm hint to 1, other flavors to 0.
 * model         - memory model, stored in win->model.
 *
 * Besides filling in the fields, the routine registers the window in
 * MPIDI_global.win_map under a newly generated win_id and computes the
 * winattr bits.  It calls MPIR_Allreduce on comm_ptr.
 *
 * Returns MPI_SUCCESS or an MPI error code. */
static int win_init(MPI_Aint length, int disp_unit, MPIR_Win ** win_ptr, MPIR_Info * info,
                    MPIR_Comm * comm_ptr, int create_flavor, int model)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Win *win = (MPIR_Win *) MPIR_Handle_obj_alloc(&MPIR_Win_mem);
    MPIR_Comm *win_comm_ptr;

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_WIN_INIT);
    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDIG_WIN_INIT);

    MPIR_ERR_CHKANDSTMT(win == NULL, mpi_errno, MPI_ERR_NO_MEM, goto fn_fail, "**nomem");
    *win_ptr = win;

    memset(&win->dev.am, 0, sizeof(MPIDIG_win_t));

    /* Duplicate the original communicator here to avoid having collisions
     * between internal collectives */
    mpi_errno = MPIR_Comm_dup_impl(comm_ptr, NULL, &win_comm_ptr);
    MPIR_ERR_CHECK(mpi_errno);

    /* no per-target state exists yet */
    MPIDIG_WIN(win, targets) = NULL;

    win->errhandler = NULL;
    win->base = NULL;
    win->size = length;
    win->disp_unit = disp_unit;
    win->create_flavor = create_flavor;
    win->model = model;
    win->copyCreateFlavor = 0;
    win->copyModel = 0;
    win->attributes = NULL;
    win->comm_ptr = win_comm_ptr;
    win->copyDispUnit = 0;
    win->copySize = 0;
    MPIDIG_WIN(win, shared_table) = NULL;
    MPIDIG_WIN(win, sync).assert_mode = 0;

    /* Initialize the info (hint) flags per window */
    MPIDIG_WIN(win, info_args).no_locks = 0;
    MPIDIG_WIN(win, info_args).accumulate_ordering = (MPIDIG_ACCU_ORDER_RAR |
                                                      MPIDIG_ACCU_ORDER_RAW |
                                                      MPIDIG_ACCU_ORDER_WAR |
                                                      MPIDIG_ACCU_ORDER_WAW);
    MPIDIG_WIN(win, info_args).accumulate_ops = MPIDIG_ACCU_SAME_OP_NO_OP;
    MPIDIG_WIN(win, info_args).same_size = 0;
    MPIDIG_WIN(win, info_args).same_disp_unit = 0;
    MPIDIG_WIN(win, info_args).alloc_shared_noncontig = 0;
    if (win->create_flavor == MPI_WIN_FLAVOR_ALLOCATE
        || win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
        MPIDIG_WIN(win, info_args).alloc_shm = 1;
    } else {
        MPIDIG_WIN(win, info_args).alloc_shm = 0;
    }

    /* default any op; the mask is built from an unsigned one so that a shift
     * into the top bit stays well defined */
    int op_index;
    MPIDIG_WIN(win, info_args).which_accumulate_ops = 0;
    for (op_index = 0; op_index < MPIDIG_ACCU_NUM_OP; op_index++)
        MPIDIG_WIN(win, info_args).which_accumulate_ops |= ((uint32_t) 1 << op_index);
    MPIDIG_WIN(win, info_args).accumulate_noncontig_dtype = true;
    MPIDIG_WIN(win, info_args).accumulate_max_bytes = -1;
    MPIDIG_WIN(win, info_args).disable_shm_accumulate = false;
    MPIDIG_WIN(win, info_args).coll_attach = false;

    /* user hints override the defaults set above */
    if ((info != NULL) && ((int *) info != (int *) MPI_INFO_NULL)) {
        mpi_errno = win_set_info(win, info, TRUE /* is_init */);
        MPIR_ERR_CHECK(mpi_errno);
    }

    MPIDIG_WIN(win, mmap_sz) = 0;
    MPIDIG_WIN(win, mmap_addr) = NULL;

    MPIR_cc_set(&MPIDIG_WIN(win, local_cmpl_cnts), 0);
    MPIR_cc_set(&MPIDIG_WIN(win, remote_cmpl_cnts), 0);
    MPIR_cc_set(&MPIDIG_WIN(win, remote_acc_cmpl_cnts), 0);

    MPIDIG_WIN(win, win_id) = MPIDIG_generate_win_id(comm_ptr);
    MPIDIU_map_set(MPIDI_global.win_map, MPIDIG_WIN(win, win_id), win, MPL_MEM_RMA);

    /* set winattr for performance optimization at fast path:
     * - check if comm is COMM_WORLD or dup of COMM_WORLD
     * - check if disable_shm_accumulate hint is set
     * - check if SAME_OP_NO_OP is set for accumulates */
    MPIDI_WIN(win, winattr) = 0;

    int comm_compare_result = MPI_UNEQUAL;
    mpi_errno = MPIR_Comm_compare_impl(comm_ptr, MPIR_Process.comm_world, &comm_compare_result);
    MPIR_ERR_CHECK(mpi_errno);

    if (comm_compare_result == MPI_CONGRUENT || comm_compare_result == MPI_IDENT)
        MPIDI_WIN(win, winattr) |= MPIDI_WINATTR_DIRECT_INTRA_COMM;

    update_winattr_after_set_info(win);

    /* If no local processes on each node, set ACCU_NO_SHM to enable native atomics */
    bool no_local = false, all_no_local = false;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    if (!comm_ptr->node_comm)
        no_local = true;

    mpi_errno = MPIR_Allreduce(&no_local, &all_no_local, 1, MPI_C_BOOL,
                               MPI_LAND, comm_ptr, &errflag);
    /* Check the return code as well as errflag, so that all_no_local is not
     * consumed after a failed reduction. */
    MPIR_ERR_CHECK(mpi_errno);
    MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
    if (all_no_local)
        MPIDI_WIN(win, winattr) |= MPIDI_WINATTR_ACCU_NO_SHM;

  fn_exit:
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDIG_WIN_INIT);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
338 
/* Tear down a window: wait until every outstanding operation has completed,
 * run the netmod (and, unless built netmod-direct, shm) free hooks, release
 * the per-target state and the window memory owned by the window, remove the
 * window from MPIDI_global.win_map, release its communicator and free the
 * handle.
 *
 * Returns MPI_SUCCESS or an MPI error code; on error the remaining teardown
 * steps are skipped. */
static int win_finalize(MPIR_Win ** win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Win *win = *win_ptr;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_WIN_FINALIZE);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_WIN_FINALIZE);

    /* All local outstanding OPs should have been completed. */
    MPIR_Assert(MPIR_cc_get(MPIDIG_WIN(win, local_cmpl_cnts)) == 0);
    MPIR_Assert(MPIR_cc_get(MPIDIG_WIN(win, remote_cmpl_cnts)) == 0);
    MPIR_Assert(MPIR_cc_get(MPIDIG_WIN(win, remote_acc_cmpl_cnts)) == 0);

    /* Make progress till all OPs have been completed */
    for (;;) {
        bool targets_local_done, targets_remote_done, counters_drained;

        /* NOTE: MPID_Win_free does not take on locks */
        mpi_errno = MPID_Progress_test(NULL);
        MPIR_ERR_CHECK(mpi_errno);

        targets_local_done = MPIDIG_win_check_all_targets_local_completed(win);
        targets_remote_done = MPIDIG_win_check_all_targets_remote_completed(win);

        /* Local completion counter might be updated later than remote completion
         * (at request completion), so it has to be examined as well before the
         * entire window is released. */
        counters_drained = (MPIR_cc_get(MPIDIG_WIN(win, local_cmpl_cnts)) == 0) &&
            (MPIR_cc_get(MPIDIG_WIN(win, remote_cmpl_cnts)) == 0) &&
            (MPIR_cc_get(MPIDIG_WIN(win, remote_acc_cmpl_cnts)) == 0);

        if (counters_drained && targets_local_done && targets_remote_done)
            break;
    }

    mpi_errno = MPIDI_NM_mpi_win_free_hook(win);
    MPIR_ERR_CHECK(mpi_errno);

#ifndef MPIDI_CH4_DIRECT_NETMOD
    mpi_errno = MPIDI_SHM_mpi_win_free_hook(win);
    MPIR_ERR_CHECK(mpi_errno);
#endif

    MPIDIG_win_target_cleanall(win);
    MPIDIG_win_hash_clear(win);

    /* Only the ALLOCATE and SHARED flavors own their window memory. */
    const bool owns_memory = (win->create_flavor == MPI_WIN_FLAVOR_ALLOCATE ||
                              win->create_flavor == MPI_WIN_FLAVOR_SHARED);
    if (owns_memory) {
        if (!MPIDIG_WIN(win, mmap_addr)) {
            /* no mapped segment: the memory came from the heap */
            MPL_free(win->base);
        } else {
            /* if more than one process on a node, we use shared memory by default */
            mpi_errno = MPIDU_shm_free(MPIDIG_WIN(win, mmap_addr));
            MPIR_ERR_CHECK(mpi_errno);

            /* The table is released here only for a mapped segment; if shared
             * memory allocation failed or the window had zero size, it was
             * already freed at allocation time. */
            MPL_free(MPIDIG_WIN(win, shared_table));
        }
    }

    MPIDIU_map_erase(MPIDI_global.win_map, MPIDIG_WIN(win, win_id));

    MPIR_Comm_release(win->comm_ptr);
    MPIR_Handle_obj_free(&MPIR_Win_mem, win);

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_WIN_FINALIZE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
407 
408 /* Allocate RMA window over shared memory region. Used by both win_allocate
409  * and win_allocate_shared.
410  *
411  * This routine allocates window memory region on each node from shared
412  * memory, and initializes the shared_table structure that stores each
413  * node process's size, disp_unit, and start address for shm RMA operations
414  * and query routine.*/
win_shm_alloc_impl(MPI_Aint size,int disp_unit,MPIR_Comm * comm_ptr,void ** base_ptr,MPIR_Win ** win_ptr,int shm_option)415 static int win_shm_alloc_impl(MPI_Aint size, int disp_unit, MPIR_Comm * comm_ptr, void **base_ptr,
416                               MPIR_Win ** win_ptr, int shm_option)
417 {
418     int i, mpi_errno = MPI_SUCCESS;
419     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
420     MPIR_Win *win = NULL;
421     size_t total_shm_size = 0LL;
422     MPIDIG_win_shared_info_t *shared_table = NULL;
423     MPI_Aint *shm_offsets = NULL;
424     MPIR_Comm *shm_comm_ptr = comm_ptr->node_comm;
425     size_t page_sz = 0, mapsize;
426     bool symheap_mapfail_flag = false, shm_mapfail_flag = false;
427     bool symheap_flag = true, global_symheap_flag = false;
428 
429     MPIR_CHKPMEM_DECL(2);
430     MPIR_CHKLMEM_DECL(1);
431     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_WIN_SHM_ALLOC_IMPL);
432     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_WIN_SHM_ALLOC_IMPL);
433 
434     if (mpi_errno != MPI_SUCCESS)
435         goto fn_fail;
436 
437     win = *win_ptr;
438     *base_ptr = NULL;
439 
440     /* Check whether multiple processes exist on the local node. If so,
441      * we need to count the total size on a node for shared memory allocation. */
442     if (shm_comm_ptr != NULL) {
443         MPIR_T_PVAR_TIMER_START(RMA, rma_wincreate_allgather);
444         MPIR_CHKPMEM_MALLOC(MPIDIG_WIN(win, shared_table), MPIDIG_win_shared_info_t *,
445                             sizeof(MPIDIG_win_shared_info_t) * shm_comm_ptr->local_size,
446                             mpi_errno, "shared table", MPL_MEM_RMA);
447         shared_table = MPIDIG_WIN(win, shared_table);
448         shared_table[shm_comm_ptr->rank].size = size;
449         shared_table[shm_comm_ptr->rank].disp_unit = disp_unit;
450         shared_table[shm_comm_ptr->rank].shm_base_addr = NULL;
451 
452         mpi_errno = MPIR_Allgather(MPI_IN_PLACE,
453                                    0,
454                                    MPI_DATATYPE_NULL,
455                                    shared_table,
456                                    sizeof(MPIDIG_win_shared_info_t), MPI_BYTE, shm_comm_ptr,
457                                    &errflag);
458         MPIR_T_PVAR_TIMER_END(RMA, rma_wincreate_allgather);
459         if (mpi_errno != MPI_SUCCESS)
460             goto fn_fail;
461 
462         MPIR_CHKLMEM_MALLOC(shm_offsets, MPI_Aint *, shm_comm_ptr->local_size * sizeof(MPI_Aint),
463                             mpi_errno, "shm offset", MPL_MEM_RMA);
464 
465         /* No allreduce here because this is a shared memory domain
466          * and should be a relatively small number of processes
467          * and a non performance sensitive API.
468          */
469         for (i = 0; i < shm_comm_ptr->local_size; i++) {
470             shm_offsets[i] = (MPI_Aint) total_shm_size;
471             if (MPIDIG_WIN(win, info_args).alloc_shared_noncontig)
472                 total_shm_size += MPIDU_shm_get_mapsize(shared_table[i].size, &page_sz);
473             else
474                 total_shm_size += shared_table[i].size;
475         }
476 
477         /* if all processes give zero size on a single node window, simply return. */
478         if (total_shm_size == 0 && shm_comm_ptr->local_size == comm_ptr->local_size)
479             goto fn_no_shm;
480 
481         /* if my size is not page aligned and noncontig is disabled, skip global symheap. */
482         if (size != MPIDU_shm_get_mapsize(size, &page_sz) &&
483             !MPIDIG_WIN(win, info_args).alloc_shared_noncontig)
484             symheap_flag = false;
485     } else
486         total_shm_size = size;
487 
488     /* try global symm heap only when multiple processes exist */
489     if (comm_ptr->local_size > 1) {
490         /* global symm heap can be successful only when any of the following conditions meet.
491          * Thus, we can skip unnecessary global symm heap retry based on condition check.
492          * - no shared memory node (i.e., single process per node)
493          * - size of each process on the shared memory node is page aligned,
494          *   thus all process can be assigned to a page aligned start address.
495          * - user sets alloc_shared_noncontig=true, thus we can internally make
496          *   the size aligned on each process. */
497         mpi_errno = MPIR_Allreduce(&symheap_flag, &global_symheap_flag, 1, MPI_C_BOOL,
498                                    MPI_LAND, comm_ptr, &errflag);
499         MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
500     } else
501         global_symheap_flag = false;
502 
503     /* because MPI_shm follows a create & attach mode, we need to set the
504      * size of entire shared memory segment on each node as the size of
505      * each process. */
506     mapsize = MPIDU_shm_get_mapsize(total_shm_size, &page_sz);
507 
508     /* first try global symmetric heap segment allocation */
509     if (global_symheap_flag) {
510         size_t my_offset = (shm_comm_ptr) ? shm_offsets[shm_comm_ptr->rank] : 0;
511         MPIDIG_WIN(win, mmap_sz) = mapsize;
512         mpi_errno =
513             MPIDU_shm_alloc_symm_all(comm_ptr, mapsize, my_offset, &MPIDIG_WIN(win, mmap_addr),
514                                      &symheap_mapfail_flag);
515         if (mpi_errno != MPI_SUCCESS)
516             goto fn_fail;
517 
518         if (symheap_mapfail_flag) {
519             MPIDIG_WIN(win, mmap_sz) = 0;
520             MPIDIG_WIN(win, mmap_addr) = NULL;
521         }
522     }
523 
524     /* if symmetric heap is disabled or fails, try normal shm segment allocation */
525     if (!global_symheap_flag || symheap_mapfail_flag) {
526         if (shm_comm_ptr != NULL && mapsize) {
527             MPIDIG_WIN(win, mmap_sz) = mapsize;
528             mpi_errno =
529                 MPIDU_shm_alloc(shm_comm_ptr, mapsize, &MPIDIG_WIN(win, mmap_addr),
530                                 &shm_mapfail_flag);
531             if (mpi_errno != MPI_SUCCESS)
532                 goto fn_fail;
533 
534             if (shm_mapfail_flag) {
535                 MPIDIG_WIN(win, mmap_sz) = 0;
536                 MPIDIG_WIN(win, mmap_addr) = NULL;
537             }
538 
539             /* throw error here if shm allocation is required but fails */
540             if (shm_option == SHM_WIN_REQUIRED)
541                 MPIR_ERR_CHKANDJUMP(shm_mapfail_flag, mpi_errno, MPI_ERR_OTHER, "**alloc_shar_mem");
542         }
543 
544         /* If only single process on a node or shm segment allocation fails, try malloc. */
545         if ((shm_comm_ptr == NULL || shm_mapfail_flag) && size > 0) {
546             MPIR_CHKPMEM_MALLOC(*base_ptr, void *, size, mpi_errno, "(*win_ptr)->base",
547                                 MPL_MEM_RMA);
548             MPL_VG_MEM_INIT(*base_ptr, size);
549         }
550     }
551 
552     /* compute the base addresses of each process within the shared memory segment */
553     if (shm_comm_ptr != NULL && MPIDIG_WIN(win, mmap_addr)) {
554         char *cur_base = (char *) MPIDIG_WIN(win, mmap_addr);
555         for (i = 0; i < shm_comm_ptr->local_size; i++) {
556             if (shared_table[i].size)
557                 shared_table[i].shm_base_addr = cur_base;
558             else
559                 shared_table[i].shm_base_addr = NULL;
560 
561             if (MPIDIG_WIN(win, info_args).alloc_shared_noncontig)
562                 cur_base += MPIDU_shm_get_mapsize(shared_table[i].size, &page_sz);
563             else
564                 cur_base += shared_table[i].size;
565         }
566 
567         *base_ptr = shared_table[shm_comm_ptr->rank].shm_base_addr;
568     } else if (MPIDIG_WIN(win, mmap_sz) > 0) {
569         /* if symm heap is allocated without shared memory, use the mapping address */
570         *base_ptr = MPIDIG_WIN(win, mmap_addr);
571     }
572     /* otherwise, it has already be assigned with a local memory region or NULL (zero size). */
573 
574   fn_no_shm:
575     /* free shared_table if no shm segment allocated */
576     if (shared_table && !MPIDIG_WIN(win, mmap_addr)) {
577         MPL_free(MPIDIG_WIN(win, shared_table));
578         MPIDIG_WIN(win, shared_table) = NULL;
579     }
580 
581   fn_exit:
582     MPIR_CHKLMEM_FREEALL();
583     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_WIN_SHM_ALLOC_IMPL);
584     return mpi_errno;
585   fn_fail:
586     MPIR_CHKPMEM_REAP();
587     goto fn_exit;
588 }
589 
MPIDIG_RMA_Init_sync_pvars(void)590 int MPIDIG_RMA_Init_sync_pvars(void)
591 {
592     int mpi_errno = MPI_SUCCESS;
593     /* rma_winlock_getlocallock */
594     MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
595                                       MPI_DOUBLE,
596                                       rma_winlock_getlocallock,
597                                       MPI_T_VERBOSITY_MPIDEV_DETAIL,
598                                       MPI_T_BIND_NO_OBJECT,
599                                       MPIR_T_PVAR_FLAG_READONLY,
600                                       "RMA", "WIN_LOCK:Get local lock (in seconds)");
601 
602     /* rma_wincreate_allgather */
603     MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
604                                       MPI_DOUBLE,
605                                       rma_wincreate_allgather,
606                                       MPI_T_VERBOSITY_MPIDEV_DETAIL,
607                                       MPI_T_BIND_NO_OBJECT,
608                                       MPIR_T_PVAR_FLAG_READONLY,
609                                       "RMA", "WIN_CREATE:Allgather (in seconds)");
610 
611     /* rma_amhdr_set */
612     MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
613                                       MPI_DOUBLE,
614                                       rma_amhdr_set,
615                                       MPI_T_VERBOSITY_MPIDEV_DETAIL,
616                                       MPI_T_BIND_NO_OBJECT,
617                                       MPIR_T_PVAR_FLAG_READONLY,
618                                       "RMA", "Set fields in AM Handler (in seconds)");
619 
620     return mpi_errno;
621 }
622 
/* Update the hints of an existing window from an info object, then
 * synchronize all processes of the window with a barrier on win->comm_ptr.
 *
 * The hints are applied with is_init == FALSE.  Returns the error from
 * applying the hints if that fails (the barrier is then skipped), otherwise
 * the result of the barrier. */
int MPIDIG_mpi_win_set_info(MPIR_Win * win, MPIR_Info * info)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_SET_INFO);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_SET_INFO);

    mpi_errno = win_set_info(win, info, FALSE /* is_init */);
    MPIR_ERR_CHECK(mpi_errno);

    /* winattr is only derived from the info given at window creation and is
     * intentionally left alone here: updating it would change RMA's behavior,
     * which requires collective synchronization. */
    {
        MPIR_Errflag_t barrier_errflag = MPIR_ERR_NONE;

        mpi_errno = MPIR_Barrier(win->comm_ptr, &barrier_errflag);
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_SET_INFO);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
643 
MPIDIG_mpi_win_get_info(MPIR_Win * win,MPIR_Info ** info_p_p)644 int MPIDIG_mpi_win_get_info(MPIR_Win * win, MPIR_Info ** info_p_p)
645 {
646     int mpi_errno = MPI_SUCCESS;
647     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_GET_INFO);
648     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_GET_INFO);
649 
650     mpi_errno = MPIR_Info_alloc(info_p_p);
651     if (MPI_SUCCESS != mpi_errno) {
652         *info_p_p = NULL;
653         MPIR_ERR_POP(mpi_errno);
654     }
655 
656     if (MPIDIG_WIN(win, info_args).no_locks)
657         mpi_errno = MPIR_Info_set_impl(*info_p_p, "no_locks", "true");
658     else
659         mpi_errno = MPIR_Info_set_impl(*info_p_p, "no_locks", "false");
660 
661     MPIR_ERR_CHECK(mpi_errno);
662 
663     {
664 #define BUFSIZE 32
665         char buf[BUFSIZE];
666         int c = 0;
667 
668         MPL_COMPILE_TIME_ASSERT(BUFSIZE >= 16); /* maximum: strlen("rar,raw,war,waw") + 1 */
669 
670         if (MPIDIG_WIN(win, info_args).accumulate_ordering & MPIDIG_ACCU_ORDER_RAR)
671             c += snprintf(buf, BUFSIZE, "rar");
672 
673         if (MPIDIG_WIN(win, info_args).accumulate_ordering & MPIDIG_ACCU_ORDER_RAW)
674             c += snprintf(buf + c, BUFSIZE - c, "%sraw", (c > 0) ? "," : "");
675 
676         if (MPIDIG_WIN(win, info_args).accumulate_ordering & MPIDIG_ACCU_ORDER_WAR)
677             c += snprintf(buf + c, BUFSIZE - c, "%swar", (c > 0) ? "," : "");
678 
679         if (MPIDIG_WIN(win, info_args).accumulate_ordering & MPIDIG_ACCU_ORDER_WAW)
680             c += snprintf(buf + c, BUFSIZE - c, "%swaw", (c > 0) ? "," : "");
681 
682         if (c == 0) {
683             strncpy(buf, "none", BUFSIZE);
684         }
685 
686         mpi_errno = MPIR_Info_set_impl(*info_p_p, "accumulate_ordering", buf);
687         MPIR_ERR_CHECK(mpi_errno);
688 #undef BUFSIZE
689     }
690 
691     if (MPIDIG_WIN(win, info_args).accumulate_ops == MPIDIG_ACCU_SAME_OP)
692         mpi_errno = MPIR_Info_set_impl(*info_p_p, "accumulate_ops", "same_op");
693     else
694         mpi_errno = MPIR_Info_set_impl(*info_p_p, "accumulate_ops", "same_op_no_op");
695 
696     MPIR_ERR_CHECK(mpi_errno);
697 
698     if (MPIDIG_WIN(win, info_args).alloc_shared_noncontig)
699         mpi_errno = MPIR_Info_set_impl(*info_p_p, "alloc_shared_noncontig", "true");
700     else
701         mpi_errno = MPIR_Info_set_impl(*info_p_p, "alloc_shared_noncontig", "false");
702 
703     MPIR_ERR_CHECK(mpi_errno);
704 
705     if (MPIDIG_WIN(win, info_args).same_size)
706         mpi_errno = MPIR_Info_set_impl(*info_p_p, "same_size", "true");
707     else
708         mpi_errno = MPIR_Info_set_impl(*info_p_p, "same_size", "false");
709 
710     MPIR_ERR_CHECK(mpi_errno);
711 
712     if (MPIDIG_WIN(win, info_args).same_disp_unit)
713         mpi_errno = MPIR_Info_set_impl(*info_p_p, "same_disp_unit", "true");
714     else
715         mpi_errno = MPIR_Info_set_impl(*info_p_p, "same_disp_unit", "false");
716 
717     MPIR_ERR_CHECK(mpi_errno);
718 
719     if (MPIDIG_WIN(win, info_args).alloc_shm)
720         mpi_errno = MPIR_Info_set_impl(*info_p_p, "alloc_shm", "true");
721     else
722         mpi_errno = MPIR_Info_set_impl(*info_p_p, "alloc_shm", "false");
723 
724     MPIR_ERR_CHECK(mpi_errno);
725 
726     {   /* Keep buf as a local variable for which_accumulate_ops key. */
727         char buf[128];
728         get_info_accu_ops_str(MPIDIG_WIN(win, info_args).which_accumulate_ops, &buf[0],
729                               sizeof(buf));
730         mpi_errno = MPIR_Info_set_impl(*info_p_p, "which_accumulate_ops", buf);
731         MPIR_ERR_CHECK(mpi_errno);
732     }
733 
734     if (MPIDIG_WIN(win, info_args).accumulate_noncontig_dtype)
735         mpi_errno = MPIR_Info_set_impl(*info_p_p, "accumulate_noncontig_dtype", "true");
736     else
737         mpi_errno = MPIR_Info_set_impl(*info_p_p, "accumulate_noncontig_dtype", "false");
738     MPIR_ERR_CHECK(mpi_errno);
739 
740     if (MPIDIG_WIN(win, info_args).accumulate_max_bytes >= 0) {
741         char buf[32];           /* make sure 64-bit integer can fit */
742         snprintf(buf, sizeof(buf), "%ld", (long) MPIDIG_WIN(win, info_args).accumulate_max_bytes);
743         mpi_errno = MPIR_Info_set_impl(*info_p_p, "accumulate_max_bytes", buf);
744     } else
745         mpi_errno = MPIR_Info_set_impl(*info_p_p, "accumulate_max_bytes", "unlimited");
746     MPIR_ERR_CHECK(mpi_errno);
747 
748     if (MPIDIG_WIN(win, info_args).disable_shm_accumulate)
749         mpi_errno = MPIR_Info_set_impl(*info_p_p, "disable_shm_accumulate", "true");
750     else
751         mpi_errno = MPIR_Info_set_impl(*info_p_p, "disable_shm_accumulate", "false");
752     MPIR_ERR_CHECK(mpi_errno);
753 
754     if (MPIDIG_WIN(win, info_args).coll_attach)
755         mpi_errno = MPIR_Info_set_impl(*info_p_p, "coll_attach", "true");
756     else
757         mpi_errno = MPIR_Info_set_impl(*info_p_p, "coll_attach", "false");
758     MPIR_ERR_CHECK(mpi_errno);
759 
760   fn_exit:
761     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_GET_INFO);
762     return mpi_errno;
763   fn_fail:
764     if (*info_p_p != NULL) {
765         MPIR_Info_free(*info_p_p);
766         *info_p_p = NULL;
767     }
768     goto fn_exit;
769 }
770 
/*
 * Generic (CH4R) implementation of window destruction.
 *
 * Checks that no access or exposure epoch is open on the window, runs a
 * barrier over the window's communicator, and then hands the window to
 * win_finalize() for teardown.
 *
 * win_ptr - address of the window pointer to release; passed on unchanged to
 *           win_finalize().
 *
 * Returns MPI_SUCCESS, or the error produced by the epoch checks, the
 * barrier, or win_finalize().
 */
int MPIDIG_mpi_win_free(MPIR_Win ** win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    MPIR_Win *win = *win_ptr;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_FREE);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_FREE);

    /* Leave through fn_fail (not a bare return) so that the matching
     * MPIR_FUNC_VERBOSE_EXIT below is always executed. */
    MPIDIG_ACCESS_EPOCH_CHECK_NONE(win, mpi_errno, goto fn_fail);
    MPIDIG_EXPOSURE_EPOCH_CHECK_NONE(win, mpi_errno, goto fn_fail);

    mpi_errno = MPIR_Barrier(win->comm_ptr, &errflag);
    MPIR_ERR_CHECK(mpi_errno);

    /* win_finalize() returns an error code; do not silently drop it. */
    mpi_errno = win_finalize(win_ptr);
    MPIR_ERR_CHECK(mpi_errno);

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_FREE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
793 
/*
 * Generic (CH4R) implementation of window creation over user-provided memory.
 *
 * Initializes the window object through win_init() with flavor
 * MPI_WIN_FLAVOR_CREATE and model MPI_WIN_UNIFIED, records the caller's base
 * address, runs the netmod create hook (and the shared-memory create hook
 * unless MPIDI_CH4_DIRECT_NETMOD is defined), and finishes with a barrier
 * over the window's communicator.
 *
 * base      - start address of the memory exposed by the window
 * length    - passed to win_init() as the window length
 * disp_unit - displacement unit, passed to win_init()
 * info      - info object, passed to win_init()
 * comm_ptr  - communicator, passed to win_init()
 * win_ptr   - receives the window produced by win_init()
 *
 * Returns MPI_SUCCESS or the error of the first failing step.
 */
int MPIDIG_mpi_win_create(void *base, MPI_Aint length, int disp_unit, MPIR_Info * info,
                          MPIR_Comm * comm_ptr, MPIR_Win ** win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    MPIR_Win *win;

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_CREATE);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_CREATE);

    /* Every step reports failure through MPIR_ERR_CHECK, matching the hook
     * calls below. */
    mpi_errno = win_init(length, disp_unit, win_ptr, info, comm_ptr, MPI_WIN_FLAVOR_CREATE,
                         MPI_WIN_UNIFIED);
    MPIR_ERR_CHECK(mpi_errno);

    win = *win_ptr;
    win->base = base;

    mpi_errno = MPIDI_NM_mpi_win_create_hook(win);
    MPIR_ERR_CHECK(mpi_errno);

#ifndef MPIDI_CH4_DIRECT_NETMOD
    mpi_errno = MPIDI_SHM_mpi_win_create_hook(win);
    MPIR_ERR_CHECK(mpi_errno);
#endif

    mpi_errno = MPIR_Barrier(win->comm_ptr, &errflag);
    MPIR_ERR_CHECK(mpi_errno);

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_CREATE);
    return mpi_errno;
  fn_fail:
    /* NOTE(review): unlike the allocate variants, no win_finalize() is
     * attempted here after a failure -- confirm whether that is intended. */
    goto fn_exit;
}
832 
/*
 * Generic (CH4R) implementation of attaching memory to a window.
 *
 * The request is rejected with error class MPI_ERR_RMA_FLAVOR unless the
 * window's create_flavor is MPI_WIN_FLAVOR_DYNAMIC. Otherwise (base, size) is
 * forwarded to the netmod attach hook and, unless MPIDI_CH4_DIRECT_NETMOD is
 * defined, to the shared-memory attach hook.
 *
 * Returns MPI_SUCCESS or the error of the first failing step.
 */
int MPIDIG_mpi_win_attach(MPIR_Win * win, void *base, MPI_Aint size)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_ATTACH);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_ATTACH);

    /* Only dynamic windows accept attached memory. */
    const bool wrong_flavor = (win->create_flavor != MPI_WIN_FLAVOR_DYNAMIC);
    MPIR_ERR_CHKANDSTMT(wrong_flavor, mpi_errno, MPI_ERR_RMA_FLAVOR, goto fn_fail,
                        "**rmaflavor");

    /* Netmod first ... */
    mpi_errno = MPIDI_NM_mpi_win_attach_hook(win, base, size);
    MPIR_ERR_CHECK(mpi_errno);

#ifndef MPIDI_CH4_DIRECT_NETMOD
    /* ... then shared memory, when it is compiled in. */
    mpi_errno = MPIDI_SHM_mpi_win_attach_hook(win, base, size);
    MPIR_ERR_CHECK(mpi_errno);
#endif

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_ATTACH);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
856 
/*
 * Generic (CH4R) implementation of shared-memory window allocation.
 *
 * Steps, in order:
 *   1. win_init() with flavor MPI_WIN_FLAVOR_SHARED and model MPI_WIN_UNIFIED;
 *   2. win_shm_alloc_impl() with SHM_WIN_REQUIRED, which fills in *base_ptr;
 *   3. record base and size in the window object;
 *   4. netmod hook, then (unless MPIDI_CH4_DIRECT_NETMOD) shared-memory hook;
 *   5. barrier over comm_ptr.
 *
 * If any of steps 1-4 fails, the window is handed to win_finalize() before
 * returning. The barrier result is returned as-is.
 *
 * Returns MPI_SUCCESS or the error of the first failing step.
 */
int MPIDIG_mpi_win_allocate_shared(MPI_Aint size, int disp_unit, MPIR_Info * info_ptr,
                                   MPIR_Comm * comm_ptr, void **base_ptr, MPIR_Win ** win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Errflag_t barrier_errflag = MPIR_ERR_NONE;
    MPIR_Win *shared_win = NULL;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_ALLOCATE_SHARED);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_ALLOCATE_SHARED);

    /* Build the window object. */
    mpi_errno = win_init(size, disp_unit, win_ptr, info_ptr, comm_ptr, MPI_WIN_FLAVOR_SHARED,
                         MPI_WIN_UNIFIED);
    MPIR_ERR_CHECK(mpi_errno);

    /* Shared memory is mandatory for this flavor. */
    mpi_errno = win_shm_alloc_impl(size, disp_unit, comm_ptr, base_ptr, win_ptr, SHM_WIN_REQUIRED);
    MPIR_ERR_CHECK(mpi_errno);

    shared_win = *win_ptr;
    shared_win->size = size;
    shared_win->base = *base_ptr;

    mpi_errno = MPIDI_NM_mpi_win_allocate_shared_hook(shared_win);
    MPIR_ERR_CHECK(mpi_errno);

#ifndef MPIDI_CH4_DIRECT_NETMOD
    mpi_errno = MPIDI_SHM_mpi_win_allocate_shared_hook(shared_win);
    MPIR_ERR_CHECK(mpi_errno);
#endif

    /* NOTE(review): a barrier failure is returned without going through
     * fn_fail, so no win_finalize() happens in that case (the non-shared
     * allocate path does clean up) -- confirm this is intended. */
    mpi_errno = MPIR_Barrier(comm_ptr, &barrier_errflag);

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_ALLOCATE_SHARED);
    return mpi_errno;
  fn_fail:
    if (win_ptr != NULL)
        win_finalize(win_ptr);
    goto fn_exit;
}
895 
/*
 * Generic (CH4R) implementation of detaching memory from a window.
 *
 * The request is rejected with error class MPI_ERR_RMA_FLAVOR unless the
 * window's create_flavor is MPI_WIN_FLAVOR_DYNAMIC. Otherwise base is
 * forwarded to the netmod detach hook and, unless MPIDI_CH4_DIRECT_NETMOD is
 * defined, to the shared-memory detach hook.
 *
 * Returns MPI_SUCCESS or the error of the first failing step.
 */
int MPIDIG_mpi_win_detach(MPIR_Win * win, const void *base)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_DETACH);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_DETACH);

    /* Only dynamic windows have attached memory to detach. */
    const bool wrong_flavor = (win->create_flavor != MPI_WIN_FLAVOR_DYNAMIC);
    MPIR_ERR_CHKANDSTMT(wrong_flavor, mpi_errno, MPI_ERR_RMA_FLAVOR, goto fn_fail,
                        "**rmaflavor");

    /* Netmod first ... */
    mpi_errno = MPIDI_NM_mpi_win_detach_hook(win, base);
    MPIR_ERR_CHECK(mpi_errno);

#ifndef MPIDI_CH4_DIRECT_NETMOD
    /* ... then shared memory, when it is compiled in. */
    mpi_errno = MPIDI_SHM_mpi_win_detach_hook(win, base);
    MPIR_ERR_CHECK(mpi_errno);
#endif

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_DETACH);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
918 
/*
 * Generic (CH4R) implementation of window allocation.
 *
 * Initializes the window through win_init() with flavor
 * MPI_WIN_FLAVOR_ALLOCATE and model MPI_WIN_UNIFIED, obtains the backing
 * memory from win_shm_alloc_impl() with SHM_WIN_OPTIONAL, records base and
 * size in the window, runs the netmod allocate hook (and the shared-memory
 * allocate hook unless MPIDI_CH4_DIRECT_NETMOD is defined), and finishes with
 * a barrier over comm.
 *
 * size      - passed to win_init() / win_shm_alloc_impl(); stored as win->size
 * disp_unit - displacement unit, passed to win_init() / win_shm_alloc_impl()
 * info      - info object, passed to win_init()
 * comm      - communicator of the window
 * baseptr   - treated as a void **: win_shm_alloc_impl() receives that
 *             location and the value found there afterwards becomes win->base
 * win_ptr   - receives the window produced by win_init()
 *
 * On any failure the window is handed to win_finalize() before returning.
 * Returns MPI_SUCCESS or the error of the first failing step.
 */
int MPIDIG_mpi_win_allocate(MPI_Aint size, int disp_unit, MPIR_Info * info, MPIR_Comm * comm,
                            void *baseptr, MPIR_Win ** win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    MPIR_Win *win;
    void **base_ptr = (void **) baseptr;

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_ALLOCATE);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_ALLOCATE);

    /* Every step reports failure through MPIR_ERR_CHECK, matching the hook
     * calls below. */
    mpi_errno = win_init(size, disp_unit, win_ptr, info, comm, MPI_WIN_FLAVOR_ALLOCATE,
                         MPI_WIN_UNIFIED);
    MPIR_ERR_CHECK(mpi_errno);

    mpi_errno = win_shm_alloc_impl(size, disp_unit, comm, base_ptr, win_ptr, SHM_WIN_OPTIONAL);
    MPIR_ERR_CHECK(mpi_errno);

    win = *win_ptr;
    win->base = *base_ptr;      /* same location as baseptr, already typed */
    win->size = size;

    mpi_errno = MPIDI_NM_mpi_win_allocate_hook(win);
    MPIR_ERR_CHECK(mpi_errno);

#ifndef MPIDI_CH4_DIRECT_NETMOD
    mpi_errno = MPIDI_SHM_mpi_win_allocate_hook(win);
    MPIR_ERR_CHECK(mpi_errno);
#endif

    mpi_errno = MPIR_Barrier(comm, &errflag);
    MPIR_ERR_CHECK(mpi_errno);

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_ALLOCATE);
    return mpi_errno;
  fn_fail:
    /* NOTE(review): this also runs when win_init() itself failed; confirm
     * that win_finalize() tolerates whatever *win_ptr holds in that case. */
    if (win_ptr)
        win_finalize(win_ptr);
    goto fn_exit;
}
965 
/*
 * Generic (CH4R) implementation of dynamic window creation.
 *
 * Initializes the window through win_init() with length 0, displacement unit
 * 1, flavor MPI_WIN_FLAVOR_DYNAMIC and model MPI_WIN_UNIFIED, sets the window
 * base to MPI_BOTTOM, runs the netmod create-dynamic hook (and the
 * shared-memory one unless MPIDI_CH4_DIRECT_NETMOD is defined), and finishes
 * with a barrier over comm.
 *
 * info    - info object, passed to win_init()
 * comm    - communicator of the window
 * win_ptr - receives the window produced by win_init()
 *
 * Returns MPI_SUCCESS or the error of the first failing step.
 */
int MPIDIG_mpi_win_create_dynamic(MPIR_Info * info, MPIR_Comm * comm, MPIR_Win ** win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    MPIR_Win *win;

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_CREATE_DYNAMIC);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_CREATE_DYNAMIC);

    /* The result must land in mpi_errno: that is the value returned at
     * fn_exit, so storing it anywhere else would report a failed win_init()
     * as MPI_SUCCESS. */
    mpi_errno = win_init(0, 1, win_ptr, info, comm, MPI_WIN_FLAVOR_DYNAMIC, MPI_WIN_UNIFIED);
    MPIR_ERR_CHECK(mpi_errno);

    win = *win_ptr;
    win->base = MPI_BOTTOM;

    mpi_errno = MPIDI_NM_mpi_win_create_dynamic_hook(win);
    MPIR_ERR_CHECK(mpi_errno);

#ifndef MPIDI_CH4_DIRECT_NETMOD
    mpi_errno = MPIDI_SHM_mpi_win_create_dynamic_hook(win);
    MPIR_ERR_CHECK(mpi_errno);
#endif

    mpi_errno = MPIR_Barrier(comm, &errflag);
    MPIR_ERR_CHECK(mpi_errno);

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_CREATE_DYNAMIC);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
1001