1 /*
2 * Copyright (C) by Argonne National Laboratory
3 * See COPYRIGHT in top-level directory
4 */
5
6 #include "mpidimpl.h"
7 #include "mpidch4r.h"
8 #include "ch4r_win.h"
9
/* Values for the shm_option argument of win_shm_alloc_impl. */
enum {
    /* Not referenced in the visible part of this file; win_shm_alloc_impl only
     * tests for SHM_WIN_REQUIRED, so any other value lets it fall back to a
     * plain allocation when the shared memory mapping fails. */
    SHM_WIN_OPTIONAL,
    /* A failed shared memory mapping is reported as "**alloc_shar_mem". */
    SHM_WIN_REQUIRED,
};

/* Forward declarations of the file-local helpers defined below. */
static void parse_info_accu_ops_str(const char *str, uint32_t * ops_ptr);
static void get_info_accu_ops_str(uint32_t val, char *buf, size_t maxlen);
static int win_set_info(MPIR_Win * win, MPIR_Info * info, bool is_init);
static int win_init(MPI_Aint length, int disp_unit, MPIR_Win ** win_ptr, MPIR_Info * info,
                    MPIR_Comm * comm_ptr, int create_flavor, int model);
static int win_finalize(MPIR_Win ** win_ptr);
static int win_shm_alloc_impl(MPI_Aint size, int disp_unit, MPIR_Comm * comm_ptr, void **base_ptr,
                              MPIR_Win ** win_ptr, int shm_option);
23
parse_info_accu_ops_str(const char * str,uint32_t * ops_ptr)24 static void parse_info_accu_ops_str(const char *str, uint32_t * ops_ptr)
25 {
26 uint32_t ops = 0;
27 char *value, *token, *savePtr = NULL;
28
29 value = (char *) str;
30 /* str can never be NULL. */
31 MPIR_Assert(value);
32
33 /* handle special value */
34 if (!strncmp(value, "none", strlen("none"))) {
35 *ops_ptr = 0;
36 return;
37 } else if (!strncmp(value, "any_op", strlen("any_op"))) {
38 /* add all ops */
39 int op_index;
40 for (op_index = 0; op_index < MPIDIG_ACCU_NUM_OP; op_index++)
41 ops |= (1 << op_index);
42 *ops_ptr = ops;
43 return;
44 }
45
46 token = (char *) strtok_r(value, ",", &savePtr);
47 while (token != NULL) {
48 /* Use OP_NULL for special cswap */
49 if (!strncmp(token, "cswap", strlen("cswap")) ||
50 !strncmp(token, "compare_and_swap", strlen("compare_and_swap"))) {
51 ops |= (1 << MPIDIU_win_acc_op_get_index(MPI_OP_NULL));
52 } else {
53 /* search other reduce op by short name */
54 MPI_Op op = MPIR_Op_builtin_search_by_shortname(token);
55 if (op != MPI_OP_NULL) {
56 ops |= (1 << MPIDIU_win_acc_op_get_index(op));
57 }
58 }
59
60 token = (char *) strtok_r(NULL, ",", &savePtr);
61 }
62
63 /* update info only when any valid value is set */
64 if (ops)
65 *ops_ptr = ops;
66 }
67
get_info_accu_ops_str(uint32_t val,char * buf,size_t maxlen)68 static void get_info_accu_ops_str(uint32_t val, char *buf, size_t maxlen)
69 {
70 int c = 0, op_index;
71 for (op_index = 0; op_index < MPIDIG_ACCU_NUM_OP; op_index++) {
72 if (val & (1 << op_index)) {
73 MPI_Op op = MPIDIU_win_acc_get_op(op_index);
74
75 MPIR_Assert(c < maxlen);
76 /* use OP_NULL as special cswap */
77 if (op == MPI_OP_NULL) {
78 c += snprintf(buf + c, maxlen - c, "%scswap", (c > 0) ? "," : "");
79 } else {
80 const char *short_name = MPIR_Op_builtin_get_shortname(op);
81 c += snprintf(buf + c, maxlen - c, "%s%s", (c > 0) ? "," : "", short_name);
82 }
83 }
84 }
85
86 if (c == 0)
87 strncpy(buf, "none", maxlen);
88 }
89
/* Mirror two info hints into the window attribute bits:
 * disable_shm_accumulate drives MPIDI_WINATTR_ACCU_NO_SHM and
 * accumulate_ops == MPIDIG_ACCU_SAME_OP_NO_OP drives
 * MPIDI_WINATTR_ACCU_SAME_OP_NO_OP. Each bit is set or cleared to match the
 * current hint value. */
static void update_winattr_after_set_info(MPIR_Win * win)
{
    if (!MPIDIG_WIN(win, info_args).disable_shm_accumulate) {
        MPIDI_WIN(win, winattr) &= ~((unsigned) MPIDI_WINATTR_ACCU_NO_SHM);
    } else {
        MPIDI_WIN(win, winattr) |= MPIDI_WINATTR_ACCU_NO_SHM;
    }

    if (MPIDIG_WIN(win, info_args).accumulate_ops != MPIDIG_ACCU_SAME_OP_NO_OP) {
        MPIDI_WIN(win, winattr) &= ~((unsigned) MPIDI_WINATTR_ACCU_SAME_OP_NO_OP);
    } else {
        MPIDI_WIN(win, winattr) |= MPIDI_WINATTR_ACCU_SAME_OP_NO_OP;
    }
}
102
win_set_info(MPIR_Win * win,MPIR_Info * info,bool is_init)103 static int win_set_info(MPIR_Win * win, MPIR_Info * info, bool is_init)
104 {
105 int mpi_errno = MPI_SUCCESS;
106 MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_WIN_SET_INFO);
107 MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_WIN_SET_INFO);
108
109 MPIR_Info *curr_ptr;
110 char *value, *token, *savePtr = NULL;
111 int save_ordering;
112
113 curr_ptr = info->next;
114
115 while (curr_ptr) {
116 if (!strcmp(curr_ptr->key, "no_locks")) {
117 if (!strcmp(curr_ptr->value, "true"))
118 MPIDIG_WIN(win, info_args).no_locks = 1;
119 else if (!strcmp(curr_ptr->value, "false"))
120 MPIDIG_WIN(win, info_args).no_locks = 0;
121 } else if (!strcmp(curr_ptr->key, "accumulate_ordering")) {
122 save_ordering = MPIDIG_WIN(win, info_args).accumulate_ordering;
123 MPIDIG_WIN(win, info_args).accumulate_ordering = 0;
124 if (!strcmp(curr_ptr->value, "none")) {
125 /* For MPI-3, "none" means no ordering and is not default. */
126 goto next;
127 }
128
129 /* value can never be NULL. */
130 MPIR_Assert(curr_ptr->value);
131
132 value = curr_ptr->value;
133 token = (char *) strtok_r(value, ",", &savePtr);
134
135 while (token) {
136 if (!memcmp(token, "rar", 3))
137 MPIDIG_WIN(win, info_args).accumulate_ordering =
138 (MPIDIG_WIN(win, info_args).accumulate_ordering | MPIDIG_ACCU_ORDER_RAR);
139 else if (!memcmp(token, "raw", 3))
140 MPIDIG_WIN(win, info_args).accumulate_ordering =
141 (MPIDIG_WIN(win, info_args).accumulate_ordering | MPIDIG_ACCU_ORDER_RAW);
142 else if (!memcmp(token, "war", 3))
143 MPIDIG_WIN(win, info_args).accumulate_ordering =
144 (MPIDIG_WIN(win, info_args).accumulate_ordering | MPIDIG_ACCU_ORDER_WAR);
145 else if (!memcmp(token, "waw", 3))
146 MPIDIG_WIN(win, info_args).accumulate_ordering =
147 (MPIDIG_WIN(win, info_args).accumulate_ordering | MPIDIG_ACCU_ORDER_WAW);
148 else
149 MPIR_ERR_SETANDSTMT(mpi_errno, MPI_ERR_ARG, goto fn_fail, "**info");
150
151 token = (char *) strtok_r(NULL, ",", &savePtr);
152 }
153
154 if (MPIDIG_WIN(win, info_args).accumulate_ordering == 0)
155 MPIDIG_WIN(win, info_args).accumulate_ordering = save_ordering;
156 } else if (!strcmp(curr_ptr->key, "accumulate_ops")) {
157 if (!strcmp(curr_ptr->value, "same_op"))
158 MPIDIG_WIN(win, info_args).accumulate_ops = MPIDIG_ACCU_SAME_OP;
159 else if (!strcmp(curr_ptr->value, "same_op_no_op"))
160 MPIDIG_WIN(win, info_args).accumulate_ops = MPIDIG_ACCU_SAME_OP_NO_OP;
161 } else if (!strcmp(curr_ptr->key, "same_disp_unit")) {
162 if (!strcmp(curr_ptr->value, "true"))
163 MPIDIG_WIN(win, info_args).same_disp_unit = 1;
164 else if (!strcmp(curr_ptr->value, "false"))
165 MPIDIG_WIN(win, info_args).same_disp_unit = 0;
166 } else if (!strcmp(curr_ptr->key, "same_size")) {
167 if (!strcmp(curr_ptr->value, "true"))
168 MPIDIG_WIN(win, info_args).same_size = 1;
169 else if (!strcmp(curr_ptr->value, "false"))
170 MPIDIG_WIN(win, info_args).same_size = 0;
171 } else if (!strcmp(curr_ptr->key, "alloc_shared_noncontig")) {
172 if (!strcmp(curr_ptr->value, "true"))
173 MPIDIG_WIN(win, info_args).alloc_shared_noncontig = 1;
174 else if (!strcmp(curr_ptr->value, "false"))
175 MPIDIG_WIN(win, info_args).alloc_shared_noncontig = 0;
176 } else if (!strcmp(curr_ptr->key, "alloc_shm")) {
177 if (!strcmp(curr_ptr->value, "true"))
178 MPIDIG_WIN(win, info_args).alloc_shm = 1;
179 else if (!strcmp(curr_ptr->value, "false"))
180 MPIDIG_WIN(win, info_args).alloc_shm = 0;
181 }
182 /* We allow the user to set the following atomics hint only at window init time,
183 * all future updates by win_set_info are ignored. This is because we do not
184 * have a good way to ensure all outstanding atomic ops have been completed
185 * on all processes especially in passive-target epochs. */
186 else if (is_init && !strcmp(curr_ptr->key, "which_accumulate_ops")) {
187 parse_info_accu_ops_str(curr_ptr->value,
188 &MPIDIG_WIN(win, info_args).which_accumulate_ops);
189 } else if (is_init && !strcmp(curr_ptr->key, "accumulate_noncontig_dtype")) {
190 if (!strcmp(curr_ptr->value, "true"))
191 MPIDIG_WIN(win, info_args).accumulate_noncontig_dtype = true;
192 else if (!strcmp(curr_ptr->value, "false"))
193 MPIDIG_WIN(win, info_args).accumulate_noncontig_dtype = false;
194 } else if (is_init && !strcmp(curr_ptr->key, "accumulate_max_bytes")) {
195 if (!strcmp(curr_ptr->value, "unlimited") || !strcmp(curr_ptr->value, "-1"))
196 MPIDIG_WIN(win, info_args).accumulate_max_bytes = -1;
197 else {
198 long max_bytes = atol(curr_ptr->value);
199 if (max_bytes >= 0)
200 MPIDIG_WIN(win, info_args).accumulate_max_bytes = max_bytes;
201 }
202 } else if (is_init && !strcmp(curr_ptr->key, "disable_shm_accumulate")) {
203 if (!strcmp(curr_ptr->value, "true"))
204 MPIDIG_WIN(win, info_args).disable_shm_accumulate = true;
205 else
206 MPIDIG_WIN(win, info_args).disable_shm_accumulate = false;
207 } else if (is_init && !strcmp(curr_ptr->key, "coll_attach")) {
208 if (!strcmp(curr_ptr->value, "true"))
209 MPIDIG_WIN(win, info_args).coll_attach = true;
210 else
211 MPIDIG_WIN(win, info_args).coll_attach = false;
212 }
213 next:
214 curr_ptr = curr_ptr->next;
215 }
216
217 fn_exit:
218 MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_WIN_SET_INFO);
219 return mpi_errno;
220 fn_fail:
221 goto fn_exit;
222 }
223
/* Allocate a window object and initialise its generic (CH4) state.
 *
 * length, disp_unit     - stored as the window's size and displacement unit.
 * win_ptr               - receives the new window as soon as the handle
 *                         object has been allocated.
 * info                  - optional hints; applied through win_set_info when
 *                         it is neither NULL nor MPI_INFO_NULL.
 * comm_ptr              - communicator the window is created on; a duplicate
 *                         of it becomes win->comm_ptr.
 * create_flavor, model  - stored in the window.
 *
 * The routine is collective over comm_ptr (it duplicates the communicator
 * and performs an allreduce).
 *
 * Returns MPI_SUCCESS or an MPI error code. NOTE(review): when an error
 * occurs after the handle object was allocated, the partially initialised
 * window is left in *win_ptr and is not released here. */
static int win_init(MPI_Aint length, int disp_unit, MPIR_Win ** win_ptr, MPIR_Info * info,
                    MPIR_Comm * comm_ptr, int create_flavor, int model)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Win *win = (MPIR_Win *) MPIR_Handle_obj_alloc(&MPIR_Win_mem);
    MPIR_Comm *win_comm_ptr;

    /* the same state identifier is used for DECL, ENTER and EXIT */
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_WIN_INIT);
    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_WIN_INIT);

    MPIR_ERR_CHKANDSTMT(win == NULL, mpi_errno, MPI_ERR_NO_MEM, goto fn_fail, "**nomem");
    *win_ptr = win;

    memset(&win->dev.am, 0, sizeof(MPIDIG_win_t));

    /* Duplicate the original communicator here to avoid having collisions
     * between internal collectives */
    mpi_errno = MPIR_Comm_dup_impl(comm_ptr, NULL, &win_comm_ptr);
    MPIR_ERR_CHECK(mpi_errno);

    MPIDIG_WIN(win, targets) = NULL;

    win->errhandler = NULL;
    win->base = NULL;
    win->size = length;
    win->disp_unit = disp_unit;
    win->create_flavor = create_flavor;
    win->model = model;
    win->copyCreateFlavor = 0;
    win->copyModel = 0;
    win->attributes = NULL;
    win->comm_ptr = win_comm_ptr;
    win->copyDispUnit = 0;
    win->copySize = 0;
    MPIDIG_WIN(win, shared_table) = NULL;
    MPIDIG_WIN(win, sync).assert_mode = 0;

    /* Initialize the info (hint) flags per window */
    MPIDIG_WIN(win, info_args).no_locks = 0;
    MPIDIG_WIN(win, info_args).accumulate_ordering = (MPIDIG_ACCU_ORDER_RAR |
                                                      MPIDIG_ACCU_ORDER_RAW |
                                                      MPIDIG_ACCU_ORDER_WAR |
                                                      MPIDIG_ACCU_ORDER_WAW);
    MPIDIG_WIN(win, info_args).accumulate_ops = MPIDIG_ACCU_SAME_OP_NO_OP;
    MPIDIG_WIN(win, info_args).same_size = 0;
    MPIDIG_WIN(win, info_args).same_disp_unit = 0;
    MPIDIG_WIN(win, info_args).alloc_shared_noncontig = 0;
    if (win->create_flavor == MPI_WIN_FLAVOR_ALLOCATE
        || win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
        MPIDIG_WIN(win, info_args).alloc_shm = 1;
    } else {
        MPIDIG_WIN(win, info_args).alloc_shm = 0;
    }

    /* default any op */
    int op_index;
    MPIDIG_WIN(win, info_args).which_accumulate_ops = 0;
    for (op_index = 0; op_index < MPIDIG_ACCU_NUM_OP; op_index++)
        MPIDIG_WIN(win, info_args).which_accumulate_ops |= ((uint32_t) 1 << op_index);
    MPIDIG_WIN(win, info_args).accumulate_noncontig_dtype = true;
    MPIDIG_WIN(win, info_args).accumulate_max_bytes = -1;
    MPIDIG_WIN(win, info_args).disable_shm_accumulate = false;
    MPIDIG_WIN(win, info_args).coll_attach = false;

    /* user hints override the defaults set above */
    if ((info != NULL) && ((int *) info != (int *) MPI_INFO_NULL)) {
        mpi_errno = win_set_info(win, info, TRUE /* is_init */);
        MPIR_ERR_CHECK(mpi_errno);
    }


    MPIDIG_WIN(win, mmap_sz) = 0;
    MPIDIG_WIN(win, mmap_addr) = NULL;

    MPIR_cc_set(&MPIDIG_WIN(win, local_cmpl_cnts), 0);
    MPIR_cc_set(&MPIDIG_WIN(win, remote_cmpl_cnts), 0);
    MPIR_cc_set(&MPIDIG_WIN(win, remote_acc_cmpl_cnts), 0);

    /* register the window in the global id -> window map */
    MPIDIG_WIN(win, win_id) = MPIDIG_generate_win_id(comm_ptr);
    MPIDIU_map_set(MPIDI_global.win_map, MPIDIG_WIN(win, win_id), win, MPL_MEM_RMA);

    /* set winattr for performance optimization at fast path:
     * - check if comm is COMM_WORLD or dup of COMM_WORLD
     * - check if disable_shm_accumulate hint is set
     * - check if SAME_OP_NO_OP is set for accumulates */
    MPIDI_WIN(win, winattr) = 0;

    int comm_compare_result = MPI_UNEQUAL;
    mpi_errno = MPIR_Comm_compare_impl(comm_ptr, MPIR_Process.comm_world, &comm_compare_result);
    MPIR_ERR_CHECK(mpi_errno);

    if (comm_compare_result == MPI_CONGRUENT || comm_compare_result == MPI_IDENT)
        MPIDI_WIN(win, winattr) |= MPIDI_WINATTR_DIRECT_INTRA_COMM;

    update_winattr_after_set_info(win);

    /* If no local processes on each node, set ACCU_NO_SHM to enable native atomics */
    bool no_local = false, all_no_local = false;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    if (!comm_ptr->node_comm)
        no_local = true;

    mpi_errno = MPIR_Allreduce(&no_local, &all_no_local, 1, MPI_C_BOOL,
                               MPI_LAND, comm_ptr, &errflag);
    /* check the return code as well as the error flag */
    MPIR_ERR_CHECK(mpi_errno);
    MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
    if (all_no_local)
        MPIDI_WIN(win, winattr) |= MPIDI_WINATTR_ACCU_NO_SHM;

  fn_exit:
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_WIN_INIT);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
338
/* Tear down a window created by win_init.
 *
 * Drives progress until all completion counters of the window are zero and
 * all per-target checks report completion, then runs the netmod (and, unless
 * MPIDI_CH4_DIRECT_NETMOD is defined, the shm) free hooks, drops the target
 * and hash state, releases the window memory for the ALLOCATE and SHARED
 * flavors, removes the window from the global map, releases its communicator
 * and frees the handle object.
 *
 * Returns MPI_SUCCESS or the first MPI error code encountered; on error the
 * remaining teardown steps are not executed. */
static int win_finalize(MPIR_Win ** win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Win *win = *win_ptr;
    bool drained = false;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_WIN_FINALIZE);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_WIN_FINALIZE);

    /* All local outstanding OPs should have been completed. */
    MPIR_Assert(MPIR_cc_get(MPIDIG_WIN(win, local_cmpl_cnts)) == 0);
    MPIR_Assert(MPIR_cc_get(MPIDIG_WIN(win, remote_cmpl_cnts)) == 0);
    MPIR_Assert(MPIR_cc_get(MPIDIG_WIN(win, remote_acc_cmpl_cnts)) == 0);

    /* Make progress till all OPs have been completed; at least one progress
     * call is always made. */
    while (!drained) {
        bool targets_local_done, targets_remote_done;

        /* NOTE: MPID_Win_free does not take on locks */
        mpi_errno = MPID_Progress_test(NULL);
        MPIR_ERR_CHECK(mpi_errno);

        targets_local_done = MPIDIG_win_check_all_targets_local_completed(win);
        targets_remote_done = MPIDIG_win_check_all_targets_remote_completed(win);

        /* Local completion counter might be updated later than remote completion
         * (at request completion), so we need to check it before release entire
         * window. */
        drained = (MPIR_cc_get(MPIDIG_WIN(win, local_cmpl_cnts)) == 0) &&
            (MPIR_cc_get(MPIDIG_WIN(win, remote_cmpl_cnts)) == 0) &&
            (MPIR_cc_get(MPIDIG_WIN(win, remote_acc_cmpl_cnts)) == 0) &&
            targets_local_done && targets_remote_done;
    }

    mpi_errno = MPIDI_NM_mpi_win_free_hook(win);
    MPIR_ERR_CHECK(mpi_errno);

#ifndef MPIDI_CH4_DIRECT_NETMOD
    mpi_errno = MPIDI_SHM_mpi_win_free_hook(win);
    MPIR_ERR_CHECK(mpi_errno);
#endif

    MPIDIG_win_target_cleanall(win);
    MPIDIG_win_hash_clear(win);

    /* only these two flavors own the window memory */
    if (win->create_flavor == MPI_WIN_FLAVOR_ALLOCATE ||
        win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
        if (!MPIDIG_WIN(win, mmap_addr)) {
            /* no mapped segment: the buffer in win->base is released with MPL_free */
            MPL_free(win->base);
        } else {
            /* if more than one process on a node, we use shared memory by default */
            mpi_errno = MPIDU_shm_free(MPIDIG_WIN(win, mmap_addr));
            MPIR_ERR_CHECK(mpi_errno);

            /* if shared memory allocation fails or zero size window, free the table at allocation. */
            MPL_free(MPIDIG_WIN(win, shared_table));
        }
    }

    MPIDIU_map_erase(MPIDI_global.win_map, MPIDIG_WIN(win, win_id));

    MPIR_Comm_release(win->comm_ptr);
    MPIR_Handle_obj_free(&MPIR_Win_mem, win);

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_WIN_FINALIZE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
407
408 /* Allocate RMA window over shared memory region. Used by both win_allocate
409 * and win_allocate_shared.
410 *
411 * This routine allocates window memory region on each node from shared
412 * memory, and initializes the shared_table structure that stores each
413 * node process's size, disp_unit, and start address for shm RMA operations
414 * and query routine.*/
win_shm_alloc_impl(MPI_Aint size,int disp_unit,MPIR_Comm * comm_ptr,void ** base_ptr,MPIR_Win ** win_ptr,int shm_option)415 static int win_shm_alloc_impl(MPI_Aint size, int disp_unit, MPIR_Comm * comm_ptr, void **base_ptr,
416 MPIR_Win ** win_ptr, int shm_option)
417 {
418 int i, mpi_errno = MPI_SUCCESS;
419 MPIR_Errflag_t errflag = MPIR_ERR_NONE;
420 MPIR_Win *win = NULL;
421 size_t total_shm_size = 0LL;
422 MPIDIG_win_shared_info_t *shared_table = NULL;
423 MPI_Aint *shm_offsets = NULL;
424 MPIR_Comm *shm_comm_ptr = comm_ptr->node_comm;
425 size_t page_sz = 0, mapsize;
426 bool symheap_mapfail_flag = false, shm_mapfail_flag = false;
427 bool symheap_flag = true, global_symheap_flag = false;
428
429 MPIR_CHKPMEM_DECL(2);
430 MPIR_CHKLMEM_DECL(1);
431 MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_WIN_SHM_ALLOC_IMPL);
432 MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_WIN_SHM_ALLOC_IMPL);
433
434 if (mpi_errno != MPI_SUCCESS)
435 goto fn_fail;
436
437 win = *win_ptr;
438 *base_ptr = NULL;
439
440 /* Check whether multiple processes exist on the local node. If so,
441 * we need to count the total size on a node for shared memory allocation. */
442 if (shm_comm_ptr != NULL) {
443 MPIR_T_PVAR_TIMER_START(RMA, rma_wincreate_allgather);
444 MPIR_CHKPMEM_MALLOC(MPIDIG_WIN(win, shared_table), MPIDIG_win_shared_info_t *,
445 sizeof(MPIDIG_win_shared_info_t) * shm_comm_ptr->local_size,
446 mpi_errno, "shared table", MPL_MEM_RMA);
447 shared_table = MPIDIG_WIN(win, shared_table);
448 shared_table[shm_comm_ptr->rank].size = size;
449 shared_table[shm_comm_ptr->rank].disp_unit = disp_unit;
450 shared_table[shm_comm_ptr->rank].shm_base_addr = NULL;
451
452 mpi_errno = MPIR_Allgather(MPI_IN_PLACE,
453 0,
454 MPI_DATATYPE_NULL,
455 shared_table,
456 sizeof(MPIDIG_win_shared_info_t), MPI_BYTE, shm_comm_ptr,
457 &errflag);
458 MPIR_T_PVAR_TIMER_END(RMA, rma_wincreate_allgather);
459 if (mpi_errno != MPI_SUCCESS)
460 goto fn_fail;
461
462 MPIR_CHKLMEM_MALLOC(shm_offsets, MPI_Aint *, shm_comm_ptr->local_size * sizeof(MPI_Aint),
463 mpi_errno, "shm offset", MPL_MEM_RMA);
464
465 /* No allreduce here because this is a shared memory domain
466 * and should be a relatively small number of processes
467 * and a non performance sensitive API.
468 */
469 for (i = 0; i < shm_comm_ptr->local_size; i++) {
470 shm_offsets[i] = (MPI_Aint) total_shm_size;
471 if (MPIDIG_WIN(win, info_args).alloc_shared_noncontig)
472 total_shm_size += MPIDU_shm_get_mapsize(shared_table[i].size, &page_sz);
473 else
474 total_shm_size += shared_table[i].size;
475 }
476
477 /* if all processes give zero size on a single node window, simply return. */
478 if (total_shm_size == 0 && shm_comm_ptr->local_size == comm_ptr->local_size)
479 goto fn_no_shm;
480
481 /* if my size is not page aligned and noncontig is disabled, skip global symheap. */
482 if (size != MPIDU_shm_get_mapsize(size, &page_sz) &&
483 !MPIDIG_WIN(win, info_args).alloc_shared_noncontig)
484 symheap_flag = false;
485 } else
486 total_shm_size = size;
487
488 /* try global symm heap only when multiple processes exist */
489 if (comm_ptr->local_size > 1) {
490 /* global symm heap can be successful only when any of the following conditions meet.
491 * Thus, we can skip unnecessary global symm heap retry based on condition check.
492 * - no shared memory node (i.e., single process per node)
493 * - size of each process on the shared memory node is page aligned,
494 * thus all process can be assigned to a page aligned start address.
495 * - user sets alloc_shared_noncontig=true, thus we can internally make
496 * the size aligned on each process. */
497 mpi_errno = MPIR_Allreduce(&symheap_flag, &global_symheap_flag, 1, MPI_C_BOOL,
498 MPI_LAND, comm_ptr, &errflag);
499 MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
500 } else
501 global_symheap_flag = false;
502
503 /* because MPI_shm follows a create & attach mode, we need to set the
504 * size of entire shared memory segment on each node as the size of
505 * each process. */
506 mapsize = MPIDU_shm_get_mapsize(total_shm_size, &page_sz);
507
508 /* first try global symmetric heap segment allocation */
509 if (global_symheap_flag) {
510 size_t my_offset = (shm_comm_ptr) ? shm_offsets[shm_comm_ptr->rank] : 0;
511 MPIDIG_WIN(win, mmap_sz) = mapsize;
512 mpi_errno =
513 MPIDU_shm_alloc_symm_all(comm_ptr, mapsize, my_offset, &MPIDIG_WIN(win, mmap_addr),
514 &symheap_mapfail_flag);
515 if (mpi_errno != MPI_SUCCESS)
516 goto fn_fail;
517
518 if (symheap_mapfail_flag) {
519 MPIDIG_WIN(win, mmap_sz) = 0;
520 MPIDIG_WIN(win, mmap_addr) = NULL;
521 }
522 }
523
524 /* if symmetric heap is disabled or fails, try normal shm segment allocation */
525 if (!global_symheap_flag || symheap_mapfail_flag) {
526 if (shm_comm_ptr != NULL && mapsize) {
527 MPIDIG_WIN(win, mmap_sz) = mapsize;
528 mpi_errno =
529 MPIDU_shm_alloc(shm_comm_ptr, mapsize, &MPIDIG_WIN(win, mmap_addr),
530 &shm_mapfail_flag);
531 if (mpi_errno != MPI_SUCCESS)
532 goto fn_fail;
533
534 if (shm_mapfail_flag) {
535 MPIDIG_WIN(win, mmap_sz) = 0;
536 MPIDIG_WIN(win, mmap_addr) = NULL;
537 }
538
539 /* throw error here if shm allocation is required but fails */
540 if (shm_option == SHM_WIN_REQUIRED)
541 MPIR_ERR_CHKANDJUMP(shm_mapfail_flag, mpi_errno, MPI_ERR_OTHER, "**alloc_shar_mem");
542 }
543
544 /* If only single process on a node or shm segment allocation fails, try malloc. */
545 if ((shm_comm_ptr == NULL || shm_mapfail_flag) && size > 0) {
546 MPIR_CHKPMEM_MALLOC(*base_ptr, void *, size, mpi_errno, "(*win_ptr)->base",
547 MPL_MEM_RMA);
548 MPL_VG_MEM_INIT(*base_ptr, size);
549 }
550 }
551
552 /* compute the base addresses of each process within the shared memory segment */
553 if (shm_comm_ptr != NULL && MPIDIG_WIN(win, mmap_addr)) {
554 char *cur_base = (char *) MPIDIG_WIN(win, mmap_addr);
555 for (i = 0; i < shm_comm_ptr->local_size; i++) {
556 if (shared_table[i].size)
557 shared_table[i].shm_base_addr = cur_base;
558 else
559 shared_table[i].shm_base_addr = NULL;
560
561 if (MPIDIG_WIN(win, info_args).alloc_shared_noncontig)
562 cur_base += MPIDU_shm_get_mapsize(shared_table[i].size, &page_sz);
563 else
564 cur_base += shared_table[i].size;
565 }
566
567 *base_ptr = shared_table[shm_comm_ptr->rank].shm_base_addr;
568 } else if (MPIDIG_WIN(win, mmap_sz) > 0) {
569 /* if symm heap is allocated without shared memory, use the mapping address */
570 *base_ptr = MPIDIG_WIN(win, mmap_addr);
571 }
572 /* otherwise, it has already be assigned with a local memory region or NULL (zero size). */
573
574 fn_no_shm:
575 /* free shared_table if no shm segment allocated */
576 if (shared_table && !MPIDIG_WIN(win, mmap_addr)) {
577 MPL_free(MPIDIG_WIN(win, shared_table));
578 MPIDIG_WIN(win, shared_table) = NULL;
579 }
580
581 fn_exit:
582 MPIR_CHKLMEM_FREEALL();
583 MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_WIN_SHM_ALLOC_IMPL);
584 return mpi_errno;
585 fn_fail:
586 MPIR_CHKPMEM_REAP();
587 goto fn_exit;
588 }
589
/* Register the RMA timer performance variables used by this layer:
 * rma_winlock_getlocallock, rma_wincreate_allgather and rma_amhdr_set.
 * All three are registered in the "RMA" category as read-only MPI_DOUBLE
 * timers that are not bound to an object.
 *
 * Returns mpi_errno, which this function initialises to MPI_SUCCESS and does
 * not assign again itself (whether the registration macros touch it cannot
 * be seen from here). */
int MPIDIG_RMA_Init_sync_pvars(void)
{
    int mpi_errno = MPI_SUCCESS;
    /* rma_winlock_getlocallock */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_winlock_getlocallock,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "WIN_LOCK:Get local lock (in seconds)");

    /* rma_wincreate_allgather: started and stopped around the allgather in
     * win_shm_alloc_impl */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_wincreate_allgather,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "WIN_CREATE:Allgather (in seconds)");

    /* rma_amhdr_set */
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                      MPI_DOUBLE,
                                      rma_amhdr_set,
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                      MPI_T_BIND_NO_OBJECT,
                                      MPIR_T_PVAR_FLAG_READONLY,
                                      "RMA", "Set fields in AM Handler (in seconds)");

    return mpi_errno;
}
622
/* Update the hints of an existing window from an info object.
 *
 * The hints are applied with is_init == false, so the hints that
 * win_set_info accepts only at window creation are ignored. The call ends
 * with a barrier on the window communicator.
 *
 * Returns MPI_SUCCESS, the error from win_set_info, or the result of the
 * barrier. */
int MPIDIG_mpi_win_set_info(MPIR_Win * win, MPIR_Info * info)
{
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    int mpi_errno = MPI_SUCCESS;

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_SET_INFO);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_SET_INFO);

    /* not window creation: init-only hints are skipped */
    mpi_errno = win_set_info(win, info, false);
    MPIR_ERR_CHECK(mpi_errno);

    /* winattr is deliberately left alone here; it is derived only from the
     * info given at window creation, because changing it alters RMA behavior
     * and would need collective synchronization. */

    mpi_errno = MPIR_Barrier(win->comm_ptr, &errflag);

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_SET_INFO);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
643
/* Store a boolean hint in an info object as the string "true" or "false".
 * Returns the result of MPIR_Info_set_impl. */
static int win_info_set_flag(MPIR_Info * info_ptr, const char *key, bool flag)
{
    return MPIR_Info_set_impl(info_ptr, key, flag ? "true" : "false");
}

/* Build a new info object describing the current hints of a window.
 *
 * win      - window to query.
 * info_p_p - receives the newly allocated info object; set to NULL when the
 *            call fails.
 *
 * The keys are added in this order: no_locks, accumulate_ordering,
 * accumulate_ops, alloc_shared_noncontig, same_size, same_disp_unit,
 * alloc_shm, which_accumulate_ops, accumulate_noncontig_dtype,
 * accumulate_max_bytes, disable_shm_accumulate, coll_attach.
 *
 * Returns MPI_SUCCESS or an MPI error code; on error a partially filled info
 * object is freed. */
int MPIDIG_mpi_win_get_info(MPIR_Win * win, MPIR_Info ** info_p_p)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_GET_INFO);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_GET_INFO);

    mpi_errno = MPIR_Info_alloc(info_p_p);
    if (MPI_SUCCESS != mpi_errno) {
        *info_p_p = NULL;
        MPIR_ERR_POP(mpi_errno);
    }

    mpi_errno = win_info_set_flag(*info_p_p, "no_locks", MPIDIG_WIN(win, info_args).no_locks);
    MPIR_ERR_CHECK(mpi_errno);

    {
#define BUFSIZE 32
        /* ordering flags in the order they are printed */
        const struct {
            int flag;
            const char *name;
        } order_names[] = {
            {MPIDIG_ACCU_ORDER_RAR, "rar"},
            {MPIDIG_ACCU_ORDER_RAW, "raw"},
            {MPIDIG_ACCU_ORDER_WAR, "war"},
            {MPIDIG_ACCU_ORDER_WAW, "waw"},
        };
        char ordering_str[BUFSIZE];
        int len = 0;
        size_t k;

        MPL_COMPILE_TIME_ASSERT(BUFSIZE >= 16); /* maximum: strlen("rar,raw,war,waw") + 1 */

        for (k = 0; k < sizeof(order_names) / sizeof(order_names[0]); k++) {
            if (MPIDIG_WIN(win, info_args).accumulate_ordering & order_names[k].flag)
                len += snprintf(ordering_str + len, BUFSIZE - len, "%s%s",
                                (len > 0) ? "," : "", order_names[k].name);
        }

        /* no ordering flag at all is reported as "none" */
        if (len == 0) {
            strncpy(ordering_str, "none", BUFSIZE);
        }

        mpi_errno = MPIR_Info_set_impl(*info_p_p, "accumulate_ordering", ordering_str);
        MPIR_ERR_CHECK(mpi_errno);
#undef BUFSIZE
    }

    /* anything other than SAME_OP is reported as same_op_no_op */
    mpi_errno = MPIR_Info_set_impl(*info_p_p, "accumulate_ops",
                                   (MPIDIG_WIN(win, info_args).accumulate_ops ==
                                    MPIDIG_ACCU_SAME_OP) ? "same_op" : "same_op_no_op");
    MPIR_ERR_CHECK(mpi_errno);

    mpi_errno = win_info_set_flag(*info_p_p, "alloc_shared_noncontig",
                                  MPIDIG_WIN(win, info_args).alloc_shared_noncontig);
    MPIR_ERR_CHECK(mpi_errno);

    mpi_errno = win_info_set_flag(*info_p_p, "same_size", MPIDIG_WIN(win, info_args).same_size);
    MPIR_ERR_CHECK(mpi_errno);

    mpi_errno = win_info_set_flag(*info_p_p, "same_disp_unit",
                                  MPIDIG_WIN(win, info_args).same_disp_unit);
    MPIR_ERR_CHECK(mpi_errno);

    mpi_errno = win_info_set_flag(*info_p_p, "alloc_shm", MPIDIG_WIN(win, info_args).alloc_shm);
    MPIR_ERR_CHECK(mpi_errno);

    {
        /* scratch buffer for the textual op list */
        char ops_str[128];
        get_info_accu_ops_str(MPIDIG_WIN(win, info_args).which_accumulate_ops, ops_str,
                              sizeof(ops_str));
        mpi_errno = MPIR_Info_set_impl(*info_p_p, "which_accumulate_ops", ops_str);
        MPIR_ERR_CHECK(mpi_errno);
    }

    mpi_errno = win_info_set_flag(*info_p_p, "accumulate_noncontig_dtype",
                                  MPIDIG_WIN(win, info_args).accumulate_noncontig_dtype);
    MPIR_ERR_CHECK(mpi_errno);

    /* a negative limit is reported as "unlimited" */
    if (MPIDIG_WIN(win, info_args).accumulate_max_bytes < 0) {
        mpi_errno = MPIR_Info_set_impl(*info_p_p, "accumulate_max_bytes", "unlimited");
    } else {
        char bytes_str[32];     /* make sure 64-bit integer can fit */
        snprintf(bytes_str, sizeof(bytes_str), "%ld",
                 (long) MPIDIG_WIN(win, info_args).accumulate_max_bytes);
        mpi_errno = MPIR_Info_set_impl(*info_p_p, "accumulate_max_bytes", bytes_str);
    }
    MPIR_ERR_CHECK(mpi_errno);

    mpi_errno = win_info_set_flag(*info_p_p, "disable_shm_accumulate",
                                  MPIDIG_WIN(win, info_args).disable_shm_accumulate);
    MPIR_ERR_CHECK(mpi_errno);

    mpi_errno = win_info_set_flag(*info_p_p, "coll_attach",
                                  MPIDIG_WIN(win, info_args).coll_attach);
    MPIR_ERR_CHECK(mpi_errno);

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_GET_INFO);
    return mpi_errno;
  fn_fail:
    if (*info_p_p != NULL) {
        MPIR_Info_free(*info_p_p);
        *info_p_p = NULL;
    }
    goto fn_exit;
}
770
/* Free a window.
 *
 * Fails when the window is still inside an access or exposure epoch.
 * Otherwise synchronizes all processes of the window with a barrier and
 * then tears the window down with win_finalize.
 *
 * Returns MPI_SUCCESS or an MPI error code, including errors reported by
 * win_finalize. All paths leave through fn_exit so that the function-exit
 * macro always runs. */
int MPIDIG_mpi_win_free(MPIR_Win ** win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    MPIR_Win *win = *win_ptr;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_FREE);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_FREE);

    MPIDIG_ACCESS_EPOCH_CHECK_NONE(win, mpi_errno, goto fn_fail);
    MPIDIG_EXPOSURE_EPOCH_CHECK_NONE(win, mpi_errno, goto fn_fail);

    mpi_errno = MPIR_Barrier(win->comm_ptr, &errflag);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    /* propagate teardown failures instead of dropping them */
    mpi_errno = win_finalize(win_ptr);
    MPIR_ERR_CHECK(mpi_errno);

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_FREE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
793
/* Create a window over caller-provided memory (flavor CREATE, model UNIFIED).
 *
 * base, length, disp_unit - memory region exposed by this process.
 * info                    - optional hints, forwarded to win_init.
 * comm_ptr                - communicator the window is created on.
 * win_ptr                 - receives the new window.
 *
 * After win_init the netmod (and, unless MPIDI_CH4_DIRECT_NETMOD is defined,
 * the shm) create hooks run, followed by a barrier on the window
 * communicator.
 *
 * Returns MPI_SUCCESS or an MPI error code. */
int MPIDIG_mpi_win_create(void *base, MPI_Aint length, int disp_unit, MPIR_Info * info,
                          MPIR_Comm * comm_ptr, MPIR_Win ** win_ptr)
{
    MPIR_Win *new_win;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    int mpi_errno = MPI_SUCCESS;

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_CREATE);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_CREATE);

    mpi_errno = win_init(length, disp_unit, win_ptr, info, comm_ptr, MPI_WIN_FLAVOR_CREATE,
                         MPI_WIN_UNIFIED);
    if (mpi_errno != MPI_SUCCESS) {
        goto fn_fail;
    }

    /* the window memory is the buffer supplied by the caller */
    new_win = *win_ptr;
    new_win->base = base;

    mpi_errno = MPIDI_NM_mpi_win_create_hook(new_win);
    MPIR_ERR_CHECK(mpi_errno);

#ifndef MPIDI_CH4_DIRECT_NETMOD
    mpi_errno = MPIDI_SHM_mpi_win_create_hook(new_win);
    MPIR_ERR_CHECK(mpi_errno);
#endif

    mpi_errno = MPIR_Barrier(new_win->comm_ptr, &errflag);
    if (mpi_errno != MPI_SUCCESS) {
        goto fn_fail;
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_CREATE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
832
/* Attach a memory region to a dynamic window.
 *
 * Fails with MPI_ERR_RMA_FLAVOR ("**rmaflavor") when the window was not
 * created with the DYNAMIC flavor. Otherwise the region is passed to the
 * netmod attach hook and, unless MPIDI_CH4_DIRECT_NETMOD is defined, to the
 * shm attach hook.
 *
 * Returns MPI_SUCCESS or an MPI error code. */
int MPIDIG_mpi_win_attach(MPIR_Win * win, void *base, MPI_Aint size)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_ATTACH);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_ATTACH);

    /* only dynamic windows accept attached memory */
    const bool is_dynamic_win = (win->create_flavor == MPI_WIN_FLAVOR_DYNAMIC);
    MPIR_ERR_CHKANDSTMT(!is_dynamic_win, mpi_errno, MPI_ERR_RMA_FLAVOR, goto fn_fail,
                        "**rmaflavor");

    mpi_errno = MPIDI_NM_mpi_win_attach_hook(win, base, size);
    MPIR_ERR_CHECK(mpi_errno);

#ifndef MPIDI_CH4_DIRECT_NETMOD
    mpi_errno = MPIDI_SHM_mpi_win_attach_hook(win, base, size);
    MPIR_ERR_CHECK(mpi_errno);
#endif

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_ATTACH);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
856
/*
 * MPIDIG_mpi_win_allocate_shared - create a window of flavor
 * MPI_WIN_FLAVOR_SHARED.
 *
 * Steps, in order:
 *   1. win_init() builds the window object (model MPI_WIN_UNIFIED).
 *   2. win_shm_alloc_impl() is called with SHM_WIN_REQUIRED; the window base
 *      is afterwards read from *base_ptr.
 *   3. The netmod allocate_shared hook runs, then the shm hook unless
 *      MPIDI_CH4_DIRECT_NETMOD is defined.
 *   4. A barrier over comm_ptr ends the call.
 *
 * Returns MPI_SUCCESS, or the error left in mpi_errno by the failing step.
 * Every failure, including a failed final barrier, goes through fn_fail,
 * which calls win_finalize() on the window.
 *
 * NOTE(review): fn_fail also runs when win_init() itself is the step that
 * failed; confirm that *win_ptr is safe to hand to win_finalize() then.
 */
int MPIDIG_mpi_win_allocate_shared(MPI_Aint size, int disp_unit, MPIR_Info * info_ptr,
                                   MPIR_Comm * comm_ptr, void **base_ptr, MPIR_Win ** win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    MPIR_Win *win = NULL;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_ALLOCATE_SHARED);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_ALLOCATE_SHARED);

    mpi_errno = win_init(size, disp_unit, win_ptr, info_ptr, comm_ptr, MPI_WIN_FLAVOR_SHARED,
                         MPI_WIN_UNIFIED);
    MPIR_ERR_CHECK(mpi_errno);

    mpi_errno = win_shm_alloc_impl(size, disp_unit, comm_ptr, base_ptr, win_ptr, SHM_WIN_REQUIRED);
    MPIR_ERR_CHECK(mpi_errno);

    win = *win_ptr;
    win->base = *base_ptr;
    win->size = size;

    mpi_errno = MPIDI_NM_mpi_win_allocate_shared_hook(win);
    MPIR_ERR_CHECK(mpi_errno);

#ifndef MPIDI_CH4_DIRECT_NETMOD
    mpi_errno = MPIDI_SHM_mpi_win_allocate_shared_hook(win);
    MPIR_ERR_CHECK(mpi_errno);
#endif

    mpi_errno = MPIR_Barrier(comm_ptr, &errflag);
    /* A failed barrier must take the same cleanup path as every other
     * failure here; previously it returned the error but skipped
     * win_finalize(). */
    MPIR_ERR_CHECK(mpi_errno);

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_ALLOCATE_SHARED);
    return mpi_errno;
  fn_fail:
    if (win_ptr)
        win_finalize(win_ptr);
    goto fn_exit;
}
895
/*
 * MPIDIG_mpi_win_detach - pass a detach request on to the transport hooks.
 *
 * 'base' is forwarded unchanged to the netmod detach hook and, unless
 * MPIDI_CH4_DIRECT_NETMOD is defined, to the shm detach hook. A window whose
 * create_flavor is not MPI_WIN_FLAVOR_DYNAMIC is rejected with
 * MPI_ERR_RMA_FLAVOR before any hook is invoked.
 *
 * Returns MPI_SUCCESS, or the error recorded in mpi_errno by the flavor
 * check or by the first hook that failed.
 */
int MPIDIG_mpi_win_detach(MPIR_Win * win, const void *base)
{
    int mpi_errno = MPI_SUCCESS;

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_DETACH);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_DETACH);

    /* Only windows of the dynamic flavor may take this path. */
    const int flavor_mismatch = (win->create_flavor != MPI_WIN_FLAVOR_DYNAMIC);
    MPIR_ERR_CHKANDSTMT(flavor_mismatch, mpi_errno, MPI_ERR_RMA_FLAVOR, goto fn_fail,
                        "**rmaflavor");

    mpi_errno = MPIDI_NM_mpi_win_detach_hook(win, base);
    MPIR_ERR_CHECK(mpi_errno);

#ifndef MPIDI_CH4_DIRECT_NETMOD
    mpi_errno = MPIDI_SHM_mpi_win_detach_hook(win, base);
    MPIR_ERR_CHECK(mpi_errno);
#endif

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_DETACH);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
918
/*
 * MPIDIG_mpi_win_allocate - create a window of flavor
 * MPI_WIN_FLAVOR_ALLOCATE.
 *
 * 'baseptr' is declared void * but is used as a void **: it is handed to
 * win_shm_alloc_impl() (with SHM_WIN_OPTIONAL) and the window base is then
 * read back through it.
 *
 * Steps, in order: win_init() (model MPI_WIN_UNIFIED), win_shm_alloc_impl(),
 * the netmod allocate hook, the shm allocate hook unless
 * MPIDI_CH4_DIRECT_NETMOD is defined, and finally a barrier over 'comm'.
 *
 * Returns MPI_SUCCESS, or the error left in mpi_errno by the failing step.
 * Every failure goes through fn_fail, which calls win_finalize().
 *
 * NOTE(review): fn_fail also runs when win_init() itself is the step that
 * failed; confirm that *win_ptr is safe to hand to win_finalize() then.
 */
int MPIDIG_mpi_win_allocate(MPI_Aint size, int disp_unit, MPIR_Info * info, MPIR_Comm * comm,
                            void *baseptr, MPIR_Win ** win_ptr)
{
    void **base_ptr = (void **) baseptr;
    MPIR_Win *new_win;
    MPIR_Errflag_t barrier_errflag = MPIR_ERR_NONE;
    int mpi_errno = MPI_SUCCESS;

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_ALLOCATE);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_ALLOCATE);

    mpi_errno = win_init(size, disp_unit, win_ptr, info, comm, MPI_WIN_FLAVOR_ALLOCATE,
                         MPI_WIN_UNIFIED);
    if (mpi_errno != MPI_SUCCESS) {
        goto fn_fail;
    }

    mpi_errno = win_shm_alloc_impl(size, disp_unit, comm, base_ptr, win_ptr, SHM_WIN_OPTIONAL);
    if (mpi_errno != MPI_SUCCESS) {
        goto fn_fail;
    }

    /* Record where the window memory lives and how large it is. */
    new_win = *win_ptr;
    new_win->base = *base_ptr;
    new_win->size = size;

    mpi_errno = MPIDI_NM_mpi_win_allocate_hook(new_win);
    MPIR_ERR_CHECK(mpi_errno);

#ifndef MPIDI_CH4_DIRECT_NETMOD
    mpi_errno = MPIDI_SHM_mpi_win_allocate_hook(new_win);
    MPIR_ERR_CHECK(mpi_errno);
#endif

    mpi_errno = MPIR_Barrier(comm, &barrier_errflag);
    if (mpi_errno != MPI_SUCCESS) {
        goto fn_fail;
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_ALLOCATE);
    return mpi_errno;
  fn_fail:
    if (win_ptr) {
        win_finalize(win_ptr);
    }
    goto fn_exit;
}
965
/*
 * MPIDIG_mpi_win_create_dynamic - create a window of flavor
 * MPI_WIN_FLAVOR_DYNAMIC.
 *
 * win_init() is called with length 0 and disp_unit 1 (model
 * MPI_WIN_UNIFIED), and the window base is set to MPI_BOTTOM. The netmod
 * create_dynamic hook runs next, followed by the shm hook unless
 * MPIDI_CH4_DIRECT_NETMOD is defined. A barrier over 'comm' ends the call.
 *
 * Returns MPI_SUCCESS, or the error left in mpi_errno by the failing step
 * (the barrier result is returned as-is). The failure path performs no
 * teardown.
 */
int MPIDIG_mpi_win_create_dynamic(MPIR_Info * info, MPIR_Comm * comm, MPIR_Win ** win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_WIN_CREATE_DYNAMIC);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_WIN_CREATE_DYNAMIC);

    MPIR_Win *win;

    /* The result must land in mpi_errno: it is the value returned at
     * fn_exit. Keeping it in a separate variable made a win_init() failure
     * return MPI_SUCCESS to the caller. */
    mpi_errno = win_init(0, 1, win_ptr, info, comm, MPI_WIN_FLAVOR_DYNAMIC, MPI_WIN_UNIFIED);
    MPIR_ERR_CHECK(mpi_errno);

    win = *win_ptr;
    win->base = MPI_BOTTOM;

    mpi_errno = MPIDI_NM_mpi_win_create_dynamic_hook(win);
    MPIR_ERR_CHECK(mpi_errno);

#ifndef MPIDI_CH4_DIRECT_NETMOD
    mpi_errno = MPIDI_SHM_mpi_win_create_dynamic_hook(win);
    MPIR_ERR_CHECK(mpi_errno);
#endif

    mpi_errno = MPIR_Barrier(comm, &errflag);

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_WIN_CREATE_DYNAMIC);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
1001