1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 #include "mpiimpl.h"
7 #include "mpicomm.h"
8 #include "mpir_info.h"  /* MPIR_Info_free */
9 
10 #include "utlist.h"
11 #include "uthash.h"
12 
13 /* This is the utility file for comm that contains the basic comm items
14    and storage management */
15 #ifndef MPID_COMM_PREALLOC
16 #define MPID_COMM_PREALLOC 8
17 #endif
18 
19 /* Preallocated comm objects */
20 /* initialized in initthread.c */
21 MPIR_Comm MPIR_Comm_builtin[MPIR_COMM_N_BUILTIN];
22 MPIR_Comm MPIR_Comm_direct[MPID_COMM_PREALLOC];
23 
24 MPIR_Object_alloc_t MPIR_Comm_mem = {
25     0,
26     0,
27     0,
28     0,
29     MPIR_COMM,
30     sizeof(MPIR_Comm),
31     MPIR_Comm_direct,
32     MPID_COMM_PREALLOC,
33     NULL
34 };
35 
36 /* Communicator creation functions */
37 struct MPIR_Commops *MPIR_Comm_fns = NULL;
38 static int MPIR_Comm_commit_internal(MPIR_Comm * comm);
39 
40 /* Communicator hint functions */
41 /* For balance of simplicity and feature, we'll internally use integers for both keys
42  * and values, and provide facilities to translate from and to string-based infos.
43  */
44 
45 struct MPIR_HINT {
46     const char *key;
47     MPIR_Comm_hint_fn_t fn;
48     int type;
49     int attr;                   /* e.g. whether this key is local */
50 };
51 static struct MPIR_HINT MPIR_comm_hint_list[MPIR_COMM_HINT_MAX];
52 static int next_comm_hint_index = MPIR_COMM_HINT_PREDEFINED_COUNT;
53 
MPIR_Comm_register_hint(int idx,const char * hint_key,MPIR_Comm_hint_fn_t fn,int type,int attr)54 int MPIR_Comm_register_hint(int idx, const char *hint_key, MPIR_Comm_hint_fn_t fn,
55                             int type, int attr)
56 {
57     if (idx == 0) {
58         idx = next_comm_hint_index;
59         next_comm_hint_index++;
60         MPIR_Assert(idx < MPIR_COMM_HINT_MAX);
61     } else {
62         MPIR_Assert(idx > 0 && idx < MPIR_COMM_HINT_PREDEFINED_COUNT);
63     }
64     MPIR_comm_hint_list[idx] = (struct MPIR_HINT) {
65     hint_key, fn, type, attr};
66     return idx;
67 }
68 
parse_string_value(const char * s,int type,int * val)69 static int parse_string_value(const char *s, int type, int *val)
70 {
71     if (type == MPIR_COMM_HINT_TYPE_BOOL) {
72         if (strcmp(s, "true") == 0) {
73             *val = 1;
74         } else if (strcmp(s, "false") == 0) {
75             *val = 0;
76         } else {
77             *val = atoi(s);
78         }
79     } else if (type == MPIR_COMM_HINT_TYPE_INT) {
80         *val = atoi(s);
81     } else {
82         return -1;
83     }
84     return 0;
85 }
86 
get_string_value(char * s,int type,int val)87 static int get_string_value(char *s, int type, int val)
88 {
89     if (type == MPIR_COMM_HINT_TYPE_BOOL) {
90         strncpy(s, val ? "true" : "false", MPI_MAX_INFO_VAL);
91     } else if (type == MPIR_COMM_HINT_TYPE_INT) {
92         MPL_snprintf(s, MPI_MAX_INFO_VAL, "%d", val);
93     } else {
94         return -1;
95     }
96     return 0;
97 }
98 
99 /* Hints are stored as hints array inside MPIR_Comm.
100  * All hints are initialized to zero. Communitcator creation hook can be used to
101  * to customize initialization value (make sure only do that when the value is zero
102  * or risk resetting user hints).
103  * If the hint is registered with callback function, it can be used for customization
104  * at both creation time and run-time.
105  */
MPII_Comm_set_hints(MPIR_Comm * comm_ptr,MPIR_Info * info)106 int MPII_Comm_set_hints(MPIR_Comm * comm_ptr, MPIR_Info * info)
107 {
108     MPIR_Info *curr_info;
109     LL_FOREACH(info, curr_info) {
110         if (curr_info->key == NULL)
111             continue;
112         for (int i = 0; i < next_comm_hint_index; i++) {
113             if (MPIR_comm_hint_list[i].key &&
114                 strcmp(curr_info->key, MPIR_comm_hint_list[i].key) == 0) {
115                 int val;
116                 int ret = parse_string_value(curr_info->value, MPIR_comm_hint_list[i].type, &val);
117                 if (ret == 0) {
118                     if (MPIR_comm_hint_list[i].fn) {
119                         MPIR_comm_hint_list[i].fn(comm_ptr, i, val);
120                     } else {
121                         comm_ptr->hints[i] = val;
122                     }
123                 }
124             }
125         }
126     }
127     /* FIXME: run collective to ensure hints consistency */
128     return MPI_SUCCESS;
129 }
130 
MPII_Comm_get_hints(MPIR_Comm * comm_ptr,MPIR_Info * info)131 int MPII_Comm_get_hints(MPIR_Comm * comm_ptr, MPIR_Info * info)
132 {
133     int mpi_errno = MPI_SUCCESS;
134 
135     char hint_val_str[MPI_MAX_INFO_VAL];
136     for (int i = 0; i < next_comm_hint_index; i++) {
137         if (MPIR_comm_hint_list[i].key) {
138             get_string_value(hint_val_str, MPIR_comm_hint_list[i].type, comm_ptr->hints[i]);
139             mpi_errno = MPIR_Info_set_impl(info, MPIR_comm_hint_list[i].key, hint_val_str);
140             MPIR_ERR_CHECK(mpi_errno);
141         }
142     }
143 
144   fn_exit:
145     return mpi_errno;
146   fn_fail:
147     goto fn_exit;
148 }
149 
MPII_Comm_check_hints(MPIR_Comm * comm)150 int MPII_Comm_check_hints(MPIR_Comm * comm)
151 {
152     /* for all non-local hints and non-zero hint values, run collective
153      * to check whether they are equal across the communicator */
154     /* TODO */
155     return MPI_SUCCESS;
156 }
157 
MPIR_Comm_hint_init(void)158 void MPIR_Comm_hint_init(void)
159 {
160     MPIR_Comm_register_hint(MPIR_COMM_HINT_NO_ANY_TAG, "mpi_assert_no_any_tag",
161                             NULL, MPIR_COMM_HINT_TYPE_BOOL, 0);
162     MPIR_Comm_register_hint(MPIR_COMM_HINT_NO_ANY_SOURCE, "mpi_assert_no_any_source",
163                             NULL, MPIR_COMM_HINT_TYPE_BOOL, 0);
164     MPIR_Comm_register_hint(MPIR_COMM_HINT_EXACT_LENGTH, "mpi_assert_exact_length",
165                             NULL, MPIR_COMM_HINT_TYPE_BOOL, 0);
166     MPIR_Comm_register_hint(MPIR_COMM_HINT_ALLOW_OVERTAKING, "mpi_assert_allow_overtaking",
167                             NULL, MPIR_COMM_HINT_TYPE_BOOL, 0);
168 }
169 
170 /* FIXME :
171    Reusing context ids can lead to a race condition if (as is desirable)
172    MPI_Comm_free does not include a barrier.  Consider the following:
173    Process A frees the communicator.
174    Process A creates a new communicator, reusing the just released id
175    Process B sends a message to A on the old communicator.
176    Process A receives the message, and believes that it belongs to the
177    new communicator.
178    Process B then cancels the message, and frees the communicator.
179 
180    The likelihood of this happening can be reduced by introducing a gap
181    between when a context id is released and when it is reused.  An alternative
182    is to use an explicit message (in the implementation of MPI_Comm_free)
183    to indicate that a communicator is being freed; this will often require
184    less communication than a barrier in MPI_Comm_free, and will ensure that
185    no messages are later sent to the same communicator (we may also want to
186    have a similar check when building fault-tolerant versions of MPI).
187  */
188 
189 /* Zeroes most non-handle fields in a communicator, as well as initializing any
190  * other special fields, such as a per-object mutex.  Also defaults the
191  * reference count to 1, under the assumption that the caller holds a reference
192  * to it.
193  *
194  * !!! The resulting struct is _not_ ready for communication !!! */
MPII_Comm_init(MPIR_Comm * comm_p)195 int MPII_Comm_init(MPIR_Comm * comm_p)
196 {
197     int mpi_errno = MPI_SUCCESS;
198 
199     MPIR_Object_set_ref(comm_p, 1);
200 
201     /* initialize local and remote sizes to -1 to allow other parts of
202      * the stack to detect errors more easily */
203     comm_p->local_size = -1;
204     comm_p->remote_size = -1;
205 
206     /* Clear many items (empty means to use the default; some of these
207      * may be overridden within the upper-level communicator initialization) */
208     comm_p->errhandler = NULL;
209     comm_p->attributes = NULL;
210     comm_p->remote_group = NULL;
211     comm_p->local_group = NULL;
212     comm_p->topo_fns = NULL;
213     comm_p->name[0] = '\0';
214     comm_p->seq = 0;    /* default to 0, to be updated at Comm_commit */
215     comm_p->tainted = 0;
216     memset(comm_p->hints, 0, sizeof(comm_p->hints));
217 
218     comm_p->hierarchy_kind = MPIR_COMM_HIERARCHY_KIND__FLAT;
219     comm_p->node_comm = NULL;
220     comm_p->node_roots_comm = NULL;
221     comm_p->intranode_table = NULL;
222     comm_p->internode_table = NULL;
223 
224     /* abstractions bleed a bit here... :(*/
225     comm_p->next_sched_tag = MPIR_FIRST_NBC_TAG;
226 
227     /* Initialize the revoked flag as false */
228     comm_p->revoked = 0;
229     comm_p->mapper_head = NULL;
230     comm_p->mapper_tail = NULL;
231 
232 #if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ
233     {
234         int thr_err;
235         MPID_Thread_mutex_create(&MPIR_THREAD_POBJ_COMM_MUTEX(comm_p), &thr_err);
236         MPIR_Assert(thr_err == 0);
237     }
238 #endif
239     /* Fields not set include context_id, remote and local size, and
240      * kind, since different communicator construction routines need
241      * different values */
242     return mpi_errno;
243 }
244 
245 
246 /*
247     Create a communicator structure and perform basic initialization
248     (mostly clearing fields and updating the reference count).
249  */
MPIR_Comm_create(MPIR_Comm ** newcomm_ptr)250 int MPIR_Comm_create(MPIR_Comm ** newcomm_ptr)
251 {
252     int mpi_errno = MPI_SUCCESS;
253     MPIR_Comm *newptr;
254     MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE);
255 
256     MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_CREATE);
257 
258     newptr = (MPIR_Comm *) MPIR_Handle_obj_alloc(&MPIR_Comm_mem);
259     MPIR_ERR_CHKANDJUMP(!newptr, mpi_errno, MPI_ERR_OTHER, "**nomem");
260 
261     *newcomm_ptr = newptr;
262 
263     mpi_errno = MPII_Comm_init(newptr);
264     MPIR_ERR_CHECK(mpi_errno);
265 
266     /* Insert this new communicator into the list of known communicators.
267      * Make this conditional on debugger support to match the test in
268      * MPIR_Comm_release . */
269     MPII_COMML_REMEMBER(newptr);
270 
271   fn_fail:
272     MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_CREATE);
273 
274     return mpi_errno;
275 }
276 
277 /* Create a local intra communicator from the local group of the
278    specified intercomm. */
279 /* FIXME this is an alternative constructor that doesn't use MPIR_Comm_create! */
MPII_Setup_intercomm_localcomm(MPIR_Comm * intercomm_ptr)280 int MPII_Setup_intercomm_localcomm(MPIR_Comm * intercomm_ptr)
281 {
282     MPIR_Comm *localcomm_ptr;
283     int mpi_errno = MPI_SUCCESS;
284     MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);
285 
286     MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);
287 
288     localcomm_ptr = (MPIR_Comm *) MPIR_Handle_obj_alloc(&MPIR_Comm_mem);
289     MPIR_ERR_CHKANDJUMP(!localcomm_ptr, mpi_errno, MPI_ERR_OTHER, "**nomem");
290 
291     /* get sensible default values for most fields (usually zeros) */
292     mpi_errno = MPII_Comm_init(localcomm_ptr);
293     MPIR_ERR_CHECK(mpi_errno);
294 
295     /* use the parent intercomm's recv ctx as the basis for our ctx */
296     localcomm_ptr->recvcontext_id =
297         MPIR_CONTEXT_SET_FIELD(IS_LOCALCOMM, intercomm_ptr->recvcontext_id, 1);
298     localcomm_ptr->context_id = localcomm_ptr->recvcontext_id;
299 
300     MPL_DBG_MSG_FMT(MPIR_DBG_COMM, TYPICAL,
301                     (MPL_DBG_FDEST,
302                      "setup_intercomm_localcomm ic=%p ic->context_id=%d ic->recvcontext_id=%d lc->recvcontext_id=%d",
303                      intercomm_ptr, intercomm_ptr->context_id, intercomm_ptr->recvcontext_id,
304                      localcomm_ptr->recvcontext_id));
305 
306     /* Save the kind of the communicator */
307     localcomm_ptr->comm_kind = MPIR_COMM_KIND__INTRACOMM;
308 
309     /* Set the sizes and ranks */
310     localcomm_ptr->remote_size = intercomm_ptr->local_size;
311     localcomm_ptr->local_size = intercomm_ptr->local_size;
312     localcomm_ptr->rank = intercomm_ptr->rank;
313 
314     MPIR_Comm_map_dup(localcomm_ptr, intercomm_ptr, MPIR_COMM_MAP_DIR__L2L);
315 
316     /* TODO More advanced version: if the group is available, dup it by
317      * increasing the reference count instead of recreating it later */
318     /* FIXME  : No local functions for the topology routines */
319 
320     intercomm_ptr->local_comm = localcomm_ptr;
321 
322     /* sets up the SMP-aware sub-communicators and tables */
323     /* This routine maybe used inside MPI_Comm_idup, so we can't synchronize
324      * seq using blocking collectives, thus mark as tainted. */
325     localcomm_ptr->tainted = 1;
326     mpi_errno = MPIR_Comm_commit(localcomm_ptr);
327     MPIR_ERR_CHECK(mpi_errno);
328 
329   fn_fail:
330     MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);
331 
332     return mpi_errno;
333 }
334 
MPIR_Comm_map_irregular(MPIR_Comm * newcomm,MPIR_Comm * src_comm,int * src_mapping,int src_mapping_size,MPIR_Comm_map_dir_t dir,MPIR_Comm_map_t ** map)335 int MPIR_Comm_map_irregular(MPIR_Comm * newcomm, MPIR_Comm * src_comm,
336                             int *src_mapping, int src_mapping_size,
337                             MPIR_Comm_map_dir_t dir, MPIR_Comm_map_t ** map)
338 {
339     int mpi_errno = MPI_SUCCESS;
340     MPIR_Comm_map_t *mapper;
341     MPIR_CHKPMEM_DECL(3);
342     MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_MAP_TYPE__IRREGULAR);
343 
344     MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_MAP_TYPE__IRREGULAR);
345 
346     MPIR_CHKPMEM_MALLOC(mapper, MPIR_Comm_map_t *, sizeof(MPIR_Comm_map_t), mpi_errno, "mapper",
347                         MPL_MEM_COMM);
348 
349     mapper->type = MPIR_COMM_MAP_TYPE__IRREGULAR;
350     mapper->src_comm = src_comm;
351     mapper->dir = dir;
352     mapper->src_mapping_size = src_mapping_size;
353 
354     if (src_mapping) {
355         mapper->src_mapping = src_mapping;
356         mapper->free_mapping = 0;
357     } else {
358         MPIR_CHKPMEM_MALLOC(mapper->src_mapping, int *,
359                             src_mapping_size * sizeof(int), mpi_errno, "mapper mapping",
360                             MPL_MEM_COMM);
361         mapper->free_mapping = 1;
362     }
363 
364     mapper->next = NULL;
365 
366     LL_APPEND(newcomm->mapper_head, newcomm->mapper_tail, mapper);
367 
368     if (map)
369         *map = mapper;
370 
371   fn_exit:
372     MPIR_CHKPMEM_COMMIT();
373     MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_MAP_TYPE__IRREGULAR);
374     return mpi_errno;
375   fn_fail:
376     MPIR_CHKPMEM_REAP();
377     goto fn_exit;
378 }
379 
MPIR_Comm_map_dup(MPIR_Comm * newcomm,MPIR_Comm * src_comm,MPIR_Comm_map_dir_t dir)380 int MPIR_Comm_map_dup(MPIR_Comm * newcomm, MPIR_Comm * src_comm, MPIR_Comm_map_dir_t dir)
381 {
382     int mpi_errno = MPI_SUCCESS;
383     MPIR_Comm_map_t *mapper;
384     MPIR_CHKPMEM_DECL(1);
385     MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_MAP_TYPE__DUP);
386 
387     MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_MAP_TYPE__DUP);
388 
389     MPIR_CHKPMEM_MALLOC(mapper, MPIR_Comm_map_t *, sizeof(MPIR_Comm_map_t), mpi_errno, "mapper",
390                         MPL_MEM_COMM);
391 
392     mapper->type = MPIR_COMM_MAP_TYPE__DUP;
393     mapper->src_comm = src_comm;
394     mapper->dir = dir;
395 
396     mapper->next = NULL;
397 
398     LL_APPEND(newcomm->mapper_head, newcomm->mapper_tail, mapper);
399 
400   fn_exit:
401     MPIR_CHKPMEM_COMMIT();
402     MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_MAP_TYPE__DUP);
403     return mpi_errno;
404   fn_fail:
405     MPIR_CHKPMEM_REAP();
406     goto fn_exit;
407 }
408 
409 
MPIR_Comm_map_free(MPIR_Comm * comm)410 int MPIR_Comm_map_free(MPIR_Comm * comm)
411 {
412     int mpi_errno = MPI_SUCCESS;
413     MPIR_Comm_map_t *mapper, *tmp;
414     MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_MAP_FREE);
415 
416     MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_MAP_FREE);
417 
418     for (mapper = comm->mapper_head; mapper;) {
419         tmp = mapper->next;
420         if (mapper->type == MPIR_COMM_MAP_TYPE__IRREGULAR && mapper->free_mapping)
421             MPL_free(mapper->src_mapping);
422         MPL_free(mapper);
423         mapper = tmp;
424     }
425     comm->mapper_head = NULL;
426 
427     MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_MAP_FREE);
428     return mpi_errno;
429 }
430 
get_node_count(MPIR_Comm * comm,int * node_count)431 static int get_node_count(MPIR_Comm * comm, int *node_count)
432 {
433     int mpi_errno = MPI_SUCCESS;
434     struct uniq_nodes {
435         int id;
436         UT_hash_handle hh;
437     } *node_list = NULL;
438     struct uniq_nodes *s, *tmp;
439 
440     if (comm->comm_kind != MPIR_COMM_KIND__INTRACOMM) {
441         *node_count = comm->local_size;
442         goto fn_exit;
443     } else if (comm->hierarchy_kind == MPIR_COMM_HIERARCHY_KIND__NODE) {
444         *node_count = 1;
445         goto fn_exit;
446     } else if (comm->hierarchy_kind == MPIR_COMM_HIERARCHY_KIND__NODE_ROOTS) {
447         *node_count = comm->local_size;
448         goto fn_exit;
449     }
450 
451     /* go through the list of ranks and add the unique ones to the
452      * node_list array */
453     for (int i = 0; i < comm->local_size; i++) {
454         int node;
455 
456         mpi_errno = MPID_Get_node_id(comm, i, &node);
457         MPIR_ERR_CHECK(mpi_errno);
458 
459         HASH_FIND_INT(node_list, &node, s);
460         if (s == NULL) {
461             s = (struct uniq_nodes *) MPL_malloc(sizeof(struct uniq_nodes), MPL_MEM_COLL);
462             MPIR_Assert(s);
463             s->id = node;
464             HASH_ADD_INT(node_list, id, s, MPL_MEM_COLL);
465         }
466     }
467 
468     /* the final size of our hash table is our node count */
469     *node_count = HASH_COUNT(node_list);
470 
471     /* free up everything */
472     HASH_ITER(hh, node_list, s, tmp) {
473         HASH_DEL(node_list, s);
474         MPL_free(s);
475     }
476 
477   fn_exit:
478     return mpi_errno;
479   fn_fail:
480     goto fn_exit;
481 }
482 
MPIR_Comm_commit_internal(MPIR_Comm * comm)483 static int MPIR_Comm_commit_internal(MPIR_Comm * comm)
484 {
485     int mpi_errno = MPI_SUCCESS;
486     MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_COMMIT_INTERNAL);
487     MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_COMMIT_INTERNAL);
488 
489     /* Notify device of communicator creation */
490     mpi_errno = MPID_Comm_commit_pre_hook(comm);
491     MPIR_ERR_CHECK(mpi_errno);
492 
493     mpi_errno = get_node_count(comm, &comm->node_count);
494     MPIR_ERR_CHECK(mpi_errno);
495 
496     MPIR_Comm_map_free(comm);
497 
498   fn_exit:
499     MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_COMMIT_INTERNAL);
500     return mpi_errno;
501   fn_fail:
502     goto fn_exit;
503 }
504 
MPIR_Comm_create_subcomms(MPIR_Comm * comm)505 int MPIR_Comm_create_subcomms(MPIR_Comm * comm)
506 {
507     int mpi_errno = MPI_SUCCESS;
508     int num_local = -1, num_external = -1;
509     int local_rank = -1, external_rank = -1;
510     int *local_procs = NULL, *external_procs = NULL;
511 
512     MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE_SUBCOMMS);
513     MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_CREATE_SUBCOMMS);
514 
515     MPIR_Assert(comm->node_comm == NULL);
516     MPIR_Assert(comm->node_roots_comm == NULL);
517 
518     mpi_errno = MPIR_Find_local(comm, &num_local, &local_rank, &local_procs,
519                                 &comm->intranode_table);
520     /* --BEGIN ERROR HANDLING-- */
521     if (mpi_errno) {
522         if (MPIR_Err_is_fatal(mpi_errno))
523             MPIR_ERR_POP(mpi_errno);
524 
525         /* Non-fatal errors simply mean that this communicator will not have
526          * any node awareness.  Node-aware collectives are an optimization. */
527         MPL_DBG_MSG_P(MPIR_DBG_COMM, VERBOSE, "MPIR_Find_local failed for comm_ptr=%p", comm);
528         MPL_free(comm->intranode_table);
529 
530         mpi_errno = MPI_SUCCESS;
531         goto fn_exit;
532     }
533     /* --END ERROR HANDLING-- */
534 
535     mpi_errno = MPIR_Find_external(comm, &num_external, &external_rank, &external_procs,
536                                    &comm->internode_table);
537     /* --BEGIN ERROR HANDLING-- */
538     if (mpi_errno) {
539         if (MPIR_Err_is_fatal(mpi_errno))
540             MPIR_ERR_POP(mpi_errno);
541 
542         /* Non-fatal errors simply mean that this communicator will not have
543          * any node awareness.  Node-aware collectives are an optimization. */
544         MPL_DBG_MSG_P(MPIR_DBG_COMM, VERBOSE, "MPIR_Find_external failed for comm_ptr=%p", comm);
545         MPL_free(comm->internode_table);
546 
547         mpi_errno = MPI_SUCCESS;
548         goto fn_exit;
549     }
550     /* --END ERROR HANDLING-- */
551 
552     /* defensive checks */
553     MPIR_Assert(num_local > 0);
554     MPIR_Assert(num_local > 1 || external_rank >= 0);
555     MPIR_Assert(external_rank < 0 || external_procs != NULL);
556 
557     /* if the node_roots_comm and comm would be the same size, then creating
558      * the second communicator is useless and wasteful. */
559     if (num_external == comm->remote_size) {
560         MPIR_Assert(num_local == 1);
561         goto fn_exit;
562     }
563 
564     /* we don't need a local comm if this process is the only one on this node */
565     if (num_local > 1) {
566         mpi_errno = MPIR_Comm_create(&comm->node_comm);
567         MPIR_ERR_CHECK(mpi_errno);
568 
569         comm->node_comm->context_id = comm->context_id + MPIR_CONTEXT_INTRANODE_OFFSET;
570         comm->node_comm->recvcontext_id = comm->node_comm->context_id;
571         comm->node_comm->rank = local_rank;
572         comm->node_comm->comm_kind = MPIR_COMM_KIND__INTRACOMM;
573         comm->node_comm->hierarchy_kind = MPIR_COMM_HIERARCHY_KIND__NODE;
574         comm->node_comm->local_comm = NULL;
575         MPL_DBG_MSG_D(MPIR_DBG_COMM, VERBOSE, "Create node_comm=%p\n", comm->node_comm);
576 
577         comm->node_comm->local_size = num_local;
578         comm->node_comm->remote_size = num_local;
579 
580         MPIR_Comm_map_irregular(comm->node_comm, comm, local_procs, num_local,
581                                 MPIR_COMM_MAP_DIR__L2L, NULL);
582         mpi_errno = MPIR_Comm_commit_internal(comm->node_comm);
583         MPIR_ERR_CHECK(mpi_errno);
584     }
585 
586     /* this process may not be a member of the node_roots_comm */
587     if (local_rank == 0) {
588         mpi_errno = MPIR_Comm_create(&comm->node_roots_comm);
589         MPIR_ERR_CHECK(mpi_errno);
590 
591         comm->node_roots_comm->context_id = comm->context_id + MPIR_CONTEXT_INTERNODE_OFFSET;
592         comm->node_roots_comm->recvcontext_id = comm->node_roots_comm->context_id;
593         comm->node_roots_comm->rank = external_rank;
594         comm->node_roots_comm->comm_kind = MPIR_COMM_KIND__INTRACOMM;
595         comm->node_roots_comm->hierarchy_kind = MPIR_COMM_HIERARCHY_KIND__NODE_ROOTS;
596         comm->node_roots_comm->local_comm = NULL;
597         MPL_DBG_MSG_D(MPIR_DBG_COMM, VERBOSE, "Create node_roots_comm=%p\n", comm->node_roots_comm);
598 
599         comm->node_roots_comm->local_size = num_external;
600         comm->node_roots_comm->remote_size = num_external;
601 
602         MPIR_Comm_map_irregular(comm->node_roots_comm, comm, external_procs, num_external,
603                                 MPIR_COMM_MAP_DIR__L2L, NULL);
604         mpi_errno = MPIR_Comm_commit_internal(comm->node_roots_comm);
605         MPIR_ERR_CHECK(mpi_errno);
606     }
607 
608     comm->hierarchy_kind = MPIR_COMM_HIERARCHY_KIND__PARENT;
609 
610   fn_exit:
611     MPL_free(local_procs);
612     MPL_free(external_procs);
613     MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_CREATE_SUBCOMMS);
614     return mpi_errno;
615   fn_fail:
616     goto fn_exit;
617 }
618 
619 /* static routines for MPIR_Comm_commit */
init_comm_seq(MPIR_Comm * comm)620 static int init_comm_seq(MPIR_Comm * comm)
621 {
622     int mpi_errno = MPI_SUCCESS;
623 
624     /* Every user-level communicator gets a sequence number, which can be
625      * used, for example, to hash vci.
626      * Builtin-comm, e.g. MPI_COMM_WORLD, always have seq at 0 */
627     if (!HANDLE_IS_BUILTIN(comm->handle)) {
628         static int vci_seq = 0;
629         vci_seq++;
630 
631         int tmp = vci_seq;
632         /* Bcast seq over vci 0 */
633         MPIR_Assert(comm->seq == 0);
634 
635         /* Every rank need share the same seq from root. NOTE: it is possible for
636          * different communicators to have the same seq. It is only used as an
637          * opportunistic optimization */
638         MPIR_Errflag_t errflag = MPIR_ERR_NONE;
639         mpi_errno = MPIR_Bcast_allcomm_auto(&tmp, 1, MPI_INT, 0, comm, &errflag);
640         MPIR_ERR_CHECK(mpi_errno);
641 
642         comm->seq = tmp;
643     }
644 
645     if (comm->node_comm) {
646         comm->node_comm->seq = comm->seq;
647     }
648 
649     if (comm->node_roots_comm) {
650         comm->node_roots_comm->seq = comm->seq;
651     }
652 
653   fn_exit:
654     return mpi_errno;
655   fn_fail:
656     goto fn_exit;
657 }
658 
659 /* Provides a hook for the top level functions to perform some manipulation on a
660    communicator just before it is given to the application level.
661 
662    For example, we create sub-communicators for SMP-aware collectives at this
663    step. */
MPIR_Comm_commit(MPIR_Comm * comm)664 int MPIR_Comm_commit(MPIR_Comm * comm)
665 {
666     int mpi_errno = MPI_SUCCESS;
667     MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_COMMIT);
668 
669     MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_COMMIT);
670 
671     /* It's OK to relax these assertions, but we should do so very
672      * intentionally.  For now this function is the only place that we create
673      * our hierarchy of communicators */
674     MPIR_Assert(comm->node_comm == NULL);
675     MPIR_Assert(comm->node_roots_comm == NULL);
676 
677     /* Notify device of communicator creation */
678     mpi_errno = MPIR_Comm_commit_internal(comm);
679     MPIR_ERR_CHECK(mpi_errno);
680 
681     if (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM && !MPIR_CONTEXT_READ_FIELD(SUBCOMM, comm->context_id)) {  /*make sure this is not a subcomm */
682         mpi_errno = MPIR_Comm_create_subcomms(comm);
683         MPIR_ERR_CHECK(mpi_errno);
684     }
685 
686     /* Create collectives-specific infrastructure */
687     mpi_errno = MPIR_Coll_comm_init(comm);
688     MPIR_ERR_CHECK(mpi_errno);
689 
690     if (comm->node_comm) {
691         mpi_errno = MPIR_Coll_comm_init(comm->node_comm);
692         MPIR_ERR_CHECK(mpi_errno);
693     }
694 
695     if (comm->node_roots_comm) {
696         mpi_errno = MPIR_Coll_comm_init(comm->node_roots_comm);
697         MPIR_ERR_CHECK(mpi_errno);
698     }
699 
700     /* call post commit hooks */
701     mpi_errno = MPID_Comm_commit_post_hook(comm);
702     MPIR_ERR_CHECK(mpi_errno);
703 
704     if (comm->node_comm) {
705         mpi_errno = MPID_Comm_commit_post_hook(comm->node_comm);
706         MPIR_ERR_CHECK(mpi_errno);
707     }
708 
709     if (comm->node_roots_comm) {
710         mpi_errno = MPID_Comm_commit_post_hook(comm->node_roots_comm);
711         MPIR_ERR_CHECK(mpi_errno);
712     }
713 
714     if (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM && !comm->tainted) {
715         mpi_errno = init_comm_seq(comm);
716         MPIR_ERR_CHECK(mpi_errno);
717     }
718 
719   fn_exit:
720     MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_COMMIT);
721     return mpi_errno;
722   fn_fail:
723     goto fn_exit;
724 }
725 
726 /* Returns true if the given communicator is aware of node topology information,
727    false otherwise.  Such information could be used to implement more efficient
728    collective communication, for example. */
MPIR_Comm_is_parent_comm(MPIR_Comm * comm)729 int MPIR_Comm_is_parent_comm(MPIR_Comm * comm)
730 {
731     return (comm->hierarchy_kind == MPIR_COMM_HIERARCHY_KIND__PARENT);
732 }
733 
734 /* Returns true if the communicator is node-aware and processes in all the nodes
735    are consecutive. For example, if node 0 contains "0, 1, 2, 3", node 1
736    contains "4, 5, 6", and node 2 contains "7", we shall return true. */
MPII_Comm_is_node_consecutive(MPIR_Comm * comm)737 int MPII_Comm_is_node_consecutive(MPIR_Comm * comm)
738 {
739     int i = 0, curr_nodeidx = 0;
740     int *internode_table = comm->internode_table;
741 
742     if (!MPIR_Comm_is_parent_comm(comm))
743         return 0;
744 
745     for (; i < comm->local_size; i++) {
746         if (internode_table[i] == curr_nodeidx + 1)
747             curr_nodeidx++;
748         else if (internode_table[i] != curr_nodeidx)
749             return 0;
750     }
751 
752     return 1;
753 }
754 
755 /*
756  * Copy a communicator, including creating a new context and copying the
757  * virtual connection tables and clearing the various fields.
758  * Does *not* copy attributes.  If size is < the size of the local group
759  * in the input communicator, copy only the first size elements.
760  * If this process is not a member, return a null pointer in outcomm_ptr.
761  * This is only supported in the case where the communicator is in
762  * Intracomm (not an Intercomm).  Note that this is all that is required
763  * for cart_create and graph_create.
764  *
765  * Used by cart_create, graph_create, and dup_create
766  */
MPII_Comm_copy(MPIR_Comm * comm_ptr,int size,MPIR_Info * info,MPIR_Comm ** outcomm_ptr)767 int MPII_Comm_copy(MPIR_Comm * comm_ptr, int size, MPIR_Info * info, MPIR_Comm ** outcomm_ptr)
768 {
769     int mpi_errno = MPI_SUCCESS;
770     MPIR_Context_id_t new_context_id, new_recvcontext_id;
771     MPIR_Comm *newcomm_ptr = NULL;
772     MPIR_Comm_map_t *map = NULL;
773     MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_COPY);
774 
775     MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_COPY);
776 
777     /* Get a new context first.  We need this to be collective over the
778      * input communicator */
779     /* If there is a context id cache in oldcomm, use it here.  Otherwise,
780      * use the appropriate algorithm to get a new context id.  Be careful
781      * of intercomms here */
782     if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) {
783         mpi_errno = MPIR_Get_intercomm_contextid(comm_ptr, &new_context_id, &new_recvcontext_id);
784         MPIR_ERR_CHECK(mpi_errno);
785     } else {
786         mpi_errno = MPIR_Get_contextid_sparse(comm_ptr, &new_context_id, FALSE);
787         new_recvcontext_id = new_context_id;
788         MPIR_ERR_CHECK(mpi_errno);
789         MPIR_Assert(new_context_id != 0);
790     }
791 
792     /* This is the local size, not the remote size, in the case of
793      * an intercomm */
794     if (comm_ptr->rank >= size) {
795         *outcomm_ptr = 0;
796         /* always free the recvcontext ID, never the "send" ID */
797         MPIR_Free_contextid(new_recvcontext_id);
798         goto fn_exit;
799     }
800 
801     /* We're left with the processes that will have a non-null communicator.
802      * Create the object, initialize the data, and return the result */
803 
804     mpi_errno = MPIR_Comm_create(&newcomm_ptr);
805     if (mpi_errno)
806         goto fn_fail;
807 
808     newcomm_ptr->context_id = new_context_id;
809     newcomm_ptr->recvcontext_id = new_recvcontext_id;
810 
811     /* Save the kind of the communicator */
812     newcomm_ptr->comm_kind = comm_ptr->comm_kind;
813     newcomm_ptr->local_comm = 0;
814 
815     /* There are two cases here - size is the same as the old communicator,
816      * or it is smaller.  If the size is the same, we can just add a reference.
817      * Otherwise, we need to create a new network address mapping.  Note that this is the
818      * test that matches the test on rank above. */
819     if (size == comm_ptr->local_size) {
820         /* Duplicate the network address mapping */
821         if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM)
822             MPIR_Comm_map_dup(newcomm_ptr, comm_ptr, MPIR_COMM_MAP_DIR__L2L);
823         else
824             MPIR_Comm_map_dup(newcomm_ptr, comm_ptr, MPIR_COMM_MAP_DIR__R2R);
825     } else {
826         int i;
827 
828         if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM)
829             MPIR_Comm_map_irregular(newcomm_ptr, comm_ptr, NULL, size, MPIR_COMM_MAP_DIR__L2L,
830                                     &map);
831         else
832             MPIR_Comm_map_irregular(newcomm_ptr, comm_ptr, NULL, size, MPIR_COMM_MAP_DIR__R2R,
833                                     &map);
834         for (i = 0; i < size; i++) {
835             /* For rank i in the new communicator, find the corresponding
836              * rank in the input communicator */
837             map->src_mapping[i] = i;
838         }
839     }
840 
841     /* If it is an intercomm, duplicate the local network address references */
842     if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) {
843         MPIR_Comm_map_dup(newcomm_ptr, comm_ptr, MPIR_COMM_MAP_DIR__L2L);
844     }
845 
846     /* Set the sizes and ranks */
847     newcomm_ptr->rank = comm_ptr->rank;
848     if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) {
849         newcomm_ptr->local_size = comm_ptr->local_size;
850         newcomm_ptr->remote_size = comm_ptr->remote_size;
851         newcomm_ptr->is_low_group = comm_ptr->is_low_group;
852     } else {
853         newcomm_ptr->local_size = size;
854         newcomm_ptr->remote_size = size;
855     }
856 
857     /* Inherit the error handler (if any) */
858     MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
859     newcomm_ptr->errhandler = comm_ptr->errhandler;
860     if (comm_ptr->errhandler) {
861         MPIR_Errhandler_add_ref(comm_ptr->errhandler);
862     }
863     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
864 
865 #if 1
866     /* FIXME: only copy over hints for MPI 3.1 and earlier */
867     memcpy((void *) (newcomm_ptr->hints), (void *) (comm_ptr->hints), sizeof(comm_ptr->hints));
868 #endif
869 
870     if (info) {
871         MPII_Comm_set_hints(newcomm_ptr, info);
872     }
873 
874     newcomm_ptr->tainted = comm_ptr->tainted;
875     mpi_errno = MPIR_Comm_commit(newcomm_ptr);
876     MPIR_ERR_CHECK(mpi_errno);
877 
878     /* Start with no attributes on this communicator */
879     newcomm_ptr->attributes = 0;
880 
881     *outcomm_ptr = newcomm_ptr;
882 
883   fn_fail:
884   fn_exit:
885 
886     MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_COPY);
887 
888     return mpi_errno;
889 }
890 
891 /* Copy a communicator, including copying the virtual connection tables and
892  * clearing the various fields.  Does *not* allocate a context ID or commit the
893  * communicator.  Does *not* copy attributes.
894  *
895  * Used by comm_idup.
896  */
MPII_Comm_copy_data(MPIR_Comm * comm_ptr,MPIR_Comm ** outcomm_ptr)897 int MPII_Comm_copy_data(MPIR_Comm * comm_ptr, MPIR_Comm ** outcomm_ptr)
898 {
899     int mpi_errno = MPI_SUCCESS;
900     MPIR_Comm *newcomm_ptr = NULL;
901     MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_COPY_DATA);
902 
903     MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_COPY_DATA);
904 
905     mpi_errno = MPIR_Comm_create(&newcomm_ptr);
906     if (mpi_errno)
907         goto fn_fail;
908 
909     /* use a large garbage value to ensure errors are caught more easily */
910     newcomm_ptr->context_id = 32767;
911     newcomm_ptr->recvcontext_id = 32767;
912 
913     /* Save the kind of the communicator */
914     newcomm_ptr->comm_kind = comm_ptr->comm_kind;
915     newcomm_ptr->local_comm = 0;
916 
917     if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM)
918         MPIR_Comm_map_dup(newcomm_ptr, comm_ptr, MPIR_COMM_MAP_DIR__L2L);
919     else
920         MPIR_Comm_map_dup(newcomm_ptr, comm_ptr, MPIR_COMM_MAP_DIR__R2R);
921 
922     /* If it is an intercomm, duplicate the network address mapping */
923     if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) {
924         MPIR_Comm_map_dup(newcomm_ptr, comm_ptr, MPIR_COMM_MAP_DIR__L2L);
925     }
926 
927     /* Set the sizes and ranks */
928     newcomm_ptr->rank = comm_ptr->rank;
929     newcomm_ptr->local_size = comm_ptr->local_size;
930     newcomm_ptr->remote_size = comm_ptr->remote_size;
931     newcomm_ptr->is_low_group = comm_ptr->is_low_group; /* only relevant for intercomms */
932 
933     /* Inherit the error handler (if any) */
934     MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
935     newcomm_ptr->errhandler = comm_ptr->errhandler;
936     if (comm_ptr->errhandler) {
937         MPIR_Errhandler_add_ref(comm_ptr->errhandler);
938     }
939     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
940 
941     /* Start with no attributes on this communicator */
942     newcomm_ptr->attributes = 0;
943     *outcomm_ptr = newcomm_ptr;
944 
945     /* inherit tainted flag */
946     newcomm_ptr->tainted = comm_ptr->tainted;
947 
948   fn_fail:
949     MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_COPY_DATA);
950     return mpi_errno;
951 }
952 
953 /* Common body between MPIR_Comm_release and MPIR_comm_release_always.  This
954  * helper function frees the actual MPIR_Comm structure and any associated
955  * storage.  It also releases any references to other objects.
956  * This function should only be called when the communicator's reference count
957  * has dropped to 0.
958  *
959  * !!! This routine should *never* be called outside of MPIR_Comm_release{,_always} !!!
960  */
MPIR_Comm_delete_internal(MPIR_Comm * comm_ptr)961 int MPIR_Comm_delete_internal(MPIR_Comm * comm_ptr)
962 {
963     int in_use;
964     int mpi_errno = MPI_SUCCESS;
965     MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_COMM_DELETE_INTERNAL);
966 
967     MPIR_FUNC_TERSE_ENTER(MPID_STATE_COMM_DELETE_INTERNAL);
968 
969     MPIR_Assert(MPIR_Object_get_ref(comm_ptr) == 0);    /* sanity check */
970 
971     /* Remove the attributes, executing the attribute delete routine.
972      * Do this only if the attribute functions are defined.
973      * This must be done first, because if freeing the attributes
974      * returns an error, the communicator is not freed */
975     if (MPIR_Process.attr_free && comm_ptr->attributes) {
976         /* Temporarily add a reference to this communicator because
977          * the attr_free code requires a valid communicator */
978         MPIR_Object_add_ref(comm_ptr);
979         mpi_errno = MPIR_Process.attr_free(comm_ptr->handle, &comm_ptr->attributes);
980         /* Release the temporary reference added before the call to
981          * attr_free */
982         MPIR_Object_release_ref(comm_ptr, &in_use);
983     }
984 
985     /* If the attribute delete functions return failure, the
986      * communicator must not be freed.  That is the reason for the
987      * test on mpi_errno here. */
988     if (mpi_errno == MPI_SUCCESS) {
989         /* If this communicator is our parent, and we're disconnecting
990          * from the parent, mark that fact */
991         if (MPIR_Process.comm_parent == comm_ptr)
992             MPIR_Process.comm_parent = NULL;
993 
994         /* Cleanup collectives-specific infrastructure */
995         mpi_errno = MPII_Coll_comm_cleanup(comm_ptr);
996         MPIR_ERR_CHECK(mpi_errno);
997 
998         /* Notify the device that the communicator is about to be
999          * destroyed */
1000         mpi_errno = MPID_Comm_free_hook(comm_ptr);
1001         MPIR_ERR_CHECK(mpi_errno);
1002 
1003         if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM && comm_ptr->local_comm)
1004             MPIR_Comm_release(comm_ptr->local_comm);
1005 
1006         /* Free the local and remote groups, if they exist */
1007         if (comm_ptr->local_group)
1008             MPIR_Group_release(comm_ptr->local_group);
1009         if (comm_ptr->remote_group)
1010             MPIR_Group_release(comm_ptr->remote_group);
1011 
1012         /* free the intra/inter-node communicators, if they exist */
1013         if (comm_ptr->node_comm)
1014             MPIR_Comm_release(comm_ptr->node_comm);
1015         if (comm_ptr->node_roots_comm)
1016             MPIR_Comm_release(comm_ptr->node_roots_comm);
1017         MPL_free(comm_ptr->intranode_table);
1018         MPL_free(comm_ptr->internode_table);
1019 
1020         /* Free the context value.  This should come after freeing the
1021          * intra/inter-node communicators since those free calls won't
1022          * release this context ID and releasing this before then could lead
1023          * to races once we make threading finer grained. */
1024         /* This must be the recvcontext_id (i.e. not the (send)context_id)
1025          * because in the case of intercommunicators the send context ID is
1026          * allocated out of the remote group's bit vector, not ours. */
1027         MPIR_Free_contextid(comm_ptr->recvcontext_id);
1028 
1029 #if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ
1030         {
1031             int thr_err;
1032             MPID_Thread_mutex_destroy(&MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr), &thr_err);
1033             MPIR_Assert(thr_err == 0);
1034         }
1035 #endif
1036         /* We need to release the error handler */
1037         if (comm_ptr->errhandler && !(HANDLE_IS_BUILTIN(comm_ptr->errhandler->handle))) {
1038             int errhInuse;
1039             MPIR_Errhandler_release_ref(comm_ptr->errhandler, &errhInuse);
1040             if (!errhInuse) {
1041                 MPIR_Handle_obj_free(&MPIR_Errhandler_mem, comm_ptr->errhandler);
1042             }
1043         }
1044 
1045         /* Remove from the list of active communicators if
1046          * we are supporting message-queue debugging.  We make this
1047          * conditional on having debugger support since the
1048          * operation is not constant-time */
1049         MPII_COMML_FORGET(comm_ptr);
1050 
1051         /* Check for predefined communicators - these should not
1052          * be freed */
1053         if (!(HANDLE_IS_BUILTIN(comm_ptr->handle)))
1054             MPIR_Handle_obj_free(&MPIR_Comm_mem, comm_ptr);
1055     } else {
1056         /* If the user attribute free function returns an error,
1057          * then do not free the communicator */
1058         MPIR_Comm_add_ref(comm_ptr);
1059     }
1060 
1061   fn_exit:
1062     MPIR_FUNC_TERSE_EXIT(MPID_STATE_COMM_DELETE_INTERNAL);
1063     return mpi_errno;
1064   fn_fail:
1065     goto fn_exit;
1066 }
1067 
1068 /* Release a reference to a communicator.  If there are no pending
1069    references, delete the communicator and recover all storage and
1070    context ids.  This version of the function always manipulates the reference
1071    counts, even for predefined objects. */
MPIR_Comm_release_always(MPIR_Comm * comm_ptr)1072 int MPIR_Comm_release_always(MPIR_Comm * comm_ptr)
1073 {
1074     int mpi_errno = MPI_SUCCESS;
1075     int in_use;
1076     MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);
1077 
1078     MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);
1079 
1080     /* we want to short-circuit any optimization that avoids reference counting
1081      * predefined communicators, such as MPI_COMM_WORLD or MPI_COMM_SELF. */
1082     MPIR_Object_release_ref_always(comm_ptr, &in_use);
1083     if (!in_use) {
1084         mpi_errno = MPIR_Comm_delete_internal(comm_ptr);
1085         MPIR_ERR_CHECK(mpi_errno);
1086     }
1087 
1088   fn_exit:
1089     MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);
1090     return mpi_errno;
1091   fn_fail:
1092     goto fn_exit;
1093 }
1094