1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 /*
3 * (C) 2001 by Argonne National Laboratory.
4 * See COPYRIGHT in top-level directory.
5 */
6
7 #include "mpiimpl.h"
8 #include "mpicomm.h"
9
/* This is the utility file for comm that contains the basic comm items
   and storage management */

/* Number of MPID_Comm objects carved out of static storage before the
   handle allocator falls back to the heap; the build may override it. */
#ifndef MPID_COMM_PREALLOC
#define MPID_COMM_PREALLOC 8
#endif

/* Preallocated comm objects */
/* initialized in initthread.c */
MPID_Comm MPID_Comm_builtin[MPID_COMM_N_BUILTIN] = { {0} };
MPID_Comm MPID_Comm_direct[MPID_COMM_PREALLOC] = { {0} };
/* Allocator bookkeeping for MPID_Comm handles; backed by the static
   MPID_Comm_direct array above. */
MPIU_Object_alloc_t MPID_Comm_mem = { 0, 0, 0, 0, MPID_COMM,
                                      sizeof(MPID_Comm), MPID_Comm_direct,
                                      MPID_COMM_PREALLOC};

/* Communicator creation functions */
struct MPID_CommOps *MPID_Comm_fns = NULL;
26
27 /* utility function to pretty print a context ID for debugging purposes, see
28 * mpiimpl.h for more info on the various fields */
29 #ifdef USE_DBG_LOGGING
/* Renders the fields packed into a context ID into out_str (at most len
 * bytes) for debug logging; see mpiimpl.h for the field layout. */
static void MPIR_Comm_dump_context_id(MPIR_Context_id_t context_id, char *out_str, int len)
{
    const char *subcomm_type_name = NULL;

    /* translate the numeric SUBCOMM field into a human-readable label */
    switch (MPID_CONTEXT_READ_FIELD(SUBCOMM,context_id)) {
        case 0:
            subcomm_type_name = "parent";
            break;
        case 1:
            subcomm_type_name = "intranode";
            break;
        case 2:
            subcomm_type_name = "internode";
            break;
        default:
            /* an unknown subcomm type indicates a corrupted context id */
            MPIU_Assert(FALSE);
            break;
    }

    MPIU_Snprintf(out_str, len,
                  "context_id=%d (%#x): DYNAMIC_PROC=%d PREFIX=%#x IS_LOCALCOMM=%d SUBCOMM=%s SUFFIX=%s",
                  context_id,
                  context_id,
                  MPID_CONTEXT_READ_FIELD(DYNAMIC_PROC,context_id),
                  MPID_CONTEXT_READ_FIELD(PREFIX,context_id),
                  MPID_CONTEXT_READ_FIELD(IS_LOCALCOMM,context_id),
                  subcomm_type_name,
                  (MPID_CONTEXT_READ_FIELD(SUFFIX,context_id) ? "coll" : "pt2pt"));
}
51 #endif
52
53 /* FIXME :
54 Reusing context ids can lead to a race condition if (as is desirable)
55 MPI_Comm_free does not include a barrier. Consider the following:
56 Process A frees the communicator.
57 Process A creates a new communicator, reusing the just released id
58 Process B sends a message to A on the old communicator.
59 Process A receives the message, and believes that it belongs to the
60 new communicator.
61 Process B then cancels the message, and frees the communicator.
62
   The likelihood of this happening can be reduced by introducing a gap
64 between when a context id is released and when it is reused. An alternative
65 is to use an explicit message (in the implementation of MPI_Comm_free)
66 to indicate that a communicator is being freed; this will often require
67 less communication than a barrier in MPI_Comm_free, and will ensure that
68 no messages are later sent to the same communicator (we may also want to
69 have a similar check when building fault-tolerant versions of MPI).
70 */
71
72 /* Zeroes most non-handle fields in a communicator, as well as initializing any
73 * other special fields, such as a per-object mutex. Also defaults the
74 * reference count to 1, under the assumption that the caller holds a reference
75 * to it.
76 *
77 * !!! The resulting struct is _not_ ready for communication !!! */
/* Zeroes most non-handle fields in a communicator, as well as initializing any
 * other special fields, such as a per-object mutex.  Also defaults the
 * reference count to 1, under the assumption that the caller holds a reference
 * to it.
 *
 * !!! The resulting struct is _not_ ready for communication !!! */
int MPIR_Comm_init(MPID_Comm *comm_p)
{
    int mpi_errno = MPI_SUCCESS;

    /* the caller is assumed to hold the initial reference */
    MPIU_Object_set_ref(comm_p, 1);

    /* per-object mutex (a no-op unless fine-grained threading is built) */
    MPIU_THREAD_MPI_OBJ_INIT(comm_p);

    /* Default most fields to "unset"; upper-level communicator
       construction may override any of these afterwards. */
    comm_p->attributes    = NULL;
    comm_p->errhandler    = NULL;
    comm_p->local_group   = NULL;
    comm_p->remote_group  = NULL;
    comm_p->coll_fns      = NULL;
    comm_p->topo_fns      = NULL;
    comm_p->name[0]       = '\0';

    /* no SMP hierarchy yet; MPIR_Comm_commit may build one later */
    comm_p->hierarchy_kind  = MPID_HIERARCHY_FLAT;
    comm_p->node_comm       = NULL;
    comm_p->node_roots_comm = NULL;
    comm_p->intranode_table = NULL;
    comm_p->internode_table = NULL;

    /* abstractions bleed a bit here... :( */
    comm_p->next_sched_tag = MPIR_FIRST_NBC_TAG;

    /* context_id, remote/local size, and kind are intentionally left
       unset; the different construction routines fill them in. */
  fn_fail:
    return mpi_errno;
}
111
112
113 /*
114 Create a communicator structure and perform basic initialization
115 (mostly clearing fields and updating the reference count).
116 */
117 #undef FUNCNAME
118 #define FUNCNAME MPIR_Comm_create
119 #undef FCNAME
120 #define FCNAME "MPIR_Comm_create"
MPIR_Comm_create(MPID_Comm ** newcomm_ptr)121 int MPIR_Comm_create( MPID_Comm **newcomm_ptr )
122 {
123 int mpi_errno = MPI_SUCCESS;
124 MPID_Comm *newptr;
125 MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE);
126
127 MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_CREATE);
128
129 newptr = (MPID_Comm *)MPIU_Handle_obj_alloc( &MPID_Comm_mem );
130 MPIU_ERR_CHKANDJUMP(!newptr, mpi_errno, MPI_ERR_OTHER, "**nomem");
131
132 *newcomm_ptr = newptr;
133
134 mpi_errno = MPIR_Comm_init(newptr);
135 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
136
137 /* Insert this new communicator into the list of known communicators.
138 Make this conditional on debugger support to match the test in
139 MPIR_Comm_release . */
140 MPIR_COMML_REMEMBER( newptr );
141
142 fn_fail:
143 MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_CREATE);
144
145 return mpi_errno;
146 }
147
148 /* Create a local intra communicator from the local group of the
149 specified intercomm. */
150 /* FIXME this is an alternative constructor that doesn't use MPIR_Comm_create! */
151 #undef FUNCNAME
152 #define FUNCNAME MPIR_Setup_intercomm_localcomm
153 #undef FCNAME
154 #define FCNAME "MPIR_Setup_intercomm_localcomm"
MPIR_Setup_intercomm_localcomm(MPID_Comm * intercomm_ptr)155 int MPIR_Setup_intercomm_localcomm( MPID_Comm *intercomm_ptr )
156 {
157 MPID_Comm *localcomm_ptr;
158 int mpi_errno = MPI_SUCCESS;
159 MPID_MPI_STATE_DECL(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);
160
161 MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);
162
163 localcomm_ptr = (MPID_Comm *)MPIU_Handle_obj_alloc( &MPID_Comm_mem );
164 MPIU_ERR_CHKANDJUMP(!localcomm_ptr,mpi_errno,MPI_ERR_OTHER,"**nomem");
165
166 /* get sensible default values for most fields (usually zeros) */
167 mpi_errno = MPIR_Comm_init(localcomm_ptr);
168 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
169
170 /* use the parent intercomm's recv ctx as the basis for our ctx */
171 localcomm_ptr->recvcontext_id = MPID_CONTEXT_SET_FIELD(IS_LOCALCOMM, intercomm_ptr->recvcontext_id, 1);
172 localcomm_ptr->context_id = localcomm_ptr->recvcontext_id;
173
174 MPIU_DBG_MSG_FMT(COMM,TYPICAL,(MPIU_DBG_FDEST, "setup_intercomm_localcomm ic=%p ic->context_id=%d ic->recvcontext_id=%d lc->recvcontext_id=%d", intercomm_ptr, intercomm_ptr->context_id, intercomm_ptr->recvcontext_id, localcomm_ptr->recvcontext_id));
175
176 /* Duplicate the VCRT references */
177 MPID_VCRT_Add_ref( intercomm_ptr->local_vcrt );
178 localcomm_ptr->vcrt = intercomm_ptr->local_vcrt;
179 localcomm_ptr->vcr = intercomm_ptr->local_vcr;
180
181 /* Save the kind of the communicator */
182 localcomm_ptr->comm_kind = MPID_INTRACOMM;
183
184 /* Set the sizes and ranks */
185 localcomm_ptr->remote_size = intercomm_ptr->local_size;
186 localcomm_ptr->local_size = intercomm_ptr->local_size;
187 localcomm_ptr->rank = intercomm_ptr->rank;
188
189 /* TODO More advanced version: if the group is available, dup it by
190 increasing the reference count instead of recreating it later */
191 /* FIXME : No coll_fns functions for the collectives */
192 /* FIXME : No local functions for the topology routines */
193
194 intercomm_ptr->local_comm = localcomm_ptr;
195
196 /* sets up the SMP-aware sub-communicators and tables */
197 mpi_errno = MPIR_Comm_commit(localcomm_ptr);
198 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
199
200 fn_fail:
201 MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);
202
203 return mpi_errno;
204 }
205
/* holds default collop "vtables" for _intracomms_, where
 * default[hierarchy_kind] is the pointer to the collop struct for that
 * hierarchy kind */
/* allocated lazily by init_default_collops and released by the
 * cleanup_default_collops finalize callback */
static struct MPID_Collops *default_collops[MPID_HIERARCHY_SIZE] = {NULL};
/* default for intercomms */
static struct MPID_Collops *ic_default_collops = NULL;
212
213 #undef FUNCNAME
214 #define FUNCNAME cleanup_default_collops
215 #undef FCNAME
216 #define FCNAME MPIU_QUOTE(FUNCNAME)
cleanup_default_collops(void * unused)217 static int cleanup_default_collops(void *unused) {
218 int i;
219 for (i = 0; i < MPID_HIERARCHY_SIZE; ++i) {
220 if (default_collops[i]) {
221 MPIU_Assert(default_collops[i]->ref_count >= 1);
222 if (--default_collops[i]->ref_count == 0)
223 MPIU_Free(default_collops[i]);
224 default_collops[i] = NULL;
225 }
226 }
227 if (ic_default_collops) {
228 MPIU_Assert(ic_default_collops->ref_count >= 1);
229 if (--ic_default_collops->ref_count == 0)
230 MPIU_Free(ic_default_collops);
231 }
232 return MPI_SUCCESS;
233 }
234
#undef FUNCNAME
#define FUNCNAME init_default_collops
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/* Allocates and populates the shared default collops tables: one table per
 * intracomm hierarchy kind plus one intercomm table.  Each table holds one
 * reference that is released by cleanup_default_collops at finalize time.
 * Returns MPI_SUCCESS or an MPI error code; on failure all partial
 * allocations are reaped. */
static int init_default_collops(void)
{
    int mpi_errno = MPI_SUCCESS;
    int i;
    struct MPID_Collops *ops = NULL;
    /* one allocation per hierarchy kind plus the intercomm table */
    MPIU_CHKPMEM_DECL(MPID_HIERARCHY_SIZE+1);

    /* first initialize the intracomms */
    for (i = 0; i < MPID_HIERARCHY_SIZE; ++i) {
        MPIU_CHKPMEM_CALLOC(ops, struct MPID_Collops *, sizeof(struct MPID_Collops), mpi_errno, "default intracomm collops");
        ops->ref_count = 1; /* force existence until finalize time */

        /* intracomm default defaults... */
        ops->Ibcast = &MPIR_Ibcast_intra;
        ops->Ibarrier = &MPIR_Ibarrier_intra;
        ops->Ireduce = &MPIR_Ireduce_intra;
        ops->Ialltoall = &MPIR_Ialltoall_intra;
        ops->Ialltoallv = &MPIR_Ialltoallv_intra;
        ops->Ialltoallw = &MPIR_Ialltoallw_intra;
        ops->Iallreduce = &MPIR_Iallreduce_intra;
        ops->Igather = &MPIR_Igather_intra;
        ops->Igatherv = &MPIR_Igatherv;
        ops->Iscatter = &MPIR_Iscatter_intra;
        ops->Iscatterv = &MPIR_Iscatterv;
        ops->Ireduce_scatter = &MPIR_Ireduce_scatter_intra;
        ops->Ireduce_scatter_block = &MPIR_Ireduce_scatter_block_intra;
        ops->Iallgather = &MPIR_Iallgather_intra;
        ops->Iallgatherv = &MPIR_Iallgatherv_intra;
        ops->Iscan = &MPIR_Iscan_rec_dbl;
        ops->Iexscan = &MPIR_Iexscan;
        ops->Neighbor_allgather = &MPIR_Neighbor_allgather_default;
        ops->Neighbor_allgatherv = &MPIR_Neighbor_allgatherv_default;
        ops->Neighbor_alltoall = &MPIR_Neighbor_alltoall_default;
        ops->Neighbor_alltoallv = &MPIR_Neighbor_alltoallv_default;
        ops->Neighbor_alltoallw = &MPIR_Neighbor_alltoallw_default;
        ops->Ineighbor_allgather = &MPIR_Ineighbor_allgather_default;
        ops->Ineighbor_allgatherv = &MPIR_Ineighbor_allgatherv_default;
        ops->Ineighbor_alltoall = &MPIR_Ineighbor_alltoall_default;
        ops->Ineighbor_alltoallv = &MPIR_Ineighbor_alltoallv_default;
        ops->Ineighbor_alltoallw = &MPIR_Ineighbor_alltoallw_default;

        /* override defaults, such as for SMP */
        switch (i) {
            case MPID_HIERARCHY_FLAT:
                break;
            case MPID_HIERARCHY_PARENT:
                /* the parent level gets SMP-aware algorithms */
                ops->Ibcast = &MPIR_Ibcast_SMP;
                ops->Iscan = &MPIR_Iscan_SMP;
                ops->Iallreduce = &MPIR_Iallreduce_SMP;
                ops->Ireduce = &MPIR_Ireduce_SMP;
                break;
            case MPID_HIERARCHY_NODE:
                break;
            case MPID_HIERARCHY_NODE_ROOTS:
                break;

            /* --BEGIN ERROR HANDLING-- */
            default:
                MPIU_Assertp(FALSE);
                break;
            /* --END ERROR HANDLING-- */
        }

        /* this is a default table, it's not overriding another table */
        ops->prev_coll_fns = NULL;

        default_collops[i] = ops;
    }

    /* now the intercomm table */
    {
        MPIU_CHKPMEM_CALLOC(ops, struct MPID_Collops *, sizeof(struct MPID_Collops), mpi_errno, "default intercomm collops");
        ops->ref_count = 1; /* force existence until finalize time */

        /* intercomm defaults */
        ops->Ibcast = &MPIR_Ibcast_inter;
        ops->Ibarrier = &MPIR_Ibarrier_inter;
        ops->Ireduce = &MPIR_Ireduce_inter;
        ops->Ialltoall = &MPIR_Ialltoall_inter;
        ops->Ialltoallv = &MPIR_Ialltoallv_inter;
        ops->Ialltoallw = &MPIR_Ialltoallw_inter;
        ops->Iallreduce = &MPIR_Iallreduce_inter;
        ops->Igather = &MPIR_Igather_inter;
        ops->Igatherv = &MPIR_Igatherv;
        ops->Iscatter = &MPIR_Iscatter_inter;
        ops->Iscatterv = &MPIR_Iscatterv;
        ops->Ireduce_scatter = &MPIR_Ireduce_scatter_inter;
        ops->Ireduce_scatter_block = &MPIR_Ireduce_scatter_block_inter;
        ops->Iallgather = &MPIR_Iallgather_inter;
        ops->Iallgatherv = &MPIR_Iallgatherv_inter;
        /* scan and exscan are not valid for intercommunicators, leave them NULL */
        /* Ineighbor_all* routines are not valid for intercommunicators, leave
         * them NULL */

        /* this is a default table, it's not overriding another table */
        ops->prev_coll_fns = NULL;

        ic_default_collops = ops;
    }


    /* run after MPID_Finalize to permit collective usage during finalize */
    MPIR_Add_finalize(cleanup_default_collops, NULL, MPIR_FINALIZE_CALLBACK_PRIO - 1);

    MPIU_CHKPMEM_COMMIT();
 fn_exit:
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
 fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
352
353 /* Initializes the coll_fns field of comm to a sensible default. It may re-use
354 * an existing structure, so any override by a lower level should _not_ change
355 * any of the fields but replace the coll_fns object instead.
356 *
357 * NOTE: for now we only initialize nonblocking collective routines, since the
358 * blocking collectives all contain fallback logic that correctly handles NULL
359 * override functions. */
360 #undef FUNCNAME
361 #define FUNCNAME set_collops
362 #undef FCNAME
363 #define FCNAME MPIU_QUOTE(FUNCNAME)
set_collops(MPID_Comm * comm)364 static int set_collops(MPID_Comm *comm)
365 {
366 int mpi_errno = MPI_SUCCESS;
367 static int initialized = FALSE;
368
369 if (comm->coll_fns != NULL)
370 goto fn_exit;
371
372 if (unlikely(!initialized)) {
373 mpi_errno = init_default_collops();
374 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
375
376 initialized = TRUE;
377 }
378
379 if (comm->comm_kind == MPID_INTRACOMM) {
380 /* FIXME MT what protects access to this structure and ic_default_collops? */
381 comm->coll_fns = default_collops[comm->hierarchy_kind];
382 }
383 else { /* intercomm */
384 comm->coll_fns = ic_default_collops;
385 }
386
387 comm->coll_fns->ref_count++;
388
389 fn_exit:
390 return mpi_errno;
391 fn_fail:
392 goto fn_exit;
393 }
394
395 /* Provides a hook for the top level functions to perform some manipulation on a
396 communicator just before it is given to the application level.
397
398 For example, we create sub-communicators for SMP-aware collectives at this
399 step. */
400 #undef FUNCNAME
401 #define FUNCNAME MPIR_Comm_commit
402 #undef FCNAME
403 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIR_Comm_commit(MPID_Comm * comm)404 int MPIR_Comm_commit(MPID_Comm *comm)
405 {
406 int mpi_errno = MPI_SUCCESS;
407 int i;
408 int num_local = -1, num_external = -1;
409 int local_rank = -1, external_rank = -1;
410 int *local_procs = NULL, *external_procs = NULL;
411 MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_COMMIT);
412
413 MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_COMMIT);
414
415 /* It's OK to relax these assertions, but we should do so very
416 intentionally. For now this function is the only place that we create
417 our hierarchy of communicators */
418 MPIU_Assert(comm->node_comm == NULL);
419 MPIU_Assert(comm->node_roots_comm == NULL);
420
421 if (comm->comm_kind == MPID_INTRACOMM) {
422
423 mpi_errno = MPIU_Find_local_and_external(comm,
424 &num_local, &local_rank, &local_procs,
425 &num_external, &external_rank, &external_procs,
426 &comm->intranode_table, &comm->internode_table);
427 /* --BEGIN ERROR HANDLING-- */
428 if (mpi_errno) {
429 if (MPIR_Err_is_fatal(mpi_errno)) MPIU_ERR_POP(mpi_errno);
430
431 /* Non-fatal errors simply mean that this communicator will not have
432 any node awareness. Node-aware collectives are an optimization. */
433 MPIU_DBG_MSG_P(COMM,VERBOSE,"MPIU_Find_local_and_external failed for comm_ptr=%p", comm);
434 if (comm->intranode_table)
435 MPIU_Free(comm->intranode_table);
436 if (comm->internode_table)
437 MPIU_Free(comm->internode_table);
438
439 mpi_errno = MPI_SUCCESS;
440 goto fn_exit;
441 }
442 /* --END ERROR HANDLING-- */
443
444 /* defensive checks */
445 MPIU_Assert(num_local > 0);
446 MPIU_Assert(num_local > 1 || external_rank >= 0);
447 MPIU_Assert(external_rank < 0 || external_procs != NULL);
448
449 /* if the node_roots_comm and comm would be the same size, then creating
450 the second communicator is useless and wasteful. */
451 if (num_external == comm->remote_size) {
452 MPIU_Assert(num_local == 1);
453 goto fn_exit;
454 }
455
456 /* we don't need a local comm if this process is the only one on this node */
457 if (num_local > 1) {
458 mpi_errno = MPIR_Comm_create(&comm->node_comm);
459 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
460
461 comm->node_comm->context_id = comm->context_id + MPID_CONTEXT_INTRANODE_OFFSET;
462 comm->node_comm->recvcontext_id = comm->node_comm->context_id;
463 comm->node_comm->rank = local_rank;
464 comm->node_comm->comm_kind = MPID_INTRACOMM;
465 comm->node_comm->hierarchy_kind = MPID_HIERARCHY_NODE;
466 comm->node_comm->local_comm = NULL;
467
468 comm->node_comm->local_size = num_local;
469 comm->node_comm->remote_size = num_local;
470
471 MPID_VCRT_Create( num_local, &comm->node_comm->vcrt );
472 MPID_VCRT_Get_ptr( comm->node_comm->vcrt, &comm->node_comm->vcr );
473 for (i = 0; i < num_local; ++i) {
474 /* For rank i in the new communicator, find the corresponding
475 rank in the input communicator */
476 MPID_VCR_Dup( comm->vcr[local_procs[i]],
477 &comm->node_comm->vcr[i] );
478 }
479
480 mpi_errno = set_collops(comm->node_comm);
481 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
482
483 /* Notify device of communicator creation */
484 mpi_errno = MPID_Dev_comm_create_hook( comm->node_comm );
485 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
486 /* don't call MPIR_Comm_commit here */
487 }
488
489
490 /* this process may not be a member of the node_roots_comm */
491 if (local_rank == 0) {
492 mpi_errno = MPIR_Comm_create(&comm->node_roots_comm);
493 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
494
495 comm->node_roots_comm->context_id = comm->context_id + MPID_CONTEXT_INTERNODE_OFFSET;
496 comm->node_roots_comm->recvcontext_id = comm->node_roots_comm->context_id;
497 comm->node_roots_comm->rank = external_rank;
498 comm->node_roots_comm->comm_kind = MPID_INTRACOMM;
499 comm->node_roots_comm->hierarchy_kind = MPID_HIERARCHY_NODE_ROOTS;
500 comm->node_roots_comm->local_comm = NULL;
501
502 comm->node_roots_comm->local_size = num_external;
503 comm->node_roots_comm->remote_size = num_external;
504
505 MPID_VCRT_Create( num_external, &comm->node_roots_comm->vcrt );
506 MPID_VCRT_Get_ptr( comm->node_roots_comm->vcrt, &comm->node_roots_comm->vcr );
507 for (i = 0; i < num_external; ++i) {
508 /* For rank i in the new communicator, find the corresponding
509 rank in the input communicator */
510 MPID_VCR_Dup( comm->vcr[external_procs[i]],
511 &comm->node_roots_comm->vcr[i] );
512 }
513
514 mpi_errno = set_collops(comm->node_roots_comm);
515 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
516
517 /* Notify device of communicator creation */
518 mpi_errno = MPID_Dev_comm_create_hook( comm->node_roots_comm );
519 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
520 /* don't call MPIR_Comm_commit here */
521 }
522
523 comm->hierarchy_kind = MPID_HIERARCHY_PARENT;
524 }
525
526 fn_exit:
527 if (!mpi_errno) {
528 /* catch all of the early-bail, non-error cases */
529
530 mpi_errno = set_collops(comm);
531 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
532
533 /* Notify device of communicator creation */
534 mpi_errno = MPID_Dev_comm_create_hook(comm);
535 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
536 }
537
538 if (external_procs != NULL)
539 MPIU_Free(external_procs);
540 if (local_procs != NULL)
541 MPIU_Free(local_procs);
542
543 MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_COMMIT);
544 return mpi_errno;
545 fn_fail:
546 goto fn_exit;
547 }
548
549 /* Returns true if the given communicator is aware of node topology information,
550 false otherwise. Such information could be used to implement more efficient
551 collective communication, for example. */
MPIR_Comm_is_node_aware(MPID_Comm * comm)552 int MPIR_Comm_is_node_aware(MPID_Comm * comm)
553 {
554 return (comm->hierarchy_kind == MPID_HIERARCHY_PARENT);
555 }
556
557 /* Returns true if the communicator is node-aware and processes in all the nodes
558 are consecutive. For example, if node 0 contains "0, 1, 2, 3", node 1
559 contains "4, 5, 6", and node 2 contains "7", we shall return true. */
MPIR_Comm_is_node_consecutive(MPID_Comm * comm)560 int MPIR_Comm_is_node_consecutive(MPID_Comm * comm)
561 {
562 int i = 0, curr_nodeidx = 0;
563 int *internode_table = comm->internode_table;
564
565 if (!MPIR_Comm_is_node_aware(comm))
566 return 0;
567
568 for (; i < comm->local_size; i++)
569 {
570 if (internode_table[i] == curr_nodeidx + 1)
571 curr_nodeidx++;
572 else if (internode_table[i] != curr_nodeidx)
573 return 0;
574 }
575
576 return 1;
577 }
578
579 /*
580 * Here are the routines to find a new context id. The algorithm is discussed
581 * in detail in the mpich2 coding document. There are versions for
582 * single threaded and multithreaded MPI.
583 *
584 * Both the threaded and non-threaded routines use the same mask of
585 * available context id values.
586 */
/* A set bit means the corresponding context-id prefix is still available;
 * MPIR_Allocate_context_bit clears a bit when the id is handed out. */
static uint32_t context_mask[MPIR_MAX_CONTEXT_MASK];
/* lazy-initialization flag for context_mask (see MPIR_Init_contextid) */
static int initialize_context_mask = 1;
589
590 #ifdef USE_DBG_LOGGING
591 /* Create a string that contains the context mask. This is
592 used only with the logging interface, and must be used by one thread at
593 a time (should this be enforced by the logging interface?).
594 Converts the mask to hex and returns a pointer to that string */
MPIR_ContextMaskToStr(void)595 static char *MPIR_ContextMaskToStr( void )
596 {
597 static char bufstr[MPIR_MAX_CONTEXT_MASK*8+1];
598 int i;
599 int maxset=0;
600
601 for (maxset=MPIR_MAX_CONTEXT_MASK-1; maxset>=0; maxset--) {
602 if (context_mask[maxset] != 0) break;
603 }
604
605 for (i=0; i<maxset; i++) {
606 MPIU_Snprintf( &bufstr[i*8], 9, "%.8x", context_mask[i] );
607 }
608 return bufstr;
609 }
610 #endif
611
612 #ifdef MPICH_DEBUG_HANDLEALLOC
MPIU_CheckContextIDsOnFinalize(void * context_mask_ptr)613 static int MPIU_CheckContextIDsOnFinalize(void *context_mask_ptr)
614 {
615 int i;
616 uint32_t *mask = context_mask_ptr;
617 /* the predefined communicators should be freed by this point, so we don't
618 * need to special case bits 0,1, and 2 */
619 for (i = 0; i < MPIR_MAX_CONTEXT_MASK; ++i) {
620 if (~mask[i]) {
621 /* some bits were still cleared */
622 printf("leaked context IDs detected: mask=%p mask[%d]=%#x\n", mask, i, (int)mask[i]);
623 }
624 }
625 return MPI_SUCCESS;
626 }
627 #endif
628
MPIR_Init_contextid(void)629 static void MPIR_Init_contextid(void)
630 {
631 int i;
632
633 for (i=1; i<MPIR_MAX_CONTEXT_MASK; i++) {
634 context_mask[i] = 0xFFFFFFFF;
635 }
636 /* the first three values are already used (comm_world, comm_self,
637 and the internal-only copy of comm_world) */
638 context_mask[0] = 0xFFFFFFF8;
639 initialize_context_mask = 0;
640
641 #ifdef MPICH_DEBUG_HANDLEALLOC
642 /* check for context ID leaks in MPI_Finalize. Use (_PRIO-1) to make sure
643 * that we run after MPID_Finalize. */
644 MPIR_Add_finalize(MPIU_CheckContextIDsOnFinalize, context_mask,
645 MPIR_FINALIZE_CALLBACK_PRIO - 1);
646 #endif
647 }
648
649 /* Return the context id corresponding to the first set bit in the mask.
650 Return 0 if no bit found. This function does _not_ alter local_mask. */
MPIR_Locate_context_bit(uint32_t local_mask[])651 static int MPIR_Locate_context_bit(uint32_t local_mask[])
652 {
653 int i, j, context_id = 0;
654 for (i=0; i<MPIR_MAX_CONTEXT_MASK; i++) {
655 if (local_mask[i]) {
656 /* There is a bit set in this word. */
657 register uint32_t val, nval;
658 /* The following code finds the highest set bit by recursively
659 checking the top half of a subword for a bit, and incrementing
660 the bit location by the number of bit of the lower sub word if
661 the high subword contains a set bit. The assumption is that
662 full-word bitwise operations and compares against zero are
663 fast */
664 val = local_mask[i];
665 j = 0;
666 nval = val & 0xFFFF0000;
667 if (nval) {
668 j += 16;
669 val = nval;
670 }
671 nval = val & 0xFF00FF00;
672 if (nval) {
673 j += 8;
674 val = nval;
675 }
676 nval = val & 0xF0F0F0F0;
677 if (nval) {
678 j += 4;
679 val = nval;
680 }
681 nval = val & 0xCCCCCCCC;
682 if (nval) {
683 j += 2;
684 val = nval;
685 }
686 if (val & 0xAAAAAAAA) {
687 j += 1;
688 }
689 context_id = (MPIR_CONTEXT_INT_BITS * i + j) << MPID_CONTEXT_PREFIX_SHIFT;
690 return context_id;
691 }
692 }
693 return 0;
694 }
695
696 /* Allocates a context ID from the given mask by clearing the bit
697 * corresponding to the the given id. Returns 0 on failure, id on
698 * success. */
MPIR_Allocate_context_bit(uint32_t mask[],MPIR_Context_id_t id)699 static int MPIR_Allocate_context_bit(uint32_t mask[], MPIR_Context_id_t id)
700 {
701 int raw_prefix, idx, bitpos;
702 raw_prefix = MPID_CONTEXT_READ_FIELD(PREFIX,id);
703 idx = raw_prefix / MPIR_CONTEXT_INT_BITS;
704 bitpos = raw_prefix % MPIR_CONTEXT_INT_BITS;
705
706 /* the bit should not already be cleared (allocated) */
707 MPIU_Assert(mask[idx] & (1<<bitpos));
708
709 /* clear the bit */
710 mask[idx] &= ~(1<<bitpos);
711
712 MPIU_DBG_MSG_FMT(COMM,VERBOSE,(MPIU_DBG_FDEST,
713 "allocating contextid = %d, (mask=%p, mask[%d], bit %d)",
714 id, mask, idx, bitpos));
715 return id;
716 }
717
718 /* Allocates the first available context ID from context_mask based on the available
719 * bits given in local_mask. This function will clear the corresponding bit in
720 * context_mask if allocation was successful.
721 *
722 * Returns 0 on failure. Returns the allocated context ID on success. */
MPIR_Find_and_allocate_context_id(uint32_t local_mask[])723 static int MPIR_Find_and_allocate_context_id(uint32_t local_mask[])
724 {
725 MPIR_Context_id_t context_id;
726 context_id = MPIR_Locate_context_bit(local_mask);
727 if (context_id != 0) {
728 context_id = MPIR_Allocate_context_bit(context_mask, context_id);
729 }
730 return context_id;
731 }
732
733 /* Older, simpler interface. Allocates a context ID collectively over the given
734 * communicator. */
735 #undef FUNCNAME
736 #define FUNCNAME MPIR_Get_contextid
737 #undef FCNAME
738 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIR_Get_contextid(MPID_Comm * comm_ptr,MPIR_Context_id_t * context_id)739 int MPIR_Get_contextid(MPID_Comm *comm_ptr, MPIR_Context_id_t *context_id)
740 {
741 int mpi_errno = MPI_SUCCESS;
742 mpi_errno = MPIR_Get_contextid_sparse(comm_ptr, context_id, FALSE);
743 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
744 MPIU_Assert(*context_id != MPIR_INVALID_CONTEXT_ID);
745 fn_fail:
746 return mpi_errno;
747 }
748
749
750 #ifndef MPICH_IS_THREADED
751 /* Unthreaded (only one MPI call active at any time) */
752
753 #undef FUNCNAME
754 #define FUNCNAME MPIR_Get_contextid_sparse
755 #undef FCNAME
756 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIR_Get_contextid_sparse(MPID_Comm * comm_ptr,MPIR_Context_id_t * context_id,int ignore_id)757 int MPIR_Get_contextid_sparse(MPID_Comm *comm_ptr, MPIR_Context_id_t *context_id, int ignore_id)
758 {
759 return MPIR_Get_contextid_sparse_group(comm_ptr, NULL /* group_ptr */, MPIR_Process.attrs.tag_ub /* tag */, context_id, ignore_id);
760 }
761
762 #undef FUNCNAME
763 #define FUNCNAME MPIR_Get_contextid_sparse_group
764 #undef FCNAME
765 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIR_Get_contextid_sparse_group(MPID_Comm * comm_ptr,MPID_Group * group_ptr,int tag,MPIR_Context_id_t * context_id,int ignore_id)766 int MPIR_Get_contextid_sparse_group(MPID_Comm *comm_ptr, MPID_Group *group_ptr, int tag, MPIR_Context_id_t *context_id, int ignore_id)
767 {
768 int mpi_errno = MPI_SUCCESS;
769 uint32_t local_mask[MPIR_MAX_CONTEXT_MASK];
770 int errflag = FALSE;
771 MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_CONTEXTID);
772
773 MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_CONTEXTID);
774
775 *context_id = 0;
776
777 if (initialize_context_mask) {
778 MPIR_Init_contextid();
779 }
780
781 if (ignore_id) {
782 /* We are not participating in the resulting communicator, so our
783 * context ID space doesn't matter. Set the mask to "all available". */
784 memset(local_mask, 0xff, MPIR_MAX_CONTEXT_MASK * sizeof(int));
785 }
786 else {
787 MPIU_Memcpy( local_mask, context_mask, MPIR_MAX_CONTEXT_MASK * sizeof(int) );
788 }
789
790 /* Note that this is the unthreaded version */
791 /* Comm must be an intracommunicator */
792 if (group_ptr != NULL) {
793 int coll_tag = tag | MPIR_Process.tagged_coll_mask; /* Shift tag into the tagged coll space */
794 mpi_errno = MPIR_Allreduce_group( MPI_IN_PLACE, local_mask, MPIR_MAX_CONTEXT_MASK,
795 MPI_INT, MPI_BAND, comm_ptr, group_ptr, coll_tag, &errflag );
796 } else {
797 mpi_errno = MPIR_Allreduce_impl( MPI_IN_PLACE, local_mask, MPIR_MAX_CONTEXT_MASK,
798 MPI_INT, MPI_BAND, comm_ptr, &errflag );
799 }
800
801 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
802 MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
803
804 if (ignore_id) {
805 *context_id = MPIR_Locate_context_bit(local_mask);
806 MPIU_ERR_CHKANDJUMP(!(*context_id), mpi_errno, MPIR_ERR_RECOVERABLE, "**toomanycomm");
807 }
808 else {
809 *context_id = MPIR_Find_and_allocate_context_id(local_mask);
810 MPIU_ERR_CHKANDJUMP(!(*context_id), mpi_errno, MPIR_ERR_RECOVERABLE, "**toomanycomm");
811 }
812
813 fn_exit:
814 if (ignore_id)
815 *context_id = MPIR_INVALID_CONTEXT_ID;
816 MPIU_DBG_MSG_S(COMM,VERBOSE,"Context mask = %s",MPIR_ContextMaskToStr());
817
818 MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GET_CONTEXTID);
819 return mpi_errno;
820 fn_fail:
821 goto fn_exit;
822 }
823
824 #else /* MPICH_IS_THREADED is set and true */
825
/* EAGER CONTEXT ID ALLOCATION: Attempt to allocate the context ID during the
 * initial synchronization step. If eager protocol fails, threads fall back to
 * the base algorithm.
 */
/* NOTE(review): presumably the number of mask words probed by the eager
 * protocol; -1 marks "not yet configured" -- confirm against the threaded
 * allocator below (not visible in this chunk) */
static volatile int eager_nelem = -1;
static volatile int eager_in_use = 0;

/* Additional values needed to maintain thread safety */
static volatile int mask_in_use = 0;

/* lowestContextId is used to prioritize access when multiple threads
 * are contending for the mask. lowestTag is used to break ties when
 * MPI_Comm_create_group is invoked by multiple threads on the same parent
 * communicator.
 */
#define MPIR_MAXID (1 << 30)
static volatile int lowestContextId = MPIR_MAXID;
static volatile int lowestTag = -1;
844
845 #undef FUNCNAME
846 #define FUNCNAME MPIR_Get_contextid_sparse
847 #undef FCNAME
848 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIR_Get_contextid_sparse(MPID_Comm * comm_ptr,MPIR_Context_id_t * context_id,int ignore_id)849 int MPIR_Get_contextid_sparse(MPID_Comm *comm_ptr, MPIR_Context_id_t *context_id, int ignore_id)
850 {
851 return MPIR_Get_contextid_sparse_group(comm_ptr, NULL /*group_ptr*/,
852 MPIR_Process.attrs.tag_ub /*tag*/,
853 context_id, ignore_id);
854 }
855
856 /* Allocates a new context ID collectively over the given communicator. This
857 * routine is "sparse" in the sense that while it is collective, some processes
858 * may not care about the value selected context ID.
859 *
860 * One example of this case is processes who pass MPI_UNDEFINED as the color
861 * value to MPI_Comm_split. Such processes should pass ignore_id==TRUE to
862 * obtain the best performance and utilization of the context ID space.
863 *
864 * Processes that pass ignore_id==TRUE will receive
865 * (*context_id==MPIR_INVALID_CONTEXT_ID) and should not attempt to use it.
866 *
867 * If a group pointer is given, the call is _not_ sparse, and only processes
868 * in the group should call this routine. That is, it is collective only over
869 * the given group.
870 */
871 /* NOTE-C1: We need a special test in this loop for the case where some process
872 * has exhausted its supply of context ids. In the single threaded case, this
873 * is simple, because the algorithm is deterministic (see above). In the
874 * multithreaded case, it is more complicated, because we may get a zero for the
875 * context mask because some other thread holds the mask. In addition, we can't
876 * check for the case where this process did not select MPI_THREAD_MULTIPLE,
877 * because one of the other processes may have selected MPI_THREAD_MULTIPLE. To
878 * handle this case, after a fixed number of failures, we test to see if some
879 * process has exhausted its supply of context ids. If so, all processes can
880 * invoke the out-of-context-id error. That fixed number of tests is in
881 * testCount */
882 #undef FUNCNAME
883 #define FUNCNAME MPIR_Get_contextid_sparse_group
884 #undef FCNAME
885 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Collectively allocates a context ID over comm_ptr, or — when group_ptr is
 * non-NULL — only over the processes in group_ptr, with "tag" separating
 * concurrent MPI_Comm_create_group calls on the same parent communicator.
 *
 * Thread-safety scheme (shared state is eager_nelem/eager_in_use/mask_in_use/
 * lowestContextId/lowestTag, all guarded by the CONTEXTID critical section):
 * on the first iteration every thread tries the "eager" segment (the first
 * eager_nelem mask words); afterwards only the thread with the smallest
 * (context_id, tag) pair may hold the full mask, so exactly one thread per
 * process makes progress at a time and deadlock is avoided.
 *
 * On success *context_id is nonzero; callers that passed ignore_id==TRUE
 * instead receive MPIR_INVALID_CONTEXT_ID and must not use the value. */
int MPIR_Get_contextid_sparse_group(MPID_Comm *comm_ptr, MPID_Group *group_ptr, int tag, MPIR_Context_id_t *context_id, int ignore_id)
{
    int mpi_errno = MPI_SUCCESS;
    uint32_t local_mask[MPIR_MAX_CONTEXT_MASK];
    int own_mask = 0;               /* TRUE iff this thread holds the non-eager mask */
    int own_eager_mask = 0;         /* TRUE iff this thread holds the eager segment */
    static const int NUM_CTX_TESTS = 10;
    int testCount = NUM_CTX_TESTS;  /* iterations between "out of IDs?" checks (NOTE-C1) */
    int errflag = FALSE;
    int first_iter = 1;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_CONTEXTID);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_CONTEXTID);

    /* Group-collective and ignore_id should never be combined */
    MPIU_Assert(! (group_ptr != NULL && ignore_id) );

    *context_id = 0;

    MPIU_DBG_MSG_FMT(COMM, VERBOSE, (MPIU_DBG_FDEST,
        "Entering; shared state is %d:%d:%d, my ctx id is %d, tag=%d",
        mask_in_use, lowestContextId, lowestTag, comm_ptr->context_id, tag));

    /* see NOTE-C1 for info about the "exhausted IDs" test */
    while (*context_id == 0) {
        /* We lock only around access to the mask (except in the global locking
         * case).  If another thread is using the mask, we take a mask of zero. */
        MPIU_THREAD_CS_ENTER(CONTEXTID,);

        if (initialize_context_mask) {
            MPIR_Init_contextid();
        }

        if (eager_nelem < 0) {
            /* Ensure that at least one word of deadlock-free context IDs is
               always set aside for the base protocol */
            MPIU_Assert( MPIR_PARAM_CTXID_EAGER_SIZE >= 0 && MPIR_PARAM_CTXID_EAGER_SIZE < MPIR_MAX_CONTEXT_MASK-1 );
            eager_nelem = MPIR_PARAM_CTXID_EAGER_SIZE;
        }

        if (ignore_id) {
            /* We are not participating in the resulting communicator, so our
             * context ID space doesn't matter.  Set the mask to "all available". */
            /* FIX: size by the actual mask array (uint32_t elements) rather
             * than sizeof(int) so the whole mask is covered even on platforms
             * where int and uint32_t differ in width. */
            memset(local_mask, 0xff, sizeof(local_mask));
            own_mask = 0;
            /* don't need to touch mask_in_use/lowestContextId b/c our thread
             * doesn't ever need to "win" the mask */
        }

        /* Deadlock avoidance: Only participate in context id loop when all
         * processes have called this routine.  On the first iteration, use the
         * "eager" allocation protocol.
         */
        else if (first_iter) {
            memset(local_mask, 0, sizeof(local_mask));
            own_eager_mask = 0;

            /* Attempt to reserve the eager mask segment */
            if (!eager_in_use && eager_nelem > 0) {
                int i;
                for (i = 0; i < eager_nelem; i++)
                    local_mask[i] = context_mask[i];

                eager_in_use = 1;
                own_eager_mask = 1;
            }
        }

        else {
            /* lowestTag breaks ties when contextIds are the same (happens only
               in calls to MPI_Comm_create_group. */
            if (comm_ptr->context_id < lowestContextId ||
                    (comm_ptr->context_id == lowestContextId && tag < lowestTag)) {
                lowestContextId = comm_ptr->context_id;
                lowestTag = tag;
            }

            if (mask_in_use || ! (comm_ptr->context_id == lowestContextId && tag == lowestTag)) {
                /* either another thread holds the mask or a higher-priority
                 * thread is waiting; contribute an all-zero mask so the
                 * allreduce below cannot select anything on our behalf */
                memset(local_mask, 0, sizeof(local_mask));
                own_mask = 0;
                MPIU_DBG_MSG_D(COMM, VERBOSE, "In in-use, set lowestContextId to %d", lowestContextId);
            }
            else {
                int i;

                /* Copy safe mask segment to local_mask; the first eager_nelem
                 * words stay reserved for the eager protocol */
                for (i = 0; i < eager_nelem; i++)
                    local_mask[i] = 0;
                for (i = eager_nelem; i < MPIR_MAX_CONTEXT_MASK; i++)
                    local_mask[i] = context_mask[i];

                mask_in_use = 1;
                own_mask = 1;
                MPIU_DBG_MSG(COMM, VERBOSE, "Copied local_mask");
            }
        }
        MPIU_THREAD_CS_EXIT(CONTEXTID,);

        /* Now, try to get a context id */
        MPIU_Assert(comm_ptr->comm_kind == MPID_INTRACOMM);
        /* In the global and brief-global cases, note that this routine will
           release that global lock when it needs to wait.  That will allow
           other processes to enter the global or brief global critical section.
         */
        if (group_ptr != NULL) {
            int coll_tag = tag | MPIR_Process.tagged_coll_mask; /* Shift tag into the tagged coll space */
            mpi_errno = MPIR_Allreduce_group(MPI_IN_PLACE, local_mask, MPIR_MAX_CONTEXT_MASK,
                                             MPI_INT, MPI_BAND, comm_ptr, group_ptr, coll_tag, &errflag);
        } else {
            mpi_errno = MPIR_Allreduce_impl(MPI_IN_PLACE, local_mask, MPIR_MAX_CONTEXT_MASK,
                                            MPI_INT, MPI_BAND, comm_ptr, &errflag);
        }
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");

        /* MT FIXME 2/3 cases don't seem to need the CONTEXTID CS, check and
         * narrow this region */
        MPIU_THREAD_CS_ENTER(CONTEXTID,);
        if (ignore_id) {
            /* we don't care what the value was, but make sure that everyone
             * who did care agreed on a value */
            *context_id = MPIR_Locate_context_bit(local_mask);
            /* used later in out-of-context ids check and outer while loop condition */
        }
        else if (own_eager_mask) {
            /* There is a chance that we've found a context id */
            /* Find_and_allocate_context_id updates the context_mask if it finds a match */
            *context_id = MPIR_Find_and_allocate_context_id(local_mask);
            MPIU_DBG_MSG_D(COMM, VERBOSE, "Context id is now %hd", *context_id);

            own_eager_mask = 0;
            eager_in_use = 0;

            if (*context_id <= 0) {
                /* else we did not find a context id.  Give up the mask in case
                 * there is another thread (with a lower input context id)
                 * waiting for it.  We need to ensure that any other threads
                 * have the opportunity to run, hence yielding */
                MPIU_THREAD_CS_YIELD(CONTEXTID,);
            }
        }
        else if (own_mask) {
            /* There is a chance that we've found a context id */
            /* Find_and_allocate_context_id updates the context_mask if it finds a match */
            *context_id = MPIR_Find_and_allocate_context_id(local_mask);
            MPIU_DBG_MSG_D(COMM, VERBOSE, "Context id is now %hd", *context_id);

            mask_in_use = 0;

            if (*context_id > 0) {
                /* If we were the lowest context id, reset the value to
                   allow the other threads to compete for the mask */
                if (lowestContextId == comm_ptr->context_id && lowestTag == tag) {
                    lowestContextId = MPIR_MAXID;
                    lowestTag = -1;
                    /* Else leave it alone; there is another thread waiting */
                }
            }
            else {
                /* else we did not find a context id.  Give up the mask in case
                 * there is another thread (with a lower input context id)
                 * waiting for it.  We need to ensure that any other threads
                 * have the opportunity to run, hence yielding */
                MPIU_THREAD_CS_YIELD(CONTEXTID,);
            }
        }
        else {
            /* As above, force this thread to yield */
            MPIU_THREAD_CS_YIELD(CONTEXTID,);
        }
        MPIU_THREAD_CS_EXIT(CONTEXTID,);

        /* Here is the test for out-of-context ids */
        /* FIXME we may be able to "rotate" this a half iteration up to the top
         * where we already have to grab the lock */
        if ((testCount-- == 0) && (*context_id == 0)) {
            int hasNoId, totalHasNoId;
            int i;
            MPIU_THREAD_CS_ENTER(CONTEXTID,);

            /* Copy safe mask segment to local_mask */
            for (i = 0; i < eager_nelem; i++)
                local_mask[i] = 0;
            for (i = eager_nelem; i < MPIR_MAX_CONTEXT_MASK; i++)
                local_mask[i] = context_mask[i];

            hasNoId = MPIR_Locate_context_bit(local_mask) == 0;
            MPIU_THREAD_CS_EXIT(CONTEXTID,);

            /* we _must_ release the lock above in order to avoid deadlocking on
             * this blocking allreduce operation */
            if (group_ptr != NULL) {
                int coll_tag = tag | MPIR_Process.tagged_coll_mask; /* Shift tag into the tagged coll space */
                mpi_errno = MPIR_Allreduce_group(&hasNoId, &totalHasNoId, 1, MPI_INT,
                                                 MPI_MAX, comm_ptr, group_ptr, coll_tag, &errflag);
            } else {
                mpi_errno = MPIR_Allreduce_impl(&hasNoId, &totalHasNoId, 1, MPI_INT,
                                                MPI_MAX, comm_ptr, &errflag);
            }
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
            if (totalHasNoId == 1) {
                /* --BEGIN ERROR HANDLING-- */
                /* Release the mask for use by other threads */
                if (own_mask) {
                    MPIU_THREAD_CS_ENTER(CONTEXTID,);
                    mask_in_use = 0;
                    MPIU_THREAD_CS_EXIT(CONTEXTID,);
                }
                MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**toomanycomm");
                /* --END ERROR HANDLING-- */
            }
            else { /* reinitialize testCount */
                testCount = NUM_CTX_TESTS;
                MPIU_DBG_MSG_D(COMM, VERBOSE, "reinitialized testCount to %d", testCount);
            }
        }

        first_iter = 0;
    }


  fn_exit:
    if (ignore_id)
        *context_id = MPIR_INVALID_CONTEXT_ID;
    MPIU_DBG_MSG_S(COMM,VERBOSE,"Context mask = %s",MPIR_ContextMaskToStr());
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GET_CONTEXTID);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    /* Release the masks */
    if (own_mask) {
        /* is it safe to access this without holding the CS? */
        mask_in_use = 0;
    }
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
1126
1127 #endif
1128
/* State carried through the nonblocking ("gcn" = get context nonblocking)
 * context-id allocation schedule built by gcn_sch.  Heap-allocated and freed
 * by a scheduled MPIR_Sched_cb_free_buf callback after gcn_helper runs. */
struct gcn_state {
    MPIR_Context_id_t *ctx0;  /* destination for the new id, may be NULL */
    MPIR_Context_id_t *ctx1;  /* second destination for the same id, may be NULL */
    uint32_t local_mask[MPIR_MAX_CONTEXT_MASK];  /* availability mask; reduced in place by Iallreduce */
};
1134
1135 #undef FUNCNAME
1136 #define FUNCNAME gcn_helper
1137 #undef FCNAME
1138 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Scheduled callback: runs after the nonblocking allreduce of the
 * availability mask completes.  Claims the first context id that is free on
 * every participant and stores it through the requested pointer(s).  Fails
 * with **toomanycomm when no id is available. */
static int gcn_helper(MPID_Comm *comm, int tag, void *state)
{
    int mpi_errno = MPI_SUCCESS;
    struct gcn_state *gcn = state;
    MPIR_Context_id_t ctxid;

    /* updates the shared context_mask when it finds a free bit */
    ctxid = MPIR_Find_and_allocate_context_id(gcn->local_mask);
    MPIU_ERR_CHKANDJUMP(!ctxid, mpi_errno, MPIR_ERR_RECOVERABLE, "**toomanycomm");

    if (gcn->ctx0 != NULL)
        *gcn->ctx0 = ctxid;
    if (gcn->ctx1 != NULL)
        *gcn->ctx1 = ctxid;

  fn_fail:
    return mpi_errno;
}
1156
1157
1158 /* Does the meat of the algorithm, adds the relevant entries to the schedule.
1159 * Assigns the resulting value to *ctx0 and *ctx1, as long as those respective
1160 * pointers are non-NULL. */
1161 /* FIXME this version only works for single-threaded code, it will totally fail
1162 * for any multithreaded communicator creation */
1163 #undef FUNCNAME
1164 #define FUNCNAME gcn_sch
1165 #undef FCNAME
1166 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Appends the context-id allocation steps to schedule "s": a nonblocking
 * allreduce (MPI_BAND) of this process's context mask, then a callback that
 * claims the first commonly-free bit, then a callback that frees the state.
 * Stores the resulting id through ctx0 and ctx1 when those are non-NULL.
 * Single-threaded only; fails with **notsuppmultithread when threaded. */
static int gcn_sch(MPID_Comm *comm_ptr, MPIR_Context_id_t *ctx0, MPIR_Context_id_t *ctx1, MPID_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    struct gcn_state *st = NULL;
    MPIU_CHKPMEM_DECL(1);

    MPIU_Assert(comm_ptr->comm_kind == MPID_INTRACOMM);

    MPIU_ERR_CHKANDJUMP(MPIU_ISTHREADED, mpi_errno, MPI_ERR_INTERN, "**notsuppmultithread");

    /* first do as much local setup as we can */
    if (initialize_context_mask) {
        MPIR_Init_contextid();
    }

    /* st must live until the schedule completes; it is released by the
     * MPIR_Sched_cb_free_buf callback queued below */
    MPIU_CHKPMEM_MALLOC(st, struct gcn_state *, sizeof(struct gcn_state), mpi_errno, "gcn_state");
    st->ctx0 = ctx0;
    st->ctx1 = ctx1;
    MPIU_Memcpy(st->local_mask, context_mask, MPIR_MAX_CONTEXT_MASK * sizeof(uint32_t));

    /* AND-reduce availability masks so only ids free everywhere survive */
    mpi_errno = comm_ptr->coll_fns->Iallreduce(MPI_IN_PLACE, st->local_mask, MPIR_MAX_CONTEXT_MASK,
                                               MPI_UINT32_T, MPI_BAND, comm_ptr, s);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    MPID_SCHED_BARRIER(s);

    /* after the reduction, claim an id out of the combined mask */
    mpi_errno = MPID_Sched_cb(&gcn_helper, st, s);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    MPID_SCHED_BARRIER(s);

    mpi_errno = MPID_Sched_cb(&MPIR_Sched_cb_free_buf, st, s);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    MPIU_CHKPMEM_COMMIT();
  fn_exit:
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
1210
1211
1212 #undef FUNCNAME
1213 #define FUNCNAME MPIR_Get_contextid_nonblock
1214 #undef FCNAME
1215 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Nonblocking intracomm context-id allocation: builds a schedule (via
 * gcn_sch) that writes the same new id into newcommp->context_id and
 * newcommp->recvcontext_id, starts it, and returns the request in *req.
 * The ids are not valid until the request completes. */
int MPIR_Get_contextid_nonblock(MPID_Comm *comm_ptr, MPID_Comm *newcommp, MPID_Request **req)
{
    int mpi_errno = MPI_SUCCESS;
    int tag;
    MPID_Sched_t s;

    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_CONTEXTID_NONBLOCK);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_CONTEXTID_NONBLOCK);

    /* now create a schedule */
    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    mpi_errno = MPID_Sched_create(&s);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* add some entries to it */
    mpi_errno = gcn_sch(comm_ptr, &newcommp->context_id, &newcommp->recvcontext_id, s);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* finally, kick off the schedule and give the caller a request */
    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, req);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

  fn_exit:
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GET_CONTEXTID_NONBLOCK);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
1248
1249 #undef FUNCNAME
1250 #define FUNCNAME MPIR_Get_intercomm_contextid_nonblock
1251 #undef FCNAME
1252 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Nonblocking intercomm context-id allocation.  Schedule: (1) allocate an id
 * collectively over the local group (stored in newcommp->context_id), (2) the
 * local leader (rank 0) exchanges ids with the remote leader over the
 * intercomm, (3) the remote id is broadcast within the local group into
 * newcommp->recvcontext_id.  Returns the started schedule's request in *req;
 * neither id is valid until the request completes. */
int MPIR_Get_intercomm_contextid_nonblock(MPID_Comm *comm_ptr, MPID_Comm *newcommp, MPID_Request **req)
{
    int mpi_errno = MPI_SUCCESS;
    int tag;
    MPID_Sched_t s;
    MPID_Comm *lcomm = NULL;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID_NONBLOCK);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID_NONBLOCK);

    /* do as much local setup as possible */
    if (!comm_ptr->local_comm) {
        mpi_errno = MPIR_Setup_intercomm_localcomm(comm_ptr);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }
    lcomm = comm_ptr->local_comm;

    /* now create a schedule */
    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    mpi_errno = MPID_Sched_create(&s);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* add some entries to it */

    /* first get a context ID over the local comm */
    mpi_errno = gcn_sch(lcomm, &newcommp->context_id, NULL, s);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    MPID_SCHED_BARRIER(s);

    if (comm_ptr->rank == 0) {
        /* leaders swap ids; on an intercomm, rank 0 here is the remote
         * group's leader.  recvcontext_id is pre-set to -1 as a garbage
         * value until the recv lands. */
        newcommp->recvcontext_id = -1;
        mpi_errno = MPID_Sched_recv(&newcommp->recvcontext_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, comm_ptr, s);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        mpi_errno = MPID_Sched_send(&newcommp->context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, comm_ptr, s);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPID_SCHED_BARRIER(s);
    }

    /* distribute the remote id to all local processes */
    mpi_errno = lcomm->coll_fns->Ibcast(&newcommp->recvcontext_id, 1,
                                        MPIR_CONTEXT_ID_T_DATATYPE, 0, lcomm, s);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* finally, kick off the schedule and give the caller a request */
    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, req);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* NOTE: success and failure share this exit path */
  fn_fail:
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID_NONBLOCK);
    return mpi_errno;
}
1305
1306
1307 /* Get a context for a new intercomm. There are two approaches
1308 here (for MPI-1 codes only)
1309 (a) Each local group gets a context; the groups exchange, and
1310 the low value is accepted and the high one returned. This
1311 works because the context ids are taken from the same pool.
1312 (b) Form a temporary intracomm over all processes and use that
1313 with the regular algorithm.
1314
1315 In some ways, (a) is the better approach because it is the one that
1316 extends to MPI-2 (where the last step, returning the context, is
1317 not used and instead separate send and receive context id value
1318 are kept). For this reason, we'll use (a).
1319
1320 Even better is to separate the local and remote context ids. Then
1321 each group of processes can manage their context ids separately.
1322 */
1323 /*
1324 * This uses the thread-safe (if necessary) routine to get a context id
1325 * and does not need its own thread-safe version.
1326 */
1327 #undef FUNCNAME
1328 #define FUNCNAME MPIR_Get_intercomm_contextid
1329 #undef FCNAME
1330 #define FCNAME "MPIR_Get_intercomm_contextid"
/* Blocking intercomm context-id allocation (approach (a) described above):
 * each side allocates an id out of its local group, the two leaders exchange
 * ids, and the remote id is broadcast within the local group.  On return,
 * *recvcontext_id is the locally-allocated id (used for posted receives) and
 * *context_id is the remote group's id (used for sends). */
int MPIR_Get_intercomm_contextid( MPID_Comm *comm_ptr, MPIR_Context_id_t *context_id,
                                  MPIR_Context_id_t *recvcontext_id )
{
    MPIR_Context_id_t mycontext_id, remote_context_id;
    int mpi_errno = MPI_SUCCESS;
    int tag = 31567; /* FIXME - we need an internal tag or
                        communication channel.  Can we use a different
                        context instead?.  Or can we use the tag
                        provided in the intercomm routine? (not on a dup,
                        but in that case it can use the collective context) */
    int errflag = FALSE;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);

    if (!comm_ptr->local_comm) {
        /* Manufacture the local communicator */
        mpi_errno = MPIR_Setup_intercomm_localcomm( comm_ptr );
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

    /* collectively allocate an id within our local group */
    mpi_errno = MPIR_Get_contextid( comm_ptr->local_comm, &mycontext_id );
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    MPIU_Assert(mycontext_id != 0);

    /* MPIC routine uses an internal context id.  The local leads (process 0)
       exchange data */
    remote_context_id = -1;
    if (comm_ptr->rank == 0) {
        /* on an intercomm, peer rank 0 is the remote group's leader */
        mpi_errno = MPIC_Sendrecv( &mycontext_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, tag,
                                   &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, tag,
                                   comm_ptr->handle, MPI_STATUS_IGNORE );
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

    /* Make sure that all of the local processes now have this
       id */
    mpi_errno = MPIR_Bcast_impl( &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE,
                                 0, comm_ptr->local_comm, &errflag );
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
    /* The recvcontext_id must be the one that was allocated out of the local
     * group, not the remote group.  Otherwise we could end up posting two
     * MPI_ANY_SOURCE,MPI_ANY_TAG recvs on the same context IDs even though we
     * are attempting to post them for two separate communicators. */
    *context_id = remote_context_id;
    *recvcontext_id = mycontext_id;
  fn_fail:
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);
    return mpi_errno;
}
1382
1383 #undef FUNCNAME
1384 #define FUNCNAME MPIR_Free_contextid
1385 #undef FCNAME
1386 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Returns a context id to the pool by setting its bit in context_mask
 * (1 == available).  Dynamic-process ids, localcomm ids, and subcommunicator
 * ids are deliberately skipped: they either don't map onto the mask or are
 * freed when their parent communicator's id is freed.  Aborts on an
 * out-of-range index or an attempted double free. */
void MPIR_Free_contextid( MPIR_Context_id_t context_id )
{
    int idx, bitpos, raw_prefix;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_FREE_CONTEXTID);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_FREE_CONTEXTID);

    /* Convert the context id to the bit position */
    raw_prefix = MPID_CONTEXT_READ_FIELD(PREFIX,context_id);
    idx = raw_prefix / MPIR_CONTEXT_INT_BITS;     /* word within context_mask */
    bitpos = raw_prefix % MPIR_CONTEXT_INT_BITS;  /* bit within that word */

    /* --BEGIN ERROR HANDLING-- */
    if (idx < 0 || idx >= MPIR_MAX_CONTEXT_MASK) {
        MPID_Abort( 0, MPI_ERR_INTERN, 1,
                    "In MPIR_Free_contextid, idx is out of range" );
    }
    /* --END ERROR HANDLING-- */

    /* The low order bits for dynamic context IDs don't have meaning the
     * same way that low bits of non-dynamic ctx IDs do.  So we have to
     * check the dynamic case first. */
    if (MPID_CONTEXT_READ_FIELD(DYNAMIC_PROC, context_id)) {
        MPIU_DBG_MSG_D(COMM,VERBOSE,"skipping dynamic process ctx id, context_id=%d", context_id);
        goto fn_exit;
    }
    else { /* non-dynamic context ID */
        /* In terms of the context ID bit vector, intercomms and their constituent
         * localcomms have the same value.  To avoid a double-free situation we just
         * don't free the context ID for localcomms and assume it will be cleaned up
         * when the parent intercomm is itself completely freed. */
        if (MPID_CONTEXT_READ_FIELD(IS_LOCALCOMM, context_id)) {
#ifdef USE_DBG_LOGGING
            char dump_str[1024];
            MPIR_Comm_dump_context_id(context_id, dump_str, sizeof(dump_str));
            MPIU_DBG_MSG_S(COMM,VERBOSE,"skipping localcomm id: %s", dump_str);
#endif
            goto fn_exit;
        }
        else if (MPID_CONTEXT_READ_FIELD(SUBCOMM, context_id)) {
            MPIU_DBG_MSG_D(COMM,VERBOSE,"skipping non-parent communicator ctx id, context_id=%d", context_id);
            goto fn_exit;
        }
    }

    /* --BEGIN ERROR HANDLING-- */
    /* Check that this context id has been allocated (bit set means "free",
     * so a set bit here indicates a double free) */
    if ( (context_mask[idx] & (0x1 << bitpos)) != 0 ) {
#ifdef USE_DBG_LOGGING
        char dump_str[1024];
        MPIR_Comm_dump_context_id(context_id, dump_str, sizeof(dump_str));
        MPIU_DBG_MSG_S(COMM,VERBOSE,"context dump: %s", dump_str);
        MPIU_DBG_MSG_S(COMM,VERBOSE,"context mask = %s",MPIR_ContextMaskToStr());
#endif
        MPID_Abort( 0, MPI_ERR_INTERN, 1,
                    "In MPIR_Free_contextid, the context id is not in use" );
    }
    /* --END ERROR HANDLING-- */

    MPIU_THREAD_CS_ENTER(CONTEXTID,);
    /* MT: Note that this update must be done atomically in the multithreaded
       case.  In the "one, single lock" implementation, that lock is indeed
       held when this operation is called. */
    context_mask[idx] |= (0x1 << bitpos);
    MPIU_THREAD_CS_EXIT(CONTEXTID,);

    MPIU_DBG_MSG_FMT(COMM,VERBOSE,
                     (MPIU_DBG_FDEST,
                      "Freed context %d, mask[%d] bit %d (prefix=%#x)",
                      context_id, idx, bitpos, raw_prefix));
  fn_exit:
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_FREE_CONTEXTID);
}
1460
1461 /*
1462 * Copy a communicator, including creating a new context and copying the
1463 * virtual connection tables and clearing the various fields.
1464 * Does *not* copy attributes. If size is < the size of the local group
1465 * in the input communicator, copy only the first size elements.
1466 * If this process is not a member, return a null pointer in outcomm_ptr.
1467 * This is only supported in the case where the communicator is in
1468 * Intracomm (not an Intercomm). Note that this is all that is required
1469 * for cart_create and graph_create.
1470 *
1471 * Used by cart_create, graph_create, and dup_create
1472 */
1473 #undef FUNCNAME
1474 #define FUNCNAME MPIR_Comm_copy
1475 #undef FCNAME
1476 #define FCNAME "MPIR_Comm_copy"
/* Copies comm_ptr into a freshly created communicator with a new context id.
 * Collective over comm_ptr.  If size < the local group size, only the first
 * "size" ranks are kept and the remaining processes get *outcomm_ptr == 0
 * (their allocated recvcontext id is returned to the pool).  Attributes are
 * NOT copied.  See the block comment above for the supported cases. */
int MPIR_Comm_copy( MPID_Comm *comm_ptr, int size, MPID_Comm **outcomm_ptr )
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Context_id_t new_context_id, new_recvcontext_id;
    MPID_Comm *newcomm_ptr = NULL;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_COPY);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_COPY);

    /* Get a new context first.  We need this to be collective over the
       input communicator */
    /* If there is a context id cache in oldcomm, use it here.  Otherwise,
       use the appropriate algorithm to get a new context id.  Be careful
       of intercomms here */
    if (comm_ptr->comm_kind == MPID_INTERCOMM) {
        mpi_errno =
            MPIR_Get_intercomm_contextid(
                comm_ptr, &new_context_id, &new_recvcontext_id );
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }
    else {
        /* intracomms use a single id for both send and receive */
        mpi_errno = MPIR_Get_contextid( comm_ptr, &new_context_id );
        new_recvcontext_id = new_context_id;
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPIU_Assert(new_context_id != 0);
    }
    /* --BEGIN ERROR HANDLING-- */
    if (new_context_id == 0) {
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**toomanycomm" );
    }
    /* --END ERROR HANDLING-- */

    /* This is the local size, not the remote size, in the case of
       an intercomm */
    if (comm_ptr->rank >= size) {
        /* this process is excluded from the copy */
        *outcomm_ptr = 0;
        /* always free the recvcontext ID, never the "send" ID */
        MPIR_Free_contextid(new_recvcontext_id);
        goto fn_exit;
    }

    /* We're left with the processes that will have a non-null communicator.
       Create the object, initialize the data, and return the result */

    mpi_errno = MPIR_Comm_create( &newcomm_ptr );
    if (mpi_errno) goto fn_fail;

    newcomm_ptr->context_id     = new_context_id;
    newcomm_ptr->recvcontext_id = new_recvcontext_id;

    /* Save the kind of the communicator */
    newcomm_ptr->comm_kind  = comm_ptr->comm_kind;
    newcomm_ptr->local_comm = 0;

    /* There are two cases here - size is the same as the old communicator,
       or it is smaller.  If the size is the same, we can just add a reference.
       Otherwise, we need to create a new VCRT.  Note that this is the
       test that matches the test on rank above. */
    if (size == comm_ptr->local_size) {
        /* Duplicate the VCRT references */
        MPID_VCRT_Add_ref( comm_ptr->vcrt );
        newcomm_ptr->vcrt = comm_ptr->vcrt;
        newcomm_ptr->vcr  = comm_ptr->vcr;
    }
    else {
        int i;
        /* The "remote" vcr gets the shortened vcrt */
        MPID_VCRT_Create( size, &newcomm_ptr->vcrt );
        MPID_VCRT_Get_ptr( newcomm_ptr->vcrt,
                           &newcomm_ptr->vcr );
        for (i=0; i<size; i++) {
            /* For rank i in the new communicator, find the corresponding
               rank in the input communicator */
            MPID_VCR_Dup( comm_ptr->vcr[i], &newcomm_ptr->vcr[i] );
        }
    }

    /* If it is an intercomm, duplicate the local vcrt references */
    if (comm_ptr->comm_kind == MPID_INTERCOMM) {
        MPID_VCRT_Add_ref( comm_ptr->local_vcrt );
        newcomm_ptr->local_vcrt = comm_ptr->local_vcrt;
        newcomm_ptr->local_vcr  = comm_ptr->local_vcr;
    }

    /* Set the sizes and ranks */
    newcomm_ptr->rank = comm_ptr->rank;
    if (comm_ptr->comm_kind == MPID_INTERCOMM) {
        newcomm_ptr->local_size   = comm_ptr->local_size;
        newcomm_ptr->remote_size  = comm_ptr->remote_size;
        newcomm_ptr->is_low_group = comm_ptr->is_low_group;
    }
    else {
        newcomm_ptr->local_size  = size;
        newcomm_ptr->remote_size = size;
    }

    /* Inherit the error handler (if any) */
    MPIU_THREAD_CS_ENTER(MPI_OBJ, comm_ptr);
    newcomm_ptr->errhandler = comm_ptr->errhandler;
    if (comm_ptr->errhandler) {
        MPIR_Errhandler_add_ref( comm_ptr->errhandler );
    }
    MPIU_THREAD_CS_EXIT(MPI_OBJ, comm_ptr);

    /* FIXME do we want to copy coll_fns here? */

    mpi_errno = MPIR_Comm_commit(newcomm_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* Start with no attributes on this communicator */
    newcomm_ptr->attributes = 0;
    *outcomm_ptr = newcomm_ptr;

    /* NOTE: success and failure share this exit path */
  fn_fail:
  fn_exit:

    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_COPY);

    return mpi_errno;
}
1597
1598 /* Copy a communicator, including copying the virtual connection tables and
1599 * clearing the various fields. Does *not* allocate a context ID or commit the
1600 * communicator. Does *not* copy attributes.
1601 *
1602 * Used by comm_idup.
1603 */
1604 #undef FUNCNAME
1605 #define FUNCNAME MPIR_Comm_copy_data
1606 #undef FCNAME
1607 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Duplicates comm_ptr's data (kind, sizes, rank, VCRT references, error
 * handler) into a newly created communicator WITHOUT allocating a context id;
 * both context id fields are set to an obvious garbage value so misuse is
 * caught early.  Attributes are not copied.  Used by comm_idup, which fills
 * in the real ids later. */
int MPIR_Comm_copy_data(MPID_Comm *comm_ptr, MPID_Comm **outcomm_ptr)
{
    MPID_Comm *dup_ptr = NULL;
    int mpi_errno = MPI_SUCCESS;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_COPY_DATA);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_COPY_DATA);

    mpi_errno = MPIR_Comm_create(&dup_ptr);
    if (mpi_errno) goto fn_fail;

    /* use a large garbage value to ensure errors are caught more easily */
    dup_ptr->context_id = 32767;
    dup_ptr->recvcontext_id = 32767;

    /* same kind as the original; no local comm manufactured yet */
    dup_ptr->comm_kind = comm_ptr->comm_kind;
    dup_ptr->local_comm = 0;

    /* share the virtual connection table by reference */
    MPID_VCRT_Add_ref(comm_ptr->vcrt);
    dup_ptr->vcrt = comm_ptr->vcrt;
    dup_ptr->vcr = comm_ptr->vcr;

    /* intercomms also carry a local-group table; share that too */
    if (comm_ptr->comm_kind == MPID_INTERCOMM) {
        MPID_VCRT_Add_ref(comm_ptr->local_vcrt);
        dup_ptr->local_vcrt = comm_ptr->local_vcrt;
        dup_ptr->local_vcr = comm_ptr->local_vcr;
    }

    /* mirror rank and size information */
    dup_ptr->rank = comm_ptr->rank;
    dup_ptr->local_size = comm_ptr->local_size;
    dup_ptr->remote_size = comm_ptr->remote_size;
    dup_ptr->is_low_group = comm_ptr->is_low_group; /* only relevant for intercomms */

    /* Inherit the error handler (if any) */
    MPIU_THREAD_CS_ENTER(MPI_OBJ, comm_ptr);
    dup_ptr->errhandler = comm_ptr->errhandler;
    if (comm_ptr->errhandler != NULL) {
        MPIR_Errhandler_add_ref(comm_ptr->errhandler);
    }
    MPIU_THREAD_CS_EXIT(MPI_OBJ, comm_ptr);

    /* FIXME do we want to copy coll_fns here? */

    mpi_errno = MPIR_Comm_commit(dup_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* Start with no attributes on this communicator */
    dup_ptr->attributes = 0;
    *outcomm_ptr = dup_ptr;

  fn_fail:
  fn_exit:
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_COPY_DATA);
    return mpi_errno;
}
1667 /* Common body between MPIR_Comm_release and MPIR_comm_release_always. This
1668 * helper function frees the actual MPID_Comm structure and any associated
 * storage. It also releases any references to other objects, such as the VCRT.
1670 * This function should only be called when the communicator's reference count
1671 * has dropped to 0.
1672 *
1673 * !!! This routine should *never* be called outside of MPIR_Comm_release{,_always} !!!
1674 */
1675 #undef FUNCNAME
1676 #define FUNCNAME MPIR_Comm_delete_internal
1677 #undef FCNAME
1678 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Destroy a communicator whose reference count has dropped to zero.
 *
 * comm_ptr     - the communicator to tear down; its reference count must
 *                already be 0 (asserted below)
 * isDisconnect - passed through to MPID_VCRT_Release and to the recursive
 *                MPIR_Comm_release calls on subcommunicators; presumably
 *                set when tearing down a dynamic-process connection --
 *                confirm semantics at the call sites
 *
 * Returns MPI_SUCCESS on success, or the error code produced by a user
 * attribute delete callback or a device hook.  If an attribute delete
 * callback fails, the communicator is NOT freed: a reference is re-added
 * and the error is returned to the caller. */
int MPIR_Comm_delete_internal(MPID_Comm * comm_ptr, int isDisconnect)
{
    int in_use;
    int mpi_errno = MPI_SUCCESS;
    MPID_MPI_STATE_DECL(MPID_STATE_COMM_DELETE_INTERNAL);

    MPID_MPI_FUNC_ENTER(MPID_STATE_COMM_DELETE_INTERNAL);

    MPIU_Assert(MPIU_Object_get_ref(comm_ptr) == 0); /* sanity check */

    /* Remove the attributes, executing the attribute delete routine.
       Do this only if the attribute functions are defined.
       This must be done first, because if freeing the attributes
       returns an error, the communicator is not freed */
    if (MPIR_Process.attr_free && comm_ptr->attributes) {
        /* Temporarily add a reference to this communicator because
           the attr_free code requires a valid communicator */
        MPIU_Object_add_ref( comm_ptr );
        mpi_errno = MPIR_Process.attr_free( comm_ptr->handle,
                                            &comm_ptr->attributes );
        /* Release the temporary reference added before the call to
           attr_free */
        MPIU_Object_release_ref( comm_ptr, &in_use);
    }

    /* If the attribute delete functions return failure, the
       communicator must not be freed. That is the reason for the
       test on mpi_errno here. */
    if (mpi_errno == MPI_SUCCESS) {
        /* If this communicator is our parent, and we're disconnecting
           from the parent, mark that fact */
        if (MPIR_Process.comm_parent == comm_ptr)
            MPIR_Process.comm_parent = NULL;

        /* Notify the device that the communicator is about to be
           destroyed */
        mpi_errno = MPID_Dev_comm_destroy_hook(comm_ptr);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        /* release our reference to the collops structure, comes after the
         * destroy_hook to allow the device to manage these vtables in a custom
         * fashion */
        if (comm_ptr->coll_fns && --comm_ptr->coll_fns->ref_count == 0) {
            MPIU_Free(comm_ptr->coll_fns);
            comm_ptr->coll_fns = NULL;
        }

        /* Free the VCRT */
        mpi_errno = MPID_VCRT_Release(comm_ptr->vcrt, isDisconnect);
        if (mpi_errno != MPI_SUCCESS) {
            MPIU_ERR_POP(mpi_errno);
        }
        /* Intercommunicators additionally own a local VCRT and possibly a
           cached local intracommunicator; release those too. */
        if (comm_ptr->comm_kind == MPID_INTERCOMM) {
            mpi_errno = MPID_VCRT_Release(
                comm_ptr->local_vcrt, isDisconnect);
            if (mpi_errno != MPI_SUCCESS) {
                MPIU_ERR_POP(mpi_errno);
            }
            if (comm_ptr->local_comm)
                MPIR_Comm_release(comm_ptr->local_comm, isDisconnect );
        }

        /* Free the local and remote groups, if they exist */
        if (comm_ptr->local_group)
            MPIR_Group_release(comm_ptr->local_group);
        if (comm_ptr->remote_group)
            MPIR_Group_release(comm_ptr->remote_group);

        /* free the intra/inter-node communicators, if they exist */
        if (comm_ptr->node_comm)
            MPIR_Comm_release(comm_ptr->node_comm, isDisconnect);
        if (comm_ptr->node_roots_comm)
            MPIR_Comm_release(comm_ptr->node_roots_comm, isDisconnect);
        if (comm_ptr->intranode_table != NULL)
            MPIU_Free(comm_ptr->intranode_table);
        if (comm_ptr->internode_table != NULL)
            MPIU_Free(comm_ptr->internode_table);

        /* Free the context value. This should come after freeing the
         * intra/inter-node communicators since those free calls won't
         * release this context ID and releasing this before then could lead
         * to races once we make threading finer grained. */
        /* This must be the recvcontext_id (i.e. not the (send)context_id)
         * because in the case of intercommunicators the send context ID is
         * allocated out of the remote group's bit vector, not ours. */
        MPIR_Free_contextid( comm_ptr->recvcontext_id );

        /* We need to release the error handler */
        /* no MPI_OBJ CS needed */
        /* Builtin error handlers (e.g. MPI_ERRORS_ARE_FATAL) are never
           reference-counted or freed, hence the HANDLE_KIND_BUILTIN test. */
        if (comm_ptr->errhandler &&
            ! (HANDLE_GET_KIND(comm_ptr->errhandler->handle) ==
               HANDLE_KIND_BUILTIN) ) {
            int errhInuse;
            MPIR_Errhandler_release_ref( comm_ptr->errhandler,&errhInuse);
            if (!errhInuse) {
                MPIU_Handle_obj_free( &MPID_Errhandler_mem,
                                      comm_ptr->errhandler );
            }
        }

        /* Remove from the list of active communicators if
           we are supporting message-queue debugging. We make this
           conditional on having debugger support since the
           operation is not constant-time */
        MPIR_COMML_FORGET( comm_ptr );

        /* Check for predefined communicators - these should not
           be freed */
        if (! (HANDLE_GET_KIND(comm_ptr->handle) == HANDLE_KIND_BUILTIN) )
            MPIU_Handle_obj_free( &MPID_Comm_mem, comm_ptr );
    }
    else {
        /* If the user attribute free function returns an error,
           then do not free the communicator */
        MPIR_Comm_add_ref( comm_ptr );
    }

 fn_exit:
    MPID_MPI_FUNC_EXIT(MPID_STATE_COMM_DELETE_INTERNAL);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
1802
1803 /* Release a reference to a communicator. If there are no pending
1804 references, delete the communicator and recover all storage and
1805 context ids. This version of the function always manipulates the reference
1806 counts, even for predefined objects. */
1807 #undef FUNCNAME
1808 #define FUNCNAME MPIR_Comm_release_always
1809 #undef FCNAME
1810 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Unconditionally decrement the communicator's reference count and, if it
 * reaches zero, destroy the communicator via MPIR_Comm_delete_internal.
 *
 * comm_ptr     - communicator to release (may be a builtin such as
 *                MPI_COMM_WORLD/MPI_COMM_SELF; those ARE ref-counted here)
 * isDisconnect - forwarded to MPIR_Comm_delete_internal
 *
 * Returns MPI_SUCCESS, or the error code propagated from
 * MPIR_Comm_delete_internal. */
int MPIR_Comm_release_always(MPID_Comm *comm_ptr, int isDisconnect)
{
    int mpi_errno = MPI_SUCCESS;
    int in_use;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);

    /* we want to short-circuit any optimization that avoids reference counting
     * predefined communicators, such as MPI_COMM_WORLD or MPI_COMM_SELF. */
    MPIU_Object_release_ref_always(comm_ptr, &in_use);
    if (!in_use) {
        /* last reference dropped: actually free the object */
        mpi_errno = MPIR_Comm_delete_internal(comm_ptr, isDisconnect);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

  fn_exit:
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
1833
1834