1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2 /*
3  *  (C) 2011 by Argonne National Laboratory.
4  *      See COPYRIGHT in top-level directory.
5  */
6 
7 #include "mpidimpl.h"
8 #include "mpl_utlist.h"
9 
/* Forward declarations of the file-local callbacks defined further down:
   register_hook_finalize frees the registered hook lists, while comm_created
   and comm_destroyed are the hooks this file installs on itself to keep
   comm_list up to date. */
static int register_hook_finalize(void *param);
static int comm_created(MPID_Comm *comm, void *param);
static int comm_destroyed(MPID_Comm *comm, void *param);
13 
/* Macros and head for the list of communicators tracked by this file.
   Elements are chained through each communicator's ch.next / ch.prev fields
   using the MPL_DL_*_NP list macros:
     COMM_ADD(comm)    - prepend comm to comm_list
     COMM_DEL(comm)    - remove comm from comm_list
     COMM_FOREACH(elt) - loop header that visits every communicator in
                         comm_list, binding each one to elt in turn */
#define COMM_ADD(comm) MPL_DL_PREPEND_NP(comm_list, comm, ch.next, ch.prev)
#define COMM_DEL(comm) MPL_DL_DELETE_NP(comm_list, comm, ch.next, ch.prev)
#define COMM_FOREACH(elt) MPL_DL_FOREACH_NP(comm_list, elt, ch.next, ch.prev)
static MPID_Comm *comm_list = NULL;
19 
/* One registered communicator hook: a callback together with the opaque
   argument that is handed back to it on every invocation.  Elements are
   chained into the create_hooks / destroy_hooks lists. */
typedef struct hook_elt hook_elt;

struct hook_elt
{
    /* callback; receives the communicator and this element's param */
    int (*hook_fn)(struct MPID_Comm *, void *);
    /* caller-supplied value passed as hook_fn's second argument */
    void *param;
    /* list links; the MPL_LL_* operations used in this file only name the
       list head and the element, so `prev` is never assigned here */
    hook_elt *prev;
    hook_elt *next;
};
27 
/* Heads of the registered hook lists; both start out empty.  Entries are
   added by the MPIDI_CH3U_Comm_register_*_hook functions and released by
   register_hook_finalize. */
static hook_elt *create_hooks = NULL;   /* run by MPIDI_CH3I_Comm_create_hook */
static hook_elt *destroy_hooks = NULL;  /* run by MPIDI_CH3I_Comm_destroy_hook */
30 
31 #undef FUNCNAME
32 #define FUNCNAME MPIDI_CH3U_Comm_init
33 #undef FCNAME
34 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIDI_CH3I_Comm_init(void)35 int MPIDI_CH3I_Comm_init(void)
36 {
37     int mpi_errno = MPI_SUCCESS;
38     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMM_INIT);
39 
40     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_COMM_INIT);
41 
42     MPIR_Add_finalize(register_hook_finalize, NULL, MPIR_FINALIZE_CALLBACK_PRIO-1);
43 
44     /* register hooks for keeping track of communicators */
45     mpi_errno = MPIDI_CH3U_Comm_register_create_hook(comm_created, NULL);
46     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
47     mpi_errno = MPIDI_CH3U_Comm_register_destroy_hook(comm_destroyed, NULL);
48     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
49 
50  fn_exit:
51     MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_COMM_INIT);
52     return mpi_errno;
53  fn_fail:
54     goto fn_exit;
55 }
56 
57 
58 #undef FUNCNAME
59 #define FUNCNAME MPIDI_CH3U_Comm_create_hook
60 #undef FCNAME
61 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIDI_CH3I_Comm_create_hook(MPID_Comm * comm)62 int MPIDI_CH3I_Comm_create_hook(MPID_Comm *comm)
63 {
64     int mpi_errno = MPI_SUCCESS;
65     hook_elt *elt;
66 
67     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMM_CREATE_HOOK);
68 
69     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_COMM_CREATE_HOOK);
70 
71     MPL_LL_FOREACH(create_hooks, elt) {
72         mpi_errno = elt->hook_fn(comm, elt->param);
73         if (mpi_errno) MPIU_ERR_POP(mpi_errno);;
74     }
75 
76  fn_exit:
77     MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_COMM_CREATE_HOOK);
78     return mpi_errno;
79  fn_fail:
80     goto fn_exit;
81 }
82 
83 #undef FUNCNAME
84 #define FUNCNAME MPIDI_CH3U_Comm_destroy_hook
85 #undef FCNAME
86 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIDI_CH3I_Comm_destroy_hook(MPID_Comm * comm)87 int MPIDI_CH3I_Comm_destroy_hook(MPID_Comm *comm)
88 {
89     int mpi_errno = MPI_SUCCESS;
90     hook_elt *elt;
91     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMM_DESTROY_HOOK);
92 
93     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_COMM_DESTROY_HOOK);
94 
95     MPL_LL_FOREACH(destroy_hooks, elt) {
96         mpi_errno = elt->hook_fn(comm, elt->param);
97         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
98     }
99 
100  fn_exit:
101     MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_COMM_DESTROY_HOOK);
102     return mpi_errno;
103  fn_fail:
104     goto fn_exit;
105 }
106 
107 
108 #undef FUNCNAME
109 #define FUNCNAME MPIDI_CH3U_Comm_register_create_hook
110 #undef FCNAME
111 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIDI_CH3U_Comm_register_create_hook(int (* hook_fn)(struct MPID_Comm *,void *),void * param)112 int MPIDI_CH3U_Comm_register_create_hook(int (*hook_fn)(struct MPID_Comm *, void *), void *param)
113 {
114     int mpi_errno = MPI_SUCCESS;
115     hook_elt *elt;
116     MPIU_CHKPMEM_DECL(1);
117     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_CREATE_HOOK);
118 
119     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_CREATE_HOOK);
120 
121     MPIU_CHKPMEM_MALLOC(elt, hook_elt *, sizeof(hook_elt), mpi_errno, "hook_elt");
122 
123     elt->hook_fn = hook_fn;
124     elt->param = param;
125 
126     MPL_LL_PREPEND(create_hooks, elt);
127 
128  fn_exit:
129     MPIU_CHKPMEM_COMMIT();
130     MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_CREATE_HOOK);
131     return mpi_errno;
132  fn_fail:
133     MPIU_CHKPMEM_REAP();
134     goto fn_exit;
135 }
136 
137 #undef FUNCNAME
138 #define FUNCNAME MPIDI_CH3U_Comm_register_destroy_hook
139 #undef FCNAME
140 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIDI_CH3U_Comm_register_destroy_hook(int (* hook_fn)(struct MPID_Comm *,void *),void * param)141 int MPIDI_CH3U_Comm_register_destroy_hook(int (*hook_fn)(struct MPID_Comm *, void *), void *param)
142 {
143     int mpi_errno = MPI_SUCCESS;
144     hook_elt *elt;
145     MPIU_CHKPMEM_DECL(1);
146     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_DESTROY_HOOK);
147 
148     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_DESTROY_HOOK);
149 
150     MPIU_CHKPMEM_MALLOC(elt, hook_elt *, sizeof(hook_elt), mpi_errno, "hook_elt");
151 
152     elt->hook_fn = hook_fn;
153     elt->param = param;
154 
155     MPL_LL_PREPEND(destroy_hooks, elt);
156 
157  fn_exit:
158     MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_DESTROY_HOOK);
159     return mpi_errno;
160  fn_fail:
161     MPIU_CHKPMEM_REAP();
162     goto fn_exit;
163 }
164 
165 #undef FUNCNAME
166 #define FUNCNAME register_hook_finalize
167 #undef FCNAME
168 #define FCNAME MPIU_QUOTE(FUNCNAME)
register_hook_finalize(void * param)169 static int register_hook_finalize(void *param)
170 {
171     int mpi_errno = MPI_SUCCESS;
172     hook_elt *elt, *tmp;
173     MPIDI_STATE_DECL(MPID_STATE_REGISTER_HOOK_FINALIZE);
174 
175     MPIDI_FUNC_ENTER(MPID_STATE_REGISTER_HOOK_FINALIZE);
176 
177     MPL_LL_FOREACH_SAFE(create_hooks, elt, tmp) {
178         MPL_LL_DELETE(create_hooks, elt);
179         MPIU_Free(elt);
180     }
181 
182     MPL_LL_FOREACH_SAFE(destroy_hooks, elt, tmp) {
183         MPL_LL_DELETE(destroy_hooks, elt);
184         MPIU_Free(elt);
185     }
186 
187  fn_exit:
188     MPIDI_FUNC_EXIT(MPID_STATE_REGISTER_HOOK_FINALIZE);
189     return mpi_errno;
190  fn_fail:
191     goto fn_exit;
192 }
193 
194 
195 #undef FUNCNAME
196 #define FUNCNAME comm_created
197 #undef FCNAME
198 #define FCNAME MPIU_QUOTE(FUNCNAME)
comm_created(MPID_Comm * comm,void * param)199 int comm_created(MPID_Comm *comm, void *param)
200 {
201     int mpi_errno = MPI_SUCCESS;
202     MPIDI_STATE_DECL(MPID_STATE_COMM_CREATED);
203 
204     MPIDI_FUNC_ENTER(MPID_STATE_COMM_CREATED);
205 
206     comm->ch.coll_active = TRUE;
207     comm->ch.anysource_enabled = TRUE;
208 
209     COMM_ADD(comm);
210 
211  fn_exit:
212     MPIDI_FUNC_EXIT(MPID_STATE_COMM_CREATED);
213     return mpi_errno;
214  fn_fail:
215     goto fn_exit;
216 }
217 
218 #undef FUNCNAME
219 #define FUNCNAME comm_destroyed
220 #undef FCNAME
221 #define FCNAME MPIU_QUOTE(FUNCNAME)
comm_destroyed(MPID_Comm * comm,void * param)222 int comm_destroyed(MPID_Comm *comm, void *param)
223 {
224     int mpi_errno = MPI_SUCCESS;
225     MPIDI_STATE_DECL(MPID_STATE_COMM_DESTROYED);
226 
227     MPIDI_FUNC_ENTER(MPID_STATE_COMM_DESTROYED);
228 
229     COMM_DEL(comm);
230     comm->ch.next = NULL;
231     comm->ch.prev = NULL;
232 
233  fn_exit:
234     MPIDI_FUNC_EXIT(MPID_STATE_COMM_DESTROYED);
235     return mpi_errno;
236  fn_fail:
237     goto fn_exit;
238 }
239 
240 
241 /* flag==TRUE iff a member of group is also a member of comm */
242 #undef FUNCNAME
243 #define FUNCNAME nonempty_intersection
244 #undef FCNAME
245 #define FCNAME MPIU_QUOTE(FUNCNAME)
nonempty_intersection(MPID_Comm * comm,MPID_Group * group,int * flag)246 static int nonempty_intersection(MPID_Comm *comm, MPID_Group *group, int *flag)
247 {
248     int mpi_errno = MPI_SUCCESS;
249     int i_g, i_c;
250     MPIDI_VC_t *vc_g, *vc_c;
251     MPIDI_STATE_DECL(MPID_STATE_NONEMPTY_INTERSECTION);
252 
253     MPIDI_FUNC_ENTER(MPID_STATE_NONEMPTY_INTERSECTION);
254 
255     /* handle common case fast */
256     if (comm == MPIR_Process.comm_world || comm == MPIR_Process.icomm_world) {
257         *flag = TRUE;
258         MPIU_DBG_MSG(CH3_OTHER, VERBOSE, "comm is comm_world or icomm_world");
259         goto fn_exit;
260     }
261     *flag = FALSE;
262 
263     /* FIXME: This algorithm assumes that the number of processes in group is
264        very small (like 1).  So doing a linear search for them in comm is better
265        than sorting the procs in comm and group then doing a binary search */
266 
267     for (i_g = 0; i_g < group->size; ++i_g) {
268         /* FIXME: This won't work for dynamic procs */
269         MPIDI_PG_Get_vc(MPIDI_Process.my_pg, group->lrank_to_lpid[i_g].lpid, &vc_g);
270         for (i_c = 0; i_c < comm->remote_size; ++i_c) {
271             MPIDI_Comm_get_vc(comm, i_c, &vc_c);
272             if (vc_g == vc_c) {
273                 *flag = TRUE;
274                 goto fn_exit;
275             }
276         }
277     }
278 
279  fn_exit:
280     MPIDI_FUNC_EXIT(MPID_STATE_NONEMPTY_INTERSECTION);
281     return mpi_errno;
282  fn_fail:
283     goto fn_exit;
284 }
285 
286 
287 #undef FUNCNAME
288 #define FUNCNAME MPIDI_CH3I_Comm_handle_failed_procs
289 #undef FCNAME
290 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIDI_CH3I_Comm_handle_failed_procs(MPID_Group * new_failed_procs)291 int MPIDI_CH3I_Comm_handle_failed_procs(MPID_Group *new_failed_procs)
292 {
293     int mpi_errno = MPI_SUCCESS;
294     MPID_Comm *comm;
295     int flag = FALSE;
296     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_COMM_HANDLE_FAILED_PROCS);
297 
298     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_COMM_HANDLE_FAILED_PROCS);
299 
300     /* mark communicators with new failed processes as collectively inactive and
301        disable posting anysource receives */
302     COMM_FOREACH(comm) {
303         /* if this comm is already collectively inactive and
304            anysources are disabled, there's no need to check */
305         if (!comm->ch.coll_active && !comm->ch.anysource_enabled)
306             continue;
307 
308         mpi_errno = nonempty_intersection(comm, new_failed_procs, &flag);
309         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
310 
311         if (flag) {
312             MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE,
313                              (MPIU_DBG_FDEST, "disabling AS and coll on communicator %p (%#08x)",
314                               comm, comm->handle));
315             comm->ch.coll_active = FALSE;
316             comm->ch.anysource_enabled = FALSE;
317         }
318     }
319 
320     /* Now that we've marked communicators with disable anysource, we
321        complete-with-an-error all anysource receives posted on those
322        communicators */
323     mpi_errno = MPIDI_CH3U_Complete_disabled_anysources();
324     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
325 
326  fn_exit:
327     MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_COMM_HANDLE_FAILED_PROCS);
328     return mpi_errno;
329  fn_fail:
330     goto fn_exit;
331 }
332