1 /*
2 * Copyright (C) 2010. See COPYRIGHT in top-level directory.
3 */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7
8 #include <armci.h>
9 #include <armcix.h>
10 #include <armci_internals.h>
11 #include <debug.h>
12
13
/** The ARMCI world group.  This is accessed from outside via
  * ARMCI_Group_get_world.  Other routines in this file compare a group's
  * communicator against ARMCI_GROUP_WORLD.comm and use ARMCI_GROUP_WORLD.size
  * to size the rank-translation caches.
  *
  * Zero-initialized here; the code that assigns its communicator is not in
  * this file (presumably ARMCI initialization -- TODO confirm).
  */
ARMCI_Group ARMCI_GROUP_WORLD = {0};
/** The current default group.  Used as the parent group by ARMCI_Group_create
  * and read/written via ARMCI_Group_get_default / ARMCI_Group_set_default.
  */
ARMCI_Group ARMCI_GROUP_DEFAULT = {0};
19
20
21 /** Initialize an ARMCI group's remaining fields using the communicator field.
22 */
ARMCII_Group_init_from_comm(ARMCI_Group * group)23 void ARMCII_Group_init_from_comm(ARMCI_Group *group) {
24 if (group->comm != MPI_COMM_NULL) {
25 MPI_Comm_size(group->comm, &group->size);
26 MPI_Comm_rank(group->comm, &group->rank);
27
28 } else {
29 group->rank = -1;
30 group->size = 0;
31 }
32
33 /* If noncollective groups are in use, create a separate communicator that
34 can be used for noncollective group creation with this group as the parent.
35 This ensures that calls to MPI_Intercomm_create can't clash with any user
36 communication. */
37
38 if (ARMCII_GLOBAL_STATE.noncollective_groups && group->comm != MPI_COMM_NULL)
39 MPI_Comm_dup(group->comm, &group->noncoll_pgroup_comm);
40 else
41 group->noncoll_pgroup_comm = MPI_COMM_NULL;
42
43 /* Check if translation caching is enabled */
44 if (ARMCII_GLOBAL_STATE.cache_rank_translation) {
45 if (group->comm != MPI_COMM_NULL) {
46 int *ranks, i;
47 MPI_Group world_group, sub_group;
48
49 group->abs_to_grp = malloc(sizeof(int)*ARMCI_GROUP_WORLD.size);
50 group->grp_to_abs = malloc(sizeof(int)*group->size);
51 ranks = malloc(sizeof(int)*ARMCI_GROUP_WORLD.size);
52
53 ARMCII_Assert(group->abs_to_grp != NULL && group->grp_to_abs != NULL && ranks != NULL);
54
55 for (i = 0; i < ARMCI_GROUP_WORLD.size; i++)
56 ranks[i] = i;
57
58 MPI_Comm_group(ARMCI_GROUP_WORLD.comm, &world_group);
59 MPI_Comm_group(group->comm, &sub_group);
60
61 MPI_Group_translate_ranks(sub_group, group->size, ranks, world_group, group->grp_to_abs);
62 MPI_Group_translate_ranks(world_group, ARMCI_GROUP_WORLD.size, ranks, sub_group, group->abs_to_grp);
63
64 MPI_Group_free(&world_group);
65 MPI_Group_free(&sub_group);
66
67 free(ranks);
68 }
69 }
70
71 /* Translation caching is disabled */
72 else {
73 group->abs_to_grp = NULL;
74 group->grp_to_abs = NULL;
75 }
76 }
77
78
79 /** Create an ARMCI group that contains a subset of the nodes in the current
80 * default group. Collective across the default group.
81 *
82 * @param[in] grp_size Number of entries in pid_list.
83 * @param[in] pid_list List of process ids that will be in the new group.
84 * @param[out] armci_grp_out The new ARMCI group.
85 * @param[in] armci_grp_parent The parent of the new ARMCI group.
86 */
ARMCI_Group_create(int grp_size,int * pid_list,ARMCI_Group * group_out)87 void ARMCI_Group_create(int grp_size, int *pid_list, ARMCI_Group *group_out) {
88 ARMCI_Group_create_child(grp_size, pid_list, group_out, &ARMCI_GROUP_DEFAULT);
89 }
90
91
92 /** Create an ARMCI group that contains a subset of the nodes in the parent
93 * group. Collective across output group.
94 *
95 * @param[in] grp_size Number of entries in pid_list.
96 * @param[in] pid_list List of process ids that will be in the new group.
97 * @param[out] armci_grp_out The new ARMCI group, only valid on group members.
98 * @param[in] armci_grp_parent The parent of the new ARMCI group.
99 */
ARMCI_Group_create_comm_collective(int grp_size,int * pid_list,ARMCI_Group * armci_grp_out,ARMCI_Group * armci_grp_parent)100 static inline void ARMCI_Group_create_comm_collective(int grp_size, int *pid_list, ARMCI_Group *armci_grp_out,
101 ARMCI_Group *armci_grp_parent) {
102
103 MPI_Group mpi_grp_parent;
104 MPI_Group mpi_grp_child;
105
106 MPI_Comm_group(armci_grp_parent->comm, &mpi_grp_parent);
107 MPI_Group_incl(mpi_grp_parent, grp_size, pid_list, &mpi_grp_child);
108
109 MPI_Comm_create(armci_grp_parent->comm, mpi_grp_child, &armci_grp_out->comm);
110
111 MPI_Group_free(&mpi_grp_parent);
112 MPI_Group_free(&mpi_grp_child);
113 }
114
115
116 /** Create an ARMCI group that contains a subset of the nodes in the parent
117 * group. Collective across output group.
118 *
119 * @param[in] grp_size Number of entries in pid_list.
120 * @param[in] pid_list Sorted list of process ids that will be in the new group.
121 * @param[out] armci_grp_out The new ARMCI group, only valid on group members.
122 * @param[in] armci_grp_parent The parent of the new ARMCI group.
123 */
ARMCI_Group_create_comm_noncollective(int grp_size,int * pid_list,ARMCI_Group * armci_grp_out,ARMCI_Group * armci_grp_parent)124 static inline void ARMCI_Group_create_comm_noncollective(int grp_size, int *pid_list, ARMCI_Group *armci_grp_out,
125 ARMCI_Group *armci_grp_parent) {
126
127 const int INTERCOMM_TAG = 42;
128 int i, grp_me, me, nproc, merge_size;
129 MPI_Comm pgroup, inter_pgroup;
130
131 me = armci_grp_parent->rank;
132 nproc = armci_grp_parent->size;
133
134 /* CHECK: If I'm not a member, return COMM_NULL */
135 grp_me = -1;
136 for (i = 0; i < grp_size; i++) {
137 if (pid_list[i] == me) {
138 grp_me = i;
139 break;
140 }
141 }
142
143 if (grp_me < 0) {
144 armci_grp_out->comm = MPI_COMM_NULL;
145 return;
146 }
147
148 /* CASE: Group size 1 */
149 else if (grp_size == 1 && pid_list[0] == me) {
150 MPI_Comm_dup(MPI_COMM_SELF, &armci_grp_out->comm);
151 return;
152 }
153
154 pgroup = MPI_COMM_SELF;
155
156 /* Recursively merge adjacent groups until only one group remains. */
157 for (merge_size = 1; merge_size < grp_size; merge_size *= 2) {
158 int gid = grp_me / merge_size;
159 MPI_Comm pgroup_old = pgroup;
160
161 if (gid % 2 == 0) {
162 /* Check if right partner doesn't exist */
163 if ((gid+1)*merge_size >= grp_size)
164 continue;
165
166 MPI_Intercomm_create(pgroup, 0, armci_grp_parent->noncoll_pgroup_comm, pid_list[(gid+1)*merge_size], INTERCOMM_TAG, &inter_pgroup);
167 MPI_Intercomm_merge(inter_pgroup, 0 /* LOW */, &pgroup);
168 } else {
169 MPI_Intercomm_create(pgroup, 0, armci_grp_parent->noncoll_pgroup_comm, pid_list[(gid-1)*merge_size], INTERCOMM_TAG, &inter_pgroup);
170 MPI_Intercomm_merge(inter_pgroup, 1 /* HIGH */, &pgroup);
171 }
172
173 MPI_Comm_free(&inter_pgroup);
174 if (pgroup_old != MPI_COMM_SELF) MPI_Comm_free(&pgroup_old);
175 }
176
177 armci_grp_out->comm = pgroup;
178 }
179
180
181 /** Create an ARMCI group that contains a subset of the nodes in the parent
182 * group. Collective.
183 *
184 * @param[in] grp_size Number of entries in pid_list.
185 * @param[in] pid_list Sorted list of process ids that will be in the new group.
186 * @param[out] armci_grp_out The new ARMCI group, only valid on group members.
187 * @param[in] armci_grp_parent The parent of the new ARMCI group.
188 */
ARMCI_Group_create_child(int grp_size,int * pid_list,ARMCI_Group * armci_grp_out,ARMCI_Group * armci_grp_parent)189 void ARMCI_Group_create_child(int grp_size, int *pid_list, ARMCI_Group *armci_grp_out,
190 ARMCI_Group *armci_grp_parent) {
191
192 if (ARMCII_GLOBAL_STATE.noncollective_groups)
193 ARMCI_Group_create_comm_noncollective(grp_size, pid_list, armci_grp_out, armci_grp_parent);
194 else
195 ARMCI_Group_create_comm_collective(grp_size, pid_list, armci_grp_out, armci_grp_parent);
196
197 ARMCII_Group_init_from_comm(armci_grp_out);
198 }
199
200
201 /** Free an ARMCI group. Collective across group.
202 *
203 * @param[in] group The group to be freed
204 */
ARMCI_Group_free(ARMCI_Group * group)205 void ARMCI_Group_free(ARMCI_Group *group) {
206 if (group->comm != MPI_COMM_NULL) {
207 MPI_Comm_free(&group->comm);
208
209 if (ARMCII_GLOBAL_STATE.noncollective_groups)
210 MPI_Comm_free(&group->noncoll_pgroup_comm);
211 }
212
213 /* If the group has translation caches, free them */
214 if (group->abs_to_grp != NULL)
215 free(group->abs_to_grp);
216 if (group->grp_to_abs != NULL)
217 free(group->grp_to_abs);
218
219 group->rank = -1;
220 group->size = 0;
221 }
222
223
224 /** Query the calling process' rank in a given group.
225 *
226 * @param[in] group Group to query on.
227 * @param[out] rank Location to store the rank.
228 * @return Zero on success, error code otherwise.
229 */
ARMCI_Group_rank(ARMCI_Group * group,int * rank)230 int ARMCI_Group_rank(ARMCI_Group *group, int *rank) {
231 *rank = group->rank;
232
233 if (*rank >= 0)
234 return 0;
235 else
236 return 1;
237 }
238
239
240 /** Query the size of the given group.
241 *
242 * @param[in] group Group to query.
243 * @param[out] size Variable to store the size in.
244 */
ARMCI_Group_size(ARMCI_Group * group,int * size)245 void ARMCI_Group_size(ARMCI_Group *group, int *size) {
246 *size = group->size;
247 }
248
249
250 /** Set the default group.
251 *
252 * @param[in] group The new default group
253 */
ARMCI_Group_set_default(ARMCI_Group * group)254 void ARMCI_Group_set_default(ARMCI_Group *group) {
255 ARMCI_GROUP_DEFAULT = *group;
256 }
257
258
259 /** Get the default group.
260 *
261 * @param[out] group_out Pointer to the default group.
262 */
ARMCI_Group_get_default(ARMCI_Group * group_out)263 void ARMCI_Group_get_default(ARMCI_Group *group_out) {
264 *group_out = ARMCI_GROUP_DEFAULT;
265 }
266
267
268 /** Fetch the world group.
269 *
270 * @param[out] group_out Output group.
271 */
ARMCI_Group_get_world(ARMCI_Group * group_out)272 void ARMCI_Group_get_world(ARMCI_Group *group_out) {
273 *group_out = ARMCI_GROUP_WORLD;
274 }
275
276
277 /** Translate a group process rank to the corresponding process rank in the
278 * ARMCI world group.
279 *
280 * @param[in] group Group to translate from.
281 * @param[in] group_rank Rank of the process in group.
282 */
ARMCI_Absolute_id(ARMCI_Group * group,int group_rank)283 int ARMCI_Absolute_id(ARMCI_Group *group, int group_rank) {
284 int world_rank;
285 MPI_Group world_group, sub_group;
286
287 ARMCII_Assert(group_rank >= 0 && group_rank < group->size);
288
289 /* Check if group is the world group */
290 if (group->comm == ARMCI_GROUP_WORLD.comm)
291 world_rank = group_rank;
292
293 /* Check for translation cache */
294 else if (group->grp_to_abs != NULL)
295 world_rank = group->grp_to_abs[group_rank];
296
297 else {
298 /* Translate the rank */
299 MPI_Comm_group(ARMCI_GROUP_WORLD.comm, &world_group);
300 MPI_Comm_group(group->comm, &sub_group);
301
302 MPI_Group_translate_ranks(sub_group, 1, &group_rank, world_group, &world_rank);
303
304 MPI_Group_free(&world_group);
305 MPI_Group_free(&sub_group);
306 }
307
308 /* Check if translation failed */
309 if (world_rank == MPI_UNDEFINED)
310 return -1;
311 else
312 return world_rank;
313 }
314
315
316 /** Split a parent group into multiple child groups. This is similar to
317 * MPI_Comm_split. Collective across the parent group.
318 *
319 * @param[in] parent The parent group.
320 * @param[in] color The id number of the new group. Processes are grouped
321 * together so allthat give the same color will be placed
322 * in the same new group.
323 * @param[in] key Relative ordering of processes in the new group.
324 * @param[out] new_group Pointer to a handle where group info will be stored.
325 */
ARMCIX_Group_split(ARMCI_Group * parent,int color,int key,ARMCI_Group * new_group)326 int ARMCIX_Group_split(ARMCI_Group *parent, int color, int key, ARMCI_Group *new_group) {
327 int err;
328
329 err = MPI_Comm_split(parent->comm, color, key, &new_group->comm);
330
331 if (err != MPI_SUCCESS)
332 return err;
333
334 ARMCII_Group_init_from_comm(new_group);
335
336 return 0;
337 }
338
339
340 /** Duplicate an ARMCI group. Collective across the parent group.
341 *
342 * @param[in] parent The parent group.
343 * @param[in] color The id number of the new group. Processes are grouped
344 * together so allthat give the same color will be placed
345 * in the same new group.
346 * @param[in] key Relative ordering of processes in the new group.
347 * @param[out] new_group Pointer to a handle where group info will be stored.
348 */
ARMCIX_Group_dup(ARMCI_Group * parent,ARMCI_Group * new_group)349 int ARMCIX_Group_dup(ARMCI_Group *parent, ARMCI_Group *new_group) {
350 int err;
351
352 err = MPI_Comm_dup(parent->comm, &new_group->comm);
353
354 if (err != MPI_SUCCESS)
355 return err;
356
357 ARMCII_Group_init_from_comm(new_group);
358
359 return 0;
360 }
361