1 /*
2  * Copyright (C) 2010. See COPYRIGHT in top-level directory.
3  */
4 
5 #include <stdio.h>
6 #include <stdlib.h>
7 
8 #include <armci.h>
9 #include <armcix.h>
10 #include <armci_internals.h>
11 #include <debug.h>
12 
13 
/** The ARMCI world group.  This is accessed from outside via
  * ARMCI_Group_get_world.  Zero-initialized here; it is expected to be filled
  * in elsewhere before use (not visible in this file).
  */
ARMCI_Group ARMCI_GROUP_WORLD   = {0};
/** The current default ARMCI group.  Replaced by ARMCI_Group_set_default,
  * copied out by ARMCI_Group_get_default, and used as the parent group by
  * ARMCI_Group_create.
  */
ARMCI_Group ARMCI_GROUP_DEFAULT = {0};
19 
20 
21 /** Initialize an ARMCI group's remaining fields using the communicator field.
22   */
ARMCII_Group_init_from_comm(ARMCI_Group * group)23 void ARMCII_Group_init_from_comm(ARMCI_Group *group) {
24   if (group->comm != MPI_COMM_NULL) {
25     MPI_Comm_size(group->comm, &group->size);
26     MPI_Comm_rank(group->comm, &group->rank);
27 
28   } else {
29     group->rank = -1;
30     group->size =  0;
31   }
32 
33   /* If noncollective groups are in use, create a separate communicator that
34     can be used for noncollective group creation with this group as the parent.
35     This ensures that calls to MPI_Intercomm_create can't clash with any user
36     communication. */
37 
38   if (ARMCII_GLOBAL_STATE.noncollective_groups && group->comm != MPI_COMM_NULL)
39     MPI_Comm_dup(group->comm, &group->noncoll_pgroup_comm);
40   else
41     group->noncoll_pgroup_comm = MPI_COMM_NULL;
42 
43   /* Check if translation caching is enabled */
44   if (ARMCII_GLOBAL_STATE.cache_rank_translation) {
45     if (group->comm != MPI_COMM_NULL) {
46       int      *ranks, i;
47       MPI_Group world_group, sub_group;
48 
49       group->abs_to_grp = malloc(sizeof(int)*ARMCI_GROUP_WORLD.size);
50       group->grp_to_abs = malloc(sizeof(int)*group->size);
51       ranks = malloc(sizeof(int)*ARMCI_GROUP_WORLD.size);
52 
53       ARMCII_Assert(group->abs_to_grp != NULL && group->grp_to_abs != NULL && ranks != NULL);
54 
55       for (i = 0; i < ARMCI_GROUP_WORLD.size; i++)
56         ranks[i] = i;
57 
58       MPI_Comm_group(ARMCI_GROUP_WORLD.comm, &world_group);
59       MPI_Comm_group(group->comm, &sub_group);
60 
61       MPI_Group_translate_ranks(sub_group, group->size, ranks, world_group, group->grp_to_abs);
62       MPI_Group_translate_ranks(world_group, ARMCI_GROUP_WORLD.size, ranks, sub_group, group->abs_to_grp);
63 
64       MPI_Group_free(&world_group);
65       MPI_Group_free(&sub_group);
66 
67       free(ranks);
68     }
69   }
70 
71   /* Translation caching is disabled */
72   else {
73     group->abs_to_grp = NULL;
74     group->grp_to_abs = NULL;
75   }
76 }
77 
78 
79 /** Create an ARMCI group that contains a subset of the nodes in the current
80   * default group.  Collective across the default group.
81   *
82   * @param[in]  grp_size         Number of entries in pid_list.
83   * @param[in]  pid_list         List of process ids that will be in the new group.
84   * @param[out] armci_grp_out    The new ARMCI group.
85   * @param[in]  armci_grp_parent The parent of the new ARMCI group.
86   */
ARMCI_Group_create(int grp_size,int * pid_list,ARMCI_Group * group_out)87 void ARMCI_Group_create(int grp_size, int *pid_list, ARMCI_Group *group_out) {
88   ARMCI_Group_create_child(grp_size, pid_list, group_out, &ARMCI_GROUP_DEFAULT);
89 }
90 
91 
92 /** Create an ARMCI group that contains a subset of the nodes in the parent
93   * group. Collective across output group.
94   *
95   * @param[in]  grp_size         Number of entries in pid_list.
96   * @param[in]  pid_list         List of process ids that will be in the new group.
97   * @param[out] armci_grp_out    The new ARMCI group, only valid on group members.
98   * @param[in]  armci_grp_parent The parent of the new ARMCI group.
99   */
ARMCI_Group_create_comm_collective(int grp_size,int * pid_list,ARMCI_Group * armci_grp_out,ARMCI_Group * armci_grp_parent)100 static inline void ARMCI_Group_create_comm_collective(int grp_size, int *pid_list, ARMCI_Group *armci_grp_out,
101     ARMCI_Group *armci_grp_parent) {
102 
103   MPI_Group mpi_grp_parent;
104   MPI_Group mpi_grp_child;
105 
106   MPI_Comm_group(armci_grp_parent->comm, &mpi_grp_parent);
107   MPI_Group_incl(mpi_grp_parent, grp_size, pid_list, &mpi_grp_child);
108 
109   MPI_Comm_create(armci_grp_parent->comm, mpi_grp_child, &armci_grp_out->comm);
110 
111   MPI_Group_free(&mpi_grp_parent);
112   MPI_Group_free(&mpi_grp_child);
113 }
114 
115 
116 /** Create an ARMCI group that contains a subset of the nodes in the parent
117   * group. Collective across output group.
118   *
119   * @param[in]  grp_size         Number of entries in pid_list.
120   * @param[in]  pid_list         Sorted list of process ids that will be in the new group.
121   * @param[out] armci_grp_out    The new ARMCI group, only valid on group members.
122   * @param[in]  armci_grp_parent The parent of the new ARMCI group.
123   */
ARMCI_Group_create_comm_noncollective(int grp_size,int * pid_list,ARMCI_Group * armci_grp_out,ARMCI_Group * armci_grp_parent)124 static inline void ARMCI_Group_create_comm_noncollective(int grp_size, int *pid_list, ARMCI_Group *armci_grp_out,
125     ARMCI_Group *armci_grp_parent) {
126 
127   const int INTERCOMM_TAG = 42;
128   int       i, grp_me, me, nproc, merge_size;
129   MPI_Comm  pgroup, inter_pgroup;
130 
131   me    = armci_grp_parent->rank;
132   nproc = armci_grp_parent->size;
133 
134   /* CHECK: If I'm not a member, return COMM_NULL */
135   grp_me = -1;
136   for (i = 0; i < grp_size; i++) {
137     if (pid_list[i] == me) {
138       grp_me = i;
139       break;
140     }
141   }
142 
143   if (grp_me < 0) {
144     armci_grp_out->comm = MPI_COMM_NULL;
145     return;
146   }
147 
148   /* CASE: Group size 1 */
149   else if (grp_size == 1 && pid_list[0] == me) {
150     MPI_Comm_dup(MPI_COMM_SELF, &armci_grp_out->comm);
151     return;
152   }
153 
154   pgroup = MPI_COMM_SELF;
155 
156   /* Recursively merge adjacent groups until only one group remains.  */
157   for (merge_size = 1; merge_size < grp_size; merge_size *= 2) {
158     int      gid        = grp_me / merge_size;
159     MPI_Comm pgroup_old = pgroup;
160 
161     if (gid % 2 == 0) {
162       /* Check if right partner doesn't exist */
163       if ((gid+1)*merge_size >= grp_size)
164         continue;
165 
166       MPI_Intercomm_create(pgroup, 0, armci_grp_parent->noncoll_pgroup_comm, pid_list[(gid+1)*merge_size], INTERCOMM_TAG, &inter_pgroup);
167       MPI_Intercomm_merge(inter_pgroup, 0 /* LOW */, &pgroup);
168     } else {
169       MPI_Intercomm_create(pgroup, 0, armci_grp_parent->noncoll_pgroup_comm, pid_list[(gid-1)*merge_size], INTERCOMM_TAG, &inter_pgroup);
170       MPI_Intercomm_merge(inter_pgroup, 1 /* HIGH */, &pgroup);
171     }
172 
173     MPI_Comm_free(&inter_pgroup);
174     if (pgroup_old != MPI_COMM_SELF) MPI_Comm_free(&pgroup_old);
175   }
176 
177   armci_grp_out->comm = pgroup;
178 }
179 
180 
181 /** Create an ARMCI group that contains a subset of the nodes in the parent
182   * group. Collective.
183   *
184   * @param[in]  grp_size         Number of entries in pid_list.
185   * @param[in]  pid_list         Sorted list of process ids that will be in the new group.
186   * @param[out] armci_grp_out    The new ARMCI group, only valid on group members.
187   * @param[in]  armci_grp_parent The parent of the new ARMCI group.
188   */
ARMCI_Group_create_child(int grp_size,int * pid_list,ARMCI_Group * armci_grp_out,ARMCI_Group * armci_grp_parent)189 void ARMCI_Group_create_child(int grp_size, int *pid_list, ARMCI_Group *armci_grp_out,
190     ARMCI_Group *armci_grp_parent) {
191 
192   if (ARMCII_GLOBAL_STATE.noncollective_groups)
193     ARMCI_Group_create_comm_noncollective(grp_size, pid_list, armci_grp_out, armci_grp_parent);
194   else
195     ARMCI_Group_create_comm_collective(grp_size, pid_list, armci_grp_out, armci_grp_parent);
196 
197   ARMCII_Group_init_from_comm(armci_grp_out);
198 }
199 
200 
201 /** Free an ARMCI group.  Collective across group.
202   *
203   * @param[in] group The group to be freed
204   */
ARMCI_Group_free(ARMCI_Group * group)205 void ARMCI_Group_free(ARMCI_Group *group) {
206   if (group->comm != MPI_COMM_NULL) {
207     MPI_Comm_free(&group->comm);
208 
209     if (ARMCII_GLOBAL_STATE.noncollective_groups)
210       MPI_Comm_free(&group->noncoll_pgroup_comm);
211   }
212 
213   /* If the group has translation caches, free them */
214   if (group->abs_to_grp != NULL)
215     free(group->abs_to_grp);
216   if (group->grp_to_abs != NULL)
217     free(group->grp_to_abs);
218 
219   group->rank = -1;
220   group->size = 0;
221 }
222 
223 
224 /** Query the calling process' rank in a given group.
225   *
226   * @param[in]  group Group to query on.
227   * @param[out] rank  Location to store the rank.
228   * @return           Zero on success, error code otherwise.
229   */
ARMCI_Group_rank(ARMCI_Group * group,int * rank)230 int  ARMCI_Group_rank(ARMCI_Group *group, int *rank) {
231   *rank = group->rank;
232 
233   if (*rank >= 0)
234     return 0;
235   else
236     return 1;
237 }
238 
239 
240 /** Query the size of the given group.
241   *
242   * @param[in]  group Group to query.
243   * @param[out] size  Variable to store the size in.
244   */
ARMCI_Group_size(ARMCI_Group * group,int * size)245 void ARMCI_Group_size(ARMCI_Group *group, int *size) {
246   *size = group->size;
247 }
248 
249 
250 /** Set the default group.
251   *
252   * @param[in] group The new default group
253   */
ARMCI_Group_set_default(ARMCI_Group * group)254 void ARMCI_Group_set_default(ARMCI_Group *group) {
255   ARMCI_GROUP_DEFAULT = *group;
256 }
257 
258 
259 /** Get the default group.
260   *
261   * @param[out] group_out Pointer to the default group.
262   */
ARMCI_Group_get_default(ARMCI_Group * group_out)263 void ARMCI_Group_get_default(ARMCI_Group *group_out) {
264   *group_out = ARMCI_GROUP_DEFAULT;
265 }
266 
267 
268 /** Fetch the world group.
269   *
270   * @param[out] group_out Output group.
271   */
ARMCI_Group_get_world(ARMCI_Group * group_out)272 void ARMCI_Group_get_world(ARMCI_Group *group_out) {
273   *group_out = ARMCI_GROUP_WORLD;
274 }
275 
276 
277 /** Translate a group process rank to the corresponding process rank in the
278   * ARMCI world group.
279   *
280   * @param[in] group      Group to translate from.
281   * @param[in] group_rank Rank of the process in group.
282   */
ARMCI_Absolute_id(ARMCI_Group * group,int group_rank)283 int ARMCI_Absolute_id(ARMCI_Group *group, int group_rank) {
284   int       world_rank;
285   MPI_Group world_group, sub_group;
286 
287   ARMCII_Assert(group_rank >= 0 && group_rank < group->size);
288 
289   /* Check if group is the world group */
290   if (group->comm == ARMCI_GROUP_WORLD.comm)
291     world_rank = group_rank;
292 
293   /* Check for translation cache */
294   else if (group->grp_to_abs != NULL)
295     world_rank = group->grp_to_abs[group_rank];
296 
297   else {
298     /* Translate the rank */
299     MPI_Comm_group(ARMCI_GROUP_WORLD.comm, &world_group);
300     MPI_Comm_group(group->comm, &sub_group);
301 
302     MPI_Group_translate_ranks(sub_group, 1, &group_rank, world_group, &world_rank);
303 
304     MPI_Group_free(&world_group);
305     MPI_Group_free(&sub_group);
306   }
307 
308   /* Check if translation failed */
309   if (world_rank == MPI_UNDEFINED)
310     return -1;
311   else
312     return world_rank;
313 }
314 
315 
316 /** Split a parent group into multiple child groups.  This is similar to
317   * MPI_Comm_split.  Collective across the parent group.
318   *
319   * @param[in]  parent The parent group.
320   * @param[in]  color  The id number of the new group.  Processes are grouped
321   *                    together so allthat give the same color will be placed
322   *                    in the same new group.
323   * @param[in]  key    Relative ordering of processes in the new group.
324   * @param[out] new_group Pointer to a handle where group info will be stored.
325   */
ARMCIX_Group_split(ARMCI_Group * parent,int color,int key,ARMCI_Group * new_group)326 int ARMCIX_Group_split(ARMCI_Group *parent, int color, int key, ARMCI_Group *new_group) {
327   int err;
328 
329   err = MPI_Comm_split(parent->comm, color, key, &new_group->comm);
330 
331   if (err != MPI_SUCCESS)
332     return err;
333 
334   ARMCII_Group_init_from_comm(new_group);
335 
336   return 0;
337 }
338 
339 
340 /** Duplicate an ARMCI group.  Collective across the parent group.
341   *
342   * @param[in]  parent The parent group.
343   * @param[in]  color  The id number of the new group.  Processes are grouped
344   *                    together so allthat give the same color will be placed
345   *                    in the same new group.
346   * @param[in]  key    Relative ordering of processes in the new group.
347   * @param[out] new_group Pointer to a handle where group info will be stored.
348   */
ARMCIX_Group_dup(ARMCI_Group * parent,ARMCI_Group * new_group)349 int ARMCIX_Group_dup(ARMCI_Group *parent, ARMCI_Group *new_group) {
350   int err;
351 
352   err = MPI_Comm_dup(parent->comm, &new_group->comm);
353 
354   if (err != MPI_SUCCESS)
355     return err;
356 
357   ARMCII_Group_init_from_comm(new_group);
358 
359   return 0;
360 }
361