1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4  *                         University Research and Technology
5  *                         Corporation.  All rights reserved.
6  * Copyright (c) 2004-2017 The University of Tennessee and The University
7  *                         of Tennessee Research Foundation.  All rights
8  *                         reserved.
9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10  *                         University of Stuttgart.  All rights reserved.
11  * Copyright (c) 2004-2005 The Regents of the University of California.
12  *                         All rights reserved.
13  * Copyright (c) 2007      Lawrence Livermore National Security, LLC.  All
14  *                         rights reserved.
15  * Copyright (c) 2008      Sun Microsystems, Inc.  All rights reserved.
16  * Copyright (c) 2008-2014 Cisco Systems, Inc.  All rights reserved.
17  * Copyright (c) 2012      Oak Ridge National Laboratory.  All rights reserved.
18  * Copyright (c) 2013      Los Alamos National Security, LLC. All rights
19  *                         reserved.
20  * Copyright (c) 2014      Research Organization for Information Science
21  *                         and Technology (RIST). All rights reserved.
22  * Copyright (c) 2016-2017 IBM Corporation.  All rights reserved.
23  * Copyright (c) 2017      FUJITSU LIMITED.  All rights reserved.
24  * Copyright (c) 2020      BULL S.A.S. All rights reserved.
25  * $COPYRIGHT$
26  *
27  * Additional copyrights may follow
28  *
29  * $HEADER$
30  */
31 
32 #include "ompi_config.h"
33 
34 #include <stdio.h>
35 #include <string.h>
36 #include <stdlib.h>
37 
38 #include "mpi.h"
39 #include "ompi/communicator/communicator.h"
40 #include "opal/util/output.h"
41 #include "opal/util/argv.h"
42 #include "opal/util/show_help.h"
43 #include "opal/class/opal_list.h"
44 #include "opal/class/opal_object.h"
45 #include "ompi/mca/mca.h"
46 #include "opal/mca/base/base.h"
47 #include "ompi/mca/coll/coll.h"
48 #include "ompi/mca/coll/base/base.h"
49 #include "ompi/mca/coll/base/coll_base_util.h"
50 
/*
 * Stuff for the OBJ interface
 *
 * Instantiate the class descriptor for mca_coll_base_avail_coll_t, the list
 * item type this file uses to carry a candidate module (ac_module), its
 * priority (ac_priority) and its component name (ac_component_name).  The
 * parent class is opal_list_item_t so instances can be stored in an
 * opal_list_t.  The two NULL arguments are presumably the constructor and
 * destructor hooks -- confirm against opal/class/opal_object.h.
 */
OBJ_CLASS_INSTANCE(mca_coll_base_avail_coll_t, opal_list_item_t, NULL, NULL);
55 
/*
 * Local functions
 */

/* Query the components and return the list of modules that want to run on
 * this communicator (NULL when none do). */
static opal_list_t *check_components(opal_list_t * components,
                                     ompi_communicator_t * comm);
/* Query a single component; returns its priority (capped at 100) or -1 when
 * the component is not available. */
static int check_one_component(ompi_communicator_t * comm,
                               const mca_base_component_t * component,
                               mca_coll_base_module_2_3_0_t ** module);

/* Dispatch on the component's coll API version and run the matching query. */
static int query(const mca_base_component_t * component,
                 ompi_communicator_t * comm, int *priority,
                 mca_coll_base_module_2_3_0_t ** module);

/* Query a component that implements the 2.0.0 coll component interface. */
static int query_2_0_0(const mca_coll_base_component_2_0_0_t *
                       coll_component, ompi_communicator_t * comm,
                       int *priority,
                       mca_coll_base_module_2_3_0_t ** module);
73 
/*
 * COPY(module, comm, func)
 *
 * If `module` provides an implementation of collective `func`, install it
 * in the communicator's collective table: the function pointer is stored in
 * comm->c_coll->coll_<func> and the owning module in
 * comm->c_coll->coll_<func>_module.  A module previously installed for that
 * slot is released first, and the new module is retained once per slot it
 * occupies.  Nothing happens when the module does not implement `func`.
 *
 * The `module` and `comm` arguments are parenthesized so that any pointer
 * expression (e.g. `&obj`) can be passed safely.  Both are evaluated more
 * than once, so they must be free of side effects.
 */
#define COPY(module, comm, func)                                        \
    do {                                                                \
        if (NULL != (module)->coll_ ## func) {                          \
            if (NULL != (comm)->c_coll->coll_ ## func ## _module) {     \
                OBJ_RELEASE((comm)->c_coll->coll_ ## func ## _module);  \
            }                                                           \
            (comm)->c_coll->coll_ ## func = (module)->coll_ ## func;    \
            (comm)->c_coll->coll_ ## func ## _module = (module);        \
            OBJ_RETAIN(module);                                         \
        }                                                               \
    } while (0)

/*
 * CHECK_NULL(what, comm, func)
 *
 * Comma expression: first records the name of `func` (as a string literal)
 * in `what`, then evaluates to non-zero when the communicator has no
 * implementation installed for `func`.  Used in a short-circuit `||` chain,
 * so after the chain fires `what` names the first missing collective.
 */
#define CHECK_NULL(what, comm, func)                                    \
  ( (what) = # func , NULL == (comm)->c_coll->coll_ ## func)
88 
89 /*
90  * This function is called at the initialization time of every
91  * communicator.  It is used to select which coll component will be
92  * active for a given communicator.
93  *
94  * This selection logic is not for the weak.
95  */
mca_coll_base_comm_select(ompi_communicator_t * comm)96 int mca_coll_base_comm_select(ompi_communicator_t * comm)
97 {
98     opal_list_t *selectable;
99     opal_list_item_t *item;
100     char* which_func = "unknown";
101     int ret;
102 
103     /* Announce */
104     opal_output_verbose(9, ompi_coll_base_framework.framework_output,
105                         "coll:base:comm_select: new communicator: %s (cid %d)",
106                         comm->c_name, comm->c_contextid);
107 
108     /* Initialize all the relevant pointers, since they're used as
109      * sentinel values */
110     comm->c_coll = (mca_coll_base_comm_coll_t*)calloc(1, sizeof(mca_coll_base_comm_coll_t));
111 
112     opal_output_verbose(10, ompi_coll_base_framework.framework_output,
113                         "coll:base:comm_select: Checking all available modules");
114     selectable = check_components(&ompi_coll_base_framework.framework_components, comm);
115 
116     /* Upon return from the above, the modules list will contain the
117        list of modules that returned (priority >= 0).  If we have no
118        collective modules available, then print error and return. */
119     if (NULL == selectable) {
120         /* There's no modules available */
121         opal_show_help("help-mca-coll-base.txt",
122                        "comm-select:none-available", true);
123         return OMPI_ERROR;
124     }
125 
126     /* FIX ME - Do some kind of collective operation to find a module
127        that everyone has available */
128 
129     /* List to store every valid module */
130     comm->c_coll->module_list =  OBJ_NEW(opal_list_t);
131 
132     /* do the selection loop */
133     for (item = opal_list_remove_first(selectable);
134          NULL != item; item = opal_list_remove_first(selectable)) {
135 
136         mca_coll_base_avail_coll_t *avail = (mca_coll_base_avail_coll_t *) item;
137 
138         /* initialize the module */
139         ret = avail->ac_module->coll_module_enable(avail->ac_module, comm);
140 
141         opal_output_verbose(9, ompi_coll_base_framework.framework_output,
142                             "coll:base:comm_select: selecting  %10s, priority %3d, %s",
143                             avail->ac_component_name, avail->ac_priority,
144                             (OMPI_SUCCESS == ret ? "Enabled": "Disabled") );
145 
146         if (OMPI_SUCCESS == ret) {
147             /* Save every component that is initialized,
148              * queried and enabled successfully */
149             opal_list_append(comm->c_coll->module_list, &avail->super);
150 
151             /* copy over any of the pointers */
152             COPY(avail->ac_module, comm, allgather);
153             COPY(avail->ac_module, comm, allgatherv);
154             COPY(avail->ac_module, comm, allreduce);
155             COPY(avail->ac_module, comm, alltoall);
156             COPY(avail->ac_module, comm, alltoallv);
157             COPY(avail->ac_module, comm, alltoallw);
158             COPY(avail->ac_module, comm, barrier);
159             COPY(avail->ac_module, comm, bcast);
160             COPY(avail->ac_module, comm, exscan);
161             COPY(avail->ac_module, comm, gather);
162             COPY(avail->ac_module, comm, gatherv);
163             COPY(avail->ac_module, comm, reduce);
164             COPY(avail->ac_module, comm, reduce_scatter_block);
165             COPY(avail->ac_module, comm, reduce_scatter);
166             COPY(avail->ac_module, comm, scan);
167             COPY(avail->ac_module, comm, scatter);
168             COPY(avail->ac_module, comm, scatterv);
169 
170             COPY(avail->ac_module, comm, iallgather);
171             COPY(avail->ac_module, comm, iallgatherv);
172             COPY(avail->ac_module, comm, iallreduce);
173             COPY(avail->ac_module, comm, ialltoall);
174             COPY(avail->ac_module, comm, ialltoallv);
175             COPY(avail->ac_module, comm, ialltoallw);
176             COPY(avail->ac_module, comm, ibarrier);
177             COPY(avail->ac_module, comm, ibcast);
178             COPY(avail->ac_module, comm, iexscan);
179             COPY(avail->ac_module, comm, igather);
180             COPY(avail->ac_module, comm, igatherv);
181             COPY(avail->ac_module, comm, ireduce);
182             COPY(avail->ac_module, comm, ireduce_scatter_block);
183             COPY(avail->ac_module, comm, ireduce_scatter);
184             COPY(avail->ac_module, comm, iscan);
185             COPY(avail->ac_module, comm, iscatter);
186             COPY(avail->ac_module, comm, iscatterv);
187 
188             COPY(avail->ac_module, comm, allgather_init);
189             COPY(avail->ac_module, comm, allgatherv_init);
190             COPY(avail->ac_module, comm, allreduce_init);
191             COPY(avail->ac_module, comm, alltoall_init);
192             COPY(avail->ac_module, comm, alltoallv_init);
193             COPY(avail->ac_module, comm, alltoallw_init);
194             COPY(avail->ac_module, comm, barrier_init);
195             COPY(avail->ac_module, comm, bcast_init);
196             COPY(avail->ac_module, comm, exscan_init);
197             COPY(avail->ac_module, comm, gather_init);
198             COPY(avail->ac_module, comm, gatherv_init);
199             COPY(avail->ac_module, comm, reduce_init);
200             COPY(avail->ac_module, comm, reduce_scatter_block_init);
201             COPY(avail->ac_module, comm, reduce_scatter_init);
202             COPY(avail->ac_module, comm, scan_init);
203             COPY(avail->ac_module, comm, scatter_init);
204             COPY(avail->ac_module, comm, scatterv_init);
205 
206             /* We can not reliably check if this comm has a topology
207              * at this time. The flags are set *after* coll_select */
208             COPY(avail->ac_module, comm, neighbor_allgather);
209             COPY(avail->ac_module, comm, neighbor_allgatherv);
210             COPY(avail->ac_module, comm, neighbor_alltoall);
211             COPY(avail->ac_module, comm, neighbor_alltoallv);
212             COPY(avail->ac_module, comm, neighbor_alltoallw);
213 
214             COPY(avail->ac_module, comm, ineighbor_allgather);
215             COPY(avail->ac_module, comm, ineighbor_allgatherv);
216             COPY(avail->ac_module, comm, ineighbor_alltoall);
217             COPY(avail->ac_module, comm, ineighbor_alltoallv);
218             COPY(avail->ac_module, comm, ineighbor_alltoallw);
219 
220             COPY(avail->ac_module, comm, neighbor_allgather_init);
221             COPY(avail->ac_module, comm, neighbor_allgatherv_init);
222             COPY(avail->ac_module, comm, neighbor_alltoall_init);
223             COPY(avail->ac_module, comm, neighbor_alltoallv_init);
224             COPY(avail->ac_module, comm, neighbor_alltoallw_init);
225 
226             COPY(avail->ac_module, comm, reduce_local);
227         } else {
228             /* release the original module reference and the list item */
229             OBJ_RELEASE(avail->ac_module);
230             OBJ_RELEASE(avail);
231         }
232     }
233 
234     /* Done with the list from the check_components() call so release it. */
235     OBJ_RELEASE(selectable);
236 
237     /* check to make sure no NULLs */
238     if (CHECK_NULL(which_func, comm, allgather) ||
239         CHECK_NULL(which_func, comm, allgatherv) ||
240         CHECK_NULL(which_func, comm, allreduce) ||
241         CHECK_NULL(which_func, comm, alltoall) ||
242         CHECK_NULL(which_func, comm, alltoallv) ||
243         CHECK_NULL(which_func, comm, alltoallw) ||
244         CHECK_NULL(which_func, comm, barrier) ||
245         CHECK_NULL(which_func, comm, bcast) ||
246         ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, exscan)) ||
247         CHECK_NULL(which_func, comm, gather) ||
248         CHECK_NULL(which_func, comm, gatherv) ||
249         CHECK_NULL(which_func, comm, reduce) ||
250         CHECK_NULL(which_func, comm, reduce_scatter_block) ||
251         CHECK_NULL(which_func, comm, reduce_scatter) ||
252         ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, scan)) ||
253         CHECK_NULL(which_func, comm, scatter) ||
254         CHECK_NULL(which_func, comm, scatterv) ||
255         CHECK_NULL(which_func, comm, iallgather) ||
256         CHECK_NULL(which_func, comm, iallgatherv) ||
257         CHECK_NULL(which_func, comm, iallreduce) ||
258         CHECK_NULL(which_func, comm, ialltoall) ||
259         CHECK_NULL(which_func, comm, ialltoallv) ||
260         CHECK_NULL(which_func, comm, ialltoallw) ||
261         CHECK_NULL(which_func, comm, ibarrier) ||
262         CHECK_NULL(which_func, comm, ibcast) ||
263         ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, iexscan)) ||
264         CHECK_NULL(which_func, comm, igather) ||
265         CHECK_NULL(which_func, comm, igatherv) ||
266         CHECK_NULL(which_func, comm, ireduce) ||
267         CHECK_NULL(which_func, comm, ireduce_scatter_block) ||
268         CHECK_NULL(which_func, comm, ireduce_scatter) ||
269         ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, iscan)) ||
270         CHECK_NULL(which_func, comm, iscatter) ||
271         CHECK_NULL(which_func, comm, iscatterv) ||
272         CHECK_NULL(which_func, comm, allgather_init) ||
273         CHECK_NULL(which_func, comm, allgatherv_init) ||
274         CHECK_NULL(which_func, comm, allreduce_init) ||
275         CHECK_NULL(which_func, comm, alltoall_init) ||
276         CHECK_NULL(which_func, comm, alltoallv_init) ||
277         CHECK_NULL(which_func, comm, alltoallw_init) ||
278         CHECK_NULL(which_func, comm, barrier_init) ||
279         CHECK_NULL(which_func, comm, bcast_init) ||
280         ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, exscan_init)) ||
281         CHECK_NULL(which_func, comm, gather_init) ||
282         CHECK_NULL(which_func, comm, gatherv_init) ||
283         CHECK_NULL(which_func, comm, reduce_init) ||
284         CHECK_NULL(which_func, comm, reduce_scatter_block_init) ||
285         CHECK_NULL(which_func, comm, reduce_scatter_init) ||
286         ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, scan_init)) ||
287         CHECK_NULL(which_func, comm, scatter_init) ||
288         CHECK_NULL(which_func, comm, scatterv_init) ||
289         CHECK_NULL(which_func, comm, reduce_local) ) {
290         /* TODO -- Once the topology flags are set before coll_select then
291          * check if neighborhood collectives have been set. */
292 
293         opal_show_help("help-mca-coll-base.txt",
294                        "comm-select:no-function-available", true, which_func);
295 
296          mca_coll_base_comm_unselect(comm);
297         return OMPI_ERR_NOT_FOUND;
298     }
299     return OMPI_SUCCESS;
300 }
301 
/*
 * Comparison callback handed to opal_list_sort(): orders two available
 * collective entries by ascending ac_priority.
 *
 * Returns 1 when *a has the higher priority, -1 when *b has the higher
 * priority and 0 when both priorities are equal.
 */
static int avail_coll_compare (opal_list_item_t **a,
                               opal_list_item_t **b)
{
    const mca_coll_base_avail_coll_t *lhs = (const mca_coll_base_avail_coll_t *) *a;
    const mca_coll_base_avail_coll_t *rhs = (const mca_coll_base_avail_coll_t *) *b;

    if (lhs->ac_priority == rhs->ac_priority) {
        return 0;
    }

    return (lhs->ac_priority > rhs->ac_priority) ? 1 : -1;
}
315 
/*
 * Look for component_name in a NULL-terminated array of strings.
 *
 * argv may be NULL, in which case nothing matches.  Returns 1 when an
 * entry compares equal to component_name, 0 otherwise.
 */
static inline int
component_in_argv(char **argv, const char* component_name)
{
    if( NULL == argv ) {
        return 0;
    }

    for( char **cursor = argv; NULL != *cursor; ++cursor ) {
        if( 0 == strcmp(component_name, *cursor) ) {
            return 1;
        }
    }

    return 0;
}
329 
330 /*
331  * For each module in the list, check and see if it wants to run, and
332  * do the resulting priority comparison.  Make a list of modules to be
333  * only those who returned that they want to run, and put them in
334  * priority order.
335  */
check_components(opal_list_t * components,ompi_communicator_t * comm)336 static opal_list_t *check_components(opal_list_t * components,
337                                      ompi_communicator_t * comm)
338 {
339     int priority, flag;
340     int count_include = 0;
341     const mca_base_component_t *component;
342     mca_base_component_list_item_t *cli;
343     mca_coll_base_module_2_3_0_t *module;
344     opal_list_t *selectable;
345     mca_coll_base_avail_coll_t *avail;
346     char info_val[OPAL_MAX_INFO_VAL+1];
347     char **coll_argv = NULL, **coll_exclude = NULL, **coll_include = NULL;
348 
349     /* Check if this communicator comes with restrictions on the collective modules
350      * it wants to use. The restrictions are consistent with the MCA parameter
351      * to limit the collective components loaded, but it applies for each
352      * communicator and is provided as an info key during the communicator
353      * creation. Unlike the MCA param, this info key is used not to select
354      * components but either to prevent components from being used or to
355      * force a change in the component priority.
356      */
357     if( NULL != comm->super.s_info) {
358         opal_info_get(comm->super.s_info, "ompi_comm_coll_preference",
359                       sizeof(info_val), info_val, &flag);
360         if( !flag ) {
361             goto proceed_to_select;
362         }
363         coll_argv = opal_argv_split(info_val, ',');
364         if(NULL == coll_argv) {
365             goto proceed_to_select;
366         }
367         int idx2;
368         count_include = opal_argv_count(coll_argv);
369         /* Allocate the coll_include argv */
370         coll_include = (char**)malloc((count_include + 1) * sizeof(char*));
371         coll_include[count_include] = NULL; /* NULL terminated array */
372         /* Dispatch the include/exclude in the corresponding arrays */
373         for( int idx = 0; NULL != coll_argv[idx]; idx++ ) {
374             if( '^' == coll_argv[idx][0] ) {
375                 coll_include[idx] = NULL;  /* NULL terminated array */
376 
377                 /* Allocate the coll_exclude argv */
378                 coll_exclude = (char**)malloc((count_include - idx + 1) * sizeof(char*));
379                 /* save the exclude components */
380                 for( idx2 = idx; NULL != coll_argv[idx2]; idx2++ ) {
381                     coll_exclude[idx2 - idx] = coll_argv[idx2];
382                 }
383                 coll_exclude[idx2 - idx] = NULL;  /* NULL-terminated array */
384                 coll_exclude[0] = coll_exclude[0] + 1;  /* get rid of the ^ */
385                 count_include = idx;
386                 break;
387             }
388             coll_include[idx] = coll_argv[idx];
389         }
390     }
391  proceed_to_select:
392     /* Make a list of the components that query successfully */
393     selectable = OBJ_NEW(opal_list_t);
394 
395     /* Scan through the list of components */
396     OPAL_LIST_FOREACH(cli, &ompi_coll_base_framework.framework_components, mca_base_component_list_item_t) {
397         component = cli->cli_component;
398 
399         /* dont bother is we have this component in the exclusion list */
400         if( component_in_argv(coll_exclude, component->mca_component_name) ) {
401             opal_output_verbose(10, ompi_coll_base_framework.framework_output,
402                                 "coll:base:comm_select: component disqualified: %s (due to communicator info key)",
403                                 component->mca_component_name );
404             continue;
405         }
406         priority = check_one_component(comm, component, &module);
407         if (priority >= 0) {
408             /* We have a component that indicated that it wants to run
409                by giving us a module */
410             avail = OBJ_NEW(mca_coll_base_avail_coll_t);
411             avail->ac_priority = priority;
412             avail->ac_module = module;
413             // Point to the string so we don't have to free later
414             avail->ac_component_name = component->mca_component_name;
415 
416             opal_list_append(selectable, &avail->super);
417         }
418         else {
419             opal_output_verbose(10, ompi_coll_base_framework.framework_output,
420                                 "coll:base:comm_select: component disqualified: %s (priority %d < 0)",
421                                 component->mca_component_name, priority );
422 
423             // If the disqualified collective returned a module make sure we
424             // release it here, since it will become a leak otherwise.
425             if( NULL != module ) {
426                 OBJ_RELEASE(module);
427                 module = NULL;
428             }
429         }
430     }
431 
432     /* If we didn't find any available components, return an error */
433     if (0 == opal_list_get_size(selectable)) {
434         OBJ_RELEASE(selectable);
435         if( NULL != coll_exclude ) {
436             free(coll_exclude);
437         }
438         if( NULL != coll_include ) {
439             free(coll_include);
440         }
441         return NULL;
442     }
443 
444     /* Put this list in priority order */
445     opal_list_sort(selectable, avail_coll_compare);
446 
447     /* For all valid component reorder them not on their provided priorities but on
448      * the order requested in the info key. As at this point the coll_include is
449      * already ordered backward we can simply append the components.
450      * Note that the last element in selectable will have the highest priorty.
451      */
452     for (int idx = count_include-1; idx >= 0; --idx) {
453         mca_coll_base_avail_coll_t *item;
454         OPAL_LIST_FOREACH(item, selectable, mca_coll_base_avail_coll_t) {
455             if (0 == strcmp(item->ac_component_name, coll_include[idx])) {
456                 opal_list_remove_item(selectable, &item->super);
457                 opal_list_append(selectable, &item->super);
458                 break;
459             }
460         }
461     }
462 
463     opal_argv_free(coll_argv);
464     if( NULL != coll_exclude ) {
465         free(coll_exclude);
466     }
467     if( NULL != coll_include ) {
468         free(coll_include);
469     }
470 
471     /* All done */
472     return selectable;
473 }
474 
475 
476 /*
477  * Check a single component
478  */
check_one_component(ompi_communicator_t * comm,const mca_base_component_t * component,mca_coll_base_module_2_3_0_t ** module)479 static int check_one_component(ompi_communicator_t * comm,
480                                const mca_base_component_t * component,
481                                mca_coll_base_module_2_3_0_t ** module)
482 {
483     int err;
484     int priority = -1;
485 
486     err = query(component, comm, &priority, module);
487 
488     if (OMPI_SUCCESS == err) {
489         priority = (priority < 100) ? priority : 100;
490         opal_output_verbose(10, ompi_coll_base_framework.framework_output,
491                             "coll:base:comm_select: component available: %s, priority: %d",
492                             component->mca_component_name, priority);
493 
494     } else {
495         priority = -1;
496         opal_output_verbose(10, ompi_coll_base_framework.framework_output,
497                             "coll:base:comm_select: component not available: %s",
498                             component->mca_component_name);
499     }
500 
501     return priority;
502 }
503 
504 /**************************************************************************
505  * Query functions
506  **************************************************************************/
507 
508 /*
509  * Take any version of a coll module, query it, and return the right
510  * module struct
511  */
query(const mca_base_component_t * component,ompi_communicator_t * comm,int * priority,mca_coll_base_module_2_3_0_t ** module)512 static int query(const mca_base_component_t * component,
513                  ompi_communicator_t * comm,
514                  int *priority, mca_coll_base_module_2_3_0_t ** module)
515 {
516     *module = NULL;
517     if (2 == component->mca_type_major_version &&
518         0 == component->mca_type_minor_version &&
519         0 == component->mca_type_release_version) {
520         const mca_coll_base_component_2_0_0_t *coll100 =
521             (mca_coll_base_component_2_0_0_t *) component;
522 
523         return query_2_0_0(coll100, comm, priority, module);
524     }
525 
526     /* Unknown coll API version -- return error */
527 
528     return OMPI_ERROR;
529 }
530 
531 
/*
 * Query a 2.0.0 coll component through its collm_comm_query hook.
 *
 * When the hook returns a module, it is stored in *module and OMPI_SUCCESS
 * is returned.  When the hook returns NULL, *module is left untouched and
 * OMPI_ERROR is returned.  The priority is written by the hook itself.
 */
static int query_2_0_0(const mca_coll_base_component_2_0_0_t * component,
                       ompi_communicator_t * comm, int *priority,
                       mca_coll_base_module_2_3_0_t ** module)
{
    /* There's currently no need for conversion */
    mca_coll_base_module_2_3_0_t *queried =
        component->collm_comm_query(comm, priority);

    if (NULL == queried) {
        return OMPI_ERROR;
    }

    *module = queried;
    return OMPI_SUCCESS;
}
548