1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4 * University Research and Technology
5 * Corporation. All rights reserved.
6 * Copyright (c) 2004-2017 The University of Tennessee and The University
7 * of Tennessee Research Foundation. All rights
8 * reserved.
9 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10 * University of Stuttgart. All rights reserved.
11 * Copyright (c) 2004-2005 The Regents of the University of California.
12 * All rights reserved.
13 * Copyright (c) 2007 Lawrence Livermore National Security, LLC. All
14 * rights reserved.
15 * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
16 * Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved.
17 * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved.
18 * Copyright (c) 2013 Los Alamos National Security, LLC. All rights
19 * reserved.
20 * Copyright (c) 2014 Research Organization for Information Science
21 * and Technology (RIST). All rights reserved.
22 * Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
23 * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved.
24 * Copyright (c) 2020 BULL S.A.S. All rights reserved.
25 * $COPYRIGHT$
26 *
27 * Additional copyrights may follow
28 *
29 * $HEADER$
30 */
31
32 #include "ompi_config.h"
33
34 #include <stdio.h>
35 #include <string.h>
36 #include <stdlib.h>
37
38 #include "mpi.h"
39 #include "ompi/communicator/communicator.h"
40 #include "opal/util/output.h"
41 #include "opal/util/argv.h"
42 #include "opal/util/show_help.h"
43 #include "opal/class/opal_list.h"
44 #include "opal/class/opal_object.h"
45 #include "ompi/mca/mca.h"
46 #include "opal/mca/base/base.h"
47 #include "ompi/mca/coll/coll.h"
48 #include "ompi/mca/coll/base/base.h"
49 #include "ompi/mca/coll/base/coll_base_util.h"
50
/*
 * Class instance for mca_coll_base_avail_coll_t, the list item used to
 * track the coll modules that are available to a communicator
 */
54 OBJ_CLASS_INSTANCE(mca_coll_base_avail_coll_t, opal_list_item_t, NULL, NULL);
55
56 /*
57 * Local functions
58 */
59 static opal_list_t *check_components(opal_list_t * components,
60 ompi_communicator_t * comm);
61 static int check_one_component(ompi_communicator_t * comm,
62 const mca_base_component_t * component,
63 mca_coll_base_module_2_3_0_t ** module);
64
65 static int query(const mca_base_component_t * component,
66 ompi_communicator_t * comm, int *priority,
67 mca_coll_base_module_2_3_0_t ** module);
68
69 static int query_2_0_0(const mca_coll_base_component_2_0_0_t *
70 coll_component, ompi_communicator_t * comm,
71 int *priority,
72 mca_coll_base_module_2_3_0_t ** module);
73
/*
 * Install the implementation of collective `func` provided by `module`
 * on `comm`, if the module provides one (non-NULL coll_<func> pointer).
 *
 * The module previously registered for that function, if any, is
 * released, and `module` is retained once for every function it ends up
 * owning.
 *
 * `module` and `comm` are parenthesized so that any pointer expression
 * can be passed safely.  They are still evaluated more than once, so the
 * arguments must not have side effects.
 */
#define COPY(module, comm, func)                                          \
    do {                                                                  \
        if (NULL != (module)->coll_ ## func) {                            \
            if (NULL != (comm)->c_coll->coll_ ## func ## _module) {       \
                OBJ_RELEASE((comm)->c_coll->coll_ ## func ## _module);    \
            }                                                             \
            (comm)->c_coll->coll_ ## func = (module)->coll_ ## func;      \
            (comm)->c_coll->coll_ ## func ## _module = (module);          \
            OBJ_RETAIN(module);                                           \
        }                                                                 \
    } while (0)
85
/*
 * Expression macro: store the name of collective `func` (as a string
 * literal) into `what`, then evaluate to non-zero when `comm` has no
 * implementation registered for that function.  Chaining several of
 * these with || leaves the name of the first missing function in `what`.
 */
#define CHECK_NULL(what, comm, func)                                      \
    ((what) = #func, (NULL == (comm)->c_coll->coll_ ## func))
88
89 /*
90 * This function is called at the initialization time of every
91 * communicator. It is used to select which coll component will be
92 * active for a given communicator.
93 *
94 * This selection logic is not for the weak.
95 */
mca_coll_base_comm_select(ompi_communicator_t * comm)96 int mca_coll_base_comm_select(ompi_communicator_t * comm)
97 {
98 opal_list_t *selectable;
99 opal_list_item_t *item;
100 char* which_func = "unknown";
101 int ret;
102
103 /* Announce */
104 opal_output_verbose(9, ompi_coll_base_framework.framework_output,
105 "coll:base:comm_select: new communicator: %s (cid %d)",
106 comm->c_name, comm->c_contextid);
107
108 /* Initialize all the relevant pointers, since they're used as
109 * sentinel values */
110 comm->c_coll = (mca_coll_base_comm_coll_t*)calloc(1, sizeof(mca_coll_base_comm_coll_t));
111
112 opal_output_verbose(10, ompi_coll_base_framework.framework_output,
113 "coll:base:comm_select: Checking all available modules");
114 selectable = check_components(&ompi_coll_base_framework.framework_components, comm);
115
116 /* Upon return from the above, the modules list will contain the
117 list of modules that returned (priority >= 0). If we have no
118 collective modules available, then print error and return. */
119 if (NULL == selectable) {
120 /* There's no modules available */
121 opal_show_help("help-mca-coll-base.txt",
122 "comm-select:none-available", true);
123 return OMPI_ERROR;
124 }
125
126 /* FIX ME - Do some kind of collective operation to find a module
127 that everyone has available */
128
129 /* List to store every valid module */
130 comm->c_coll->module_list = OBJ_NEW(opal_list_t);
131
132 /* do the selection loop */
133 for (item = opal_list_remove_first(selectable);
134 NULL != item; item = opal_list_remove_first(selectable)) {
135
136 mca_coll_base_avail_coll_t *avail = (mca_coll_base_avail_coll_t *) item;
137
138 /* initialize the module */
139 ret = avail->ac_module->coll_module_enable(avail->ac_module, comm);
140
141 opal_output_verbose(9, ompi_coll_base_framework.framework_output,
142 "coll:base:comm_select: selecting %10s, priority %3d, %s",
143 avail->ac_component_name, avail->ac_priority,
144 (OMPI_SUCCESS == ret ? "Enabled": "Disabled") );
145
146 if (OMPI_SUCCESS == ret) {
147 /* Save every component that is initialized,
148 * queried and enabled successfully */
149 opal_list_append(comm->c_coll->module_list, &avail->super);
150
151 /* copy over any of the pointers */
152 COPY(avail->ac_module, comm, allgather);
153 COPY(avail->ac_module, comm, allgatherv);
154 COPY(avail->ac_module, comm, allreduce);
155 COPY(avail->ac_module, comm, alltoall);
156 COPY(avail->ac_module, comm, alltoallv);
157 COPY(avail->ac_module, comm, alltoallw);
158 COPY(avail->ac_module, comm, barrier);
159 COPY(avail->ac_module, comm, bcast);
160 COPY(avail->ac_module, comm, exscan);
161 COPY(avail->ac_module, comm, gather);
162 COPY(avail->ac_module, comm, gatherv);
163 COPY(avail->ac_module, comm, reduce);
164 COPY(avail->ac_module, comm, reduce_scatter_block);
165 COPY(avail->ac_module, comm, reduce_scatter);
166 COPY(avail->ac_module, comm, scan);
167 COPY(avail->ac_module, comm, scatter);
168 COPY(avail->ac_module, comm, scatterv);
169
170 COPY(avail->ac_module, comm, iallgather);
171 COPY(avail->ac_module, comm, iallgatherv);
172 COPY(avail->ac_module, comm, iallreduce);
173 COPY(avail->ac_module, comm, ialltoall);
174 COPY(avail->ac_module, comm, ialltoallv);
175 COPY(avail->ac_module, comm, ialltoallw);
176 COPY(avail->ac_module, comm, ibarrier);
177 COPY(avail->ac_module, comm, ibcast);
178 COPY(avail->ac_module, comm, iexscan);
179 COPY(avail->ac_module, comm, igather);
180 COPY(avail->ac_module, comm, igatherv);
181 COPY(avail->ac_module, comm, ireduce);
182 COPY(avail->ac_module, comm, ireduce_scatter_block);
183 COPY(avail->ac_module, comm, ireduce_scatter);
184 COPY(avail->ac_module, comm, iscan);
185 COPY(avail->ac_module, comm, iscatter);
186 COPY(avail->ac_module, comm, iscatterv);
187
188 COPY(avail->ac_module, comm, allgather_init);
189 COPY(avail->ac_module, comm, allgatherv_init);
190 COPY(avail->ac_module, comm, allreduce_init);
191 COPY(avail->ac_module, comm, alltoall_init);
192 COPY(avail->ac_module, comm, alltoallv_init);
193 COPY(avail->ac_module, comm, alltoallw_init);
194 COPY(avail->ac_module, comm, barrier_init);
195 COPY(avail->ac_module, comm, bcast_init);
196 COPY(avail->ac_module, comm, exscan_init);
197 COPY(avail->ac_module, comm, gather_init);
198 COPY(avail->ac_module, comm, gatherv_init);
199 COPY(avail->ac_module, comm, reduce_init);
200 COPY(avail->ac_module, comm, reduce_scatter_block_init);
201 COPY(avail->ac_module, comm, reduce_scatter_init);
202 COPY(avail->ac_module, comm, scan_init);
203 COPY(avail->ac_module, comm, scatter_init);
204 COPY(avail->ac_module, comm, scatterv_init);
205
206 /* We can not reliably check if this comm has a topology
207 * at this time. The flags are set *after* coll_select */
208 COPY(avail->ac_module, comm, neighbor_allgather);
209 COPY(avail->ac_module, comm, neighbor_allgatherv);
210 COPY(avail->ac_module, comm, neighbor_alltoall);
211 COPY(avail->ac_module, comm, neighbor_alltoallv);
212 COPY(avail->ac_module, comm, neighbor_alltoallw);
213
214 COPY(avail->ac_module, comm, ineighbor_allgather);
215 COPY(avail->ac_module, comm, ineighbor_allgatherv);
216 COPY(avail->ac_module, comm, ineighbor_alltoall);
217 COPY(avail->ac_module, comm, ineighbor_alltoallv);
218 COPY(avail->ac_module, comm, ineighbor_alltoallw);
219
220 COPY(avail->ac_module, comm, neighbor_allgather_init);
221 COPY(avail->ac_module, comm, neighbor_allgatherv_init);
222 COPY(avail->ac_module, comm, neighbor_alltoall_init);
223 COPY(avail->ac_module, comm, neighbor_alltoallv_init);
224 COPY(avail->ac_module, comm, neighbor_alltoallw_init);
225
226 COPY(avail->ac_module, comm, reduce_local);
227 } else {
228 /* release the original module reference and the list item */
229 OBJ_RELEASE(avail->ac_module);
230 OBJ_RELEASE(avail);
231 }
232 }
233
234 /* Done with the list from the check_components() call so release it. */
235 OBJ_RELEASE(selectable);
236
237 /* check to make sure no NULLs */
238 if (CHECK_NULL(which_func, comm, allgather) ||
239 CHECK_NULL(which_func, comm, allgatherv) ||
240 CHECK_NULL(which_func, comm, allreduce) ||
241 CHECK_NULL(which_func, comm, alltoall) ||
242 CHECK_NULL(which_func, comm, alltoallv) ||
243 CHECK_NULL(which_func, comm, alltoallw) ||
244 CHECK_NULL(which_func, comm, barrier) ||
245 CHECK_NULL(which_func, comm, bcast) ||
246 ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, exscan)) ||
247 CHECK_NULL(which_func, comm, gather) ||
248 CHECK_NULL(which_func, comm, gatherv) ||
249 CHECK_NULL(which_func, comm, reduce) ||
250 CHECK_NULL(which_func, comm, reduce_scatter_block) ||
251 CHECK_NULL(which_func, comm, reduce_scatter) ||
252 ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, scan)) ||
253 CHECK_NULL(which_func, comm, scatter) ||
254 CHECK_NULL(which_func, comm, scatterv) ||
255 CHECK_NULL(which_func, comm, iallgather) ||
256 CHECK_NULL(which_func, comm, iallgatherv) ||
257 CHECK_NULL(which_func, comm, iallreduce) ||
258 CHECK_NULL(which_func, comm, ialltoall) ||
259 CHECK_NULL(which_func, comm, ialltoallv) ||
260 CHECK_NULL(which_func, comm, ialltoallw) ||
261 CHECK_NULL(which_func, comm, ibarrier) ||
262 CHECK_NULL(which_func, comm, ibcast) ||
263 ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, iexscan)) ||
264 CHECK_NULL(which_func, comm, igather) ||
265 CHECK_NULL(which_func, comm, igatherv) ||
266 CHECK_NULL(which_func, comm, ireduce) ||
267 CHECK_NULL(which_func, comm, ireduce_scatter_block) ||
268 CHECK_NULL(which_func, comm, ireduce_scatter) ||
269 ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, iscan)) ||
270 CHECK_NULL(which_func, comm, iscatter) ||
271 CHECK_NULL(which_func, comm, iscatterv) ||
272 CHECK_NULL(which_func, comm, allgather_init) ||
273 CHECK_NULL(which_func, comm, allgatherv_init) ||
274 CHECK_NULL(which_func, comm, allreduce_init) ||
275 CHECK_NULL(which_func, comm, alltoall_init) ||
276 CHECK_NULL(which_func, comm, alltoallv_init) ||
277 CHECK_NULL(which_func, comm, alltoallw_init) ||
278 CHECK_NULL(which_func, comm, barrier_init) ||
279 CHECK_NULL(which_func, comm, bcast_init) ||
280 ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, exscan_init)) ||
281 CHECK_NULL(which_func, comm, gather_init) ||
282 CHECK_NULL(which_func, comm, gatherv_init) ||
283 CHECK_NULL(which_func, comm, reduce_init) ||
284 CHECK_NULL(which_func, comm, reduce_scatter_block_init) ||
285 CHECK_NULL(which_func, comm, reduce_scatter_init) ||
286 ((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, scan_init)) ||
287 CHECK_NULL(which_func, comm, scatter_init) ||
288 CHECK_NULL(which_func, comm, scatterv_init) ||
289 CHECK_NULL(which_func, comm, reduce_local) ) {
290 /* TODO -- Once the topology flags are set before coll_select then
291 * check if neighborhood collectives have been set. */
292
293 opal_show_help("help-mca-coll-base.txt",
294 "comm-select:no-function-available", true, which_func);
295
296 mca_coll_base_comm_unselect(comm);
297 return OMPI_ERR_NOT_FOUND;
298 }
299 return OMPI_SUCCESS;
300 }
301
/*
 * Comparison callback for list sorting: orders available modules by
 * ascending priority.  Returns 1, -1 or 0 when *a has a higher, lower
 * or equal priority compared to *b.
 */
static int avail_coll_compare (opal_list_item_t **a,
                               opal_list_item_t **b)
{
    const mca_coll_base_avail_coll_t *lhs = (const mca_coll_base_avail_coll_t *) *a;
    const mca_coll_base_avail_coll_t *rhs = (const mca_coll_base_avail_coll_t *) *b;

    /* (greater) - (less) yields exactly 1, -1 or 0 */
    return (lhs->ac_priority > rhs->ac_priority) -
           (lhs->ac_priority < rhs->ac_priority);
}
315
/*
 * Look for component_name in a NULL-terminated array of strings.
 *
 * Returns 1 when an entry compares equal to component_name, 0 otherwise
 * (including when argv itself is NULL).
 */
static inline int
component_in_argv(char **argv, const char* component_name)
{
    if (NULL == argv) {
        return 0;
    }

    for (char **entry = argv; NULL != *entry; ++entry) {
        if (0 == strcmp(component_name, *entry)) {
            return 1;
        }
    }

    return 0;
}
329
330 /*
331 * For each module in the list, check and see if it wants to run, and
332 * do the resulting priority comparison. Make a list of modules to be
333 * only those who returned that they want to run, and put them in
334 * priority order.
335 */
check_components(opal_list_t * components,ompi_communicator_t * comm)336 static opal_list_t *check_components(opal_list_t * components,
337 ompi_communicator_t * comm)
338 {
339 int priority, flag;
340 int count_include = 0;
341 const mca_base_component_t *component;
342 mca_base_component_list_item_t *cli;
343 mca_coll_base_module_2_3_0_t *module;
344 opal_list_t *selectable;
345 mca_coll_base_avail_coll_t *avail;
346 char info_val[OPAL_MAX_INFO_VAL+1];
347 char **coll_argv = NULL, **coll_exclude = NULL, **coll_include = NULL;
348
349 /* Check if this communicator comes with restrictions on the collective modules
350 * it wants to use. The restrictions are consistent with the MCA parameter
351 * to limit the collective components loaded, but it applies for each
352 * communicator and is provided as an info key during the communicator
353 * creation. Unlike the MCA param, this info key is used not to select
354 * components but either to prevent components from being used or to
355 * force a change in the component priority.
356 */
357 if( NULL != comm->super.s_info) {
358 opal_info_get(comm->super.s_info, "ompi_comm_coll_preference",
359 sizeof(info_val), info_val, &flag);
360 if( !flag ) {
361 goto proceed_to_select;
362 }
363 coll_argv = opal_argv_split(info_val, ',');
364 if(NULL == coll_argv) {
365 goto proceed_to_select;
366 }
367 int idx2;
368 count_include = opal_argv_count(coll_argv);
369 /* Allocate the coll_include argv */
370 coll_include = (char**)malloc((count_include + 1) * sizeof(char*));
371 coll_include[count_include] = NULL; /* NULL terminated array */
372 /* Dispatch the include/exclude in the corresponding arrays */
373 for( int idx = 0; NULL != coll_argv[idx]; idx++ ) {
374 if( '^' == coll_argv[idx][0] ) {
375 coll_include[idx] = NULL; /* NULL terminated array */
376
377 /* Allocate the coll_exclude argv */
378 coll_exclude = (char**)malloc((count_include - idx + 1) * sizeof(char*));
379 /* save the exclude components */
380 for( idx2 = idx; NULL != coll_argv[idx2]; idx2++ ) {
381 coll_exclude[idx2 - idx] = coll_argv[idx2];
382 }
383 coll_exclude[idx2 - idx] = NULL; /* NULL-terminated array */
384 coll_exclude[0] = coll_exclude[0] + 1; /* get rid of the ^ */
385 count_include = idx;
386 break;
387 }
388 coll_include[idx] = coll_argv[idx];
389 }
390 }
391 proceed_to_select:
392 /* Make a list of the components that query successfully */
393 selectable = OBJ_NEW(opal_list_t);
394
395 /* Scan through the list of components */
396 OPAL_LIST_FOREACH(cli, &ompi_coll_base_framework.framework_components, mca_base_component_list_item_t) {
397 component = cli->cli_component;
398
399 /* dont bother is we have this component in the exclusion list */
400 if( component_in_argv(coll_exclude, component->mca_component_name) ) {
401 opal_output_verbose(10, ompi_coll_base_framework.framework_output,
402 "coll:base:comm_select: component disqualified: %s (due to communicator info key)",
403 component->mca_component_name );
404 continue;
405 }
406 priority = check_one_component(comm, component, &module);
407 if (priority >= 0) {
408 /* We have a component that indicated that it wants to run
409 by giving us a module */
410 avail = OBJ_NEW(mca_coll_base_avail_coll_t);
411 avail->ac_priority = priority;
412 avail->ac_module = module;
413 // Point to the string so we don't have to free later
414 avail->ac_component_name = component->mca_component_name;
415
416 opal_list_append(selectable, &avail->super);
417 }
418 else {
419 opal_output_verbose(10, ompi_coll_base_framework.framework_output,
420 "coll:base:comm_select: component disqualified: %s (priority %d < 0)",
421 component->mca_component_name, priority );
422
423 // If the disqualified collective returned a module make sure we
424 // release it here, since it will become a leak otherwise.
425 if( NULL != module ) {
426 OBJ_RELEASE(module);
427 module = NULL;
428 }
429 }
430 }
431
432 /* If we didn't find any available components, return an error */
433 if (0 == opal_list_get_size(selectable)) {
434 OBJ_RELEASE(selectable);
435 if( NULL != coll_exclude ) {
436 free(coll_exclude);
437 }
438 if( NULL != coll_include ) {
439 free(coll_include);
440 }
441 return NULL;
442 }
443
444 /* Put this list in priority order */
445 opal_list_sort(selectable, avail_coll_compare);
446
447 /* For all valid component reorder them not on their provided priorities but on
448 * the order requested in the info key. As at this point the coll_include is
449 * already ordered backward we can simply append the components.
450 * Note that the last element in selectable will have the highest priorty.
451 */
452 for (int idx = count_include-1; idx >= 0; --idx) {
453 mca_coll_base_avail_coll_t *item;
454 OPAL_LIST_FOREACH(item, selectable, mca_coll_base_avail_coll_t) {
455 if (0 == strcmp(item->ac_component_name, coll_include[idx])) {
456 opal_list_remove_item(selectable, &item->super);
457 opal_list_append(selectable, &item->super);
458 break;
459 }
460 }
461 }
462
463 opal_argv_free(coll_argv);
464 if( NULL != coll_exclude ) {
465 free(coll_exclude);
466 }
467 if( NULL != coll_include ) {
468 free(coll_include);
469 }
470
471 /* All done */
472 return selectable;
473 }
474
475
476 /*
477 * Check a single component
478 */
check_one_component(ompi_communicator_t * comm,const mca_base_component_t * component,mca_coll_base_module_2_3_0_t ** module)479 static int check_one_component(ompi_communicator_t * comm,
480 const mca_base_component_t * component,
481 mca_coll_base_module_2_3_0_t ** module)
482 {
483 int err;
484 int priority = -1;
485
486 err = query(component, comm, &priority, module);
487
488 if (OMPI_SUCCESS == err) {
489 priority = (priority < 100) ? priority : 100;
490 opal_output_verbose(10, ompi_coll_base_framework.framework_output,
491 "coll:base:comm_select: component available: %s, priority: %d",
492 component->mca_component_name, priority);
493
494 } else {
495 priority = -1;
496 opal_output_verbose(10, ompi_coll_base_framework.framework_output,
497 "coll:base:comm_select: component not available: %s",
498 component->mca_component_name);
499 }
500
501 return priority;
502 }
503
504 /**************************************************************************
505 * Query functions
506 **************************************************************************/
507
508 /*
509 * Take any version of a coll module, query it, and return the right
510 * module struct
511 */
query(const mca_base_component_t * component,ompi_communicator_t * comm,int * priority,mca_coll_base_module_2_3_0_t ** module)512 static int query(const mca_base_component_t * component,
513 ompi_communicator_t * comm,
514 int *priority, mca_coll_base_module_2_3_0_t ** module)
515 {
516 *module = NULL;
517 if (2 == component->mca_type_major_version &&
518 0 == component->mca_type_minor_version &&
519 0 == component->mca_type_release_version) {
520 const mca_coll_base_component_2_0_0_t *coll100 =
521 (mca_coll_base_component_2_0_0_t *) component;
522
523 return query_2_0_0(coll100, comm, priority, module);
524 }
525
526 /* Unknown coll API version -- return error */
527
528 return OMPI_ERROR;
529 }
530
531
/*
 * Query a component implementing the 2.0.0 coll API.
 *
 * Calls the component's collm_comm_query entry point.  When it hands
 * back a module, the module is stored in *module and OMPI_SUCCESS is
 * returned; otherwise *module is left untouched and OMPI_ERROR is
 * returned.
 */
static int query_2_0_0(const mca_coll_base_component_2_0_0_t * component,
                       ompi_communicator_t * comm, int *priority,
                       mca_coll_base_module_2_3_0_t ** module)
{
    /* There's currently no need for conversion */
    mca_coll_base_module_2_3_0_t *found = component->collm_comm_query(comm, priority);

    if (NULL == found) {
        return OMPI_ERROR;
    }

    *module = found;
    return OMPI_SUCCESS;
}
548