1 /*
2 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
3 * University Research and Technology
4 * Corporation. All rights reserved.
5 * Copyright (c) 2004-2011 The University of Tennessee and The University
6 * of Tennessee Research Foundation. All rights
7 * reserved.
8 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9 * University of Stuttgart. All rights reserved.
10 * Copyright (c) 2004-2005 The Regents of the University of California.
11 * All rights reserved.
12 * Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved.
13 * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
14 * All rights reserved.
15 * Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
16 * Copyright (c) 2014-2015 Research Organization for Information Science
17 * and Technology (RIST). All rights reserved.
18 * $COPYRIGHT$
19 *
20 * Additional copyrights may follow
21 *
22 * $HEADER$
23 */
24
25 #include "orte_config.h"
26 #include "orte/constants.h"
27
28 #include <string.h>
29
30 #include "orte/mca/mca.h"
31 #include "opal/util/argv.h"
32 #include "opal/util/output.h"
33 #include "opal/mca/base/base.h"
34
35 #include "orte/runtime/orte_globals.h"
36 #include "orte/util/show_help.h"
37 #include "orte/mca/errmgr/errmgr.h"
38
39 #include "orte/mca/rmaps/base/rmaps_private.h"
40 #include "orte/mca/rmaps/base/base.h"
41 /*
42 * The following file was created by configure. It contains extern
43 * statements and the definition of an array of pointers to each
44 * component's public mca_base_component_t struct.
45 */
46
47 #include "orte/mca/rmaps/base/static-components.h"
48
/*
 * Global variables
 */
/* Framework-wide rmaps state.  Its fields (ppr, cpus_per_rank, display_map,
 * mapping, ranking, device, ...) are filled in by the register and open
 * hooks in this file. */
orte_rmaps_base_t orte_rmaps_base = {{{0}}};
/* Storage for the "pernode" parameter ("Launch one ppn as directed"). */
bool orte_rmaps_base_pernode = false;
/* Storage for the "n_pernode" parameter ("Launch n procs/node"). */
int orte_rmaps_base_n_pernode = 0;
/* Storage for the "n_persocket" parameter ("Launch n procs/socket"). */
int orte_rmaps_base_n_persocket = 0;

/*
 * Local variables
 *
 * Each of these is the storage handed to mca_base_var_register() in
 * orte_rmaps_base_register(); the values are consumed in
 * orte_rmaps_base_open() (and, for rmaps_dist_device, in
 * orte_rmaps_base_set_mapping_policy()).
 */
/* "mapping_policy" parameter: policy name plus optional ":modifiers". */
static char *rmaps_base_mapping_policy = NULL;
/* "ranking_policy" parameter. */
static char *rmaps_base_ranking_policy = NULL;
/* Backward-compatibility switches: map and rank round-robin by core/slot/node. */
static bool rmaps_base_bycore = false;
static bool rmaps_base_byslot = false;
static bool rmaps_base_bynode = false;
/* "no_schedule_local": do not place applications on the node running mpirun. */
static bool rmaps_base_no_schedule_local = false;
/* "no_oversubscribe" / "oversubscribe": forbid or force oversubscription. */
static bool rmaps_base_no_oversubscribe = false;
static bool rmaps_base_oversubscribe = false;
/* "display_devel_map" / "display_diffable_map": alternative map reports. */
static bool rmaps_base_display_devel_map = false;
static bool rmaps_base_display_diffable_map = false;
/* "topology": hwloc topology file (xml format) for the compute nodes. */
static char *rmaps_base_topo_file = NULL;
/* "dist_device": device that the "dist" mapping policy maps processes near. */
static char *rmaps_dist_device = NULL;
/* "inherit": whether child jobs inherit launch directives. */
static bool rmaps_base_inherit = false;
73
/*
 * Shorthand for the registration call repeated throughout
 * orte_rmaps_base_register().  Every variable is registered with the same
 * leading "orte", "rmaps" arguments and the same trailing
 * NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY arguments; only
 * the component, name, help text, type and storage differ.
 * Evaluates to the value returned by mca_base_var_register().
 */
#define RMAPS_BASE_REG(component, name, help, type, storage)                \
    mca_base_var_register("orte", "rmaps", (component), (name), (help),     \
                          (type), NULL, 0, 0, OPAL_INFO_LVL_9,              \
                          MCA_BASE_VAR_SCOPE_READONLY, (storage))

/* Register a synonym under the "ppr" component for a variable just registered. */
#define RMAPS_BASE_PPR_SYNONYM(id, name)                                    \
    (void) mca_base_var_register_synonym((id), "orte", "rmaps", "ppr", (name), 0)

/**
 * Framework register hook: reset every rmaps base parameter to its default
 * and register it (plus its synonyms) with the MCA variable system.
 *
 * @param flags  register flags supplied by the caller; not consulted here
 *
 * @return ORTE_SUCCESS, unconditionally - the results of the individual
 *         registration calls are not checked.
 */
static int orte_rmaps_base_register(mca_base_register_flag_t flags)
{
    int id;

    /* ---- procs-per-resource parameters, each also reachable as rmaps_ppr_* ---- */
    orte_rmaps_base_pernode = false;
    id = RMAPS_BASE_REG("base", "pernode",
                        "Launch one ppn as directed",
                        MCA_BASE_VAR_TYPE_BOOL, &orte_rmaps_base_pernode);
    RMAPS_BASE_PPR_SYNONYM(id, "pernode");

    orte_rmaps_base_n_pernode = 0;
    id = RMAPS_BASE_REG("base", "n_pernode",
                        "Launch n procs/node",
                        MCA_BASE_VAR_TYPE_INT, &orte_rmaps_base_n_pernode);
    RMAPS_BASE_PPR_SYNONYM(id, "n_pernode");

    orte_rmaps_base_n_persocket = 0;
    id = RMAPS_BASE_REG("base", "n_persocket",
                        "Launch n procs/socket",
                        MCA_BASE_VAR_TYPE_INT, &orte_rmaps_base_n_persocket);
    RMAPS_BASE_PPR_SYNONYM(id, "n_persocket");

    orte_rmaps_base.ppr = NULL;
    id = RMAPS_BASE_REG("base", "pattern",
                        "Comma-separated list of number of processes on a given resource type [default: none]",
                        MCA_BASE_VAR_TYPE_STRING, &orte_rmaps_base.ppr);
    RMAPS_BASE_PPR_SYNONYM(id, "pattern");

    /* ---- default mapping policy; "schedule_policy" is its deprecated synonym ---- */
    rmaps_base_mapping_policy = NULL;
    id = RMAPS_BASE_REG("base", "mapping_policy",
                        "Mapping Policy [slot | hwthread | core (default:np<=2) | l1cache | l2cache | l3cache | "
                        "socket (default:np>2) | numa | board | node | seq | dist | ppr], "
                        "with allowed modifiers :PE=y,SPAN,OVERSUBSCRIBE,NOOVERSUBSCRIBE",
                        MCA_BASE_VAR_TYPE_STRING, &rmaps_base_mapping_policy);
    (void) mca_base_var_register_synonym(id, "orte", "rmaps", "base", "schedule_policy",
                                         MCA_BASE_VAR_SYN_FLAG_DEPRECATED);

    /* ---- default ranking policy ---- */
    rmaps_base_ranking_policy = NULL;
    (void) RMAPS_BASE_REG("base", "ranking_policy",
                          "Ranking Policy [slot (default:np<=2) | hwthread | core | l1cache | l2cache | l3cache | "
                          "socket (default:np>2) | numa | board | node], with modifier :SPAN or :FILL",
                          MCA_BASE_VAR_TYPE_STRING, &rmaps_base_ranking_policy);

    /* ---- backward-compatibility switches ---- */
    rmaps_base_bycore = false;
    (void) RMAPS_BASE_REG("base", "bycore",
                          "Whether to map and rank processes round-robin by core",
                          MCA_BASE_VAR_TYPE_BOOL, &rmaps_base_bycore);

    rmaps_base_byslot = false;
    (void) RMAPS_BASE_REG("base", "byslot",
                          "Whether to map and rank processes round-robin by slot",
                          MCA_BASE_VAR_TYPE_BOOL, &rmaps_base_byslot);

    rmaps_base_bynode = false;
    (void) RMAPS_BASE_REG("base", "bynode",
                          "Whether to map and rank processes round-robin by node",
                          MCA_BASE_VAR_TYPE_BOOL, &rmaps_base_bynode);

    /* ---- number of cpus per rank; "cpus_per_rank" is a synonym ---- */
    orte_rmaps_base.cpus_per_rank = 0;
    id = RMAPS_BASE_REG("base", "cpus_per_proc",
                        "Number of cpus to use for each rank [1-2**15 (default=1)]",
                        MCA_BASE_VAR_TYPE_INT, &orte_rmaps_base.cpus_per_rank);
    (void) mca_base_var_register_synonym(id, "orte", "rmaps", "base", "cpus_per_rank", 0);

    /* ---- device for the "dist" policy; registered with a NULL component ---- */
    rmaps_dist_device = NULL;
    (void) RMAPS_BASE_REG(NULL, "dist_device",
                          "If specified, map processes near to this device. Any device name that is identified "
                          "by the lstopo hwloc utility as Net or OpenFabrics (for example eth0, mlx4_0, etc) "
                          "or special name as auto ",
                          MCA_BASE_VAR_TYPE_STRING, &rmaps_dist_device);

    /* ---- placement restrictions ---- */
    rmaps_base_no_schedule_local = false;
    (void) RMAPS_BASE_REG("base", "no_schedule_local",
                          "If false, allow scheduling MPI applications on the same node as mpirun (default). "
                          "If true, do not schedule any MPI applications on the same node as mpirun",
                          MCA_BASE_VAR_TYPE_BOOL, &rmaps_base_no_schedule_local);

    /* oversubscription is allowed unless this is set */
    rmaps_base_no_oversubscribe = false;
    (void) RMAPS_BASE_REG("base", "no_oversubscribe",
                          "If true, then do not allow oversubscription of nodes - mpirun will return an error "
                          "if there aren't enough nodes to launch all processes without oversubscribing",
                          MCA_BASE_VAR_TYPE_BOOL, &rmaps_base_no_oversubscribe);

    rmaps_base_oversubscribe = false;
    (void) RMAPS_BASE_REG("base", "oversubscribe",
                          "If true, then allow oversubscription of nodes and overloading of processing elements",
                          MCA_BASE_VAR_TYPE_BOOL, &rmaps_base_oversubscribe);

    /* ---- reporting of the computed map ---- */
    orte_rmaps_base.display_map = false;
    (void) RMAPS_BASE_REG("base", "display_map",
                          "Whether to display the process map after it is computed",
                          MCA_BASE_VAR_TYPE_BOOL, &orte_rmaps_base.display_map);

    rmaps_base_display_devel_map = false;
    (void) RMAPS_BASE_REG("base", "display_devel_map",
                          "Whether to display a developer-detail process map after it is computed",
                          MCA_BASE_VAR_TYPE_BOOL, &rmaps_base_display_devel_map);

    orte_display_topo_with_map = false;
    (void) RMAPS_BASE_REG("base", "display_topo_with_map",
                          "Whether to display the topology with the map",
                          MCA_BASE_VAR_TYPE_BOOL, &orte_display_topo_with_map);

    rmaps_base_display_diffable_map = false;
    (void) RMAPS_BASE_REG("base", "display_diffable_map",
                          "Whether to display a diffable process map after it is computed",
                          MCA_BASE_VAR_TYPE_BOOL, &rmaps_base_display_diffable_map);

    /* ---- miscellaneous ---- */
    rmaps_base_topo_file = NULL;
    (void) RMAPS_BASE_REG("base", "topology",
                          "hwloc topology file (xml format) describing the topology of the compute nodes [default: none]",
                          MCA_BASE_VAR_TYPE_STRING, &rmaps_base_topo_file);

    rmaps_base_inherit = false;
    (void) RMAPS_BASE_REG("base", "inherit",
                          "Whether child jobs shall inherit launch directives",
                          MCA_BASE_VAR_TYPE_BOOL, &rmaps_base_inherit);

    return ORTE_SUCCESS;
}

#undef RMAPS_BASE_PPR_SYNONYM
#undef RMAPS_BASE_REG
236
orte_rmaps_base_close(void)237 static int orte_rmaps_base_close(void)
238 {
239 opal_list_item_t *item;
240
241 /* cleanup globals */
242 while (NULL != (item = opal_list_remove_first(&orte_rmaps_base.selected_modules))) {
243 OBJ_RELEASE(item);
244 }
245 OBJ_DESTRUCT(&orte_rmaps_base.selected_modules);
246
247 return mca_base_framework_components_close(&orte_rmaps_base_framework, NULL);
248 }
249
250 /**
251 * Function for finding and opening either all MCA components, or the one
252 * that was specifically requested via a MCA parameter.
253 */
orte_rmaps_base_open(mca_base_open_flag_t flags)254 static int orte_rmaps_base_open(mca_base_open_flag_t flags)
255 {
256 int rc;
257
258 /* init the globals */
259 OBJ_CONSTRUCT(&orte_rmaps_base.selected_modules, opal_list_t);
260 orte_rmaps_base.slot_list = NULL;
261 orte_rmaps_base.mapping = 0;
262 orte_rmaps_base.ranking = 0;
263 orte_rmaps_base.device = NULL;
264 orte_rmaps_base.inherit = rmaps_base_inherit;
265
266 /* if a topology file was given, then set our topology
267 * from it. Even though our actual topology may differ,
268 * mpirun only needs to see the compute node topology
269 * for mapping purposes
270 */
271 if (NULL != rmaps_base_topo_file) {
272 if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(rmaps_base_topo_file))) {
273 orte_show_help("help-orte-rmaps-base.txt", "topo-file", true, rmaps_base_topo_file);
274 return ORTE_ERR_SILENT;
275 }
276 }
277
278 /* check for violations that has to be detected before we parse the mapping option */
279 if (NULL != orte_rmaps_base.ppr) {
280 orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
281 "--ppr, -ppr", "--map-by ppr:<pattern>",
282 "rmaps_base_pattern, rmaps_ppr_pattern",
283 "rmaps_base_mapping_policy=ppr:<pattern>");
284 /* if the mapping policy is NULL, then we can proceed */
285 if (NULL == rmaps_base_mapping_policy) {
286 asprintf(&rmaps_base_mapping_policy, "ppr:%s", orte_rmaps_base.ppr);
287 } else {
288 return ORTE_ERR_SILENT;
289 }
290 }
291
292 if (0 < orte_rmaps_base.cpus_per_rank) {
293 orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
294 "--cpus-per-proc, -cpus-per-proc, --cpus-per-rank, -cpus-per-rank",
295 "--map-by <obj>:PE=N, default <obj>=NUMA",
296 "rmaps_base_cpus_per_proc", "rmaps_base_mapping_policy=<obj>:PE=N, default <obj>=NUMA");
297 }
298
299 if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(NULL, &orte_rmaps_base.mapping,
300 &orte_rmaps_base.device,
301 rmaps_base_mapping_policy))) {
302 return rc;
303 }
304
305 if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_ranking_policy(&orte_rmaps_base.ranking,
306 orte_rmaps_base.mapping,
307 rmaps_base_ranking_policy))) {
308 return rc;
309 }
310
311 if (rmaps_base_bycore) {
312 orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
313 "--bycore, -bycore", "--map-by core",
314 "rmaps_base_bycore", "rmaps_base_mapping_policy=core");
315 /* set mapping policy to bycore - error if something else already set */
316 if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
317 ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYCORE) {
318 /* error - cannot redefine the default mapping policy */
319 orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
320 "bycore", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
321 return ORTE_ERR_SILENT;
322 }
323 ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYCORE);
324 ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
325 /* set ranking policy to bycore - error if something else already set */
326 if ((ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) &&
327 ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_CORE) {
328 /* error - cannot redefine the default ranking policy */
329 orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "ranking",
330 "bycore", orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
331 return ORTE_ERR_SILENT;
332 }
333 ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_CORE);
334 ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
335 }
336
337 if (rmaps_base_byslot) {
338 orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
339 "--byslot, -byslot", "--map-by slot",
340 "rmaps_base_byslot", "rmaps_base_mapping_policy=slot");
341 /* set mapping policy to byslot - error if something else already set */
342 if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
343 ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYSLOT) {
344 /* error - cannot redefine the default mapping policy */
345 orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
346 "byslot", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
347 return ORTE_ERR_SILENT;
348 }
349 ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYSLOT);
350 ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
351 /* set ranking policy to byslot - error if something else already set */
352 if ((ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) &&
353 ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_SLOT) {
354 /* error - cannot redefine the default ranking policy */
355 orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "ranking",
356 "byslot", orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
357 return ORTE_ERR_SILENT;
358 }
359 ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_SLOT);
360 ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
361 }
362
363 if (rmaps_base_bynode) {
364 orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
365 "--bynode, -bynode", "--map-by node",
366 "rmaps_base_bynode", "rmaps_base_mapping_policy=node");
367 /* set mapping policy to bynode - error if something else already set */
368 if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
369 ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYNODE) {
370 orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
371 "bynode", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
372 return ORTE_ERR_SILENT;
373 }
374 ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYNODE);
375 ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
376 /* set ranking policy to bynode - error if something else already set */
377 if ((ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) &&
378 ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_NODE) {
379 /* error - cannot redefine the default ranking policy */
380 orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "ranking",
381 "bynode", orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
382 return ORTE_ERR_SILENT;
383 }
384 ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_NODE);
385 ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
386 }
387
388 if (0 < orte_rmaps_base.cpus_per_rank) {
389 /* if we were asked for cpus/proc, then we have to
390 * bind to those cpus - any other binding policy is an
391 * error
392 */
393 if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
394 if (opal_hwloc_use_hwthreads_as_cpus) {
395 if (OPAL_BIND_TO_HWTHREAD != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) &&
396 OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
397 orte_show_help("help-orte-rmaps-base.txt", "mismatch-binding", true,
398 orte_rmaps_base.cpus_per_rank, "use-hwthreads-as-cpus",
399 opal_hwloc_base_print_binding(opal_hwloc_binding_policy),
400 "bind-to hwthread");
401 return ORTE_ERR_SILENT;
402 }
403 } else if (OPAL_BIND_TO_CORE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) &&
404 OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
405 orte_show_help("help-orte-rmaps-base.txt", "mismatch-binding", true,
406 orte_rmaps_base.cpus_per_rank, "cores as cpus",
407 opal_hwloc_base_print_binding(opal_hwloc_binding_policy),
408 "bind-to core");
409 return ORTE_ERR_SILENT;
410 }
411 } else {
412 if (opal_hwloc_use_hwthreads_as_cpus) {
413 OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_HWTHREAD);
414 } else {
415 OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE);
416 }
417 }
418 if (1 < orte_rmaps_base.cpus_per_rank) {
419 /* we need to ensure we are mapping to a high-enough level to have
420 * multiple cpus beneath it - by default, we'll go to the NUMA level */
421 if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
422 if (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) == ORTE_MAPPING_BYHWTHREAD ||
423 (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) == ORTE_MAPPING_BYCORE &&
424 !opal_hwloc_use_hwthreads_as_cpus)) {
425 orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low-init", true);
426 return ORTE_ERR_SILENT;
427 }
428 } else {
429 opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
430 "%s rmaps:base pe/rank set - setting mapping to BYNUMA",
431 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
432 ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYNUMA);
433 ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
434 }
435 }
436 }
437
438 if (orte_rmaps_base_pernode) {
439 /* if the user didn't specify a mapping directive, then match it */
440 if (!(ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
441 /* ensure we set the mapping policy to ppr */
442 ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
443 ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
444 /* define the ppr */
445 orte_rmaps_base.ppr = strdup("1:node");
446 }
447 }
448
449 if (0 < orte_rmaps_base_n_pernode) {
450 /* if the user didn't specify a mapping directive, then match it */
451 if (!(ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
452 /* ensure we set the mapping policy to ppr */
453 ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
454 ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
455 /* define the ppr */
456 asprintf(&orte_rmaps_base.ppr, "%d:node", orte_rmaps_base_n_pernode);
457 }
458 }
459
460 if (0 < orte_rmaps_base_n_persocket) {
461 /* if the user didn't specify a mapping directive, then match it */
462 if (!(ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
463 /* ensure we set the mapping policy to ppr */
464 ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
465 ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
466 /* define the ppr */
467 asprintf(&orte_rmaps_base.ppr, "%d:socket", orte_rmaps_base_n_persocket);
468 }
469 }
470
471 /* Should we schedule on the local node or not? */
472 if (rmaps_base_no_schedule_local) {
473 orte_rmaps_base.mapping |= ORTE_MAPPING_NO_USE_LOCAL;
474 }
475
476 /* Should we oversubscribe or not? */
477 if (rmaps_base_no_oversubscribe) {
478 if ((ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
479 !(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
480 /* error - cannot redefine the default mapping policy */
481 orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
482 "no-oversubscribe", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
483 return ORTE_ERR_SILENT;
484 }
485 ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
486 ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
487 }
488
489 /** force oversubscription permission */
490 if (rmaps_base_oversubscribe) {
491 if ((ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
492 (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
493 /* error - cannot redefine the default mapping policy */
494 orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
495 "oversubscribe", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
496 return ORTE_ERR_SILENT;
497 }
498 ORTE_UNSET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
499 ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
500 /* also set the overload allowed flag */
501 opal_hwloc_binding_policy |= OPAL_BIND_ALLOW_OVERLOAD;
502 }
503
504 /* should we display a detailed (developer-quality) version of the map after determining it? */
505 if (rmaps_base_display_devel_map) {
506 orte_rmaps_base.display_map = true;
507 orte_devel_level_output = true;
508 }
509
510 /* should we display a diffable report of proc locations after determining it? */
511 if (rmaps_base_display_diffable_map) {
512 orte_rmaps_base.display_map = true;
513 orte_display_diffable_output = true;
514 }
515
516 /* Open up all available components */
517 rc = mca_base_framework_components_open(&orte_rmaps_base_framework, flags);
518
519 /* check to see if any component indicated a problem */
520 if (ORTE_MAPPING_CONFLICTED & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
521 /* the component would have already reported the error, so
522 * tell the rest of the chain to shut up
523 */
524 return ORTE_ERR_SILENT;
525 }
526
527 /* All done */
528 return rc;
529 }
530
/*
 * Declare the "rmaps" framework of the "orte" project, wiring in the
 * register/open/close hooks defined above and the configure-generated
 * list of static components.
 */
MCA_BASE_FRAMEWORK_DECLARE(orte, rmaps, "ORTE Mapping Subsystem",
                           orte_rmaps_base_register, orte_rmaps_base_open, orte_rmaps_base_close,
                           mca_rmaps_base_static_components, 0);

/*
 * Class instance for orte_rmaps_base_selected_module_t, derived from
 * opal_list_item_t.  NOTE(review): the two NULL arguments are presumably
 * the constructor and destructor - confirm against OBJ_CLASS_INSTANCE.
 */
OBJ_CLASS_INSTANCE(orte_rmaps_base_selected_module_t,
                   opal_list_item_t,
                   NULL, NULL);
538
539
/**
 * Apply a comma-separated list of mapping modifiers to a mapping policy.
 *
 * Tokens are matched case-insensitively:
 *  - "span", "oversubscribe" and "nooversubscribe" match any token that is
 *    a prefix of the keyword (the comparison length is the token's length);
 *  - "pe=<n>" matches any token starting with "pe"; the number after the
 *    '=' is stored in orte_rmaps_base.cpus_per_rank.
 *
 * @param ck   modifier list, e.g. "span,pe=2"; may be NULL
 * @param tmp  mapping policy whose directive bits are updated in place
 *
 * @return ORTE_SUCCESS              ck was NULL, or every token was applied
 *         ORTE_ERR_TAKE_NEXT_OPTION ck contained no tokens at all (empty
 *                                   string, or nothing but commas)
 *         ORTE_ERR_BAD_PARAM        a token was not recognized; tokens
 *                                   before it have already been applied
 *         ORTE_ERR_SILENT           a "pe" token had no "=<value>" part
 *                                   (help message already shown)
 */
static int check_modifiers(char *ck, orte_mapping_policy_t *tmp)
{
    char **ck2, *ptr;
    int i;
    bool found = false;

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "%s rmaps:base check modifiers with %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        (NULL == ck) ? "NULL" : ck);

    if (NULL == ck) {
        return ORTE_SUCCESS;
    }

    ck2 = opal_argv_split(ck, ',');
    if (NULL == ck2) {
        /* opal_argv_split() yields NULL when there is nothing to split
         * (e.g. the trailing-colon specs "socket:" or ":" hand us an empty
         * string).  Report "no modifiers found" rather than dereferencing
         * the NULL array below. */
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    for (i=0; NULL != ck2[i]; i++) {
        if (0 == strncasecmp(ck2[i], "span", strlen(ck2[i]))) {
            ORTE_SET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_SPAN);
            ORTE_SET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_GIVEN);
            found = true;
        } else if (0 == strncasecmp(ck2[i], "pe", strlen("pe"))) {
            /* break this at the = sign to get the number */
            if (NULL == (ptr = strchr(ck2[i], '='))) {
                /* missing the value */
                orte_show_help("help-orte-rmaps-base.txt", "missing-value", true, "pe", ck2[i]);
                opal_argv_free(ck2);
                return ORTE_ERR_SILENT;
            }
            ptr++;
            orte_rmaps_base.cpus_per_rank = strtol(ptr, NULL, 10);
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "%s rmaps:base setting pe/rank to %d",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                orte_rmaps_base.cpus_per_rank);
            found = true;
        } else if (0 == strncasecmp(ck2[i], "oversubscribe", strlen(ck2[i]))) {
            /* explicitly allow oversubscription */
            ORTE_UNSET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_NO_OVERSUBSCRIBE);
            ORTE_SET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_SUBSCRIBE_GIVEN);
            found = true;
        } else if (0 == strncasecmp(ck2[i], "nooversubscribe", strlen(ck2[i]))) {
            /* explicitly forbid oversubscription */
            ORTE_SET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_NO_OVERSUBSCRIBE);
            ORTE_SET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_SUBSCRIBE_GIVEN);
            found = true;
        } else {
            /* unrecognized modifier */
            opal_argv_free(ck2);
            return ORTE_ERR_BAD_PARAM;
        }
    }
    opal_argv_free(ck2);
    if (found) {
        return ORTE_SUCCESS;
    }
    return ORTE_ERR_TAKE_NEXT_OPTION;
}
596
/*
 * Store a finished mapping policy: in the job's map when the job has one,
 * otherwise through the caller-supplied policy pointer.
 */
static int rmaps_commit_mapping(orte_job_t *jdata,
                                orte_mapping_policy_t *policy,
                                orte_mapping_policy_t value)
{
    if (NULL != jdata && NULL != jdata->map) {
        jdata->map->mapping = value;
    } else {
        *policy = value;
    }
    return ORTE_SUCCESS;
}

/**
 * Translate a mapping specification string into a mapping policy.
 *
 * Accepted forms of inspec:
 *   NULL                     - by-socket mapping, not marked as "given"
 *   ":mod1,mod2"             - by-socket mapping plus the modifiers
 *   "ppr:N:obj[:mod1,mod2]"  - ppr mapping; "N:obj" is saved as the pattern
 *   "<object>[:mod1,mod2]"   - mapping by the named object
 * Object names (and "ppr") are matched case-insensitively and may be
 * abbreviated; an abbreviation selects the first name it is a prefix of,
 * in the order slot, node, seq, core, l1cache, l2cache, l3cache, socket,
 * numa, board, hwthread, dist.
 *
 * Side effects visible in this function: "hwthread" sets
 * opal_hwloc_use_hwthreads_as_cpus; "dist" truncates rmaps_dist_device at
 * its first ':'; the ppr pattern is stored in jdata->map->ppr or
 * orte_rmaps_base.ppr; modifiers are applied by check_modifiers().
 *
 * @param jdata   job whose map receives the result, or NULL
 * @param policy  receives the result when jdata or jdata->map is NULL
 * @param device  when non-NULL: set to NULL, then to a copy of the dist
 *                device name if the "dist" policy is selected
 * @param inspec  specification string (not modified), or NULL
 *
 * @return ORTE_SUCCESS; ORTE_ERR_SILENT after a help message was shown (or
 *         check_modifiers() returned it); for "<object>:mods" any other
 *         failure code of check_modifiers() is passed through.
 */
int orte_rmaps_base_set_mapping_policy(orte_job_t *jdata,
                                       orte_mapping_policy_t *policy,
                                       char **device, char *inspec)
{
    enum { PLAIN, HWTHREADS, DIST };
    /* order matters: an abbreviated name selects the first entry it prefixes */
    const struct {
        const char *name;
        orte_mapping_policy_t value;
        int extra;
    } objects[] = {
        { "slot",     ORTE_MAPPING_BYSLOT,     PLAIN },
        { "node",     ORTE_MAPPING_BYNODE,     PLAIN },
        { "seq",      ORTE_MAPPING_SEQ,        PLAIN },
        { "core",     ORTE_MAPPING_BYCORE,     PLAIN },
        { "l1cache",  ORTE_MAPPING_BYL1CACHE,  PLAIN },
        { "l2cache",  ORTE_MAPPING_BYL2CACHE,  PLAIN },
        { "l3cache",  ORTE_MAPPING_BYL3CACHE,  PLAIN },
        { "socket",   ORTE_MAPPING_BYSOCKET,   PLAIN },
        { "numa",     ORTE_MAPPING_BYNUMA,     PLAIN },
        { "board",    ORTE_MAPPING_BYBOARD,    PLAIN },
        { "hwthread", ORTE_MAPPING_BYHWTHREAD, HWTHREADS },
        { "dist",     ORTE_MAPPING_BYDIST,     DIST },
    };
    const size_t nobjects = sizeof(objects) / sizeof(objects[0]);
    orte_mapping_policy_t result = 0;
    char *work;       /* private, writable copy of inspec */
    char *mods;       /* text following the first ':' */
    char *obj;        /* ppr only: text following the second ':' */
    char *objmods;    /* ppr only: modifiers following the third ':' */
    char *colon;
    size_t speclen, k;
    int rc;

    if (NULL != device) {
        *device = NULL;
    }

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "%s rmaps:base set policy with %s device %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        (NULL == inspec) ? "NULL" : inspec,
                        (NULL == device) ? "NULL" : "NONNULL");

    /* nothing specified: default to by-socket */
    if (NULL == inspec) {
        ORTE_SET_MAPPING_POLICY(result, ORTE_MAPPING_BYSOCKET);
        return rmaps_commit_mapping(jdata, policy, result);
    }

    work = strdup(inspec);
    mods = strchr(work, ':');

    if (NULL != mods) {
        if (mods == work) {
            /* leading colon: only modifiers, applied to the default policy */
            mods++;
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "%s rmaps:base only modifiers %s provided - assuming bysocket mapping",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), mods);
            ORTE_SET_MAPPING_POLICY(result, ORTE_MAPPING_BYSOCKET);
            rc = check_modifiers(mods, &result);
            free(work);
            /* only an already-reported error stops us here */
            if (ORTE_ERR_SILENT == rc) {
                return ORTE_ERR_SILENT;
            }
            return rmaps_commit_mapping(jdata, policy, result);
        }

        /* split "policy:rest" into two strings */
        *mods++ = '\0';
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "%s rmaps:base policy %s modifiers %s provided",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), work, mods);

        if (0 == strncasecmp(work, "ppr", strlen(work))) {
            /* mods must look like "N:obj" or "N:obj:mod1,mod2": at least
             * one colon has to separate the count from the object type */
            obj = strchr(mods, ':');
            if (NULL == obj) {
                orte_show_help("help-orte-rmaps-base.txt", "invalid-pattern", true, inspec);
                free(work);
                return ORTE_ERR_SILENT;
            }
            obj++;
            /* a further colon introduces modifiers; cut them off so that
             * mods is left holding just the "N:obj" pattern */
            objmods = strchr(obj, ':');
            if (NULL != objmods) {
                *objmods++ = '\0';
                /* unrecognized modifiers are tolerated without a message */
                if (ORTE_ERR_SILENT == check_modifiers(objmods, &result)) {
                    free(work);
                    return ORTE_ERR_SILENT;
                }
            }
            /* save the pattern */
            if (NULL != jdata && NULL != jdata->map) {
                jdata->map->ppr = strdup(mods);
            } else {
                orte_rmaps_base.ppr = strdup(mods);
            }
            ORTE_SET_MAPPING_POLICY(result, ORTE_MAPPING_PPR);
            ORTE_SET_MAPPING_DIRECTIVE(result, ORTE_MAPPING_GIVEN);
            free(work);
            return rmaps_commit_mapping(jdata, policy, result);
        }

        /* ordinary policy followed by modifiers */
        rc = check_modifiers(mods, &result);
        if (ORTE_SUCCESS != rc && ORTE_ERR_TAKE_NEXT_OPTION != rc) {
            if (ORTE_ERR_BAD_PARAM == rc) {
                orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, inspec);
            }
            free(work);
            return rc;
        }
    }

    /* look the object name up, accepting abbreviations */
    speclen = strlen(work);
    for (k = 0; k < nobjects; k++) {
        if (0 == strncasecmp(work, objects[k].name, speclen)) {
            break;
        }
    }
    if (nobjects == k) {
        orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", work);
        free(work);
        return ORTE_ERR_SILENT;
    }

    if (DIST == objects[k].extra) {
        /* mapping by distance needs a device to measure from */
        if (NULL == rmaps_dist_device) {
            orte_show_help("help-orte-rmaps-base.txt", "device-not-specified", true);
            free(work);
            return ORTE_ERR_SILENT;
        }
        /* keep only the part of the device spec before the first colon */
        colon = strchr(rmaps_dist_device, ':');
        if (NULL != colon) {
            *colon = '\0';
        }
        if (NULL != device) {
            *device = strdup(rmaps_dist_device);
        }
    } else if (HWTHREADS == objects[k].extra) {
        /* mapping processes to individual hwthreads means those hwthreads
         * have to be treated as separate cpus */
        opal_hwloc_use_hwthreads_as_cpus = true;
    }

    ORTE_SET_MAPPING_POLICY(result, objects[k].value);
    ORTE_SET_MAPPING_DIRECTIVE(result, ORTE_MAPPING_GIVEN);
    free(work);

    return rmaps_commit_mapping(jdata, policy, result);
}
764
/*
 * Compute a ranking policy and store it in *policy.
 *
 * policy   [out] receives the computed ranking policy; written only
 *                when ORTE_SUCCESS is returned.
 * mapping  [in]  mapping policy, consulted only when spec is NULL.
 * spec     [in]  user-supplied ranking string of the form
 *                "<object>[:<modifier>]", or NULL.
 *
 * When spec is NULL, the ranking follows the mapping object if the
 * mapping carries the ORTE_MAPPING_GIVEN directive; otherwise (and for
 * mapping objects with no ranking counterpart) ranking is by slot.
 *
 * When spec is given, <object> is one of slot, node, hwthread, core,
 * l1cache, l2cache, l3cache, socket, numa, board, and <modifier> is
 * "span" or "fill".  Comparison is case-insensitive and limited to the
 * length of the user's token, so abbreviations are accepted; an
 * ambiguous abbreviation resolves to the first keyword tested below.
 * A successfully parsed spec also sets the ORTE_RANKING_GIVEN directive.
 *
 * Returns ORTE_SUCCESS, or ORTE_ERR_SILENT after a help message has
 * been shown for a malformed or unrecognized spec.
 */
int orte_rmaps_base_set_ranking_policy(orte_ranking_policy_t *policy,
                                       orte_mapping_policy_t mapping,
                                       char *spec)
{
    orte_mapping_policy_t map;
    orte_ranking_policy_t tmp;
    char **ck;
    int argc;
    size_t len;

    /* set default */
    tmp = 0;

    if (NULL == spec) {
        /* check for map-by object directives - we set the
         * ranking to match if one was given
         */
        if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(mapping)) {
            map = ORTE_GET_MAPPING_POLICY(mapping);
            switch (map) {
            case ORTE_MAPPING_BYSLOT:
                ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_SLOT);
                break;
            case ORTE_MAPPING_BYNODE:
                ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_NODE);
                break;
            case ORTE_MAPPING_BYCORE:
                ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_CORE);
                break;
            case ORTE_MAPPING_BYL1CACHE:
                ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_L1CACHE);
                break;
            case ORTE_MAPPING_BYL2CACHE:
                ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_L2CACHE);
                break;
            case ORTE_MAPPING_BYL3CACHE:
                ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_L3CACHE);
                break;
            case ORTE_MAPPING_BYSOCKET:
                ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_SOCKET);
                break;
            case ORTE_MAPPING_BYNUMA:
                ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_NUMA);
                break;
            case ORTE_MAPPING_BYBOARD:
                ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_BOARD);
                break;
            case ORTE_MAPPING_BYHWTHREAD:
                ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_HWTHREAD);
                break;
            default:
                /* anything not tied to a specific hw obj can rank by slot */
                ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_SLOT);
                break;
            }
        } else {
            /* if no map-by was given, default to by-slot */
            ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_SLOT);
        }
    } else {
        ck = opal_argv_split(spec, ':');
        argc = opal_argv_count(ck);
        if (argc < 1 || 2 < argc) {
            /* incorrect format: either no object token at all (e.g. an
             * empty spec) or too many ':'-separated fields. Report the
             * string we were asked to parse.
             */
            orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "ranking", spec);
            opal_argv_free(ck);
            return ORTE_ERR_SILENT;
        }
        if (2 == argc) {
            len = strlen(ck[1]);
            if (0 == strncasecmp(ck[1], "span", len)) {
                ORTE_SET_RANKING_DIRECTIVE(tmp, ORTE_RANKING_SPAN);
            } else if (0 == strncasecmp(ck[1], "fill", len)) {
                ORTE_SET_RANKING_DIRECTIVE(tmp, ORTE_RANKING_FILL);
            } else {
                /* unrecognized modifier */
                orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, ck[1]);
                opal_argv_free(ck);
                return ORTE_ERR_SILENT;
            }
        }
        /* compare only as many characters as the user typed so that
         * abbreviations of the object name are accepted
         */
        len = strlen(ck[0]);
        if (0 == strncasecmp(ck[0], "slot", len)) {
            ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_SLOT);
        } else if (0 == strncasecmp(ck[0], "node", len)) {
            ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_NODE);
        } else if (0 == strncasecmp(ck[0], "hwthread", len)) {
            ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_HWTHREAD);
        } else if (0 == strncasecmp(ck[0], "core", len)) {
            ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_CORE);
        } else if (0 == strncasecmp(ck[0], "l1cache", len)) {
            ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_L1CACHE);
        } else if (0 == strncasecmp(ck[0], "l2cache", len)) {
            ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_L2CACHE);
        } else if (0 == strncasecmp(ck[0], "l3cache", len)) {
            ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_L3CACHE);
        } else if (0 == strncasecmp(ck[0], "socket", len)) {
            ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_SOCKET);
        } else if (0 == strncasecmp(ck[0], "numa", len)) {
            ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_NUMA);
        } else if (0 == strncasecmp(ck[0], "board", len)) {
            ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_BOARD);
        } else {
            /* report the spec actually being parsed, which need not be
             * the value of the MCA parameter
             */
            orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "ranking", spec);
            opal_argv_free(ck);
            return ORTE_ERR_SILENT;
        }
        opal_argv_free(ck);
        ORTE_SET_RANKING_DIRECTIVE(tmp, ORTE_RANKING_GIVEN);
    }

    *policy = tmp;
    return ORTE_SUCCESS;
}
876