1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4  *                         University Research and Technology
5  *                         Corporation.  All rights reserved.
6  * Copyright (c) 2004-2014 The University of Tennessee and The University
7  *                         of Tennessee Research Foundation.  All rights
8  *                         reserved.
9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10  *                         University of Stuttgart.  All rights reserved.
11  * Copyright (c) 2004-2005 The Regents of the University of California.
12  *                         All rights reserved.
13  * Copyright (c) 2006      Los Alamos National Security, LLC.  All rights
14  *                         reserved.
15  * Copyright (c) 2008-2015 Cisco Systems, Inc.  All rights reserved.
16  * Copyright (c) 2009      Oak Ridge National Labs.  All rights reserved.
17  * Copyright (c) 2010-2014 Los Alamos National Security, LLC.
18  *                         All rights reserved.
19  * Copyright (c) 2014      Hochschule Esslingen.  All rights reserved.
20  * Copyright (c) 2015      Research Organization for Information Science
21  *                         and Technology (RIST). All rights reserved.
22  * Copyright (c) 2015      Mellanox Technologies, Inc.
23  *                         All rights reserved.
24  * Copyright (c) 2017      IBM Corporation.  All rights reserved.
25  * $COPYRIGHT$
26  *
27  * Additional copyrights may follow
28  *
29  * $HEADER$
30  */
31 
32 #include "opal_config.h"
33 
34 #include <time.h>
35 #include <signal.h>
36 
37 #include "opal/constants.h"
38 #include "opal/runtime/opal.h"
39 #include "opal/datatype/opal_datatype.h"
40 #include "opal/mca/base/mca_base_var.h"
41 #include "opal/threads/mutex.h"
42 #include "opal/threads/threads.h"
43 #include "opal/mca/shmem/base/base.h"
44 #include "opal/mca/base/mca_base_var.h"
45 #include "opal/runtime/opal_params.h"
46 #include "opal/dss/dss.h"
47 #include "opal/util/opal_environ.h"
48 #include "opal/util/show_help.h"
49 #include "opal/util/timings.h"
50 
51 char *opal_signal_string = NULL;
52 char *opal_stacktrace_output_filename = NULL;
53 char *opal_net_private_ipv4 = NULL;
54 char *opal_set_max_sys_limits = NULL;
55 
56 #if OPAL_ENABLE_TIMING
57 char *opal_timing_sync_file = NULL;
58 char *opal_timing_output = NULL;
59 bool opal_timing_overhead = true;
60 #endif
61 
62 bool opal_built_with_cuda_support = OPAL_INT_TO_BOOL(OPAL_CUDA_SUPPORT);
63 bool opal_cuda_support = false;
64 bool opal_warn_on_missing_libcuda = true;
65 #if OPAL_ENABLE_FT_CR == 1
66 bool opal_base_distill_checkpoint_ready = false;
67 #endif
68 
69 /**
70  * Globals imported from the OMPI layer.
71  */
72 int opal_leave_pinned = -1;
73 bool opal_leave_pinned_pipeline = false;
74 bool opal_abort_print_stack = false;
75 int opal_abort_delay = 0;
76 
77 static bool opal_register_done = false;
78 
opal_register_params(void)79 int opal_register_params(void)
80 {
81     int ret;
82     char *string = NULL;
83 
84     if (opal_register_done) {
85         return OPAL_SUCCESS;
86     }
87 
88     opal_register_done = true;
89 
90     /*
91      * This string is going to be used in opal/util/stacktrace.c
92      */
93     {
94         int j;
95         int signals[] = {
96 #ifdef SIGABRT
97             SIGABRT,
98 #endif
99 #ifdef SIGBUS
100             SIGBUS,
101 #endif
102 #ifdef SIGFPE
103             SIGFPE,
104 #endif
105 #ifdef SIGSEGV
106             SIGSEGV,
107 #endif
108             -1
109         };
110         for (j = 0 ; signals[j] != -1 ; ++j) {
111             if (j == 0) {
112                 asprintf(&string, "%d", signals[j]);
113             } else {
114                 char *tmp;
115                 asprintf(&tmp, "%s,%d", string, signals[j]);
116                 free(string);
117                 string = tmp;
118             }
119         }
120 
121         opal_signal_string = string;
122         ret = mca_base_var_register ("opal", "opal", NULL, "signal",
123 				     "Comma-delimited list of integer signal numbers to Open MPI to attempt to intercept.  Upon receipt of the intercepted signal, Open MPI will display a stack trace and abort.  Open MPI will *not* replace signals if handlers are already installed by the time MPI_INIT is invoked.  Optionally append \":complain\" to any signal number in the comma-delimited list to make Open MPI complain if it detects another signal handler (and therefore does not insert its own).",
124 				     MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
125 				     OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
126 				     &opal_signal_string);
127         free (string);
128         if (0 > ret) {
129             return ret;
130         }
131     }
132 
133     /*
134      * Where should the stack trace output be directed
135      * This string is going to be used in opal/util/stacktrace.c
136      */
137     string = strdup("stderr");
138     opal_stacktrace_output_filename = string;
139     ret = mca_base_var_register ("opal", "opal", NULL, "stacktrace_output",
140                                  "Specifies where the stack trace output stream goes.  "
141                                  "Accepts one of the following: none (disabled), stderr (default), stdout, file[:filename].   "
142                                  "If 'filename' is not specified, a default filename of 'stacktrace' is used.  "
143                                  "The 'filename' is appended with either '.PID' or '.RANK.PID', if RANK is available.  "
144                                  "The 'filename' can be an absolute path or a relative path to the current working directory.",
145                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
146                                  OPAL_INFO_LVL_3,
147                                  MCA_BASE_VAR_SCOPE_LOCAL,
148                                  &opal_stacktrace_output_filename);
149     free (string);
150     if (0 > ret) {
151         return ret;
152     }
153 
154 
155 #if defined(HAVE_SCHED_YIELD)
156     opal_progress_yield_when_idle = false;
157     ret = mca_base_var_register ("opal", "opal", "progress", "yield_when_idle",
158                                  "Yield the processor when waiting on progress",
159                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
160                                  OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_LOCAL,
161                                  &opal_progress_yield_when_idle);
162 #endif
163 
164 #if OPAL_ENABLE_DEBUG
165     opal_progress_debug = false;
166     ret = mca_base_var_register ("opal", "opal", "progress", "debug",
167 				 "Set to non-zero to debug progress engine features",
168 				 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
169 				 OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_LOCAL,
170 				 &opal_progress_debug);
171     if (0 > ret) {
172         return ret;
173     }
174 
175     opal_debug_threads = false;
176     ret = mca_base_var_register ("opal", "opal", "debug", "threads",
177 				 "Debug thread usage within OPAL. Reports out "
178 				 "when threads are acquired and released.",
179 				 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
180 				 OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_LOCAL,
181 				 &opal_debug_threads);
182     if (0 > ret) {
183         return ret;
184     }
185 #endif
186 
187 #if OPAL_ENABLE_FT_CR == 1
188     opal_base_distill_checkpoint_ready = false;
189     ret = mca_base_var_register("opal", "opal", "base", "distill_checkpoint_ready",
190                                 "Distill only those components that are Checkpoint Ready",
191                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
192                                 OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_LOCAL,
193                                 &opal_base_distill_checkpoint_ready);
194 
195     if (0 > ret) {
196         return ret;
197     }
198 #endif
199 
200     /* RFC1918 defines
201        - 10.0.0./8
202        - 172.16.0.0/12
203        - 192.168.0.0/16
204 
205        RFC3330 also mentions
206        - 169.254.0.0/16 for DHCP onlink iff there's no DHCP server
207     */
208     opal_net_private_ipv4 = "10.0.0.0/8;172.16.0.0/12;192.168.0.0/16;169.254.0.0/16";
209     ret = mca_base_var_register ("opal", "opal", "net", "private_ipv4",
210 				 "Semicolon-delimited list of CIDR notation entries specifying what networks are considered \"private\" (default value based on RFC1918 and RFC3330)",
211 				 MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
212 				 OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_ALL_EQ,
213 				 &opal_net_private_ipv4);
214     if (0 > ret) {
215         return ret;
216     }
217 
218     opal_set_max_sys_limits = NULL;
219     ret = mca_base_var_register ("opal", "opal", NULL, "set_max_sys_limits",
220 				 "Set the specified system-imposed limits to the specified value, including \"unlimited\"."
221                                  "Supported params: core, filesize, maxmem, openfiles, stacksize, maxchildren",
222 				 MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
223 				 OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_ALL_EQ,
224 				 &opal_set_max_sys_limits);
225     if (0 > ret) {
226         return ret;
227     }
228 
229     ret = mca_base_var_register("opal", "opal", NULL, "built_with_cuda_support",
230                                 "Whether CUDA GPU buffer support is built into library or not",
231                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
232                                 OPAL_INFO_LVL_4, MCA_BASE_VAR_SCOPE_CONSTANT,
233                                 &opal_built_with_cuda_support);
234     if (0 > ret) {
235         return ret;
236     }
237 
238     /* Current default is to enable CUDA support if it is built into library */
239     opal_cuda_support = opal_built_with_cuda_support;
240     ret = mca_base_var_register ("opal", "opal", NULL, "cuda_support",
241                                  "Whether CUDA GPU buffer support is enabled or not",
242                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
243                                  OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_ALL_EQ,
244                                  &opal_cuda_support);
245     if (0 > ret) {
246         return ret;
247     }
248 
249     opal_warn_on_missing_libcuda = true;
250     ret = mca_base_var_register ("opal", "opal", NULL, "warn_on_missing_libcuda",
251                                  "Whether to print a message when CUDA support is enabled but libcuda is not found",
252                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
253                                  OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_ALL_EQ,
254                                  &opal_warn_on_missing_libcuda);
255     if (0 > ret) {
256         return ret;
257     }
258 
259     /* Leave pinned parameter */
260     opal_leave_pinned = -1;
261     ret = mca_base_var_register("ompi", "mpi", NULL, "leave_pinned",
262                                 "Whether to use the \"leave pinned\" protocol or not.  Enabling this setting can help bandwidth performance when repeatedly sending and receiving large messages with the same buffers over RDMA-based networks (false = do not use \"leave pinned\" protocol, true = use \"leave pinned\" protocol, auto = allow network to choose at runtime).",
263                                 MCA_BASE_VAR_TYPE_INT, &mca_base_var_enum_auto_bool, 0, 0,
264                                 OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
265                                 &opal_leave_pinned);
266     mca_base_var_register_synonym(ret, "opal", "opal", NULL, "leave_pinned",
267                                   MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
268 
269     opal_leave_pinned_pipeline = false;
270     ret = mca_base_var_register("ompi", "mpi", NULL, "leave_pinned_pipeline",
271                                 "Whether to use the \"leave pinned pipeline\" protocol or not.",
272                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
273                                  OPAL_INFO_LVL_9,
274                                  MCA_BASE_VAR_SCOPE_READONLY,
275                                  &opal_leave_pinned_pipeline);
276     mca_base_var_register_synonym(ret, "opal", "opal", NULL, "leave_pinned_pipeline",
277                                   MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
278 
279     if (opal_leave_pinned > 0 && opal_leave_pinned_pipeline) {
280         opal_leave_pinned_pipeline = 0;
281         opal_show_help("help-opal-runtime.txt",
282                        "mpi-params:leave-pinned-and-pipeline-selected",
283                        true);
284     }
285 
286     opal_warn_on_fork = true;
287     (void) mca_base_var_register("ompi", "mpi", NULL, "warn_on_fork",
288                                  "If nonzero, issue a warning if program forks under conditions that could cause system errors",
289                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
290                                  OPAL_INFO_LVL_9,
291                                  MCA_BASE_VAR_SCOPE_READONLY,
292                                  &opal_warn_on_fork);
293 
294     opal_abort_delay = 0;
295     ret = mca_base_var_register("opal", "opal", NULL, "abort_delay",
296                                 "If nonzero, print out an identifying message when abort operation is invoked (hostname, PID of the process that called abort) and delay for that many seconds before exiting (a negative delay value means to never abort).  This allows attaching of a debugger before quitting the job.",
297                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
298                                  OPAL_INFO_LVL_5,
299                                  MCA_BASE_VAR_SCOPE_READONLY,
300                                  &opal_abort_delay);
301     if (0 > ret) {
302         return ret;
303     }
304 
305     opal_abort_print_stack = false;
306     ret = mca_base_var_register("opal", "opal", NULL, "abort_print_stack",
307                                  "If nonzero, print out a stack trace when abort is invoked",
308                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
309                                 /* If we do not have stack trace
310                                    capability, make this a constant
311                                    MCA variable */
312 #if OPAL_WANT_PRETTY_PRINT_STACKTRACE
313                                  0,
314                                  OPAL_INFO_LVL_5,
315                                  MCA_BASE_VAR_SCOPE_READONLY,
316 #else
317                                  MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
318                                  OPAL_INFO_LVL_5,
319                                  MCA_BASE_VAR_SCOPE_CONSTANT,
320 #endif
321                                  &opal_abort_print_stack);
322     if (0 > ret) {
323         return ret;
324     }
325 
326     /* register the envar-forwarding params */
327     (void)mca_base_var_register ("opal", "mca", "base", "env_list",
328                                  "Set SHELL env variables",
329                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3,
330                                  MCA_BASE_VAR_SCOPE_READONLY, &mca_base_env_list);
331 
332     mca_base_env_list_sep = MCA_BASE_ENV_LIST_SEP_DEFAULT;
333     (void)mca_base_var_register ("opal", "mca", "base", "env_list_delimiter",
334                                  "Set SHELL env variables delimiter. Default: semicolon ';'",
335                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3,
336                                  MCA_BASE_VAR_SCOPE_READONLY, &mca_base_env_list_sep);
337 
338     /* Set OMPI_MCA_mca_base_env_list variable, it might not be set before
339      * if mca variable was taken from amca conf file. Need to set it
340      * here because mca_base_var_process_env_list is called from schizo_ompi.c
341      * only when this env variable was set.
342      */
343     if (NULL != mca_base_env_list) {
344         char *name = NULL;
345         (void) mca_base_var_env_name ("mca_base_env_list", &name);
346         if (NULL != name) {
347             opal_setenv(name, mca_base_env_list, false, &environ);
348             free(name);
349         }
350     }
351 
352     /* Register internal MCA variable mca_base_env_list_internal. It can be set only during
353      * parsing of amca conf file and contains SHELL env variables specified via -x there.
354      * Its format is the same as for mca_base_env_list.
355      */
356     (void)mca_base_var_register ("opal", "mca", "base", "env_list_internal",
357             "Store SHELL env variables from amca conf file",
358             MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_INTERNAL, OPAL_INFO_LVL_3,
359             MCA_BASE_VAR_SCOPE_READONLY, &mca_base_env_list_internal);
360 
361     /* The ddt engine has a few parameters */
362     ret = opal_datatype_register_params();
363     if (OPAL_SUCCESS != ret) {
364         return ret;
365     }
366 
367     /* dss has parameters */
368     ret = opal_dss_register_vars ();
369     if (OPAL_SUCCESS != ret) {
370         return ret;
371     }
372 
373     return OPAL_SUCCESS;
374 }
375 
opal_deregister_params(void)376 int opal_deregister_params(void)
377 {
378     opal_register_done = false;
379 
380     return OPAL_SUCCESS;
381 }
382