1 /*
2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
3  *                         University Research and Technology
4  *                         Corporation.  All rights reserved.
5  * Copyright (c) 2004-2011 The University of Tennessee and The University
6  *                         of Tennessee Research Foundation.  All rights
7  *                         reserved.
8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9  *                         University of Stuttgart.  All rights reserved.
10  * Copyright (c) 2004-2005 The Regents of the University of California.
11  *                         All rights reserved.
12  * Copyright (c) 2010-2012 Oak Ridge National Labs.  All rights reserved.
13  * Copyright (c) 2011-2013 Los Alamos National Security, LLC.  All rights
14  *                         reserved.
15  * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
16  * Copyright (c) 2014-2016 Research Organization for Information Science
17  *                         and Technology (RIST). All rights reserved.
18  * Copyright (c) 2015      Cisco Systems, Inc.  All rights reserved.
19  * $COPYRIGHT$
20  *
21  * Additional copyrights may follow
22  *
23  * $HEADER$
24  */
25 
26 #include "orte_config.h"
27 #include "orte/constants.h"
28 
29 #include <sys/types.h>
30 #include <stdio.h>
31 #ifdef HAVE_FCNTL_H
32 #include <fcntl.h>
33 #endif
34 #ifdef HAVE_UNISTD_H
35 #include <unistd.h>
36 #endif
37 #ifdef HAVE_SYS_STAT_H
38 #include <sys/stat.h>
39 #endif
40 
41 #include "opal/mca/event/event.h"
42 #include "opal/mca/pmix/base/base.h"
43 #include "opal/util/arch.h"
44 #include "opal/util/os_path.h"
45 #include "opal/util/output.h"
46 #include "opal/util/proc.h"
47 #include "opal/runtime/opal.h"
48 #include "opal/runtime/opal_cr.h"
49 
50 #include "orte/mca/rml/base/base.h"
51 #include "orte/mca/routed/base/base.h"
52 #include "orte/mca/errmgr/errmgr.h"
53 #include "orte/mca/dfs/base/base.h"
54 #include "orte/mca/grpcomm/base/base.h"
55 #include "orte/mca/oob/base/base.h"
56 #include "orte/mca/rml/rml.h"
57 #include "orte/mca/rml/base/rml_contact.h"
58 #include "orte/mca/odls/odls_types.h"
59 #include "orte/mca/filem/base/base.h"
60 #include "orte/mca/errmgr/base/base.h"
61 #if OPAL_ENABLE_FT_CR == 1
62 #include "orte/mca/snapc/base/base.h"
63 #include "orte/mca/sstore/base/base.h"
64 #endif
65 #include "orte/mca/state/base/base.h"
66 #include "orte/util/proc_info.h"
67 #include "orte/util/session_dir.h"
68 #include "orte/util/name_fns.h"
69 #include "orte/util/show_help.h"
70 
71 #include "orte/runtime/orte_cr.h"
72 #include "orte/runtime/orte_globals.h"
73 #include "orte/runtime/orte_wait.h"
74 
75 #include "orte/mca/ess/base/base.h"
76 
orte_ess_base_app_setup(bool db_restrict_local)77 int orte_ess_base_app_setup(bool db_restrict_local)
78 {
79     int ret;
80     char *error = NULL;
81     opal_list_t transports;
82 
83     /*
84      * stdout/stderr buffering
85      * If the user requested to override the default setting then do
86      * as they wish.
87      */
88     if( orte_ess_base_std_buffering > -1 ) {
89         if( 0 == orte_ess_base_std_buffering ) {
90             setvbuf(stdout, NULL, _IONBF, 0);
91             setvbuf(stderr, NULL, _IONBF, 0);
92         }
93         else if( 1 == orte_ess_base_std_buffering ) {
94             setvbuf(stdout, NULL, _IOLBF, 0);
95             setvbuf(stderr, NULL, _IOLBF, 0);
96         }
97         else if( 2 == orte_ess_base_std_buffering ) {
98             setvbuf(stdout, NULL, _IOFBF, 0);
99             setvbuf(stderr, NULL, _IOFBF, 0);
100         }
101     }
102 
103     /* if I am an MPI app, we will let the MPI layer define and
104      * control the opal_proc_t structure. Otherwise, we need to
105      * do so here */
106     if (ORTE_PROC_NON_MPI) {
107         orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME;
108         orte_process_info.super.proc_hostname = orte_process_info.nodename;
109         orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL;
110         orte_process_info.super.proc_arch = opal_local_arch;
111         opal_proc_local_set(&orte_process_info.super);
112     }
113 
114     /* open and setup the state machine */
115     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) {
116         ORTE_ERROR_LOG(ret);
117         error = "orte_state_base_open";
118         goto error;
119     }
120     if (ORTE_SUCCESS != (ret = orte_state_base_select())) {
121         ORTE_ERROR_LOG(ret);
122         error = "orte_state_base_select";
123         goto error;
124     }
125 
126     /* open the errmgr */
127     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) {
128         ORTE_ERROR_LOG(ret);
129         error = "orte_errmgr_base_open";
130         goto error;
131     }
132 
133     /* setup my session directory */
134     if (orte_create_session_dirs) {
135         OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output,
136                              "%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
137                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
138                              (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
139                              orte_process_info.nodename));
140         if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) {
141             ORTE_ERROR_LOG(ret);
142             error = "orte_session_dir";
143             goto error;
144         }
145         /* Once the session directory location has been established, set
146            the opal_output env file location to be in the
147            proc-specific session directory. */
148         opal_output_set_output_file_info(orte_process_info.proc_session_dir,
149                                          "output-", NULL, NULL);
150     }
151     /* Setup the communication infrastructure */
152     /* Routed system */
153     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
154         ORTE_ERROR_LOG(ret);
155         error = "orte_routed_base_open";
156         goto error;
157     }
158     if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
159         ORTE_ERROR_LOG(ret);
160         error = "orte_routed_base_select";
161         goto error;
162     }
163     /*
164      * OOB Layer
165      */
166     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
167         ORTE_ERROR_LOG(ret);
168         error = "orte_oob_base_open";
169         goto error;
170     }
171     if (ORTE_SUCCESS != (ret = orte_oob_base_select())) {
172         ORTE_ERROR_LOG(ret);
173         error = "orte_oob_base_select";
174         goto error;
175     }
176     /* Runtime Messaging Layer */
177     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) {
178         ORTE_ERROR_LOG(ret);
179         error = "orte_rml_base_open";
180         goto error;
181     }
182     if (ORTE_SUCCESS != (ret = orte_rml_base_select())) {
183         ORTE_ERROR_LOG(ret);
184         error = "orte_rml_base_select";
185         goto error;
186     }
187     /* if we have info on the HNP and local daemon, process it */
188     if (NULL != orte_process_info.my_hnp_uri) {
189         /* we have to set the HNP's name, even though we won't route messages directly
190          * to it. This is required to ensure that we -do- send messages to the correct
191          * HNP name
192          */
193         if (ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
194                                                             ORTE_PROC_MY_HNP, NULL))) {
195             ORTE_ERROR_LOG(ret);
196             error = "orte_rml_parse_HNP";
197             goto error;
198         }
199     }
200     if (NULL != orte_process_info.my_daemon_uri) {
201         opal_value_t val;
202 
203         /* extract the daemon's name so we can update the routing table */
204         if (ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri,
205                                                             ORTE_PROC_MY_DAEMON, NULL))) {
206             ORTE_ERROR_LOG(ret);
207             error = "orte_rml_parse_daemon";
208             goto error;
209         }
210         /* Set the contact info in the database - this won't actually establish
211          * the connection, but just tells us how to reach the daemon
212          * if/when we attempt to send to it
213          */
214         OBJ_CONSTRUCT(&val, opal_value_t);
215         val.key = OPAL_PMIX_PROC_URI;
216         val.type = OPAL_STRING;
217         val.data.string = orte_process_info.my_daemon_uri;
218         if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_DAEMON, &val))) {
219             ORTE_ERROR_LOG(ret);
220             val.key = NULL;
221             val.data.string = NULL;
222             OBJ_DESTRUCT(&val);
223             error = "store DAEMON URI";
224             goto error;
225         }
226         val.key = NULL;
227         val.data.string = NULL;
228         OBJ_DESTRUCT(&val);
229     }
230 
231     /* setup the errmgr */
232     if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
233         ORTE_ERROR_LOG(ret);
234         error = "orte_errmgr_base_select";
235         goto error;
236     }
237 
238     /* get a conduit for our use - we never route IO over fabric */
239     OBJ_CONSTRUCT(&transports, opal_list_t);
240     orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
241                        ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING);
242     if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) {
243         ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
244         error = "orte_rml_open_mgmt_conduit";
245         goto error;
246     }
247     OPAL_LIST_DESTRUCT(&transports);
248 
249     OBJ_CONSTRUCT(&transports, opal_list_t);
250     orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
251                        ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING);
252     if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) {
253         ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
254         error = "orte_rml_open_coll_conduit";
255         goto error;
256     }
257     OPAL_LIST_DESTRUCT(&transports);
258 
259     /*
260      * Group communications
261      */
262     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_grpcomm_base_framework, 0))) {
263         ORTE_ERROR_LOG(ret);
264         error = "orte_grpcomm_base_open";
265         goto error;
266     }
267     if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) {
268         ORTE_ERROR_LOG(ret);
269         error = "orte_grpcomm_base_select";
270         goto error;
271     }
272 
273 #if OPAL_ENABLE_FT_CR == 1
274     /*
275      * Setup the SnapC
276      */
277     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) {
278         ORTE_ERROR_LOG(ret);
279         error = "orte_snapc_base_open";
280         goto error;
281     }
282     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_sstore_base_framework, 0))) {
283         ORTE_ERROR_LOG(ret);
284         error = "orte_sstore_base_open";
285         goto error;
286     }
287     if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, ORTE_PROC_IS_APP))) {
288         ORTE_ERROR_LOG(ret);
289         error = "orte_snapc_base_select";
290         goto error;
291     }
292     if (ORTE_SUCCESS != (ret = orte_sstore_base_select())) {
293         ORTE_ERROR_LOG(ret);
294         error = "orte_sstore_base_select";
295         goto error;
296     }
297     /* apps need the OPAL CR stuff */
298     opal_cr_set_enabled(true);
299 #else
300     opal_cr_set_enabled(false);
301 #endif
302     /* Initalize the CR setup
303      * Note: Always do this, even in non-FT builds.
304      * If we don't some user level tools may hang.
305      */
306     if (ORTE_SUCCESS != (ret = orte_cr_init())) {
307         ORTE_ERROR_LOG(ret);
308         error = "orte_cr_init";
309         goto error;
310     }
311     /* open the distributed file system */
312     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) {
313         ORTE_ERROR_LOG(ret);
314         error = "orte_dfs_base_open";
315         goto error;
316     }
317     if (ORTE_SUCCESS != (ret = orte_dfs_base_select())) {
318         ORTE_ERROR_LOG(ret);
319         error = "orte_dfs_base_select";
320         goto error;
321     }
322     return ORTE_SUCCESS;
323  error:
324     orte_show_help("help-orte-runtime.txt",
325                    "orte_init:startup:internal-failure",
326                    true, error, ORTE_ERROR_NAME(ret), ret);
327     return ret;
328 }
329 
orte_ess_base_app_finalize(void)330 int orte_ess_base_app_finalize(void)
331 {
332     orte_cr_finalize();
333 
334 #if OPAL_ENABLE_FT_CR == 1
335     (void) mca_base_framework_close(&orte_snapc_base_framework);
336     (void) mca_base_framework_close(&orte_sstore_base_framework);
337 #endif
338 
339     /* release the conduits */
340     orte_rml.close_conduit(orte_mgmt_conduit);
341     orte_rml.close_conduit(orte_coll_conduit);
342 
343     /* close frameworks */
344     (void) mca_base_framework_close(&orte_filem_base_framework);
345     (void) mca_base_framework_close(&orte_errmgr_base_framework);
346 
347     /* now can close the rml and its friendly group comm */
348     (void) mca_base_framework_close(&orte_grpcomm_base_framework);
349     (void) mca_base_framework_close(&orte_dfs_base_framework);
350     (void) mca_base_framework_close(&orte_routed_base_framework);
351 
352     (void) mca_base_framework_close(&orte_rml_base_framework);
353     if (NULL != opal_pmix.finalize) {
354         opal_pmix.finalize();
355         (void) mca_base_framework_close(&opal_pmix_base_framework);
356     }
357     (void) mca_base_framework_close(&orte_oob_base_framework);
358     (void) mca_base_framework_close(&orte_state_base_framework);
359 
360     orte_session_dir_finalize(ORTE_PROC_MY_NAME);
361     /* cleanup the process info */
362     orte_proc_info_finalize();
363 
364     return ORTE_SUCCESS;
365 }
366 
367 /*
368  * We do NOT call the regular C-library "abort" function, even
369  * though that would have alerted us to the fact that this is
370  * an abnormal termination, because it would automatically cause
371  * a core file to be generated. On large systems, that can be
372  * overwhelming (imagine a few thousand Gbyte-sized files hitting
373                  * a shared file system simultaneously...ouch!).
374  *
375  * However, this causes a problem for OpenRTE as the system truly
376  * needs to know that this actually IS an abnormal termination.
377  * To get around the problem, we drop a marker in the proc-level
378  * session dir. If session dir's were not allowed, then we just
379  * ignore this question.
380  *
381  * In some cases, however, we DON'T want to create that alert. For
382  * example, if an orted detects that the HNP has died, then there
383  * is truly nobody to alert! In these cases, we pass report=false
384  * to indicate that we don't want the marker dropped.
385  */
orte_ess_base_app_abort(int status,bool report)386 void orte_ess_base_app_abort(int status, bool report)
387 {
388     int fd;
389     char *myfile;
390     struct timespec tp = {0, 100000};
391 
392     /* Exit - do NOT do a normal finalize as this will very likely
393      * hang the process. We are aborting due to an abnormal condition
394      * that precludes normal cleanup
395      *
396      * We do need to do the following bits to make sure we leave a
397      * clean environment. Taken from orte_finalize():
398      * - Assume errmgr cleans up child processes before we exit.
399      */
400     /* CRS cleanup since it may have a named pipe and thread active */
401     orte_cr_finalize();
402     /* If we were asked to report this termination, do so.
403      * Since singletons don't start an HNP unless necessary, and
404      * direct-launched procs don't have daemons at all, only send
405      * the message if routing is enabled as this indicates we
406      * have someone to send to
407      */
408     if (report && orte_routing_is_enabled && orte_create_session_dirs) {
409         myfile = opal_os_path(false, orte_process_info.proc_session_dir, "aborted", NULL);
410         fd = open(myfile, O_CREAT, S_IRUSR);
411         close(fd);
412         /* now introduce a short delay to allow any pending
413          * messages (e.g., from a call to "show_help") to
414          * have a chance to be sent */
415         nanosleep(&tp, NULL);
416     }
417     /* - Clean out the global structures
418      * (not really necessary, but good practice) */
419     orte_proc_info_finalize();
420     /* Now Exit */
421     _exit(status);
422 }
423