1 /*
2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
3  *                         University Research and Technology
4  *                         Corporation.  All rights reserved.
5  * Copyright (c) 2004-2005 The University of Tennessee and The University
6  *                         of Tennessee Research Foundation.  All rights
7  *                         reserved.
8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9  *                         University of Stuttgart.  All rights reserved.
10  * Copyright (c) 2004-2005 The Regents of the University of California.
11  *                         All rights reserved.
12  * Copyright (c) 2007      Los Alamos National Security, LLC.  All rights
13  *                         reserved.
14  * $COPYRIGHT$
15  *
16  * Additional copyrights may follow
17  *
18  * $HEADER$
19  */
20 
21 /** @file
22  *
23  * ORTE Layer Checkpoint/Restart Runtime functions
24  *
25  */
26 
27 #include "orte_config.h"
28 #include "orte/constants.h"
29 
30 #include <errno.h>
31 #ifdef HAVE_UNISTD_H
32 #include <unistd.h>
33 #endif  /* HAVE_UNISTD_H */
34 #ifdef HAVE_FCNTL_H
35 #include <fcntl.h>
36 #endif  /* HAVE_FCNTL_H */
37 #ifdef HAVE_SYS_TYPES_H
38 #include <sys/types.h>
39 #endif  /* HAVE_SYS_TYPES_H */
40 #ifdef HAVE_SYS_STAT_H
41 #include <sys/stat.h>  /* for mkfifo */
42 #endif  /* HAVE_SYS_STAT_H */
43 
44 #include "opal/util/opal_environ.h"
45 #include "opal/util/output.h"
46 #include "opal/util/basename.h"
47 #include "opal/mca/event/event.h"
48 #include "opal/mca/crs/crs.h"
49 #include "opal/mca/crs/base/base.h"
50 #include "opal/runtime/opal_cr.h"
51 
52 #include "orte/runtime/orte_cr.h"
53 #include "orte/util/proc_info.h"
54 #include "orte/runtime/orte_globals.h"
55 
56 #include "orte/mca/plm/base/base.h"
57 #include "orte/mca/ess/ess.h"
58 #include "orte/mca/ess/base/base.h"
59 #include "orte/mca/routed/base/base.h"
60 #include "orte/mca/routed/routed.h"
61 #include "orte/mca/rml/base/base.h"
62 #include "orte/mca/iof/base/base.h"
63 #include "orte/mca/snapc/snapc.h"
64 #include "orte/mca/snapc/base/base.h"
65 #include "orte/mca/filem/base/base.h"
66 
67 /*************
68  * Local functions
69  *************/
70 static int orte_cr_coord_pre_ckpt(void);
71 static int orte_cr_coord_pre_restart(void);
72 static int orte_cr_coord_pre_continue(void);
73 
74 static int orte_cr_coord_post_ckpt(void);
75 static int orte_cr_coord_post_restart(void);
76 static int orte_cr_coord_post_continue(void);
77 
78 bool orte_cr_flush_restart_files = true;
79 
80 /*************
81  * Local vars
82  *************/
83 static opal_cr_coord_callback_fn_t  prev_coord_callback = NULL;
84 
85 static int orte_cr_output = -1;
86 static int orte_cr_verbose = 0;
87 
88 /*
89  * CR Init
90  */
orte_cr_init(void)91 int orte_cr_init(void)
92 {
93     int ret, exit_status = ORTE_SUCCESS;
94 
95     /*
96      * OPAL Frameworks
97      */
98     if (OPAL_SUCCESS != (ret = opal_cr_init() ) ) {
99         exit_status = ret;
100         goto cleanup;
101     }
102 
103     /*
104      * Register MCA Parameters
105      */
106     orte_cr_verbose = 0;
107     (void) mca_base_var_register ("orte", "orte_cr", NULL, "verbose",
108                                   "Verbose output for the ORTE Checkpoint/Restart functionality",
109                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
110                                   OPAL_INFO_LVL_8,
111                                   MCA_BASE_VAR_SCOPE_READONLY,
112                                   &orte_cr_verbose);
113 
114     /*** RHC: This is going to crash-and-burn when the output conversion is
115      * completed as opal_output will have no idea what opal_cr_output stream means,
116      * or even worse, will have assigned it to someone else!
117      */
118 
119     if(0 != orte_cr_verbose) {
120         orte_cr_output = opal_output_open(NULL);
121         opal_output_set_verbosity(orte_cr_output, orte_cr_verbose);
122     } else {
123         orte_cr_output = opal_cr_output;
124     }
125 
126     opal_output_verbose(10, orte_cr_output,
127                         "orte_cr: init: orte_cr_init()\n");
128 
129     /* Init ORTE Entry Point Function */
130     if( ORTE_SUCCESS != (ret = orte_cr_entry_point_init()) ) {
131         exit_status = ret;
132         goto cleanup;
133     }
134 
135     /* Register the ORTE interlevel coordination callback */
136     opal_cr_reg_coord_callback(orte_cr_coord, &prev_coord_callback);
137 
138     /* Typically this is not needed. Individual BTLs will set this as needed */
139     opal_cr_continue_like_restart = false;
140     orte_cr_flush_restart_files   = true;
141 
142  cleanup:
143 
144     return exit_status;
145 }
146 
147 /*
148  * Finalize
149  */
orte_cr_finalize(void)150 int orte_cr_finalize(void)
151 {
152     opal_output_verbose(10, orte_cr_output,
153                         "orte_cr: finalize: orte_cr_finalize()");
154 
155     orte_cr_entry_point_finalize();
156 
157     /*
158      * OPAL Frameworks...
159      */
160     opal_cr_finalize();
161 
162     return ORTE_SUCCESS;
163 }
164 
165 /*
166  * Interlayer coordination callback
167  */
orte_cr_coord(int state)168 int orte_cr_coord(int state)
169 {
170     int ret, exit_status = ORTE_SUCCESS;
171 
172     opal_output_verbose(10, orte_cr_output,
173                         "orte_cr: coord: orte_cr_coord(%s)",
174                         opal_crs_base_state_str((opal_crs_state_type_t)state));
175 
176     /*
177      * Before calling the previous callback, we have the opportunity to
178      * take action given the state.
179      */
180     if(OPAL_CRS_CHECKPOINT == state) {
181         /* Do Checkpoint Phase work */
182         orte_cr_coord_pre_ckpt();
183     }
184     else if (OPAL_CRS_CONTINUE == state ) {
185         /* Do Continue Phase work */
186         orte_cr_coord_pre_continue();
187     }
188     else if (OPAL_CRS_RESTART == state ) {
189         /* Do Restart Phase work */
190         orte_cr_coord_pre_restart();
191     }
192     else if (OPAL_CRS_TERM == state ) {
193         /* Do Continue Phase work in prep to terminate the application */
194     }
195     else {
196         /* We must have been in an error state from the checkpoint
197          * recreate everything, as in the Continue Phase
198          */
199     }
200 
201     /*
202      * Call the previous callback, which should be OPAL
203      */
204     if(OPAL_SUCCESS != (ret = prev_coord_callback(state)) ) {
205         exit_status = ret;
206         goto cleanup;
207     }
208 
209 
210     /*
211      * After calling the previous callback, we have the opportunity to
212      * take action given the state to tidy up.
213      */
214     if(OPAL_CRS_CHECKPOINT == state) {
215         /* Do Checkpoint Phase work */
216         orte_cr_coord_post_ckpt();
217     }
218     else if (OPAL_CRS_CONTINUE == state ) {
219         /* Do Continue Phase work */
220         orte_cr_coord_post_continue();
221     }
222     else if (OPAL_CRS_RESTART == state ) {
223         /* Do Restart Phase work */
224         orte_cr_coord_post_restart();
225     }
226     else if (OPAL_CRS_TERM == state ) {
227         /* Do Continue Phase work in prep to terminate the application */
228     }
229     else {
230         /* We must have been in an error state from the checkpoint
231          * recreate everything, as in the Continue Phase
232          */
233     }
234 
235  cleanup:
236     return exit_status;
237 }
238 
239 /*************
240  * Pre Lower Layer
241  *************/
orte_cr_coord_pre_ckpt(void)242 static int orte_cr_coord_pre_ckpt(void) {
243     int ret, exit_status = ORTE_SUCCESS;
244 
245     /*
246      * All the checkpoint heavey lifting in here...
247      */
248     opal_output_verbose(10, orte_cr_output,
249                         "orte_cr: coord_pre_ckpt: orte_cr_coord_pre_ckpt()");
250 
251     /*
252      * Notify the ESS
253      */
254     if( NULL != orte_ess.ft_event ) {
255         if( ORTE_SUCCESS != (ret = orte_ess.ft_event(OPAL_CRS_CHECKPOINT))) {
256             exit_status = ret;
257             goto cleanup;
258         }
259     }
260 
261  cleanup:
262     return exit_status;
263 }
264 
orte_cr_coord_pre_restart(void)265 static int orte_cr_coord_pre_restart(void) {
266     /*
267      * Can not really do much until OPAL is up and running,
268      * so defer action until the post_restart function.
269      */
270     opal_output_verbose(10, orte_cr_output,
271                         "orte_cr: coord_pre_restart: orte_cr_coord_pre_restart()");
272 
273     return ORTE_SUCCESS;
274 }
275 
orte_cr_coord_pre_continue(void)276 static int orte_cr_coord_pre_continue(void) {
277     /*
278      * Can not really do much until OPAL is up and running,
279      * so defer action until the post_continue function.
280      */
281     opal_output_verbose(10, orte_cr_output,
282                         "orte_cr: coord_pre_continue: orte_cr_coord_pre_continue()");
283 
284     return ORTE_SUCCESS;
285 }
286 
287 /*************
288  * Post Lower Layer
289  *************/
orte_cr_coord_post_ckpt(void)290 static int orte_cr_coord_post_ckpt(void) {
291     /*
292      * Now that OPAL is shutdown, we really can't do much
293      * so assume pre_ckpt took care of everything.
294      */
295     opal_output_verbose(10, orte_cr_output,
296                         "orte_cr: coord_post_ckpt: orte_cr_coord_post_ckpt()");
297 
298     return ORTE_SUCCESS;
299 }
300 
orte_cr_coord_post_restart(void)301 static int orte_cr_coord_post_restart(void) {
302     int ret, exit_status = ORTE_SUCCESS;
303     orte_proc_type_t prev_type = ORTE_PROC_TYPE_NONE;
304     char * tmp_dir = NULL;
305 
306     opal_output_verbose(10, orte_cr_output,
307                         "orte_cr: coord_post_restart: orte_cr_coord_post_restart()");
308 
309     /*
310      * Add the previous session directory for cleanup
311      */
312     opal_crs_base_cleanup_append(orte_process_info.job_session_dir, true);
313     tmp_dir = orte_process_info.jobfam_session_dir;
314     if( NULL != tmp_dir ) {
315         opal_crs_base_cleanup_append(tmp_dir, true);
316         free(tmp_dir);
317         tmp_dir = NULL;
318     }
319 
320     /*
321      * Refresh System information
322      */
323     prev_type = orte_process_info.proc_type;
324     if( ORTE_SUCCESS != (ret = orte_proc_info_finalize()) ) {
325         exit_status = ret;
326     }
327 
328     if( NULL != orte_process_info.my_hnp_uri ) {
329         free(orte_process_info.my_hnp_uri);
330         orte_process_info.my_hnp_uri = NULL;
331     }
332 
333     if( NULL != orte_process_info.my_daemon_uri ) {
334         free(orte_process_info.my_daemon_uri);
335         orte_process_info.my_daemon_uri = NULL;
336     }
337 
338     if( ORTE_SUCCESS != (ret = orte_proc_info()) ) {
339         exit_status = ret;
340     }
341 
342     orte_process_info.proc_type = prev_type;
343     orte_process_info.my_name = *ORTE_NAME_INVALID;
344 
345     /*
346      * Notify the ESS
347      */
348     if( NULL != orte_ess.ft_event ) {
349         if( ORTE_SUCCESS != (ret = orte_ess.ft_event(OPAL_CRS_RESTART))) {
350             exit_status = ret;
351             goto cleanup;
352         }
353     }
354 
355  cleanup:
356     return exit_status;
357 }
358 
orte_cr_coord_post_continue(void)359 static int orte_cr_coord_post_continue(void) {
360     int ret, exit_status = ORTE_SUCCESS;
361 
362     opal_output_verbose(10, orte_cr_output,
363                         "orte_cr: coord_post_continue: orte_cr_coord_post_continue()\n");
364 
365     /*
366      * Notify the ESS
367      */
368     if( NULL != orte_ess.ft_event ) {
369         if( ORTE_SUCCESS != (ret = orte_ess.ft_event(OPAL_CRS_CONTINUE))) {
370             exit_status = ret;
371             goto cleanup;
372         }
373     }
374 
375  cleanup:
376 
377     return exit_status;
378 }
379 
380 /*************************************************
381  * ORTE Entry Point functionality
382  *************************************************/
orte_cr_entry_point_init(void)383 int orte_cr_entry_point_init(void)
384 {
385 #if 0
386     /* JJH XXX
387      * Make sure to finalize the OPAL Entry Point function if it is active.
388      */
389     opal_cr_entry_point_finalize();
390 #endif
391 
392     return ORTE_SUCCESS;
393 }
394 
orte_cr_entry_point_finalize(void)395 int orte_cr_entry_point_finalize(void)
396 {
397     /* Nothing to do here... */
398     return ORTE_SUCCESS;
399 }
400 
401