1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 /*
3 * (C) 2004 by Argonne National Laboratory.
4 * See COPYRIGHT in top-level directory.
5 */
6
7 /* OWNER=gropp */
8
9 /* An example mpiexec program that uses a remote shell program to create
10 new processes on the selected hosts.
11
12 This code also shows how to use the pmutil routines (in ../util)
13 to provide many of the services required by mpiexec
14
15 Steps:
16 1. Read and process the command line. Build a ProcessList. (A ProcessList
17 may have one entry for a request to create n separate processes)
18
19 2. Convert the ProcessList into a ProcessTable. In the forker mpiexec,
20 this simply expands the requested number of processes into an
21 array with one entry per process. These entries contain information
22 on both the setup of the processes and the file descriptors used for
23 stdin,out,err, and for the PMI calls.
24
25 3. (Optionally) allow the forked processes to use a host:port to
26 contact this program, rather than just sharing a pipe. This allows the
27 forker to start other programs, such as debuggers.
28
29 4. Establish a signal handler for SIGCHLD. This will allow us to
30 get information about process termination; in particular, the exit
31 status.
32
33 5. Start the programs.
34
35 6. Process input from the programs; send stdin given to this process
36 to the selected processes (usually rank 0 or everyone). Handle all
37 PMI commands, including spawn. Another "input" is the expiration of the
38 specified timelimit for the run, if any.
39
40 7. Process rundown commands and handle any abnormal termination.
41
42 8. Wait for any processes to exit; gather the exit status and reason
43 for exit (if abnormal, such as signaled with SEGV or BUS)
44
45 9. Release all resources and compute the exit status for this program
46 (using one of several approaches, such as taking the maximum of the
47 exit statuses).
48
49 Special Case to support Singleton Init:
50 To support a singleton init of a process that then wants to
51 create processes with MPI_Comm_spawn(_multiple), a special form of
52 mpiexec is supported:
53
54 mpiexec -pmi_args <port> <interfacename> <securitykey> <pid>
55
56 The singleton process (in a routine in simple_pmi.c) forks a process and
57 execs mpiexec with these arguments, where port is the port to which
58 mpiexec should connect, interfacename is the name of the network interface
59 (BUG: may not be correctly set as mpd currently ignores it), securitykey
60 is a place-holder for a key used by the singleton init process to verify
61 that the process connecting on the port is the one that was intended, and
62 pid is the pid of the singleton init process.
63
64 FIXME: The above has not been implemented yet.
65 */
66
67 #include "mpichconf.h"
68 #include <stdio.h>
69 #include <string.h>
70 #ifdef HAVE_UNISTD_H
71 #include <unistd.h>
72 #endif
73 #include <stdlib.h>
74
75 #include "pmutil.h"
76 #include "process.h"
77 #include "cmnargs.h"
78 #include "pmiserv.h"
79 #include "ioloop.h"
80 #include "labelout.h"
81 #include "rm.h"
82 #include "simple_pmiutil.h"
83 #include "env.h" /* MPIE_Putenv */
84 /* mpimem.h contains prototypes for MPIU_Strncpy etc. */
85 /* We no longer can use these because they are MPI device specific */
86 /* #include "mpimem.h" */
87
88 typedef struct { PMISetup pmiinfo; IOLabelSetup labelinfo; } SetupInfo;
89
90 /* Forward declarations */
91 int mypreamble( void *, ProcessState* );
92 int mypostfork( void *, void *, ProcessState* );
93 int mypostamble( void *, void *, ProcessState* );
94 int myspawn( ProcessWorld *, void * );
95
96 static int AddEnvSetToCmdLine( const char *, const char *, const char ** );
97
98 /* Set printFailure to 1 to get an explanation of the failure reason
99 for each process when a process fails */
100 static int printFailure = 0;
101
102 #ifndef MAX_PORT_STRING
103 #define MAX_PORT_STRING 1024
104 #endif
105
106 /* Note that envp is common but not standard */
main(int argc,char * argv[],char * envp[])107 int main( int argc, char *argv[], char *envp[] )
108 {
109 int rc;
110 int erc = 0; /* Other (exceptional) return codes */
111 int reason, signaled = 0;
112 SetupInfo s;
113 char portString[MAX_PORT_STRING];
114
115 /* MPIE_ProcessInit initializes the global pUniv */
116 MPIE_ProcessInit();
117 /* Set a default for the universe size */
118 pUniv.size = 64;
119
120 /* Set defaults for any arguments that are options. Also check the
121 environment for special options, such as debugging. Set
122 some defaults in pUniv */
123 MPIE_CheckEnv( &pUniv, 0, 0 );
124 IOLabelCheckEnv( );
125
126 /* Handle the command line arguments. Use the routine from util/cmnargs.c
127 to fill in the universe */
128 MPIE_Args( argc, argv, &pUniv, 0, 0 );
129 /* If there were any soft arguments, we need to handle them now */
130 rc = MPIE_InitWorldWithSoft( &pUniv.worlds[0], pUniv.size );
131 if (!rc) {
132 MPIU_Error_printf( "Unable to process soft arguments\n" );
133 exit(1);
134 }
135
136 if (pUniv.fromSingleton) {
137 /* The MPI process is already running. We create a simple entry
138 for a single process rather than creating the process */
139 MPIE_SetupSingleton( &pUniv );
140 }
141
142
143 rc = MPIE_ChooseHosts( &pUniv.worlds[0], MPIE_ReadMachines, 0 );
144 if (rc) {
145 MPIU_Error_printf( "Unable to assign hosts to processes\n" );
146 exit(1);
147 }
148
149 if (MPIE_Debug) MPIE_PrintProcessUniverse( stdout, &pUniv );
150
151 DBG_PRINTF( ("timeout_seconds = %d\n", pUniv.timeout) );
152
153 /* Get the common port for creating PMI connections to the created
154 processes */
155 rc = PMIServSetupPort( &pUniv, portString, sizeof(portString) );
156 if (rc) {
157 MPIU_Error_printf( "Unable to setup port for listener\n" );
158 exit(1);
159 }
160 s.pmiinfo.portName = portString;
161
162 #ifdef USE_MPI_STAGE_EXECUTABLES
163 /* Hook for later use in staging executables */
164 if (?stageExes) {
165 rc = MPIE_StageExecutables( &pUniv.worlds[0] );
166 if (!rc) ...;
167 }
168 #endif
169
170 PMIServInit(myspawn,&s);
171 s.pmiinfo.pWorld = &pUniv.worlds[0];
172 PMISetupNewGroup( pUniv.worlds[0].nProcess, 0 );
173 MPIE_ForwardCommonSignals();
174 if (!pUniv.fromSingleton) {
175 MPIE_ForkProcesses( &pUniv.worlds[0], envp, mypreamble, &s,
176 mypostfork, 0, mypostamble, 0 );
177 }
178 else {
179 /* FIXME: The singleton code goes here */
180 MPIU_Error_printf( "Singleton init not supported\n" );
181 exit(1);
182 }
183 reason = MPIE_IOLoop( pUniv.timeout );
184
185 if (reason == IOLOOP_TIMEOUT) {
186 /* Exited due to timeout. Generate an error message and
187 terminate the children */
188 if (pUniv.timeout > 60) {
189 MPIU_Error_printf( "Timeout of %d minutes expired; job aborted\n",
190 pUniv.timeout / 60 );
191 }
192 else {
193 MPIU_Error_printf( "Timeout of %d seconds expired; job aborted\n",
194 pUniv.timeout );
195 }
196 erc = 1;
197 MPIE_KillUniverse( &pUniv );
198 }
199
200 /* Wait for all processes to exit and gather information on them.
201 We do this through the SIGCHLD handler. We also bound the length
202 of time that we wait to 2 seconds.
203 */
204 MPIE_WaitForProcesses( &pUniv, 2 );
205
206 /* Compute the return code (max for now) */
207 rc = MPIE_ProcessGetExitStatus( &signaled );
208
209 /* Optionally provide detailed information about failed processes */
210 if ( (rc && printFailure) || signaled)
211 MPIE_PrintFailureReasons( stderr );
212
213 /* If the processes exited normally (or were already gone) but we
214 had an exceptional exit, such as a timeout, use the erc value */
215 if (!rc && erc) rc = erc;
216
217 return( rc );
218 }
219
/*
 * Print an optional error message followed by the usage line, then exit
 * with status -1.  This routine does not return.
 *
 * msg - message to print before the usage text; may be NULL or empty, in
 *       which case only the usage line is printed.  A newline is added if
 *       the message does not already end with one.
 */
void mpiexec_usage( const char *msg )
{
    if (msg && msg[0] != '\0') {
        size_t len = strlen( msg );

        /* Never use the caller's text as the format string: a '%' in
           the message would otherwise be interpreted as a conversion */
        MPIU_Error_printf( "%s", msg );
        if (msg[len - 1] != '\n') {
            MPIU_Error_printf( "\n" );
        }
    }
    MPIU_Usage_printf( "Usage: mpiexec %s\n", MPIE_ArgDescription() );
    exit( -1 );
}
231
232 /* Redirect stdout and stderr to a handler */
mypreamble(void * data,ProcessState * pState)233 int mypreamble( void *data, ProcessState *pState )
234 {
235 SetupInfo *s = (SetupInfo *)data;
236 int rc;
237
238 IOLabelSetupFDs( &s->labelinfo );
239 rc = PMISetupSockets( 1, &s->pmiinfo );
240 /* We must use communication over the socket, rather than the
241 environment, to pass initialization data */
242 pState->initWithEnv = 0;
243
244 return rc;
245 }
246
247 /* Close one side of each pipe pair and replace stdout/err with the pipes */
mypostfork(void * predata,void * data,ProcessState * pState)248 int mypostfork( void *predata, void *data, ProcessState *pState )
249 {
250 SetupInfo *s = (SetupInfo *)predata;
251 int curarg=0;
252
253 IOLabelSetupInClient( &s->labelinfo );
254 PMISetupInClient( 1, &s->pmiinfo );
255
256 /* Now, we *also* change the process state to insert the
257 interposed remote shell routine. This is probably not
258 where we want this in the final version (because MPIE_ExecProgram
259 does a lot under the assumption that the started program will
260 know what to do with new environment variables), but this
261 will allow us to start. */
262 {
263 ProcessApp *app = pState->app;
264 const char **newargs = 0;
265 char *pmiDebugStr = 0;
266 int j;
267 char rankStr[12];
268
269 /* Insert into app->args */
270 newargs = (const char **) MPIU_Malloc( (app->nArgs + 14 + 1) *
271 sizeof(char *) );
272 if (!pState->hostname) {
273 MPIU_Error_printf( "No hostname avaliable for %s\n", app->exename );
274 exit(1);
275 }
276
277 snprintf( rankStr, sizeof(rankStr)-1, "%d", pState->id );
278 rankStr[12-1] = 0;
279 curarg = 0;
280 newargs[curarg++] = MPIU_Strdup( "-Y" );
281
282 newargs[curarg++] = pState->hostname;
283 curarg += AddEnvSetToCmdLine( "PMI_PORT", s->pmiinfo.portName,
284 newargs + curarg );
285 curarg += AddEnvSetToCmdLine( "PMI_ID", rankStr, newargs + curarg );
286 pmiDebugStr = getenv( "PMI_DEBUG" );
287 if (pmiDebugStr) {
288 /* Use this to help debug the connection process */
289 curarg += AddEnvSetToCmdLine( "PMI_DEBUG", pmiDebugStr,
290 newargs + curarg );
291 }
292
293 newargs[curarg++] = app->exename;
294 for (j=0; j<app->nArgs; j++) {
295 newargs[j+curarg] = app->args[j];
296 }
297 newargs[j+curarg] = 0;
298 app->exename = MPIU_Strdup( "/usr/bin/ssh" );
299
300 app->args = newargs;
301 app->nArgs += curarg;
302
303 if (MPIE_Debug) {
304 printf( "cmd = %s\n", app->exename ); fflush(stdout);
305 printf( "Number of args = %d\n", app->nArgs );
306 for (j=0; j<app->nArgs; j++) {
307 printf( "argv[%d] = %s\n", j, app->args[j] ); fflush(stdout);
308 }
309 }
310 }
311
312 return 0;
313 }
314
315 /* Close one side of the pipe pair and register a handler for the I/O */
mypostamble(void * predata,void * data,ProcessState * pState)316 int mypostamble( void *predata, void *data, ProcessState *pState )
317 {
318 SetupInfo *s = (SetupInfo *)predata;
319
320 IOLabelSetupFinishInServer( &s->labelinfo, pState );
321 PMISetupFinishInServer( 1, &s->pmiinfo, pState );
322
323 return 0;
324 }
325
/*
 * Spawn callback registered with PMIServInit: appends a new world to the
 * end of the universe's list of worlds and starts its processes with the
 * same callbacks that are used for the initial world.
 *
 * pWorld - the world to add and start
 * data   - the SetupInfo for this run
 *
 * Always returns 0.
 */
int myspawn( ProcessWorld *pWorld, void *data )
{
    SetupInfo     *setup = data;
    ProcessWorld **link;

    /* Walk to the link that ends the list and hang the new world there */
    for (link = &pUniv.worlds; *link; link = &(*link)->nextWorld)
        ;
    *link = pWorld;

    /* Fork Processes may call a routine that is passed the setup data but
       not pWorld; this makes sure that all routines can access the
       current world */
    setup->pmiinfo.pWorld = pWorld;

    /* FIXME: This should be part of the PMI initialization in the clients */
    MPIE_Putenv( pWorld, "PMI_SPAWNED=1" );

    MPIE_ForkProcesses( pWorld, 0, mypreamble, setup,
                        mypostfork, 0, mypostamble, 0 );
    return 0;
}
350
351 /* Temp test for the replacement for the simple "spawn == fork" */
352
353 /*
354 * Approach:
355 * Processes are created using a remote shell program. This requires
356 * changing the command line from
357 *
358 * a.out args ...
359 *
360 * to
361 *
362 * remshell-program remshell-args /bin/sh -c PMI_PORT=string &&
363 * export PMI_PORT && PMI_ID=rank-in-world && export PMI_ID &&
364 * a.out args
365 *
366 * (the export PMI_PORT=string syntax is not valid in all versions of sh)
367 *
368 * Using PMI_ID ensures that we correctly identify each process (this was
369 * a major problem in the setup used by the p4 device in MPICH1).
370 * Using environment variables instead of command line arguments keeps
371 * the command line clean.
372 *
373 * Two alternatives should be considered
374 * 1) Use an intermediate manager. This would allow us to set up the
375 * environment as well:
376 * remshell-program remshell-args manager -port string
377 * One possibility for the manager is the mpd manager
378 * 2) Use the secure server (even the same one as in MPICH1); then
379 * there is no remote shell command.
380 *
381 * We can handle the transformation of the command line by adding a step
382 * to the postfork routine; this is called after the fork but before the
383 * exec, and it can change the command line by making a copy of the app
384 * structure, changing the command line, and setting the pState structure
385 * to point to this new app (after the fork, these changes are visible only
386 * to the forked process).
387 *
388 * Enhancements:
389 * Allow the code to avoid the remote shell if the process is being created
390 * on the local host.
391 *
392 * Handle the use of -l username and -n options to remshell
393 * (-n makes stdin /dev/null, necessary for backgrounding).
394 * (-l username allows login to hosts where the user's username is
395 * different)
396 *
397 * Provide an option to add a backslash before any - to deal with the
398 * serious bug in the GNU inetutils remote shell programs that process
399 * *all* arguments on the remote shell command line, even those for the
400 * *program*!
401 *
402 * To best support the errcodes return from MPI_Comm_spawn,
403 * we need a way to communicate the array of error codes back to the
404 * spawn and spawn multiple commands. Query: how is that done in
405 * PMI?
406 *
407 */
408
/*
 * Append to args the words of a shell command that sets the environment
 * variable envName to envValue, followed by a ";" separator.
 *
 * The form of the command is chosen once, on the first call, from the
 * last path component of the SHELL environment variable of this process:
 *
 *    sh, bash, ash, dash, ksh, zsh:        export NAME=VALUE ;   (3 words)
 *    any other shell, or SHELL not set:    setenv NAME VALUE ;   (4 words)
 *
 * envName  - name of the environment variable
 * envValue - value to give it
 * args     - array that receives the new words; the caller must provide
 *            room for at least 4 entries.  Every stored string is
 *            allocated with MPIU_Strdup.
 *
 * Returns the number of entries stored in args.
 */
static int AddEnvSetToCmdLine( const char *envName, const char *envValue,
                               const char **args )
{
    static int useCSHFormat = -1;
    int        nArgs = 0;

    /* Determine the shell type the first time */
    if (useCSHFormat == -1) {
        /* Shells that accept "export NAME=VALUE".  dash, ksh and zsh are
           listed because they selected the export form before the
           comparison below was corrected. */
        static const char *const exportShells[] = {
            "sh", "bash", "ash", "dash", "ksh", "zsh"
        };
        const char *shell = getenv( "SHELL" );

        /* Default is to assume csh (setenv) format */
        useCSHFormat = 1;
        if (shell) {
            const char *sname = strrchr( shell, '/' );
            size_t      i;

            sname = sname ? sname + 1 : shell;
            for (i = 0; i < sizeof(exportShells) / sizeof(exportShells[0]);
                 i++) {
                /* Every name must be compared with == 0; the test for
                   "sh" used to lack it, which reversed the choice for
                   sh and for the csh family */
                if (strcmp( sname, exportShells[i] ) == 0) {
                    useCSHFormat = 0;
                    break;
                }
            }
        }
    }

    if (useCSHFormat) {
        args[nArgs++] = MPIU_Strdup( "setenv" );
        args[nArgs++] = MPIU_Strdup( envName );
        args[nArgs++] = MPIU_Strdup( envValue );
        args[nArgs++] = MPIU_Strdup( ";" );
    }
    else {
        char tmpBuf[1024];

        args[nArgs++] = MPIU_Strdup( "export" );
        /* NAME=VALUE; a result that does not fit in tmpBuf is truncated */
        snprintf( tmpBuf, sizeof(tmpBuf), "%s=%s", envName, envValue );
        args[nArgs++] = MPIU_Strdup( tmpBuf );
        args[nArgs++] = MPIU_Strdup( ";" );
    }
    return nArgs;
}
452