1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 /*
3  *  (C) 2001 by Argonne National Laboratory.
4  *      See COPYRIGHT in top-level directory.
5  */
6 
7 /*
8  * This file provides a set of routines that can be used to record debug
 * messages in a ring so that they may be dumped at a later time.  For example,
10  * this can be used to record debug messages without printing them; when
11  * a special event, such as an error occurs, a call to
12  * MPIU_dump_dbg_memlog( stderr ) will print the contents of the file ring
13  * to stderr.
14  */
15 
16 #include "mpiimpl.h"
17 
18 #include <stdio.h>
19 #ifdef HAVE_STDARG_H
20 #include <stdarg.h>
21 #endif
22 #ifdef HAVE_STRING_H
23 #include <string.h>
24 #endif
25 #ifdef HAVE_STDLIB_H
26 #include <stdlib.h>
27 #endif
28 #ifdef HAVE_UNISTD_H
29 #  include <unistd.h>
30 #endif
31 #ifdef HAVE_ERRNO_H
32 #include <errno.h>
33 #endif
34 
35 #if defined( HAVE_MKSTEMP ) && defined( NEEDS_MKSTEMP_DECL )
36 extern int mkstemp(char *t);
37 #endif
38 
39 #if defined( HAVE_FDOPEN ) && defined( NEEDS_FDOPEN_DECL )
40 extern FILE *fdopen(int fd, const char *mode);
41 #endif
42 
43 /* Temporary.  sig values will change */
44 /* style: allow:vprintf:3 sig:0 */
45 /* style: allow:fputs:1 sig:0 */
46 /* style: allow:printf:2 sig:0 */
47 /* style: allow:fprintf:7 sig:0 */
48 
/*
 * Portability layer for copying a va_list.
 *
 *   va_copy(a,b)    - make a an independent copy of b
 *   va_copy_end(a)  - release a copy made with va_copy
 *
 * A copy produced by a real va_copy (or the older __va_copy spelling) must
 * be released with va_end.  When neither is available the copy is a plain
 * assignment that aliases the original list, so there is nothing to
 * release and va_copy_end expands to nothing.
 */
#ifdef HAVE_VA_COPY
# define va_copy_end(a) va_end(a)
#else
# ifdef HAVE___VA_COPY
#  define va_copy(a,b) __va_copy(a,b)
#  define va_copy_end(a) va_end(a)
# else
#  define va_copy(a,b) ((a) = (b))
/* Some writers recommend define va_copy(a,b) memcpy(&a,&b,sizeof(va_list)) */
#  define va_copy_end(a)
# endif
#endif
61 
62 #if !defined(MPICH_DBG_MEMLOG_NUM_LINES)
63 #define MPICH_DBG_MEMLOG_NUM_LINES 1024
64 #endif
65 #if !defined(MPICH_DBG_MEMLOG_LINE_SIZE)
66 #define MPICH_DBG_MEMLOG_LINE_SIZE 256
67 #endif
68 
69 MPIU_dbg_state_t MPIU_dbg_state = MPIU_DBG_STATE_UNINIT;
70 FILE * MPIU_dbg_fp = NULL;
71 static int dbg_memlog_num_lines = MPICH_DBG_MEMLOG_NUM_LINES;
72 static int dbg_memlog_line_size = MPICH_DBG_MEMLOG_LINE_SIZE;
73 static char **dbg_memlog = NULL;
74 static int dbg_memlog_next = 0;
75 static int dbg_memlog_count = 0;
76 static int dbg_rank = -1;
77 
78 static void dbg_init(void);
79 
MPIU_dbg_init(int rank)80 int MPIU_dbg_init(int rank)
81 {
82     dbg_rank = rank;
83 
84     if (MPIU_dbg_state == MPIU_DBG_STATE_UNINIT)
85     {
86 	dbg_init();
87     }
88 
89     /* If file logging is enable, we need to open a file */
90     if (MPIU_dbg_state & MPIU_DBG_STATE_FILE)
91     {
92 	char fn[128];
93 
94 	/* Only open the file only once in case MPIU_dbg_init is called more
95 	   than once */
96 	if (MPIU_dbg_fp == NULL)
97 	{
98 	    MPIU_Snprintf(fn, 128, "mpich2-dbg-%d.log", dbg_rank);
99 	    MPIU_dbg_fp = fopen(fn, "w");
100 	    setvbuf(MPIU_dbg_fp, NULL, _IONBF, 0);
101 	}
102     }
103 
104     return 0;
105 }
106 
dbg_init(void)107 static void dbg_init(void)
108 {
109     char * envstr;
110 
111     MPIU_dbg_state = MPIU_DBG_STATE_NONE;
112 
113     /* FIXME: This should use MPIU_Param_get_string */
114     envstr = getenv("MPICH_DBG_OUTPUT");
115     if (envstr == NULL)
116     {
117 	return;
118     }
119 
120     /*
121      * TODO:
122      *
123      * - parse environment variable to determine number of log lines, etc.
124      *
125      * - add support for writing to a (per-process or global?) file
126      *
127      * - add support for sending to a log server, perhaps with global time
128      *   sequencing information ???
129      */
130     if (strstr(envstr, "stdout"))
131     {
132 	MPIU_dbg_state = (MPIU_dbg_state_t)( MPIU_DBG_STATE_STDOUT |
133 					      MPIU_dbg_state );
134     }
135     if (strstr(envstr, "memlog"))
136     {
137 	MPIU_dbg_state = (MPIU_dbg_state_t)( MPIU_DBG_STATE_MEMLOG |
138 					      MPIU_dbg_state );
139     }
140     if (strstr(envstr, "file"))
141     {
142 	MPIU_dbg_state = (MPIU_dbg_state_t) ( MPIU_DBG_STATE_FILE |
143 					       MPIU_dbg_state );
144     }
145 
146     /* If memlog is enabled, the we need to allocate some memory for it */
147     if (MPIU_dbg_state & MPIU_DBG_STATE_MEMLOG)
148     {
149 	dbg_memlog = MPIU_Malloc(dbg_memlog_num_lines * sizeof(char *) +
150 				 dbg_memlog_num_lines * dbg_memlog_line_size);
151 	if (dbg_memlog != NULL)
152 	{
153 	    int i;
154 
155 	    for (i = 0; i < dbg_memlog_num_lines ; i++)
156 	    {
157 		dbg_memlog[i] = ((char *) &dbg_memlog[dbg_memlog_num_lines]) +
158 		    i * dbg_memlog_line_size;
159 	    }
160 	}
161 	else
162 	{
163 	    MPIU_dbg_state = (MPIU_dbg_state_t)( MPIU_dbg_state &
164 						  ~MPIU_DBG_STATE_MEMLOG );
165 	}
166     }
167 }
168 
MPIU_dbglog_printf(const char * str,...)169 int MPIU_dbglog_printf(const char *str, ...)
170 {
171     int n = 0;
172     va_list list;
173 
174     if (MPIU_dbg_state == MPIU_DBG_STATE_UNINIT)
175     {
176 	dbg_init();
177     }
178 
179     if (MPIU_dbg_state & MPIU_DBG_STATE_MEMLOG)
180     {
181 	/* FIXME: put everything on one line until a \n is found */
182 
183 	dbg_memlog[dbg_memlog_next][0] = '\0';
184 	va_start(list, str);
185 	n = vsnprintf(dbg_memlog[dbg_memlog_next], dbg_memlog_line_size, str,
186 		      list);
187 	va_end(list);
188 
189 	/* if the output was truncated, we null terminate the end of the
190 	   string, on the off chance that vsnprintf() didn't do that.  we also
191 	   check to see if any data has been written over the null we set at
192 	   the beginning of the string.  this is mostly paranoia, but the man
193 	   page does not clearly state what happens when truncation occurs.  if
194 	   data was written to the string, we would like to output it, but we
195 	   want to avoid reading past the end of the array or outputing garbage
196 	   data. */
197 
198 	if (n < 0 || n >= dbg_memlog_line_size)
199 	{
200 	    dbg_memlog[dbg_memlog_next][dbg_memlog_line_size - 1] = '\0';
201 	    n = (int)strlen(dbg_memlog[dbg_memlog_next]);
202 	}
203 
204 	if (dbg_memlog[dbg_memlog_next][0] != '\0')
205 	{
206 	    dbg_memlog_next = (dbg_memlog_next + 1) % dbg_memlog_num_lines;
207 	    dbg_memlog_count++;
208 	}
209     }
210 
211     if (MPIU_dbg_state & MPIU_DBG_STATE_STDOUT)
212     {
213 	va_start(list, str);
214 	n = vprintf(str, list);
215 	va_end(list);
216     }
217 
218     if ((MPIU_dbg_state & MPIU_DBG_STATE_FILE) && MPIU_dbg_fp != NULL)
219     {
220 	va_start(list, str);
221 	n = vfprintf(MPIU_dbg_fp, str, list);
222 	va_end(list);
223     }
224 
225     return n;
226 }
227 
MPIU_dbglog_vprintf(const char * str,va_list ap)228 int MPIU_dbglog_vprintf(const char *str, va_list ap)
229 {
230     int n = 0;
231     va_list list;
232 
233     if (MPIU_dbg_state == MPIU_DBG_STATE_UNINIT)
234     {
235 	dbg_init();
236     }
237 
238     if (MPIU_dbg_state & MPIU_DBG_STATE_MEMLOG)
239     {
240 	va_copy(list,ap);
241 	dbg_memlog[dbg_memlog_next][0] = '\0';
242 	n = vsnprintf(dbg_memlog[dbg_memlog_next], dbg_memlog_line_size, str,
243 		      list);
244         va_copy_end(list);
245 
246 	/* if the output was truncated, we null terminate the end of the
247 	   string, on the off chance that vsnprintf() didn't do that.  we also
248 	   check to see if any data has been written over the null we set at
249 	   the beginning of the string.  this is mostly paranoia, but the man
250 	   page does not clearly state what happens when truncation occurs.  if
251 	   data was written to the string, we would like to output it, but we
252 	   want to avoid reading past the end of the array or outputing garbage
253 	   data. */
254 
255 	if (n < 0 || n >= dbg_memlog_line_size)
256 	{
257 	    dbg_memlog[dbg_memlog_next][dbg_memlog_line_size - 1] = '\0';
258 	    n = (int)strlen(dbg_memlog[dbg_memlog_next]);
259 	}
260 
261 	if (dbg_memlog[dbg_memlog_next][0] != '\0')
262 	{
263 	    dbg_memlog_next = (dbg_memlog_next + 1) % dbg_memlog_num_lines;
264 	    dbg_memlog_count++;
265 	}
266     }
267 
268     if (MPIU_dbg_state & MPIU_DBG_STATE_STDOUT)
269     {
270 	va_copy(list, ap);
271 	n = vprintf(str, list);
272 	va_copy_end(list);
273     }
274 
275     if ((MPIU_dbg_state & MPIU_DBG_STATE_FILE) && MPIU_dbg_fp != NULL)
276     {
277 	va_copy(list, ap);
278 	n = vfprintf(MPIU_dbg_fp, str, list);
279 	va_end(list);
280     }
281 
282     return n;
283 }
284 
285 /* FIXME: */
MPIU_dbg_printf(const char * str,...)286 int MPIU_dbg_printf(const char * str, ...)
287 {
288     int n;
289 
290     /* MPID_Common_thread_lock(); */
291     {
292 	va_list list;
293 
294 	MPIU_dbglog_printf("[%d]", dbg_rank);
295 	va_start(list, str);
296 	n = MPIU_dbglog_vprintf(str, list);
297 	va_end(list);
298 	MPIU_dbglog_flush();
299     }
300     /* MPID_Common_thread_unlock(); */
301 
302     return n;
303 }
304 
/* Convenience wrapper: write the contents of the memory log to stdout. */
void MPIU_dump_dbg_memlog_to_stdout(void)
{
    FILE *out = stdout;

    MPIU_dump_dbg_memlog(out);
}
309 
/*
 * MPIU_dump_dbg_memlog_to_file - write the memory log to a named file.
 *
 * filename - file to create (opened in "wb" mode, so any existing content
 *            is discarded).
 *
 * If the file cannot be opened nothing is written and no error is
 * reported.
 */
void MPIU_dump_dbg_memlog_to_file(const char *filename)
{
    FILE *dump_file = fopen(filename, "wb");

    if (dump_file == NULL)
        return;

    MPIU_dump_dbg_memlog(dump_file);
    fclose(dump_file);
}
320 
MPIU_dump_dbg_memlog(FILE * fp)321 void MPIU_dump_dbg_memlog(FILE * fp){
322     if (dbg_memlog_count != 0)
323     {
324 	int ent;
325 	int last_ent;
326 
327 	/* there is a small issue with counter rollover which will need to be
328 	   fixed if more than 2^32 lines are going to be logged */
329 	ent = (dbg_memlog_next == dbg_memlog_count) ? 0 : dbg_memlog_next;
330 	last_ent = (ent + dbg_memlog_num_lines - 1) % dbg_memlog_num_lines;
331 
332 	do
333 	{
334 	    fputs(dbg_memlog[ent], fp);
335 	    ent = (ent + 1) % dbg_memlog_num_lines;
336 	}
337 	while(ent != last_ent);
338 	fflush(fp);
339     }
340 }
341 
342 #ifdef USE_DBG_LOGGING
343 /*
344  * NEW ROUTINES FOR DEBUGGING
345  */
346 #ifndef MAXPATHLEN
347 #define MAXPATHLEN 1024
348 #endif
349 
350 int MPIU_DBG_ActiveClasses = 0;
351 int MPIU_DBG_MaxLevel      = MPIU_DBG_TYPICAL;
352 static enum {MPIU_DBG_UNINIT, MPIU_DBG_PREINIT, MPIU_DBG_INITIALIZED, MPIU_DBG_ERROR}
353     mpiu_dbg_initialized = MPIU_DBG_UNINIT;
354 static char filePatternBuf[MAXPATHLEN] = "";
355 static const char *filePattern = "-stdout-"; /* "log%d.log"; */
356 static const char *defaultFilePattern = "dbg@W%w-@%d@T-%t@.log";
357 static char temp_filename[MAXPATHLEN] = "";
358 static int worldNum  = 0;
359 static int worldRank = -1;
360 static int whichRank = -1;             /* all ranks */
361 static int    resetTimeOrigin = 1;
362 static double timeOrigin = 0.0;
363 
364 static int MPIU_DBG_Usage( const char *, const char * );
365 static int MPIU_DBG_OpenFile(FILE **dbg_fp);
366 static int setDBGClass( const char * );
367 static int SetDBGLevel( const char *, const char *(names[]) );
368 static int MPIU_DBG_Get_filename(char *filename, int len);
369 
370 #ifdef MPICH_IS_THREADED
371 static MPID_Thread_tls_t dbg_tls_key;
372 #endif
373 
374 static FILE *static_dbg_fp = 0;
375 
/*
 * dbg_init_tls - create the thread-local-storage key used to hold the log
 * stream.  Asserts that key creation reported no error.  Compiles to an
 * empty function when MPICH_IS_THREADED is not defined.
 */
static void dbg_init_tls(void)
{
#ifdef MPICH_IS_THREADED
    int tls_err;

    MPID_Thread_tls_create(NULL, &dbg_tls_key, &tls_err);
    MPIU_Assert(tls_err == 0);
#endif
}
385 
get_fp(void)386 static FILE *get_fp(void)
387 {
388 #ifdef MPICH_IS_THREADED
389     /* if we're not initialized, use the static fp, since there should
390      * only be one thread in here until then */
391     if (mpiu_dbg_initialized == MPIU_DBG_INITIALIZED && MPIU_ISTHREADED) {
392         FILE *fp;
393         MPID_Thread_tls_get(&dbg_tls_key, &fp);
394         return fp;
395     }
396     else
397         return static_dbg_fp;
398 #else
399     return static_dbg_fp;
400 #endif
401 }
402 
set_fp(FILE * fp)403 static void set_fp(FILE *fp)
404 {
405 #ifdef MPICH_IS_THREADED
406     /* if we're not initialized, use the static fp, since there should
407      * only be one thread in here until then */
408     if (mpiu_dbg_initialized == MPIU_DBG_INITIALIZED && MPIU_ISTHREADED) {
409         MPID_Thread_tls_set(&dbg_tls_key, (void *)fp);
410     }
411     else
412         static_dbg_fp = fp;
413 #else
414     static_dbg_fp = fp;
415 #endif
416 }
417 
418 
MPIU_DBG_Outevent(const char * file,int line,int class,int kind,const char * fmat,...)419 int MPIU_DBG_Outevent( const char *file, int line, int class, int kind,
420 		       const char *fmat, ... )
421 {
422     int mpi_errno = MPI_SUCCESS;
423     va_list list;
424     char *str, stmp[MPIU_DBG_MAXLINE];
425     int  i;
426     void *p;
427     MPID_Time_t t;
428     double  curtime;
429     unsigned long long int threadID  = 0;
430     int pid = -1;
431     FILE *dbg_fp = NULL;
432 
433     if (mpiu_dbg_initialized == MPIU_DBG_UNINIT || mpiu_dbg_initialized == MPIU_DBG_ERROR) goto fn_exit;
434 
435     dbg_fp = get_fp();
436 
437 #ifdef MPICH_IS_THREADED
438     {
439         /* the thread ID is not necessarily unique between processes, so a
440          * (pid,tid) pair should be used to uniquely identify output from
441          * particular threads on a system */
442 	MPIU_Thread_id_t tid;
443 	MPIU_Thread_self(&tid);
444 	threadID = (unsigned long long int)tid;
445     }
446 #endif
447 #if defined(HAVE_GETPID)
448     pid = (int)getpid();
449 #endif /* HAVE_GETPID */
450 
451     if (!dbg_fp) {
452 	mpi_errno = MPIU_DBG_OpenFile(&dbg_fp);
453         if (mpi_errno) goto fn_fail;
454         set_fp(dbg_fp);
455     }
456 
457     MPID_Wtime( &t );
458     MPID_Wtime_todouble( &t, &curtime );
459     curtime = curtime - timeOrigin;
460 
461     /* The kind values are used with the macros to simplify these cases */
462     switch (kind) {
463 	case 0:
464 	    va_start(list,fmat);
465 	    str = va_arg(list,char *);
466 	    fprintf( dbg_fp, "%d\t%d\t%llx[%d]\t%d\t%f\t%s\t%d\t%s\n",
467 		     worldNum, worldRank, threadID, pid, class, curtime,
468 		     file, line, str );
469 	    break;
470 	case 1:
471 	    va_start(list,fmat);
472 	    str = va_arg(list,char *);
473 	    MPIU_Snprintf( stmp, sizeof(stmp), fmat, str );
474 	    va_end(list);
475 	    fprintf( dbg_fp, "%d\t%d\t%llx[%d]\t%d\t%f\t%s\t%d\t%s\n",
476 		     worldNum, worldRank, threadID, pid, class, curtime,
477 		     file, line, stmp );
478 	    break;
479 	case 2:
480 	    va_start(list,fmat);
481 	    i = va_arg(list,int);
482 	    MPIU_Snprintf( stmp, sizeof(stmp), fmat, i);
483 	    va_end(list);
484 	    fprintf( dbg_fp, "%d\t%d\t%llx[%d]\t%d\t%f\t%s\t%d\t%s\n",
485 		     worldNum, worldRank, threadID, pid, class, curtime,
486 		     file, line, stmp );
487 	    break;
488 	case 3:
489 	    va_start(list,fmat);
490 	    p = va_arg(list,void *);
491 	    MPIU_Snprintf( stmp, sizeof(stmp), fmat, p);
492 	    va_end(list);
493 	    fprintf( dbg_fp, "%d\t%d\t%llx[%d]\t%d\t%f\t%s\t%d\t%s\n",
494 		     worldNum, worldRank, threadID, pid, class, curtime,
495 		     file, line, stmp );
496 	    break;
497         default:
498 	    break;
499     }
500     fflush(dbg_fp);
501 
502  fn_exit:
503  fn_fail:
504     return 0;
505 }
506 
507 /* These are used to simplify the handling of options.
508    To add a new name, add an MPIU_DBG_ClassName element to the array
509    MPIU_Classnames.  The "classbits" values are defined by MPIU_DBG_CLASS
510    in src/include/mpidbg.h
511  */
512 
/* One entry of the table that maps option names to debug classes */
typedef struct MPIU_DBG_ClassName {
    int         classbits;  /* class bit(s) that the name stands for */
    const char *UCName;     /* upper-case spelling of the name */
    const char *LCName;     /* lower-case spelling of the name */
} MPIU_DBG_ClassName;
517 
518 static const MPIU_DBG_ClassName MPIU_Classnames[] = {
519     { MPIU_DBG_PT2PT,         "PT2PT",         "pt2pt" },
520     { MPIU_DBG_RMA,           "RMA",           "rma"   },
521     { MPIU_DBG_THREAD,        "THREAD",        "thread" },
522     { MPIU_DBG_PM,            "PM",            "pm" },
523     { MPIU_DBG_ROUTINE_ENTER, "ROUTINE_ENTER", "routine_enter" },
524     { MPIU_DBG_ROUTINE_EXIT,  "ROUTINE_EXIT",  "routine_exit" },
525     { MPIU_DBG_ROUTINE_ENTER |
526       MPIU_DBG_ROUTINE_EXIT,  "ROUTINE",       "routine" },
527     { MPIU_DBG_SYSCALL,       "SYSCALL",       "syscall" },
528     { MPIU_DBG_DATATYPE,      "DATATYPE",      "datatype" },
529     { MPIU_DBG_HANDLE,        "HANDLE",        "handle" },
530     { MPIU_DBG_COMM,          "COMM",          "comm" },
531     { MPIU_DBG_BSEND,         "BSEND",         "bsend" },
532     { MPIU_DBG_OTHER,         "OTHER",         "other" },
533     { MPIU_DBG_CH3_CONNECT,   "CH3_CONNECT",   "ch3_connect" },
534     { MPIU_DBG_CH3_DISCONNECT,"CH3_DISCONNECT","ch3_disconnect" },
535     { MPIU_DBG_CH3_PROGRESS,  "CH3_PROGRESS",  "ch3_progress" },
536     { MPIU_DBG_CH3_CHANNEL,   "CH3_CHANNEL",   "ch3_channel" },
537     { MPIU_DBG_CH3_MSG,       "CH3_MSG",       "ch3_msg" },
538     { MPIU_DBG_CH3_OTHER,     "CH3_OTHER",     "ch3_other" },
539     { MPIU_DBG_CH3,           "CH3",           "ch3" },
540     { MPIU_DBG_NEM_SOCK_FUNC, "NEM_SOCK_FUNC", "nem_sock_func"},
541     { MPIU_DBG_NEM_SOCK_DET,  "NEM_SOCK_DET",  "nem_sock_det"},
542     { MPIU_DBG_VC,            "VC",            "vc"},
543     { MPIU_DBG_REFCOUNT,      "REFCOUNT",      "refcount"},
544     { MPIU_DBG_ROMIO,         "ROMIO",         "romio"},
545     { MPIU_DBG_ERRHAND,       "ERRHAND",       "errhand"},
546     { MPIU_DBG_ALL,           "ALL",           "all" },
547     { 0,                      0,               0 }
548 };
549 
550 /* Because the level values are simpler and are rarely changed, these
551    use a simple set of parallel arrays */
552 static const int  MPIU_Levelvalues[] = { MPIU_DBG_TERSE,
553 					 MPIU_DBG_TYPICAL,
554 					 MPIU_DBG_VERBOSE, 100 };
555 static const char *MPIU_Levelname[] = { "TERSE", "TYPICAL", "VERBOSE", 0 };
556 static const char *MPIU_LCLevelname[] = { "terse", "typical", "verbose", 0 };
557 
558 /*
559  * Initialize the DBG_MSG system.  This is called during MPI_Init to process
560  * command-line arguments as well as checking the MPICH_DBG environment
561  * variables.  The initialization is split into two steps: a preinit and an
562  * init. This makes it possible to enable most of the features before calling
563  * MPID_Init, where a significant amount of the initialization takes place.
564  */
565 
MPIU_DBG_ProcessArgs(int * argc_p,char *** argv_p)566 static int MPIU_DBG_ProcessArgs( int *argc_p, char ***argv_p )
567 {
568     int i, rc;
569 
570     /* Here's where we do the same thing with the command-line options */
571     if (argc_p) {
572 	for (i=1; i<*argc_p; i++) {
573 	    if (strncmp((*argv_p)[i],"-mpich-dbg", 10) == 0) {
574 		char *s = (*argv_p)[i] + 10;
575 		/* Found a command */
576 		if (*s == 0) {
577 		    /* Just -mpich-dbg */
578 		    MPIU_DBG_MaxLevel      = MPIU_DBG_TYPICAL;
579 		    MPIU_DBG_ActiveClasses = MPIU_DBG_ALL;
580 		}
581 		else if (*s == '=') {
582 		    /* look for file */
583 		    MPIU_DBG_MaxLevel      = MPIU_DBG_TYPICAL;
584 		    MPIU_DBG_ActiveClasses = MPIU_DBG_ALL;
585 		    s++;
586 		    if (strncmp( s, "file", 4 ) == 0) {
587 			filePattern = defaultFilePattern;
588 		    }
589 		}
590 		else if (strncmp(s,"-level",6) == 0) {
591 		    char *p = s + 6;
592 		    if (*p == '=') {
593 			p++;
594 			rc = SetDBGLevel( p, MPIU_LCLevelname );
595 			if (rc)
596 			    MPIU_DBG_Usage( "-mpich-dbg-level", "terse, typical, verbose" );
597 		    }
598 		}
599 		else if (strncmp(s,"-class",6) == 0) {
600 		    char *p = s + 6;
601 		    if (*p == '=') {
602 			p++;
603 			rc = setDBGClass( p );
604 			if (rc)
605 			    MPIU_DBG_Usage( "-mpich-dbg-class", 0 );
606 		    }
607 		}
608 		else if (strncmp( s, "-filename", 9 ) == 0) {
609 		    char *p = s + 9;
610 		    if (*p == '=') {
611 			p++;
612 			/* A special case for a filepattern of "-default",
613 			   use the predefined default pattern */
614 			if (strcmp( p, "-default" ) == 0) {
615 			    filePattern = defaultFilePattern;
616 			}
617 			else {
618                             strncpy(filePatternBuf, p, sizeof(filePatternBuf));
619 			    filePattern = filePatternBuf;
620 			}
621 		    }
622 		}
623 		else if (strncmp( s, "-rank", 5 ) == 0) {
624 		    char *p = s + 5;
625 		    if (*p == '=' && p[1] != 0) {
626 			char *sOut;
627 			p++;
628 			whichRank = strtol( p, &sOut, 10 );
629 			if (p == sOut) {
630 			    MPIU_DBG_Usage( "-mpich-dbg-rank", 0 );
631 			    whichRank = -1;
632 			}
633 		    }
634 		}
635 		else {
636 		    MPIU_DBG_Usage( (*argv_p)[i], 0 );
637 		}
638 
639 		/* Eventually, should null it out and reduce argc value */
640 	    }
641 	}
642     }
643     return MPI_SUCCESS;
644 }
645 
MPIU_DBG_ProcessEnv(void)646 static int MPIU_DBG_ProcessEnv( void )
647 {
648     char *s;
649     int rc;
650 
651     s = getenv( "MPICH_DBG" );
652     if (s) {
653 	/* Set the defaults */
654 	MPIU_DBG_MaxLevel = MPIU_DBG_TYPICAL;
655 	MPIU_DBG_ActiveClasses = MPIU_DBG_ALL;
656 	if (strncmp(s,"FILE",4) == 0) {
657 	    filePattern = defaultFilePattern;
658 	}
659     }
660     s = getenv( "MPICH_DBG_LEVEL" );
661     if (s) {
662 	rc = SetDBGLevel( s, MPIU_Levelname );
663 	if (rc)
664 	    MPIU_DBG_Usage( "MPICH_DBG_LEVEL", "TERSE, TYPICAL, VERBOSE" );
665     }
666 
667     s = getenv( "MPICH_DBG_CLASS" );
668     rc = setDBGClass( s );
669     if (rc)
670 	MPIU_DBG_Usage( "MPICH_DBG_CLASS", 0 );
671 
672     s = getenv( "MPICH_DBG_FILENAME" );
673     if (s) {
674         strncpy(filePatternBuf, s, sizeof(filePatternBuf));
675         filePattern = filePatternBuf;
676     }
677 
678     s = getenv( "MPICH_DBG_RANK" );
679     if (s) {
680 	char *sOut;
681 	whichRank = strtol( s, &sOut, 10 );
682 	if (s == sOut) {
683 	    MPIU_DBG_Usage( "MPICH_DBG_RANK", 0 );
684 	    whichRank = -1;
685 	}
686     }
687     return MPI_SUCCESS;
688 }
689 
690 /*
691  * Attempt to initialize the logging system.  This works only if MPID_Init
692  * is not responsible for updating the environment and/or command-line
693  * arguments.
694  */
MPIU_DBG_PreInit(int * argc_p,char *** argv_p,int wtimeNotReady)695 int MPIU_DBG_PreInit( int *argc_p, char ***argv_p, int wtimeNotReady )
696 {
697     MPID_Time_t t;
698 
699     /* if the DBG_MSG system was already initialized, say by the device, then
700        return immediately */
701     if (mpiu_dbg_initialized != MPIU_DBG_UNINIT) return MPI_SUCCESS;
702 
703     dbg_init_tls();
704 
705     /* Check to see if any debugging was selected.  The order of these
706        tests is important, as they allow general defaults to be set,
707        followed by more specific modifications */
708     /* First, the environment variables */
709     MPIU_DBG_ProcessEnv();
710 
711     MPIU_DBG_ProcessArgs( argc_p, argv_p );
712 
713     if (wtimeNotReady == 0) {
714 	MPID_Wtime( &t );
715 	MPID_Wtime_todouble( &t, &timeOrigin );
716 	resetTimeOrigin = 0;
717     }
718 
719     mpiu_dbg_initialized = MPIU_DBG_PREINIT;
720 
721     return MPI_SUCCESS;
722 }
723 
MPIU_DBG_Init(int * argc_p,char *** argv_p,int has_args,int has_env,int wrank)724 int MPIU_DBG_Init( int *argc_p, char ***argv_p, int has_args, int has_env,
725 		   int wrank )
726 {
727     int ret;
728     FILE *dbg_fp = NULL;
729 
730     /* if the DBG_MSG system was already initialized, say by the device, then
731        return immediately.  Note that the device is then responsible
732        for handling the file mode (e.g., reopen when the rank become
733        available) */
734     if (mpiu_dbg_initialized == MPIU_DBG_INITIALIZED || mpiu_dbg_initialized == MPIU_DBG_ERROR) return MPI_SUCCESS;
735 
736     if (mpiu_dbg_initialized != MPIU_DBG_PREINIT)
737         dbg_init_tls();
738 
739     dbg_fp = get_fp();
740 
741     /* We may need to wait until the device is set up to initialize the timer */
742     if (resetTimeOrigin) {
743 	MPID_Time_t t;
744 	MPID_Wtime( &t );
745 	MPID_Wtime_todouble( &t, &timeOrigin );
746 	resetTimeOrigin = 0;
747     }
748     /* Check to see if any debugging was selected.  The order of these
749        tests is important, as they allow general defaults to be set,
750        followed by more specific modifications. */
751     /* Both of these may have already been set in the PreInit call;
752        if the command line and/or environment variables are set before
753        MPID_Init, then don't call the routines to check those values
754        (as they were already handled in DBG_PreInit) */
755     /* First, the environment variables */
756     if (!has_env)
757 	MPIU_DBG_ProcessEnv();
758     /* Now the command-line arguments */
759     if (!has_args)
760 	MPIU_DBG_ProcessArgs( argc_p, argv_p );
761 
762     worldRank = wrank;
763 
764     if (whichRank >= 0 && whichRank != wrank) {
765 	/* Turn off logging on this process */
766 	MPIU_DBG_ActiveClasses = 0;
767     }
768 
769     /* If the file has already been opened with a temp filename,
770        rename it. */
771     if (dbg_fp && dbg_fp != stdout && dbg_fp != stderr)
772     {
773         char filename[MAXPATHLEN] = "";
774 
775         MPIU_DBG_Get_filename(filename, MAXPATHLEN);
776         ret = rename(temp_filename, filename);
777         if (ret){
778             /* Retry renaming file after closing it */
779             fclose(dbg_fp);
780             ret = rename(temp_filename, filename);
781             if(ret){
782                 MPIU_Error_printf("Could not rename temp log file to %s\n", filename );
783                 goto fn_fail;
784             }
785             else{
786                 dbg_fp = fopen(filename, "a+");
787                 set_fp(dbg_fp);
788                 if(dbg_fp == NULL){
789                     MPIU_Error_printf("Error re-opening log file, %s\n", filename);
790                     goto fn_fail;
791                 }
792             }
793         }
794     }
795 
796     mpiu_dbg_initialized = MPIU_DBG_INITIALIZED;
797  fn_exit:
798     return MPI_SUCCESS;
799  fn_fail:
800     mpiu_dbg_initialized = MPIU_DBG_ERROR;
801     goto fn_exit;
802 }
803 
804 /* Print the usage statement to stderr */
MPIU_DBG_Usage(const char * cmd,const char * vals)805 static int MPIU_DBG_Usage( const char *cmd, const char *vals )
806 {
807     if (vals) {
808 	fprintf( stderr, "Incorrect value for %s, should be one of %s\n",
809 		 cmd, vals );
810     }
811     else {
812 	fprintf( stderr, "Incorrect value for %s\n", cmd );
813     }
814     fprintf( stderr,
815 "Command line for debug switches\n\
816     -mpich-dbg-class=name[,name,...]\n\
817     -mpich-dbg-level=name   (one of terse, typical, verbose)\n\
818     -mpich-dbg-filename=pattern (includes %%d for world rank, %%t for thread id\n\
819     -mpich-dbg-rank=val    (only this rank in COMM_WORLD will be logged)\n\
820     -mpich-dbg   (shorthand for -mpich-dbg-class=all -mpich-dbg-level=typical)\n\
821     -mpich-dbg=file (shorthand for -mpich-dbg -mpich-dbg-filename=%s)\n\
822 Environment variables\n\
823     MPICH_DBG_CLASS=NAME[,NAME...]\n\
824     MPICH_DBG_LEVEL=NAME\n\
825     MPICH_DBG_FILENAME=pattern\n\
826     MPICH_DBG_RANK=val\n\
827     MPICH_DBG=YES or FILE\n", defaultFilePattern );
828 
829     fflush(stderr);
830 
831     return 0;
832 }
833 
834 #if defined (HAVE_MKSTEMP) && defined (HAVE_FDOPEN)
835 /* creates a temporary file in the same directory the
836    user specified for the log file */
837 #undef FUNCNAME
838 #define FUNCNAME MPIU_DBG_Open_temp_file
839 #undef FCNAME
840 #define FCNAME MPIDI_QUOTE(FUNCNAME)
MPIU_DBG_Open_temp_file(FILE ** dbg_fp)841 static int MPIU_DBG_Open_temp_file(FILE **dbg_fp)
842 {
843     int mpi_errno = MPI_SUCCESS;
844     const char temp_pattern[] = "templogXXXXXX";
845     int fd;
846     char *basename;
847     int ret;
848 
849     ret = MPIU_Strncpy(temp_filename, filePattern, MAXPATHLEN);
850     if (ret) goto fn_fail;
851 
852     MPIU_Basename(temp_filename, &basename);
853 
854     /* make sure there's enough room in temp_filename to store temp_pattern */
855     if (basename - temp_filename > MAXPATHLEN - sizeof(temp_pattern)) goto fn_fail;
856 
857     MPIU_Strncpy(basename, temp_pattern, sizeof(temp_pattern));
858 
859     fd = mkstemp(temp_filename);
860     if (fd == -1) goto fn_fail;
861 
862     *dbg_fp = fdopen(fd, "a+");
863     if (*dbg_fp == NULL) goto fn_fail;
864 
865  fn_exit:
866     return mpi_errno;
867  fn_fail:
868     MPIU_Error_printf( "Could not open log file %s\n", temp_filename );
869     mpiu_dbg_initialized = MPIU_DBG_ERROR;
870     mpi_errno = MPI_ERR_INTERN;
871     goto fn_exit;
872 }
873 #elif defined(HAVE__MKTEMP_S) && defined(HAVE_FOPEN_S)
874 /* creates a temporary file in the same directory the
875    user specified for the log file */
876 #undef FUNCNAME
877 #define FUNCNAME MPIU_DBG_Open_temp_file
878 #undef FCNAME
879 #define FCNAME MPIDI_QUOTE(FUNCNAME)
MPIU_DBG_Open_temp_file(FILE ** dbg_fp)880 static int MPIU_DBG_Open_temp_file(FILE **dbg_fp)
881 {
882     int mpi_errno = MPI_SUCCESS;
883     const char temp_pattern[] = "templogXXXXXX";
884     int fd;
885     char *basename;
886     int ret;
887     errno_t ret_errno;
888 
889     ret = MPIU_Strncpy(temp_filename, filePattern, MAXPATHLEN);
890     if (ret) goto fn_fail;
891 
892     MPIU_Basename(temp_filename, &basename);
893 
894     /* make sure there's enough room in temp_filename to store temp_pattern */
895     if (basename - temp_filename > MAXPATHLEN - sizeof(temp_pattern)) goto fn_fail;
896 
897     MPIU_Strncpy(basename, temp_pattern, sizeof(temp_pattern));
898 
899     ret_errno = _mktemp_s(temp_filename, MAXPATHLEN);
900     if (ret_errno != 0) goto fn_fail;
901 
902     ret_errno = fopen_s(dbg_fp, temp_filename, "a+");
903     if (ret_errno != 0) goto fn_fail;
904 
905  fn_exit:
906     return mpi_errno;
907  fn_fail:
908     MPIU_Error_printf( "Could not open log file %s\n", temp_filename );
909     mpiu_dbg_initialized = MPIU_DBG_ERROR;
910     mpi_errno = MPI_ERR_INTERN;
911     goto fn_exit;
912 }
913 #else
914 /* creates a temporary file in some directory, which may not be where
915    the user wants the log file.  When the file is renamed later, it
916    may require a copy.
917 
918    Note that this is not safe: By the time we call fopen(), another
919    file with the same name may exist.  That file would get clobbered.
920 */
921 #undef FUNCNAME
922 #define FUNCNAME MPIU_DBG_Open_temp_file
923 #undef FCNAME
924 #define FCNAME MPIDI_QUOTE(FUNCNAME)
MPIU_DBG_Open_temp_file(FILE ** dbg_fp)925 static int MPIU_DBG_Open_temp_file(FILE **dbg_fp)
926 {
927     int mpi_errno = MPI_SUCCESS;
928     const char temp_pattern[] = "templogXXXXXX";
929     int fd;
930     char *basename;
931     int ret;
932     char *cret;
933 
934     cret = tmpnam(temp_filename);
935     if (cret == NULL) goto fn_fail;
936 
937     *dbg_fp = fopen(temp_filename, "w");
938     if (*dbg_fp == NULL) goto fn_fail;
939 
940  fn_exit:
941     return mpi_errno;
942  fn_fail:
943     MPIU_Error_printf( "Could not open log file %s\n", temp_filename );
944     mpiu_dbg_initialized = MPIU_DBG_ERROR;
945     mpi_errno = MPI_ERR_INTERN;
946     goto fn_exit;
947 }
948 
949 #endif
950 
951 /* This routine can make no MPI calls, since it may be logging those
952    calls. */
MPIU_DBG_Get_filename(char * filename,int len)953 static int MPIU_DBG_Get_filename(char *filename, int len)
954 {
955     int withinMworld = 0,         /* True if within an @W...@ */
956 	withinMthread = 0;        /* True if within an @T...@ */
957     /* FIXME: Need to know how many MPI_COMM_WORLDs are known */
958     int nWorld = 1;
959 #ifdef MPICH_IS_THREADED
960     unsigned long long int threadID = 0;
961     int nThread = 2;
962 #else
963     int nThread = 1;
964 #endif
965     static char worldNumAsChar[10] = "0";
966     char *pDest;
967     const char *p;
968 
969     /* FIXME: This is a hack to handle the common case of two worlds */
970     if (MPIR_Process.comm_parent != NULL) {
971 	nWorld = 2;
972 	worldNumAsChar[0] = '1';
973 	worldNumAsChar[1] = '\0';
974     }
975 
976     p     = filePattern;
977     pDest = filename;
978     *filename = 0;
979     while (*p && (pDest-filename) < len-1) {
980         /* There are two special cases that allow text to
981            be optionally included.  Those patterns are
982            @T...@ (only if multi-threaded) and
983            @W...@ (only if more than one MPI_COMM_WORLD)
984            UNIMPLEMENTED/UNTESTED */
985         if (*p == '@') {
986             /* Escaped @? */
987             if (p[1] == '@') {
988                 *pDest++ = *++p;
989                 continue;
990             }
991             /* If within an @...@, terminate it */
992             if (withinMworld) {
993                 withinMworld = 0;
994                 p++;
995             }
996             else if (withinMthread) {
997                 withinMthread = 0;
998                 p++;
999             }
1000             else {
1001                 /* Look for command */
1002                 p++;
1003                 if (*p == 'W') {
1004                     p++;
1005                     withinMworld = 1;
1006                 }
1007                 else if (*p == 'T') {
1008                     p++;
1009                     withinMthread = 1;
1010                 }
1011                 else {
1012                     /* Unrecognized char */
1013                     *pDest++ = *p++;
1014                 }
1015             }
1016         }
1017         else if ( (withinMworld && nWorld == 1) ||
1018                   (withinMthread && nThread == 1) ) {
1019             /* Simply skip this character since we're not showing
1020                this string */
1021             p++;
1022         }
1023         else if (*p == '%') {
1024             p++;
1025             if (*p == 'd') {
1026                 char rankAsChar[20];
1027                 MPIU_Snprintf( rankAsChar, sizeof(rankAsChar), "%d",
1028                                worldRank );
1029                 *pDest = 0;
1030                 MPIU_Strnapp( filename, rankAsChar, len );
1031                 pDest += strlen(rankAsChar);
1032             }
1033             else if (*p == 't') {
1034 #ifdef MPICH_IS_THREADED
1035                 char threadIDAsChar[30];
1036                 MPIU_Thread_id_t tid;
1037                 MPIU_Thread_self(&tid);
1038                 threadID = (unsigned long long int)tid;
1039 
1040                 MPIU_Snprintf( threadIDAsChar, sizeof(threadIDAsChar),
1041                                "%llx", threadID );
1042                 *pDest = 0;
1043                 MPIU_Strnapp( filename, threadIDAsChar, len );
1044                 pDest += strlen(threadIDAsChar);
1045 #else
1046                 *pDest++ = '0';
1047 #endif /* MPICH_IS_THREADED */
1048             }
1049             else if (*p == 'w') {
1050                 /* FIXME: Get world number */
1051                 /* *pDest++ = '0'; */
1052                 *pDest = 0;
1053                 MPIU_Strnapp( filename, worldNumAsChar, len );
1054                 pDest += strlen(worldNumAsChar);
1055             }
1056             else if (*p == 'p') {
1057                 /* Appends the pid of the proceess to the file name. */
1058                 char pidAsChar[20];
1059 #if defined(HAVE_GETPID)
1060                 pid_t pid = getpid();
1061 #else
1062                 int pid = -1;
1063 #endif /* HAVE_GETPID */
1064                 MPIU_Snprintf( pidAsChar, sizeof(pidAsChar), "%d", (int)pid );
1065                 *pDest = 0;
1066                 MPIU_Strnapp( filename, pidAsChar, len );
1067                 pDest += strlen(pidAsChar);
1068             }
1069             else {
1070                 *pDest++ = '%';
1071                 *pDest++ = *p;
1072             }
1073             p++;
1074         }
1075         else {
1076             *pDest++ = *p++;
1077         }
1078     }
1079     *pDest = 0;
1080 
1081     return 0;
1082 }
1083 
1084 /* This routine can make no MPI calls, since it may be logging those
1085    calls. */
MPIU_DBG_OpenFile(FILE ** dbg_fp)1086 static int MPIU_DBG_OpenFile(FILE **dbg_fp)
1087 {
1088     int mpi_errno = MPI_SUCCESS;
1089     if (!filePattern || *filePattern == 0 ||
1090 	strcmp(filePattern, "-stdout-" ) == 0) {
1091 	*dbg_fp = stdout;
1092     }
1093     else if (strcmp( filePattern, "-stderr-" ) == 0) {
1094 	*dbg_fp = stderr;
1095     }
1096     else {
1097 	char filename[MAXPATHLEN];
1098 
1099         /* if we're not at MPIU_DBG_INITIALIZED, we don't know our
1100            rank yet, so we create a temp file, to be renamed later */
1101         if (mpiu_dbg_initialized != MPIU_DBG_INITIALIZED)
1102         {
1103             mpi_errno = MPIU_DBG_Open_temp_file(dbg_fp);
1104             if (mpi_errno) goto fn_fail;
1105         }
1106         else
1107         {
1108             mpi_errno = MPIU_DBG_Get_filename(filename, MAXPATHLEN);
1109             if (mpi_errno) goto fn_fail;
1110 
1111             *dbg_fp = fopen( filename, "w" );
1112             if (!*dbg_fp) {
1113                 MPIU_Error_printf( "Could not open log file %s\n", filename );
1114                 if (mpi_errno) goto fn_fail;
1115             }
1116         }
1117     }
1118  fn_exit:
1119     return mpi_errno;
1120  fn_fail:
1121     mpiu_dbg_initialized = MPIU_DBG_ERROR;
1122     mpi_errno = MPI_ERR_INTERN;
1123     goto fn_exit;
1124 }
1125 
1126 /* Support routines for processing mpich-dbg values */
1127 /* Update the GLOBAL variable MPIU_DBG_ActiveClasses with
1128    the bits corresponding to this name */
setDBGClass(const char * s)1129 static int setDBGClass( const char *s )
1130 {
1131     int i;
1132     int slen = 0;
1133     int len = 0;
1134 
1135     if (s && *s) slen = strlen(s);
1136 
1137     while (s && *s) {
1138 	for (i=0; MPIU_Classnames[i].LCName; i++) {
1139 	    /* The LCLen and UCLen *should* be the same, but
1140 	       just in case, we separate them */
1141 	    int LClen = strlen(MPIU_Classnames[i].LCName);
1142 	    int UClen = strlen(MPIU_Classnames[i].UCName);
1143 	    int matchClass = 0;
1144 
1145 	    /* Allow the upper case and lower case in all cases */
1146 	    if (slen >= LClen &&
1147 		strncmp(s,MPIU_Classnames[i].LCName, LClen) == 0 &&
1148 		(s[LClen] == ',' || s[LClen] == 0) ) {
1149 		matchClass = 1;
1150 		len = LClen;
1151 	    }
1152 	    else if (slen >= UClen &&
1153 		strncmp(s,MPIU_Classnames[i].UCName, UClen) == 0 &&
1154 		(s[UClen] == ',' || s[UClen] == 0) ) {
1155 		matchClass = 1;
1156 		len = UClen;
1157 	    }
1158 	    if (matchClass) {
1159 		MPIU_DBG_ActiveClasses |= MPIU_Classnames[i].classbits;
1160 		s += len;
1161 		slen -= len;
1162 		if (*s == ',') { s++; slen--; }
1163 		/* If we found a name, we need to restart the for loop */
1164 		break;
1165 	    }
1166 	}
1167 	if (!MPIU_Classnames[i].LCName) {
1168 	    return 1;
1169 	}
1170     }
1171     return 0;
1172 }
1173 
1174 /* Set the global MPIU_DBG_MaxLevel if there is a match with the known level
1175    names
1176 */
SetDBGLevel(const char * s,const char * (names[]))1177 static int SetDBGLevel( const char *s, const char *(names[]) )
1178 {
1179     int i;
1180 
1181     for (i=0; names[i]; i++) {
1182 	if (strcmp( names[i], s ) == 0) {
1183 	    MPIU_DBG_MaxLevel = MPIU_Levelvalues[i];
1184 	    return 0;
1185 	}
1186     }
1187     return 1;
1188 }
1189 #endif /* USE_DBG_LOGGING */
1190