1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 /* style: allow:fprintf:4 sig:0 */
7 
8 #include "mpiimpl.h"
9 
10 /* stdarg is required to handle the variable argument lists for
11    MPIR_Err_create_code */
12 #include <stdarg.h>
13 /* Define USE_ERR_CODE_VALIST to get the prototype for the valist version
14    of MPIR_Err_create_code in mpir_err.h (without this definition,
15    the prototype is not included.  The "valist" version of the function
16    is used in only a few places, here and potentially in ROMIO) */
17 #define USE_ERR_CODE_VALIST
18 
19 /* errcodes.h contains the macros used to access fields within an error
20    code and a description of the bits in an error code.  A brief
21    version of that description is included below */
22 
23 #include "errcodes.h"
24 
25 /* defmsg is generated automatically from the source files and contains
26    all of the error messages, both the generic and specific.  Depending
27    on the value of MPICH_ERROR_MSG_LEVEL, different amounts of message
28    information will be included from the file defmsg.h */
29 #include "defmsg.h"
30 
31 /* stdio is needed for vsprintf and vsnprintf */
32 #include <stdio.h>
33 
34 /*
35 === BEGIN_MPI_T_CVAR_INFO_BLOCK ===
36 
37 categories:
38     - name        : ERROR_HANDLING
39       description : cvars that control error handling behavior (stack traces, aborts, etc)
40 
41 cvars:
42     - name        : MPIR_CVAR_PRINT_ERROR_STACK
43       category    : ERROR_HANDLING
44       type        : boolean
45       default     : true
46       class       : none
47       verbosity   : MPI_T_VERBOSITY_USER_BASIC
48       scope       : MPI_T_SCOPE_LOCAL
49       description : >-
50         If true, print an error stack trace at error handling time.
51 
52     - name        : MPIR_CVAR_CHOP_ERROR_STACK
53       category    : ERROR_HANDLING
54       type        : int
55       default     : 0
56       class       : none
57       verbosity   : MPI_T_VERBOSITY_USER_BASIC
58       scope       : MPI_T_SCOPE_LOCAL
59       description : >-
60         If >0, truncate error stack output lines this many characters
61         wide.  If 0, do not truncate, and if <0 use a sensible default.
62 
63 === END_MPI_T_CVAR_INFO_BLOCK ===
64 */
65 
66 /*
67  * Structure of this file
68  *
 * This file contains several groups of routines used for error handling
70  * and reporting.
71  *
72  * The first group provides memory for the MPIR_Errhandler objects
73  * and the routines to free and manipulate them
74  *
75  * The second group provides routines to call the appropriate error handler,
76  * which may be predefined or user defined.  These also return the
77  * appropriate return code.  These routines have names of the form
78  * MPIR_Err_return_xxx.  Specifically, for each of the MPI types on which an
79  * error handler can be defined, there is an MPIR_Err_return_xxx routine
80  * that determines what error handler function to call and whether to
81  * abort the program.  The comm and win versions are here; ROMIO
82  * provides its own routines for invoking the error handlers for Files.
83  *
84  * The third group of code handles the error messages.  There are four
85  * options, controlled by the value of MPICH_ERROR_MSG_LEVEL.
86  *
87  * MPICH_ERROR_MSG__NONE - No text messages at all
88  * MPICH_ERROR_MSG__CLASS - Only messages for the MPI error classes
 * MPICH_ERROR_MSG__GENERIC - Only predefined messages for the MPI error codes
90  * MPICH_ERROR_MSG__ALL - Instance specific error messages (and error message
91  *                       stack)
92  *
 * Only in the last case (MPICH_ERROR_MSG__ALL) are instance-specific
 * messages maintained (including the error message "stack" that you may
 * see mentioned in various places).  In the other cases, an error code
 * identifies a fixed message string (unless MPICH_ERROR_MSG__NONE,
 * when there are no strings) from the "generic" strings defined in defmsg.h
98  *
99  * A major subgroup in this section is the code to handle the instance-specific
100  * messages (MPICH_ERROR_MSG__ALL only).
101  *
102  * An MPI error code is made up of a number of fields (see errcodes.h)
 * These are:
104  *   is-dynamic? specific-msg-sequence# specific-msg-index
105  *                                            generic-code is-fatal? class
106  *
107  * There are macros (defined in errcodes.h) that define these fields,
108  * their sizes, and masks and shifts that may be used to extract them.
109  */
110 
111 static int did_err_init = FALSE;        /* helps us solve a bootstrapping problem */
112 
113 /* A few prototypes.  These routines are called from the MPIR_Err_return
114    routines.  checkValidErrcode depends on the MPICH_ERROR_MSG_LEVEL.
115    If the error code is *not* valid, checkValidErrcode may replace it
116    with a valid value.  */
117 
118 static int checkValidErrcode(int, const char[], int *);
119 
120 #if MPICH_ERROR_MSG_LEVEL >= MPICH_ERROR_MSG__ALL
121 static int ErrGetInstanceString(int, char[], int);
122 static void MPIR_Err_stack_init(void);
123 static int checkForUserErrcode(int);
124 #else
125 /* We only need special handling for user error codes when we support the
126    error message stack */
127 #define checkForUserErrcode(_a) _a
128 #endif /* ERROR_MSG_LEVEL >= ERROR_MSG_ALL */
129 
130 
131 /* ------------------------------------------------------------------------- */
132 /* Provide the MPIR_Errhandler space and the routines to free and set them
133    from C++ and Fortran */
134 /* ------------------------------------------------------------------------- */
135 /*
136  * Error handlers.  These are handled just like the other opaque objects
137  * in MPICH
138  */
139 
140 #ifndef MPIR_ERRHANDLER_PREALLOC
141 #define MPIR_ERRHANDLER_PREALLOC 8
142 #endif
143 
144 /* Preallocated errorhandler objects */
145 MPIR_Errhandler MPIR_Errhandler_builtin[MPIR_ERRHANDLER_N_BUILTIN];
146 MPIR_Errhandler MPIR_Errhandler_direct[MPIR_ERRHANDLER_PREALLOC];
147 
148 MPIR_Object_alloc_t MPIR_Errhandler_mem = { 0, 0, 0, 0, MPIR_ERRHANDLER,
149     sizeof(MPIR_Errhandler),
150     MPIR_Errhandler_direct,
151     MPIR_ERRHANDLER_PREALLOC,
152     NULL
153 };
154 
/* Release an error handler object by handing it back to the
 * MPIR_Errhandler_mem object allocator. */
void MPIR_Errhandler_free(MPIR_Errhandler * errhan_ptr)
{
    MPIR_Object_alloc_t *const pool = &MPIR_Errhandler_mem;

    MPIR_Handle_obj_free(pool, errhan_ptr);
}
159 
MPIR_Err_init(void)160 void MPIR_Err_init(void)
161 {
162     /* these are "stub" objects, so the other fields (which are statically
163      * initialized to zero) don't really matter */
164     MPIR_Errhandler_builtin[0].handle = MPI_ERRORS_ARE_FATAL;
165     MPIR_Errhandler_builtin[1].handle = MPI_ERRORS_RETURN;
166     MPIR_Errhandler_builtin[2].handle = MPIR_ERRORS_THROW_EXCEPTIONS;
167 
168 #if MPICH_ERROR_MSG_LEVEL >= MPICH_ERROR_MSG__ALL
169     MPIR_Err_stack_init();
170 #endif
171     did_err_init = TRUE;
172 }
173 
174 /* Language Callbacks */
175 
176 #ifdef HAVE_CXX_BINDING
177 /* This routine is used to install a callback used by the C++ binding
178  to invoke the (C++) error handler.  The callback routine is a C routine,
179  defined in the C++ binding. */
MPII_Errhandler_set_cxx(MPI_Errhandler errhand,void (* errcall)(void))180 void MPII_Errhandler_set_cxx(MPI_Errhandler errhand, void (*errcall) (void))
181 {
182     MPIR_Errhandler *errhand_ptr;
183 
184     MPIR_Errhandler_get_ptr(errhand, errhand_ptr);
185     errhand_ptr->language = MPIR_LANG__CXX;
186     MPIR_Process.cxx_call_errfn = (void (*)(int, int *, int *, void (*)(void))) errcall;
187 }
188 #endif /* HAVE_CXX_BINDING */
189 
190 #if defined(HAVE_FORTRAN_BINDING) && !defined(HAVE_FINT_IS_INT)
/* Mark the error handler identified by errhand as a Fortran handler, so that
 * it is invoked through the Fortran calling sequence. */
void MPII_Errhandler_set_fc(MPI_Errhandler errhand)
{
    MPIR_Errhandler *handler_obj;

    MPIR_Errhandler_get_ptr(errhand, handler_obj);
    handler_obj->language = MPIR_LANG__FORTRAN;
}
198 
199 #endif
200 
201 /* ------------------------------------------------------------------------- */
/* Group 2: These routines are called on error exit from most
   top-level MPI routines to invoke the appropriate error handler.
   Also included is the routine to call if MPI has not been
   initialized or has already been finalized (MPIR_Err_preOrPostInit)
   and to determine if an error code represents a fatal error
   (MPIR_Err_is_fatal). */
207 /* ------------------------------------------------------------------------- */
208 /* Special error handler to call if we are not yet initialized, or if we
209    have finalized */
210 /* --BEGIN ERROR HANDLING-- */
MPIR_Err_preOrPostInit(void)211 void MPIR_Err_preOrPostInit(void)
212 {
213     if (MPL_atomic_load_int(&MPIR_Process.mpich_state) == MPICH_MPI_STATE__PRE_INIT) {
214         MPL_error_printf("Attempting to use an MPI routine before initializing MPICH\n");
215     } else if (MPL_atomic_load_int(&MPIR_Process.mpich_state) == MPICH_MPI_STATE__POST_FINALIZED) {
216         MPL_error_printf("Attempting to use an MPI routine after finalizing MPICH\n");
217     } else {
218         MPL_error_printf
219             ("Internal Error: Unknown state of MPI (neither initialized nor finalized)\n");
220     }
221     exit(1);
222 }
223 
224 /* --END ERROR HANDLING-- */
225 
226 /* Return true if the error code indicates a fatal error */
MPIR_Err_is_fatal(int errcode)227 int MPIR_Err_is_fatal(int errcode)
228 {
229     return (errcode & ERROR_FATAL_MASK) ? TRUE : FALSE;
230 }
231 
232 /*
233  * This is the routine that is invoked by most MPI routines to
234  * report an error.  It is legitimate to pass NULL for comm_ptr in order to get
235  * the default (MPI_COMM_WORLD) error handling.
236  */
MPIR_Err_return_comm(MPIR_Comm * comm_ptr,const char fcname[],int errcode)237 int MPIR_Err_return_comm(MPIR_Comm * comm_ptr, const char fcname[], int errcode)
238 {
239     const int error_class = ERROR_GET_CLASS(errcode);
240     MPIR_Errhandler *errhandler = NULL;
241 
242     checkValidErrcode(error_class, fcname, &errcode);
243 
244     /* --BEGIN ERROR HANDLING-- */
245     if (MPL_atomic_load_int(&MPIR_Process.mpich_state) == MPICH_MPI_STATE__PRE_INIT ||
246         MPL_atomic_load_int(&MPIR_Process.mpich_state) == MPICH_MPI_STATE__POST_FINALIZED) {
247         /* for whatever reason, we aren't initialized (perhaps error
248          * during MPI_Init) */
249         MPIR_Handle_fatal_error(MPIR_Process.comm_world, fcname, errcode);
250         return MPI_ERR_INTERN;
251     }
252     /* --END ERROR HANDLING-- */
253 
254     MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, TERSE,
255                     (MPL_DBG_FDEST, "MPIR_Err_return_comm(comm_ptr=%p, fcname=%s, errcode=%d)",
256                      comm_ptr, fcname, errcode));
257 
258     if (comm_ptr) {
259         MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
260         errhandler = comm_ptr->errhandler;
261         MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
262     }
263 
264     if (errhandler == NULL) {
265         /* Try to replace with the default handler, which is the one on
266          * MPI_COMM_WORLD.  This gives us correct behavior for the
267          * case where the error handler on MPI_COMM_WORLD has been changed. */
268         if (MPIR_Process.comm_world) {
269             comm_ptr = MPIR_Process.comm_world;
270         }
271     }
272 
273     /* --BEGIN ERROR HANDLING-- */
274     if (MPIR_Err_is_fatal(errcode) || comm_ptr == NULL) {
275         /* Calls MPID_Abort */
276         MPIR_Handle_fatal_error(comm_ptr, fcname, errcode);
277         /* never get here */
278     }
279     /* --END ERROR HANDLING-- */
280 
281     MPIR_Assert(comm_ptr != NULL);
282 
283     /* comm_ptr may have changed to comm_world.  Keep this locked as long as we
284      * are using the errhandler to prevent it from disappearing out from under
285      * us. */
286     MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
287     errhandler = comm_ptr->errhandler;
288 
289     /* --BEGIN ERROR HANDLING-- */
290     if (errhandler == NULL || errhandler->handle == MPI_ERRORS_ARE_FATAL) {
291         MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
292         /* Calls MPID_Abort */
293         MPIR_Handle_fatal_error(comm_ptr, fcname, errcode);
294         /* never get here */
295     }
296     /* --END ERROR HANDLING-- */
297 
298     /* Check for the special case of a user-provided error code */
299     errcode = checkForUserErrcode(errcode);
300 
301     if (errhandler->handle != MPI_ERRORS_RETURN &&
302         errhandler->handle != MPIR_ERRORS_THROW_EXCEPTIONS) {
303         /* We pass a final 0 (for a null pointer) to these routines
304          * because MPICH-1 expected that */
305         switch (comm_ptr->errhandler->language) {
306             case MPIR_LANG__C:
307                 (*comm_ptr->errhandler->errfn.C_Comm_Handler_function) (&comm_ptr->handle, &errcode,
308                                                                         0);
309                 break;
310 #ifdef HAVE_CXX_BINDING
311             case MPIR_LANG__CXX:
312                 (*MPIR_Process.cxx_call_errfn) (0, &comm_ptr->handle, &errcode,
313                                                 (void (*)(void)) *comm_ptr->errhandler->
314                                                 errfn.C_Comm_Handler_function);
315                 /* The C++ code throws an exception if the error handler
316                  * returns something other than MPI_SUCCESS. There is no "return"
317                  * of an error code. */
318                 errcode = MPI_SUCCESS;
319                 break;
320 #endif /* CXX_BINDING */
321 #ifdef HAVE_FORTRAN_BINDING
322             case MPIR_LANG__FORTRAN90:
323             case MPIR_LANG__FORTRAN:
324                 {
325                     /* If int and MPI_Fint aren't the same size, we need to
326                      * convert.  As this is not performance critical, we
327                      * do this even if MPI_Fint and int are the same size. */
328                     MPI_Fint ferr = errcode;
329                     MPI_Fint commhandle = comm_ptr->handle;
330                     (*comm_ptr->errhandler->errfn.F77_Handler_function) (&commhandle, &ferr);
331                 }
332                 break;
333 #endif /* FORTRAN_BINDING */
334         }
335 
336     }
337 
338     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
339     return errcode;
340 }
341 
342 /*
343  * MPI routines that detect errors on window objects use this to report errors
344  */
MPIR_Err_return_win(MPIR_Win * win_ptr,const char fcname[],int errcode)345 int MPIR_Err_return_win(MPIR_Win * win_ptr, const char fcname[], int errcode)
346 {
347     const int error_class = ERROR_GET_CLASS(errcode);
348 
349     if (win_ptr == NULL || win_ptr->errhandler == NULL)
350         return MPIR_Err_return_comm(NULL, fcname, errcode);
351 
352     /* We don't test for MPI initialized because to call this routine,
353      * we will have had to call an MPI routine that would make that test */
354 
355     checkValidErrcode(error_class, fcname, &errcode);
356 
357     MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, TERSE,
358                     (MPL_DBG_FDEST, "MPIR_Err_return_win(win_ptr=%p, fcname=%s, errcode=%d)",
359                      win_ptr, fcname, errcode));
360 
361     /* --BEGIN ERROR HANDLING-- */
362     if (MPIR_Err_is_fatal(errcode) ||
363         win_ptr == NULL || win_ptr->errhandler == NULL ||
364         win_ptr->errhandler->handle == MPI_ERRORS_ARE_FATAL) {
365         /* Calls MPID_Abort */
366         MPIR_Handle_fatal_error(NULL, fcname, errcode);
367     }
368     /* --END ERROR HANDLING-- */
369 
370     /* Check for the special case of a user-provided error code */
371     errcode = checkForUserErrcode(errcode);
372 
373     if (win_ptr->errhandler->handle == MPI_ERRORS_RETURN ||
374         win_ptr->errhandler->handle == MPIR_ERRORS_THROW_EXCEPTIONS) {
375         return errcode;
376     } else {
377         /* Now, invoke the error handler for the window */
378 
379         /* We pass a final 0 (for a null pointer) to these routines
380          * because MPICH-1 expected that */
381         switch (win_ptr->errhandler->language) {
382             case MPIR_LANG__C:
383                 (*win_ptr->errhandler->errfn.C_Win_Handler_function) (&win_ptr->handle, &errcode,
384                                                                       0);
385                 break;
386 #ifdef HAVE_CXX_BINDING
387             case MPIR_LANG__CXX:
388                 (*MPIR_Process.cxx_call_errfn) (2, &win_ptr->handle, &errcode,
389                                                 (void (*)(void)) *win_ptr->errhandler->
390                                                 errfn.C_Win_Handler_function);
391                 /* The C++ code throws an exception if the error handler
392                  * returns something other than MPI_SUCCESS. There is no "return"
393                  * of an error code. */
394                 errcode = MPI_SUCCESS;
395                 break;
396 #endif /* CXX_BINDING */
397 #ifdef HAVE_FORTRAN_BINDING
398             case MPIR_LANG__FORTRAN90:
399             case MPIR_LANG__FORTRAN:
400                 {
401                     /* If int and MPI_Fint aren't the same size, we need to
402                      * convert.  As this is not performance critical, we
403                      * do this even if MPI_Fint and int are the same size. */
404                     MPI_Fint ferr = errcode;
405                     MPI_Fint winhandle = win_ptr->handle;
406                     (*win_ptr->errhandler->errfn.F77_Handler_function) (&winhandle, &ferr);
407                 }
408                 break;
409 #endif /* FORTRAN_BINDING */
410         }
411 
412     }
413     return errcode;
414 }
415 
416 /* ------------------------------------------------------------------------- */
417 /* Group 3: Routines to handle error messages.  These are organized into
418  * several subsections:
419  *  General service routines (used by more than one error reporting level)
420  *  Routines of specific error message levels
421  *
422  */
423 /* ------------------------------------------------------------------------- */
424 /* Forward reference */
425 static void CombineSpecificCodes(int, int, int);
426 static const char *get_class_msg(int);
427 
428 /* --BEGIN ERROR HANDLING-- */
/* Build the message "Fatal error in <fcname>: <text for errcode>" and pass
 * it to MPID_Abort together with comm_ptr and errcode. */
void MPIR_Handle_fatal_error(MPIR_Comm * comm_ptr, const char fcname[], int errcode)
{
    /* Length of the buffer for the complete error message; this definition
     * is used only within this routine.  The size of the MPI error messages
     * is used unless it is too short (below 4096) to hold enough information
     * about the error. */
#if MPI_MAX_ERROR_STRING < 4096
#define MAX_ERRMSG_STRING 4096
#else
#define MAX_ERRMSG_STRING MPI_MAX_ERROR_STRING
#endif
    char error_msg[MAX_ERRMSG_STRING];
    char *detail;
    int prefix_len;

    /* FIXME: Not internationalized.  Using MPIR_Err_get_string assumes that
     * the code is still able to perform a full MPICH error code to message
     * conversion. */
    MPL_snprintf(error_msg, MAX_ERRMSG_STRING, "Fatal error in %s: ", fcname);

    /* The text for errcode goes right behind the prefix */
    prefix_len = (int) strlen(error_msg);
    detail = &error_msg[prefix_len];
    MPIR_Err_get_string(errcode, detail, MAX_ERRMSG_STRING - prefix_len, NULL);

    /* The third argument is a return code; the error code itself is passed. */
    MPID_Abort(comm_ptr, MPI_SUCCESS, errcode, error_msg);
}
454 
455 /* --END ERROR HANDLING-- */
456 
457 /* Check for a valid error code.  If the code is not valid, attempt to
458    print out something sensible; reset the error code to have class
459    ERR_UNKNOWN */
460 /* FIXME: Now that error codes are chained, this does not produce a valid
461    error code since there is no valid ring index corresponding to this code */
462 /* FIXME: No one uses the return value */
checkValidErrcode(int error_class,const char fcname[],int * errcode_p)463 static int checkValidErrcode(int error_class, const char fcname[], int *errcode_p)
464 {
465     int errcode = *errcode_p;
466     int rc = 0;
467 
468     if (error_class > MPICH_ERR_LAST_MPIX) {
469         /* --BEGIN ERROR HANDLING-- */
470         if (errcode & ~ERROR_CLASS_MASK) {
471             MPL_error_printf
472                 ("INTERNAL ERROR: Invalid error class (%d) encountered while returning from\n"
473                  "%s.  Please file a bug report.\n", error_class, fcname);
474             /* Note that we don't try to print the error stack; if the
475              * error code is invalid, it can't be used to find
476              * the error stack.  We could consider dumping the
477              * contents of the error ring instead (without trying
478              * to interpret them) */
479         } else {
480             /* FIXME: The error stack comment only applies to MSG_ALL */
481             MPL_error_printf
482                 ("INTERNAL ERROR: Invalid error class (%d) encountered while returning from\n"
483                  "%s.  Please file a bug report.  No error stack is available.\n", error_class,
484                  fcname);
485         }
486         /* FIXME: We probably want to set this to MPI_ERR_UNKNOWN
487          * and discard the rest of the bits */
488         errcode = (errcode & ~ERROR_CLASS_MASK) | MPI_ERR_UNKNOWN;
489         rc = 1;
490         /* --END ERROR HANDLING-- */
491     }
492     *errcode_p = errcode;
493     return rc;
494 }
495 
496 /* Append an error code, error2, to the end of a list of messages in the error
497    ring whose head endcoded in error1_code.  An error code pointing at the
498    combination is returned.  If the list of messages does not terminate cleanly
499    (i.e. ring wrap has occurred), then the append is not performed. and error1
500    is returned (although it may include the class of error2 if the class of
501    error1 was MPI_ERR_OTHER). */
MPIR_Err_combine_codes(int error1,int error2)502 int MPIR_Err_combine_codes(int error1, int error2)
503 {
504     int error1_code = error1;
505     int error2_code = error2;
506     int error2_class;
507 
508     /* If either error code is success, return the other */
509     if (error1_code == MPI_SUCCESS)
510         return error2_code;
511     if (error2_code == MPI_SUCCESS)
512         return error1_code;
513 
514     /* If an error code is dynamic, return that.  If both are, we choose
515      * error1. */
516     if (error1_code & ERROR_DYN_MASK)
517         return error1_code;
518     if (error2_code & ERROR_DYN_MASK)
519         return error2_code;
520 
521     error2_class = MPIR_ERR_GET_CLASS(error2_code);
522     if (error2_class < MPI_SUCCESS || error2_class > MPICH_ERR_LAST_MPIX) {
523         error2_class = MPI_ERR_OTHER;
524     }
525 
526     /* Note that this call may simply discard an error code if the error
527      * message level does not support multiple codes */
528     CombineSpecificCodes(error1_code, error2_code, error2_class);
529 
530     if (MPIR_ERR_GET_CLASS(error1_code) == MPI_ERR_OTHER) {
531         error1_code = (error1_code & ~(ERROR_CLASS_MASK)) | error2_class;
532     }
533 
534     return error1_code;
535 }
536 
537 /* FIXME: This routine isn't quite right yet */
538 /*
539  * Notes:
540  * One complication is that in the instance-specific case, a ??
541  */
542 /*
543  * Given an errorcode, place the corresponding message in msg[length].
544  * The argument fn must be NULL and is otherwise ignored.
545  */
MPIR_Err_get_string(int errorcode,char * msg,int length,MPIR_Err_get_class_string_func_t fn)546 void MPIR_Err_get_string(int errorcode, char *msg, int length, MPIR_Err_get_class_string_func_t fn)
547 {
548     int error_class;
549     int len, num_remaining = length;
550 
551     /* The fn (fourth) argument was added improperly and is no longer
552      * used. */
553     MPIR_Assert(fn == NULL);
554 
555     /* There was code to set num_remaining to MPI_MAX_ERROR_STRING
556      * if it was zero.  But based on the usage of this routine,
557      * such a choice would overwrite memory. (This was caught by
558      * reading the coverage reports and looking into why this
559      * code was (thankfully!) never executed.) */
560     /* if (num_remaining == 0)
561      * num_remaining = MPI_MAX_ERROR_STRING; */
562     if (num_remaining == 0)
563         goto fn_exit;
564 
565     /* Convert the code to a string.  The cases are:
566      * simple class.  Find the corresponding string.
567      * <not done>
568      * if (user code) { go to code that extracts user error messages }
569      * else {
570      * is specific message code set and available?  if so, use it
571      * else use generic code (lookup index in table of messages)
572      * }
573      */
574     if (errorcode & ERROR_DYN_MASK) {
575         /* This is a dynamically created error code (e.g., with
576          * MPI_Err_add_class).  If a dynamic error code was created,
577          * the function to convert them into strings has been set.
578          * Check to see that it was; this is a safeguard against a
579          * bogus error code */
580         if (!MPIR_Process.errcode_to_string) {
581             /* FIXME: not internationalized */
582             /* --BEGIN ERROR HANDLING-- */
583             if (MPL_strncpy(msg, "Undefined dynamic error code", num_remaining)) {
584                 msg[num_remaining - 1] = '\0';
585             }
586             /* --END ERROR HANDLING-- */
587         } else {
588             if (MPL_strncpy(msg, MPIR_Process.errcode_to_string(errorcode), num_remaining)) {
589                 msg[num_remaining - 1] = '\0';
590             }
591         }
592     } else if ((errorcode & ERROR_CLASS_MASK) == errorcode) {
593         error_class = MPIR_ERR_GET_CLASS(errorcode);
594 
595         if (MPL_strncpy(msg, get_class_msg(errorcode), num_remaining)) {
596             msg[num_remaining - 1] = '\0';
597         }
598     } else {
599         /* print the class message first */
600         /* FIXME: Why print the class message first? The instance
601          * message is supposed to be complete by itself. */
602         error_class = MPIR_ERR_GET_CLASS(errorcode);
603 
604         MPL_strncpy(msg, get_class_msg(error_class), num_remaining);
605         msg[num_remaining - 1] = '\0';
606         len = (int) strlen(msg);
607         msg += len;
608         num_remaining -= len;
609 
610         /* then print the stack or the last specific error message */
611 
612         /* FIXME: Replace with function to add instance string or
613          * error code string */
614 #if MPICH_ERROR_MSG_LEVEL >= MPICH_ERROR_MSG__ALL
615         if (ErrGetInstanceString(errorcode, msg, num_remaining))
616             goto fn_exit;
617 #elif MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG__CLASS
618         {
619             int generic_idx;
620 
621             generic_idx = ((errorcode & ERROR_GENERIC_MASK) >> ERROR_GENERIC_SHIFT) - 1;
622 
623             if (generic_idx >= 0) {
624                 MPL_snprintf(msg, num_remaining, ", %s", generic_err_msgs[generic_idx].long_name);
625                 msg[num_remaining - 1] = '\0';
626                 goto fn_exit;
627             }
628         }
629 #endif /* MSG_LEVEL >= MSG_ALL */
630     }
631 
632   fn_exit:
633     return;
634 }
635 
636 #if MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG__NONE
637 /* No error message support */
MPIR_Err_create_code(int lastcode,int fatal,const char fcname[],int line,int error_class,const char generic_msg[],const char specific_msg[],...)638 int MPIR_Err_create_code(int lastcode, int fatal, const char fcname[],
639                          int line, int error_class, const char generic_msg[],
640                          const char specific_msg[], ...)
641 {
642     MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, TYPICAL,
643                     (MPL_DBG_FDEST, "%sError created: last=%#010x class=%#010x %s(%d) %s",
644                      fatal ? "Fatal " : "", lastcode, error_class, fcname, line, generic_msg));
645     return (lastcode == MPI_SUCCESS) ? error_class : lastcode;
646 }
647 
MPIR_Err_create_code_valist(int lastcode,int fatal,const char fcname[],int line,int error_class,const char generic_msg[],const char specific_msg[],va_list Argp)648 int MPIR_Err_create_code_valist(int lastcode, int fatal, const char fcname[],
649                                 int line, int error_class,
650                                 const char generic_msg[], const char specific_msg[], va_list Argp)
651 {
652     return (lastcode == MPI_SUCCESS) ? error_class : lastcode;
653 }
654 
655 /* Internal routines */
/* Nothing to combine at this message level: no instance-specific messages
 * are kept, so all arguments are ignored. */
static void CombineSpecificCodes(int error1_code, int error2_code, int error2_class)
{
    (void) error1_code;
    (void) error2_code;
    (void) error2_class;
}
659 
/* Class message lookup when no message texts are available: the same fixed
 * text is returned for every error_class. */
static const char *get_class_msg(int error_class)
{
    /* FIXME: Not internationalized */
    static const char no_text[] = "Error message texts are not available";

    (void) error_class;
    return no_text;
}
665 
666 #elif MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG__CLASS
667 /* Only class error messages.  Note this is nearly the same as
668    MPICH_ERROR_MSG_LEVEL == NONE, since the handling of error codes
669    is the same */
MPIR_Err_create_code(int lastcode,int fatal,const char fcname[],int line,int error_class,const char generic_msg[],const char specific_msg[],...)670 int MPIR_Err_create_code(int lastcode, int fatal, const char fcname[],
671                          int line, int error_class, const char generic_msg[],
672                          const char specific_msg[], ...)
673 {
674     MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, TYPICAL,
675                     (MPL_DBG_FDEST, "%sError created: last=%#010x class=%#010x %s(%d) %s",
676                      fatal ? "Fatal " : "", lastcode, error_class, fcname, line, generic_msg));
677     return (lastcode == MPI_SUCCESS) ? error_class : lastcode;
678 }
679 
MPIR_Err_create_code_valist(int lastcode,int fatal,const char fcname[],int line,int error_class,const char generic_msg[],const char specific_msg[],va_list Argp)680 int MPIR_Err_create_code_valist(int lastcode, int fatal, const char fcname[],
681                                 int line, int error_class,
682                                 const char generic_msg[], const char specific_msg[], va_list Argp)
683 {
684     return (lastcode == MPI_SUCCESS) ? error_class : lastcode;
685 }
686 
/* With class messages only there are no instance-specific messages to
 * combine; the arguments are ignored. */
static void CombineSpecificCodes(int error1_code, int error2_code, int error2_class)
{
    (void) error1_code;
    (void) error2_code;
    (void) error2_class;
}
690 
get_class_msg(int error_class)691 static const char *get_class_msg(int error_class)
692 {
693     if (error_class >= 0 && error_class < MPIR_MAX_ERROR_CLASS_INDEX) {
694         return classToMsg[error_class];
695     } else {
696         /* --BEGIN ERROR HANDLING-- */
697         return "Unknown error class";
698         /* --END ERROR HANDLING-- */
699     }
700 }
701 
702 #elif MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG__GENERIC
703 #define NEEDS_FIND_GENERIC_MSG_INDEX
704 static int FindGenericMsgIndex(const char[]);
705 
706 /* Only generic error messages (more than class, but all predefined) */
707 
MPIR_Err_create_code(int lastcode,int fatal,const char fcname[],int line,int error_class,const char generic_msg[],const char specific_msg[],...)708 int MPIR_Err_create_code(int lastcode, int fatal, const char fcname[],
709                          int line, int error_class, const char generic_msg[],
710                          const char specific_msg[], ...)
711 {
712     int rc;
713     va_list Argp;
714     va_start(Argp, specific_msg);
715     MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, TYPICAL,
716                     (MPL_DBG_FDEST, "%sError created: last=%#010x class=%#010x %s(%d) %s",
717                      fatal ? "Fatal " : "", lastcode, error_class, fcname, line, generic_msg));
718     rc = MPIR_Err_create_code_valist(lastcode, fatal, fcname, line, error_class, generic_msg,
719                                      specific_msg, Argp);
720     va_end(Argp);
721     /* Looks like Coverity has a hard time understanding that logic that
722      * (error_class != MPI_SUCCESS => rc != MPI_SUCCESS), so adding an explicit assertion here. */
723     MPIR_Assert(error_class == MPI_SUCCESS || rc != MPI_SUCCESS);
724     return rc;
725 }
726 
MPIR_Err_create_code_valist(int lastcode,int fatal,const char fcname[],int line,int error_class,const char generic_msg[],const char specific_msg[],va_list Argp)727 int MPIR_Err_create_code_valist(int lastcode, int fatal, const char fcname[],
728                                 int line, int error_class,
729                                 const char generic_msg[], const char specific_msg[], va_list Argp)
730 {
731     int generic_idx;
732     int errcode = lastcode;
733     if (lastcode == MPI_SUCCESS) {
734         generic_idx = FindGenericMsgIndex(generic_msg);
735         if (generic_idx >= 0) {
736             errcode = (generic_idx << ERROR_GENERIC_SHIFT) | error_class;
737             if (fatal)
738                 errcode |= ERROR_FATAL_MASK;
739         }
740     }
741     return errcode;
742 }
743 
/* Empty stub for the generic-message level: the body performs no work and
 * all three arguments are ignored. */
static void CombineSpecificCodes(int error1_code, int error2_code, int error2_class)
{
}
747 
get_class_msg(int error_class)748 static const char *get_class_msg(int error_class)
749 {
750     if (error_class >= 0 && error_class < MPIR_MAX_ERROR_CLASS_INDEX) {
751         return generic_err_msgs[class_to_index[error_class]].long_name;
752     } else {
753         /* --BEGIN ERROR HANDLING-- */
754         return "Unknown error class";
755         /* --END ERROR HANDLING-- */
756     }
757 }
758 
759 #elif MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG__ALL
760 /* General error message support, including the error message stack */
761 
/* Forward declarations of file-local helpers used by the routines below. */
static int checkErrcodeIsValid(int);
static const char *ErrcodeInvalidReasonStr(int);
/* NOTE(review): presumably this macro enables compilation of
 * FindGenericMsgIndex in another part of this file - confirm. */
#define NEEDS_FIND_GENERIC_MSG_INDEX
static int FindGenericMsgIndex(const char[]);
static int FindSpecificMsgIndex(const char[]);
static int vsnprintf_mpi(char *str, size_t maxlen, const char *fmt_orig, va_list list);
static void ErrcodeCreateID(int error_class, int generic_idx, const char *msg, int *id, int *seq);
static int convertErrcodeToIndexes(int errcode, int *ring_idx, int *ring_id, int *generic_idx);
static void MPIR_Err_print_stack_string(int errcode, char *str, int maxlen);

/* Number of slots in the ErrorRing array */
#define MAX_ERROR_RING ERROR_SPECIFIC_INDEX_SIZE
/* Longest "function(line)" location text stored per ring entry, not
 * counting the terminating null character */
#define MAX_LOCATION_LEN 63
774 
775 /* The maximum error string in this case may be a multi-line message,
776    constructed from multiple entries in the error message ring.  The
777    individual ring messages should be shorter than MPI_MAX_ERROR_STRING,
778    perhaps as small as 256. We define a separate value for the error lines.
779  */
780 #define MPIR_MAX_ERROR_LINE 256
781 
/* See the description above for the fields in this structure */
/* One slot of the error message ring.  Each call that creates an error code
 * fills the next slot and encodes the slot index in the returned code. */
typedef struct MPIR_Err_msg {
    /* Identifier produced by ErrcodeCreateID; compared with the id derived
     * from an error code to check that the slot still belongs to that code */
    int id;
    /* Error code that was passed in as "lastcode" when this entry was
     * created; used to walk the chain of entries */
    int prev_error;
    /* Nonzero when user_error_code holds a user-supplied error code */
    int use_user_error_code;
    int user_error_code;

    /* "function(line)" text of the place that created the entry */
    char location[MAX_LOCATION_LEN + 1];
    /* Formatted message text for this entry */
    char msg[MPIR_MAX_ERROR_LINE + 1];
} MPIR_Err_msg_t;

static MPIR_Err_msg_t ErrorRing[MAX_ERROR_RING];
/* Index of the next ring slot to hand out (wraps at MAX_ERROR_RING) */
static volatile unsigned int error_ring_loc = 0;
/* Largest value error_ring_loc has reached so far */
static volatile unsigned int max_error_ring_loc = 0;
796 
/* FIXME: This needs to be made consistent with the different thread levels,
   since in the "global" thread level, an extra thread mutex is not required. */
/*
 * Locking wrappers for the error ring.  Three variants are selected at
 * compile time:
 *   - MPID_REQUIRES_THREAD_SAFETY: always use the mutex;
 *   - MPICH_IS_THREADED: use the mutex only once did_err_init is set and
 *     inside an MPIR_THREAD_CHECK_BEGIN/END region;
 *   - otherwise: all four operations expand to nothing.
 * The lock/unlock wrappers discard the status reported by the mutex call.
 */
#if defined(MPID_REQUIRES_THREAD_SAFETY)
/* if the device requires internal MPICH routines to be thread safe, the
   MPID_THREAD_CHECK macros are not appropriate */
static MPID_Thread_mutex_t error_ring_mutex;
#define error_ring_mutex_create(_mpi_errno_p_)                  \
    MPID_Thread_mutex_create(&error_ring_mutex, _mpi_errno_p_)
/* The parameter name must match the name used in the expansion; it was
 * previously spelled differently, which left the expansion referring to an
 * undeclared identifier. */
#define error_ring_mutex_destroy(_mpi_errno_p_)                 \
    MPID_Thread_mutex_destroy(&error_ring_mutex, _mpi_errno_p_)
#define error_ring_mutex_lock()                                 \
    do {                                                        \
        int err;                                                \
        MPID_Thread_mutex_lock(&error_ring_mutex, &err);        \
    } while (0)
#define error_ring_mutex_unlock()                               \
    do {                                                        \
        int err;                                                \
        MPID_Thread_mutex_unlock(&error_ring_mutex, &err);      \
    } while (0)
#elif defined(MPICH_IS_THREADED)
static MPID_Thread_mutex_t error_ring_mutex;
#define error_ring_mutex_create(_mpi_errno_p) MPID_Thread_mutex_create(&error_ring_mutex,_mpi_errno_p)
#define error_ring_mutex_destroy(_mpi_errno_p) MPID_Thread_mutex_destroy(&error_ring_mutex,_mpi_errno_p)
#define error_ring_mutex_lock()                                 \
    do {                                                        \
        int err;                                                \
        if (did_err_init) {                                     \
            MPIR_THREAD_CHECK_BEGIN;                            \
            MPID_Thread_mutex_lock(&error_ring_mutex,&err);     \
            MPIR_THREAD_CHECK_END;                              \
        }                                                       \
    } while (0)
#define error_ring_mutex_unlock()                               \
    do {                                                        \
        int err;                                                \
        if (did_err_init) {                                     \
            MPIR_THREAD_CHECK_BEGIN;                            \
            MPID_Thread_mutex_unlock(&error_ring_mutex,&err);   \
            MPIR_THREAD_CHECK_END;                              \
        }                                                       \
    } while (0)
#else
#define error_ring_mutex_create(_a)
#define error_ring_mutex_destroy(_a)
#define error_ring_mutex_lock()
#define error_ring_mutex_unlock()
#endif /* REQUIRES_THREAD_SAFETY */
845 
846 
MPIR_Err_create_code(int lastcode,int fatal,const char fcname[],int line,int error_class,const char generic_msg[],const char specific_msg[],...)847 int MPIR_Err_create_code(int lastcode, int fatal, const char fcname[],
848                          int line, int error_class, const char generic_msg[],
849                          const char specific_msg[], ...)
850 {
851     int rc;
852     va_list Argp;
853     va_start(Argp, specific_msg);
854     MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, TYPICAL,
855                     (MPL_DBG_FDEST, "%sError created: last=%#010x class=%#010x %s(%d) %s",
856                      fatal ? "Fatal " : "", lastcode, error_class, fcname, line, generic_msg));
857     rc = MPIR_Err_create_code_valist(lastcode, fatal, fcname, line, error_class, generic_msg,
858                                      specific_msg, Argp);
859     va_end(Argp);
860     return rc;
861 }
862 
863 /*
864  * This is the real routine for generating an error code.  It takes
865  * a va_list so that it can be called by any routine that accepts a
866  * variable number of arguments.
867  */
MPIR_Err_create_code_valist(int lastcode,int fatal,const char fcname[],int line,int error_class,const char generic_msg[],const char specific_msg[],va_list Argp)868 int MPIR_Err_create_code_valist(int lastcode, int fatal, const char fcname[],
869                                 int line, int error_class,
870                                 const char generic_msg[], const char specific_msg[], va_list Argp)
871 {
872     int err_code;
873     int generic_idx;
874     int use_user_error_code = 0;
875     int user_error_code = -1;
876     char user_ring_msg[MPIR_MAX_ERROR_LINE + 1];
877 
878     /* Create the code from the class and the message ring index */
879 
880     /* Check that lastcode is valid */
881     if (lastcode != MPI_SUCCESS) {
882         int reason;
883         reason = checkErrcodeIsValid(lastcode);
884         if (reason) {
885             /* --BEGIN ERROR HANDLING-- */
886             MPL_error_printf("INTERNAL ERROR: invalid error code %x (%s) in %s:%d\n",
887                              lastcode, ErrcodeInvalidReasonStr(reason), fcname, line);
888             lastcode = MPI_SUCCESS;
889             /* --END ERROR HANDLING-- */
890         }
891     }
892 
893     /* FIXME: ERR_OTHER is overloaded; this may mean "OTHER" or it may
894      * mean "No additional error, just routine stack info" */
895     if (error_class == MPI_ERR_OTHER) {
896         if (MPIR_ERR_GET_CLASS(lastcode) > MPI_SUCCESS &&
897             MPIR_ERR_GET_CLASS(lastcode) <= MPICH_ERR_LAST_MPIX) {
898             /* If the last class is more specific (and is valid), then pass it
899              * through */
900             error_class = MPIR_ERR_GET_CLASS(lastcode);
901         } else {
902             error_class = MPI_ERR_OTHER;
903         }
904     }
905 
906     /* Handle special case of MPI_ERR_IN_STATUS.  According to the standard,
907      * the code must be equal to the class. See section 3.7.5.
908      * Information on the particular error is in the MPI_ERROR field
909      * of the status. */
910     if (error_class == MPI_ERR_IN_STATUS) {
911         return MPI_ERR_IN_STATUS;
912     }
913 
914     err_code = error_class;
915 
916     /* Handle the generic message.  This selects a subclass, based on a text
917      * string */
918     generic_idx = FindGenericMsgIndex(generic_msg);
919     if (generic_idx >= 0) {
920         if (strcmp(generic_err_msgs[generic_idx].short_name, "**user") == 0) {
921             use_user_error_code = 1;
922             /* This is a special case.  The format is
923              * "**user", "**userxxx %d", intval
924              * (generic, specific, parameter).  In this
925              * case we must ... save the user value because
926              * we store it explicitly in the ring.
927              * We do this here because we cannot both access the
928              * user error code and pass the argp to vsnprintf_mpi . */
929             if (specific_msg) {
930                 const char *specific_fmt;
931                 int specific_idx;
932                 user_error_code = va_arg(Argp, int);
933                 specific_idx = FindSpecificMsgIndex(specific_msg);
934                 if (specific_idx >= 0) {
935                     specific_fmt = specific_err_msgs[specific_idx].long_name;
936                 } else {
937                     specific_fmt = specific_msg;
938                 }
939                 MPL_snprintf(user_ring_msg, sizeof(user_ring_msg), specific_fmt, user_error_code);
940             } else {
941                 user_ring_msg[0] = 0;
942             }
943         }
944         err_code |= (generic_idx + 1) << ERROR_GENERIC_SHIFT;
945     } else {
946         /* TODO: lookup index for class error message */
947         err_code &= ~ERROR_GENERIC_MASK;
948 
949 #ifdef MPICH_DBG_OUTPUT
950         {
951             if (generic_msg[0] == '*' && generic_msg[1] == '*') {
952                 MPL_error_printf("INTERNAL ERROR: Could not find %s in list of messages\n",
953                                  generic_msg);
954             }
955         }
956 #endif /* DBG_OUTPUT */
957     }
958 
959     /* Handle the instance-specific part of the error message */
960     {
961         int specific_idx;
962         const char *specific_fmt = 0;
963         int ring_idx, ring_seq = 0;
964         char *ring_msg;
965 
966         error_ring_mutex_lock();
967         {
968             /* Get the next entry in the ring; keep track of what part of the
969              * ring is in use (max_error_ring_loc) */
970             ring_idx = error_ring_loc++;
971             if (error_ring_loc >= MAX_ERROR_RING)
972                 error_ring_loc %= MAX_ERROR_RING;
973             if (error_ring_loc > max_error_ring_loc)
974                 max_error_ring_loc = error_ring_loc;
975 
976             ring_msg = ErrorRing[ring_idx].msg;
977 
978             if (specific_msg != NULL) {
979                 specific_idx = FindSpecificMsgIndex(specific_msg);
980                 if (specific_idx >= 0) {
981                     specific_fmt = specific_err_msgs[specific_idx].long_name;
982                 } else {
983                     specific_fmt = specific_msg;
984                 }
985                 /* See the code above for handling user errors */
986                 if (!use_user_error_code) {
987                     vsnprintf_mpi(ring_msg, MPIR_MAX_ERROR_LINE, specific_fmt, Argp);
988                 } else {
989                     MPL_strncpy(ring_msg, user_ring_msg, MPIR_MAX_ERROR_LINE);
990                 }
991             } else if (generic_idx >= 0) {
992                 MPL_strncpy(ring_msg, generic_err_msgs[generic_idx].long_name, MPIR_MAX_ERROR_LINE);
993             } else {
994                 MPL_strncpy(ring_msg, generic_msg, MPIR_MAX_ERROR_LINE);
995             }
996 
997             ring_msg[MPIR_MAX_ERROR_LINE] = '\0';
998 
999             /* Get the ring sequence number and set the ring id */
1000             ErrcodeCreateID(error_class, generic_idx, ring_msg, &ErrorRing[ring_idx].id, &ring_seq);
1001             /* Set the previous code. */
1002             ErrorRing[ring_idx].prev_error = lastcode;
1003 
1004             /* */
1005             if (use_user_error_code) {
1006                 ErrorRing[ring_idx].use_user_error_code = 1;
1007                 ErrorRing[ring_idx].user_error_code = user_error_code;
1008             } else if (lastcode != MPI_SUCCESS) {
1009                 int last_ring_idx;
1010                 int last_ring_id;
1011                 int last_generic_idx;
1012 
1013                 if (convertErrcodeToIndexes(lastcode, &last_ring_idx,
1014                                             &last_ring_id, &last_generic_idx) != 0) {
1015                     /* --BEGIN ERROR HANDLING-- */
1016                     MPL_error_printf("Invalid error code (%d) (error ring index %d invalid)\n",
1017                                      lastcode, last_ring_idx);
1018                     /* --END ERROR HANDLING-- */
1019                 } else {
1020                     if (last_generic_idx >= 0 && ErrorRing[last_ring_idx].id == last_ring_id) {
1021                         if (ErrorRing[last_ring_idx].use_user_error_code) {
1022                             ErrorRing[ring_idx].use_user_error_code = 1;
1023                             ErrorRing[ring_idx].user_error_code =
1024                                 ErrorRing[last_ring_idx].user_error_code;
1025                         }
1026                     }
1027                 }
1028             }
1029 
1030             if (fcname != NULL) {
1031                 MPL_snprintf(ErrorRing[ring_idx].location, MAX_LOCATION_LEN, "%s(%d)", fcname,
1032                              line);
1033                 ErrorRing[ring_idx].location[MAX_LOCATION_LEN] = '\0';
1034             } else {
1035                 ErrorRing[ring_idx].location[0] = '\0';
1036             }
1037             {
1038                 MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, VERBOSE,
1039                                 (MPL_DBG_FDEST, "New ErrorRing[%d]", ring_idx));
1040                 MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, VERBOSE,
1041                                 (MPL_DBG_FDEST, "    id         = %#010x", ErrorRing[ring_idx].id));
1042                 MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, VERBOSE,
1043                                 (MPL_DBG_FDEST, "    prev_error = %#010x",
1044                                  ErrorRing[ring_idx].prev_error));
1045                 MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, VERBOSE,
1046                                 (MPL_DBG_FDEST, "    user=%d",
1047                                  ErrorRing[ring_idx].use_user_error_code));
1048             }
1049         }
1050         error_ring_mutex_unlock();
1051 
1052         err_code |= ring_idx << ERROR_SPECIFIC_INDEX_SHIFT;
1053         err_code |= ring_seq << ERROR_SPECIFIC_SEQ_SHIFT;
1054 
1055     }
1056 
1057     if (fatal || MPIR_Err_is_fatal(lastcode)) {
1058         err_code |= ERROR_FATAL_MASK;
1059     }
1060 
1061     return err_code;
1062 }
1063 
1064 /* FIXME: Shouldn't str be const char * ? - no, but you don't know that without
1065    some documentation */
MPIR_Err_print_stack_string(int errcode,char * str,int maxlen)1066 static void MPIR_Err_print_stack_string(int errcode, char *str, int maxlen)
1067 {
1068     char *str_orig = str;
1069     int len;
1070 
1071     error_ring_mutex_lock();
1072     {
1073         /* Find the longest fcname in the stack */
1074         int max_location_len = 0;
1075         int tmp_errcode = errcode;
1076         while (tmp_errcode != MPI_SUCCESS) {
1077             int ring_idx;
1078             int ring_id;
1079             int generic_idx;
1080 
1081             if (convertErrcodeToIndexes(tmp_errcode, &ring_idx, &ring_id, &generic_idx) != 0) {
1082                 /* --BEGIN ERROR HANDLING-- */
1083                 MPL_error_printf("Invalid error code (%d) (error ring index %d invalid)\n",
1084                                  errcode, ring_idx);
1085                 break;
1086                 /* --END ERROR HANDLING-- */
1087             }
1088 
1089             if (generic_idx < 0) {
1090                 break;
1091             }
1092 
1093             if (ErrorRing[ring_idx].id == ring_id) {
1094                 len = (int) strlen(ErrorRing[ring_idx].location);
1095                 max_location_len = MPL_MAX(max_location_len, len);
1096                 tmp_errcode = ErrorRing[ring_idx].prev_error;
1097             } else {
1098                 break;
1099             }
1100         }
1101         max_location_len += 2;  /* add space for the ": " */
1102         /* print the error stack */
1103         while (errcode != MPI_SUCCESS) {
1104             int ring_idx;
1105             int ring_id;
1106             int generic_idx;
1107             int i;
1108             char *cur_pos;
1109 
1110             if (convertErrcodeToIndexes(errcode, &ring_idx, &ring_id, &generic_idx) != 0) {
1111                 /* --BEGIN ERROR HANDLING-- */
1112                 MPL_error_printf("Invalid error code (%d) (error ring index %d invalid)\n",
1113                                  errcode, ring_idx);
1114                 /* --END ERROR HANDLING-- */
1115             }
1116 
1117             if (generic_idx < 0) {
1118                 break;
1119             }
1120 
1121             if (ErrorRing[ring_idx].id == ring_id) {
1122                 int nchrs;
1123                 MPL_snprintf(str, maxlen, "%s", ErrorRing[ring_idx].location);
1124                 len = (int) strlen(str);
1125                 maxlen -= len;
1126                 str += len;
1127                 nchrs = max_location_len - (int) strlen(ErrorRing[ring_idx].location) - 2;
1128                 while (nchrs > 0 && maxlen > 0) {
1129                     *str++ = '.';
1130                     nchrs--;
1131                     maxlen--;
1132                 }
1133                 if (maxlen > 0) {
1134                     *str++ = ':';
1135                     maxlen--;
1136                 }
1137                 if (maxlen > 0) {
1138                     *str++ = ' ';
1139                     maxlen--;
1140                 }
1141 
1142                 if (MPIR_CVAR_CHOP_ERROR_STACK > 0) {
1143                     cur_pos = ErrorRing[ring_idx].msg;
1144                     len = (int) strlen(cur_pos);
1145                     if (len == 0 && maxlen > 0) {
1146                         *str++ = '\n';
1147                         maxlen--;
1148                     }
1149                     while (len) {
1150                         if (len >= MPIR_CVAR_CHOP_ERROR_STACK - max_location_len) {
1151                             if (len > maxlen)
1152                                 break;
1153                             /* FIXME: Don't use Snprint to append a string ! */
1154                             MPL_snprintf(str, MPIR_CVAR_CHOP_ERROR_STACK - 1 - max_location_len,
1155                                          "%s", cur_pos);
1156                             str[MPIR_CVAR_CHOP_ERROR_STACK - 1 - max_location_len] = '\n';
1157                             cur_pos += MPIR_CVAR_CHOP_ERROR_STACK - 1 - max_location_len;
1158                             str += MPIR_CVAR_CHOP_ERROR_STACK - max_location_len;
1159                             maxlen -= MPIR_CVAR_CHOP_ERROR_STACK - max_location_len;
1160                             if (maxlen < max_location_len)
1161                                 break;
1162                             for (i = 0; i < max_location_len; i++) {
1163                                 MPL_snprintf(str, maxlen, " ");
1164                                 maxlen--;
1165                                 str++;
1166                             }
1167                             len = (int) strlen(cur_pos);
1168                         } else {
1169                             MPL_snprintf(str, maxlen, "%s\n", cur_pos);
1170                             len = (int) strlen(str);
1171                             maxlen -= len;
1172                             str += len;
1173                             len = 0;
1174                         }
1175                     }
1176                 } else {
1177                     MPL_snprintf(str, maxlen, "%s\n", ErrorRing[ring_idx].msg);
1178                     len = (int) strlen(str);
1179                     maxlen -= len;
1180                     str += len;
1181                 }
1182                 errcode = ErrorRing[ring_idx].prev_error;
1183             } else {
1184                 break;
1185             }
1186         }
1187     }
1188     error_ring_mutex_unlock();
1189 
1190     if (errcode == MPI_SUCCESS) {
1191         goto fn_exit;
1192     }
1193 
1194     /* FIXME: The following code is broken as described above (if the errcode
1195      * is not valid, then this code is just going to cause more problems) */
1196     {
1197         int generic_idx;
1198 
1199         generic_idx = ((errcode & ERROR_GENERIC_MASK) >> ERROR_GENERIC_SHIFT) - 1;
1200 
1201         if (generic_idx >= 0) {
1202             const char *p;
1203             /* FIXME: (Here and elsewhere)  Make sure any string is
1204              * non-null before you use it */
1205             p = generic_err_msgs[generic_idx].long_name;
1206             if (!p) {
1207                 p = "<NULL>";
1208             }
1209             MPL_snprintf(str, maxlen, "(unknown)(): %s\n", p);
1210             len = (int) strlen(str);
1211             maxlen -= len;
1212             str += len;
1213             goto fn_exit;
1214         }
1215     }
1216 
1217     {
1218         int error_class;
1219 
1220         error_class = ERROR_GET_CLASS(errcode);
1221 
1222         if (error_class <= MPICH_ERR_LAST_MPIX) {
1223             MPL_snprintf(str, maxlen, "(unknown)(): %s\n", get_class_msg(ERROR_GET_CLASS(errcode)));
1224             len = (int) strlen(str);
1225             maxlen -= len;
1226             str += len;
1227         } else {
1228             /* FIXME: Not internationalized */
1229             MPL_snprintf(str, maxlen, "Error code contains an invalid class (%d)\n", error_class);
1230             len = (int) strlen(str);
1231             maxlen -= len;
1232             str += len;
1233         }
1234     }
1235 
1236   fn_exit:
1237     if (str_orig != str) {
1238         str--;
1239         *str = '\0';
1240     }
1241     return;
1242 }
1243 
1244 
1245 /* Internal Routines */
1246 
get_class_msg(int error_class)1247 static const char *get_class_msg(int error_class)
1248 {
1249     if (error_class >= 0 && error_class < MPIR_MAX_ERROR_CLASS_INDEX) {
1250         return generic_err_msgs[class_to_index[error_class]].long_name;
1251     } else {
1252         /* --BEGIN ERROR HANDLING-- */
1253         return "Unknown error class";
1254         /* --END ERROR HANDLING-- */
1255     }
1256 }
1257 
1258 /*
1259  * Given a message string abbreviation (e.g., one that starts "**"), return
1260  * the corresponding index.  For the specific
1261  * (parameterized messages), use idx = FindSpecificMsgIndex("**msg");
1262  * Note: Identical to FindGeneric, but with a different array.  Should
1263  * use a single routine.
1264  */
FindSpecificMsgIndex(const char msg[])1265 static int FindSpecificMsgIndex(const char msg[])
1266 {
1267     int i, c;
1268     for (i = 0; i < specific_msgs_len; i++) {
1269         /* Check the sentinals to insure that the values are ok first */
1270         if (specific_err_msgs[i].sentinal1 != 0xacebad03 ||
1271             specific_err_msgs[i].sentinal2 != 0xcb0bfa11) {
1272             /* Something bad has happened! Don't risk trying the
1273              * short_name pointer; it may have been corrupted */
1274             break;
1275         }
1276         c = strcmp(specific_err_msgs[i].short_name, msg);
1277         if (c == 0)
1278             return i;
1279         if (c > 0) {
1280             /* don't return here if the string partially matches */
1281             if (strncmp(specific_err_msgs[i].short_name, msg, strlen(msg)) != 0)
1282                 return -1;
1283         }
1284     }
1285     return -1;
1286 }
1287 
1288 /* See FindGenericMsgIndex comments for a more efficient search routine that
1289    could be used here as well. */
1290 
1291 /* Support for the instance-specific messages */
1292 /* ------------------------------------------------------------------------- */
1293 /* Routines to convert instance-specific messages into a string              */
1294 /* This is the only case that supports instance-specific messages            */
1295 /* ------------------------------------------------------------------------- */
1296 /* ------------------------------------------------------------------------ */
1297 /* This block of code is used to convert various MPI values into descriptive*/
1298 /* strings.  The routines are                                               */
1299 /*     GetAssertString - handle MPI_MODE_xxx (RMA asserts)                  */
1300 /*     GetDTypeString  - handle MPI_Datatypes                               */
1301 /*     GetMPIOpString  - handle MPI_Op                                      */
1302 /* These routines are used in vsnprintf_mpi                                 */
1303 /* FIXME: These functions are not thread safe                               */
1304 /* ------------------------------------------------------------------------ */
1305 #define ASSERT_STR_MAXLEN 256
1306 
GetAssertString(int d)1307 static const char *GetAssertString(int d)
1308 {
1309     static char str[ASSERT_STR_MAXLEN] = "";
1310     char *cur;
1311     size_t len = ASSERT_STR_MAXLEN;
1312     size_t n;
1313 
1314     if (d == 0) {
1315         MPL_strncpy(str, "assert=0", ASSERT_STR_MAXLEN);
1316         return str;
1317     }
1318     cur = str;
1319     if (d & MPI_MODE_NOSTORE) {
1320         MPL_strncpy(cur, "MPI_MODE_NOSTORE", len);
1321         n = strlen(cur);
1322         cur += n;
1323         len -= n;
1324         d ^= MPI_MODE_NOSTORE;
1325     }
1326     if (d & MPI_MODE_NOCHECK) {
1327         if (len < ASSERT_STR_MAXLEN)
1328             MPL_strncpy(cur, " | MPI_MODE_NOCHECK", len);
1329         else
1330             MPL_strncpy(cur, "MPI_MODE_NOCHECK", len);
1331         n = strlen(cur);
1332         cur += n;
1333         len -= n;
1334         d ^= MPI_MODE_NOCHECK;
1335     }
1336     if (d & MPI_MODE_NOPUT) {
1337         if (len < ASSERT_STR_MAXLEN)
1338             MPL_strncpy(cur, " | MPI_MODE_NOPUT", len);
1339         else
1340             MPL_strncpy(cur, "MPI_MODE_NOPUT", len);
1341         n = strlen(cur);
1342         cur += n;
1343         len -= n;
1344         d ^= MPI_MODE_NOPUT;
1345     }
1346     if (d & MPI_MODE_NOPRECEDE) {
1347         if (len < ASSERT_STR_MAXLEN)
1348             MPL_strncpy(cur, " | MPI_MODE_NOPRECEDE", len);
1349         else
1350             MPL_strncpy(cur, "MPI_MODE_NOPRECEDE", len);
1351         n = strlen(cur);
1352         cur += n;
1353         len -= n;
1354         d ^= MPI_MODE_NOPRECEDE;
1355     }
1356     if (d & MPI_MODE_NOSUCCEED) {
1357         if (len < ASSERT_STR_MAXLEN)
1358             MPL_strncpy(cur, " | MPI_MODE_NOSUCCEED", len);
1359         else
1360             MPL_strncpy(cur, "MPI_MODE_NOSUCCEED", len);
1361         n = strlen(cur);
1362         cur += n;
1363         len -= n;
1364         d ^= MPI_MODE_NOSUCCEED;
1365     }
1366     if (d) {
1367         if (len < ASSERT_STR_MAXLEN)
1368             MPL_snprintf(cur, len, " | 0x%x", d);
1369         else
1370             MPL_snprintf(cur, len, "assert=0x%x", d);
1371     }
1372     return str;
1373 }
1374 
/*
 * Describe an MPI datatype handle as text.
 *
 * Handles that are not datatype handles, or that have an invalid handle kind
 * without being MPI_DATATYPE_NULL, give "INVALID DATATYPE".  Named
 * (predefined) types give the name from MPIR_Datatype_builtin_to_string;
 * other types are described by their combiner as "dtype=USER<...>".  When no
 * name is available the handle value is printed in hexadecimal.
 *
 * Results that have to be formatted are kept in a static buffer that is
 * overwritten by the next call, so this routine is not thread safe.
 */
static const char *GetDTypeString(MPI_Datatype d)
{
    static char default_str[64];
    int n_ints, n_addrs, n_types;
    int combiner = 0;
    char *name;

    if (HANDLE_GET_MPI_KIND(d) != MPIR_DATATYPE) {
        return "INVALID DATATYPE";
    }
    if (HANDLE_GET_KIND(d) == HANDLE_KIND_INVALID && d != MPI_DATATYPE_NULL) {
        return "INVALID DATATYPE";
    }

    if (d == MPI_DATATYPE_NULL) {
        return "MPI_DATATYPE_NULL";
    }

    if (d == 0) {
        MPL_strncpy(default_str, "dtype=0x0", sizeof(default_str));
        return default_str;
    }

    MPIR_Type_get_envelope(d, &n_ints, &n_addrs, &n_types, &combiner);

    if (combiner == MPI_COMBINER_NAMED) {
        name = MPIR_Datatype_builtin_to_string(d);
        if (name != NULL) {
            return name;
        }
        MPL_snprintf(default_str, sizeof(default_str), "dtype=0x%08x", d);
    } else {
        /* Derived type: describe it through its combiner */
        name = MPIR_Datatype_combiner_to_string(combiner);
        if (name != NULL) {
            MPL_snprintf(default_str, sizeof(default_str), "dtype=USER<%s>", name);
        } else {
            MPL_snprintf(default_str, sizeof(default_str), "dtype=USER<0x%08x>", d);
        }
    }
    return default_str;
}
1413 
/*
 * Describe an MPI_Op handle as text.
 *
 * The predefined operations listed below are returned by name.  Any other
 * handle is formatted as "op=0x<hex>" into a static buffer that is
 * overwritten by the next call.
 */
static const char *GetMPIOpString(MPI_Op o)
{
    static char default_str[64];
    static const struct {
        MPI_Op op;
        const char *name;
    } predefined[] = {
        {MPI_OP_NULL, "MPI_OP_NULL"},
        {MPI_MAX, "MPI_MAX"},
        {MPI_MIN, "MPI_MIN"},
        {MPI_SUM, "MPI_SUM"},
        {MPI_PROD, "MPI_PROD"},
        {MPI_LAND, "MPI_LAND"},
        {MPI_BAND, "MPI_BAND"},
        {MPI_LOR, "MPI_LOR"},
        {MPI_BOR, "MPI_BOR"},
        {MPI_LXOR, "MPI_LXOR"},
        {MPI_BXOR, "MPI_BXOR"},
        {MPI_MINLOC, "MPI_MINLOC"},
        {MPI_MAXLOC, "MPI_MAXLOC"},
        {MPI_REPLACE, "MPI_REPLACE"},
        {MPI_NO_OP, "MPI_NO_OP"}
    };
    size_t k;

    for (k = 0; k < sizeof(predefined) / sizeof(predefined[0]); k++) {
        if (predefined[k].op == o) {
            return predefined[k].name;
        }
    }

    /* FIXME: default is not thread safe */
    MPL_snprintf(default_str, sizeof(default_str), "op=0x%x", o);
    return default_str;
}
1454 
1455 /* ------------------------------------------------------------------------ */
1456 /* This routine takes an instance-specific string with format specifiers    */
1457 /* This routine makes use of the above routines, along with some inlined    */
1458 /* code, to process the format specifiers for the MPI objects               */
1459 /* The current set of format specifiers is undocumented except for their
1460    use in this routine.  In addition, these choices do not permit the
1461    use of GNU extensions to check the validity of these arguments.
1462    At some point, a documented set that can exploit those GNU extensions
1463    will replace these. */
1464 /* ------------------------------------------------------------------------ */
1465 
vsnprintf_mpi(char * str,size_t maxlen,const char * fmt_orig,va_list list)1466 static int vsnprintf_mpi(char *str, size_t maxlen, const char *fmt_orig, va_list list)
1467 {
1468     char *begin, *end, *fmt;
1469     size_t len;
1470     MPI_Comm C;
1471     MPI_Info info;
1472     MPI_Datatype D;
1473     MPI_Win W;
1474     MPI_Group G;
1475     MPI_Op O;
1476     MPI_Request R;
1477     MPI_Errhandler E;
1478     char *s;
1479     int t, i, d, mpi_errno = MPI_SUCCESS;
1480     long long ll;
1481     MPI_Count c;
1482     void *p;
1483 
1484     fmt = MPL_strdup(fmt_orig);
1485     if (fmt == NULL) {
1486         if (maxlen > 0 && str != NULL)
1487             *str = '\0';
1488         return 0;
1489     }
1490 
1491     begin = fmt;
1492     end = strchr(fmt, '%');
1493     while (end) {
1494         len = maxlen;
1495         if (len > (size_t) (end - begin)) {
1496             len = (size_t) (end - begin);
1497         }
1498         if (len) {
1499             MPIR_Memcpy(str, begin, len);
1500             str += len;
1501             maxlen -= len;
1502         }
1503         end++;
1504         begin = end + 1;
1505         switch ((int) (*end)) {
1506             case (int) 's':
1507                 s = va_arg(list, char *);
1508                 if (s)
1509                     MPL_strncpy(str, s, maxlen);
1510                 else {
1511                     MPL_strncpy(str, "<NULL>", maxlen);
1512                 }
1513                 break;
1514             case (int) 'd':
1515                 d = va_arg(list, int);
1516                 MPL_snprintf(str, maxlen, "%d", d);
1517                 break;
1518             case (int) 'L':
1519                 ll = va_arg(list, long long);
1520                 MPL_snprintf(str, maxlen, "%lld", ll);
1521                 break;
1522             case (int) 'x':
1523                 d = va_arg(list, int);
1524                 MPL_snprintf(str, maxlen, "%x", d);
1525                 break;
1526             case (int) 'X':
1527                 ll = va_arg(list, long long);
1528                 MPL_snprintf(str, maxlen, "%llx", ll);
1529                 break;
1530             case (int) 'i':
1531                 i = va_arg(list, int);
1532                 switch (i) {
1533                     case MPI_ANY_SOURCE:
1534                         MPL_strncpy(str, "MPI_ANY_SOURCE", maxlen);
1535                         break;
1536                     case MPI_PROC_NULL:
1537                         MPL_strncpy(str, "MPI_PROC_NULL", maxlen);
1538                         break;
1539                     case MPI_ROOT:
1540                         MPL_strncpy(str, "MPI_ROOT", maxlen);
1541                         break;
1542                     default:
1543                         MPL_snprintf(str, maxlen, "%d", i);
1544                         break;
1545                 }
1546                 break;
1547             case (int) 't':
1548                 t = va_arg(list, int);
1549                 switch (t) {
1550                     case MPI_ANY_TAG:
1551                         MPL_strncpy(str, "MPI_ANY_TAG", maxlen);
1552                         break;
1553                     default:
1554                         /* Note that MPI_UNDEFINED is not a valid tag value,
1555                          * though there is one example in the MPI-3.0 standard
1556                          * that sets status.MPI_TAG to MPI_UNDEFINED in a
1557                          * generalized request example. */
1558                         MPL_snprintf(str, maxlen, "%d", t);
1559                         break;
1560                 }
1561                 break;
1562             case (int) 'p':
1563                 p = va_arg(list, void *);
1564                 /* FIXME: A check for MPI_IN_PLACE should only be used
1565                  * where that is valid */
1566                 if (p == MPI_IN_PLACE) {
1567                     MPL_strncpy(str, "MPI_IN_PLACE", maxlen);
1568                 } else {
1569                     /* FIXME: We may want to use 0x%p for systems that
1570                      * (including Windows) that don't prefix %p with 0x.
1571                      * This must be done with a capability, not a test on
1572                      * particular OS or header files */
1573                     MPL_snprintf(str, maxlen, "%p", p);
1574                 }
1575                 break;
1576             case (int) 'C':
1577                 C = va_arg(list, MPI_Comm);
1578                 switch (C) {
1579                     case MPI_COMM_WORLD:
1580                         MPL_strncpy(str, "MPI_COMM_WORLD", maxlen);
1581                         break;
1582                     case MPI_COMM_SELF:
1583                         MPL_strncpy(str, "MPI_COMM_SELF", maxlen);
1584                         break;
1585                     case MPI_COMM_NULL:
1586                         MPL_strncpy(str, "MPI_COMM_NULL", maxlen);
1587                         break;
1588                     default:
1589                         MPL_snprintf(str, maxlen, "comm=0x%x", C);
1590                         break;
1591                 }
1592                 break;
1593             case (int) 'I':
1594                 info = va_arg(list, MPI_Info);
1595                 if (info == MPI_INFO_NULL) {
1596                     MPL_strncpy(str, "MPI_INFO_NULL", maxlen);
1597                 } else {
1598                     MPL_snprintf(str, maxlen, "info=0x%x", info);
1599                 }
1600                 break;
1601             case (int) 'D':
1602                 D = va_arg(list, MPI_Datatype);
1603                 MPL_snprintf(str, maxlen, "%s", GetDTypeString(D));
1604                 break;
1605                 /* Include support for %F only if MPI-IO is enabled */
1606 #ifdef MPI_MODE_RDWR
1607             case (int) 'F':
1608                 {
1609                     MPI_File F;
1610                     F = va_arg(list, MPI_File);
1611                     if (F == MPI_FILE_NULL) {
1612                         MPL_strncpy(str, "MPI_FILE_NULL", maxlen);
1613                     } else {
1614                         MPL_snprintf(str, maxlen, "file=0x%lx", (unsigned long) F);
1615                     }
1616                 }
1617                 break;
1618 #endif /* MODE_RDWR */
1619             case (int) 'W':
1620                 W = va_arg(list, MPI_Win);
1621                 if (W == MPI_WIN_NULL) {
1622                     MPL_strncpy(str, "MPI_WIN_NULL", maxlen);
1623                 } else {
1624                     MPL_snprintf(str, maxlen, "win=0x%x", W);
1625                 }
1626                 break;
1627             case (int) 'A':
1628                 d = va_arg(list, int);
1629                 MPL_snprintf(str, maxlen, "%s", GetAssertString(d));
1630                 break;
1631             case (int) 'G':
1632                 G = va_arg(list, MPI_Group);
1633                 if (G == MPI_GROUP_NULL) {
1634                     MPL_strncpy(str, "MPI_GROUP_NULL", maxlen);
1635                 } else {
1636                     MPL_snprintf(str, maxlen, "group=0x%x", G);
1637                 }
1638                 break;
1639             case (int) 'O':
1640                 O = va_arg(list, MPI_Op);
1641                 MPL_snprintf(str, maxlen, "%s", GetMPIOpString(O));
1642                 break;
1643             case (int) 'R':
1644                 R = va_arg(list, MPI_Request);
1645                 if (R == MPI_REQUEST_NULL) {
1646                     MPL_strncpy(str, "MPI_REQUEST_NULL", maxlen);
1647                 } else {
1648                     MPL_snprintf(str, maxlen, "req=0x%x", R);
1649                 }
1650                 break;
1651             case (int) 'E':
1652                 E = va_arg(list, MPI_Errhandler);
1653                 if (E == MPI_ERRHANDLER_NULL) {
1654                     MPL_strncpy(str, "MPI_ERRHANDLER_NULL", maxlen);
1655                 } else {
1656                     MPL_snprintf(str, maxlen, "errh=0x%x", E);
1657                 }
1658                 break;
1659             case (int) 'c':
1660                 c = va_arg(list, MPI_Count);
1661                 MPIR_Assert(sizeof(long long) >= sizeof(MPI_Count));
1662                 MPL_snprintf(str, maxlen, "%lld", (long long) c);
1663                 break;
1664             default:
1665                 /* Error: unhandled output type */
1666                 MPL_free(fmt);
1667                 return 0;
1668                 /*
1669                  * if (maxlen > 0 && str != NULL)
1670                  * *str = '\0';
1671                  * break;
1672                  */
1673         }
1674         len = strlen(str);
1675         maxlen -= len;
1676         str += len;
1677         end = strchr(begin, '%');
1678     }
1679     if (*begin != '\0') {
1680         MPL_strncpy(str, begin, maxlen);
1681     }
1682     /* Free the dup'ed format string */
1683     MPL_free(fmt);
1684 
1685     return mpi_errno;
1686 }
1687 
1688 /* ------------------------------------------------------------------------- */
1689 /* Manage the error reporting stack                                          */
1690 /* ------------------------------------------------------------------------- */
1691 
1692 /*
1693  * Support for multiple messages, including the error message ring.
 * In principle, the error message ring could be used to provide
1695  * support for multiple error classes or codes, without providing
1696  * instance-specific support.  However, for now, we combine the two
1697  * capabilities.
1698  */
1699 
1700 
MPIR_Err_stack_init(void)1701 static void MPIR_Err_stack_init(void)
1702 {
1703     int mpi_errno = MPI_SUCCESS;
1704 
1705     error_ring_mutex_create(&mpi_errno);
1706 
1707     if (MPIR_CVAR_CHOP_ERROR_STACK < 0) {
1708         MPIR_CVAR_CHOP_ERROR_STACK = 80;
1709 #ifdef HAVE_WINDOWS_H
1710         {
1711             /* If windows, set the default width to the window size */
1712             HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
1713             if (hConsole != INVALID_HANDLE_VALUE) {
1714                 CONSOLE_SCREEN_BUFFER_INFO info;
1715                 if (GetConsoleScreenBufferInfo(hConsole, &info)) {
1716                     /* override the parameter system in this case */
1717                     MPIR_CVAR_CHOP_ERROR_STACK = info.dwMaximumWindowSize.X;
1718                 }
1719             }
1720         }
1721 #endif /* WINDOWS_H */
1722     }
1723 }
1724 
1725 /* Create the ring id from information about the message */
ErrcodeCreateID(int error_class,int generic_idx,const char * msg,int * id,int * seq)1726 static void ErrcodeCreateID(int error_class, int generic_idx, const char *msg, int *id, int *seq)
1727 {
1728     int i;
1729     int ring_seq = 0, ring_id;
1730 
1731     /* Create a simple hash function of the message to serve as the
1732      * sequence number */
1733     ring_seq = 0;
1734     for (i = 0; msg[i]; i++)
1735         ring_seq += (unsigned int) msg[i];
1736 
1737     ring_seq %= ERROR_SPECIFIC_SEQ_SIZE;
1738 
1739     ring_id = (error_class & ERROR_CLASS_MASK) |
1740         ((generic_idx + 1) << ERROR_GENERIC_SHIFT) | (ring_seq << ERROR_SPECIFIC_SEQ_SHIFT);
1741 
1742     *id = ring_id;
1743     *seq = ring_seq;
1744 }
1745 
1746 /* Convert an error code into ring_idx, ring_id, and generic_idx.
1747    Return non-zero if there is a problem with the decode values
1748    (e.g., out of range for the ring index) */
convertErrcodeToIndexes(int errcode,int * ring_idx,int * ring_id,int * generic_idx)1749 static int convertErrcodeToIndexes(int errcode, int *ring_idx, int *ring_id, int *generic_idx)
1750 {
1751     *ring_idx = (errcode & ERROR_SPECIFIC_INDEX_MASK) >> ERROR_SPECIFIC_INDEX_SHIFT;
1752     *ring_id = errcode & (ERROR_CLASS_MASK | ERROR_GENERIC_MASK | ERROR_SPECIFIC_SEQ_MASK);
1753     *generic_idx = ((errcode & ERROR_GENERIC_MASK) >> ERROR_GENERIC_SHIFT) - 1;
1754 
1755     /* Test on both the max_error_ring_loc and MAX_ERROR_RING to guard
1756      * against memory overwrites */
1757     if (*ring_idx < 0 || *ring_idx >= MAX_ERROR_RING ||
1758         (unsigned int) *ring_idx > max_error_ring_loc)
1759         return 1;
1760 
1761     return 0;
1762 }
1763 
checkErrcodeIsValid(int errcode)1764 static int checkErrcodeIsValid(int errcode)
1765 {
1766     int ring_id, generic_idx, ring_idx;
1767 
1768     /* If the errcode is a class, then it is valid */
1769     if (errcode <= MPIR_MAX_ERROR_CLASS_INDEX && errcode >= 0)
1770         return 0;
1771 
1772     if (convertErrcodeToIndexes(errcode, &ring_idx, &ring_id, &generic_idx) != 0) {
1773         /* --BEGIN ERROR HANDLING-- */
1774         MPL_error_printf("Invalid error code (%d) (error ring index %d invalid)\n",
1775                          errcode, ring_idx);
1776         /* --END ERROR HANDLING-- */
1777     }
1778 
1779     MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, VERBOSE,
1780                     (MPL_DBG_FDEST, "code=%#010x ring_idx=%d ring_id=%#010x generic_idx=%d",
1781                      errcode, ring_idx, ring_id, generic_idx));
1782 
1783     if (ring_idx < 0 || ring_idx >= MAX_ERROR_RING || (unsigned int) ring_idx > max_error_ring_loc)
1784         return 1;
1785     if (ErrorRing[ring_idx].id != ring_id)
1786         return 2;
1787     /* It looks like the code uses a generic idx of -1 to indicate no
1788      * generic message */
1789     if (generic_idx < -1 || generic_idx > generic_msgs_len)
1790         return 3;
1791     return 0;
1792 }
1793 
1794 /* Check to see if the error code is a user-specified error code
1795    (e.g., from the attribute delete function) and if so, set the error code
1796    to the value provide by the user */
checkForUserErrcode(int errcode)1797 static int checkForUserErrcode(int errcode)
1798 {
1799     error_ring_mutex_lock();
1800     {
1801         if (errcode != MPI_SUCCESS) {
1802             int ring_idx;
1803             int ring_id;
1804             int generic_idx;
1805 
1806             if (convertErrcodeToIndexes(errcode, &ring_idx, &ring_id, &generic_idx) != 0) {
1807                 /* --BEGIN ERROR HANDLING-- */
1808                 MPL_error_printf("Invalid error code (%d) (error ring index %d invalid)\n",
1809                                  errcode, ring_idx);
1810                 /* --END ERROR HANDLING-- */
1811             } else {
1812                 /* Can we get a more specific error message */
1813                 if (generic_idx >= 0 &&
1814                     ErrorRing[ring_idx].id == ring_id && ErrorRing[ring_idx].use_user_error_code) {
1815                     errcode = ErrorRing[ring_idx].user_error_code;
1816                 }
1817             }
1818         }
1819     }
1820     error_ring_mutex_unlock();
1821     return errcode;
1822 }
1823 
1824 
1825 /* --BEGIN ERROR HANDLING-- */
/* Translate a reason code for an invalid error code into fixed text.
 * Codes 1, 2 and 3 line up with the non-zero return values of
 * checkErrcodeIsValid; every other value yields a generic "unknown" text.
 * The returned pointer refers to a string literal. */
static const char *ErrcodeInvalidReasonStr(int reason)
{
    static const char *const known_reasons[] = {
        "Ring Index out of range",      /* reason 1 */
        "Ring ids do not match",        /* reason 2 */
        "Generic message index out of range"    /* reason 3 */
    };

    if (reason >= 1 && reason <= 3)
        return known_reasons[reason - 1];

    return "Unknown reason for invalid errcode";
}
1845 
1846 /* --END ERROR HANDLING-- */
1847 
CombineSpecificCodes(int error1_code,int error2_code,int error2_class)1848 static void CombineSpecificCodes(int error1_code, int error2_code, int error2_class)
1849 {
1850     int error_code;
1851 
1852     error_code = error1_code;
1853 
1854     error_ring_mutex_lock();
1855     {
1856         for (;;) {
1857             int error_class;
1858             int ring_idx;
1859             int ring_id;
1860             int generic_idx;
1861 
1862             if (convertErrcodeToIndexes(error_code, &ring_idx, &ring_id,
1863                                         &generic_idx) != 0 || generic_idx < 0 ||
1864                 ErrorRing[ring_idx].id != ring_id) {
1865                 break;
1866             }
1867 
1868             error_code = ErrorRing[ring_idx].prev_error;
1869 
1870             if (error_code == MPI_SUCCESS) {
1871                 ErrorRing[ring_idx].prev_error = error2_code;
1872                 break;
1873             }
1874 
1875             error_class = MPIR_ERR_GET_CLASS(error_code);
1876 
1877             if (error_class == MPI_ERR_OTHER) {
1878                 ErrorRing[ring_idx].prev_error &= ~(ERROR_CLASS_MASK);
1879                 ErrorRing[ring_idx].prev_error |= error2_class;
1880             }
1881         }
1882     }
1883     error_ring_mutex_unlock();
1884 }
1885 
ErrGetInstanceString(int errorcode,char msg[],int num_remaining)1886 static int ErrGetInstanceString(int errorcode, char msg[], int num_remaining)
1887 {
1888     int len;
1889 
1890     if (MPIR_CVAR_PRINT_ERROR_STACK) {
1891         MPL_strncpy(msg, ", error stack:\n", num_remaining);
1892         msg[num_remaining - 1] = '\0';
1893         len = (int) strlen(msg);
1894         msg += len;
1895         num_remaining -= len;
1896         /* note: this took the "fn" arg, but that appears to be unused
1897          * and is undocumented.  */
1898         MPIR_Err_print_stack_string(errorcode, msg, num_remaining);
1899         msg[num_remaining - 1] = '\0';
1900     } else {
1901         error_ring_mutex_lock();
1902         {
1903             while (errorcode != MPI_SUCCESS) {
1904                 int ring_idx;
1905                 int ring_id;
1906                 int generic_idx;
1907 
1908                 if (convertErrcodeToIndexes(errorcode, &ring_idx, &ring_id, &generic_idx) != 0) {
1909                     /* --BEGIN ERROR HANDLING-- */
1910                     MPL_error_printf("Invalid error code (%d) (error ring index %d invalid)\n",
1911                                      errorcode, ring_idx);
1912                     break;
1913                     /* --END ERROR HANDLING-- */
1914                 }
1915 
1916                 if (generic_idx < 0) {
1917                     break;
1918                 }
1919 
1920                 if (ErrorRing[ring_idx].id == ring_id) {
1921                     /* just keep clobbering old values until the
1922                      * end of the stack is reached */
1923                     MPL_snprintf(msg, num_remaining, ", %s", ErrorRing[ring_idx].msg);
1924                     msg[num_remaining - 1] = '\0';
1925                     errorcode = ErrorRing[ring_idx].prev_error;
1926                 } else {
1927                     break;
1928                 }
1929             }
1930         }
1931         error_ring_mutex_unlock();
1932     }
1933     /* FIXME: How do we determine that we failed to unwind the stack? */
1934     if (errorcode != MPI_SUCCESS)
1935         return 1;
1936 
1937     return 0;
1938 }
1939 
1940 #else
1941 #error MPICH_ERROR_MSG_LEVEL undefined or has invalid value
1942 #endif
1943 
1944 /* Common routines that are used by two or more error-message levels.
1945    Very simple routines are defined inline */
1946 #ifdef NEEDS_FIND_GENERIC_MSG_INDEX
1947 /*
1948  * Given a message string abbreviation (e.g., one that starts "**"), return
1949  * the corresponding index.  For the generic (non
1950  * parameterized messages), use idx = FindGenericMsgIndex("**msg");
1951  * Returns -1 on failure to find the matching message
1952  *
1953  * The values are in increasing, sorted order, so once we find a
1954  * comparison where the current generic_err_msg is greater than the
1955  * message we are attempting to match, we have missed the match and
1956  * there is an internal error (all short messages should exist in defmsg.h)
1957  */
1958 /* Question: Could be a service routine for message level >= generic */
FindGenericMsgIndex(const char msg[])1959 static int FindGenericMsgIndex(const char msg[])
1960 {
1961     int i, c;
1962     for (i = 0; i < generic_msgs_len; i++) {
1963         /* Check the sentinals to insure that the values are ok first */
1964         if (generic_err_msgs[i].sentinal1 != 0xacebad03 ||
1965             generic_err_msgs[i].sentinal2 != 0xcb0bfa11) {
1966             /* Something bad has happened! Don't risk trying the
1967              * short_name pointer; it may have been corrupted */
1968             break;
1969         }
1970         c = strcmp(generic_err_msgs[i].short_name, msg);
1971         if (c == 0)
1972             return i;
1973         if (c > 0) {
1974             /* In case the generic messages are not sorted exactly the
1975              * way that strcmp compares, we check for the case that
1976              * the short msg matches the current generic message.  If
1977              * that is the case, we do *not* fail */
1978             if (strncmp(generic_err_msgs[i].short_name, msg, strlen(msg)) != 0)
1979                 return -1;
1980         }
1981     }
1982     /* --BEGIN ERROR HANDLING-- */
1983     return -1;
1984     /* --END ERROR HANDLING-- */
1985 }
1986 #endif
1987