1 /*
2 * Copyright (C) by Argonne National Laboratory
3 * See COPYRIGHT in top-level directory
4 */
5
6 /* style: allow:fprintf:4 sig:0 */
7
8 #include "mpiimpl.h"
9
10 /* stdarg is required to handle the variable argument lists for
11 MPIR_Err_create_code */
12 #include <stdarg.h>
13 /* Define USE_ERR_CODE_VALIST to get the prototype for the valist version
14 of MPIR_Err_create_code in mpir_err.h (without this definition,
15 the prototype is not included). The "valist" version of the function
16 is used in only a few places: here and potentially in ROMIO. */
17 #define USE_ERR_CODE_VALIST
18
19 /* errcodes.h contains the macros used to access fields within an error
20 code and a description of the bits in an error code. A brief
21 version of that description is included below */
22
23 #include "errcodes.h"
24
25 /* defmsg is generated automatically from the source files and contains
26 all of the error messages, both the generic and specific. Depending
27 on the value of MPICH_ERROR_MSG_LEVEL, different amounts of message
28 information will be included from the file defmsg.h */
29 #include "defmsg.h"
30
31 /* stdio is needed for vsprintf and vsnprintf */
32 #include <stdio.h>
33
34 /*
35 === BEGIN_MPI_T_CVAR_INFO_BLOCK ===
36
37 categories:
38 - name : ERROR_HANDLING
39 description : cvars that control error handling behavior (stack traces, aborts, etc)
40
41 cvars:
42 - name : MPIR_CVAR_PRINT_ERROR_STACK
43 category : ERROR_HANDLING
44 type : boolean
45 default : true
46 class : none
47 verbosity : MPI_T_VERBOSITY_USER_BASIC
48 scope : MPI_T_SCOPE_LOCAL
49 description : >-
50 If true, print an error stack trace at error handling time.
51
52 - name : MPIR_CVAR_CHOP_ERROR_STACK
53 category : ERROR_HANDLING
54 type : int
55 default : 0
56 class : none
57 verbosity : MPI_T_VERBOSITY_USER_BASIC
58 scope : MPI_T_SCOPE_LOCAL
59 description : >-
60 If >0, truncate error stack output lines this many characters
61 wide. If 0, do not truncate, and if <0 use a sensible default.
62
63 === END_MPI_T_CVAR_INFO_BLOCK ===
64 */
65
66 /*
67 * Structure of this file
68 *
69 * This file contains several groups of routines used for error handling
70 * and reporting.
71 *
72 * The first group provides memory for the MPIR_Errhandler objects
73 * and the routines to free and manipulate them
74 *
75 * The second group provides routines to call the appropriate error handler,
76 * which may be predefined or user defined. These also return the
77 * appropriate return code. These routines have names of the form
78 * MPIR_Err_return_xxx. Specifically, for each of the MPI types on which an
79 * error handler can be defined, there is an MPIR_Err_return_xxx routine
80 * that determines what error handler function to call and whether to
81 * abort the program. The comm and win versions are here; ROMIO
82 * provides its own routines for invoking the error handlers for Files.
83 *
84 * The third group of code handles the error messages. There are four
85 * options, controlled by the value of MPICH_ERROR_MSG_LEVEL.
86 *
87 * MPICH_ERROR_MSG__NONE - No text messages at all
88 * MPICH_ERROR_MSG__CLASS - Only messages for the MPI error classes
89 * MPICH_ERROR_MSG__GENERIC - Only predefined messages for the MPI error codes
90 * MPICH_ERROR_MSG__ALL - Instance specific error messages (and error message
91 * stack)
92 *
93 * In only the latter (MPICH_ERROR_MSG__ALL) case are instance-specific
94 * messages maintained (including the error message "stack" that you may
95 * see mentioned in various places). In the other cases, an error code
96 * identifies a fixed message string (unless MPICH_ERROR_MSG__NONE,
97 * when there are no strings) from the "generic" strings defined in defmsg.h
98 *
99 * A major subgroup in this section is the code to handle the instance-specific
100 * messages (MPICH_ERROR_MSG__ALL only).
101 *
102 * An MPI error code is made up of a number of fields (see errcodes.h)
103 * These are:
104 * is-dynamic? specific-msg-sequence# specific-msg-index
105 * generic-code is-fatal? class
106 *
107 * There are macros (defined in errcodes.h) that define these fields,
108 * their sizes, and masks and shifts that may be used to extract them.
109 */
110
111 static int did_err_init = FALSE; /* helps us solve a bootstrapping problem */
112
113 /* A few prototypes. These routines are called from the MPIR_Err_return
114 routines. checkValidErrcode depends on the MPICH_ERROR_MSG_LEVEL.
115 If the error code is *not* valid, checkValidErrcode may replace it
116 with a valid value. */
117
118 static int checkValidErrcode(int, const char[], int *);
119
120 #if MPICH_ERROR_MSG_LEVEL >= MPICH_ERROR_MSG__ALL
121 static int ErrGetInstanceString(int, char[], int);
122 static void MPIR_Err_stack_init(void);
123 static int checkForUserErrcode(int);
124 #else
125 /* We only need special handling for user error codes when we support the
126 error message stack */
127 #define checkForUserErrcode(_a) _a
128 #endif /* ERROR_MSG_LEVEL >= ERROR_MSG_ALL */
129
130
131 /* ------------------------------------------------------------------------- */
132 /* Provide the MPIR_Errhandler space and the routines to free and set them
133 from C++ and Fortran */
134 /* ------------------------------------------------------------------------- */
135 /*
136 * Error handlers. These are handled just like the other opaque objects
137 * in MPICH
138 */
139
140 #ifndef MPIR_ERRHANDLER_PREALLOC
141 #define MPIR_ERRHANDLER_PREALLOC 8
142 #endif
143
144 /* Preallocated errorhandler objects */
145 MPIR_Errhandler MPIR_Errhandler_builtin[MPIR_ERRHANDLER_N_BUILTIN];
146 MPIR_Errhandler MPIR_Errhandler_direct[MPIR_ERRHANDLER_PREALLOC];
147
148 MPIR_Object_alloc_t MPIR_Errhandler_mem = { 0, 0, 0, 0, MPIR_ERRHANDLER,
149 sizeof(MPIR_Errhandler),
150 MPIR_Errhandler_direct,
151 MPIR_ERRHANDLER_PREALLOC,
152 NULL
153 };
154
/* Release an error handler object by handing it back to the
 * MPIR_Errhandler_mem allocator. */
void MPIR_Errhandler_free(MPIR_Errhandler * errhan_ptr)
{
    MPIR_Handle_obj_free(&MPIR_Errhandler_mem, errhan_ptr);
}
159
MPIR_Err_init(void)160 void MPIR_Err_init(void)
161 {
162 /* these are "stub" objects, so the other fields (which are statically
163 * initialized to zero) don't really matter */
164 MPIR_Errhandler_builtin[0].handle = MPI_ERRORS_ARE_FATAL;
165 MPIR_Errhandler_builtin[1].handle = MPI_ERRORS_RETURN;
166 MPIR_Errhandler_builtin[2].handle = MPIR_ERRORS_THROW_EXCEPTIONS;
167
168 #if MPICH_ERROR_MSG_LEVEL >= MPICH_ERROR_MSG__ALL
169 MPIR_Err_stack_init();
170 #endif
171 did_err_init = TRUE;
172 }
173
174 /* Language Callbacks */
175
176 #ifdef HAVE_CXX_BINDING
177 /* This routine is used to install a callback used by the C++ binding
178 to invoke the (C++) error handler. The callback routine is a C routine,
179 defined in the C++ binding. */
/* Mark the error handler `errhand` as a C++ handler and install `errcall`
 * as the process-wide callback used to invoke C++ error handlers. */
void MPII_Errhandler_set_cxx(MPI_Errhandler errhand, void (*errcall) (void))
{
    /* Signature under which the callback is stored in MPIR_Process */
    typedef void (*cxx_call_errfn_t) (int, int *, int *, void (*)(void));
    MPIR_Errhandler *eh_ptr;

    MPIR_Errhandler_get_ptr(errhand, eh_ptr);
    eh_ptr->language = MPIR_LANG__CXX;
    MPIR_Process.cxx_call_errfn = (cxx_call_errfn_t) errcall;
}
188 #endif /* HAVE_CXX_BINDING */
189
190 #if defined(HAVE_FORTRAN_BINDING) && !defined(HAVE_FINT_IS_INT)
/* Mark the error handler `errhand` as a Fortran handler so that it is
 * invoked through the Fortran calling path. */
void MPII_Errhandler_set_fc(MPI_Errhandler errhand)
{
    MPIR_Errhandler *eh_ptr;

    MPIR_Errhandler_get_ptr(errhand, eh_ptr);
    eh_ptr->language = MPIR_LANG__FORTRAN;
}
198
199 #endif
200
201 /* ------------------------------------------------------------------------- */
202 /* Group 2: These routines are called on error exit from most
203 top-level MPI routines to invoke the appropriate error handler.
204 Also included is the routine to call if MPI has not been
205 initialized (MPIR_Err_preinit) and to determine if an error code
206 represents a fatal error (MPIR_Err_is_fatal). */
207 /* ------------------------------------------------------------------------- */
208 /* Special error handler to call if we are not yet initialized, or if we
209 have finalized */
210 /* --BEGIN ERROR HANDLING-- */
MPIR_Err_preOrPostInit(void)211 void MPIR_Err_preOrPostInit(void)
212 {
213 if (MPL_atomic_load_int(&MPIR_Process.mpich_state) == MPICH_MPI_STATE__PRE_INIT) {
214 MPL_error_printf("Attempting to use an MPI routine before initializing MPICH\n");
215 } else if (MPL_atomic_load_int(&MPIR_Process.mpich_state) == MPICH_MPI_STATE__POST_FINALIZED) {
216 MPL_error_printf("Attempting to use an MPI routine after finalizing MPICH\n");
217 } else {
218 MPL_error_printf
219 ("Internal Error: Unknown state of MPI (neither initialized nor finalized)\n");
220 }
221 exit(1);
222 }
223
224 /* --END ERROR HANDLING-- */
225
226 /* Return true if the error code indicates a fatal error */
MPIR_Err_is_fatal(int errcode)227 int MPIR_Err_is_fatal(int errcode)
228 {
229 return (errcode & ERROR_FATAL_MASK) ? TRUE : FALSE;
230 }
231
232 /*
233 * This is the routine that is invoked by most MPI routines to
234 * report an error. It is legitimate to pass NULL for comm_ptr in order to get
235 * the default (MPI_COMM_WORLD) error handling.
236 */
/*
 * Report an error detected by the MPI routine `fcname` on communicator
 * `comm_ptr` and return the error code the caller should hand back.
 *
 * comm_ptr may be NULL, in which case the error handler of MPI_COMM_WORLD is
 * used.  Fatal error codes, a missing error handler, and MPI_ERRORS_ARE_FATAL
 * all go through MPIR_Handle_fatal_error.  For MPI_ERRORS_RETURN and
 * MPIR_ERRORS_THROW_EXCEPTIONS the (possibly adjusted) code is simply
 * returned; otherwise the user's handler is invoked according to its
 * language binding.
 */
int MPIR_Err_return_comm(MPIR_Comm * comm_ptr, const char fcname[], int errcode)
{
    const int error_class = ERROR_GET_CLASS(errcode);
    MPIR_Errhandler *errhandler = NULL;
    int call_user_handler;

    checkValidErrcode(error_class, fcname, &errcode);

    /* --BEGIN ERROR HANDLING-- */
    if (MPL_atomic_load_int(&MPIR_Process.mpich_state) == MPICH_MPI_STATE__PRE_INIT ||
        MPL_atomic_load_int(&MPIR_Process.mpich_state) == MPICH_MPI_STATE__POST_FINALIZED) {
        /* for whatever reason, we aren't initialized (perhaps error
         * during MPI_Init) */
        MPIR_Handle_fatal_error(MPIR_Process.comm_world, fcname, errcode);
        return MPI_ERR_INTERN;
    }
    /* --END ERROR HANDLING-- */

    MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, TERSE,
                    (MPL_DBG_FDEST, "MPIR_Err_return_comm(comm_ptr=%p, fcname=%s, errcode=%d)",
                     comm_ptr, fcname, errcode));

    /* Peek at the communicator's handler (if we were given a communicator) */
    if (comm_ptr != NULL) {
        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
        errhandler = comm_ptr->errhandler;
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
    }

    /* No handler: fall back to the default, which is the one on
     * MPI_COMM_WORLD.  This gives correct behavior when the error handler on
     * MPI_COMM_WORLD has been changed. */
    if (errhandler == NULL && MPIR_Process.comm_world) {
        comm_ptr = MPIR_Process.comm_world;
    }

    /* --BEGIN ERROR HANDLING-- */
    if (MPIR_Err_is_fatal(errcode) || comm_ptr == NULL) {
        /* Calls MPID_Abort */
        MPIR_Handle_fatal_error(comm_ptr, fcname, errcode);
        /* never get here */
    }
    /* --END ERROR HANDLING-- */

    MPIR_Assert(comm_ptr != NULL);

    /* comm_ptr may now be comm_world.  The lock is held for as long as the
     * errhandler is in use so that it cannot disappear out from under us. */
    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
    errhandler = comm_ptr->errhandler;

    /* --BEGIN ERROR HANDLING-- */
    if (errhandler == NULL || errhandler->handle == MPI_ERRORS_ARE_FATAL) {
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
        /* Calls MPID_Abort */
        MPIR_Handle_fatal_error(comm_ptr, fcname, errcode);
        /* never get here */
    }
    /* --END ERROR HANDLING-- */

    /* Check for the special case of a user-provided error code */
    errcode = checkForUserErrcode(errcode);

    call_user_handler = !(errhandler->handle == MPI_ERRORS_RETURN ||
                          errhandler->handle == MPIR_ERRORS_THROW_EXCEPTIONS);

    if (call_user_handler) {
        /* We pass a final 0 (for a null pointer) to these routines
         * because MPICH-1 expected that */
        switch (errhandler->language) {
            case MPIR_LANG__C:
                (*errhandler->errfn.C_Comm_Handler_function) (&comm_ptr->handle, &errcode, 0);
                break;
#ifdef HAVE_CXX_BINDING
            case MPIR_LANG__CXX:
                (*MPIR_Process.cxx_call_errfn) (0, &comm_ptr->handle, &errcode,
                                                (void (*)(void)) *errhandler->
                                                errfn.C_Comm_Handler_function);
                /* The C++ code throws an exception if the error handler
                 * returns something other than MPI_SUCCESS.  There is no
                 * "return" of an error code. */
                errcode = MPI_SUCCESS;
                break;
#endif /* CXX_BINDING */
#ifdef HAVE_FORTRAN_BINDING
            case MPIR_LANG__FORTRAN90:
            case MPIR_LANG__FORTRAN:
                {
                    /* If int and MPI_Fint aren't the same size, we need to
                     * convert.  As this is not performance critical, we
                     * do this even if MPI_Fint and int are the same size. */
                    MPI_Fint ferr = errcode;
                    MPI_Fint commhandle = comm_ptr->handle;
                    (*errhandler->errfn.F77_Handler_function) (&commhandle, &ferr);
                }
                break;
#endif /* FORTRAN_BINDING */
            default:
                /* other languages: nothing to invoke */
                break;
        }
    }

    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
    return errcode;
}
341
342 /*
343 * MPI routines that detect errors on window objects use this to report errors
344 */
/*
 * Report an error detected by the MPI routine `fcname` on window `win_ptr`
 * and return the error code the caller should hand back.
 *
 * Without a window or without a window error handler the request is
 * delegated to MPIR_Err_return_comm with a NULL communicator.  Fatal codes
 * and MPI_ERRORS_ARE_FATAL go through MPIR_Handle_fatal_error; for
 * MPI_ERRORS_RETURN and MPIR_ERRORS_THROW_EXCEPTIONS the code is returned
 * as is; otherwise the user's handler is invoked according to its language.
 */
int MPIR_Err_return_win(MPIR_Win * win_ptr, const char fcname[], int errcode)
{
    const int error_class = ERROR_GET_CLASS(errcode);

    if (win_ptr == NULL || win_ptr->errhandler == NULL) {
        return MPIR_Err_return_comm(NULL, fcname, errcode);
    }

    /* We don't test for MPI initialized because to call this routine,
     * we will have had to call an MPI routine that would make that test */

    checkValidErrcode(error_class, fcname, &errcode);

    MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, TERSE,
                    (MPL_DBG_FDEST, "MPIR_Err_return_win(win_ptr=%p, fcname=%s, errcode=%d)",
                     win_ptr, fcname, errcode));

    /* --BEGIN ERROR HANDLING-- */
    if (MPIR_Err_is_fatal(errcode) ||
        win_ptr == NULL || win_ptr->errhandler == NULL ||
        win_ptr->errhandler->handle == MPI_ERRORS_ARE_FATAL) {
        /* Calls MPID_Abort */
        MPIR_Handle_fatal_error(NULL, fcname, errcode);
    }
    /* --END ERROR HANDLING-- */

    /* Check for the special case of a user-provided error code */
    errcode = checkForUserErrcode(errcode);

    /* Only handlers other than "return" and "throw exceptions" are invoked */
    if (win_ptr->errhandler->handle != MPI_ERRORS_RETURN &&
        win_ptr->errhandler->handle != MPIR_ERRORS_THROW_EXCEPTIONS) {
        /* We pass a final 0 (for a null pointer) to these routines
         * because MPICH-1 expected that */
        switch (win_ptr->errhandler->language) {
            case MPIR_LANG__C:
                (*win_ptr->errhandler->errfn.C_Win_Handler_function) (&win_ptr->handle,
                                                                      &errcode, 0);
                break;
#ifdef HAVE_CXX_BINDING
            case MPIR_LANG__CXX:
                (*MPIR_Process.cxx_call_errfn) (2, &win_ptr->handle, &errcode,
                                                (void (*)(void)) *win_ptr->errhandler->
                                                errfn.C_Win_Handler_function);
                /* The C++ code throws an exception if the error handler
                 * returns something other than MPI_SUCCESS.  There is no
                 * "return" of an error code. */
                errcode = MPI_SUCCESS;
                break;
#endif /* CXX_BINDING */
#ifdef HAVE_FORTRAN_BINDING
            case MPIR_LANG__FORTRAN90:
            case MPIR_LANG__FORTRAN:
                {
                    /* If int and MPI_Fint aren't the same size, we need to
                     * convert.  As this is not performance critical, we
                     * do this even if MPI_Fint and int are the same size. */
                    MPI_Fint ferr = errcode;
                    MPI_Fint winhandle = win_ptr->handle;
                    (*win_ptr->errhandler->errfn.F77_Handler_function) (&winhandle, &ferr);
                }
                break;
#endif /* FORTRAN_BINDING */
            default:
                /* other languages: nothing to invoke */
                break;
        }
    }

    return errcode;
}
415
416 /* ------------------------------------------------------------------------- */
417 /* Group 3: Routines to handle error messages. These are organized into
418 * several subsections:
419 * General service routines (used by more than one error reporting level)
420 * Routines of specific error message levels
421 *
422 */
423 /* ------------------------------------------------------------------------- */
424 /* Forward reference */
/* Defined once per MPICH_ERROR_MSG_LEVEL further down in this file */
static void CombineSpecificCodes(int error1_code, int error2_code, int error2_class);
static const char *get_class_msg(int error_class);
427
428 /* --BEGIN ERROR HANDLING-- */
/*
 * Build the message "Fatal error in <fcname>: <text for errcode>" and pass it
 * to MPID_Abort together with the communicator and the error code.
 */
void MPIR_Handle_fatal_error(MPIR_Comm * comm_ptr, const char fcname[], int errcode)
{
    /* Maximum length of the error message built here.  Use the size of the
     * MPI error strings unless that is too short (less than 4096) to hold
     * enough information about the error.  This definition is used only
     * within this routine. */
#if MPI_MAX_ERROR_STRING < 4096
#define MAX_ERRMSG_STRING 4096
#else
#define MAX_ERRMSG_STRING MPI_MAX_ERROR_STRING
#endif
    char error_msg[MAX_ERRMSG_STRING];
    int prefix_len;

    /* FIXME: Not internationalized.  Since we are using MPIR_Err_get_string,
     * we are assuming that the code is still able to execute a full
     * MPICH error code to message conversion. */
    MPL_snprintf(error_msg, MAX_ERRMSG_STRING, "Fatal error in %s: ", fcname);

    /* Append the text for errcode right after the prefix */
    prefix_len = (int) strlen(error_msg);
    MPIR_Err_get_string(errcode, error_msg + prefix_len, MAX_ERRMSG_STRING - prefix_len, NULL);

    /* The third argument is a return code.  We simply pass the error code. */
    MPID_Abort(comm_ptr, MPI_SUCCESS, errcode, error_msg);
}
454
455 /* --END ERROR HANDLING-- */
456
457 /* Check for a valid error code. If the code is not valid, attempt to
458 print out something sensible; reset the error code to have class
459 ERR_UNKNOWN */
460 /* FIXME: Now that error codes are chained, this does not produce a valid
461 error code since there is no valid ring index corresponding to this code */
462 /* FIXME: No one uses the return value */
checkValidErrcode(int error_class,const char fcname[],int * errcode_p)463 static int checkValidErrcode(int error_class, const char fcname[], int *errcode_p)
464 {
465 int errcode = *errcode_p;
466 int rc = 0;
467
468 if (error_class > MPICH_ERR_LAST_MPIX) {
469 /* --BEGIN ERROR HANDLING-- */
470 if (errcode & ~ERROR_CLASS_MASK) {
471 MPL_error_printf
472 ("INTERNAL ERROR: Invalid error class (%d) encountered while returning from\n"
473 "%s. Please file a bug report.\n", error_class, fcname);
474 /* Note that we don't try to print the error stack; if the
475 * error code is invalid, it can't be used to find
476 * the error stack. We could consider dumping the
477 * contents of the error ring instead (without trying
478 * to interpret them) */
479 } else {
480 /* FIXME: The error stack comment only applies to MSG_ALL */
481 MPL_error_printf
482 ("INTERNAL ERROR: Invalid error class (%d) encountered while returning from\n"
483 "%s. Please file a bug report. No error stack is available.\n", error_class,
484 fcname);
485 }
486 /* FIXME: We probably want to set this to MPI_ERR_UNKNOWN
487 * and discard the rest of the bits */
488 errcode = (errcode & ~ERROR_CLASS_MASK) | MPI_ERR_UNKNOWN;
489 rc = 1;
490 /* --END ERROR HANDLING-- */
491 }
492 *errcode_p = errcode;
493 return rc;
494 }
495
496 /* Append an error code, error2, to the end of a list of messages in the error
497 ring whose head is encoded in error1_code. An error code pointing at the
498 combination is returned. If the list of messages does not terminate cleanly
499 (i.e. ring wrap has occurred), then the append is not performed, and error1
500 is returned (although it may include the class of error2 if the class of
501 error1 was MPI_ERR_OTHER). */
MPIR_Err_combine_codes(int error1,int error2)502 int MPIR_Err_combine_codes(int error1, int error2)
503 {
504 int error1_code = error1;
505 int error2_code = error2;
506 int error2_class;
507
508 /* If either error code is success, return the other */
509 if (error1_code == MPI_SUCCESS)
510 return error2_code;
511 if (error2_code == MPI_SUCCESS)
512 return error1_code;
513
514 /* If an error code is dynamic, return that. If both are, we choose
515 * error1. */
516 if (error1_code & ERROR_DYN_MASK)
517 return error1_code;
518 if (error2_code & ERROR_DYN_MASK)
519 return error2_code;
520
521 error2_class = MPIR_ERR_GET_CLASS(error2_code);
522 if (error2_class < MPI_SUCCESS || error2_class > MPICH_ERR_LAST_MPIX) {
523 error2_class = MPI_ERR_OTHER;
524 }
525
526 /* Note that this call may simply discard an error code if the error
527 * message level does not support multiple codes */
528 CombineSpecificCodes(error1_code, error2_code, error2_class);
529
530 if (MPIR_ERR_GET_CLASS(error1_code) == MPI_ERR_OTHER) {
531 error1_code = (error1_code & ~(ERROR_CLASS_MASK)) | error2_class;
532 }
533
534 return error1_code;
535 }
536
537 /* FIXME: This routine isn't quite right yet */
538 /*
539 * Notes:
540 * One complication is that in the instance-specific case, a ??
541 */
542 /*
543 * Given an errorcode, place the corresponding message in msg[length].
544 * The argument fn must be NULL and is otherwise ignored.
545 */
/*
 * Write the message text for `errorcode` into `msg`.
 *
 * errorcode - the MPI error code to describe
 * msg       - destination buffer; left untouched when it is NULL
 * length    - size of msg in bytes; nothing is written when it is <= 0
 * fn        - must be NULL (asserted); otherwise unused
 *
 * Three kinds of code are distinguished: dynamic codes (converted through
 * MPIR_Process.errcode_to_string), codes that consist of a class only, and
 * all other codes, for which the class message is written first and then
 * followed by whatever the configured MPICH_ERROR_MSG_LEVEL can add.
 */
void MPIR_Err_get_string(int errorcode, char *msg, int length, MPIR_Err_get_class_string_func_t fn)
{
    int error_class;
    int len, num_remaining = length;

    /* The fn (fourth) argument was added improperly and is no longer
     * used. */
    MPIR_Assert(fn == NULL);

    /* There was code to set num_remaining to MPI_MAX_ERROR_STRING
     * if it was zero.  But based on the usage of this routine,
     * such a choice would overwrite memory.  A negative length is rejected
     * as well: the code below stores to msg[num_remaining - 1], which would
     * be outside of the buffer. */
    if (msg == NULL || num_remaining <= 0)
        goto fn_exit;

    /* Convert the code to a string.  The cases are:
     * simple class.  Find the corresponding string.
     * <not done>
     * if (user code) { go to code that extracts user error messages }
     * else {
     * is specific message code set and available?  if so, use it
     * else use generic code (lookup index in table of messages)
     * }
     */
    if (errorcode & ERROR_DYN_MASK) {
        /* This is a dynamically created error code (e.g., with
         * MPI_Err_add_class).  If a dynamic error code was created,
         * the function to convert them into strings has been set.
         * Check to see that it was; this is a safeguard against a
         * bogus error code */
        if (!MPIR_Process.errcode_to_string) {
            /* FIXME: not internationalized */
            /* --BEGIN ERROR HANDLING-- */
            if (MPL_strncpy(msg, "Undefined dynamic error code", num_remaining)) {
                msg[num_remaining - 1] = '\0';
            }
            /* --END ERROR HANDLING-- */
        } else {
            if (MPL_strncpy(msg, MPIR_Process.errcode_to_string(errorcode), num_remaining)) {
                msg[num_remaining - 1] = '\0';
            }
        }
    } else if ((errorcode & ERROR_CLASS_MASK) == errorcode) {
        /* The code consists of nothing but a class */
        if (MPL_strncpy(msg, get_class_msg(errorcode), num_remaining)) {
            msg[num_remaining - 1] = '\0';
        }
    } else {
        /* print the class message first */
        /* FIXME: Why print the class message first?  The instance
         * message is supposed to be complete by itself. */
        error_class = MPIR_ERR_GET_CLASS(errorcode);

        MPL_strncpy(msg, get_class_msg(error_class), num_remaining);
        msg[num_remaining - 1] = '\0';
        /* Continue right after the class message */
        len = (int) strlen(msg);
        msg += len;
        num_remaining -= len;

        /* then print the stack or the last specific error message */

        /* FIXME: Replace with function to add instance string or
         * error code string */
#if MPICH_ERROR_MSG_LEVEL >= MPICH_ERROR_MSG__ALL
        if (ErrGetInstanceString(errorcode, msg, num_remaining))
            goto fn_exit;
#elif MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG__CLASS
        {
            int generic_idx;

            /* The stored generic field is decoded with a bias of one */
            generic_idx = ((errorcode & ERROR_GENERIC_MASK) >> ERROR_GENERIC_SHIFT) - 1;

            if (generic_idx >= 0) {
                MPL_snprintf(msg, num_remaining, ", %s", generic_err_msgs[generic_idx].long_name);
                msg[num_remaining - 1] = '\0';
                goto fn_exit;
            }
        }
#endif /* MSG_LEVEL >= MSG_ALL */
    }

  fn_exit:
    return;
}
635
636 #if MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG__NONE
637 /* No error message support */
MPIR_Err_create_code(int lastcode,int fatal,const char fcname[],int line,int error_class,const char generic_msg[],const char specific_msg[],...)638 int MPIR_Err_create_code(int lastcode, int fatal, const char fcname[],
639 int line, int error_class, const char generic_msg[],
640 const char specific_msg[], ...)
641 {
642 MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, TYPICAL,
643 (MPL_DBG_FDEST, "%sError created: last=%#010x class=%#010x %s(%d) %s",
644 fatal ? "Fatal " : "", lastcode, error_class, fcname, line, generic_msg));
645 return (lastcode == MPI_SUCCESS) ? error_class : lastcode;
646 }
647
MPIR_Err_create_code_valist(int lastcode,int fatal,const char fcname[],int line,int error_class,const char generic_msg[],const char specific_msg[],va_list Argp)648 int MPIR_Err_create_code_valist(int lastcode, int fatal, const char fcname[],
649 int line, int error_class,
650 const char generic_msg[], const char specific_msg[], va_list Argp)
651 {
652 return (lastcode == MPI_SUCCESS) ? error_class : lastcode;
653 }
654
655 /* Internal routines */
/* Nothing to combine at this message level: the arguments are ignored. */
static void CombineSpecificCodes(int error1_code, int error2_code, int error2_class)
{
    (void) error1_code;
    (void) error2_code;
    (void) error2_class;
}
659
/* Message texts are not available at this message level, so every class
 * maps to the same fixed string. */
static const char *get_class_msg(int error_class)
{
    /* FIXME: Not internationalized */
    static const char no_text[] = "Error message texts are not available";

    (void) error_class;
    return no_text;
}
665
666 #elif MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG__CLASS
667 /* Only class error messages. Note this is nearly the same as
668 MPICH_ERROR_MSG_LEVEL == NONE, since the handling of error codes
669 is the same */
MPIR_Err_create_code(int lastcode,int fatal,const char fcname[],int line,int error_class,const char generic_msg[],const char specific_msg[],...)670 int MPIR_Err_create_code(int lastcode, int fatal, const char fcname[],
671 int line, int error_class, const char generic_msg[],
672 const char specific_msg[], ...)
673 {
674 MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, TYPICAL,
675 (MPL_DBG_FDEST, "%sError created: last=%#010x class=%#010x %s(%d) %s",
676 fatal ? "Fatal " : "", lastcode, error_class, fcname, line, generic_msg));
677 return (lastcode == MPI_SUCCESS) ? error_class : lastcode;
678 }
679
MPIR_Err_create_code_valist(int lastcode,int fatal,const char fcname[],int line,int error_class,const char generic_msg[],const char specific_msg[],va_list Argp)680 int MPIR_Err_create_code_valist(int lastcode, int fatal, const char fcname[],
681 int line, int error_class,
682 const char generic_msg[], const char specific_msg[], va_list Argp)
683 {
684 return (lastcode == MPI_SUCCESS) ? error_class : lastcode;
685 }
686
/* Nothing to combine at this message level: the arguments are ignored. */
static void CombineSpecificCodes(int error1_code, int error2_code, int error2_class)
{
    (void) error1_code;
    (void) error2_code;
    (void) error2_class;
}
690
get_class_msg(int error_class)691 static const char *get_class_msg(int error_class)
692 {
693 if (error_class >= 0 && error_class < MPIR_MAX_ERROR_CLASS_INDEX) {
694 return classToMsg[error_class];
695 } else {
696 /* --BEGIN ERROR HANDLING-- */
697 return "Unknown error class";
698 /* --END ERROR HANDLING-- */
699 }
700 }
701
702 #elif MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG__GENERIC
/* This message level looks up generic messages by name */
#define NEEDS_FIND_GENERIC_MSG_INDEX
static int FindGenericMsgIndex(const char generic_msg[]);
705
706 /* Only generic error messages (more than class, but all predefined) */
707
MPIR_Err_create_code(int lastcode,int fatal,const char fcname[],int line,int error_class,const char generic_msg[],const char specific_msg[],...)708 int MPIR_Err_create_code(int lastcode, int fatal, const char fcname[],
709 int line, int error_class, const char generic_msg[],
710 const char specific_msg[], ...)
711 {
712 int rc;
713 va_list Argp;
714 va_start(Argp, specific_msg);
715 MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, TYPICAL,
716 (MPL_DBG_FDEST, "%sError created: last=%#010x class=%#010x %s(%d) %s",
717 fatal ? "Fatal " : "", lastcode, error_class, fcname, line, generic_msg));
718 rc = MPIR_Err_create_code_valist(lastcode, fatal, fcname, line, error_class, generic_msg,
719 specific_msg, Argp);
720 va_end(Argp);
721 /* Looks like Coverity has a hard time understanding that logic that
722 * (error_class != MPI_SUCCESS => rc != MPI_SUCCESS), so adding an explicit assertion here. */
723 MPIR_Assert(error_class == MPI_SUCCESS || rc != MPI_SUCCESS);
724 return rc;
725 }
726
MPIR_Err_create_code_valist(int lastcode,int fatal,const char fcname[],int line,int error_class,const char generic_msg[],const char specific_msg[],va_list Argp)727 int MPIR_Err_create_code_valist(int lastcode, int fatal, const char fcname[],
728 int line, int error_class,
729 const char generic_msg[], const char specific_msg[], va_list Argp)
730 {
731 int generic_idx;
732 int errcode = lastcode;
733 if (lastcode == MPI_SUCCESS) {
734 generic_idx = FindGenericMsgIndex(generic_msg);
735 if (generic_idx >= 0) {
736 errcode = (generic_idx << ERROR_GENERIC_SHIFT) | error_class;
737 if (fatal)
738 errcode |= ERROR_FATAL_MASK;
739 }
740 }
741 return errcode;
742 }
743
/* Nothing to combine at this message level: the arguments are ignored. */
static void CombineSpecificCodes(int error1_code, int error2_code, int error2_class)
{
    (void) error1_code;
    (void) error2_code;
    (void) error2_class;
}
747
get_class_msg(int error_class)748 static const char *get_class_msg(int error_class)
749 {
750 if (error_class >= 0 && error_class < MPIR_MAX_ERROR_CLASS_INDEX) {
751 return generic_err_msgs[class_to_index[error_class]].long_name;
752 } else {
753 /* --BEGIN ERROR HANDLING-- */
754 return "Unknown error class";
755 /* --END ERROR HANDLING-- */
756 }
757 }
758
759 #elif MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG__ALL
760 /* General error message support, including the error message stack */
761
/* Helpers that validate and decode error codes */
static int checkErrcodeIsValid(int);
static const char *ErrcodeInvalidReasonStr(int);
static int convertErrcodeToIndexes(int errcode, int *ring_idx, int *ring_id, int *generic_idx);
static void ErrcodeCreateID(int error_class, int generic_idx, const char *msg, int *id, int *seq);

/* Message table lookups */
#define NEEDS_FIND_GENERIC_MSG_INDEX
static int FindGenericMsgIndex(const char[]);
static int FindSpecificMsgIndex(const char[]);

/* Message formatting and output */
static int vsnprintf_mpi(char *str, size_t maxlen, const char *fmt_orig, va_list list);
static void MPIR_Err_print_stack_string(int errcode, char *str, int maxlen);

/* Number of entries in the error message ring */
#define MAX_ERROR_RING ERROR_SPECIFIC_INDEX_SIZE
/* Capacity of MPIR_Err_msg::location, not counting one extra byte */
#define MAX_LOCATION_LEN 63
774
775 /* The maximum error string in this case may be a multi-line message,
776 constructed from multiple entries in the error message ring. The
777 individual ring messages should be shorter than MPI_MAX_ERROR_STRING,
778 perhaps as small as 256. We define a separate value for the error lines.
779 */
780 #define MPIR_MAX_ERROR_LINE 256
781
782 /* See the description above for the fields in this structure */
783 typedef struct MPIR_Err_msg {
784 int id;
785 int prev_error;
786 int use_user_error_code;
787 int user_error_code;
788
789 char location[MAX_LOCATION_LEN + 1];
790 char msg[MPIR_MAX_ERROR_LINE + 1];
791 } MPIR_Err_msg_t;
792
793 static MPIR_Err_msg_t ErrorRing[MAX_ERROR_RING];
794 static volatile unsigned int error_ring_loc = 0;
795 static volatile unsigned int max_error_ring_loc = 0;
796
797 /* FIXME: This needs to be made consistent with the different thread levels,
798 since in the "global" thread level, an extra thread mutex is not required. */
/*
 * Locking for the error ring.  Three configurations:
 *   - MPID_REQUIRES_THREAD_SAFETY: the mutex is always taken,
 *   - MPICH_IS_THREADED: the mutex is taken only after MPIR_Err_init has run
 *     (did_err_init) and inside MPIR_THREAD_CHECK_BEGIN/END,
 *   - otherwise: all operations expand to nothing.
 * The create/destroy macros take a pointer through which the mutex routine
 * reports its status.
 */
#if defined(MPID_REQUIRES_THREAD_SAFETY)
/* if the device requires internal MPICH routines to be thread safe, the
   MPID_THREAD_CHECK macros are not appropriate */
static MPID_Thread_mutex_t error_ring_mutex;
#define error_ring_mutex_create(_mpi_errno_p_)                  \
    MPID_Thread_mutex_create(&error_ring_mutex, _mpi_errno_p_)
/* The parameter name must match the name used in the expansion; it used to
 * be spelled without the trailing underscore, leaving the expansion with an
 * undeclared identifier. */
#define error_ring_mutex_destroy(_mpi_errno_p_)                 \
    MPID_Thread_mutex_destroy(&error_ring_mutex, _mpi_errno_p_)
#define error_ring_mutex_lock()                                 \
    do {                                                        \
        int err;                                                \
        MPID_Thread_mutex_lock(&error_ring_mutex, &err);        \
    } while (0)
#define error_ring_mutex_unlock()                               \
    do {                                                        \
        int err;                                                \
        MPID_Thread_mutex_unlock(&error_ring_mutex, &err);      \
    } while (0)
#elif defined(MPICH_IS_THREADED)
static MPID_Thread_mutex_t error_ring_mutex;
#define error_ring_mutex_create(_mpi_errno_p) MPID_Thread_mutex_create(&error_ring_mutex,_mpi_errno_p)
#define error_ring_mutex_destroy(_mpi_errno_p) MPID_Thread_mutex_destroy(&error_ring_mutex,_mpi_errno_p)
#define error_ring_mutex_lock()                                 \
    do {                                                        \
        int err;                                                \
        if (did_err_init) {                                     \
            MPIR_THREAD_CHECK_BEGIN;                            \
            MPID_Thread_mutex_lock(&error_ring_mutex,&err);     \
            MPIR_THREAD_CHECK_END;                              \
        }                                                       \
    } while (0)
#define error_ring_mutex_unlock()                               \
    do {                                                        \
        int err;                                                \
        if (did_err_init) {                                     \
            MPIR_THREAD_CHECK_BEGIN;                            \
            MPID_Thread_mutex_unlock(&error_ring_mutex,&err);   \
            MPIR_THREAD_CHECK_END;                              \
        }                                                       \
    } while (0)
#else
#define error_ring_mutex_create(_a)
#define error_ring_mutex_destroy(_a)
#define error_ring_mutex_lock()
#define error_ring_mutex_unlock()
#endif /* REQUIRES_THREAD_SAFETY */
845
846
MPIR_Err_create_code(int lastcode,int fatal,const char fcname[],int line,int error_class,const char generic_msg[],const char specific_msg[],...)847 int MPIR_Err_create_code(int lastcode, int fatal, const char fcname[],
848 int line, int error_class, const char generic_msg[],
849 const char specific_msg[], ...)
850 {
851 int rc;
852 va_list Argp;
853 va_start(Argp, specific_msg);
854 MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, TYPICAL,
855 (MPL_DBG_FDEST, "%sError created: last=%#010x class=%#010x %s(%d) %s",
856 fatal ? "Fatal " : "", lastcode, error_class, fcname, line, generic_msg));
857 rc = MPIR_Err_create_code_valist(lastcode, fatal, fcname, line, error_class, generic_msg,
858 specific_msg, Argp);
859 va_end(Argp);
860 return rc;
861 }
862
863 /*
864 * This is the real routine for generating an error code. It takes
865 * a va_list so that it can be called by any routine that accepts a
866 * variable number of arguments.
867 */
MPIR_Err_create_code_valist(int lastcode,int fatal,const char fcname[],int line,int error_class,const char generic_msg[],const char specific_msg[],va_list Argp)868 int MPIR_Err_create_code_valist(int lastcode, int fatal, const char fcname[],
869 int line, int error_class,
870 const char generic_msg[], const char specific_msg[], va_list Argp)
871 {
872 int err_code;
873 int generic_idx;
874 int use_user_error_code = 0;
875 int user_error_code = -1;
876 char user_ring_msg[MPIR_MAX_ERROR_LINE + 1];
877
878 /* Create the code from the class and the message ring index */
879
880 /* Check that lastcode is valid */
881 if (lastcode != MPI_SUCCESS) {
882 int reason;
883 reason = checkErrcodeIsValid(lastcode);
884 if (reason) {
885 /* --BEGIN ERROR HANDLING-- */
886 MPL_error_printf("INTERNAL ERROR: invalid error code %x (%s) in %s:%d\n",
887 lastcode, ErrcodeInvalidReasonStr(reason), fcname, line);
888 lastcode = MPI_SUCCESS;
889 /* --END ERROR HANDLING-- */
890 }
891 }
892
893 /* FIXME: ERR_OTHER is overloaded; this may mean "OTHER" or it may
894 * mean "No additional error, just routine stack info" */
895 if (error_class == MPI_ERR_OTHER) {
896 if (MPIR_ERR_GET_CLASS(lastcode) > MPI_SUCCESS &&
897 MPIR_ERR_GET_CLASS(lastcode) <= MPICH_ERR_LAST_MPIX) {
898 /* If the last class is more specific (and is valid), then pass it
899 * through */
900 error_class = MPIR_ERR_GET_CLASS(lastcode);
901 } else {
902 error_class = MPI_ERR_OTHER;
903 }
904 }
905
906 /* Handle special case of MPI_ERR_IN_STATUS. According to the standard,
907 * the code must be equal to the class. See section 3.7.5.
908 * Information on the particular error is in the MPI_ERROR field
909 * of the status. */
910 if (error_class == MPI_ERR_IN_STATUS) {
911 return MPI_ERR_IN_STATUS;
912 }
913
914 err_code = error_class;
915
916 /* Handle the generic message. This selects a subclass, based on a text
917 * string */
918 generic_idx = FindGenericMsgIndex(generic_msg);
919 if (generic_idx >= 0) {
920 if (strcmp(generic_err_msgs[generic_idx].short_name, "**user") == 0) {
921 use_user_error_code = 1;
922 /* This is a special case. The format is
923 * "**user", "**userxxx %d", intval
924 * (generic, specific, parameter). In this
925 * case we must ... save the user value because
926 * we store it explicitly in the ring.
927 * We do this here because we cannot both access the
928 * user error code and pass the argp to vsnprintf_mpi . */
929 if (specific_msg) {
930 const char *specific_fmt;
931 int specific_idx;
932 user_error_code = va_arg(Argp, int);
933 specific_idx = FindSpecificMsgIndex(specific_msg);
934 if (specific_idx >= 0) {
935 specific_fmt = specific_err_msgs[specific_idx].long_name;
936 } else {
937 specific_fmt = specific_msg;
938 }
939 MPL_snprintf(user_ring_msg, sizeof(user_ring_msg), specific_fmt, user_error_code);
940 } else {
941 user_ring_msg[0] = 0;
942 }
943 }
944 err_code |= (generic_idx + 1) << ERROR_GENERIC_SHIFT;
945 } else {
946 /* TODO: lookup index for class error message */
947 err_code &= ~ERROR_GENERIC_MASK;
948
949 #ifdef MPICH_DBG_OUTPUT
950 {
951 if (generic_msg[0] == '*' && generic_msg[1] == '*') {
952 MPL_error_printf("INTERNAL ERROR: Could not find %s in list of messages\n",
953 generic_msg);
954 }
955 }
956 #endif /* DBG_OUTPUT */
957 }
958
959 /* Handle the instance-specific part of the error message */
960 {
961 int specific_idx;
962 const char *specific_fmt = 0;
963 int ring_idx, ring_seq = 0;
964 char *ring_msg;
965
966 error_ring_mutex_lock();
967 {
968 /* Get the next entry in the ring; keep track of what part of the
969 * ring is in use (max_error_ring_loc) */
970 ring_idx = error_ring_loc++;
971 if (error_ring_loc >= MAX_ERROR_RING)
972 error_ring_loc %= MAX_ERROR_RING;
973 if (error_ring_loc > max_error_ring_loc)
974 max_error_ring_loc = error_ring_loc;
975
976 ring_msg = ErrorRing[ring_idx].msg;
977
978 if (specific_msg != NULL) {
979 specific_idx = FindSpecificMsgIndex(specific_msg);
980 if (specific_idx >= 0) {
981 specific_fmt = specific_err_msgs[specific_idx].long_name;
982 } else {
983 specific_fmt = specific_msg;
984 }
985 /* See the code above for handling user errors */
986 if (!use_user_error_code) {
987 vsnprintf_mpi(ring_msg, MPIR_MAX_ERROR_LINE, specific_fmt, Argp);
988 } else {
989 MPL_strncpy(ring_msg, user_ring_msg, MPIR_MAX_ERROR_LINE);
990 }
991 } else if (generic_idx >= 0) {
992 MPL_strncpy(ring_msg, generic_err_msgs[generic_idx].long_name, MPIR_MAX_ERROR_LINE);
993 } else {
994 MPL_strncpy(ring_msg, generic_msg, MPIR_MAX_ERROR_LINE);
995 }
996
997 ring_msg[MPIR_MAX_ERROR_LINE] = '\0';
998
999 /* Get the ring sequence number and set the ring id */
1000 ErrcodeCreateID(error_class, generic_idx, ring_msg, &ErrorRing[ring_idx].id, &ring_seq);
1001 /* Set the previous code. */
1002 ErrorRing[ring_idx].prev_error = lastcode;
1003
1004 /* */
1005 if (use_user_error_code) {
1006 ErrorRing[ring_idx].use_user_error_code = 1;
1007 ErrorRing[ring_idx].user_error_code = user_error_code;
1008 } else if (lastcode != MPI_SUCCESS) {
1009 int last_ring_idx;
1010 int last_ring_id;
1011 int last_generic_idx;
1012
1013 if (convertErrcodeToIndexes(lastcode, &last_ring_idx,
1014 &last_ring_id, &last_generic_idx) != 0) {
1015 /* --BEGIN ERROR HANDLING-- */
1016 MPL_error_printf("Invalid error code (%d) (error ring index %d invalid)\n",
1017 lastcode, last_ring_idx);
1018 /* --END ERROR HANDLING-- */
1019 } else {
1020 if (last_generic_idx >= 0 && ErrorRing[last_ring_idx].id == last_ring_id) {
1021 if (ErrorRing[last_ring_idx].use_user_error_code) {
1022 ErrorRing[ring_idx].use_user_error_code = 1;
1023 ErrorRing[ring_idx].user_error_code =
1024 ErrorRing[last_ring_idx].user_error_code;
1025 }
1026 }
1027 }
1028 }
1029
1030 if (fcname != NULL) {
1031 MPL_snprintf(ErrorRing[ring_idx].location, MAX_LOCATION_LEN, "%s(%d)", fcname,
1032 line);
1033 ErrorRing[ring_idx].location[MAX_LOCATION_LEN] = '\0';
1034 } else {
1035 ErrorRing[ring_idx].location[0] = '\0';
1036 }
1037 {
1038 MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, VERBOSE,
1039 (MPL_DBG_FDEST, "New ErrorRing[%d]", ring_idx));
1040 MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, VERBOSE,
1041 (MPL_DBG_FDEST, " id = %#010x", ErrorRing[ring_idx].id));
1042 MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, VERBOSE,
1043 (MPL_DBG_FDEST, " prev_error = %#010x",
1044 ErrorRing[ring_idx].prev_error));
1045 MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, VERBOSE,
1046 (MPL_DBG_FDEST, " user=%d",
1047 ErrorRing[ring_idx].use_user_error_code));
1048 }
1049 }
1050 error_ring_mutex_unlock();
1051
1052 err_code |= ring_idx << ERROR_SPECIFIC_INDEX_SHIFT;
1053 err_code |= ring_seq << ERROR_SPECIFIC_SEQ_SHIFT;
1054
1055 }
1056
1057 if (fatal || MPIR_Err_is_fatal(lastcode)) {
1058 err_code |= ERROR_FATAL_MASK;
1059 }
1060
1061 return err_code;
1062 }
1063
1064 /* FIXME: Shouldn't str be const char * ? - no, but you don't know that without
1065 some documentation */
MPIR_Err_print_stack_string(int errcode,char * str,int maxlen)1066 static void MPIR_Err_print_stack_string(int errcode, char *str, int maxlen)
1067 {
1068 char *str_orig = str;
1069 int len;
1070
1071 error_ring_mutex_lock();
1072 {
1073 /* Find the longest fcname in the stack */
1074 int max_location_len = 0;
1075 int tmp_errcode = errcode;
1076 while (tmp_errcode != MPI_SUCCESS) {
1077 int ring_idx;
1078 int ring_id;
1079 int generic_idx;
1080
1081 if (convertErrcodeToIndexes(tmp_errcode, &ring_idx, &ring_id, &generic_idx) != 0) {
1082 /* --BEGIN ERROR HANDLING-- */
1083 MPL_error_printf("Invalid error code (%d) (error ring index %d invalid)\n",
1084 errcode, ring_idx);
1085 break;
1086 /* --END ERROR HANDLING-- */
1087 }
1088
1089 if (generic_idx < 0) {
1090 break;
1091 }
1092
1093 if (ErrorRing[ring_idx].id == ring_id) {
1094 len = (int) strlen(ErrorRing[ring_idx].location);
1095 max_location_len = MPL_MAX(max_location_len, len);
1096 tmp_errcode = ErrorRing[ring_idx].prev_error;
1097 } else {
1098 break;
1099 }
1100 }
1101 max_location_len += 2; /* add space for the ": " */
1102 /* print the error stack */
1103 while (errcode != MPI_SUCCESS) {
1104 int ring_idx;
1105 int ring_id;
1106 int generic_idx;
1107 int i;
1108 char *cur_pos;
1109
1110 if (convertErrcodeToIndexes(errcode, &ring_idx, &ring_id, &generic_idx) != 0) {
1111 /* --BEGIN ERROR HANDLING-- */
1112 MPL_error_printf("Invalid error code (%d) (error ring index %d invalid)\n",
1113 errcode, ring_idx);
1114 /* --END ERROR HANDLING-- */
1115 }
1116
1117 if (generic_idx < 0) {
1118 break;
1119 }
1120
1121 if (ErrorRing[ring_idx].id == ring_id) {
1122 int nchrs;
1123 MPL_snprintf(str, maxlen, "%s", ErrorRing[ring_idx].location);
1124 len = (int) strlen(str);
1125 maxlen -= len;
1126 str += len;
1127 nchrs = max_location_len - (int) strlen(ErrorRing[ring_idx].location) - 2;
1128 while (nchrs > 0 && maxlen > 0) {
1129 *str++ = '.';
1130 nchrs--;
1131 maxlen--;
1132 }
1133 if (maxlen > 0) {
1134 *str++ = ':';
1135 maxlen--;
1136 }
1137 if (maxlen > 0) {
1138 *str++ = ' ';
1139 maxlen--;
1140 }
1141
1142 if (MPIR_CVAR_CHOP_ERROR_STACK > 0) {
1143 cur_pos = ErrorRing[ring_idx].msg;
1144 len = (int) strlen(cur_pos);
1145 if (len == 0 && maxlen > 0) {
1146 *str++ = '\n';
1147 maxlen--;
1148 }
1149 while (len) {
1150 if (len >= MPIR_CVAR_CHOP_ERROR_STACK - max_location_len) {
1151 if (len > maxlen)
1152 break;
1153 /* FIXME: Don't use Snprint to append a string ! */
1154 MPL_snprintf(str, MPIR_CVAR_CHOP_ERROR_STACK - 1 - max_location_len,
1155 "%s", cur_pos);
1156 str[MPIR_CVAR_CHOP_ERROR_STACK - 1 - max_location_len] = '\n';
1157 cur_pos += MPIR_CVAR_CHOP_ERROR_STACK - 1 - max_location_len;
1158 str += MPIR_CVAR_CHOP_ERROR_STACK - max_location_len;
1159 maxlen -= MPIR_CVAR_CHOP_ERROR_STACK - max_location_len;
1160 if (maxlen < max_location_len)
1161 break;
1162 for (i = 0; i < max_location_len; i++) {
1163 MPL_snprintf(str, maxlen, " ");
1164 maxlen--;
1165 str++;
1166 }
1167 len = (int) strlen(cur_pos);
1168 } else {
1169 MPL_snprintf(str, maxlen, "%s\n", cur_pos);
1170 len = (int) strlen(str);
1171 maxlen -= len;
1172 str += len;
1173 len = 0;
1174 }
1175 }
1176 } else {
1177 MPL_snprintf(str, maxlen, "%s\n", ErrorRing[ring_idx].msg);
1178 len = (int) strlen(str);
1179 maxlen -= len;
1180 str += len;
1181 }
1182 errcode = ErrorRing[ring_idx].prev_error;
1183 } else {
1184 break;
1185 }
1186 }
1187 }
1188 error_ring_mutex_unlock();
1189
1190 if (errcode == MPI_SUCCESS) {
1191 goto fn_exit;
1192 }
1193
1194 /* FIXME: The following code is broken as described above (if the errcode
1195 * is not valid, then this code is just going to cause more problems) */
1196 {
1197 int generic_idx;
1198
1199 generic_idx = ((errcode & ERROR_GENERIC_MASK) >> ERROR_GENERIC_SHIFT) - 1;
1200
1201 if (generic_idx >= 0) {
1202 const char *p;
1203 /* FIXME: (Here and elsewhere) Make sure any string is
1204 * non-null before you use it */
1205 p = generic_err_msgs[generic_idx].long_name;
1206 if (!p) {
1207 p = "<NULL>";
1208 }
1209 MPL_snprintf(str, maxlen, "(unknown)(): %s\n", p);
1210 len = (int) strlen(str);
1211 maxlen -= len;
1212 str += len;
1213 goto fn_exit;
1214 }
1215 }
1216
1217 {
1218 int error_class;
1219
1220 error_class = ERROR_GET_CLASS(errcode);
1221
1222 if (error_class <= MPICH_ERR_LAST_MPIX) {
1223 MPL_snprintf(str, maxlen, "(unknown)(): %s\n", get_class_msg(ERROR_GET_CLASS(errcode)));
1224 len = (int) strlen(str);
1225 maxlen -= len;
1226 str += len;
1227 } else {
1228 /* FIXME: Not internationalized */
1229 MPL_snprintf(str, maxlen, "Error code contains an invalid class (%d)\n", error_class);
1230 len = (int) strlen(str);
1231 maxlen -= len;
1232 str += len;
1233 }
1234 }
1235
1236 fn_exit:
1237 if (str_orig != str) {
1238 str--;
1239 *str = '\0';
1240 }
1241 return;
1242 }
1243
1244
1245 /* Internal Routines */
1246
get_class_msg(int error_class)1247 static const char *get_class_msg(int error_class)
1248 {
1249 if (error_class >= 0 && error_class < MPIR_MAX_ERROR_CLASS_INDEX) {
1250 return generic_err_msgs[class_to_index[error_class]].long_name;
1251 } else {
1252 /* --BEGIN ERROR HANDLING-- */
1253 return "Unknown error class";
1254 /* --END ERROR HANDLING-- */
1255 }
1256 }
1257
1258 /*
1259 * Given a message string abbreviation (e.g., one that starts "**"), return
1260 * the corresponding index. For the specific
1261 * (parameterized messages), use idx = FindSpecificMsgIndex("**msg");
1262 * Note: Identical to FindGeneric, but with a different array. Should
1263 * use a single routine.
1264 */
FindSpecificMsgIndex(const char msg[])1265 static int FindSpecificMsgIndex(const char msg[])
1266 {
1267 int i, c;
1268 for (i = 0; i < specific_msgs_len; i++) {
1269 /* Check the sentinals to insure that the values are ok first */
1270 if (specific_err_msgs[i].sentinal1 != 0xacebad03 ||
1271 specific_err_msgs[i].sentinal2 != 0xcb0bfa11) {
1272 /* Something bad has happened! Don't risk trying the
1273 * short_name pointer; it may have been corrupted */
1274 break;
1275 }
1276 c = strcmp(specific_err_msgs[i].short_name, msg);
1277 if (c == 0)
1278 return i;
1279 if (c > 0) {
1280 /* don't return here if the string partially matches */
1281 if (strncmp(specific_err_msgs[i].short_name, msg, strlen(msg)) != 0)
1282 return -1;
1283 }
1284 }
1285 return -1;
1286 }
1287
1288 /* See FindGenericMsgIndex comments for a more efficient search routine that
1289 could be used here as well. */
1290
1291 /* Support for the instance-specific messages */
1292 /* ------------------------------------------------------------------------- */
1293 /* Routines to convert instance-specific messages into a string */
1294 /* This is the only case that supports instance-specific messages */
1295 /* ------------------------------------------------------------------------- */
1296 /* ------------------------------------------------------------------------ */
1297 /* This block of code is used to convert various MPI values into descriptive*/
1298 /* strings. The routines are */
1299 /* GetAssertString - handle MPI_MODE_xxx (RMA asserts) */
1300 /* GetDTypeString - handle MPI_Datatypes */
1301 /* GetMPIOpString - handle MPI_Op */
1302 /* These routines are used in vsnprintf_mpi */
1303 /* FIXME: These functions are not thread safe */
1304 /* ------------------------------------------------------------------------ */
1305 #define ASSERT_STR_MAXLEN 256
1306
GetAssertString(int d)1307 static const char *GetAssertString(int d)
1308 {
1309 static char str[ASSERT_STR_MAXLEN] = "";
1310 char *cur;
1311 size_t len = ASSERT_STR_MAXLEN;
1312 size_t n;
1313
1314 if (d == 0) {
1315 MPL_strncpy(str, "assert=0", ASSERT_STR_MAXLEN);
1316 return str;
1317 }
1318 cur = str;
1319 if (d & MPI_MODE_NOSTORE) {
1320 MPL_strncpy(cur, "MPI_MODE_NOSTORE", len);
1321 n = strlen(cur);
1322 cur += n;
1323 len -= n;
1324 d ^= MPI_MODE_NOSTORE;
1325 }
1326 if (d & MPI_MODE_NOCHECK) {
1327 if (len < ASSERT_STR_MAXLEN)
1328 MPL_strncpy(cur, " | MPI_MODE_NOCHECK", len);
1329 else
1330 MPL_strncpy(cur, "MPI_MODE_NOCHECK", len);
1331 n = strlen(cur);
1332 cur += n;
1333 len -= n;
1334 d ^= MPI_MODE_NOCHECK;
1335 }
1336 if (d & MPI_MODE_NOPUT) {
1337 if (len < ASSERT_STR_MAXLEN)
1338 MPL_strncpy(cur, " | MPI_MODE_NOPUT", len);
1339 else
1340 MPL_strncpy(cur, "MPI_MODE_NOPUT", len);
1341 n = strlen(cur);
1342 cur += n;
1343 len -= n;
1344 d ^= MPI_MODE_NOPUT;
1345 }
1346 if (d & MPI_MODE_NOPRECEDE) {
1347 if (len < ASSERT_STR_MAXLEN)
1348 MPL_strncpy(cur, " | MPI_MODE_NOPRECEDE", len);
1349 else
1350 MPL_strncpy(cur, "MPI_MODE_NOPRECEDE", len);
1351 n = strlen(cur);
1352 cur += n;
1353 len -= n;
1354 d ^= MPI_MODE_NOPRECEDE;
1355 }
1356 if (d & MPI_MODE_NOSUCCEED) {
1357 if (len < ASSERT_STR_MAXLEN)
1358 MPL_strncpy(cur, " | MPI_MODE_NOSUCCEED", len);
1359 else
1360 MPL_strncpy(cur, "MPI_MODE_NOSUCCEED", len);
1361 n = strlen(cur);
1362 cur += n;
1363 len -= n;
1364 d ^= MPI_MODE_NOSUCCEED;
1365 }
1366 if (d) {
1367 if (len < ASSERT_STR_MAXLEN)
1368 MPL_snprintf(cur, len, " | 0x%x", d);
1369 else
1370 MPL_snprintf(cur, len, "assert=0x%x", d);
1371 }
1372 return str;
1373 }
1374
/* Describe a datatype handle as text.  Handles that are not datatypes yield
 * "INVALID DATATYPE"; MPI_DATATYPE_NULL yields its own name; named types
 * yield the builtin name when one is available; everything else is
 * described through its combiner.  Strings that have to be formatted are
 * built in a static buffer that is overwritten by the next call (not
 * thread safe). */
static const char *GetDTypeString(MPI_Datatype d)
{
    static char default_str[64];
    int n_ints, n_addrs, n_types, combiner = 0;
    char *name;

    if (HANDLE_GET_MPI_KIND(d) != MPIR_DATATYPE)
        return "INVALID DATATYPE";
    if (HANDLE_GET_KIND(d) == HANDLE_KIND_INVALID && d != MPI_DATATYPE_NULL)
        return "INVALID DATATYPE";

    if (d == MPI_DATATYPE_NULL)
        return "MPI_DATATYPE_NULL";

    if (d == 0) {
        MPL_strncpy(default_str, "dtype=0x0", sizeof(default_str));
        return default_str;
    }

    MPIR_Type_get_envelope(d, &n_ints, &n_addrs, &n_types, &combiner);

    if (combiner == MPI_COMBINER_NAMED) {
        name = MPIR_Datatype_builtin_to_string(d);
        if (name != NULL)
            return name;
        MPL_snprintf(default_str, sizeof(default_str), "dtype=0x%08x", d);
        return default_str;
    }

    /* Derived type: report the combiner, or the raw handle if the combiner
     * has no name */
    name = MPIR_Datatype_combiner_to_string(combiner);
    if (name != NULL) {
        MPL_snprintf(default_str, sizeof(default_str), "dtype=USER<%s>", name);
    } else {
        MPL_snprintf(default_str, sizeof(default_str), "dtype=USER<0x%08x>", d);
    }
    return default_str;
}
1413
/* Return the name of a predefined MPI_Op.  Any other handle is formatted as
 * "op=0x..." in a static buffer that is overwritten by the next such call. */
static const char *GetMPIOpString(MPI_Op o)
{
    static char default_str[64];
    static const struct {
        MPI_Op op;
        const char *name;
    } predefined[] = {
        {MPI_OP_NULL, "MPI_OP_NULL"},
        {MPI_MAX, "MPI_MAX"},
        {MPI_MIN, "MPI_MIN"},
        {MPI_SUM, "MPI_SUM"},
        {MPI_PROD, "MPI_PROD"},
        {MPI_LAND, "MPI_LAND"},
        {MPI_BAND, "MPI_BAND"},
        {MPI_LOR, "MPI_LOR"},
        {MPI_BOR, "MPI_BOR"},
        {MPI_LXOR, "MPI_LXOR"},
        {MPI_BXOR, "MPI_BXOR"},
        {MPI_MINLOC, "MPI_MINLOC"},
        {MPI_MAXLOC, "MPI_MAXLOC"},
        {MPI_REPLACE, "MPI_REPLACE"},
        {MPI_NO_OP, "MPI_NO_OP"}
    };
    size_t k;

    for (k = 0; k < sizeof(predefined) / sizeof(predefined[0]); k++) {
        if (o == predefined[k].op)
            return predefined[k].name;
    }

    /* FIXME: default is not thread safe */
    MPL_snprintf(default_str, sizeof(default_str), "op=0x%x", o);
    return default_str;
}
1454
1455 /* ------------------------------------------------------------------------ */
1456 /* This routine takes an instance-specific string with format specifiers */
1457 /* This routine makes use of the above routines, along with some inlined */
1458 /* code, to process the format specifiers for the MPI objects */
1459 /* The current set of format specifiers is undocumented except for their
1460 use in this routine. In addition, these choices do not permit the
1461 use of GNU extensions to check the validity of these arguments.
1462 At some point, a documented set that can exploit those GNU extensions
1463 will replace these. */
1464 /* ------------------------------------------------------------------------ */
1465
vsnprintf_mpi(char * str,size_t maxlen,const char * fmt_orig,va_list list)1466 static int vsnprintf_mpi(char *str, size_t maxlen, const char *fmt_orig, va_list list)
1467 {
1468 char *begin, *end, *fmt;
1469 size_t len;
1470 MPI_Comm C;
1471 MPI_Info info;
1472 MPI_Datatype D;
1473 MPI_Win W;
1474 MPI_Group G;
1475 MPI_Op O;
1476 MPI_Request R;
1477 MPI_Errhandler E;
1478 char *s;
1479 int t, i, d, mpi_errno = MPI_SUCCESS;
1480 long long ll;
1481 MPI_Count c;
1482 void *p;
1483
1484 fmt = MPL_strdup(fmt_orig);
1485 if (fmt == NULL) {
1486 if (maxlen > 0 && str != NULL)
1487 *str = '\0';
1488 return 0;
1489 }
1490
1491 begin = fmt;
1492 end = strchr(fmt, '%');
1493 while (end) {
1494 len = maxlen;
1495 if (len > (size_t) (end - begin)) {
1496 len = (size_t) (end - begin);
1497 }
1498 if (len) {
1499 MPIR_Memcpy(str, begin, len);
1500 str += len;
1501 maxlen -= len;
1502 }
1503 end++;
1504 begin = end + 1;
1505 switch ((int) (*end)) {
1506 case (int) 's':
1507 s = va_arg(list, char *);
1508 if (s)
1509 MPL_strncpy(str, s, maxlen);
1510 else {
1511 MPL_strncpy(str, "<NULL>", maxlen);
1512 }
1513 break;
1514 case (int) 'd':
1515 d = va_arg(list, int);
1516 MPL_snprintf(str, maxlen, "%d", d);
1517 break;
1518 case (int) 'L':
1519 ll = va_arg(list, long long);
1520 MPL_snprintf(str, maxlen, "%lld", ll);
1521 break;
1522 case (int) 'x':
1523 d = va_arg(list, int);
1524 MPL_snprintf(str, maxlen, "%x", d);
1525 break;
1526 case (int) 'X':
1527 ll = va_arg(list, long long);
1528 MPL_snprintf(str, maxlen, "%llx", ll);
1529 break;
1530 case (int) 'i':
1531 i = va_arg(list, int);
1532 switch (i) {
1533 case MPI_ANY_SOURCE:
1534 MPL_strncpy(str, "MPI_ANY_SOURCE", maxlen);
1535 break;
1536 case MPI_PROC_NULL:
1537 MPL_strncpy(str, "MPI_PROC_NULL", maxlen);
1538 break;
1539 case MPI_ROOT:
1540 MPL_strncpy(str, "MPI_ROOT", maxlen);
1541 break;
1542 default:
1543 MPL_snprintf(str, maxlen, "%d", i);
1544 break;
1545 }
1546 break;
1547 case (int) 't':
1548 t = va_arg(list, int);
1549 switch (t) {
1550 case MPI_ANY_TAG:
1551 MPL_strncpy(str, "MPI_ANY_TAG", maxlen);
1552 break;
1553 default:
1554 /* Note that MPI_UNDEFINED is not a valid tag value,
1555 * though there is one example in the MPI-3.0 standard
1556 * that sets status.MPI_TAG to MPI_UNDEFINED in a
1557 * generalized request example. */
1558 MPL_snprintf(str, maxlen, "%d", t);
1559 break;
1560 }
1561 break;
1562 case (int) 'p':
1563 p = va_arg(list, void *);
1564 /* FIXME: A check for MPI_IN_PLACE should only be used
1565 * where that is valid */
1566 if (p == MPI_IN_PLACE) {
1567 MPL_strncpy(str, "MPI_IN_PLACE", maxlen);
1568 } else {
1569 /* FIXME: We may want to use 0x%p for systems that
1570 * (including Windows) that don't prefix %p with 0x.
1571 * This must be done with a capability, not a test on
1572 * particular OS or header files */
1573 MPL_snprintf(str, maxlen, "%p", p);
1574 }
1575 break;
1576 case (int) 'C':
1577 C = va_arg(list, MPI_Comm);
1578 switch (C) {
1579 case MPI_COMM_WORLD:
1580 MPL_strncpy(str, "MPI_COMM_WORLD", maxlen);
1581 break;
1582 case MPI_COMM_SELF:
1583 MPL_strncpy(str, "MPI_COMM_SELF", maxlen);
1584 break;
1585 case MPI_COMM_NULL:
1586 MPL_strncpy(str, "MPI_COMM_NULL", maxlen);
1587 break;
1588 default:
1589 MPL_snprintf(str, maxlen, "comm=0x%x", C);
1590 break;
1591 }
1592 break;
1593 case (int) 'I':
1594 info = va_arg(list, MPI_Info);
1595 if (info == MPI_INFO_NULL) {
1596 MPL_strncpy(str, "MPI_INFO_NULL", maxlen);
1597 } else {
1598 MPL_snprintf(str, maxlen, "info=0x%x", info);
1599 }
1600 break;
1601 case (int) 'D':
1602 D = va_arg(list, MPI_Datatype);
1603 MPL_snprintf(str, maxlen, "%s", GetDTypeString(D));
1604 break;
1605 /* Include support for %F only if MPI-IO is enabled */
1606 #ifdef MPI_MODE_RDWR
1607 case (int) 'F':
1608 {
1609 MPI_File F;
1610 F = va_arg(list, MPI_File);
1611 if (F == MPI_FILE_NULL) {
1612 MPL_strncpy(str, "MPI_FILE_NULL", maxlen);
1613 } else {
1614 MPL_snprintf(str, maxlen, "file=0x%lx", (unsigned long) F);
1615 }
1616 }
1617 break;
1618 #endif /* MODE_RDWR */
1619 case (int) 'W':
1620 W = va_arg(list, MPI_Win);
1621 if (W == MPI_WIN_NULL) {
1622 MPL_strncpy(str, "MPI_WIN_NULL", maxlen);
1623 } else {
1624 MPL_snprintf(str, maxlen, "win=0x%x", W);
1625 }
1626 break;
1627 case (int) 'A':
1628 d = va_arg(list, int);
1629 MPL_snprintf(str, maxlen, "%s", GetAssertString(d));
1630 break;
1631 case (int) 'G':
1632 G = va_arg(list, MPI_Group);
1633 if (G == MPI_GROUP_NULL) {
1634 MPL_strncpy(str, "MPI_GROUP_NULL", maxlen);
1635 } else {
1636 MPL_snprintf(str, maxlen, "group=0x%x", G);
1637 }
1638 break;
1639 case (int) 'O':
1640 O = va_arg(list, MPI_Op);
1641 MPL_snprintf(str, maxlen, "%s", GetMPIOpString(O));
1642 break;
1643 case (int) 'R':
1644 R = va_arg(list, MPI_Request);
1645 if (R == MPI_REQUEST_NULL) {
1646 MPL_strncpy(str, "MPI_REQUEST_NULL", maxlen);
1647 } else {
1648 MPL_snprintf(str, maxlen, "req=0x%x", R);
1649 }
1650 break;
1651 case (int) 'E':
1652 E = va_arg(list, MPI_Errhandler);
1653 if (E == MPI_ERRHANDLER_NULL) {
1654 MPL_strncpy(str, "MPI_ERRHANDLER_NULL", maxlen);
1655 } else {
1656 MPL_snprintf(str, maxlen, "errh=0x%x", E);
1657 }
1658 break;
1659 case (int) 'c':
1660 c = va_arg(list, MPI_Count);
1661 MPIR_Assert(sizeof(long long) >= sizeof(MPI_Count));
1662 MPL_snprintf(str, maxlen, "%lld", (long long) c);
1663 break;
1664 default:
1665 /* Error: unhandled output type */
1666 MPL_free(fmt);
1667 return 0;
1668 /*
1669 * if (maxlen > 0 && str != NULL)
1670 * *str = '\0';
1671 * break;
1672 */
1673 }
1674 len = strlen(str);
1675 maxlen -= len;
1676 str += len;
1677 end = strchr(begin, '%');
1678 }
1679 if (*begin != '\0') {
1680 MPL_strncpy(str, begin, maxlen);
1681 }
1682 /* Free the dup'ed format string */
1683 MPL_free(fmt);
1684
1685 return mpi_errno;
1686 }
1687
1688 /* ------------------------------------------------------------------------- */
1689 /* Manage the error reporting stack */
1690 /* ------------------------------------------------------------------------- */
1691
1692 /*
1693 * Support for multiple messages, including the error message ring.
 * In principle, the error message ring could be used to provide
1695 * support for multiple error classes or codes, without providing
1696 * instance-specific support. However, for now, we combine the two
1697 * capabilities.
1698 */
1699
1700
MPIR_Err_stack_init(void)1701 static void MPIR_Err_stack_init(void)
1702 {
1703 int mpi_errno = MPI_SUCCESS;
1704
1705 error_ring_mutex_create(&mpi_errno);
1706
1707 if (MPIR_CVAR_CHOP_ERROR_STACK < 0) {
1708 MPIR_CVAR_CHOP_ERROR_STACK = 80;
1709 #ifdef HAVE_WINDOWS_H
1710 {
1711 /* If windows, set the default width to the window size */
1712 HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
1713 if (hConsole != INVALID_HANDLE_VALUE) {
1714 CONSOLE_SCREEN_BUFFER_INFO info;
1715 if (GetConsoleScreenBufferInfo(hConsole, &info)) {
1716 /* override the parameter system in this case */
1717 MPIR_CVAR_CHOP_ERROR_STACK = info.dwMaximumWindowSize.X;
1718 }
1719 }
1720 }
1721 #endif /* WINDOWS_H */
1722 }
1723 }
1724
1725 /* Create the ring id from information about the message */
ErrcodeCreateID(int error_class,int generic_idx,const char * msg,int * id,int * seq)1726 static void ErrcodeCreateID(int error_class, int generic_idx, const char *msg, int *id, int *seq)
1727 {
1728 int i;
1729 int ring_seq = 0, ring_id;
1730
1731 /* Create a simple hash function of the message to serve as the
1732 * sequence number */
1733 ring_seq = 0;
1734 for (i = 0; msg[i]; i++)
1735 ring_seq += (unsigned int) msg[i];
1736
1737 ring_seq %= ERROR_SPECIFIC_SEQ_SIZE;
1738
1739 ring_id = (error_class & ERROR_CLASS_MASK) |
1740 ((generic_idx + 1) << ERROR_GENERIC_SHIFT) | (ring_seq << ERROR_SPECIFIC_SEQ_SHIFT);
1741
1742 *id = ring_id;
1743 *seq = ring_seq;
1744 }
1745
1746 /* Convert an error code into ring_idx, ring_id, and generic_idx.
1747 Return non-zero if there is a problem with the decode values
1748 (e.g., out of range for the ring index) */
convertErrcodeToIndexes(int errcode,int * ring_idx,int * ring_id,int * generic_idx)1749 static int convertErrcodeToIndexes(int errcode, int *ring_idx, int *ring_id, int *generic_idx)
1750 {
1751 *ring_idx = (errcode & ERROR_SPECIFIC_INDEX_MASK) >> ERROR_SPECIFIC_INDEX_SHIFT;
1752 *ring_id = errcode & (ERROR_CLASS_MASK | ERROR_GENERIC_MASK | ERROR_SPECIFIC_SEQ_MASK);
1753 *generic_idx = ((errcode & ERROR_GENERIC_MASK) >> ERROR_GENERIC_SHIFT) - 1;
1754
1755 /* Test on both the max_error_ring_loc and MAX_ERROR_RING to guard
1756 * against memory overwrites */
1757 if (*ring_idx < 0 || *ring_idx >= MAX_ERROR_RING ||
1758 (unsigned int) *ring_idx > max_error_ring_loc)
1759 return 1;
1760
1761 return 0;
1762 }
1763
checkErrcodeIsValid(int errcode)1764 static int checkErrcodeIsValid(int errcode)
1765 {
1766 int ring_id, generic_idx, ring_idx;
1767
1768 /* If the errcode is a class, then it is valid */
1769 if (errcode <= MPIR_MAX_ERROR_CLASS_INDEX && errcode >= 0)
1770 return 0;
1771
1772 if (convertErrcodeToIndexes(errcode, &ring_idx, &ring_id, &generic_idx) != 0) {
1773 /* --BEGIN ERROR HANDLING-- */
1774 MPL_error_printf("Invalid error code (%d) (error ring index %d invalid)\n",
1775 errcode, ring_idx);
1776 /* --END ERROR HANDLING-- */
1777 }
1778
1779 MPL_DBG_MSG_FMT(MPIR_DBG_ERRHAND, VERBOSE,
1780 (MPL_DBG_FDEST, "code=%#010x ring_idx=%d ring_id=%#010x generic_idx=%d",
1781 errcode, ring_idx, ring_id, generic_idx));
1782
1783 if (ring_idx < 0 || ring_idx >= MAX_ERROR_RING || (unsigned int) ring_idx > max_error_ring_loc)
1784 return 1;
1785 if (ErrorRing[ring_idx].id != ring_id)
1786 return 2;
1787 /* It looks like the code uses a generic idx of -1 to indicate no
1788 * generic message */
1789 if (generic_idx < -1 || generic_idx > generic_msgs_len)
1790 return 3;
1791 return 0;
1792 }
1793
1794 /* Check to see if the error code is a user-specified error code
1795 (e.g., from the attribute delete function) and if so, set the error code
1796 to the value provide by the user */
checkForUserErrcode(int errcode)1797 static int checkForUserErrcode(int errcode)
1798 {
1799 error_ring_mutex_lock();
1800 {
1801 if (errcode != MPI_SUCCESS) {
1802 int ring_idx;
1803 int ring_id;
1804 int generic_idx;
1805
1806 if (convertErrcodeToIndexes(errcode, &ring_idx, &ring_id, &generic_idx) != 0) {
1807 /* --BEGIN ERROR HANDLING-- */
1808 MPL_error_printf("Invalid error code (%d) (error ring index %d invalid)\n",
1809 errcode, ring_idx);
1810 /* --END ERROR HANDLING-- */
1811 } else {
1812 /* Can we get a more specific error message */
1813 if (generic_idx >= 0 &&
1814 ErrorRing[ring_idx].id == ring_id && ErrorRing[ring_idx].use_user_error_code) {
1815 errcode = ErrorRing[ring_idx].user_error_code;
1816 }
1817 }
1818 }
1819 }
1820 error_ring_mutex_unlock();
1821 return errcode;
1822 }
1823
1824
1825 /* --BEGIN ERROR HANDLING-- */
/* Translate a non-zero reason code (1, 2 or 3) into a human-readable
 * explanation.  Any other value yields a generic "unknown" message.
 * The returned pointer refers to a string literal and must not be
 * modified or freed. */
static const char *ErrcodeInvalidReasonStr(int reason)
{
    /* Entry k describes reason code k + 1 */
    static const char *const known_reasons[] = {
        "Ring Index out of range",
        "Ring ids do not match",
        "Generic message index out of range"
    };
    const int num_known = (int) (sizeof(known_reasons) / sizeof(known_reasons[0]));

    if (reason >= 1 && reason <= num_known)
        return known_reasons[reason - 1];

    return "Unknown reason for invalid errcode";
}
1845
1846 /* --END ERROR HANDLING-- */
1847
CombineSpecificCodes(int error1_code,int error2_code,int error2_class)1848 static void CombineSpecificCodes(int error1_code, int error2_code, int error2_class)
1849 {
1850 int error_code;
1851
1852 error_code = error1_code;
1853
1854 error_ring_mutex_lock();
1855 {
1856 for (;;) {
1857 int error_class;
1858 int ring_idx;
1859 int ring_id;
1860 int generic_idx;
1861
1862 if (convertErrcodeToIndexes(error_code, &ring_idx, &ring_id,
1863 &generic_idx) != 0 || generic_idx < 0 ||
1864 ErrorRing[ring_idx].id != ring_id) {
1865 break;
1866 }
1867
1868 error_code = ErrorRing[ring_idx].prev_error;
1869
1870 if (error_code == MPI_SUCCESS) {
1871 ErrorRing[ring_idx].prev_error = error2_code;
1872 break;
1873 }
1874
1875 error_class = MPIR_ERR_GET_CLASS(error_code);
1876
1877 if (error_class == MPI_ERR_OTHER) {
1878 ErrorRing[ring_idx].prev_error &= ~(ERROR_CLASS_MASK);
1879 ErrorRing[ring_idx].prev_error |= error2_class;
1880 }
1881 }
1882 }
1883 error_ring_mutex_unlock();
1884 }
1885
ErrGetInstanceString(int errorcode,char msg[],int num_remaining)1886 static int ErrGetInstanceString(int errorcode, char msg[], int num_remaining)
1887 {
1888 int len;
1889
1890 if (MPIR_CVAR_PRINT_ERROR_STACK) {
1891 MPL_strncpy(msg, ", error stack:\n", num_remaining);
1892 msg[num_remaining - 1] = '\0';
1893 len = (int) strlen(msg);
1894 msg += len;
1895 num_remaining -= len;
1896 /* note: this took the "fn" arg, but that appears to be unused
1897 * and is undocumented. */
1898 MPIR_Err_print_stack_string(errorcode, msg, num_remaining);
1899 msg[num_remaining - 1] = '\0';
1900 } else {
1901 error_ring_mutex_lock();
1902 {
1903 while (errorcode != MPI_SUCCESS) {
1904 int ring_idx;
1905 int ring_id;
1906 int generic_idx;
1907
1908 if (convertErrcodeToIndexes(errorcode, &ring_idx, &ring_id, &generic_idx) != 0) {
1909 /* --BEGIN ERROR HANDLING-- */
1910 MPL_error_printf("Invalid error code (%d) (error ring index %d invalid)\n",
1911 errorcode, ring_idx);
1912 break;
1913 /* --END ERROR HANDLING-- */
1914 }
1915
1916 if (generic_idx < 0) {
1917 break;
1918 }
1919
1920 if (ErrorRing[ring_idx].id == ring_id) {
1921 /* just keep clobbering old values until the
1922 * end of the stack is reached */
1923 MPL_snprintf(msg, num_remaining, ", %s", ErrorRing[ring_idx].msg);
1924 msg[num_remaining - 1] = '\0';
1925 errorcode = ErrorRing[ring_idx].prev_error;
1926 } else {
1927 break;
1928 }
1929 }
1930 }
1931 error_ring_mutex_unlock();
1932 }
1933 /* FIXME: How do we determine that we failed to unwind the stack? */
1934 if (errorcode != MPI_SUCCESS)
1935 return 1;
1936
1937 return 0;
1938 }
1939
1940 #else
1941 #error MPICH_ERROR_MSG_LEVEL undefined or has invalid value
1942 #endif
1943
1944 /* Common routines that are used by two or more error-message levels.
1945 Very simple routines are defined inline */
1946 #ifdef NEEDS_FIND_GENERIC_MSG_INDEX
1947 /*
1948 * Given a message string abbreviation (e.g., one that starts "**"), return
1949 * the corresponding index. For the generic (non
1950 * parameterized messages), use idx = FindGenericMsgIndex("**msg");
1951 * Returns -1 on failure to find the matching message
1952 *
1953 * The values are in increasing, sorted order, so once we find a
1954 * comparison where the current generic_err_msg is greater than the
1955 * message we are attempting to match, we have missed the match and
1956 * there is an internal error (all short messages should exist in defmsg.h)
1957 */
1958 /* Question: Could be a service routine for message level >= generic */
FindGenericMsgIndex(const char msg[])1959 static int FindGenericMsgIndex(const char msg[])
1960 {
1961 int i, c;
1962 for (i = 0; i < generic_msgs_len; i++) {
1963 /* Check the sentinals to insure that the values are ok first */
1964 if (generic_err_msgs[i].sentinal1 != 0xacebad03 ||
1965 generic_err_msgs[i].sentinal2 != 0xcb0bfa11) {
1966 /* Something bad has happened! Don't risk trying the
1967 * short_name pointer; it may have been corrupted */
1968 break;
1969 }
1970 c = strcmp(generic_err_msgs[i].short_name, msg);
1971 if (c == 0)
1972 return i;
1973 if (c > 0) {
1974 /* In case the generic messages are not sorted exactly the
1975 * way that strcmp compares, we check for the case that
1976 * the short msg matches the current generic message. If
1977 * that is the case, we do *not* fail */
1978 if (strncmp(generic_err_msgs[i].short_name, msg, strlen(msg)) != 0)
1979 return -1;
1980 }
1981 }
1982 /* --BEGIN ERROR HANDLING-- */
1983 return -1;
1984 /* --END ERROR HANDLING-- */
1985 }
1986 #endif
1987