1 #if HAVE_CONFIG_H
2 # include "config.h"
3 #endif
4
5 /* $Id: signaltrap.c,v 1.28 2005-05-13 19:06:40 vinod Exp $ */
/*********************************************************\
 * Signal handlers and trap/restore routines for:        *
 *   SIGINT, SIGCHLD, SIGBUS, SIGFPE, SIGILL, SIGSEGV,   *
 *   SIGSYS, SIGTRAP, SIGHUP, SIGTERM, SIGIOT, SIGCONT,  *
 *   SIGXCPU, plus an abort handler.                     *
 * The handlers report through armci_die (via the Error  *
 * macro) so that IPC resources can be freed.            *
\*********************************************************/
12
13
14 #if HAVE_SIGNAL_H
15 # include <signal.h>
16 #endif
17 #if HAVE_STDIO_H
18 # include <stdio.h>
19 #endif
20 #if HAVE_STDIO_H
21 # include <stdio.h>
22 #endif
23 #if HAVE_SYS_TYPES_H
24 # include <sys/types.h>
25 #endif
26 #if HAVE_SYS_WAIT_H
27 # include <sys/wait.h>
28 #endif
29 #if HAVE_UNISTD_H
30 # include <unistd.h>
31 #endif
32 #if HAVE_ERRNO_H
33 # include <errno.h>
34 #endif
35 #include "armci.h"
36 #include "armcip.h"
37
/*
 * Note the trailing underscores: PAUSE_ON_ERROR itself is NOT defined here,
 * so the "#ifdef PAUSE_ON_ERROR" sections in the handlers below are compiled
 * out unless that macro is defined elsewhere.
 */
#define PAUSE_ON_ERROR__

/* Every error in this file is reported through armci_die. */
#define Error armci_die
#if !defined(armci_die)
extern void Error();
#endif

/* Return type of a signal handler on the target platform. */
#if (defined(ENCORE) || defined(SEQUENT) || defined(ARDENT))
#   define SigType  int
#else
#   define SigType  void
#endif

/*
 * Fallback for systems whose <signal.h> lacks SIG_ERR.  The expansion is
 * fully parenthesized so it is safe inside any larger expression.
 */
#ifndef SIG_ERR
#   define SIG_ERR ((SigType (*)())-1)
#endif
54
55 extern int armci_me;
56
57 int AR_caught_sigint=0;
58 int AR_caught_sigterm=0;
59 int AR_caught_sigchld=0;
60 int AR_caught_sigsegv=0;
61 int AR_caught_sig=0;
62
63 SigType (*SigChldOrig)(), (*SigIntOrig)(), (*SigHupOrig)(), (*SigTermOrig)();
64 SigType (*SigSegvOrig)();
65
66
67 /*********************** SIGINT *************************************/
68 #if defined(SUN) && !defined(SOLARIS)
SigIntHandler(sig,code,scp,addr)69 SigType SigIntHandler(sig, code, scp, addr)
70 int code;
71 struct sigcontext *scp;
72 char *addr;
73 #else
74 SigType SigIntHandler(sig)
75 #endif
76 int sig;
77 {
78 AR_caught_sigint = 1;
79 AR_caught_sig= sig;
80 Error("SigIntHandler: interrupt signal was caught",(int) sig);
81 }
82
TrapSigInt()83 void TrapSigInt()
84 /*
85 Trap the signal SIGINT so that we can propagate error
86 conditions and also tidy up shared system resources in a
87 manner not possible just by killing everyone
88 */
89 {
90 if ( (SigIntOrig = signal(SIGINT, SigIntHandler)) == SIG_ERR)
91 Error("TrapSigInt: error from signal setting SIGINT",0);
92 }
93
RestoreSigInt()94 void RestoreSigInt()
95 /*
96 Restore the original signal handler
97 */
98 {
99 if(AR_caught_sigint) SigIntOrig(SIGINT);
100 if ( signal(SIGINT, SigIntOrig) == SIG_ERR)
101 Error("RestoreSigInt: error from restoring signal SIGINT",0);
102 }
103
104
/*********************** SIGABRT *************************************/
106 #if defined(SUN) && !defined(SOLARIS)
SigAbortHandler(sig,code,scp,addr)107 SigType SigAbortHandler(sig, code, scp, addr)
108 int code;
109 struct sigcontext *scp;
110 char *addr;
111 #else
112 SigType SigAbortHandler(sig)
113 #endif
114 int sig;
115 {
116 AR_caught_sig= sig;
117 Error("SigIntHandler: abort signal was caught: cleaning up",(int) sig);
118 }
119
TrapSigAbort()120 void TrapSigAbort()
121 /*
122 Trap the signal SIGINT so that we can propagate error
123 conditions and also tidy up shared system resources in a
124 manner not possible just by killing everyone
125 */
126 {
127 if ( signal(SIGINT, SigAbortHandler) == SIG_ERR)
128 Error("TrapSigAbort: error from signal setting SIGABORT",0);
129 }
130
131
132
133 /*********************** SIGCHLD *************************************/
134 #if defined(SUN) && !defined(SOLARIS)
SigChldHandler(sig,code,scp,addr)135 SigType SigChldHandler(sig, code, scp, addr)
136 int code;
137 struct sigcontext *scp;
138 char *addr;
139 #else
140 SigType SigChldHandler(sig)
141 #endif
142 int sig;
143 {
144 int status;
145 #if defined(ALLIANT) || defined(ENCORE) || defined(SEQUENT) || defined(NEXT)
146 union wait ustatus;
147 #endif
148
149 #if defined(LINUX)
150 pid_t ret;
151 /* Trap signal as soon as possible to avoid race */
152 if ( (SigChldOrig = signal(SIGCHLD, SigChldHandler)) == SIG_ERR)
153 Error("SigChldHandler: error from signal setting SIGCHLD",0);
154 #endif
155
156 #if defined(ALLIANT) || defined(ENCORE) || defined(SEQUENT) || defined(NEXT)
157
158 # if defined(LINUX)
159 ret = wait(&ustatus);
160 if((ret == 0) || ((ret == -1) && (errno == ECHILD))) { return; }
161 # else
162 (void) wait(&ustatus);
163 # endif
164 status = ustatus.w_status;
165
166 #else
167
168 # if defined(LINUX)
169 ret = waitpid(0, &status, WNOHANG);
170 if((ret == 0) || ((ret == -1) && (errno == ECHILD))) { return; }
171 # else
172 (void)wait(&status);
173 # endif
174
175 #endif
176 AR_caught_sigchld=1;
177 AR_caught_sig= sig;
178 Error("Child process terminated prematurely, status=",(int) status);
179 }
180
TrapSigChld()181 void TrapSigChld()
182 /*
183 Trap SIGCHLD so that can tell if children die unexpectedly.
184 */
185 {
186 if ( (SigChldOrig = signal(SIGCHLD, SigChldHandler)) == SIG_ERR)
187 Error("TrapSigChld: error from signal setting SIGCHLD",0);
188 }
189
190
RestoreSigChld()191 void RestoreSigChld()
192 {
193 if(AR_caught_sigchld)
194 SigChldOrig(SIGCHLD);
195 if (signal(SIGCHLD, SigChldOrig) == SIG_ERR)
196 Error("RestoreSigChld: error from restoring signal SIGChld",0);
197 }
198
199
RestoreSigChldDfl()200 void RestoreSigChldDfl()
201 {
202 (void) signal(SIGCHLD, SIG_DFL);
203 }
204
205
206 /*********************** SIGBUS *************************************/
207 #if defined(SUN) && !defined(SOLARIS)
SigBusHandler(sig,code,scp,addr)208 SigType SigBusHandler(sig, code, scp, addr)
209 int code;
210 struct sigcontext *scp;
211 char *addr;
212 #else
213 SigType SigBusHandler(sig)
214 #endif
215 int sig;
216 {
217 AR_caught_sig= sig;
218 #ifdef PAUSE_ON_ERROR
219 fprintf(stderr,"%d(%d): Bus Error ... pausing\n",
220 armci_me, getpid() );pause();
221 #endif
222 Error("Bus error, status=",(int) sig);
223 }
224
TrapSigBus()225 void TrapSigBus()
226 /*
227 Trap SIGBUS
228 */
229 {
230 if ( signal(SIGBUS, SigBusHandler) == SIG_ERR)
231 Error("TrapSigBus: error from signal setting SIGBUS", 0);
232 }
233
234
235
236
237 /*********************** SIGFPE *************************************/
238 #if defined(SUN) && !defined(SOLARIS)
SigFpeHandler(sig,code,scp,addr)239 SigType SigFpeHandler(sig, code, scp, addr)
240 int code;
241 struct sigcontext *scp;
242 char *addr;
243 #else
244 SigType SigFpeHandler(sig)
245 #endif
246 int sig;
247 {
248 AR_caught_sig= sig;
249 #ifdef PAUSE_ON_ERROR
250 fprintf(stderr,"%d(%s:%d): Sig FPE ... pausing\n",
251 armci_me, armci_clus_info[armci_clus_me].hostname,
252 getpid() );pause();
253 #endif
254 Error("Floating Point Exception error, status=",(int) sig);
255 }
256
TrapSigFpe()257 void TrapSigFpe()
258 /*
259 Trap SIGFPE
260 */
261 {
262 if ( signal(SIGFPE, SigFpeHandler) == SIG_ERR)
263 Error("TrapSigFpe: error from signal setting SIGFPE", 0);
264 }
265
266
267
268
269 /*********************** SIGILL *************************************/
270 #if defined(SUN) && !defined(SOLARIS)
SigIllHandler(sig,code,scp,addr)271 SigType SigIllHandler(sig, code, scp, addr)
272 int code;
273 struct sigcontext *scp;
274 char *addr;
275 #else
276 SigType SigIllHandler(sig)
277 #endif
278 int sig;
279 {
280 AR_caught_sig= sig;
281 Error("Illegal Instruction error, status=",(int) sig);
282 }
283
TrapSigIll()284 void TrapSigIll()
285 /*
286 Trap SIGILL
287 */
288 {
289 if ( signal(SIGILL, SigIllHandler) == SIG_ERR)
290 Error("TrapSigIll: error from signal setting SIGILL", 0);
291 }
292
293
294
295
296 /*********************** SIGSEGV *************************************/
297 #if defined(SUN) && !defined(SOLARIS)
SigSegvHandler(sig,code,scp,addr)298 SigType SigSegvHandler(sig, code, scp, addr)
299 int code;
300 struct sigcontext *scp;
301 char *addr;
302 #else
303 SigType SigSegvHandler(sig)
304 #endif
305 int sig;
306 {
307 AR_caught_sig= sig;
308 AR_caught_sigsegv=1;
309 #ifdef PAUSE_ON_ERROR
310 fprintf(stderr,"%d(%s:%d): Segmentation Violation ... pausing\n",
311 armci_me, armci_clus_info[armci_clus_me].hostname,
312 getpid() );pause();
313 #endif
314
315 Error("Segmentation Violation error, status=",(int) sig);
316 }
317 #ifdef ENABLE_CHECKPOINT
318 static void * signal_arr[100];
SigSegvActionSa(int sig,siginfo_t * sinfo,void * ptr)319 SigType SigSegvActionSa(int sig,siginfo_t *sinfo, void *ptr)
320 {
321 int (*func)();
322 AR_caught_sig= sig;
323 AR_caught_sigsegv=1;
324 func = signal_arr[sig];
325 /*printf("\n%d:in sigaction %p, %d\n",armci_me,sinfo->si_addr,sinfo->si_errno);fflush(stdout);*/
326
327 if(func(sinfo->si_addr,sinfo->si_errno,sinfo->si_fd))
328 Error("Segmentation Violation error, status=",(int) SIGSEGV);
329 }
330
TrapSigSegvSigaction()331 void TrapSigSegvSigaction()
332 {
333 struct sigaction sa;
334 sa.sa_sigaction = (void *)SigSegvActionSa;
335 sigemptyset(&sa.sa_mask);
336 sa.sa_flags = SA_RESTART;
337 sigaction(SIGSEGV, &sa, NULL);
338 }
339 #endif
340
TrapSigSegv()341 void TrapSigSegv()
342 /*
343 Trap SIGSEGV
344 */
345 {
346 if ( (SigSegvOrig=signal(SIGSEGV, SigSegvHandler)) == SIG_ERR)
347 Error("TrapSigSegv: error from signal setting SIGSEGV", 0);
348 }
349
350
RestoreSigSegv()351 void RestoreSigSegv()
352 /*
353 Restore the original signal handler
354 */
355 {
356 /*
357 if(AR_caught_sigsegv) SigSegvOrig(SIGSEGV);
358 */
359 #ifdef ENABLE_CHECKPOINT__
360 struct sigaction sa;
361 sa.sa_handler = (void *)SigSegvOrig;
362 sigemptyset(&sa.sa_mask);
363 sa.sa_flags = SA_RESTART;
364 sigaction(SIGSEGV, &sa, NULL);
365 sigaction(SIGSEGV,&sa,NULL);
366 #else
367 if ( signal(SIGSEGV,SigSegvOrig) == SIG_ERR)
368 Error("RestoreSigSegv: error from restoring signal SIGSEGV",0);
369 #endif
370 }
371
372
373 /*********************** SIGSYS *************************************/
374 #if defined(SUN) && !defined(SOLARIS)
SigSysHandler(sig,code,scp,addr)375 SigType SigSysHandler(sig, code, scp, addr)
376 int code;
377 struct sigcontext *scp;
378 char *addr;
379 #else
380 SigType SigSysHandler(sig)
381 #endif
382 int sig;
383 {
384 AR_caught_sig= sig;
385 Error("Bad Argument To System Call error, status=",(int) sig);
386 }
387
TrapSigSys()388 void TrapSigSys()
389 /*
390 Trap SIGSYS
391 */
392 {
393 #ifndef LINUX
394 if ( signal(SIGSYS, SigSysHandler) == SIG_ERR)
395 Error("TrapSigSys: error from signal setting SIGSYS", 0);
396 #endif
397 }
398
399
400
401 /*********************** SIGTRAP *************************************/
402 #if defined(SUN) && !defined(SOLARIS)
SigTrapHandler(sig,code,scp,addr)403 SigType SigTrapHandler(sig, code, scp, addr)
404 int code;
405 struct sigcontext *scp;
406 char *addr;
407 #else
408 SigType SigTrapHandler(sig)
409 #endif
410 int sig;
411 {
412 AR_caught_sig= sig;
413 Error("Trace Trap error, status=",(int) sig);
414 }
415
TrapSigTrap()416 void TrapSigTrap()
417 /*
418 Trap SIGTRAP
419 */
420 {
421 if ( signal(SIGTRAP, SigTrapHandler) == SIG_ERR)
422 Error("TrapSigTrap: error from signal setting SIGTRAP", 0);
423 }
424
425
426
427 /*********************** SIGHUP *************************************/
428 #if defined(SUN) && !defined(SOLARIS)
SigHupHandler(sig,code,scp,addr)429 SigType SigHupHandler(sig, code, scp, addr)
430 int code;
431 struct sigcontext *scp;
432 char *addr;
433 #else
434 SigType SigHupHandler(sig)
435 #endif
436 int sig;
437 {
438 AR_caught_sig= sig;
439 Error("Hangup error, status=",(int) sig);
440 }
441
TrapSigHup()442 void TrapSigHup()
443 /*
444 Trap SIGHUP
445 */
446 {
447 if ( (SigHupOrig = signal(SIGHUP, SigHupHandler)) == SIG_ERR)
448 Error("TrapSigHup: error from signal setting SIGHUP", 0);
449 }
450
451
RestoreSigHup()452 void RestoreSigHup()
453 /*
454 Restore the original signal handler
455 */
456 {
457 if(AR_caught_sig== SIGHUP) SigHupOrig(SIGHUP);
458 if ( signal(SIGHUP, SigHupOrig) == SIG_ERR)
459 Error("RestoreSigHUP: error from restoring signal SIGHUP",0);
460 }
461
462
463
464 /*********************** SIGTERM *************************************/
465 #if defined(SUN) && !defined(SOLARIS)
SigTermHandler(sig,code,scp,addr)466 SigType SigTermHandler(sig, code, scp, addr)
467 int code;
468 struct sigcontext *scp;
469 char *addr;
470 #else
471 SigType SigTermHandler(sig)
472 #endif
473 int sig;
474 {
475 AR_caught_sigterm = 1;
476 AR_caught_sig= sig;
477 Error("Terminate signal was sent, status=",(int) sig);
478 }
479
TrapSigTerm()480 void TrapSigTerm()
481 /*
482 Trap SIGTERM
483 */
484 {
485 if ( (SigTermOrig = signal(SIGTERM, SigTermHandler)) == SIG_ERR)
486 Error("TrapSigTerm: error from signal setting SIGTERM", 0);
487 }
488
RestoreSigTerm()489 void RestoreSigTerm()
490 /*
491 Restore the original signal handler
492 */
493 {
494 if(AR_caught_sigterm && (SigTermOrig != SIG_DFL) ) SigTermOrig(SIGTERM);
495 if ( signal(SIGTERM, SigTermOrig) == SIG_ERR)
496 Error("RestoreSigTerm: error from restoring signal SIGTerm",0);
497 }
498
499
500 /*********************** SIGIOT *************************************/
501 #ifdef SIGIOT
502 #if defined(SUN) && !defined(SOLARIS)
SigIotHandler(sig,code,scp,addr)503 SigType SigIotHandler(sig, code, scp, addr)
504 int code;
505 struct sigcontext *scp;
506 char *addr;
507 #else
508 SigType SigIotHandler(sig)
509 #endif
510 int sig;
511 {
512 AR_caught_sig= sig;
513 Error("IOT signal was sent, status=",(int) sig);
514 }
515
TrapSigIot()516 void TrapSigIot()
517 /*
518 Trap SIGIOT
519 */
520 {
521 if ( signal(SIGIOT, SigIotHandler) == SIG_ERR)
522 Error("TrapSigIot: error from signal setting SIGIOT", 0);
523 }
524 #endif
525
526
527
528 /*********************** SIGCONT *************************************/
529 #if defined(SUN) && !defined(SOLARIS)
SigContHandler(sig,code,scp,addr)530 SigType SigContHandler(sig, code, scp, addr)
531 int code;
532 struct sigcontext *scp;
533 char *addr;
534 #else
535 SigType SigContHandler(sig)
536 #endif
537 int sig;
538 {
539 /* Error("Trace Cont error, status=",(int) sig);*/
540 AR_caught_sig= sig;
541 }
542
TrapSigCont()543 void TrapSigCont()
544 /*
545 Trap SIGCONT
546 */
547 {
548 if ( signal(SIGCONT, SigContHandler) == SIG_ERR)
549 Error("TrapSigCont: error from signal setting SIGCONT", 0);
550 }
551
552 /*********************** SIGXCPU *************************************/
553 #if defined(SUN) && !defined(SOLARIS)
SigXcpuHandler(sig,code,scp,addr)554 SigType SigXcpuHandler(sig, code, scp, addr)
555 int code;
556 struct sigcontext *scp;
557 char *addr;
558 #else
559 SigType SigXcpuHandler(sig)
560 #endif
561 int sig;
562 {
563 AR_caught_sig= sig;
564 Error("Terminate signal was sent, status=",(int) sig);
565 }
566
TrapSigXcpu()567 void TrapSigXcpu()
568 /*
569 Trap SIGXCPU
570 */
571 {
572 if ( signal(SIGXCPU, SigXcpuHandler) == SIG_ERR)
573 Error("TrapSigXcpu: error from signal setting SIGXCPU", 0);
574 }
575
576 /******************* external API *********************************/
577
/*
 * Install the handlers for the fault and termination signals.  The
 * installation order matches the original sequence.
 *   - SIGBUS is skipped when LAPI is defined.
 *   - SIGSEGV goes through sigaction() when ENABLE_CHECKPOINT is defined,
 *     through signal() otherwise.
 *   - SIGIOT is trapped only for LAPI or SGI; SIGXCPU only for SGI.
 */
void ARMCI_ChildrenTrapSignals()
{
#if !defined(LAPI)
    TrapSigBus();
#endif

    TrapSigFpe();
    TrapSigIll();

#if defined(ENABLE_CHECKPOINT)
    TrapSigSegvSigaction();
#else
    TrapSigSegv();
#endif

    TrapSigSys();
    TrapSigTrap();
    TrapSigAbort();
    TrapSigTerm();
    TrapSigInt();

#if defined(LAPI) || defined(SGI)
    TrapSigIot();
#endif

#if defined(SGI)
    TrapSigXcpu();
#endif
}
605
606
/*
 * Install the SIGCHLD handler (skipped when LAPI is defined) and the
 * SIGHUP handler.
 */
void ARMCI_ParentTrapSignals()
{
#if !defined(LAPI)
    TrapSigChld();
#endif

    TrapSigHup();
}
614
615
/*
 * Put back the saved dispositions for SIGTERM, SIGINT and SIGSEGV, in that
 * order.
 */
void ARMCI_RestoreSignals()
{
    RestoreSigTerm();   /* SIGTERM */
    RestoreSigInt();    /* SIGINT  */
    RestoreSigSegv();   /* SIGSEGV */
}
622
623
/*
 * Restore SIGCHLD (skipped when LAPI is defined), then everything
 * ARMCI_RestoreSignals covers, then SIGHUP.
 */
void ARMCI_ParentRestoreSignals()
{
#if !defined(LAPI)
    RestoreSigChld();
#endif

    ARMCI_RestoreSignals();
    RestoreSigHup();
}
632
633 #ifdef ENABLE_CHECKPOINT
634 /*user can register a function with 3 parameters, 1st offending address
635 * 2nd err number and third file descriptor*/
ARMCI_Register_Signal_Handler(int sig,void (* func)())636 void ARMCI_Register_Signal_Handler(int sig, void (*func)())
637 {
638 signal_arr[sig]=func;
639 }
640 #endif
641