/* $NetBSD: watchdog.c,v 1.1.1.1 2009/06/23 10:09:01 tron Exp $ */

/*++
/* NAME
/*	watchdog 3
/* SUMMARY
/*	watchdog timer
/* SYNOPSIS
/*	#include <watchdog.h>
/*
/*	WATCHDOG *watchdog_create(timeout, action, context)
/*	unsigned timeout;
/*	void	(*action)(WATCHDOG *watchdog, char *context);
/*	char	*context;
/*
/*	void	watchdog_start(watchdog)
/*	WATCHDOG *watchdog;
/*
/*	void	watchdog_stop(watchdog)
/*	WATCHDOG *watchdog;
/*
/*	void	watchdog_destroy(watchdog)
/*	WATCHDOG *watchdog;
/*
/*	void	watchdog_pat()
/* DESCRIPTION
/*	This module implements watchdog timers that are based on ugly
/*	UNIX alarm timers. The module is designed to survive systems
/*	with clocks that jump occasionally.
/*
/*	Watchdog timers can be stacked. Only one watchdog timer can be
/*	active at a time. Only the last created watchdog timer can be
/*	manipulated. Watchdog timers must be destroyed in reverse order
/*	of creation.
/*
/*	watchdog_create() suspends the current watchdog timer, if any,
/*	and instantiates a new watchdog timer.
/*
/*	watchdog_start() starts or restarts the watchdog timer.
/*
/*	watchdog_stop() stops the watchdog timer.
/*
/*	watchdog_destroy() stops the watchdog timer, and resumes the
/*	watchdog timer instance that was suspended by watchdog_create().
/*
/*	watchdog_pat() pats the watchdog, so it stays quiet.
/*
/*	Arguments:
/* .IP timeout
/*	The watchdog time limit. When the watchdog timer runs, the
/*	process must invoke watchdog_start(), watchdog_stop() or
/*	watchdog_destroy() before the time limit is reached.
/* .IP action
/*	A null pointer, or pointer to function that is called when the
/*	watchdog alarm goes off. The default action is to terminate
/*	the process with a fatal error.
/* .IP context
/*	Application context that is passed to the action routine.
59 /* .IP watchdog 60 /* Must be a pointer to the most recently created watchdog instance. 61 /* This argument is checked upon each call. 62 /* BUGS 63 /* UNIX alarm timers are not stackable, so there can be at most one 64 /* watchdog instance active at any given time. 65 /* SEE ALSO 66 /* msg(3) diagnostics interface 67 /* DIAGNOSTICS 68 /* Fatal errors: memory allocation problem, system call failure. 69 /* Panics: interface violations. 70 /* LICENSE 71 /* .ad 72 /* .fi 73 /* The Secure Mailer license must be distributed with this software. 74 /* AUTHOR(S) 75 /* Wietse Venema 76 /* IBM T.J. Watson Research 77 /* P.O. Box 704 78 /* Yorktown Heights, NY 10598, USA 79 /*--*/ 80 81 /* System library. */ 82 83 #include <sys_defs.h> 84 #include <unistd.h> 85 #include <signal.h> 86 #include <posix_signals.h> 87 88 /* Utility library. */ 89 90 #include <msg.h> 91 #include <mymalloc.h> 92 #include <killme_after.h> 93 #include <watchdog.h> 94 95 /* Application-specific. */ 96 97 /* 98 * Rather than having one timer that goes off when it is too late, we break 99 * up the time limit into smaller intervals so that we can deal with clocks 100 * that jump occasionally. 101 */ 102 #define WATCHDOG_STEPS 3 103 104 /* 105 * UNIX alarms are not stackable, but we can save and restore state, so that 106 * watchdogs can at least be nested, sort of. 107 */ 108 struct WATCHDOG { 109 unsigned timeout; /* our time resolution */ 110 WATCHDOG_FN action; /* application routine */ 111 char *context; /* application context */ 112 int trip_run; /* number of successive timeouts */ 113 WATCHDOG *saved_watchdog; /* saved state */ 114 struct sigaction saved_action; /* saved state */ 115 unsigned saved_time; /* saved state */ 116 }; 117 118 /* 119 * However, only one watchdog instance can be current, and the caller has to 120 * restore state before a prior watchdog instance can be manipulated. 
121 */ 122 static WATCHDOG *watchdog_curr; 123 124 /* watchdog_event - handle timeout event */ 125 126 static void watchdog_event(int unused_sig) 127 { 128 const char *myname = "watchdog_event"; 129 WATCHDOG *wp; 130 131 /* 132 * This routine runs as a signal handler. We should not do anything that 133 * could involve memory allocation/deallocation, but exiting without 134 * proper explanation would be unacceptable. For this reason, msg(3) was 135 * made safe for usage by signal handlers that terminate the process. 136 */ 137 if ((wp = watchdog_curr) == 0) 138 msg_panic("%s: no instance", myname); 139 if (msg_verbose > 1) 140 msg_info("%s: %p %d", myname, (void *) wp, wp->trip_run); 141 if (++(wp->trip_run) < WATCHDOG_STEPS) { 142 alarm(wp->timeout); 143 } else { 144 if (wp->action) 145 wp->action(wp, wp->context); 146 else { 147 killme_after(5); 148 #ifdef TEST 149 pause(); 150 #endif 151 msg_fatal("watchdog timeout"); 152 } 153 } 154 } 155 156 /* watchdog_create - create watchdog instance */ 157 158 WATCHDOG *watchdog_create(unsigned timeout, WATCHDOG_FN action, char *context) 159 { 160 const char *myname = "watchdog_create"; 161 struct sigaction sig_action; 162 WATCHDOG *wp; 163 164 wp = (WATCHDOG *) mymalloc(sizeof(*wp)); 165 if ((wp->timeout = timeout / WATCHDOG_STEPS) == 0) 166 msg_panic("%s: timeout %d is too small", myname, timeout); 167 wp->action = action; 168 wp->context = context; 169 wp->saved_watchdog = watchdog_curr; 170 wp->saved_time = alarm(0); 171 sigemptyset(&sig_action.sa_mask); 172 #ifdef SA_RESTART 173 sig_action.sa_flags = SA_RESTART; 174 #else 175 sig_action.sa_flags = 0; 176 #endif 177 sig_action.sa_handler = watchdog_event; 178 if (sigaction(SIGALRM, &sig_action, &wp->saved_action) < 0) 179 msg_fatal("%s: sigaction(SIGALRM): %m", myname); 180 if (msg_verbose > 1) 181 msg_info("%s: %p %d", myname, (void *) wp, timeout); 182 return (watchdog_curr = wp); 183 } 184 185 /* watchdog_destroy - destroy watchdog instance, restore state */ 186 187 
void watchdog_destroy(WATCHDOG *wp) 188 { 189 const char *myname = "watchdog_destroy"; 190 191 watchdog_stop(wp); 192 watchdog_curr = wp->saved_watchdog; 193 if (sigaction(SIGALRM, &wp->saved_action, (struct sigaction *) 0) < 0) 194 msg_fatal("%s: sigaction(SIGALRM): %m", myname); 195 if (wp->saved_time) 196 alarm(wp->saved_time); 197 myfree((char *) wp); 198 if (msg_verbose > 1) 199 msg_info("%s: %p", myname, (void *) wp); 200 } 201 202 /* watchdog_start - enable watchdog timer */ 203 204 void watchdog_start(WATCHDOG *wp) 205 { 206 const char *myname = "watchdog_start"; 207 208 if (wp != watchdog_curr) 209 msg_panic("%s: wrong watchdog instance", myname); 210 wp->trip_run = 0; 211 alarm(wp->timeout); 212 if (msg_verbose > 1) 213 msg_info("%s: %p", myname, (void *) wp); 214 } 215 216 /* watchdog_stop - disable watchdog timer */ 217 218 void watchdog_stop(WATCHDOG *wp) 219 { 220 const char *myname = "watchdog_stop"; 221 222 if (wp != watchdog_curr) 223 msg_panic("%s: wrong watchdog instance", myname); 224 alarm(0); 225 if (msg_verbose > 1) 226 msg_info("%s: %p", myname, (void *) wp); 227 } 228 229 /* watchdog_pat - pat the dog so it stays quiet */ 230 231 void watchdog_pat(void) 232 { 233 const char *myname = "watchdog_pat"; 234 235 if (watchdog_curr) 236 watchdog_curr->trip_run = 0; 237 if (msg_verbose > 1) 238 msg_info("%s: %p", myname, (void *) watchdog_curr); 239 } 240 241 #ifdef TEST 242 243 #include <vstream.h> 244 245 int main(int unused_argc, char **unused_argv) 246 { 247 WATCHDOG *wp; 248 249 msg_verbose = 2; 250 251 wp = watchdog_create(10, (WATCHDOG_FN) 0, (char *) 0); 252 watchdog_start(wp); 253 do { 254 watchdog_pat(); 255 } while (VSTREAM_GETCHAR() != VSTREAM_EOF); 256 watchdog_destroy(wp); 257 return (0); 258 } 259 260 #endif 261