1 /*	$NetBSD: watchdog.c,v 1.1.1.1 2009/06/23 10:09:01 tron Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	watchdog 3
6 /* SUMMARY
7 /*	watchdog timer
8 /* SYNOPSIS
9 /*	#include <watchdog.h>
10 /*
11 /*	WATCHDOG *watchdog_create(timeout, action, context)
12 /*	unsigned timeout;
13 /*	void	(*action)(WATCHDOG *watchdog, char *context);
14 /*	char	*context;
15 /*
16 /*	void	watchdog_start(watchdog)
17 /*	WATCHDOG *watchdog;
18 /*
19 /*	void	watchdog_stop(watchdog)
20 /*	WATCHDOG *watchdog;
21 /*
22 /*	void	watchdog_destroy(watchdog)
23 /*	WATCHDOG *watchdog;
24 /*
25 /*	void	watchdog_pat()
26 /* DESCRIPTION
27 /*	This module implements watchdog timers that are based on ugly
28 /*	UNIX alarm timers. The module is designed to survive systems
29 /*	with clocks that jump occasionally.
30 /*
31 /*	Watchdog timers can be stacked. Only one watchdog timer can be
32 /*	active at a time. Only the last created watchdog timer can be
33 /*	manipulated. Watchdog timers must be destroyed in reverse order
34 /*	of creation.
35 /*
36 /*	watchdog_create() suspends the current watchdog timer, if any,
37 /*	and instantiates a new watchdog timer.
38 /*
39 /*	watchdog_start() starts or restarts the watchdog timer.
40 /*
41 /*	watchdog_stop() stops the watchdog timer.
42 /*
43 /*	watchdog_destroy() stops the watchdog timer, and resumes the
44 /*	watchdog timer instance that was suspended by watchdog_create().
45 /*
46 /*	watchdog_pat() pats the watchdog, so it stays quiet.
47 /*
48 /*	Arguments:
49 /* .IP timeout
/*	The watchdog time limit in seconds; it must be at least 3. When
/*	the watchdog timer runs, the process must invoke watchdog_start(),
/*	watchdog_pat(), watchdog_stop() or watchdog_destroy() before the
/*	time limit is reached.
53 /* .IP action
54 /*	A null pointer, or pointer to function that is called when the
55 /*	watchdog alarm goes off. The default action is to terminate
56 /*	the process with a fatal error.
57 /* .IP context
58 /*	Application context that is passed to the action routine.
59 /* .IP watchdog
60 /*	Must be a pointer to the most recently created watchdog instance.
61 /*	This argument is checked upon each call.
62 /* BUGS
63 /*	UNIX alarm timers are not stackable, so there can be at most one
64 /*	watchdog instance active at any given time.
65 /* SEE ALSO
66 /*	msg(3) diagnostics interface
67 /* DIAGNOSTICS
68 /*	Fatal errors: memory allocation problem, system call failure.
69 /*	Panics: interface violations.
70 /* LICENSE
71 /* .ad
72 /* .fi
73 /*	The Secure Mailer license must be distributed with this software.
74 /* AUTHOR(S)
75 /*	Wietse Venema
76 /*	IBM T.J. Watson Research
77 /*	P.O. Box 704
78 /*	Yorktown Heights, NY 10598, USA
79 /*--*/
80 
81 /* System library. */
82 
83 #include <sys_defs.h>
84 #include <unistd.h>
85 #include <signal.h>
86 #include <posix_signals.h>
87 
88 /* Utility library. */
89 
90 #include <msg.h>
91 #include <mymalloc.h>
92 #include <killme_after.h>
93 #include <watchdog.h>
94 
95 /* Application-specific. */
96 
97  /*
98   * Rather than having one timer that goes off when it is too late, we break
99   * up the time limit into smaller intervals so that we can deal with clocks
100   * that jump occasionally.
101   */
102 #define WATCHDOG_STEPS	3
103 
104  /*
105   * UNIX alarms are not stackable, but we can save and restore state, so that
106   * watchdogs can at least be nested, sort of.
107   */
108 struct WATCHDOG {
109     unsigned timeout;			/* our time resolution */
110     WATCHDOG_FN action;			/* application routine */
111     char   *context;			/* application context */
112     int     trip_run;			/* number of successive timeouts */
113     WATCHDOG *saved_watchdog;		/* saved state */
114     struct sigaction saved_action;	/* saved state */
115     unsigned saved_time;		/* saved state */
116 };
117 
118  /*
119   * However, only one watchdog instance can be current, and the caller has to
120   * restore state before a prior watchdog instance can be manipulated.
121   */
122 static WATCHDOG *watchdog_curr;
123 
124 /* watchdog_event - handle timeout event */
125 
126 static void watchdog_event(int unused_sig)
127 {
128     const char *myname = "watchdog_event";
129     WATCHDOG *wp;
130 
131     /*
132      * This routine runs as a signal handler. We should not do anything that
133      * could involve memory allocation/deallocation, but exiting without
134      * proper explanation would be unacceptable. For this reason, msg(3) was
135      * made safe for usage by signal handlers that terminate the process.
136      */
137     if ((wp = watchdog_curr) == 0)
138 	msg_panic("%s: no instance", myname);
139     if (msg_verbose > 1)
140 	msg_info("%s: %p %d", myname, (void *) wp, wp->trip_run);
141     if (++(wp->trip_run) < WATCHDOG_STEPS) {
142 	alarm(wp->timeout);
143     } else {
144 	if (wp->action)
145 	    wp->action(wp, wp->context);
146 	else {
147 	    killme_after(5);
148 #ifdef TEST
149 	    pause();
150 #endif
151 	    msg_fatal("watchdog timeout");
152 	}
153     }
154 }
155 
156 /* watchdog_create - create watchdog instance */
157 
158 WATCHDOG *watchdog_create(unsigned timeout, WATCHDOG_FN action, char *context)
159 {
160     const char *myname = "watchdog_create";
161     struct sigaction sig_action;
162     WATCHDOG *wp;
163 
164     wp = (WATCHDOG *) mymalloc(sizeof(*wp));
165     if ((wp->timeout = timeout / WATCHDOG_STEPS) == 0)
166 	msg_panic("%s: timeout %d is too small", myname, timeout);
167     wp->action = action;
168     wp->context = context;
169     wp->saved_watchdog = watchdog_curr;
170     wp->saved_time = alarm(0);
171     sigemptyset(&sig_action.sa_mask);
172 #ifdef SA_RESTART
173     sig_action.sa_flags = SA_RESTART;
174 #else
175     sig_action.sa_flags = 0;
176 #endif
177     sig_action.sa_handler = watchdog_event;
178     if (sigaction(SIGALRM, &sig_action, &wp->saved_action) < 0)
179 	msg_fatal("%s: sigaction(SIGALRM): %m", myname);
180     if (msg_verbose > 1)
181 	msg_info("%s: %p %d", myname, (void *) wp, timeout);
182     return (watchdog_curr = wp);
183 }
184 
185 /* watchdog_destroy - destroy watchdog instance, restore state */
186 
187 void    watchdog_destroy(WATCHDOG *wp)
188 {
189     const char *myname = "watchdog_destroy";
190 
191     watchdog_stop(wp);
192     watchdog_curr = wp->saved_watchdog;
193     if (sigaction(SIGALRM, &wp->saved_action, (struct sigaction *) 0) < 0)
194 	msg_fatal("%s: sigaction(SIGALRM): %m", myname);
195     if (wp->saved_time)
196 	alarm(wp->saved_time);
197     myfree((char *) wp);
198     if (msg_verbose > 1)
199 	msg_info("%s: %p", myname, (void *) wp);
200 }
201 
202 /* watchdog_start - enable watchdog timer */
203 
204 void    watchdog_start(WATCHDOG *wp)
205 {
206     const char *myname = "watchdog_start";
207 
208     if (wp != watchdog_curr)
209 	msg_panic("%s: wrong watchdog instance", myname);
210     wp->trip_run = 0;
211     alarm(wp->timeout);
212     if (msg_verbose > 1)
213 	msg_info("%s: %p", myname, (void *) wp);
214 }
215 
216 /* watchdog_stop - disable watchdog timer */
217 
218 void    watchdog_stop(WATCHDOG *wp)
219 {
220     const char *myname = "watchdog_stop";
221 
222     if (wp != watchdog_curr)
223 	msg_panic("%s: wrong watchdog instance", myname);
224     alarm(0);
225     if (msg_verbose > 1)
226 	msg_info("%s: %p", myname, (void *) wp);
227 }
228 
229 /* watchdog_pat - pat the dog so it stays quiet */
230 
231 void    watchdog_pat(void)
232 {
233     const char *myname = "watchdog_pat";
234 
235     if (watchdog_curr)
236 	watchdog_curr->trip_run = 0;
237     if (msg_verbose > 1)
238 	msg_info("%s: %p", myname, (void *) watchdog_curr);
239 }
240 
241 #ifdef TEST
242 
243 #include <vstream.h>
244 
245 int     main(int unused_argc, char **unused_argv)
246 {
247     WATCHDOG *wp;
248 
249     msg_verbose = 2;
250 
251     wp = watchdog_create(10, (WATCHDOG_FN) 0, (char *) 0);
252     watchdog_start(wp);
253     do {
254 	watchdog_pat();
255     } while (VSTREAM_GETCHAR() != VSTREAM_EOF);
256     watchdog_destroy(wp);
257     return (0);
258 }
259 
260 #endif
261