xref: /illumos-gate/usr/src/uts/sun4v/os/wdt.c (revision 3db86aab)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/hsvc.h>
30 #include <sys/wdt.h>
31 #include <sys/cmn_err.h>
32 #include <sys/kmem.h>
33 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
35 #include <sys/hypervisor_api.h>
36 #include <sys/mach_descrip.h>
37 #include <sys/mdesc.h>
38 
39 #define	WDT_ON			1
40 #define	WDT_OFF			0
41 #define	WDT_DEFAULT_RESOLUTION	10		/* 10 milliseconds */
42 /*
43  * MILLISEC defines the number of milliseconds in a second.
44  */
45 #define	WDT_MAX_RESOLUTION	(1 * MILLISEC)	/* 1 second */
46 #define	WDT_REGULAR_TIMEOUT	(10 * MILLISEC)	/* 10 seconds */
47 #define	WDT_LONG_TIMEOUT	(60 * MILLISEC)	/* 60 seconds */
48 #define	WDT_MIN_COREAPI_MAJOR	1
49 #define	WDT_MIN_COREAPI_MINOR	1
50 /*
51  * The ratio to calculate the watchdog timer pat interval.
52  */
53 #define	WDT_PAT_INTERVAL(x)	((x) / 2)
54 
55 int watchdog_enabled = 1;
56 
57 static void set_watchdog_pat_intervals(void);
58 static void config_watchdog(uint64_t, int);
59 
60 /*
61  * Flag used to pat/suspend/resume the watchdog timer.
62  */
63 static int watchdog_activated = WDT_OFF;
64 static uint64_t watchdog_regular_timeout = WDT_REGULAR_TIMEOUT;
65 static uint64_t watchdog_long_timeout = 0;
66 static uint64_t watchdog_resolution = WDT_DEFAULT_RESOLUTION;
67 static int64_t watchdog_last_pat = 0;	/* The time of last pat. */
68 static int64_t last_pat_interval = 0;	/* The pat interval of last pat. */
69 static int64_t watchdog_long_pat_interval = 0;
70 static int64_t watchdog_regular_pat_interval = 0;
71 
72 void
73 watchdog_init(void)
74 {
75 	int num_nodes;
76 	int nplat;
77 	md_t *mdp;
78 	mde_cookie_t *listp = NULL;
79 	int listsz;
80 	uint64_t major;
81 	uint64_t minor;
82 	uint64_t watchdog_max_timeout;
83 
84 	if (!watchdog_enabled) {
85 		return;
86 	}
87 
88 	if (hsvc_version(HSVC_GROUP_CORE, &major, &minor) != 0 ||
89 		major != WDT_MIN_COREAPI_MAJOR ||
90 		minor < WDT_MIN_COREAPI_MINOR) {
91 		cmn_err(CE_NOTE, "Disabling watchdog as watchdog services are "
92 			"not available\n");
93 		watchdog_enabled = 0;
94 		return;
95 	}
96 
97 	/*
98 	 * Get the watchdog-max-timeout and watchdog-resolution MD properties.
99 	 */
100 	if ((mdp = md_get_handle()) == NULL) {
101 		cmn_err(CE_WARN, "Unable to initialize machine description, "
102 			"watchdog is disabled.");
103 		watchdog_enabled = 0;
104 		return;
105 	}
106 
107 	num_nodes = md_node_count(mdp);
108 	ASSERT(num_nodes > 0);
109 
110 	listsz = num_nodes * sizeof (mde_cookie_t);
111 	listp = kmem_zalloc(listsz, KM_SLEEP);
112 
113 	nplat = md_scan_dag(mdp, md_root_node(mdp),
114 		md_find_name(mdp, "platform"), md_find_name(mdp, "fwd"), listp);
115 
116 	ASSERT(nplat == 1);
117 
118 	if (md_get_prop_val(mdp, listp[0], "watchdog-max-timeout",
119 		&watchdog_max_timeout)) {
120 		cmn_err(CE_WARN, "Cannot read watchdog-max-timeout, watchdog "
121 			"is disabled.");
122 		watchdog_enabled = 0;
123 		kmem_free(listp, listsz);
124 		(void) md_fini_handle(mdp);
125 		return;
126 	}
127 
128 	if (watchdog_max_timeout < WDT_REGULAR_TIMEOUT) {
129 		cmn_err(CE_WARN, "Invalid watchdog-max-timeout value, watchdog "
130 			"is disabled.");
131 		watchdog_enabled = 0;
132 		kmem_free(listp, listsz);
133 		(void) md_fini_handle(mdp);
134 		return;
135 	}
136 
137 	if (md_get_prop_val(mdp, listp[0], "watchdog-resolution",
138 		&watchdog_resolution)) {
139 		cmn_err(CE_WARN, "Cannot read watchdog-resolution, watchdog "
140 			"is disabled.");
141 		watchdog_enabled = 0;
142 		kmem_free(listp, listsz);
143 		(void) md_fini_handle(mdp);
144 		return;
145 	}
146 
147 	if (watchdog_resolution == 0 ||
148 		watchdog_resolution > WDT_MAX_RESOLUTION) {
149 		watchdog_resolution = WDT_DEFAULT_RESOLUTION;
150 	}
151 	kmem_free(listp, listsz);
152 	(void) md_fini_handle(mdp);
153 
154 	watchdog_long_timeout = MIN(WDT_LONG_TIMEOUT, watchdog_max_timeout);
155 
156 	/*
157 	 * round the timeout to the nearest smaller value.
158 	 */
159 	watchdog_long_timeout -=
160 		watchdog_long_timeout % watchdog_resolution;
161 	watchdog_regular_timeout -=
162 		watchdog_regular_timeout % watchdog_resolution;
163 	set_watchdog_pat_intervals();
164 
165 	config_watchdog(watchdog_regular_timeout, WDT_ON);
166 }
167 
168 /*
169  * Pat the watchdog timer periodically, for regular pat in tod_get when
170  * the kernel runs normally and long pat in deadman when panicking.
171  */
172 void
173 watchdog_pat()
174 {
175 	int64_t pat_interval;
176 	int64_t current_lbolt64;
177 	uint64_t timeout;
178 
179 	if (watchdog_enabled && watchdog_activated) {
180 		if (panicstr) {
181 			/*
182 			 * long timeout is only used while panicking.
183 			 */
184 			timeout = watchdog_long_timeout;
185 			pat_interval = watchdog_long_pat_interval;
186 		} else {
187 			timeout = watchdog_regular_timeout;
188 			pat_interval = watchdog_regular_pat_interval;
189 		}
190 
191 		current_lbolt64 = lbolt64;
192 
193 		if ((current_lbolt64 - watchdog_last_pat)
194 			>= last_pat_interval) {
195 			/*
196 			 * Pat the watchdog via hv api:
197 			 */
198 			config_watchdog(timeout, WDT_ON);
199 
200 			last_pat_interval = pat_interval;
201 			watchdog_last_pat = current_lbolt64;
202 		}
203 	}
204 }
205 
206 /*
207  * We don't save/restore the remaining watchdog timeout time at present.
208  */
209 void
210 watchdog_suspend()
211 {
212 	if (watchdog_enabled && watchdog_activated) {
213 		config_watchdog(0, WDT_OFF);
214 	}
215 }
216 
217 /*
218  * We don't save/restore the remaining watchdog timeout time at present.
219  */
220 void
221 watchdog_resume()
222 {
223 	if (watchdog_enabled && !watchdog_activated) {
224 		if (panicstr) {
225 			config_watchdog(watchdog_long_timeout, WDT_ON);
226 		} else {
227 			config_watchdog(watchdog_regular_timeout, WDT_ON);
228 		}
229 	}
230 }
231 
232 void
233 watchdog_clear()
234 {
235 	if (watchdog_enabled && watchdog_activated) {
236 		config_watchdog(0, WDT_OFF);
237 	}
238 }
239 
240 /*
241  * Set the pat intervals for both regular (when Solaris is running),
242  * and long timeout (i.e., when panicking) cases.
243  */
244 static void
245 set_watchdog_pat_intervals(void)
246 {
247 	watchdog_regular_pat_interval =
248 		MSEC_TO_TICK(WDT_PAT_INTERVAL(watchdog_regular_timeout));
249 	watchdog_long_pat_interval =
250 		MSEC_TO_TICK(WDT_PAT_INTERVAL(watchdog_long_timeout));
251 }
252 
253 static void
254 config_watchdog(uint64_t timeout, int new_state)
255 {
256 	uint64_t time_remaining;
257 	uint64_t ret;
258 
259 	watchdog_activated = new_state;
260 	ret = hv_mach_set_watchdog(timeout, &time_remaining);
261 	if (ret != H_EOK) {
262 		cmn_err(CE_WARN, "Failed to operate on the watchdog. "
263 			"Error = 0x%lx", ret);
264 		watchdog_enabled = 0;
265 	}
266 }
267