xref: /netbsd/sys/kern/kern_heartbeat.c (revision 58a2df03)
1 /*	$NetBSD: kern_heartbeat.c,v 1.5 2023/07/16 10:18:19 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2023 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * heartbeat(9) -- periodic checks to ensure CPUs are making progress
31  *
32  * Manual tests to run when changing this file.  Magic numbers are for
33  * evbarm; adjust for other platforms.  Tests involving cpuctl
34  * online/offline assume a 2-CPU system -- for full testing on a >2-CPU
35  * system, offline all but one CPU.
36  *
37  * 1.	cpuctl offline 0
38  *	sleep 20
39  *	cpuctl online 0
40  *
41  * 2.	cpuctl offline 1
42  *	sleep 20
43  *	cpuctl online 1
44  *
45  * 3.	cpuctl offline 0
46  *	sysctl -w kern.heartbeat.max_period=5
47  *	sleep 10
48  *	sysctl -w kern.heartbeat.max_period=0
49  *	sleep 10
50  *	sysctl -w kern.heartbeat.max_period=5
51  *	sleep 10
52  *	cpuctl online 0
53  *
54  * 4.	sysctl -w debug.crashme_enable=1
55  *	sysctl -w debug.crashme.spl_spinout=1   # IPL_SOFTCLOCK
56  *	# verify system panics after 15sec
57  *
58  * 5.	sysctl -w debug.crashme_enable=1
59  *	sysctl -w debug.crashme.spl_spinout=6   # IPL_SCHED
60  *	# verify system panics after 15sec
61  *
62  * 6.	cpuctl offline 0
63  *	sysctl -w debug.crashme_enable=1
64  *	sysctl -w debug.crashme.spl_spinout=1   # IPL_SOFTCLOCK
65  *	# verify system panics after 15sec
66  *
67  * 7.	cpuctl offline 0
68  *	sysctl -w debug.crashme_enable=1
69  *	sysctl -w debug.crashme.spl_spinout=5   # IPL_VM
70  *	# verify system panics after 15sec
71  *
72  *	# Not this -- IPL_SCHED and IPL_HIGH spinout on a single CPU
73  *	# require a hardware watchdog timer.
74  *	#cpuctl offline 0
75  *	#sysctl -w debug.crashme_enable=1
76  *	#sysctl -w debug.crashme.spl_spinout=6   # IPL_SCHED
77  *	# hope watchdog timer kicks in
78  */
79 
80 #include <sys/cdefs.h>
81 __KERNEL_RCSID(0, "$NetBSD: kern_heartbeat.c,v 1.5 2023/07/16 10:18:19 riastradh Exp $");
82 
83 #ifdef _KERNEL_OPT
84 #include "opt_ddb.h"
85 #include "opt_heartbeat.h"
86 #endif
87 
88 #include "heartbeat.h"
89 
90 #include <sys/param.h>
91 #include <sys/types.h>
92 
93 #include <sys/atomic.h>
94 #include <sys/cpu.h>
95 #include <sys/errno.h>
96 #include <sys/heartbeat.h>
97 #include <sys/ipi.h>
98 #include <sys/kernel.h>
99 #include <sys/mutex.h>
100 #include <sys/sysctl.h>
101 #include <sys/systm.h>
102 #include <sys/xcall.h>
103 
104 #ifdef DDB
105 #include <ddb/ddb.h>
106 #endif
107 
108 /*
109  * Global state.
110  *
111  *	heartbeat_lock serializes access to heartbeat_max_period_secs
112  *	and heartbeat_max_period_ticks.  Two separate variables so we
113  *	can avoid multiplication or division in the heartbeat routine.
114  *
115  *	heartbeat_sih is stable after initialization in
116  *	heartbeat_start.
117  */
/* Held while reading or updating the two max-period variables below. */
kmutex_t heartbeat_lock			__cacheline_aligned;
/* Maximum heartbeat period in seconds; heartbeat() does nothing if zero. */
unsigned heartbeat_max_period_secs	__read_mostly;
/* The same period in ticks (seconds times hz), set by set_max_period. */
unsigned heartbeat_max_period_ticks	__read_mostly;

/* Handle of the heartbeat_intr softint established in heartbeat_start. */
void *heartbeat_sih			__read_mostly;
123 
/*
 * heartbeat_suspend()
 *
 *	Stop heartbeat monitoring of the current CPU.
 *
 *	To be called once the current CPU has been marked offline,
 *	while it is still running.  The caller must have preemption
 *	disabled.
 */
void
heartbeat_suspend(void)
{

	/*
	 * There is no state to tear down: the heartbeat checks look
	 * at the SPCF_OFFLINE flag and leave offline CPUs alone.
	 */
	KASSERT(curcpu_stable());
}
142 
143 /*
144  * heartbeat_resume_cpu(ci)
145  *
146  *	Resume heartbeat monitoring of ci.
147  *
148  *	Called at startup while cold, and whenever heartbeat monitoring
149  *	is re-enabled after being disabled or the period is changed.
150  *	When not cold, ci must be the current CPU.
151  */
152 static void
heartbeat_resume_cpu(struct cpu_info * ci)153 heartbeat_resume_cpu(struct cpu_info *ci)
154 {
155 
156 	KASSERT(__predict_false(cold) || curcpu_stable());
157 	KASSERT(__predict_false(cold) || ci == curcpu());
158 
159 	ci->ci_heartbeat_count = 0;
160 	ci->ci_heartbeat_uptime_cache = time_uptime;
161 	ci->ci_heartbeat_uptime_stamp = 0;
162 }
163 
/*
 * heartbeat_resume()
 *
 *	Restart heartbeat monitoring of the current CPU by resetting
 *	its heartbeat state.
 *
 *	To be called once the current CPU is running again but has
 *	not yet been marked online.  It is also used internally, via
 *	cross-call, when the maximum period goes from zero to nonzero.
 *	The caller must have preemption disabled.
 */
void
heartbeat_resume(void)
{
	int saved_ipl;

	KASSERT(curcpu_stable());

	/*
	 * Keep heartbeats out while the count and the uptime stamp
	 * are being reset; a heartbeat observing a half-reset state
	 * could otherwise report a bogus heart attack.
	 */
	saved_ipl = splsched();
	heartbeat_resume_cpu(curcpu());
	splx(saved_ipl);
}
191 
/*
 * heartbeat_reset_xc(a, b)
 *
 *	Cross-call handler: reset the heartbeat state of the CPU it
 *	runs on, right before heartbeat checks are enabled.  Both
 *	arguments are ignored.
 */
static void
heartbeat_reset_xc(void *a, void *b)
{

	(void)a;
	(void)b;

	heartbeat_resume();
}
204 
205 /*
206  * set_max_period(max_period)
207  *
208  *	Set the maximum period, in seconds, for heartbeat checks.
209  *
210  *	- If max_period is zero, disable them.
211  *
212  *	- If the max period was zero and max_period is nonzero, ensure
213  *	  all CPUs' heartbeat uptime caches are up-to-date before
214  *	  re-enabling them.
215  *
216  *	max_period must be below UINT_MAX/4/hz to avoid arithmetic
217  *	overflow and give room for slop.
218  *
219  *	Caller must hold heartbeat_lock.
220  */
221 static void
set_max_period(unsigned max_period)222 set_max_period(unsigned max_period)
223 {
224 
225 	KASSERTMSG(max_period <= UINT_MAX/4/hz,
226 	    "max_period=%u must not exceed UINT_MAX/4/hz=%u (hz=%u)",
227 	    max_period, UINT_MAX/4/hz, hz);
228 	KASSERT(mutex_owned(&heartbeat_lock));
229 
230 	/*
231 	 * If we're enabling heartbeat checks, make sure we have a
232 	 * reasonably up-to-date time_uptime cache on all CPUs so we
233 	 * don't think we had an instant heart attack.
234 	 */
235 	if (heartbeat_max_period_secs == 0 && max_period != 0) {
236 		if (cold) {
237 			CPU_INFO_ITERATOR cii;
238 			struct cpu_info *ci;
239 
240 			for (CPU_INFO_FOREACH(cii, ci))
241 				heartbeat_resume_cpu(ci);
242 		} else {
243 			const uint64_t ticket =
244 			    xc_broadcast(0, &heartbeat_reset_xc, NULL, NULL);
245 			xc_wait(ticket);
246 		}
247 	}
248 
249 	/*
250 	 * Once the heartbeat state has been updated on all (online)
251 	 * CPUs, set the period.  At this point, heartbeat checks can
252 	 * begin.
253 	 */
254 	atomic_store_relaxed(&heartbeat_max_period_secs, max_period);
255 	atomic_store_relaxed(&heartbeat_max_period_ticks, max_period*hz);
256 }
257 
258 /*
259  * heartbeat_max_period_ticks(SYSCTLFN_ARGS)
260  *
261  *	Sysctl handler for sysctl kern.heartbeat.max_period.  Verifies
262  *	it lies within a reasonable interval and sets it.
263  */
264 static int
heartbeat_max_period_sysctl(SYSCTLFN_ARGS)265 heartbeat_max_period_sysctl(SYSCTLFN_ARGS)
266 {
267 	struct sysctlnode node;
268 	unsigned max_period;
269 	int error;
270 
271 	mutex_enter(&heartbeat_lock);
272 
273 	max_period = heartbeat_max_period_secs;
274 	node = *rnode;
275 	node.sysctl_data = &max_period;
276 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
277 	if (error || newp == NULL)
278 		goto out;
279 
280 	/*
281 	 * Ensure there's plenty of slop between heartbeats.
282 	 */
283 	if (max_period > UINT_MAX/4/hz) {
284 		error = EOVERFLOW;
285 		goto out;
286 	}
287 
288 	/*
289 	 * Success!  Set the period.  This enables heartbeat checks if
290 	 * we went from zero period to nonzero period, or disables them
291 	 * if the other way around.
292 	 */
293 	set_max_period(max_period);
294 	error = 0;
295 
296 out:	mutex_exit(&heartbeat_lock);
297 	return error;
298 }
299 
300 /*
301  * sysctl_heartbeat_setup()
302  *
303  *	Set up the kern.heartbeat.* sysctl subtree.
304  */
305 SYSCTL_SETUP(sysctl_heartbeat_setup, "sysctl kern.heartbeat setup")
306 {
307 	const struct sysctlnode *rnode;
308 	int error;
309 
310 	mutex_init(&heartbeat_lock, MUTEX_DEFAULT, IPL_NONE);
311 
312 	/* kern.heartbeat */
313 	error = sysctl_createv(NULL, 0, NULL, &rnode,
314 	    CTLFLAG_PERMANENT,
315 	    CTLTYPE_NODE, "heartbeat",
316 	    SYSCTL_DESCR("Kernel heartbeat parameters"),
317 	    NULL, 0, NULL, 0,
318 	    CTL_KERN, CTL_CREATE, CTL_EOL);
319 	if (error) {
320 		printf("%s: failed to create kern.heartbeat: %d\n",
321 		    __func__, error);
322 		return;
323 	}
324 
325 	/* kern.heartbeat.max_period */
326 	error = sysctl_createv(NULL, 0, &rnode, NULL,
327 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
328 	    CTLTYPE_INT, "max_period",
329 	    SYSCTL_DESCR("Max seconds between heartbeats before panic"),
330 	    &heartbeat_max_period_sysctl, 0, NULL, 0,
331 	    CTL_CREATE, CTL_EOL);
332 	if (error) {
333 		printf("%s: failed to create kern.heartbeat.max_period: %d\n",
334 		    __func__, error);
335 		return;
336 	}
337 }
338 
339 /*
340  * heartbeat_intr(cookie)
341  *
342  *	Soft interrupt handler to update the local CPU's view of the
343  *	system uptime.  This runs at the same priority level as
344  *	callouts, so if callouts are stuck on this CPU, it won't run,
345  *	and eventually another CPU will notice that this one is stuck.
346  *
347  *	Don't do spl* here -- keep it to a minimum so if anything goes
348  *	wrong we don't end up with hard interrupts blocked and unable
349  *	to detect a missed heartbeat.
350  */
351 static void
heartbeat_intr(void * cookie)352 heartbeat_intr(void *cookie)
353 {
354 	unsigned count = atomic_load_relaxed(&curcpu()->ci_heartbeat_count);
355 	unsigned uptime = time_uptime;
356 
357 	atomic_store_relaxed(&curcpu()->ci_heartbeat_uptime_stamp, count);
358 	atomic_store_relaxed(&curcpu()->ci_heartbeat_uptime_cache, uptime);
359 }
360 
361 /*
362  * heartbeat_start()
363  *
364  *	Start system heartbeat monitoring.
365  */
366 void
heartbeat_start(void)367 heartbeat_start(void)
368 {
369 	const unsigned max_period = HEARTBEAT_MAX_PERIOD_DEFAULT;
370 
371 	/*
372 	 * Establish a softint so we can schedule it once ready.  This
373 	 * should be at the lowest softint priority level so that we
374 	 * ensure all softint priorities are making progress.
375 	 */
376 	heartbeat_sih = softint_establish(SOFTINT_CLOCK|SOFTINT_MPSAFE,
377 	    &heartbeat_intr, NULL);
378 
379 	/*
380 	 * Now that the softint is established, kick off heartbeat
381 	 * monitoring with the default period.  This will initialize
382 	 * the per-CPU state to an up-to-date cache of time_uptime.
383 	 */
384 	mutex_enter(&heartbeat_lock);
385 	set_max_period(max_period);
386 	mutex_exit(&heartbeat_lock);
387 }
388 
389 /*
390  * defibrillator(cookie)
391  *
392  *	IPI handler for defibrillation.  If the CPU's heart has stopped
393  *	beating normally, but the CPU can still execute things,
394  *	acknowledge the IPI to the doctor and then panic so we at least
395  *	get a stack trace from whatever the current CPU is stuck doing,
396  *	if not a core dump.
397  *
398  *	(This metaphor is a little stretched, since defibrillation is
399  *	usually administered when the heart is beating errattically but
400  *	hasn't stopped, and causes the heart to stop temporarily, and
401  *	one hopes it is not fatal.  But we're (software) engineers, so
402  *	we can stretch metaphors like silly putty in a blender.)
403  */
404 static void
defibrillator(void * cookie)405 defibrillator(void *cookie)
406 {
407 	bool *ack = cookie;
408 
409 	atomic_store_relaxed(ack, true);
410 	panic("%s[%d %s]: heart stopped beating", cpu_name(curcpu()),
411 	    curlwp->l_lid,
412 	    curlwp->l_name ? curlwp->l_name : curproc->p_comm);
413 }
414 
415 /*
416  * defibrillate(ci, unsigned d)
417  *
418  *	The patient CPU ci's heart has stopped beating after d seconds.
419  *	Force the patient CPU ci to panic, or panic on this CPU if the
420  *	patient CPU doesn't respond within 1sec.
421  */
422 static void __noinline
defibrillate(struct cpu_info * ci,unsigned d)423 defibrillate(struct cpu_info *ci, unsigned d)
424 {
425 	bool ack = false;
426 	ipi_msg_t msg = {
427 		.func = &defibrillator,
428 		.arg = &ack,
429 	};
430 	unsigned countdown = 1000; /* 1sec */
431 
432 	KASSERT(curcpu_stable());
433 
434 	/*
435 	 * First notify the console that the patient CPU's heart seems
436 	 * to have stopped beating.
437 	 */
438 	printf("%s: found %s heart stopped beating after %u seconds\n",
439 	    cpu_name(curcpu()), cpu_name(ci), d);
440 
441 	/*
442 	 * Next, give the patient CPU a chance to panic, so we get a
443 	 * stack trace on that CPU even if we don't get a crash dump.
444 	 */
445 	ipi_unicast(&msg, ci);
446 
447 	/*
448 	 * Busy-wait up to 1sec for the patient CPU to print a stack
449 	 * trace and panic.  If the patient CPU acknowledges the IPI,
450 	 * or if we're panicking anyway, just give up and stop here --
451 	 * the system is coming down soon and we should avoid getting
452 	 * in the way.
453 	 */
454 	while (countdown --> 0) {
455 		if (atomic_load_relaxed(&ack) ||
456 		    atomic_load_relaxed(&panicstr) != NULL)
457 			return;
458 		DELAY(1000);	/* 1ms */
459 	}
460 
461 	/*
462 	 * The patient CPU failed to acknowledge the panic request.
463 	 * Panic now; with any luck, we'll get a crash dump.
464 	 */
465 	panic("%s: found %s heart stopped beating and unresponsive",
466 	    cpu_name(curcpu()), cpu_name(ci));
467 }
468 
469 /*
470  * select_patient()
471  *
472  *	Select another CPU to check the heartbeat of.  Returns NULL if
473  *	there are no other online CPUs.  Never returns curcpu().
474  *	Caller must have kpreemption disabled.
475  */
476 static struct cpu_info *
select_patient(void)477 select_patient(void)
478 {
479 	CPU_INFO_ITERATOR cii;
480 	struct cpu_info *first = NULL, *patient = NULL, *ci;
481 	bool passedcur = false;
482 
483 	KASSERT(curcpu_stable());
484 
485 	/*
486 	 * In the iteration order of all CPUs, find the next online CPU
487 	 * after curcpu(), or the first online one if curcpu() is last
488 	 * in the iteration order.
489 	 */
490 	for (CPU_INFO_FOREACH(cii, ci)) {
491 		if (ci->ci_schedstate.spc_flags & SPCF_OFFLINE)
492 			continue;
493 		if (passedcur) {
494 			/*
495 			 * (...|curcpu()|ci|...)
496 			 *
497 			 * Found the patient right after curcpu().
498 			 */
499 			KASSERT(patient != ci);
500 			patient = ci;
501 			break;
502 		}
503 		if (ci == curcpu()) {
504 			/*
505 			 * (...|prev|ci=curcpu()|next|...)
506 			 *
507 			 * Note that we want next (or first, if there's
508 			 * nothing after curcpu()).
509 			 */
510 			passedcur = true;
511 			continue;
512 		}
513 		if (first == NULL) {
514 			/*
515 			 * (ci|...|curcpu()|...)
516 			 *
517 			 * Record ci as first in case there's nothing
518 			 * after curcpu().
519 			 */
520 			first = ci;
521 			continue;
522 		}
523 	}
524 
525 	/*
526 	 * If we hit the end, wrap around to the beginning.
527 	 */
528 	if (patient == NULL) {
529 		KASSERT(passedcur);
530 		patient = first;
531 	}
532 
533 	return patient;
534 }
535 
536 /*
537  * heartbeat()
538  *
539  *	1. Count a heartbeat on the local CPU.
540  *
541  *	2. Panic if the system uptime doesn't seem to have advanced in
542  *	   a while.
543  *
544  *	3. Panic if the soft interrupt on this CPU hasn't advanced the
545  *	   local view of the system uptime.
546  *
547  *	4. Schedule the soft interrupt to advance the local view of the
548  *	   system uptime.
549  *
550  *	5. Select another CPU to check the heartbeat of.
551  *
552  *	6. Panic if the other CPU hasn't advanced its view of the
553  *	   system uptime in a while.
554  */
555 void
heartbeat(void)556 heartbeat(void)
557 {
558 	unsigned period_ticks, period_secs;
559 	unsigned count, uptime, cache, stamp, d;
560 	struct cpu_info *patient;
561 
562 	KASSERT(curcpu_stable());
563 
564 	period_ticks = atomic_load_relaxed(&heartbeat_max_period_ticks);
565 	period_secs = atomic_load_relaxed(&heartbeat_max_period_secs);
566 	if (__predict_false(period_ticks == 0) ||
567 	    __predict_false(period_secs == 0) ||
568 	    __predict_false(curcpu()->ci_schedstate.spc_flags & SPCF_OFFLINE))
569 		return;
570 
571 	/*
572 	 * Count a heartbeat on this CPU.
573 	 */
574 	count = curcpu()->ci_heartbeat_count++;
575 
576 	/*
577 	 * If the uptime hasn't changed, make sure that we haven't
578 	 * counted too many of our own heartbeats since the uptime last
579 	 * changed, and stop here -- we only do the cross-CPU work once
580 	 * per second.
581 	 */
582 	uptime = time_uptime;
583 	cache = atomic_load_relaxed(&curcpu()->ci_heartbeat_uptime_cache);
584 	if (__predict_true(cache == uptime)) {
585 		/*
586 		 * Timecounter hasn't advanced by more than a second.
587 		 * Make sure the timecounter isn't stuck according to
588 		 * our heartbeats.
589 		 *
590 		 * Our own heartbeat count can't roll back, and
591 		 * time_uptime should be updated before it wraps
592 		 * around, so d should never go negative; hence no
593 		 * check for d < UINT_MAX/2.
594 		 */
595 		stamp =
596 		    atomic_load_relaxed(&curcpu()->ci_heartbeat_uptime_stamp);
597 		d = count - stamp;
598 		if (__predict_false(d > period_ticks)) {
599 			panic("%s: time has not advanced in %u heartbeats",
600 			    cpu_name(curcpu()), d);
601 		}
602 		return;
603 	}
604 
605 	/*
606 	 * If the uptime has changed, make sure that it hasn't changed
607 	 * so much that softints must be stuck on this CPU.  Since
608 	 * time_uptime is monotonic, this can't go negative, hence no
609 	 * check for d < UINT_MAX/2.
610 	 *
611 	 * This uses the hard timer interrupt handler on the current
612 	 * CPU to ensure soft interrupts at all priority levels have
613 	 * made progress.
614 	 */
615 	d = uptime - cache;
616 	if (__predict_false(d > period_secs)) {
617 		panic("%s: softints stuck for %u seconds",
618 		    cpu_name(curcpu()), d);
619 	}
620 
621 	/*
622 	 * Schedule a softint to update our cache of the system uptime
623 	 * so the next call to heartbeat, on this or another CPU, can
624 	 * detect progress on this one.
625 	 */
626 	softint_schedule(heartbeat_sih);
627 
628 	/*
629 	 * Select a patient to check the heartbeat of.  If there's no
630 	 * other online CPU, nothing to do.
631 	 */
632 	patient = select_patient();
633 	if (patient == NULL)
634 		return;
635 
636 	/*
637 	 * Verify that time is advancing on the patient CPU.  If the
638 	 * delta exceeds UINT_MAX/2, that means it is already ahead by
639 	 * a little on the other CPU, and the subtraction went
640 	 * negative, which is OK.  If the CPU has been
641 	 * offlined since we selected it, no worries.
642 	 *
643 	 * This uses the current CPU to ensure the other CPU has made
644 	 * progress, even if the other CPU's hard timer interrupt
645 	 * handler is stuck for some reason.
646 	 *
647 	 * XXX Maybe confirm it hasn't gone negative by more than
648 	 * max_period?
649 	 */
650 	d = uptime - atomic_load_relaxed(&patient->ci_heartbeat_uptime_cache);
651 	if (__predict_false(d > period_secs) &&
652 	    __predict_false(d < UINT_MAX/2) &&
653 	    ((patient->ci_schedstate.spc_flags & SPCF_OFFLINE) == 0))
654 		defibrillate(patient, d);
655 }
656 
657 /*
658  * heartbeat_dump()
659  *
660  *	Print the heartbeat data of all CPUs.  Can be called from ddb.
661  */
662 #ifdef DDB
663 static unsigned
db_read_unsigned(const unsigned * p)664 db_read_unsigned(const unsigned *p)
665 {
666 	unsigned x;
667 
668 	db_read_bytes((db_addr_t)p, sizeof(x), (char *)&x);
669 
670 	return x;
671 }
672 
673 void
heartbeat_dump(void)674 heartbeat_dump(void)
675 {
676 	struct cpu_info *ci;
677 
678 	db_printf("Heartbeats:\n");
679 	for (ci = db_cpu_first(); ci != NULL; ci = db_cpu_next(ci)) {
680 		db_printf("cpu%u: count %u uptime %u stamp %u\n",
681 		    db_read_unsigned(&ci->ci_index),
682 		    db_read_unsigned(&ci->ci_heartbeat_count),
683 		    db_read_unsigned(&ci->ci_heartbeat_uptime_cache),
684 		    db_read_unsigned(&ci->ci_heartbeat_uptime_stamp));
685 	}
686 }
687 #endif
688