xref: /illumos-gate/usr/src/uts/sun4u/ngdr/io/dr_quiesce.c (revision 134a1f4e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * A CPR derivative specifically for starfire/starcat
29  */
30 
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/machparam.h>
34 #include <sys/machsystm.h>
35 #include <sys/ddi.h>
36 #define	SUNDDI_IMPL
37 #include <sys/sunddi.h>
38 #include <sys/sunndi.h>
39 #include <sys/devctl.h>
40 #include <sys/time.h>
41 #include <sys/kmem.h>
42 #include <nfs/lm.h>
43 #include <sys/ddi_impldefs.h>
44 #include <sys/ndi_impldefs.h>
45 #include <sys/obpdefs.h>
46 #include <sys/cmn_err.h>
47 #include <sys/debug.h>
48 #include <sys/errno.h>
49 #include <sys/callb.h>
50 #include <sys/clock.h>
51 #include <sys/x_call.h>
52 #include <sys/cpuvar.h>
53 #include <sys/epm.h>
54 #include <sys/vfs.h>
55 
56 #include <sys/cpu_sgnblk_defs.h>
57 #include <sys/dr.h>
58 #include <sys/dr_util.h>
59 
60 #include <sys/promif.h>
61 #include <sys/conf.h>
62 #include <sys/cyclic.h>
63 
64 extern void	e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
65 extern void	e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
66 extern int	is_pseudo_device(dev_info_t *dip);
67 
68 extern kmutex_t	cpu_lock;
69 extern dr_unsafe_devs_t dr_unsafe_devs;
70 
71 static int		dr_is_real_device(dev_info_t *dip);
72 static int		dr_is_unsafe_major(major_t major);
73 static int		dr_bypass_device(char *dname);
74 static int		dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
75 static int		dr_resolve_devname(dev_info_t *dip, char *buffer,
76 				char *alias);
77 static sbd_error_t	*drerr_int(int e_code, uint64_t *arr, int idx,
78 				int majors);
79 static int		dr_add_int(uint64_t *arr, int idx, int len,
80 				uint64_t val);
81 
82 int dr_pt_test_suspend(dr_handle_t *hp);
83 
/*
 * dr_quiesce.c interface
 *
 * Progress markers used internally by dr_suspend() and dr_resume().
 * dr_suspend() advances the state as each phase is entered (user threads,
 * then drivers, then the full CPU pause); dr_resume() switches on the
 * recorded state and falls through the cases so that only the phases that
 * were actually entered get undone.  The numeric ordering matters:
 * dr_resume() compares the state against DR_SRSTATE_FULL.
 */
typedef enum dr_suspend_state {
	DR_SRSTATE_BEGIN = 0,	/* nothing suspended yet */
	DR_SRSTATE_USER = 1,	/* user-thread stop phase entered */
	DR_SRSTATE_DRIVER = 2,	/* driver suspend phase entered */
	DR_SRSTATE_FULL = 3	/* fully quiesced, cpus paused */
} suspend_state_t;
94 
95 struct dr_sr_handle {
96 	dr_handle_t		*sr_dr_handlep;
97 	dev_info_t		*sr_failed_dip;
98 	suspend_state_t		sr_suspend_state;
99 	uint_t			sr_flags;
100 	uint64_t		sr_err_ints[DR_MAX_ERR_INT];
101 	int			sr_err_idx;
102 };
103 
104 #define	SR_FLAG_WATCHDOG	0x1
105 
/*
 * XXX
 * This hack will go away before RTI.  Just for testing.
 *
 * Names of drivers to skip when performing a suspend.  The list is
 * terminated by an empty string, which is what dr_bypass_device() looks
 * for; as shipped it contains only that terminator.
 */
static char *dr_bypass_list[] = { "" };
114 
115 
#define		SKIP_SYNC	/* bypass sync ops in dr_suspend */

/*
 * Knobs controlling how dr_suspend() treats user threads.
 *
 * dr_skip_user_threads:
 *	When true, user threads are neither stopped nor restarted and the
 *	two flags below are not used.
 * dr_check_user_stop_result:
 *	When true, a failure to stop user threads aborts the suspend.
 * dr_allow_blocked_threads:
 *	When true, a thread that is blocked in the kernel but has been
 *	marked to stop (see DR_VSTOPPED) counts as stopped.
 *
 * All combinations of dr_check_user_stop_result and
 * dr_allow_blocked_threads are permitted, even though it might not make
 * much sense to disallow blocked threads when the stop result is not
 * checked at all.
 */
static int	dr_skip_user_threads = 0;	/* default to FALSE */
static int	dr_check_user_stop_result = 1;	/* default to TRUE */
static int	dr_allow_blocked_threads = 1;	/* default to TRUE */

#define	DR_CPU_LOOP_MSEC	1000
135 
136 static void
137 dr_stop_intr(void)
138 {
139 	ASSERT(MUTEX_HELD(&cpu_lock));
140 
141 	kpreempt_disable();
142 	cyclic_suspend();
143 }
144 
145 static void
146 dr_enable_intr(void)
147 {
148 	ASSERT(MUTEX_HELD(&cpu_lock));
149 
150 	cyclic_resume();
151 	kpreempt_enable();
152 }
153 
154 dr_sr_handle_t *
155 dr_get_sr_handle(dr_handle_t *hp)
156 {
157 	dr_sr_handle_t *srh;
158 
159 	srh = GETSTRUCT(dr_sr_handle_t, 1);
160 	srh->sr_dr_handlep = hp;
161 
162 	return (srh);
163 }
164 
165 void
166 dr_release_sr_handle(dr_sr_handle_t *srh)
167 {
168 	ASSERT(srh->sr_failed_dip == NULL);
169 	FREESTRUCT(srh, dr_sr_handle_t, 1);
170 }
171 
172 static int
173 dr_is_real_device(dev_info_t *dip)
174 {
175 	struct regspec *regbuf = NULL;
176 	int length = 0;
177 	int rc;
178 
179 	if (ddi_get_driver(dip) == NULL)
180 		return (0);
181 
182 	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
183 		return (1);
184 	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
185 		return (0);
186 
187 	/*
188 	 * now the general case
189 	 */
190 	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
191 	    (caddr_t)&regbuf, &length);
192 	ASSERT(rc != DDI_PROP_NO_MEMORY);
193 	if (rc != DDI_PROP_SUCCESS) {
194 		return (0);
195 	} else {
196 		if ((length > 0) && (regbuf != NULL))
197 			kmem_free(regbuf, length);
198 		return (1);
199 	}
200 }
201 
202 static int
203 dr_is_unsafe_major(major_t major)
204 {
205 	char    *dname, **cpp;
206 	int	i, ndevs;
207 
208 	if ((dname = ddi_major_to_name(major)) == NULL) {
209 		PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
210 		return (0);
211 	}
212 
213 	ndevs = dr_unsafe_devs.ndevs;
214 	for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
215 		if (strcmp(dname, *cpp++) == 0)
216 			return (1);
217 	}
218 	return (0);
219 }
220 
221 static int
222 dr_bypass_device(char *dname)
223 {
224 	int i;
225 	char **lname;
226 	/* check the bypass list */
227 	for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
228 		if (strcmp(dname, dr_bypass_list[i++]) == 0)
229 			return (1);
230 	}
231 	return (0);
232 }
233 
234 static int
235 dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
236 {
237 	major_t	devmajor;
238 	char	*aka, *name;
239 
240 	*buffer = *alias = 0;
241 
242 	if (dip == NULL)
243 		return (-1);
244 
245 	if ((name = ddi_get_name(dip)) == NULL)
246 		name = "<null name>";
247 
248 	aka = name;
249 
250 	if ((devmajor = ddi_name_to_major(aka)) != -1)
251 		aka = ddi_major_to_name(devmajor);
252 
253 	(void) strcpy(buffer, name);
254 
255 	if (strcmp(name, aka))
256 		(void) strcpy(alias, aka);
257 	else
258 		*alias = 0;
259 
260 	return (0);
261 }
262 
263 struct dr_ref {
264 	int		*refcount;
265 	int		*refcount_non_gldv3;
266 	uint64_t	*arr;
267 	int		*idx;
268 	int		len;
269 };
270 
271 /* ARGSUSED */
272 static int
273 dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
274 {
275 	major_t		major;
276 	char		*dname;
277 	struct dr_ref	*rp = (struct dr_ref *)arg;
278 
279 	if (dip == NULL)
280 		return (DDI_WALK_CONTINUE);
281 
282 	if (!dr_is_real_device(dip))
283 		return (DDI_WALK_CONTINUE);
284 
285 	dname = ddi_binding_name(dip);
286 
287 	if (dr_bypass_device(dname))
288 		return (DDI_WALK_CONTINUE);
289 
290 	if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
291 		if (ref && rp->refcount) {
292 			*rp->refcount += ref;
293 			PR_QR("\n  %s (major# %d) is referenced(%u)\n", dname,
294 			    major, ref);
295 		}
296 		if (ref && rp->refcount_non_gldv3) {
297 			if (NETWORK_PHYSDRV(major) && !GLDV3_DRV(major))
298 				*rp->refcount_non_gldv3 += ref;
299 		}
300 		if (dr_is_unsafe_major(major) && i_ddi_devi_attached(dip)) {
301 			PR_QR("\n  %s (major# %d) not hotpluggable\n", dname,
302 			    major);
303 			if (rp->arr != NULL && rp->idx != NULL)
304 				*rp->idx = dr_add_int(rp->arr, *rp->idx,
305 				    rp->len, (uint64_t)major);
306 		}
307 	}
308 	return (DDI_WALK_CONTINUE);
309 }
310 
311 static int
312 dr_check_unsafe_major(dev_info_t *dip, void *arg)
313 {
314 	return (dr_check_dip(dip, arg, 0));
315 }
316 
317 
318 /*ARGSUSED*/
319 void
320 dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
321     uint64_t *arr, int *idx, int len, int *refcount_non_gldv3)
322 {
323 	struct dr_ref bref = {0};
324 
325 	if (dip == NULL)
326 		return;
327 
328 	bref.refcount = refcount;
329 	bref.refcount_non_gldv3 = refcount_non_gldv3;
330 	bref.arr = arr;
331 	bref.idx = idx;
332 	bref.len = len;
333 
334 	ASSERT(e_ddi_branch_held(dip));
335 	(void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
336 }
337 
338 /*
339  * The "dip" argument's parent (if it exists) must be held busy.
340  */
341 static int
342 dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
343 {
344 	dr_handle_t	*handle;
345 	major_t		major;
346 	char		*dname;
347 	int		circ;
348 
349 	/*
350 	 * If dip is the root node, it has no siblings and it is
351 	 * always held. If dip is not the root node, dr_suspend_devices()
352 	 * will be invoked with the parent held busy.
353 	 */
354 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
355 		char	d_name[40], d_alias[40], *d_info;
356 
357 		ndi_devi_enter(dip, &circ);
358 		if (dr_suspend_devices(ddi_get_child(dip), srh)) {
359 			ndi_devi_exit(dip, circ);
360 			return (ENXIO);
361 		}
362 		ndi_devi_exit(dip, circ);
363 
364 		if (!dr_is_real_device(dip))
365 			continue;
366 
367 		major = (major_t)-1;
368 		if ((dname = ddi_binding_name(dip)) != NULL)
369 			major = ddi_name_to_major(dname);
370 
371 		if (dr_bypass_device(dname)) {
372 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
373 			    major);
374 			continue;
375 		}
376 
377 		if (drmach_verify_sr(dip, 1)) {
378 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
379 			    major);
380 			continue;
381 		}
382 
383 		if ((d_info = ddi_get_name_addr(dip)) == NULL)
384 			d_info = "<null>";
385 
386 		d_name[0] = 0;
387 		if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
388 			if (d_alias[0] != 0) {
389 				prom_printf("\tsuspending %s@%s (aka %s)\n",
390 				    d_name, d_info, d_alias);
391 			} else {
392 				prom_printf("\tsuspending %s@%s\n", d_name,
393 				    d_info);
394 			}
395 		} else {
396 			prom_printf("\tsuspending %s@%s\n", dname, d_info);
397 		}
398 
399 		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
400 			prom_printf("\tFAILED to suspend %s@%s\n",
401 			    d_name[0] ? d_name : dname, d_info);
402 
403 			srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
404 			    srh->sr_err_idx, DR_MAX_ERR_INT, (uint64_t)major);
405 
406 			ndi_hold_devi(dip);
407 			srh->sr_failed_dip = dip;
408 
409 			handle = srh->sr_dr_handlep;
410 			dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
411 			    d_name[0] ? d_name : dname, d_info);
412 
413 			return (DDI_FAILURE);
414 		}
415 	}
416 
417 	return (DDI_SUCCESS);
418 }
419 
420 static void
421 dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
422 {
423 	dr_handle_t	*handle;
424 	dev_info_t	*dip, *next, *last = NULL;
425 	major_t		major;
426 	char		*bn;
427 	int		circ;
428 
429 	major = (major_t)-1;
430 
431 	/* attach in reverse device tree order */
432 	while (last != start) {
433 		dip = start;
434 		next = ddi_get_next_sibling(dip);
435 		while (next != last && dip != srh->sr_failed_dip) {
436 			dip = next;
437 			next = ddi_get_next_sibling(dip);
438 		}
439 		if (dip == srh->sr_failed_dip) {
440 			/* release hold acquired in dr_suspend_devices() */
441 			srh->sr_failed_dip = NULL;
442 			ndi_rele_devi(dip);
443 		} else if (dr_is_real_device(dip) &&
444 		    srh->sr_failed_dip == NULL) {
445 
446 			if ((bn = ddi_binding_name(dip)) != NULL) {
447 				major = ddi_name_to_major(bn);
448 			} else {
449 				bn = "<null>";
450 			}
451 			if (!dr_bypass_device(bn) &&
452 			    !drmach_verify_sr(dip, 0)) {
453 				char	d_name[40], d_alias[40], *d_info;
454 
455 				d_name[0] = 0;
456 				d_info = ddi_get_name_addr(dip);
457 				if (d_info == NULL)
458 					d_info = "<null>";
459 
460 				if (!dr_resolve_devname(dip, d_name, d_alias)) {
461 					if (d_alias[0] != 0) {
462 						prom_printf("\tresuming "
463 						    "%s@%s (aka %s)\n", d_name,
464 						    d_info, d_alias);
465 					} else {
466 						prom_printf("\tresuming "
467 						    "%s@%s\n", d_name, d_info);
468 					}
469 				} else {
470 					prom_printf("\tresuming %s@%s\n", bn,
471 					    d_info);
472 				}
473 
474 				if (devi_attach(dip, DDI_RESUME) !=
475 				    DDI_SUCCESS) {
476 					/*
477 					 * Print a console warning,
478 					 * set an e_code of ESBD_RESUME,
479 					 * and save the driver major
480 					 * number in the e_rsc.
481 					 */
482 					prom_printf("\tFAILED to resume %s@%s",
483 					    d_name[0] ? d_name : bn, d_info);
484 
485 					srh->sr_err_idx =
486 					    dr_add_int(srh->sr_err_ints,
487 					    srh->sr_err_idx, DR_MAX_ERR_INT,
488 					    (uint64_t)major);
489 
490 					handle = srh->sr_dr_handlep;
491 
492 					dr_op_err(CE_IGNORE, handle,
493 					    ESBD_RESUME, "%s@%s",
494 					    d_name[0] ? d_name : bn, d_info);
495 				}
496 			}
497 		}
498 
499 		/* Hold parent busy while walking its children */
500 		ndi_devi_enter(dip, &circ);
501 		dr_resume_devices(ddi_get_child(dip), srh);
502 		ndi_devi_exit(dip, circ);
503 		last = dip;
504 	}
505 }
506 
/*
 * DR's notion of a "virtually stopped" thread (compare CPR_VSTOPPED).
 * True for a thread that is asleep on a wait channel, has an AST posted
 * and carries TP_CHKPT.  Such a user thread is still waiting inside the
 * kernel; once it completes there it will process the stop request and
 * stop.
 */
#define	DR_VSTOPPED(t)			\
	((t)->t_state == TS_SLEEP &&	\
	(t)->t_wchan != NULL &&		\
	(t)->t_astflag &&		\
	((t)->t_proc_flag & TP_CHKPT))
518 
519 /* ARGSUSED */
520 static int
521 dr_stop_user_threads(dr_sr_handle_t *srh)
522 {
523 	int		count;
524 	int		bailout;
525 	dr_handle_t	*handle = srh->sr_dr_handlep;
526 	static fn_t	f = "dr_stop_user_threads";
527 	kthread_id_t 	tp;
528 
529 	extern void add_one_utstop();
530 	extern void utstop_timedwait(clock_t);
531 	extern void utstop_init(void);
532 
533 #define	DR_UTSTOP_RETRY	4
534 #define	DR_UTSTOP_WAIT	hz
535 
536 	if (dr_skip_user_threads)
537 		return (DDI_SUCCESS);
538 
539 	utstop_init();
540 
541 	/* we need to try a few times to get past fork, etc. */
542 	srh->sr_err_idx = 0;
543 	for (count = 0; count < DR_UTSTOP_RETRY; count++) {
544 		/* walk the entire threadlist */
545 		mutex_enter(&pidlock);
546 		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
547 			proc_t *p = ttoproc(tp);
548 
549 			/* handle kernel threads separately */
550 			if (p->p_as == &kas || p->p_stat == SZOMB)
551 				continue;
552 
553 			mutex_enter(&p->p_lock);
554 			thread_lock(tp);
555 
556 			if (tp->t_state == TS_STOPPED) {
557 				/* add another reason to stop this thread */
558 				tp->t_schedflag &= ~TS_RESUME;
559 			} else {
560 				tp->t_proc_flag |= TP_CHKPT;
561 
562 				thread_unlock(tp);
563 				mutex_exit(&p->p_lock);
564 				add_one_utstop();
565 				mutex_enter(&p->p_lock);
566 				thread_lock(tp);
567 
568 				aston(tp);
569 
570 				if (ISWAKEABLE(tp) || ISWAITING(tp)) {
571 					setrun_locked(tp);
572 				}
573 
574 			}
575 
576 			/* grab thread if needed */
577 			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
578 				poke_cpu(tp->t_cpu->cpu_id);
579 
580 
581 			thread_unlock(tp);
582 			mutex_exit(&p->p_lock);
583 		}
584 		mutex_exit(&pidlock);
585 
586 
587 		/* let everything catch up */
588 		utstop_timedwait(count * count * DR_UTSTOP_WAIT);
589 
590 
591 		/* now, walk the threadlist again to see if we are done */
592 		mutex_enter(&pidlock);
593 		for (tp = curthread->t_next, bailout = 0;
594 		    tp != curthread; tp = tp->t_next) {
595 			proc_t *p = ttoproc(tp);
596 
597 			/* handle kernel threads separately */
598 			if (p->p_as == &kas || p->p_stat == SZOMB)
599 				continue;
600 
601 			/*
602 			 * If this thread didn't stop, and we don't allow
603 			 * unstopped blocked threads, bail.
604 			 */
605 			thread_lock(tp);
606 			if (!CPR_ISTOPPED(tp) &&
607 			    !(dr_allow_blocked_threads &&
608 			    DR_VSTOPPED(tp))) {
609 				bailout = 1;
610 				if (count == DR_UTSTOP_RETRY - 1) {
611 					/*
612 					 * save the pid for later reporting
613 					 */
614 					srh->sr_err_idx =
615 					    dr_add_int(srh->sr_err_ints,
616 					    srh->sr_err_idx, DR_MAX_ERR_INT,
617 					    (uint64_t)p->p_pid);
618 
619 					cmn_err(CE_WARN, "%s: "
620 					    "failed to stop thread: "
621 					    "process=%s, pid=%d",
622 					    f, p->p_user.u_psargs, p->p_pid);
623 
624 					PR_QR("%s: failed to stop thread: "
625 					    "process=%s, pid=%d, t_id=0x%p, "
626 					    "t_state=0x%x, t_proc_flag=0x%x, "
627 					    "t_schedflag=0x%x\n",
628 					    f, p->p_user.u_psargs, p->p_pid,
629 					    (void *)tp, tp->t_state,
630 					    tp->t_proc_flag, tp->t_schedflag);
631 				}
632 
633 			}
634 			thread_unlock(tp);
635 		}
636 		mutex_exit(&pidlock);
637 
638 		/* were all the threads stopped? */
639 		if (!bailout)
640 			break;
641 	}
642 
643 	/* were we unable to stop all threads after a few tries? */
644 	if (bailout) {
645 		handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
646 		    srh->sr_err_idx, 0);
647 		return (ESRCH);
648 	}
649 
650 	return (DDI_SUCCESS);
651 }
652 
653 static void
654 dr_start_user_threads(void)
655 {
656 	kthread_id_t tp;
657 
658 	mutex_enter(&pidlock);
659 
660 	/* walk all threads and release them */
661 	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
662 		proc_t *p = ttoproc(tp);
663 
664 		/* skip kernel threads */
665 		if (ttoproc(tp)->p_as == &kas)
666 			continue;
667 
668 		mutex_enter(&p->p_lock);
669 		tp->t_proc_flag &= ~TP_CHKPT;
670 		mutex_exit(&p->p_lock);
671 
672 		thread_lock(tp);
673 		if (CPR_ISTOPPED(tp)) {
674 			/* back on the runq */
675 			tp->t_schedflag |= TS_RESUME;
676 			setrun_locked(tp);
677 		}
678 		thread_unlock(tp);
679 	}
680 
681 	mutex_exit(&pidlock);
682 }
683 
684 static void
685 dr_signal_user(int sig)
686 {
687 	struct proc *p;
688 
689 	mutex_enter(&pidlock);
690 
691 	for (p = practive; p != NULL; p = p->p_next) {
692 		/* only user threads */
693 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
694 		    p == proc_init || p == ttoproc(curthread))
695 			continue;
696 
697 		mutex_enter(&p->p_lock);
698 		sigtoproc(p, NULL, sig);
699 		mutex_exit(&p->p_lock);
700 	}
701 
702 	mutex_exit(&pidlock);
703 
704 	/* add a bit of delay */
705 	delay(hz);
706 }
707 
708 void
709 dr_resume(dr_sr_handle_t *srh)
710 {
711 	dr_handle_t	*handle;
712 
713 	handle = srh->sr_dr_handlep;
714 
715 	if (srh->sr_suspend_state < DR_SRSTATE_FULL) {
716 		/*
717 		 * Update the signature block.
718 		 * If cpus are not paused, this can be done now.
719 		 * See comments below.
720 		 */
721 		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
722 		    CPU->cpu_id);
723 	}
724 
725 	switch (srh->sr_suspend_state) {
726 	case DR_SRSTATE_FULL:
727 
728 		ASSERT(MUTEX_HELD(&cpu_lock));
729 
730 		/*
731 		 * Prevent false alarm in tod_validate() due to tod
732 		 * value change between suspend and resume
733 		 */
734 		mutex_enter(&tod_lock);
735 		tod_status_set(TOD_DR_RESUME_DONE);
736 		mutex_exit(&tod_lock);
737 
738 		dr_enable_intr(); 	/* enable intr & clock */
739 
740 		start_cpus();
741 		mutex_exit(&cpu_lock);
742 
743 		/*
744 		 * Update the signature block.
745 		 * This must not be done while cpus are paused, since on
746 		 * Starcat the cpu signature update aquires an adaptive
747 		 * mutex in the iosram driver. Blocking with cpus paused
748 		 * can lead to deadlock.
749 		 */
750 		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
751 		    CPU->cpu_id);
752 
753 		/*
754 		 * If we suspended hw watchdog at suspend,
755 		 * re-enable it now.
756 		 */
757 		if (srh->sr_flags & (SR_FLAG_WATCHDOG)) {
758 			mutex_enter(&tod_lock);
759 			tod_ops.tod_set_watchdog_timer(
760 			    watchdog_timeout_seconds);
761 			mutex_exit(&tod_lock);
762 		}
763 
764 		/*
765 		 * This should only be called if drmach_suspend_last()
766 		 * was called and state transitioned to DR_SRSTATE_FULL
767 		 * to prevent resume attempts on device instances that
768 		 * were not previously suspended.
769 		 */
770 		drmach_resume_first();
771 
772 		/* FALLTHROUGH */
773 
774 	case DR_SRSTATE_DRIVER:
775 		/*
776 		 * resume drivers
777 		 */
778 		srh->sr_err_idx = 0;
779 
780 		/* no parent dip to hold busy */
781 		dr_resume_devices(ddi_root_node(), srh);
782 
783 		if (srh->sr_err_idx && srh->sr_dr_handlep) {
784 			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
785 			    srh->sr_err_ints, srh->sr_err_idx, 1);
786 		}
787 
788 		/*
789 		 * resume the lock manager
790 		 */
791 		lm_cprresume();
792 
793 		/* FALLTHROUGH */
794 
795 	case DR_SRSTATE_USER:
796 		/*
797 		 * finally, resume user threads
798 		 */
799 		if (!dr_skip_user_threads) {
800 			prom_printf("DR: resuming user threads...\n");
801 			dr_start_user_threads();
802 		}
803 		/* FALLTHROUGH */
804 
805 	case DR_SRSTATE_BEGIN:
806 	default:
807 		/*
808 		 * let those who care know that we've just resumed
809 		 */
810 		PR_QR("sending SIGTHAW...\n");
811 		dr_signal_user(SIGTHAW);
812 		break;
813 	}
814 
815 	i_ndi_allow_device_tree_changes(handle->h_ndi);
816 
817 	/*
818 	 * update the signature block
819 	 */
820 	CPU_SIGNATURE(OS_SIG, SIGST_RUN, SIGSUBST_NULL, CPU->cpu_id);
821 
822 	prom_printf("DR: resume COMPLETED\n");
823 }
824 
825 int
826 dr_suspend(dr_sr_handle_t *srh)
827 {
828 	dr_handle_t	*handle;
829 	int		force;
830 	int		dev_errs_idx;
831 	uint64_t	dev_errs[DR_MAX_ERR_INT];
832 	int		rc = DDI_SUCCESS;
833 
834 	handle = srh->sr_dr_handlep;
835 
836 	force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;
837 
838 	/*
839 	 * update the signature block
840 	 */
841 	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCE_INPROGRESS, SIGSUBST_NULL,
842 	    CPU->cpu_id);
843 
844 	i_ndi_block_device_tree_changes(&handle->h_ndi);
845 
846 	prom_printf("\nDR: suspending user threads...\n");
847 	srh->sr_suspend_state = DR_SRSTATE_USER;
848 	if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
849 	    dr_check_user_stop_result) {
850 		dr_resume(srh);
851 		return (rc);
852 	}
853 
854 	if (!force) {
855 		struct dr_ref drc = {0};
856 
857 		prom_printf("\nDR: checking devices...\n");
858 		dev_errs_idx = 0;
859 
860 		drc.arr = dev_errs;
861 		drc.idx = &dev_errs_idx;
862 		drc.len = DR_MAX_ERR_INT;
863 
864 		/*
865 		 * Since the root node can never go away, it
866 		 * doesn't have to be held.
867 		 */
868 		ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
869 		if (dev_errs_idx) {
870 			handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
871 			    dev_errs_idx, 1);
872 			dr_resume(srh);
873 			return (DDI_FAILURE);
874 		}
875 		PR_QR("done\n");
876 	} else {
877 		prom_printf("\nDR: dr_suspend invoked with force flag\n");
878 	}
879 
880 #ifndef	SKIP_SYNC
881 	/*
882 	 * This sync swap out all user pages
883 	 */
884 	vfs_sync(SYNC_ALL);
885 #endif
886 
887 	/*
888 	 * special treatment for lock manager
889 	 */
890 	lm_cprsuspend();
891 
892 #ifndef	SKIP_SYNC
893 	/*
894 	 * sync the file system in case we never make it back
895 	 */
896 	sync();
897 #endif
898 
899 	/*
900 	 * now suspend drivers
901 	 */
902 	prom_printf("DR: suspending drivers...\n");
903 	srh->sr_suspend_state = DR_SRSTATE_DRIVER;
904 	srh->sr_err_idx = 0;
905 	/* No parent to hold busy */
906 	if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
907 		if (srh->sr_err_idx && srh->sr_dr_handlep) {
908 			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
909 			    srh->sr_err_ints, srh->sr_err_idx, 1);
910 		}
911 		dr_resume(srh);
912 		return (rc);
913 	}
914 
915 	drmach_suspend_last();
916 
917 	/*
918 	 * finally, grab all cpus
919 	 */
920 	srh->sr_suspend_state = DR_SRSTATE_FULL;
921 
922 	/*
923 	 * if watchdog was activated, disable it
924 	 */
925 	if (watchdog_activated) {
926 		mutex_enter(&tod_lock);
927 		tod_ops.tod_clear_watchdog_timer();
928 		mutex_exit(&tod_lock);
929 		srh->sr_flags |= SR_FLAG_WATCHDOG;
930 	} else {
931 		srh->sr_flags &= ~(SR_FLAG_WATCHDOG);
932 	}
933 
934 	/*
935 	 * Update the signature block.
936 	 * This must be done before cpus are paused, since on Starcat the
937 	 * cpu signature update aquires an adaptive mutex in the iosram driver.
938 	 * Blocking with cpus paused can lead to deadlock.
939 	 */
940 	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCED, SIGSUBST_NULL, CPU->cpu_id);
941 
942 	mutex_enter(&cpu_lock);
943 	pause_cpus(NULL);
944 	dr_stop_intr();
945 
946 	return (rc);
947 }
948 
949 int
950 dr_pt_test_suspend(dr_handle_t *hp)
951 {
952 	dr_sr_handle_t *srh;
953 	int		err;
954 	uint_t		psmerr;
955 	static fn_t	f = "dr_pt_test_suspend";
956 
957 	PR_QR("%s...\n", f);
958 
959 	srh = dr_get_sr_handle(hp);
960 	if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
961 		dr_resume(srh);
962 		if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
963 			PR_QR("%s: error on dr_resume()", f);
964 			switch (psmerr) {
965 			case ESBD_RESUME:
966 				PR_QR("Couldn't resume devices: %s\n",
967 				    DR_GET_E_RSC(hp->h_err));
968 				break;
969 
970 			case ESBD_KTHREAD:
971 				PR_ALL("psmerr is ESBD_KTHREAD\n");
972 				break;
973 			default:
974 				PR_ALL("Resume error unknown = %d\n", psmerr);
975 				break;
976 			}
977 		}
978 	} else {
979 		PR_ALL("%s: dr_suspend() failed, err = 0x%x\n", f, err);
980 		psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
981 		switch (psmerr) {
982 		case ESBD_UNSAFE:
983 			PR_ALL("Unsafe devices (major #): %s\n",
984 			    DR_GET_E_RSC(hp->h_err));
985 			break;
986 
987 		case ESBD_RTTHREAD:
988 			PR_ALL("RT threads (PIDs): %s\n",
989 			    DR_GET_E_RSC(hp->h_err));
990 			break;
991 
992 		case ESBD_UTHREAD:
993 			PR_ALL("User threads (PIDs): %s\n",
994 			    DR_GET_E_RSC(hp->h_err));
995 			break;
996 
997 		case ESBD_SUSPEND:
998 			PR_ALL("Non-suspendable devices (major #): %s\n",
999 			    DR_GET_E_RSC(hp->h_err));
1000 			break;
1001 
1002 		case ESBD_RESUME:
1003 			PR_ALL("Could not resume devices (major #): %s\n",
1004 			    DR_GET_E_RSC(hp->h_err));
1005 			break;
1006 
1007 		case ESBD_KTHREAD:
1008 			PR_ALL("psmerr is ESBD_KTHREAD\n");
1009 			break;
1010 
1011 		case ESBD_NOERROR:
1012 			PR_ALL("sbd_error_t error code not set\n");
1013 			break;
1014 
1015 		default:
1016 			PR_ALL("Unknown error psmerr = %d\n", psmerr);
1017 			break;
1018 		}
1019 	}
1020 	dr_release_sr_handle(srh);
1021 
1022 	return (0);
1023 }
1024 
/*
 * Append 'val' to an array that is being used as a set.
 *
 * arr - the array; if NULL nothing is stored and 0 is returned
 * idx - number of entries currently in use
 * len - capacity of the array
 * val - value to add
 *
 * The value is stored only if the array is not yet full and does not
 * already contain it.  Returns the resulting number of entries, which is
 * either idx (nothing added) or idx + 1.
 */
static int
dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
{
	int pos;

	if (arr == NULL)
		return (0);

	/* no room left */
	if (idx >= len)
		return (idx);

	/* already present? */
	pos = 0;
	while (pos < idx) {
		if (arr[pos] == val)
			return (idx);
		pos++;
	}

	arr[idx] = val;

	return (idx + 1);
}
1050 
1051 /*
1052  * Construct an sbd_error_t featuring a string representation of an array of
1053  * integers as its e_rsc.
1054  */
1055 static sbd_error_t *
1056 drerr_int(int e_code, uint64_t *arr, int idx, int majors)
1057 {
1058 	int		i, n, buf_len, buf_idx, buf_avail;
1059 	char		*dname;
1060 	char		*buf;
1061 	sbd_error_t	*new_sbd_err;
1062 	static char	s_ellipsis[] = "...";
1063 
1064 	if (arr == NULL || idx <= 0)
1065 		return (NULL);
1066 
1067 	/* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */
1068 	buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1069 
1070 	/*
1071 	 * This is the total working area of the buffer.  It must be computed
1072 	 * as the size of 'buf', minus reserved space for the null terminator
1073 	 * and the ellipsis string.
1074 	 */
1075 	buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
1076 
1077 	/* Construct a string representation of the array values */
1078 	for (buf_idx = 0, i = 0; i < idx; i++) {
1079 		buf_avail = buf_len - buf_idx;
1080 		if (majors) {
1081 			dname = ddi_major_to_name(arr[i]);
1082 			if (dname) {
1083 				n = snprintf(&buf[buf_idx], buf_avail, "%s, ",
1084 				    dname);
1085 			} else {
1086 				n = snprintf(&buf[buf_idx], buf_avail,
1087 				    "major %lu, ", arr[i]);
1088 			}
1089 		} else {
1090 			n = snprintf(&buf[buf_idx], buf_avail, "%lu, ", arr[i]);
1091 		}
1092 
1093 		/* An ellipsis gets appended when no more values fit */
1094 		if (n >= buf_avail) {
1095 			(void) strcpy(&buf[buf_idx], s_ellipsis);
1096 			break;
1097 		}
1098 
1099 		buf_idx += n;
1100 	}
1101 
1102 	/* If all the contents fit, remove the trailing comma */
1103 	if (n < buf_avail) {
1104 		buf[--buf_idx] = '\0';
1105 		buf[--buf_idx] = '\0';
1106 	}
1107 
1108 	/* Return an sbd_error_t with the buffer and e_code */
1109 	new_sbd_err = drerr_new(1, e_code, buf);
1110 	kmem_free(buf, MAXPATHLEN);
1111 	return (new_sbd_err);
1112 }
1113