xref: /illumos-gate/usr/src/uts/sun4u/ngdr/io/dr_quiesce.c (revision f3041bfa)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * A CPR derivative specifically for starfire/starcat
28  */
29 
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/machparam.h>
33 #include <sys/machsystm.h>
34 #include <sys/ddi.h>
35 #define	SUNDDI_IMPL
36 #include <sys/sunddi.h>
37 #include <sys/sunndi.h>
38 #include <sys/devctl.h>
39 #include <sys/time.h>
40 #include <sys/kmem.h>
41 #include <nfs/lm.h>
42 #include <sys/ddi_impldefs.h>
43 #include <sys/ndi_impldefs.h>
44 #include <sys/obpdefs.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/errno.h>
48 #include <sys/callb.h>
49 #include <sys/clock.h>
50 #include <sys/x_call.h>
51 #include <sys/cpuvar.h>
52 #include <sys/epm.h>
53 #include <sys/vfs.h>
54 
55 #include <sys/cpu_sgnblk_defs.h>
56 #include <sys/dr.h>
57 #include <sys/dr_util.h>
58 
59 #include <sys/promif.h>
60 #include <sys/conf.h>
61 #include <sys/cyclic.h>
62 
63 extern void	e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
64 extern void	e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
65 extern int	is_pseudo_device(dev_info_t *dip);
66 
67 extern kmutex_t	cpu_lock;
68 extern dr_unsafe_devs_t dr_unsafe_devs;
69 
70 static int		dr_is_real_device(dev_info_t *dip);
71 static int		dr_is_unsafe_major(major_t major);
72 static int		dr_bypass_device(char *dname);
73 static int		dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
74 static int		dr_resolve_devname(dev_info_t *dip, char *buffer,
75 				char *alias);
76 static sbd_error_t	*drerr_int(int e_code, uint64_t *arr, int idx,
77 				int majors);
78 static int		dr_add_int(uint64_t *arr, int idx, int len,
79 				uint64_t val);
80 
81 int dr_pt_test_suspend(dr_handle_t *hp);
82 
/*
 * dr_quiesce.c interface
 *
 * Progress markers for a suspend operation.  dr_suspend() records each
 * stage in sr_suspend_state before attempting it, and dr_resume() uses the
 * recorded value to decide how much has to be undone.  These states are
 * used internally by dr_suspend and dr_resume only.
 */
typedef enum dr_suspend_state {
	DR_SRSTATE_BEGIN	= 0,	/* nothing attempted yet */
	DR_SRSTATE_USER		= 1,	/* stopping user threads */
	DR_SRSTATE_DRIVER	= 2,	/* suspending drivers */
	DR_SRSTATE_FULL		= 3	/* final stage, set before cpu pause */
} suspend_state_t;
93 
94 struct dr_sr_handle {
95 	dr_handle_t		*sr_dr_handlep;
96 	dev_info_t		*sr_failed_dip;
97 	suspend_state_t		sr_suspend_state;
98 	uint_t			sr_flags;
99 	uint64_t		sr_err_ints[DR_MAX_ERR_INT];
100 	int			sr_err_idx;
101 };
102 
103 #define	SR_FLAG_WATCHDOG	0x1
104 
/*
 * XXX
 * Testing aid only; this hack is meant to go away before RTI.
 * Names of drivers to bypass when performing a suspend.  The list must
 * end with an empty string, which is the terminator dr_bypass_device()
 * looks for.
 */
static char *dr_bypass_list[] = {
	""	/* terminator; no drivers are bypassed by default */
};
113 
114 
115 #define		SKIP_SYNC	/* bypass sync ops in dr_suspend */
116 
/*
 * Tunables governing how user threads are treated during suspend.
 *
 * dr_skip_user_threads: when true, user threads are neither stopped nor
 * restarted and the other two flags are not consulted.
 *
 * dr_check_user_stop_result: when true, a failure to stop user threads
 * aborts the suspend; when false, the result is ignored.
 *
 * dr_allow_blocked_threads: when true, a thread that has not stopped but
 * is blocked in the kernel (see DR_VSTOPPED) is treated as stopped.
 *
 * All combinations of dr_check_user_stop_result and
 * dr_allow_blocked_threads are permitted, even though disallowing blocked
 * threads makes little sense when the stop result is not checked at all.
 */
static int	dr_skip_user_threads = 0;	/* default to FALSE */
static int	dr_check_user_stop_result = 1;	/* default to TRUE */
static int	dr_allow_blocked_threads = 1;	/* default to TRUE */
132 
133 #define	DR_CPU_LOOP_MSEC	1000
134 
135 static void
136 dr_stop_intr(void)
137 {
138 	ASSERT(MUTEX_HELD(&cpu_lock));
139 
140 	kpreempt_disable();
141 	cyclic_suspend();
142 }
143 
144 static void
145 dr_enable_intr(void)
146 {
147 	ASSERT(MUTEX_HELD(&cpu_lock));
148 
149 	cyclic_resume();
150 	kpreempt_enable();
151 }
152 
153 dr_sr_handle_t *
154 dr_get_sr_handle(dr_handle_t *hp)
155 {
156 	dr_sr_handle_t *srh;
157 
158 	srh = GETSTRUCT(dr_sr_handle_t, 1);
159 	srh->sr_dr_handlep = hp;
160 
161 	return (srh);
162 }
163 
164 void
165 dr_release_sr_handle(dr_sr_handle_t *srh)
166 {
167 	ASSERT(srh->sr_failed_dip == NULL);
168 	FREESTRUCT(srh, dr_sr_handle_t, 1);
169 }
170 
171 static int
172 dr_is_real_device(dev_info_t *dip)
173 {
174 	struct regspec *regbuf = NULL;
175 	int length = 0;
176 	int rc;
177 
178 	if (ddi_get_driver(dip) == NULL)
179 		return (0);
180 
181 	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
182 		return (1);
183 	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
184 		return (0);
185 
186 	/*
187 	 * now the general case
188 	 */
189 	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
190 	    (caddr_t)&regbuf, &length);
191 	ASSERT(rc != DDI_PROP_NO_MEMORY);
192 	if (rc != DDI_PROP_SUCCESS) {
193 		return (0);
194 	} else {
195 		if ((length > 0) && (regbuf != NULL))
196 			kmem_free(regbuf, length);
197 		return (1);
198 	}
199 }
200 
201 static int
202 dr_is_unsafe_major(major_t major)
203 {
204 	char    *dname, **cpp;
205 	int	i, ndevs;
206 
207 	if ((dname = ddi_major_to_name(major)) == NULL) {
208 		PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
209 		return (0);
210 	}
211 
212 	ndevs = dr_unsafe_devs.ndevs;
213 	for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
214 		if (strcmp(dname, *cpp++) == 0)
215 			return (1);
216 	}
217 	return (0);
218 }
219 
220 static int
221 dr_bypass_device(char *dname)
222 {
223 	int i;
224 	char **lname;
225 
226 	if (dname == NULL)
227 		return (0);
228 
229 	/* check the bypass list */
230 	for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
231 		if (strcmp(dname, dr_bypass_list[i++]) == 0)
232 			return (1);
233 	}
234 	return (0);
235 }
236 
237 static int
238 dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
239 {
240 	major_t	devmajor;
241 	char	*aka, *name;
242 
243 	*buffer = *alias = 0;
244 
245 	if (dip == NULL)
246 		return (-1);
247 
248 	if ((name = ddi_get_name(dip)) == NULL)
249 		name = "<null name>";
250 
251 	aka = name;
252 
253 	if ((devmajor = ddi_name_to_major(aka)) != -1)
254 		aka = ddi_major_to_name(devmajor);
255 
256 	(void) strcpy(buffer, name);
257 
258 	if (strcmp(name, aka))
259 		(void) strcpy(alias, aka);
260 	else
261 		*alias = 0;
262 
263 	return (0);
264 }
265 
/*
 * Argument bundle handed to dr_check_dip() by the device-tree walkers.
 * Any of the pointers may be NULL, in which case that output is skipped.
 */
struct dr_ref {
	int		*refcount;		/* running total of references */
	int		*refcount_non_gldv3;	/* refs on non-GLDv3 net drivers */
	uint64_t	*arr;			/* majors of unsafe devices */
	int		*idx;			/* entries used in arr */
	int		len;			/* capacity of arr */
};
273 
274 /* ARGSUSED */
275 static int
276 dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
277 {
278 	major_t		major;
279 	char		*dname;
280 	struct dr_ref	*rp = (struct dr_ref *)arg;
281 
282 	if (dip == NULL)
283 		return (DDI_WALK_CONTINUE);
284 
285 	if (!dr_is_real_device(dip))
286 		return (DDI_WALK_CONTINUE);
287 
288 	dname = ddi_binding_name(dip);
289 
290 	if (dr_bypass_device(dname))
291 		return (DDI_WALK_CONTINUE);
292 
293 	if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
294 		if (ref && rp->refcount) {
295 			*rp->refcount += ref;
296 			PR_QR("\n  %s (major# %d) is referenced(%u)\n", dname,
297 			    major, ref);
298 		}
299 		if (ref && rp->refcount_non_gldv3) {
300 			if (NETWORK_PHYSDRV(major) && !GLDV3_DRV(major))
301 				*rp->refcount_non_gldv3 += ref;
302 		}
303 		if (dr_is_unsafe_major(major) && i_ddi_devi_attached(dip)) {
304 			PR_QR("\n  %s (major# %d) not hotpluggable\n", dname,
305 			    major);
306 			if (rp->arr != NULL && rp->idx != NULL)
307 				*rp->idx = dr_add_int(rp->arr, *rp->idx,
308 				    rp->len, (uint64_t)major);
309 		}
310 	}
311 	return (DDI_WALK_CONTINUE);
312 }
313 
314 static int
315 dr_check_unsafe_major(dev_info_t *dip, void *arg)
316 {
317 	return (dr_check_dip(dip, arg, 0));
318 }
319 
320 
321 /*ARGSUSED*/
322 void
323 dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
324     uint64_t *arr, int *idx, int len, int *refcount_non_gldv3)
325 {
326 	struct dr_ref bref = {0};
327 
328 	if (dip == NULL)
329 		return;
330 
331 	bref.refcount = refcount;
332 	bref.refcount_non_gldv3 = refcount_non_gldv3;
333 	bref.arr = arr;
334 	bref.idx = idx;
335 	bref.len = len;
336 
337 	ASSERT(e_ddi_branch_held(dip));
338 	(void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
339 }
340 
341 /*
342  * The "dip" argument's parent (if it exists) must be held busy.
343  */
344 static int
345 dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
346 {
347 	dr_handle_t	*handle;
348 	major_t		major;
349 	char		*dname;
350 	int		circ;
351 
352 	/*
353 	 * If dip is the root node, it has no siblings and it is
354 	 * always held. If dip is not the root node, dr_suspend_devices()
355 	 * will be invoked with the parent held busy.
356 	 */
357 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
358 		char	d_name[40], d_alias[40], *d_info;
359 
360 		ndi_devi_enter(dip, &circ);
361 		if (dr_suspend_devices(ddi_get_child(dip), srh)) {
362 			ndi_devi_exit(dip, circ);
363 			return (ENXIO);
364 		}
365 		ndi_devi_exit(dip, circ);
366 
367 		if (!dr_is_real_device(dip))
368 			continue;
369 
370 		major = (major_t)-1;
371 		if ((dname = ddi_binding_name(dip)) != NULL)
372 			major = ddi_name_to_major(dname);
373 
374 		if (dr_bypass_device(dname)) {
375 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
376 			    major);
377 			continue;
378 		}
379 
380 		if (drmach_verify_sr(dip, 1)) {
381 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
382 			    major);
383 			continue;
384 		}
385 
386 		if ((d_info = ddi_get_name_addr(dip)) == NULL)
387 			d_info = "<null>";
388 
389 		d_name[0] = 0;
390 		if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
391 			if (d_alias[0] != 0) {
392 				prom_printf("\tsuspending %s@%s (aka %s)\n",
393 				    d_name, d_info, d_alias);
394 			} else {
395 				prom_printf("\tsuspending %s@%s\n", d_name,
396 				    d_info);
397 			}
398 		} else {
399 			prom_printf("\tsuspending %s@%s\n", dname, d_info);
400 		}
401 
402 		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
403 			prom_printf("\tFAILED to suspend %s@%s\n",
404 			    d_name[0] ? d_name : dname, d_info);
405 
406 			srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
407 			    srh->sr_err_idx, DR_MAX_ERR_INT, (uint64_t)major);
408 
409 			ndi_hold_devi(dip);
410 			srh->sr_failed_dip = dip;
411 
412 			handle = srh->sr_dr_handlep;
413 			dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
414 			    d_name[0] ? d_name : dname, d_info);
415 
416 			return (DDI_FAILURE);
417 		}
418 	}
419 
420 	return (DDI_SUCCESS);
421 }
422 
423 static void
424 dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
425 {
426 	dr_handle_t	*handle;
427 	dev_info_t	*dip, *next, *last = NULL;
428 	major_t		major;
429 	char		*bn;
430 	int		circ;
431 
432 	major = (major_t)-1;
433 
434 	/* attach in reverse device tree order */
435 	while (last != start) {
436 		dip = start;
437 		next = ddi_get_next_sibling(dip);
438 		while (next != last && dip != srh->sr_failed_dip) {
439 			dip = next;
440 			next = ddi_get_next_sibling(dip);
441 		}
442 		if (dip == srh->sr_failed_dip) {
443 			/* release hold acquired in dr_suspend_devices() */
444 			srh->sr_failed_dip = NULL;
445 			ndi_rele_devi(dip);
446 		} else if (dr_is_real_device(dip) &&
447 		    srh->sr_failed_dip == NULL) {
448 
449 			if ((bn = ddi_binding_name(dip)) != NULL) {
450 				major = ddi_name_to_major(bn);
451 			} else {
452 				bn = "<null>";
453 			}
454 			if (!dr_bypass_device(bn) &&
455 			    !drmach_verify_sr(dip, 0)) {
456 				char	d_name[40], d_alias[40], *d_info;
457 
458 				d_name[0] = 0;
459 				d_info = ddi_get_name_addr(dip);
460 				if (d_info == NULL)
461 					d_info = "<null>";
462 
463 				if (!dr_resolve_devname(dip, d_name, d_alias)) {
464 					if (d_alias[0] != 0) {
465 						prom_printf("\tresuming "
466 						    "%s@%s (aka %s)\n", d_name,
467 						    d_info, d_alias);
468 					} else {
469 						prom_printf("\tresuming "
470 						    "%s@%s\n", d_name, d_info);
471 					}
472 				} else {
473 					prom_printf("\tresuming %s@%s\n", bn,
474 					    d_info);
475 				}
476 
477 				if (devi_attach(dip, DDI_RESUME) !=
478 				    DDI_SUCCESS) {
479 					/*
480 					 * Print a console warning,
481 					 * set an e_code of ESBD_RESUME,
482 					 * and save the driver major
483 					 * number in the e_rsc.
484 					 */
485 					prom_printf("\tFAILED to resume %s@%s",
486 					    d_name[0] ? d_name : bn, d_info);
487 
488 					srh->sr_err_idx =
489 					    dr_add_int(srh->sr_err_ints,
490 					    srh->sr_err_idx, DR_MAX_ERR_INT,
491 					    (uint64_t)major);
492 
493 					handle = srh->sr_dr_handlep;
494 
495 					dr_op_err(CE_IGNORE, handle,
496 					    ESBD_RESUME, "%s@%s",
497 					    d_name[0] ? d_name : bn, d_info);
498 				}
499 			}
500 		}
501 
502 		/* Hold parent busy while walking its children */
503 		ndi_devi_enter(dip, &circ);
504 		dr_resume_devices(ddi_get_child(dip), srh);
505 		ndi_devi_exit(dip, circ);
506 		last = dip;
507 	}
508 }
509 
/*
 * DR's notion of a "virtually stopped" thread, analogous to CPR_VSTOPPED.
 * Such a user thread is asleep on a wait channel inside the kernel with
 * its AST flag and TP_CHKPT set; once it finishes in the kernel it will
 * process the stop request and stop.
 */
#define	DR_VSTOPPED(t)					\
	((t)->t_state == TS_SLEEP &&			\
	(t)->t_wchan != NULL &&				\
	(t)->t_astflag &&				\
	((t)->t_proc_flag & TP_CHKPT))
521 
522 /* ARGSUSED */
523 static int
524 dr_stop_user_threads(dr_sr_handle_t *srh)
525 {
526 	int		count;
527 	int		bailout;
528 	dr_handle_t	*handle = srh->sr_dr_handlep;
529 	static fn_t	f = "dr_stop_user_threads";
530 	kthread_id_t 	tp;
531 
532 	extern void add_one_utstop();
533 	extern void utstop_timedwait(clock_t);
534 	extern void utstop_init(void);
535 
536 #define	DR_UTSTOP_RETRY	4
537 #define	DR_UTSTOP_WAIT	hz
538 
539 	if (dr_skip_user_threads)
540 		return (DDI_SUCCESS);
541 
542 	utstop_init();
543 
544 	/* we need to try a few times to get past fork, etc. */
545 	srh->sr_err_idx = 0;
546 	for (count = 0; count < DR_UTSTOP_RETRY; count++) {
547 		/* walk the entire threadlist */
548 		mutex_enter(&pidlock);
549 		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
550 			proc_t *p = ttoproc(tp);
551 
552 			/* handle kernel threads separately */
553 			if (p->p_as == &kas || p->p_stat == SZOMB)
554 				continue;
555 
556 			mutex_enter(&p->p_lock);
557 			thread_lock(tp);
558 
559 			if (tp->t_state == TS_STOPPED) {
560 				/* add another reason to stop this thread */
561 				tp->t_schedflag &= ~TS_RESUME;
562 			} else {
563 				tp->t_proc_flag |= TP_CHKPT;
564 
565 				thread_unlock(tp);
566 				mutex_exit(&p->p_lock);
567 				add_one_utstop();
568 				mutex_enter(&p->p_lock);
569 				thread_lock(tp);
570 
571 				aston(tp);
572 
573 				if (ISWAKEABLE(tp) || ISWAITING(tp)) {
574 					setrun_locked(tp);
575 				}
576 
577 			}
578 
579 			/* grab thread if needed */
580 			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
581 				poke_cpu(tp->t_cpu->cpu_id);
582 
583 
584 			thread_unlock(tp);
585 			mutex_exit(&p->p_lock);
586 		}
587 		mutex_exit(&pidlock);
588 
589 
590 		/* let everything catch up */
591 		utstop_timedwait(count * count * DR_UTSTOP_WAIT);
592 
593 
594 		/* now, walk the threadlist again to see if we are done */
595 		mutex_enter(&pidlock);
596 		for (tp = curthread->t_next, bailout = 0;
597 		    tp != curthread; tp = tp->t_next) {
598 			proc_t *p = ttoproc(tp);
599 
600 			/* handle kernel threads separately */
601 			if (p->p_as == &kas || p->p_stat == SZOMB)
602 				continue;
603 
604 			/*
605 			 * If this thread didn't stop, and we don't allow
606 			 * unstopped blocked threads, bail.
607 			 */
608 			thread_lock(tp);
609 			if (!CPR_ISTOPPED(tp) &&
610 			    !(dr_allow_blocked_threads &&
611 			    DR_VSTOPPED(tp))) {
612 				bailout = 1;
613 				if (count == DR_UTSTOP_RETRY - 1) {
614 					/*
615 					 * save the pid for later reporting
616 					 */
617 					srh->sr_err_idx =
618 					    dr_add_int(srh->sr_err_ints,
619 					    srh->sr_err_idx, DR_MAX_ERR_INT,
620 					    (uint64_t)p->p_pid);
621 
622 					cmn_err(CE_WARN, "%s: "
623 					    "failed to stop thread: "
624 					    "process=%s, pid=%d",
625 					    f, p->p_user.u_psargs, p->p_pid);
626 
627 					PR_QR("%s: failed to stop thread: "
628 					    "process=%s, pid=%d, t_id=0x%p, "
629 					    "t_state=0x%x, t_proc_flag=0x%x, "
630 					    "t_schedflag=0x%x\n",
631 					    f, p->p_user.u_psargs, p->p_pid,
632 					    (void *)tp, tp->t_state,
633 					    tp->t_proc_flag, tp->t_schedflag);
634 				}
635 
636 			}
637 			thread_unlock(tp);
638 		}
639 		mutex_exit(&pidlock);
640 
641 		/* were all the threads stopped? */
642 		if (!bailout)
643 			break;
644 	}
645 
646 	/* were we unable to stop all threads after a few tries? */
647 	if (bailout) {
648 		handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
649 		    srh->sr_err_idx, 0);
650 		return (ESRCH);
651 	}
652 
653 	return (DDI_SUCCESS);
654 }
655 
656 static void
657 dr_start_user_threads(void)
658 {
659 	kthread_id_t tp;
660 
661 	mutex_enter(&pidlock);
662 
663 	/* walk all threads and release them */
664 	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
665 		proc_t *p = ttoproc(tp);
666 
667 		/* skip kernel threads */
668 		if (ttoproc(tp)->p_as == &kas)
669 			continue;
670 
671 		mutex_enter(&p->p_lock);
672 		tp->t_proc_flag &= ~TP_CHKPT;
673 		mutex_exit(&p->p_lock);
674 
675 		thread_lock(tp);
676 		if (CPR_ISTOPPED(tp)) {
677 			/* back on the runq */
678 			tp->t_schedflag |= TS_RESUME;
679 			setrun_locked(tp);
680 		}
681 		thread_unlock(tp);
682 	}
683 
684 	mutex_exit(&pidlock);
685 }
686 
687 static void
688 dr_signal_user(int sig)
689 {
690 	struct proc *p;
691 
692 	mutex_enter(&pidlock);
693 
694 	for (p = practive; p != NULL; p = p->p_next) {
695 		/* only user threads */
696 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
697 		    p == proc_init || p == ttoproc(curthread))
698 			continue;
699 
700 		mutex_enter(&p->p_lock);
701 		sigtoproc(p, NULL, sig);
702 		mutex_exit(&p->p_lock);
703 	}
704 
705 	mutex_exit(&pidlock);
706 
707 	/* add a bit of delay */
708 	delay(hz);
709 }
710 
711 void
712 dr_resume(dr_sr_handle_t *srh)
713 {
714 	if (srh->sr_suspend_state < DR_SRSTATE_FULL) {
715 		/*
716 		 * Update the signature block.
717 		 * If cpus are not paused, this can be done now.
718 		 * See comments below.
719 		 */
720 		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
721 		    CPU->cpu_id);
722 	}
723 
724 	switch (srh->sr_suspend_state) {
725 	case DR_SRSTATE_FULL:
726 
727 		ASSERT(MUTEX_HELD(&cpu_lock));
728 
729 		/*
730 		 * Prevent false alarm in tod_validate() due to tod
731 		 * value change between suspend and resume
732 		 */
733 		mutex_enter(&tod_lock);
734 		tod_status_set(TOD_DR_RESUME_DONE);
735 		mutex_exit(&tod_lock);
736 
737 		dr_enable_intr(); 	/* enable intr & clock */
738 
739 		start_cpus();
740 		mutex_exit(&cpu_lock);
741 
742 		/*
743 		 * Update the signature block.
744 		 * This must not be done while cpus are paused, since on
745 		 * Starcat the cpu signature update aquires an adaptive
746 		 * mutex in the iosram driver. Blocking with cpus paused
747 		 * can lead to deadlock.
748 		 */
749 		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
750 		    CPU->cpu_id);
751 
752 		/*
753 		 * If we suspended hw watchdog at suspend,
754 		 * re-enable it now.
755 		 */
756 		if (srh->sr_flags & (SR_FLAG_WATCHDOG)) {
757 			mutex_enter(&tod_lock);
758 			tod_ops.tod_set_watchdog_timer(
759 			    watchdog_timeout_seconds);
760 			mutex_exit(&tod_lock);
761 		}
762 
763 		/*
764 		 * This should only be called if drmach_suspend_last()
765 		 * was called and state transitioned to DR_SRSTATE_FULL
766 		 * to prevent resume attempts on device instances that
767 		 * were not previously suspended.
768 		 */
769 		drmach_resume_first();
770 
771 		/* FALLTHROUGH */
772 
773 	case DR_SRSTATE_DRIVER:
774 		/*
775 		 * resume drivers
776 		 */
777 		srh->sr_err_idx = 0;
778 
779 		/* no parent dip to hold busy */
780 		dr_resume_devices(ddi_root_node(), srh);
781 
782 		if (srh->sr_err_idx && srh->sr_dr_handlep) {
783 			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
784 			    srh->sr_err_ints, srh->sr_err_idx, 1);
785 		}
786 
787 		/*
788 		 * resume the lock manager
789 		 */
790 		lm_cprresume();
791 
792 		/* FALLTHROUGH */
793 
794 	case DR_SRSTATE_USER:
795 		/*
796 		 * finally, resume user threads
797 		 */
798 		if (!dr_skip_user_threads) {
799 			prom_printf("DR: resuming user threads...\n");
800 			dr_start_user_threads();
801 		}
802 		/* FALLTHROUGH */
803 
804 	case DR_SRSTATE_BEGIN:
805 	default:
806 		/*
807 		 * let those who care know that we've just resumed
808 		 */
809 		PR_QR("sending SIGTHAW...\n");
810 		dr_signal_user(SIGTHAW);
811 		break;
812 	}
813 
814 	/*
815 	 * update the signature block
816 	 */
817 	CPU_SIGNATURE(OS_SIG, SIGST_RUN, SIGSUBST_NULL, CPU->cpu_id);
818 
819 	prom_printf("DR: resume COMPLETED\n");
820 }
821 
822 int
823 dr_suspend(dr_sr_handle_t *srh)
824 {
825 	dr_handle_t	*handle;
826 	int		force;
827 	int		dev_errs_idx;
828 	uint64_t	dev_errs[DR_MAX_ERR_INT];
829 	int		rc = DDI_SUCCESS;
830 
831 	handle = srh->sr_dr_handlep;
832 
833 	force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;
834 
835 	/*
836 	 * update the signature block
837 	 */
838 	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCE_INPROGRESS, SIGSUBST_NULL,
839 	    CPU->cpu_id);
840 
841 	prom_printf("\nDR: suspending user threads...\n");
842 	srh->sr_suspend_state = DR_SRSTATE_USER;
843 	if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
844 	    dr_check_user_stop_result) {
845 		dr_resume(srh);
846 		return (rc);
847 	}
848 
849 	if (!force) {
850 		struct dr_ref drc = {0};
851 
852 		prom_printf("\nDR: checking devices...\n");
853 		dev_errs_idx = 0;
854 
855 		drc.arr = dev_errs;
856 		drc.idx = &dev_errs_idx;
857 		drc.len = DR_MAX_ERR_INT;
858 
859 		/*
860 		 * Since the root node can never go away, it
861 		 * doesn't have to be held.
862 		 */
863 		ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
864 		if (dev_errs_idx) {
865 			handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
866 			    dev_errs_idx, 1);
867 			dr_resume(srh);
868 			return (DDI_FAILURE);
869 		}
870 		PR_QR("done\n");
871 	} else {
872 		prom_printf("\nDR: dr_suspend invoked with force flag\n");
873 	}
874 
875 #ifndef	SKIP_SYNC
876 	/*
877 	 * This sync swap out all user pages
878 	 */
879 	vfs_sync(SYNC_ALL);
880 #endif
881 
882 	/*
883 	 * special treatment for lock manager
884 	 */
885 	lm_cprsuspend();
886 
887 #ifndef	SKIP_SYNC
888 	/*
889 	 * sync the file system in case we never make it back
890 	 */
891 	sync();
892 #endif
893 
894 	/*
895 	 * now suspend drivers
896 	 */
897 	prom_printf("DR: suspending drivers...\n");
898 	srh->sr_suspend_state = DR_SRSTATE_DRIVER;
899 	srh->sr_err_idx = 0;
900 	/* No parent to hold busy */
901 	if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
902 		if (srh->sr_err_idx && srh->sr_dr_handlep) {
903 			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
904 			    srh->sr_err_ints, srh->sr_err_idx, 1);
905 		}
906 		dr_resume(srh);
907 		return (rc);
908 	}
909 
910 	drmach_suspend_last();
911 
912 	/*
913 	 * finally, grab all cpus
914 	 */
915 	srh->sr_suspend_state = DR_SRSTATE_FULL;
916 
917 	/*
918 	 * if watchdog was activated, disable it
919 	 */
920 	if (watchdog_activated) {
921 		mutex_enter(&tod_lock);
922 		tod_ops.tod_clear_watchdog_timer();
923 		mutex_exit(&tod_lock);
924 		srh->sr_flags |= SR_FLAG_WATCHDOG;
925 	} else {
926 		srh->sr_flags &= ~(SR_FLAG_WATCHDOG);
927 	}
928 
929 	/*
930 	 * Update the signature block.
931 	 * This must be done before cpus are paused, since on Starcat the
932 	 * cpu signature update aquires an adaptive mutex in the iosram driver.
933 	 * Blocking with cpus paused can lead to deadlock.
934 	 */
935 	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCED, SIGSUBST_NULL, CPU->cpu_id);
936 
937 	mutex_enter(&cpu_lock);
938 	pause_cpus(NULL, NULL);
939 	dr_stop_intr();
940 
941 	return (rc);
942 }
943 
944 int
945 dr_pt_test_suspend(dr_handle_t *hp)
946 {
947 	dr_sr_handle_t *srh;
948 	int		err;
949 	uint_t		psmerr;
950 	static fn_t	f = "dr_pt_test_suspend";
951 
952 	PR_QR("%s...\n", f);
953 
954 	srh = dr_get_sr_handle(hp);
955 	if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
956 		dr_resume(srh);
957 		if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
958 			PR_QR("%s: error on dr_resume()", f);
959 			switch (psmerr) {
960 			case ESBD_RESUME:
961 				PR_QR("Couldn't resume devices: %s\n",
962 				    DR_GET_E_RSC(hp->h_err));
963 				break;
964 
965 			case ESBD_KTHREAD:
966 				PR_ALL("psmerr is ESBD_KTHREAD\n");
967 				break;
968 			default:
969 				PR_ALL("Resume error unknown = %d\n", psmerr);
970 				break;
971 			}
972 		}
973 	} else {
974 		PR_ALL("%s: dr_suspend() failed, err = 0x%x\n", f, err);
975 		psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
976 		switch (psmerr) {
977 		case ESBD_UNSAFE:
978 			PR_ALL("Unsafe devices (major #): %s\n",
979 			    DR_GET_E_RSC(hp->h_err));
980 			break;
981 
982 		case ESBD_RTTHREAD:
983 			PR_ALL("RT threads (PIDs): %s\n",
984 			    DR_GET_E_RSC(hp->h_err));
985 			break;
986 
987 		case ESBD_UTHREAD:
988 			PR_ALL("User threads (PIDs): %s\n",
989 			    DR_GET_E_RSC(hp->h_err));
990 			break;
991 
992 		case ESBD_SUSPEND:
993 			PR_ALL("Non-suspendable devices (major #): %s\n",
994 			    DR_GET_E_RSC(hp->h_err));
995 			break;
996 
997 		case ESBD_RESUME:
998 			PR_ALL("Could not resume devices (major #): %s\n",
999 			    DR_GET_E_RSC(hp->h_err));
1000 			break;
1001 
1002 		case ESBD_KTHREAD:
1003 			PR_ALL("psmerr is ESBD_KTHREAD\n");
1004 			break;
1005 
1006 		case ESBD_NOERROR:
1007 			PR_ALL("sbd_error_t error code not set\n");
1008 			break;
1009 
1010 		default:
1011 			PR_ALL("Unknown error psmerr = %d\n", psmerr);
1012 			break;
1013 		}
1014 	}
1015 	dr_release_sr_handle(srh);
1016 
1017 	return (0);
1018 }
1019 
/*
 * Append 'val' to the first 'idx' entries of 'arr', which has room for
 * 'len' entries in total.  Nothing is stored if the array is already full
 * or if 'val' is already present.
 *
 * Returns the resulting number of entries, or 0 if 'arr' is NULL.
 */
static int
dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
{
	int pos;

	if (arr == NULL)
		return (0);

	/* no room left: report the count unchanged */
	if (idx >= len)
		return (idx);

	/* scan the existing entries for a duplicate */
	pos = 0;
	while (pos < idx && arr[pos] != val)
		pos++;
	if (pos < idx)
		return (idx);

	arr[idx] = val;

	return (idx + 1);
}
1045 
1046 /*
1047  * Construct an sbd_error_t featuring a string representation of an array of
1048  * integers as its e_rsc.
1049  */
1050 static sbd_error_t *
1051 drerr_int(int e_code, uint64_t *arr, int idx, int majors)
1052 {
1053 	int		i, n, buf_len, buf_idx, buf_avail;
1054 	char		*dname;
1055 	char		*buf;
1056 	sbd_error_t	*new_sbd_err;
1057 	static char	s_ellipsis[] = "...";
1058 
1059 	if (arr == NULL || idx <= 0)
1060 		return (NULL);
1061 
1062 	/* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */
1063 	buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1064 
1065 	/*
1066 	 * This is the total working area of the buffer.  It must be computed
1067 	 * as the size of 'buf', minus reserved space for the null terminator
1068 	 * and the ellipsis string.
1069 	 */
1070 	buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
1071 
1072 	/* Construct a string representation of the array values */
1073 	for (buf_idx = 0, i = 0; i < idx; i++) {
1074 		buf_avail = buf_len - buf_idx;
1075 		if (majors) {
1076 			dname = ddi_major_to_name(arr[i]);
1077 			if (dname) {
1078 				n = snprintf(&buf[buf_idx], buf_avail, "%s, ",
1079 				    dname);
1080 			} else {
1081 				n = snprintf(&buf[buf_idx], buf_avail,
1082 				    "major %lu, ", arr[i]);
1083 			}
1084 		} else {
1085 			n = snprintf(&buf[buf_idx], buf_avail, "%lu, ", arr[i]);
1086 		}
1087 
1088 		/* An ellipsis gets appended when no more values fit */
1089 		if (n >= buf_avail) {
1090 			(void) strcpy(&buf[buf_idx], s_ellipsis);
1091 			break;
1092 		}
1093 
1094 		buf_idx += n;
1095 	}
1096 
1097 	/* If all the contents fit, remove the trailing comma */
1098 	if (n < buf_avail) {
1099 		buf[--buf_idx] = '\0';
1100 		buf[--buf_idx] = '\0';
1101 	}
1102 
1103 	/* Return an sbd_error_t with the buffer and e_code */
1104 	new_sbd_err = drerr_new(1, e_code, buf);
1105 	kmem_free(buf, MAXPATHLEN);
1106 	return (new_sbd_err);
1107 }
1108