xref: /dragonfly/sys/kern/kern_shutdown.c (revision 2020c8fe)
1 /*-
2  * Copyright (c) 1986, 1988, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_shutdown.c	8.3 (Berkeley) 1/21/94
39  * $FreeBSD: src/sys/kern/kern_shutdown.c,v 1.72.2.12 2002/02/21 19:15:10 dillon Exp $
40  */
41 
42 #include "opt_ddb.h"
43 #include "opt_ddb_trace.h"
44 #include "opt_hw_wdog.h"
45 #include "opt_panic.h"
46 #include "opt_show_busybufs.h"
47 #include "use_gpio.h"
48 
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/eventhandler.h>
52 #include <sys/buf.h>
53 #include <sys/disk.h>
54 #include <sys/diskslice.h>
55 #include <sys/reboot.h>
56 #include <sys/proc.h>
57 #include <sys/priv.h>
58 #include <sys/fcntl.h>		/* FREAD	*/
59 #include <sys/stat.h>		/* S_IFCHR	*/
60 #include <sys/vnode.h>
61 #include <sys/kernel.h>
62 #include <sys/kerneldump.h>
63 #include <sys/kthread.h>
64 #include <sys/malloc.h>
65 #include <sys/mount.h>
66 #include <sys/queue.h>
67 #include <sys/sysctl.h>
68 #include <sys/vkernel.h>
69 #include <sys/conf.h>
70 #include <sys/sysproto.h>
71 #include <sys/device.h>
72 #include <sys/cons.h>
73 #include <sys/shm.h>
74 #include <sys/kern_syscall.h>
75 #include <vm/vm_map.h>
76 #include <vm/pmap.h>
77 
78 #include <sys/thread2.h>
79 #include <sys/buf2.h>
80 #include <sys/mplock2.h>
81 
82 #include <machine/cpu.h>
83 #include <machine/clock.h>
84 #include <machine/md_var.h>
85 #include <machine/smp.h>		/* smp_active_mask, cpuid */
86 #include <machine/vmparam.h>
87 #include <machine/thread.h>
88 
89 #include <sys/signalvar.h>
90 
91 #include <sys/wdog.h>
92 #include <dev/misc/gpio/gpio.h>
93 
94 #ifndef PANIC_REBOOT_WAIT_TIME
95 #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
96 #endif
97 
98 /*
99  * Note that stdarg.h and the ANSI style va_start macro is used for both
100  * ANSI and traditional C compilers.  We use the machine version to stay
101  * within the confines of the kernel header files.
102  */
103 #include <machine/stdarg.h>
104 
105 #ifdef DDB
106 #include <ddb/ddb.h>
107 #ifdef DDB_UNATTENDED
108 int debugger_on_panic = 0;
109 #else
110 int debugger_on_panic = 1;
111 #endif
112 SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RW,
113 	&debugger_on_panic, 0, "Run debugger on kernel panic");
114 
115 #ifdef DDB_TRACE
116 int trace_on_panic = 1;
117 #else
118 int trace_on_panic = 0;
119 #endif
120 SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, CTLFLAG_RW,
121 	&trace_on_panic, 0, "Print stack trace on kernel panic");
122 #endif
123 
124 static int sync_on_panic = 0;
125 SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RW,
126 	&sync_on_panic, 0, "Do a sync before rebooting from a panic");
127 
128 SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment");
129 
130 #ifdef	HW_WDOG
131 /*
132  * If there is a hardware watchdog, point this at the function needed to
133  * hold it off.
134  * It's needed when the kernel needs to do some lengthy operations.
135  * e.g. in wd.c when dumping core.. It's most annoying to have
136  * your precious core-dump only half written because the wdog kicked in.
137  */
138 watchdog_tickle_fn wdog_tickler = NULL;
139 #endif	/* HW_WDOG */
140 
141 /*
142  * Variable panicstr contains argument to first call to panic; used as flag
143  * to indicate that the kernel has already called panic.
144  */
145 const char *panicstr;
146 
147 int dumping;				/* system is dumping */
148 static struct dumperinfo dumper;	/* selected dumper */
149 
150 globaldata_t panic_cpu_gd;		/* which cpu took the panic */
151 struct lwkt_tokref panic_tokens[LWKT_MAXTOKENS];
152 int panic_tokens_count;
153 
154 int bootverbose = 0;			/* note: assignment to force non-bss */
155 SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW,
156 	   &bootverbose, 0, "Verbose kernel messages");
157 
158 int cold = 1;				/* note: assignment to force non-bss */
159 int dumplo;				/* OBSOLETE - savecore compat */
160 u_int64_t dumplo64;
161 
162 static void boot (int) __dead2;
163 static int setdumpdev (cdev_t dev);
164 static void poweroff_wait (void *, int);
165 static void print_uptime (void);
166 static void shutdown_halt (void *junk, int howto);
167 static void shutdown_panic (void *junk, int howto);
168 static void shutdown_reset (void *junk, int howto);
169 static int shutdown_busycount1(struct buf *bp, void *info);
170 static int shutdown_busycount2(struct buf *bp, void *info);
171 static void shutdown_cleanup_proc(struct proc *p);
172 
173 /* register various local shutdown events */
174 static void
175 shutdown_conf(void *unused)
176 {
177 	EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, SHUTDOWN_PRI_FIRST);
178 	EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, SHUTDOWN_PRI_LAST + 100);
179 	EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, SHUTDOWN_PRI_LAST + 100);
180 	EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, SHUTDOWN_PRI_LAST + 200);
181 }
182 
183 SYSINIT(shutdown_conf, SI_BOOT2_MACHDEP, SI_ORDER_ANY, shutdown_conf, NULL)
184 
185 /* ARGSUSED */
186 
187 /*
188  * The system call that results in a reboot
189  *
190  * MPALMOSTSAFE
191  */
192 int
193 sys_reboot(struct reboot_args *uap)
194 {
195 	struct thread *td = curthread;
196 	int error;
197 
198 	if ((error = priv_check(td, PRIV_REBOOT)))
199 		return (error);
200 
201 	get_mplock();
202 	boot(uap->opt);
203 	rel_mplock();
204 	return (0);
205 }
206 
207 /*
208  * Called by events that want to shut down.. e.g  <CTL><ALT><DEL> on a PC
209  */
210 static int shutdown_howto = 0;
211 
212 void
213 shutdown_nice(int howto)
214 {
215 	shutdown_howto = howto;
216 
217 	/* Send a signal to init(8) and have it shutdown the world */
218 	if (initproc != NULL) {
219 		ksignal(initproc, SIGINT);
220 	} else {
221 		/* No init(8) running, so simply reboot */
222 		boot(RB_NOSYNC);
223 	}
224 	return;
225 }
226 static int	waittime = -1;
227 struct pcb dumppcb;
228 struct thread *dumpthread;
229 
230 static void
231 print_uptime(void)
232 {
233 	int f;
234 	struct timespec ts;
235 
236 	getnanouptime(&ts);
237 	kprintf("Uptime: ");
238 	f = 0;
239 	if (ts.tv_sec >= 86400) {
240 		kprintf("%ldd", ts.tv_sec / 86400);
241 		ts.tv_sec %= 86400;
242 		f = 1;
243 	}
244 	if (f || ts.tv_sec >= 3600) {
245 		kprintf("%ldh", ts.tv_sec / 3600);
246 		ts.tv_sec %= 3600;
247 		f = 1;
248 	}
249 	if (f || ts.tv_sec >= 60) {
250 		kprintf("%ldm", ts.tv_sec / 60);
251 		ts.tv_sec %= 60;
252 		f = 1;
253 	}
254 	kprintf("%lds\n", ts.tv_sec);
255 }
256 
257 /*
258  *  Go through the rigmarole of shutting down..
259  * this used to be in machdep.c but I'll be dammned if I could see
260  * anything machine dependant in it.
261  */
262 static void
263 boot(int howto)
264 {
265 	/*
266 	 * Get rid of any user scheduler baggage and then give
267 	 * us a high priority.
268 	 */
269 	if (curthread->td_release)
270 		curthread->td_release(curthread);
271 	lwkt_setpri_self(TDPRI_MAX);
272 
273 	/* collect extra flags that shutdown_nice might have set */
274 	howto |= shutdown_howto;
275 
276 #ifdef SMP
277 	/*
278 	 * We really want to shutdown on the BSP.  Subsystems such as ACPI
279 	 * can't power-down the box otherwise.
280 	 */
281 	if (smp_active_mask > 1) {
282 		kprintf("boot() called on cpu#%d\n", mycpu->gd_cpuid);
283 	}
284 	if (panicstr == NULL && mycpu->gd_cpuid != 0) {
285 		kprintf("Switching to cpu #0 for shutdown\n");
286 		lwkt_setcpu_self(globaldata_find(0));
287 	}
288 #endif
289 	/*
290 	 * Do any callouts that should be done BEFORE syncing the filesystems.
291 	 */
292 	EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);
293 
294 	/*
295 	 * Try to get rid of any remaining FS references.  The calling
296 	 * process, proc0, and init may still hold references.  The
297 	 * VFS cache subsystem may still hold a root reference to root.
298 	 *
299 	 * XXX this needs work.  We really need to SIGSTOP all remaining
300 	 * processes in order to avoid blowups due to proc0's filesystem
301 	 * references going away.  For now just make sure that the init
302 	 * process is stopped.
303 	 */
304 	if (panicstr == NULL) {
305 		shutdown_cleanup_proc(curproc);
306 		shutdown_cleanup_proc(&proc0);
307 		if (initproc) {
308 			if (initproc != curproc) {
309 				ksignal(initproc, SIGSTOP);
310 				tsleep(boot, 0, "shutdn", hz / 20);
311 			}
312 			shutdown_cleanup_proc(initproc);
313 		}
314 		vfs_cache_setroot(NULL, NULL);
315 	}
316 
317 	/*
318 	 * Now sync filesystems
319 	 */
320 	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
321 		int iter, nbusy, pbusy;
322 
323 		waittime = 0;
324 		kprintf("\nsyncing disks... ");
325 
326 		sys_sync(NULL);	/* YYY was sync(&proc0, NULL). why proc0 ? */
327 
328 		/*
329 		 * With soft updates, some buffers that are
330 		 * written will be remarked as dirty until other
331 		 * buffers are written.
332 		 */
333 		for (iter = pbusy = 0; iter < 20; iter++) {
334 			nbusy = scan_all_buffers(shutdown_busycount1, NULL);
335 			if (nbusy == 0)
336 				break;
337 			kprintf("%d ", nbusy);
338 			if (nbusy < pbusy)
339 				iter = 0;
340 			pbusy = nbusy;
341 			/*
342 			 * XXX:
343 			 * Process soft update work queue if buffers don't sync
344 			 * after 6 iterations by permitting the syncer to run.
345 			 */
346 			if (iter > 5)
347 				bio_ops_sync(NULL);
348 
349 			sys_sync(NULL); /* YYY was sync(&proc0, NULL). why proc0 ? */
350 			tsleep(boot, 0, "shutdn", hz * iter / 20 + 1);
351 		}
352 		kprintf("\n");
353 		/*
354 		 * Count only busy local buffers to prevent forcing
355 		 * a fsck if we're just a client of a wedged NFS server
356 		 */
357 		nbusy = scan_all_buffers(shutdown_busycount2, NULL);
358 		if (nbusy) {
359 			/*
360 			 * Failed to sync all blocks. Indicate this and don't
361 			 * unmount filesystems (thus forcing an fsck on reboot).
362 			 */
363 			kprintf("giving up on %d buffers\n", nbusy);
364 #ifdef DDB
365 			if (debugger_on_panic)
366 				Debugger("busy buffer problem");
367 #endif /* DDB */
368 			tsleep(boot, 0, "shutdn", hz * 5 + 1);
369 		} else {
370 			kprintf("done\n");
371 			/*
372 			 * Unmount filesystems
373 			 */
374 			if (panicstr == NULL)
375 				vfs_unmountall();
376 		}
377 		tsleep(boot, 0, "shutdn", hz / 10 + 1);
378 	}
379 
380 	print_uptime();
381 
382 	/*
383 	 * Dump before doing post_sync shutdown ops
384 	 */
385 	crit_enter();
386 	if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold) {
387 		dumpsys();
388 	}
389 
390 	/*
391 	 * Ok, now do things that assume all filesystem activity has
392 	 * been completed.  This will also call the device shutdown
393 	 * methods.
394 	 */
395 	EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
396 
397 	/* Now that we're going to really halt the system... */
398 	EVENTHANDLER_INVOKE(shutdown_final, howto);
399 
400 	for(;;) ;	/* safety against shutdown_reset not working */
401 	/* NOTREACHED */
402 }
403 
404 /*
405  * Pass 1 - Figure out if there are any busy or dirty buffers still present.
406  *
407  *	We ignore TMPFS mounts in this pass.
408  */
409 static int
410 shutdown_busycount1(struct buf *bp, void *info)
411 {
412 	struct vnode *vp;
413 
414 	if ((vp = bp->b_vp) != NULL && vp->v_tag == VT_TMPFS)
415 		return (0);
416 	if ((bp->b_flags & B_INVAL) == 0 && BUF_REFCNT(bp) > 0)
417 		return(1);
418 	if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI)
419 		return (1);
420 	return (0);
421 }
422 
423 /*
424  * Pass 2 - only run after pass 1 has completed or has given up
425  *
426  *	We ignore TMPFS, NFS, MFS, and SMBFS mounts in this pass.
427  */
428 static int
429 shutdown_busycount2(struct buf *bp, void *info)
430 {
431 	struct vnode *vp;
432 
433 	/*
434 	 * Ignore tmpfs and nfs mounts
435 	 */
436 	if ((vp = bp->b_vp) != NULL) {
437 		if (vp->v_tag == VT_TMPFS)
438 			return (0);
439 		if (vp->v_tag == VT_NFS)
440 			return (0);
441 		if (vp->v_tag == VT_MFS)
442 			return (0);
443 		if (vp->v_tag == VT_SMBFS)
444 			return (0);
445 	}
446 
447 	/*
448 	 * Only count buffers stuck on I/O, ignore everything else
449 	 */
450 	if (((bp->b_flags & B_INVAL) == 0 && BUF_REFCNT(bp)) ||
451 	    ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI)) {
452 		/*
453 		 * Only count buffers undergoing write I/O
454 		 * on the related vnode.
455 		 */
456 		if (bp->b_vp == NULL ||
457 		    bio_track_active(&bp->b_vp->v_track_write) == 0) {
458 			return (0);
459 		}
460 #if defined(SHOW_BUSYBUFS) || defined(DIAGNOSTIC)
461 		kprintf(
462 	    "%p dev:?, flags:%08x, loffset:%jd, doffset:%jd\n",
463 		    bp,
464 		    bp->b_flags, (intmax_t)bp->b_loffset,
465 		    (intmax_t)bp->b_bio2.bio_offset);
466 #endif
467 		return(1);
468 	}
469 	return(0);
470 }
471 
472 /*
473  * If the shutdown was a clean halt, behave accordingly.
474  */
475 static void
476 shutdown_halt(void *junk, int howto)
477 {
478 	if (howto & RB_HALT) {
479 		kprintf("\n");
480 		kprintf("The operating system has halted.\n");
481 #ifdef _KERNEL_VIRTUAL
482 		cpu_halt();
483 #else
484 		kprintf("Please press any key to reboot.\n\n");
485 		switch (cngetc()) {
486 		case -1:		/* No console, just die */
487 			cpu_halt();
488 			/* NOTREACHED */
489 		default:
490 			howto &= ~RB_HALT;
491 			break;
492 		}
493 #endif
494 	}
495 }
496 
497 /*
498  * Check to see if the system paniced, pause and then reboot
499  * according to the specified delay.
500  */
501 static void
502 shutdown_panic(void *junk, int howto)
503 {
504 	int loop;
505 
506 	if (howto & RB_DUMP) {
507 		if (PANIC_REBOOT_WAIT_TIME != 0) {
508 			if (PANIC_REBOOT_WAIT_TIME != -1) {
509 				kprintf("Automatic reboot in %d seconds - "
510 				       "press a key on the console to abort\n",
511 					PANIC_REBOOT_WAIT_TIME);
512 				for (loop = PANIC_REBOOT_WAIT_TIME * 10;
513 				     loop > 0; --loop) {
514 					DELAY(1000 * 100); /* 1/10th second */
515 					/* Did user type a key? */
516 					if (cncheckc() != -1)
517 						break;
518 				}
519 				if (!loop)
520 					return;
521 			}
522 		} else { /* zero time specified - reboot NOW */
523 			return;
524 		}
525 		kprintf("--> Press a key on the console to reboot,\n");
526 		kprintf("--> or switch off the system now.\n");
527 		cngetc();
528 	}
529 }
530 
531 /*
532  * Everything done, now reset
533  */
534 static void
535 shutdown_reset(void *junk, int howto)
536 {
537 	kprintf("Rebooting...\n");
538 	DELAY(1000000);	/* wait 1 sec for kprintf's to complete and be read */
539 	/* cpu_boot(howto); */ /* doesn't do anything at the moment */
540 	cpu_reset();
541 	/* NOTREACHED */ /* assuming reset worked */
542 }
543 
544 /*
545  * Try to remove FS references in the specified process.  This function
546  * is used during shutdown
547  */
548 static
549 void
550 shutdown_cleanup_proc(struct proc *p)
551 {
552 	struct filedesc *fdp;
553 	struct vmspace *vm;
554 
555 	if (p == NULL)
556 		return;
557 	if ((fdp = p->p_fd) != NULL) {
558 		kern_closefrom(0);
559 		if (fdp->fd_cdir) {
560 			cache_drop(&fdp->fd_ncdir);
561 			vrele(fdp->fd_cdir);
562 			fdp->fd_cdir = NULL;
563 		}
564 		if (fdp->fd_rdir) {
565 			cache_drop(&fdp->fd_nrdir);
566 			vrele(fdp->fd_rdir);
567 			fdp->fd_rdir = NULL;
568 		}
569 		if (fdp->fd_jdir) {
570 			cache_drop(&fdp->fd_njdir);
571 			vrele(fdp->fd_jdir);
572 			fdp->fd_jdir = NULL;
573 		}
574 	}
575 	if (p->p_vkernel)
576 		vkernel_exit(p);
577 	if (p->p_textvp) {
578 		vrele(p->p_textvp);
579 		p->p_textvp = NULL;
580 	}
581 	vm = p->p_vmspace;
582 	if (vm != NULL) {
583 		pmap_remove_pages(vmspace_pmap(vm),
584 				  VM_MIN_USER_ADDRESS,
585 				  VM_MAX_USER_ADDRESS);
586 		vm_map_remove(&vm->vm_map,
587 			      VM_MIN_USER_ADDRESS,
588 			      VM_MAX_USER_ADDRESS);
589 	}
590 }
591 
592 /*
593  * Magic number for savecore
594  *
595  * exported (symorder) and used at least by savecore(8)
596  *
597  * Mark it as used so that gcc doesn't optimize it away.
598  */
599 __attribute__((__used__))
600 	static u_long const dumpmag = 0x8fca0101UL;
601 
602 __attribute__((__used__))
603 	static int	dumpsize = 0;		/* also for savecore */
604 
605 static int	dodump = 1;
606 
607 SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0,
608     "Try to perform coredump on kernel panic");
609 
/*
 * Fill in a kernel dump header for savecore(8).
 *
 * kdh     - header to initialize (zeroed first)
 * magic   - magic string identifying the dump format
 * archver - architecture-specific dump format version
 * dumplen - dump length in bytes
 * blksz   - block size the dump is written with
 *
 * NOTE: the header fields are fixed-size char arrays; strncpy may
 * legitimately leave them unterminated when the source fills the
 * field, which the on-disk format presumably tolerates - do not
 * "fix" these into NUL-terminated copies without checking savecore.
 */
void
mkdumpheader(struct kerneldumpheader *kdh, char *magic, uint32_t archver,
    uint64_t dumplen, uint32_t blksz)
{
	bzero(kdh, sizeof(*kdh));
	strncpy(kdh->magic, magic, sizeof(kdh->magic));
	strncpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
	kdh->version = htod32(KERNELDUMPVERSION);
	kdh->architectureversion = htod32(archver);
	kdh->dumplength = htod64(dumplen);
	kdh->dumptime = htod64(time_second);
	kdh->blocksize = htod32(blksz);
	strncpy(kdh->hostname, hostname, sizeof(kdh->hostname));
	strncpy(kdh->versionstring, version, sizeof(kdh->versionstring));
	if (panicstr != NULL)
		strncpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));
	/* parity covers the rest of the header, so compute it last */
	kdh->parity = kerneldump_parity(kdh);
}
628 
/*
 * Configure (or with dev == NULL, deconfigure) the dump device.
 * Returns 0 on success or an errno.
 */
static int
setdumpdev(cdev_t dev)
{
	int error;
	int doopen;

	if (dev == NULL) {
		/* turn dumping off */
		disk_dumpconf(NULL, 0/*off*/);
		return (0);
	}

	/*
	 * We have to open the device before we can perform ioctls on it,
	 * or the slice/label data may not be present.  Device opens are
	 * usually tracked by specfs, but the dump device can be set in
	 * early boot and may not be open so this is somewhat of a hack.
	 */
	doopen = (dev->si_sysref.refcnt == 1);
	if (doopen) {
		error = dev_dopen(dev, FREAD, S_IFCHR, proc0.p_ucred);
		if (error)
			return (error);
	}
	error = disk_dumpconf(dev, 1/*on*/);

	return error;
}
656 
/* ARGSUSED */
static void dump_conf (void *dummy);

/*
 * SYSINIT hook: resolve the "dumpdev" tunable to a device at boot and
 * configure it as the dump device.  On failure dumpdev is cleared.
 */
static void
dump_conf(void *dummy)
{
	char *path;
	cdev_t dev;

	path = kmalloc(MNAMELEN, M_TEMP, M_WAITOK);
	if (TUNABLE_STR_FETCH("dumpdev", path, MNAMELEN) != 0) {
		/* make sure disk devices are probed before the lookup */
		sync_devs();
		dev = kgetdiskbyname(path);
		if (dev != NULL)
			dumpdev = dev;
	}
	kfree(path, M_TEMP);
	if (setdumpdev(dumpdev) != 0)
		dumpdev = NULL;
}
676 
677 SYSINIT(dump_conf, SI_SUB_DUMP_CONF, SI_ORDER_FIRST, dump_conf, NULL)
678 
/*
 * Sysctl handler for kern.dumpdev: reports the current dump device as a
 * udev_t and, on write, attempts to switch to the new device via
 * setdumpdev().
 */
static int
sysctl_kern_dumpdev(SYSCTL_HANDLER_ARGS)
{
	int error;
	udev_t ndumpdev;

	ndumpdev = dev2udev(dumpdev);
	error = sysctl_handle_opaque(oidp, &ndumpdev, sizeof ndumpdev, req);
	/* only reconfigure on a successful write request */
	if (error == 0 && req->newptr != NULL)
		error = setdumpdev(udev2dev(ndumpdev, 0));
	return (error);
}
691 
692 SYSCTL_PROC(_kern, KERN_DUMPDEV, dumpdev, CTLTYPE_OPAQUE|CTLFLAG_RW,
693 	0, sizeof dumpdev, sysctl_kern_dumpdev, "T,udev_t", "");
694 
695 /*
696  * Panic is called on unresolvable fatal errors.  It prints "panic: mesg",
697  * and then reboots.  If we are called twice, then we avoid trying to sync
698  * the disks as this often leads to recursive panics.
699  */
700 void
701 panic(const char *fmt, ...)
702 {
703 	int bootopt, newpanic;
704 	globaldata_t gd = mycpu;
705 	thread_t td = gd->gd_curthread;
706 	__va_list ap;
707 	static char buf[256];
708 
709 #ifdef SMP
710 	/*
711 	 * If a panic occurs on multiple cpus before the first is able to
712 	 * halt the other cpus, only one cpu is allowed to take the panic.
713 	 * Attempt to be verbose about this situation but if the kprintf()
714 	 * itself panics don't let us overrun the kernel stack.
715 	 *
716 	 * Be very nasty about descheduling our thread at the lowest
717 	 * level possible in an attempt to freeze the thread without
718 	 * inducing further panics.
719 	 *
720 	 * Bumping gd_trap_nesting_level will also bypass assertions in
721 	 * lwkt_switch() and allow us to switch away even if we are a
722 	 * FAST interrupt or IPI.
723 	 *
724 	 * The setting of panic_cpu_gd also determines how kprintf()
725 	 * spin-locks itself.  DDB can set panic_cpu_gd as well.
726 	 */
727 	for (;;) {
728 		globaldata_t xgd = panic_cpu_gd;
729 
730 		/*
731 		 * Someone else got the panic cpu
732 		 */
733 		if (xgd && xgd != gd) {
734 			crit_enter();
735 			++mycpu->gd_trap_nesting_level;
736 			if (mycpu->gd_trap_nesting_level < 25) {
737 				kprintf("SECONDARY PANIC ON CPU %d THREAD %p\n",
738 					mycpu->gd_cpuid, td);
739 			}
740 			td->td_release = NULL;	/* be a grinch */
741 			for (;;) {
742 				lwkt_deschedule_self(td);
743 				lwkt_switch();
744 			}
745 			/* NOT REACHED */
746 			/* --mycpu->gd_trap_nesting_level */
747 			/* crit_exit() */
748 		}
749 
750 		/*
751 		 * Reentrant panic
752 		 */
753 		if (xgd && xgd == gd)
754 			break;
755 
756 		/*
757 		 * We got it
758 		 */
759 		if (atomic_cmpset_ptr(&panic_cpu_gd, NULL, gd))
760 			break;
761 	}
762 #else
763 	panic_cpu_gd = gd;
764 #endif
765 	/*
766 	 * Try to get the system into a working state.  Save information
767 	 * we are about to destroy.
768 	 */
769 	kvcreinitspin();
770 	if (panicstr == NULL) {
771 		bcopy(td->td_toks_array, panic_tokens, sizeof(panic_tokens));
772 		panic_tokens_count = td->td_toks_stop - &td->td_toks_base;
773 	}
774 	lwkt_relalltokens(td);
775 	td->td_toks_stop = &td->td_toks_base;
776 
777 	/*
778 	 * Setup
779 	 */
780 	bootopt = RB_AUTOBOOT | RB_DUMP;
781 	if (sync_on_panic == 0)
782 		bootopt |= RB_NOSYNC;
783 	newpanic = 0;
784 	if (panicstr) {
785 		bootopt |= RB_NOSYNC;
786 	} else {
787 		panicstr = fmt;
788 		newpanic = 1;
789 	}
790 
791 	/*
792 	 * Format the panic string.
793 	 */
794 	__va_start(ap, fmt);
795 	kvsnprintf(buf, sizeof(buf), fmt, ap);
796 	if (panicstr == fmt)
797 		panicstr = buf;
798 	__va_end(ap);
799 	kprintf("panic: %s\n", buf);
800 #ifdef SMP
801 	/* two separate prints in case of an unmapped page and trap */
802 	kprintf("cpuid = %d\n", mycpu->gd_cpuid);
803 #endif
804 
805 #if (NGPIO > 0) && defined(ERROR_LED_ON_PANIC)
806 	led_switch("error", 1);
807 #endif
808 
809 #if defined(WDOG_DISABLE_ON_PANIC) && defined(WATCHDOG_ENABLE)
810 	wdog_disable();
811 #endif
812 
813 	/*
814 	 * Enter the debugger or fall through & dump.  Entering the
815 	 * debugger will stop cpus.  If not entering the debugger stop
816 	 * cpus here.
817 	 */
818 #if defined(DDB)
819 	if (newpanic && trace_on_panic)
820 		print_backtrace(-1);
821 	if (debugger_on_panic)
822 		Debugger("panic");
823 	else
824 #endif
825 #ifdef SMP
826 	if (newpanic)
827 		stop_cpus(mycpu->gd_other_cpus);
828 #else
829 	;
830 #endif
831 	boot(bootopt);
832 }
833 
834 /*
835  * Support for poweroff delay.
836  */
837 #ifndef POWEROFF_DELAY
838 # define POWEROFF_DELAY 5000
839 #endif
840 static int poweroff_delay = POWEROFF_DELAY;
841 
842 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
843 	&poweroff_delay, 0, "");
844 
845 static void
846 poweroff_wait(void *junk, int howto)
847 {
848 	if(!(howto & RB_POWEROFF) || poweroff_delay <= 0)
849 		return;
850 	DELAY(poweroff_delay * 1000);
851 }
852 
853 /*
854  * Some system processes (e.g. syncer) need to be stopped at appropriate
855  * points in their main loops prior to a system shutdown, so that they
856  * won't interfere with the shutdown process (e.g. by holding a disk buf
857  * to cause sync to fail).  For each of these system processes, register
858  * shutdown_kproc() as a handler for one of shutdown events.
859  */
860 static int kproc_shutdown_wait = 60;
861 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW,
862     &kproc_shutdown_wait, 0, "");
863 
864 void
865 shutdown_kproc(void *arg, int howto)
866 {
867 	struct thread *td;
868 	struct proc *p;
869 	int error;
870 
871 	if (panicstr)
872 		return;
873 
874 	td = (struct thread *)arg;
875 	if ((p = td->td_proc) != NULL) {
876 	    kprintf("Waiting (max %d seconds) for system process `%s' to stop...",
877 		kproc_shutdown_wait, p->p_comm);
878 	} else {
879 	    kprintf("Waiting (max %d seconds) for system thread %s to stop...",
880 		kproc_shutdown_wait, td->td_comm);
881 	}
882 	error = suspend_kproc(td, kproc_shutdown_wait * hz);
883 
884 	if (error == EWOULDBLOCK)
885 		kprintf("timed out\n");
886 	else
887 		kprintf("stopped\n");
888 }
889 
890 /* Registration of dumpers */
891 int
892 set_dumper(struct dumperinfo *di)
893 {
894 	if (di == NULL) {
895 		bzero(&dumper, sizeof(dumper));
896 		return 0;
897 	}
898 
899 	if (dumper.dumper != NULL)
900 		return (EBUSY);
901 
902 	dumper = *di;
903 	return 0;
904 }
905 
/*
 * Write a crash dump via the registered dumper, if any.  Called from
 * boot() when RB_DUMP is set (and from the debugger).
 */
void
dumpsys(void)
{
#if defined (_KERNEL_VIRTUAL)
	/* VKERNELs don't support dumps */
	kprintf("VKERNEL doesn't support dumps\n");
	return;
#endif
	/*
	 * If there is a dumper registered and we aren't dumping already, call
	 * the machine dependent dumpsys (md_dumpsys) to do the hard work.
	 *
	 * XXX: while right now the md_dumpsys() of x86 and x86_64 could be
	 *      factored out completely into here, I rather keep them machine
	 *      dependent in case we ever add a platform which does not share
	 *      the same dumpsys() code, such as arm.
	 */
	if (dumper.dumper != NULL && !dumping) {
		dumping++;	/* also blocks recursive dumps */
		md_dumpsys(&dumper);
	}
}
928 
int dump_stop_usertds = 0;	/* set while a dump is in progress */

#ifdef SMP
/*
 * IPI callback: request a user reschedule on the target cpu (used by
 * dump_reactivate_cpus() below).
 */
static
void
need_user_resched_remote(void *dummy)
{
	need_user_resched();
}
#endif
939 
/*
 * Bring the other cpus back to life after a dump and flag user threads
 * to reschedule, on every cpu.
 */
void
dump_reactivate_cpus(void)
{
#ifdef SMP
	globaldata_t gd;
	int cpu, seq;
#endif

	dump_stop_usertds = 1;

	need_user_resched();

#ifdef SMP
	/* synchronously poke every cpu so each notices the request */
	for (cpu = 0; cpu < ncpus; cpu++) {
		gd = globaldata_find(cpu);
		seq = lwkt_send_ipiq(gd, need_user_resched_remote, NULL);
		lwkt_wait_ipiq(gd, seq);
	}

	/* resume the cpus that were stopped when the panic/dump began */
	restart_cpus(stopped_cpus);
#endif
}
962