1 /*- 2 * Copyright (c) 1986, 1988, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 39 * $FreeBSD: src/sys/kern/kern_shutdown.c,v 1.72.2.12 2002/02/21 19:15:10 dillon Exp $ 40 */ 41 42 #include "opt_ddb.h" 43 #include "opt_ddb_trace.h" 44 #include "opt_panic.h" 45 #include "opt_show_busybufs.h" 46 #include "use_gpio.h" 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/eventhandler.h> 51 #include <sys/buf.h> 52 #include <sys/disk.h> 53 #include <sys/diskslice.h> 54 #include <sys/reboot.h> 55 #include <sys/proc.h> 56 #include <sys/priv.h> 57 #include <sys/fcntl.h> /* FREAD */ 58 #include <sys/stat.h> /* S_IFCHR */ 59 #include <sys/vnode.h> 60 #include <sys/kernel.h> 61 #include <sys/kerneldump.h> 62 #include <sys/kthread.h> 63 #include <sys/malloc.h> 64 #include <sys/mount.h> 65 #include <sys/queue.h> 66 #include <sys/sysctl.h> 67 #include <sys/vkernel.h> 68 #include <sys/conf.h> 69 #include <sys/sysproto.h> 70 #include <sys/device.h> 71 #include <sys/cons.h> 72 #include <sys/shm.h> 73 #include <sys/kern_syscall.h> 74 #include <vm/vm_map.h> 75 #include <vm/pmap.h> 76 77 #include <sys/thread2.h> 78 #include <sys/buf2.h> 79 #include <sys/mplock2.h> 80 81 #include <machine/cpu.h> 82 #include <machine/clock.h> 83 #include <machine/md_var.h> 84 #include <machine/smp.h> /* smp_active_mask, cpuid */ 85 #include <machine/vmparam.h> 86 #include <machine/thread.h> 87 88 #include <sys/signalvar.h> 89 90 #include <sys/wdog.h> 91 #include 
<dev/misc/gpio/gpio.h> 92 93 #ifndef PANIC_REBOOT_WAIT_TIME 94 #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ 95 #endif 96 97 /* 98 * Note that stdarg.h and the ANSI style va_start macro is used for both 99 * ANSI and traditional C compilers. We use the machine version to stay 100 * within the confines of the kernel header files. 101 */ 102 #include <machine/stdarg.h> 103 104 #ifdef DDB 105 #include <ddb/ddb.h> 106 #ifdef DDB_UNATTENDED 107 int debugger_on_panic = 0; 108 #else 109 int debugger_on_panic = 1; 110 #endif 111 SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RW, 112 &debugger_on_panic, 0, "Run debugger on kernel panic"); 113 114 #ifdef DDB_TRACE 115 int trace_on_panic = 1; 116 #else 117 int trace_on_panic = 0; 118 #endif 119 SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, CTLFLAG_RW, 120 &trace_on_panic, 0, "Print stack trace on kernel panic"); 121 #endif 122 123 static int sync_on_panic = 0; 124 SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RW, 125 &sync_on_panic, 0, "Do a sync before rebooting from a panic"); 126 127 SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment"); 128 129 /* 130 * Variable panicstr contains argument to first call to panic; used as flag 131 * to indicate that the kernel has already called panic. 
*/
const char *panicstr;

int dumping;				/* system is dumping */
static struct dumperinfo dumper;	/* selected dumper */

globaldata_t panic_cpu_gd;		/* which cpu took the panic */
struct lwkt_tokref panic_tokens[LWKT_MAXTOKENS];
int panic_tokens_count;

int bootverbose = 0;			/* note: assignment to force non-bss */
SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW,
	   &bootverbose, 0, "Verbose kernel messages");

int cold = 1;				/* note: assignment to force non-bss */
int dumplo;				/* OBSOLETE - savecore compat */
u_int64_t dumplo64;

/* Forward declarations for the file-local shutdown helpers. */
static void boot (int) __dead2;
static int setdumpdev (cdev_t dev);
static void poweroff_wait (void *, int);
static void print_uptime (void);
static void shutdown_halt (void *junk, int howto);
static void shutdown_panic (void *junk, int howto);
static void shutdown_reset (void *junk, int howto);
static int shutdown_busycount1(struct buf *bp, void *info);
static int shutdown_busycount2(struct buf *bp, void *info);
static void shutdown_cleanup_proc(struct proc *p);

/*
 * Register various local shutdown events.
 *
 * All four handlers hang off the shutdown_final event: poweroff_wait
 * at SHUTDOWN_PRI_FIRST, shutdown_halt and shutdown_panic at
 * SHUTDOWN_PRI_LAST + 100, and shutdown_reset at SHUTDOWN_PRI_LAST + 200.
 * The argument is unused.
 */
static void
shutdown_conf(void *unused)
{
	EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, SHUTDOWN_PRI_FIRST);
	EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, SHUTDOWN_PRI_LAST + 100);
	EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, SHUTDOWN_PRI_LAST + 100);
	EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, SHUTDOWN_PRI_LAST + 200);
}

SYSINIT(shutdown_conf, SI_BOOT2_MACHDEP, SI_ORDER_ANY, shutdown_conf, NULL)

/* ARGSUSED */

/*
 * The system call that results in a reboot.
 *
 * The caller must pass priv_check(PRIV_REBOOT); on failure that error is
 * returned and nothing else happens.  Otherwise the MP lock is taken and
 * boot() is called with the user-supplied option flags.  boot() is
 * declared __dead2, so the rel_mplock()/return that follow are not
 * expected to execute.
 *
 * MPALMOSTSAFE
 */
int
sys_reboot(struct reboot_args *uap)
{
	struct thread *td = curthread;
	int error;

	if ((error = priv_check(td, PRIV_REBOOT)))
		return (error);

	get_mplock();
	boot(uap->opt);
	rel_mplock();
	return (0);
}

/*
 * Called by events that want to shut down.. e.g. <CTL><ALT><DEL> on a PC
 *
 * The requested flags are stashed in shutdown_howto, which boot() later
 * ORs into its own howto argument.
 */
static int shutdown_howto = 0;

void
shutdown_nice(int howto)
{
	shutdown_howto = howto;

	/* Send a signal to init(8) and have it shutdown the world */
	if (initproc != NULL) {
		ksignal(initproc, SIGINT);
	} else {
		/* No init(8) running, so simply reboot */
		boot(RB_NOSYNC);
	}
	return;
}
/* -1 until boot() starts its filesystem sync; guards against re-syncing */
static int	waittime = -1;
struct pcb dumppcb;
struct thread *dumpthread;

/*
 * Print the system uptime on the console as "Uptime: [Nd][Nh][Nm]Ns".
 *
 * Leading units that are zero are omitted; once a larger unit has been
 * printed (f != 0) every smaller unit is printed as well, even if zero.
 */
static void
print_uptime(void)
{
	int f;
	struct timespec ts;

	getnanouptime(&ts);
	kprintf("Uptime: ");
	f = 0;
	if (ts.tv_sec >= 86400) {
		kprintf("%ldd", ts.tv_sec / 86400);
		ts.tv_sec %= 86400;
		f = 1;
	}
	if (f || ts.tv_sec >= 3600) {
		kprintf("%ldh", ts.tv_sec / 3600);
		ts.tv_sec %= 3600;
		f = 1;
	}
	if (f || ts.tv_sec >= 60) {
		kprintf("%ldm", ts.tv_sec / 60);
		ts.tv_sec %= 60;
		f = 1;
	}
	kprintf("%lds\n", ts.tv_sec);
}

/*
 * Go through the rigmarole of shutting down..
 * this used to be in machdep.c but I'll be damned if I could see
 * anything machine dependent in it.
 *
 * Sequence: raise our priority, merge in flags left by shutdown_nice(),
 * (SMP) move to cpu 0 unless panicking, run the shutdown_pre_sync
 * handlers, drop filesystem references held by the caller/proc0/init,
 * sync and unmount filesystems, print the uptime, optionally dump, then
 * run the shutdown_post_sync and shutdown_final handlers.  Does not
 * return.
 */
static void
boot(int howto)
{
	/*
	 * Get rid of any user scheduler baggage and then give
	 * us a high priority.
	 */
	if (curthread->td_release)
		curthread->td_release(curthread);
	lwkt_setpri_self(TDPRI_MAX);

	/* collect extra flags that shutdown_nice might have set */
	howto |= shutdown_howto;

#ifdef SMP
	/*
	 * We really want to shutdown on the BSP.  Subsystems such as ACPI
	 * can't power-down the box otherwise.
	 */
	if (smp_active_mask > 1) {
		kprintf("boot() called on cpu#%d\n", mycpu->gd_cpuid);
	}
	/* Only migrate on a clean shutdown; a panicking cpu stays put. */
	if (panicstr == NULL && mycpu->gd_cpuid != 0) {
		kprintf("Switching to cpu #0 for shutdown\n");
		lwkt_setcpu_self(globaldata_find(0));
	}
#endif
	/*
	 * Do any callouts that should be done BEFORE syncing the filesystems.
	 */
	EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);

	/*
	 * Try to get rid of any remaining FS references.  The calling
	 * process, proc0, and init may still hold references.  The
	 * VFS cache subsystem may still hold a root reference to root.
	 *
	 * XXX this needs work.  We really need to SIGSTOP all remaining
	 * processes in order to avoid blowups due to proc0's filesystem
	 * references going away.  For now just make sure that the init
	 * process is stopped.
	 */
	if (panicstr == NULL) {
		shutdown_cleanup_proc(curproc);
		shutdown_cleanup_proc(&proc0);
		if (initproc) {
			if (initproc != curproc) {
				ksignal(initproc, SIGSTOP);
				/* brief pause (hz / 20 ticks) after the SIGSTOP */
				tsleep(boot, 0, "shutdn", hz / 20);
			}
			shutdown_cleanup_proc(initproc);
		}
		vfs_cache_setroot(NULL, NULL);
	}

	/*
	 * Now sync filesystems
	 *
	 * Skipped while still cold, when RB_NOSYNC is set, or when a
	 * previous pass through here already started a sync (waittime
	 * is no longer negative).
	 */
	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
		int iter, nbusy, pbusy;

		waittime = 0;
		kprintf("\nsyncing disks... ");

		sys_sync(NULL);	/* YYY was sync(&proc0, NULL). why proc0 ? */

		/*
		 * With soft updates, some buffers that are
		 * written will be remarked as dirty until other
		 * buffers are written.
		 *
		 * Retry up to 20 times; the iteration counter restarts
		 * whenever the busy count drops, so the loop only gives
		 * up after a run of passes without progress.
		 */
		for (iter = pbusy = 0; iter < 20; iter++) {
			nbusy = scan_all_buffers(shutdown_busycount1, NULL);
			if (nbusy == 0)
				break;
			kprintf("%d ", nbusy);
			if (nbusy < pbusy)
				iter = 0;
			pbusy = nbusy;
			/*
			 * XXX:
			 * Process soft update work queue if buffers don't sync
			 * after 6 iterations by permitting the syncer to run.
			 */
			if (iter > 5)
				bio_ops_sync(NULL);

			sys_sync(NULL); /* YYY was sync(&proc0, NULL). why proc0 ? */
			/* back off a little longer on each pass */
			tsleep(boot, 0, "shutdn", hz * iter / 20 + 1);
		}
		kprintf("\n");
		/*
		 * Count only busy local buffers to prevent forcing
		 * a fsck if we're just a client of a wedged NFS server
		 */
		nbusy = scan_all_buffers(shutdown_busycount2, NULL);
		if (nbusy) {
			/*
			 * Failed to sync all blocks. Indicate this and don't
			 * unmount filesystems (thus forcing an fsck on reboot).
			 */
			kprintf("giving up on %d buffers\n", nbusy);
#ifdef DDB
			if (debugger_on_panic)
				Debugger("busy buffer problem");
#endif /* DDB */
			tsleep(boot, 0, "shutdn", hz * 5 + 1);
		} else {
			kprintf("done\n");
			/*
			 * Unmount filesystems
			 */
			if (panicstr == NULL)
				vfs_unmountall();
		}
		tsleep(boot, 0, "shutdn", hz / 10 + 1);
	}

	print_uptime();

	/*
	 * Dump before doing post_sync shutdown ops
	 *
	 * The critical section entered here is never exited in this
	 * function.  A dump is taken only when RB_DUMP is set without
	 * RB_HALT and the system is no longer cold.
	 */
	crit_enter();
	if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold) {
		dumpsys();
	}

	/*
	 * Ok, now do things that assume all filesystem activity has
	 * been completed.  This will also call the device shutdown
	 * methods.
	 */
	EVENTHANDLER_INVOKE(shutdown_post_sync, howto);

	/* Now that we're going to really halt the system... */
	EVENTHANDLER_INVOKE(shutdown_final, howto);

	for(;;) ;	/* safety against shutdown_reset not working */
	/* NOTREACHED */
}

/*
 * Pass 1 - Figure out if there are any busy or dirty buffers still present.
 *
 * We ignore TMPFS mounts in this pass.
396 */ 397 static int 398 shutdown_busycount1(struct buf *bp, void *info) 399 { 400 struct vnode *vp; 401 402 if ((vp = bp->b_vp) != NULL && vp->v_tag == VT_TMPFS) 403 return (0); 404 if ((bp->b_flags & B_INVAL) == 0 && BUF_REFCNT(bp) > 0) 405 return(1); 406 if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) 407 return (1); 408 return (0); 409 } 410 411 /* 412 * Pass 2 - only run after pass 1 has completed or has given up 413 * 414 * We ignore TMPFS, NFS, MFS, and SMBFS mounts in this pass. 415 */ 416 static int 417 shutdown_busycount2(struct buf *bp, void *info) 418 { 419 struct vnode *vp; 420 421 /* 422 * Ignore tmpfs and nfs mounts 423 */ 424 if ((vp = bp->b_vp) != NULL) { 425 if (vp->v_tag == VT_TMPFS) 426 return (0); 427 if (vp->v_tag == VT_NFS) 428 return (0); 429 if (vp->v_tag == VT_MFS) 430 return (0); 431 if (vp->v_tag == VT_SMBFS) 432 return (0); 433 } 434 435 /* 436 * Only count buffers stuck on I/O, ignore everything else 437 */ 438 if (((bp->b_flags & B_INVAL) == 0 && BUF_REFCNT(bp)) || 439 ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI)) { 440 /* 441 * Only count buffers undergoing write I/O 442 * on the related vnode. 443 */ 444 if (bp->b_vp == NULL || 445 bio_track_active(&bp->b_vp->v_track_write) == 0) { 446 return (0); 447 } 448 #if defined(SHOW_BUSYBUFS) || defined(DIAGNOSTIC) 449 kprintf( 450 "%p dev:?, flags:%08x, loffset:%jd, doffset:%jd\n", 451 bp, 452 bp->b_flags, (intmax_t)bp->b_loffset, 453 (intmax_t)bp->b_bio2.bio_offset); 454 #endif 455 return(1); 456 } 457 return(0); 458 } 459 460 /* 461 * If the shutdown was a clean halt, behave accordingly. 
462 */ 463 static void 464 shutdown_halt(void *junk, int howto) 465 { 466 if (howto & RB_HALT) { 467 kprintf("\n"); 468 kprintf("The operating system has halted.\n"); 469 #ifdef _KERNEL_VIRTUAL 470 cpu_halt(); 471 #else 472 kprintf("Please press any key to reboot.\n\n"); 473 switch (cngetc()) { 474 case -1: /* No console, just die */ 475 cpu_halt(); 476 /* NOTREACHED */ 477 default: 478 howto &= ~RB_HALT; 479 break; 480 } 481 #endif 482 } 483 } 484 485 /* 486 * Check to see if the system paniced, pause and then reboot 487 * according to the specified delay. 488 */ 489 static void 490 shutdown_panic(void *junk, int howto) 491 { 492 int loop; 493 494 if (howto & RB_DUMP) { 495 if (PANIC_REBOOT_WAIT_TIME != 0) { 496 if (PANIC_REBOOT_WAIT_TIME != -1) { 497 kprintf("Automatic reboot in %d seconds - " 498 "press a key on the console to abort\n", 499 PANIC_REBOOT_WAIT_TIME); 500 for (loop = PANIC_REBOOT_WAIT_TIME * 10; 501 loop > 0; --loop) { 502 DELAY(1000 * 100); /* 1/10th second */ 503 /* Did user type a key? */ 504 if (cncheckc() != -1) 505 break; 506 } 507 if (!loop) 508 return; 509 } 510 } else { /* zero time specified - reboot NOW */ 511 return; 512 } 513 kprintf("--> Press a key on the console to reboot,\n"); 514 kprintf("--> or switch off the system now.\n"); 515 cngetc(); 516 } 517 } 518 519 /* 520 * Everything done, now reset 521 */ 522 static void 523 shutdown_reset(void *junk, int howto) 524 { 525 kprintf("Rebooting...\n"); 526 DELAY(1000000); /* wait 1 sec for kprintf's to complete and be read */ 527 /* cpu_boot(howto); */ /* doesn't do anything at the moment */ 528 cpu_reset(); 529 /* NOTREACHED */ /* assuming reset worked */ 530 } 531 532 /* 533 * Try to remove FS references in the specified process. 
This function 534 * is used during shutdown 535 */ 536 static 537 void 538 shutdown_cleanup_proc(struct proc *p) 539 { 540 struct filedesc *fdp; 541 struct vmspace *vm; 542 543 if (p == NULL) 544 return; 545 if ((fdp = p->p_fd) != NULL) { 546 kern_closefrom(0); 547 if (fdp->fd_cdir) { 548 cache_drop(&fdp->fd_ncdir); 549 vrele(fdp->fd_cdir); 550 fdp->fd_cdir = NULL; 551 } 552 if (fdp->fd_rdir) { 553 cache_drop(&fdp->fd_nrdir); 554 vrele(fdp->fd_rdir); 555 fdp->fd_rdir = NULL; 556 } 557 if (fdp->fd_jdir) { 558 cache_drop(&fdp->fd_njdir); 559 vrele(fdp->fd_jdir); 560 fdp->fd_jdir = NULL; 561 } 562 } 563 if (p->p_vkernel) 564 vkernel_exit(p); 565 if (p->p_textvp) { 566 vrele(p->p_textvp); 567 p->p_textvp = NULL; 568 } 569 vm = p->p_vmspace; 570 if (vm != NULL) { 571 pmap_remove_pages(vmspace_pmap(vm), 572 VM_MIN_USER_ADDRESS, 573 VM_MAX_USER_ADDRESS); 574 vm_map_remove(&vm->vm_map, 575 VM_MIN_USER_ADDRESS, 576 VM_MAX_USER_ADDRESS); 577 } 578 } 579 580 /* 581 * Magic number for savecore 582 * 583 * exported (symorder) and used at least by savecore(8) 584 * 585 * Mark it as used so that gcc doesn't optimize it away. 
586 */ 587 __attribute__((__used__)) 588 static u_long const dumpmag = 0x8fca0101UL; 589 590 __attribute__((__used__)) 591 static int dumpsize = 0; /* also for savecore */ 592 593 static int dodump = 1; 594 595 SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0, 596 "Try to perform coredump on kernel panic"); 597 598 void 599 mkdumpheader(struct kerneldumpheader *kdh, char *magic, uint32_t archver, 600 uint64_t dumplen, uint32_t blksz) 601 { 602 bzero(kdh, sizeof(*kdh)); 603 strncpy(kdh->magic, magic, sizeof(kdh->magic)); 604 strncpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture)); 605 kdh->version = htod32(KERNELDUMPVERSION); 606 kdh->architectureversion = htod32(archver); 607 kdh->dumplength = htod64(dumplen); 608 kdh->dumptime = htod64(time_second); 609 kdh->blocksize = htod32(blksz); 610 strncpy(kdh->hostname, hostname, sizeof(kdh->hostname)); 611 strncpy(kdh->versionstring, version, sizeof(kdh->versionstring)); 612 if (panicstr != NULL) 613 strncpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); 614 kdh->parity = kerneldump_parity(kdh); 615 } 616 617 static int 618 setdumpdev(cdev_t dev) 619 { 620 int error; 621 int doopen; 622 623 if (dev == NULL) { 624 disk_dumpconf(NULL, 0/*off*/); 625 return (0); 626 } 627 628 /* 629 * We have to open the device before we can perform ioctls on it, 630 * or the slice/label data may not be present. Device opens are 631 * usually tracked by specfs, but the dump device can be set in 632 * early boot and may not be open so this is somewhat of a hack. 
633 */ 634 doopen = (dev->si_sysref.refcnt == 1); 635 if (doopen) { 636 error = dev_dopen(dev, FREAD, S_IFCHR, proc0.p_ucred); 637 if (error) 638 return (error); 639 } 640 error = disk_dumpconf(dev, 1/*on*/); 641 642 return error; 643 } 644 645 /* ARGSUSED */ 646 static void dump_conf (void *dummy); 647 static void 648 dump_conf(void *dummy) 649 { 650 char *path; 651 cdev_t dev; 652 int _dummy; 653 654 path = kmalloc(MNAMELEN, M_TEMP, M_WAITOK); 655 if (TUNABLE_STR_FETCH("dumpdev", path, MNAMELEN) != 0) { 656 /* 657 * Make sure all disk devices created so far have also been 658 * probed, and also make sure that the newly created device 659 * nodes for probed disks are ready, too. 660 * 661 * XXX - Delay an additional 2 seconds to help drivers which 662 * pickup devices asynchronously and are not caught by 663 * CAM's initial probe. 664 */ 665 sync_devs(); 666 tsleep(&_dummy, 0, "syncer", hz*2); 667 668 dev = kgetdiskbyname(path); 669 if (dev != NULL) 670 dumpdev = dev; 671 } 672 kfree(path, M_TEMP); 673 if (setdumpdev(dumpdev) != 0) 674 dumpdev = NULL; 675 } 676 677 SYSINIT(dump_conf, SI_SUB_DUMP_CONF, SI_ORDER_FIRST, dump_conf, NULL) 678 679 static int 680 sysctl_kern_dumpdev(SYSCTL_HANDLER_ARGS) 681 { 682 int error; 683 udev_t ndumpdev; 684 685 ndumpdev = dev2udev(dumpdev); 686 error = sysctl_handle_opaque(oidp, &ndumpdev, sizeof ndumpdev, req); 687 if (error == 0 && req->newptr != NULL) 688 error = setdumpdev(udev2dev(ndumpdev, 0)); 689 return (error); 690 } 691 692 SYSCTL_PROC(_kern, KERN_DUMPDEV, dumpdev, CTLTYPE_OPAQUE|CTLFLAG_RW, 693 0, sizeof dumpdev, sysctl_kern_dumpdev, "T,udev_t", ""); 694 695 /* 696 * Panic is called on unresolvable fatal errors. It prints "panic: mesg", 697 * and then reboots. If we are called twice, then we avoid trying to sync 698 * the disks as this often leads to recursive panics. 699 */ 700 void 701 panic(const char *fmt, ...) 
702 { 703 int bootopt, newpanic; 704 globaldata_t gd = mycpu; 705 thread_t td = gd->gd_curthread; 706 __va_list ap; 707 static char buf[256]; 708 709 #ifdef SMP 710 /* 711 * If a panic occurs on multiple cpus before the first is able to 712 * halt the other cpus, only one cpu is allowed to take the panic. 713 * Attempt to be verbose about this situation but if the kprintf() 714 * itself panics don't let us overrun the kernel stack. 715 * 716 * Be very nasty about descheduling our thread at the lowest 717 * level possible in an attempt to freeze the thread without 718 * inducing further panics. 719 * 720 * Bumping gd_trap_nesting_level will also bypass assertions in 721 * lwkt_switch() and allow us to switch away even if we are a 722 * FAST interrupt or IPI. 723 * 724 * The setting of panic_cpu_gd also determines how kprintf() 725 * spin-locks itself. DDB can set panic_cpu_gd as well. 726 */ 727 for (;;) { 728 globaldata_t xgd = panic_cpu_gd; 729 730 /* 731 * Someone else got the panic cpu 732 */ 733 if (xgd && xgd != gd) { 734 crit_enter(); 735 ++mycpu->gd_trap_nesting_level; 736 if (mycpu->gd_trap_nesting_level < 25) { 737 kprintf("SECONDARY PANIC ON CPU %d THREAD %p\n", 738 mycpu->gd_cpuid, td); 739 } 740 td->td_release = NULL; /* be a grinch */ 741 for (;;) { 742 lwkt_deschedule_self(td); 743 lwkt_switch(); 744 } 745 /* NOT REACHED */ 746 /* --mycpu->gd_trap_nesting_level */ 747 /* crit_exit() */ 748 } 749 750 /* 751 * Reentrant panic 752 */ 753 if (xgd && xgd == gd) 754 break; 755 756 /* 757 * We got it 758 */ 759 if (atomic_cmpset_ptr(&panic_cpu_gd, NULL, gd)) 760 break; 761 } 762 #else 763 panic_cpu_gd = gd; 764 #endif 765 /* 766 * Try to get the system into a working state. Save information 767 * we are about to destroy. 
768 */ 769 kvcreinitspin(); 770 if (panicstr == NULL) { 771 bcopy(td->td_toks_array, panic_tokens, sizeof(panic_tokens)); 772 panic_tokens_count = td->td_toks_stop - &td->td_toks_base; 773 } 774 lwkt_relalltokens(td); 775 td->td_toks_stop = &td->td_toks_base; 776 777 /* 778 * Setup 779 */ 780 bootopt = RB_AUTOBOOT | RB_DUMP; 781 if (sync_on_panic == 0) 782 bootopt |= RB_NOSYNC; 783 newpanic = 0; 784 if (panicstr) { 785 bootopt |= RB_NOSYNC; 786 } else { 787 panicstr = fmt; 788 newpanic = 1; 789 } 790 791 /* 792 * Format the panic string. 793 */ 794 __va_start(ap, fmt); 795 kvsnprintf(buf, sizeof(buf), fmt, ap); 796 if (panicstr == fmt) 797 panicstr = buf; 798 __va_end(ap); 799 kprintf("panic: %s\n", buf); 800 #ifdef SMP 801 /* two separate prints in case of an unmapped page and trap */ 802 kprintf("cpuid = %d\n", mycpu->gd_cpuid); 803 #endif 804 805 #if (NGPIO > 0) && defined(ERROR_LED_ON_PANIC) 806 led_switch("error", 1); 807 #endif 808 809 #if defined(WDOG_DISABLE_ON_PANIC) 810 wdog_disable(); 811 #endif 812 813 /* 814 * Enter the debugger or fall through & dump. Entering the 815 * debugger will stop cpus. If not entering the debugger stop 816 * cpus here. 817 */ 818 #if defined(DDB) 819 if (newpanic && trace_on_panic) 820 print_backtrace(-1); 821 if (debugger_on_panic) 822 Debugger("panic"); 823 else 824 #endif 825 #ifdef SMP 826 if (newpanic) 827 stop_cpus(mycpu->gd_other_cpus); 828 #else 829 ; 830 #endif 831 boot(bootopt); 832 } 833 834 /* 835 * Support for poweroff delay. 836 */ 837 #ifndef POWEROFF_DELAY 838 # define POWEROFF_DELAY 5000 839 #endif 840 static int poweroff_delay = POWEROFF_DELAY; 841 842 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, 843 &poweroff_delay, 0, ""); 844 845 static void 846 poweroff_wait(void *junk, int howto) 847 { 848 if(!(howto & RB_POWEROFF) || poweroff_delay <= 0) 849 return; 850 DELAY(poweroff_delay * 1000); 851 } 852 853 /* 854 * Some system processes (e.g. 
syncer) need to be stopped at appropriate 855 * points in their main loops prior to a system shutdown, so that they 856 * won't interfere with the shutdown process (e.g. by holding a disk buf 857 * to cause sync to fail). For each of these system processes, register 858 * shutdown_kproc() as a handler for one of shutdown events. 859 */ 860 static int kproc_shutdown_wait = 60; 861 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, 862 &kproc_shutdown_wait, 0, ""); 863 864 void 865 shutdown_kproc(void *arg, int howto) 866 { 867 struct thread *td; 868 struct proc *p; 869 int error; 870 871 if (panicstr) 872 return; 873 874 td = (struct thread *)arg; 875 if ((p = td->td_proc) != NULL) { 876 kprintf("Waiting (max %d seconds) for system process `%s' to stop...", 877 kproc_shutdown_wait, p->p_comm); 878 } else { 879 kprintf("Waiting (max %d seconds) for system thread %s to stop...", 880 kproc_shutdown_wait, td->td_comm); 881 } 882 error = suspend_kproc(td, kproc_shutdown_wait * hz); 883 884 if (error == EWOULDBLOCK) 885 kprintf("timed out\n"); 886 else 887 kprintf("stopped\n"); 888 } 889 890 /* Registration of dumpers */ 891 int 892 set_dumper(struct dumperinfo *di) 893 { 894 if (di == NULL) { 895 bzero(&dumper, sizeof(dumper)); 896 return 0; 897 } 898 899 if (dumper.dumper != NULL) 900 return (EBUSY); 901 902 dumper = *di; 903 return 0; 904 } 905 906 void 907 dumpsys(void) 908 { 909 #if defined (_KERNEL_VIRTUAL) 910 /* VKERNELs don't support dumps */ 911 kprintf("VKERNEL doesn't support dumps\n"); 912 return; 913 #endif 914 /* 915 * If there is a dumper registered and we aren't dumping already, call 916 * the machine dependent dumpsys (md_dumpsys) to do the hard work. 917 * 918 * XXX: while right now the md_dumpsys() of x86 and x86_64 could be 919 * factored out completely into here, I rather keep them machine 920 * dependent in case we ever add a platform which does not share 921 * the same dumpsys() code, such as arm. 
922 */ 923 if (dumper.dumper != NULL && !dumping) { 924 dumping++; 925 md_dumpsys(&dumper); 926 } 927 } 928 929 int dump_stop_usertds = 0; 930 931 #ifdef SMP 932 static 933 void 934 need_user_resched_remote(void *dummy) 935 { 936 need_user_resched(); 937 } 938 #endif 939 940 void 941 dump_reactivate_cpus(void) 942 { 943 #ifdef SMP 944 globaldata_t gd; 945 int cpu, seq; 946 #endif 947 948 dump_stop_usertds = 1; 949 950 need_user_resched(); 951 952 #ifdef SMP 953 for (cpu = 0; cpu < ncpus; cpu++) { 954 gd = globaldata_find(cpu); 955 seq = lwkt_send_ipiq(gd, need_user_resched_remote, NULL); 956 lwkt_wait_ipiq(gd, seq); 957 } 958 959 restart_cpus(stopped_cpus); 960 #endif 961 } 962