1 /*- 2 * Copyright (C) 1994, David Greenman 3 * Copyright (c) 1990, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the University of Utah, and William Jolitz. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 38 */ 39 40 #include <sys/cdefs.h> 41 __FBSDID("$FreeBSD$"); 42 43 /* 44 * 386 Trap and System call handling 45 */ 46 47 #include "opt_clock.h" 48 #include "opt_cpu.h" 49 #include "opt_hwpmc_hooks.h" 50 #include "opt_isa.h" 51 #include "opt_kdb.h" 52 #include "opt_kdtrace.h" 53 #include "opt_ktrace.h" 54 #include "opt_npx.h" 55 #include "opt_trap.h" 56 57 #include <sys/param.h> 58 #include <sys/bus.h> 59 #include <sys/systm.h> 60 #include <sys/proc.h> 61 #include <sys/pioctl.h> 62 #include <sys/ptrace.h> 63 #include <sys/kdb.h> 64 #include <sys/kernel.h> 65 #include <sys/ktr.h> 66 #include <sys/lock.h> 67 #include <sys/mutex.h> 68 #include <sys/resourcevar.h> 69 #include <sys/signalvar.h> 70 #include <sys/syscall.h> 71 #include <sys/sysctl.h> 72 #include <sys/sysent.h> 73 #include <sys/uio.h> 74 #include <sys/vmmeter.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 #ifdef HWPMC_HOOKS 79 #include <sys/pmckern.h> 80 #endif 81 #include <security/audit/audit.h> 82 83 #include <vm/vm.h> 84 #include <vm/vm_param.h> 85 #include <vm/pmap.h> 86 #include <vm/vm_kern.h> 87 #include <vm/vm_map.h> 88 #include <vm/vm_page.h> 89 #include <vm/vm_extern.h> 90 91 #include <machine/cpu.h> 92 #include <machine/intr_machdep.h> 93 #include <machine/md_var.h> 94 #include <machine/pcb.h> 95 #ifdef SMP 96 #include <machine/smp.h> 97 #endif 98 #include <machine/tss.h> 99 #include <machine/vm86.h> 100 
#ifdef POWERFAIL_NMI
#include <sys/syslog.h>
#include <machine/clock.h>
#endif

#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>

/*
 * This is a hook which is initialised by the dtrace module
 * to handle traps which might occur during DTrace probe
 * execution.
 */
dtrace_trap_func_t	dtrace_trap_func;

dtrace_doubletrap_func_t	dtrace_doubletrap_func;

/*
 * This is a hook which is initialised by the systrace module
 * when it is loaded. This keeps the DTrace syscall provider
 * implementation opaque.
 */
systrace_probe_func_t	systrace_probe_func;
#endif

extern void trap(struct trapframe *frame);
extern void syscall(struct trapframe *frame);

static int trap_pfault(struct trapframe *, int, vm_offset_t);
static void trap_fatal(struct trapframe *, vm_offset_t);
void dblfault_handler(void);

extern inthand_t IDTVEC(lcall_syscall);

/*
 * Human-readable names for trap types, indexed by T_* trap number.
 * MAX_TRAP_MSG is the largest valid index into trap_msg[].
 */
#define MAX_TRAP_MSG		30
static char *trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"",					/*  7 unused */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
	"machine check trap",			/* 28 T_MCHK */
	"SIMD floating-point exception",	/* 29 T_XMMFLT */
	"reserved (unknown) fault",		/* 30 T_RESERVED */
};

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
#endif

#ifdef KDB
static int kdb_on_nmi = 1;
SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RW,
	&kdb_on_nmi, 0, "Go to KDB on NMI");
#endif
static int panic_on_nmi = 1;
SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
	&panic_on_nmi, 0, "Panic on NMI");
/*
 * Controls the signal used for a user-mode protection-mode page fault:
 * 0 = autodetect from the process's ABI osrel, 1 = always SIGBUS
 * (historic compat), anything else = always SIGSEGV.  See the
 * T_PAGEFLT case in trap() below.
 */
static int prot_fault_translation = 0;
SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW,
	&prot_fault_translation, 0, "Select signal to deliver on protection fault");

extern char *syscallnames[];

/*
 * Exception, fault, and trap interface to the FreeBSD kernel.
 * This common code is called from assembly language IDT gate entry
 * routines that prepare a suitable stack frame, and restore this
 * frame after the exception has been processed.
 *
 * User-mode traps are translated into signals delivered to the
 * faulting thread; kernel-mode traps are either fixed up in place
 * (e.g. via pcb_onfault) or escalate to trap_fatal().
 */

void
trap(struct trapframe *frame)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int i = 0, ucode = 0, code;
	u_int type;
	register_t addr = 0;
	vm_offset_t eva;
	ksiginfo_t ksi;
#ifdef POWERFAIL_NMI
	static int lastalert = 0;	/* rate-limits power-fail NMI logging */
#endif

	PCPU_INC(cnt.v_trap);
	type = frame->tf_trapno;

#ifdef SMP
#ifdef STOP_NMI
	/* Handler for NMI IPIs used for stopping CPUs. */
	if (type == T_NMI) {
		if (ipi_nmi_handler() == 0)
			goto out;
	}
#endif /* STOP_NMI */
#endif /* SMP */

#ifdef KDB
	if (kdb_active) {
		kdb_reenter();
		goto out;
	}
#endif

#ifdef HWPMC_HOOKS
	/*
	 * CPU PMCs interrupt using an NMI so we check for that first.
	 * If the HWPMC module is active, 'pmc_hook' will point to
	 * the function to be called.  A return value of '1' from the
	 * hook means that the NMI was handled by it and that we can
	 * return immediately.
	 */
	if (type == T_NMI && pmc_intr &&
	    (*pmc_intr)(PCPU_GET(cpuid), frame))
		goto out;
#endif

#ifdef KDTRACE_HOOKS
	/*
	 * A trap can occur while DTrace executes a probe. Before
	 * executing the probe, DTrace blocks re-scheduling and sets
	 * a flag in it's per-cpu flags to indicate that it doesn't
	 * want to fault. On returning from the the probe, the no-fault
	 * flag is cleared and finally re-scheduling is enabled.
	 *
	 * If the DTrace kernel module has registered a trap handler,
	 * call it and if it returns non-zero, assume that it has
	 * handled the trap and modified the trap frame so that this
	 * function can return normally.
	 */
	if ((type == T_PROTFLT || type == T_PAGEFLT) &&
	    dtrace_trap_func != NULL)
		if ((*dtrace_trap_func)(frame, type))
			goto out;
#endif

	if ((frame->tf_eflags & PSL_I) == 0) {
		/*
		 * Buggy application or kernel code has disabled
		 * interrupts and then trapped.  Enabling interrupts
		 * now is wrong, but it is better than running with
		 * interrupts disabled until they are accidentally
		 * enabled later.
		 */
		if (ISPL(frame->tf_cs) == SEL_UPL || (frame->tf_eflags & PSL_VM))
			printf(
			    "pid %ld (%s): trap %d with interrupts disabled\n",
			    (long)curproc->p_pid, curthread->td_name, type);
		else if (type != T_BPTFLT && type != T_TRCTRAP &&
			 frame->tf_eip != (int)cpu_switch_load_gs) {
			/*
			 * XXX not quite right, since this may be for a
			 * multiple fault in user mode.
			 */
			printf("kernel trap %d with interrupts disabled\n",
			    type);
			/*
			 * Page faults need interrupts disabled until later,
			 * and we shouldn't enable interrupts while holding
			 * a spin lock or if servicing an NMI.
			 */
			if (type != T_NMI && type != T_PAGEFLT &&
			    td->td_md.md_spinlock_count == 0)
				enable_intr();
		}
	}
	eva = 0;
	code = frame->tf_err;
	if (type == T_PAGEFLT) {
		/*
		 * For some Cyrix CPUs, %cr2 is clobbered by
		 * interrupts.  This problem is worked around by using
		 * an interrupt gate for the pagefault handler.  We
		 * are finally ready to read %cr2 and then must
		 * reenable interrupts.
		 *
		 * If we get a page fault while in a critical section, then
		 * it is most likely a fatal kernel page fault.  The kernel
		 * is already going to panic trying to get a sleep lock to
		 * do the VM lookup, so just consider it a fatal trap so the
		 * kernel can print out a useful trap message and even get
		 * to the debugger.
		 *
		 * If we get a page fault while holding a non-sleepable
		 * lock, then it is most likely a fatal kernel page fault.
		 * If WITNESS is enabled, then it's going to whine about
		 * bogus LORs with various VM locks, so just skip to the
		 * fatal trap handling directly.
		 */
		eva = rcr2();
		if (td->td_critnest != 0 ||
		    WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
		    "Kernel page fault") != 0)
			trap_fatal(frame, eva);
		else
			enable_intr();
	}

	if ((ISPL(frame->tf_cs) == SEL_UPL) ||
	    ((frame->tf_eflags & PSL_VM) &&
	    !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL))) {
		/* user trap */

		td->td_pticks = 0;
		td->td_frame = frame;
		addr = frame->tf_eip;
		if (td->td_ucred != p->p_ucred)
			cred_update_thread(td);

		/*
		 * Map the trap type to a signal number (i) and si_code
		 * (ucode); the signal is delivered via trapsignal() below.
		 */
		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			i = SIGILL;
			ucode = ILL_PRVOPC;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			enable_intr();
			frame->tf_eflags &= ~PSL_T;
			i = SIGTRAP;
			ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
#ifdef DEV_NPX
			ucode = npxtrap();
			if (ucode == -1)
				goto userout;
#else
			ucode = 0;
#endif
			i = SIGFPE;
			break;

			/*
			 * The following two traps can happen in
			 * vm86 mode, and, if so, we want to handle
			 * them specially.
			 */
		case T_PROTFLT:		/* general protection fault */
		case T_STKFLT:		/* stack fault */
			if (frame->tf_eflags & PSL_VM) {
				i = vm86_emulate((struct vm86frame *)frame);
				if (i == 0)
					goto user;
				break;
			}
			i = SIGBUS;
			ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR;
			break;
		case T_SEGNPFLT:	/* segment not present fault */
			i = SIGBUS;
			ucode = BUS_ADRERR;
			break;
		case T_TSSFLT:		/* invalid TSS fault */
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;
		case T_DOUBLEFLT:	/* double fault */
		default:
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;

		case T_PAGEFLT:		/* page fault */

			i = trap_pfault(frame, TRUE, eva);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
			if (i == -2) {
				/*
				 * The f00f hack workaround has triggered, so
				 * treat the fault as an illegal instruction
				 * (T_PRIVINFLT) instead of a page fault.
				 */
				type = frame->tf_trapno = T_PRIVINFLT;

				/* Proceed as in that case. */
				ucode = ILL_PRVOPC;
				i = SIGILL;
				break;
			}
#endif
			if (i == -1)
				goto userout;
			if (i == 0)
				goto user;

			if (i == SIGSEGV)
				ucode = SEGV_MAPERR;
			else {
				if (prot_fault_translation == 0) {
					/*
					 * Autodetect.
					 * This check also covers the images
					 * without the ABI-tag ELF note.
					 */
					if (p->p_osrel >= 700004) {
						i = SIGSEGV;
						ucode = SEGV_ACCERR;
					} else {
						i = SIGBUS;
						ucode = BUS_PAGE_FAULT;
					}
				} else if (prot_fault_translation == 1) {
					/*
					 * Always compat mode.
					 */
					i = SIGBUS;
					ucode = BUS_PAGE_FAULT;
				} else {
					/*
					 * Always SIGSEGV mode.
					 */
					i = SIGSEGV;
					ucode = SEGV_ACCERR;
				}
			}
			addr = eva;
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV;
			i = SIGFPE;
			break;

#ifdef DEV_ISA
		case T_NMI:
#ifdef POWERFAIL_NMI
#ifndef TIMER_FREQ
#define TIMER_FREQ	1193182
#endif
			/* Rate-limit the warning to once every 10 seconds. */
			if (time_second - lastalert > 10) {
				log(LOG_WARNING, "NMI: power fail\n");
				sysbeep(880, hz);
				lastalert = time_second;
			}
			goto userout;
#else /* !POWERFAIL_NMI */
			/* machine/parity/power fail/"kitchen sink" faults */
			/* XXX Giant */
			if (isa_nmi(code) == 0) {
#ifdef KDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (kdb_on_nmi) {
					printf ("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* KDB */
				goto userout;
			} else if (panic_on_nmi)
				panic("NMI indicates hardware failure");
			break;
#endif /* POWERFAIL_NMI */
#endif /* DEV_ISA */

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_FLTSUB;
			i = SIGFPE;
			break;

		case T_DNA:
#ifdef DEV_NPX
			/* transparent fault (due to context switch "late") */
			if (npxdna())
				goto userout;
#endif
			printf("pid %d killed due to lack of floating point\n",
			    p->p_pid);
			i = SIGKILL;
			ucode = 0;
			break;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = ILL_COPROC;
			i = SIGILL;
			break;

		case T_XMMFLT:		/* SIMD floating-point exception */
			ucode = 0; /* XXX */
			i = SIGFPE;
			break;
		}
	} else {
		/* kernel trap */

		KASSERT(cold || td->td_ucred != NULL,
		    ("kernel trap doesn't have ucred"));
		switch (type) {
		case T_PAGEFLT:		/* page fault */
			(void) trap_pfault(frame, FALSE, eva);
			goto out;

		case T_DNA:
#ifdef DEV_NPX
			/*
			 * The kernel is apparently using npx for copying.
			 * XXX this should be fatal unless the kernel has
			 * registered such use.
			 */
			if (npxdna())
				goto out;
#endif
			break;

			/*
			 * The following two traps can happen in
			 * vm86 mode, and, if so, we want to handle
			 * them specially.
			 */
		case T_PROTFLT:		/* general protection fault */
		case T_STKFLT:		/* stack fault */
			if (frame->tf_eflags & PSL_VM) {
				i = vm86_emulate((struct vm86frame *)frame);
				if (i != 0)
					/*
					 * returns to original process
					 */
					vm86_trap((struct vm86frame *)frame);
				goto out;
			}
			if (type == T_STKFLT)
				break;

			/* FALL THROUGH */

		case T_SEGNPFLT:	/* segment not present fault */
			if (PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)
				break;

			/*
			 * Invalid %fs's and %gs's can be created using
			 * procfs or PT_SETREGS or by invalidating the
			 * underlying LDT entry.  This causes a fault
			 * in kernel mode when the kernel attempts to
			 * switch contexts.  Lose the bad context
			 * (XXX) so that we can continue, and generate
			 * a signal.
			 */
			if (frame->tf_eip == (int)cpu_switch_load_gs) {
				PCPU_GET(curpcb)->pcb_gs = 0;
#if 0
				PROC_LOCK(p);
				psignal(p, SIGBUS);
				PROC_UNLOCK(p);
#endif
				goto out;
			}

			if (td->td_intr_nesting_level != 0)
				break;

			/*
			 * Invalid segment selectors and out of bounds
			 * %eip's and %esp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
			if (frame->tf_eip == (int)doreti_iret) {
				frame->tf_eip = (int)doreti_iret_fault;
				goto out;
			}
			if (frame->tf_eip == (int)doreti_popl_ds) {
				frame->tf_eip = (int)doreti_popl_ds_fault;
				goto out;
			}
			if (frame->tf_eip == (int)doreti_popl_es) {
				frame->tf_eip = (int)doreti_popl_es_fault;
				goto out;
			}
			if (frame->tf_eip == (int)doreti_popl_fs) {
				frame->tf_eip = (int)doreti_popl_fs_fault;
				goto out;
			}
			if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
				frame->tf_eip =
				    (int)PCPU_GET(curpcb)->pcb_onfault;
				goto out;
			}
			break;

		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame->tf_eflags & PSL_NT) {
				frame->tf_eflags &= ~PSL_NT;
				goto out;
			}
			break;

		case T_TRCTRAP:	 /* trace trap */
			if (frame->tf_eip == (int)IDTVEC(lcall_syscall)) {
				/*
				 * We've just entered system mode via the
				 * syscall lcall.  Continue single stepping
				 * silently until the syscall handler has
				 * saved the flags.
				 */
				goto out;
			}
			if (frame->tf_eip == (int)IDTVEC(lcall_syscall) + 1) {
				/*
				 * The syscall handler has now saved the
				 * flags.  Stop single stepping it.
				 */
				frame->tf_eflags &= ~PSL_T;
				goto out;
			}
			/*
			 * Ignore debug register trace traps due to
			 * accesses in the user's address space, which
			 * can happen under several conditions such as
			 * if a user sets a watchpoint on a buffer and
			 * then passes that buffer to a system call.
			 * We still want to get TRCTRAPS for addresses
			 * in kernel space because that is useful when
			 * debugging the kernel.
			 */
			/* XXX Giant */
			if (user_dbreg_trap() &&
			    !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)) {
				/*
				 * Reset breakpoint bits because the
				 * processor doesn't
				 */
				load_dr6(rdr6() & 0xfffffff0);
				goto out;
			}
			/*
			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
			 */
		case T_BPTFLT:
			/*
			 * If KDB is enabled, let it handle the debugger trap.
			 * Otherwise, debugger traps "can't happen".
			 */
#ifdef KDB
			if (kdb_trap(type, 0, frame))
				goto out;
#endif
			break;

#ifdef DEV_ISA
		case T_NMI:
#ifdef POWERFAIL_NMI
			if (time_second - lastalert > 10) {
				log(LOG_WARNING, "NMI: power fail\n");
				sysbeep(880, hz);
				lastalert = time_second;
			}
			goto out;
#else /* !POWERFAIL_NMI */
			/* XXX Giant */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef KDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (kdb_on_nmi) {
					printf ("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* KDB */
				goto out;
			} else if (panic_on_nmi == 0)
				goto out;
			/* FALLTHROUGH */
#endif /* POWERFAIL_NMI */
#endif /* DEV_ISA */
		}

		/* Unhandled kernel trap: report and panic (or enter KDB). */
		trap_fatal(frame, eva);
		goto out;
	}

	/* Translate fault for emulators (e.g. Linux) */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	ksiginfo_init_trap(&ksi);
	ksi.ksi_signo = i;
	ksi.ksi_code = ucode;
	ksi.ksi_addr = (void *)addr;
	ksi.ksi_trapno = type;
	trapsignal(td, &ksi);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
		    trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", (u_long)eva);
		uprintf("\n");
	}
#endif

user:
	userret(td, frame);
	mtx_assert(&Giant, MA_NOTOWNED);
userout:
out:
	return;
}

/*
 * trap_pfault: resolve a page fault via vm_fault().
 *
 * Returns 0 on success, -1 if the fault was fatal and trap_fatal() was
 * called, -2 if the Pentium F00F workaround triggered (caller converts
 * the fault to T_PRIVINFLT), or the signal number (SIGBUS/SIGSEGV) to
 * deliver for an unresolvable user fault.
 */
static int
trap_pfault(frame, usermode, eva)
	struct trapframe *frame;
	int usermode;
	vm_offset_t eva;
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map;
	int rv = 0;
	vm_prot_t ftype;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;

	va = trunc_page(eva);
	if (va >= KERNBASE) {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 * An exception: if the faulting address is the invalid
		 * instruction entry in the IDT, then the Intel Pentium
		 * F00F bug workaround was triggered, and we need to
		 * treat it is as an illegal instruction, and not a page
		 * fault.
		 */
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
		if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
			return -2;
#endif
		if (usermode)
			goto nogo;

		map = kernel_map;
	} else {
		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		if (p != NULL)
			vm = p->p_vmspace;

		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;
	}

	/*
	 * PGEX_I is defined only if the execute disable bit capability is
	 * supported and enabled.
	 */
	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_WRITE;
#ifdef PAE
	else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
		ftype = VM_PROT_EXECUTE;
#endif
	else
		ftype = VM_PROT_READ;

	if (map != kernel_map) {
		/*
		 * Keep swapout from messing with us during this
		 * critical time.
		 */
		PROC_LOCK(p);
		++p->p_lock;
		PROC_UNLOCK(p);

		/* Fault in the user page: */
		rv = vm_fault(map, va, ftype,
		    (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
					    : VM_FAULT_NORMAL);

		PROC_LOCK(p);
		--p->p_lock;
		PROC_UNLOCK(p);
	} else {
		/*
		 * Don't have to worry about process locking or stacks in the
		 * kernel.
		 */
		rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
	}
	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		/*
		 * A kernel-mode fault with a registered pcb_onfault
		 * handler (e.g. copyin/copyout) is recoverable: redirect
		 * %eip to the handler.  Otherwise the fault is fatal.
		 */
		if (td->td_intr_nesting_level == 0 &&
		    PCPU_GET(curpcb)->pcb_onfault != NULL) {
			frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
			return (0);
		}
		trap_fatal(frame, eva);
		return (-1);
	}

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}

/*
 * trap_fatal: print a detailed report of an unrecoverable trap
 * (registers, fault address, code segment descriptor, eflags), give
 * the kernel debugger a chance to take over if configured, and panic.
 * Does not return unless KDB handles the trap.
 */
static void
trap_fatal(frame, eva)
	struct trapframe *frame;
	vm_offset_t eva;
{
	int code, ss, esp;
	u_int type;
	struct soft_segment_descriptor softseg;
	char *msg;

	code = frame->tf_err;
	type = frame->tf_trapno;
	sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);

	if (type <= MAX_TRAP_MSG)
		msg = trap_msg[type];
	else
		msg = "UNKNOWN";
	printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
	    frame->tf_eflags & PSL_VM ? "vm86" :
	    ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
#ifdef SMP
	/* two separate prints in case of a trap on an unmapped page */
	printf("cpuid = %d; ", PCPU_GET(cpuid));
	printf("apic id = %02x\n", PCPU_GET(apic_id));
#endif
	if (type == T_PAGEFLT) {
		printf("fault virtual address = 0x%x\n", eva);
		printf("fault code = %s %s, %s\n",
		    code & PGEX_U ? "user" : "supervisor",
		    code & PGEX_W ? "write" : "read",
		    code & PGEX_P ? "protection violation" : "page not present");
	}
	printf("instruction pointer = 0x%x:0x%x\n",
	    frame->tf_cs & 0xffff, frame->tf_eip);
	if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) {
		ss = frame->tf_ss & 0xffff;
		esp = frame->tf_esp;
	} else {
		/*
		 * For a kernel-mode trap no %ss:%esp was pushed; report
		 * the kernel stack position of the trapframe instead.
		 */
		ss = GSEL(GDATA_SEL, SEL_KPL);
		esp = (int)&frame->tf_esp;
	}
	printf("stack pointer = 0x%x:0x%x\n", ss, esp);
	printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp);
	printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n",
	    softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
	printf(" = DPL %d, pres %d, def32 %d, gran %d\n",
	    softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32,
	    softseg.ssd_gran);
	printf("processor eflags = ");
	if (frame->tf_eflags & PSL_T)
		printf("trace trap, ");
	if (frame->tf_eflags & PSL_I)
		printf("interrupt enabled, ");
	if (frame->tf_eflags & PSL_NT)
		printf("nested task, ");
	if (frame->tf_eflags & PSL_RF)
		printf("resume, ");
	if (frame->tf_eflags & PSL_VM)
		printf("vm86, ");
	printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
	printf("current process = ");
	if (curproc) {
		printf("%lu (%s)\n", (u_long)curproc->p_pid, curthread->td_name);
	} else {
		printf("Idle\n");
	}

#ifdef KDB
	if (debugger_on_panic || kdb_active) {
		frame->tf_err = eva;	/* smuggle fault address to ddb */
		if (kdb_trap(type, 0, frame)) {
			frame->tf_err = code;	/* restore error code */
			return;
		}
		frame->tf_err = code;	/* restore error code */
	}
#endif
	printf("trap number = %d\n", type);
	if (type <= MAX_TRAP_MSG)
		panic("%s", trap_msg[type]);
	else
		panic("unknown/reserved trap");
}

/*
 * Double fault handler. Called when a fault occurs while writing
 * a frame for a trap/exception onto the stack. This usually occurs
 * when the stack overflows (such is the case with infinite recursion,
 * for example).
 *
 * XXX Note that the current PTD gets replaced by IdlePTD when the
 * task switch occurs. This means that the stack that was active at
 * the time of the double fault is not available at <kstack> unless
 * the machine was idle when the double fault occurred. The downside
 * of this is that "trace <ebp>" in ddb won't work.
 */
void
dblfault_handler()
{
#ifdef KDTRACE_HOOKS
	if (dtrace_doubletrap_func != NULL)
		(*dtrace_doubletrap_func)();
#endif
	printf("\nFatal double fault:\n");
	printf("eip = 0x%x\n", PCPU_GET(common_tss.tss_eip));
	printf("esp = 0x%x\n", PCPU_GET(common_tss.tss_esp));
	printf("ebp = 0x%x\n", PCPU_GET(common_tss.tss_ebp));
#ifdef SMP
	/* two separate prints in case of a trap on an unmapped page */
	printf("cpuid = %d; ", PCPU_GET(cpuid));
	printf("apic id = %02x\n", PCPU_GET(apic_id));
#endif
	panic("double fault");
}

/*
 * syscall - system call request C handler
 *
 * A system call is essentially treated as a trap.
 */
void
syscall(struct trapframe *frame)
{
	caddr_t params;
	struct sysent *callp;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	register_t orig_tf_eflags;
	int error;
	int narg;
	int args[8];		/* at most 8 int-sized syscall arguments */
	u_int code;
	ksiginfo_t ksi;

	PCPU_INC(cnt.v_syscall);

#ifdef DIAGNOSTIC
	if (ISPL(frame->tf_cs) != SEL_UPL) {
		panic("syscall");
		/* NOT REACHED */
	}
#endif

	td->td_pticks = 0;
	td->td_frame = frame;
	if (td->td_ucred != p->p_ucred)
		cred_update_thread(td);
	/* Arguments live on the user stack just above the return address. */
	params = (caddr_t)frame->tf_esp + sizeof(int);
	code = frame->tf_eax;
	orig_tf_eflags = frame->tf_eflags;

	if (p->p_sysent->sv_prepsyscall) {
		/*
		 * The prep code is MP aware.
		 */
		(*p->p_sysent->sv_prepsyscall)(frame, args, &code, &params);
	} else {
		/*
		 * Need to check if this is a 32 bit or 64 bit syscall.
		 * fuword is MP aware.
		 */
		if (code == SYS_syscall) {
			/*
			 * Code is first argument, followed by actual args.
			 */
			code = fuword(params);
			params += sizeof(int);
		} else if (code == SYS___syscall) {
			/*
			 * Like syscall, but code is a quad, so as to maintain
			 * quad alignment for the rest of the arguments.
			 */
			code = fuword(params);
			params += sizeof(quad_t);
		}
	}

	if (p->p_sysent->sv_mask)
		code &= p->p_sysent->sv_mask;

	/* Out-of-range codes map to entry 0 (the nosys/default handler). */
	if (code >= p->p_sysent->sv_size)
		callp = &p->p_sysent->sv_table[0];
	else
		callp = &p->p_sysent->sv_table[code];

	narg = callp->sy_narg;

	/*
	 * copyin and the ktrsyscall()/ktrsysret() code is MP-aware
	 */
	if (params != NULL && narg != 0)
		error = copyin(params, (caddr_t)args,
		    (u_int)(narg * sizeof(int)));
	else
		error = 0;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL))
		ktrsyscall(code, narg, args);
#endif

	CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td,
	    td->td_proc->p_pid, td->td_name, code);

	td->td_syscalls++;

	if (error == 0) {
		td->td_retval[0] = 0;
		td->td_retval[1] = frame->tf_edx;

		STOPEVENT(p, S_SCE, narg);

		PTRACESTOP_SC(p, td, S_PT_SCE);

#ifdef KDTRACE_HOOKS
		/*
		 * If the systrace module has registered it's probe
		 * callback and if there is a probe active for the
		 * syscall 'entry', process the probe.
		 */
		if (systrace_probe_func != NULL && callp->sy_entry != 0)
			(*systrace_probe_func)(callp->sy_entry, code, callp,
			    args);
#endif

		AUDIT_SYSCALL_ENTER(code, td);
		error = (*callp->sy_call)(td, args);
		AUDIT_SYSCALL_EXIT(error, td);

		/* Save the latest error return value. */
		td->td_errno = error;

#ifdef KDTRACE_HOOKS
		/*
		 * If the systrace module has registered it's probe
		 * callback and if there is a probe active for the
		 * syscall 'return', process the probe.
		 */
		if (systrace_probe_func != NULL && callp->sy_return != 0)
			(*systrace_probe_func)(callp->sy_return, code, callp,
			    args);
#endif
	}

	/*
	 * Propagate the result to the user trapframe: %eax/%edx carry the
	 * return value; the carry flag (PSL_C) signals an error to libc.
	 */
	switch (error) {
	case 0:
		frame->tf_eax = td->td_retval[0];
		frame->tf_edx = td->td_retval[1];
		frame->tf_eflags &= ~PSL_C;
		break;

	case ERESTART:
		/*
		 * Reconstruct pc, assuming lcall $X,y is 7 bytes,
		 * int 0x80 is 2 bytes.  We saved this in tf_err.
		 */
		frame->tf_eip -= frame->tf_err;
		break;

	case EJUSTRETURN:
		break;

	default:
		if (p->p_sysent->sv_errsize) {
			if (error >= p->p_sysent->sv_errsize)
				error = -1;	/* XXX */
			else
				error = p->p_sysent->sv_errtbl[error];
		}
		frame->tf_eax = error;
		frame->tf_eflags |= PSL_C;
		break;
	}

	/*
	 * Traced syscall.
	 */
	if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) {
		frame->tf_eflags &= ~PSL_T;
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGTRAP;
		ksi.ksi_code = TRAP_TRACE;
		ksi.ksi_addr = (void *)frame->tf_eip;
		trapsignal(td, &ksi);
	}

	/*
	 * Check for misbehavior.
	 */
	WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
	    (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
	KASSERT(td->td_critnest == 0,
	    ("System call %s returning in a critical section",
	    (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???"));
	KASSERT(td->td_locks == 0,
	    ("System call %s returning with %d locks held",
	    (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???",
	    td->td_locks));

	/*
	 * Handle reschedule and other end-of-syscall issues
	 */
	userret(td, frame);

	CTR4(KTR_SYSC, "syscall exit thread %p pid %d proc %s code %d", td,
	    td->td_proc->p_pid, td->td_name, code);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSRET))
		ktrsysret(code, error, td->td_retval[0]);
#endif

	/*
	 * This works because errno is findable through the
	 * register set.  If we ever support an emulation where this
	 * is not the case, this code will need to be revisited.
	 */
	STOPEVENT(p, S_SCX, code);

	PTRACESTOP_SC(p, td, S_PT_SCX);
}