1 /* $NetBSD: trap.c,v 1.260 2010/12/20 00:25:35 matt Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 2000, 2005, 2006, 2007, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Charles M. Hannum. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1990 The Regents of the University of California. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * the University of Utah, and William Jolitz. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 * 63 * @(#)trap.c 7.4 (Berkeley) 5/13/91 64 */ 65 66 /* 67 * 386 Trap and System call handling 68 */ 69 70 #include <sys/cdefs.h> 71 __KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.260 2010/12/20 00:25:35 matt Exp $"); 72 73 #include "opt_ddb.h" 74 #include "opt_kgdb.h" 75 #include "opt_lockdebug.h" 76 #include "opt_multiprocessor.h" 77 #include "opt_vm86.h" 78 #include "opt_kvm86.h" 79 #include "opt_kstack_dr0.h" 80 #include "opt_xen.h" 81 #include "opt_dtrace.h" 82 83 #include <sys/param.h> 84 #include <sys/systm.h> 85 #include <sys/proc.h> 86 #include <sys/acct.h> 87 #include <sys/kauth.h> 88 #include <sys/kernel.h> 89 #include <sys/kmem.h> 90 #include <sys/ras.h> 91 #include <sys/signal.h> 92 #include <sys/syscall.h> 93 #include <sys/cpu.h> 94 #include <sys/ucontext.h> 95 #include <sys/sa.h> 96 #include <sys/savar.h> 97 98 #include <uvm/uvm_extern.h> 99 100 #include <machine/cpufunc.h> 101 #include <machine/psl.h> 102 #include <machine/reg.h> 103 #include <machine/trap.h> 104 #include <machine/userret.h> 105 #ifdef DDB 106 #include <machine/db_machdep.h> 107 #endif 108 109 #include "mca.h" 110 #if NMCA > 0 111 #include <machine/mca_machdep.h> 112 #endif 113 114 #include <x86/nmi.h> 115 116 #include "isa.h" 117 118 #ifdef KGDB 119 #include <sys/kgdb.h> 120 #endif 121 122 #include "npx.h" 123 124 #ifdef KDTRACE_HOOKS 125 #include <sys/dtrace_bsd.h> 126 127 /* 128 * This is a hook which is initialised by the dtrace module 129 * to handle traps which might occur during DTrace probe 130 * execution. 131 */ 132 dtrace_trap_func_t dtrace_trap_func = NULL; 133 134 dtrace_doubletrap_func_t dtrace_doubletrap_func = NULL; 135 #endif 136 137 138 static inline int xmm_si_code(struct lwp *); 139 void trap(struct trapframe *); 140 void trap_tss(struct i386tss *, int, int); 141 void trap_return_fault_return(struct trapframe *) __dead; 142 143 #ifdef KVM86 144 #include <machine/kvm86.h> 145 #define KVM86MODE (kvm86_incall) 146 #else 147 #define KVM86MODE (0) 148 #endif 149 150 const char * const trap_type[] = { 151 "privileged instruction fault", /* 0 T_PRIVINFLT */ 152 "breakpoint trap", /* 1 T_BPTFLT */ 153 "arithmetic trap", /* 2 T_ARITHTRAP */ 154 "asynchronous system trap", /* 3 T_ASTFLT */ 155 "protection fault", /* 4 T_PROTFLT */ 156 "trace trap", /* 5 T_TRCTRAP */ 157 "page fault", /* 6 T_PAGEFLT */ 158 "alignment fault", /* 7 T_ALIGNFLT */ 159 "integer divide fault", /* 8 T_DIVIDE */ 160 "non-maskable interrupt", /* 9 T_NMI */ 161 "overflow trap", /* 10 T_OFLOW */ 162 "bounds check fault", /* 11 T_BOUND */ 163 "FPU not available fault", /* 12 T_DNA */ 164 "double fault", /* 13 T_DOUBLEFLT */ 165 "FPU operand fetch fault", /* 14 T_FPOPFLT */ 166 "invalid TSS fault", /* 15 T_TSSFLT */ 167 "segment not present fault", /* 16 T_SEGNPFLT */ 168 "stack fault", /* 17 T_STKFLT */ 169 "machine check fault", /* 18 T_MCA */ 170 "SSE FP exception", /* 19 T_XMM */ 171 "reserved trap", /* 20 T_RESERVED */ 172 }; 173 int trap_types = __arraycount(trap_type); 174 175 #ifdef DEBUG 176 int trapdebug = 0; 177 #endif 178 179 #define IDTVEC(name) __CONCAT(X, name) 180 181 void 182 trap_tss(struct i386tss *tss, int trapno, int code) 183 { 184 struct trapframe tf; 185 186 tf.tf_gs = tss->tss_gs; 187 tf.tf_fs = tss->tss_fs; 188 tf.tf_es = tss->__tss_es; 189 tf.tf_ds = tss->__tss_ds; 190 tf.tf_edi = tss->__tss_edi; 191 tf.tf_esi = tss->__tss_esi; 192 tf.tf_ebp = tss->tss_ebp; 193 tf.tf_ebx = tss->__tss_ebx; 194 tf.tf_edx = tss->__tss_edx; 195 tf.tf_ecx = tss->__tss_ecx; 196 tf.tf_eax = tss->__tss_eax; 197 tf.tf_trapno = trapno; 198 tf.tf_err = code | TC_TSS; 199 tf.tf_eip = tss->__tss_eip; 200 tf.tf_cs = tss->__tss_cs; 201 tf.tf_eflags = tss->__tss_eflags; 202 tf.tf_esp = tss->tss_esp; 203 tf.tf_ss = tss->__tss_ss; 204 trap(&tf); 205 } 206 207 static inline int 208 xmm_si_code(struct lwp *l) 209 { 210 struct pcb *pcb; 211 uint32_t mxcsr, mask; 212 213 if (!i386_use_fxsave) { 214 #ifdef DIAGNOSTIC 215 panic("SSE FP Exception, but no SSE"); 216 #endif 217 return 0; 218 } 219 pcb = lwp_getpcb(l); 220 mxcsr = pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr; 221 222 /* 223 * Since we only have a single status and control register, 224 * we use the exception mask bits to mask disabled exceptions 225 */ 226 mask = ~((mxcsr & __INITIAL_MXCSR__) >> 7) & 0xff; 227 switch (mask & mxcsr) { 228 case EN_SW_INVOP: 229 return FPE_FLTINV; 230 case EN_SW_DENORM: 231 case EN_SW_PRECLOSS: 232 return FPE_FLTRES; 233 case EN_SW_ZERODIV: 234 return FPE_FLTDIV; 235 case EN_SW_OVERFLOW: 236 return FPE_FLTOVF; 237 case EN_SW_UNDERFLOW: 238 return FPE_FLTUND; 239 case EN_SW_DATACHAIN: 240 return FPE_FLTSUB; 241 case 0: 242 default: 243 return 0; 244 } 245 } 246 247 static void * 248 onfault_handler(const struct pcb *pcb, const struct trapframe *tf) 249 { 250 struct onfault_table { 251 uintptr_t start; 252 uintptr_t end; 253 void *handler; 254 }; 255 extern const struct onfault_table onfault_table[]; 256 const struct onfault_table *p; 257 uintptr_t pc; 258 259 if (pcb->pcb_onfault != NULL) { 260 return pcb->pcb_onfault; 261 } 262 263 pc = tf->tf_eip; 264 for (p = onfault_table; p->start; p++) { 265 if (p->start <= pc && pc < p->end) { 266 return p->handler; 267 } 268 } 269 return NULL; 270 } 271 272 static void 273 trap_print(int type, struct trapframe *frame) 274 { 275 if (frame->tf_trapno < trap_types) 276 printf("fatal %s", trap_type[frame->tf_trapno]); 277 else 278 printf("unknown trap %d", frame->tf_trapno); 279 printf(" in %s mode\n", (type & T_USER) ? "user" : "supervisor"); 280 printf("trap type %d code %x eip %x cs %x eflags %x cr2 %lx ilevel %x\n", 281 type, frame->tf_err, frame->tf_eip, frame->tf_cs, 282 frame->tf_eflags, (long)rcr2(), curcpu()->ci_ilevel); 283 } 284 285 static void 286 check_dr0(void) 287 { 288 #ifdef KSTACK_CHECK_DR0 289 u_int mask, dr6 = rdr6(); 290 291 mask = 1 << 0; /* dr0 */ 292 if (dr6 & mask) { 293 panic("trap on DR0: maybe kernel stack overflow\n"); 294 #if 0 295 dr6 &= ~mask; 296 ldr6(dr6); 297 return; 298 #endif 299 } 300 #endif 301 } 302 303 /* 304 * trap(frame): exception, fault, and trap interface to BSD kernel. 305 * 306 * This common code is called from assembly language IDT gate entry routines 307 * that prepare a suitable stack frame, and restore this frame after the 308 * exception has been processed. Note that the effect is as if the arguments 309 * were passed call by reference. 310 */ 311 void 312 trap(struct trapframe *frame) 313 { 314 struct lwp *l = curlwp; 315 struct proc *p; 316 struct pcb *pcb; 317 extern char fusubail[], kcopy_fault[], return_address_fault[], 318 IDTVEC(osyscall)[]; 319 struct trapframe *vframe; 320 ksiginfo_t ksi; 321 void *onfault; 322 int type, error; 323 uint32_t cr2; 324 bool pfail; 325 326 if (__predict_true(l != NULL)) { 327 pcb = lwp_getpcb(l); 328 p = l->l_proc; 329 } else { 330 /* 331 * this can happen eg. on break points in early on boot. 332 */ 333 pcb = NULL; 334 p = NULL; 335 } 336 type = frame->tf_trapno; 337 338 #ifdef DEBUG 339 if (trapdebug) { 340 printf("trap %d code %x eip %x cs %x eflags %x cr2 %lx cpl %x\n", 341 type, frame->tf_err, frame->tf_eip, frame->tf_cs, 342 frame->tf_eflags, rcr2(), curcpu()->ci_ilevel); 343 printf("curlwp %p%s", curlwp, curlwp ? " " : "\n"); 344 if (curlwp) 345 printf("pid %d lid %d\n", l->l_proc->p_pid, l->l_lid); 346 } 347 #endif 348 if (type != T_NMI && !KVM86MODE && 349 !KERNELMODE(frame->tf_cs, frame->tf_eflags)) { 350 type |= T_USER; 351 l->l_md.md_regs = frame; 352 pcb->pcb_cr2 = 0; 353 LWP_CACHE_CREDS(l, p); 354 } 355 356 #ifdef KDTRACE_HOOKS 357 /* 358 * A trap can occur while DTrace executes a probe. Before 359 * executing the probe, DTrace blocks re-scheduling and sets 360 * a flag in it's per-cpu flags to indicate that it doesn't 361 * want to fault. On returning from the the probe, the no-fault 362 * flag is cleared and finally re-scheduling is enabled. 363 * 364 * If the DTrace kernel module has registered a trap handler, 365 * call it and if it returns non-zero, assume that it has 366 * handled the trap and modified the trap frame so that this 367 * function can return normally. 368 */ 369 if ((type == T_PROTFLT || type == T_PAGEFLT) && 370 dtrace_trap_func != NULL) { 371 if ((*dtrace_trap_func)(frame, type)) { 372 return; 373 } 374 } 375 #endif 376 377 switch (type) { 378 379 case T_ASTFLT: 380 if (KVM86MODE) { 381 break; 382 } 383 /*FALLTHROUGH*/ 384 385 default: 386 we_re_toast: 387 if (type == T_TRCTRAP) 388 check_dr0(); 389 else 390 trap_print(type, frame); 391 #ifdef DDB 392 if (kdb_trap(type, 0, frame)) 393 return; 394 #endif 395 #ifdef KGDB 396 if (kgdb_trap(type, frame)) 397 return; 398 else { 399 /* 400 * If this is a breakpoint, don't panic 401 * if we're not connected. 402 */ 403 if (type == T_BPTFLT) { 404 printf("kgdb: ignored %s\n", trap_type[type]); 405 return; 406 } 407 } 408 #endif 409 panic("trap"); 410 /*NOTREACHED*/ 411 412 case T_PROTFLT: 413 #ifdef KVM86 414 if (KVM86MODE) { 415 kvm86_gpfault(frame); 416 return; 417 } 418 #endif 419 case T_SEGNPFLT: 420 case T_ALIGNFLT: 421 case T_TSSFLT: 422 if (p == NULL) 423 goto we_re_toast; 424 /* Check for copyin/copyout fault. */ 425 onfault = onfault_handler(pcb, frame); 426 if (onfault != NULL) { 427 copyefault: 428 error = EFAULT; 429 copyfault: 430 frame->tf_eip = (uintptr_t)onfault; 431 frame->tf_eax = error; 432 return; 433 } 434 435 /* 436 * Check for failure during return to user mode. 437 * This can happen loading invalid values into the segment 438 * registers, or during the 'iret' itself. 439 * 440 * We do this by looking at the instruction we faulted on. 441 * The specific instructions we recognize only happen when 442 * returning from a trap, syscall, or interrupt. 443 */ 444 445 kernelfault: 446 KSI_INIT_TRAP(&ksi); 447 ksi.ksi_signo = SIGSEGV; 448 ksi.ksi_code = SEGV_ACCERR; 449 ksi.ksi_trap = type; 450 451 switch (*(u_char *)frame->tf_eip) { 452 case 0xcf: /* iret */ 453 /* 454 * The 'iret' instruction faulted, so we have the 455 * 'user' registers saved after the kernel %eip:%cs:%fl 456 * of the 'iret' and below that the user %eip:%cs:%fl 457 * the 'iret' was processing. 458 * We must delete the 3 words of kernel return address 459 * from the stack to generate a normal stack frame 460 * (eg for sending a SIGSEGV). 461 */ 462 vframe = (void *)((int *)frame + 3); 463 if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags)) 464 goto we_re_toast; 465 memmove(vframe, frame, 466 offsetof(struct trapframe, tf_eip)); 467 /* Set the faulting address to the user %eip */ 468 ksi.ksi_addr = (void *)vframe->tf_eip; 469 break; 470 case 0x8e: 471 switch (*(uint32_t *)frame->tf_eip) { 472 case 0x8e242c8e: /* mov (%esp,%gs), then */ 473 case 0x0424648e: /* mov 0x4(%esp),%fs */ 474 case 0x0824448e: /* mov 0x8(%esp),%es */ 475 case 0x0c245c8e: /* mov 0xc(%esp),%ds */ 476 break; 477 default: 478 goto we_re_toast; 479 } 480 /* 481 * We faulted loading one if the user segment registers. 482 * The stack frame containing the user registers is 483 * still valid and is just below the %eip:%cs:%fl of 484 * the kernel fault frame. 485 */ 486 vframe = (void *)(&frame->tf_eflags + 1); 487 if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags)) 488 goto we_re_toast; 489 /* There is no valid address for the fault */ 490 break; 491 default: 492 goto we_re_toast; 493 } 494 /* 495 * We might have faulted trying to execute the 496 * trampoline for a local (nested) signal handler. 497 * Only generate SIGSEGV if the user %cs isn't changed. 498 * (This is only strictly necessary in the 'iret' case.) 499 */ 500 if (!pmap_exec_fixup(&p->p_vmspace->vm_map, vframe, pcb)) { 501 /* Save outer frame for any signal return */ 502 l->l_md.md_regs = vframe; 503 (*p->p_emul->e_trapsignal)(l, &ksi); 504 } 505 /* Return to user by reloading the user frame */ 506 trap_return_fault_return(vframe); 507 /* NOTREACHED */ 508 509 case T_PROTFLT|T_USER: /* protection fault */ 510 case T_TSSFLT|T_USER: 511 case T_SEGNPFLT|T_USER: 512 case T_STKFLT|T_USER: 513 case T_ALIGNFLT|T_USER: 514 KSI_INIT_TRAP(&ksi); 515 516 ksi.ksi_addr = (void *)rcr2(); 517 switch (type) { 518 case T_SEGNPFLT|T_USER: 519 case T_STKFLT|T_USER: 520 ksi.ksi_signo = SIGBUS; 521 ksi.ksi_code = BUS_ADRERR; 522 break; 523 case T_TSSFLT|T_USER: 524 ksi.ksi_signo = SIGBUS; 525 ksi.ksi_code = BUS_OBJERR; 526 break; 527 case T_ALIGNFLT|T_USER: 528 ksi.ksi_signo = SIGBUS; 529 ksi.ksi_code = BUS_ADRALN; 530 break; 531 case T_PROTFLT|T_USER: 532 #ifdef VM86 533 if (frame->tf_eflags & PSL_VM) { 534 vm86_gpfault(l, type & ~T_USER); 535 goto out; 536 } 537 #endif 538 /* 539 * If pmap_exec_fixup does something, 540 * let's retry the trap. 541 */ 542 if (pmap_exec_fixup(&p->p_vmspace->vm_map, frame, pcb)){ 543 goto out; 544 } 545 ksi.ksi_signo = SIGSEGV; 546 ksi.ksi_code = SEGV_ACCERR; 547 break; 548 default: 549 KASSERT(0); 550 break; 551 } 552 goto trapsignal; 553 554 case T_PRIVINFLT|T_USER: /* privileged instruction fault */ 555 case T_FPOPFLT|T_USER: /* coprocessor operand fault */ 556 KSI_INIT_TRAP(&ksi); 557 ksi.ksi_signo = SIGILL; 558 ksi.ksi_addr = (void *)rcr2(); 559 switch (type) { 560 case T_PRIVINFLT|T_USER: 561 ksi.ksi_code = ILL_PRVOPC; 562 break; 563 case T_FPOPFLT|T_USER: 564 ksi.ksi_code = ILL_COPROC; 565 break; 566 default: 567 ksi.ksi_code = 0; 568 break; 569 } 570 goto trapsignal; 571 572 case T_ASTFLT|T_USER: 573 /* Allow process switch. */ 574 //curcpu()->ci_data.cpu_nast++; 575 if (l->l_pflag & LP_OWEUPC) { 576 l->l_pflag &= ~LP_OWEUPC; 577 ADDUPROF(l); 578 } 579 /* Allow a forced task switch. */ 580 if (curcpu()->ci_want_resched) { 581 preempt(); 582 } 583 goto out; 584 585 case T_DNA|T_USER: { 586 KSI_INIT_TRAP(&ksi); 587 ksi.ksi_signo = SIGKILL; 588 ksi.ksi_addr = (void *)frame->tf_eip; 589 printf("pid %d killed due to lack of floating point\n", 590 p->p_pid); 591 goto trapsignal; 592 } 593 594 case T_XMM|T_USER: 595 case T_BOUND|T_USER: 596 case T_OFLOW|T_USER: 597 case T_DIVIDE|T_USER: 598 case T_ARITHTRAP|T_USER: 599 KSI_INIT_TRAP(&ksi); 600 ksi.ksi_signo = SIGFPE; 601 ksi.ksi_addr = (void *)frame->tf_eip; 602 switch (type) { 603 case T_XMM|T_USER: 604 ksi.ksi_code = xmm_si_code(l); 605 break; 606 case T_BOUND|T_USER: 607 ksi.ksi_code = FPE_FLTSUB; 608 break; 609 case T_OFLOW|T_USER: 610 ksi.ksi_code = FPE_INTOVF; 611 break; 612 case T_DIVIDE|T_USER: 613 ksi.ksi_code = FPE_INTDIV; 614 break; 615 case T_ARITHTRAP|T_USER: 616 ksi.ksi_code = npxtrap(l); 617 break; 618 default: 619 ksi.ksi_code = 0; 620 break; 621 } 622 goto trapsignal; 623 624 case T_PAGEFLT: 625 /* Allow page faults in kernel mode. */ 626 if (__predict_false(l == NULL)) 627 goto we_re_toast; 628 629 /* 630 * fusubail is used by [fs]uswintr() to prevent page faulting 631 * from inside the profiling interrupt. 632 */ 633 onfault = pcb->pcb_onfault; 634 if (onfault == fusubail || onfault == return_address_fault) { 635 goto copyefault; 636 } 637 if (cpu_intr_p() || (l->l_pflag & LP_INTR) != 0) { 638 goto we_re_toast; 639 } 640 641 cr2 = rcr2(); 642 goto faultcommon; 643 644 case T_PAGEFLT|T_USER: { /* page fault */ 645 register vaddr_t va; 646 register struct vmspace *vm; 647 register struct vm_map *map; 648 vm_prot_t ftype; 649 extern struct vm_map *kernel_map; 650 651 cr2 = rcr2(); 652 if (l->l_flag & LW_SA) { 653 l->l_savp->savp_faultaddr = (vaddr_t)cr2; 654 l->l_pflag |= LP_SA_PAGEFAULT; 655 } 656 faultcommon: 657 vm = p->p_vmspace; 658 if (__predict_false(vm == NULL)) { 659 goto we_re_toast; 660 } 661 pcb->pcb_cr2 = cr2; 662 va = trunc_page((vaddr_t)cr2); 663 /* 664 * It is only a kernel address space fault iff: 665 * 1. (type & T_USER) == 0 and 666 * 2. pcb_onfault not set or 667 * 3. pcb_onfault set but supervisor space fault 668 * The last can occur during an exec() copyin where the 669 * argument space is lazy-allocated. 670 */ 671 if (type == T_PAGEFLT && va >= KERNBASE) 672 map = kernel_map; 673 else 674 map = &vm->vm_map; 675 if (frame->tf_err & PGEX_W) 676 ftype = VM_PROT_WRITE; 677 else if (frame->tf_err & PGEX_X) 678 ftype = VM_PROT_EXECUTE; 679 else 680 ftype = VM_PROT_READ; 681 682 #ifdef DIAGNOSTIC 683 if (map == kernel_map && va == 0) { 684 printf("trap: bad kernel access at %lx\n", va); 685 goto we_re_toast; 686 } 687 #endif 688 /* Fault the original page in. */ 689 onfault = pcb->pcb_onfault; 690 pcb->pcb_onfault = NULL; 691 error = uvm_fault(map, va, ftype); 692 pcb->pcb_onfault = onfault; 693 if (error == 0) { 694 if (map != kernel_map && (void *)va >= vm->vm_maxsaddr) 695 uvm_grow(p, va); 696 697 pfail = false; 698 while (type == T_PAGEFLT) { 699 /* 700 * we need to switch pmap now if we're in 701 * the middle of copyin/out. 702 * 703 * but we don't need to do so for kcopy as 704 * it never touch userspace. 705 */ 706 kpreempt_disable(); 707 if (curcpu()->ci_want_pmapload) { 708 onfault = onfault_handler(pcb, frame); 709 if (onfault != kcopy_fault) { 710 pmap_load(); 711 } 712 } 713 /* 714 * We need to keep the pmap loaded and 715 * so avoid being preempted until back 716 * into the copy functions. Disable 717 * interrupts at the hardware level before 718 * re-enabling preemption. Interrupts 719 * will be re-enabled by 'iret' when 720 * returning back out of the trap stub. 721 * They'll only be re-enabled when the 722 * program counter is once again in 723 * the copy functions, and so visible 724 * to cpu_kpreempt_exit(). 725 */ 726 #ifndef XEN 727 x86_disable_intr(); 728 #endif 729 l->l_nopreempt--; 730 if (l->l_nopreempt > 0 || !l->l_dopreempt || 731 pfail) { 732 return; 733 } 734 #ifndef XEN 735 x86_enable_intr(); 736 #endif 737 /* 738 * If preemption fails for some reason, 739 * don't retry it. The conditions won't 740 * change under our nose. 741 */ 742 pfail = kpreempt(0); 743 } 744 l->l_pflag &= ~LP_SA_PAGEFAULT; 745 goto out; 746 } 747 KSI_INIT_TRAP(&ksi); 748 ksi.ksi_trap = type & ~T_USER; 749 ksi.ksi_addr = (void *)cr2; 750 if (error == EACCES) { 751 ksi.ksi_code = SEGV_ACCERR; 752 error = EFAULT; 753 } else { 754 ksi.ksi_code = SEGV_MAPERR; 755 } 756 757 if (type == T_PAGEFLT) { 758 onfault = onfault_handler(pcb, frame); 759 if (onfault != NULL) 760 goto copyfault; 761 printf("uvm_fault(%p, %#lx, %d) -> %#x\n", 762 map, va, ftype, error); 763 goto kernelfault; 764 } 765 if (error == ENOMEM) { 766 ksi.ksi_signo = SIGKILL; 767 printf("UVM: pid %d (%s), uid %d killed: out of swap\n", 768 p->p_pid, p->p_comm, 769 l->l_cred ? 770 kauth_cred_geteuid(l->l_cred) : -1); 771 } else { 772 ksi.ksi_signo = SIGSEGV; 773 } 774 (*p->p_emul->e_trapsignal)(l, &ksi); 775 l->l_pflag &= ~LP_SA_PAGEFAULT; 776 break; 777 } 778 779 case T_TRCTRAP: 780 /* Check whether they single-stepped into a lcall. */ 781 if (frame->tf_eip == (int)IDTVEC(osyscall)) 782 return; 783 if (frame->tf_eip == (int)IDTVEC(osyscall) + 1) { 784 frame->tf_eflags &= ~PSL_T; 785 return; 786 } 787 goto we_re_toast; 788 789 case T_BPTFLT|T_USER: /* bpt instruction fault */ 790 case T_TRCTRAP|T_USER: /* trace trap */ 791 /* 792 * Don't go single-stepping into a RAS. 793 */ 794 if (p->p_raslist == NULL || 795 (ras_lookup(p, (void *)frame->tf_eip) == (void *)-1)) { 796 KSI_INIT_TRAP(&ksi); 797 ksi.ksi_signo = SIGTRAP; 798 ksi.ksi_trap = type & ~T_USER; 799 if (type == (T_BPTFLT|T_USER)) 800 ksi.ksi_code = TRAP_BRKPT; 801 else 802 ksi.ksi_code = TRAP_TRACE; 803 ksi.ksi_addr = (void *)frame->tf_eip; 804 (*p->p_emul->e_trapsignal)(l, &ksi); 805 } 806 break; 807 808 case T_NMI: 809 #if !defined(XEN) 810 if (nmi_dispatch(frame)) 811 return; 812 #if (NISA > 0 || NMCA > 0) 813 #if defined(KGDB) || defined(DDB) 814 /* NMI can be hooked up to a pushbutton for debugging */ 815 printf ("NMI ... going to debugger\n"); 816 #ifdef KGDB 817 818 if (kgdb_trap(type, frame)) 819 return; 820 #endif 821 #ifdef DDB 822 if (kdb_trap(type, 0, frame)) 823 return; 824 #endif 825 #endif /* KGDB || DDB */ 826 /* machine/parity/power fail/"kitchen sink" faults */ 827 828 #if NMCA > 0 829 /* mca_nmi() takes care to call x86_nmi() if appropriate */ 830 if (mca_nmi() != 0) 831 goto we_re_toast; 832 else 833 return; 834 #else /* NISA > 0 */ 835 if (x86_nmi() != 0) 836 goto we_re_toast; 837 else 838 return; 839 #endif /* NMCA > 0 */ 840 #endif /* (NISA > 0 || NMCA > 0) */ 841 #endif /* !defined(XEN) */ 842 ; /* avoid a label at end of compound statement */ 843 } 844 845 if ((type & T_USER) == 0) 846 return; 847 out: 848 userret(l); 849 return; 850 trapsignal: 851 ksi.ksi_trap = type & ~T_USER; 852 (*p->p_emul->e_trapsignal)(l, &ksi); 853 userret(l); 854 } 855 856 /* 857 * startlwp: start of a new LWP. 858 */ 859 void 860 startlwp(void *arg) 861 { 862 ucontext_t *uc = arg; 863 lwp_t *l = curlwp; 864 int error; 865 866 error = cpu_setmcontext(l, &uc->uc_mcontext, uc->uc_flags); 867 KASSERT(error == 0); 868 869 kmem_free(uc, sizeof(ucontext_t)); 870 userret(l); 871 } 872 873 /* 874 * XXX_SA: This is a terrible name. 875 */ 876 void 877 upcallret(struct lwp *l) 878 { 879 KERNEL_UNLOCK_LAST(l); 880 userret(l); 881 } 882