1 /*- 2 * Copyright (C) 1994, David Greenman 3 * Copyright (c) 1990, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the University of Utah, and William Jolitz. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 38 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ 39 */ 40 41 /* 42 * x86_64 Trap and System call handling 43 */ 44 45 #include "use_isa.h" 46 47 #include "opt_ddb.h" 48 #include "opt_ktrace.h" 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/proc.h> 53 #include <sys/pioctl.h> 54 #include <sys/kernel.h> 55 #include <sys/resourcevar.h> 56 #include <sys/signalvar.h> 57 #include <sys/signal2.h> 58 #include <sys/syscall.h> 59 #include <sys/sysctl.h> 60 #include <sys/sysent.h> 61 #include <sys/uio.h> 62 #include <sys/vmmeter.h> 63 #include <sys/malloc.h> 64 #ifdef KTRACE 65 #include <sys/ktrace.h> 66 #endif 67 #include <sys/ktr.h> 68 #include <sys/vkernel.h> 69 #include <sys/sysproto.h> 70 #include <sys/sysunion.h> 71 #include <sys/vmspace.h> 72 73 #include <vm/vm.h> 74 #include <vm/vm_param.h> 75 #include <sys/lock.h> 76 #include <vm/pmap.h> 77 #include <vm/vm_kern.h> 78 #include <vm/vm_map.h> 79 #include <vm/vm_page.h> 80 #include <vm/vm_extern.h> 81 82 #include <machine/cpu.h> 83 #include <machine/md_var.h> 84 #include <machine/pcb.h> 85 #include <machine/smp.h> 86 #include <machine/tss.h> 87 #include <machine/globaldata.h> 88 89 #include <ddb/ddb.h> 90 91 #include <sys/msgport2.h> 92 #include <sys/thread2.h> 93 #include <sys/mplock2.h> 94 95 int (*pmath_emulate) (struct trapframe *); 96 97 static int trap_pfault (struct trapframe 
*, int, vm_offset_t);
static void trap_fatal (struct trapframe *, int, vm_offset_t);
void dblfault_handler (void);
extern int vmm_enabled;

/* Rate limiter for the seg-fault diagnostic printed by trap_pfault() */
static struct krate segfltrate = { 1 };

#if 0
extern inthand_t IDTVEC(syscall);
#endif

/*
 * Human-readable names for trap numbers, indexed by T_* code.
 * Entries marked unused correspond to trap numbers this platform
 * does not report through this table.
 */
#define MAX_TRAP_MSG		30
static char *trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"system forced exception",		/*  7 T_ASTFLT */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
	"machine check trap",			/* 28 T_MCHK */
	"SIMD floating-point exception",	/* 29 T_XMMFLT */
	"reserved (unknown) fault",		/* 30 T_RESERVED */
};

#ifdef DDB
static int ddb_on_nmi = 1;
SYSCTL_INT(_machdep, OID_AUTO, ddb_on_nmi, CTLFLAG_RW,
	&ddb_on_nmi, 0, "Go to DDB on NMI");
#endif
static int panic_on_nmi = 1;
SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
	&panic_on_nmi, 0, "Panic on NMI");
/* Counters exported for observing passive-release effectiveness */
static int fast_release;
SYSCTL_INT(_machdep, OID_AUTO, fast_release, CTLFLAG_RW,
	&fast_release, 0, "Passive Release was optimal");
static int
slow_release;
SYSCTL_INT(_machdep, OID_AUTO, slow_release, CTLFLAG_RW,
	&slow_release, 0, "Passive Release was nonoptimal");

/*
 * Passively intercepts the thread switch function to increase
 * the thread priority from a user priority to a kernel priority, reducing
 * syscall and trap overhead for the case where no switch occurs.
 *
 * Synchronizes td_ucred with p_ucred.  This is used by system calls,
 * signal handling, faults, AST traps, and anything else that enters the
 * kernel from userland and provides the kernel with a stable read-only
 * copy of the process ucred.
 */
static __inline void
userenter(struct thread *curtd, struct proc *curp)
{
	struct ucred *ocred;
	struct ucred *ncred;

	curtd->td_release = lwkt_passive_release;

	/* Re-cache the process ucred in the thread if it changed */
	if (curtd->td_ucred != curp->p_ucred) {
		ncred = crhold(curp->p_ucred);
		ocred = curtd->td_ucred;
		curtd->td_ucred = ncred;
		if (ocred)
			crfree(ocred);
	}
}

/*
 * Handle signals, profiling, and other AST's and/or tasks that
 * must be completed before we can return to or try to return to userland.
 *
 * Note that td_sticks is a 64 bit quantity, but there's no point doing 64
 * arithmetic on the delta calculation so the absolute tick values are
 * truncated to an integer.
 *
 * Each check that can block re-runs the whole sequence (goto recheck)
 * so no condition raised while we were blocked is missed.
 */
static void
userret(struct lwp *lp, struct trapframe *frame, int sticks)
{
	struct proc *p = lp->lwp_proc;
	int sig;
	int ptok;

	/*
	 * Charge system time if profiling.  Note: times are in microseconds.
	 * This may do a copyout and block, so do it first even though it
	 * means some system time will be charged as user time.
	 */
	if (p->p_flags & P_PROFIL) {
		addupc_task(p, frame->tf_rip,
			(u_int)((int)lp->lwp_thread->td_sticks - sticks));
	}

recheck:
	/*
	 * Specific on-return-to-usermode checks (LWP_MP_WEXIT,
	 * LWP_MP_VNLRU, etc).
	 */
	if (lp->lwp_mpflags & LWP_MP_URETMASK)
		lwpuserret(lp);

	/*
	 * Block here if we are in a stopped state.
	 */
	if (STOPLWP(p, lp)) {
		lwkt_gettoken(&p->p_token);
		tstop();
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}

	/*
	 * Post any pending upcalls.  If running a virtual kernel be sure
	 * to restore the virtual kernel's vmspace before posting the upcall.
	 */
	if (p->p_flags & (P_SIGVTALRM | P_SIGPROF)) {
		lwkt_gettoken(&p->p_token);
		if (p->p_flags & P_SIGVTALRM) {
			p->p_flags &= ~P_SIGVTALRM;
			ksignal(p, SIGVTALRM);
		}
		if (p->p_flags & P_SIGPROF) {
			p->p_flags &= ~P_SIGPROF;
			ksignal(p, SIGPROF);
		}
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}

	/*
	 * Post any pending signals
	 *
	 * WARNING!  postsig() can exit and not return.
	 */
	if ((sig = CURSIG_LCK_TRACE(lp, &ptok)) != 0) {
		postsig(sig, ptok);
		goto recheck;
	}

	/*
	 * block here if we are swapped out, but still process signals
	 * (such as SIGKILL).  proc0 (the swapin scheduler) is already
	 * aware of our situation, we do not have to wake it up.
	 */
	if (p->p_flags & P_SWAPPEDOUT) {
		lwkt_gettoken(&p->p_token);
		get_mplock();
		p->p_flags |= P_SWAPWAIT;
		swapin_request();
		if (p->p_flags & P_SWAPWAIT)
			tsleep(p, PCATCH, "SWOUT", 0);
		p->p_flags &= ~P_SWAPWAIT;
		rel_mplock();
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}

	/*
	 * In a multi-threaded program it is possible for a thread to change
	 * signal state during a system call which temporarily changes the
	 * signal mask.  In this case postsig() might not be run and we
	 * have to restore the mask ourselves.
	 */
	if (lp->lwp_flags & LWP_OLDMASK) {
		lp->lwp_flags &= ~LWP_OLDMASK;
		lp->lwp_sigmask = lp->lwp_oldsigmask;
		goto recheck;
	}
}

/*
 * Cleanup from userenter and any passive release that might have occurred.
* We must reclaim the current-process designation before we can return
 * to usermode.  We also handle both LWKT and USER reschedule requests.
 */
static __inline void
userexit(struct lwp *lp)
{
	struct thread *td = lp->lwp_thread;
	/* globaldata_t gd = td->td_gd; */

	/*
	 * Handle stop requests at kernel priority.  Any requests queued
	 * after this loop will generate another AST.
	 */
	while (STOPLWP(lp->lwp_proc, lp)) {
		lwkt_gettoken(&lp->lwp_proc->p_token);
		tstop();
		lwkt_reltoken(&lp->lwp_proc->p_token);
	}

	/*
	 * Reduce our priority in preparation for a return to userland.  If
	 * our passive release function was still in place, our priority was
	 * never raised and does not need to be reduced.
	 */
	lwkt_passive_recover(td);

	/*
	 * Become the current user scheduled process if we aren't already,
	 * and deal with reschedule requests and other factors.
	 */
	lp->lwp_proc->p_usched->acquire_curproc(lp);
	/* WARNING: we may have migrated cpu's */
	/* gd = td->td_gd; */
}

/*
 * KTR tracepoints for kernel entry/exit events (trap, syscall, fork
 * return).  Compiled in only when KTR_KERNENTRY is enabled.
 */
#if !defined(KTR_KERNENTRY)
#define	KTR_KERNENTRY	KTR_ALL
#endif
KTR_INFO_MASTER(kernentry);
KTR_INFO(KTR_KERNENTRY, kernentry, trap, 0,
	"TRAP(pid %hd, tid %hd, trapno %ld, eva %lu)",
	pid_t pid, lwpid_t tid, register_t trapno, vm_offset_t eva);
KTR_INFO(KTR_KERNENTRY, kernentry, trap_ret, 0, "TRAP_RET(pid %hd, tid %hd)",
	pid_t pid, lwpid_t tid);
KTR_INFO(KTR_KERNENTRY, kernentry, syscall, 0, "SYSC(pid %hd, tid %hd, nr %ld)",
	pid_t pid, lwpid_t tid, register_t trapno);
KTR_INFO(KTR_KERNENTRY, kernentry, syscall_ret, 0, "SYSRET(pid %hd, tid %hd, err %d)",
	pid_t pid, lwpid_t tid, int err);
KTR_INFO(KTR_KERNENTRY, kernentry, fork_ret, 0, "FORKRET(pid %hd, tid %hd)",
	pid_t pid, lwpid_t tid);

/*
 * Exception, fault, and trap interface to the kernel.
* This common code is called from assembly language IDT gate entry
 * routines that prepare a suitable stack frame, and restore this
 * frame after the exception has been processed.
 *
 * This function is also called from doreti in an interlock to handle ASTs.
 * For example:  hardwareint->INTROUTINE->(set ast)->doreti->trap
 *
 * NOTE!  We have to retrieve the fault address prior to obtaining the
 * MP lock because get_mplock() may switch out.  YYY cr2 really ought
 * to be retrieved by the assembly code, not here.
 *
 * XXX gd_trap_nesting_level currently prevents lwkt_switch() from panicing
 * if an attempt is made to switch from a fast interrupt or IPI.  This is
 * necessary to properly take fatal kernel traps on SMP machines if
 * get_mplock() has to block.
 */

/*
 * Handle a trap taken while in user mode.  Dispatches on the trap
 * number, converting machine faults to signals posted to the lwp,
 * then runs the userret()/userexit() return-to-usermode path.
 */
void
user_trap(struct trapframe *frame)
{
	struct globaldata *gd = mycpu;
	struct thread *td = gd->gd_curthread;
	struct lwp *lp = td->td_lwp;
	struct proc *p;
	int sticks = 0;
	int i = 0, ucode = 0, type, code;
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
	lwkt_tokref_t curstop = td->td_toks_stop;
#endif
	vm_offset_t eva;

	p = td->td_proc;

	/* tf_addr carries the fault address for page faults only */
	if (frame->tf_trapno == T_PAGEFLT)
		eva = frame->tf_addr;
	else
		eva = 0;
#if 0
	kprintf("USER_TRAP AT %08lx xflags %ld trapno %ld eva %08lx\n",
		frame->tf_rip, frame->tf_xflags, frame->tf_trapno, eva);
#endif

	/*
	 * Everything coming from user mode runs through user_trap,
	 * including system calls.
	 */
	if (frame->tf_trapno == T_FAST_SYSCALL) {
		syscall2(frame);
		return;
	}

	KTR_LOG(kernentry_trap, lp->lwp_proc->p_pid, lp->lwp_tid,
		frame->tf_trapno, eva);

#ifdef DDB
	if (db_active) {
		eva = (frame->tf_trapno == T_PAGEFLT ?
			rcr2() : 0);
		++gd->gd_trap_nesting_level;
		trap_fatal(frame, TRUE, eva);
		--gd->gd_trap_nesting_level;
		goto out2;
	}
#endif

	type = frame->tf_trapno;
	code = frame->tf_err;

	userenter(td, p);

	sticks = (int)td->td_sticks;
	lp->lwp_md.md_regs = frame;

	switch (type) {
	case T_PRIVINFLT:	/* privileged instruction fault */
		i = SIGILL;
		ucode = ILL_PRVOPC;
		break;

	case T_BPTFLT:		/* bpt instruction fault */
	case T_TRCTRAP:		/* trace trap */
		frame->tf_rflags &= ~PSL_T;
		i = SIGTRAP;
		ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
		break;

	case T_ARITHTRAP:	/* arithmetic trap */
		ucode = code;
		i = SIGFPE;
		break;

	case T_ASTFLT:		/* Allow process switch */
		mycpu->gd_cnt.v_soft++;
		if (mycpu->gd_reqflags & RQF_AST_OWEUPC) {
			atomic_clear_int(&mycpu->gd_reqflags, RQF_AST_OWEUPC);
			addupc_task(p, p->p_prof.pr_addr, p->p_prof.pr_ticks);
		}
		goto out;

	/*
	 * The following two traps can happen in
	 * vm86 mode, and, if so, we want to handle
	 * them specially.
	 */
	case T_PROTFLT:		/* general protection fault */
	case T_STKFLT:		/* stack fault */
#if 0
		if (frame->tf_eflags & PSL_VM) {
			i = vm86_emulate((struct vm86frame *)frame);
			if (i == 0)
				goto out;
			break;
		}
#endif
		/* FALL THROUGH */

	case T_SEGNPFLT:	/* segment not present fault */
	case T_TSSFLT:		/* invalid TSS fault */
	case T_DOUBLEFLT:	/* double fault */
	default:
		i = SIGBUS;
		ucode = code + BUS_SEGM_FAULT ;
		break;

	case T_PAGEFLT:		/* page fault */
		/* 0/-1 means resolved or already-fatal; otherwise a signal */
		i = trap_pfault(frame, TRUE, eva);
		if (i == -1 || i == 0)
			goto out;


		if (i == SIGSEGV)
			ucode = SEGV_MAPERR;
		else {
			i = SIGSEGV;
			ucode = SEGV_ACCERR;
		}
		break;

	case T_DIVIDE:		/* integer divide fault */
		ucode = FPE_INTDIV;
		i = SIGFPE;
		break;

#if NISA > 0
	case T_NMI:
		/* machine/parity/power fail/"kitchen sink" faults */
		if (isa_nmi(code) == 0) {
#ifdef DDB
			/*
			 * NMI can be hooked up to a pushbutton
			 * for debugging.
			 */
			if (ddb_on_nmi) {
				kprintf ("NMI ... going to debugger\n");
				kdb_trap(type, 0, frame);
			}
#endif /* DDB */
			goto out2;
		} else if (panic_on_nmi)
			panic("NMI indicates hardware failure");
		break;
#endif /* NISA > 0 */

	case T_OFLOW:		/* integer overflow fault */
		ucode = FPE_INTOVF;
		i = SIGFPE;
		break;

	case T_BOUND:		/* bounds check fault */
		ucode = FPE_FLTSUB;
		i = SIGFPE;
		break;

	case T_DNA:
		/*
		 * Virtual kernel intercept - pass the DNA exception
		 * to the (emulated) virtual kernel if it asked to handle
		 * it.  This occurs when the virtual kernel is holding
		 * onto the FP context for a different emulated
		 * process then the one currently running.
		 *
		 * We must still call npxdna() since we may have
		 * saved FP state that the (emulated) virtual kernel
		 * needs to hand over to a different emulated process.
		 */
		if (lp->lwp_vkernel && lp->lwp_vkernel->ve &&
		    (td->td_pcb->pcb_flags & FP_VIRTFP)
		) {
			npxdna(frame);
			break;
		}

		/*
		 * The kernel may have switched out the FP unit's
		 * state, causing the user process to take a fault
		 * when it tries to use the FP unit.  Restore the
		 * state here
		 */
		if (npxdna(frame)) {
			gd->gd_cnt.v_trap++;
			goto out;
		}
		if (!pmath_emulate) {
			i = SIGFPE;
			ucode = FPE_FPU_NP_TRAP;
			break;
		}
		i = (*pmath_emulate)(frame);
		if (i == 0) {
			if (!(frame->tf_rflags & PSL_T))
				goto out2;
			frame->tf_rflags &= ~PSL_T;
			i = SIGTRAP;
		}
		/* else ucode = emulator_only_knows() XXX */
		break;

	case T_FPOPFLT:		/* FPU operand fetch fault */
		ucode = T_FPOPFLT;
		i = SIGILL;
		break;

	case T_XMMFLT:		/* SIMD floating-point exception */
		ucode = 0; /* XXX */
		i = SIGFPE;
		break;
	}

	/*
	 * Virtual kernel intercept - if the fault is directly related to a
	 * VM context managed by a virtual kernel then let the virtual kernel
	 * handle it.
	 */
	if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
		vkernel_trap(lp, frame);
		goto out;
	}

	/*
	 * Translate fault for emulators (e.g. Linux)
	 */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	trapsignal(lp, i, ucode);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", (u_long)eva);
		uprintf("\n");
	}
#endif

out:
	userret(lp, frame, sticks);
	userexit(lp);
out2:	;
	KTR_LOG(kernentry_trap_ret, lp->lwp_proc->p_pid, lp->lwp_tid);
#ifdef INVARIANTS
	KASSERT(crit_count == td->td_critcount,
		("trap: critical section count mismatch! %d/%d",
		crit_count, td->td_pri));
	KASSERT(curstop == td->td_toks_stop,
		("trap: extra tokens held after trap! %ld/%ld",
		curstop - &td->td_toks_base,
		td->td_toks_stop - &td->td_toks_base));
#endif
}

/*
 * Handle a trap taken while in kernel mode.  Most cases either fix up
 * the condition and resume (pcb_onfault, PSL_NT, spurious SIGTRAPs) or
 * go fatal via trap_fatal(); no userret path is run here.
 */
void
kern_trap(struct trapframe *frame)
{
	struct globaldata *gd = mycpu;
	struct thread *td = gd->gd_curthread;
	struct lwp *lp;
	struct proc *p;
	int i = 0, ucode = 0, type, code;
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
	lwkt_tokref_t curstop = td->td_toks_stop;
#endif
	vm_offset_t eva;

	lp = td->td_lwp;
	p = td->td_proc;

	/* tf_addr carries the fault address for page faults only */
	if (frame->tf_trapno == T_PAGEFLT)
		eva = frame->tf_addr;
	else
		eva = 0;

#ifdef DDB
	if (db_active) {
		++gd->gd_trap_nesting_level;
		trap_fatal(frame, FALSE, eva);
		--gd->gd_trap_nesting_level;
		goto out2;
	}
#endif

	type = frame->tf_trapno;
	code = frame->tf_err;

#if 0
kernel_trap:
#endif
	/* kernel trap */

	switch (type) {
	case T_PAGEFLT:			/* page fault */
		trap_pfault(frame, FALSE, eva);
		goto out2;

	case T_DNA:
		/*
		 * The kernel may be using npx for copying or other
		 * purposes.
		 */
		panic("kernel NPX should not happen");
		if (npxdna(frame))
			goto out2;
		break;

	case T_PROTFLT:		/* general protection fault */
	case T_SEGNPFLT:	/* segment not present fault */
		/*
		 * Invalid segment selectors and out of bounds
		 * %eip's and %esp's can be set up in user mode.
		 * This causes a fault in kernel mode when the
		 * kernel tries to return to user mode.  We want
		 * to get this fault so that we can fix the
		 * problem here and not have to check all the
		 * selectors and pointers when the user changes
		 * them.
		 */
		if (mycpu->gd_intr_nesting_level == 0) {
			/* recover via the registered onfault handler */
			if (td->td_pcb->pcb_onfault) {
				frame->tf_rip =
				    (register_t)td->td_pcb->pcb_onfault;
				goto out2;
			}
		}
		break;

	case T_TSSFLT:
		/*
		 * PSL_NT can be set in user mode and isn't cleared
		 * automatically when the kernel is entered.  This
		 * causes a TSS fault when the kernel attempts to
		 * `iret' because the TSS link is uninitialized.  We
		 * want to get this fault so that we can fix the
		 * problem here and not every time the kernel is
		 * entered.
		 */
		if (frame->tf_rflags & PSL_NT) {
			frame->tf_rflags &= ~PSL_NT;
			goto out2;
		}
		break;

	case T_TRCTRAP:	 /* trace trap */
#if 0
		if (frame->tf_eip == (int)IDTVEC(syscall)) {
			/*
			 * We've just entered system mode via the
			 * syscall lcall.  Continue single stepping
			 * silently until the syscall handler has
			 * saved the flags.
			 */
			goto out2;
		}
		if (frame->tf_eip == (int)IDTVEC(syscall) + 1) {
			/*
			 * The syscall handler has now saved the
			 * flags.  Stop single stepping it.
			 */
			frame->tf_eflags &= ~PSL_T;
			goto out2;
		}
#endif
#if 0
		/*
		 * Ignore debug register trace traps due to
		 * accesses in the user's address space, which
		 * can happen under several conditions such as
		 * if a user sets a watchpoint on a buffer and
		 * then passes that buffer to a system call.
		 * We still want to get TRCTRAPS for addresses
		 * in kernel space because that is useful when
		 * debugging the kernel.
		 */
		if (user_dbreg_trap()) {
			/*
			 * Reset breakpoint bits because the
			 * processor doesn't
			 */
			load_dr6(rdr6() & 0xfffffff0);
			goto out2;
		}
#endif
		/*
		 * Fall through (TRCTRAP kernel mode, kernel address)
		 */
	case T_BPTFLT:
		/*
		 * If DDB is enabled, let it handle the debugger trap.
		 * Otherwise, debugger traps "can't happen".
		 */
#ifdef DDB
		if (kdb_trap (type, 0, frame))
			goto out2;
#endif
		break;
	case T_DIVIDE:
		trap_fatal(frame, FALSE, eva);
		goto out2;
	case T_NMI:
		trap_fatal(frame, FALSE, eva);
		goto out2;
	case T_SYSCALL80:
	case T_FAST_SYSCALL:
		/*
		 * Ignore this trap generated from a spurious SIGTRAP.
		 *
		 * single stepping in / syscalls leads to spurious / SIGTRAP
		 *	so ignore
		 *
		 * Haiku (c) 2007 Simon 'corecode' Schubert
		 */
		goto out2;
	}

	/*
	 * Translate fault for emulators (e.g. Linux)
	 */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	gd->gd_cnt.v_trap++;
	trapsignal(lp, i, ucode);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", (u_long)eva);
		uprintf("\n");
	}
#endif

out2:
	;
#ifdef INVARIANTS
	KASSERT(crit_count == td->td_critcount,
		("trap: critical section count mismatch! %d/%d",
		crit_count, td->td_pri));
	KASSERT(curstop == td->td_toks_stop,
		("trap: extra tokens held after trap! %ld/%ld",
		curstop - &td->td_toks_base,
		td->td_toks_stop - &td->td_toks_base));
#endif
}

/*
 * Resolve a page fault at 'eva'.
 *
 * Returns 0 if the fault was resolved (or recovered via pcb_onfault),
 * -1 if it was fatal and trap_fatal() was invoked (kernel mode), or a
 * signal number (SIGBUS/SIGSEGV) the caller should post to the lwp.
 */
int
trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map = 0;
	int rv = 0;
	vm_prot_t ftype;
	thread_t td = curthread;
	struct lwp *lp = td->td_lwp;
	int fault_flags;

	va = trunc_page(eva);
	if (usermode == FALSE) {
		/*
		 * This is a fault on kernel virtual memory.
		 */
		map = &kernel_map;
	} else {
		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		if (lp != NULL)
			vm = lp->lwp_vmspace;

		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;
	}

	/* Derive the required protection from the hardware error code */
	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_READ | VM_PROT_WRITE;
	else if (frame->tf_err & PGEX_I)
		ftype = VM_PROT_EXECUTE;
	else
		ftype = VM_PROT_READ;

	if (map != &kernel_map) {
		/*
		 * Keep swapout from messing with us during this
		 *	critical time.
		 */
		PHOLD(lp->lwp_proc);

#if 0
		/*
		 * Grow the stack if necessary
		 */
		/* grow_stack returns false only if va falls into
		 * a growable stack region and the stack growth
		 * fails.  It returns true if va was not within
		 * a growable stack region, or if the stack
		 * growth succeeded.
		 */
		if (!grow_stack (map, va)) {
			rv = KERN_FAILURE;
			PRELE(lp->lwp_proc);
			goto nogo;
		}
#endif

		fault_flags = 0;
		if (usermode)
			fault_flags |= VM_FAULT_BURST | VM_FAULT_USERMODE;
		if (ftype & VM_PROT_WRITE)
			fault_flags |= VM_FAULT_DIRTY;
		else
			fault_flags |= VM_FAULT_NORMAL;
		rv = vm_fault(map, va, ftype, fault_flags);

		PRELE(lp->lwp_proc);
	} else {
		/*
		 * Don't have to worry about process locking or stacks in the kernel.
		 */
		rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		/* kernel-mode fault: try the onfault recovery vector first */
		if (td->td_gd->gd_intr_nesting_level == 0 &&
		    td->td_pcb->pcb_onfault) {
			frame->tf_rip = (register_t)td->td_pcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame, usermode, eva);
		return (-1);
	}

	/*
	 * NOTE: on x86_64 we have a tf_addr field in the trapframe, no
	 * kludge is needed to pass the fault address to signal handlers.
	 */
	struct proc *p = td->td_proc;
	/* rate-limited diagnostic; the signal itself is posted by the caller */
	krateprintf(&segfltrate,
		    "seg-fault accessing address %p "
		    "rip=%p pid=%d p_comm=%s\n",
		    (void *)va,
		    (void *)frame->tf_rip, p->p_pid, p->p_comm);
	/* Debugger("seg-fault"); */

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}

/*
 * Print diagnostics for an unrecoverable trap and panic.  Returns only
 * if a kernel debugger (DDB) claims the trap.
 */
static void
trap_fatal(struct trapframe *frame, int usermode, vm_offset_t eva)
{
	int code, type, ss;
	long rsp;

	code = frame->tf_xflags;
	type = frame->tf_trapno;

	if (type <= MAX_TRAP_MSG) {
		kprintf("\n\nFatal trap %d: %s while in %s mode\n",
			type, trap_msg[type],
			(usermode ? "user" : "kernel"));
	}
	/* two separate prints in case of a trap on an unmapped page */
	kprintf("cpuid = %d\n", mycpu->gd_cpuid);
	if (type == T_PAGEFLT) {
		kprintf("fault virtual address = %p\n", (void *)eva);
		kprintf("fault code = %s %s, %s\n",
			usermode ? "user" : "supervisor",
			code & PGEX_W ? "write" : "read",
			code & PGEX_P ?
"protection violation" : "page not present"); 940 } 941 kprintf("instruction pointer = 0x%lx:0x%lx\n", 942 frame->tf_cs & 0xffff, frame->tf_rip); 943 if (usermode) { 944 ss = frame->tf_ss & 0xffff; 945 rsp = frame->tf_rsp; 946 } else { 947 ss = GSEL(GDATA_SEL, SEL_KPL); 948 rsp = (long)&frame->tf_rsp; 949 } 950 kprintf("stack pointer = 0x%x:0x%lx\n", ss, rsp); 951 kprintf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp); 952 kprintf("processor eflags = "); 953 if (frame->tf_rflags & PSL_T) 954 kprintf("trace trap, "); 955 if (frame->tf_rflags & PSL_I) 956 kprintf("interrupt enabled, "); 957 if (frame->tf_rflags & PSL_NT) 958 kprintf("nested task, "); 959 if (frame->tf_rflags & PSL_RF) 960 kprintf("resume, "); 961 #if 0 962 if (frame->tf_eflags & PSL_VM) 963 kprintf("vm86, "); 964 #endif 965 kprintf("IOPL = %jd\n", (intmax_t)((frame->tf_rflags & PSL_IOPL) >> 12)); 966 kprintf("current process = "); 967 if (curproc) { 968 kprintf("%lu (%s)\n", 969 (u_long)curproc->p_pid, curproc->p_comm ? 970 curproc->p_comm : ""); 971 } else { 972 kprintf("Idle\n"); 973 } 974 kprintf("current thread = pri %d ", curthread->td_pri); 975 if (curthread->td_critcount) 976 kprintf("(CRIT)"); 977 kprintf("\n"); 978 /** 979 * XXX FIXME: 980 * we probably SHOULD have stopped the other CPUs before now! 981 * another CPU COULD have been touching cpl at this moment... 982 */ 983 kprintf(" <- SMP: XXX"); 984 kprintf("\n"); 985 986 #ifdef KDB 987 if (kdb_trap(&psl)) 988 return; 989 #endif 990 #ifdef DDB 991 if ((debugger_on_panic || db_active) && kdb_trap(type, code, frame)) 992 return; 993 #endif 994 kprintf("trap number = %d\n", type); 995 if (type <= MAX_TRAP_MSG) 996 panic("%s", trap_msg[type]); 997 else 998 panic("unknown/reserved trap"); 999 } 1000 1001 /* 1002 * Double fault handler. Called when a fault occurs while writing 1003 * a frame for a trap/exception onto the stack. 
This usually occurs
 * when the stack overflows (such is the case with infinite recursion,
 * for example).
 *
 * XXX Note that the current PTD gets replaced by IdlePTD when the
 * task switch occurs. This means that the stack that was active at
 * the time of the double fault is not available at <kstack> unless
 * the machine was idle when the double fault occurred. The downside
 * of this is that "trace <ebp>" in ddb won't work.
 */
void
dblfault_handler(void)
{
#if 0 /* JG */
	struct mdglobaldata *gd = mdcpu;
#endif

	kprintf("\nFatal double fault:\n");
#if 0 /* JG */
	kprintf("rip = 0x%lx\n", gd->gd_common_tss.tss_rip);
	kprintf("rsp = 0x%lx\n", gd->gd_common_tss.tss_rsp);
	kprintf("rbp = 0x%lx\n", gd->gd_common_tss.tss_rbp);
#endif
	/* two separate prints in case of a trap on an unmapped page */
	kprintf("cpuid = %d\n", mycpu->gd_cpuid);
	panic("double fault");
}

/*
 * syscall2 -	MP aware system call request C handler
 *
 * A system call is essentially treated as a trap except that the
 * MP lock is not held on entry or return.  We are responsible for
 * obtaining the MP lock if necessary and for handling ASTs
 * (e.g. a task switch) prior to return.
 *
 * In general, only simple access and manipulation of curproc and
 * the current stack is allowed without having to hold MP lock.
 *
 * MPSAFE - note that large sections of this routine are run without
 *	    the MP lock.
 */
void
syscall2(struct trapframe *frame)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	caddr_t params;
	struct sysent *callp;
	register_t orig_tf_rflags;
	int sticks;
	int error;
	int narg;
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
	lwkt_tokref_t curstop = td->td_toks_stop;
#endif
	register_t *argp;
	u_int code;
	int reg, regcnt;
	union sysunion args;
	register_t *argsdst;

	mycpu->gd_cnt.v_syscall++;

	KTR_LOG(kernentry_syscall, lp->lwp_proc->p_pid, lp->lwp_tid,
		frame->tf_rax);

	userenter(td, p);	/* lazy raise our priority */

	reg = 0;
	regcnt = 6;
	/*
	 * Misc
	 */
	sticks = (int)td->td_sticks;
	orig_tf_rflags = frame->tf_rflags;

	/*
	 * Virtual kernel intercept - if a VM context managed by a virtual
	 * kernel issues a system call the virtual kernel handles it, not us.
	 * Restore the virtual kernel context and return from its system
	 * call.  The current frame is copied out to the virtual kernel.
	 */
	if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
		vkernel_trap(lp, frame);
		error = EJUSTRETURN;
		callp = NULL;
		code = 0;
		goto out;
	}

	/*
	 * Get the system call parameters and account for time
	 */
	lp->lwp_md.md_regs = frame;
	params = (caddr_t)frame->tf_rsp + sizeof(register_t);
	code = frame->tf_rax;

	/* indirect syscall: real number is in the first argument register */
	if (code == SYS_syscall || code == SYS___syscall) {
		code = frame->tf_rdi;
		reg++;
		regcnt--;
	}

	/* out-of-range numbers map to entry 0 (nosys) */
	if (code >= p->p_sysent->sv_size)
		callp = &p->p_sysent->sv_table[0];
	else
		callp = &p->p_sysent->sv_table[code];

	narg = callp->sy_narg & SYF_ARGMASK;

	/*
	 * On x86_64 we get up to six arguments in registers. The rest are
	 * on the stack.
The first six members of 'struct trapframe' happen
 * to be the registers used to pass arguments, in exactly the right
 * order.
 */
	argp = &frame->tf_rdi;
	argp += reg;
	argsdst = (register_t *)(&args.nosys.sysmsg + 1);

	/*
	 * JG can we overflow the space pointed to by 'argsdst'
	 * either with 'bcopy' or with 'copyin'?
	 */
	bcopy(argp, argsdst, sizeof(register_t) * regcnt);

	/*
	 * copyin is MP aware, but the tracing code is not
	 */
	if (narg > regcnt) {
		KASSERT(params != NULL, ("copyin args with no params!"));
		error = copyin(params, &argsdst[regcnt],
			       (narg - regcnt) * sizeof(register_t));
		if (error) {
#ifdef KTRACE
			if (KTRPOINT(td, KTR_SYSCALL)) {
				ktrsyscall(lp, code, narg,
					   (void *)(&args.nosys.sysmsg + 1));
			}
#endif
			goto bad;
		}
	}

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL)) {
		ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1));
	}
#endif

	/*
	 * Default return value is 0 (will be copied to %rax).  Double-value
	 * returns use %rax and %rdx.  %rdx is left unchanged for system
	 * calls which return only one result.
	 */
	args.sysmsg_fds[0] = 0;
	args.sysmsg_fds[1] = frame->tf_rdx;

	/*
	 * The syscall might manipulate the trap frame. If it does it
	 * will probably return EJUSTRETURN.
	 */
	args.sysmsg_frame = frame;

	STOPEVENT(p, S_SCE, narg);	/* MP aware */

	/*
	 * NOTE: All system calls run MPSAFE now.  The system call itself
	 *	 is responsible for getting the MP lock.
1175 */ 1176 error = (*callp->sy_call)(&args); 1177 1178 #if 0 1179 kprintf("system call %d returned %d\n", code, error); 1180 #endif 1181 1182 out: 1183 /* 1184 * MP SAFE (we may or may not have the MP lock at this point) 1185 */ 1186 switch (error) { 1187 case 0: 1188 /* 1189 * Reinitialize proc pointer `p' as it may be different 1190 * if this is a child returning from fork syscall. 1191 */ 1192 p = curproc; 1193 lp = curthread->td_lwp; 1194 frame->tf_rax = args.sysmsg_fds[0]; 1195 frame->tf_rdx = args.sysmsg_fds[1]; 1196 frame->tf_rflags &= ~PSL_C; 1197 break; 1198 case ERESTART: 1199 /* 1200 * Reconstruct pc, we know that 'syscall' is 2 bytes. 1201 * We have to do a full context restore so that %r10 1202 * (which was holding the value of %rcx) is restored for 1203 * the next iteration. 1204 */ 1205 frame->tf_rip -= frame->tf_err; 1206 frame->tf_r10 = frame->tf_rcx; 1207 break; 1208 case EJUSTRETURN: 1209 break; 1210 case EASYNC: 1211 panic("Unexpected EASYNC return value (for now)"); 1212 default: 1213 bad: 1214 if (p->p_sysent->sv_errsize) { 1215 if (error >= p->p_sysent->sv_errsize) 1216 error = -1; /* XXX */ 1217 else 1218 error = p->p_sysent->sv_errtbl[error]; 1219 } 1220 frame->tf_rax = error; 1221 frame->tf_rflags |= PSL_C; 1222 break; 1223 } 1224 1225 /* 1226 * Traced syscall. trapsignal() is not MP aware. 1227 */ 1228 if (orig_tf_rflags & PSL_T) { 1229 frame->tf_rflags &= ~PSL_T; 1230 trapsignal(lp, SIGTRAP, 0); 1231 } 1232 1233 /* 1234 * Handle reschedule and other end-of-syscall issues 1235 */ 1236 userret(lp, frame, sticks); 1237 1238 #ifdef KTRACE 1239 if (KTRPOINT(td, KTR_SYSRET)) { 1240 ktrsysret(lp, code, error, args.sysmsg_result); 1241 } 1242 #endif 1243 1244 /* 1245 * This works because errno is findable through the 1246 * register set. If we ever support an emulation where this 1247 * is not the case, this code will need to be revisited. 
1248 */ 1249 STOPEVENT(p, S_SCX, code); 1250 1251 userexit(lp); 1252 KTR_LOG(kernentry_syscall_ret, lp->lwp_proc->p_pid, lp->lwp_tid, error); 1253 #ifdef INVARIANTS 1254 KASSERT(&td->td_toks_base == td->td_toks_stop, 1255 ("syscall: critical section count mismatch! %d/%d", 1256 crit_count, td->td_pri)); 1257 KASSERT(curstop == td->td_toks_stop, 1258 ("syscall: extra tokens held after trap! %ld", 1259 td->td_toks_stop - &td->td_toks_base)); 1260 #endif 1261 } 1262 1263 /* 1264 * NOTE: mplock not held at any point 1265 */ 1266 void 1267 fork_return(struct lwp *lp, struct trapframe *frame) 1268 { 1269 frame->tf_rax = 0; /* Child returns zero */ 1270 frame->tf_rflags &= ~PSL_C; /* success */ 1271 frame->tf_rdx = 1; 1272 1273 generic_lwp_return(lp, frame); 1274 KTR_LOG(kernentry_fork_ret, lp->lwp_proc->p_pid, lp->lwp_tid); 1275 } 1276 1277 /* 1278 * Simplified back end of syscall(), used when returning from fork() 1279 * directly into user mode. 1280 * 1281 * This code will return back into the fork trampoline code which then 1282 * runs doreti. 1283 * 1284 * NOTE: The mplock is not held at any point. 1285 */ 1286 void 1287 generic_lwp_return(struct lwp *lp, struct trapframe *frame) 1288 { 1289 struct proc *p = lp->lwp_proc; 1290 1291 /* 1292 * Check for exit-race. If one lwp exits the process concurrent with 1293 * another lwp creating a new thread, the two operations may cross 1294 * each other resulting in the newly-created lwp not receiving a 1295 * KILL signal. 1296 */ 1297 if (p->p_flags & P_WEXIT) { 1298 lwpsignal(p, lp, SIGKILL); 1299 } 1300 1301 /* 1302 * Newly forked processes are given a kernel priority. We have to 1303 * adjust the priority to a normal user priority and fake entry 1304 * into the kernel (call userenter()) to install a passive release 1305 * function just in case userret() decides to stop the process. This 1306 * can occur when ^Z races a fork. 
If we do not install the passive 1307 * release function the current process designation will not be 1308 * released when the thread goes to sleep. 1309 */ 1310 lwkt_setpri_self(TDPRI_USER_NORM); 1311 userenter(lp->lwp_thread, p); 1312 userret(lp, frame, 0); 1313 #ifdef KTRACE 1314 if (KTRPOINT(lp->lwp_thread, KTR_SYSRET)) 1315 ktrsysret(lp, SYS_fork, 0, 0); 1316 #endif 1317 lp->lwp_flags |= LWP_PASSIVE_ACQ; 1318 userexit(lp); 1319 lp->lwp_flags &= ~LWP_PASSIVE_ACQ; 1320 } 1321 1322 /* 1323 * doreti has turned into this. The frame is directly on the stack. We 1324 * pull everything else we need (fpu and tls context) from the current 1325 * thread. 1326 * 1327 * Note on fpu interactions: In a virtual kernel, the fpu context for 1328 * an emulated user mode process is not shared with the virtual kernel's 1329 * fpu context, so we only have to 'stack' fpu contexts within the virtual 1330 * kernel itself, and not even then since the signal() contexts that we care 1331 * about save and restore the FPU state (I think anyhow). 1332 * 1333 * vmspace_ctl() returns an error only if it had problems instaling the 1334 * context we supplied or problems copying data to/from our VM space. 1335 */ 1336 void 1337 go_user(struct intrframe *frame) 1338 { 1339 struct trapframe *tf = (void *)&frame->if_rdi; 1340 globaldata_t gd; 1341 int r; 1342 void *id; 1343 1344 /* 1345 * Interrupts may be disabled on entry, make sure all signals 1346 * can be received before beginning our loop. 1347 */ 1348 sigsetmask(0); 1349 1350 /* 1351 * Switch to the current simulated user process, then call 1352 * user_trap() when we break out of it (usually due to a signal). 1353 */ 1354 for (;;) { 1355 #if 1 1356 /* 1357 * Always make the FPU state correct. This should generally 1358 * be faster because the cost of taking a #NM fault through 1359 * the vkernel to the real kernel is astronomical. 
1360 */ 1361 crit_enter(); 1362 tf->tf_xflags &= ~PGEX_FPFAULT; 1363 if (mdcpu->gd_npxthread != curthread) { 1364 if (mdcpu->gd_npxthread) 1365 npxsave(mdcpu->gd_npxthread->td_savefpu); 1366 npxdna(tf); 1367 } 1368 #else 1369 /* 1370 * Tell the real kernel whether it is ok to use the FP 1371 * unit or not, allowing us to take a T_DNA exception 1372 * if the context tries to use the FP. 1373 */ 1374 if (mdcpu->gd_npxthread == curthread) { 1375 tf->tf_xflags &= ~PGEX_FPFAULT; 1376 } else { 1377 tf->tf_xflags |= PGEX_FPFAULT; 1378 } 1379 #endif 1380 1381 /* 1382 * Run emulated user process context. This call interlocks 1383 * with new mailbox signals. 1384 * 1385 * Set PGEX_U unconditionally, indicating a user frame (the 1386 * bit is normally set only by T_PAGEFLT). 1387 */ 1388 if (vmm_enabled) 1389 id = (void *)vtophys(curproc->p_vmspace->vm_pmap.pm_pml4); 1390 else 1391 id = &curproc->p_vmspace->vm_pmap; 1392 1393 /* 1394 * The GDF_VIRTUSER hack helps statclock() figure out who 1395 * the tick belongs to. 1396 */ 1397 gd = mycpu; 1398 gd->gd_flags |= GDF_VIRTUSER; 1399 r = vmspace_ctl(id, VMSPACE_CTL_RUN, tf, 1400 &curthread->td_savevext); 1401 gd->gd_flags &= ~GDF_VIRTUSER; 1402 1403 frame->if_xflags |= PGEX_U; 1404 1405 /* 1406 * Immediately save the user FPU state. The vkernel is a 1407 * user program and libraries like libc will use the FP 1408 * unit. 
1409 */ 1410 if (mdcpu->gd_npxthread == curthread) { 1411 npxsave(mdcpu->gd_npxthread->td_savefpu); 1412 } 1413 crit_exit(); 1414 #if 0 1415 kprintf("GO USER %d trap %ld EVA %08lx RIP %08lx RSP %08lx XFLAGS %02lx/%02lx\n", 1416 r, tf->tf_trapno, tf->tf_addr, tf->tf_rip, tf->tf_rsp, 1417 tf->tf_xflags, frame->if_xflags); 1418 #endif 1419 if (r < 0) { 1420 if (errno != EINTR) 1421 panic("vmspace_ctl failed error %d", errno); 1422 } else { 1423 if (tf->tf_trapno) { 1424 user_trap(tf); 1425 } 1426 } 1427 if (mycpu->gd_reqflags & RQF_AST_MASK) { 1428 tf->tf_trapno = T_ASTFLT; 1429 user_trap(tf); 1430 } 1431 tf->tf_trapno = 0; 1432 } 1433 } 1434 1435 /* 1436 * If PGEX_FPFAULT is set then set FP_VIRTFP in the PCB to force a T_DNA 1437 * fault (which is then passed back to the virtual kernel) if an attempt is 1438 * made to use the FP unit. 1439 * 1440 * XXX this is a fairly big hack. 1441 */ 1442 void 1443 set_vkernel_fp(struct trapframe *frame) 1444 { 1445 struct thread *td = curthread; 1446 1447 if (frame->tf_xflags & PGEX_FPFAULT) { 1448 td->td_pcb->pcb_flags |= FP_VIRTFP; 1449 if (mdcpu->gd_npxthread == td) 1450 npxexit(); 1451 } else { 1452 td->td_pcb->pcb_flags &= ~FP_VIRTFP; 1453 } 1454 } 1455 1456 /* 1457 * Called from vkernel_trap() to fixup the vkernel's syscall 1458 * frame for vmspace_ctl() return. 1459 */ 1460 void 1461 cpu_vkernel_trap(struct trapframe *frame, int error) 1462 { 1463 frame->tf_rax = error; 1464 if (error) 1465 frame->tf_rflags |= PSL_C; 1466 else 1467 frame->tf_rflags &= ~PSL_C; 1468 } 1469